summaryrefslogtreecommitdiffstats
path: root/src/d3d11
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-15 20:38:23 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-15 20:38:23 +0000
commitff6e3c025658a5fa1affd094f220b623e7e1b24b (patch)
tree9faab72d69c92d24e349d184f5869b9796f17e0c /src/d3d11
parentInitial commit. (diff)
downloadlibplacebo-ff6e3c025658a5fa1affd094f220b623e7e1b24b.tar.xz
libplacebo-ff6e3c025658a5fa1affd094f220b623e7e1b24b.zip
Adding upstream version 6.338.2.upstream/6.338.2upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/d3d11')
-rw-r--r--src/d3d11/common.h66
-rw-r--r--src/d3d11/context.c488
-rw-r--r--src/d3d11/formats.c293
-rw-r--r--src/d3d11/formats.h36
-rw-r--r--src/d3d11/gpu.c685
-rw-r--r--src/d3d11/gpu.h212
-rw-r--r--src/d3d11/gpu_buf.c310
-rw-r--r--src/d3d11/gpu_pass.c1293
-rw-r--r--src/d3d11/gpu_tex.c745
-rw-r--r--src/d3d11/meson.build41
-rw-r--r--src/d3d11/stubs.c56
-rw-r--r--src/d3d11/swapchain.c667
-rw-r--r--src/d3d11/utils.c500
-rw-r--r--src/d3d11/utils.h88
14 files changed, 5480 insertions, 0 deletions
diff --git a/src/d3d11/common.h b/src/d3d11/common.h
new file mode 100644
index 0000000..e14b709
--- /dev/null
+++ b/src/d3d11/common.h
@@ -0,0 +1,66 @@
+/*
+ * This file is part of libplacebo.
+ *
+ * libplacebo is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libplacebo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "../common.h"
+#include "../log.h"
+
+#ifdef PL_HAVE_DXGI_DEBUG
+#include <dxgidebug.h>
+#endif
+
+#include <libplacebo/d3d11.h>
+
+// Shared struct used to hold the D3D11 device and associated interfaces
+struct d3d11_ctx {
+ // Log handle given to pl_d3d11_create
+ pl_log log;
+ // The public pl_d3d11 object whose private data is this struct
+ pl_d3d11 d3d11;
+
+ // Copy of the device from pl_d3d11 for convenience. Does not hold an
+ // additional reference.
+ ID3D11Device *dev;
+
+ // DXGI device. This does hold a reference.
+ IDXGIDevice1 *dxgi_dev;
+
+#ifdef PL_HAVE_DXGI_DEBUG
+ // Debug interfaces
+ // `debug` is only acquired when leak checking was requested; `iqueue` is
+ // acquired whenever the debug layer is initialized (see init_debug_layer)
+ IDXGIDebug *debug;
+ IDXGIInfoQueue *iqueue;
+ uint64_t last_discarded; // Last count of discarded messages
+ // NOTE(review): presumably scratch storage for messages read back from
+ // `iqueue`; its users are not visible in this header - confirm
+ DXGI_INFO_QUEUE_MESSAGE *dxgi_msg;
+#endif
+
+ // pl_gpu_is_failed (We saw a device removed error!)
+ bool is_failed;
+};
+
+// DDK value. Apparently some D3D functions can return this instead of the
+// proper user-mode error code. See:
+// https://docs.microsoft.com/en-us/windows/win32/api/dxgi/nf-dxgi-idxgiswapchain-present
+#define D3DDDIERR_DEVICEREMOVED (0x88760870)
+
+// Fallback definitions for the D3D11_FORMAT_SUPPORT2 bits used by the format
+// setup code. They are only defined here when no macro of the same name is
+// already visible from the included headers.
+#ifndef D3D11_FORMAT_SUPPORT2_UAV_TYPED_STORE
+#define D3D11_FORMAT_SUPPORT2_UAV_TYPED_STORE (0x80)
+#endif
+#ifndef D3D11_FORMAT_SUPPORT2_UAV_TYPED_LOAD
+#define D3D11_FORMAT_SUPPORT2_UAV_TYPED_LOAD (0x40)
+#endif
+// Provide the DXGI_DEBUG_D3D11 GUID ourselves unless PL_HAVE_DXGI_DEBUG_D3D11
+// is defined (presumably set by the build system when the toolchain's
+// dxgidebug.h already declares it - TODO confirm)
+#ifndef PL_HAVE_DXGI_DEBUG_D3D11
+DEFINE_GUID(DXGI_DEBUG_D3D11, 0x4b99317b, 0xac39, 0x4aa6, 0xbb, 0xb, 0xba, 0xa0, 0x47, 0x84, 0x79, 0x8f);
+#endif
diff --git a/src/d3d11/context.c b/src/d3d11/context.c
new file mode 100644
index 0000000..e0ba90f
--- /dev/null
+++ b/src/d3d11/context.c
@@ -0,0 +1,488 @@
+/*
+ * This file is part of libplacebo.
+ *
+ * libplacebo is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libplacebo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "gpu.h"
+
+// Windows 8 enum value, not present in mingw-w64 v7
+#define DXGI_ADAPTER_FLAG_SOFTWARE (2)
+
+// Parameters used by pl_d3d11_create when the caller passes NULL
+const struct pl_d3d11_params pl_d3d11_default_params = { PL_D3D11_DEFAULTS };
+
+// One-time initialization guard used by d3d11_load()
+static INIT_ONCE d3d11_once = INIT_ONCE_STATIC_INIT;
+// Entry points resolved at runtime by d3d11_load(). Each pointer stays NULL
+// when its DLL or export could not be found, so callers check before use.
+static PFN_D3D11_CREATE_DEVICE pD3D11CreateDevice = NULL;
+static __typeof__(&CreateDXGIFactory1) pCreateDXGIFactory1 = NULL;
+#ifdef PL_HAVE_DXGI_DEBUG
+static __typeof__(&DXGIGetDebugInterface) pDXGIGetDebugInterface = NULL;
+#endif
+
+// Resolve the D3D11/DXGI entry points from their DLLs, at most once.
+//
+// Fills in pD3D11CreateDevice, pCreateDXGIFactory1 and (when built with
+// PL_HAVE_DXGI_DEBUG) pDXGIGetDebugInterface. A pointer is left NULL if the
+// corresponding library or export is unavailable; callers must check the
+// pointer they need before calling through it.
+static void d3d11_load(void)
+{
+    BOOL pending = FALSE;
+
+    // Synchronous one-time initialization: only the single caller that is
+    // handed pending == TRUE performs the loading, and only that caller may
+    // finish the sequence with InitOnceComplete. Everyone else is done.
+    if (!InitOnceBeginInitialize(&d3d11_once, 0, &pending, NULL) || !pending)
+        return;
+
+    HMODULE d3d11 = LoadLibraryW(L"d3d11.dll");
+    if (d3d11) {
+        pD3D11CreateDevice = (void *)
+            GetProcAddress(d3d11, "D3D11CreateDevice");
+    }
+
+    HMODULE dxgi = LoadLibraryW(L"dxgi.dll");
+    if (dxgi) {
+        pCreateDXGIFactory1 = (void *)
+            GetProcAddress(dxgi, "CreateDXGIFactory1");
+    }
+
+#ifdef PL_HAVE_DXGI_DEBUG
+    HMODULE dxgi_debug = LoadLibraryW(L"dxgidebug.dll");
+    if (dxgi_debug) {
+        pDXGIGetDebugInterface = (void *)
+            GetProcAddress(dxgi_debug, "DXGIGetDebugInterface");
+    }
+#endif
+
+    // Mark initialization as finished, releasing any concurrent callers that
+    // are blocked in InitOnceBeginInitialize
+    InitOnceComplete(&d3d11_once, 0, NULL);
+}
+
+// Select the contiguous run of D3D_FEATURE_LEVELs lying between max_fl and
+// min_fl (both inclusive), ordered from highest to lowest.
+//
+// On return, *out points into a static table at the first level that does not
+// exceed max_fl. The return value is the number of usable entries starting
+// there, which is 0 when no level falls inside the requested range.
+static int get_feature_levels(int max_fl, int min_fl,
+                              const D3D_FEATURE_LEVEL **out)
+{
+    static const D3D_FEATURE_LEVEL levels[] = {
+        D3D_FEATURE_LEVEL_12_1,
+        D3D_FEATURE_LEVEL_12_0,
+        D3D_FEATURE_LEVEL_11_1,
+        D3D_FEATURE_LEVEL_11_0,
+        D3D_FEATURE_LEVEL_10_1,
+        D3D_FEATURE_LEVEL_10_0,
+        D3D_FEATURE_LEVEL_9_3,
+        D3D_FEATURE_LEVEL_9_2,
+        D3D_FEATURE_LEVEL_9_1,
+    };
+    static const int levels_len = PL_ARRAY_SIZE(levels);
+
+    // Skip every leading level that is above the requested maximum
+    int first = 0;
+    while (first < levels_len && levels[first] > max_fl)
+        first++;
+
+    // Count how many of the remaining levels are still at or above the minimum
+    int count = 0;
+    while (first + count < levels_len && levels[first + count] >= min_fl)
+        count++;
+
+    *out = &levels[first];
+    return count;
+}
+
+// True when both halves of the LUID are zero, i.e. no adapter LUID was given
+static bool is_null_luid(LUID luid)
+{
+    if (luid.LowPart != 0)
+        return false;
+    return luid.HighPart == 0;
+}
+
+// Look up the DXGI adapter whose LUID equals adapter_luid.
+//
+// Returns the adapter as an IDXGIAdapter carrying its own reference, which
+// the caller must release, or NULL if dxgi.dll is unavailable, the factory
+// cannot be created, enumeration fails, or no adapter matches. Every failure
+// is logged before returning.
+static IDXGIAdapter *get_adapter(pl_d3d11 d3d11, LUID adapter_luid)
+{
+    struct d3d11_ctx *ctx = PL_PRIV(d3d11);
+    IDXGIFactory1 *factory = NULL;
+    IDXGIAdapter1 *adapter1 = NULL;
+    IDXGIAdapter *adapter = NULL;
+    HRESULT hr;
+
+    if (!pCreateDXGIFactory1) {
+        PL_FATAL(ctx, "Failed to load dxgi.dll");
+        goto error;
+    }
+
+    // The factory is dereferenced right below, so a failed creation must not
+    // be allowed to fall through with factory == NULL
+    D3D(pCreateDXGIFactory1(&IID_IDXGIFactory1, (void **) &factory));
+
+    for (int i = 0;; i++) {
+        hr = IDXGIFactory1_EnumAdapters1(factory, i, &adapter1);
+        if (hr == DXGI_ERROR_NOT_FOUND)
+            break; // ran out of adapters without finding a match
+        if (FAILED(hr)) {
+            PL_FATAL(ctx, "Failed to enumerate adapters");
+            goto error;
+        }
+
+        DXGI_ADAPTER_DESC1 desc;
+        D3D(IDXGIAdapter1_GetDesc1(adapter1, &desc));
+        if (desc.AdapterLuid.LowPart == adapter_luid.LowPart &&
+            desc.AdapterLuid.HighPart == adapter_luid.HighPart)
+        {
+            break; // keep the reference held in adapter1
+        }
+
+        SAFE_RELEASE(adapter1);
+    }
+    if (!adapter1) {
+        PL_FATAL(ctx, "Adapter with LUID %08lx%08lx not found",
+                 adapter_luid.HighPart, adapter_luid.LowPart);
+        goto error;
+    }
+
+    D3D(IDXGIAdapter1_QueryInterface(adapter1, &IID_IDXGIAdapter,
+                                     (void **) &adapter));
+
+error:
+    SAFE_RELEASE(factory);
+    SAFE_RELEASE(adapter1);
+    return adapter;
+}
+
+// Probe for the Direct3D SDK (debug) layers by asking for a NULL-driver device
+// with the debug flag set. No device object is requested; only the result
+// code matters.
+static bool has_sdk_layers(void)
+{
+    // This will fail if the SDK layers aren't installed
+    HRESULT probe = pD3D11CreateDevice(NULL, D3D_DRIVER_TYPE_NULL, NULL,
+                                       D3D11_CREATE_DEVICE_DEBUG, NULL, 0,
+                                       D3D11_SDK_VERSION, NULL, NULL, NULL);
+    return SUCCEEDED(probe);
+}
+
+// Create a new ID3D11Device according to params.
+//
+// The adapter is taken from params->adapter, or looked up by
+// params->adapter_luid, or left to the runtime (hardware or WARP). Creation
+// is retried with progressively lower maximum feature levels on E_INVALIDARG,
+// and finally with WARP when params->allow_software permits it.
+//
+// Returns the new device (one reference, owned by the caller) or NULL on
+// failure. On success d3d11->software records whether WARP was selected here.
+static ID3D11Device *create_device(struct pl_d3d11_t *d3d11,
+                                   const struct pl_d3d11_params *params)
+{
+    struct d3d11_ctx *ctx = PL_PRIV(d3d11);
+    bool debug = params->debug;
+    bool warp = params->force_software;
+    int max_fl = params->max_feature_level;
+    int min_fl = params->min_feature_level;
+    ID3D11Device *dev = NULL;
+    IDXGIDevice1 *dxgi_dev = NULL;
+    IDXGIAdapter *adapter = NULL;
+    bool release_adapter = false;
+    HRESULT hr;
+
+    d3d11_load();
+
+    if (!pD3D11CreateDevice) {
+        PL_FATAL(ctx, "Failed to load d3d11.dll");
+        goto error;
+    }
+
+    if (params->adapter) {
+        // Borrowed from the caller; not released here
+        adapter = params->adapter;
+    } else if (!is_null_luid(params->adapter_luid)) {
+        adapter = get_adapter(d3d11, params->adapter_luid);
+        // The caller asked for one specific adapter. If it can't be obtained
+        // (get_adapter has already logged why), fail rather than silently
+        // creating the device on some other adapter.
+        if (!adapter)
+            goto error;
+        release_adapter = true;
+    }
+
+    if (debug && !has_sdk_layers()) {
+        PL_INFO(ctx, "Debug layer not available, removing debug flag");
+        debug = false;
+    }
+
+    // Return here to retry creating the device
+    do {
+        // Use these default feature levels if they are not set
+        max_fl = PL_DEF(max_fl, D3D_FEATURE_LEVEL_12_1);
+        min_fl = PL_DEF(min_fl, D3D_FEATURE_LEVEL_9_1);
+
+        // Get a list of feature levels from min_fl to max_fl
+        const D3D_FEATURE_LEVEL *levels;
+        int levels_len = get_feature_levels(max_fl, min_fl, &levels);
+        if (!levels_len) {
+            PL_FATAL(ctx, "No suitable Direct3D feature level found");
+            goto error;
+        }
+
+        // An explicit adapter requires D3D_DRIVER_TYPE_UNKNOWN
+        D3D_DRIVER_TYPE type = D3D_DRIVER_TYPE_UNKNOWN;
+        if (!adapter) {
+            if (warp) {
+                type = D3D_DRIVER_TYPE_WARP;
+            } else {
+                type = D3D_DRIVER_TYPE_HARDWARE;
+            }
+        }
+
+        UINT flags = params->flags;
+        if (debug)
+            flags |= D3D11_CREATE_DEVICE_DEBUG;
+
+        hr = pD3D11CreateDevice(adapter, type, NULL, flags, levels, levels_len,
+                                D3D11_SDK_VERSION, &dev, NULL, NULL);
+        if (SUCCEEDED(hr))
+            break;
+
+        pl_d3d11_after_error(ctx, hr);
+
+        // Trying to create a D3D_FEATURE_LEVEL_12_0 device on Windows 8.1 or
+        // below will not succeed. Try an 11_1 device.
+        if (hr == E_INVALIDARG && max_fl >= D3D_FEATURE_LEVEL_12_0 &&
+                                  min_fl <= D3D_FEATURE_LEVEL_11_1) {
+            PL_DEBUG(ctx, "Failed to create 12_0+ device, trying 11_1");
+            max_fl = D3D_FEATURE_LEVEL_11_1;
+            continue;
+        }
+
+        // Trying to create a D3D_FEATURE_LEVEL_11_1 device on Windows 7
+        // without the platform update will not succeed. Try an 11_0 device.
+        if (hr == E_INVALIDARG && max_fl >= D3D_FEATURE_LEVEL_11_1 &&
+                                  min_fl <= D3D_FEATURE_LEVEL_11_0) {
+            PL_DEBUG(ctx, "Failed to create 11_1+ device, trying 11_0");
+            max_fl = D3D_FEATURE_LEVEL_11_0;
+            continue;
+        }
+
+        // Retry with WARP if allowed
+        if (!adapter && !warp && params->allow_software) {
+            PL_DEBUG(ctx, "Failed to create hardware device, trying WARP: %s",
+                     pl_hresult_to_str(hr));
+            warp = true;
+            // Start over with the caller's original feature level range
+            max_fl = params->max_feature_level;
+            min_fl = params->min_feature_level;
+            continue;
+        }
+
+        PL_FATAL(ctx, "Failed to create Direct3D 11 device: %s",
+                 pl_hresult_to_str(hr));
+        goto error;
+    } while (true);
+
+    // Record the WARP decision before the optional latency setup below: a
+    // failure there jumps to the cleanup label but still returns the device,
+    // and the software flag must not be lost in that case.
+    d3d11->software = warp;
+
+    if (params->max_frame_latency) {
+        D3D(ID3D11Device_QueryInterface(dev, &IID_IDXGIDevice1,
+                                        (void **) &dxgi_dev));
+        IDXGIDevice1_SetMaximumFrameLatency(dxgi_dev, params->max_frame_latency);
+    }
+
+error:
+    // Shared cleanup for both the success and the failure path
+    if (release_adapter)
+        SAFE_RELEASE(adapter);
+    SAFE_RELEASE(dxgi_dev);
+    return dev;
+}
+
+// Hook up the DXGI debug message queue for this context.
+//
+// Acquires ctx->iqueue, installs the storage filters, lifts the message count
+// limits, and additionally acquires ctx->debug when leak_check is set. Does
+// nothing unless built with PL_HAVE_DXGI_DEBUG, and gives up quietly when
+// DXGIGetDebugInterface cannot be resolved.
+static void init_debug_layer(struct d3d11_ctx *ctx, bool leak_check)
+{
+#ifdef PL_HAVE_DXGI_DEBUG
+    // The loader may not have run yet, e.g. for an externally created device
+    if (!pDXGIGetDebugInterface) {
+        d3d11_load();
+        if (!pDXGIGetDebugInterface)
+            goto error;
+    }
+
+    D3D(pDXGIGetDebugInterface(&IID_IDXGIInfoQueue, (void **) &ctx->iqueue));
+
+    // Push empty filter to get everything
+    DXGI_INFO_QUEUE_FILTER allow_all = {0};
+    IDXGIInfoQueue_PushStorageFilter(ctx->iqueue, DXGI_DEBUG_ALL, &allow_all);
+
+    // Filter some annoying D3D11 messages
+    DXGI_INFO_QUEUE_MESSAGE_ID ignored_ids[] = {
+        // This false-positive error occurs every time we Draw() with a shader
+        // that samples from a texture format that only supports point sampling.
+        // Since we already use CheckFormatSupport to know which formats can be
+        // linearly sampled from, we shouldn't ever bind a non-point sampler to
+        // a format that doesn't support it.
+        D3D11_MESSAGE_ID_DEVICE_DRAW_RESOURCE_FORMAT_SAMPLE_UNSUPPORTED,
+    };
+    DXGI_INFO_QUEUE_FILTER d3d11_filter = {
+        .DenyList = {
+            .NumIDs = PL_ARRAY_SIZE(ignored_ids),
+            .pIDList = ignored_ids,
+        },
+    };
+    IDXGIInfoQueue_PushStorageFilter(ctx->iqueue, DXGI_DEBUG_D3D11,
+                                     &d3d11_filter);
+
+    // A limit of -1 removes the cap on stored messages for both producers
+    IDXGIInfoQueue_SetMessageCountLimit(ctx->iqueue, DXGI_DEBUG_D3D11, -1);
+    IDXGIInfoQueue_SetMessageCountLimit(ctx->iqueue, DXGI_DEBUG_DXGI, -1);
+
+    if (leak_check)
+        D3D(pDXGIGetDebugInterface(&IID_IDXGIDebug, (void **) &ctx->debug));
+
+error:
+    return;
+#endif
+}
+
+// Tear down a pl_d3d11 and everything it owns, then free it through *ptr.
+//
+// Safe to call when *ptr is NULL. The GPU object is destroyed first, then the
+// device interfaces are released, and only afterwards are live objects
+// reported (when leak checking was enabled), so that anything still alive at
+// that point shows up in the report.
+void pl_d3d11_destroy(pl_d3d11 *ptr)
+{
+    if (!*ptr)
+        return;
+
+    pl_d3d11 self = *ptr;
+    struct d3d11_ctx *ctx = PL_PRIV(self);
+
+    pl_gpu_destroy(self->gpu);
+
+    SAFE_RELEASE(ctx->dev);
+    SAFE_RELEASE(ctx->dxgi_dev);
+
+#ifdef PL_HAVE_DXGI_DEBUG
+    if (ctx->debug) {
+        // Report any leaked objects
+        pl_d3d11_flush_message_queue(ctx, "After destroy");
+
+        IDXGIDebug_ReportLiveObjects(ctx->debug, DXGI_DEBUG_ALL,
+                                     DXGI_DEBUG_RLO_DETAIL);
+        pl_d3d11_flush_message_queue(ctx, "After leak check");
+
+        IDXGIDebug_ReportLiveObjects(ctx->debug, DXGI_DEBUG_ALL,
+                                     DXGI_DEBUG_RLO_SUMMARY);
+        pl_d3d11_flush_message_queue(ctx, "After leak summary");
+    }
+
+    SAFE_RELEASE(ctx->debug);
+    SAFE_RELEASE(ctx->iqueue);
+#endif
+
+    pl_free_ptr((void **) ptr);
+}
+
+// Create a pl_d3d11 instance.
+//
+// If params is NULL, pl_d3d11_default_params is used. When params->device is
+// set, that device is wrapped (an extra reference is taken); otherwise a new
+// device is created via create_device. The adapter properties are then logged,
+// software adapters are detected and rejected where not allowed, and finally
+// the pl_gpu is created.
+//
+// On any failure the partially constructed object is passed to
+// pl_d3d11_destroy and the resulting pointer is returned (presumably NULL
+// after pl_free_ptr - confirm against its definition).
+pl_d3d11 pl_d3d11_create(pl_log log, const struct pl_d3d11_params *params)
+{
+ params = PL_DEF(params, &pl_d3d11_default_params);
+ IDXGIAdapter1 *adapter = NULL;
+ IDXGIAdapter2 *adapter2 = NULL;
+ bool success = false;
+ HRESULT hr;
+
+ struct pl_d3d11_t *d3d11 = pl_zalloc_obj(NULL, d3d11, struct d3d11_ctx);
+ struct d3d11_ctx *ctx = PL_PRIV(d3d11);
+ ctx->log = log;
+ ctx->d3d11 = d3d11;
+
+ if (params->device) {
+ d3d11->device = params->device;
+ ID3D11Device_AddRef(d3d11->device);
+ } else {
+ d3d11->device = create_device(d3d11, params);
+ if (!d3d11->device)
+ goto error;
+ }
+ ctx->dev = d3d11->device;
+
+ // Also enable message capture for external devices that were themselves
+ // created with the debug flag
+ if (params->debug ||
+ ID3D11Device_GetCreationFlags(d3d11->device) & D3D11_CREATE_DEVICE_DEBUG)
+ {
+ // Do not report live object on pl_d3d11_destroy if device was created
+ // externally, it makes no sense as there will be a lot of things alive.
+ init_debug_layer(ctx, !params->device);
+ }
+
+ D3D(ID3D11Device_QueryInterface(d3d11->device, &IID_IDXGIDevice1,
+ (void **) &ctx->dxgi_dev));
+ D3D(IDXGIDevice1_GetParent(ctx->dxgi_dev, &IID_IDXGIAdapter1,
+ (void **) &adapter));
+
+ // IDXGIAdapter2 is optional; its absence just means DXGI 1.1
+ hr = IDXGIAdapter1_QueryInterface(adapter, &IID_IDXGIAdapter2,
+ (void **) &adapter2);
+ if (FAILED(hr))
+ adapter2 = NULL;
+
+ if (adapter2) {
+ PL_INFO(ctx, "Using DXGI 1.2+");
+ } else {
+ PL_INFO(ctx, "Using DXGI 1.1");
+ }
+
+ // The major level is taken from bits 12 and up, the minor from bits 8-11
+ D3D_FEATURE_LEVEL fl = ID3D11Device_GetFeatureLevel(d3d11->device);
+ PL_INFO(ctx, "Using Direct3D 11 feature level %u_%u",
+ ((unsigned) fl) >> 12, (((unsigned) fl) >> 8) & 0xf);
+
+ char *dev_name = NULL;
+ UINT vendor_id, device_id, revision, subsys_id;
+ LUID adapter_luid;
+ UINT flags;
+
+ if (adapter2) {
+ // DXGI 1.2 IDXGIAdapter2::GetDesc2 is preferred over the DXGI 1.1
+ // version because it reports the real adapter information when using
+ // feature level 9 hardware
+ DXGI_ADAPTER_DESC2 desc;
+ D3D(IDXGIAdapter2_GetDesc2(adapter2, &desc));
+
+ dev_name = pl_to_utf8(NULL, desc.Description);
+ vendor_id = desc.VendorId;
+ device_id = desc.DeviceId;
+ revision = desc.Revision;
+ subsys_id = desc.SubSysId;
+ adapter_luid = desc.AdapterLuid;
+ flags = desc.Flags;
+ } else {
+ DXGI_ADAPTER_DESC1 desc;
+ D3D(IDXGIAdapter1_GetDesc1(adapter, &desc));
+
+ dev_name = pl_to_utf8(NULL, desc.Description);
+ vendor_id = desc.VendorId;
+ device_id = desc.DeviceId;
+ revision = desc.Revision;
+ subsys_id = desc.SubSysId;
+ adapter_luid = desc.AdapterLuid;
+ flags = desc.Flags;
+ }
+
+ PL_INFO(ctx, "Direct3D 11 device properties:");
+ PL_INFO(ctx, " Device Name: %s", dev_name);
+ PL_INFO(ctx, " Device ID: %04x:%04x (rev %02x)",
+ vendor_id, device_id, revision);
+ PL_INFO(ctx, " Subsystem ID: %04x:%04x",
+ LOWORD(subsys_id), HIWORD(subsys_id));
+ PL_INFO(ctx, " LUID: %08lx%08lx",
+ adapter_luid.HighPart, adapter_luid.LowPart);
+ pl_free(dev_name);
+
+ // The driver version is logged as four 16-bit fields, most significant
+ // first; it is simply omitted when the query fails
+ LARGE_INTEGER version;
+ hr = IDXGIAdapter1_CheckInterfaceSupport(adapter, &IID_IDXGIDevice, &version);
+ if (SUCCEEDED(hr)) {
+ PL_INFO(ctx, " Driver version: %u.%u.%u.%u",
+ HIWORD(version.HighPart), LOWORD(version.HighPart),
+ HIWORD(version.LowPart), LOWORD(version.LowPart));
+ }
+
+ // Note: DXGI_ADAPTER_FLAG_SOFTWARE doesn't exist before Windows 8, but we
+ // also set d3d11->software in create_device if we pick WARP ourselves
+ if (flags & DXGI_ADAPTER_FLAG_SOFTWARE)
+ d3d11->software = true;
+
+ // If the primary display adapter is a software adapter, the
+ // DXGI_ADAPTER_FLAG_SOFTWARE flag won't be set, but the device IDs should
+ // still match the Microsoft Basic Render Driver
+ if (vendor_id == 0x1414 && device_id == 0x8c)
+ d3d11->software = true;
+
+ if (d3d11->software) {
+ bool external_adapter = params->device || params->adapter ||
+ !is_null_luid(params->adapter_luid);
+
+ // The allow_software flag only applies if the API user didn't manually
+ // specify an adapter or a device
+ if (!params->allow_software && !external_adapter) {
+ // If we got this far with allow_software unset, the primary adapter
+ // must be a software adapter
+ PL_ERR(ctx, "Primary adapter is a software adapter");
+ goto error;
+ }
+
+ // If a software adapter was manually specified, don't show a warning
+ enum pl_log_level level = PL_LOG_WARN;
+ if (external_adapter || params->force_software)
+ level = PL_LOG_INFO;
+
+ PL_MSG(ctx, level, "Using a software adapter");
+ }
+
+ d3d11->gpu = pl_gpu_create_d3d11(ctx);
+ if (!d3d11->gpu)
+ goto error;
+
+ success = true;
+error:
+ // Reached on success as well; only the failure case tears the object down
+ if (!success) {
+ PL_FATAL(ctx, "Failed initializing Direct3D 11 device");
+ pl_d3d11_destroy((pl_d3d11 *) &d3d11);
+ }
+ SAFE_RELEASE(adapter);
+ SAFE_RELEASE(adapter2);
+ return d3d11;
+}
diff --git a/src/d3d11/formats.c b/src/d3d11/formats.c
new file mode 100644
index 0000000..7aaec26
--- /dev/null
+++ b/src/d3d11/formats.c
@@ -0,0 +1,293 @@
+/*
+ * This file is part of libplacebo.
+ *
+ * libplacebo is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libplacebo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "formats.h"
+#include "gpu.h"
+
+// Build one `struct d3d_format` table entry.
+// _minor: D3D11 minor version required for the format
+// _name: pl_fmt name string
+// _dxfmt, _type: pasted together as DXGI_FORMAT_<_dxfmt>_<_type>; _type also
+// selects the PL_FMT_<_type> format type
+// num: number of components
+// size: texel size in bytes (used for both texel_size and internal_size)
+// bits: per-component bit depths (used for component_depth and host_bits)
+// order: sample order of the components
+#define FMT(_minor, _name, _dxfmt, _type, num, size, bits, order) \
+ (struct d3d_format) { \
+ .dxfmt = DXGI_FORMAT_##_dxfmt##_##_type, \
+ .minor = _minor, \
+ .fmt = { \
+ .name = _name, \
+ .type = PL_FMT_##_type, \
+ .num_components = num, \
+ .component_depth = bits, \
+ .texel_size = size, \
+ .texel_align = 1, \
+ .internal_size = size, \
+ .host_bits = bits, \
+ .sample_order = order, \
+ }, \
+ }
+
+// Brace-wrap a list of values so it can be passed as one macro argument
+#define IDX(...) {__VA_ARGS__}
+#define BITS(...) {__VA_ARGS__}
+
+// Regular format: `num` components of `bits` bits each, in natural order,
+// available since D3D11.0
+#define REGFMT(name, dxfmt, type, num, bits) \
+ FMT(0, name, dxfmt, type, num, (num) * (bits) / 8, \
+ BITS(bits, bits, bits, bits), \
+ IDX(0, 1, 2, 3))
+
+// Emulated format: the host-visible layout (`en` components of `eb` bits)
+// differs from the internal DXGI layout (`in` components of `ib` bits).
+// The entry is flagged as emulated.
+#define EMUFMT(_name, _dxfmt, _type, in, en, ib, eb) \
+ (struct d3d_format) { \
+ .dxfmt = DXGI_FORMAT_##_dxfmt##_##_type, \
+ .minor = 0, \
+ .fmt = { \
+ .name = _name, \
+ .type = PL_FMT_##_type, \
+ .num_components = en, \
+ .component_depth = BITS(ib, ib, ib, ib), \
+ .internal_size = (in) * (ib) / 8, \
+ .opaque = false, \
+ .emulated = true, \
+ .texel_size = (en) * (eb) / 8, \
+ .texel_align = (eb) / 8, \
+ .host_bits = BITS(eb, eb, eb, eb), \
+ .sample_order = IDX(0, 1, 2, 3), \
+ }, \
+ }
+
+// Table of every format this backend knows how to expose. Which entries are
+// actually offered is decided at runtime by pl_d3d11_setup_formats, which
+// walks the table until it reaches the zeroed terminator entry.
+const struct d3d_format pl_d3d11_formats[] = {
+ // Unsigned normalized
+ REGFMT("r8", R8, UNORM, 1, 8),
+ REGFMT("rg8", R8G8, UNORM, 2, 8),
+ EMUFMT("rgb8", R8G8B8A8, UNORM, 4, 3, 8, 8),
+ REGFMT("rgba8", R8G8B8A8, UNORM, 4, 8),
+ REGFMT("r16", R16, UNORM, 1, 16),
+ REGFMT("rg16", R16G16, UNORM, 2, 16),
+ EMUFMT("rgb16", R16G16B16A16, UNORM, 4, 3, 16, 16),
+ REGFMT("rgba16", R16G16B16A16, UNORM, 4, 16),
+
+ // Signed normalized
+ REGFMT("r8s", R8, SNORM, 1, 8),
+ REGFMT("rg8s", R8G8, SNORM, 2, 8),
+ REGFMT("rgba8s", R8G8B8A8, SNORM, 4, 8),
+ REGFMT("r16s", R16, SNORM, 1, 16),
+ REGFMT("rg16s", R16G16, SNORM, 2, 16),
+ REGFMT("rgba16s", R16G16B16A16, SNORM, 4, 16),
+
+ // Floating point
+ REGFMT("r16hf", R16, FLOAT, 1, 16),
+ REGFMT("rg16hf", R16G16, FLOAT, 2, 16),
+ EMUFMT("rgb16hf", R16G16B16A16, FLOAT, 4, 3, 16, 16),
+ REGFMT("rgba16hf", R16G16B16A16, FLOAT, 4, 16),
+ REGFMT("r32f", R32, FLOAT, 1, 32),
+ REGFMT("rg32f", R32G32, FLOAT, 2, 32),
+ REGFMT("rgb32f", R32G32B32, FLOAT, 3, 32),
+ REGFMT("rgba32f", R32G32B32A32, FLOAT, 4, 32),
+
+ // 16-bit float storage with a 32-bit float host representation
+ EMUFMT("r16f", R16, FLOAT, 1, 1, 16, 32),
+ EMUFMT("rg16f", R16G16, FLOAT, 2, 2, 16, 32),
+ EMUFMT("rgb16f", R16G16B16A16, FLOAT, 4, 3, 16, 32),
+ EMUFMT("rgba16f", R16G16B16A16, FLOAT, 4, 4, 16, 32),
+
+ // Unsigned integer
+ REGFMT("r8u", R8, UINT, 1, 8),
+ REGFMT("rg8u", R8G8, UINT, 2, 8),
+ REGFMT("rgba8u", R8G8B8A8, UINT, 4, 8),
+ REGFMT("r16u", R16, UINT, 1, 16),
+ REGFMT("rg16u", R16G16, UINT, 2, 16),
+ REGFMT("rgba16u", R16G16B16A16, UINT, 4, 16),
+ REGFMT("r32u", R32, UINT, 1, 32),
+ REGFMT("rg32u", R32G32, UINT, 2, 32),
+ REGFMT("rgb32u", R32G32B32, UINT, 3, 32),
+ REGFMT("rgba32u", R32G32B32A32, UINT, 4, 32),
+
+ // Signed integer
+ REGFMT("r8i", R8, SINT, 1, 8),
+ REGFMT("rg8i", R8G8, SINT, 2, 8),
+ REGFMT("rgba8i", R8G8B8A8, SINT, 4, 8),
+ REGFMT("r16i", R16, SINT, 1, 16),
+ REGFMT("rg16i", R16G16, SINT, 2, 16),
+ REGFMT("rgba16i", R16G16B16A16, SINT, 4, 16),
+ REGFMT("r32i", R32, SINT, 1, 32),
+ REGFMT("rg32i", R32G32, SINT, 2, 32),
+ REGFMT("rgb32i", R32G32B32, SINT, 3, 32),
+ REGFMT("rgba32i", R32G32B32A32, SINT, 4, 32),
+
+ // Packed formats with non-uniform component depths
+ FMT(0, "rgb10a2", R10G10B10A2, UNORM, 4, 4, BITS(10, 10, 10, 2), IDX(0, 1, 2, 3)),
+ FMT(0, "rgb10a2u", R10G10B10A2, UINT, 4, 4, BITS(10, 10, 10, 2), IDX(0, 1, 2, 3)),
+
+ // Formats whose sample order differs from RGBA, and packed float
+ FMT(0, "bgra8", B8G8R8A8, UNORM, 4, 4, BITS( 8, 8, 8, 8), IDX(2, 1, 0, 3)),
+ FMT(0, "bgrx8", B8G8R8X8, UNORM, 3, 4, BITS( 8, 8, 8), IDX(2, 1, 0)),
+ FMT(0, "rg11b10f", R11G11B10, FLOAT, 3, 4, BITS(11, 11, 10), IDX(0, 1, 2)),
+
+ // D3D11.1 16-bit formats (resurrected D3D9 formats)
+ FMT(1, "bgr565", B5G6R5, UNORM, 3, 2, BITS( 5, 6, 5), IDX(2, 1, 0)),
+ FMT(1, "bgr5a1", B5G5R5A1, UNORM, 4, 2, BITS( 5, 5, 5, 1), IDX(2, 1, 0, 3)),
+ FMT(1, "bgra4", B4G4R4A4, UNORM, 4, 2, BITS( 4, 4, 4, 4), IDX(2, 1, 0, 3)),
+
+ // Terminator (dxfmt == 0)
+ {0}
+};
+// The table-building helpers are not needed past this point
+#undef BITS
+#undef IDX
+#undef REGFMT
+#undef EMUFMT
+#undef FMT
+
+// Populate gpu->formats / gpu->num_formats with every entry of
+// pl_d3d11_formats that the device supports.
+//
+// For each table entry the device is queried with CheckFormatSupport and
+// CheckFeatureSupport(FORMAT_SUPPORT2), and the reported bits are translated
+// into pl_fmt_caps. Entries that end up with no capabilities are dropped.
+// Each pl_fmt's private data stores a pointer back to its table entry, which
+// is what fmt_to_dxgi reads.
+void pl_d3d11_setup_formats(struct pl_gpu_t *gpu)
+{
+    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+    PL_ARRAY(pl_fmt) formats = {0};
+    HRESULT hr;
+
+    for (int i = 0; pl_d3d11_formats[i].dxfmt; i++) {
+        const struct d3d_format *d3d_fmt = &pl_d3d11_formats[i];
+
+        // The Direct3D 11.0 debug layer will segfault if CheckFormatSupport is
+        // called on a format it doesn't know about
+        if (d3d_fmt->minor > p->minor)
+            continue;
+
+        UINT sup = 0;
+        hr = ID3D11Device_CheckFormatSupport(p->dev, d3d_fmt->dxfmt, &sup);
+        if (FAILED(hr))
+            continue;
+
+        // If this query fails, OutFormatSupport2 simply stays zero
+        D3D11_FEATURE_DATA_FORMAT_SUPPORT2 sup2 = { .InFormat = d3d_fmt->dxfmt };
+        ID3D11Device_CheckFeatureSupport(p->dev, D3D11_FEATURE_FORMAT_SUPPORT2,
+                                         &sup2, sizeof(sup2));
+
+        // The private data is a single pointer to the table entry
+        struct pl_fmt_t *fmt = pl_alloc_obj(gpu, fmt, const struct d3d_format *);
+        const struct d3d_format **fmtp = PL_PRIV(fmt);
+        *fmt = d3d_fmt->fmt;
+        *fmtp = d3d_fmt;
+
+        // For sanity, clear the superfluous fields
+        for (int j = fmt->num_components; j < 4; j++) {
+            fmt->component_depth[j] = 0;
+            fmt->sample_order[j] = 0;
+            fmt->host_bits[j] = 0;
+        }
+
+        // A capability is granted only if all of its `sup` and `sup2` bits
+        // are reported by the device
+        static const struct {
+            enum pl_fmt_caps caps;
+            UINT sup;
+            UINT sup2;
+        } support[] = {
+            {
+                .caps = PL_FMT_CAP_SAMPLEABLE,
+                .sup = D3D11_FORMAT_SUPPORT_TEXTURE2D,
+            },
+            {
+                .caps = PL_FMT_CAP_STORABLE,
+                // SHADER_LOAD is for readonly images, which can use a SRV
+                .sup = D3D11_FORMAT_SUPPORT_TEXTURE2D |
+                       D3D11_FORMAT_SUPPORT_TYPED_UNORDERED_ACCESS_VIEW |
+                       D3D11_FORMAT_SUPPORT_SHADER_LOAD,
+                .sup2 = D3D11_FORMAT_SUPPORT2_UAV_TYPED_STORE,
+            },
+            {
+                .caps = PL_FMT_CAP_READWRITE,
+                .sup = D3D11_FORMAT_SUPPORT_TEXTURE2D |
+                       D3D11_FORMAT_SUPPORT_TYPED_UNORDERED_ACCESS_VIEW,
+                .sup2 = D3D11_FORMAT_SUPPORT2_UAV_TYPED_LOAD,
+            },
+            {
+                .caps = PL_FMT_CAP_LINEAR,
+                .sup = D3D11_FORMAT_SUPPORT_TEXTURE2D |
+                       D3D11_FORMAT_SUPPORT_SHADER_SAMPLE,
+            },
+            {
+                .caps = PL_FMT_CAP_RENDERABLE,
+                .sup = D3D11_FORMAT_SUPPORT_RENDER_TARGET,
+            },
+            {
+                .caps = PL_FMT_CAP_BLENDABLE,
+                .sup = D3D11_FORMAT_SUPPORT_RENDER_TARGET |
+                       D3D11_FORMAT_SUPPORT_BLENDABLE,
+            },
+            {
+                .caps = PL_FMT_CAP_VERTEX,
+                .sup = D3D11_FORMAT_SUPPORT_IA_VERTEX_BUFFER,
+            },
+            {
+                .caps = PL_FMT_CAP_TEXEL_UNIFORM,
+                .sup = D3D11_FORMAT_SUPPORT_BUFFER |
+                       D3D11_FORMAT_SUPPORT_SHADER_LOAD,
+            },
+            {
+                .caps = PL_FMT_CAP_TEXEL_STORAGE,
+                // SHADER_LOAD is for readonly buffers, which can use a SRV
+                .sup = D3D11_FORMAT_SUPPORT_BUFFER |
+                       D3D11_FORMAT_SUPPORT_TYPED_UNORDERED_ACCESS_VIEW |
+                       D3D11_FORMAT_SUPPORT_SHADER_LOAD,
+                .sup2 = D3D11_FORMAT_SUPPORT2_UAV_TYPED_STORE,
+            },
+            {
+                .caps = PL_FMT_CAP_HOST_READABLE,
+                .sup = D3D11_FORMAT_SUPPORT_CPU_LOCKABLE,
+            },
+        };
+
+        for (int j = 0; j < PL_ARRAY_SIZE(support); j++) {
+            if ((sup & support[j].sup) == support[j].sup &&
+                (sup2.OutFormatSupport2 & support[j].sup2) == support[j].sup2)
+            {
+                fmt->caps |= support[j].caps;
+            }
+        }
+
+        // PL_FMT_CAP_STORABLE implies compute shaders, so don't set it if we
+        // don't have them
+        if (!gpu->glsl.compute)
+            fmt->caps &= ~PL_FMT_CAP_STORABLE;
+
+        // PL_FMT_CAP_READWRITE implies PL_FMT_CAP_STORABLE
+        if (!(fmt->caps & PL_FMT_CAP_STORABLE))
+            fmt->caps &= ~PL_FMT_CAP_READWRITE;
+
+        // `fmt->gatherable` must have PL_FMT_CAP_SAMPLEABLE
+        if ((fmt->caps & PL_FMT_CAP_SAMPLEABLE) &&
+            (sup & D3D11_FORMAT_SUPPORT_SHADER_GATHER))
+        {
+            fmt->gatherable = true;
+        }
+
+        // PL_FMT_CAP_BLITTABLE implies support for stretching, flipping and
+        // loose format conversion, which require a shader pass in D3D11
+        if (p->fl >= D3D_FEATURE_LEVEL_11_0) {
+            // On >=FL11_0, we use a compute pass, which supports 1D and 3D
+            // textures
+            if (fmt->caps & PL_FMT_CAP_STORABLE)
+                fmt->caps |= PL_FMT_CAP_BLITTABLE;
+        } else {
+            // On <FL11_0 we use a raster pass
+            static const enum pl_fmt_caps req = PL_FMT_CAP_RENDERABLE |
+                                                PL_FMT_CAP_SAMPLEABLE;
+            if ((fmt->caps & req) == req)
+                fmt->caps |= PL_FMT_CAP_BLITTABLE;
+        }
+
+        if (fmt->caps & (PL_FMT_CAP_VERTEX | PL_FMT_CAP_TEXEL_UNIFORM |
+                         PL_FMT_CAP_TEXEL_STORAGE)) {
+            fmt->glsl_type = pl_var_glsl_type_name(pl_var_from_fmt(fmt, ""));
+            pl_assert(fmt->glsl_type);
+        }
+
+        if (fmt->caps & (PL_FMT_CAP_STORABLE | PL_FMT_CAP_TEXEL_STORAGE))
+            fmt->glsl_format = pl_fmt_glsl_format(fmt, fmt->num_components);
+
+        fmt->fourcc = pl_fmt_fourcc(fmt);
+
+        // If no caps, D3D11 only supports this for things we don't care about
+        if (!fmt->caps) {
+            pl_free(fmt);
+            continue;
+        }
+
+        PL_ARRAY_APPEND(gpu, formats, fmt);
+    }
+
+    gpu->formats = formats.elem;
+    gpu->num_formats = formats.num;
+}
diff --git a/src/d3d11/formats.h b/src/d3d11/formats.h
new file mode 100644
index 0000000..08336c0
--- /dev/null
+++ b/src/d3d11/formats.h
@@ -0,0 +1,36 @@
+/*
+ * This file is part of libplacebo.
+ *
+ * libplacebo is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libplacebo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "common.h"
+
+// One entry of the backend's format table: a DXGI format paired with the
+// pl_fmt description that is copied into each pl_fmt created for it
+struct d3d_format {
+ DXGI_FORMAT dxfmt;
+ int minor; // The D3D11 minor version number which supports this format
+ struct pl_fmt_t fmt;
+};
+
+// Format table, terminated by an entry whose dxfmt is 0
+extern const struct d3d_format pl_d3d11_formats[];
+
+// Map a pl_fmt created by this backend to its DXGI_FORMAT. The format's
+// private data holds a pointer to the originating d3d_format table entry.
+static inline DXGI_FORMAT fmt_to_dxgi(pl_fmt fmt)
+{
+    const struct d3d_format **slot = PL_PRIV(fmt);
+    const struct d3d_format *entry = *slot;
+    return entry->dxfmt;
+}
+
+// Fill in gpu->formats and gpu->num_formats with the supported formats
+void pl_d3d11_setup_formats(struct pl_gpu_t *gpu);
diff --git a/src/d3d11/gpu.c b/src/d3d11/gpu.c
new file mode 100644
index 0000000..05a08a3
--- /dev/null
+++ b/src/d3d11/gpu.c
@@ -0,0 +1,685 @@
+/*
+ * This file is part of libplacebo.
+ *
+ * libplacebo is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libplacebo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <initguid.h>
+#include <windows.h>
+#include <versionhelpers.h>
+
+#include "common.h"
+#include "gpu.h"
+#include "formats.h"
+#include "glsl/spirv.h"
+
+#define DXGI_ADAPTER_FLAG3_SUPPORT_MONITORED_FENCES (0x8)
+
+// One set of D3D11 query objects needed for a single duration measurement.
+// All three are created together (lazily) in pl_d3d11_timer_start().
+struct timer_query {
+ ID3D11Query *ts_start; // D3D11_QUERY_TIMESTAMP, ended in timer_start
+ ID3D11Query *ts_end; // D3D11_QUERY_TIMESTAMP, ended in timer_end
+ ID3D11Query *disjoint; // D3D11_QUERY_TIMESTAMP_DISJOINT bracketing both
+};
+
+// Backend timer object: a fixed-size ring of query sets, so that several
+// measurements can be in flight before their results are read back.
+struct pl_timer_t {
+ // Ring buffer of timer queries to use
+ int current; // Index of the slot used by the next timer_start/timer_end
+ int pending; // Number of finished measurements not yet consumed by timer_query
+ struct timer_query queries[16];
+};
+
+// Begin a timed section on the immediate context using the current slot of
+// the timer's ring buffer. Does nothing when `timer` is NULL. If the query
+// objects for the slot cannot be created, any partially created objects are
+// released again, which leaves `ts_start` NULL for pl_d3d11_timer_end().
+void pl_d3d11_timer_start(pl_gpu gpu, pl_timer timer)
+{
+    if (timer == NULL)
+        return;
+
+    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+    // Not referenced directly below. NOTE(review): presumably consumed by the
+    // D3D() macro, which is defined outside this file - confirm
+    struct d3d11_ctx *ctx = p->ctx;
+    struct timer_query *query = &timer->queries[timer->current];
+
+    // Create the query objects lazily, the first time this slot is used
+    if (query->ts_start == NULL) {
+        D3D(ID3D11Device_CreateQuery(p->dev,
+            &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP }, &query->ts_start));
+        D3D(ID3D11Device_CreateQuery(p->dev,
+            &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP }, &query->ts_end));
+
+        // Measuring duration in D3D11 requires three queries: start and end
+        // timestamp queries, and a disjoint query containing a flag which says
+        // whether the timestamps are usable or if a discontinuity occurred
+        // between them, like a change in power state or clock speed. The
+        // disjoint query also contains the timer frequency, so the timestamps
+        // are useless without it.
+        D3D(ID3D11Device_CreateQuery(p->dev,
+            &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP_DISJOINT }, &query->disjoint));
+    }
+
+    // Open the disjoint bracket, then record the start timestamp
+    ID3D11Asynchronous *disjoint = (ID3D11Asynchronous *) query->disjoint;
+    ID3D11Asynchronous *ts_start = (ID3D11Asynchronous *) query->ts_start;
+    ID3D11DeviceContext_Begin(p->imm, disjoint);
+    ID3D11DeviceContext_End(p->imm, ts_start);
+    return;
+
+error:
+    // Drop whatever was created so the slot is retried from scratch next time
+    SAFE_RELEASE(query->ts_start);
+    SAFE_RELEASE(query->ts_end);
+    SAFE_RELEASE(query->disjoint);
+}
+
+// Finish the timed section started by pl_d3d11_timer_start() and advance the
+// ring buffer to the next slot. Does nothing when `timer` is NULL or when the
+// current slot has no query objects.
+void pl_d3d11_timer_end(pl_gpu gpu, pl_timer timer)
+{
+    if (timer == NULL)
+        return;
+
+    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+    struct timer_query *slot = &timer->queries[timer->current];
+
+    // timer_start may have failed to create the query objects, even when
+    // timer_start and timer_end are called in the right order
+    if (slot->ts_start == NULL)
+        return;
+
+    // Record the end timestamp, then close the disjoint bracket
+    ID3D11DeviceContext_End(p->imm, (ID3D11Asynchronous *) slot->ts_end);
+    ID3D11DeviceContext_End(p->imm, (ID3D11Asynchronous *) slot->disjoint);
+
+    const int num_slots = PL_ARRAY_SIZE(timer->queries);
+
+    // Move on to the next slot for the next timer_start, wrapping around
+    timer->current = (timer->current + 1) % num_slots;
+
+    // One more result is pending, unless the ring is already full. In that
+    // case timer->current now points at the oldest entry, which will simply
+    // be dropped and reused.
+    if (timer->pending < num_slots)
+        timer->pending++;
+}
+
+// Convert a tick count measured at `freq` ticks per second to nanoseconds.
+// Whole seconds and the remaining ticks are converted separately, so the
+// product `timestamp * 1e9` is never formed directly. `freq` must be non-zero.
+static uint64_t timestamp_to_ns(uint64_t timestamp, uint64_t freq)
+{
+    const uint64_t ns_per_s = 1000000000llu;
+    const uint64_t whole_seconds = timestamp / freq;
+    const uint64_t leftover_ticks = timestamp % freq;
+
+    return whole_seconds * ns_per_s + leftover_ticks * ns_per_s / freq;
+}
+
+// Return the oldest available timer result in nanoseconds, or 0 if no result
+// is available (either nothing is pending, or the oldest pending queries have
+// not completed yet). Results that are unusable (disjoint, zero frequency, or
+// a failed GetData call) are discarded and the next pending one is tried.
+// A valid result is never reported as 0; it is clamped to at least 1.
+static uint64_t d3d11_timer_query(pl_gpu gpu, pl_timer timer)
+{
+ struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+ struct d3d11_ctx *ctx = p->ctx;
+ HRESULT hr;
+
+ // Each iteration that falls through to the loop increment (via `continue`)
+ // consumes one pending entry
+ for (; timer->pending > 0; timer->pending--) {
+ // The oldest pending entry sits `pending` slots behind `current` in the
+ // ring buffer; wrap negative indices around
+ int index = timer->current - timer->pending;
+ if (index < 0)
+ index += PL_ARRAY_SIZE(timer->queries);
+ struct timer_query *query = &timer->queries[index];
+
+ UINT64 start, end;
+ D3D11_QUERY_DATA_TIMESTAMP_DISJOINT dj;
+
+ // Fetch the results of each query, or on S_FALSE, return 0 to indicate
+ // the queries are still pending
+ D3D(hr = ID3D11DeviceContext_GetData(p->imm,
+ (ID3D11Asynchronous *) query->disjoint, &dj, sizeof(dj),
+ D3D11_ASYNC_GETDATA_DONOTFLUSH));
+ if (hr == S_FALSE)
+ return 0;
+ D3D(hr = ID3D11DeviceContext_GetData(p->imm,
+ (ID3D11Asynchronous *) query->ts_end, &end, sizeof(end),
+ D3D11_ASYNC_GETDATA_DONOTFLUSH));
+ if (hr == S_FALSE)
+ return 0;
+ D3D(hr = ID3D11DeviceContext_GetData(p->imm,
+ (ID3D11Asynchronous *) query->ts_start, &start, sizeof(start),
+ D3D11_ASYNC_GETDATA_DONOTFLUSH));
+ if (hr == S_FALSE)
+ return 0;
+
+ // There was a discontinuity during the queries, so a timestamp can't be
+ // produced. Skip it and try the next one.
+ // (A zero frequency is rejected too, since it is used as a divisor.)
+ if (dj.Disjoint || !dj.Frequency)
+ continue;
+
+ // We got a result. Return it to the caller.
+ // The entry is consumed by hand here because returning skips the loop
+ // increment.
+ timer->pending--;
+ pl_d3d11_flush_message_queue(ctx, "After timer query");
+
+ uint64_t ns = timestamp_to_ns(end - start, dj.Frequency);
+ return PL_MAX(ns, 1);
+
+ error:
+ // There was an error fetching the timer result, so skip it and try the
+ // next one
+ continue;
+ }
+
+ // No more unprocessed results
+ return 0;
+}
+
+// Release the query objects of every ring buffer slot and free the timer.
+static void d3d11_timer_destroy(pl_gpu gpu, pl_timer timer)
+{
+    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+    struct d3d11_ctx *ctx = p->ctx;
+
+    for (int n = 0; n < PL_ARRAY_SIZE(timer->queries); n++) {
+        struct timer_query *slot = &timer->queries[n];
+        SAFE_RELEASE(slot->ts_start);
+        SAFE_RELEASE(slot->ts_end);
+        SAFE_RELEASE(slot->disjoint);
+    }
+
+    pl_d3d11_flush_message_queue(ctx, "After timer destroy");
+
+    pl_free(timer);
+}
+
+// Allocate a zero-initialized timer. Returns NULL when the device was found
+// not to support timestamp queries. The query objects themselves are only
+// created later, in pl_d3d11_timer_start().
+static pl_timer d3d11_timer_create(pl_gpu gpu)
+{
+    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+    struct pl_timer_t *timer = NULL;
+
+    if (p->has_timestamp_queries) {
+        timer = pl_alloc_ptr(NULL, timer);
+        *timer = (struct pl_timer_t) {0};
+    }
+
+    return timer;
+}
+
+// Report the binding namespace for a descriptor type. Every type maps to the
+// same namespace (0).
+static int d3d11_desc_namespace(pl_gpu gpu, enum pl_desc_type type)
+{
+    // SPIRV-Cross' HLSL resource mapping API targets resources by binding
+    // number, which requires Vulkan-style binding where all descriptors
+    // live in one shared namespace
+    const int shared_namespace = 0;
+    return shared_namespace;
+}
+
+// Flush the immediate context, then drain the debug message queue.
+static void d3d11_gpu_flush(pl_gpu gpu)
+{
+    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+
+    ID3D11DeviceContext_Flush(p->imm);
+
+    struct d3d11_ctx *ctx = p->ctx;
+    pl_d3d11_flush_message_queue(ctx, "After gpu flush");
+}
+
+// Block until the GPU has finished all previously submitted work.
+//
+// When a fence is available (`p->finish_fence`), a new fence value is
+// signalled on the immediate context and waited on via `p->finish_event`.
+// Otherwise an event query is issued and polled until it reports completion.
+//
+// If a D3D call fails, the function returns early without draining the debug
+// message queue.
+static void d3d11_gpu_finish(pl_gpu gpu)
+{
+    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+    struct d3d11_ctx *ctx = p->ctx;
+    HRESULT hr;
+
+    if (p->finish_fence) {
+        // Arm the event for the next fence value before signalling it
+        p->finish_value++;
+        D3D(ID3D11Fence_SetEventOnCompletion(p->finish_fence, p->finish_value,
+                                             p->finish_event));
+        ID3D11DeviceContext4_Signal(p->imm4, p->finish_fence, p->finish_value);
+        ID3D11DeviceContext_Flush(p->imm);
+        WaitForSingleObject(p->finish_event, INFINITE);
+    } else {
+        ID3D11DeviceContext_End(p->imm, (ID3D11Asynchronous *) p->finish_query);
+
+        // D3D11 doesn't have blocking queries, but it does have blocking
+        // readback. As a performance hack to try to avoid polling, do a dummy
+        // copy/readback between two buffers. Hopefully this will block until
+        // all prior commands are finished. If it does, the first GetData call
+        // will return a result and we won't have to poll.
+        //
+        // The dummy buffers are created without a result check, so either may
+        // be NULL. The readback is only an optimization; skip it in that case
+        // and rely on polling alone.
+        if (p->finish_buf_src && p->finish_buf_dst) {
+            pl_buf_copy(gpu, p->finish_buf_dst, 0, p->finish_buf_src, 0, sizeof(uint32_t));
+            pl_buf_read(gpu, p->finish_buf_dst, 0, &(uint32_t) {0}, sizeof(uint32_t));
+        }
+
+        // Poll the event query until it completes
+        for (;;) {
+            BOOL idle;
+            D3D(hr = ID3D11DeviceContext_GetData(p->imm,
+                (ID3D11Asynchronous *) p->finish_query, &idle, sizeof(idle), 0));
+            // `idle` is only read when GetData actually produced a result
+            if (hr == S_OK && idle)
+                break;
+            Sleep(1);
+        }
+    }
+
+    pl_d3d11_flush_message_queue(ctx, "After gpu finish");
+
+error:
+    return;
+}
+
+// Report whether the device is in a failed state. The failure flag on the
+// context is sticky: once set, the device is not queried again.
+static bool d3d11_gpu_is_failed(pl_gpu gpu)
+{
+    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+    struct d3d11_ctx *ctx = p->ctx;
+
+    if (!ctx->is_failed) {
+        // GetDeviceRemovedReason returns S_OK if the device isn't removed
+        HRESULT reason = ID3D11Device_GetDeviceRemovedReason(p->dev);
+        if (FAILED(reason)) {
+            ctx->is_failed = true;
+            pl_d3d11_after_error(ctx, reason);
+        }
+    }
+
+    return ctx->is_failed;
+}
+
+// Tear down a pl_gpu created by pl_gpu_create_d3d11(): destroy the helper
+// buffers, release all held D3D11 objects, close the finish() event, destroy
+// the SPIR-V compiler and free the pl_gpu itself. Also used on the error path
+// of pl_gpu_create_d3d11(), so every member may still be NULL.
+static void d3d11_gpu_destroy(pl_gpu gpu)
+{
+    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+
+    // Dummy buffers used by the finish() fallback
+    pl_buf_destroy(gpu, &p->finish_buf_src);
+    pl_buf_destroy(gpu, &p->finish_buf_dst);
+
+    // Release everything except the immediate context
+    SAFE_RELEASE(p->dev);
+    SAFE_RELEASE(p->dev1);
+    SAFE_RELEASE(p->dev5);
+    SAFE_RELEASE(p->imm1);
+    SAFE_RELEASE(p->imm4);
+    SAFE_RELEASE(p->vbuf.buf);
+    SAFE_RELEASE(p->ibuf.buf);
+    SAFE_RELEASE(p->rstate);
+    SAFE_RELEASE(p->dsstate);
+    for (int sample = 0; sample < PL_TEX_SAMPLE_MODE_COUNT; sample++) {
+        for (int addr = 0; addr < PL_TEX_ADDRESS_MODE_COUNT; addr++) {
+            SAFE_RELEASE(p->samplers[sample][addr]);
+        }
+    }
+    SAFE_RELEASE(p->finish_fence);
+    if (p->finish_event != NULL)
+        CloseHandle(p->finish_event);
+    SAFE_RELEASE(p->finish_query);
+
+    // Destroy the immediate context synchronously so referenced objects don't
+    // show up in the leak check
+    if (p->imm != NULL) {
+        ID3D11DeviceContext_ClearState(p->imm);
+        ID3D11DeviceContext_Flush(p->imm);
+        SAFE_RELEASE(p->imm);
+    }
+
+    pl_spirv_destroy(&p->spirv);
+    pl_free((void *) gpu);
+}
+
+// Return the pl_d3d11 that owns `gpu`, or NULL if `gpu` was not created by
+// this backend. The backend is recognised by comparing the `destroy` entry of
+// the function table at the start of the private data.
+pl_d3d11 pl_d3d11_get(pl_gpu gpu)
+{
+    const struct pl_gpu_fns *impl = PL_PRIV(gpu);
+    if (impl->destroy != d3d11_gpu_destroy)
+        return NULL;
+
+    // `impl` is the first member of struct pl_gpu_d3d11, so the same pointer
+    // also addresses the backend's private struct
+    struct pl_gpu_d3d11 *p = (struct pl_gpu_d3d11 *) impl;
+    return p->ctx->d3d11;
+}
+
+// Locate a D3DCompiler DLL and resolve its `D3DCompile` entry point.
+//
+// Candidates are tried in order of preference. On success, `p->D3DCompile`
+// and `p->d3d_compiler_ver` are filled in and true is returned; the DLL is
+// intentionally left loaded, because `p->D3DCompile` points into it. Returns
+// false when no candidate provides `D3DCompile`.
+static bool load_d3d_compiler(pl_gpu gpu)
+{
+    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+
+    static const struct {
+        const wchar_t *name;
+        bool inbox;
+    } compiler_dlls[] = {
+        // Try the inbox D3DCompiler first (Windows 8.1 and up)
+        { .name = L"d3dcompiler_47.dll", .inbox = true },
+        // Check for a packaged version of d3dcompiler_47.dll
+        { .name = L"d3dcompiler_47.dll" },
+        // Try d3dcompiler_46.dll from the Windows 8 SDK
+        { .name = L"d3dcompiler_46.dll" },
+        // Try d3dcompiler_43.dll from the June 2010 DirectX SDK
+        { .name = L"d3dcompiler_43.dll" },
+    };
+
+    for (int i = 0; i < PL_ARRAY_SIZE(compiler_dlls); i++) {
+        HMODULE d3dcompiler = NULL;
+
+        if (compiler_dlls[i].inbox) {
+            if (!IsWindows8Point1OrGreater())
+                continue;
+            // Restrict the search to System32 for the inbox copy
+            d3dcompiler = LoadLibraryExW(compiler_dlls[i].name, NULL,
+                                         LOAD_LIBRARY_SEARCH_SYSTEM32);
+        } else {
+            d3dcompiler = LoadLibraryW(compiler_dlls[i].name);
+        }
+        if (!d3dcompiler)
+            continue;
+
+        p->D3DCompile = (void *) GetProcAddress(d3dcompiler, "D3DCompile");
+        if (!p->D3DCompile) {
+            // This DLL is unusable. Unload it again instead of leaking the
+            // module handle, and fall back to the remaining candidates
+            // rather than giving up on the first broken one.
+            FreeLibrary(d3dcompiler);
+            continue;
+        }
+
+        p->d3d_compiler_ver = pl_get_dll_version(compiler_dlls[i].name);
+        return true;
+    }
+
+    return false;
+}
+
+// Function table of the D3D11 backend. It is only ever copied by value into
+// `struct pl_gpu_d3d11::impl` (see pl_gpu_create_d3d11), so the template
+// itself is immutable and declared const.
+static const struct pl_gpu_fns pl_fns_d3d11 = {
+    .tex_create = pl_d3d11_tex_create,
+    .tex_destroy = pl_d3d11_tex_destroy,
+    .tex_invalidate = pl_d3d11_tex_invalidate,
+    .tex_clear_ex = pl_d3d11_tex_clear_ex,
+    .tex_blit = pl_d3d11_tex_blit,
+    .tex_upload = pl_d3d11_tex_upload,
+    .tex_download = pl_d3d11_tex_download,
+    .buf_create = pl_d3d11_buf_create,
+    .buf_destroy = pl_d3d11_buf_destroy,
+    .buf_write = pl_d3d11_buf_write,
+    .buf_read = pl_d3d11_buf_read,
+    .buf_copy = pl_d3d11_buf_copy,
+    .desc_namespace = d3d11_desc_namespace,
+    .pass_create = pl_d3d11_pass_create,
+    .pass_destroy = pl_d3d11_pass_destroy,
+    .pass_run = pl_d3d11_pass_run,
+    .timer_create = d3d11_timer_create,
+    .timer_destroy = d3d11_timer_destroy,
+    .timer_query = d3d11_timer_query,
+    .gpu_flush = d3d11_gpu_flush,
+    .gpu_finish = d3d11_gpu_finish,
+    .gpu_is_failed = d3d11_gpu_is_failed,
+    .destroy = d3d11_gpu_destroy,
+};
+
+// Create the pl_gpu for a D3D11 context.
+//
+// Takes its own reference on `ctx->dev` (which must be non-NULL), probes the
+// available D3D11.x interfaces, derives limits and GLSL capabilities from the
+// feature level, loads the D3DCompiler DLL, sets up the format list and
+// creates the shared state objects and the resources needed by finish().
+//
+// Returns the result of pl_gpu_finalize() on success. On failure, everything
+// created so far is torn down via d3d11_gpu_destroy() and NULL is returned.
+pl_gpu pl_gpu_create_d3d11(struct d3d11_ctx *ctx)
+{
+    pl_assert(ctx->dev);
+    IDXGIDevice1 *dxgi_dev = NULL;
+    IDXGIAdapter1 *adapter = NULL;
+    IDXGIAdapter4 *adapter4 = NULL;
+    bool success = false;
+    HRESULT hr;
+
+    struct pl_gpu_t *gpu = pl_zalloc_obj(NULL, gpu, struct pl_gpu_d3d11);
+    gpu->log = ctx->log;
+
+    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+    uint32_t spirv_ver = PL_MIN(SPV_VERSION, PL_MAX_SPIRV_VER);
+    *p = (struct pl_gpu_d3d11) {
+        .ctx = ctx,
+        .impl = pl_fns_d3d11,
+        .dev = ctx->dev,
+        .spirv = pl_spirv_create(ctx->log, (struct pl_spirv_version) {
+            .env_version = pl_spirv_version_to_vulkan(spirv_ver),
+            .spv_version = spirv_ver,
+        }),
+        .vbuf.bind_flags = D3D11_BIND_VERTEX_BUFFER,
+        .ibuf.bind_flags = D3D11_BIND_INDEX_BUFFER,
+    };
+
+    // Take our own reference before the first `goto error`: the error path
+    // runs d3d11_gpu_destroy(), which releases p->dev. Without this, a failed
+    // pl_spirv_create() would drop a reference that belongs to `ctx`.
+    ID3D11Device_AddRef(p->dev);
+
+    if (!p->spirv)
+        goto error;
+
+    ID3D11Device_GetImmediateContext(p->dev, &p->imm);
+
+    // Check D3D11.1 interfaces
+    hr = ID3D11Device_QueryInterface(p->dev, &IID_ID3D11Device1,
+                                     (void **) &p->dev1);
+    if (SUCCEEDED(hr)) {
+        p->minor = 1;
+        ID3D11Device1_GetImmediateContext1(p->dev1, &p->imm1);
+    }
+
+    // Check D3D11.4 interfaces
+    hr = ID3D11Device_QueryInterface(p->dev, &IID_ID3D11Device5,
+                                     (void **) &p->dev5);
+    if (SUCCEEDED(hr)) {
+        // There is no GetImmediateContext4 method
+        hr = ID3D11DeviceContext_QueryInterface(p->imm, &IID_ID3D11DeviceContext4,
+                                                (void **) &p->imm4);
+        if (SUCCEEDED(hr))
+            p->minor = 4;
+    }
+
+    PL_INFO(gpu, "Using Direct3D 11.%d runtime", p->minor);
+
+    D3D(ID3D11Device_QueryInterface(p->dev, &IID_IDXGIDevice1, (void **) &dxgi_dev));
+    D3D(IDXGIDevice1_GetParent(dxgi_dev, &IID_IDXGIAdapter1, (void **) &adapter));
+
+    DXGI_ADAPTER_DESC1 adapter_desc = {0};
+    IDXGIAdapter1_GetDesc1(adapter, &adapter_desc);
+
+    // No resource can be larger than max_res_size in bytes
+    unsigned int max_res_size = PL_CLAMP(
+        D3D11_REQ_RESOURCE_SIZE_IN_MEGABYTES_EXPRESSION_B_TERM * adapter_desc.DedicatedVideoMemory,
+        D3D11_REQ_RESOURCE_SIZE_IN_MEGABYTES_EXPRESSION_A_TERM * 1024u * 1024u,
+        D3D11_REQ_RESOURCE_SIZE_IN_MEGABYTES_EXPRESSION_C_TERM * 1024u * 1024u);
+
+    gpu->glsl = (struct pl_glsl_version) {
+        .version = 450,
+        .vulkan = true,
+    };
+
+    gpu->limits = (struct pl_gpu_limits) {
+        .max_buf_size = max_res_size,
+        .max_ssbo_size = max_res_size,
+        .max_vbo_size = max_res_size,
+        .align_vertex_stride = 1,
+
+        // Make up some values
+        .align_tex_xfer_offset = 32,
+        .align_tex_xfer_pitch = 1,
+        .fragment_queues = 1,
+    };
+
+    p->fl = ID3D11Device_GetFeatureLevel(p->dev);
+
+    // If we're not using FL9_x, we can use the same suballocated buffer as a
+    // vertex buffer and index buffer
+    if (p->fl >= D3D_FEATURE_LEVEL_10_0)
+        p->vbuf.bind_flags |= D3D11_BIND_INDEX_BUFFER;
+
+    if (p->fl >= D3D_FEATURE_LEVEL_10_0) {
+        gpu->limits.max_ubo_size = D3D11_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * CBUF_ELEM;
+    } else {
+        // 10level9 restriction:
+        // https://docs.microsoft.com/en-us/windows/win32/direct3d11/d3d11-graphics-reference-10level9-context
+        gpu->limits.max_ubo_size = 255 * CBUF_ELEM;
+    }
+
+    if (p->fl >= D3D_FEATURE_LEVEL_11_0) {
+        gpu->limits.max_tex_1d_dim = D3D11_REQ_TEXTURE1D_U_DIMENSION;
+        gpu->limits.max_tex_2d_dim = D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION;
+        gpu->limits.max_tex_3d_dim = D3D11_REQ_TEXTURE3D_U_V_OR_W_DIMENSION;
+    } else if (p->fl >= D3D_FEATURE_LEVEL_10_0) {
+        gpu->limits.max_tex_1d_dim = D3D10_REQ_TEXTURE1D_U_DIMENSION;
+        gpu->limits.max_tex_2d_dim = D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION;
+        gpu->limits.max_tex_3d_dim = D3D10_REQ_TEXTURE3D_U_V_OR_W_DIMENSION;
+    } else if (p->fl >= D3D_FEATURE_LEVEL_9_3) {
+        gpu->limits.max_tex_2d_dim = D3D_FL9_3_REQ_TEXTURE2D_U_OR_V_DIMENSION;
+        // Same limit as FL9_1
+        gpu->limits.max_tex_3d_dim = D3D_FL9_1_REQ_TEXTURE3D_U_V_OR_W_DIMENSION;
+    } else {
+        gpu->limits.max_tex_2d_dim = D3D_FL9_1_REQ_TEXTURE2D_U_OR_V_DIMENSION;
+        gpu->limits.max_tex_3d_dim = D3D_FL9_1_REQ_TEXTURE3D_U_V_OR_W_DIMENSION;
+    }
+
+    if (p->fl >= D3D_FEATURE_LEVEL_10_0) {
+        gpu->limits.max_buffer_texels =
+            1 << D3D11_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP;
+    }
+
+    if (p->fl >= D3D_FEATURE_LEVEL_11_0) {
+        gpu->glsl.compute = true;
+        gpu->limits.compute_queues = 1;
+        // Set `gpu->limits.blittable_1d_3d`, since `pl_tex_blit_compute`, which
+        // is used to emulate blits on 11_0 and up, supports 1D and 3D textures
+        gpu->limits.blittable_1d_3d = true;
+
+        gpu->glsl.max_shmem_size = D3D11_CS_TGSM_REGISTER_COUNT * sizeof(float);
+        gpu->glsl.max_group_threads = D3D11_CS_THREAD_GROUP_MAX_THREADS_PER_GROUP;
+        gpu->glsl.max_group_size[0] = D3D11_CS_THREAD_GROUP_MAX_X;
+        gpu->glsl.max_group_size[1] = D3D11_CS_THREAD_GROUP_MAX_Y;
+        gpu->glsl.max_group_size[2] = D3D11_CS_THREAD_GROUP_MAX_Z;
+        gpu->limits.max_dispatch[0] = gpu->limits.max_dispatch[1] =
+            gpu->limits.max_dispatch[2] =
+            D3D11_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION;
+    }
+
+    if (p->fl >= D3D_FEATURE_LEVEL_11_0) {
+        // The offset limits are defined by HLSL:
+        // https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/gather4-po--sm5---asm-
+        gpu->glsl.min_gather_offset = -32;
+        gpu->glsl.max_gather_offset = 31;
+    } else if (p->fl >= D3D_FEATURE_LEVEL_10_1) {
+        // SM4.1 has no gather4_po, so the offset must be specified by an
+        // immediate with a range of [-8, 7]
+        // https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/gather4--sm4-1---asm-
+        // https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/sample--sm4---asm-#address-offset
+        gpu->glsl.min_gather_offset = -8;
+        gpu->glsl.max_gather_offset = 7;
+    }
+
+    if (p->fl >= D3D_FEATURE_LEVEL_10_0) {
+        p->max_srvs = D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT;
+    } else {
+        // 10level9 restriction:
+        // https://docs.microsoft.com/en-us/windows/win32/direct3d11/d3d11-graphics-reference-10level9-context
+        p->max_srvs = 8;
+    }
+
+    if (p->fl >= D3D_FEATURE_LEVEL_11_1) {
+        p->max_uavs = D3D11_1_UAV_SLOT_COUNT;
+    } else {
+        p->max_uavs = D3D11_PS_CS_UAV_REGISTER_COUNT;
+    }
+
+    if (!load_d3d_compiler(gpu)) {
+        PL_FATAL(gpu, "Could not find D3DCompiler DLL");
+        goto error;
+    }
+    PL_INFO(gpu, "D3DCompiler version: %u.%u.%u.%u",
+            p->d3d_compiler_ver.major, p->d3d_compiler_ver.minor,
+            p->d3d_compiler_ver.build, p->d3d_compiler_ver.revision);
+
+    // Detect support for timestamp queries. Some FL9_x devices don't support them.
+    // Passing NULL as the output only validates the description.
+    hr = ID3D11Device_CreateQuery(p->dev,
+        &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP }, NULL);
+    p->has_timestamp_queries = SUCCEEDED(hr);
+
+    pl_d3d11_setup_formats(gpu);
+
+    // The rasterizer state never changes, so create it here
+    D3D11_RASTERIZER_DESC rdesc = {
+        .FillMode = D3D11_FILL_SOLID,
+        .CullMode = D3D11_CULL_NONE,
+        .FrontCounterClockwise = FALSE,
+        .DepthClipEnable = TRUE, // Required for 10level9
+        .ScissorEnable = TRUE,
+    };
+    D3D(ID3D11Device_CreateRasterizerState(p->dev, &rdesc, &p->rstate));
+
+    // The depth stencil state never changes either, and we only set it to turn
+    // depth testing off so the debug layer doesn't complain about an unbound
+    // depth buffer
+    D3D11_DEPTH_STENCIL_DESC dsdesc = {
+        .DepthEnable = FALSE,
+        .DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ALL,
+        .DepthFunc = D3D11_COMPARISON_LESS,
+        .StencilReadMask = D3D11_DEFAULT_STENCIL_READ_MASK,
+        .StencilWriteMask = D3D11_DEFAULT_STENCIL_WRITE_MASK,
+        .FrontFace = {
+            .StencilFailOp = D3D11_STENCIL_OP_KEEP,
+            .StencilDepthFailOp = D3D11_STENCIL_OP_KEEP,
+            .StencilPassOp = D3D11_STENCIL_OP_KEEP,
+            .StencilFunc = D3D11_COMPARISON_ALWAYS,
+        },
+        .BackFace = {
+            .StencilFailOp = D3D11_STENCIL_OP_KEEP,
+            .StencilDepthFailOp = D3D11_STENCIL_OP_KEEP,
+            .StencilPassOp = D3D11_STENCIL_OP_KEEP,
+            .StencilFunc = D3D11_COMPARISON_ALWAYS,
+        },
+    };
+    D3D(ID3D11Device_CreateDepthStencilState(p->dev, &dsdesc, &p->dsstate));
+
+    // Initialize the samplers
+    for (int sample_mode = 0; sample_mode < PL_TEX_SAMPLE_MODE_COUNT; sample_mode++) {
+        for (int address_mode = 0; address_mode < PL_TEX_ADDRESS_MODE_COUNT; address_mode++) {
+            static const D3D11_TEXTURE_ADDRESS_MODE d3d_address_mode[] = {
+                [PL_TEX_ADDRESS_CLAMP] = D3D11_TEXTURE_ADDRESS_CLAMP,
+                [PL_TEX_ADDRESS_REPEAT] = D3D11_TEXTURE_ADDRESS_WRAP,
+                [PL_TEX_ADDRESS_MIRROR] = D3D11_TEXTURE_ADDRESS_MIRROR,
+            };
+            static const D3D11_FILTER d3d_filter[] = {
+                [PL_TEX_SAMPLE_NEAREST] = D3D11_FILTER_MIN_MAG_MIP_POINT,
+                [PL_TEX_SAMPLE_LINEAR] = D3D11_FILTER_MIN_MAG_MIP_LINEAR,
+            };
+
+            D3D11_SAMPLER_DESC sdesc = {
+                .AddressU = d3d_address_mode[address_mode],
+                .AddressV = d3d_address_mode[address_mode],
+                .AddressW = d3d_address_mode[address_mode],
+                .ComparisonFunc = D3D11_COMPARISON_NEVER,
+                .MinLOD = 0,
+                .MaxLOD = D3D11_FLOAT32_MAX,
+                .MaxAnisotropy = 1,
+                .Filter = d3d_filter[sample_mode],
+            };
+            D3D(ID3D11Device_CreateSamplerState(p->dev, &sdesc,
+                &p->samplers[sample_mode][address_mode]));
+        }
+    }
+
+    hr = IDXGIAdapter1_QueryInterface(adapter, &IID_IDXGIAdapter4,
+                                      (void **) &adapter4);
+    if (SUCCEEDED(hr)) {
+        DXGI_ADAPTER_DESC3 adapter_desc3 = {0};
+        IDXGIAdapter4_GetDesc3(adapter4, &adapter_desc3);
+
+        p->has_monitored_fences =
+            adapter_desc3.Flags & DXGI_ADAPTER_FLAG3_SUPPORT_MONITORED_FENCES;
+    }
+
+    // Try to create a D3D11.4 fence object to wait on in pl_gpu_finish()
+    if (p->dev5 && p->has_monitored_fences) {
+        hr = ID3D11Device5_CreateFence(p->dev5, 0, D3D11_FENCE_FLAG_NONE,
+                                       &IID_ID3D11Fence,
+                                       (void **) &p->finish_fence);
+        if (SUCCEEDED(hr)) {
+            p->finish_event = CreateEventW(NULL, FALSE, FALSE, NULL);
+            if (!p->finish_event) {
+                PL_ERR(gpu, "Failed to create finish() event");
+                goto error;
+            }
+        }
+    }
+
+    // If fences are not available, we will have to poll an event query instead
+    if (!p->finish_fence) {
+        // Buffers for dummy copy/readback (see d3d11_gpu_finish())
+        p->finish_buf_src = pl_buf_create(gpu, pl_buf_params(
+            .size = sizeof(uint32_t),
+            .drawable = true, // Make these vertex buffers for 10level9
+            .initial_data = &(uint32_t) {0x11223344},
+        ));
+        p->finish_buf_dst = pl_buf_create(gpu, pl_buf_params(
+            .size = sizeof(uint32_t),
+            .host_readable = true,
+            .drawable = true,
+        ));
+
+        D3D(ID3D11Device_CreateQuery(p->dev,
+            &(D3D11_QUERY_DESC) { D3D11_QUERY_EVENT }, &p->finish_query));
+    }
+
+    pl_d3d11_flush_message_queue(ctx, "After gpu create");
+
+    success = true;
+error:
+    // The DXGI interfaces are only needed during creation
+    SAFE_RELEASE(dxgi_dev);
+    SAFE_RELEASE(adapter);
+    SAFE_RELEASE(adapter4);
+    if (success) {
+        return pl_gpu_finalize(gpu);
+    } else {
+        d3d11_gpu_destroy(gpu);
+        return NULL;
+    }
+}
diff --git a/src/d3d11/gpu.h b/src/d3d11/gpu.h
new file mode 100644
index 0000000..cbc706a
--- /dev/null
+++ b/src/d3d11/gpu.h
@@ -0,0 +1,212 @@
+/*
+ * This file is part of libplacebo.
+ *
+ * libplacebo is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libplacebo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <stdalign.h>
+#include <d3d11_4.h>
+#include <dxgi1_6.h>
+#include <d3dcompiler.h>
+#include <spirv_cross_c.h>
+
+#include "../gpu.h"
+#include "../glsl/spirv.h"
+
+#include "common.h"
+#include "utils.h"
+
+pl_gpu pl_gpu_create_d3d11(struct d3d11_ctx *ctx);
+
+// --- pl_gpu internal structs and helpers
+
+// Size of one constant in a constant buffer
+#define CBUF_ELEM (sizeof(float[4]))
+
+// State of one streaming buffer (see `vbuf`/`ibuf` in struct pl_gpu_d3d11).
+// NOTE(review): the suballocation logic lives outside this header; the
+// per-field notes below are inferred from names and usage - confirm.
+struct d3d_stream_buf {
+ UINT bind_flags; // D3D11_BIND_* flags, initialized in pl_gpu_create_d3d11
+ ID3D11Buffer *buf; // Released in d3d11_gpu_destroy
+ size_t size; // presumably the allocated size of `buf` in bytes
+ size_t used; // presumably the number of bytes already handed out
+ unsigned int align; // presumably the alignment applied to suballocations
+};
+
+// Private data of a D3D11 pl_gpu, obtained via PL_PRIV(gpu). Everything in
+// here is initialized by pl_gpu_create_d3d11() and torn down by
+// d3d11_gpu_destroy() (both in gpu.c).
+struct pl_gpu_d3d11 {
+ // Must stay the first member: pl_d3d11_get() reads the private data as a
+ // `struct pl_gpu_fns` to identify the backend
+ struct pl_gpu_fns impl;
+ struct d3d11_ctx *ctx;
+ // Device and immediate context, plus the newer interface versions of each.
+ // The versioned pointers are NULL when the runtime doesn't provide them.
+ ID3D11Device *dev;
+ ID3D11Device1 *dev1;
+ ID3D11Device5 *dev5;
+ ID3D11DeviceContext *imm;
+ ID3D11DeviceContext1 *imm1;
+ ID3D11DeviceContext4 *imm4;
+
+ // The Direct3D 11 minor version number
+ int minor;
+
+ pl_spirv spirv;
+
+ // Entry point resolved from the D3DCompiler DLL, and that DLL's version
+ pD3DCompile D3DCompile;
+ struct dll_version d3d_compiler_ver;
+
+ // Device capabilities
+ D3D_FEATURE_LEVEL fl;
+ bool has_timestamp_queries;
+ bool has_monitored_fences;
+
+ // Slot counts, chosen from the feature level
+ int max_srvs;
+ int max_uavs;
+
+ // Streaming vertex and index buffers
+ struct d3d_stream_buf vbuf;
+ struct d3d_stream_buf ibuf;
+
+ // Shared rasterizer state
+ ID3D11RasterizerState *rstate;
+
+ // Shared depth-stencil state
+ ID3D11DepthStencilState *dsstate;
+
+ // Array of ID3D11SamplerStates for every combination of sample/address modes
+ ID3D11SamplerState *samplers[PL_TEX_SAMPLE_MODE_COUNT][PL_TEX_ADDRESS_MODE_COUNT];
+
+ // Resources for finish()
+ // Either the fence/value/event triple is used, or (when no fence could be
+ // created) the event query together with the two dummy buffers
+ ID3D11Fence *finish_fence;
+ uint64_t finish_value;
+ HANDLE finish_event;
+ ID3D11Query *finish_query;
+ pl_buf finish_buf_src;
+ pl_buf finish_buf_dst;
+};
+
+void pl_d3d11_setup_formats(struct pl_gpu_t *gpu);
+
+void pl_d3d11_timer_start(pl_gpu gpu, pl_timer timer);
+void pl_d3d11_timer_end(pl_gpu gpu, pl_timer timer);
+
+// Private data of a D3D11 pl_buf. All COM pointers are released in
+// pl_d3d11_buf_destroy().
+struct pl_buf_d3d11 {
+ ID3D11Buffer *buf;
+ ID3D11Buffer *staging;
+ // Views of the buffer. NOTE(review): "raw" vs. "texel" presumably
+ // distinguishes untyped from format-typed access - confirm in gpu_buf.c
+ ID3D11ShaderResourceView *raw_srv;
+ ID3D11UnorderedAccessView *raw_uav;
+ ID3D11ShaderResourceView *texel_srv;
+ ID3D11UnorderedAccessView *texel_uav;
+
+ // NOTE(review): presumably the system memory mirror mentioned at
+ // pl_d3d11_buf_resolve(), with `dirty` marking it as newer than `buf` -
+ // confirm in gpu_buf.c
+ char *data;
+ bool dirty;
+};
+
+void pl_d3d11_buf_destroy(pl_gpu gpu, pl_buf buf);
+pl_buf pl_d3d11_buf_create(pl_gpu gpu, const struct pl_buf_params *params);
+void pl_d3d11_buf_write(pl_gpu gpu, pl_buf buf, size_t offset, const void *data,
+ size_t size);
+bool pl_d3d11_buf_read(pl_gpu gpu, pl_buf buf, size_t offset, void *dest,
+ size_t size);
+void pl_d3d11_buf_copy(pl_gpu gpu, pl_buf dst, size_t dst_offset, pl_buf src,
+ size_t src_offset, size_t size);
+
+// Ensure a buffer is up-to-date with its system memory mirror before it is used
+void pl_d3d11_buf_resolve(pl_gpu gpu, pl_buf buf);
+
+// Private data of a D3D11 pl_tex. Only one of the 1D/2D/3D pointers of each
+// group is expected to be set, with `res`/`staging` aliasing it.
+struct pl_tex_d3d11 {
+ // res mirrors one of tex1d, tex2d or tex3d for convenience. It does not
+ // hold an additional reference to the texture object.
+ ID3D11Resource *res;
+
+ ID3D11Texture1D *tex1d;
+ ID3D11Texture2D *tex2d;
+ ID3D11Texture3D *tex3d;
+ // NOTE(review): presumably the array slice of the texture this pl_tex
+ // refers to - confirm in gpu_tex.c
+ int array_slice;
+
+ // Mirrors one of staging1d, staging2d, or staging3d, and doesn't hold a ref
+ ID3D11Resource *staging;
+
+ // Staging textures for pl_tex_download
+ ID3D11Texture1D *staging1d;
+ ID3D11Texture2D *staging2d;
+ ID3D11Texture3D *staging3d;
+
+ // Views of the texture for sampling, rendering and storage access
+ ID3D11ShaderResourceView *srv;
+ ID3D11RenderTargetView *rtv;
+ ID3D11UnorderedAccessView *uav;
+
+ // for tex_upload/download fallback code
+ pl_fmt texel_fmt;
+};
+
+void pl_d3d11_tex_destroy(pl_gpu gpu, pl_tex tex);
+pl_tex pl_d3d11_tex_create(pl_gpu gpu, const struct pl_tex_params *params);
+void pl_d3d11_tex_invalidate(pl_gpu gpu, pl_tex tex);
+void pl_d3d11_tex_clear_ex(pl_gpu gpu, pl_tex tex,
+ const union pl_clear_color color);
+void pl_d3d11_tex_blit(pl_gpu gpu, const struct pl_tex_blit_params *params);
+bool pl_d3d11_tex_upload(pl_gpu gpu, const struct pl_tex_transfer_params *params);
+bool pl_d3d11_tex_download(pl_gpu gpu, const struct pl_tex_transfer_params *params);
+
+// Constant buffer layout used for gl_NumWorkGroups emulation
+// The alignment requirement pads the struct to a whole constant (CBUF_ELEM
+// bytes), even though only three 32-bit values are stored.
+struct d3d_num_workgroups_buf {
+ alignas(CBUF_ELEM) uint32_t num_wgs[3];
+};
+
+// Special (negative) values that may appear in the binding lists of
+// struct d3d_pass_stage / struct pl_pass_d3d11 in place of a descriptor index
+enum {
+ HLSL_BINDING_NOT_USED = -1, // Slot should always be bound as NULL
+ HLSL_BINDING_NUM_WORKGROUPS = -2, // Slot used for gl_NumWorkGroups emulation
+};
+
+// Represents a specific shader stage in a pl_pass (VS, PS, CS)
+struct d3d_pass_stage {
+ // Lists for each resource type, to simplify binding in pl_pass_run. Indexes
+ // match the index of the arrays passed to the ID3D11DeviceContext methods.
+ // Entries are the index of pass->params.descriptors which should be bound
+ // in that position, or a HLSL_BINDING_* special value.
+ PL_ARRAY(int) cbvs; // constant buffers
+ PL_ARRAY(int) srvs; // shader resource views
+ PL_ARRAY(int) samplers; // sampler states
+};
+
+// Private data of a D3D11 pl_pass: the compiled shader objects, the fixed
+// pipeline state, and the binding tables consumed when the pass is run.
+struct pl_pass_d3d11 {
+ ID3D11PixelShader *ps;
+ ID3D11VertexShader *vs;
+ ID3D11ComputeShader *cs;
+ ID3D11InputLayout *layout;
+ ID3D11BlendState *bstate;
+
+ // gl_NumWorkGroups emulation
+ struct d3d_num_workgroups_buf last_num_wgs;
+ ID3D11Buffer *num_workgroups_buf;
+ bool num_workgroups_used;
+
+ // Maximum binding number
+ int max_binding;
+
+ struct d3d_pass_stage main; // PS and CS
+ struct d3d_pass_stage vertex;
+
+ // List of resources, as in `struct d3d_pass_stage`, except UAVs are shared
+ // between all shader stages
+ PL_ARRAY(int) uavs;
+
+ // Pre-allocated resource arrays to use in pl_pass_run
+ ID3D11Buffer **cbv_arr;
+ ID3D11ShaderResourceView **srv_arr;
+ ID3D11SamplerState **sampler_arr;
+ ID3D11UnorderedAccessView **uav_arr;
+};
+
+void pl_d3d11_pass_destroy(pl_gpu gpu, pl_pass pass);
+const struct pl_pass_t *pl_d3d11_pass_create(pl_gpu gpu,
+ const struct pl_pass_params *params);
+void pl_d3d11_pass_run(pl_gpu gpu, const struct pl_pass_run_params *params);
diff --git a/src/d3d11/gpu_buf.c b/src/d3d11/gpu_buf.c
new file mode 100644
index 0000000..955e6e1
--- /dev/null
+++ b/src/d3d11/gpu_buf.c
@@ -0,0 +1,310 @@
+/*
+ * This file is part of libplacebo.
+ *
+ * libplacebo is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libplacebo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "gpu.h"
+#include "formats.h"
+
+// Release every D3D11 object owned by `buf` (main buffer, staging buffer and
+// all views), flush the debug message queue, then free the pl_buf itself.
+void pl_d3d11_buf_destroy(pl_gpu gpu, pl_buf buf)
+{
+    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+    struct d3d11_ctx *ctx = p->ctx;
+    struct pl_buf_d3d11 *priv = PL_PRIV(buf);
+
+    // Resources
+    SAFE_RELEASE(priv->buf);
+    SAFE_RELEASE(priv->staging);
+
+    // Raw views
+    SAFE_RELEASE(priv->raw_srv);
+    SAFE_RELEASE(priv->raw_uav);
+
+    // Typed (texel) views
+    SAFE_RELEASE(priv->texel_srv);
+    SAFE_RELEASE(priv->texel_uav);
+
+    pl_d3d11_flush_message_queue(ctx, "After buffer destroy");
+
+    pl_free((void *) buf);
+}
+
+// Create a D3D11-backed pl_buf described by `params`.
+//
+// Sets up the main ID3D11Buffer with bind flags derived from the requested
+// usage, an optional system-memory mirror for host-writable uniform buffers,
+// raw and typed views as needed, and (for unmirrored buffers) a staging buffer
+// used for readback. Returns the new buffer, or NULL on failure, in which case
+// everything created so far is torn down by pl_d3d11_buf_destroy.
+pl_buf pl_d3d11_buf_create(pl_gpu gpu, const struct pl_buf_params *params)
+{
+ struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+ struct d3d11_ctx *ctx = p->ctx;
+
+ struct pl_buf_t *buf = pl_zalloc_obj(NULL, buf, struct pl_buf_d3d11);
+ buf->params = *params;
+ // Don't keep the caller's initial_data pointer in the stored params; it is
+ // only consumed below through `params`
+ buf->params.initial_data = NULL;
+
+ struct pl_buf_d3d11 *buf_p = PL_PRIV(buf);
+
+ D3D11_BUFFER_DESC desc = { .ByteWidth = params->size };
+
+ if (params->uniform && !params->format &&
+ (params->storable || params->drawable))
+ {
+ // TODO: Figure out what to do with these
+ PL_ERR(gpu, "Uniform buffers cannot share any other buffer type");
+ goto error;
+ }
+
+ // TODO: Distinguish between uniform buffers and texel uniform buffers.
+ // Currently we assume that if uniform and format are set, it's a texel
+ // buffer and NOT a uniform buffer.
+ if (params->uniform && !params->format) {
+ desc.BindFlags |= D3D11_BIND_CONSTANT_BUFFER;
+ desc.ByteWidth = PL_ALIGN2(desc.ByteWidth, CBUF_ELEM);
+ }
+ if (params->uniform && params->format) {
+ desc.BindFlags |= D3D11_BIND_SHADER_RESOURCE;
+ }
+ if (params->storable) {
+ desc.BindFlags |= D3D11_BIND_UNORDERED_ACCESS
+ | D3D11_BIND_SHADER_RESOURCE;
+ // Round the size up to a whole number of 32-bit elements, matching the
+ // element count used for the raw views created below
+ desc.ByteWidth = PL_ALIGN2(desc.ByteWidth, sizeof(float));
+ desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS;
+ }
+ if (params->drawable) {
+ desc.BindFlags |= D3D11_BIND_VERTEX_BUFFER;
+
+ // In FL9_x, a vertex buffer can't also be an index buffer, so index
+ // buffers are unsupported in FL9_x for now
+ if (p->fl > D3D_FEATURE_LEVEL_9_3)
+ desc.BindFlags |= D3D11_BIND_INDEX_BUFFER;
+ }
+
+ // Either the persistent mirror (when buf_p->data is set) or a temporary
+ // padded copy of the initial data
+ char *data = NULL;
+
+ // D3D11 doesn't allow partial constant buffer updates without special
+ // conditions. To support partial buffer updates, keep a mirror of the
+ // buffer data in system memory and upload the whole thing before the buffer
+ // is used.
+ //
+ // Note: We don't use a staging buffer for this because of Intel.
+ // https://github.com/mpv-player/mpv/issues/5293
+ // https://crbug.com/593024
+ if (params->uniform && !params->format && params->host_writable) {
+ data = pl_zalloc(buf, desc.ByteWidth);
+ buf_p->data = data;
+ }
+
+ D3D11_SUBRESOURCE_DATA srdata = { 0 };
+ if (params->initial_data) {
+ if (desc.ByteWidth != params->size) {
+ // If the size had to be rounded-up, uploading from
+ // params->initial_data is technically undefined behavior, so copy
+ // the initial data to an allocation first
+ if (!data)
+ data = pl_zalloc(buf, desc.ByteWidth);
+ srdata.pSysMem = data;
+ } else {
+ srdata.pSysMem = params->initial_data;
+ }
+
+ // Also seeds the mirror when one exists, even if no padding was needed
+ if (data)
+ memcpy(data, params->initial_data, params->size);
+ }
+
+ D3D(ID3D11Device_CreateBuffer(p->dev, &desc,
+ params->initial_data ? &srdata : NULL,
+ &buf_p->buf));
+
+ // Without a mirror, `data` was only a temporary padded copy of the initial
+ // data, so it's no longer needed
+ if (!buf_p->data)
+ pl_free(data);
+
+ // Create raw views for PL_DESC_BUF_STORAGE
+ if (params->storable) {
+ // A SRV is used for PL_DESC_ACCESS_READONLY
+ D3D11_SHADER_RESOURCE_VIEW_DESC sdesc = {
+ .Format = DXGI_FORMAT_R32_TYPELESS,
+ .ViewDimension = D3D11_SRV_DIMENSION_BUFFEREX,
+ .BufferEx = {
+ .NumElements =
+ PL_ALIGN2(buf->params.size, sizeof(float)) / sizeof(float),
+ .Flags = D3D11_BUFFEREX_SRV_FLAG_RAW,
+ },
+ };
+ D3D(ID3D11Device_CreateShaderResourceView(p->dev,
+ (ID3D11Resource *) buf_p->buf, &sdesc, &buf_p->raw_srv));
+
+ // A UAV is used for all other access modes
+ D3D11_UNORDERED_ACCESS_VIEW_DESC udesc = {
+ .Format = DXGI_FORMAT_R32_TYPELESS,
+ .ViewDimension = D3D11_UAV_DIMENSION_BUFFER,
+ .Buffer = {
+ .NumElements =
+ PL_ALIGN2(buf->params.size, sizeof(float)) / sizeof(float),
+ .Flags = D3D11_BUFFER_UAV_FLAG_RAW,
+ },
+ };
+ D3D(ID3D11Device_CreateUnorderedAccessView(p->dev,
+ (ID3D11Resource *) buf_p->buf, &udesc, &buf_p->raw_uav));
+ }
+
+ // Create a typed SRV for PL_BUF_TEXEL_UNIFORM and PL_BUF_TEXEL_STORAGE
+ if (params->format) {
+ if (params->uniform) {
+ D3D11_SHADER_RESOURCE_VIEW_DESC sdesc = {
+ .Format = fmt_to_dxgi(params->format),
+ .ViewDimension = D3D11_SRV_DIMENSION_BUFFER,
+ .Buffer = {
+ .NumElements =
+ PL_ALIGN(buf->params.size, buf->params.format->texel_size)
+ / buf->params.format->texel_size,
+ },
+ };
+ D3D(ID3D11Device_CreateShaderResourceView(p->dev,
+ (ID3D11Resource *) buf_p->buf, &sdesc, &buf_p->texel_srv));
+ }
+
+ // Create a typed UAV for PL_BUF_TEXEL_STORAGE
+ if (params->storable) {
+ D3D11_UNORDERED_ACCESS_VIEW_DESC udesc = {
+ .Format = fmt_to_dxgi(buf->params.format),
+ .ViewDimension = D3D11_UAV_DIMENSION_BUFFER,
+ .Buffer = {
+ .NumElements =
+ PL_ALIGN(buf->params.size, buf->params.format->texel_size)
+ / buf->params.format->texel_size,
+ },
+ };
+ D3D(ID3D11Device_CreateUnorderedAccessView(p->dev,
+ (ID3D11Resource *) buf_p->buf, &udesc, &buf_p->texel_uav));
+ }
+ }
+
+
+ if (!buf_p->data) {
+ // Create the staging buffer regardless of whether params->host_readable
+ // is set or not, so that buf_copy can copy to system-memory-backed
+ // buffers
+ // TODO: Consider sharing a big staging buffer for this, rather than
+ // having one staging buffer per buffer
+ // The descriptor is reused, so the staging buffer has the same
+ // (possibly rounded-up) ByteWidth as the main buffer
+ desc.BindFlags = 0;
+ desc.MiscFlags = 0;
+ desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
+ desc.Usage = D3D11_USAGE_STAGING;
+ D3D(ID3D11Device_CreateBuffer(p->dev, &desc, NULL, &buf_p->staging));
+ }
+
+ pl_d3d11_flush_message_queue(ctx, "After buffer create");
+
+ return buf;
+
+error:
+ // `buf` was zero-allocated, so the destroy function only sees objects that
+ // were actually created (or NULL)
+ pl_d3d11_buf_destroy(gpu, buf);
+ return NULL;
+}
+
+// Write `size` bytes from `data` into `buf`, starting at byte `offset`.
+//
+// Buffers with a system-memory mirror only have the mirror updated and are
+// flagged dirty; all other buffers are updated on the GPU right away.
+void pl_d3d11_buf_write(pl_gpu gpu, pl_buf buf, size_t offset, const void *data,
+                        size_t size)
+{
+    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+    struct pl_buf_d3d11 *buf_p = PL_PRIV(buf);
+
+    if (!buf_p->data) {
+        // No mirror: update the byte range [offset, offset + size) directly
+        D3D11_BOX region = {
+            .left = offset,
+            .top = 0,
+            .front = 0,
+            .right = offset + size,
+            .bottom = 1,
+            .back = 1,
+        };
+        ID3D11DeviceContext_UpdateSubresource(p->imm,
+            (ID3D11Resource *) buf_p->buf, 0, &region, data, 0, 0);
+        return;
+    }
+
+    // Mirrored buffer: touch only the mirror and mark it as modified
+    memcpy(buf_p->data + offset, data, size);
+    buf_p->dirty = true;
+}
+
+// Upload the system-memory mirror of `buf` to the GPU buffer if the mirror has
+// been modified since the last upload. Buffers without a mirror are written
+// directly by pl_d3d11_buf_write, so there is nothing to do for them.
+void pl_d3d11_buf_resolve(pl_gpu gpu, pl_buf buf)
+{
+    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+    struct pl_buf_d3d11 *buf_p = PL_PRIV(buf);
+
+    if (!buf_p->data || !buf_p->dirty)
+        return;
+
+    // Constant buffers can't be partially updated, so always upload the whole
+    // mirror (NULL box)
+    ID3D11DeviceContext_UpdateSubresource(p->imm, (ID3D11Resource *) buf_p->buf,
+                                          0, NULL, buf_p->data, 0, 0);
+
+    // The GPU copy now matches the mirror. Without clearing the flag, every
+    // later resolve would re-upload the whole buffer even if nothing changed.
+    buf_p->dirty = false;
+}
+
+// Read `size` bytes starting at byte `offset` of `buf` into `dest`.
+//
+// Mirrored buffers are served straight from system memory. Otherwise the whole
+// buffer is copied into its staging buffer, which is then mapped for reading.
+// Returns false if mapping the staging buffer fails.
+bool pl_d3d11_buf_read(pl_gpu gpu, pl_buf buf, size_t offset, void *dest,
+                       size_t size)
+{
+    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+    struct d3d11_ctx *ctx = p->ctx;
+    struct pl_buf_d3d11 *buf_p = PL_PRIV(buf);
+
+    // Fast path: a system-memory mirror already holds the contents
+    if (buf_p->data) {
+        memcpy(dest, buf_p->data + offset, size);
+        return true;
+    }
+
+    ID3D11Resource *gpu_res = (ID3D11Resource *) buf_p->buf;
+    ID3D11Resource *staging_res = (ID3D11Resource *) buf_p->staging;
+    ID3D11DeviceContext_CopyResource(p->imm, staging_res, gpu_res);
+
+    D3D11_MAPPED_SUBRESOURCE lock;
+    D3D(ID3D11DeviceContext_Map(p->imm, (ID3D11Resource *) buf_p->staging, 0,
+                                D3D11_MAP_READ, 0, &lock));
+
+    memcpy(dest, (char *) lock.pData + offset, size);
+
+    ID3D11DeviceContext_Unmap(p->imm, staging_res, 0);
+
+    pl_d3d11_flush_message_queue(ctx, "After buffer read");
+
+    return true;
+
+error:
+    return false;
+}
+
+// Copy `size` bytes from `src` (at `src_offset`) to `dst` (at `dst_offset`).
+//
+// Either buffer may have a system-memory mirror, so the copy is routed through
+// memcpy, a buffer write, a buffer read or a GPU-side copy depending on which
+// of the two buffers are mirrored.
+void pl_d3d11_buf_copy(pl_gpu gpu, pl_buf dst, size_t dst_offset, pl_buf src,
+                       size_t src_offset, size_t size)
+{
+    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+    struct d3d11_ctx *ctx = p->ctx;
+    struct pl_buf_d3d11 *src_p = PL_PRIV(src);
+    struct pl_buf_d3d11 *dst_p = PL_PRIV(dst);
+
+    const bool src_mirrored = src_p->data != NULL;
+    const bool dst_mirrored = dst_p->data != NULL;
+
+    if (!src_mirrored && !dst_mirrored) {
+        // GPU -> GPU: copy the byte range between the two resources
+        D3D11_BOX src_box = {
+            .left = src_offset,
+            .top = 0,
+            .front = 0,
+            .right = src_offset + size,
+            .bottom = 1,
+            .back = 1,
+        };
+        ID3D11DeviceContext_CopySubresourceRegion(p->imm,
+            (ID3D11Resource *) dst_p->buf, 0, dst_offset, 0, 0,
+            (ID3D11Resource *) src_p->buf, 0, &src_box);
+    } else if (!dst_mirrored) {
+        // Mirror -> GPU: behaves like a regular buffer write
+        pl_d3d11_buf_write(gpu, dst, dst_offset, src_p->data + src_offset, size);
+    } else if (!src_mirrored) {
+        // GPU -> mirror: read back into the destination's mirror
+        if (pl_d3d11_buf_read(gpu, src, src_offset, dst_p->data + dst_offset, size)) {
+            dst_p->dirty = true;
+        } else {
+            PL_ERR(gpu, "Failed to read from GPU during buffer copy");
+        }
+    } else {
+        // Mirror -> mirror: plain system memory copy
+        memcpy(dst_p->data + dst_offset, src_p->data + src_offset, size);
+        dst_p->dirty = true;
+    }
+
+    pl_d3d11_flush_message_queue(ctx, "After buffer copy");
+}
diff --git a/src/d3d11/gpu_pass.c b/src/d3d11/gpu_pass.c
new file mode 100644
index 0000000..0e46ccd
--- /dev/null
+++ b/src/d3d11/gpu_pass.c
@@ -0,0 +1,1293 @@
+/*
+ * This file is part of libplacebo.
+ *
+ * libplacebo is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libplacebo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "gpu.h"
+#include "formats.h"
+#include "glsl/spirv.h"
+#include "../cache.h"
+
+// One piece of data to be uploaded by stream_buf_upload
+struct stream_buf_slice {
+ const void *data; // in: source data
+ unsigned int size; // in: number of bytes to copy from `data`
+ unsigned int offset; // out: byte offset of the slice in the stream buffer
+};
+
+// Upload one or more slices of single-use data to a suballocated dynamic
+// buffer. Only call this once per-buffer per-pass, since it will discard or
+// reallocate the buffer when full.
+//
+// On success, returns true and sets slices[i].offset to the byte offset of
+// each slice inside stream->buf. Returns false if the data exceeds the GPU's
+// maximum buffer size or if a D3D11 call fails.
+static bool stream_buf_upload(pl_gpu gpu, struct d3d_stream_buf *stream,
+                              struct stream_buf_slice *slices, int num_slices)
+{
+    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+    struct d3d11_ctx *ctx = p->ctx;
+    unsigned int align = PL_DEF(stream->align, sizeof(float));
+
+    // Get total size, rounded up to the buffer's alignment
+    size_t size = 0;
+    for (int i = 0; i < num_slices; i++)
+        size += PL_ALIGN2(slices[i].size, align);
+
+    if (size > gpu->limits.max_buf_size) {
+        PL_ERR(gpu, "Streaming buffer is too large");
+        // This function returns bool, so a non-zero error code such as -1
+        // would be reported to the caller as success
+        return false;
+    }
+
+    // If the data doesn't fit, realloc the buffer
+    if (size > stream->size) {
+        size_t new_size = stream->size;
+        // Arbitrary base size
+        if (!new_size)
+            new_size = 16 * 1024;
+        while (new_size < size)
+            new_size *= 2;
+        new_size = PL_MIN(new_size, gpu->limits.max_buf_size);
+
+        ID3D11Buffer *new_buf;
+        D3D11_BUFFER_DESC vbuf_desc = {
+            .ByteWidth = new_size,
+            .Usage = D3D11_USAGE_DYNAMIC,
+            .BindFlags = stream->bind_flags,
+            .CPUAccessFlags = D3D11_CPU_ACCESS_WRITE,
+        };
+        D3D(ID3D11Device_CreateBuffer(p->dev, &vbuf_desc, NULL, &new_buf));
+
+        // Only replace the old buffer once the new one exists, so a failed
+        // allocation leaves the stream untouched
+        SAFE_RELEASE(stream->buf);
+        stream->buf = new_buf;
+        stream->size = new_size;
+        stream->used = 0;
+    }
+
+    bool discard = false;
+    size_t offset = stream->used;
+    if (offset + size > stream->size) {
+        // We reached the end of the buffer, so discard and wrap around
+        discard = true;
+        offset = 0;
+    }
+
+    D3D11_MAPPED_SUBRESOURCE map = {0};
+    UINT type = discard ? D3D11_MAP_WRITE_DISCARD : D3D11_MAP_WRITE_NO_OVERWRITE;
+    D3D(ID3D11DeviceContext_Map(p->imm, (ID3D11Resource *) stream->buf, 0, type,
+                                0, &map));
+
+    // Upload each slice
+    char *cdata = map.pData;
+    stream->used = offset;
+    for (int i = 0; i < num_slices; i++) {
+        slices[i].offset = stream->used;
+        memcpy(cdata + slices[i].offset, slices[i].data, slices[i].size);
+        stream->used += PL_ALIGN2(slices[i].size, align);
+    }
+
+    ID3D11DeviceContext_Unmap(p->imm, (ID3D11Resource *) stream->buf, 0);
+
+    return true;
+
+error:
+    return false;
+}
+
+// Return the HLSL compiler target profile for a shader stage at the GPU's
+// feature level, or NULL if there is none (compute shaders on FL9_x, or an
+// unknown stage). Any feature level not listed explicitly uses the 5_0
+// profiles.
+static const char *get_shader_target(pl_gpu gpu, enum glsl_shader_stage stage)
+{
+    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+
+    switch (stage) {
+    case GLSL_SHADER_VERTEX:
+        switch (p->fl) {
+        case D3D_FEATURE_LEVEL_9_1:
+        case D3D_FEATURE_LEVEL_9_2:  return "vs_4_0_level_9_1";
+        case D3D_FEATURE_LEVEL_9_3:  return "vs_4_0_level_9_3";
+        case D3D_FEATURE_LEVEL_10_0: return "vs_4_0";
+        case D3D_FEATURE_LEVEL_10_1: return "vs_4_1";
+        default:                     return "vs_5_0";
+        }
+    case GLSL_SHADER_FRAGMENT:
+        switch (p->fl) {
+        case D3D_FEATURE_LEVEL_9_1:
+        case D3D_FEATURE_LEVEL_9_2:  return "ps_4_0_level_9_1";
+        case D3D_FEATURE_LEVEL_9_3:  return "ps_4_0_level_9_3";
+        case D3D_FEATURE_LEVEL_10_0: return "ps_4_0";
+        case D3D_FEATURE_LEVEL_10_1: return "ps_4_1";
+        default:                     return "ps_5_0";
+        }
+    case GLSL_SHADER_COMPUTE:
+        switch (p->fl) {
+        case D3D_FEATURE_LEVEL_9_1:
+        case D3D_FEATURE_LEVEL_9_2:
+        case D3D_FEATURE_LEVEL_9_3:  return NULL;
+        case D3D_FEATURE_LEVEL_10_0: return "cs_4_0";
+        case D3D_FEATURE_LEVEL_10_1: return "cs_4_1";
+        default:                     return "cs_5_0";
+        }
+    }
+
+    return NULL;
+}
+
+// Translate a GLSL shader stage into the matching SPIR-V execution model
+static SpvExecutionModel stage_to_spv(enum glsl_shader_stage stage)
+{
+    // Lookup table indexed by shader stage
+    static const SpvExecutionModel models_by_stage[] = {
+        [GLSL_SHADER_VERTEX]   = SpvExecutionModelVertex,
+        [GLSL_SHADER_FRAGMENT] = SpvExecutionModelFragment,
+        [GLSL_SHADER_COMPUTE]  = SpvExecutionModelGLCompute,
+    };
+
+    const SpvExecutionModel model = models_by_stage[stage];
+    return model;
+}
+
+// Evaluate a SPIRV-Cross C API call and, if it doesn't return SPVC_SUCCESS,
+// log the failing expression together with the context's last error string
+// and jump to the enclosing function's `error` label. Expects `gpu` and `sc`
+// to be in scope at the point of use. The expression text itself ends up in
+// the log message.
+#define SC(cmd) \
+ do { \
+ spvc_result res = (cmd); \
+ if (res != SPVC_SUCCESS) { \
+ PL_ERR(gpu, "%s: %s (%d) (%s:%d)", \
+ #cmd, sc ? spvc_context_get_last_error_string(sc) : "", \
+ res, __FILE__, __LINE__); \
+ goto error; \
+ } \
+ } while (0)
+
+// SPIRV-Cross treats a decoration as applying to a whole buffer block when all
+// of the block's members carry it, even though decorations such as
+// SpvDecorationNonWritable are really found on the members. That decision
+// determines, for example, whether an SSBO is bound through a SRV or a UAV.
+//
+// Queries the block-level decorations of `id` and stores in *out whether
+// `decoration` is among them. *out is only written when the query succeeds;
+// the result of the query is returned.
+static spvc_result buffer_block_has_decoration(spvc_compiler sc_comp,
+                                               spvc_variable_id id,
+                                               SpvDecoration decoration,
+                                               bool *out)
+{
+    const SpvDecoration *list;
+    size_t count = 0;
+
+    spvc_result res = spvc_compiler_get_buffer_block_decorations(sc_comp, id,
+                                                                 &list, &count);
+    if (res != SPVC_SUCCESS)
+        return res;
+
+    bool found = false;
+    for (size_t n = 0; n < count && !found; n++)
+        found = list[n] == decoration;
+
+    *out = found;
+    return res;
+}
+
+// Allocate HLSL registers for every active resource of type `res_type` in
+// descriptor set 0 of the shader being compiled by `sc_comp`.
+//
+// Each resource's binding number is appended to the matching register list
+// (CBV/SRV/sampler lists of `pass_s`, or the pass-wide UAV list), and the
+// chosen registers are passed to SPIRV-Cross as a HLSL resource binding. Also
+// tracks the highest binding number seen in the pass. Returns false if a
+// SPIRV-Cross call fails or a register limit is exceeded.
+static bool alloc_hlsl_reg_bindings(pl_gpu gpu, pl_pass pass,
+                                    struct d3d_pass_stage *pass_s,
+                                    spvc_context sc,
+                                    spvc_compiler sc_comp,
+                                    spvc_resources resources,
+                                    spvc_resource_type res_type,
+                                    enum glsl_shader_stage stage)
+{
+    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+    struct pl_pass_d3d11 *pass_p = PL_PRIV(pass);
+    const spvc_reflected_resource *res_list;
+    size_t res_count;
+
+    SC(spvc_resources_get_resource_list_for_type(resources, res_type,
+                                                 &res_list, &res_count));
+
+    // In a raster pass, one of the UAV slots is used by the runtime for the RTV
+    int uav_offset = stage == GLSL_SHADER_COMPUTE ? 0 : 1;
+    int max_uavs = p->max_uavs - uav_offset;
+
+    for (size_t i = 0; i < res_count; i++) {
+        unsigned int binding = spvc_compiler_get_decoration(sc_comp,
+            res_list[i].id, SpvDecorationBinding);
+        unsigned int descriptor_set = spvc_compiler_get_decoration(sc_comp,
+            res_list[i].id, SpvDecorationDescriptorSet);
+        // Only descriptor set 0 is mapped to HLSL registers
+        if (descriptor_set != 0)
+            continue;
+
+        pass_p->max_binding = PL_MAX(pass_p->max_binding, binding);
+
+        spvc_hlsl_resource_binding hlslbind;
+        spvc_hlsl_resource_binding_init(&hlslbind);
+        hlslbind.stage = stage_to_spv(stage);
+        hlslbind.binding = binding;
+        hlslbind.desc_set = descriptor_set;
+
+        // Work out which kinds of register this resource needs
+        bool has_cbv = false, has_sampler = false, has_srv = false, has_uav = false;
+        switch (res_type) {
+        case SPVC_RESOURCE_TYPE_UNIFORM_BUFFER:
+            has_cbv = true;
+            break;
+        case SPVC_RESOURCE_TYPE_STORAGE_BUFFER: {
+            bool non_writable_bb = false;
+            SC(buffer_block_has_decoration(sc_comp, res_list[i].id,
+                                           SpvDecorationNonWritable, &non_writable_bb));
+            if (non_writable_bb) {
+                has_srv = true;
+            } else {
+                has_uav = true;
+            }
+            break;
+        }
+        case SPVC_RESOURCE_TYPE_STORAGE_IMAGE: {
+            bool non_writable = spvc_compiler_has_decoration(sc_comp,
+                res_list[i].id, SpvDecorationNonWritable);
+            if (non_writable) {
+                has_srv = true;
+            } else {
+                has_uav = true;
+            }
+            break;
+        }
+        case SPVC_RESOURCE_TYPE_SEPARATE_IMAGE:
+            has_srv = true;
+            break;
+        case SPVC_RESOURCE_TYPE_SAMPLED_IMAGE: {
+            spvc_type type = spvc_compiler_get_type_handle(sc_comp,
+                res_list[i].type_id);
+            SpvDim dimension = spvc_type_get_image_dimension(type);
+            // Uniform texel buffers are technically sampled images, but they
+            // aren't sampled from, so don't allocate a sampler
+            if (dimension != SpvDimBuffer)
+                has_sampler = true;
+            has_srv = true;
+            break;
+        }
+        default:
+            break;
+        }
+
+        if (has_cbv) {
+            hlslbind.cbv.register_binding = pass_s->cbvs.num;
+            PL_ARRAY_APPEND(pass, pass_s->cbvs, binding);
+            if (pass_s->cbvs.num > D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT) {
+                PL_ERR(gpu, "Too many constant buffers in shader");
+                goto error;
+            }
+        }
+
+        if (has_sampler) {
+            hlslbind.sampler.register_binding = pass_s->samplers.num;
+            PL_ARRAY_APPEND(pass, pass_s->samplers, binding);
+            if (pass_s->samplers.num > D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT) {
+                PL_ERR(gpu, "Too many samplers in shader");
+                goto error;
+            }
+        }
+
+        if (has_srv) {
+            hlslbind.srv.register_binding = pass_s->srvs.num;
+            PL_ARRAY_APPEND(pass, pass_s->srvs, binding);
+            if (pass_s->srvs.num > p->max_srvs) {
+                PL_ERR(gpu, "Too many SRVs in shader");
+                goto error;
+            }
+        }
+
+        if (has_uav) {
+            // UAV registers are shared between the vertex and fragment shaders
+            // in a raster pass, so check if the UAV for this resource has
+            // already been allocated
+            int uav_idx = -1;
+            for (int j = 0; j < pass_p->uavs.num; j++) {
+                if (pass_p->uavs.elem[j] == binding) {
+                    uav_idx = j;
+                    break;
+                }
+            }
+
+            if (uav_idx < 0) {
+                uav_idx = pass_p->uavs.num;
+                PL_ARRAY_APPEND(pass, pass_p->uavs, binding);
+                if (pass_p->uavs.num > max_uavs) {
+                    PL_ERR(gpu, "Too many UAVs in shader");
+                    goto error;
+                }
+            }
+
+            // Always tell SPIRV-Cross which register the UAV lives in, also
+            // when it was allocated by an earlier stage. Otherwise this stage
+            // would keep the default register from
+            // spvc_hlsl_resource_binding_init instead of the shared one.
+            hlslbind.uav.register_binding = uav_idx + uav_offset;
+        }
+
+        SC(spvc_compiler_hlsl_add_resource_binding(sc_comp, &hlslbind));
+    }
+
+    return true;
+error:
+    return false;
+}
+
+// Human-readable shader stage names, indexed by enum glsl_shader_stage. Used
+// when logging the generated HLSL source.
+static const char *shader_names[] = {
+ [GLSL_SHADER_VERTEX] = "vertex",
+ [GLSL_SHADER_FRAGMENT] = "fragment",
+ [GLSL_SHADER_COMPUTE] = "compute",
+};
+
+// Compile a GLSL shader for the given stage down to DXBC.
+//
+// The shader goes through three translations: GLSL -> SPIR-V (via the GPU's
+// SPIR-V compiler), SPIR-V -> HLSL (via SPIRV-Cross, which is also where HLSL
+// registers get allocated and recorded in `pass_s` / the pass) and HLSL ->
+// DXBC (via D3DCompile). Returns the bytecode blob, which the caller must
+// release, or NULL on failure. The generated HLSL is logged at debug level on
+// success and at error level on failure.
+static ID3DBlob *shader_compile_glsl(pl_gpu gpu, pl_pass pass,
+                                     struct d3d_pass_stage *pass_s,
+                                     enum glsl_shader_stage stage,
+                                     const char *glsl)
+{
+    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+    struct pl_pass_d3d11 *pass_p = PL_PRIV(pass);
+    void *tmp = pl_tmp(NULL);
+    spvc_context sc = NULL;
+    spvc_compiler sc_comp = NULL;
+    const char *hlsl = NULL;
+    ID3DBlob *out = NULL;
+    ID3DBlob *errors = NULL;
+    HRESULT hr;
+
+    // Some stage / feature level combinations (e.g. compute on FL9_x) have no
+    // shader target. Fail early instead of handing a NULL target to D3DCompile.
+    const char *target = get_shader_target(gpu, stage);
+    if (!target) {
+        PL_ERR(gpu, "No HLSL shader target for %s shaders at this feature level",
+               shader_names[stage]);
+        goto error;
+    }
+
+    pl_clock_t start = pl_clock_now();
+    pl_str spirv = pl_spirv_compile_glsl(p->spirv, tmp, gpu->glsl, stage, glsl);
+    if (!spirv.len)
+        goto error;
+
+    pl_clock_t after_glsl = pl_clock_now();
+    pl_log_cpu_time(gpu->log, start, after_glsl, "translating GLSL to SPIR-V");
+
+    SC(spvc_context_create(&sc));
+
+    spvc_parsed_ir sc_ir;
+    SC(spvc_context_parse_spirv(sc, (SpvId *) spirv.buf,
+                                spirv.len / sizeof(SpvId), &sc_ir));
+
+    SC(spvc_context_create_compiler(sc, SPVC_BACKEND_HLSL, sc_ir,
+                                    SPVC_CAPTURE_MODE_TAKE_OWNERSHIP,
+                                    &sc_comp));
+
+    spvc_compiler_options sc_opts;
+    SC(spvc_compiler_create_compiler_options(sc_comp, &sc_opts));
+
+    int sc_shader_model;
+    if (p->fl >= D3D_FEATURE_LEVEL_11_0) {
+        sc_shader_model = 50;
+    } else if (p->fl >= D3D_FEATURE_LEVEL_10_1) {
+        sc_shader_model = 41;
+    } else {
+        sc_shader_model = 40;
+    }
+
+    SC(spvc_compiler_options_set_uint(sc_opts,
+        SPVC_COMPILER_OPTION_HLSL_SHADER_MODEL, sc_shader_model));
+
+    // Unlike Vulkan and OpenGL, in D3D11, the clip-space is "flipped" with
+    // respect to framebuffer-space. In other words, if you render to a pixel at
+    // (0, -1), you have to sample from (0, 1) to get the value back. We unflip
+    // it by setting the following option, which inserts the equivalent of
+    // `gl_Position.y = -gl_Position.y` into the vertex shader
+    if (stage == GLSL_SHADER_VERTEX) {
+        SC(spvc_compiler_options_set_bool(sc_opts,
+            SPVC_COMPILER_OPTION_FLIP_VERTEX_Y, SPVC_TRUE));
+    }
+
+    // Bind readonly images and imageBuffers as SRVs. This is done because a lot
+    // of hardware (especially FL11_x hardware) has very poor format support for
+    // reading values from UAVs. It allows the common case of readonly and
+    // writeonly images to support more formats, though the less common case of
+    // readwrite images still requires format support for UAV loads (represented
+    // by the PL_FMT_CAP_READWRITE cap in libplacebo.)
+    //
+    // Note that setting this option comes at the cost of GLSL support. Readonly
+    // and readwrite images are the same type in GLSL, but SRV and UAV bound
+    // textures are different types in HLSL, so for example, a GLSL function
+    // with an image parameter may fail to compile as HLSL if it's called with a
+    // readonly image and a readwrite image at different call sites.
+    SC(spvc_compiler_options_set_bool(sc_opts,
+        SPVC_COMPILER_OPTION_HLSL_NONWRITABLE_UAV_TEXTURE_AS_SRV, SPVC_TRUE));
+
+    SC(spvc_compiler_install_compiler_options(sc_comp, sc_opts));
+
+    spvc_set active = NULL;
+    SC(spvc_compiler_get_active_interface_variables(sc_comp, &active));
+    spvc_resources resources = NULL;
+    SC(spvc_compiler_create_shader_resources_for_active_variables(
+        sc_comp, &resources, active));
+
+    // Allocate HLSL registers for each resource type. The order determines the
+    // register numbering, so it must stay fixed. A failed allocation has
+    // already been logged; abort rather than compile a shader whose registers
+    // don't match the pass's binding lists.
+    static const spvc_resource_type res_types[] = {
+        SPVC_RESOURCE_TYPE_SAMPLED_IMAGE,
+        SPVC_RESOURCE_TYPE_SEPARATE_IMAGE,
+        SPVC_RESOURCE_TYPE_UNIFORM_BUFFER,
+        SPVC_RESOURCE_TYPE_STORAGE_BUFFER,
+        SPVC_RESOURCE_TYPE_STORAGE_IMAGE,
+    };
+    for (size_t i = 0; i < sizeof(res_types) / sizeof(res_types[0]); i++) {
+        if (!alloc_hlsl_reg_bindings(gpu, pass, pass_s, sc, sc_comp, resources,
+                                     res_types[i], stage))
+            goto error;
+    }
+
+    if (stage == GLSL_SHADER_COMPUTE) {
+        // Check if the gl_NumWorkGroups builtin is used. If it is, we have to
+        // emulate it with a constant buffer, so allocate it a CBV register.
+        spvc_variable_id num_workgroups_id =
+            spvc_compiler_hlsl_remap_num_workgroups_builtin(sc_comp);
+        if (num_workgroups_id) {
+            pass_p->num_workgroups_used = true;
+
+            spvc_hlsl_resource_binding binding;
+            spvc_hlsl_resource_binding_init(&binding);
+            binding.stage = stage_to_spv(stage);
+            // Use a binding number that no real descriptor occupies
+            binding.binding = pass_p->max_binding + 1;
+
+            // Allocate a CBV register for the buffer
+            binding.cbv.register_binding = pass_s->cbvs.num;
+            PL_ARRAY_APPEND(pass, pass_s->cbvs, HLSL_BINDING_NUM_WORKGROUPS);
+            if (pass_s->cbvs.num >
+                    D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT) {
+                PL_ERR(gpu, "Not enough constant buffer slots for gl_NumWorkGroups");
+                goto error;
+            }
+
+            spvc_compiler_set_decoration(sc_comp, num_workgroups_id,
+                                         SpvDecorationDescriptorSet, 0);
+            spvc_compiler_set_decoration(sc_comp, num_workgroups_id,
+                                         SpvDecorationBinding, binding.binding);
+
+            SC(spvc_compiler_hlsl_add_resource_binding(sc_comp, &binding));
+        }
+    }
+
+    SC(spvc_compiler_compile(sc_comp, &hlsl));
+
+    pl_clock_t after_spvc = pl_clock_now();
+    pl_log_cpu_time(gpu->log, after_glsl, after_spvc, "translating SPIR-V to HLSL");
+
+    hr = p->D3DCompile(hlsl, strlen(hlsl), NULL, NULL, NULL, "main", target,
+        D3DCOMPILE_SKIP_VALIDATION | D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &out,
+        &errors);
+    if (FAILED(hr)) {
+        SAFE_RELEASE(out);
+        // The error blob is optional; D3DCompile can fail without producing
+        // one, so don't dereference it unconditionally
+        const char *err_msg = errors
+            ? (const char *) ID3D10Blob_GetBufferPointer(errors) : "";
+        int err_len = errors ? (int) ID3D10Blob_GetBufferSize(errors) : 0;
+        PL_ERR(gpu, "D3DCompile failed: %s\n%.*s", pl_hresult_to_str(hr),
+               err_len, err_msg);
+        goto error;
+    }
+
+    pl_log_cpu_time(gpu->log, after_spvc, pl_clock_now(), "translating HLSL to DXBC");
+
+    // Success and failure share the cleanup below; `out` is NULL on failure
+error:;
+    if (hlsl) {
+        int level = out ? PL_LOG_DEBUG : PL_LOG_ERR;
+        PL_MSG(gpu, level, "%s shader HLSL source:", shader_names[stage]);
+        pl_msg_source(gpu->log, level, hlsl);
+    }
+
+    if (sc)
+        spvc_context_destroy(sc);
+    SAFE_RELEASE(errors);
+    pl_free(tmp);
+    return out;
+}
+
+// Header at the start of every cached pass. It is followed by the seven
+// register binding lists (in the order of the num_* fields below) and then the
+// vertex, fragment and compute shader bytecode.
+struct d3d11_cache_header {
+ // Verification signature, see pass_cache_signature
+ uint64_t hash;
+ // Whether the pass needs the gl_NumWorkGroups emulation buffer
+ bool num_workgroups_used;
+ // Number of entries in each register binding list
+ int num_main_cbvs;
+ int num_main_srvs;
+ int num_main_samplers;
+ int num_vertex_cbvs;
+ int num_vertex_srvs;
+ int num_vertex_samplers;
+ int num_uavs;
+ // Length in bytes of each shader's bytecode, 0 if not present
+ size_t vert_bc_len;
+ size_t frag_bc_len;
+ size_t comp_bc_len;
+};
+
+// Compute the cache identity of a pass.
+//
+// The lookup key (stored in *key, if non-NULL) is derived from the shader
+// sources only. The returned verification signature additionally covers the
+// SPIR-V compiler, SPIRV-Cross and D3D compiler versions and the feature
+// level.
+static inline uint64_t pass_cache_signature(pl_gpu gpu, uint64_t *key,
+                                            const struct pl_pass_params *params)
+{
+    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+
+    // Seed that uniquely identifies d3d11 shaders
+    uint64_t sig = CACHE_KEY_D3D_DXBC;
+
+    pl_hash_merge(&sig, pl_str0_hash(params->glsl_shader));
+    if (params->type == PL_PASS_RASTER)
+        pl_hash_merge(&sig, pl_str0_hash(params->vertex_shader));
+
+    // Up to this point only the shader bodies were hashed, which is exactly
+    // what the lookup key consists of
+    if (key)
+        *key = sig;
+
+    // From here on, mix in everything that affects the generated bytecode
+    pl_hash_merge(&sig, p->spirv->signature);
+
+    unsigned spvc_ver[3];
+    spvc_get_version(&spvc_ver[0], &spvc_ver[1], &spvc_ver[2]);
+    for (int i = 0; i < 3; i++)
+        pl_hash_merge(&sig, spvc_ver[i]);
+
+    const uint64_t d3dc_ver = ((uint64_t)p->d3d_compiler_ver.major << 48)
+                            | ((uint64_t)p->d3d_compiler_ver.minor << 32)
+                            | ((uint64_t)p->d3d_compiler_ver.build << 16)
+                            | (uint64_t)p->d3d_compiler_ver.revision;
+    pl_hash_merge(&sig, d3dc_ver);
+    pl_hash_merge(&sig, p->fl);
+
+    return sig;
+}
+
+// Number of bytes that follow a d3d11_cache_header in a cache entry: all the
+// register binding lists (one int per entry) plus the bytecode of every
+// shader.
+static inline size_t cache_payload_size(struct d3d11_cache_header *header)
+{
+    int num_entries = header->num_main_cbvs + header->num_main_srvs +
+                      header->num_main_samplers + header->num_vertex_cbvs +
+                      header->num_vertex_srvs + header->num_vertex_samplers +
+                      header->num_uavs;
+
+    size_t bytecode_len = header->vert_bc_len + header->frag_bc_len +
+                          header->comp_bc_len;
+
+    return num_entries * sizeof(int) + bytecode_len;
+}
+
+// Try to load a previously compiled pass from the GPU's cache.
+//
+// On success, the gl_NumWorkGroups flag and the register binding lists of
+// `pass` are restored from the cache entry (the lists are duplicated with
+// PL_ARRAY_MEMDUP), and each non-NULL bytecode output (vert_bc, frag_bc,
+// comp_bc) is set from the corresponding bytecode section of the entry
+// (presumably as a view into obj->data rather than a copy - confirm
+// pl_str_take). Whenever the GPU has a cache, *out_sig receives the
+// verification signature and obj->key the lookup key, even on a miss.
+//
+// Returns false if there is no cache, no matching entry, the entry is too
+// short, or its signature doesn't match the current one.
+static bool d3d11_use_cached_program(pl_gpu gpu, struct pl_pass_t *pass,
+ const struct pl_pass_params *params,
+ pl_cache_obj *obj, uint64_t *out_sig,
+ pl_str *vert_bc, pl_str *frag_bc, pl_str *comp_bc)
+{
+ struct pl_pass_d3d11 *pass_p = PL_PRIV(pass);
+ const pl_cache gpu_cache = pl_gpu_cache(gpu);
+ if (!gpu_cache)
+ return false;
+
+ *out_sig = pass_cache_signature(gpu, &obj->key, params);
+ if (!pl_cache_get(gpu_cache, obj))
+ return false;
+
+ // `cache` is a cursor over the entry; it is advanced past each section as
+ // that section is consumed
+ pl_str cache = (pl_str) { obj->data, obj->size };
+ if (cache.len < sizeof(struct d3d11_cache_header))
+ return false;
+
+ struct d3d11_cache_header *header = (struct d3d11_cache_header *) cache.buf;
+ cache = pl_str_drop(cache, sizeof(*header));
+
+ // The key only covers the shader sources; the stored signature must also
+ // match the current compiler versions and feature level
+ if (header->hash != *out_sig)
+ return false;
+
+ // determine required cache size before reading anything
+ size_t required = cache_payload_size(header);
+
+ if (cache.len < required)
+ return false;
+
+ pass_p->num_workgroups_used = header->num_workgroups_used;
+
+ // Duplicate `num_elems` elements from the cursor into (object)->name and
+ // advance the cursor past them
+#define GET_ARRAY(object, name, num_elems) \
+ do { \
+ PL_ARRAY_MEMDUP(pass, (object)->name, cache.buf, num_elems); \
+ cache = pl_str_drop(cache, num_elems * sizeof(*(object)->name.elem)); \
+ } while (0)
+
+ // Same, for a list that belongs to pass_p->main or pass_p->vertex
+#define GET_STAGE_ARRAY(stage, name) \
+ GET_ARRAY(&pass_p->stage, name, header->num_##stage##_##name)
+
+ // The read order must match the write order in d3d11_update_program_cache
+ GET_STAGE_ARRAY(main, cbvs);
+ GET_STAGE_ARRAY(main, srvs);
+ GET_STAGE_ARRAY(main, samplers);
+ GET_STAGE_ARRAY(vertex, cbvs);
+ GET_STAGE_ARRAY(vertex, srvs);
+ GET_STAGE_ARRAY(vertex, samplers);
+ GET_ARRAY(pass_p, uavs, header->num_uavs);
+
+ // Hand out the bytecode section if the caller asked for it (non-NULL
+ // pointer); the cursor is advanced past the section either way
+#define GET_SHADER(ptr) \
+ do { \
+ if (ptr) \
+ *ptr = pl_str_take(cache, header->ptr##_len); \
+ cache = pl_str_drop(cache, header->ptr##_len); \
+ } while (0)
+
+ GET_SHADER(vert_bc);
+ GET_SHADER(frag_bc);
+ GET_SHADER(comp_bc);
+
+ return true;
+}
+
+// Serialize the compiled shaders and register binding lists of `pass` and
+// store them in the GPU's cache under `key`.
+//
+// The entry consists of a d3d11_cache_header (carrying `sig` as the
+// verification signature), the seven register binding lists, and then the
+// vertex, fragment and compute bytecode, each only if the corresponding
+// pointer is non-NULL. Does nothing if the GPU has no cache.
+static void d3d11_update_program_cache(pl_gpu gpu, struct pl_pass_t *pass,
+ uint64_t key, uint64_t sig,
+ const pl_str *vs_str, const pl_str *ps_str,
+ const pl_str *cs_str)
+{
+ struct pl_pass_d3d11 *pass_p = PL_PRIV(pass);
+ const pl_cache gpu_cache = pl_gpu_cache(gpu);
+ if (!gpu_cache)
+ return;
+
+ struct d3d11_cache_header header = {
+ .hash = sig,
+ .num_workgroups_used = pass_p->num_workgroups_used,
+ .num_main_cbvs = pass_p->main.cbvs.num,
+ .num_main_srvs = pass_p->main.srvs.num,
+ .num_main_samplers = pass_p->main.samplers.num,
+ .num_vertex_cbvs = pass_p->vertex.cbvs.num,
+ .num_vertex_srvs = pass_p->vertex.srvs.num,
+ .num_vertex_samplers = pass_p->vertex.samplers.num,
+ .num_uavs = pass_p->uavs.num,
+ .vert_bc_len = vs_str ? vs_str->len : 0,
+ .frag_bc_len = ps_str ? ps_str->len : 0,
+ .comp_bc_len = cs_str ? cs_str->len : 0,
+ };
+
+ // Expected total size, used below to verify the serialization
+ size_t cache_size = sizeof(header) + cache_payload_size(&header);
+ pl_str cache = {0};
+ pl_str_append(NULL, &cache, (pl_str){ (uint8_t *) &header, sizeof(header) });
+
+ // Append the raw contents of one register binding list
+#define WRITE_ARRAY(name) pl_str_append(NULL, &cache, \
+ (pl_str){ (uint8_t *) pass_p->name.elem, \
+ sizeof(*pass_p->name.elem) * pass_p->name.num })
+ // The write order must match the read order in d3d11_use_cached_program
+ WRITE_ARRAY(main.cbvs);
+ WRITE_ARRAY(main.srvs);
+ WRITE_ARRAY(main.samplers);
+ WRITE_ARRAY(vertex.cbvs);
+ WRITE_ARRAY(vertex.srvs);
+ WRITE_ARRAY(vertex.samplers);
+ WRITE_ARRAY(uavs);
+
+ if (vs_str)
+ pl_str_append(NULL, &cache, *vs_str);
+
+ if (ps_str)
+ pl_str_append(NULL, &cache, *ps_str);
+
+ if (cs_str)
+ pl_str_append(NULL, &cache, *cs_str);
+
+ pl_assert(cache_size == cache.len);
+ // NOTE(review): `cache` is not freed here, so pl_cache_str presumably takes
+ // ownership of the string - confirm against its definition
+ pl_cache_str(gpu_cache, key, &cache);
+}
+
+// Release every D3D11 object owned by `pass` (shaders, input layout, blend
+// state and the gl_NumWorkGroups buffer), flush the debug message queue, then
+// free the pl_pass itself.
+void pl_d3d11_pass_destroy(pl_gpu gpu, pl_pass pass)
+{
+    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+    struct d3d11_ctx *ctx = p->ctx;
+    struct pl_pass_d3d11 *priv = PL_PRIV(pass);
+
+    // Shader objects
+    SAFE_RELEASE(priv->vs);
+    SAFE_RELEASE(priv->ps);
+    SAFE_RELEASE(priv->cs);
+
+    // Fixed-function state
+    SAFE_RELEASE(priv->layout);
+    SAFE_RELEASE(priv->bstate);
+
+    // gl_NumWorkGroups emulation
+    SAFE_RELEASE(priv->num_workgroups_buf);
+
+    pl_d3d11_flush_message_queue(ctx, "After pass destroy");
+
+    pl_free((void *) pass);
+}
+
+ static bool pass_create_raster(pl_gpu gpu, struct pl_pass_t *pass,
+ const struct pl_pass_params *params)
+ { // Builds the D3D11 objects of a raster pass: vertex shader, pixel shader, input layout and blend state. Returns false on failure.
+ struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+ struct d3d11_ctx *ctx = p->ctx; // NOTE(review): never named below; presumably consumed implicitly by the D3D() macro -- confirm
+ struct pl_pass_d3d11 *pass_p = PL_PRIV(pass);
+ ID3DBlob *vs_blob = NULL; // freshly compiled vertex bytecode; only set when compiling below
+ pl_str vs_str = {0}; // vertex bytecode actually used: passed to the cache lookup, or pointed at vs_blob
+ ID3DBlob *ps_blob = NULL; // freshly compiled pixel bytecode; only set when compiling below
+ pl_str ps_str = {0}; // pixel bytecode actually used: passed to the cache lookup, or pointed at ps_blob
+ D3D11_INPUT_ELEMENT_DESC *in_descs = NULL;
+ pl_cache_obj obj = {0};
+ uint64_t sig = 0;
+ bool success = false;
+
+ if (d3d11_use_cached_program(gpu, pass, params, &obj, &sig, &vs_str, &ps_str, NULL))
+ PL_DEBUG(gpu, "Using cached DXBC shaders");
+
+ pl_assert((vs_str.len == 0) == (ps_str.len == 0)); // both stages must be present or both absent
+ if (vs_str.len == 0) { // no bytecode yet: compile both stages from GLSL
+ vs_blob = shader_compile_glsl(gpu, pass, &pass_p->vertex,
+ GLSL_SHADER_VERTEX, params->vertex_shader);
+ if (!vs_blob)
+ goto error;
+
+ vs_str = (pl_str) { // non-owning view; vs_blob must stay alive until cleanup
+ .buf = ID3D10Blob_GetBufferPointer(vs_blob),
+ .len = ID3D10Blob_GetBufferSize(vs_blob),
+ };
+
+ ps_blob = shader_compile_glsl(gpu, pass, &pass_p->main,
+ GLSL_SHADER_FRAGMENT, params->glsl_shader);
+ if (!ps_blob)
+ goto error;
+
+ ps_str = (pl_str) { // non-owning view; ps_blob must stay alive until cleanup
+ .buf = ID3D10Blob_GetBufferPointer(ps_blob),
+ .len = ID3D10Blob_GetBufferSize(ps_blob),
+ };
+ }
+
+ D3D(ID3D11Device_CreateVertexShader(p->dev, vs_str.buf, vs_str.len, NULL, // NOTE(review): D3D() presumably jumps to `error` on a failed HRESULT -- confirm
+ &pass_p->vs));
+
+ D3D(ID3D11Device_CreatePixelShader(p->dev, ps_str.buf, ps_str.len, NULL,
+ &pass_p->ps));
+
+ in_descs = pl_calloc_ptr(pass, params->num_vertex_attribs, in_descs); // one element description per vertex attribute; freed during cleanup
+ for (int i = 0; i < params->num_vertex_attribs; i++) {
+ struct pl_vertex_attrib *va = &params->vertex_attribs[i];
+
+ in_descs[i] = (D3D11_INPUT_ELEMENT_DESC) {
+ // The semantic name doesn't mean much and is just used to verify
+ // the input description matches the shader. SPIRV-Cross always
+ // uses TEXCOORD, so we should too.
+ .SemanticName = "TEXCOORD",
+ .SemanticIndex = va->location,
+ .AlignedByteOffset = va->offset,
+ .Format = fmt_to_dxgi(va->fmt),
+ };
+ }
+ D3D(ID3D11Device_CreateInputLayout(p->dev, in_descs, // the vertex bytecode is passed along with the element descriptions
+ params->num_vertex_attribs, vs_str.buf, vs_str.len, &pass_p->layout));
+
+ static const D3D11_BLEND blend_options[] = { // lookup table: pl blend mode -> D3D11 blend factor
+ [PL_BLEND_ZERO] = D3D11_BLEND_ZERO,
+ [PL_BLEND_ONE] = D3D11_BLEND_ONE,
+ [PL_BLEND_SRC_ALPHA] = D3D11_BLEND_SRC_ALPHA,
+ [PL_BLEND_ONE_MINUS_SRC_ALPHA] = D3D11_BLEND_INV_SRC_ALPHA,
+ };
+
+ D3D11_BLEND_DESC bdesc = { // default: BlendEnable is zero-initialized, all color channels are written
+ .RenderTarget[0] = {
+ .RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL,
+ },
+ };
+ if (params->blend_params) { // caller asked for blending: additive blend op with the requested factors
+ bdesc.RenderTarget[0] = (D3D11_RENDER_TARGET_BLEND_DESC) {
+ .BlendEnable = TRUE,
+ .SrcBlend = blend_options[params->blend_params->src_rgb],
+ .DestBlend = blend_options[params->blend_params->dst_rgb],
+ .BlendOp = D3D11_BLEND_OP_ADD,
+ .SrcBlendAlpha = blend_options[params->blend_params->src_alpha],
+ .DestBlendAlpha = blend_options[params->blend_params->dst_alpha],
+ .BlendOpAlpha = D3D11_BLEND_OP_ADD,
+ .RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL,
+ };
+ }
+ D3D(ID3D11Device_CreateBlendState(p->dev, &bdesc, &pass_p->bstate));
+
+ d3d11_update_program_cache(gpu, pass, obj.key, sig, &vs_str, &ps_str, NULL); // only reached once every object above was created
+
+ success = true;
+ error: // shared cleanup, reached on success as well as on failure
+ SAFE_RELEASE(vs_blob);
+ SAFE_RELEASE(ps_blob);
+ pl_cache_obj_free(&obj);
+ pl_free(in_descs);
+ return success;
+ }
+
+ static bool pass_create_compute(pl_gpu gpu, struct pl_pass_t *pass,
+ const struct pl_pass_params *params)
+ { // Builds the D3D11 objects of a compute pass: the compute shader and, if needed, the workgroup-count constant buffer. Returns false on failure.
+ struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+ struct d3d11_ctx *ctx = p->ctx; // NOTE(review): never named below; presumably consumed implicitly by the D3D() macro -- confirm
+ struct pl_pass_d3d11 *pass_p = PL_PRIV(pass);
+ ID3DBlob *cs_blob = NULL; // freshly compiled bytecode; only set when compiling below
+ pl_str cs_str = {0}; // bytecode actually used: passed to the cache lookup, or pointed at cs_blob
+ pl_cache_obj obj = {0};
+ uint64_t sig = 0;
+ bool success = false;
+
+ if (d3d11_use_cached_program(gpu, pass, params, &obj, &sig, NULL, NULL, &cs_str))
+ PL_DEBUG(gpu, "Using cached DXBC shader");
+
+ if (cs_str.len == 0) { // no bytecode yet: compile from GLSL
+ cs_blob = shader_compile_glsl(gpu, pass, &pass_p->main,
+ GLSL_SHADER_COMPUTE, params->glsl_shader);
+ if (!cs_blob)
+ goto error;
+
+ cs_str = (pl_str) { // non-owning view; cs_blob must stay alive until cleanup
+ .buf = ID3D10Blob_GetBufferPointer(cs_blob),
+ .len = ID3D10Blob_GetBufferSize(cs_blob),
+ };
+ }
+
+ D3D(ID3D11Device_CreateComputeShader(p->dev, cs_str.buf, cs_str.len, NULL, // NOTE(review): D3D() presumably jumps to `error` on a failed HRESULT -- confirm
+ &pass_p->cs));
+
+ if (pass_p->num_workgroups_used) { // flag is not set in this function; NOTE(review): presumably set while compiling or loading the shader -- confirm
+ D3D11_BUFFER_DESC bdesc = {
+ .BindFlags = D3D11_BIND_CONSTANT_BUFFER,
+ .ByteWidth = sizeof(pass_p->last_num_wgs), // sized to hold the last_num_wgs struct that pass_run_compute() uploads
+ };
+ D3D(ID3D11Device_CreateBuffer(p->dev, &bdesc, NULL, // created without initial data
+ &pass_p->num_workgroups_buf));
+ }
+
+ d3d11_update_program_cache(gpu, pass, obj.key, sig, NULL, NULL, &cs_str); // only reached once every object above was created
+
+ success = true;
+ error: // shared cleanup, reached on success as well as on failure
+ pl_cache_obj_free(&obj);
+ SAFE_RELEASE(cs_blob);
+ return success;
+ }
+
+ const struct pl_pass_t *pl_d3d11_pass_create(pl_gpu gpu,
+ const struct pl_pass_params *params)
+ { // Allocates a pass, builds its shaders and state, then rewrites the per-stage resource lists into descriptor indexes. Returns NULL on failure.
+ struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+ struct d3d11_ctx *ctx = p->ctx;
+
+ struct pl_pass_t *pass = pl_zalloc_obj(NULL, pass, struct pl_pass_d3d11);
+ pass->params = pl_pass_params_copy(pass, params); // the copy is allocated with `pass` as its parent
+ struct pl_pass_d3d11 *pass_p = PL_PRIV(pass);
+ *pass_p = (struct pl_pass_d3d11) {
+ .max_binding = -1, // sentinel: highest binding number not known yet
+ };
+
+ if (params->type == PL_PASS_COMPUTE) {
+ if (!pass_create_compute(gpu, pass, params))
+ goto error;
+ } else {
+ if (!pass_create_raster(gpu, pass, params))
+ goto error;
+ }
+
+ // Pre-allocate resource arrays to use in pl_pass_run
+ pass_p->cbv_arr = pl_calloc(pass, // sized for the larger of the two stages, since both stages share one array
+ PL_MAX(pass_p->main.cbvs.num, pass_p->vertex.cbvs.num),
+ sizeof(*pass_p->cbv_arr));
+ pass_p->srv_arr = pl_calloc(pass,
+ PL_MAX(pass_p->main.srvs.num, pass_p->vertex.srvs.num),
+ sizeof(*pass_p->srv_arr));
+ pass_p->sampler_arr = pl_calloc(pass,
+ PL_MAX(pass_p->main.samplers.num, pass_p->vertex.samplers.num),
+ sizeof(*pass_p->sampler_arr));
+ pass_p->uav_arr = pl_calloc(pass, pass_p->uavs.num, sizeof(*pass_p->uav_arr)); // the UAV list is per pass, not per stage
+
+ // Find the highest binding number used in `params->descriptors` if we
+ // haven't found it already. (If the shader was compiled fresh rather than
+ // loaded from cache, `pass_p->max_binding` should already be set.)
+ if (pass_p->max_binding == -1) {
+ for (int i = 0; i < params->num_descriptors; i++) {
+ pass_p->max_binding = PL_MAX(pass_p->max_binding,
+ params->descriptors[i].binding);
+ }
+ }
+
+ // Build a mapping from binding numbers to descriptor array indexes
+ int *binding_map = pl_calloc_ptr(pass, pass_p->max_binding + 1, binding_map); // zero entries when there are no descriptors (max_binding stays -1)
+ for (int i = 0; i <= pass_p->max_binding; i++)
+ binding_map[i] = HLSL_BINDING_NOT_USED; // default for binding numbers no descriptor claims
+ for (int i = 0; i < params->num_descriptors; i++)
+ binding_map[params->descriptors[i].binding] = i;
+
+ #define MAP_RESOURCES(array) \
+ do { \
+ for (int i = 0; i < array.num; i++) { \
+ if (array.elem[i] > pass_p->max_binding) { \
+ array.elem[i] = HLSL_BINDING_NOT_USED; \
+ } else if (array.elem[i] >= 0) { \
+ array.elem[i] = binding_map[array.elem[i]]; \
+ } \
+ } \
+ } while (0)
+
+ // During shader compilation (or after loading a compiled shader from cache)
+ // the entries of the following resource lists are shader binding numbers,
+ // however, it's more efficient for `pl_pass_run` if they refer to indexes
+ // of the `params->descriptors` array instead, so remap them here
+ MAP_RESOURCES(pass_p->main.cbvs); // negative entries are left as they are by the macro
+ MAP_RESOURCES(pass_p->main.samplers);
+ MAP_RESOURCES(pass_p->main.srvs);
+ MAP_RESOURCES(pass_p->vertex.cbvs);
+ MAP_RESOURCES(pass_p->vertex.samplers);
+ MAP_RESOURCES(pass_p->vertex.srvs);
+ MAP_RESOURCES(pass_p->uavs);
+ pl_free(binding_map); // only needed while remapping
+
+ pl_d3d11_flush_message_queue(ctx, "After pass create");
+
+ return pass;
+
+ error:
+ pl_d3d11_pass_destroy(gpu, pass); // releases whatever was created so far and frees `pass`
+ return NULL;
+ }
+
+ // Shared logic between VS, PS and CS for filling the resource arrays that are
+ // passed to ID3D11DeviceContext methods
+ static void fill_resources(pl_gpu gpu, pl_pass pass,
+ struct d3d_pass_stage *pass_s,
+ const struct pl_pass_run_params *params,
+ ID3D11Buffer **cbvs, ID3D11ShaderResourceView **srvs,
+ ID3D11SamplerState **samplers)
+ { // Output arrays are indexed by slot; the caller must size them to at least the stage's cbvs/srvs/samplers counts
+ struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+ struct pl_pass_d3d11 *pass_p = PL_PRIV(pass);
+
+ for (int i = 0; i < pass_s->cbvs.num; i++) {
+ int binding = pass_s->cbvs.elem[i]; // index into params->desc_bindings, or a negative special value
+ if (binding == HLSL_BINDING_NUM_WORKGROUPS) {
+ cbvs[i] = pass_p->num_workgroups_buf; // pass-internal buffer rather than a user descriptor
+ continue;
+ } else if (binding < 0) {
+ cbvs[i] = NULL; // any other negative value: slot stays empty
+ continue;
+ }
+
+ pl_buf buf = params->desc_bindings[binding].object;
+ pl_d3d11_buf_resolve(gpu, buf); // NOTE(review): presumably applies pending writes to the D3D11 buffer before it is bound -- confirm in gpu_buf.c
+ struct pl_buf_d3d11 *buf_p = PL_PRIV(buf);
+ cbvs[i] = buf_p->buf;
+ }
+
+ for (int i = 0; i < pass_s->srvs.num; i++) {
+ int binding = pass_s->srvs.elem[i];
+ if (binding < 0) {
+ srvs[i] = NULL;
+ continue;
+ }
+
+ pl_tex tex;
+ struct pl_tex_d3d11 *tex_p;
+ pl_buf buf;
+ struct pl_buf_d3d11 *buf_p;
+ switch (pass->params.descriptors[binding].type) { // the descriptor type selects which view of the bound object is used
+ case PL_DESC_SAMPLED_TEX:
+ case PL_DESC_STORAGE_IMG:
+ tex = params->desc_bindings[binding].object;
+ tex_p = PL_PRIV(tex);
+ srvs[i] = tex_p->srv;
+ break;
+ case PL_DESC_BUF_STORAGE:
+ buf = params->desc_bindings[binding].object;
+ buf_p = PL_PRIV(buf);
+ srvs[i] = buf_p->raw_srv;
+ break;
+ case PL_DESC_BUF_TEXEL_UNIFORM:
+ case PL_DESC_BUF_TEXEL_STORAGE:
+ buf = params->desc_bindings[binding].object;
+ buf_p = PL_PRIV(buf);
+ srvs[i] = buf_p->texel_srv;
+ break;
+ default: // other descriptor types: srvs[i] keeps whatever value it already had
+ break;
+ }
+ }
+
+ for (int i = 0; i < pass_s->samplers.num; i++) {
+ int binding = pass_s->samplers.elem[i];
+ if (binding < 0) {
+ samplers[i] = NULL;
+ continue;
+ }
+
+ struct pl_desc_binding *db = &params->desc_bindings[binding];
+ samplers[i] = p->samplers[db->sample_mode][db->address_mode]; // sampler objects come from a GPU-level table indexed by sample and address mode
+ }
+ }
+
+ // Fill `uavs` (one entry per slot in the pass's UAV list) with the unordered
+ // access view of the object bound to each slot for this run. Slots whose list
+ // entry is negative are set to NULL; slots whose descriptor type has no UAV
+ // mapping here are left untouched.
+ static void fill_uavs(pl_pass pass, const struct pl_pass_run_params *params,
+                       ID3D11UnorderedAccessView **uavs)
+ {
+     struct pl_pass_d3d11 *pass_p = PL_PRIV(pass);
+
+     for (int slot = 0; slot < pass_p->uavs.num; slot++) {
+         const int desc_idx = pass_p->uavs.elem[slot];
+         if (desc_idx < 0) {
+             uavs[slot] = NULL;
+             continue;
+         }
+
+         // The same bound object is interpreted as a texture or a buffer
+         // depending on the descriptor type declared at pass creation
+         struct pl_desc_binding *db = &params->desc_bindings[desc_idx];
+         switch (pass->params.descriptors[desc_idx].type) {
+         case PL_DESC_STORAGE_IMG: {
+             pl_tex tex = db->object;
+             struct pl_tex_d3d11 *tex_p = PL_PRIV(tex);
+             uavs[slot] = tex_p->uav;
+             break;
+         }
+         case PL_DESC_BUF_STORAGE: {
+             pl_buf buf = db->object;
+             struct pl_buf_d3d11 *buf_p = PL_PRIV(buf);
+             uavs[slot] = buf_p->raw_uav;
+             break;
+         }
+         case PL_DESC_BUF_TEXEL_STORAGE: {
+             pl_buf buf = db->object;
+             struct pl_buf_d3d11 *buf_p = PL_PRIV(buf);
+             uavs[slot] = buf_p->texel_uav;
+             break;
+         }
+         default:
+             break;
+         }
+     }
+ }
+
+ static void pass_run_raster(pl_gpu gpu, const struct pl_pass_run_params *params)
+ { // Runs one raster pass on the immediate context: uploads/binds vertex and index data, binds shaders, resources and output state, draws, then unbinds
+ struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+ pl_pass pass = params->pass;
+ struct pl_pass_d3d11 *pass_p = PL_PRIV(pass);
+
+ if (p->fl <= D3D_FEATURE_LEVEL_9_3 && params->index_buf) {
+ // Index buffers are unsupported because we can't tell if they are an
+ // index buffer or a vertex buffer on creation, and FL9_x allows only
+ // one binding type per-buffer
+ PL_ERR(gpu, "Index buffers are unsupported in FL9_x");
+ return;
+ }
+
+ if (p->fl <= D3D_FEATURE_LEVEL_9_1 && params->index_data &&
+ params->index_fmt != PL_INDEX_UINT16)
+ {
+ PL_ERR(gpu, "32-bit index format is unsupported in FL9_1");
+ return;
+ }
+
+ // Figure out how much vertex/index data to upload, if any
+ size_t vertex_alloc = params->vertex_data ? pl_vertex_buf_size(params) : 0;
+ size_t index_alloc = params->index_data ? pl_index_buf_size(params) : 0;
+
+ static const DXGI_FORMAT index_fmts[PL_INDEX_FORMAT_COUNT] = { // lookup table: pl index format -> DXGI format
+ [PL_INDEX_UINT16] = DXGI_FORMAT_R16_UINT,
+ [PL_INDEX_UINT32] = DXGI_FORMAT_R32_UINT,
+ };
+
+ // Upload vertex data. On >=FL10_0 we use the same buffer for index data, so
+ // upload that too.
+ bool share_vertex_index_buf = p->fl > D3D_FEATURE_LEVEL_9_3;
+ if (vertex_alloc || (share_vertex_index_buf && index_alloc)) {
+ struct stream_buf_slice slices[] = { // `.offset` is read below but never set here; NOTE(review): presumably filled in by stream_buf_upload() -- confirm
+ { .data = params->vertex_data, .size = vertex_alloc },
+ { .data = params->index_data, .size = index_alloc },
+ };
+
+ if (!stream_buf_upload(gpu, &p->vbuf, slices,
+ share_vertex_index_buf ? 2 : 1)) { // the index slice is only passed when it shares the vertex buffer
+ PL_ERR(gpu, "Failed to upload vertex data");
+ return;
+ }
+
+ if (vertex_alloc) {
+ ID3D11DeviceContext_IASetVertexBuffers(p->imm, 0, 1, &p->vbuf.buf,
+ &(UINT) { pass->params.vertex_stride }, &slices[0].offset);
+ }
+ if (share_vertex_index_buf && index_alloc) {
+ ID3D11DeviceContext_IASetIndexBuffer(p->imm, p->vbuf.buf,
+ index_fmts[params->index_fmt], slices[1].offset);
+ }
+ }
+
+ // Upload index data for <=FL9_3, which must be in its own buffer
+ if (!share_vertex_index_buf && index_alloc) {
+ struct stream_buf_slice slices[] = {
+ { .data = params->index_data, .size = index_alloc },
+ };
+
+ if (!stream_buf_upload(gpu, &p->ibuf, slices, PL_ARRAY_SIZE(slices))) {
+ PL_ERR(gpu, "Failed to upload index data");
+ return;
+ }
+
+ ID3D11DeviceContext_IASetIndexBuffer(p->imm, p->ibuf.buf,
+ index_fmts[params->index_fmt], slices[0].offset);
+ }
+
+ if (params->vertex_buf) { // caller-supplied vertex buffer: bind it directly, no upload
+ struct pl_buf_d3d11 *buf_p = PL_PRIV(params->vertex_buf);
+ ID3D11DeviceContext_IASetVertexBuffers(p->imm, 0, 1, &buf_p->buf,
+ &(UINT) { pass->params.vertex_stride },
+ &(UINT) { params->buf_offset });
+ }
+
+ if (params->index_buf) { // caller-supplied index buffer (rejected above on FL9_x)
+ struct pl_buf_d3d11 *buf_p = PL_PRIV(params->index_buf);
+ ID3D11DeviceContext_IASetIndexBuffer(p->imm, buf_p->buf,
+ index_fmts[params->index_fmt], params->index_offset);
+ }
+
+ ID3D11DeviceContext_IASetInputLayout(p->imm, pass_p->layout);
+
+ static const D3D_PRIMITIVE_TOPOLOGY prim_topology[] = { // lookup table: pl primitive type -> D3D topology
+ [PL_PRIM_TRIANGLE_LIST] = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST,
+ [PL_PRIM_TRIANGLE_STRIP] = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP,
+ };
+ ID3D11DeviceContext_IASetPrimitiveTopology(p->imm,
+ prim_topology[pass->params.vertex_type]);
+
+ ID3D11DeviceContext_VSSetShader(p->imm, pass_p->vs, NULL, 0);
+
+ ID3D11Buffer **cbvs = pass_p->cbv_arr; // scratch arrays stored on the pass; filled for the vertex stage first, then reused for the pixel stage
+ ID3D11ShaderResourceView **srvs = pass_p->srv_arr;
+ ID3D11SamplerState **samplers = pass_p->sampler_arr;
+ ID3D11UnorderedAccessView **uavs = pass_p->uav_arr;
+
+ // Set vertex shader resources. The device context is called conditionally
+ // because the debug layer complains if these are called with 0 resources.
+ fill_resources(gpu, pass, &pass_p->vertex, params, cbvs, srvs, samplers);
+ if (pass_p->vertex.cbvs.num)
+ ID3D11DeviceContext_VSSetConstantBuffers(p->imm, 0, pass_p->vertex.cbvs.num, cbvs);
+ if (pass_p->vertex.srvs.num)
+ ID3D11DeviceContext_VSSetShaderResources(p->imm, 0, pass_p->vertex.srvs.num, srvs);
+ if (pass_p->vertex.samplers.num)
+ ID3D11DeviceContext_VSSetSamplers(p->imm, 0, pass_p->vertex.samplers.num, samplers);
+
+ ID3D11DeviceContext_RSSetState(p->imm, p->rstate);
+ ID3D11DeviceContext_RSSetViewports(p->imm, 1, (&(D3D11_VIEWPORT) {
+ .TopLeftX = params->viewport.x0,
+ .TopLeftY = params->viewport.y0,
+ .Width = pl_rect_w(params->viewport),
+ .Height = pl_rect_h(params->viewport),
+ .MinDepth = 0,
+ .MaxDepth = 1,
+ }));
+ ID3D11DeviceContext_RSSetScissorRects(p->imm, 1, (&(D3D11_RECT) {
+ .left = params->scissors.x0,
+ .top = params->scissors.y0,
+ .right = params->scissors.x1,
+ .bottom = params->scissors.y1,
+ }));
+
+ ID3D11DeviceContext_PSSetShader(p->imm, pass_p->ps, NULL, 0);
+
+ // Set pixel shader resources
+ fill_resources(gpu, pass, &pass_p->main, params, cbvs, srvs, samplers); // overwrites the entries written for the vertex stage above
+ if (pass_p->main.cbvs.num)
+ ID3D11DeviceContext_PSSetConstantBuffers(p->imm, 0, pass_p->main.cbvs.num, cbvs);
+ if (pass_p->main.srvs.num)
+ ID3D11DeviceContext_PSSetShaderResources(p->imm, 0, pass_p->main.srvs.num, srvs);
+ if (pass_p->main.samplers.num)
+ ID3D11DeviceContext_PSSetSamplers(p->imm, 0, pass_p->main.samplers.num, samplers);
+
+ ID3D11DeviceContext_OMSetBlendState(p->imm, pass_p->bstate, NULL,
+ D3D11_DEFAULT_SAMPLE_MASK);
+ ID3D11DeviceContext_OMSetDepthStencilState(p->imm, p->dsstate, 0);
+
+ fill_uavs(pass, params, uavs);
+
+ struct pl_tex_d3d11 *target_p = PL_PRIV(params->target);
+ ID3D11DeviceContext_OMSetRenderTargetsAndUnorderedAccessViews(
+ p->imm, 1, &target_p->rtv, NULL, 1, pass_p->uavs.num, uavs, NULL); // one RTV, no depth-stencil view, UAVs start at slot 1
+
+ if (params->index_data || params->index_buf) {
+ ID3D11DeviceContext_DrawIndexed(p->imm, params->vertex_count, 0, 0); // vertex_count is passed as the index count
+ } else {
+ ID3D11DeviceContext_Draw(p->imm, params->vertex_count, 0);
+ }
+
+ // Unbind everything. It's easier to do this than to actually track state,
+ // and if we leave the RTV bound, it could trip up D3D's conflict checker.
+ // Also, apparently unbinding SRVs can prevent a 10level9 bug?
+ // https://docs.microsoft.com/en-us/windows/win32/direct3d11/overviews-direct3d-11-devices-downlevel-prevent-null-srvs
+ for (int i = 0; i < PL_MAX(pass_p->main.cbvs.num, pass_p->vertex.cbvs.num); i++) // clear the scratch arrays up to the larger stage's count
+ cbvs[i] = NULL;
+ for (int i = 0; i < PL_MAX(pass_p->main.srvs.num, pass_p->vertex.srvs.num); i++)
+ srvs[i] = NULL;
+ for (int i = 0; i < PL_MAX(pass_p->main.samplers.num, pass_p->vertex.samplers.num); i++)
+ samplers[i] = NULL;
+ for (int i = 0; i < pass_p->uavs.num; i++)
+ uavs[i] = NULL;
+ if (pass_p->vertex.cbvs.num) // re-issue the same bind calls, now with all-NULL arrays
+ ID3D11DeviceContext_VSSetConstantBuffers(p->imm, 0, pass_p->vertex.cbvs.num, cbvs);
+ if (pass_p->vertex.srvs.num)
+ ID3D11DeviceContext_VSSetShaderResources(p->imm, 0, pass_p->vertex.srvs.num, srvs);
+ if (pass_p->vertex.samplers.num)
+ ID3D11DeviceContext_VSSetSamplers(p->imm, 0, pass_p->vertex.samplers.num, samplers);
+ if (pass_p->main.cbvs.num)
+ ID3D11DeviceContext_PSSetConstantBuffers(p->imm, 0, pass_p->main.cbvs.num, cbvs);
+ if (pass_p->main.srvs.num)
+ ID3D11DeviceContext_PSSetShaderResources(p->imm, 0, pass_p->main.srvs.num, srvs);
+ if (pass_p->main.samplers.num)
+ ID3D11DeviceContext_PSSetSamplers(p->imm, 0, pass_p->main.samplers.num, samplers);
+ ID3D11DeviceContext_OMSetRenderTargetsAndUnorderedAccessViews(
+ p->imm, 0, NULL, NULL, 1, pass_p->uavs.num, uavs, NULL); // zero RTVs plus the NULLed UAV array
+ }
+
+ static void pass_run_compute(pl_gpu gpu, const struct pl_pass_run_params *params)
+ { // Runs one compute pass on the immediate context: refreshes the workgroup-count buffer, binds shader and resources, dispatches, then unbinds
+ struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+ pl_pass pass = params->pass;
+ struct pl_pass_d3d11 *pass_p = PL_PRIV(pass);
+
+ // Update gl_NumWorkGroups emulation buffer if necessary
+ if (pass_p->num_workgroups_used) {
+ bool needs_update = false;
+ for (int i = 0; i < 3; i++) { // compare against the counts of the previous run and remember the new ones
+ if (pass_p->last_num_wgs.num_wgs[i] != params->compute_groups[i])
+ needs_update = true;
+ pass_p->last_num_wgs.num_wgs[i] = params->compute_groups[i];
+ }
+
+ if (needs_update) { // skip the upload when the counts are unchanged
+ ID3D11DeviceContext_UpdateSubresource(p->imm,
+ (ID3D11Resource *) pass_p->num_workgroups_buf, 0, NULL,
+ &pass_p->last_num_wgs, 0, 0);
+ }
+ }
+
+ ID3D11DeviceContext_CSSetShader(p->imm, pass_p->cs, NULL, 0);
+
+ ID3D11Buffer **cbvs = pass_p->cbv_arr; // scratch arrays stored on the pass
+ ID3D11ShaderResourceView **srvs = pass_p->srv_arr;
+ ID3D11UnorderedAccessView **uavs = pass_p->uav_arr;
+ ID3D11SamplerState **samplers = pass_p->sampler_arr;
+
+ fill_resources(gpu, pass, &pass_p->main, params, cbvs, srvs, samplers); // compute passes only use the `main` stage
+ fill_uavs(pass, params, uavs);
+
+ if (pass_p->main.cbvs.num) // each bind call is skipped when its resource count is zero
+ ID3D11DeviceContext_CSSetConstantBuffers(p->imm, 0, pass_p->main.cbvs.num, cbvs);
+ if (pass_p->main.srvs.num)
+ ID3D11DeviceContext_CSSetShaderResources(p->imm, 0, pass_p->main.srvs.num, srvs);
+ if (pass_p->main.samplers.num)
+ ID3D11DeviceContext_CSSetSamplers(p->imm, 0, pass_p->main.samplers.num, samplers);
+ if (pass_p->uavs.num)
+ ID3D11DeviceContext_CSSetUnorderedAccessViews(p->imm, 0, pass_p->uavs.num, uavs, NULL);
+
+ ID3D11DeviceContext_Dispatch(p->imm, params->compute_groups[0],
+ params->compute_groups[1],
+ params->compute_groups[2]);
+
+ // Unbind everything
+ for (int i = 0; i < pass_p->main.cbvs.num; i++) // clear the scratch arrays...
+ cbvs[i] = NULL;
+ for (int i = 0; i < pass_p->main.srvs.num; i++)
+ srvs[i] = NULL;
+ for (int i = 0; i < pass_p->main.samplers.num; i++)
+ samplers[i] = NULL;
+ for (int i = 0; i < pass_p->uavs.num; i++)
+ uavs[i] = NULL;
+ if (pass_p->main.cbvs.num) // ...and re-issue the same bind calls with all-NULL arrays
+ ID3D11DeviceContext_CSSetConstantBuffers(p->imm, 0, pass_p->main.cbvs.num, cbvs);
+ if (pass_p->main.srvs.num)
+ ID3D11DeviceContext_CSSetShaderResources(p->imm, 0, pass_p->main.srvs.num, srvs);
+ if (pass_p->main.samplers.num)
+ ID3D11DeviceContext_CSSetSamplers(p->imm, 0, pass_p->main.samplers.num, samplers);
+ if (pass_p->uavs.num)
+ ID3D11DeviceContext_CSSetUnorderedAccessViews(p->imm, 0, pass_p->uavs.num, uavs, NULL);
+ }
+
+ // Run a pass. The work is bracketed by pl_d3d11_timer_start()/_end() on
+ // `params->timer`; compute passes are handed to pass_run_compute() and all
+ // other pass types to pass_run_raster(). The D3D11 message queue is flushed
+ // afterwards.
+ void pl_d3d11_pass_run(pl_gpu gpu, const struct pl_pass_run_params *params)
+ {
+     struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+     struct d3d11_ctx *ctx = p->ctx;
+     const bool is_compute = params->pass->params.type == PL_PASS_COMPUTE;
+
+     pl_d3d11_timer_start(gpu, params->timer);
+     if (is_compute)
+         pass_run_compute(gpu, params);
+     else
+         pass_run_raster(gpu, params);
+     pl_d3d11_timer_end(gpu, params->timer);
+
+     pl_d3d11_flush_message_queue(ctx, "After pass run");
+ }
diff --git a/src/d3d11/gpu_tex.c b/src/d3d11/gpu_tex.c
new file mode 100644
index 0000000..d63fc17
--- /dev/null
+++ b/src/d3d11/gpu_tex.c
@@ -0,0 +1,745 @@
+/*
+ * This file is part of libplacebo.
+ *
+ * libplacebo is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libplacebo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "gpu.h"
+#include "formats.h"
+
+ // Subresource index of a texture: its array slice when it has one
+ // (array_slice >= 0), otherwise 0.
+ static inline UINT tex_subresource(pl_tex tex)
+ {
+     struct pl_tex_d3d11 *priv = PL_PRIV(tex);
+
+     if (priv->array_slice < 0)
+         return 0;
+     return priv->array_slice;
+ }
+
+ static bool tex_init(pl_gpu gpu, pl_tex tex)
+ { // Creates the SRV, RTV and UAV views that the texture's capability flags call for; tex_p->res and tex_p->array_slice must already be set
+ struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+ struct d3d11_ctx *ctx = p->ctx; // NOTE(review): never named below; presumably consumed implicitly by the D3D() macro -- confirm
+ struct pl_tex_d3d11 *tex_p = PL_PRIV(tex);
+
+ // View formats may be omitted when they match the texture format, but for
+ // simplicity's sake we always set it. It will match the texture format for
+ // textures created with tex_create, but it can be different for video
+ // textures wrapped with pl_d3d11_wrap.
+ DXGI_FORMAT fmt = fmt_to_dxgi(tex->params.format);
+
+ if (tex->params.sampleable || tex->params.storable) { // storable textures get an SRV too
+ D3D11_SHADER_RESOURCE_VIEW_DESC srvdesc = {
+ .Format = fmt,
+ };
+ switch (pl_tex_params_dimension(tex->params)) {
+ case 1:
+ if (tex_p->array_slice >= 0) { // view exactly one slice of an array texture
+ srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1DARRAY;
+ srvdesc.Texture1DArray.MipLevels = 1;
+ srvdesc.Texture1DArray.FirstArraySlice = tex_p->array_slice;
+ srvdesc.Texture1DArray.ArraySize = 1;
+ } else {
+ srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D;
+ srvdesc.Texture1D.MipLevels = 1;
+ }
+ break;
+ case 2:
+ if (tex_p->array_slice >= 0) {
+ srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2DARRAY;
+ srvdesc.Texture2DArray.MipLevels = 1;
+ srvdesc.Texture2DArray.FirstArraySlice = tex_p->array_slice;
+ srvdesc.Texture2DArray.ArraySize = 1;
+ } else {
+ srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
+ srvdesc.Texture2D.MipLevels = 1;
+ }
+ break;
+ case 3:
+ // D3D11 does not have Texture3D arrays
+ srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D;
+ srvdesc.Texture3D.MipLevels = 1;
+ break;
+ }
+ D3D(ID3D11Device_CreateShaderResourceView(p->dev, tex_p->res, &srvdesc, // NOTE(review): D3D() presumably jumps to `error` on a failed HRESULT -- confirm
+ &tex_p->srv));
+ }
+
+ if (tex->params.renderable) {
+ D3D11_RENDER_TARGET_VIEW_DESC rtvdesc = {
+ .Format = fmt,
+ };
+ switch (pl_tex_params_dimension(tex->params)) {
+ case 1:
+ if (tex_p->array_slice >= 0) {
+ rtvdesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE1DARRAY;
+ rtvdesc.Texture1DArray.FirstArraySlice = tex_p->array_slice;
+ rtvdesc.Texture1DArray.ArraySize = 1;
+ } else {
+ rtvdesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE1D;
+ }
+ break;
+ case 2:
+ if (tex_p->array_slice >= 0) {
+ rtvdesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2DARRAY;
+ rtvdesc.Texture2DArray.FirstArraySlice = tex_p->array_slice;
+ rtvdesc.Texture2DArray.ArraySize = 1;
+ } else {
+ rtvdesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D;
+ }
+ break;
+ case 3:
+ // D3D11 does not have Texture3D arrays
+ rtvdesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE3D;
+ rtvdesc.Texture3D.WSize = -1; // per the D3D11_TEX3D_RTV docs, -1 means all depth slices from FirstWSlice on
+ break;
+ }
+ D3D(ID3D11Device_CreateRenderTargetView(p->dev, tex_p->res, &rtvdesc,
+ &tex_p->rtv));
+ }
+
+ if (p->fl >= D3D_FEATURE_LEVEL_11_0 && tex->params.storable) { // UAVs are only created on feature level 11_0 and up
+ D3D11_UNORDERED_ACCESS_VIEW_DESC uavdesc = {
+ .Format = fmt,
+ };
+ switch (pl_tex_params_dimension(tex->params)) {
+ case 1:
+ if (tex_p->array_slice >= 0) {
+ uavdesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE1DARRAY;
+ uavdesc.Texture1DArray.FirstArraySlice = tex_p->array_slice;
+ uavdesc.Texture1DArray.ArraySize = 1;
+ } else {
+ uavdesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE1D;
+ }
+ break;
+ case 2:
+ if (tex_p->array_slice >= 0) {
+ uavdesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2DARRAY;
+ uavdesc.Texture2DArray.FirstArraySlice = tex_p->array_slice;
+ uavdesc.Texture2DArray.ArraySize = 1;
+ } else {
+ uavdesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D;
+ }
+ break;
+ case 3:
+ // D3D11 does not have Texture3D arrays
+ uavdesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE3D;
+ uavdesc.Texture3D.WSize = -1; // same "all depth slices" convention as the RTV above
+ break;
+ }
+ D3D(ID3D11Device_CreateUnorderedAccessView(p->dev, tex_p->res, &uavdesc,
+ &tex_p->uav));
+ }
+
+ return true;
+ error: // views created before the failure stay in tex_p; this function does not release them
+ return false;
+ }
+
+ // Destroy a texture: release its views, the underlying resource and the
+ // staging resource, flush the D3D11 message queue and free the texture
+ // allocation.
+ void pl_d3d11_tex_destroy(pl_gpu gpu, pl_tex tex)
+ {
+     struct pl_tex_d3d11 *priv = PL_PRIV(tex);
+     struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+     struct d3d11_ctx *ctx = p->ctx;
+
+     // Views first, then the resources they were created from
+     SAFE_RELEASE(priv->srv);
+     SAFE_RELEASE(priv->rtv);
+     SAFE_RELEASE(priv->uav);
+     SAFE_RELEASE(priv->res);
+     SAFE_RELEASE(priv->staging);
+
+     pl_d3d11_flush_message_queue(ctx, "After texture destroy");
+
+     // The public type is const-qualified, hence the cast
+     pl_free((void *) tex);
+ }
+
+ // Create a 1D, 2D or 3D texture described by `params`, the views created by
+ // tex_init(), and (for host-readable textures) a CPU-readable staging
+ // texture. Returns NULL on failure.
+ //
+ // The capability flags stored in `tex->params` can be wider than what the
+ // caller asked for: emulated formats force `storable`, and blit emulation
+ // adds `storable`/`sampleable`/`renderable` depending on the feature level.
+ pl_tex pl_d3d11_tex_create(pl_gpu gpu, const struct pl_tex_params *params)
+ {
+     struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+     struct d3d11_ctx *ctx = p->ctx;
+
+     struct pl_tex_t *tex = pl_zalloc_obj(NULL, tex, struct pl_tex_d3d11);
+     tex->params = *params;
+     tex->params.initial_data = NULL;
+     tex->sampler_type = PL_SAMPLER_NORMAL;
+
+     struct pl_tex_d3d11 *tex_p = PL_PRIV(tex);
+
+     DXGI_FORMAT dxfmt = fmt_to_dxgi(params->format);
+
+     D3D11_USAGE usage = D3D11_USAGE_DEFAULT;
+     D3D11_BIND_FLAG bind_flags = 0;
+
+     if (params->format->emulated) {
+         tex_p->texel_fmt = pl_find_fmt(gpu, params->format->type, 1, 0,
+                                        params->format->host_bits[0],
+                                        PL_FMT_CAP_TEXEL_UNIFORM);
+
+         if (!tex_p->texel_fmt) {
+             PL_ERR(gpu, "Failed picking texel format for emulated texture!");
+             goto error;
+         }
+
+         tex->params.storable = true;
+     }
+
+     if (p->fl >= D3D_FEATURE_LEVEL_11_0) {
+         // On >=FL11_0, blit emulation needs image storage
+         tex->params.storable |= params->blit_src || params->blit_dst;
+
+         // Blit emulation can use a sampler for linear filtering during stretch
+         if ((tex->params.format->caps & PL_FMT_CAP_LINEAR) && params->blit_src)
+             tex->params.sampleable = true;
+     } else {
+         // On <FL11_0, blit emulation uses a render pass
+         tex->params.sampleable |= params->blit_src;
+         tex->params.renderable |= params->blit_dst;
+     }
+
+     if (tex->params.sampleable)
+         bind_flags |= D3D11_BIND_SHADER_RESOURCE;
+     if (tex->params.renderable)
+         bind_flags |= D3D11_BIND_RENDER_TARGET;
+     if (p->fl >= D3D_FEATURE_LEVEL_11_0 && tex->params.storable)
+         bind_flags |= D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS;
+
+     // In FL9_x, resources with only D3D11_BIND_SHADER_RESOURCE can't be copied
+     // from GPU-accessible memory to CPU-accessible memory. The only other bind
+     // flag we set on this FL is D3D11_BIND_RENDER_TARGET, so set it.
+     if (p->fl <= D3D_FEATURE_LEVEL_9_3 && tex->params.host_readable)
+         bind_flags |= D3D11_BIND_RENDER_TARGET;
+
+     // Apparently IMMUTABLE textures are efficient, so try to infer whether we
+     // can use one. This is decided only after the FL9_x readback workaround
+     // above, because an IMMUTABLE resource can't be bound as a render target:
+     // picking IMMUTABLE first and adding D3D11_BIND_RENDER_TARGET afterwards
+     // makes texture creation fail.
+     if (params->initial_data && !params->format->emulated &&
+         !tex->params.renderable && !tex->params.storable &&
+         !params->host_writable && !(bind_flags & D3D11_BIND_RENDER_TARGET))
+     {
+         usage = D3D11_USAGE_IMMUTABLE;
+     }
+
+     // In FL9_x, when using DEFAULT or IMMUTABLE, BindFlags cannot be zero
+     if (p->fl <= D3D_FEATURE_LEVEL_9_3 && !bind_flags)
+         bind_flags |= D3D11_BIND_SHADER_RESOURCE;
+
+     // Non-emulated formats take their initial contents at creation time;
+     // emulated formats are uploaded after the views exist (see below)
+     D3D11_SUBRESOURCE_DATA data;
+     D3D11_SUBRESOURCE_DATA *pdata = NULL;
+     if (params->initial_data && !params->format->emulated) {
+         data = (D3D11_SUBRESOURCE_DATA) {
+             .pSysMem = params->initial_data,
+             .SysMemPitch = params->w * params->format->texel_size,
+         };
+         if (params->d)
+             data.SysMemSlicePitch = data.SysMemPitch * params->h;
+         pdata = &data;
+     }
+
+     switch (pl_tex_params_dimension(*params)) {
+     case 1:;
+         D3D11_TEXTURE1D_DESC desc1d = {
+             .Width = params->w,
+             .MipLevels = 1,
+             .ArraySize = 1,
+             .Format = dxfmt,
+             .Usage = usage,
+             .BindFlags = bind_flags,
+         };
+         D3D(ID3D11Device_CreateTexture1D(p->dev, &desc1d, pdata, &tex_p->tex1d));
+         tex_p->res = (ID3D11Resource *)tex_p->tex1d;
+
+         // Create a staging texture with CPU access for pl_tex_download()
+         if (params->host_readable) {
+             desc1d.BindFlags = 0;
+             desc1d.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
+             desc1d.Usage = D3D11_USAGE_STAGING;
+
+             D3D(ID3D11Device_CreateTexture1D(p->dev, &desc1d, NULL,
+                                              &tex_p->staging1d));
+             tex_p->staging = (ID3D11Resource *) tex_p->staging1d;
+         }
+         break;
+     case 2:;
+         D3D11_TEXTURE2D_DESC desc2d = {
+             .Width = params->w,
+             .Height = params->h,
+             .MipLevels = 1,
+             .ArraySize = 1,
+             .SampleDesc.Count = 1,
+             .Format = dxfmt,
+             .Usage = usage,
+             .BindFlags = bind_flags,
+         };
+         D3D(ID3D11Device_CreateTexture2D(p->dev, &desc2d, pdata, &tex_p->tex2d));
+         tex_p->res = (ID3D11Resource *)tex_p->tex2d;
+
+         // Create a staging texture with CPU access for pl_tex_download()
+         if (params->host_readable) {
+             desc2d.BindFlags = 0;
+             desc2d.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
+             desc2d.Usage = D3D11_USAGE_STAGING;
+
+             D3D(ID3D11Device_CreateTexture2D(p->dev, &desc2d, NULL,
+                                              &tex_p->staging2d));
+             tex_p->staging = (ID3D11Resource *) tex_p->staging2d;
+         }
+         break;
+     case 3:;
+         D3D11_TEXTURE3D_DESC desc3d = {
+             .Width = params->w,
+             .Height = params->h,
+             .Depth = params->d,
+             .MipLevels = 1,
+             .Format = dxfmt,
+             .Usage = usage,
+             .BindFlags = bind_flags,
+         };
+         D3D(ID3D11Device_CreateTexture3D(p->dev, &desc3d, pdata, &tex_p->tex3d));
+         tex_p->res = (ID3D11Resource *)tex_p->tex3d;
+
+         // Create a staging texture with CPU access for pl_tex_download()
+         if (params->host_readable) {
+             desc3d.BindFlags = 0;
+             desc3d.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
+             desc3d.Usage = D3D11_USAGE_STAGING;
+
+             D3D(ID3D11Device_CreateTexture3D(p->dev, &desc3d, NULL,
+                                              &tex_p->staging3d));
+             tex_p->staging = (ID3D11Resource *) tex_p->staging3d;
+         }
+         break;
+     default:
+         pl_unreachable();
+     }
+
+     // Textures created here are never views into an array
+     tex_p->array_slice = -1;
+
+     if (!tex_init(gpu, tex))
+         goto error;
+
+     if (params->initial_data && params->format->emulated) {
+         struct pl_tex_transfer_params ul_params = {
+             .tex = tex,
+             .ptr = (void *) params->initial_data,
+             .rc = { 0, 0, 0, params->w, params->h, params->d },
+         };
+
+         // Since we re-use GPU helpers which require writable images, just fake it
+         bool writable = tex->params.host_writable;
+         tex->params.host_writable = true;
+         if (!pl_tex_upload(gpu, &ul_params))
+             goto error;
+         tex->params.host_writable = writable;
+     }
+
+     pl_d3d11_flush_message_queue(ctx, "After texture create");
+
+     return tex;
+
+ error:
+     // Releases whatever was created so far and frees `tex`
+     pl_d3d11_tex_destroy(gpu, tex);
+     return NULL;
+ }
+
+// Wraps an existing ID3D11Resource texture (1D, 2D or 3D) in a new pl_tex.
+//
+// Size, format, usage and bind flags are read from the resource description.
+// The pl_tex capabilities (sampleable, renderable, storable, blit_src and
+// blit_dst) are derived from the bind flags and the device feature level.
+//
+// Returns the new pl_tex, or NULL (after destroying the partially constructed
+// texture) when the resource is a buffer or of unknown dimension, is mipmapped
+// or multisampled, is not D3D11_USAGE_DEFAULT, has an out-of-range array
+// slice, has no matching pl_fmt, or when tex_init() fails.
+// NOTE(review): the D3D() macro is defined outside this chunk; it presumably
+// also jumps to `error` when the wrapped call fails - confirm.
+pl_tex pl_d3d11_wrap(pl_gpu gpu, const struct pl_d3d11_wrap_params *params)
+{
+ struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+ struct d3d11_ctx *ctx = p->ctx;
+
+ struct pl_tex_t *tex = pl_zalloc_obj(NULL, tex, struct pl_tex_d3d11);
+ tex->sampler_type = PL_SAMPLER_NORMAL;
+
+ struct pl_tex_d3d11 *tex_p = PL_PRIV(tex);
+
+ // Defaults used for the fields that a given texture dimension lacks (e.g.
+ // 1D/3D textures have no sample count, 3D textures have no array size)
+ DXGI_FORMAT fmt = DXGI_FORMAT_UNKNOWN;
+ D3D11_USAGE usage = D3D11_USAGE_DEFAULT;
+ D3D11_BIND_FLAG bind_flags = 0;
+ UINT mip_levels = 1;
+ UINT array_size = 1;
+ UINT sample_count = 1;
+
+ // Dispatch on the concrete resource dimension to fetch its description
+ D3D11_RESOURCE_DIMENSION type;
+ ID3D11Resource_GetType(params->tex, &type);
+
+ switch (type) {
+ case D3D11_RESOURCE_DIMENSION_TEXTURE1D:
+ D3D(ID3D11Resource_QueryInterface(params->tex, &IID_ID3D11Texture1D,
+ (void **) &tex_p->tex1d));
+ tex_p->res = (ID3D11Resource *) tex_p->tex1d;
+
+ D3D11_TEXTURE1D_DESC desc1d;
+ ID3D11Texture1D_GetDesc(tex_p->tex1d, &desc1d);
+
+ tex->params.w = desc1d.Width;
+ mip_levels = desc1d.MipLevels;
+ array_size = desc1d.ArraySize;
+ fmt = desc1d.Format;
+ usage = desc1d.Usage;
+ bind_flags = desc1d.BindFlags;
+ break;
+
+ case D3D11_RESOURCE_DIMENSION_TEXTURE2D:
+ D3D(ID3D11Resource_QueryInterface(params->tex, &IID_ID3D11Texture2D,
+ (void **) &tex_p->tex2d));
+ tex_p->res = (ID3D11Resource *) tex_p->tex2d;
+
+ D3D11_TEXTURE2D_DESC desc2d;
+ ID3D11Texture2D_GetDesc(tex_p->tex2d, &desc2d);
+
+ tex->params.w = desc2d.Width;
+ tex->params.h = desc2d.Height;
+ mip_levels = desc2d.MipLevels;
+ array_size = desc2d.ArraySize;
+ fmt = desc2d.Format;
+ sample_count = desc2d.SampleDesc.Count;
+ usage = desc2d.Usage;
+ bind_flags = desc2d.BindFlags;
+
+ // Allow the format and size of 2D textures to be overridden to support
+ // shader views of video resources
+ if (params->fmt) {
+ fmt = params->fmt;
+ tex->params.w = params->w;
+ tex->params.h = params->h;
+ }
+
+ break;
+
+ case D3D11_RESOURCE_DIMENSION_TEXTURE3D:
+ D3D(ID3D11Resource_QueryInterface(params->tex, &IID_ID3D11Texture3D,
+ (void **) &tex_p->tex3d));
+ tex_p->res = (ID3D11Resource *) tex_p->tex3d;
+
+ D3D11_TEXTURE3D_DESC desc3d;
+ ID3D11Texture3D_GetDesc(tex_p->tex3d, &desc3d);
+
+ tex->params.w = desc3d.Width;
+ tex->params.h = desc3d.Height;
+ tex->params.d = desc3d.Depth;
+ mip_levels = desc3d.MipLevels;
+ fmt = desc3d.Format;
+ usage = desc3d.Usage;
+ bind_flags = desc3d.BindFlags;
+ break;
+
+ case D3D11_RESOURCE_DIMENSION_UNKNOWN:
+ case D3D11_RESOURCE_DIMENSION_BUFFER:
+ PL_ERR(gpu, "Resource is not suitable to wrap");
+ goto error;
+ }
+
+ // Reject resource layouts that wrapping does not support
+ if (mip_levels != 1) {
+ PL_ERR(gpu, "Mipmapped textures not supported for wrapping");
+ goto error;
+ }
+ if (sample_count != 1) {
+ PL_ERR(gpu, "Multisampled textures not supported for wrapping");
+ goto error;
+ }
+ if (usage != D3D11_USAGE_DEFAULT) {
+ PL_ERR(gpu, "Resource is not D3D11_USAGE_DEFAULT");
+ goto error;
+ }
+
+ // For texture arrays, params->array_slice selects the wrapped member; an
+ // array_slice of -1 marks a resource that is not a texture array
+ if (array_size > 1) {
+ if (params->array_slice < 0 || params->array_slice >= array_size) {
+ PL_ERR(gpu, "array_slice out of range");
+ goto error;
+ }
+ tex_p->array_slice = params->array_slice;
+ } else {
+ tex_p->array_slice = -1;
+ }
+
+ // Derive the pl_tex capabilities from the resource's bind flags
+ if (bind_flags & D3D11_BIND_SHADER_RESOURCE) {
+ tex->params.sampleable = true;
+
+ // Blit emulation uses a render pass on <FL11_0
+ if (p->fl < D3D_FEATURE_LEVEL_11_0)
+ tex->params.blit_src = true;
+ }
+ if (bind_flags & D3D11_BIND_RENDER_TARGET) {
+ tex->params.renderable = true;
+
+ // Blit emulation uses a render pass on <FL11_0
+ if (p->fl < D3D_FEATURE_LEVEL_11_0)
+ tex->params.blit_dst = true;
+ }
+ static const D3D11_BIND_FLAG storable_flags =
+ D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE;
+ if ((bind_flags & storable_flags) == storable_flags) {
+ tex->params.storable = true;
+
+ // Blit emulation uses image storage on >=FL11_0. A feature level check
+ // isn't required because <FL11_0 doesn't have storable images.
+ tex->params.blit_src = tex->params.blit_dst = true;
+ }
+
+ // Pick the first pl_fmt of this GPU whose DXGI format equals the
+ // (possibly overridden) resource format
+ for (int i = 0; i < gpu->num_formats; i++) {
+ DXGI_FORMAT target_fmt = fmt_to_dxgi(gpu->formats[i]);
+ if (fmt == target_fmt) {
+ tex->params.format = gpu->formats[i];
+ break;
+ }
+ }
+ if (!tex->params.format) {
+ PL_ERR(gpu, "Could not find a suitable pl_fmt for wrapped resource");
+ goto error;
+ }
+
+ if (!tex_init(gpu, tex))
+ goto error;
+
+ pl_d3d11_flush_message_queue(ctx, "After texture wrap");
+
+ return tex;
+
+error:
+ pl_d3d11_tex_destroy(gpu, tex);
+ return NULL;
+}
+
+// Discards the contents of `tex` through the D3D11.1 discard API.
+//
+// Does nothing when the ID3D11DeviceContext1 interface (p->imm1) is absent.
+void pl_d3d11_tex_invalidate(pl_gpu gpu, pl_tex tex)
+{
+    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+    struct pl_tex_d3d11 *tex_p = PL_PRIV(tex);
+    struct d3d11_ctx *ctx = p->ctx;
+
+    // Resource discarding requires D3D11.1
+    if (!p->imm1)
+        return;
+
+    // A pl_tex can refer to a single member of a texture array, and
+    // discarding one of its views should only discard that member. For that
+    // reason a view is preferred over the whole resource; the views are
+    // tried in the order RTV, UAV, SRV.
+    ID3D11View *view = NULL;
+    if (tex_p->rtv) {
+        view = (ID3D11View *) tex_p->rtv;
+    } else if (tex_p->uav) {
+        view = (ID3D11View *) tex_p->uav;
+    } else if (tex_p->srv) {
+        view = (ID3D11View *) tex_p->srv;
+    }
+
+    if (view) {
+        ID3D11DeviceContext1_DiscardView(p->imm1, view);
+    } else if (tex_p->array_slice < 0) {
+        // Without any view, the resource itself is only discarded when it
+        // is not a texture array
+        ID3D11DeviceContext1_DiscardResource(p->imm1, tex_p->res);
+    }
+
+    pl_d3d11_flush_message_queue(ctx, "After texture invalidate");
+}
+
+// Clears `tex` to `color`, choosing the clear call by format type.
+//
+// Integer formats are cleared through the UAV when one exists; otherwise the
+// integer color is converted to float and the RTV is cleared. All other
+// formats prefer the RTV and fall back to a float clear of the UAV.
+void pl_d3d11_tex_clear_ex(pl_gpu gpu, pl_tex tex,
+                           const union pl_clear_color color)
+{
+    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+    struct pl_tex_d3d11 *tex_p = PL_PRIV(tex);
+    struct d3d11_ctx *ctx = p->ctx;
+
+    switch (tex->params.format->type) {
+    case PL_FMT_UINT:
+        if (tex_p->uav) {
+            ID3D11DeviceContext_ClearUnorderedAccessViewUint(p->imm, tex_p->uav,
+                                                             color.u);
+        } else {
+            float c[4] = { color.u[0], color.u[1], color.u[2], color.u[3] };
+            ID3D11DeviceContext_ClearRenderTargetView(p->imm, tex_p->rtv, c);
+        }
+        break;
+
+    case PL_FMT_SINT:
+        if (tex_p->uav) {
+            // The signed values are passed through the unsigned clear call
+            // unchanged (reinterpreted, not converted)
+            ID3D11DeviceContext_ClearUnorderedAccessViewUint(p->imm, tex_p->uav,
+                                                             (const uint32_t *)color.i);
+        } else {
+            float c[4] = { color.i[0], color.i[1], color.i[2], color.i[3] };
+            ID3D11DeviceContext_ClearRenderTargetView(p->imm, tex_p->rtv, c);
+        }
+        break;
+
+    default:
+        if (tex_p->rtv) {
+            ID3D11DeviceContext_ClearRenderTargetView(p->imm, tex_p->rtv, color.f);
+        } else {
+            ID3D11DeviceContext_ClearUnorderedAccessViewFloat(p->imm, tex_p->uav, color.f);
+        }
+        break;
+    }
+
+    pl_d3d11_flush_message_queue(ctx, "After texture clear");
+}
+
+// Builds a D3D11_BOX compound literal from a rectangle with x0/y0/z0 and
+// x1/y1/z1 members. The argument is parenthesized so that expressions such as
+// `*ptr` expand correctly; it is evaluated several times, so it must be free
+// of side effects.
+#define pl_rect3d_to_box(rc)                                          \
+    ((D3D11_BOX) {                                                    \
+        .left  = (rc).x0, .top    = (rc).y0, .front = (rc).z0,        \
+        .right = (rc).x1, .bottom = (rc).y1, .back  = (rc).z1,        \
+    })
+
+// Blits params->src_rc of the source texture to params->dst_rc of the
+// destination texture.
+//
+// Identical rectangles with identical DXGI formats are copied directly with
+// CopySubresourceRegion. Anything else goes through the compute blit on
+// feature level 11_0 and above, or through the raster blit below that.
+void pl_d3d11_tex_blit(pl_gpu gpu, const struct pl_tex_blit_params *params)
+{
+    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+    struct d3d11_ctx *ctx = p->ctx;
+    struct pl_tex_d3d11 *src_p = PL_PRIV(params->src);
+    struct pl_tex_d3d11 *dst_p = PL_PRIV(params->dst);
+    const DXGI_FORMAT src_fmt = fmt_to_dxgi(params->src->params.format);
+    const DXGI_FORMAT dst_fmt = fmt_to_dxgi(params->dst->params.format);
+
+    // A plain copy is only possible when the blit needs no flipping, scaling
+    // or format conversion
+    pl_rect3d from = params->src_rc, to = params->dst_rc;
+    const bool plain_copy = pl_rect3d_eq(from, to) && src_fmt == dst_fmt;
+
+    if (plain_copy) {
+        pl_rect3d region = params->src_rc;
+        pl_rect3d_normalize(&region);
+
+        ID3D11DeviceContext_CopySubresourceRegion(p->imm, dst_p->res,
+            tex_subresource(params->dst), region.x0, region.y0, region.z0,
+            src_p->res, tex_subresource(params->src),
+            &pl_rect3d_to_box(region));
+    } else if (p->fl < D3D_FEATURE_LEVEL_11_0) {
+        pl_tex_blit_raster(gpu, params);
+    } else if (!pl_tex_blit_compute(gpu, params)) {
+        PL_ERR(gpu, "Failed compute shader fallback blit");
+    }
+
+    pl_d3d11_flush_message_queue(ctx, "After texture blit");
+}
+
+// Uploads host memory (params->ptr) into the region params->rc of
+// params->tex.
+//
+// Regular formats use UpdateSubresource directly. Emulated formats are split
+// into slices; each slice is first copied into a temporary storable buffer
+// and then written to the texture by pl_tex_upload_texel().
+//
+// Returns true on success, false if a temporary buffer could not be created
+// or a texel upload failed.
+bool pl_d3d11_tex_upload(pl_gpu gpu, const struct pl_tex_transfer_params *params)
+{
+    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+    struct d3d11_ctx *ctx = p->ctx;
+    pl_tex tex = params->tex;
+    struct pl_tex_d3d11 *tex_p = PL_PRIV(tex);
+    struct pl_tex_transfer_params *slices = NULL;
+    bool success = false;
+
+    pl_d3d11_timer_start(gpu, params->timer);
+
+    if (!tex->params.format->emulated) {
+
+        ID3D11DeviceContext_UpdateSubresource(p->imm, tex_p->res,
+            tex_subresource(tex), &pl_rect3d_to_box(params->rc), params->ptr,
+            params->row_pitch, params->depth_pitch);
+
+    } else {
+
+        const int count = pl_tex_transfer_slices(gpu, tex_p->texel_fmt,
+                                                 params, &slices);
+        for (int n = 0; n < count; n++) {
+            struct pl_tex_transfer_params *slice = &slices[n];
+
+            // Copy the source data buffer into an intermediate buffer
+            pl_buf tbuf = pl_buf_create(gpu, pl_buf_params(
+                .memory_type = PL_BUF_MEM_DEVICE,
+                .format = tex_p->texel_fmt,
+                .size = pl_tex_transfer_size(slice),
+                .initial_data = slice->ptr,
+                .storable = true,
+            ));
+
+            if (!tbuf) {
+                PL_ERR(gpu, "Failed creating buffer for tex upload fallback!");
+                goto done;
+            }
+
+            // Redirect the slice from host memory to the temporary buffer
+            slice->ptr = NULL;
+            slice->buf = tbuf;
+            slice->buf_offset = 0;
+
+            const bool uploaded = pl_tex_upload_texel(gpu, slice);
+            pl_buf_destroy(gpu, &tbuf);
+            if (!uploaded)
+                goto done;
+        }
+
+    }
+
+    success = true;
+
+done:
+    pl_d3d11_timer_end(gpu, params->timer);
+    pl_d3d11_flush_message_queue(ctx, "After texture upload");
+
+    pl_free(slices);
+    return success;
+}
+
+// Downloads the region params->rc of params->tex into host memory
+// (params->ptr).
+//
+// Requires the texture to have a staging resource; returns false otherwise.
+// Emulated formats are split into slices which are read back through a
+// temporary host-readable buffer via pl_tex_download_texel(). Regular formats
+// are copied into the staging texture, which is then mapped and copied out
+// line by line using the caller's row and depth pitches.
+//
+// Returns true on success and false on any failure.
+bool pl_d3d11_tex_download(pl_gpu gpu, const struct pl_tex_transfer_params *params)
+{
+    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
+    struct d3d11_ctx *ctx = p->ctx;
+    const struct pl_tex_t *tex = params->tex;
+    pl_fmt fmt = tex->params.format;
+    struct pl_tex_d3d11 *tex_p = PL_PRIV(tex);
+    struct pl_tex_transfer_params *slices = NULL;
+    bool ret = false;
+
+    if (!tex_p->staging)
+        return false;
+
+    pl_d3d11_timer_start(gpu, params->timer);
+
+    if (fmt->emulated) {
+
+        pl_buf tbuf = NULL;
+        int num_slices = pl_tex_transfer_slices(gpu, tex_p->texel_fmt, params, &slices);
+        for (int i = 0; i < num_slices; i++) {
+            const size_t slice_size = pl_tex_transfer_size(&slices[i]);
+            bool ok = pl_buf_recreate(gpu, &tbuf, pl_buf_params(
+                .storable = true,
+                .size = slice_size,
+                .memory_type = PL_BUF_MEM_DEVICE,
+                .format = tex_p->texel_fmt,
+                .host_readable = true,
+            ));
+
+            if (!ok) {
+                PL_ERR(gpu, "Failed creating buffer for tex download fallback!");
+                goto error;
+            }
+
+            // Remember the host destination, then redirect the slice to the
+            // temporary buffer
+            void *ptr = slices[i].ptr;
+            slices[i].ptr = NULL;
+            slices[i].buf = tbuf;
+            slices[i].buf_offset = 0;
+
+            // Download into an intermediate buffer first
+            ok = pl_tex_download_texel(gpu, &slices[i]);
+            ok = ok && pl_buf_read(gpu, tbuf, 0, ptr, slice_size);
+            if (!ok) {
+                pl_buf_destroy(gpu, &tbuf);
+                goto error;
+            }
+        }
+        pl_buf_destroy(gpu, &tbuf);
+
+    } else {
+
+        // The region is copied to the same coordinates inside the staging
+        // texture, so it has to be read back from those coordinates below
+        ID3D11DeviceContext_CopySubresourceRegion(p->imm,
+            (ID3D11Resource *) tex_p->staging, 0, params->rc.x0, params->rc.y0,
+            params->rc.z0, tex_p->res, tex_subresource(tex),
+            &pl_rect3d_to_box(params->rc));
+
+        D3D11_MAPPED_SUBRESOURCE lock;
+        D3D(ID3D11DeviceContext_Map(p->imm, (ID3D11Resource *) tex_p->staging, 0,
+                                    D3D11_MAP_READ, 0, &lock));
+
+        char *cdst = params->ptr;
+        char *csrc = lock.pData;
+        const size_t texel_size = fmt->texel_size;
+        size_t line_size = pl_rect_w(params->rc) * texel_size;
+        for (int z = 0; z < pl_rect_d(params->rc); z++) {
+            for (int y = 0; y < pl_rect_h(params->rc); y++) {
+                // RowPitch and DepthPitch are byte strides, so the horizontal
+                // start (x0, in texels) must be scaled to bytes as well
+                memcpy(cdst + z * params->depth_pitch + y * params->row_pitch,
+                       csrc + (params->rc.z0 + z) * lock.DepthPitch +
+                           (params->rc.y0 + y) * lock.RowPitch +
+                           params->rc.x0 * texel_size,
+                       line_size);
+            }
+        }
+
+        ID3D11DeviceContext_Unmap(p->imm, (ID3D11Resource*)tex_p->staging, 0);
+    }
+
+    ret = true;
+
+error:
+    pl_d3d11_timer_end(gpu, params->timer);
+    pl_d3d11_flush_message_queue(ctx, "After texture download");
+
+    pl_free(slices);
+    return ret;
+}
diff --git a/src/d3d11/meson.build b/src/d3d11/meson.build
new file mode 100644
index 0000000..d4c4b44
--- /dev/null
+++ b/src/d3d11/meson.build
@@ -0,0 +1,41 @@
+# Build configuration for the Direct3D 11 backend. The 'd3d11' feature option
+# is narrowed down below by requiring every header and dependency it needs.
+d3d11 = get_option('d3d11')
+d3d11_header = cc.check_header('d3d11.h', required: false) # needed publicly
+d3d11_headers_extra = [ # needed internally
+ cc.check_header('d3d11_4.h', required: d3d11),
+ cc.check_header('dxgi1_6.h', required: d3d11),
+]
+d3d11_deps = [
+ dependency('spirv-cross-c-shared', version: '>=0.29.0', required: d3d11),
+ cc.find_library('version', required: d3d11),
+]
+
+# The feature is only allowed if all headers and dependencies were found
+d3d11 = d3d11.require(d3d11_header)
+foreach h : d3d11_headers_extra
+ d3d11 = d3d11.require(h)
+endforeach
+foreach d : d3d11_deps
+ d3d11 = d3d11.require(d.found())
+endforeach
+
+components.set('d3d11', d3d11.allowed())
+if d3d11.allowed()
+ # Record whether the optional DXGI debug symbols are available
+ conf_internal.set('PL_HAVE_DXGI_DEBUG',
+ cc.has_header_symbol('dxgidebug.h', 'IID_IDXGIInfoQueue'))
+ conf_internal.set('PL_HAVE_DXGI_DEBUG_D3D11',
+ cc.has_header_symbol('d3d11sdklayers.h', 'DXGI_DEBUG_D3D11'))
+ add_project_arguments(['-DCOBJMACROS'], language: ['c', 'cpp'])
+ build_deps += declare_dependency(dependencies: d3d11_deps)
+ tests += 'd3d11.c'
+ sources += [
+ 'd3d11/context.c',
+ 'd3d11/formats.c',
+ 'd3d11/gpu.c',
+ 'd3d11/gpu_buf.c',
+ 'd3d11/gpu_tex.c',
+ 'd3d11/gpu_pass.c',
+ 'd3d11/swapchain.c',
+ 'd3d11/utils.c',
+ ]
+# Backend disabled but the public header exists: build the stubs instead
+elif d3d11_header
+ sources += 'd3d11/stubs.c'
+endif
diff --git a/src/d3d11/stubs.c b/src/d3d11/stubs.c
new file mode 100644
index 0000000..b3f259c
--- /dev/null
+++ b/src/d3d11/stubs.c
@@ -0,0 +1,56 @@
+/*
+ * This file is part of libplacebo.
+ *
+ * libplacebo is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libplacebo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "../common.h"
+#include "log.h"
+
+#include <libplacebo/d3d11.h>
+
+// Default parameter set, initialized from the PL_D3D11_DEFAULTS macro
+const struct pl_d3d11_params pl_d3d11_default_params = { PL_D3D11_DEFAULTS };
+
+// Stub: logs a fatal message that D3D11 support is not compiled in and always
+// returns NULL. `params` is ignored.
+pl_d3d11 pl_d3d11_create(pl_log log, const struct pl_d3d11_params *params)
+{
+ pl_fatal(log, "libplacebo compiled without D3D11 support!");
+ return NULL;
+}
+
+// Stub: there is nothing to destroy; only asserts that *pd3d11 is NULL.
+void pl_d3d11_destroy(pl_d3d11 *pd3d11)
+{
+ pl_d3d11 d3d11 = *pd3d11;
+ pl_assert(!d3d11);
+}
+
+// Stub: always returns NULL, regardless of `gpu`.
+pl_d3d11 pl_d3d11_get(pl_gpu gpu)
+{
+ return NULL;
+}
+
+// Stub: marked unreachable. NOTE(review): presumably because the stub
+// pl_d3d11_create() never returns a pl_d3d11 to pass in - confirm.
+pl_swapchain pl_d3d11_create_swapchain(pl_d3d11 d3d11,
+ const struct pl_d3d11_swapchain_params *params)
+{
+ pl_unreachable();
+}
+
+// Stub: marked unreachable. NOTE(review): presumably because no D3D11
+// swapchain can exist in a build without D3D11 support - confirm.
+IDXGISwapChain *pl_d3d11_swapchain_unwrap(pl_swapchain sw)
+{
+ pl_unreachable();
+}
+
+// Stub: marked unreachable. NOTE(review): presumably because no D3D11 pl_gpu
+// can exist in a build without D3D11 support - confirm.
+pl_tex pl_d3d11_wrap(pl_gpu gpu, const struct pl_d3d11_wrap_params *params)
+{
+ pl_unreachable();
+}
diff --git a/src/d3d11/swapchain.c b/src/d3d11/swapchain.c
new file mode 100644
index 0000000..8a53632
--- /dev/null
+++ b/src/d3d11/swapchain.c
@@ -0,0 +1,667 @@
+/*
+ * This file is part of libplacebo.
+ *
+ * libplacebo is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libplacebo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <windows.h>
+#include <versionhelpers.h>
+#include <math.h>
+
+#include "gpu.h"
+#include "swapchain.h"
+#include "utils.h"
+
+// A swap chain configuration (DXGI color space + backbuffer format) together
+// with the libplacebo color space that it corresponds to.
+struct d3d11_csp_mapping {
+ // DXGI color space, applied with IDXGISwapChain3_SetColorSpace1
+ DXGI_COLOR_SPACE_TYPE d3d11_csp;
+ // Backbuffer format, used when creating or resizing the swap chain
+ DXGI_FORMAT d3d11_fmt;
+ // Color space reported for the frames handed out by d3d11_sw_start_frame
+ struct pl_color_space out_csp;
+};
+
+// Picks the swap chain configuration for a color space hint.
+//
+//  - HDR hints with a non-linear transfer map to HDR10 (PQ / BT.2020) on a
+//    10-bit backbuffer, carrying over the whitelisted HDR metadata.
+//  - Wide gamut primaries or a linear transfer map to scRGB (linear BT.709)
+//    on a 16-bit float backbuffer.
+//  - Everything else maps to SDR (G22 / BT.709) on an 8-bit or 10-bit
+//    backbuffer, depending on `use_8bit_sdr`.
+static struct d3d11_csp_mapping map_pl_csp_to_d3d11(const struct pl_color_space *hint,
+                                                    bool use_8bit_sdr)
+{
+    const bool is_linear = hint->transfer == PL_COLOR_TRC_LINEAR;
+
+    if (pl_color_space_is_hdr(hint) && !is_linear) {
+        struct pl_color_space hdr10 = pl_color_space_hdr10;
+        hdr10.hdr = (struct pl_hdr_metadata) {
+            // Whitelist only values that we support signalling metadata for
+            .prim = hint->hdr.prim,
+            .min_luma = hint->hdr.min_luma,
+            .max_luma = hint->hdr.max_luma,
+            .max_cll = hint->hdr.max_cll,
+            .max_fall = hint->hdr.max_fall,
+        };
+
+        struct d3d11_csp_mapping hdr_map = {
+            .d3d11_csp = DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020,
+            .d3d11_fmt = DXGI_FORMAT_R10G10B10A2_UNORM,
+            .out_csp = hdr10,
+        };
+        return hdr_map;
+    }
+
+    if (pl_color_primaries_is_wide_gamut(hint->primaries) || is_linear) {
+        // scRGB a la VK_COLOR_SPACE_EXTENDED_SRGB_LINEAR_EXT,
+        // so could be utilized for HDR/wide gamut content as well
+        // with content that goes beyond 0.0-1.0.
+        struct d3d11_csp_mapping scrgb_map = {
+            .d3d11_csp = DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709,
+            .d3d11_fmt = DXGI_FORMAT_R16G16B16A16_FLOAT,
+            .out_csp = {
+                .primaries = PL_COLOR_PRIM_BT_709,
+                .transfer = PL_COLOR_TRC_LINEAR,
+            }
+        };
+        return scrgb_map;
+    }
+
+    DXGI_FORMAT sdr_fmt = DXGI_FORMAT_R10G10B10A2_UNORM;
+    if (use_8bit_sdr)
+        sdr_fmt = DXGI_FORMAT_R8G8B8A8_UNORM;
+
+    struct d3d11_csp_mapping sdr_map = {
+        .d3d11_csp = DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709,
+        .d3d11_fmt = sdr_fmt,
+        .out_csp = pl_color_space_monitor,
+    };
+    return sdr_map;
+}
+
+// Private state of a D3D11 swapchain (accessed via PL_PRIV on a pl_swapchain)
+struct priv {
+ // Swapchain function table (initialized from `d3d11_swapchain`)
+ struct pl_sw_fns impl;
+
+ struct d3d11_ctx *ctx;
+ // The DXGI swap chain; a reference is held until d3d11_sw_destroy
+ IDXGISwapChain *swapchain;
+ // Wrapped backbuffer of the frame in progress; set by start_frame and
+ // destroyed again by submit_frame
+ pl_tex backbuffer;
+
+ // Currently requested or applied swap chain configuration.
+ // Affected by received colorspace hints.
+ struct d3d11_csp_mapping csp_map;
+
+ // Whether a swapchain backbuffer format reconfiguration has been
+ // requested by means of an additional resize action.
+ bool update_swapchain_format;
+
+ // Whether 10-bit backbuffer format is disabled for SDR content.
+ bool disable_10bit_sdr;
+
+ // Fallback to 8-bit RGB was triggered due to lack of compatibility
+ bool fallback_8bit_rgb;
+};
+
+// Destroys the swapchain object: destroys the wrapped backbuffer texture (if
+// any), releases the IDXGISwapChain reference and frees `sw` itself.
+static void d3d11_sw_destroy(pl_swapchain sw)
+{
+ struct priv *p = PL_PRIV(sw);
+
+ pl_tex_destroy(sw->gpu, &p->backbuffer);
+ SAFE_RELEASE(p->swapchain);
+ pl_free((void *) sw);
+}
+
+// Returns the maximum frame latency configured on the DXGI device, or 0 (no
+// known latency) if the value cannot be queried.
+static int d3d11_sw_latency(pl_swapchain sw)
+{
+    struct priv *p = PL_PRIV(sw);
+    struct d3d11_ctx *ctx = p->ctx;
+
+    // Initialize the output and check the result, so that a failed query
+    // never leads to returning an indeterminate value
+    UINT max_latency = 0;
+    HRESULT hr = IDXGIDevice1_GetMaximumFrameLatency(ctx->dxgi_dev, &max_latency);
+    if (FAILED(hr))
+        return 0;
+
+    return max_latency;
+}
+
+// Fetches buffer 0 of the swap chain and wraps it as a pl_tex.
+//
+// The local ID3D11Texture2D reference is released on every path. Returns the
+// result of pl_d3d11_wrap(), or NULL if that failed.
+// NOTE(review): D3D() is defined outside this chunk; it presumably jumps to
+// `error` when GetBuffer fails, leaving `tex` NULL - confirm.
+static pl_tex get_backbuffer(pl_swapchain sw)
+{
+ struct priv *p = PL_PRIV(sw);
+ struct d3d11_ctx *ctx = p->ctx;
+ ID3D11Texture2D *backbuffer = NULL;
+ pl_tex tex = NULL;
+
+ D3D(IDXGISwapChain_GetBuffer(p->swapchain, 0, &IID_ID3D11Texture2D,
+ (void **) &backbuffer));
+
+ tex = pl_d3d11_wrap(sw->gpu, pl_d3d11_wrap_params(
+ .tex = (ID3D11Resource *) backbuffer,
+ ));
+
+error:
+ SAFE_RELEASE(backbuffer);
+ return tex;
+}
+
+// Resizes the swap chain buffers and/or applies a pending backbuffer format.
+//
+// A *width or *height of 0 keeps the current dimension. The buffers are only
+// resized if the size or the requested format (p->csp_map.d3d11_fmt) differs
+// from the current swap chain description, which is not possible while a
+// frame is in progress. If the resize is rejected with E_INVALIDARG for a
+// format other than R8G8B8A8_UNORM, it is retried with that format and the
+// swapchain permanently falls back to 8-bit RGB SDR.
+//
+// On success, the resulting size is written to *width and *height and the
+// pending-format flag is cleared.
+static bool d3d11_sw_resize(pl_swapchain sw, int *width, int *height)
+{
+    struct priv *p = PL_PRIV(sw);
+    struct d3d11_ctx *ctx = p->ctx;
+
+    DXGI_SWAP_CHAIN_DESC desc = {0};
+    IDXGISwapChain_GetDesc(p->swapchain, &desc);
+
+    int w = PL_DEF(*width, desc.BufferDesc.Width);
+    int h = PL_DEF(*height, desc.BufferDesc.Height);
+
+    const bool format_changed = p->csp_map.d3d11_fmt != desc.BufferDesc.Format;
+    if (format_changed) {
+        PL_INFO(ctx, "Attempting to reconfigure swap chain format: %s -> %s",
+                pl_get_dxgi_format_name(desc.BufferDesc.Format),
+                pl_get_dxgi_format_name(p->csp_map.d3d11_fmt));
+    }
+
+    const bool needs_resize = w != desc.BufferDesc.Width ||
+                              h != desc.BufferDesc.Height ||
+                              format_changed;
+    if (needs_resize) {
+        if (p->backbuffer) {
+            PL_ERR(sw, "Tried resizing the swapchain while a frame was in "
+                   "progress! Please submit the current frame first.");
+            return false;
+        }
+
+        HRESULT hr = IDXGISwapChain_ResizeBuffers(p->swapchain, 0, w, h,
+                                                  p->csp_map.d3d11_fmt, desc.Flags);
+
+        const bool try_fallback = hr == E_INVALIDARG &&
+            p->csp_map.d3d11_fmt != DXGI_FORMAT_R8G8B8A8_UNORM;
+
+        if (try_fallback) {
+            PL_WARN(sw, "Reconfiguring the swapchain failed, re-trying with R8G8B8A8_UNORM fallback.");
+            D3D(IDXGISwapChain_ResizeBuffers(p->swapchain, 0, w, h,
+                                             DXGI_FORMAT_R8G8B8A8_UNORM, desc.Flags));
+
+            // re-configure the colorspace to 8-bit RGB SDR fallback
+            p->csp_map = map_pl_csp_to_d3d11(&pl_color_space_unknown, true);
+            p->fallback_8bit_rgb = true;
+        } else if (FAILED(hr)) {
+            PL_ERR(sw, "Reconfiguring the swapchain failed with error: %s", pl_hresult_to_str(hr));
+            return false;
+        }
+    }
+
+    p->update_swapchain_format = false;
+    *width = w;
+    *height = h;
+    return true;
+
+error:
+    return false;
+}
+
+// Begins a new frame: applies a pending format change, wraps the current
+// backbuffer and describes it in *out_frame.
+//
+// Fails if the context is in a failed state, if a frame is already in
+// progress, or if reconfiguring / wrapping the backbuffer fails. The reported
+// bit depth is the largest component depth of the backbuffer format.
+static bool d3d11_sw_start_frame(pl_swapchain sw,
+                                 struct pl_swapchain_frame *out_frame)
+{
+    struct priv *p = PL_PRIV(sw);
+    struct d3d11_ctx *ctx = p->ctx;
+
+    if (ctx->is_failed)
+        return false;
+
+    if (p->backbuffer) {
+        PL_ERR(sw, "Attempted calling `pl_swapchain_start_frame` while a frame "
+               "was already in progress! Call `pl_swapchain_submit_frame` first.");
+        return false;
+    }
+
+    // A size of 0x0 keeps the current dimensions and only applies the format
+    if (p->update_swapchain_format) {
+        int cur_w = 0, cur_h = 0;
+        if (!d3d11_sw_resize(sw, &cur_w, &cur_h))
+            return false;
+    }
+
+    p->backbuffer = get_backbuffer(sw);
+    if (!p->backbuffer)
+        return false;
+
+    pl_fmt bb_fmt = p->backbuffer->params.format;
+    int max_depth = 0;
+    for (int c = 0; c < bb_fmt->num_components; c++)
+        max_depth = PL_MAX(max_depth, bb_fmt->component_depth[c]);
+
+    *out_frame = (struct pl_swapchain_frame) {
+        .fbo = p->backbuffer,
+        .flipped = false,
+        .color_repr = {
+            .sys = PL_COLOR_SYSTEM_RGB,
+            .levels = PL_COLOR_LEVELS_FULL,
+            .alpha = PL_ALPHA_UNKNOWN,
+            .bits = {
+                .sample_depth = max_depth,
+                .color_depth = max_depth,
+            },
+        },
+        .color_space = p->csp_map.out_csp,
+    };
+
+    return true;
+}
+
+// Ends the current frame by destroying the wrapped backbuffer texture.
+// Returns false if the context is in a failed state.
+static bool d3d11_sw_submit_frame(pl_swapchain sw)
+{
+ struct priv *p = PL_PRIV(sw);
+ struct d3d11_ctx *ctx = p->ctx;
+
+ // Release the backbuffer. We shouldn't hold onto it unnecessarily, because
+ // it prevents external code from resizing the swapchain, which we'd
+ // otherwise support just fine.
+ pl_tex_destroy(sw->gpu, &p->backbuffer);
+
+ return !ctx->is_failed;
+}
+
+// Presents the swap chain with a sync interval of 1 and no flags. The
+// function returns nothing, so a Present failure is not reported to the
+// caller.
+static void d3d11_sw_swap_buffers(pl_swapchain sw)
+{
+ struct priv *p = PL_PRIV(sw);
+ struct d3d11_ctx *ctx = p->ctx;
+
+ // Present can fail with a device removed error
+ D3D(IDXGISwapChain_Present(p->swapchain, 1, 0));
+
+error:
+ return;
+}
+
+// Converts libplacebo HDR metadata into a DXGI_HDR_METADATA_HDR10 structure.
+//
+// Chromaticity coordinates are scaled by 50000 and the minimum mastering
+// luminance by 10000; every value is rounded to the nearest integer.
+static DXGI_HDR_METADATA_HDR10 set_hdr10_metadata(const struct pl_hdr_metadata *hdr)
+{
+    const float chroma_scale = 50000;
+    const float min_luma_scale = 10000;
+
+    DXGI_HDR_METADATA_HDR10 md = {0};
+
+    md.RedPrimary[0] = roundf(hdr->prim.red.x * chroma_scale);
+    md.RedPrimary[1] = roundf(hdr->prim.red.y * chroma_scale);
+    md.GreenPrimary[0] = roundf(hdr->prim.green.x * chroma_scale);
+    md.GreenPrimary[1] = roundf(hdr->prim.green.y * chroma_scale);
+    md.BluePrimary[0] = roundf(hdr->prim.blue.x * chroma_scale);
+    md.BluePrimary[1] = roundf(hdr->prim.blue.y * chroma_scale);
+    md.WhitePoint[0] = roundf(hdr->prim.white.x * chroma_scale);
+    md.WhitePoint[1] = roundf(hdr->prim.white.y * chroma_scale);
+
+    md.MaxMasteringLuminance = roundf(hdr->max_luma);
+    md.MinMasteringLuminance = roundf(hdr->min_luma * min_luma_scale);
+    md.MaxContentLightLevel = roundf(hdr->max_cll);
+    md.MaxFrameAverageLightLevel = roundf(hdr->max_fall);
+
+    return md;
+}
+
+// Applies the DXGI color space of `csp_map` to the swap chain and, if the v4
+// swap chain interface is available, the matching HDR10 metadata (or no
+// metadata for non-HDR color spaces).
+//
+// The return value only reflects whether the color space was set: `ret`
+// becomes true right after SetColorSpace1 and is not reset afterwards.
+// Whenever the `error` label is reached - including the non-fatal case where
+// IDXGISwapChain4 is unavailable - the HDR metadata in csp_map->out_csp is
+// cleared, since it was not signalled to the swap chain.
+// NOTE(review): D3D() is defined outside this chunk; it presumably jumps to
+// `error` when the wrapped call fails - confirm.
+static bool set_swapchain_metadata(struct d3d11_ctx *ctx,
+ IDXGISwapChain3 *swapchain3,
+ struct d3d11_csp_mapping *csp_map)
+{
+ IDXGISwapChain4 *swapchain4 = NULL;
+ bool ret = false;
+ bool is_hdr = pl_color_space_is_hdr(&csp_map->out_csp);
+ DXGI_HDR_METADATA_HDR10 hdr10 = is_hdr ?
+ set_hdr10_metadata(&csp_map->out_csp.hdr) : (DXGI_HDR_METADATA_HDR10){ 0 };
+
+ D3D(IDXGISwapChain3_SetColorSpace1(swapchain3, csp_map->d3d11_csp));
+
+ // if we succeeded to set the color space, it's good enough,
+ // since older versions of Windows 10 will not have swapchain v4 available.
+ ret = true;
+
+ if (FAILED(IDXGISwapChain3_QueryInterface(swapchain3, &IID_IDXGISwapChain4,
+ (void **)&swapchain4)))
+ {
+ PL_TRACE(ctx, "v4 swap chain interface is not available, skipping HDR10 "
+ "metadata configuration.");
+ goto error;
+ }
+
+ D3D(IDXGISwapChain4_SetHDRMetaData(swapchain4,
+ is_hdr ?
+ DXGI_HDR_METADATA_TYPE_HDR10 :
+ DXGI_HDR_METADATA_TYPE_NONE,
+ is_hdr ? sizeof(hdr10) : 0,
+ is_hdr ? &hdr10 : NULL));
+
+ // Metadata was applied: skip the `error` label so it is kept in out_csp
+ goto success;
+
+error:
+ csp_map->out_csp.hdr = (struct pl_hdr_metadata) { 0 };
+success:
+ SAFE_RELEASE(swapchain4);
+ return ret;
+}
+
+// Checks whether `fmt` offers every capability needed for a backbuffer: 2D
+// texture, display, shader sampling, render target and blending support.
+// Returns false if any capability is missing or the support query fails.
+static bool d3d11_format_supported(struct d3d11_ctx *ctx, DXGI_FORMAT fmt)
+{
+    const UINT required =
+        D3D11_FORMAT_SUPPORT_TEXTURE2D | D3D11_FORMAT_SUPPORT_DISPLAY |
+        D3D11_FORMAT_SUPPORT_SHADER_SAMPLE | D3D11_FORMAT_SUPPORT_RENDER_TARGET |
+        D3D11_FORMAT_SUPPORT_BLENDABLE;
+    UINT sup = 0;
+
+    D3D(ID3D11Device_CheckFormatSupport(ctx->dev, fmt, &sup));
+
+    // Supported only if no required bit is missing from `sup`
+    const UINT missing = required & ~sup;
+    return missing == 0;
+
+error:
+    return false;
+}
+
+// Returns whether the swap chain reports present support
+// (DXGI_SWAP_CHAIN_COLOR_SPACE_SUPPORT_FLAG_PRESENT) for `color_space`.
+// NOTE(review): D3D() is defined outside this chunk; it presumably jumps to
+// `error` (returning false) when the support query fails - confirm.
+static bool d3d11_csp_supported(struct d3d11_ctx *ctx,
+ IDXGISwapChain3 *swapchain3,
+ DXGI_COLOR_SPACE_TYPE color_space)
+{
+ UINT csp_support_flags = 0;
+
+ D3D(IDXGISwapChain3_CheckColorSpaceSupport(swapchain3,
+ color_space,
+ &csp_support_flags));
+
+ return (csp_support_flags & DXGI_SWAP_CHAIN_COLOR_SPACE_SUPPORT_FLAG_PRESENT);
+
+error:
+ return false;
+}
+
+// Updates the requested swap chain configuration (p->csp_map) for the color
+// space `csp` and flags the backbuffer format for reconfiguration.
+//
+// `is_internal` only affects the wording of the log message (initial
+// configuration vs. configuration received from a hint).
+//
+// Nothing happens in 8-bit fallback mode or if the resulting configuration
+// equals the current one. If the new format or color space is unsupported,
+// the 8-bit SDR configuration is used instead. The format change itself is
+// deferred: this function only sets p->update_swapchain_format, while the
+// color space and HDR metadata are applied immediately when the v3 swap chain
+// interface is available.
+static void update_swapchain_color_config(pl_swapchain sw,
+ const struct pl_color_space *csp,
+ bool is_internal)
+{
+ struct priv *p = PL_PRIV(sw);
+ struct d3d11_ctx *ctx = p->ctx;
+ IDXGISwapChain3 *swapchain3 = NULL;
+ // Kept so the color space can be rolled back if applying it fails
+ struct d3d11_csp_mapping old_map = p->csp_map;
+
+ // ignore config changes in fallback mode
+ if (p->fallback_8bit_rgb)
+ goto cleanup;
+
+ HRESULT hr = IDXGISwapChain_QueryInterface(p->swapchain, &IID_IDXGISwapChain3,
+ (void **)&swapchain3);
+ if (FAILED(hr)) {
+ PL_TRACE(ctx, "v3 swap chain interface is not available, skipping "
+ "color space configuration.");
+ swapchain3 = NULL;
+ }
+
+ // Lack of swap chain v3 means we cannot control swap chain color space;
+ // Only effective formats are the 8 and 10 bit RGB ones.
+ struct d3d11_csp_mapping csp_map =
+ map_pl_csp_to_d3d11(swapchain3 ? csp : &pl_color_space_unknown,
+ p->disable_10bit_sdr);
+
+ // Nothing to do if the configuration did not change
+ if (p->csp_map.d3d11_fmt == csp_map.d3d11_fmt &&
+ p->csp_map.d3d11_csp == csp_map.d3d11_csp &&
+ pl_color_space_equal(&p->csp_map.out_csp, &csp_map.out_csp))
+ goto cleanup;
+
+ PL_INFO(ctx, "%s swap chain configuration%s: format: %s, color space: %s.",
+ is_internal ? "Initial" : "New",
+ is_internal ? "" : " received from hint",
+ pl_get_dxgi_format_name(csp_map.d3d11_fmt),
+ pl_get_dxgi_csp_name(csp_map.d3d11_csp));
+
+ // Without the v3 interface the color space cannot be queried (or set), so
+ // it is treated as supported
+ bool fmt_supported = d3d11_format_supported(ctx, csp_map.d3d11_fmt);
+ bool csp_supported = swapchain3 ?
+ d3d11_csp_supported(ctx, swapchain3, csp_map.d3d11_csp) : true;
+ if (!fmt_supported || !csp_supported) {
+ PL_ERR(ctx, "New swap chain configuration was deemed not supported: "
+ "format: %s, color space: %s. Failling back to 8bit RGB.",
+ fmt_supported ? "supported" : "unsupported",
+ csp_supported ? "supported" : "unsupported");
+ // fall back to 8bit sRGB if requested configuration is not supported
+ csp_map = map_pl_csp_to_d3d11(&pl_color_space_unknown, true);
+ }
+
+ p->csp_map = csp_map;
+ p->update_swapchain_format = true;
+
+ if (!swapchain3)
+ goto cleanup;
+
+ if (!set_swapchain_metadata(ctx, swapchain3, &p->csp_map)) {
+ // format succeeded, but color space configuration failed
+ p->csp_map = old_map;
+ p->csp_map.d3d11_fmt = csp_map.d3d11_fmt;
+ }
+
+ pl_d3d11_flush_message_queue(ctx, "After colorspace hint");
+
+cleanup:
+ SAFE_RELEASE(swapchain3);
+}
+
+// Colorspace hint callback: forwards `csp` to update_swapchain_color_config
+// as a non-internal (hint-originated) configuration change.
+static void d3d11_sw_colorspace_hint(pl_swapchain sw,
+ const struct pl_color_space *csp)
+{
+ update_swapchain_color_config(sw, csp, false);
+}
+
+// Returns the underlying IDXGISwapChain. A reference is added before
+// returning, so the caller receives its own reference to release.
+IDXGISwapChain *pl_d3d11_swapchain_unwrap(pl_swapchain sw)
+{
+ struct priv *p = PL_PRIV(sw);
+ IDXGISwapChain_AddRef(p->swapchain);
+ return p->swapchain;
+}
+
+// Swapchain function table for the D3D11 backend; copied into priv.impl when
+// a swapchain is created.
+static const struct pl_sw_fns d3d11_swapchain = {
+ .destroy = d3d11_sw_destroy,
+ .latency = d3d11_sw_latency,
+ .resize = d3d11_sw_resize,
+ .colorspace_hint = d3d11_sw_colorspace_hint,
+ .start_frame = d3d11_sw_start_frame,
+ .submit_frame = d3d11_sw_submit_frame,
+ .swap_buffers = d3d11_sw_swap_buffers,
+};
+
+// Creates a swap chain through the DXGI 1.2+ factory interface.
+//
+// Depending on `params`, the swap chain targets an HWND, a CoreWindow or
+// composition. With `flip` set, a flip-model swap chain with enough buffers
+// for the device's maximum frame latency is requested; otherwise a
+// single-buffered bitblt-model (DISCARD) swap chain is created.
+//
+// On success, a reference to the new swap chain is stored in *swapchain_out.
+// Returns the HRESULT of the last call made, i.e. of the failing call when
+// creation does not succeed.
+static HRESULT create_swapchain_1_2(struct d3d11_ctx *ctx,
+    IDXGIFactory2 *factory, const struct pl_d3d11_swapchain_params *params,
+    bool flip, UINT width, UINT height, DXGI_FORMAT format,
+    IDXGISwapChain **swapchain_out)
+{
+    IDXGISwapChain *swapchain = NULL;
+    IDXGISwapChain1 *swapchain1 = NULL;
+    HRESULT hr;
+
+    DXGI_SWAP_CHAIN_DESC1 desc = {
+        .Width = width,
+        .Height = height,
+        .Format = format,
+        .SampleDesc.Count = 1,
+        .BufferUsage = DXGI_USAGE_SHADER_INPUT | DXGI_USAGE_RENDER_TARGET_OUTPUT,
+        .Flags = params->flags,
+    };
+
+    if (ID3D11Device_GetFeatureLevel(ctx->dev) >= D3D_FEATURE_LEVEL_11_0)
+        desc.BufferUsage |= DXGI_USAGE_UNORDERED_ACCESS;
+
+    if (flip) {
+        // Never size the swap chain from an indeterminate value: if the
+        // query fails, fall back to DXGI's documented default latency of 3
+        UINT max_latency = 0;
+        if (FAILED(IDXGIDevice1_GetMaximumFrameLatency(ctx->dxgi_dev, &max_latency)))
+            max_latency = 3;
+
+        // Make sure we have at least enough buffers to allow `max_latency`
+        // frames in-flight at once, plus one frame for the frontbuffer
+        desc.BufferCount = max_latency + 1;
+
+        if (IsWindows10OrGreater()) {
+            desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
+        } else {
+            desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL;
+        }
+
+        desc.BufferCount = PL_MIN(desc.BufferCount, DXGI_MAX_SWAP_CHAIN_BUFFERS);
+    } else {
+        desc.SwapEffect = DXGI_SWAP_EFFECT_DISCARD;
+        desc.BufferCount = 1;
+    }
+
+    if (params->window) {
+        hr = IDXGIFactory2_CreateSwapChainForHwnd(factory, (IUnknown *) ctx->dev,
+            params->window, &desc, NULL, NULL, &swapchain1);
+    } else if (params->core_window) {
+        hr = IDXGIFactory2_CreateSwapChainForCoreWindow(factory,
+            (IUnknown *) ctx->dev, params->core_window, &desc, NULL, &swapchain1);
+    } else {
+        hr = IDXGIFactory2_CreateSwapChainForComposition(factory,
+            (IUnknown *) ctx->dev, &desc, NULL, &swapchain1);
+    }
+    if (FAILED(hr))
+        goto done;
+    hr = IDXGISwapChain1_QueryInterface(swapchain1, &IID_IDXGISwapChain,
+                                        (void **) &swapchain);
+    if (FAILED(hr))
+        goto done;
+
+    // Hand the reference over to the caller; clearing the local pointer
+    // keeps the cleanup below from releasing it
+    *swapchain_out = swapchain;
+    swapchain = NULL;
+
+done:
+    SAFE_RELEASE(swapchain1);
+    SAFE_RELEASE(swapchain);
+    return hr;
+}
+
+// Creates a windowed, single-buffered bitblt-model (DISCARD) swap chain for
+// params->window through the DXGI 1.1 factory interface.
+//
+// Returns the HRESULT of IDXGIFactory1_CreateSwapChain, which receives
+// `swapchain_out` as its output pointer.
+static HRESULT create_swapchain_1_1(struct d3d11_ctx *ctx,
+    IDXGIFactory1 *factory, const struct pl_d3d11_swapchain_params *params,
+    UINT width, UINT height, DXGI_FORMAT format, IDXGISwapChain **swapchain_out)
+{
+    DXGI_SWAP_CHAIN_DESC desc = {0};
+
+    desc.BufferDesc.Width = width;
+    desc.BufferDesc.Height = height;
+    desc.BufferDesc.Format = format;
+    desc.SampleDesc.Count = 1;
+    desc.BufferUsage = DXGI_USAGE_SHADER_INPUT | DXGI_USAGE_RENDER_TARGET_OUTPUT;
+    desc.BufferCount = 1;
+    desc.OutputWindow = params->window;
+    desc.Windowed = TRUE;
+    desc.SwapEffect = DXGI_SWAP_EFFECT_DISCARD;
+    desc.Flags = params->flags;
+
+    IUnknown *device = (IUnknown *) ctx->dev;
+    return IDXGIFactory1_CreateSwapChain(factory, device, &desc, swapchain_out);
+}
+
+// Creates a new swap chain with the given backbuffer format.
+//
+// The DXGI factory is obtained from the D3D11 device. A DXGI 1.2 factory is
+// used when available (flip-model unless params->blit is set), otherwise the
+// DXGI 1.1 path. If creating a flip-model swap chain fails, creation is
+// retried once with the bitblt model.
+//
+// Returns the new swap chain, or NULL on failure.
+static IDXGISwapChain *create_swapchain(struct d3d11_ctx *ctx,
+    const struct pl_d3d11_swapchain_params *params, DXGI_FORMAT format)
+{
+    IDXGIDevice1 *dxgi_dev = NULL;
+    IDXGIAdapter1 *adapter = NULL;
+    IDXGIFactory1 *factory = NULL;
+    IDXGIFactory2 *factory2 = NULL;
+    IDXGISwapChain *swapchain = NULL;
+    bool success = false;
+    HRESULT hr;
+
+    D3D(ID3D11Device_QueryInterface(ctx->dev, &IID_IDXGIDevice1,
+                                    (void **) &dxgi_dev));
+    D3D(IDXGIDevice1_GetParent(dxgi_dev, &IID_IDXGIAdapter1, (void **) &adapter));
+    D3D(IDXGIAdapter1_GetParent(adapter, &IID_IDXGIFactory1, (void **) &factory));
+
+    // The DXGI 1.2 factory is optional
+    hr = IDXGIFactory1_QueryInterface(factory, &IID_IDXGIFactory2,
+                                      (void **) &factory2);
+    if (FAILED(hr))
+        factory2 = NULL;
+
+    bool flip = factory2 && !params->blit;
+
+    UINT width = PL_DEF(params->width, 1);
+    UINT height = PL_DEF(params->height, 1);
+
+    // If both width and height are unset, the default size is the window size
+    const bool size_unset = params->width == 0 && params->height == 0;
+    if (params->window && size_unset) {
+        RECT rc;
+        if (GetClientRect(params->window, &rc)) {
+            width = PL_DEF(rc.right - rc.left, 1);
+            height = PL_DEF(rc.bottom - rc.top, 1);
+        }
+    }
+
+    // Loop so that swap chain creation can be retried without the flip model
+    for (;;) {
+        if (factory2) {
+            // Create a DXGI 1.2+ (Windows 8+) swap chain if possible
+            hr = create_swapchain_1_2(ctx, factory2, params, flip, width,
+                                      height, format, &swapchain);
+        } else {
+            // Fall back to DXGI 1.1 (Windows 7)
+            hr = create_swapchain_1_1(ctx, factory, params, width, height,
+                                      format, &swapchain);
+        }
+        if (SUCCEEDED(hr))
+            break;
+
+        pl_d3d11_after_error(ctx, hr);
+        if (!flip) {
+            PL_FATAL(ctx, "Failed to create swapchain: %s", pl_hresult_to_str(hr));
+            goto error;
+        }
+
+        PL_DEBUG(ctx, "Failed to create flip-model swapchain, trying bitblt");
+        flip = false;
+    }
+
+    // Prevent DXGI from making changes to the window, otherwise it will hook
+    // the Alt+Enter keystroke and make it trigger an ugly transition to
+    // legacy exclusive fullscreen mode.
+    IDXGIFactory_MakeWindowAssociation(factory, params->window,
+        DXGI_MWA_NO_WINDOW_CHANGES | DXGI_MWA_NO_ALT_ENTER |
+        DXGI_MWA_NO_PRINT_SCREEN);
+
+    success = true;
+
+error:
+    if (!success)
+        SAFE_RELEASE(swapchain);
+    SAFE_RELEASE(factory2);
+    SAFE_RELEASE(factory);
+    SAFE_RELEASE(adapter);
+    SAFE_RELEASE(dxgi_dev);
+    return swapchain;
+}
+
+// Creates a pl_swapchain for `d3d11`.
+//
+// If params->swapchain is set, that swap chain is used (taking an additional
+// reference); otherwise a new one is created from `params`. The initial
+// backbuffer format is 10-bit RGB when it is supported and not disabled by
+// params->disable_10bit_sdr, and 8-bit RGB otherwise. The format actually
+// reported by the swap chain then replaces it, and the initial color
+// configuration is derived from an unknown color space.
+//
+// Returns NULL if the swap chain could not be created.
+pl_swapchain pl_d3d11_create_swapchain(pl_d3d11 d3d11,
+    const struct pl_d3d11_swapchain_params *params)
+{
+    struct d3d11_ctx *ctx = PL_PRIV(d3d11);
+    pl_gpu gpu = d3d11->gpu;
+
+    struct pl_swapchain_t *sw = pl_zalloc_obj(NULL, sw, struct priv);
+    struct priv *p = PL_PRIV(sw);
+    *sw = (struct pl_swapchain_t) {
+        .log = gpu->log,
+        .gpu = gpu,
+    };
+
+    // default to standard 8 or 10 bit RGB, unset pl_color_space
+    DXGI_FORMAT initial_fmt = DXGI_FORMAT_R8G8B8A8_UNORM;
+    if (!params->disable_10bit_sdr &&
+        d3d11_format_supported(ctx, DXGI_FORMAT_R10G10B10A2_UNORM))
+    {
+        initial_fmt = DXGI_FORMAT_R10G10B10A2_UNORM;
+    }
+
+    *p = (struct priv) {
+        .impl = d3d11_swapchain,
+        .ctx = ctx,
+        .csp_map = {
+            .d3d11_fmt = initial_fmt,
+        },
+        .disable_10bit_sdr = params->disable_10bit_sdr,
+    };
+
+    if (params->swapchain) {
+        p->swapchain = params->swapchain;
+        IDXGISwapChain_AddRef(params->swapchain);
+    } else {
+        p->swapchain = create_swapchain(ctx, params, p->csp_map.d3d11_fmt);
+        if (!p->swapchain) {
+            PL_FATAL(gpu, "Failed to create Direct3D 11 swapchain");
+            d3d11_sw_destroy(sw);
+            return NULL;
+        }
+    }
+
+    DXGI_SWAP_CHAIN_DESC scd = {0};
+    IDXGISwapChain_GetDesc(p->swapchain, &scd);
+
+    switch (scd.SwapEffect) {
+    case DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL:
+    case DXGI_SWAP_EFFECT_FLIP_DISCARD:
+        PL_INFO(gpu, "Using flip-model presentation");
+        break;
+    default:
+        PL_INFO(gpu, "Using bitblt-model presentation");
+        break;
+    }
+
+    // Adopt the format the swap chain really uses
+    p->csp_map.d3d11_fmt = scd.BufferDesc.Format;
+
+    update_swapchain_color_config(sw, &pl_color_space_unknown, true);
+
+    return sw;
+}
diff --git a/src/d3d11/utils.c b/src/d3d11/utils.c
new file mode 100644
index 0000000..47154b5
--- /dev/null
+++ b/src/d3d11/utils.c
@@ -0,0 +1,500 @@
+/*
+ * This file is part of libplacebo.
+ *
+ * libplacebo is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libplacebo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <string.h>
+
+#include "utils.h"
+
+// D3D11.3 message IDs, not present in mingw-w64 v9
+#define D3D11_MESSAGE_ID_CREATE_FENCE (0x30020c)
+#define D3D11_MESSAGE_ID_DESTROY_FENCE (0x30020a)
+
+#ifdef PL_HAVE_DXGI_DEBUG
+// Return the log level to force for the D3D11 debug message `id`, or
+// PL_LOG_NONE if the level should be derived from the message severity.
+static enum pl_log_level log_level_override(unsigned int id)
+{
+// Case labels for both the creation and destruction message of an object type
+#define LIFETIME_CASES(obj)                     \
+    case D3D11_MESSAGE_ID_CREATE_ ## obj:       \
+    case D3D11_MESSAGE_ID_DESTROY_ ## obj
+
+    enum pl_log_level forced = PL_LOG_NONE;
+
+    switch (id) {
+    // These warnings can happen when a pl_timer is used too often before a
+    // blocking pl_swapchain_swap_buffers() or pl_gpu_finish(), overflowing
+    // its internal ring buffer and causing older query objects to be reused
+    // before their results are read. This is expected behavior, so they are
+    // demoted to PL_LOG_TRACE to prevent log spam.
+    case D3D11_MESSAGE_ID_QUERY_BEGIN_ABANDONING_PREVIOUS_RESULTS:
+    case D3D11_MESSAGE_ID_QUERY_END_ABANDONING_PREVIOUS_RESULTS:
+
+    // D3D11 also writes a message every time an object is created or
+    // destroyed, which is just as noisy and gets the same treatment.
+    LIFETIME_CASES(CONTEXT):
+    LIFETIME_CASES(BUFFER):
+    LIFETIME_CASES(TEXTURE1D):
+    LIFETIME_CASES(TEXTURE2D):
+    LIFETIME_CASES(TEXTURE3D):
+    LIFETIME_CASES(SHADERRESOURCEVIEW):
+    LIFETIME_CASES(RENDERTARGETVIEW):
+    LIFETIME_CASES(DEPTHSTENCILVIEW):
+    LIFETIME_CASES(VERTEXSHADER):
+    LIFETIME_CASES(HULLSHADER):
+    LIFETIME_CASES(DOMAINSHADER):
+    LIFETIME_CASES(GEOMETRYSHADER):
+    LIFETIME_CASES(PIXELSHADER):
+    LIFETIME_CASES(INPUTLAYOUT):
+    LIFETIME_CASES(SAMPLER):
+    LIFETIME_CASES(BLENDSTATE):
+    LIFETIME_CASES(DEPTHSTENCILSTATE):
+    LIFETIME_CASES(RASTERIZERSTATE):
+    LIFETIME_CASES(QUERY):
+    LIFETIME_CASES(PREDICATE):
+    LIFETIME_CASES(COUNTER):
+    LIFETIME_CASES(COMMANDLIST):
+    LIFETIME_CASES(CLASSINSTANCE):
+    LIFETIME_CASES(CLASSLINKAGE):
+    LIFETIME_CASES(COMPUTESHADER):
+    LIFETIME_CASES(UNORDEREDACCESSVIEW):
+    LIFETIME_CASES(VIDEODECODER):
+    LIFETIME_CASES(VIDEOPROCESSORENUM):
+    LIFETIME_CASES(VIDEOPROCESSOR):
+    LIFETIME_CASES(DECODEROUTPUTVIEW):
+    LIFETIME_CASES(PROCESSORINPUTVIEW):
+    LIFETIME_CASES(PROCESSOROUTPUTVIEW):
+    LIFETIME_CASES(DEVICECONTEXTSTATE):
+    LIFETIME_CASES(FENCE):
+        forced = PL_LOG_TRACE;
+        break;
+
+    // Every other message keeps PL_LOG_NONE, i.e. no override
+    default:
+        break;
+    }
+
+#undef LIFETIME_CASES
+    return forced;
+}
+#endif
+
+// Copy the messages stored in the DXGI debug info queue to libplacebo's log,
+// preceded by a "<header>:" line, and then clear the stored messages.
+//
+// This is a no-op unless built with PL_HAVE_DXGI_DEBUG and `ctx->iqueue` is
+// set. Messages of severity ERROR or worse additionally trigger
+// pl_debug_abort() after being logged.
+void pl_d3d11_flush_message_queue(struct d3d11_ctx *ctx, const char *header)
+{
+#ifdef PL_HAVE_DXGI_DEBUG
+    if (!ctx->iqueue)
+        return;
+
+    static const enum pl_log_level severity_map[] = {
+        [DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION] = PL_LOG_FATAL,
+        [DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR]      = PL_LOG_ERR,
+        [DXGI_INFO_QUEUE_MESSAGE_SEVERITY_WARNING]    = PL_LOG_WARN,
+        [DXGI_INFO_QUEUE_MESSAGE_SEVERITY_INFO]       = PL_LOG_DEBUG,
+        [DXGI_INFO_QUEUE_MESSAGE_SEVERITY_MESSAGE]    = PL_LOG_DEBUG,
+    };
+    const size_t num_severities = sizeof(severity_map) / sizeof(severity_map[0]);
+
+    // Level the header was last printed at (PL_LOG_NONE = not printed yet)
+    enum pl_log_level header_printed = PL_LOG_NONE;
+
+    // After the storage limit is reached and ID3D11InfoQueue::ClearStoredMessages
+    // is called, the message counter seems to be initialized to -1, which is
+    // quite a big number if we read it as uint64_t. Any subsequent call to
+    // ID3D11InfoQueue::GetNumStoredMessages will be off by one.
+    // Use GetNumStoredMessagesAllowedByRetrievalFilters without any filter
+    // set instead, which seems to be unaffected by this bug and returns the
+    // correct number of messages.
+    // IDXGIInfoQueue seems to be unaffected, but keep the same way of retrieval.
+    uint64_t messages = IDXGIInfoQueue_GetNumStoredMessagesAllowedByRetrievalFilters(ctx->iqueue, DXGI_DEBUG_ALL);
+
+    // Just to be on the safe side, check also for the mentioned -1 value...
+    if (!messages || messages == UINT64_C(-1))
+        return;
+
+    uint64_t discarded =
+        IDXGIInfoQueue_GetNumMessagesDiscardedByMessageCountLimit(ctx->iqueue, DXGI_DEBUG_ALL);
+    if (discarded > ctx->last_discarded) {
+        PL_WARN(ctx, "%s:", header);
+        header_printed = PL_LOG_WARN;
+
+        // Notify number of messages skipped due to the message count limit
+        PL_WARN(ctx, " (skipped %"PRIu64" debug layer messages)",
+                discarded - ctx->last_discarded);
+        ctx->last_discarded = discarded;
+    }
+
+    // Copy debug layer messages to libplacebo's log output
+    for (uint64_t i = 0; i < messages; i++) {
+        // First call queries the required size, second call fetches the message
+        SIZE_T len;
+        if (FAILED(IDXGIInfoQueue_GetMessage(ctx->iqueue, DXGI_DEBUG_ALL, i, NULL, &len)))
+            goto error;
+
+        pl_grow((void *) ctx->d3d11, &ctx->dxgi_msg, len);
+        DXGI_INFO_QUEUE_MESSAGE *dxgi_msg = ctx->dxgi_msg;
+
+        if (FAILED(IDXGIInfoQueue_GetMessage(ctx->iqueue, DXGI_DEBUG_ALL, i, dxgi_msg, &len)))
+            goto error;
+
+        enum pl_log_level level = PL_LOG_NONE;
+        if (IsEqualGUID(&dxgi_msg->Producer, &DXGI_DEBUG_D3D11))
+            level = log_level_override(dxgi_msg->ID);
+        if (level == PL_LOG_NONE) {
+            // The severity comes from the runtime, so never index the table
+            // with a value outside of it; unknown severities are treated
+            // like informational messages.
+            const size_t sev = (size_t) dxgi_msg->Severity;
+            level = sev < num_severities ? severity_map[sev] : PL_LOG_DEBUG;
+        }
+
+        if (pl_msg_test(ctx->log, level)) {
+            // If the header hasn't been printed, or it was printed for a lower
+            // log level than the current message, print it (again)
+            if (header_printed == PL_LOG_NONE || header_printed > level) {
+                PL_MSG(ctx, level, "%s:", header);
+                pl_log_stack_trace(ctx->log, level);
+                header_printed = level;
+            }
+
+            PL_MSG(ctx, level, " %d: %.*s", (int) dxgi_msg->ID,
+                   (int) dxgi_msg->DescriptionByteLength, dxgi_msg->pDescription);
+        }
+
+        if (dxgi_msg->Severity <= DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR)
+            pl_debug_abort();
+    }
+
+error:
+    // Reached on success as well: drop everything that was (or could not be)
+    // copied so it is not reported again by the next flush
+    IDXGIInfoQueue_ClearStoredMessages(ctx->iqueue, DXGI_DEBUG_ALL);
+#endif
+}
+
+// Inspect `hr` for the HRESULTs that signal a lost device and mark `ctx` as
+// failed if one is found. For the "device removed" codes, the returned value
+// is the reason reported by ID3D11Device_GetDeviceRemovedReason(); otherwise
+// `hr` is returned unchanged.
+HRESULT pl_d3d11_check_device_removed(struct d3d11_ctx *ctx, HRESULT hr)
+{
+    // This can be called before we have a device
+    if (!ctx->dev)
+        return hr;
+
+    if (hr == D3DDDIERR_DEVICEREMOVED || hr == DXGI_ERROR_DEVICE_REMOVED) {
+        // Swap the generic code for the specific removal reason
+        hr = ID3D11Device_GetDeviceRemovedReason(ctx->dev);
+        ctx->is_failed = true;
+    } else if (hr == DXGI_ERROR_DEVICE_HUNG ||
+               hr == DXGI_ERROR_DEVICE_RESET ||
+               hr == DXGI_ERROR_DRIVER_INTERNAL_ERROR) {
+        ctx->is_failed = true;
+    }
+
+    // Logged whenever the failed flag is set, including from an earlier call
+    if (ctx->is_failed)
+        PL_ERR(ctx, "Device lost!");
+
+    return hr;
+}
+
+// Common handling after a failed D3D11/DXGI call: update the device-lost
+// state, flush pending debug layer messages and return the HRESULT produced
+// by pl_d3d11_check_device_removed().
+HRESULT pl_d3d11_after_error(struct d3d11_ctx *ctx, HRESULT hr)
+{
+    const HRESULT reason = pl_d3d11_check_device_removed(ctx, hr);
+    pl_d3d11_flush_message_queue(ctx, "After error");
+    return reason;
+}
+
+// Read the fixed file version of the DLL `name` from its version resource.
+// Returns an all-zero version if any step of the lookup fails.
+struct dll_version pl_get_dll_version(const wchar_t *name)
+{
+    struct dll_version version = {0};
+
+    DWORD unused_handle = 0;
+    const DWORD info_size = GetFileVersionInfoSizeW(name, &unused_handle);
+    if (!info_size)
+        return version;
+
+    void *info = pl_alloc(NULL, info_size);
+    VS_FIXEDFILEINFO *ffi;
+    UINT ffi_len;
+
+    // Load the version resource, locate its root block and make sure the
+    // block is large enough to be a VS_FIXEDFILEINFO before reading from it
+    if (GetFileVersionInfoW(name, 0, info_size, info) &&
+        VerQueryValueW(info, L"\\", (void**)&ffi, &ffi_len) &&
+        ffi_len >= sizeof(*ffi))
+    {
+        version.major    = HIWORD(ffi->dwFileVersionMS);
+        version.minor    = LOWORD(ffi->dwFileVersionMS);
+        version.build    = HIWORD(ffi->dwFileVersionLS);
+        version.revision = LOWORD(ffi->dwFileVersionLS);
+    }
+
+    pl_free(info);
+    return version;
+}
+
+// Convert the UTF-8 string `str` into a newly allocated wide string that is
+// allocated with pl_calloc_ptr() on `ctx`.
+wchar_t *pl_from_utf8(void *ctx, const char *str)
+{
+    // Size query: with no output buffer the call only reports the length
+    const int count = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0);
+    pl_assert(count > 0);
+
+    wchar_t *wide = pl_calloc_ptr(ctx, count, wide);
+    MultiByteToWideChar(CP_UTF8, 0, str, -1, wide, count);
+
+    return wide;
+}
+
+// Convert the wide string `str` into a newly allocated UTF-8 string that is
+// allocated with pl_calloc_ptr() on `ctx`.
+char *pl_to_utf8(void *ctx, const wchar_t *str)
+{
+    // Size query: with no output buffer the call only reports the length
+    const int count = WideCharToMultiByte(CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL);
+    pl_assert(count > 0);
+
+    char *utf8 = pl_calloc_ptr(ctx, count, utf8);
+    WideCharToMultiByte(CP_UTF8, 0, str, -1, utf8, count, NULL, NULL);
+
+    return utf8;
+}
+
+static const char *hresult_str(HRESULT hr)
+{
+ switch (hr) {
+#define CASE(name) case name: return #name
+ CASE(S_OK);
+ CASE(S_FALSE);
+ CASE(E_ABORT);
+ CASE(E_ACCESSDENIED);
+ CASE(E_FAIL);
+ CASE(E_HANDLE);
+ CASE(E_INVALIDARG);
+ CASE(E_NOINTERFACE);
+ CASE(E_NOTIMPL);
+ CASE(E_OUTOFMEMORY);
+ CASE(E_POINTER);
+ CASE(E_UNEXPECTED);
+
+ CASE(DXGI_ERROR_ACCESS_DENIED);
+ CASE(DXGI_ERROR_ACCESS_LOST);
+ CASE(DXGI_ERROR_CANNOT_PROTECT_CONTENT);
+ CASE(DXGI_ERROR_DEVICE_HUNG);
+ CASE(DXGI_ERROR_DEVICE_REMOVED);
+ CASE(DXGI_ERROR_DEVICE_RESET);
+ CASE(DXGI_ERROR_DRIVER_INTERNAL_ERROR);
+ CASE(DXGI_ERROR_FRAME_STATISTICS_DISJOINT);
+ CASE(DXGI_ERROR_GRAPHICS_VIDPN_SOURCE_IN_USE);
+ CASE(DXGI_ERROR_INVALID_CALL);
+ CASE(DXGI_ERROR_MORE_DATA);
+ CASE(DXGI_ERROR_NAME_ALREADY_EXISTS);
+ CASE(DXGI_ERROR_NONEXCLUSIVE);
+ CASE(DXGI_ERROR_NOT_CURRENTLY_AVAILABLE);
+ CASE(DXGI_ERROR_NOT_FOUND);
+ CASE(DXGI_ERROR_REMOTE_CLIENT_DISCONNECTED);
+ CASE(DXGI_ERROR_REMOTE_OUTOFMEMORY);
+ CASE(DXGI_ERROR_RESTRICT_TO_OUTPUT_STALE);
+ CASE(DXGI_ERROR_SDK_COMPONENT_MISSING);
+ CASE(DXGI_ERROR_SESSION_DISCONNECTED);
+ CASE(DXGI_ERROR_UNSUPPORTED);
+ CASE(DXGI_ERROR_WAIT_TIMEOUT);
+ CASE(DXGI_ERROR_WAS_STILL_DRAWING);
+#undef CASE
+
+ default:
+ return "Unknown error";
+ }
+}
+
+static char *format_error(void *ctx, DWORD error)
+{
+ wchar_t *wstr;
+ if (!FormatMessageW(FORMAT_MESSAGE_ALLOCATE_BUFFER |
+ FORMAT_MESSAGE_FROM_SYSTEM |
+ FORMAT_MESSAGE_IGNORE_INSERTS, NULL, error,
+ MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
+ (LPWSTR)&wstr, 0, NULL))
+ {
+ return NULL;
+ }
+
+ // Trim any trailing newline from the message
+ for (int i = wcslen(wstr) - 1; i >= 0; i--) {
+ if (wstr[i] != '\r' && wstr[i] != '\n') {
+ wstr[i + 1] = '\0';
+ break;
+ }
+ }
+
+ char *str = pl_to_utf8(ctx, wstr);
+ LocalFree(wstr);
+ return str;
+}
+
+// Format `hr` into `buf` (of size `buf_size`) as
+// "<system message> (<symbolic name>, 0x<code>)", or as
+// "<symbolic name>, 0x<code>" when no system message is available.
+// Returns `buf`; the output is truncated if it does not fit.
+char *pl_hresult_to_str_buf(char *buf, size_t buf_size, HRESULT hr)
+{
+    // HRESULT is a signed type, while both FormatMessageW() (via
+    // format_error) and the %lx conversion expect unsigned values, so
+    // convert the bit pattern explicitly instead of relying on implicit
+    // conversions / mismatched varargs.
+    const DWORD code_bits = (DWORD) hr;
+
+    char *fmsg = format_error(NULL, code_bits);
+    const char *code = hresult_str(hr);
+    if (fmsg) {
+        snprintf(buf, buf_size, "%s (%s, 0x%08lx)", fmsg, code,
+                 (unsigned long) code_bits);
+    } else {
+        snprintf(buf, buf_size, "%s, 0x%08lx", code,
+                 (unsigned long) code_bits);
+    }
+    pl_free(fmsg);
+    return buf;
+}
+
+#define D3D11_DXGI_ENUM(prefix, define) { case prefix ## define: return #define; } // switch case for `prefix##define` that returns the string "define"
+
+const char *pl_get_dxgi_format_name(DXGI_FORMAT fmt)
+{
+ switch (fmt) {
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, UNKNOWN);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32A32_TYPELESS);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32A32_FLOAT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32A32_UINT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32A32_SINT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32_TYPELESS);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32_FLOAT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32_UINT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32_SINT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16B16A16_TYPELESS);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16B16A16_FLOAT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16B16A16_UNORM);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16B16A16_UINT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16B16A16_SNORM);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16B16A16_SINT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32_TYPELESS);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32_FLOAT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32_UINT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32_SINT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G8X24_TYPELESS);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, D32_FLOAT_S8X24_UINT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R32_FLOAT_X8X24_TYPELESS);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, X32_TYPELESS_G8X24_UINT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R10G10B10A2_TYPELESS);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R10G10B10A2_UNORM);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R10G10B10A2_UINT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R11G11B10_FLOAT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8B8A8_TYPELESS);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8B8A8_UNORM);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8B8A8_UNORM_SRGB);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8B8A8_UINT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8B8A8_SNORM);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8B8A8_SINT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16_TYPELESS);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16_FLOAT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16_UNORM);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16_UINT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16_SNORM);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16_SINT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R32_TYPELESS);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, D32_FLOAT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R32_FLOAT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R32_UINT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R32_SINT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R24G8_TYPELESS);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, D24_UNORM_S8_UINT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R24_UNORM_X8_TYPELESS);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, X24_TYPELESS_G8_UINT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8_TYPELESS);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8_UNORM);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8_UINT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8_SNORM);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8_SINT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R16_TYPELESS);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R16_FLOAT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, D16_UNORM);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R16_UNORM);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R16_UINT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R16_SNORM);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R16_SINT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R8_TYPELESS);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R8_UNORM);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R8_UINT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R8_SNORM);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R8_SINT);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, A8_UNORM);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R1_UNORM);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R9G9B9E5_SHAREDEXP);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8_B8G8_UNORM);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, G8R8_G8B8_UNORM);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, BC1_TYPELESS);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, BC1_UNORM);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, BC1_UNORM_SRGB);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, BC2_TYPELESS);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, BC2_UNORM);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, BC2_UNORM_SRGB);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, BC3_TYPELESS);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, BC3_UNORM);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, BC3_UNORM_SRGB);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, BC4_TYPELESS);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, BC4_UNORM);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, BC4_SNORM);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, BC5_TYPELESS);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, BC5_UNORM);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, BC5_SNORM);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, B5G6R5_UNORM);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, B5G5R5A1_UNORM);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, B8G8R8A8_UNORM);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, B8G8R8X8_UNORM);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, R10G10B10_XR_BIAS_A2_UNORM);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, B8G8R8A8_TYPELESS);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, B8G8R8A8_UNORM_SRGB);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, B8G8R8X8_TYPELESS);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, B8G8R8X8_UNORM_SRGB);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, BC6H_TYPELESS);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, BC6H_UF16);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, BC6H_SF16);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, BC7_TYPELESS);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, BC7_UNORM);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, BC7_UNORM_SRGB);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, AYUV);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, Y410);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, Y416);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, NV12);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, P010);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, P016);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, 420_OPAQUE);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, YUY2);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, Y210);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, Y216);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, NV11);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, AI44);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, IA44);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, P8);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, A8P8);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, B4G4R4A4_UNORM);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, P208);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, V208);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, V408);
+ D3D11_DXGI_ENUM(DXGI_FORMAT_, FORCE_UINT);
+ }
+
+ return "<unknown>";
+}
+
+const char *pl_get_dxgi_csp_name(DXGI_COLOR_SPACE_TYPE csp)
+{
+ switch ((int) csp) {
+ D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_FULL_G22_NONE_P709);
+ D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_FULL_G10_NONE_P709);
+ D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_STUDIO_G22_NONE_P709);
+ D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_STUDIO_G22_NONE_P2020);
+ D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RESERVED);
+ D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_FULL_G22_NONE_P709_X601);
+ D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G22_LEFT_P601);
+ D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_FULL_G22_LEFT_P601);
+ D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G22_LEFT_P709);
+ D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_FULL_G22_LEFT_P709);
+ D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G22_LEFT_P2020);
+ D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_FULL_G22_LEFT_P2020);
+ D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_FULL_G2084_NONE_P2020);
+ D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G2084_LEFT_P2020);
+ D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_STUDIO_G2084_NONE_P2020);
+ D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G22_TOPLEFT_P2020);
+ D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G2084_TOPLEFT_P2020);
+ D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_FULL_G22_NONE_P2020);
+ D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_GHLG_TOPLEFT_P2020);
+ D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_FULL_GHLG_TOPLEFT_P2020);
+ D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_STUDIO_G24_NONE_P709);
+ D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_STUDIO_G24_NONE_P2020);
+ D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G24_LEFT_P709);
+ D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G24_LEFT_P2020);
+ D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G24_TOPLEFT_P2020);
+ D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, CUSTOM);
+ }
+
+ return "<unknown>";
+}
diff --git a/src/d3d11/utils.h b/src/d3d11/utils.h
new file mode 100644
index 0000000..86b4072
--- /dev/null
+++ b/src/d3d11/utils.h
@@ -0,0 +1,88 @@
+/*
+ * This file is part of libplacebo.
+ *
+ * libplacebo is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libplacebo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "common.h"
+
+#define DXGI_COLOR_SPACE_RGB_STUDIO_G24_NONE_P709 ((DXGI_COLOR_SPACE_TYPE)20)
+#define DXGI_COLOR_SPACE_RGB_STUDIO_G24_NONE_P2020 ((DXGI_COLOR_SPACE_TYPE)21)
+#define DXGI_COLOR_SPACE_YCBCR_STUDIO_G24_LEFT_P709 ((DXGI_COLOR_SPACE_TYPE)22)
+#define DXGI_COLOR_SPACE_YCBCR_STUDIO_G24_LEFT_P2020 ((DXGI_COLOR_SPACE_TYPE)23)
+#define DXGI_COLOR_SPACE_YCBCR_STUDIO_G24_TOPLEFT_P2020 ((DXGI_COLOR_SPACE_TYPE)24)
+
+// Flush debug messages from D3D11's info queue to libplacebo's log output.
+// Should be called regularly.
+void pl_d3d11_flush_message_queue(struct d3d11_ctx *ctx, const char *header);
+
+// Some D3D11 functions can fail with a set of HRESULT codes which indicate the
+// device has been removed. This is equivalent to libplacebo's gpu_is_failed
+// state and indicates that the pl_gpu needs to be recreated. This function
+// checks for one of those HRESULTs, sets the failed state, and returns a
+// specific HRESULT that indicates why the device was removed (eg. GPU hang,
+// driver crash, etc.)
+HRESULT pl_d3d11_check_device_removed(struct d3d11_ctx *ctx, HRESULT hr);
+
+// Helper function for the D3D() macro, though it can be called directly when
+// handling D3D11 errors if the D3D() macro isn't suitable for some reason.
+// Calls `pl_d3d11_check_device_removed` and `pl_d3d11_flush_message_queue` and
+// returns the specific HRESULT from `pl_d3d11_check_device_removed` for logging
+// purposes.
+HRESULT pl_d3d11_after_error(struct d3d11_ctx *ctx, HRESULT hr);
+
+// Convenience macro for running DXGI/D3D11 functions and performing appropriate
+// actions on failure. Can also be used for any HRESULT-returning function.
+//
+// On failure, this runs pl_d3d11_after_error(), logs the stringified call
+// together with the resulting HRESULT and source location, and jumps to the
+// `error` label. The calling function must therefore provide a `ctx` variable
+// usable with pl_d3d11_after_error() and PL_ERR(), and an `error:` label.
+//
+// The expansion deliberately has no trailing semicolon, so that `D3D(x);` is
+// exactly one statement and stays valid in unbraced if/else bodies.
+#define D3D(call)                                                           \
+    do {                                                                    \
+        HRESULT hr_ = (call);                                               \
+        if (FAILED(hr_)) {                                                  \
+            hr_ = pl_d3d11_after_error(ctx, hr_);                           \
+            PL_ERR(ctx, "%s: %s (%s:%d)", #call, pl_hresult_to_str(hr_),    \
+                   __FILE__, __LINE__);                                     \
+            goto error;                                                     \
+        }                                                                   \
+    } while (0)
+
+// Release a COM interface if the pointer is set, leaving the pointer NULL
+// afterwards. Note that `iface` is evaluated more than once.
+#define SAFE_RELEASE(iface)                         \
+    do {                                            \
+        if ((iface) != NULL) {                      \
+            (iface)->lpVtbl->Release(iface);        \
+            (iface) = NULL;                         \
+        }                                           \
+    } while (0)
+
+struct dll_version { // four-part file version, as filled in by pl_get_dll_version()
+ uint16_t major; // high word of VS_FIXEDFILEINFO.dwFileVersionMS
+ uint16_t minor; // low word of VS_FIXEDFILEINFO.dwFileVersionMS
+ uint16_t build; // high word of VS_FIXEDFILEINFO.dwFileVersionLS
+ uint16_t revision; // low word of VS_FIXEDFILEINFO.dwFileVersionLS
+};
+
+// Get the version number of a DLL. This calls GetFileVersionInfoW, which should
+// call LoadLibraryExW internally, so it should get the same copy of the DLL
+// that is loaded into memory if there is a copy in System32 and a copy in the
+// %PATH% or application directory.
+struct dll_version pl_get_dll_version(const wchar_t *name);
+
+wchar_t *pl_from_utf8(void *ctx, const char *str);
+char *pl_to_utf8(void *ctx, const wchar_t *str);
+
+#define pl_hresult_to_str(hr) pl_hresult_to_str_buf((char[256]){0}, 256, (hr))
+char *pl_hresult_to_str_buf(char *buf, size_t buf_size, HRESULT hr);
+
+const char *pl_get_dxgi_csp_name(DXGI_COLOR_SPACE_TYPE csp);
+const char *pl_get_dxgi_format_name(DXGI_FORMAT fmt);