diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 20:38:23 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 20:38:23 +0000 |
commit | ff6e3c025658a5fa1affd094f220b623e7e1b24b (patch) | |
tree | 9faab72d69c92d24e349d184f5869b9796f17e0c /src/d3d11 | |
parent | Initial commit. (diff) | |
download | libplacebo-ff6e3c025658a5fa1affd094f220b623e7e1b24b.tar.xz libplacebo-ff6e3c025658a5fa1affd094f220b623e7e1b24b.zip |
Adding upstream version 6.338.2. [upstream/6.338.2] [upstream]
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/d3d11')
-rw-r--r-- | src/d3d11/common.h | 66 | ||||
-rw-r--r-- | src/d3d11/context.c | 488 | ||||
-rw-r--r-- | src/d3d11/formats.c | 293 | ||||
-rw-r--r-- | src/d3d11/formats.h | 36 | ||||
-rw-r--r-- | src/d3d11/gpu.c | 685 | ||||
-rw-r--r-- | src/d3d11/gpu.h | 212 | ||||
-rw-r--r-- | src/d3d11/gpu_buf.c | 310 | ||||
-rw-r--r-- | src/d3d11/gpu_pass.c | 1293 | ||||
-rw-r--r-- | src/d3d11/gpu_tex.c | 745 | ||||
-rw-r--r-- | src/d3d11/meson.build | 41 | ||||
-rw-r--r-- | src/d3d11/stubs.c | 56 | ||||
-rw-r--r-- | src/d3d11/swapchain.c | 667 | ||||
-rw-r--r-- | src/d3d11/utils.c | 500 | ||||
-rw-r--r-- | src/d3d11/utils.h | 88 |
14 files changed, 5480 insertions, 0 deletions
diff --git a/src/d3d11/common.h b/src/d3d11/common.h new file mode 100644 index 0000000..e14b709 --- /dev/null +++ b/src/d3d11/common.h @@ -0,0 +1,66 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#pragma once + +#include "../common.h" +#include "../log.h" + +#ifdef PL_HAVE_DXGI_DEBUG +#include <dxgidebug.h> +#endif + +#include <libplacebo/d3d11.h> + +// Shared struct used to hold the D3D11 device and associated interfaces +struct d3d11_ctx { + pl_log log; + pl_d3d11 d3d11; + + // Copy of the device from pl_d3d11 for convenience. Does not hold an + // additional reference. + ID3D11Device *dev; + + // DXGI device. This does hold a reference. + IDXGIDevice1 *dxgi_dev; + +#ifdef PL_HAVE_DXGI_DEBUG + // Debug interfaces + IDXGIDebug *debug; + IDXGIInfoQueue *iqueue; + uint64_t last_discarded; // Last count of discarded messages + DXGI_INFO_QUEUE_MESSAGE *dxgi_msg; +#endif + + // pl_gpu_is_failed (We saw a device removed error!) + bool is_failed; +}; + +// DDK value. Apparently some D3D functions can return this instead of the +// proper user-mode error code. 
See: +// https://docs.microsoft.com/en-us/windows/win32/api/dxgi/nf-dxgi-idxgiswapchain-present +#define D3DDDIERR_DEVICEREMOVED (0x88760870) + +#ifndef D3D11_FORMAT_SUPPORT2_UAV_TYPED_STORE +#define D3D11_FORMAT_SUPPORT2_UAV_TYPED_STORE (0x80) +#endif +#ifndef D3D11_FORMAT_SUPPORT2_UAV_TYPED_LOAD +#define D3D11_FORMAT_SUPPORT2_UAV_TYPED_LOAD (0x40) +#endif +#ifndef PL_HAVE_DXGI_DEBUG_D3D11 +DEFINE_GUID(DXGI_DEBUG_D3D11, 0x4b99317b, 0xac39, 0x4aa6, 0xbb, 0xb, 0xba, 0xa0, 0x47, 0x84, 0x79, 0x8f); +#endif diff --git a/src/d3d11/context.c b/src/d3d11/context.c new file mode 100644 index 0000000..e0ba90f --- /dev/null +++ b/src/d3d11/context.c @@ -0,0 +1,488 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "gpu.h" + +// Windows 8 enum value, not present in mingw-w64 v7 +#define DXGI_ADAPTER_FLAG_SOFTWARE (2) + +const struct pl_d3d11_params pl_d3d11_default_params = { PL_D3D11_DEFAULTS }; + +static INIT_ONCE d3d11_once = INIT_ONCE_STATIC_INIT; +static PFN_D3D11_CREATE_DEVICE pD3D11CreateDevice = NULL; +static __typeof__(&CreateDXGIFactory1) pCreateDXGIFactory1 = NULL; +#ifdef PL_HAVE_DXGI_DEBUG +static __typeof__(&DXGIGetDebugInterface) pDXGIGetDebugInterface = NULL; +#endif + +static void d3d11_load(void) +{ + BOOL bPending = FALSE; + InitOnceBeginInitialize(&d3d11_once, 0, &bPending, NULL); + + if (bPending) + { + HMODULE d3d11 = LoadLibraryW(L"d3d11.dll"); + if (d3d11) { + pD3D11CreateDevice = (void *) + GetProcAddress(d3d11, "D3D11CreateDevice"); + } + + HMODULE dxgi = LoadLibraryW(L"dxgi.dll"); + if (dxgi) { + pCreateDXGIFactory1 = (void *) + GetProcAddress(dxgi, "CreateDXGIFactory1"); + } + +#ifdef PL_HAVE_DXGI_DEBUG + HMODULE dxgi_debug = LoadLibraryW(L"dxgidebug.dll"); + if (dxgi_debug) { + pDXGIGetDebugInterface = (void *) + GetProcAddress(dxgi_debug, "DXGIGetDebugInterface"); + } +#endif + } + + InitOnceComplete(&d3d11_once, 0, NULL); +} + +// Get a const array of D3D_FEATURE_LEVELs from max_fl to min_fl (inclusive) +static int get_feature_levels(int max_fl, int min_fl, + const D3D_FEATURE_LEVEL **out) +{ + static const D3D_FEATURE_LEVEL levels[] = { + D3D_FEATURE_LEVEL_12_1, + D3D_FEATURE_LEVEL_12_0, + D3D_FEATURE_LEVEL_11_1, + D3D_FEATURE_LEVEL_11_0, + D3D_FEATURE_LEVEL_10_1, + D3D_FEATURE_LEVEL_10_0, + D3D_FEATURE_LEVEL_9_3, + D3D_FEATURE_LEVEL_9_2, + D3D_FEATURE_LEVEL_9_1, + }; + static const int levels_len = PL_ARRAY_SIZE(levels); + + int start = 0; + for (; start < levels_len; start++) { + if (levels[start] <= max_fl) + break; + } + int len = 0; + for (; start + len < levels_len; len++) { + if (levels[start + len] < min_fl) + break; + } + *out = &levels[start]; + return len; +} + +static bool is_null_luid(LUID luid) +{ + return 
luid.LowPart == 0 && luid.HighPart == 0; +} + +static IDXGIAdapter *get_adapter(pl_d3d11 d3d11, LUID adapter_luid) +{ + struct d3d11_ctx *ctx = PL_PRIV(d3d11); + IDXGIFactory1 *factory = NULL; + IDXGIAdapter1 *adapter1 = NULL; + IDXGIAdapter *adapter = NULL; + HRESULT hr; + + if (!pCreateDXGIFactory1) { + PL_FATAL(ctx, "Failed to load dxgi.dll"); + goto error; + } + pCreateDXGIFactory1(&IID_IDXGIFactory1, (void **) &factory); + + for (int i = 0;; i++) { + hr = IDXGIFactory1_EnumAdapters1(factory, i, &adapter1); + if (hr == DXGI_ERROR_NOT_FOUND) + break; + if (FAILED(hr)) { + PL_FATAL(ctx, "Failed to enumerate adapters"); + goto error; + } + + DXGI_ADAPTER_DESC1 desc; + D3D(IDXGIAdapter1_GetDesc1(adapter1, &desc)); + if (desc.AdapterLuid.LowPart == adapter_luid.LowPart && + desc.AdapterLuid.HighPart == adapter_luid.HighPart) + { + break; + } + + SAFE_RELEASE(adapter1); + } + if (!adapter1) { + PL_FATAL(ctx, "Adapter with LUID %08lx%08lx not found", + adapter_luid.HighPart, adapter_luid.LowPart); + goto error; + } + + D3D(IDXGIAdapter1_QueryInterface(adapter1, &IID_IDXGIAdapter, + (void **) &adapter)); + +error: + SAFE_RELEASE(factory); + SAFE_RELEASE(adapter1); + return adapter; +} + +static bool has_sdk_layers(void) +{ + // This will fail if the SDK layers aren't installed + return SUCCEEDED(pD3D11CreateDevice(NULL, D3D_DRIVER_TYPE_NULL, NULL, + D3D11_CREATE_DEVICE_DEBUG, NULL, 0, D3D11_SDK_VERSION, NULL, NULL, + NULL)); +} + +static ID3D11Device *create_device(struct pl_d3d11_t *d3d11, + const struct pl_d3d11_params *params) +{ + struct d3d11_ctx *ctx = PL_PRIV(d3d11); + bool debug = params->debug; + bool warp = params->force_software; + int max_fl = params->max_feature_level; + int min_fl = params->min_feature_level; + ID3D11Device *dev = NULL; + IDXGIDevice1 *dxgi_dev = NULL; + IDXGIAdapter *adapter = NULL; + bool release_adapter = false; + HRESULT hr; + + d3d11_load(); + + if (!pD3D11CreateDevice) { + PL_FATAL(ctx, "Failed to load d3d11.dll"); + goto error; + } 
+ + if (params->adapter) { + adapter = params->adapter; + } else if (!is_null_luid(params->adapter_luid)) { + adapter = get_adapter(d3d11, params->adapter_luid); + release_adapter = true; + } + + if (debug && !has_sdk_layers()) { + PL_INFO(ctx, "Debug layer not available, removing debug flag"); + debug = false; + } + + // Return here to retry creating the device + do { + // Use these default feature levels if they are not set + max_fl = PL_DEF(max_fl, D3D_FEATURE_LEVEL_12_1); + min_fl = PL_DEF(min_fl, D3D_FEATURE_LEVEL_9_1); + + // Get a list of feature levels from min_fl to max_fl + const D3D_FEATURE_LEVEL *levels; + int levels_len = get_feature_levels(max_fl, min_fl, &levels); + if (!levels_len) { + PL_FATAL(ctx, "No suitable Direct3D feature level found"); + goto error; + } + + D3D_DRIVER_TYPE type = D3D_DRIVER_TYPE_UNKNOWN; + if (!adapter) { + if (warp) { + type = D3D_DRIVER_TYPE_WARP; + } else { + type = D3D_DRIVER_TYPE_HARDWARE; + } + } + + UINT flags = params->flags; + if (debug) + flags |= D3D11_CREATE_DEVICE_DEBUG; + + hr = pD3D11CreateDevice(adapter, type, NULL, flags, levels, levels_len, + D3D11_SDK_VERSION, &dev, NULL, NULL); + if (SUCCEEDED(hr)) + break; + + pl_d3d11_after_error(ctx, hr); + + // Trying to create a D3D_FEATURE_LEVEL_12_0 device on Windows 8.1 or + // below will not succeed. Try an 11_1 device. + if (hr == E_INVALIDARG && max_fl >= D3D_FEATURE_LEVEL_12_0 && + min_fl <= D3D_FEATURE_LEVEL_11_1) { + PL_DEBUG(ctx, "Failed to create 12_0+ device, trying 11_1"); + max_fl = D3D_FEATURE_LEVEL_11_1; + continue; + } + + // Trying to create a D3D_FEATURE_LEVEL_11_1 device on Windows 7 + // without the platform update will not succeed. Try an 11_0 device. 
+ if (hr == E_INVALIDARG && max_fl >= D3D_FEATURE_LEVEL_11_1 && + min_fl <= D3D_FEATURE_LEVEL_11_0) { + PL_DEBUG(ctx, "Failed to create 11_1+ device, trying 11_0"); + max_fl = D3D_FEATURE_LEVEL_11_0; + continue; + } + + // Retry with WARP if allowed + if (!adapter && !warp && params->allow_software) { + PL_DEBUG(ctx, "Failed to create hardware device, trying WARP: %s", + pl_hresult_to_str(hr)); + warp = true; + max_fl = params->max_feature_level; + min_fl = params->min_feature_level; + continue; + } + + PL_FATAL(ctx, "Failed to create Direct3D 11 device: %s", + pl_hresult_to_str(hr)); + goto error; + } while (true); + + if (params->max_frame_latency) { + D3D(ID3D11Device_QueryInterface(dev, &IID_IDXGIDevice1, + (void **) &dxgi_dev)); + IDXGIDevice1_SetMaximumFrameLatency(dxgi_dev, params->max_frame_latency); + } + + d3d11->software = warp; + +error: + if (release_adapter) + SAFE_RELEASE(adapter); + SAFE_RELEASE(dxgi_dev); + return dev; +} + +static void init_debug_layer(struct d3d11_ctx *ctx, bool leak_check) +{ +#ifdef PL_HAVE_DXGI_DEBUG + if (!pDXGIGetDebugInterface) + d3d11_load(); + + if (!pDXGIGetDebugInterface) + goto error; + + D3D(pDXGIGetDebugInterface(&IID_IDXGIInfoQueue, (void **) &ctx->iqueue)); + + // Push empty filter to get everything + IDXGIInfoQueue_PushStorageFilter(ctx->iqueue, DXGI_DEBUG_ALL, + &(DXGI_INFO_QUEUE_FILTER){0}); + + // Filter some annoying D3D11 messages + DXGI_INFO_QUEUE_MESSAGE_ID deny_ids[] = { + // This false-positive error occurs every time we Draw() with a shader + // that samples from a texture format that only supports point sampling. + // Since we already use CheckFormatSupport to know which formats can be + // linearly sampled from, we shouldn't ever bind a non-point sampler to + // a format that doesn't support it. 
+ D3D11_MESSAGE_ID_DEVICE_DRAW_RESOURCE_FORMAT_SAMPLE_UNSUPPORTED, + }; + DXGI_INFO_QUEUE_FILTER filter = { + .DenyList = { + .NumIDs = PL_ARRAY_SIZE(deny_ids), + .pIDList = deny_ids, + }, + }; + IDXGIInfoQueue_PushStorageFilter(ctx->iqueue, DXGI_DEBUG_D3D11, &filter); + + IDXGIInfoQueue_SetMessageCountLimit(ctx->iqueue, DXGI_DEBUG_D3D11, -1); + IDXGIInfoQueue_SetMessageCountLimit(ctx->iqueue, DXGI_DEBUG_DXGI, -1); + + if (leak_check) + D3D(pDXGIGetDebugInterface(&IID_IDXGIDebug, (void **) &ctx->debug)); + +error: + return; +#endif +} + +void pl_d3d11_destroy(pl_d3d11 *ptr) +{ + pl_d3d11 d3d11 = *ptr; + if (!d3d11) + return; + struct d3d11_ctx *ctx = PL_PRIV(d3d11); + + pl_gpu_destroy(d3d11->gpu); + + SAFE_RELEASE(ctx->dev); + SAFE_RELEASE(ctx->dxgi_dev); + +#ifdef PL_HAVE_DXGI_DEBUG + if (ctx->debug) { + // Report any leaked objects + pl_d3d11_flush_message_queue(ctx, "After destroy"); + IDXGIDebug_ReportLiveObjects(ctx->debug, DXGI_DEBUG_ALL, DXGI_DEBUG_RLO_DETAIL); + pl_d3d11_flush_message_queue(ctx, "After leak check"); + IDXGIDebug_ReportLiveObjects(ctx->debug, DXGI_DEBUG_ALL, DXGI_DEBUG_RLO_SUMMARY); + pl_d3d11_flush_message_queue(ctx, "After leak summary"); + } + + SAFE_RELEASE(ctx->debug); + SAFE_RELEASE(ctx->iqueue); +#endif + + pl_free_ptr((void **) ptr); +} + +pl_d3d11 pl_d3d11_create(pl_log log, const struct pl_d3d11_params *params) +{ + params = PL_DEF(params, &pl_d3d11_default_params); + IDXGIAdapter1 *adapter = NULL; + IDXGIAdapter2 *adapter2 = NULL; + bool success = false; + HRESULT hr; + + struct pl_d3d11_t *d3d11 = pl_zalloc_obj(NULL, d3d11, struct d3d11_ctx); + struct d3d11_ctx *ctx = PL_PRIV(d3d11); + ctx->log = log; + ctx->d3d11 = d3d11; + + if (params->device) { + d3d11->device = params->device; + ID3D11Device_AddRef(d3d11->device); + } else { + d3d11->device = create_device(d3d11, params); + if (!d3d11->device) + goto error; + } + ctx->dev = d3d11->device; + + if (params->debug || + ID3D11Device_GetCreationFlags(d3d11->device) & 
D3D11_CREATE_DEVICE_DEBUG) + { + // Do not report live object on pl_d3d11_destroy if device was created + // externally, it makes no sense as there will be a lot of things alive. + init_debug_layer(ctx, !params->device); + } + + D3D(ID3D11Device_QueryInterface(d3d11->device, &IID_IDXGIDevice1, + (void **) &ctx->dxgi_dev)); + D3D(IDXGIDevice1_GetParent(ctx->dxgi_dev, &IID_IDXGIAdapter1, + (void **) &adapter)); + + hr = IDXGIAdapter1_QueryInterface(adapter, &IID_IDXGIAdapter2, + (void **) &adapter2); + if (FAILED(hr)) + adapter2 = NULL; + + if (adapter2) { + PL_INFO(ctx, "Using DXGI 1.2+"); + } else { + PL_INFO(ctx, "Using DXGI 1.1"); + } + + D3D_FEATURE_LEVEL fl = ID3D11Device_GetFeatureLevel(d3d11->device); + PL_INFO(ctx, "Using Direct3D 11 feature level %u_%u", + ((unsigned) fl) >> 12, (((unsigned) fl) >> 8) & 0xf); + + char *dev_name = NULL; + UINT vendor_id, device_id, revision, subsys_id; + LUID adapter_luid; + UINT flags; + + if (adapter2) { + // DXGI 1.2 IDXGIAdapter2::GetDesc2 is preferred over the DXGI 1.1 + // version because it reports the real adapter information when using + // feature level 9 hardware + DXGI_ADAPTER_DESC2 desc; + D3D(IDXGIAdapter2_GetDesc2(adapter2, &desc)); + + dev_name = pl_to_utf8(NULL, desc.Description); + vendor_id = desc.VendorId; + device_id = desc.DeviceId; + revision = desc.Revision; + subsys_id = desc.SubSysId; + adapter_luid = desc.AdapterLuid; + flags = desc.Flags; + } else { + DXGI_ADAPTER_DESC1 desc; + D3D(IDXGIAdapter1_GetDesc1(adapter, &desc)); + + dev_name = pl_to_utf8(NULL, desc.Description); + vendor_id = desc.VendorId; + device_id = desc.DeviceId; + revision = desc.Revision; + subsys_id = desc.SubSysId; + adapter_luid = desc.AdapterLuid; + flags = desc.Flags; + } + + PL_INFO(ctx, "Direct3D 11 device properties:"); + PL_INFO(ctx, " Device Name: %s", dev_name); + PL_INFO(ctx, " Device ID: %04x:%04x (rev %02x)", + vendor_id, device_id, revision); + PL_INFO(ctx, " Subsystem ID: %04x:%04x", + LOWORD(subsys_id), 
HIWORD(subsys_id)); + PL_INFO(ctx, " LUID: %08lx%08lx", + adapter_luid.HighPart, adapter_luid.LowPart); + pl_free(dev_name); + + LARGE_INTEGER version; + hr = IDXGIAdapter1_CheckInterfaceSupport(adapter, &IID_IDXGIDevice, &version); + if (SUCCEEDED(hr)) { + PL_INFO(ctx, " Driver version: %u.%u.%u.%u", + HIWORD(version.HighPart), LOWORD(version.HighPart), + HIWORD(version.LowPart), LOWORD(version.LowPart)); + } + + // Note: DXGI_ADAPTER_FLAG_SOFTWARE doesn't exist before Windows 8, but we + // also set d3d11->software in create_device if we pick WARP ourselves + if (flags & DXGI_ADAPTER_FLAG_SOFTWARE) + d3d11->software = true; + + // If the primary display adapter is a software adapter, the + // DXGI_ADAPTER_FLAG_SOFTWARE flag won't be set, but the device IDs should + // still match the Microsoft Basic Render Driver + if (vendor_id == 0x1414 && device_id == 0x8c) + d3d11->software = true; + + if (d3d11->software) { + bool external_adapter = params->device || params->adapter || + !is_null_luid(params->adapter_luid); + + // The allow_software flag only applies if the API user didn't manually + // specify an adapter or a device + if (!params->allow_software && !external_adapter) { + // If we got this far with allow_software set, the primary adapter + // must be a software adapter + PL_ERR(ctx, "Primary adapter is a software adapter"); + goto error; + } + + // If a software adapter was manually specified, don't show a warning + enum pl_log_level level = PL_LOG_WARN; + if (external_adapter || params->force_software) + level = PL_LOG_INFO; + + PL_MSG(ctx, level, "Using a software adapter"); + } + + d3d11->gpu = pl_gpu_create_d3d11(ctx); + if (!d3d11->gpu) + goto error; + + success = true; +error: + if (!success) { + PL_FATAL(ctx, "Failed initializing Direct3D 11 device"); + pl_d3d11_destroy((pl_d3d11 *) &d3d11); + } + SAFE_RELEASE(adapter); + SAFE_RELEASE(adapter2); + return d3d11; +} diff --git a/src/d3d11/formats.c b/src/d3d11/formats.c new file mode 100644 index 
0000000..7aaec26 --- /dev/null +++ b/src/d3d11/formats.c @@ -0,0 +1,293 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "formats.h" +#include "gpu.h" + +#define FMT(_minor, _name, _dxfmt, _type, num, size, bits, order) \ + (struct d3d_format) { \ + .dxfmt = DXGI_FORMAT_##_dxfmt##_##_type, \ + .minor = _minor, \ + .fmt = { \ + .name = _name, \ + .type = PL_FMT_##_type, \ + .num_components = num, \ + .component_depth = bits, \ + .texel_size = size, \ + .texel_align = 1, \ + .internal_size = size, \ + .host_bits = bits, \ + .sample_order = order, \ + }, \ + } + +#define IDX(...) {__VA_ARGS__} +#define BITS(...) 
{__VA_ARGS__} + +#define REGFMT(name, dxfmt, type, num, bits) \ + FMT(0, name, dxfmt, type, num, (num) * (bits) / 8, \ + BITS(bits, bits, bits, bits), \ + IDX(0, 1, 2, 3)) + +#define EMUFMT(_name, _dxfmt, _type, in, en, ib, eb) \ + (struct d3d_format) { \ + .dxfmt = DXGI_FORMAT_##_dxfmt##_##_type, \ + .minor = 0, \ + .fmt = { \ + .name = _name, \ + .type = PL_FMT_##_type, \ + .num_components = en, \ + .component_depth = BITS(ib, ib, ib, ib), \ + .internal_size = (in) * (ib) / 8, \ + .opaque = false, \ + .emulated = true, \ + .texel_size = (en) * (eb) / 8, \ + .texel_align = (eb) / 8, \ + .host_bits = BITS(eb, eb, eb, eb), \ + .sample_order = IDX(0, 1, 2, 3), \ + }, \ + } + +const struct d3d_format pl_d3d11_formats[] = { + REGFMT("r8", R8, UNORM, 1, 8), + REGFMT("rg8", R8G8, UNORM, 2, 8), + EMUFMT("rgb8", R8G8B8A8, UNORM, 4, 3, 8, 8), + REGFMT("rgba8", R8G8B8A8, UNORM, 4, 8), + REGFMT("r16", R16, UNORM, 1, 16), + REGFMT("rg16", R16G16, UNORM, 2, 16), + EMUFMT("rgb16", R16G16B16A16, UNORM, 4, 3, 16, 16), + REGFMT("rgba16", R16G16B16A16, UNORM, 4, 16), + + REGFMT("r8s", R8, SNORM, 1, 8), + REGFMT("rg8s", R8G8, SNORM, 2, 8), + REGFMT("rgba8s", R8G8B8A8, SNORM, 4, 8), + REGFMT("r16s", R16, SNORM, 1, 16), + REGFMT("rg16s", R16G16, SNORM, 2, 16), + REGFMT("rgba16s", R16G16B16A16, SNORM, 4, 16), + + REGFMT("r16hf", R16, FLOAT, 1, 16), + REGFMT("rg16hf", R16G16, FLOAT, 2, 16), + EMUFMT("rgb16hf", R16G16B16A16, FLOAT, 4, 3, 16, 16), + REGFMT("rgba16hf", R16G16B16A16, FLOAT, 4, 16), + REGFMT("r32f", R32, FLOAT, 1, 32), + REGFMT("rg32f", R32G32, FLOAT, 2, 32), + REGFMT("rgb32f", R32G32B32, FLOAT, 3, 32), + REGFMT("rgba32f", R32G32B32A32, FLOAT, 4, 32), + + EMUFMT("r16f", R16, FLOAT, 1, 1, 16, 32), + EMUFMT("rg16f", R16G16, FLOAT, 2, 2, 16, 32), + EMUFMT("rgb16f", R16G16B16A16, FLOAT, 4, 3, 16, 32), + EMUFMT("rgba16f", R16G16B16A16, FLOAT, 4, 4, 16, 32), + + REGFMT("r8u", R8, UINT, 1, 8), + REGFMT("rg8u", R8G8, UINT, 2, 8), + REGFMT("rgba8u", R8G8B8A8, UINT, 4, 8), + 
REGFMT("r16u", R16, UINT, 1, 16), + REGFMT("rg16u", R16G16, UINT, 2, 16), + REGFMT("rgba16u", R16G16B16A16, UINT, 4, 16), + REGFMT("r32u", R32, UINT, 1, 32), + REGFMT("rg32u", R32G32, UINT, 2, 32), + REGFMT("rgb32u", R32G32B32, UINT, 3, 32), + REGFMT("rgba32u", R32G32B32A32, UINT, 4, 32), + + REGFMT("r8i", R8, SINT, 1, 8), + REGFMT("rg8i", R8G8, SINT, 2, 8), + REGFMT("rgba8i", R8G8B8A8, SINT, 4, 8), + REGFMT("r16i", R16, SINT, 1, 16), + REGFMT("rg16i", R16G16, SINT, 2, 16), + REGFMT("rgba16i", R16G16B16A16, SINT, 4, 16), + REGFMT("r32i", R32, SINT, 1, 32), + REGFMT("rg32i", R32G32, SINT, 2, 32), + REGFMT("rgb32i", R32G32B32, SINT, 3, 32), + REGFMT("rgba32i", R32G32B32A32, SINT, 4, 32), + + FMT(0, "rgb10a2", R10G10B10A2, UNORM, 4, 4, BITS(10, 10, 10, 2), IDX(0, 1, 2, 3)), + FMT(0, "rgb10a2u", R10G10B10A2, UINT, 4, 4, BITS(10, 10, 10, 2), IDX(0, 1, 2, 3)), + + FMT(0, "bgra8", B8G8R8A8, UNORM, 4, 4, BITS( 8, 8, 8, 8), IDX(2, 1, 0, 3)), + FMT(0, "bgrx8", B8G8R8X8, UNORM, 3, 4, BITS( 8, 8, 8), IDX(2, 1, 0)), + FMT(0, "rg11b10f", R11G11B10, FLOAT, 3, 4, BITS(11, 11, 10), IDX(0, 1, 2)), + + // D3D11.1 16-bit formats (resurrected D3D9 formats) + FMT(1, "bgr565", B5G6R5, UNORM, 3, 2, BITS( 5, 6, 5), IDX(2, 1, 0)), + FMT(1, "bgr5a1", B5G5R5A1, UNORM, 4, 2, BITS( 5, 5, 5, 1), IDX(2, 1, 0, 3)), + FMT(1, "bgra4", B4G4R4A4, UNORM, 4, 2, BITS( 4, 4, 4, 4), IDX(2, 1, 0, 3)), + + {0} +}; +#undef BITS +#undef IDX +#undef REGFMT +#undef FMT + +void pl_d3d11_setup_formats(struct pl_gpu_t *gpu) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + PL_ARRAY(pl_fmt) formats = {0}; + HRESULT hr; + + for (int i = 0; pl_d3d11_formats[i].dxfmt; i++) { + const struct d3d_format *d3d_fmt = &pl_d3d11_formats[i]; + + // The Direct3D 11.0 debug layer will segfault if CheckFormatSupport is + // called on a format it doesn't know about + if (pl_d3d11_formats[i].minor > p->minor) + continue; + + UINT sup = 0; + hr = ID3D11Device_CheckFormatSupport(p->dev, d3d_fmt->dxfmt, &sup); + if (FAILED(hr)) + continue; 
+ + D3D11_FEATURE_DATA_FORMAT_SUPPORT2 sup2 = { .InFormat = d3d_fmt->dxfmt }; + ID3D11Device_CheckFeatureSupport(p->dev, D3D11_FEATURE_FORMAT_SUPPORT2, + &sup2, sizeof(sup2)); + + struct pl_fmt_t *fmt = pl_alloc_obj(gpu, fmt, struct d3d_fmt *); + const struct d3d_format **fmtp = PL_PRIV(fmt); + *fmt = d3d_fmt->fmt; + *fmtp = d3d_fmt; + + // For sanity, clear the superfluous fields + for (int j = fmt->num_components; j < 4; j++) { + fmt->component_depth[j] = 0; + fmt->sample_order[j] = 0; + fmt->host_bits[j] = 0; + } + + static const struct { + enum pl_fmt_caps caps; + UINT sup; + UINT sup2; + } support[] = { + { + .caps = PL_FMT_CAP_SAMPLEABLE, + .sup = D3D11_FORMAT_SUPPORT_TEXTURE2D, + }, + { + .caps = PL_FMT_CAP_STORABLE, + // SHADER_LOAD is for readonly images, which can use a SRV + .sup = D3D11_FORMAT_SUPPORT_TEXTURE2D | + D3D11_FORMAT_SUPPORT_TYPED_UNORDERED_ACCESS_VIEW | + D3D11_FORMAT_SUPPORT_SHADER_LOAD, + .sup2 = D3D11_FORMAT_SUPPORT2_UAV_TYPED_STORE, + }, + { + .caps = PL_FMT_CAP_READWRITE, + .sup = D3D11_FORMAT_SUPPORT_TEXTURE2D | + D3D11_FORMAT_SUPPORT_TYPED_UNORDERED_ACCESS_VIEW, + .sup2 = D3D11_FORMAT_SUPPORT2_UAV_TYPED_LOAD, + }, + { + .caps = PL_FMT_CAP_LINEAR, + .sup = D3D11_FORMAT_SUPPORT_TEXTURE2D | + D3D11_FORMAT_SUPPORT_SHADER_SAMPLE, + }, + { + .caps = PL_FMT_CAP_RENDERABLE, + .sup = D3D11_FORMAT_SUPPORT_RENDER_TARGET, + }, + { + .caps = PL_FMT_CAP_BLENDABLE, + .sup = D3D11_FORMAT_SUPPORT_RENDER_TARGET | + D3D11_FORMAT_SUPPORT_BLENDABLE, + }, + { + .caps = PL_FMT_CAP_VERTEX, + .sup = D3D11_FORMAT_SUPPORT_IA_VERTEX_BUFFER, + }, + { + .caps = PL_FMT_CAP_TEXEL_UNIFORM, + .sup = D3D11_FORMAT_SUPPORT_BUFFER | + D3D11_FORMAT_SUPPORT_SHADER_LOAD, + }, + { + .caps = PL_FMT_CAP_TEXEL_STORAGE, + // SHADER_LOAD is for readonly buffers, which can use a SRV + .sup = D3D11_FORMAT_SUPPORT_BUFFER | + D3D11_FORMAT_SUPPORT_TYPED_UNORDERED_ACCESS_VIEW | + D3D11_FORMAT_SUPPORT_SHADER_LOAD, + .sup2 = D3D11_FORMAT_SUPPORT2_UAV_TYPED_STORE, + }, + { + .caps = 
PL_FMT_CAP_HOST_READABLE, + .sup = D3D11_FORMAT_SUPPORT_CPU_LOCKABLE, + }, + }; + + for (int j = 0; j < PL_ARRAY_SIZE(support); j++) { + if ((sup & support[j].sup) == support[j].sup && + (sup2.OutFormatSupport2 & support[j].sup2) == support[j].sup2) + { + fmt->caps |= support[j].caps; + } + } + + // PL_FMT_CAP_STORABLE implies compute shaders, so don't set it if we + // don't have them + if (!gpu->glsl.compute) + fmt->caps &= ~PL_FMT_CAP_STORABLE; + + // PL_FMT_CAP_READWRITE implies PL_FMT_CAP_STORABLE + if (!(fmt->caps & PL_FMT_CAP_STORABLE)) + fmt->caps &= ~PL_FMT_CAP_READWRITE; + + // `fmt->gatherable` must have PL_FMT_CAP_SAMPLEABLE + if ((fmt->caps & PL_FMT_CAP_SAMPLEABLE) && + (sup & D3D11_FORMAT_SUPPORT_SHADER_GATHER)) + { + fmt->gatherable = true; + } + + // PL_FMT_CAP_BLITTABLE implies support for stretching, flipping and + // loose format conversion, which require a shader pass in D3D11 + if (p->fl >= D3D_FEATURE_LEVEL_11_0) { + // On >=FL11_0, we use a compute pass, which supports 1D and 3D + // textures + if (fmt->caps & PL_FMT_CAP_STORABLE) + fmt->caps |= PL_FMT_CAP_BLITTABLE; + } else { + // On <FL11_0 we use a raster pass + static const enum pl_fmt_caps req = PL_FMT_CAP_RENDERABLE | + PL_FMT_CAP_SAMPLEABLE; + if ((fmt->caps & req) == req) + fmt->caps |= PL_FMT_CAP_BLITTABLE; + } + + if (fmt->caps & (PL_FMT_CAP_VERTEX | PL_FMT_CAP_TEXEL_UNIFORM | + PL_FMT_CAP_TEXEL_STORAGE)) { + fmt->glsl_type = pl_var_glsl_type_name(pl_var_from_fmt(fmt, "")); + pl_assert(fmt->glsl_type); + } + + if (fmt->caps & (PL_FMT_CAP_STORABLE | PL_FMT_CAP_TEXEL_STORAGE)) + fmt->glsl_format = pl_fmt_glsl_format(fmt, fmt->num_components); + + fmt->fourcc = pl_fmt_fourcc(fmt); + + // If no caps, D3D11 only supports this for things we don't care about + if (!fmt->caps) { + pl_free(fmt); + continue; + } + + PL_ARRAY_APPEND(gpu, formats, fmt); + } + + gpu->formats = formats.elem; + gpu->num_formats = formats.num; +} diff --git a/src/d3d11/formats.h b/src/d3d11/formats.h new file mode 
100644 index 0000000..08336c0 --- /dev/null +++ b/src/d3d11/formats.h @@ -0,0 +1,36 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#pragma once + +#include "common.h" + +struct d3d_format { + DXGI_FORMAT dxfmt; + int minor; // The D3D11 minor version number which supports this format + struct pl_fmt_t fmt; +}; + +extern const struct d3d_format pl_d3d11_formats[]; + +static inline DXGI_FORMAT fmt_to_dxgi(pl_fmt fmt) +{ + const struct d3d_format **fmtp = PL_PRIV(fmt); + return (*fmtp)->dxfmt; +} + +void pl_d3d11_setup_formats(struct pl_gpu_t *gpu); diff --git a/src/d3d11/gpu.c b/src/d3d11/gpu.c new file mode 100644 index 0000000..05a08a3 --- /dev/null +++ b/src/d3d11/gpu.c @@ -0,0 +1,685 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <initguid.h> +#include <windows.h> +#include <versionhelpers.h> + +#include "common.h" +#include "gpu.h" +#include "formats.h" +#include "glsl/spirv.h" + +#define DXGI_ADAPTER_FLAG3_SUPPORT_MONITORED_FENCES (0x8) + +struct timer_query { + ID3D11Query *ts_start; + ID3D11Query *ts_end; + ID3D11Query *disjoint; +}; + +struct pl_timer_t { + // Ring buffer of timer queries to use + int current; + int pending; + struct timer_query queries[16]; +}; + +void pl_d3d11_timer_start(pl_gpu gpu, pl_timer timer) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + struct d3d11_ctx *ctx = p->ctx; + + if (!timer) + return; + struct timer_query *query = &timer->queries[timer->current]; + + // Create the query objects lazily + if (!query->ts_start) { + D3D(ID3D11Device_CreateQuery(p->dev, + &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP }, &query->ts_start)); + D3D(ID3D11Device_CreateQuery(p->dev, + &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP }, &query->ts_end)); + + // Measuring duration in D3D11 requires three queries: start and end + // timestamp queries, and a disjoint query containing a flag which says + // whether the timestamps are usable or if a discontinuity occurred + // between them, like a change in power state or clock speed. The + // disjoint query also contains the timer frequency, so the timestamps + // are useless without it. 
+ D3D(ID3D11Device_CreateQuery(p->dev, + &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP_DISJOINT }, &query->disjoint)); + } + + // Query the start timestamp + ID3D11DeviceContext_Begin(p->imm, (ID3D11Asynchronous *) query->disjoint); + ID3D11DeviceContext_End(p->imm, (ID3D11Asynchronous *) query->ts_start); + return; + +error: + SAFE_RELEASE(query->ts_start); + SAFE_RELEASE(query->ts_end); + SAFE_RELEASE(query->disjoint); +} + +void pl_d3d11_timer_end(pl_gpu gpu, pl_timer timer) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + + if (!timer) + return; + struct timer_query *query = &timer->queries[timer->current]; + + // Even if timer_start and timer_end are called in-order, timer_start might + // have failed to create the timer objects + if (!query->ts_start) + return; + + // Query the end timestamp + ID3D11DeviceContext_End(p->imm, (ID3D11Asynchronous *) query->ts_end); + ID3D11DeviceContext_End(p->imm, (ID3D11Asynchronous *) query->disjoint); + + // Advance to the next set of queries, for the next call to timer_start + timer->current++; + if (timer->current >= PL_ARRAY_SIZE(timer->queries)) + timer->current = 0; // Wrap around + + // Increment the number of pending queries, unless the ring buffer is full, + // in which case, timer->current now points to the oldest one, which will be + // dropped and reused + if (timer->pending < PL_ARRAY_SIZE(timer->queries)) + timer->pending++; +} + +static uint64_t timestamp_to_ns(uint64_t timestamp, uint64_t freq) +{ + static const uint64_t ns_per_s = 1000000000llu; + return timestamp / freq * ns_per_s + timestamp % freq * ns_per_s / freq; +} + +static uint64_t d3d11_timer_query(pl_gpu gpu, pl_timer timer) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + struct d3d11_ctx *ctx = p->ctx; + HRESULT hr; + + for (; timer->pending > 0; timer->pending--) { + int index = timer->current - timer->pending; + if (index < 0) + index += PL_ARRAY_SIZE(timer->queries); + struct timer_query *query = &timer->queries[index]; + + UINT64 start, end; + 
D3D11_QUERY_DATA_TIMESTAMP_DISJOINT dj; + + // Fetch the results of each query, or on S_FALSE, return 0 to indicate + // the queries are still pending + D3D(hr = ID3D11DeviceContext_GetData(p->imm, + (ID3D11Asynchronous *) query->disjoint, &dj, sizeof(dj), + D3D11_ASYNC_GETDATA_DONOTFLUSH)); + if (hr == S_FALSE) + return 0; + D3D(hr = ID3D11DeviceContext_GetData(p->imm, + (ID3D11Asynchronous *) query->ts_end, &end, sizeof(end), + D3D11_ASYNC_GETDATA_DONOTFLUSH)); + if (hr == S_FALSE) + return 0; + D3D(hr = ID3D11DeviceContext_GetData(p->imm, + (ID3D11Asynchronous *) query->ts_start, &start, sizeof(start), + D3D11_ASYNC_GETDATA_DONOTFLUSH)); + if (hr == S_FALSE) + return 0; + + // There was a discontinuity during the queries, so a timestamp can't be + // produced. Skip it and try the next one. + if (dj.Disjoint || !dj.Frequency) + continue; + + // We got a result. Return it to the caller. + timer->pending--; + pl_d3d11_flush_message_queue(ctx, "After timer query"); + + uint64_t ns = timestamp_to_ns(end - start, dj.Frequency); + return PL_MAX(ns, 1); + + error: + // There was an error fetching the timer result, so skip it and try the + // next one + continue; + } + + // No more unprocessed results + return 0; +} + +static void d3d11_timer_destroy(pl_gpu gpu, pl_timer timer) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + struct d3d11_ctx *ctx = p->ctx; + + for (int i = 0; i < PL_ARRAY_SIZE(timer->queries); i++) { + SAFE_RELEASE(timer->queries[i].ts_start); + SAFE_RELEASE(timer->queries[i].ts_end); + SAFE_RELEASE(timer->queries[i].disjoint); + } + + pl_d3d11_flush_message_queue(ctx, "After timer destroy"); + + pl_free(timer); +} + +static pl_timer d3d11_timer_create(pl_gpu gpu) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + if (!p->has_timestamp_queries) + return NULL; + + struct pl_timer_t *timer = pl_alloc_ptr(NULL, timer); + *timer = (struct pl_timer_t) {0}; + return timer; +} + +static int d3d11_desc_namespace(pl_gpu gpu, enum pl_desc_type type) +{ + // Vulkan-style 
binding, where all descriptors are in the same namespace, is + // required to use SPIRV-Cross' HLSL resource mapping API, which targets + // resources by binding number + return 0; +} + +static void d3d11_gpu_flush(pl_gpu gpu) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + struct d3d11_ctx *ctx = p->ctx; + ID3D11DeviceContext_Flush(p->imm); + + pl_d3d11_flush_message_queue(ctx, "After gpu flush"); +} + +static void d3d11_gpu_finish(pl_gpu gpu) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + struct d3d11_ctx *ctx = p->ctx; + HRESULT hr; + + if (p->finish_fence) { + p->finish_value++; + D3D(ID3D11Fence_SetEventOnCompletion(p->finish_fence, p->finish_value, + p->finish_event)); + ID3D11DeviceContext4_Signal(p->imm4, p->finish_fence, p->finish_value); + ID3D11DeviceContext_Flush(p->imm); + WaitForSingleObject(p->finish_event, INFINITE); + } else { + ID3D11DeviceContext_End(p->imm, (ID3D11Asynchronous *) p->finish_query); + + // D3D11 doesn't have blocking queries, but it does have blocking + // readback. As a performance hack to try to avoid polling, do a dummy + // copy/readback between two buffers. Hopefully this will block until + // all prior commands are finished. If it does, the first GetData call + // will return a result and we won't have to poll. 
+ pl_buf_copy(gpu, p->finish_buf_dst, 0, p->finish_buf_src, 0, sizeof(uint32_t)); + pl_buf_read(gpu, p->finish_buf_dst, 0, &(uint32_t) {0}, sizeof(uint32_t)); + + // Poll the event query until it completes + for (;;) { + BOOL idle; + D3D(hr = ID3D11DeviceContext_GetData(p->imm, + (ID3D11Asynchronous *) p->finish_query, &idle, sizeof(idle), 0)); + if (hr == S_OK && idle) + break; + Sleep(1); + } + } + + pl_d3d11_flush_message_queue(ctx, "After gpu finish"); + +error: + return; +} + +static bool d3d11_gpu_is_failed(pl_gpu gpu) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + struct d3d11_ctx *ctx = p->ctx; + + if (ctx->is_failed) + return true; + + // GetDeviceRemovedReason returns S_OK if the device isn't removed + HRESULT hr = ID3D11Device_GetDeviceRemovedReason(p->dev); + if (FAILED(hr)) { + ctx->is_failed = true; + pl_d3d11_after_error(ctx, hr); + } + + return ctx->is_failed; +} + +static void d3d11_gpu_destroy(pl_gpu gpu) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + + pl_buf_destroy(gpu, &p->finish_buf_src); + pl_buf_destroy(gpu, &p->finish_buf_dst); + + // Release everything except the immediate context + SAFE_RELEASE(p->dev); + SAFE_RELEASE(p->dev1); + SAFE_RELEASE(p->dev5); + SAFE_RELEASE(p->imm1); + SAFE_RELEASE(p->imm4); + SAFE_RELEASE(p->vbuf.buf); + SAFE_RELEASE(p->ibuf.buf); + SAFE_RELEASE(p->rstate); + SAFE_RELEASE(p->dsstate); + for (int i = 0; i < PL_TEX_SAMPLE_MODE_COUNT; i++) { + for (int j = 0; j < PL_TEX_ADDRESS_MODE_COUNT; j++) { + SAFE_RELEASE(p->samplers[i][j]); + } + } + SAFE_RELEASE(p->finish_fence); + if (p->finish_event) + CloseHandle(p->finish_event); + SAFE_RELEASE(p->finish_query); + + // Destroy the immediate context synchronously so referenced objects don't + // show up in the leak check + if (p->imm) { + ID3D11DeviceContext_ClearState(p->imm); + ID3D11DeviceContext_Flush(p->imm); + SAFE_RELEASE(p->imm); + } + + pl_spirv_destroy(&p->spirv); + pl_free((void *) gpu); +} + +pl_d3d11 pl_d3d11_get(pl_gpu gpu) +{ + const struct pl_gpu_fns 
*impl = PL_PRIV(gpu); + if (impl->destroy == d3d11_gpu_destroy) { + struct pl_gpu_d3d11 *p = (struct pl_gpu_d3d11 *) impl; + return p->ctx->d3d11; + } + + return NULL; +} + +static bool load_d3d_compiler(pl_gpu gpu) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + HMODULE d3dcompiler = NULL; + + static const struct { + const wchar_t *name; + bool inbox; + } compiler_dlls[] = { + // Try the inbox D3DCompiler first (Windows 8.1 and up) + { .name = L"d3dcompiler_47.dll", .inbox = true }, + // Check for a packaged version of d3dcompiler_47.dll + { .name = L"d3dcompiler_47.dll" }, + // Try d3dcompiler_46.dll from the Windows 8 SDK + { .name = L"d3dcompiler_46.dll" }, + // Try d3dcompiler_43.dll from the June 2010 DirectX SDK + { .name = L"d3dcompiler_43.dll" }, + }; + + for (int i = 0; i < PL_ARRAY_SIZE(compiler_dlls); i++) { + if (compiler_dlls[i].inbox) { + if (!IsWindows8Point1OrGreater()) + continue; + d3dcompiler = LoadLibraryExW(compiler_dlls[i].name, NULL, + LOAD_LIBRARY_SEARCH_SYSTEM32); + } else { + d3dcompiler = LoadLibraryW(compiler_dlls[i].name); + } + if (!d3dcompiler) + continue; + + p->D3DCompile = (void *) GetProcAddress(d3dcompiler, "D3DCompile"); + if (!p->D3DCompile) + return false; + p->d3d_compiler_ver = pl_get_dll_version(compiler_dlls[i].name); + + return true; + } + + return false; +} + +static struct pl_gpu_fns pl_fns_d3d11 = { + .tex_create = pl_d3d11_tex_create, + .tex_destroy = pl_d3d11_tex_destroy, + .tex_invalidate = pl_d3d11_tex_invalidate, + .tex_clear_ex = pl_d3d11_tex_clear_ex, + .tex_blit = pl_d3d11_tex_blit, + .tex_upload = pl_d3d11_tex_upload, + .tex_download = pl_d3d11_tex_download, + .buf_create = pl_d3d11_buf_create, + .buf_destroy = pl_d3d11_buf_destroy, + .buf_write = pl_d3d11_buf_write, + .buf_read = pl_d3d11_buf_read, + .buf_copy = pl_d3d11_buf_copy, + .desc_namespace = d3d11_desc_namespace, + .pass_create = pl_d3d11_pass_create, + .pass_destroy = pl_d3d11_pass_destroy, + .pass_run = pl_d3d11_pass_run, + .timer_create = 
d3d11_timer_create, + .timer_destroy = d3d11_timer_destroy, + .timer_query = d3d11_timer_query, + .gpu_flush = d3d11_gpu_flush, + .gpu_finish = d3d11_gpu_finish, + .gpu_is_failed = d3d11_gpu_is_failed, + .destroy = d3d11_gpu_destroy, +}; + +pl_gpu pl_gpu_create_d3d11(struct d3d11_ctx *ctx) +{ + pl_assert(ctx->dev); + IDXGIDevice1 *dxgi_dev = NULL; + IDXGIAdapter1 *adapter = NULL; + IDXGIAdapter4 *adapter4 = NULL; + bool success = false; + HRESULT hr; + + struct pl_gpu_t *gpu = pl_zalloc_obj(NULL, gpu, struct pl_gpu_d3d11); + gpu->log = ctx->log; + + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + uint32_t spirv_ver = PL_MIN(SPV_VERSION, PL_MAX_SPIRV_VER); + *p = (struct pl_gpu_d3d11) { + .ctx = ctx, + .impl = pl_fns_d3d11, + .dev = ctx->dev, + .spirv = pl_spirv_create(ctx->log, (struct pl_spirv_version) { + .env_version = pl_spirv_version_to_vulkan(spirv_ver), + .spv_version = spirv_ver, + }), + .vbuf.bind_flags = D3D11_BIND_VERTEX_BUFFER, + .ibuf.bind_flags = D3D11_BIND_INDEX_BUFFER, + }; + if (!p->spirv) + goto error; + + ID3D11Device_AddRef(p->dev); + ID3D11Device_GetImmediateContext(p->dev, &p->imm); + + // Check D3D11.1 interfaces + hr = ID3D11Device_QueryInterface(p->dev, &IID_ID3D11Device1, + (void **) &p->dev1); + if (SUCCEEDED(hr)) { + p->minor = 1; + ID3D11Device1_GetImmediateContext1(p->dev1, &p->imm1); + } + + // Check D3D11.4 interfaces + hr = ID3D11Device_QueryInterface(p->dev, &IID_ID3D11Device5, + (void **) &p->dev5); + if (SUCCEEDED(hr)) { + // There is no GetImmediateContext4 method + hr = ID3D11DeviceContext_QueryInterface(p->imm, &IID_ID3D11DeviceContext4, + (void **) &p->imm4); + if (SUCCEEDED(hr)) + p->minor = 4; + } + + PL_INFO(gpu, "Using Direct3D 11.%d runtime", p->minor); + + D3D(ID3D11Device_QueryInterface(p->dev, &IID_IDXGIDevice1, (void **) &dxgi_dev)); + D3D(IDXGIDevice1_GetParent(dxgi_dev, &IID_IDXGIAdapter1, (void **) &adapter)); + + DXGI_ADAPTER_DESC1 adapter_desc = {0}; + IDXGIAdapter1_GetDesc1(adapter, &adapter_desc); + + // No resource 
can be larger than max_res_size in bytes + unsigned int max_res_size = PL_CLAMP( + D3D11_REQ_RESOURCE_SIZE_IN_MEGABYTES_EXPRESSION_B_TERM * adapter_desc.DedicatedVideoMemory, + D3D11_REQ_RESOURCE_SIZE_IN_MEGABYTES_EXPRESSION_A_TERM * 1024u * 1024u, + D3D11_REQ_RESOURCE_SIZE_IN_MEGABYTES_EXPRESSION_C_TERM * 1024u * 1024u); + + gpu->glsl = (struct pl_glsl_version) { + .version = 450, + .vulkan = true, + }; + + gpu->limits = (struct pl_gpu_limits) { + .max_buf_size = max_res_size, + .max_ssbo_size = max_res_size, + .max_vbo_size = max_res_size, + .align_vertex_stride = 1, + + // Make up some values + .align_tex_xfer_offset = 32, + .align_tex_xfer_pitch = 1, + .fragment_queues = 1, + }; + + p->fl = ID3D11Device_GetFeatureLevel(p->dev); + + // If we're not using FL9_x, we can use the same suballocated buffer as a + // vertex buffer and index buffer + if (p->fl >= D3D_FEATURE_LEVEL_10_0) + p->vbuf.bind_flags |= D3D11_BIND_INDEX_BUFFER; + + if (p->fl >= D3D_FEATURE_LEVEL_10_0) { + gpu->limits.max_ubo_size = D3D11_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * CBUF_ELEM; + } else { + // 10level9 restriction: + // https://docs.microsoft.com/en-us/windows/win32/direct3d11/d3d11-graphics-reference-10level9-context + gpu->limits.max_ubo_size = 255 * CBUF_ELEM; + } + + if (p->fl >= D3D_FEATURE_LEVEL_11_0) { + gpu->limits.max_tex_1d_dim = D3D11_REQ_TEXTURE1D_U_DIMENSION; + gpu->limits.max_tex_2d_dim = D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION; + gpu->limits.max_tex_3d_dim = D3D11_REQ_TEXTURE3D_U_V_OR_W_DIMENSION; + } else if (p->fl >= D3D_FEATURE_LEVEL_10_0) { + gpu->limits.max_tex_1d_dim = D3D10_REQ_TEXTURE1D_U_DIMENSION; + gpu->limits.max_tex_2d_dim = D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION; + gpu->limits.max_tex_3d_dim = D3D10_REQ_TEXTURE3D_U_V_OR_W_DIMENSION; + } else if (p->fl >= D3D_FEATURE_LEVEL_9_3) { + gpu->limits.max_tex_2d_dim = D3D_FL9_3_REQ_TEXTURE2D_U_OR_V_DIMENSION; + // Same limit as FL9_1 + gpu->limits.max_tex_3d_dim = D3D_FL9_1_REQ_TEXTURE3D_U_V_OR_W_DIMENSION; + } else { + 
gpu->limits.max_tex_2d_dim = D3D_FL9_1_REQ_TEXTURE2D_U_OR_V_DIMENSION; + gpu->limits.max_tex_3d_dim = D3D_FL9_1_REQ_TEXTURE3D_U_V_OR_W_DIMENSION; + } + + if (p->fl >= D3D_FEATURE_LEVEL_10_0) { + gpu->limits.max_buffer_texels = + 1 << D3D11_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP; + } + + if (p->fl >= D3D_FEATURE_LEVEL_11_0) { + gpu->glsl.compute = true; + gpu->limits.compute_queues = 1; + // Set `gpu->limits.blittable_1d_3d`, since `pl_tex_blit_compute`, which + // is used to emulate blits on 11_0 and up, supports 1D and 3D textures + gpu->limits.blittable_1d_3d = true; + + gpu->glsl.max_shmem_size = D3D11_CS_TGSM_REGISTER_COUNT * sizeof(float); + gpu->glsl.max_group_threads = D3D11_CS_THREAD_GROUP_MAX_THREADS_PER_GROUP; + gpu->glsl.max_group_size[0] = D3D11_CS_THREAD_GROUP_MAX_X; + gpu->glsl.max_group_size[1] = D3D11_CS_THREAD_GROUP_MAX_Y; + gpu->glsl.max_group_size[2] = D3D11_CS_THREAD_GROUP_MAX_Z; + gpu->limits.max_dispatch[0] = gpu->limits.max_dispatch[1] = + gpu->limits.max_dispatch[2] = + D3D11_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION; + } + + if (p->fl >= D3D_FEATURE_LEVEL_11_0) { + // The offset limits are defined by HLSL: + // https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/gather4-po--sm5---asm- + gpu->glsl.min_gather_offset = -32; + gpu->glsl.max_gather_offset = 31; + } else if (p->fl >= D3D_FEATURE_LEVEL_10_1) { + // SM4.1 has no gather4_po, so the offset must be specified by an + // immediate with a range of [-8, 7] + // https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/gather4--sm4-1---asm- + // https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/sample--sm4---asm-#address-offset + gpu->glsl.min_gather_offset = -8; + gpu->glsl.max_gather_offset = 7; + } + + if (p->fl >= D3D_FEATURE_LEVEL_10_0) { + p->max_srvs = D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT; + } else { + // 10level9 restriction: + // https://docs.microsoft.com/en-us/windows/win32/direct3d11/d3d11-graphics-reference-10level9-context + p->max_srvs = 8; + 
} + + if (p->fl >= D3D_FEATURE_LEVEL_11_1) { + p->max_uavs = D3D11_1_UAV_SLOT_COUNT; + } else { + p->max_uavs = D3D11_PS_CS_UAV_REGISTER_COUNT; + } + + if (!load_d3d_compiler(gpu)) { + PL_FATAL(gpu, "Could not find D3DCompiler DLL"); + goto error; + } + PL_INFO(gpu, "D3DCompiler version: %u.%u.%u.%u", + p->d3d_compiler_ver.major, p->d3d_compiler_ver.minor, + p->d3d_compiler_ver.build, p->d3d_compiler_ver.revision); + + // Detect support for timestamp queries. Some FL9_x devices don't support them. + hr = ID3D11Device_CreateQuery(p->dev, + &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP }, NULL); + p->has_timestamp_queries = SUCCEEDED(hr); + + pl_d3d11_setup_formats(gpu); + + // The rasterizer state never changes, so create it here + D3D11_RASTERIZER_DESC rdesc = { + .FillMode = D3D11_FILL_SOLID, + .CullMode = D3D11_CULL_NONE, + .FrontCounterClockwise = FALSE, + .DepthClipEnable = TRUE, // Required for 10level9 + .ScissorEnable = TRUE, + }; + D3D(ID3D11Device_CreateRasterizerState(p->dev, &rdesc, &p->rstate)); + + // The depth stencil state never changes either, and we only set it to turn + // depth testing off so the debug layer doesn't complain about an unbound + // depth buffer + D3D11_DEPTH_STENCIL_DESC dsdesc = { + .DepthEnable = FALSE, + .DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ALL, + .DepthFunc = D3D11_COMPARISON_LESS, + .StencilReadMask = D3D11_DEFAULT_STENCIL_READ_MASK, + .StencilWriteMask = D3D11_DEFAULT_STENCIL_WRITE_MASK, + .FrontFace = { + .StencilFailOp = D3D11_STENCIL_OP_KEEP, + .StencilDepthFailOp = D3D11_STENCIL_OP_KEEP, + .StencilPassOp = D3D11_STENCIL_OP_KEEP, + .StencilFunc = D3D11_COMPARISON_ALWAYS, + }, + .BackFace = { + .StencilFailOp = D3D11_STENCIL_OP_KEEP, + .StencilDepthFailOp = D3D11_STENCIL_OP_KEEP, + .StencilPassOp = D3D11_STENCIL_OP_KEEP, + .StencilFunc = D3D11_COMPARISON_ALWAYS, + }, + }; + D3D(ID3D11Device_CreateDepthStencilState(p->dev, &dsdesc, &p->dsstate)); + + // Initialize the samplers + for (int sample_mode = 0; sample_mode < 
PL_TEX_SAMPLE_MODE_COUNT; sample_mode++) { + for (int address_mode = 0; address_mode < PL_TEX_ADDRESS_MODE_COUNT; address_mode++) { + static const D3D11_TEXTURE_ADDRESS_MODE d3d_address_mode[] = { + [PL_TEX_ADDRESS_CLAMP] = D3D11_TEXTURE_ADDRESS_CLAMP, + [PL_TEX_ADDRESS_REPEAT] = D3D11_TEXTURE_ADDRESS_WRAP, + [PL_TEX_ADDRESS_MIRROR] = D3D11_TEXTURE_ADDRESS_MIRROR, + }; + static const D3D11_FILTER d3d_filter[] = { + [PL_TEX_SAMPLE_NEAREST] = D3D11_FILTER_MIN_MAG_MIP_POINT, + [PL_TEX_SAMPLE_LINEAR] = D3D11_FILTER_MIN_MAG_MIP_LINEAR, + }; + + D3D11_SAMPLER_DESC sdesc = { + .AddressU = d3d_address_mode[address_mode], + .AddressV = d3d_address_mode[address_mode], + .AddressW = d3d_address_mode[address_mode], + .ComparisonFunc = D3D11_COMPARISON_NEVER, + .MinLOD = 0, + .MaxLOD = D3D11_FLOAT32_MAX, + .MaxAnisotropy = 1, + .Filter = d3d_filter[sample_mode], + }; + D3D(ID3D11Device_CreateSamplerState(p->dev, &sdesc, + &p->samplers[sample_mode][address_mode])); + } + } + + hr = IDXGIAdapter1_QueryInterface(adapter, &IID_IDXGIAdapter4, + (void **) &adapter4); + if (SUCCEEDED(hr)) { + DXGI_ADAPTER_DESC3 adapter_desc3 = {0}; + IDXGIAdapter4_GetDesc3(adapter4, &adapter_desc3); + + p->has_monitored_fences = + adapter_desc3.Flags & DXGI_ADAPTER_FLAG3_SUPPORT_MONITORED_FENCES; + } + + // Try to create a D3D11.4 fence object to wait on in pl_gpu_finish() + if (p->dev5 && p->has_monitored_fences) { + hr = ID3D11Device5_CreateFence(p->dev5, 0, D3D11_FENCE_FLAG_NONE, + &IID_ID3D11Fence, + (void **) &p->finish_fence); + if (SUCCEEDED(hr)) { + p->finish_event = CreateEventW(NULL, FALSE, FALSE, NULL); + if (!p->finish_event) { + PL_ERR(gpu, "Failed to create finish() event"); + goto error; + } + } + } + + // If fences are not available, we will have to poll a event query instead + if (!p->finish_fence) { + // Buffers for dummy copy/readback (see d3d11_gpu_finish()) + p->finish_buf_src = pl_buf_create(gpu, pl_buf_params( + .size = sizeof(uint32_t), + .drawable = true, // Make these vertex 
buffers for 10level9 + .initial_data = &(uint32_t) {0x11223344}, + )); + p->finish_buf_dst = pl_buf_create(gpu, pl_buf_params( + .size = sizeof(uint32_t), + .host_readable = true, + .drawable = true, + )); + + D3D(ID3D11Device_CreateQuery(p->dev, + &(D3D11_QUERY_DESC) { D3D11_QUERY_EVENT }, &p->finish_query)); + } + + pl_d3d11_flush_message_queue(ctx, "After gpu create"); + + success = true; +error: + SAFE_RELEASE(dxgi_dev); + SAFE_RELEASE(adapter); + SAFE_RELEASE(adapter4); + if (success) { + return pl_gpu_finalize(gpu); + } else { + d3d11_gpu_destroy(gpu); + return NULL; + } +} diff --git a/src/d3d11/gpu.h b/src/d3d11/gpu.h new file mode 100644 index 0000000..cbc706a --- /dev/null +++ b/src/d3d11/gpu.h @@ -0,0 +1,212 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. 
+ */

#pragma once

#include <stdalign.h>
#include <d3d11_4.h>
#include <dxgi1_6.h>
#include <d3dcompiler.h>
#include <spirv_cross_c.h>

#include "../gpu.h"
#include "../glsl/spirv.h"

#include "common.h"
#include "utils.h"

// Create a pl_gpu backed by the device held in `ctx`
pl_gpu pl_gpu_create_d3d11(struct d3d11_ctx *ctx);

// --- pl_gpu internal structs and helpers

// Size of one constant in a constant buffer
#define CBUF_ELEM (sizeof(float[4]))

// Suballocated streaming buffer (used for the vertex and index streams)
struct d3d_stream_buf {
    UINT bind_flags;    // D3D11_BIND_* flags the buffer is created with
    ID3D11Buffer *buf;
    size_t size;
    size_t used;
    unsigned int align;
};

struct pl_gpu_d3d11 {
    struct pl_gpu_fns impl;
    struct d3d11_ctx *ctx;
    ID3D11Device *dev;
    ID3D11Device1 *dev1;
    ID3D11Device5 *dev5;
    ID3D11DeviceContext *imm;
    ID3D11DeviceContext1 *imm1;
    ID3D11DeviceContext4 *imm4;

    // The Direct3D 11 minor version number
    int minor;

    pl_spirv spirv;

    pD3DCompile D3DCompile;
    struct dll_version d3d_compiler_ver;

    // Device capabilities
    D3D_FEATURE_LEVEL fl;
    bool has_timestamp_queries;
    bool has_monitored_fences;

    int max_srvs;
    int max_uavs;

    // Streaming vertex and index buffers
    struct d3d_stream_buf vbuf;
    struct d3d_stream_buf ibuf;

    // Shared rasterizer state
    ID3D11RasterizerState *rstate;

    // Shared depth-stencil state
    ID3D11DepthStencilState *dsstate;

    // Array of ID3D11SamplerStates for every combination of sample/address modes
    ID3D11SamplerState *samplers[PL_TEX_SAMPLE_MODE_COUNT][PL_TEX_ADDRESS_MODE_COUNT];

    // Resources for finish()
    ID3D11Fence *finish_fence;
    uint64_t finish_value;
    HANDLE finish_event;
    ID3D11Query *finish_query;
    pl_buf finish_buf_src;
    pl_buf finish_buf_dst;
};

void pl_d3d11_setup_formats(struct pl_gpu_t *gpu);

void pl_d3d11_timer_start(pl_gpu gpu, pl_timer timer);
void pl_d3d11_timer_end(pl_gpu gpu, pl_timer timer);

struct pl_buf_d3d11 {
    ID3D11Buffer *buf;
    ID3D11Buffer *staging;
    ID3D11ShaderResourceView *raw_srv;
    ID3D11UnorderedAccessView *raw_uav;
    ID3D11ShaderResourceView *texel_srv;
    ID3D11UnorderedAccessView *texel_uav;

    // Optional system-memory mirror of the buffer contents; `dirty` marks
    // that the mirror has been written to
    char *data;
    bool dirty;
};

void pl_d3d11_buf_destroy(pl_gpu gpu, pl_buf buf);
pl_buf pl_d3d11_buf_create(pl_gpu gpu, const struct pl_buf_params *params);
void pl_d3d11_buf_write(pl_gpu gpu, pl_buf buf, size_t offset, const void *data,
                        size_t size);
bool pl_d3d11_buf_read(pl_gpu gpu, pl_buf buf, size_t offset, void *dest,
                       size_t size);
void pl_d3d11_buf_copy(pl_gpu gpu, pl_buf dst, size_t dst_offset, pl_buf src,
                       size_t src_offset, size_t size);

// Ensure a buffer is up-to-date with its system memory mirror before it is used
void pl_d3d11_buf_resolve(pl_gpu gpu, pl_buf buf);

struct pl_tex_d3d11 {
    // res mirrors one of tex1d, tex2d or tex3d for convenience. It does not
    // hold an additional reference to the texture object.
    ID3D11Resource *res;

    ID3D11Texture1D *tex1d;
    ID3D11Texture2D *tex2d;
    ID3D11Texture3D *tex3d;
    int array_slice;

    // Mirrors one of staging1d, staging2d, or staging3d, and doesn't hold a ref
    ID3D11Resource *staging;

    // Staging textures for pl_tex_download
    ID3D11Texture1D *staging1d;
    ID3D11Texture2D *staging2d;
    ID3D11Texture3D *staging3d;

    ID3D11ShaderResourceView *srv;
    ID3D11RenderTargetView *rtv;
    ID3D11UnorderedAccessView *uav;

    // for tex_upload/download fallback code
    pl_fmt texel_fmt;
};

void pl_d3d11_tex_destroy(pl_gpu gpu, pl_tex tex);
pl_tex pl_d3d11_tex_create(pl_gpu gpu, const struct pl_tex_params *params);
void pl_d3d11_tex_invalidate(pl_gpu gpu, pl_tex tex);
void pl_d3d11_tex_clear_ex(pl_gpu gpu, pl_tex tex,
                           const union pl_clear_color color);
void pl_d3d11_tex_blit(pl_gpu gpu, const struct pl_tex_blit_params *params);
bool pl_d3d11_tex_upload(pl_gpu gpu, const struct pl_tex_transfer_params *params);
bool pl_d3d11_tex_download(pl_gpu gpu, const struct pl_tex_transfer_params *params);

// Constant buffer layout used for gl_NumWorkGroups emulation
struct d3d_num_workgroups_buf {
    alignas(CBUF_ELEM) uint32_t num_wgs[3];
};

enum {
    HLSL_BINDING_NOT_USED = -1, // Slot should always be bound as NULL
    HLSL_BINDING_NUM_WORKGROUPS = -2, // Slot used for gl_NumWorkGroups emulation
};

// Represents a specific shader stage in a pl_pass (VS, PS, CS)
struct d3d_pass_stage {
    // Lists for each resource type, to simplify binding in pl_pass_run. Indexes
    // match the index of the arrays passed to the ID3D11DeviceContext methods.
    // Entries are the index of pass->params.descriptors which should be bound
    // in that position, or a HLSL_BINDING_* special value.
    PL_ARRAY(int) cbvs;
    PL_ARRAY(int) srvs;
    PL_ARRAY(int) samplers;
};

struct pl_pass_d3d11 {
    ID3D11PixelShader *ps;
    ID3D11VertexShader *vs;
    ID3D11ComputeShader *cs;
    ID3D11InputLayout *layout;
    ID3D11BlendState *bstate;

    // gl_NumWorkGroups emulation
    struct d3d_num_workgroups_buf last_num_wgs;
    ID3D11Buffer *num_workgroups_buf;
    bool num_workgroups_used;

    // Maximum binding number
    int max_binding;

    struct d3d_pass_stage main; // PS and CS
    struct d3d_pass_stage vertex;

    // List of resources, as in `struct d3d_pass_stage`, except UAVs are shared
    // between all shader stages
    PL_ARRAY(int) uavs;

    // Pre-allocated resource arrays to use in pl_pass_run
    ID3D11Buffer **cbv_arr;
    ID3D11ShaderResourceView **srv_arr;
    ID3D11SamplerState **sampler_arr;
    ID3D11UnorderedAccessView **uav_arr;
};

void pl_d3d11_pass_destroy(pl_gpu gpu, pl_pass pass);
const struct pl_pass_t *pl_d3d11_pass_create(pl_gpu gpu,
                                             const struct pl_pass_params *params);
void pl_d3d11_pass_run(pl_gpu gpu, const struct pl_pass_run_params *params);
diff --git a/src/d3d11/gpu_buf.c b/src/d3d11/gpu_buf.c new file mode 100644 index 0000000..955e6e1 --- /dev/null +++ b/src/d3d11/gpu_buf.c @@ -0,0 +1,310 @@ +/* + * This file is part of libplacebo. 
+ * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "gpu.h" +#include "formats.h" + +void pl_d3d11_buf_destroy(pl_gpu gpu, pl_buf buf) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + struct d3d11_ctx *ctx = p->ctx; + struct pl_buf_d3d11 *buf_p = PL_PRIV(buf); + + SAFE_RELEASE(buf_p->buf); + SAFE_RELEASE(buf_p->staging); + SAFE_RELEASE(buf_p->raw_srv); + SAFE_RELEASE(buf_p->raw_uav); + SAFE_RELEASE(buf_p->texel_srv); + SAFE_RELEASE(buf_p->texel_uav); + + pl_d3d11_flush_message_queue(ctx, "After buffer destroy"); + + pl_free((void *) buf); +} + +pl_buf pl_d3d11_buf_create(pl_gpu gpu, const struct pl_buf_params *params) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + struct d3d11_ctx *ctx = p->ctx; + + struct pl_buf_t *buf = pl_zalloc_obj(NULL, buf, struct pl_buf_d3d11); + buf->params = *params; + buf->params.initial_data = NULL; + + struct pl_buf_d3d11 *buf_p = PL_PRIV(buf); + + D3D11_BUFFER_DESC desc = { .ByteWidth = params->size }; + + if (params->uniform && !params->format && + (params->storable || params->drawable)) + { + // TODO: Figure out what to do with these + PL_ERR(gpu, "Uniform buffers cannot share any other buffer type"); + goto error; + } + + // TODO: Distinguish between uniform buffers and texel uniform buffers. + // Currently we assume that if uniform and format are set, it's a texel + // buffer and NOT a uniform buffer. 
+ if (params->uniform && !params->format) { + desc.BindFlags |= D3D11_BIND_CONSTANT_BUFFER; + desc.ByteWidth = PL_ALIGN2(desc.ByteWidth, CBUF_ELEM); + } + if (params->uniform && params->format) { + desc.BindFlags |= D3D11_BIND_SHADER_RESOURCE; + } + if (params->storable) { + desc.BindFlags |= D3D11_BIND_UNORDERED_ACCESS + | D3D11_BIND_SHADER_RESOURCE; + desc.ByteWidth = PL_ALIGN2(desc.ByteWidth, sizeof(float)); + desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS; + } + if (params->drawable) { + desc.BindFlags |= D3D11_BIND_VERTEX_BUFFER; + + // In FL9_x, a vertex buffer can't also be an index buffer, so index + // buffers are unsupported in FL9_x for now + if (p->fl > D3D_FEATURE_LEVEL_9_3) + desc.BindFlags |= D3D11_BIND_INDEX_BUFFER; + } + + char *data = NULL; + + // D3D11 doesn't allow partial constant buffer updates without special + // conditions. To support partial buffer updates, keep a mirror of the + // buffer data in system memory and upload the whole thing before the buffer + // is used. + // + // Note: We don't use a staging buffer for this because of Intel. + // https://github.com/mpv-player/mpv/issues/5293 + // https://crbug.com/593024 + if (params->uniform && !params->format && params->host_writable) { + data = pl_zalloc(buf, desc.ByteWidth); + buf_p->data = data; + } + + D3D11_SUBRESOURCE_DATA srdata = { 0 }; + if (params->initial_data) { + if (desc.ByteWidth != params->size) { + // If the size had to be rounded-up, uploading from + // params->initial_data is technically undefined behavior, so copy + // the initial data to an allocation first + if (!data) + data = pl_zalloc(buf, desc.ByteWidth); + srdata.pSysMem = data; + } else { + srdata.pSysMem = params->initial_data; + } + + if (data) + memcpy(data, params->initial_data, params->size); + } + + D3D(ID3D11Device_CreateBuffer(p->dev, &desc, + params->initial_data ? 
&srdata : NULL, + &buf_p->buf)); + + if (!buf_p->data) + pl_free(data); + + // Create raw views for PL_DESC_BUF_STORAGE + if (params->storable) { + // A SRV is used for PL_DESC_ACCESS_READONLY + D3D11_SHADER_RESOURCE_VIEW_DESC sdesc = { + .Format = DXGI_FORMAT_R32_TYPELESS, + .ViewDimension = D3D11_SRV_DIMENSION_BUFFEREX, + .BufferEx = { + .NumElements = + PL_ALIGN2(buf->params.size, sizeof(float)) / sizeof(float), + .Flags = D3D11_BUFFEREX_SRV_FLAG_RAW, + }, + }; + D3D(ID3D11Device_CreateShaderResourceView(p->dev, + (ID3D11Resource *) buf_p->buf, &sdesc, &buf_p->raw_srv)); + + // A UAV is used for all other access modes + D3D11_UNORDERED_ACCESS_VIEW_DESC udesc = { + .Format = DXGI_FORMAT_R32_TYPELESS, + .ViewDimension = D3D11_UAV_DIMENSION_BUFFER, + .Buffer = { + .NumElements = + PL_ALIGN2(buf->params.size, sizeof(float)) / sizeof(float), + .Flags = D3D11_BUFFER_UAV_FLAG_RAW, + }, + }; + D3D(ID3D11Device_CreateUnorderedAccessView(p->dev, + (ID3D11Resource *) buf_p->buf, &udesc, &buf_p->raw_uav)); + } + + // Create a typed SRV for PL_BUF_TEXEL_UNIFORM and PL_BUF_TEXEL_STORAGE + if (params->format) { + if (params->uniform) { + D3D11_SHADER_RESOURCE_VIEW_DESC sdesc = { + .Format = fmt_to_dxgi(params->format), + .ViewDimension = D3D11_SRV_DIMENSION_BUFFER, + .Buffer = { + .NumElements = + PL_ALIGN(buf->params.size, buf->params.format->texel_size) + / buf->params.format->texel_size, + }, + }; + D3D(ID3D11Device_CreateShaderResourceView(p->dev, + (ID3D11Resource *) buf_p->buf, &sdesc, &buf_p->texel_srv)); + } + + // Create a typed UAV for PL_BUF_TEXEL_STORAGE + if (params->storable) { + D3D11_UNORDERED_ACCESS_VIEW_DESC udesc = { + .Format = fmt_to_dxgi(buf->params.format), + .ViewDimension = D3D11_UAV_DIMENSION_BUFFER, + .Buffer = { + .NumElements = + PL_ALIGN(buf->params.size, buf->params.format->texel_size) + / buf->params.format->texel_size, + }, + }; + D3D(ID3D11Device_CreateUnorderedAccessView(p->dev, + (ID3D11Resource *) buf_p->buf, &udesc, &buf_p->texel_uav)); + 
} + } + + + if (!buf_p->data) { + // Create the staging buffer regardless of whether params->host_readable + // is set or not, so that buf_copy can copy to system-memory-backed + // buffers + // TODO: Consider sharing a big staging buffer for this, rather than + // having one staging buffer per buffer + desc.BindFlags = 0; + desc.MiscFlags = 0; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; + desc.Usage = D3D11_USAGE_STAGING; + D3D(ID3D11Device_CreateBuffer(p->dev, &desc, NULL, &buf_p->staging)); + } + + pl_d3d11_flush_message_queue(ctx, "After buffer create"); + + return buf; + +error: + pl_d3d11_buf_destroy(gpu, buf); + return NULL; +} + +void pl_d3d11_buf_write(pl_gpu gpu, pl_buf buf, size_t offset, const void *data, + size_t size) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + struct pl_buf_d3d11 *buf_p = PL_PRIV(buf); + + if (buf_p->data) { + memcpy(buf_p->data + offset, data, size); + buf_p->dirty = true; + } else { + ID3D11DeviceContext_UpdateSubresource(p->imm, + (ID3D11Resource *) buf_p->buf, 0, (&(D3D11_BOX) { + .left = offset, + .top = 0, + .front = 0, + .right = offset + size, + .bottom = 1, + .back = 1, + }), data, 0, 0); + } +} + +void pl_d3d11_buf_resolve(pl_gpu gpu, pl_buf buf) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + struct pl_buf_d3d11 *buf_p = PL_PRIV(buf); + + if (!buf_p->data || !buf_p->dirty) + return; + + ID3D11DeviceContext_UpdateSubresource(p->imm, (ID3D11Resource *) buf_p->buf, + 0, NULL, buf_p->data, 0, 0); +} + +bool pl_d3d11_buf_read(pl_gpu gpu, pl_buf buf, size_t offset, void *dest, + size_t size) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + struct d3d11_ctx *ctx = p->ctx; + struct pl_buf_d3d11 *buf_p = PL_PRIV(buf); + + // If there is a system-memory mirror of the buffer contents, use it + if (buf_p->data) { + memcpy(dest, buf_p->data + offset, size); + return true; + } + + ID3D11DeviceContext_CopyResource(p->imm, (ID3D11Resource *) buf_p->staging, + (ID3D11Resource *) buf_p->buf); + + D3D11_MAPPED_SUBRESOURCE lock; + 
D3D(ID3D11DeviceContext_Map(p->imm, (ID3D11Resource *) buf_p->staging, 0, + D3D11_MAP_READ, 0, &lock)); + + char *csrc = lock.pData; + memcpy(dest, csrc + offset, size); + + ID3D11DeviceContext_Unmap(p->imm, (ID3D11Resource *) buf_p->staging, 0); + + pl_d3d11_flush_message_queue(ctx, "After buffer read"); + + return true; + +error: + return false; +} + +void pl_d3d11_buf_copy(pl_gpu gpu, pl_buf dst, size_t dst_offset, pl_buf src, + size_t src_offset, size_t size) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + struct d3d11_ctx *ctx = p->ctx; + struct pl_buf_d3d11 *src_p = PL_PRIV(src); + struct pl_buf_d3d11 *dst_p = PL_PRIV(dst); + + // Handle system memory copies in case one or both of the buffers has a + // system memory mirror + if (src_p->data && dst_p->data) { + memcpy(dst_p->data + dst_offset, src_p->data + src_offset, size); + dst_p->dirty = true; + } else if (src_p->data) { + pl_d3d11_buf_write(gpu, dst, dst_offset, src_p->data + src_offset, size); + } else if (dst_p->data) { + if (pl_d3d11_buf_read(gpu, src, src_offset, dst_p->data + dst_offset, size)) { + dst_p->dirty = true; + } else { + PL_ERR(gpu, "Failed to read from GPU during buffer copy"); + } + } else { + ID3D11DeviceContext_CopySubresourceRegion(p->imm, + (ID3D11Resource *) dst_p->buf, 0, dst_offset, 0, 0, + (ID3D11Resource *) src_p->buf, 0, (&(D3D11_BOX) { + .left = src_offset, + .top = 0, + .front = 0, + .right = src_offset + size, + .bottom = 1, + .back = 1, + })); + } + + pl_d3d11_flush_message_queue(ctx, "After buffer copy"); +} diff --git a/src/d3d11/gpu_pass.c b/src/d3d11/gpu_pass.c new file mode 100644 index 0000000..0e46ccd --- /dev/null +++ b/src/d3d11/gpu_pass.c @@ -0,0 +1,1293 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "gpu.h" +#include "formats.h" +#include "glsl/spirv.h" +#include "../cache.h" + +struct stream_buf_slice { + const void *data; + unsigned int size; + unsigned int offset; +}; + +// Upload one or more slices of single-use data to a suballocated dynamic +// buffer. Only call this once per-buffer per-pass, since it will discard or +// reallocate the buffer when full. +static bool stream_buf_upload(pl_gpu gpu, struct d3d_stream_buf *stream, + struct stream_buf_slice *slices, int num_slices) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + struct d3d11_ctx *ctx = p->ctx; + unsigned int align = PL_DEF(stream->align, sizeof(float)); + + // Get total size, rounded up to the buffer's alignment + size_t size = 0; + for (int i = 0; i < num_slices; i++) + size += PL_ALIGN2(slices[i].size, align); + + if (size > gpu->limits.max_buf_size) { + PL_ERR(gpu, "Streaming buffer is too large"); + return -1; + } + + // If the data doesn't fit, realloc the buffer + if (size > stream->size) { + size_t new_size = stream->size; + // Arbitrary base size + if (!new_size) + new_size = 16 * 1024; + while (new_size < size) + new_size *= 2; + new_size = PL_MIN(new_size, gpu->limits.max_buf_size); + + ID3D11Buffer *new_buf; + D3D11_BUFFER_DESC vbuf_desc = { + .ByteWidth = new_size, + .Usage = D3D11_USAGE_DYNAMIC, + .BindFlags = stream->bind_flags, + .CPUAccessFlags = D3D11_CPU_ACCESS_WRITE, + }; + D3D(ID3D11Device_CreateBuffer(p->dev, &vbuf_desc, NULL, &new_buf)); + + SAFE_RELEASE(stream->buf); + stream->buf = new_buf; + stream->size = new_size; + stream->used = 0; + } + + bool 
discard = false; + size_t offset = stream->used; + if (offset + size > stream->size) { + // We reached the end of the buffer, so discard and wrap around + discard = true; + offset = 0; + } + + D3D11_MAPPED_SUBRESOURCE map = {0}; + UINT type = discard ? D3D11_MAP_WRITE_DISCARD : D3D11_MAP_WRITE_NO_OVERWRITE; + D3D(ID3D11DeviceContext_Map(p->imm, (ID3D11Resource *) stream->buf, 0, type, + 0, &map)); + + // Upload each slice + char *cdata = map.pData; + stream->used = offset; + for (int i = 0; i < num_slices; i++) { + slices[i].offset = stream->used; + memcpy(cdata + slices[i].offset, slices[i].data, slices[i].size); + stream->used += PL_ALIGN2(slices[i].size, align); + } + + ID3D11DeviceContext_Unmap(p->imm, (ID3D11Resource *) stream->buf, 0); + + return true; + +error: + return false; +} + +static const char *get_shader_target(pl_gpu gpu, enum glsl_shader_stage stage) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + switch (p->fl) { + default: + switch (stage) { + case GLSL_SHADER_VERTEX: return "vs_5_0"; + case GLSL_SHADER_FRAGMENT: return "ps_5_0"; + case GLSL_SHADER_COMPUTE: return "cs_5_0"; + } + break; + case D3D_FEATURE_LEVEL_10_1: + switch (stage) { + case GLSL_SHADER_VERTEX: return "vs_4_1"; + case GLSL_SHADER_FRAGMENT: return "ps_4_1"; + case GLSL_SHADER_COMPUTE: return "cs_4_1"; + } + break; + case D3D_FEATURE_LEVEL_10_0: + switch (stage) { + case GLSL_SHADER_VERTEX: return "vs_4_0"; + case GLSL_SHADER_FRAGMENT: return "ps_4_0"; + case GLSL_SHADER_COMPUTE: return "cs_4_0"; + } + break; + case D3D_FEATURE_LEVEL_9_3: + switch (stage) { + case GLSL_SHADER_VERTEX: return "vs_4_0_level_9_3"; + case GLSL_SHADER_FRAGMENT: return "ps_4_0_level_9_3"; + case GLSL_SHADER_COMPUTE: return NULL; + } + break; + case D3D_FEATURE_LEVEL_9_2: + case D3D_FEATURE_LEVEL_9_1: + switch (stage) { + case GLSL_SHADER_VERTEX: return "vs_4_0_level_9_1"; + case GLSL_SHADER_FRAGMENT: return "ps_4_0_level_9_1"; + case GLSL_SHADER_COMPUTE: return NULL; + } + break; + } + return NULL; +} + 
+static SpvExecutionModel stage_to_spv(enum glsl_shader_stage stage) +{ + static const SpvExecutionModel spv_execution_model[] = { + [GLSL_SHADER_VERTEX] = SpvExecutionModelVertex, + [GLSL_SHADER_FRAGMENT] = SpvExecutionModelFragment, + [GLSL_SHADER_COMPUTE] = SpvExecutionModelGLCompute, + }; + return spv_execution_model[stage]; +} + +#define SC(cmd) \ + do { \ + spvc_result res = (cmd); \ + if (res != SPVC_SUCCESS) { \ + PL_ERR(gpu, "%s: %s (%d) (%s:%d)", \ + #cmd, sc ? spvc_context_get_last_error_string(sc) : "", \ + res, __FILE__, __LINE__); \ + goto error; \ + } \ + } while (0) + +// Some decorations, like SpvDecorationNonWritable, are actually found on the +// members of a buffer block, rather than the buffer block itself. If all +// members have a certain decoration, SPIRV-Cross considers it to apply to the +// buffer block too, which determines things like whether a SRV or UAV is used +// for an SSBO. This function checks if SPIRV-Cross considers a decoration to +// apply to a buffer block. 
+static spvc_result buffer_block_has_decoration(spvc_compiler sc_comp, + spvc_variable_id id, + SpvDecoration decoration, + bool *out) +{ + const SpvDecoration *decorations; + size_t num_decorations = 0; + + spvc_result res = spvc_compiler_get_buffer_block_decorations(sc_comp, id, + &decorations, &num_decorations); + if (res != SPVC_SUCCESS) + return res; + + for (size_t j = 0; j < num_decorations; j++) { + if (decorations[j] == decoration) { + *out = true; + return res; + } + } + + *out = false; + return res; +} + +static bool alloc_hlsl_reg_bindings(pl_gpu gpu, pl_pass pass, + struct d3d_pass_stage *pass_s, + spvc_context sc, + spvc_compiler sc_comp, + spvc_resources resources, + spvc_resource_type res_type, + enum glsl_shader_stage stage) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); + const spvc_reflected_resource *res_list; + size_t res_count; + + SC(spvc_resources_get_resource_list_for_type(resources, res_type, + &res_list, &res_count)); + + // In a raster pass, one of the UAV slots is used by the runtime for the RTV + int uav_offset = stage == GLSL_SHADER_COMPUTE ? 
0 : 1; + int max_uavs = p->max_uavs - uav_offset; + + for (int i = 0; i < res_count; i++) { + unsigned int binding = spvc_compiler_get_decoration(sc_comp, + res_list[i].id, SpvDecorationBinding); + unsigned int descriptor_set = spvc_compiler_get_decoration(sc_comp, + res_list[i].id, SpvDecorationDescriptorSet); + if (descriptor_set != 0) + continue; + + pass_p->max_binding = PL_MAX(pass_p->max_binding, binding); + + spvc_hlsl_resource_binding hlslbind; + spvc_hlsl_resource_binding_init(&hlslbind); + hlslbind.stage = stage_to_spv(stage); + hlslbind.binding = binding; + hlslbind.desc_set = descriptor_set; + + bool has_cbv = false, has_sampler = false, has_srv = false, has_uav = false; + switch (res_type) { + case SPVC_RESOURCE_TYPE_UNIFORM_BUFFER: + has_cbv = true; + break; + case SPVC_RESOURCE_TYPE_STORAGE_BUFFER:; + bool non_writable_bb = false; + SC(buffer_block_has_decoration(sc_comp, res_list[i].id, + SpvDecorationNonWritable, &non_writable_bb)); + if (non_writable_bb) { + has_srv = true; + } else { + has_uav = true; + } + break; + case SPVC_RESOURCE_TYPE_STORAGE_IMAGE:; + bool non_writable = spvc_compiler_has_decoration(sc_comp, + res_list[i].id, SpvDecorationNonWritable); + if (non_writable) { + has_srv = true; + } else { + has_uav = true; + } + break; + case SPVC_RESOURCE_TYPE_SEPARATE_IMAGE: + has_srv = true; + break; + case SPVC_RESOURCE_TYPE_SAMPLED_IMAGE:; + spvc_type type = spvc_compiler_get_type_handle(sc_comp, + res_list[i].type_id); + SpvDim dimension = spvc_type_get_image_dimension(type); + // Uniform texel buffers are technically sampled images, but they + // aren't sampled from, so don't allocate a sampler + if (dimension != SpvDimBuffer) + has_sampler = true; + has_srv = true; + break; + default: + break; + } + + if (has_cbv) { + hlslbind.cbv.register_binding = pass_s->cbvs.num; + PL_ARRAY_APPEND(pass, pass_s->cbvs, binding); + if (pass_s->cbvs.num > D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT) { + PL_ERR(gpu, "Too many constant buffers in 
shader"); + goto error; + } + } + + if (has_sampler) { + hlslbind.sampler.register_binding = pass_s->samplers.num; + PL_ARRAY_APPEND(pass, pass_s->samplers, binding); + if (pass_s->samplers.num > D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT) { + PL_ERR(gpu, "Too many samplers in shader"); + goto error; + } + } + + if (has_srv) { + hlslbind.srv.register_binding = pass_s->srvs.num; + PL_ARRAY_APPEND(pass, pass_s->srvs, binding); + if (pass_s->srvs.num > p->max_srvs) { + PL_ERR(gpu, "Too many SRVs in shader"); + goto error; + } + } + + if (has_uav) { + // UAV registers are shared between the vertex and fragment shaders + // in a raster pass, so check if the UAV for this resource has + // already been allocated + bool uav_bound = false; + for (int j = 0; j < pass_p->uavs.num; j++) { + if (pass_p->uavs.elem[j] == binding) { + uav_bound = true; + break; + } + } + + if (!uav_bound) { + hlslbind.uav.register_binding = pass_p->uavs.num + uav_offset; + PL_ARRAY_APPEND(pass, pass_p->uavs, binding); + if (pass_p->uavs.num > max_uavs) { + PL_ERR(gpu, "Too many UAVs in shader"); + goto error; + } + } + } + + SC(spvc_compiler_hlsl_add_resource_binding(sc_comp, &hlslbind)); + } + + return true; +error: + return false; +} + +static const char *shader_names[] = { + [GLSL_SHADER_VERTEX] = "vertex", + [GLSL_SHADER_FRAGMENT] = "fragment", + [GLSL_SHADER_COMPUTE] = "compute", +}; + +static ID3DBlob *shader_compile_glsl(pl_gpu gpu, pl_pass pass, + struct d3d_pass_stage *pass_s, + enum glsl_shader_stage stage, + const char *glsl) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); + void *tmp = pl_tmp(NULL); + spvc_context sc = NULL; + spvc_compiler sc_comp = NULL; + const char *hlsl = NULL; + ID3DBlob *out = NULL; + ID3DBlob *errors = NULL; + HRESULT hr; + + pl_clock_t start = pl_clock_now(); + pl_str spirv = pl_spirv_compile_glsl(p->spirv, tmp, gpu->glsl, stage, glsl); + if (!spirv.len) + goto error; + + pl_clock_t after_glsl = pl_clock_now(); + 
pl_log_cpu_time(gpu->log, start, after_glsl, "translating GLSL to SPIR-V"); + + SC(spvc_context_create(&sc)); + + spvc_parsed_ir sc_ir; + SC(spvc_context_parse_spirv(sc, (SpvId *) spirv.buf, + spirv.len / sizeof(SpvId), &sc_ir)); + + SC(spvc_context_create_compiler(sc, SPVC_BACKEND_HLSL, sc_ir, + SPVC_CAPTURE_MODE_TAKE_OWNERSHIP, + &sc_comp)); + + spvc_compiler_options sc_opts; + SC(spvc_compiler_create_compiler_options(sc_comp, &sc_opts)); + + int sc_shader_model; + if (p->fl >= D3D_FEATURE_LEVEL_11_0) { + sc_shader_model = 50; + } else if (p->fl >= D3D_FEATURE_LEVEL_10_1) { + sc_shader_model = 41; + } else { + sc_shader_model = 40; + } + + SC(spvc_compiler_options_set_uint(sc_opts, + SPVC_COMPILER_OPTION_HLSL_SHADER_MODEL, sc_shader_model)); + + // Unlike Vulkan and OpenGL, in D3D11, the clip-space is "flipped" with + // respect to framebuffer-space. In other words, if you render to a pixel at + // (0, -1), you have to sample from (0, 1) to get the value back. We unflip + // it by setting the following option, which inserts the equivalent of + // `gl_Position.y = -gl_Position.y` into the vertex shader + if (stage == GLSL_SHADER_VERTEX) { + SC(spvc_compiler_options_set_bool(sc_opts, + SPVC_COMPILER_OPTION_FLIP_VERTEX_Y, SPVC_TRUE)); + } + + // Bind readonly images and imageBuffers as SRVs. This is done because a lot + // of hardware (especially FL11_x hardware) has very poor format support for + // reading values from UAVs. It allows the common case of readonly and + // writeonly images to support more formats, though the less common case of + // readwrite images still requires format support for UAV loads (represented + // by the PL_FMT_CAP_READWRITE cap in libplacebo.) + // + // Note that setting this option comes at the cost of GLSL support. 
Readonly + // and readwrite images are the same type in GLSL, but SRV and UAV bound + // textures are different types in HLSL, so for example, a GLSL function + // with an image parameter may fail to compile as HLSL if it's called with a + // readonly image and a readwrite image at different call sites. + SC(spvc_compiler_options_set_bool(sc_opts, + SPVC_COMPILER_OPTION_HLSL_NONWRITABLE_UAV_TEXTURE_AS_SRV, SPVC_TRUE)); + + SC(spvc_compiler_install_compiler_options(sc_comp, sc_opts)); + + spvc_set active = NULL; + SC(spvc_compiler_get_active_interface_variables(sc_comp, &active)); + spvc_resources resources = NULL; + SC(spvc_compiler_create_shader_resources_for_active_variables( + sc_comp, &resources, active)); + + // Allocate HLSL registers for each resource type + alloc_hlsl_reg_bindings(gpu, pass, pass_s, sc, sc_comp, resources, + SPVC_RESOURCE_TYPE_SAMPLED_IMAGE, stage); + alloc_hlsl_reg_bindings(gpu, pass, pass_s, sc, sc_comp, resources, + SPVC_RESOURCE_TYPE_SEPARATE_IMAGE, stage); + alloc_hlsl_reg_bindings(gpu, pass, pass_s, sc, sc_comp, resources, + SPVC_RESOURCE_TYPE_UNIFORM_BUFFER, stage); + alloc_hlsl_reg_bindings(gpu, pass, pass_s, sc, sc_comp, resources, + SPVC_RESOURCE_TYPE_STORAGE_BUFFER, stage); + alloc_hlsl_reg_bindings(gpu, pass, pass_s, sc, sc_comp, resources, + SPVC_RESOURCE_TYPE_STORAGE_IMAGE, stage); + + if (stage == GLSL_SHADER_COMPUTE) { + // Check if the gl_NumWorkGroups builtin is used. If it is, we have to + // emulate it with a constant buffer, so allocate it a CBV register. 
+ spvc_variable_id num_workgroups_id = + spvc_compiler_hlsl_remap_num_workgroups_builtin(sc_comp); + if (num_workgroups_id) { + pass_p->num_workgroups_used = true; + + spvc_hlsl_resource_binding binding; + spvc_hlsl_resource_binding_init(&binding); + binding.stage = stage_to_spv(stage); + binding.binding = pass_p->max_binding + 1; + + // Allocate a CBV register for the buffer + binding.cbv.register_binding = pass_s->cbvs.num; + PL_ARRAY_APPEND(pass, pass_s->cbvs, HLSL_BINDING_NUM_WORKGROUPS); + if (pass_s->cbvs.num > + D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT) { + PL_ERR(gpu, "Not enough constant buffer slots for gl_NumWorkGroups"); + goto error; + } + + spvc_compiler_set_decoration(sc_comp, num_workgroups_id, + SpvDecorationDescriptorSet, 0); + spvc_compiler_set_decoration(sc_comp, num_workgroups_id, + SpvDecorationBinding, binding.binding); + + SC(spvc_compiler_hlsl_add_resource_binding(sc_comp, &binding)); + } + } + + SC(spvc_compiler_compile(sc_comp, &hlsl)); + + pl_clock_t after_spvc = pl_clock_now(); + pl_log_cpu_time(gpu->log, after_glsl, after_spvc, "translating SPIR-V to HLSL"); + + hr = p->D3DCompile(hlsl, strlen(hlsl), NULL, NULL, NULL, "main", + get_shader_target(gpu, stage), + D3DCOMPILE_SKIP_VALIDATION | D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &out, + &errors); + if (FAILED(hr)) { + SAFE_RELEASE(out); + PL_ERR(gpu, "D3DCompile failed: %s\n%.*s", pl_hresult_to_str(hr), + (int) ID3D10Blob_GetBufferSize(errors), + (char *) ID3D10Blob_GetBufferPointer(errors)); + goto error; + } + + pl_log_cpu_time(gpu->log, after_spvc, pl_clock_now(), "translating HLSL to DXBC"); + +error:; + if (hlsl) { + int level = out ? 
PL_LOG_DEBUG : PL_LOG_ERR; + PL_MSG(gpu, level, "%s shader HLSL source:", shader_names[stage]); + pl_msg_source(gpu->log, level, hlsl); + } + + if (sc) + spvc_context_destroy(sc); + SAFE_RELEASE(errors); + pl_free(tmp); + return out; +} + +struct d3d11_cache_header { + uint64_t hash; + bool num_workgroups_used; + int num_main_cbvs; + int num_main_srvs; + int num_main_samplers; + int num_vertex_cbvs; + int num_vertex_srvs; + int num_vertex_samplers; + int num_uavs; + size_t vert_bc_len; + size_t frag_bc_len; + size_t comp_bc_len; +}; + +static inline uint64_t pass_cache_signature(pl_gpu gpu, uint64_t *key, + const struct pl_pass_params *params) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + + uint64_t hash = CACHE_KEY_D3D_DXBC; // seed to uniquely identify d3d11 shaders + + pl_hash_merge(&hash, pl_str0_hash(params->glsl_shader)); + if (params->type == PL_PASS_RASTER) + pl_hash_merge(&hash, pl_str0_hash(params->vertex_shader)); + + // store hash based on the shader bodys as the lookup key + if (key) + *key = hash; + + // and add the compiler version information into the verification signature + pl_hash_merge(&hash, p->spirv->signature); + + unsigned spvc_major, spvc_minor, spvc_patch; + spvc_get_version(&spvc_major, &spvc_minor, &spvc_patch); + + pl_hash_merge(&hash, spvc_major); + pl_hash_merge(&hash, spvc_minor); + pl_hash_merge(&hash, spvc_patch); + + pl_hash_merge(&hash, ((uint64_t)p->d3d_compiler_ver.major << 48) + | ((uint64_t)p->d3d_compiler_ver.minor << 32) + | ((uint64_t)p->d3d_compiler_ver.build << 16) + | (uint64_t)p->d3d_compiler_ver.revision); + pl_hash_merge(&hash, p->fl); + + return hash; +} + +static inline size_t cache_payload_size(struct d3d11_cache_header *header) +{ + size_t required = (header->num_main_cbvs + header->num_main_srvs + + header->num_main_samplers + header->num_vertex_cbvs + + header->num_vertex_srvs + header->num_vertex_samplers + + header->num_uavs) * sizeof(int) + header->vert_bc_len + + header->frag_bc_len + header->comp_bc_len; + 
+ return required; +} + +static bool d3d11_use_cached_program(pl_gpu gpu, struct pl_pass_t *pass, + const struct pl_pass_params *params, + pl_cache_obj *obj, uint64_t *out_sig, + pl_str *vert_bc, pl_str *frag_bc, pl_str *comp_bc) +{ + struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); + const pl_cache gpu_cache = pl_gpu_cache(gpu); + if (!gpu_cache) + return false; + + *out_sig = pass_cache_signature(gpu, &obj->key, params); + if (!pl_cache_get(gpu_cache, obj)) + return false; + + pl_str cache = (pl_str) { obj->data, obj->size }; + if (cache.len < sizeof(struct d3d11_cache_header)) + return false; + + struct d3d11_cache_header *header = (struct d3d11_cache_header *) cache.buf; + cache = pl_str_drop(cache, sizeof(*header)); + + if (header->hash != *out_sig) + return false; + + // determine required cache size before reading anything + size_t required = cache_payload_size(header); + + if (cache.len < required) + return false; + + pass_p->num_workgroups_used = header->num_workgroups_used; + +#define GET_ARRAY(object, name, num_elems) \ + do { \ + PL_ARRAY_MEMDUP(pass, (object)->name, cache.buf, num_elems); \ + cache = pl_str_drop(cache, num_elems * sizeof(*(object)->name.elem)); \ + } while (0) + +#define GET_STAGE_ARRAY(stage, name) \ + GET_ARRAY(&pass_p->stage, name, header->num_##stage##_##name) + + GET_STAGE_ARRAY(main, cbvs); + GET_STAGE_ARRAY(main, srvs); + GET_STAGE_ARRAY(main, samplers); + GET_STAGE_ARRAY(vertex, cbvs); + GET_STAGE_ARRAY(vertex, srvs); + GET_STAGE_ARRAY(vertex, samplers); + GET_ARRAY(pass_p, uavs, header->num_uavs); + +#define GET_SHADER(ptr) \ + do { \ + if (ptr) \ + *ptr = pl_str_take(cache, header->ptr##_len); \ + cache = pl_str_drop(cache, header->ptr##_len); \ + } while (0) + + GET_SHADER(vert_bc); + GET_SHADER(frag_bc); + GET_SHADER(comp_bc); + + return true; +} + +static void d3d11_update_program_cache(pl_gpu gpu, struct pl_pass_t *pass, + uint64_t key, uint64_t sig, + const pl_str *vs_str, const pl_str *ps_str, + const pl_str *cs_str) +{ + 
struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); + const pl_cache gpu_cache = pl_gpu_cache(gpu); + if (!gpu_cache) + return; + + struct d3d11_cache_header header = { + .hash = sig, + .num_workgroups_used = pass_p->num_workgroups_used, + .num_main_cbvs = pass_p->main.cbvs.num, + .num_main_srvs = pass_p->main.srvs.num, + .num_main_samplers = pass_p->main.samplers.num, + .num_vertex_cbvs = pass_p->vertex.cbvs.num, + .num_vertex_srvs = pass_p->vertex.srvs.num, + .num_vertex_samplers = pass_p->vertex.samplers.num, + .num_uavs = pass_p->uavs.num, + .vert_bc_len = vs_str ? vs_str->len : 0, + .frag_bc_len = ps_str ? ps_str->len : 0, + .comp_bc_len = cs_str ? cs_str->len : 0, + }; + + size_t cache_size = sizeof(header) + cache_payload_size(&header); + pl_str cache = {0}; + pl_str_append(NULL, &cache, (pl_str){ (uint8_t *) &header, sizeof(header) }); + +#define WRITE_ARRAY(name) pl_str_append(NULL, &cache, \ + (pl_str){ (uint8_t *) pass_p->name.elem, \ + sizeof(*pass_p->name.elem) * pass_p->name.num }) + WRITE_ARRAY(main.cbvs); + WRITE_ARRAY(main.srvs); + WRITE_ARRAY(main.samplers); + WRITE_ARRAY(vertex.cbvs); + WRITE_ARRAY(vertex.srvs); + WRITE_ARRAY(vertex.samplers); + WRITE_ARRAY(uavs); + + if (vs_str) + pl_str_append(NULL, &cache, *vs_str); + + if (ps_str) + pl_str_append(NULL, &cache, *ps_str); + + if (cs_str) + pl_str_append(NULL, &cache, *cs_str); + + pl_assert(cache_size == cache.len); + pl_cache_str(gpu_cache, key, &cache); +} + +void pl_d3d11_pass_destroy(pl_gpu gpu, pl_pass pass) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + struct d3d11_ctx *ctx = p->ctx; + struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); + + SAFE_RELEASE(pass_p->vs); + SAFE_RELEASE(pass_p->ps); + SAFE_RELEASE(pass_p->cs); + SAFE_RELEASE(pass_p->layout); + SAFE_RELEASE(pass_p->bstate); + SAFE_RELEASE(pass_p->num_workgroups_buf); + + pl_d3d11_flush_message_queue(ctx, "After pass destroy"); + + pl_free((void *) pass); +} + +static bool pass_create_raster(pl_gpu gpu, struct pl_pass_t *pass, + const struct 
pl_pass_params *params) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + struct d3d11_ctx *ctx = p->ctx; + struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); + ID3DBlob *vs_blob = NULL; + pl_str vs_str = {0}; + ID3DBlob *ps_blob = NULL; + pl_str ps_str = {0}; + D3D11_INPUT_ELEMENT_DESC *in_descs = NULL; + pl_cache_obj obj = {0}; + uint64_t sig = 0; + bool success = false; + + if (d3d11_use_cached_program(gpu, pass, params, &obj, &sig, &vs_str, &ps_str, NULL)) + PL_DEBUG(gpu, "Using cached DXBC shaders"); + + pl_assert((vs_str.len == 0) == (ps_str.len == 0)); + if (vs_str.len == 0) { + vs_blob = shader_compile_glsl(gpu, pass, &pass_p->vertex, + GLSL_SHADER_VERTEX, params->vertex_shader); + if (!vs_blob) + goto error; + + vs_str = (pl_str) { + .buf = ID3D10Blob_GetBufferPointer(vs_blob), + .len = ID3D10Blob_GetBufferSize(vs_blob), + }; + + ps_blob = shader_compile_glsl(gpu, pass, &pass_p->main, + GLSL_SHADER_FRAGMENT, params->glsl_shader); + if (!ps_blob) + goto error; + + ps_str = (pl_str) { + .buf = ID3D10Blob_GetBufferPointer(ps_blob), + .len = ID3D10Blob_GetBufferSize(ps_blob), + }; + } + + D3D(ID3D11Device_CreateVertexShader(p->dev, vs_str.buf, vs_str.len, NULL, + &pass_p->vs)); + + D3D(ID3D11Device_CreatePixelShader(p->dev, ps_str.buf, ps_str.len, NULL, + &pass_p->ps)); + + in_descs = pl_calloc_ptr(pass, params->num_vertex_attribs, in_descs); + for (int i = 0; i < params->num_vertex_attribs; i++) { + struct pl_vertex_attrib *va = ¶ms->vertex_attribs[i]; + + in_descs[i] = (D3D11_INPUT_ELEMENT_DESC) { + // The semantic name doesn't mean much and is just used to verify + // the input description matches the shader. SPIRV-Cross always + // uses TEXCOORD, so we should too. 
+ .SemanticName = "TEXCOORD", + .SemanticIndex = va->location, + .AlignedByteOffset = va->offset, + .Format = fmt_to_dxgi(va->fmt), + }; + } + D3D(ID3D11Device_CreateInputLayout(p->dev, in_descs, + params->num_vertex_attribs, vs_str.buf, vs_str.len, &pass_p->layout)); + + static const D3D11_BLEND blend_options[] = { + [PL_BLEND_ZERO] = D3D11_BLEND_ZERO, + [PL_BLEND_ONE] = D3D11_BLEND_ONE, + [PL_BLEND_SRC_ALPHA] = D3D11_BLEND_SRC_ALPHA, + [PL_BLEND_ONE_MINUS_SRC_ALPHA] = D3D11_BLEND_INV_SRC_ALPHA, + }; + + D3D11_BLEND_DESC bdesc = { + .RenderTarget[0] = { + .RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL, + }, + }; + if (params->blend_params) { + bdesc.RenderTarget[0] = (D3D11_RENDER_TARGET_BLEND_DESC) { + .BlendEnable = TRUE, + .SrcBlend = blend_options[params->blend_params->src_rgb], + .DestBlend = blend_options[params->blend_params->dst_rgb], + .BlendOp = D3D11_BLEND_OP_ADD, + .SrcBlendAlpha = blend_options[params->blend_params->src_alpha], + .DestBlendAlpha = blend_options[params->blend_params->dst_alpha], + .BlendOpAlpha = D3D11_BLEND_OP_ADD, + .RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL, + }; + } + D3D(ID3D11Device_CreateBlendState(p->dev, &bdesc, &pass_p->bstate)); + + d3d11_update_program_cache(gpu, pass, obj.key, sig, &vs_str, &ps_str, NULL); + + success = true; +error: + SAFE_RELEASE(vs_blob); + SAFE_RELEASE(ps_blob); + pl_cache_obj_free(&obj); + pl_free(in_descs); + return success; +} + +static bool pass_create_compute(pl_gpu gpu, struct pl_pass_t *pass, + const struct pl_pass_params *params) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + struct d3d11_ctx *ctx = p->ctx; + struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); + ID3DBlob *cs_blob = NULL; + pl_str cs_str = {0}; + pl_cache_obj obj = {0}; + uint64_t sig = 0; + bool success = false; + + if (d3d11_use_cached_program(gpu, pass, params, &obj, &sig, NULL, NULL, &cs_str)) + PL_DEBUG(gpu, "Using cached DXBC shader"); + + if (cs_str.len == 0) { + cs_blob = shader_compile_glsl(gpu, pass, 
&pass_p->main, + GLSL_SHADER_COMPUTE, params->glsl_shader); + if (!cs_blob) + goto error; + + cs_str = (pl_str) { + .buf = ID3D10Blob_GetBufferPointer(cs_blob), + .len = ID3D10Blob_GetBufferSize(cs_blob), + }; + } + + D3D(ID3D11Device_CreateComputeShader(p->dev, cs_str.buf, cs_str.len, NULL, + &pass_p->cs)); + + if (pass_p->num_workgroups_used) { + D3D11_BUFFER_DESC bdesc = { + .BindFlags = D3D11_BIND_CONSTANT_BUFFER, + .ByteWidth = sizeof(pass_p->last_num_wgs), + }; + D3D(ID3D11Device_CreateBuffer(p->dev, &bdesc, NULL, + &pass_p->num_workgroups_buf)); + } + + d3d11_update_program_cache(gpu, pass, obj.key, sig, NULL, NULL, &cs_str); + + success = true; +error: + pl_cache_obj_free(&obj); + SAFE_RELEASE(cs_blob); + return success; +} + +const struct pl_pass_t *pl_d3d11_pass_create(pl_gpu gpu, + const struct pl_pass_params *params) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + struct d3d11_ctx *ctx = p->ctx; + + struct pl_pass_t *pass = pl_zalloc_obj(NULL, pass, struct pl_pass_d3d11); + pass->params = pl_pass_params_copy(pass, params); + struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); + *pass_p = (struct pl_pass_d3d11) { + .max_binding = -1, + }; + + if (params->type == PL_PASS_COMPUTE) { + if (!pass_create_compute(gpu, pass, params)) + goto error; + } else { + if (!pass_create_raster(gpu, pass, params)) + goto error; + } + + // Pre-allocate resource arrays to use in pl_pass_run + pass_p->cbv_arr = pl_calloc(pass, + PL_MAX(pass_p->main.cbvs.num, pass_p->vertex.cbvs.num), + sizeof(*pass_p->cbv_arr)); + pass_p->srv_arr = pl_calloc(pass, + PL_MAX(pass_p->main.srvs.num, pass_p->vertex.srvs.num), + sizeof(*pass_p->srv_arr)); + pass_p->sampler_arr = pl_calloc(pass, + PL_MAX(pass_p->main.samplers.num, pass_p->vertex.samplers.num), + sizeof(*pass_p->sampler_arr)); + pass_p->uav_arr = pl_calloc(pass, pass_p->uavs.num, sizeof(*pass_p->uav_arr)); + + // Find the highest binding number used in `params->descriptors` if we + // haven't found it already. 
(If the shader was compiled fresh rather than + // loaded from cache, `pass_p->max_binding` should already be set.) + if (pass_p->max_binding == -1) { + for (int i = 0; i < params->num_descriptors; i++) { + pass_p->max_binding = PL_MAX(pass_p->max_binding, + params->descriptors[i].binding); + } + } + + // Build a mapping from binding numbers to descriptor array indexes + int *binding_map = pl_calloc_ptr(pass, pass_p->max_binding + 1, binding_map); + for (int i = 0; i <= pass_p->max_binding; i++) + binding_map[i] = HLSL_BINDING_NOT_USED; + for (int i = 0; i < params->num_descriptors; i++) + binding_map[params->descriptors[i].binding] = i; + +#define MAP_RESOURCES(array) \ + do { \ + for (int i = 0; i < array.num; i++) { \ + if (array.elem[i] > pass_p->max_binding) { \ + array.elem[i] = HLSL_BINDING_NOT_USED; \ + } else if (array.elem[i] >= 0) { \ + array.elem[i] = binding_map[array.elem[i]]; \ + } \ + } \ + } while (0) + + // During shader compilation (or after loading a compiled shader from cache) + // the entries of the following resource lists are shader binding numbers, + // however, it's more efficient for `pl_pass_run` if they refer to indexes + // of the `params->descriptors` array instead, so remap them here + MAP_RESOURCES(pass_p->main.cbvs); + MAP_RESOURCES(pass_p->main.samplers); + MAP_RESOURCES(pass_p->main.srvs); + MAP_RESOURCES(pass_p->vertex.cbvs); + MAP_RESOURCES(pass_p->vertex.samplers); + MAP_RESOURCES(pass_p->vertex.srvs); + MAP_RESOURCES(pass_p->uavs); + pl_free(binding_map); + + pl_d3d11_flush_message_queue(ctx, "After pass create"); + + return pass; + +error: + pl_d3d11_pass_destroy(gpu, pass); + return NULL; +} + +// Shared logic between VS, PS and CS for filling the resource arrays that are +// passed to ID3D11DeviceContext methods +static void fill_resources(pl_gpu gpu, pl_pass pass, + struct d3d_pass_stage *pass_s, + const struct pl_pass_run_params *params, + ID3D11Buffer **cbvs, ID3D11ShaderResourceView **srvs, + ID3D11SamplerState 
**samplers) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); + + for (int i = 0; i < pass_s->cbvs.num; i++) { + int binding = pass_s->cbvs.elem[i]; + if (binding == HLSL_BINDING_NUM_WORKGROUPS) { + cbvs[i] = pass_p->num_workgroups_buf; + continue; + } else if (binding < 0) { + cbvs[i] = NULL; + continue; + } + + pl_buf buf = params->desc_bindings[binding].object; + pl_d3d11_buf_resolve(gpu, buf); + struct pl_buf_d3d11 *buf_p = PL_PRIV(buf); + cbvs[i] = buf_p->buf; + } + + for (int i = 0; i < pass_s->srvs.num; i++) { + int binding = pass_s->srvs.elem[i]; + if (binding < 0) { + srvs[i] = NULL; + continue; + } + + pl_tex tex; + struct pl_tex_d3d11 *tex_p; + pl_buf buf; + struct pl_buf_d3d11 *buf_p; + switch (pass->params.descriptors[binding].type) { + case PL_DESC_SAMPLED_TEX: + case PL_DESC_STORAGE_IMG: + tex = params->desc_bindings[binding].object; + tex_p = PL_PRIV(tex); + srvs[i] = tex_p->srv; + break; + case PL_DESC_BUF_STORAGE: + buf = params->desc_bindings[binding].object; + buf_p = PL_PRIV(buf); + srvs[i] = buf_p->raw_srv; + break; + case PL_DESC_BUF_TEXEL_UNIFORM: + case PL_DESC_BUF_TEXEL_STORAGE: + buf = params->desc_bindings[binding].object; + buf_p = PL_PRIV(buf); + srvs[i] = buf_p->texel_srv; + break; + default: + break; + } + } + + for (int i = 0; i < pass_s->samplers.num; i++) { + int binding = pass_s->samplers.elem[i]; + if (binding < 0) { + samplers[i] = NULL; + continue; + } + + struct pl_desc_binding *db = ¶ms->desc_bindings[binding]; + samplers[i] = p->samplers[db->sample_mode][db->address_mode]; + } +} + +static void fill_uavs(pl_pass pass, const struct pl_pass_run_params *params, + ID3D11UnorderedAccessView **uavs) +{ + struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); + + for (int i = 0; i < pass_p->uavs.num; i++) { + int binding = pass_p->uavs.elem[i]; + if (binding < 0) { + uavs[i] = NULL; + continue; + } + + pl_tex tex; + struct pl_tex_d3d11 *tex_p; + pl_buf buf; + struct pl_buf_d3d11 *buf_p; + switch 
(pass->params.descriptors[binding].type) { + case PL_DESC_BUF_STORAGE: + buf = params->desc_bindings[binding].object; + buf_p = PL_PRIV(buf); + uavs[i] = buf_p->raw_uav; + break; + case PL_DESC_STORAGE_IMG: + tex = params->desc_bindings[binding].object; + tex_p = PL_PRIV(tex); + uavs[i] = tex_p->uav; + break; + case PL_DESC_BUF_TEXEL_STORAGE: + buf = params->desc_bindings[binding].object; + buf_p = PL_PRIV(buf); + uavs[i] = buf_p->texel_uav; + break; + default: + break; + } + } +} + +static void pass_run_raster(pl_gpu gpu, const struct pl_pass_run_params *params) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + pl_pass pass = params->pass; + struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); + + if (p->fl <= D3D_FEATURE_LEVEL_9_3 && params->index_buf) { + // Index buffers are unsupported because we can't tell if they are an + // index buffer or a vertex buffer on creation, and FL9_x allows only + // one binding type per-buffer + PL_ERR(gpu, "Index buffers are unsupported in FL9_x"); + return; + } + + if (p->fl <= D3D_FEATURE_LEVEL_9_1 && params->index_data && + params->index_fmt != PL_INDEX_UINT16) + { + PL_ERR(gpu, "32-bit index format is unsupported in FL9_1"); + return; + } + + // Figure out how much vertex/index data to upload, if any + size_t vertex_alloc = params->vertex_data ? pl_vertex_buf_size(params) : 0; + size_t index_alloc = params->index_data ? pl_index_buf_size(params) : 0; + + static const DXGI_FORMAT index_fmts[PL_INDEX_FORMAT_COUNT] = { + [PL_INDEX_UINT16] = DXGI_FORMAT_R16_UINT, + [PL_INDEX_UINT32] = DXGI_FORMAT_R32_UINT, + }; + + // Upload vertex data. On >=FL10_0 we use the same buffer for index data, so + // upload that too. 
+ bool share_vertex_index_buf = p->fl > D3D_FEATURE_LEVEL_9_3; + if (vertex_alloc || (share_vertex_index_buf && index_alloc)) { + struct stream_buf_slice slices[] = { + { .data = params->vertex_data, .size = vertex_alloc }, + { .data = params->index_data, .size = index_alloc }, + }; + + if (!stream_buf_upload(gpu, &p->vbuf, slices, + share_vertex_index_buf ? 2 : 1)) { + PL_ERR(gpu, "Failed to upload vertex data"); + return; + } + + if (vertex_alloc) { + ID3D11DeviceContext_IASetVertexBuffers(p->imm, 0, 1, &p->vbuf.buf, + &(UINT) { pass->params.vertex_stride }, &slices[0].offset); + } + if (share_vertex_index_buf && index_alloc) { + ID3D11DeviceContext_IASetIndexBuffer(p->imm, p->vbuf.buf, + index_fmts[params->index_fmt], slices[1].offset); + } + } + + // Upload index data for <=FL9_3, which must be in its own buffer + if (!share_vertex_index_buf && index_alloc) { + struct stream_buf_slice slices[] = { + { .data = params->index_data, .size = index_alloc }, + }; + + if (!stream_buf_upload(gpu, &p->ibuf, slices, PL_ARRAY_SIZE(slices))) { + PL_ERR(gpu, "Failed to upload index data"); + return; + } + + ID3D11DeviceContext_IASetIndexBuffer(p->imm, p->ibuf.buf, + index_fmts[params->index_fmt], slices[0].offset); + } + + if (params->vertex_buf) { + struct pl_buf_d3d11 *buf_p = PL_PRIV(params->vertex_buf); + ID3D11DeviceContext_IASetVertexBuffers(p->imm, 0, 1, &buf_p->buf, + &(UINT) { pass->params.vertex_stride }, + &(UINT) { params->buf_offset }); + } + + if (params->index_buf) { + struct pl_buf_d3d11 *buf_p = PL_PRIV(params->index_buf); + ID3D11DeviceContext_IASetIndexBuffer(p->imm, buf_p->buf, + index_fmts[params->index_fmt], params->index_offset); + } + + ID3D11DeviceContext_IASetInputLayout(p->imm, pass_p->layout); + + static const D3D_PRIMITIVE_TOPOLOGY prim_topology[] = { + [PL_PRIM_TRIANGLE_LIST] = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST, + [PL_PRIM_TRIANGLE_STRIP] = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, + }; + ID3D11DeviceContext_IASetPrimitiveTopology(p->imm, + 
prim_topology[pass->params.vertex_type]); + + ID3D11DeviceContext_VSSetShader(p->imm, pass_p->vs, NULL, 0); + + ID3D11Buffer **cbvs = pass_p->cbv_arr; + ID3D11ShaderResourceView **srvs = pass_p->srv_arr; + ID3D11SamplerState **samplers = pass_p->sampler_arr; + ID3D11UnorderedAccessView **uavs = pass_p->uav_arr; + + // Set vertex shader resources. The device context is called conditionally + // because the debug layer complains if these are called with 0 resources. + fill_resources(gpu, pass, &pass_p->vertex, params, cbvs, srvs, samplers); + if (pass_p->vertex.cbvs.num) + ID3D11DeviceContext_VSSetConstantBuffers(p->imm, 0, pass_p->vertex.cbvs.num, cbvs); + if (pass_p->vertex.srvs.num) + ID3D11DeviceContext_VSSetShaderResources(p->imm, 0, pass_p->vertex.srvs.num, srvs); + if (pass_p->vertex.samplers.num) + ID3D11DeviceContext_VSSetSamplers(p->imm, 0, pass_p->vertex.samplers.num, samplers); + + ID3D11DeviceContext_RSSetState(p->imm, p->rstate); + ID3D11DeviceContext_RSSetViewports(p->imm, 1, (&(D3D11_VIEWPORT) { + .TopLeftX = params->viewport.x0, + .TopLeftY = params->viewport.y0, + .Width = pl_rect_w(params->viewport), + .Height = pl_rect_h(params->viewport), + .MinDepth = 0, + .MaxDepth = 1, + })); + ID3D11DeviceContext_RSSetScissorRects(p->imm, 1, (&(D3D11_RECT) { + .left = params->scissors.x0, + .top = params->scissors.y0, + .right = params->scissors.x1, + .bottom = params->scissors.y1, + })); + + ID3D11DeviceContext_PSSetShader(p->imm, pass_p->ps, NULL, 0); + + // Set pixel shader resources + fill_resources(gpu, pass, &pass_p->main, params, cbvs, srvs, samplers); + if (pass_p->main.cbvs.num) + ID3D11DeviceContext_PSSetConstantBuffers(p->imm, 0, pass_p->main.cbvs.num, cbvs); + if (pass_p->main.srvs.num) + ID3D11DeviceContext_PSSetShaderResources(p->imm, 0, pass_p->main.srvs.num, srvs); + if (pass_p->main.samplers.num) + ID3D11DeviceContext_PSSetSamplers(p->imm, 0, pass_p->main.samplers.num, samplers); + + ID3D11DeviceContext_OMSetBlendState(p->imm, pass_p->bstate, 
NULL, + D3D11_DEFAULT_SAMPLE_MASK); + ID3D11DeviceContext_OMSetDepthStencilState(p->imm, p->dsstate, 0); + + fill_uavs(pass, params, uavs); + + struct pl_tex_d3d11 *target_p = PL_PRIV(params->target); + ID3D11DeviceContext_OMSetRenderTargetsAndUnorderedAccessViews( + p->imm, 1, &target_p->rtv, NULL, 1, pass_p->uavs.num, uavs, NULL); + + if (params->index_data || params->index_buf) { + ID3D11DeviceContext_DrawIndexed(p->imm, params->vertex_count, 0, 0); + } else { + ID3D11DeviceContext_Draw(p->imm, params->vertex_count, 0); + } + + // Unbind everything. It's easier to do this than to actually track state, + // and if we leave the RTV bound, it could trip up D3D's conflict checker. + // Also, apparently unbinding SRVs can prevent a 10level9 bug? + // https://docs.microsoft.com/en-us/windows/win32/direct3d11/overviews-direct3d-11-devices-downlevel-prevent-null-srvs + for (int i = 0; i < PL_MAX(pass_p->main.cbvs.num, pass_p->vertex.cbvs.num); i++) + cbvs[i] = NULL; + for (int i = 0; i < PL_MAX(pass_p->main.srvs.num, pass_p->vertex.srvs.num); i++) + srvs[i] = NULL; + for (int i = 0; i < PL_MAX(pass_p->main.samplers.num, pass_p->vertex.samplers.num); i++) + samplers[i] = NULL; + for (int i = 0; i < pass_p->uavs.num; i++) + uavs[i] = NULL; + if (pass_p->vertex.cbvs.num) + ID3D11DeviceContext_VSSetConstantBuffers(p->imm, 0, pass_p->vertex.cbvs.num, cbvs); + if (pass_p->vertex.srvs.num) + ID3D11DeviceContext_VSSetShaderResources(p->imm, 0, pass_p->vertex.srvs.num, srvs); + if (pass_p->vertex.samplers.num) + ID3D11DeviceContext_VSSetSamplers(p->imm, 0, pass_p->vertex.samplers.num, samplers); + if (pass_p->main.cbvs.num) + ID3D11DeviceContext_PSSetConstantBuffers(p->imm, 0, pass_p->main.cbvs.num, cbvs); + if (pass_p->main.srvs.num) + ID3D11DeviceContext_PSSetShaderResources(p->imm, 0, pass_p->main.srvs.num, srvs); + if (pass_p->main.samplers.num) + ID3D11DeviceContext_PSSetSamplers(p->imm, 0, pass_p->main.samplers.num, samplers); + 
ID3D11DeviceContext_OMSetRenderTargetsAndUnorderedAccessViews( + p->imm, 0, NULL, NULL, 1, pass_p->uavs.num, uavs, NULL); +} + +static void pass_run_compute(pl_gpu gpu, const struct pl_pass_run_params *params) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + pl_pass pass = params->pass; + struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); + + // Update gl_NumWorkGroups emulation buffer if necessary + if (pass_p->num_workgroups_used) { + bool needs_update = false; + for (int i = 0; i < 3; i++) { + if (pass_p->last_num_wgs.num_wgs[i] != params->compute_groups[i]) + needs_update = true; + pass_p->last_num_wgs.num_wgs[i] = params->compute_groups[i]; + } + + if (needs_update) { + ID3D11DeviceContext_UpdateSubresource(p->imm, + (ID3D11Resource *) pass_p->num_workgroups_buf, 0, NULL, + &pass_p->last_num_wgs, 0, 0); + } + } + + ID3D11DeviceContext_CSSetShader(p->imm, pass_p->cs, NULL, 0); + + ID3D11Buffer **cbvs = pass_p->cbv_arr; + ID3D11ShaderResourceView **srvs = pass_p->srv_arr; + ID3D11UnorderedAccessView **uavs = pass_p->uav_arr; + ID3D11SamplerState **samplers = pass_p->sampler_arr; + + fill_resources(gpu, pass, &pass_p->main, params, cbvs, srvs, samplers); + fill_uavs(pass, params, uavs); + + if (pass_p->main.cbvs.num) + ID3D11DeviceContext_CSSetConstantBuffers(p->imm, 0, pass_p->main.cbvs.num, cbvs); + if (pass_p->main.srvs.num) + ID3D11DeviceContext_CSSetShaderResources(p->imm, 0, pass_p->main.srvs.num, srvs); + if (pass_p->main.samplers.num) + ID3D11DeviceContext_CSSetSamplers(p->imm, 0, pass_p->main.samplers.num, samplers); + if (pass_p->uavs.num) + ID3D11DeviceContext_CSSetUnorderedAccessViews(p->imm, 0, pass_p->uavs.num, uavs, NULL); + + ID3D11DeviceContext_Dispatch(p->imm, params->compute_groups[0], + params->compute_groups[1], + params->compute_groups[2]); + + // Unbind everything + for (int i = 0; i < pass_p->main.cbvs.num; i++) + cbvs[i] = NULL; + for (int i = 0; i < pass_p->main.srvs.num; i++) + srvs[i] = NULL; + for (int i = 0; i < pass_p->main.samplers.num; 
i++) + samplers[i] = NULL; + for (int i = 0; i < pass_p->uavs.num; i++) + uavs[i] = NULL; + if (pass_p->main.cbvs.num) + ID3D11DeviceContext_CSSetConstantBuffers(p->imm, 0, pass_p->main.cbvs.num, cbvs); + if (pass_p->main.srvs.num) + ID3D11DeviceContext_CSSetShaderResources(p->imm, 0, pass_p->main.srvs.num, srvs); + if (pass_p->main.samplers.num) + ID3D11DeviceContext_CSSetSamplers(p->imm, 0, pass_p->main.samplers.num, samplers); + if (pass_p->uavs.num) + ID3D11DeviceContext_CSSetUnorderedAccessViews(p->imm, 0, pass_p->uavs.num, uavs, NULL); +} + +void pl_d3d11_pass_run(pl_gpu gpu, const struct pl_pass_run_params *params) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + struct d3d11_ctx *ctx = p->ctx; + pl_pass pass = params->pass; + + pl_d3d11_timer_start(gpu, params->timer); + + if (pass->params.type == PL_PASS_COMPUTE) { + pass_run_compute(gpu, params); + } else { + pass_run_raster(gpu, params); + } + + pl_d3d11_timer_end(gpu, params->timer); + pl_d3d11_flush_message_queue(ctx, "After pass run"); +} diff --git a/src/d3d11/gpu_tex.c b/src/d3d11/gpu_tex.c new file mode 100644 index 0000000..d63fc17 --- /dev/null +++ b/src/d3d11/gpu_tex.c @@ -0,0 +1,745 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "gpu.h" +#include "formats.h" + +static inline UINT tex_subresource(pl_tex tex) +{ + struct pl_tex_d3d11 *tex_p = PL_PRIV(tex); + return tex_p->array_slice >= 0 ? tex_p->array_slice : 0; +} + +static bool tex_init(pl_gpu gpu, pl_tex tex) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + struct d3d11_ctx *ctx = p->ctx; + struct pl_tex_d3d11 *tex_p = PL_PRIV(tex); + + // View formats may be omitted when they match the texture format, but for + // simplicity's sake we always set it. It will match the texture format for + // textures created with tex_create, but it can be different for video + // textures wrapped with pl_d3d11_wrap. + DXGI_FORMAT fmt = fmt_to_dxgi(tex->params.format); + + if (tex->params.sampleable || tex->params.storable) { + D3D11_SHADER_RESOURCE_VIEW_DESC srvdesc = { + .Format = fmt, + }; + switch (pl_tex_params_dimension(tex->params)) { + case 1: + if (tex_p->array_slice >= 0) { + srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1DARRAY; + srvdesc.Texture1DArray.MipLevels = 1; + srvdesc.Texture1DArray.FirstArraySlice = tex_p->array_slice; + srvdesc.Texture1DArray.ArraySize = 1; + } else { + srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D; + srvdesc.Texture1D.MipLevels = 1; + } + break; + case 2: + if (tex_p->array_slice >= 0) { + srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2DARRAY; + srvdesc.Texture2DArray.MipLevels = 1; + srvdesc.Texture2DArray.FirstArraySlice = tex_p->array_slice; + srvdesc.Texture2DArray.ArraySize = 1; + } else { + srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + srvdesc.Texture2D.MipLevels = 1; + } + break; + case 3: + // D3D11 does not have Texture3D arrays + srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D; + srvdesc.Texture3D.MipLevels = 1; + break; + } + D3D(ID3D11Device_CreateShaderResourceView(p->dev, tex_p->res, &srvdesc, + &tex_p->srv)); + } + + if (tex->params.renderable) { + D3D11_RENDER_TARGET_VIEW_DESC rtvdesc = { + .Format = fmt, + }; + switch 
(pl_tex_params_dimension(tex->params)) { + case 1: + if (tex_p->array_slice >= 0) { + rtvdesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE1DARRAY; + rtvdesc.Texture1DArray.FirstArraySlice = tex_p->array_slice; + rtvdesc.Texture1DArray.ArraySize = 1; + } else { + rtvdesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE1D; + } + break; + case 2: + if (tex_p->array_slice >= 0) { + rtvdesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2DARRAY; + rtvdesc.Texture2DArray.FirstArraySlice = tex_p->array_slice; + rtvdesc.Texture2DArray.ArraySize = 1; + } else { + rtvdesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D; + } + break; + case 3: + // D3D11 does not have Texture3D arrays + rtvdesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE3D; + rtvdesc.Texture3D.WSize = -1; + break; + } + D3D(ID3D11Device_CreateRenderTargetView(p->dev, tex_p->res, &rtvdesc, + &tex_p->rtv)); + } + + if (p->fl >= D3D_FEATURE_LEVEL_11_0 && tex->params.storable) { + D3D11_UNORDERED_ACCESS_VIEW_DESC uavdesc = { + .Format = fmt, + }; + switch (pl_tex_params_dimension(tex->params)) { + case 1: + if (tex_p->array_slice >= 0) { + uavdesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE1DARRAY; + uavdesc.Texture1DArray.FirstArraySlice = tex_p->array_slice; + uavdesc.Texture1DArray.ArraySize = 1; + } else { + uavdesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE1D; + } + break; + case 2: + if (tex_p->array_slice >= 0) { + uavdesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2DARRAY; + uavdesc.Texture2DArray.FirstArraySlice = tex_p->array_slice; + uavdesc.Texture2DArray.ArraySize = 1; + } else { + uavdesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D; + } + break; + case 3: + // D3D11 does not have Texture3D arrays + uavdesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE3D; + uavdesc.Texture3D.WSize = -1; + break; + } + D3D(ID3D11Device_CreateUnorderedAccessView(p->dev, tex_p->res, &uavdesc, + &tex_p->uav)); + } + + return true; +error: + return false; +} + +void pl_d3d11_tex_destroy(pl_gpu gpu, pl_tex tex) +{ + struct 
pl_gpu_d3d11 *p = PL_PRIV(gpu); + struct d3d11_ctx *ctx = p->ctx; + struct pl_tex_d3d11 *tex_p = PL_PRIV(tex); + + SAFE_RELEASE(tex_p->srv); + SAFE_RELEASE(tex_p->rtv); + SAFE_RELEASE(tex_p->uav); + SAFE_RELEASE(tex_p->res); + SAFE_RELEASE(tex_p->staging); + + pl_d3d11_flush_message_queue(ctx, "After texture destroy"); + + pl_free((void *) tex); +} + +pl_tex pl_d3d11_tex_create(pl_gpu gpu, const struct pl_tex_params *params) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + struct d3d11_ctx *ctx = p->ctx; + + struct pl_tex_t *tex = pl_zalloc_obj(NULL, tex, struct pl_tex_d3d11); + tex->params = *params; + tex->params.initial_data = NULL; + tex->sampler_type = PL_SAMPLER_NORMAL; + + struct pl_tex_d3d11 *tex_p = PL_PRIV(tex); + + DXGI_FORMAT dxfmt = fmt_to_dxgi(params->format); + + D3D11_USAGE usage = D3D11_USAGE_DEFAULT; + D3D11_BIND_FLAG bind_flags = 0; + + if (params->format->emulated) { + tex_p->texel_fmt = pl_find_fmt(gpu, params->format->type, 1, 0, + params->format->host_bits[0], + PL_FMT_CAP_TEXEL_UNIFORM); + + if (!tex_p->texel_fmt) { + PL_ERR(gpu, "Failed picking texel format for emulated texture!"); + goto error; + } + + tex->params.storable = true; + } + + if (p->fl >= D3D_FEATURE_LEVEL_11_0) { + // On >=FL11_0, blit emulation needs image storage + tex->params.storable |= params->blit_src || params->blit_dst; + + // Blit emulation can use a sampler for linear filtering during stretch + if ((tex->params.format->caps & PL_FMT_CAP_LINEAR) && params->blit_src) + tex->params.sampleable = true; + } else { + // On <FL11_0, blit emulation uses a render pass + tex->params.sampleable |= params->blit_src; + tex->params.renderable |= params->blit_dst; + } + + if (tex->params.sampleable) + bind_flags |= D3D11_BIND_SHADER_RESOURCE; + if (tex->params.renderable) + bind_flags |= D3D11_BIND_RENDER_TARGET; + if (p->fl >= D3D_FEATURE_LEVEL_11_0 && tex->params.storable) + bind_flags |= D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS; + + // Apparently IMMUTABLE textures 
are efficient, so try to infer whether we + // can use one + if (params->initial_data && !params->format->emulated && + !tex->params.renderable && !tex->params.storable && !params->host_writable) + { + usage = D3D11_USAGE_IMMUTABLE; + } + + // In FL9_x, resources with only D3D11_BIND_SHADER_RESOURCE can't be copied + // from GPU-accessible memory to CPU-accessible memory. The only other bind + // flag we set on this FL is D3D11_BIND_RENDER_TARGET, so set it. + if (p->fl <= D3D_FEATURE_LEVEL_9_3 && tex->params.host_readable) + bind_flags |= D3D11_BIND_RENDER_TARGET; + + // In FL9_x, when using DEFAULT or IMMUTABLE, BindFlags cannot be zero + if (p->fl <= D3D_FEATURE_LEVEL_9_3 && !bind_flags) + bind_flags |= D3D11_BIND_SHADER_RESOURCE; + + D3D11_SUBRESOURCE_DATA data; + D3D11_SUBRESOURCE_DATA *pdata = NULL; + if (params->initial_data && !params->format->emulated) { + data = (D3D11_SUBRESOURCE_DATA) { + .pSysMem = params->initial_data, + .SysMemPitch = params->w * params->format->texel_size, + }; + if (params->d) + data.SysMemSlicePitch = data.SysMemPitch * params->h; + pdata = &data; + } + + switch (pl_tex_params_dimension(*params)) { + case 1:; + D3D11_TEXTURE1D_DESC desc1d = { + .Width = params->w, + .MipLevels = 1, + .ArraySize = 1, + .Format = dxfmt, + .Usage = usage, + .BindFlags = bind_flags, + }; + D3D(ID3D11Device_CreateTexture1D(p->dev, &desc1d, pdata, &tex_p->tex1d)); + tex_p->res = (ID3D11Resource *)tex_p->tex1d; + + // Create a staging texture with CPU access for pl_tex_download() + if (params->host_readable) { + desc1d.BindFlags = 0; + desc1d.CPUAccessFlags = D3D11_CPU_ACCESS_READ; + desc1d.Usage = D3D11_USAGE_STAGING; + + D3D(ID3D11Device_CreateTexture1D(p->dev, &desc1d, NULL, + &tex_p->staging1d)); + tex_p->staging = (ID3D11Resource *) tex_p->staging1d; + } + break; + case 2:; + D3D11_TEXTURE2D_DESC desc2d = { + .Width = params->w, + .Height = params->h, + .MipLevels = 1, + .ArraySize = 1, + .SampleDesc.Count = 1, + .Format = dxfmt, + .Usage = usage, + 
.BindFlags = bind_flags, + }; + D3D(ID3D11Device_CreateTexture2D(p->dev, &desc2d, pdata, &tex_p->tex2d)); + tex_p->res = (ID3D11Resource *)tex_p->tex2d; + + // Create a staging texture with CPU access for pl_tex_download() + if (params->host_readable) { + desc2d.BindFlags = 0; + desc2d.CPUAccessFlags = D3D11_CPU_ACCESS_READ; + desc2d.Usage = D3D11_USAGE_STAGING; + + D3D(ID3D11Device_CreateTexture2D(p->dev, &desc2d, NULL, + &tex_p->staging2d)); + tex_p->staging = (ID3D11Resource *) tex_p->staging2d; + } + break; + case 3:; + D3D11_TEXTURE3D_DESC desc3d = { + .Width = params->w, + .Height = params->h, + .Depth = params->d, + .MipLevels = 1, + .Format = dxfmt, + .Usage = usage, + .BindFlags = bind_flags, + }; + D3D(ID3D11Device_CreateTexture3D(p->dev, &desc3d, pdata, &tex_p->tex3d)); + tex_p->res = (ID3D11Resource *)tex_p->tex3d; + + // Create a staging texture with CPU access for pl_tex_download() + if (params->host_readable) { + desc3d.BindFlags = 0; + desc3d.CPUAccessFlags = D3D11_CPU_ACCESS_READ; + desc3d.Usage = D3D11_USAGE_STAGING; + + D3D(ID3D11Device_CreateTexture3D(p->dev, &desc3d, NULL, + &tex_p->staging3d)); + tex_p->staging = (ID3D11Resource *) tex_p->staging3d; + } + break; + default: + pl_unreachable(); + } + + tex_p->array_slice = -1; + + if (!tex_init(gpu, tex)) + goto error; + + if (params->initial_data && params->format->emulated) { + struct pl_tex_transfer_params ul_params = { + .tex = tex, + .ptr = (void *) params->initial_data, + .rc = { 0, 0, 0, params->w, params->h, params->d }, + }; + + // Since we re-use GPU helpers which require writable images, just fake it + bool writable = tex->params.host_writable; + tex->params.host_writable = true; + if (!pl_tex_upload(gpu, &ul_params)) + goto error; + tex->params.host_writable = writable; + } + + pl_d3d11_flush_message_queue(ctx, "After texture create"); + + return tex; + +error: + pl_d3d11_tex_destroy(gpu, tex); + return NULL; +} + +pl_tex pl_d3d11_wrap(pl_gpu gpu, const struct pl_d3d11_wrap_params 
*params) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + struct d3d11_ctx *ctx = p->ctx; + + struct pl_tex_t *tex = pl_zalloc_obj(NULL, tex, struct pl_tex_d3d11); + tex->sampler_type = PL_SAMPLER_NORMAL; + + struct pl_tex_d3d11 *tex_p = PL_PRIV(tex); + + DXGI_FORMAT fmt = DXGI_FORMAT_UNKNOWN; + D3D11_USAGE usage = D3D11_USAGE_DEFAULT; + D3D11_BIND_FLAG bind_flags = 0; + UINT mip_levels = 1; + UINT array_size = 1; + UINT sample_count = 1; + + D3D11_RESOURCE_DIMENSION type; + ID3D11Resource_GetType(params->tex, &type); + + switch (type) { + case D3D11_RESOURCE_DIMENSION_TEXTURE1D: + D3D(ID3D11Resource_QueryInterface(params->tex, &IID_ID3D11Texture1D, + (void **) &tex_p->tex1d)); + tex_p->res = (ID3D11Resource *) tex_p->tex1d; + + D3D11_TEXTURE1D_DESC desc1d; + ID3D11Texture1D_GetDesc(tex_p->tex1d, &desc1d); + + tex->params.w = desc1d.Width; + mip_levels = desc1d.MipLevels; + array_size = desc1d.ArraySize; + fmt = desc1d.Format; + usage = desc1d.Usage; + bind_flags = desc1d.BindFlags; + break; + + case D3D11_RESOURCE_DIMENSION_TEXTURE2D: + D3D(ID3D11Resource_QueryInterface(params->tex, &IID_ID3D11Texture2D, + (void **) &tex_p->tex2d)); + tex_p->res = (ID3D11Resource *) tex_p->tex2d; + + D3D11_TEXTURE2D_DESC desc2d; + ID3D11Texture2D_GetDesc(tex_p->tex2d, &desc2d); + + tex->params.w = desc2d.Width; + tex->params.h = desc2d.Height; + mip_levels = desc2d.MipLevels; + array_size = desc2d.ArraySize; + fmt = desc2d.Format; + sample_count = desc2d.SampleDesc.Count; + usage = desc2d.Usage; + bind_flags = desc2d.BindFlags; + + // Allow the format and size of 2D textures to be overridden to support + // shader views of video resources + if (params->fmt) { + fmt = params->fmt; + tex->params.w = params->w; + tex->params.h = params->h; + } + + break; + + case D3D11_RESOURCE_DIMENSION_TEXTURE3D: + D3D(ID3D11Resource_QueryInterface(params->tex, &IID_ID3D11Texture3D, + (void **) &tex_p->tex3d)); + tex_p->res = (ID3D11Resource *) tex_p->tex3d; + + D3D11_TEXTURE3D_DESC desc3d; + 
ID3D11Texture3D_GetDesc(tex_p->tex3d, &desc3d); + + tex->params.w = desc3d.Width; + tex->params.h = desc3d.Height; + tex->params.d = desc3d.Depth; + mip_levels = desc3d.MipLevels; + fmt = desc3d.Format; + usage = desc3d.Usage; + bind_flags = desc3d.BindFlags; + break; + + case D3D11_RESOURCE_DIMENSION_UNKNOWN: + case D3D11_RESOURCE_DIMENSION_BUFFER: + PL_ERR(gpu, "Resource is not suitable to wrap"); + goto error; + } + + if (mip_levels != 1) { + PL_ERR(gpu, "Mipmapped textures not supported for wrapping"); + goto error; + } + if (sample_count != 1) { + PL_ERR(gpu, "Multisampled textures not supported for wrapping"); + goto error; + } + if (usage != D3D11_USAGE_DEFAULT) { + PL_ERR(gpu, "Resource is not D3D11_USAGE_DEFAULT"); + goto error; + } + + if (array_size > 1) { + if (params->array_slice < 0 || params->array_slice >= array_size) { + PL_ERR(gpu, "array_slice out of range"); + goto error; + } + tex_p->array_slice = params->array_slice; + } else { + tex_p->array_slice = -1; + } + + if (bind_flags & D3D11_BIND_SHADER_RESOURCE) { + tex->params.sampleable = true; + + // Blit emulation uses a render pass on <FL11_0 + if (p->fl < D3D_FEATURE_LEVEL_11_0) + tex->params.blit_src = true; + } + if (bind_flags & D3D11_BIND_RENDER_TARGET) { + tex->params.renderable = true; + + // Blit emulation uses a render pass on <FL11_0 + if (p->fl < D3D_FEATURE_LEVEL_11_0) + tex->params.blit_dst = true; + } + static const D3D11_BIND_FLAG storable_flags = + D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE; + if ((bind_flags & storable_flags) == storable_flags) { + tex->params.storable = true; + + // Blit emulation uses image storage on >=FL11_0. A feature level check + // isn't required because <FL11_0 doesn't have storable images. 
+ tex->params.blit_src = tex->params.blit_dst = true; + } + + for (int i = 0; i < gpu->num_formats; i++) { + DXGI_FORMAT target_fmt = fmt_to_dxgi(gpu->formats[i]); + if (fmt == target_fmt) { + tex->params.format = gpu->formats[i]; + break; + } + } + if (!tex->params.format) { + PL_ERR(gpu, "Could not find a suitable pl_fmt for wrapped resource"); + goto error; + } + + if (!tex_init(gpu, tex)) + goto error; + + pl_d3d11_flush_message_queue(ctx, "After texture wrap"); + + return tex; + +error: + pl_d3d11_tex_destroy(gpu, tex); + return NULL; +} + +void pl_d3d11_tex_invalidate(pl_gpu gpu, pl_tex tex) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + struct d3d11_ctx *ctx = p->ctx; + struct pl_tex_d3d11 *tex_p = PL_PRIV(tex); + + // Resource discarding requires D3D11.1 + if (!p->imm1) + return; + + // Prefer discarding a view to discarding the whole resource. The reason + // for this is that a pl_tex can refer to a single member of a texture + // array. Discarding the SRV, RTV or UAV should only discard that member. 
+ if (tex_p->rtv) { + ID3D11DeviceContext1_DiscardView(p->imm1, (ID3D11View *) tex_p->rtv); + } else if (tex_p->uav) { + ID3D11DeviceContext1_DiscardView(p->imm1, (ID3D11View *) tex_p->uav); + } else if (tex_p->srv) { + ID3D11DeviceContext1_DiscardView(p->imm1, (ID3D11View *) tex_p->srv); + } else if (tex_p->array_slice < 0) { + // If there are no views, only discard if the ID3D11Resource is not a + // texture array + ID3D11DeviceContext1_DiscardResource(p->imm1, tex_p->res); + } + + pl_d3d11_flush_message_queue(ctx, "After texture invalidate"); +} + +void pl_d3d11_tex_clear_ex(pl_gpu gpu, pl_tex tex, + const union pl_clear_color color) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + struct d3d11_ctx *ctx = p->ctx; + struct pl_tex_d3d11 *tex_p = PL_PRIV(tex); + + if (tex->params.format->type == PL_FMT_UINT) { + if (tex_p->uav) { + ID3D11DeviceContext_ClearUnorderedAccessViewUint(p->imm, tex_p->uav, + color.u); + } else { + float c[4] = { color.u[0], color.u[1], color.u[2], color.u[3] }; + ID3D11DeviceContext_ClearRenderTargetView(p->imm, tex_p->rtv, c); + } + + } else if (tex->params.format->type == PL_FMT_SINT) { + if (tex_p->uav) { + ID3D11DeviceContext_ClearUnorderedAccessViewUint(p->imm, tex_p->uav, + (const uint32_t *)color.i); + } else { + float c[4] = { color.i[0], color.i[1], color.i[2], color.i[3] }; + ID3D11DeviceContext_ClearRenderTargetView(p->imm, tex_p->rtv, c); + } + + } else if (tex_p->rtv) { + ID3D11DeviceContext_ClearRenderTargetView(p->imm, tex_p->rtv, color.f); + } else { + ID3D11DeviceContext_ClearUnorderedAccessViewFloat(p->imm, tex_p->uav, color.f); + } + + pl_d3d11_flush_message_queue(ctx, "After texture clear"); +} + +#define pl_rect3d_to_box(rc) \ + ((D3D11_BOX) { \ + .left = rc.x0, .top = rc.y0, .front = rc.z0, \ + .right = rc.x1, .bottom = rc.y1, .back = rc.z1, \ + }) + +void pl_d3d11_tex_blit(pl_gpu gpu, const struct pl_tex_blit_params *params) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + struct d3d11_ctx *ctx = p->ctx; + struct 
pl_tex_d3d11 *src_p = PL_PRIV(params->src); + DXGI_FORMAT src_fmt = fmt_to_dxgi(params->src->params.format); + struct pl_tex_d3d11 *dst_p = PL_PRIV(params->dst); + DXGI_FORMAT dst_fmt = fmt_to_dxgi(params->dst->params.format); + + // If the blit operation doesn't require flipping, scaling or format + // conversion, we can use CopySubresourceRegion + pl_rect3d src_rc = params->src_rc, dst_rc = params->dst_rc; + if (pl_rect3d_eq(src_rc, dst_rc) && src_fmt == dst_fmt) { + pl_rect3d rc = params->src_rc; + pl_rect3d_normalize(&rc); + + ID3D11DeviceContext_CopySubresourceRegion(p->imm, dst_p->res, + tex_subresource(params->dst), rc.x0, rc.y0, rc.z0, src_p->res, + tex_subresource(params->src), &pl_rect3d_to_box(rc)); + } else if (p->fl >= D3D_FEATURE_LEVEL_11_0) { + if (!pl_tex_blit_compute(gpu, params)) + PL_ERR(gpu, "Failed compute shader fallback blit"); + } else { + pl_tex_blit_raster(gpu, params); + } + + pl_d3d11_flush_message_queue(ctx, "After texture blit"); +} + +bool pl_d3d11_tex_upload(pl_gpu gpu, const struct pl_tex_transfer_params *params) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + struct d3d11_ctx *ctx = p->ctx; + pl_tex tex = params->tex; + pl_fmt fmt = tex->params.format; + struct pl_tex_d3d11 *tex_p = PL_PRIV(tex); + struct pl_tex_transfer_params *slices = NULL; + bool ret = false; + + pl_d3d11_timer_start(gpu, params->timer); + + if (fmt->emulated) { + + int num_slices = pl_tex_transfer_slices(gpu, tex_p->texel_fmt, params, &slices); + for (int i = 0; i < num_slices; i++) { + // Copy the source data buffer into an intermediate buffer + pl_buf tbuf = pl_buf_create(gpu, pl_buf_params( + .memory_type = PL_BUF_MEM_DEVICE, + .format = tex_p->texel_fmt, + .size = pl_tex_transfer_size(&slices[i]), + .initial_data = slices[i].ptr, + .storable = true, + )); + + if (!tbuf) { + PL_ERR(gpu, "Failed creating buffer for tex upload fallback!"); + goto error; + } + + slices[i].ptr = NULL; + slices[i].buf = tbuf; + slices[i].buf_offset = 0; + bool ok = 
pl_tex_upload_texel(gpu, &slices[i]); + pl_buf_destroy(gpu, &tbuf); + if (!ok) + goto error; + } + + } else { + + ID3D11DeviceContext_UpdateSubresource(p->imm, tex_p->res, + tex_subresource(tex), &pl_rect3d_to_box(params->rc), params->ptr, + params->row_pitch, params->depth_pitch); + + } + + ret = true; + +error: + pl_d3d11_timer_end(gpu, params->timer); + pl_d3d11_flush_message_queue(ctx, "After texture upload"); + + pl_free(slices); + return ret; +} + +bool pl_d3d11_tex_download(pl_gpu gpu, const struct pl_tex_transfer_params *params) +{ + struct pl_gpu_d3d11 *p = PL_PRIV(gpu); + struct d3d11_ctx *ctx = p->ctx; + const struct pl_tex_t *tex = params->tex; + pl_fmt fmt = tex->params.format; + struct pl_tex_d3d11 *tex_p = PL_PRIV(tex); + struct pl_tex_transfer_params *slices = NULL; + bool ret = false; + + if (!tex_p->staging) + return false; + + pl_d3d11_timer_start(gpu, params->timer); + + if (fmt->emulated) { + + pl_buf tbuf = NULL; + int num_slices = pl_tex_transfer_slices(gpu, tex_p->texel_fmt, params, &slices); + for (int i = 0; i < num_slices; i++) { + const size_t slice_size = pl_tex_transfer_size(&slices[i]); + bool ok = pl_buf_recreate(gpu, &tbuf, pl_buf_params( + .storable = true, + .size = slice_size, + .memory_type = PL_BUF_MEM_DEVICE, + .format = tex_p->texel_fmt, + .host_readable = true, + )); + + if (!ok) { + PL_ERR(gpu, "Failed creating buffer for tex download fallback!"); + goto error; + } + + void *ptr = slices[i].ptr; + slices[i].ptr = NULL; + slices[i].buf = tbuf; + slices[i].buf_offset = 0; + + // Download into an intermediate buffer first + ok = pl_tex_download_texel(gpu, &slices[i]); + ok = ok && pl_buf_read(gpu, tbuf, 0, ptr, slice_size); + if (!ok) { + pl_buf_destroy(gpu, &tbuf); + goto error; + } + } + pl_buf_destroy(gpu, &tbuf); + + } else { + + ID3D11DeviceContext_CopySubresourceRegion(p->imm, + (ID3D11Resource *) tex_p->staging, 0, params->rc.x0, params->rc.y0, + params->rc.z0, tex_p->res, tex_subresource(tex), + 
&pl_rect3d_to_box(params->rc)); + + D3D11_MAPPED_SUBRESOURCE lock; + D3D(ID3D11DeviceContext_Map(p->imm, (ID3D11Resource *) tex_p->staging, 0, + D3D11_MAP_READ, 0, &lock)); + + char *cdst = params->ptr; + char *csrc = lock.pData; + size_t line_size = pl_rect_w(params->rc) * tex->params.format->texel_size; + for (int z = 0; z < pl_rect_d(params->rc); z++) { + for (int y = 0; y < pl_rect_h(params->rc); y++) { + memcpy(cdst + z * params->depth_pitch + y * params->row_pitch, + csrc + (params->rc.z0 + z) * lock.DepthPitch + + (params->rc.y0 + y) * lock.RowPitch + params->rc.x0, + line_size); + } + } + + ID3D11DeviceContext_Unmap(p->imm, (ID3D11Resource*)tex_p->staging, 0); + } + + ret = true; + +error: + pl_d3d11_timer_end(gpu, params->timer); + pl_d3d11_flush_message_queue(ctx, "After texture download"); + + pl_free(slices); + return ret; +} diff --git a/src/d3d11/meson.build b/src/d3d11/meson.build new file mode 100644 index 0000000..d4c4b44 --- /dev/null +++ b/src/d3d11/meson.build @@ -0,0 +1,41 @@ +d3d11 = get_option('d3d11') +d3d11_header = cc.check_header('d3d11.h', required: false) # needed publicly +d3d11_headers_extra = [ # needed internally + cc.check_header('d3d11_4.h', required: d3d11), + cc.check_header('dxgi1_6.h', required: d3d11), +] +d3d11_deps = [ + dependency('spirv-cross-c-shared', version: '>=0.29.0', required: d3d11), + cc.find_library('version', required: d3d11), +] + +d3d11 = d3d11.require(d3d11_header) +foreach h : d3d11_headers_extra + d3d11 = d3d11.require(h) +endforeach +foreach d : d3d11_deps + d3d11 = d3d11.require(d.found()) +endforeach + +components.set('d3d11', d3d11.allowed()) +if d3d11.allowed() + conf_internal.set('PL_HAVE_DXGI_DEBUG', + cc.has_header_symbol('dxgidebug.h', 'IID_IDXGIInfoQueue')) + conf_internal.set('PL_HAVE_DXGI_DEBUG_D3D11', + cc.has_header_symbol('d3d11sdklayers.h', 'DXGI_DEBUG_D3D11')) + add_project_arguments(['-DCOBJMACROS'], language: ['c', 'cpp']) + build_deps += declare_dependency(dependencies: d3d11_deps) + 
tests += 'd3d11.c' + sources += [ + 'd3d11/context.c', + 'd3d11/formats.c', + 'd3d11/gpu.c', + 'd3d11/gpu_buf.c', + 'd3d11/gpu_tex.c', + 'd3d11/gpu_pass.c', + 'd3d11/swapchain.c', + 'd3d11/utils.c', + ] +elif d3d11_header + sources += 'd3d11/stubs.c' +endif diff --git a/src/d3d11/stubs.c b/src/d3d11/stubs.c new file mode 100644 index 0000000..b3f259c --- /dev/null +++ b/src/d3d11/stubs.c @@ -0,0 +1,56 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "../common.h" +#include "log.h" + +#include <libplacebo/d3d11.h> + +const struct pl_d3d11_params pl_d3d11_default_params = { PL_D3D11_DEFAULTS }; + +pl_d3d11 pl_d3d11_create(pl_log log, const struct pl_d3d11_params *params) +{ + pl_fatal(log, "libplacebo compiled without D3D11 support!"); + return NULL; +} + +void pl_d3d11_destroy(pl_d3d11 *pd3d11) +{ + pl_d3d11 d3d11 = *pd3d11; + pl_assert(!d3d11); +} + +pl_d3d11 pl_d3d11_get(pl_gpu gpu) +{ + return NULL; +} + +pl_swapchain pl_d3d11_create_swapchain(pl_d3d11 d3d11, + const struct pl_d3d11_swapchain_params *params) +{ + pl_unreachable(); +} + +IDXGISwapChain *pl_d3d11_swapchain_unwrap(pl_swapchain sw) +{ + pl_unreachable(); +} + +pl_tex pl_d3d11_wrap(pl_gpu gpu, const struct pl_d3d11_wrap_params *params) +{ + pl_unreachable(); +} diff --git a/src/d3d11/swapchain.c b/src/d3d11/swapchain.c new file mode 100644 index 0000000..8a53632 --- /dev/null +++ b/src/d3d11/swapchain.c @@ -0,0 +1,667 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <windows.h> +#include <versionhelpers.h> +#include <math.h> + +#include "gpu.h" +#include "swapchain.h" +#include "utils.h" + +struct d3d11_csp_mapping { + DXGI_COLOR_SPACE_TYPE d3d11_csp; + DXGI_FORMAT d3d11_fmt; + struct pl_color_space out_csp; +}; + +static struct d3d11_csp_mapping map_pl_csp_to_d3d11(const struct pl_color_space *hint, + bool use_8bit_sdr) +{ + if (pl_color_space_is_hdr(hint) && + hint->transfer != PL_COLOR_TRC_LINEAR) + { + struct pl_color_space pl_csp = pl_color_space_hdr10; + pl_csp.hdr = (struct pl_hdr_metadata) { + // Whitelist only values that we support signalling metadata for + .prim = hint->hdr.prim, + .min_luma = hint->hdr.min_luma, + .max_luma = hint->hdr.max_luma, + .max_cll = hint->hdr.max_cll, + .max_fall = hint->hdr.max_fall, + }; + + return (struct d3d11_csp_mapping){ + .d3d11_csp = DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020, + .d3d11_fmt = DXGI_FORMAT_R10G10B10A2_UNORM, + .out_csp = pl_csp, + }; + } else if (pl_color_primaries_is_wide_gamut(hint->primaries) || + hint->transfer == PL_COLOR_TRC_LINEAR) + { + // scRGB a la VK_COLOR_SPACE_EXTENDED_SRGB_LINEAR_EXT, + // so could be utilized for HDR/wide gamut content as well + // with content that goes beyond 0.0-1.0. + return (struct d3d11_csp_mapping){ + .d3d11_csp = DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709, + .d3d11_fmt = DXGI_FORMAT_R16G16B16A16_FLOAT, + .out_csp = { + .primaries = PL_COLOR_PRIM_BT_709, + .transfer = PL_COLOR_TRC_LINEAR, + } + }; + } + + return (struct d3d11_csp_mapping){ + .d3d11_csp = DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709, + .d3d11_fmt = use_8bit_sdr ? DXGI_FORMAT_R8G8B8A8_UNORM : + DXGI_FORMAT_R10G10B10A2_UNORM, + .out_csp = pl_color_space_monitor, + }; +} + +struct priv { + struct pl_sw_fns impl; + + struct d3d11_ctx *ctx; + IDXGISwapChain *swapchain; + pl_tex backbuffer; + + // Currently requested or applied swap chain configuration. + // Affected by received colorspace hints. 
+ struct d3d11_csp_mapping csp_map; + + // Whether a swapchain backbuffer format reconfiguration has been + // requested by means of an additional resize action. + bool update_swapchain_format; + + // Whether 10-bit backbuffer format is disabled for SDR content. + bool disable_10bit_sdr; + + // Fallback to 8-bit RGB was triggered due to lack of compatiblity + bool fallback_8bit_rgb; +}; + +static void d3d11_sw_destroy(pl_swapchain sw) +{ + struct priv *p = PL_PRIV(sw); + + pl_tex_destroy(sw->gpu, &p->backbuffer); + SAFE_RELEASE(p->swapchain); + pl_free((void *) sw); +} + +static int d3d11_sw_latency(pl_swapchain sw) +{ + struct priv *p = PL_PRIV(sw); + struct d3d11_ctx *ctx = p->ctx; + + UINT max_latency; + IDXGIDevice1_GetMaximumFrameLatency(ctx->dxgi_dev, &max_latency); + return max_latency; +} + +static pl_tex get_backbuffer(pl_swapchain sw) +{ + struct priv *p = PL_PRIV(sw); + struct d3d11_ctx *ctx = p->ctx; + ID3D11Texture2D *backbuffer = NULL; + pl_tex tex = NULL; + + D3D(IDXGISwapChain_GetBuffer(p->swapchain, 0, &IID_ID3D11Texture2D, + (void **) &backbuffer)); + + tex = pl_d3d11_wrap(sw->gpu, pl_d3d11_wrap_params( + .tex = (ID3D11Resource *) backbuffer, + )); + +error: + SAFE_RELEASE(backbuffer); + return tex; +} + +static bool d3d11_sw_resize(pl_swapchain sw, int *width, int *height) +{ + struct priv *p = PL_PRIV(sw); + struct d3d11_ctx *ctx = p->ctx; + + DXGI_SWAP_CHAIN_DESC desc = {0}; + IDXGISwapChain_GetDesc(p->swapchain, &desc); + int w = PL_DEF(*width, desc.BufferDesc.Width); + int h = PL_DEF(*height, desc.BufferDesc.Height); + bool format_changed = p->csp_map.d3d11_fmt != desc.BufferDesc.Format; + if (format_changed) { + PL_INFO(ctx, "Attempting to reconfigure swap chain format: %s -> %s", + pl_get_dxgi_format_name(desc.BufferDesc.Format), + pl_get_dxgi_format_name(p->csp_map.d3d11_fmt)); + } + + if (w != desc.BufferDesc.Width || h != desc.BufferDesc.Height || + format_changed) + { + if (p->backbuffer) { + PL_ERR(sw, "Tried resizing the swapchain 
while a frame was in " + "progress! Please submit the current frame first."); + return false; + } + + HRESULT hr = IDXGISwapChain_ResizeBuffers(p->swapchain, 0, w, h, + p->csp_map.d3d11_fmt, desc.Flags); + + if (hr == E_INVALIDARG && p->csp_map.d3d11_fmt != DXGI_FORMAT_R8G8B8A8_UNORM) + { + PL_WARN(sw, "Reconfiguring the swapchain failed, re-trying with R8G8B8A8_UNORM fallback."); + D3D(IDXGISwapChain_ResizeBuffers(p->swapchain, 0, w, h, + DXGI_FORMAT_R8G8B8A8_UNORM, desc.Flags)); + + // re-configure the colorspace to 8-bit RGB SDR fallback + p->csp_map = map_pl_csp_to_d3d11(&pl_color_space_unknown, true); + p->fallback_8bit_rgb = true; + } + else if (FAILED(hr)) + { + PL_ERR(sw, "Reconfiguring the swapchain failed with error: %s", pl_hresult_to_str(hr)); + return false; + } + } + + *width = w; + *height = h; + p->update_swapchain_format = false; + return true; + +error: + return false; +} + +static bool d3d11_sw_start_frame(pl_swapchain sw, + struct pl_swapchain_frame *out_frame) +{ + struct priv *p = PL_PRIV(sw); + struct d3d11_ctx *ctx = p->ctx; + + if (ctx->is_failed) + return false; + if (p->backbuffer) { + PL_ERR(sw, "Attempted calling `pl_swapchain_start_frame` while a frame " + "was already in progress! 
Call `pl_swapchain_submit_frame` first."); + return false; + } + + if (p->update_swapchain_format) { + int w = 0, h = 0; + if (!d3d11_sw_resize(sw, &w, &h)) + return false; + } + + p->backbuffer = get_backbuffer(sw); + if (!p->backbuffer) + return false; + + int bits = 0; + pl_fmt fmt = p->backbuffer->params.format; + for (int i = 0; i < fmt->num_components; i++) + bits = PL_MAX(bits, fmt->component_depth[i]); + + *out_frame = (struct pl_swapchain_frame) { + .fbo = p->backbuffer, + .flipped = false, + .color_repr = { + .sys = PL_COLOR_SYSTEM_RGB, + .levels = PL_COLOR_LEVELS_FULL, + .alpha = PL_ALPHA_UNKNOWN, + .bits = { + .sample_depth = bits, + .color_depth = bits, + }, + }, + .color_space = p->csp_map.out_csp, + }; + + return true; +} + +static bool d3d11_sw_submit_frame(pl_swapchain sw) +{ + struct priv *p = PL_PRIV(sw); + struct d3d11_ctx *ctx = p->ctx; + + // Release the backbuffer. We shouldn't hold onto it unnecessarily, because + // it prevents external code from resizing the swapchain, which we'd + // otherwise support just fine. 
+ pl_tex_destroy(sw->gpu, &p->backbuffer); + + return !ctx->is_failed; +} + +static void d3d11_sw_swap_buffers(pl_swapchain sw) +{ + struct priv *p = PL_PRIV(sw); + struct d3d11_ctx *ctx = p->ctx; + + // Present can fail with a device removed error + D3D(IDXGISwapChain_Present(p->swapchain, 1, 0)); + +error: + return; +} + +static DXGI_HDR_METADATA_HDR10 set_hdr10_metadata(const struct pl_hdr_metadata *hdr) +{ + return (DXGI_HDR_METADATA_HDR10) { + .RedPrimary = { roundf(hdr->prim.red.x * 50000), + roundf(hdr->prim.red.y * 50000) }, + .GreenPrimary = { roundf(hdr->prim.green.x * 50000), + roundf(hdr->prim.green.y * 50000) }, + .BluePrimary = { roundf(hdr->prim.blue.x * 50000), + roundf(hdr->prim.blue.y * 50000) }, + .WhitePoint = { roundf(hdr->prim.white.x * 50000), + roundf(hdr->prim.white.y * 50000) }, + .MaxMasteringLuminance = roundf(hdr->max_luma), + .MinMasteringLuminance = roundf(hdr->min_luma * 10000), + .MaxContentLightLevel = roundf(hdr->max_cll), + .MaxFrameAverageLightLevel = roundf(hdr->max_fall), + }; +} + +static bool set_swapchain_metadata(struct d3d11_ctx *ctx, + IDXGISwapChain3 *swapchain3, + struct d3d11_csp_mapping *csp_map) +{ + IDXGISwapChain4 *swapchain4 = NULL; + bool ret = false; + bool is_hdr = pl_color_space_is_hdr(&csp_map->out_csp); + DXGI_HDR_METADATA_HDR10 hdr10 = is_hdr ? + set_hdr10_metadata(&csp_map->out_csp.hdr) : (DXGI_HDR_METADATA_HDR10){ 0 }; + + D3D(IDXGISwapChain3_SetColorSpace1(swapchain3, csp_map->d3d11_csp)); + + // if we succeeded to set the color space, it's good enough, + // since older versions of Windows 10 will not have swapchain v4 available. + ret = true; + + if (FAILED(IDXGISwapChain3_QueryInterface(swapchain3, &IID_IDXGISwapChain4, + (void **)&swapchain4))) + { + PL_TRACE(ctx, "v4 swap chain interface is not available, skipping HDR10 " + "metadata configuration."); + goto error; + } + + D3D(IDXGISwapChain4_SetHDRMetaData(swapchain4, + is_hdr ? 
+ DXGI_HDR_METADATA_TYPE_HDR10 : + DXGI_HDR_METADATA_TYPE_NONE, + is_hdr ? sizeof(hdr10) : 0, + is_hdr ? &hdr10 : NULL)); + + goto success; + +error: + csp_map->out_csp.hdr = (struct pl_hdr_metadata) { 0 }; +success: + SAFE_RELEASE(swapchain4); + return ret; +} + +static bool d3d11_format_supported(struct d3d11_ctx *ctx, DXGI_FORMAT fmt) +{ + UINT sup = 0; + UINT wanted_sup = + D3D11_FORMAT_SUPPORT_TEXTURE2D | D3D11_FORMAT_SUPPORT_DISPLAY | + D3D11_FORMAT_SUPPORT_SHADER_SAMPLE | D3D11_FORMAT_SUPPORT_RENDER_TARGET | + D3D11_FORMAT_SUPPORT_BLENDABLE; + + D3D(ID3D11Device_CheckFormatSupport(ctx->dev, fmt, &sup)); + + return (sup & wanted_sup) == wanted_sup; + +error: + return false; +} + +static bool d3d11_csp_supported(struct d3d11_ctx *ctx, + IDXGISwapChain3 *swapchain3, + DXGI_COLOR_SPACE_TYPE color_space) +{ + UINT csp_support_flags = 0; + + D3D(IDXGISwapChain3_CheckColorSpaceSupport(swapchain3, + color_space, + &csp_support_flags)); + + return (csp_support_flags & DXGI_SWAP_CHAIN_COLOR_SPACE_SUPPORT_FLAG_PRESENT); + +error: + return false; +} + +static void update_swapchain_color_config(pl_swapchain sw, + const struct pl_color_space *csp, + bool is_internal) +{ + struct priv *p = PL_PRIV(sw); + struct d3d11_ctx *ctx = p->ctx; + IDXGISwapChain3 *swapchain3 = NULL; + struct d3d11_csp_mapping old_map = p->csp_map; + + // ignore config changes in fallback mode + if (p->fallback_8bit_rgb) + goto cleanup; + + HRESULT hr = IDXGISwapChain_QueryInterface(p->swapchain, &IID_IDXGISwapChain3, + (void **)&swapchain3); + if (FAILED(hr)) { + PL_TRACE(ctx, "v3 swap chain interface is not available, skipping " + "color space configuration."); + swapchain3 = NULL; + } + + // Lack of swap chain v3 means we cannot control swap chain color space; + // Only effective formats are the 8 and 10 bit RGB ones. + struct d3d11_csp_mapping csp_map = + map_pl_csp_to_d3d11(swapchain3 ? 
csp : &pl_color_space_unknown, + p->disable_10bit_sdr); + + if (p->csp_map.d3d11_fmt == csp_map.d3d11_fmt && + p->csp_map.d3d11_csp == csp_map.d3d11_csp && + pl_color_space_equal(&p->csp_map.out_csp, &csp_map.out_csp)) + goto cleanup; + + PL_INFO(ctx, "%s swap chain configuration%s: format: %s, color space: %s.", + is_internal ? "Initial" : "New", + is_internal ? "" : " received from hint", + pl_get_dxgi_format_name(csp_map.d3d11_fmt), + pl_get_dxgi_csp_name(csp_map.d3d11_csp)); + + bool fmt_supported = d3d11_format_supported(ctx, csp_map.d3d11_fmt); + bool csp_supported = swapchain3 ? + d3d11_csp_supported(ctx, swapchain3, csp_map.d3d11_csp) : true; + if (!fmt_supported || !csp_supported) { + PL_ERR(ctx, "New swap chain configuration was deemed not supported: " + "format: %s, color space: %s. Failling back to 8bit RGB.", + fmt_supported ? "supported" : "unsupported", + csp_supported ? "supported" : "unsupported"); + // fall back to 8bit sRGB if requested configuration is not supported + csp_map = map_pl_csp_to_d3d11(&pl_color_space_unknown, true); + } + + p->csp_map = csp_map; + p->update_swapchain_format = true; + + if (!swapchain3) + goto cleanup; + + if (!set_swapchain_metadata(ctx, swapchain3, &p->csp_map)) { + // format succeeded, but color space configuration failed + p->csp_map = old_map; + p->csp_map.d3d11_fmt = csp_map.d3d11_fmt; + } + + pl_d3d11_flush_message_queue(ctx, "After colorspace hint"); + +cleanup: + SAFE_RELEASE(swapchain3); +} + +static void d3d11_sw_colorspace_hint(pl_swapchain sw, + const struct pl_color_space *csp) +{ + update_swapchain_color_config(sw, csp, false); +} + +IDXGISwapChain *pl_d3d11_swapchain_unwrap(pl_swapchain sw) +{ + struct priv *p = PL_PRIV(sw); + IDXGISwapChain_AddRef(p->swapchain); + return p->swapchain; +} + +static const struct pl_sw_fns d3d11_swapchain = { + .destroy = d3d11_sw_destroy, + .latency = d3d11_sw_latency, + .resize = d3d11_sw_resize, + .colorspace_hint = d3d11_sw_colorspace_hint, + .start_frame = 
d3d11_sw_start_frame, + .submit_frame = d3d11_sw_submit_frame, + .swap_buffers = d3d11_sw_swap_buffers, +}; + +static HRESULT create_swapchain_1_2(struct d3d11_ctx *ctx, + IDXGIFactory2 *factory, const struct pl_d3d11_swapchain_params *params, + bool flip, UINT width, UINT height, DXGI_FORMAT format, + IDXGISwapChain **swapchain_out) +{ + IDXGISwapChain *swapchain = NULL; + IDXGISwapChain1 *swapchain1 = NULL; + HRESULT hr; + + DXGI_SWAP_CHAIN_DESC1 desc = { + .Width = width, + .Height = height, + .Format = format, + .SampleDesc.Count = 1, + .BufferUsage = DXGI_USAGE_SHADER_INPUT | DXGI_USAGE_RENDER_TARGET_OUTPUT, + .Flags = params->flags, + }; + + if (ID3D11Device_GetFeatureLevel(ctx->dev) >= D3D_FEATURE_LEVEL_11_0) + desc.BufferUsage |= DXGI_USAGE_UNORDERED_ACCESS; + + if (flip) { + UINT max_latency; + IDXGIDevice1_GetMaximumFrameLatency(ctx->dxgi_dev, &max_latency); + + // Make sure we have at least enough buffers to allow `max_latency` + // frames in-flight at once, plus one frame for the frontbuffer + desc.BufferCount = max_latency + 1; + + if (IsWindows10OrGreater()) { + desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; + } else { + desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL; + } + + desc.BufferCount = PL_MIN(desc.BufferCount, DXGI_MAX_SWAP_CHAIN_BUFFERS); + } else { + desc.SwapEffect = DXGI_SWAP_EFFECT_DISCARD; + desc.BufferCount = 1; + } + + if (params->window) { + hr = IDXGIFactory2_CreateSwapChainForHwnd(factory, (IUnknown *) ctx->dev, + params->window, &desc, NULL, NULL, &swapchain1); + } else if (params->core_window) { + hr = IDXGIFactory2_CreateSwapChainForCoreWindow(factory, + (IUnknown *) ctx->dev, params->core_window, &desc, NULL, &swapchain1); + } else { + hr = IDXGIFactory2_CreateSwapChainForComposition(factory, + (IUnknown *) ctx->dev, &desc, NULL, &swapchain1); + } + if (FAILED(hr)) + goto done; + hr = IDXGISwapChain1_QueryInterface(swapchain1, &IID_IDXGISwapChain, + (void **) &swapchain); + if (FAILED(hr)) + goto done; + + *swapchain_out 
= swapchain; + swapchain = NULL; + +done: + SAFE_RELEASE(swapchain1); + SAFE_RELEASE(swapchain); + return hr; +} + +static HRESULT create_swapchain_1_1(struct d3d11_ctx *ctx, + IDXGIFactory1 *factory, const struct pl_d3d11_swapchain_params *params, + UINT width, UINT height, DXGI_FORMAT format, IDXGISwapChain **swapchain_out) +{ + DXGI_SWAP_CHAIN_DESC desc = { + .BufferDesc = { + .Width = width, + .Height = height, + .Format = format, + }, + .SampleDesc.Count = 1, + .BufferUsage = DXGI_USAGE_SHADER_INPUT | DXGI_USAGE_RENDER_TARGET_OUTPUT, + .BufferCount = 1, + .OutputWindow = params->window, + .Windowed = TRUE, + .SwapEffect = DXGI_SWAP_EFFECT_DISCARD, + .Flags = params->flags, + }; + + return IDXGIFactory1_CreateSwapChain(factory, (IUnknown *) ctx->dev, &desc, + swapchain_out); +} + +static IDXGISwapChain *create_swapchain(struct d3d11_ctx *ctx, + const struct pl_d3d11_swapchain_params *params, DXGI_FORMAT format) +{ + IDXGIDevice1 *dxgi_dev = NULL; + IDXGIAdapter1 *adapter = NULL; + IDXGIFactory1 *factory = NULL; + IDXGIFactory2 *factory2 = NULL; + IDXGISwapChain *swapchain = NULL; + bool success = false; + HRESULT hr; + + D3D(ID3D11Device_QueryInterface(ctx->dev, &IID_IDXGIDevice1, + (void **) &dxgi_dev)); + D3D(IDXGIDevice1_GetParent(dxgi_dev, &IID_IDXGIAdapter1, (void **) &adapter)); + D3D(IDXGIAdapter1_GetParent(adapter, &IID_IDXGIFactory1, (void **) &factory)); + + hr = IDXGIFactory1_QueryInterface(factory, &IID_IDXGIFactory2, + (void **) &factory2); + if (FAILED(hr)) + factory2 = NULL; + + bool flip = factory2 && !params->blit; + UINT width = PL_DEF(params->width, 1); + UINT height = PL_DEF(params->height, 1); + + // If both width and height are unset, the default size is the window size + if (params->window && params->width == 0 && params->height == 0) { + RECT rc; + if (GetClientRect(params->window, &rc)) { + width = PL_DEF(rc.right - rc.left, 1); + height = PL_DEF(rc.bottom - rc.top, 1); + } + } + + // Return here to retry creating the swapchain + do { + 
if (factory2) { + // Create a DXGI 1.2+ (Windows 8+) swap chain if possible + hr = create_swapchain_1_2(ctx, factory2, params, flip, width, + height, format, &swapchain); + } else { + // Fall back to DXGI 1.1 (Windows 7) + hr = create_swapchain_1_1(ctx, factory, params, width, height, + format, &swapchain); + } + if (SUCCEEDED(hr)) + break; + + pl_d3d11_after_error(ctx, hr); + if (flip) { + PL_DEBUG(ctx, "Failed to create flip-model swapchain, trying bitblt"); + flip = false; + continue; + } + + PL_FATAL(ctx, "Failed to create swapchain: %s", pl_hresult_to_str(hr)); + goto error; + } while (true); + + // Prevent DXGI from making changes to the window, otherwise it will hook + // the Alt+Enter keystroke and make it trigger an ugly transition to + // legacy exclusive fullscreen mode. + IDXGIFactory_MakeWindowAssociation(factory, params->window, + DXGI_MWA_NO_WINDOW_CHANGES | DXGI_MWA_NO_ALT_ENTER | + DXGI_MWA_NO_PRINT_SCREEN); + + success = true; +error: + if (!success) + SAFE_RELEASE(swapchain); + SAFE_RELEASE(factory2); + SAFE_RELEASE(factory); + SAFE_RELEASE(adapter); + SAFE_RELEASE(dxgi_dev); + return swapchain; +} + +pl_swapchain pl_d3d11_create_swapchain(pl_d3d11 d3d11, + const struct pl_d3d11_swapchain_params *params) +{ + struct d3d11_ctx *ctx = PL_PRIV(d3d11); + pl_gpu gpu = d3d11->gpu; + bool success = false; + + struct pl_swapchain_t *sw = pl_zalloc_obj(NULL, sw, struct priv); + struct priv *p = PL_PRIV(sw); + *sw = (struct pl_swapchain_t) { + .log = gpu->log, + .gpu = gpu, + }; + *p = (struct priv) { + .impl = d3d11_swapchain, + .ctx = ctx, + // default to standard 8 or 10 bit RGB, unset pl_color_space + .csp_map = { + .d3d11_fmt = params->disable_10bit_sdr ? + DXGI_FORMAT_R8G8B8A8_UNORM : + (d3d11_format_supported(ctx, DXGI_FORMAT_R10G10B10A2_UNORM) ? 
+ DXGI_FORMAT_R10G10B10A2_UNORM : DXGI_FORMAT_R8G8B8A8_UNORM), + }, + .disable_10bit_sdr = params->disable_10bit_sdr, + }; + + if (params->swapchain) { + p->swapchain = params->swapchain; + IDXGISwapChain_AddRef(params->swapchain); + } else { + p->swapchain = create_swapchain(ctx, params, p->csp_map.d3d11_fmt); + if (!p->swapchain) + goto error; + } + + DXGI_SWAP_CHAIN_DESC scd = {0}; + IDXGISwapChain_GetDesc(p->swapchain, &scd); + if (scd.SwapEffect == DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL || + scd.SwapEffect == DXGI_SWAP_EFFECT_FLIP_DISCARD) { + PL_INFO(gpu, "Using flip-model presentation"); + } else { + PL_INFO(gpu, "Using bitblt-model presentation"); + } + + p->csp_map.d3d11_fmt = scd.BufferDesc.Format; + + update_swapchain_color_config(sw, &pl_color_space_unknown, true); + + success = true; +error: + if (!success) { + PL_FATAL(gpu, "Failed to create Direct3D 11 swapchain"); + d3d11_sw_destroy(sw); + sw = NULL; + } + return sw; +} diff --git a/src/d3d11/utils.c b/src/d3d11/utils.c new file mode 100644 index 0000000..47154b5 --- /dev/null +++ b/src/d3d11/utils.c @@ -0,0 +1,500 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <string.h> + +#include "utils.h" + +// D3D11.3 message IDs, not present in mingw-w64 v9 +#define D3D11_MESSAGE_ID_CREATE_FENCE (0x30020c) +#define D3D11_MESSAGE_ID_DESTROY_FENCE (0x30020a) + +#ifdef PL_HAVE_DXGI_DEBUG +static enum pl_log_level log_level_override(unsigned int id) +{ + switch (id) { + // These warnings can happen when a pl_timer is used too often before a + // blocking pl_swapchain_swap_buffers() or pl_gpu_finish(), overflowing + // its internal ring buffer and causing older query objects to be reused + // before their results are read. This is expected behavior, so reduce + // the log level to PL_LOG_TRACE to prevent log spam. + case D3D11_MESSAGE_ID_QUERY_BEGIN_ABANDONING_PREVIOUS_RESULTS: + case D3D11_MESSAGE_ID_QUERY_END_ABANDONING_PREVIOUS_RESULTS: + return PL_LOG_TRACE; + + // D3D11 writes log messages every time an object is created or + // destroyed. That results in a lot of log spam, so force PL_LOG_TRACE. +#define OBJ_LIFETIME_MESSAGES(obj) \ + case D3D11_MESSAGE_ID_CREATE_ ## obj: \ + case D3D11_MESSAGE_ID_DESTROY_ ## obj + + OBJ_LIFETIME_MESSAGES(CONTEXT): + OBJ_LIFETIME_MESSAGES(BUFFER): + OBJ_LIFETIME_MESSAGES(TEXTURE1D): + OBJ_LIFETIME_MESSAGES(TEXTURE2D): + OBJ_LIFETIME_MESSAGES(TEXTURE3D): + OBJ_LIFETIME_MESSAGES(SHADERRESOURCEVIEW): + OBJ_LIFETIME_MESSAGES(RENDERTARGETVIEW): + OBJ_LIFETIME_MESSAGES(DEPTHSTENCILVIEW): + OBJ_LIFETIME_MESSAGES(VERTEXSHADER): + OBJ_LIFETIME_MESSAGES(HULLSHADER): + OBJ_LIFETIME_MESSAGES(DOMAINSHADER): + OBJ_LIFETIME_MESSAGES(GEOMETRYSHADER): + OBJ_LIFETIME_MESSAGES(PIXELSHADER): + OBJ_LIFETIME_MESSAGES(INPUTLAYOUT): + OBJ_LIFETIME_MESSAGES(SAMPLER): + OBJ_LIFETIME_MESSAGES(BLENDSTATE): + OBJ_LIFETIME_MESSAGES(DEPTHSTENCILSTATE): + OBJ_LIFETIME_MESSAGES(RASTERIZERSTATE): + OBJ_LIFETIME_MESSAGES(QUERY): + OBJ_LIFETIME_MESSAGES(PREDICATE): + OBJ_LIFETIME_MESSAGES(COUNTER): + OBJ_LIFETIME_MESSAGES(COMMANDLIST): + OBJ_LIFETIME_MESSAGES(CLASSINSTANCE): + OBJ_LIFETIME_MESSAGES(CLASSLINKAGE): + 
OBJ_LIFETIME_MESSAGES(COMPUTESHADER): + OBJ_LIFETIME_MESSAGES(UNORDEREDACCESSVIEW): + OBJ_LIFETIME_MESSAGES(VIDEODECODER): + OBJ_LIFETIME_MESSAGES(VIDEOPROCESSORENUM): + OBJ_LIFETIME_MESSAGES(VIDEOPROCESSOR): + OBJ_LIFETIME_MESSAGES(DECODEROUTPUTVIEW): + OBJ_LIFETIME_MESSAGES(PROCESSORINPUTVIEW): + OBJ_LIFETIME_MESSAGES(PROCESSOROUTPUTVIEW): + OBJ_LIFETIME_MESSAGES(DEVICECONTEXTSTATE): + OBJ_LIFETIME_MESSAGES(FENCE): + return PL_LOG_TRACE; + +#undef OBJ_LIFETIME_MESSAGES + + // Don't force the log level of any other messages. It will be mapped + // from the D3D severity code instead. + default: + return PL_LOG_NONE; + } +} +#endif + +// Copies the messages stored in the DXGI info queue to the libplacebo log +// (subject to the active log level), preceded by `header`, and clears the +// stored messages afterwards. Does nothing unless built with +// PL_HAVE_DXGI_DEBUG and `ctx->iqueue` is set. +void pl_d3d11_flush_message_queue(struct d3d11_ctx *ctx, const char *header) +{ +#ifdef PL_HAVE_DXGI_DEBUG + if (!ctx->iqueue) + return; + + // Default log level for each DXGI severity; log_level_override() takes + // precedence for D3D11 messages it recognizes + static const enum pl_log_level severity_map[] = { + [DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION] = PL_LOG_FATAL, + [DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR] = PL_LOG_ERR, + [DXGI_INFO_QUEUE_MESSAGE_SEVERITY_WARNING] = PL_LOG_WARN, + [DXGI_INFO_QUEUE_MESSAGE_SEVERITY_INFO] = PL_LOG_DEBUG, + [DXGI_INFO_QUEUE_MESSAGE_SEVERITY_MESSAGE] = PL_LOG_DEBUG, + }; + + enum pl_log_level header_printed = PL_LOG_NONE; + + // After the storage limit is reached and + // ID3D11InfoQueue::ClearStoredMessages is called, the message counter seems + // to be reset to -1, which is a very large number when read as uint64_t, + // and any subsequent call to ID3D11InfoQueue::GetNumStoredMessages will be + // off by one. Use + // ID3D11InfoQueue_GetNumStoredMessagesAllowedByRetrievalFilter without any + // filter set instead, which seems to be unaffected by this bug and returns + // the correct number of messages. + // IDXGIInfoQueue seems to be unaffected, but keep the same way of + // retrieval anyway. + uint64_t messages = IDXGIInfoQueue_GetNumStoredMessagesAllowedByRetrievalFilters(ctx->iqueue, DXGI_DEBUG_ALL); + + // Just to be on the safe side, check also for the mentioned -1 value... + 
+ if (!messages || messages == UINT64_C(-1)) + return; + + uint64_t discarded = + IDXGIInfoQueue_GetNumMessagesDiscardedByMessageCountLimit(ctx->iqueue, DXGI_DEBUG_ALL); + if (discarded > ctx->last_discarded) { + PL_WARN(ctx, "%s:", header); + header_printed = PL_LOG_WARN; + + // Notify number of messages skipped due to the message count limit + PL_WARN(ctx, " (skipped %"PRIu64" debug layer messages)", + discarded - ctx->last_discarded); + ctx->last_discarded = discarded; + } + + // Copy debug layer messages to libplacebo's log output + for (uint64_t i = 0; i < messages; i++) { + SIZE_T len; + if (FAILED(IDXGIInfoQueue_GetMessage(ctx->iqueue, DXGI_DEBUG_ALL, i, NULL, &len))) + goto error; + + pl_grow((void *) ctx->d3d11, &ctx->dxgi_msg, len); + DXGI_INFO_QUEUE_MESSAGE *dxgi_msg = ctx->dxgi_msg; + + if (FAILED(IDXGIInfoQueue_GetMessage(ctx->iqueue, DXGI_DEBUG_ALL, i, dxgi_msg, &len))) + goto error; + + enum pl_log_level level = PL_LOG_NONE; + if (IsEqualGUID(&dxgi_msg->Producer, &DXGI_DEBUG_D3D11)) + level = log_level_override(dxgi_msg->ID); + if (level == PL_LOG_NONE) + level = severity_map[dxgi_msg->Severity]; + + if (pl_msg_test(ctx->log, level)) { + // If the header hasn't been printed, or it was printed for a lower + // log level than the current message, print it (again) + if (header_printed == PL_LOG_NONE || header_printed > level) { + PL_MSG(ctx, level, "%s:", header); + pl_log_stack_trace(ctx->log, level); + header_printed = level; + } + + PL_MSG(ctx, level, " %d: %.*s", (int) dxgi_msg->ID, + (int) dxgi_msg->DescriptionByteLength, dxgi_msg->pDescription); + } + + if (dxgi_msg->Severity <= DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR) + pl_debug_abort(); + } + +error: + IDXGIInfoQueue_ClearStoredMessages(ctx->iqueue, DXGI_DEBUG_ALL); +#endif +} + +HRESULT pl_d3d11_check_device_removed(struct d3d11_ctx *ctx, HRESULT hr) +{ + // This can be called before we have a device + if (!ctx->dev) + return hr; + + switch (hr) { + case DXGI_ERROR_DEVICE_HUNG: + case 
DXGI_ERROR_DEVICE_RESET: + case DXGI_ERROR_DRIVER_INTERNAL_ERROR: + ctx->is_failed = true; + break; + case D3DDDIERR_DEVICEREMOVED: + case DXGI_ERROR_DEVICE_REMOVED: + hr = ID3D11Device_GetDeviceRemovedReason(ctx->dev); + ctx->is_failed = true; + break; + } + if (ctx->is_failed) + PL_ERR(ctx, "Device lost!"); + return hr; +} + +HRESULT pl_d3d11_after_error(struct d3d11_ctx *ctx, HRESULT hr) +{ + hr = pl_d3d11_check_device_removed(ctx, hr); + pl_d3d11_flush_message_queue(ctx, "After error"); + return hr; +} + +struct dll_version pl_get_dll_version(const wchar_t *name) +{ + void *data = NULL; + struct dll_version ret = {0}; + + DWORD size = GetFileVersionInfoSizeW(name, &(DWORD) {0}); + if (!size) + goto error; + data = pl_alloc(NULL, size); + + if (!GetFileVersionInfoW(name, 0, size, data)) + goto error; + + VS_FIXEDFILEINFO *ffi; + UINT ffi_len; + if (!VerQueryValueW(data, L"\\", (void**)&ffi, &ffi_len)) + goto error; + if (ffi_len < sizeof(*ffi)) + goto error; + + ret = (struct dll_version) { + .major = HIWORD(ffi->dwFileVersionMS), + .minor = LOWORD(ffi->dwFileVersionMS), + .build = HIWORD(ffi->dwFileVersionLS), + .revision = LOWORD(ffi->dwFileVersionLS), + }; + +error: + pl_free(data); + return ret; +} + +wchar_t *pl_from_utf8(void *ctx, const char *str) +{ + int count = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0); + pl_assert(count > 0); + wchar_t *ret = pl_calloc_ptr(ctx, count, ret); + MultiByteToWideChar(CP_UTF8, 0, str, -1, ret, count); + return ret; +} + +char *pl_to_utf8(void *ctx, const wchar_t *str) +{ + int count = WideCharToMultiByte(CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL); + pl_assert(count > 0); + char *ret = pl_calloc_ptr(ctx, count, ret); + WideCharToMultiByte(CP_UTF8, 0, str, -1, ret, count, NULL, NULL); + return ret; +} + +static const char *hresult_str(HRESULT hr) +{ + switch (hr) { +#define CASE(name) case name: return #name + CASE(S_OK); + CASE(S_FALSE); + CASE(E_ABORT); + CASE(E_ACCESSDENIED); + CASE(E_FAIL); + CASE(E_HANDLE); + 
CASE(E_INVALIDARG); + CASE(E_NOINTERFACE); + CASE(E_NOTIMPL); + CASE(E_OUTOFMEMORY); + CASE(E_POINTER); + CASE(E_UNEXPECTED); + + CASE(DXGI_ERROR_ACCESS_DENIED); + CASE(DXGI_ERROR_ACCESS_LOST); + CASE(DXGI_ERROR_CANNOT_PROTECT_CONTENT); + CASE(DXGI_ERROR_DEVICE_HUNG); + CASE(DXGI_ERROR_DEVICE_REMOVED); + CASE(DXGI_ERROR_DEVICE_RESET); + CASE(DXGI_ERROR_DRIVER_INTERNAL_ERROR); + CASE(DXGI_ERROR_FRAME_STATISTICS_DISJOINT); + CASE(DXGI_ERROR_GRAPHICS_VIDPN_SOURCE_IN_USE); + CASE(DXGI_ERROR_INVALID_CALL); + CASE(DXGI_ERROR_MORE_DATA); + CASE(DXGI_ERROR_NAME_ALREADY_EXISTS); + CASE(DXGI_ERROR_NONEXCLUSIVE); + CASE(DXGI_ERROR_NOT_CURRENTLY_AVAILABLE); + CASE(DXGI_ERROR_NOT_FOUND); + CASE(DXGI_ERROR_REMOTE_CLIENT_DISCONNECTED); + CASE(DXGI_ERROR_REMOTE_OUTOFMEMORY); + CASE(DXGI_ERROR_RESTRICT_TO_OUTPUT_STALE); + CASE(DXGI_ERROR_SDK_COMPONENT_MISSING); + CASE(DXGI_ERROR_SESSION_DISCONNECTED); + CASE(DXGI_ERROR_UNSUPPORTED); + CASE(DXGI_ERROR_WAIT_TIMEOUT); + CASE(DXGI_ERROR_WAS_STILL_DRAWING); +#undef CASE + + default: + return "Unknown error"; + } +} + +static char *format_error(void *ctx, DWORD error) +{ + wchar_t *wstr; + if (!FormatMessageW(FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS, NULL, error, + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + (LPWSTR)&wstr, 0, NULL)) + { + return NULL; + } + + // Trim any trailing newline from the message + for (int i = wcslen(wstr) - 1; i >= 0; i--) { + if (wstr[i] != '\r' && wstr[i] != '\n') { + wstr[i + 1] = '\0'; + break; + } + } + + char *str = pl_to_utf8(ctx, wstr); + LocalFree(wstr); + return str; +} + +char *pl_hresult_to_str_buf(char *buf, size_t buf_size, HRESULT hr) +{ + char *fmsg = format_error(NULL, hr); + const char *code = hresult_str(hr); + if (fmsg) { + snprintf(buf, buf_size, "%s (%s, 0x%08lx)", fmsg, code, hr); + } else { + snprintf(buf, buf_size, "%s, 0x%08lx", code, hr); + } + pl_free(fmsg); + return buf; +} + +#define D3D11_DXGI_ENUM(prefix, define) { case 
prefix ## define: return #define; } + +const char *pl_get_dxgi_format_name(DXGI_FORMAT fmt) +{ + switch (fmt) { + D3D11_DXGI_ENUM(DXGI_FORMAT_, UNKNOWN); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32A32_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32A32_FLOAT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32A32_UINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32A32_SINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32_FLOAT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32_UINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32_SINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16B16A16_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16B16A16_FLOAT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16B16A16_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16B16A16_UINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16B16A16_SNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16B16A16_SINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32_FLOAT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32_UINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32_SINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G8X24_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, D32_FLOAT_S8X24_UINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32_FLOAT_X8X24_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, X32_TYPELESS_G8X24_UINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R10G10B10A2_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R10G10B10A2_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R10G10B10A2_UINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R11G11B10_FLOAT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8B8A8_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8B8A8_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8B8A8_UNORM_SRGB); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8B8A8_UINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8B8A8_SNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8B8A8_SINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16_FLOAT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16_UINT); + 
D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16_SNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16_SINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, D32_FLOAT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32_FLOAT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32_UINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32_SINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R24G8_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, D24_UNORM_S8_UINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R24_UNORM_X8_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, X24_TYPELESS_G8_UINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8_UINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8_SNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8_SINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16_FLOAT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, D16_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16_UINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16_SNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16_SINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8_UINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8_SNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8_SINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, A8_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R1_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R9G9B9E5_SHAREDEXP); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8_B8G8_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, G8R8_G8B8_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC1_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC1_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC1_UNORM_SRGB); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC2_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC2_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC2_UNORM_SRGB); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC3_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC3_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC3_UNORM_SRGB); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC4_TYPELESS); + 
D3D11_DXGI_ENUM(DXGI_FORMAT_, BC4_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC4_SNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC5_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC5_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC5_SNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, B5G6R5_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, B5G5R5A1_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, B8G8R8A8_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, B8G8R8X8_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R10G10B10_XR_BIAS_A2_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, B8G8R8A8_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, B8G8R8A8_UNORM_SRGB); + D3D11_DXGI_ENUM(DXGI_FORMAT_, B8G8R8X8_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, B8G8R8X8_UNORM_SRGB); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC6H_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC6H_UF16); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC6H_SF16); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC7_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC7_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC7_UNORM_SRGB); + D3D11_DXGI_ENUM(DXGI_FORMAT_, AYUV); + D3D11_DXGI_ENUM(DXGI_FORMAT_, Y410); + D3D11_DXGI_ENUM(DXGI_FORMAT_, Y416); + D3D11_DXGI_ENUM(DXGI_FORMAT_, NV12); + D3D11_DXGI_ENUM(DXGI_FORMAT_, P010); + D3D11_DXGI_ENUM(DXGI_FORMAT_, P016); + D3D11_DXGI_ENUM(DXGI_FORMAT_, 420_OPAQUE); + D3D11_DXGI_ENUM(DXGI_FORMAT_, YUY2); + D3D11_DXGI_ENUM(DXGI_FORMAT_, Y210); + D3D11_DXGI_ENUM(DXGI_FORMAT_, Y216); + D3D11_DXGI_ENUM(DXGI_FORMAT_, NV11); + D3D11_DXGI_ENUM(DXGI_FORMAT_, AI44); + D3D11_DXGI_ENUM(DXGI_FORMAT_, IA44); + D3D11_DXGI_ENUM(DXGI_FORMAT_, P8); + D3D11_DXGI_ENUM(DXGI_FORMAT_, A8P8); + D3D11_DXGI_ENUM(DXGI_FORMAT_, B4G4R4A4_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, P208); + D3D11_DXGI_ENUM(DXGI_FORMAT_, V208); + D3D11_DXGI_ENUM(DXGI_FORMAT_, V408); + D3D11_DXGI_ENUM(DXGI_FORMAT_, FORCE_UINT); + } + + return "<unknown>"; +} + +const char *pl_get_dxgi_csp_name(DXGI_COLOR_SPACE_TYPE csp) +{ + switch ((int) csp) { + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_FULL_G22_NONE_P709); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, 
RGB_FULL_G10_NONE_P709); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_STUDIO_G22_NONE_P709); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_STUDIO_G22_NONE_P2020); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RESERVED); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_FULL_G22_NONE_P709_X601); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G22_LEFT_P601); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_FULL_G22_LEFT_P601); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G22_LEFT_P709); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_FULL_G22_LEFT_P709); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G22_LEFT_P2020); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_FULL_G22_LEFT_P2020); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_FULL_G2084_NONE_P2020); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G2084_LEFT_P2020); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_STUDIO_G2084_NONE_P2020); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G22_TOPLEFT_P2020); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G2084_TOPLEFT_P2020); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_FULL_G22_NONE_P2020); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_GHLG_TOPLEFT_P2020); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_FULL_GHLG_TOPLEFT_P2020); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_STUDIO_G24_NONE_P709); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_STUDIO_G24_NONE_P2020); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G24_LEFT_P709); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G24_LEFT_P2020); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G24_TOPLEFT_P2020); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, CUSTOM); + } + + return "<unknown>"; +} diff --git a/src/d3d11/utils.h b/src/d3d11/utils.h new file mode 100644 index 0000000..86b4072 --- /dev/null +++ b/src/d3d11/utils.h @@ -0,0 +1,88 @@ +/* + * This file is part of libplacebo. 
+ * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#pragma once + +#include "common.h" + +#define DXGI_COLOR_SPACE_RGB_STUDIO_G24_NONE_P709 ((DXGI_COLOR_SPACE_TYPE)20) +#define DXGI_COLOR_SPACE_RGB_STUDIO_G24_NONE_P2020 ((DXGI_COLOR_SPACE_TYPE)21) +#define DXGI_COLOR_SPACE_YCBCR_STUDIO_G24_LEFT_P709 ((DXGI_COLOR_SPACE_TYPE)22) +#define DXGI_COLOR_SPACE_YCBCR_STUDIO_G24_LEFT_P2020 ((DXGI_COLOR_SPACE_TYPE)23) +#define DXGI_COLOR_SPACE_YCBCR_STUDIO_G24_TOPLEFT_P2020 ((DXGI_COLOR_SPACE_TYPE)24) + +// Flush debug messages from D3D11's info queue to libplacebo's log output. +// Should be called regularly. +void pl_d3d11_flush_message_queue(struct d3d11_ctx *ctx, const char *header); + +// Some D3D11 functions can fail with a set of HRESULT codes which indicate the +// device has been removed. This is equivalent to libplacebo's gpu_is_failed +// state and indicates that the pl_gpu needs to be recreated. This function +// checks for one of those HRESULTs, sets the failed state, and returns a +// specific HRESULT that indicates why the device was removed (eg. GPU hang, +// driver crash, etc.) +HRESULT pl_d3d11_check_device_removed(struct d3d11_ctx *ctx, HRESULT hr); + +// Helper function for the D3D() macro, though it can be called directly when +// handling D3D11 errors if the D3D() macro isn't suitable for some reason. 
+// Calls `pl_d3d11_check_device_removed` and `pl_d3d11_flush_message_queue` and +// returns the specific HRESULT from `pl_d3d11_check_device_removed` for logging +// purposes. +HRESULT pl_d3d11_after_error(struct d3d11_ctx *ctx, HRESULT hr); + +// Convenience macro for running DXGI/D3D11 functions and performing appropriate +// actions on failure. Can also be used for any HRESULT-returning function. +// Expects a `ctx` variable and an `error` label to be in scope at the point of +// use. Note that the expansion itself already ends in a semicolon. +#define D3D(call) \ + do { \ + HRESULT hr_ = (call); \ + if (FAILED(hr_)) { \ + hr_ = pl_d3d11_after_error(ctx, hr_); \ + PL_ERR(ctx, "%s: %s (%s:%d)", #call, pl_hresult_to_str(hr_), \ + __FILE__, __LINE__); \ + goto error; \ + } \ + } while (0); + +// Conditionally release a COM interface and set the pointer to NULL +// (`iface` is expanded several times, so it must be free of side effects) +#define SAFE_RELEASE(iface) \ + do { \ + if (iface) \ + (iface)->lpVtbl->Release(iface); \ + (iface) = NULL; \ + } while (0) + +// Four-part version number of a DLL, as returned by `pl_get_dll_version` +struct dll_version { + uint16_t major; + uint16_t minor; + uint16_t build; + uint16_t revision; +}; + +// Get the version number of a DLL. This calls GetFileVersionInfoW, which should +// call LoadLibraryExW internally, so it should get the same copy of the DLL +// that is loaded into memory if there is a copy in System32 and a copy in the +// %PATH% or application directory. +// Returns an all-zero version if the version information cannot be read. +struct dll_version pl_get_dll_version(const wchar_t *name); + +// Convert a NUL-terminated string between UTF-8 and wide characters. The result +// is allocated through `pl_calloc_ptr(ctx, ...)` (presumably with `ctx` as the +// allocation parent), and the conversion is asserted to succeed. +wchar_t *pl_from_utf8(void *ctx, const char *str); +char *pl_to_utf8(void *ctx, const wchar_t *str); + +// Format an HRESULT as a human-readable string in `buf`, which is also the +// return value. The macro form formats into a 256-byte compound literal, so its +// result is only valid inside the enclosing block. +#define pl_hresult_to_str(hr) pl_hresult_to_str_buf((char[256]){0}, 256, (hr)) +char *pl_hresult_to_str_buf(char *buf, size_t buf_size, HRESULT hr); + +// Return the name of a DXGI enum value without its `DXGI_COLOR_SPACE_` or +// `DXGI_FORMAT_` prefix, or "<unknown>" if the value is not recognized. +const char *pl_get_dxgi_csp_name(DXGI_COLOR_SPACE_TYPE csp); +const char *pl_get_dxgi_format_name(DXGI_FORMAT fmt); |