summaryrefslogtreecommitdiffstats
path: root/video/out/d3d11
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--video/out/d3d11/context.c566
-rw-r--r--video/out/d3d11/context.h9
-rw-r--r--video/out/d3d11/hwdec_d3d11va.c258
-rw-r--r--video/out/d3d11/hwdec_dxva2dxgi.c478
-rw-r--r--video/out/d3d11/ra_d3d11.c2544
-rw-r--r--video/out/d3d11/ra_d3d11.h47
6 files changed, 3902 insertions, 0 deletions
diff --git a/video/out/d3d11/context.c b/video/out/d3d11/context.c
new file mode 100644
index 0000000..05f04fd
--- /dev/null
+++ b/video/out/d3d11/context.c
@@ -0,0 +1,566 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "common/msg.h"
+#include "options/m_config.h"
+#include "osdep/timer.h"
+#include "osdep/windows_utils.h"
+
+#include "video/out/gpu/context.h"
+#include "video/out/gpu/d3d11_helpers.h"
+#include "video/out/gpu/spirv.h"
+#include "video/out/w32_common.h"
+#include "context.h"
+#include "ra_d3d11.h"
+
+static int d3d11_validate_adapter(struct mp_log *log,
+ const struct m_option *opt,
+ struct bstr name, const char **value);
+
+struct d3d11_opts {
+ int feature_level;
+ int warp;
+ bool flip;
+ int sync_interval;
+ char *adapter_name;
+ int output_format;
+ int color_space;
+ bool exclusive_fs;
+};
+
+#define OPT_BASE_STRUCT struct d3d11_opts
+const struct m_sub_options d3d11_conf = {
+ .opts = (const struct m_option[]) {
+ {"d3d11-warp", OPT_CHOICE(warp,
+ {"auto", -1},
+ {"no", 0},
+ {"yes", 1})},
+ {"d3d11-feature-level", OPT_CHOICE(feature_level,
+ {"12_1", D3D_FEATURE_LEVEL_12_1},
+ {"12_0", D3D_FEATURE_LEVEL_12_0},
+ {"11_1", D3D_FEATURE_LEVEL_11_1},
+ {"11_0", D3D_FEATURE_LEVEL_11_0},
+ {"10_1", D3D_FEATURE_LEVEL_10_1},
+ {"10_0", D3D_FEATURE_LEVEL_10_0},
+ {"9_3", D3D_FEATURE_LEVEL_9_3},
+ {"9_2", D3D_FEATURE_LEVEL_9_2},
+ {"9_1", D3D_FEATURE_LEVEL_9_1})},
+ {"d3d11-flip", OPT_BOOL(flip)},
+ {"d3d11-sync-interval", OPT_INT(sync_interval), M_RANGE(0, 4)},
+ {"d3d11-adapter", OPT_STRING_VALIDATE(adapter_name,
+ d3d11_validate_adapter)},
+ {"d3d11-output-format", OPT_CHOICE(output_format,
+ {"auto", DXGI_FORMAT_UNKNOWN},
+ {"rgba8", DXGI_FORMAT_R8G8B8A8_UNORM},
+ {"bgra8", DXGI_FORMAT_B8G8R8A8_UNORM},
+ {"rgb10_a2", DXGI_FORMAT_R10G10B10A2_UNORM},
+ {"rgba16f", DXGI_FORMAT_R16G16B16A16_FLOAT})},
+ {"d3d11-output-csp", OPT_CHOICE(color_space,
+ {"auto", -1},
+ {"srgb", DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709},
+ {"linear", DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709},
+ {"pq", DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020},
+ {"bt.2020", DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P2020})},
+ {"d3d11-exclusive-fs", OPT_BOOL(exclusive_fs)},
+ {0}
+ },
+ .defaults = &(const struct d3d11_opts) {
+ .feature_level = D3D_FEATURE_LEVEL_12_1,
+ .warp = -1,
+ .flip = true,
+ .sync_interval = 1,
+ .adapter_name = NULL,
+ .output_format = DXGI_FORMAT_UNKNOWN,
+ .color_space = -1,
+ },
+ .size = sizeof(struct d3d11_opts)
+};
+
+struct priv {
+ struct d3d11_opts *opts;
+ struct m_config_cache *opts_cache;
+
+ struct mp_vo_opts *vo_opts;
+ struct m_config_cache *vo_opts_cache;
+
+ struct ra_tex *backbuffer;
+ ID3D11Device *device;
+ IDXGISwapChain *swapchain;
+ struct mp_colorspace swapchain_csp;
+
+ int64_t perf_freq;
+ unsigned sync_refresh_count;
+ int64_t sync_qpc_time;
+ int64_t vsync_duration_qpc;
+ int64_t last_submit_qpc;
+};
+
+static int d3d11_validate_adapter(struct mp_log *log,
+ const struct m_option *opt,
+ struct bstr name, const char **value)
+{
+ struct bstr param = bstr0(*value);
+ bool help = bstr_equals0(param, "help");
+ bool adapter_matched = false;
+ struct bstr listing = { 0 };
+
+ if (bstr_equals0(param, "")) {
+ return 0;
+ }
+
+ adapter_matched = mp_d3d11_list_or_verify_adapters(log,
+ help ? bstr0(NULL) : param,
+ help ? &listing : NULL);
+
+ if (help) {
+ mp_info(log, "Available D3D11 adapters:\n%.*s",
+ BSTR_P(listing));
+ talloc_free(listing.start);
+ return M_OPT_EXIT;
+ }
+
+ if (!adapter_matched) {
+ mp_err(log, "No adapter matching '%.*s'!\n", BSTR_P(param));
+ }
+
+ return adapter_matched ? 0 : M_OPT_INVALID;
+}
+
+static struct ra_tex *get_backbuffer(struct ra_ctx *ctx)
+{
+ struct priv *p = ctx->priv;
+ ID3D11Texture2D *backbuffer = NULL;
+ struct ra_tex *tex = NULL;
+ HRESULT hr;
+
+ hr = IDXGISwapChain_GetBuffer(p->swapchain, 0, &IID_ID3D11Texture2D,
+ (void**)&backbuffer);
+ if (FAILED(hr)) {
+ MP_ERR(ctx, "Couldn't get swapchain image\n");
+ goto done;
+ }
+
+ tex = ra_d3d11_wrap_tex(ctx->ra, (ID3D11Resource *)backbuffer);
+done:
+ SAFE_RELEASE(backbuffer);
+ return tex;
+}
+
+static bool resize(struct ra_ctx *ctx)
+{
+ struct priv *p = ctx->priv;
+ HRESULT hr;
+
+ if (p->backbuffer) {
+ MP_ERR(ctx, "Attempt at resizing while a frame was in progress!\n");
+ return false;
+ }
+
+ hr = IDXGISwapChain_ResizeBuffers(p->swapchain, 0, ctx->vo->dwidth,
+ ctx->vo->dheight, DXGI_FORMAT_UNKNOWN, 0);
+ if (FAILED(hr)) {
+ MP_FATAL(ctx, "Couldn't resize swapchain: %s\n", mp_HRESULT_to_str(hr));
+ return false;
+ }
+
+ return true;
+}
+
+static bool d3d11_reconfig(struct ra_ctx *ctx)
+{
+ vo_w32_config(ctx->vo);
+ return resize(ctx);
+}
+
+static int d3d11_color_depth(struct ra_swapchain *sw)
+{
+ struct priv *p = sw->priv;
+ DXGI_SWAP_CHAIN_DESC desc;
+
+ HRESULT hr = IDXGISwapChain_GetDesc(p->swapchain, &desc);
+ if (FAILED(hr)) {
+ MP_ERR(sw->ctx, "Failed to query swap chain description: %s!\n",
+ mp_HRESULT_to_str(hr));
+ return 0;
+ }
+
+ const struct ra_format *ra_fmt =
+ ra_d3d11_get_ra_format(sw->ctx->ra, desc.BufferDesc.Format);
+ if (!ra_fmt)
+ return 0;
+
+ return ra_fmt->component_depth[0];
+}
+
+static bool d3d11_start_frame(struct ra_swapchain *sw, struct ra_fbo *out_fbo)
+{
+ struct priv *p = sw->priv;
+
+ if (!out_fbo)
+ return true;
+
+ assert(!p->backbuffer);
+
+ p->backbuffer = get_backbuffer(sw->ctx);
+ if (!p->backbuffer)
+ return false;
+
+ *out_fbo = (struct ra_fbo) {
+ .tex = p->backbuffer,
+ .flip = false,
+ .color_space = p->swapchain_csp
+ };
+ return true;
+}
+
+static bool d3d11_submit_frame(struct ra_swapchain *sw,
+ const struct vo_frame *frame)
+{
+ struct priv *p = sw->priv;
+
+ ra_d3d11_flush(sw->ctx->ra);
+ ra_tex_free(sw->ctx->ra, &p->backbuffer);
+ return true;
+}
+
+static int64_t qpc_to_ns(struct ra_swapchain *sw, int64_t qpc)
+{
+ struct priv *p = sw->priv;
+
+ // Convert QPC units (1/perf_freq seconds) to nanoseconds. This will work
+ // without overflow because the QPC value is guaranteed not to roll-over
+ // within 100 years, so perf_freq must be less than 2.9*10^9.
+ return qpc / p->perf_freq * INT64_C(1000000000) +
+ qpc % p->perf_freq * INT64_C(1000000000) / p->perf_freq;
+}
+
+static int64_t qpc_ns_now(struct ra_swapchain *sw)
+{
+ LARGE_INTEGER perf_count;
+ QueryPerformanceCounter(&perf_count);
+ return qpc_to_ns(sw, perf_count.QuadPart);
+}
+
+static void d3d11_swap_buffers(struct ra_swapchain *sw)
+{
+ struct priv *p = sw->priv;
+
+ m_config_cache_update(p->opts_cache);
+
+ LARGE_INTEGER perf_count;
+ QueryPerformanceCounter(&perf_count);
+ p->last_submit_qpc = perf_count.QuadPart;
+
+ IDXGISwapChain_Present(p->swapchain, p->opts->sync_interval, 0);
+}
+
+static void d3d11_get_vsync(struct ra_swapchain *sw, struct vo_vsync_info *info)
+{
+ struct priv *p = sw->priv;
+ HRESULT hr;
+
+ m_config_cache_update(p->opts_cache);
+
+ // The calculations below are only valid if mpv presents on every vsync
+ if (p->opts->sync_interval != 1)
+ return;
+
+ // They're also only valid for flip model swapchains
+ DXGI_SWAP_CHAIN_DESC desc;
+ hr = IDXGISwapChain_GetDesc(p->swapchain, &desc);
+ if (FAILED(hr) || (desc.SwapEffect != DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL &&
+ desc.SwapEffect != DXGI_SWAP_EFFECT_FLIP_DISCARD))
+ {
+ return;
+ }
+
+ // GetLastPresentCount returns a sequential ID for the frame submitted by
+ // the last call to IDXGISwapChain::Present()
+ UINT submit_count;
+ hr = IDXGISwapChain_GetLastPresentCount(p->swapchain, &submit_count);
+ if (FAILED(hr))
+ return;
+
+ // GetFrameStatistics returns two pairs. The first is (PresentCount,
+ // PresentRefreshCount) which relates a present ID (on the same timeline as
+ // GetLastPresentCount) to the physical vsync it was displayed on. The
+ // second is (SyncRefreshCount, SyncQPCTime), which relates a physical vsync
+ // to a timestamp on the same clock as QueryPerformanceCounter.
+ DXGI_FRAME_STATISTICS stats;
+ hr = IDXGISwapChain_GetFrameStatistics(p->swapchain, &stats);
+ if (hr == DXGI_ERROR_FRAME_STATISTICS_DISJOINT) {
+ p->sync_refresh_count = 0;
+ p->sync_qpc_time = 0;
+ }
+ if (FAILED(hr))
+ return;
+
+ info->last_queue_display_time = 0;
+ info->vsync_duration = 0;
+ // Detecting skipped vsyncs is possible but not supported yet
+ info->skipped_vsyncs = -1;
+
+ // Get the number of physical vsyncs that have passed since the start of the
+ // playback or disjoint event.
+ // Check for 0 here, since sometimes GetFrameStatistics returns S_OK but
+ // with 0s in some (all?) members of DXGI_FRAME_STATISTICS.
+ unsigned src_passed = 0;
+ if (stats.SyncRefreshCount && p->sync_refresh_count)
+ src_passed = stats.SyncRefreshCount - p->sync_refresh_count;
+ if (p->sync_refresh_count == 0)
+ p->sync_refresh_count = stats.SyncRefreshCount;
+
+ // Get the elapsed time passed between the above vsyncs
+ unsigned sqt_passed = 0;
+ if (stats.SyncQPCTime.QuadPart && p->sync_qpc_time)
+ sqt_passed = stats.SyncQPCTime.QuadPart - p->sync_qpc_time;
+ if (p->sync_qpc_time == 0)
+ p->sync_qpc_time = stats.SyncQPCTime.QuadPart;
+
+ // If any vsyncs have passed, estimate the physical frame rate
+ if (src_passed && sqt_passed)
+ p->vsync_duration_qpc = sqt_passed / src_passed;
+ if (p->vsync_duration_qpc)
+ info->vsync_duration = qpc_to_ns(sw, p->vsync_duration_qpc);
+
+ // If the physical frame rate is known and the other members of
+ // DXGI_FRAME_STATISTICS are non-0, estimate the timing of the next frame
+ if (p->vsync_duration_qpc && stats.PresentCount &&
+ stats.PresentRefreshCount && stats.SyncRefreshCount &&
+ stats.SyncQPCTime.QuadPart)
+ {
+ // It's not clear if PresentRefreshCount and SyncRefreshCount can refer
+ // to different frames, but in case they can, assuming mpv presents on
+ // every frame, guess the present count that relates to SyncRefreshCount.
+ unsigned expected_sync_pc = stats.PresentCount +
+ (stats.SyncRefreshCount - stats.PresentRefreshCount);
+
+ // Now guess the timestamp of the last submitted frame based on the
+ // timestamp of the frame at SyncRefreshCount and the frame rate
+ int queued_frames = submit_count - expected_sync_pc;
+ int64_t last_queue_display_time_qpc = stats.SyncQPCTime.QuadPart +
+ queued_frames * p->vsync_duration_qpc;
+
+ // Only set the estimated display time if it's after the last submission
+ // time. It could be before if mpv skips a lot of frames.
+ if (last_queue_display_time_qpc >= p->last_submit_qpc) {
+ info->last_queue_display_time = mp_time_ns() +
+ (qpc_to_ns(sw, last_queue_display_time_qpc) - qpc_ns_now(sw));
+ }
+ }
+}
+
+static bool d3d11_set_fullscreen(struct ra_ctx *ctx)
+{
+ struct priv *p = ctx->priv;
+ HRESULT hr;
+
+ m_config_cache_update(p->opts_cache);
+
+ if (!p->swapchain) {
+ MP_ERR(ctx, "Full screen configuration was requested before D3D11 "
+ "swap chain was ready!");
+ return false;
+ }
+
+ // we only want exclusive FS if we are entering FS and
+ // exclusive FS is enabled. Otherwise disable exclusive FS.
+ bool enable_exclusive_fs = p->vo_opts->fullscreen &&
+ p->opts->exclusive_fs;
+
+ MP_VERBOSE(ctx, "%s full-screen exclusive mode while %s fullscreen\n",
+ enable_exclusive_fs ? "Enabling" : "Disabling",
+ ctx->vo->opts->fullscreen ? "entering" : "leaving");
+
+ hr = IDXGISwapChain_SetFullscreenState(p->swapchain,
+ enable_exclusive_fs, NULL);
+ if (FAILED(hr))
+ return false;
+
+ if (!resize(ctx))
+ return false;
+
+ return true;
+}
+
+static int d3d11_control(struct ra_ctx *ctx, int *events, int request, void *arg)
+{
+ struct priv *p = ctx->priv;
+ int ret = -1;
+ bool fullscreen_switch_needed = false;
+
+ switch (request) {
+ case VOCTRL_VO_OPTS_CHANGED: {
+ void *changed_option;
+
+ while (m_config_cache_get_next_changed(p->vo_opts_cache,
+ &changed_option))
+ {
+ struct mp_vo_opts *vo_opts = p->vo_opts_cache->opts;
+
+ if (changed_option == &vo_opts->fullscreen) {
+ fullscreen_switch_needed = true;
+ }
+ }
+
+ break;
+ }
+ default:
+ break;
+ }
+
+ // if leaving full screen, handle d3d11 stuff first, then general
+ // windowing
+ if (fullscreen_switch_needed && !p->vo_opts->fullscreen) {
+ if (!d3d11_set_fullscreen(ctx))
+ return VO_FALSE;
+
+ fullscreen_switch_needed = false;
+ }
+
+ ret = vo_w32_control(ctx->vo, events, request, arg);
+
+ // if entering full screen, handle d3d11 after general windowing stuff
+ if (fullscreen_switch_needed && p->vo_opts->fullscreen) {
+ if (!d3d11_set_fullscreen(ctx))
+ return VO_FALSE;
+
+ fullscreen_switch_needed = false;
+ }
+
+ if (*events & VO_EVENT_RESIZE) {
+ if (!resize(ctx))
+ return VO_ERROR;
+ }
+ return ret;
+}
+
+static void d3d11_uninit(struct ra_ctx *ctx)
+{
+ struct priv *p = ctx->priv;
+
+ if (p->swapchain)
+ IDXGISwapChain_SetFullscreenState(p->swapchain, FALSE, NULL);
+
+ if (ctx->ra)
+ ra_tex_free(ctx->ra, &p->backbuffer);
+ SAFE_RELEASE(p->swapchain);
+ vo_w32_uninit(ctx->vo);
+ SAFE_RELEASE(p->device);
+
+ // Destroy the RA last to prevent objects we hold from showing up in D3D's
+ // leak checker
+ if (ctx->ra)
+ ctx->ra->fns->destroy(ctx->ra);
+}
+
+static const struct ra_swapchain_fns d3d11_swapchain = {
+ .color_depth = d3d11_color_depth,
+ .start_frame = d3d11_start_frame,
+ .submit_frame = d3d11_submit_frame,
+ .swap_buffers = d3d11_swap_buffers,
+ .get_vsync = d3d11_get_vsync,
+};
+
+static bool d3d11_init(struct ra_ctx *ctx)
+{
+ struct priv *p = ctx->priv = talloc_zero(ctx, struct priv);
+ p->opts_cache = m_config_cache_alloc(ctx, ctx->global, &d3d11_conf);
+ p->opts = p->opts_cache->opts;
+
+ p->vo_opts_cache = m_config_cache_alloc(ctx, ctx->vo->global, &vo_sub_opts);
+ p->vo_opts = p->vo_opts_cache->opts;
+
+ LARGE_INTEGER perf_freq;
+ QueryPerformanceFrequency(&perf_freq);
+ p->perf_freq = perf_freq.QuadPart;
+
+ struct ra_swapchain *sw = ctx->swapchain = talloc_zero(ctx, struct ra_swapchain);
+ sw->priv = p;
+ sw->ctx = ctx;
+ sw->fns = &d3d11_swapchain;
+
+ struct d3d11_device_opts dopts = {
+ .debug = ctx->opts.debug,
+ .allow_warp = p->opts->warp != 0,
+ .force_warp = p->opts->warp == 1,
+ .max_feature_level = p->opts->feature_level,
+ .max_frame_latency = ctx->vo->opts->swapchain_depth,
+ .adapter_name = p->opts->adapter_name,
+ };
+ if (!mp_d3d11_create_present_device(ctx->log, &dopts, &p->device))
+ goto error;
+
+ if (!spirv_compiler_init(ctx))
+ goto error;
+ ctx->ra = ra_d3d11_create(p->device, ctx->log, ctx->spirv);
+ if (!ctx->ra)
+ goto error;
+
+ if (!vo_w32_init(ctx->vo))
+ goto error;
+
+ UINT usage = DXGI_USAGE_RENDER_TARGET_OUTPUT | DXGI_USAGE_SHADER_INPUT;
+ if (ID3D11Device_GetFeatureLevel(p->device) >= D3D_FEATURE_LEVEL_11_0 &&
+ p->opts->output_format != DXGI_FORMAT_B8G8R8A8_UNORM)
+ {
+ usage |= DXGI_USAGE_UNORDERED_ACCESS;
+ }
+
+ struct d3d11_swapchain_opts scopts = {
+ .window = vo_w32_hwnd(ctx->vo),
+ .width = ctx->vo->dwidth,
+ .height = ctx->vo->dheight,
+ .format = p->opts->output_format,
+ .color_space = p->opts->color_space,
+ .configured_csp = &p->swapchain_csp,
+ .flip = p->opts->flip,
+ // Add one frame for the backbuffer and one frame of "slack" to reduce
+ // contention with the window manager when acquiring the backbuffer
+ .length = ctx->vo->opts->swapchain_depth + 2,
+ .usage = usage,
+ };
+ if (!mp_d3d11_create_swapchain(p->device, ctx->log, &scopts, &p->swapchain))
+ goto error;
+
+ return true;
+
+error:
+ d3d11_uninit(ctx);
+ return false;
+}
+
+IDXGISwapChain *ra_d3d11_ctx_get_swapchain(struct ra_ctx *ra)
+{
+ if (ra->swapchain->fns != &d3d11_swapchain)
+ return NULL;
+
+ struct priv *p = ra->priv;
+
+ IDXGISwapChain_AddRef(p->swapchain);
+
+ return p->swapchain;
+}
+
+const struct ra_ctx_fns ra_ctx_d3d11 = {
+ .type = "d3d11",
+ .name = "d3d11",
+ .reconfig = d3d11_reconfig,
+ .control = d3d11_control,
+ .init = d3d11_init,
+ .uninit = d3d11_uninit,
+};
diff --git a/video/out/d3d11/context.h b/video/out/d3d11/context.h
new file mode 100644
index 0000000..8a9ef4c
--- /dev/null
+++ b/video/out/d3d11/context.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <dxgi.h>
+
+#include "video/out/gpu/context.h"
+
+// Get the underlying D3D11 swap chain from an RA context. The returned swap chain is
+// refcounted and must be released by the caller.
+IDXGISwapChain *ra_d3d11_ctx_get_swapchain(struct ra_ctx *ra);
diff --git a/video/out/d3d11/hwdec_d3d11va.c b/video/out/d3d11/hwdec_d3d11va.c
new file mode 100644
index 0000000..6aaa12b
--- /dev/null
+++ b/video/out/d3d11/hwdec_d3d11va.c
@@ -0,0 +1,258 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <windows.h>
+#include <d3d11.h>
+#include <d3d11_1.h>
+
+#include "common/common.h"
+#include "options/m_config.h"
+#include "osdep/windows_utils.h"
+#include "video/hwdec.h"
+#include "video/d3d.h"
+#include "video/out/d3d11/ra_d3d11.h"
+#include "video/out/gpu/hwdec.h"
+
+struct d3d11va_opts {
+ bool zero_copy;
+};
+
+#define OPT_BASE_STRUCT struct d3d11va_opts
+const struct m_sub_options d3d11va_conf = {
+ .opts = (const struct m_option[]) {
+ {"d3d11va-zero-copy", OPT_BOOL(zero_copy)},
+ {0}
+ },
+ .defaults = &(const struct d3d11va_opts) {0},
+ .size = sizeof(struct d3d11va_opts)
+};
+
+struct priv_owner {
+ struct d3d11va_opts *opts;
+
+ struct mp_hwdec_ctx hwctx;
+ ID3D11Device *device;
+ ID3D11Device1 *device1;
+};
+
+struct priv {
+ // 1-copy path
+ ID3D11DeviceContext1 *ctx;
+ ID3D11Texture2D *copy_tex;
+
+ // zero-copy path
+ int num_planes;
+ const struct ra_format *fmt[4];
+};
+
+static void uninit(struct ra_hwdec *hw)
+{
+ struct priv_owner *p = hw->priv;
+ hwdec_devices_remove(hw->devs, &p->hwctx);
+ av_buffer_unref(&p->hwctx.av_device_ref);
+ SAFE_RELEASE(p->device);
+ SAFE_RELEASE(p->device1);
+}
+
+static int init(struct ra_hwdec *hw)
+{
+ struct priv_owner *p = hw->priv;
+ HRESULT hr;
+
+ if (!ra_is_d3d11(hw->ra_ctx->ra))
+ return -1;
+ p->device = ra_d3d11_get_device(hw->ra_ctx->ra);
+ if (!p->device)
+ return -1;
+
+ p->opts = mp_get_config_group(hw->priv, hw->global, &d3d11va_conf);
+
+ // D3D11VA requires Direct3D 11.1, so this should always succeed
+ hr = ID3D11Device_QueryInterface(p->device, &IID_ID3D11Device1,
+ (void**)&p->device1);
+ if (FAILED(hr)) {
+ MP_ERR(hw, "Failed to get D3D11.1 interface: %s\n",
+ mp_HRESULT_to_str(hr));
+ return -1;
+ }
+
+ ID3D10Multithread *multithread;
+ hr = ID3D11Device_QueryInterface(p->device, &IID_ID3D10Multithread,
+ (void **)&multithread);
+ if (FAILED(hr)) {
+ MP_ERR(hw, "Failed to get Multithread interface: %s\n",
+ mp_HRESULT_to_str(hr));
+ return -1;
+ }
+ ID3D10Multithread_SetMultithreadProtected(multithread, TRUE);
+ ID3D10Multithread_Release(multithread);
+
+ static const int subfmts[] = {IMGFMT_NV12, IMGFMT_P010, 0};
+ p->hwctx = (struct mp_hwdec_ctx){
+ .driver_name = hw->driver->name,
+ .av_device_ref = d3d11_wrap_device_ref(p->device),
+ .supported_formats = subfmts,
+ .hw_imgfmt = IMGFMT_D3D11,
+ };
+
+ if (!p->hwctx.av_device_ref) {
+ MP_VERBOSE(hw, "Failed to create hwdevice_ctx\n");
+ return -1;
+ }
+
+ hwdec_devices_add(hw->devs, &p->hwctx);
+ return 0;
+}
+
+static void mapper_uninit(struct ra_hwdec_mapper *mapper)
+{
+ struct priv *p = mapper->priv;
+ for (int i = 0; i < 4; i++)
+ ra_tex_free(mapper->ra, &mapper->tex[i]);
+ SAFE_RELEASE(p->copy_tex);
+ SAFE_RELEASE(p->ctx);
+}
+
+static int mapper_init(struct ra_hwdec_mapper *mapper)
+{
+ struct priv_owner *o = mapper->owner->priv;
+ struct priv *p = mapper->priv;
+ HRESULT hr;
+
+ mapper->dst_params = mapper->src_params;
+ mapper->dst_params.imgfmt = mapper->src_params.hw_subfmt;
+ mapper->dst_params.hw_subfmt = 0;
+
+ struct ra_imgfmt_desc desc = {0};
+
+ if (!ra_get_imgfmt_desc(mapper->ra, mapper->dst_params.imgfmt, &desc))
+ return -1;
+
+ if (o->opts->zero_copy) {
+ // In the zero-copy path, we create the ra_tex objects in the map
+ // operation, so we just need to store the format of each plane
+ p->num_planes = desc.num_planes;
+ for (int i = 0; i < desc.num_planes; i++)
+ p->fmt[i] = desc.planes[i];
+ } else {
+ struct mp_image layout = {0};
+ mp_image_set_params(&layout, &mapper->dst_params);
+
+ DXGI_FORMAT copy_fmt;
+ switch (mapper->dst_params.imgfmt) {
+ case IMGFMT_NV12: copy_fmt = DXGI_FORMAT_NV12; break;
+ case IMGFMT_P010: copy_fmt = DXGI_FORMAT_P010; break;
+ default: return -1;
+ }
+
+ D3D11_TEXTURE2D_DESC copy_desc = {
+ .Width = mapper->dst_params.w,
+ .Height = mapper->dst_params.h,
+ .MipLevels = 1,
+ .ArraySize = 1,
+ .SampleDesc.Count = 1,
+ .Format = copy_fmt,
+ .BindFlags = D3D11_BIND_SHADER_RESOURCE,
+ };
+ hr = ID3D11Device_CreateTexture2D(o->device, &copy_desc, NULL,
+ &p->copy_tex);
+ if (FAILED(hr)) {
+ MP_FATAL(mapper, "Could not create shader resource texture\n");
+ return -1;
+ }
+
+ for (int i = 0; i < desc.num_planes; i++) {
+ mapper->tex[i] = ra_d3d11_wrap_tex_video(mapper->ra, p->copy_tex,
+ mp_image_plane_w(&layout, i), mp_image_plane_h(&layout, i), 0,
+ desc.planes[i]);
+ if (!mapper->tex[i]) {
+ MP_FATAL(mapper, "Could not create RA texture view\n");
+ return -1;
+ }
+ }
+
+ // A ref to the immediate context is needed for CopySubresourceRegion
+ ID3D11Device1_GetImmediateContext1(o->device1, &p->ctx);
+ }
+
+ return 0;
+}
+
+static int mapper_map(struct ra_hwdec_mapper *mapper)
+{
+ struct priv *p = mapper->priv;
+ ID3D11Texture2D *tex = (void *)mapper->src->planes[0];
+ int subresource = (intptr_t)mapper->src->planes[1];
+
+ if (p->copy_tex) {
+ ID3D11DeviceContext1_CopySubresourceRegion1(p->ctx,
+ (ID3D11Resource *)p->copy_tex, 0, 0, 0, 0,
+ (ID3D11Resource *)tex, subresource, (&(D3D11_BOX) {
+ .left = 0,
+ .top = 0,
+ .front = 0,
+ .right = mapper->dst_params.w,
+ .bottom = mapper->dst_params.h,
+ .back = 1,
+ }), D3D11_COPY_DISCARD);
+
+ // We no longer need the original texture after copying it.
+ mp_image_unrefp(&mapper->src);
+ } else {
+ D3D11_TEXTURE2D_DESC desc2d;
+ ID3D11Texture2D_GetDesc(tex, &desc2d);
+
+ for (int i = 0; i < p->num_planes; i++) {
+ // The video decode texture may include padding, so the size of the
+ // ra_tex needs to be determined by the actual size of the Tex2D
+ bool chroma = i >= 1;
+ int w = desc2d.Width / (chroma ? 2 : 1);
+ int h = desc2d.Height / (chroma ? 2 : 1);
+
+ mapper->tex[i] = ra_d3d11_wrap_tex_video(mapper->ra, tex,
+ w, h, subresource, p->fmt[i]);
+ if (!mapper->tex[i])
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static void mapper_unmap(struct ra_hwdec_mapper *mapper)
+{
+ struct priv *p = mapper->priv;
+ if (p->copy_tex)
+ return;
+ for (int i = 0; i < 4; i++)
+ ra_tex_free(mapper->ra, &mapper->tex[i]);
+}
+
+const struct ra_hwdec_driver ra_hwdec_d3d11va = {
+ .name = "d3d11va",
+ .priv_size = sizeof(struct priv_owner),
+ .imgfmts = {IMGFMT_D3D11, 0},
+ .init = init,
+ .uninit = uninit,
+ .mapper = &(const struct ra_hwdec_mapper_driver){
+ .priv_size = sizeof(struct priv),
+ .init = mapper_init,
+ .uninit = mapper_uninit,
+ .map = mapper_map,
+ .unmap = mapper_unmap,
+ },
+};
diff --git a/video/out/d3d11/hwdec_dxva2dxgi.c b/video/out/d3d11/hwdec_dxva2dxgi.c
new file mode 100644
index 0000000..62158d4
--- /dev/null
+++ b/video/out/d3d11/hwdec_dxva2dxgi.c
@@ -0,0 +1,478 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <windows.h>
+#include <d3d9.h>
+#include <d3d11.h>
+#include <dxva2api.h>
+
+#include "common/common.h"
+#include "osdep/windows_utils.h"
+#include "video/hwdec.h"
+#include "video/d3d.h"
+#include "video/out/d3d11/ra_d3d11.h"
+#include "video/out/gpu/hwdec.h"
+
+struct priv_owner {
+ struct mp_hwdec_ctx hwctx;
+ ID3D11Device *dev11;
+ IDirect3DDevice9Ex *dev9;
+};
+
+struct queue_surf {
+ ID3D11Texture2D *tex11;
+ ID3D11Query *idle11;
+ ID3D11Texture2D *stage11;
+ IDirect3DTexture9 *tex9;
+ IDirect3DSurface9 *surf9;
+ IDirect3DSurface9 *stage9;
+ struct ra_tex *tex;
+
+ bool busy11; // The surface is currently being used by D3D11
+};
+
+struct priv {
+ ID3D11Device *dev11;
+ ID3D11DeviceContext *ctx11;
+ IDirect3DDevice9Ex *dev9;
+
+ // Surface queue stuff. Following Microsoft recommendations, a queue of
+ // surfaces is used to share images between D3D9 and D3D11. This allows
+ // multiple D3D11 frames to be in-flight at once.
+ struct queue_surf **queue;
+ int queue_len;
+ int queue_pos;
+};
+
+static void uninit(struct ra_hwdec *hw)
+{
+ struct priv_owner *p = hw->priv;
+ hwdec_devices_remove(hw->devs, &p->hwctx);
+ av_buffer_unref(&p->hwctx.av_device_ref);
+ SAFE_RELEASE(p->dev11);
+ SAFE_RELEASE(p->dev9);
+}
+
+static int init(struct ra_hwdec *hw)
+{
+ struct priv_owner *p = hw->priv;
+ IDirect3D9Ex *d3d9ex = NULL;
+ int ret = -1;
+ HRESULT hr;
+
+ if (!ra_is_d3d11(hw->ra_ctx->ra))
+ goto done;
+ p->dev11 = ra_d3d11_get_device(hw->ra_ctx->ra);
+ if (!p->dev11)
+ goto done;
+
+ d3d_load_dlls();
+ if (!d3d9_dll) {
+ MP_FATAL(hw, "Failed to load \"d3d9.dll\": %s\n", mp_LastError_to_str());
+ goto done;
+ }
+ if (!dxva2_dll) {
+ MP_FATAL(hw, "Failed to load \"dxva2.dll\": %s\n", mp_LastError_to_str());
+ goto done;
+ }
+
+ HRESULT (WINAPI *Direct3DCreate9Ex)(UINT SDKVersion, IDirect3D9Ex **ppD3D);
+ Direct3DCreate9Ex = (void *)GetProcAddress(d3d9_dll, "Direct3DCreate9Ex");
+ if (!Direct3DCreate9Ex) {
+ MP_FATAL(hw, "Direct3D 9Ex not supported\n");
+ goto done;
+ }
+
+ hr = Direct3DCreate9Ex(D3D_SDK_VERSION, &d3d9ex);
+ if (FAILED(hr)) {
+ MP_FATAL(hw, "Couldn't create Direct3D9Ex: %s\n", mp_HRESULT_to_str(hr));
+ goto done;
+ }
+
+ D3DPRESENT_PARAMETERS pparams = {
+ .BackBufferWidth = 16,
+ .BackBufferHeight = 16,
+ .BackBufferCount = 1,
+ .SwapEffect = D3DSWAPEFFECT_DISCARD,
+ .hDeviceWindow = GetDesktopWindow(),
+ .Windowed = TRUE,
+ .Flags = D3DPRESENTFLAG_VIDEO,
+ };
+ hr = IDirect3D9Ex_CreateDeviceEx(d3d9ex, D3DADAPTER_DEFAULT,
+ D3DDEVTYPE_HAL, GetDesktopWindow(), D3DCREATE_NOWINDOWCHANGES |
+ D3DCREATE_FPU_PRESERVE | D3DCREATE_HARDWARE_VERTEXPROCESSING |
+ D3DCREATE_DISABLE_PSGP_THREADING | D3DCREATE_MULTITHREADED, &pparams,
+ NULL, &p->dev9);
+ if (FAILED(hr)) {
+ MP_FATAL(hw, "Failed to create Direct3D9Ex device: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto done;
+ }
+
+ // Check if it's possible to StretchRect() from NV12 to XRGB surfaces
+ hr = IDirect3D9Ex_CheckDeviceFormatConversion(d3d9ex, D3DADAPTER_DEFAULT,
+ D3DDEVTYPE_HAL, MAKEFOURCC('N', 'V', '1', '2'), D3DFMT_X8R8G8B8);
+ if (hr != S_OK) {
+ MP_FATAL(hw, "Can't StretchRect from NV12 to XRGB surfaces\n");
+ goto done;
+ }
+
+ p->hwctx = (struct mp_hwdec_ctx){
+ .driver_name = hw->driver->name,
+ .av_device_ref = d3d9_wrap_device_ref((IDirect3DDevice9 *)p->dev9),
+ .hw_imgfmt = IMGFMT_DXVA2,
+ };
+
+ if (!p->hwctx.av_device_ref) {
+ MP_VERBOSE(hw, "Failed to create hwdevice_ctx\n");
+ goto done;
+ }
+
+ hwdec_devices_add(hw->devs, &p->hwctx);
+
+ ret = 0;
+done:
+ SAFE_RELEASE(d3d9ex);
+ return ret;
+}
+
+static int mapper_init(struct ra_hwdec_mapper *mapper)
+{
+ struct priv_owner *o = mapper->owner->priv;
+ struct priv *p = mapper->priv;
+
+ ID3D11Device_AddRef(o->dev11);
+ p->dev11 = o->dev11;
+ IDirect3DDevice9Ex_AddRef(o->dev9);
+ p->dev9 = o->dev9;
+ ID3D11Device_GetImmediateContext(o->dev11, &p->ctx11);
+
+ mapper->dst_params = mapper->src_params;
+ mapper->dst_params.imgfmt = IMGFMT_RGB0;
+ mapper->dst_params.hw_subfmt = 0;
+ return 0;
+}
+
+static void surf_destroy(struct ra_hwdec_mapper *mapper,
+ struct queue_surf *surf)
+{
+ if (!surf)
+ return;
+ SAFE_RELEASE(surf->tex11);
+ SAFE_RELEASE(surf->idle11);
+ SAFE_RELEASE(surf->stage11);
+ SAFE_RELEASE(surf->tex9);
+ SAFE_RELEASE(surf->surf9);
+ SAFE_RELEASE(surf->stage9);
+ ra_tex_free(mapper->ra, &surf->tex);
+ talloc_free(surf);
+}
+
+static struct queue_surf *surf_create(struct ra_hwdec_mapper *mapper)
+{
+ struct priv *p = mapper->priv;
+ IDXGIResource *res11 = NULL;
+ bool success = false;
+ HRESULT hr;
+
+ struct queue_surf *surf = talloc_ptrtype(p, surf);
+
+ D3D11_TEXTURE2D_DESC desc11 = {
+ .Width = mapper->src->w,
+ .Height = mapper->src->h,
+ .MipLevels = 1,
+ .ArraySize = 1,
+ .Format = DXGI_FORMAT_B8G8R8X8_UNORM,
+ .SampleDesc.Count = 1,
+ .Usage = D3D11_USAGE_DEFAULT,
+ .BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET,
+ .MiscFlags = D3D11_RESOURCE_MISC_SHARED,
+ };
+ hr = ID3D11Device_CreateTexture2D(p->dev11, &desc11, NULL, &surf->tex11);
+ if (FAILED(hr)) {
+ MP_ERR(mapper, "Failed to create D3D11 texture: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto done;
+ }
+
+ // Try to use a 16x16 staging texture, unless the source surface is
+ // smaller. Ideally, a 1x1 texture would be sufficient, but Microsoft's
+ // D3D9ExDXGISharedSurf example uses 16x16 to avoid driver bugs.
+ D3D11_TEXTURE2D_DESC sdesc11 = {
+ .Width = MPMIN(16, desc11.Width),
+ .Height = MPMIN(16, desc11.Height),
+ .MipLevels = 1,
+ .ArraySize = 1,
+ .Format = DXGI_FORMAT_B8G8R8X8_UNORM,
+ .SampleDesc.Count = 1,
+ .Usage = D3D11_USAGE_STAGING,
+ .CPUAccessFlags = D3D11_CPU_ACCESS_READ,
+ };
+ hr = ID3D11Device_CreateTexture2D(p->dev11, &sdesc11, NULL, &surf->stage11);
+ if (FAILED(hr)) {
+ MP_ERR(mapper, "Failed to create D3D11 staging texture: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto done;
+ }
+
+ hr = ID3D11Texture2D_QueryInterface(surf->tex11, &IID_IDXGIResource,
+ (void**)&res11);
+ if (FAILED(hr)) {
+ MP_ERR(mapper, "Failed to get share handle: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto done;
+ }
+
+ HANDLE share_handle;
+ hr = IDXGIResource_GetSharedHandle(res11, &share_handle);
+ if (FAILED(hr)) {
+ MP_ERR(mapper, "Failed to get share handle: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto done;
+ }
+
+ hr = ID3D11Device_CreateQuery(p->dev11,
+ &(D3D11_QUERY_DESC) { D3D11_QUERY_EVENT }, &surf->idle11);
+ if (FAILED(hr)) {
+ MP_ERR(mapper, "Failed to create D3D11 query: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto done;
+ }
+
+ // Share the D3D11 texture with D3D9Ex
+ hr = IDirect3DDevice9Ex_CreateTexture(p->dev9, desc11.Width, desc11.Height,
+ 1, D3DUSAGE_RENDERTARGET, D3DFMT_X8R8G8B8, D3DPOOL_DEFAULT,
+ &surf->tex9, &share_handle);
+ if (FAILED(hr)) {
+ MP_ERR(mapper, "Failed to create D3D9 texture: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto done;
+ }
+
+ hr = IDirect3DTexture9_GetSurfaceLevel(surf->tex9, 0, &surf->surf9);
+ if (FAILED(hr)) {
+ MP_ERR(mapper, "Failed to get D3D9 surface: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto done;
+ }
+
+ // As above, try to use a 16x16 staging texture to avoid driver bugs
+ hr = IDirect3DDevice9Ex_CreateRenderTarget(p->dev9,
+ MPMIN(16, desc11.Width), MPMIN(16, desc11.Height), D3DFMT_X8R8G8B8,
+ D3DMULTISAMPLE_NONE, 0, TRUE, &surf->stage9, NULL);
+ if (FAILED(hr)) {
+ MP_ERR(mapper, "Failed to create D3D9 staging surface: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto done;
+ }
+
+ surf->tex = ra_d3d11_wrap_tex(mapper->ra, (ID3D11Resource *)surf->tex11);
+ if (!surf->tex)
+ goto done;
+
+ success = true;
+done:
+ if (!success)
+ surf_destroy(mapper, surf);
+ SAFE_RELEASE(res11);
+ return success ? surf : NULL;
+}
+
+// true if the surface is currently in-use by the D3D11 graphics pipeline
+static bool surf_is_idle11(struct ra_hwdec_mapper *mapper,
+ struct queue_surf *surf)
+{
+ struct priv *p = mapper->priv;
+ HRESULT hr;
+ BOOL idle;
+
+ if (!surf->busy11)
+ return true;
+
+ hr = ID3D11DeviceContext_GetData(p->ctx11,
+ (ID3D11Asynchronous *)surf->idle11, &idle, sizeof(idle),
+ D3D11_ASYNC_GETDATA_DONOTFLUSH);
+ if (FAILED(hr) || hr == S_FALSE || !idle)
+ return false;
+
+ surf->busy11 = false;
+ return true;
+}
+
+// If the surface is currently in-use by the D3D11 graphics pipeline, wait for
+// it to become idle. Should only be called in the queue-underflow case.
+static bool surf_wait_idle11(struct ra_hwdec_mapper *mapper,
+ struct queue_surf *surf)
+{
+ struct priv *p = mapper->priv;
+ HRESULT hr;
+
+ ID3D11DeviceContext_CopySubresourceRegion(p->ctx11,
+ (ID3D11Resource *)surf->stage11, 0, 0, 0, 0,
+ (ID3D11Resource *)surf->tex11, 0, (&(D3D11_BOX){
+ .right = MPMIN(16, mapper->src->w),
+ .bottom = MPMIN(16, mapper->src->h),
+ .back = 1,
+ }));
+
+ // Block until the surface becomes idle (see surf_wait_idle9())
+ D3D11_MAPPED_SUBRESOURCE map = {0};
+ hr = ID3D11DeviceContext_Map(p->ctx11, (ID3D11Resource *)surf->stage11, 0,
+ D3D11_MAP_READ, 0, &map);
+ if (FAILED(hr)) {
+ MP_ERR(mapper, "Couldn't map D3D11 staging texture: %s\n",
+ mp_HRESULT_to_str(hr));
+ return false;
+ }
+
+ ID3D11DeviceContext_Unmap(p->ctx11, (ID3D11Resource *)surf->stage11, 0);
+ surf->busy11 = false;
+ return true;
+}
+
+static bool surf_wait_idle9(struct ra_hwdec_mapper *mapper,
+ struct queue_surf *surf)
+{
+ struct priv *p = mapper->priv;
+ HRESULT hr;
+
+ // Rather than polling for the surface to become idle, copy part of the
+ // surface to a staging texture and map it. This should block until the
+ // surface becomes idle. Microsoft's ISurfaceQueue does this as well.
+ RECT rc = {0, 0, MPMIN(16, mapper->src->w), MPMIN(16, mapper->src->h)};
+ hr = IDirect3DDevice9Ex_StretchRect(p->dev9, surf->surf9, &rc, surf->stage9,
+ &rc, D3DTEXF_NONE);
+ if (FAILED(hr)) {
+ MP_ERR(mapper, "Couldn't copy to D3D9 staging texture: %s\n",
+ mp_HRESULT_to_str(hr));
+ return false;
+ }
+
+ D3DLOCKED_RECT lock;
+ hr = IDirect3DSurface9_LockRect(surf->stage9, &lock, NULL, D3DLOCK_READONLY);
+ if (FAILED(hr)) {
+ MP_ERR(mapper, "Couldn't map D3D9 staging texture: %s\n",
+ mp_HRESULT_to_str(hr));
+ return false;
+ }
+
+ IDirect3DSurface9_UnlockRect(surf->stage9);
+ p->queue[p->queue_pos]->busy11 = true;
+ return true;
+}
+
+static struct queue_surf *surf_acquire(struct ra_hwdec_mapper *mapper)
+{
+ struct priv *p = mapper->priv;
+
+ if (!p->queue_len || !surf_is_idle11(mapper, p->queue[p->queue_pos])) {
+ if (p->queue_len < 16) {
+ struct queue_surf *surf = surf_create(mapper);
+ if (!surf)
+ return NULL;
+
+ // The next surface is busy, so grow the queue
+ MP_TARRAY_INSERT_AT(p, p->queue, p->queue_len, p->queue_pos, surf);
+ MP_DBG(mapper, "Queue grew to %d surfaces\n", p->queue_len);
+ } else {
+ // For sanity, don't let the queue grow beyond 16 surfaces. It
+ // should never get this big. If it does, wait for the surface to
+ // become idle rather than polling it.
+ if (!surf_wait_idle11(mapper, p->queue[p->queue_pos]))
+ return NULL;
+ MP_WARN(mapper, "Queue underflow!\n");
+ }
+ }
+ return p->queue[p->queue_pos];
+}
+
+static void surf_release(struct ra_hwdec_mapper *mapper)
+{
+ struct priv *p = mapper->priv;
+ ID3D11DeviceContext_End(p->ctx11,
+ (ID3D11Asynchronous *)p->queue[p->queue_pos]->idle11);
+
+ // The current surface is now in-flight, move to the next surface
+ p->queue_pos++;
+ if (p->queue_pos >= p->queue_len)
+ p->queue_pos = 0;
+}
+
+static void mapper_uninit(struct ra_hwdec_mapper *mapper)
+{
+ struct priv *p = mapper->priv;
+
+ for (int i = 0; i < p->queue_len; i++)
+ surf_destroy(mapper, p->queue[i]);
+
+ SAFE_RELEASE(p->ctx11);
+ SAFE_RELEASE(p->dev9);
+ SAFE_RELEASE(p->dev11);
+}
+
+static int mapper_map(struct ra_hwdec_mapper *mapper)
+{
+ struct priv *p = mapper->priv;
+ HRESULT hr;
+
+ struct queue_surf *surf = surf_acquire(mapper);
+ if (!surf)
+ return -1;
+
+ RECT rc = {0, 0, mapper->src->w, mapper->src->h};
+ IDirect3DSurface9* hw_surface = (IDirect3DSurface9 *)mapper->src->planes[3];
+
+ hr = IDirect3DDevice9Ex_StretchRect(p->dev9, hw_surface, &rc, surf->surf9,
+ &rc, D3DTEXF_NONE);
+ if (FAILED(hr)) {
+ MP_ERR(mapper, "StretchRect() failed: %s\n", mp_HRESULT_to_str(hr));
+ return -1;
+ }
+
+ if (!surf_wait_idle9(mapper, surf))
+ return -1;
+
+ mapper->tex[0] = surf->tex;
+ return 0;
+}
+
+static void mapper_unmap(struct ra_hwdec_mapper *mapper)
+{
+ struct priv *p = mapper->priv;
+
+ if (p->queue_pos < p->queue_len &&
+ p->queue[p->queue_pos]->tex == mapper->tex[0])
+ {
+ surf_release(mapper);
+ mapper->tex[0] = NULL;
+ }
+}
+
+const struct ra_hwdec_driver ra_hwdec_dxva2dxgi = {
+ .name = "dxva2-dxgi",
+ .priv_size = sizeof(struct priv_owner),
+ .imgfmts = {IMGFMT_DXVA2, 0},
+ .init = init,
+ .uninit = uninit,
+ .mapper = &(const struct ra_hwdec_mapper_driver){
+ .priv_size = sizeof(struct priv),
+ .init = mapper_init,
+ .uninit = mapper_uninit,
+ .map = mapper_map,
+ .unmap = mapper_unmap,
+ },
+};
diff --git a/video/out/d3d11/ra_d3d11.c b/video/out/d3d11/ra_d3d11.c
new file mode 100644
index 0000000..84fd004
--- /dev/null
+++ b/video/out/d3d11/ra_d3d11.c
@@ -0,0 +1,2544 @@
+#include <windows.h>
+#include <versionhelpers.h>
+#include <d3d11_1.h>
+#include <d3d11sdklayers.h>
+#include <dxgi1_2.h>
+#include <d3dcompiler.h>
+#include <spirv_cross_c.h>
+
+#include "common/msg.h"
+#include "osdep/io.h"
+#include "osdep/subprocess.h"
+#include "osdep/timer.h"
+#include "osdep/windows_utils.h"
+#include "video/out/gpu/spirv.h"
+#include "video/out/gpu/utils.h"
+
+#include "ra_d3d11.h"
+
+#ifndef D3D11_1_UAV_SLOT_COUNT
+#define D3D11_1_UAV_SLOT_COUNT (64)
+#endif
+#define D3D11_FORMAT_SUPPORT2_UAV_TYPED_STORE (0x80)
+
+// D3D11.3 message IDs, not present in mingw-w64 v9
+#define D3D11_MESSAGE_ID_CREATE_FENCE ((D3D11_MESSAGE_ID)0x300209)
+#define D3D11_MESSAGE_ID_DESTROY_FENCE ((D3D11_MESSAGE_ID)0x30020b)
+
+struct dll_version {
+ uint16_t major;
+ uint16_t minor;
+ uint16_t build;
+ uint16_t revision;
+};
+
+struct ra_d3d11 {
+ struct spirv_compiler *spirv;
+
+ ID3D11Device *dev;
+ ID3D11Device1 *dev1;
+ ID3D11DeviceContext *ctx;
+ ID3D11DeviceContext1 *ctx1;
+ pD3DCompile D3DCompile;
+
+ struct dll_version d3d_compiler_ver;
+
+ // Debug interfaces (--gpu-debug)
+ ID3D11Debug *debug;
+ ID3D11InfoQueue *iqueue;
+
+ // Device capabilities
+ D3D_FEATURE_LEVEL fl;
+ bool has_clear_view;
+ bool has_timestamp_queries;
+ int max_uavs;
+
+ // Streaming dynamic vertex buffer, which is used for all renderpasses
+ ID3D11Buffer *vbuf;
+ size_t vbuf_size;
+ size_t vbuf_used;
+
+ // clear() renderpass resources (only used when has_clear_view is false)
+ ID3D11PixelShader *clear_ps;
+ ID3D11VertexShader *clear_vs;
+ ID3D11InputLayout *clear_layout;
+ ID3D11Buffer *clear_vbuf;
+ ID3D11Buffer *clear_cbuf;
+
+ // blit() renderpass resources
+ ID3D11PixelShader *blit_float_ps;
+ ID3D11VertexShader *blit_vs;
+ ID3D11InputLayout *blit_layout;
+ ID3D11Buffer *blit_vbuf;
+ ID3D11SamplerState *blit_sampler;
+};
+
+struct d3d_tex {
+ // res mirrors one of tex1d, tex2d or tex3d for convenience. It does not
+ // hold an additional reference to the texture object.
+ ID3D11Resource *res;
+
+ ID3D11Texture1D *tex1d;
+ ID3D11Texture2D *tex2d;
+ ID3D11Texture3D *tex3d;
+ int array_slice;
+
+ // Staging texture for tex_download(), 2D only
+ ID3D11Texture2D *staging;
+
+ ID3D11ShaderResourceView *srv;
+ ID3D11RenderTargetView *rtv;
+ ID3D11UnorderedAccessView *uav;
+ ID3D11SamplerState *sampler;
+};
+
+struct d3d_buf {
+ ID3D11Buffer *buf;
+ ID3D11UnorderedAccessView *uav;
+ void *data; // System-memory mirror of the data in buf
+ bool dirty; // Is buf out of date?
+};
+
+struct d3d_rpass {
+ ID3D11PixelShader *ps;
+ ID3D11VertexShader *vs;
+ ID3D11ComputeShader *cs;
+ ID3D11InputLayout *layout;
+ ID3D11BlendState *bstate;
+};
+
+struct d3d_timer {
+ ID3D11Query *ts_start;
+ ID3D11Query *ts_end;
+ ID3D11Query *disjoint;
+ uint64_t result; // Latches the result from the previous use of the timer
+};
+
+struct d3d_fmt {
+ const char *name;
+ int components;
+ int bytes;
+ int bits[4];
+ DXGI_FORMAT fmt;
+ enum ra_ctype ctype;
+ bool unordered;
+};
+
+static const char clear_vs[] = "\
+float4 main(float2 pos : POSITION) : SV_Position\n\
+{\n\
+ return float4(pos, 0.0, 1.0);\n\
+}\n\
+";
+
+static const char clear_ps[] = "\
+cbuffer ps_cbuf : register(b0) {\n\
+ float4 color : packoffset(c0);\n\
+}\n\
+\n\
+float4 main(float4 pos : SV_Position) : SV_Target\n\
+{\n\
+ return color;\n\
+}\n\
+";
+
+struct blit_vert {
+ float x, y, u, v;
+};
+
+static const char blit_vs[] = "\
+void main(float2 pos : POSITION, float2 coord : TEXCOORD0,\n\
+ out float4 out_pos : SV_Position, out float2 out_coord : TEXCOORD0)\n\
+{\n\
+ out_pos = float4(pos, 0.0, 1.0);\n\
+ out_coord = coord;\n\
+}\n\
+";
+
+static const char blit_float_ps[] = "\
+Texture2D<float4> tex : register(t0);\n\
+SamplerState samp : register(s0);\n\
+\n\
+float4 main(float4 pos : SV_Position, float2 coord : TEXCOORD0) : SV_Target\n\
+{\n\
+ return tex.Sample(samp, coord);\n\
+}\n\
+";
+
+#define DXFMT(f, t) .fmt = DXGI_FORMAT_##f##_##t, .ctype = RA_CTYPE_##t
+static struct d3d_fmt formats[] = {
+ { "r8", 1, 1, { 8}, DXFMT(R8, UNORM) },
+ { "rg8", 2, 2, { 8, 8}, DXFMT(R8G8, UNORM) },
+ { "rgba8", 4, 4, { 8, 8, 8, 8}, DXFMT(R8G8B8A8, UNORM) },
+ { "r16", 1, 2, {16}, DXFMT(R16, UNORM) },
+ { "rg16", 2, 4, {16, 16}, DXFMT(R16G16, UNORM) },
+ { "rgba16", 4, 8, {16, 16, 16, 16}, DXFMT(R16G16B16A16, UNORM) },
+
+ { "r32ui", 1, 4, {32}, DXFMT(R32, UINT) },
+ { "rg32ui", 2, 8, {32, 32}, DXFMT(R32G32, UINT) },
+ { "rgb32ui", 3, 12, {32, 32, 32}, DXFMT(R32G32B32, UINT) },
+ { "rgba32ui", 4, 16, {32, 32, 32, 32}, DXFMT(R32G32B32A32, UINT) },
+
+ { "r16hf", 1, 2, {16}, DXFMT(R16, FLOAT) },
+ { "rg16hf", 2, 4, {16, 16}, DXFMT(R16G16, FLOAT) },
+ { "rgba16hf", 4, 8, {16, 16, 16, 16}, DXFMT(R16G16B16A16, FLOAT) },
+ { "r32f", 1, 4, {32}, DXFMT(R32, FLOAT) },
+ { "rg32f", 2, 8, {32, 32}, DXFMT(R32G32, FLOAT) },
+ { "rgb32f", 3, 12, {32, 32, 32}, DXFMT(R32G32B32, FLOAT) },
+ { "rgba32f", 4, 16, {32, 32, 32, 32}, DXFMT(R32G32B32A32, FLOAT) },
+
+ { "rgb10_a2", 4, 4, {10, 10, 10, 2}, DXFMT(R10G10B10A2, UNORM) },
+ { "bgra8", 4, 4, { 8, 8, 8, 8}, DXFMT(B8G8R8A8, UNORM), .unordered = true },
+ { "bgrx8", 3, 4, { 8, 8, 8}, DXFMT(B8G8R8X8, UNORM), .unordered = true },
+};
+
+static bool dll_version_equal(struct dll_version a, struct dll_version b)
+{
+ return a.major == b.major &&
+ a.minor == b.minor &&
+ a.build == b.build &&
+ a.revision == b.revision;
+}
+
+DXGI_FORMAT ra_d3d11_get_format(const struct ra_format *fmt)
+{
+ struct d3d_fmt *d3d = fmt->priv;
+ return d3d->fmt;
+}
+
+const struct ra_format *ra_d3d11_get_ra_format(struct ra *ra, DXGI_FORMAT fmt)
+{
+ for (int i = 0; i < ra->num_formats; i++) {
+ struct ra_format *ra_fmt = ra->formats[i];
+
+ if (ra_d3d11_get_format(ra_fmt) == fmt)
+ return ra_fmt;
+ }
+
+ return NULL;
+}
+
+static void setup_formats(struct ra *ra)
+{
+ // All formats must be usable as a 2D texture
+ static const UINT sup_basic = D3D11_FORMAT_SUPPORT_TEXTURE2D;
+ // SHADER_SAMPLE indicates support for linear sampling, point always works
+ static const UINT sup_filter = D3D11_FORMAT_SUPPORT_SHADER_SAMPLE;
+ // RA requires renderable surfaces to be blendable as well
+ static const UINT sup_render = D3D11_FORMAT_SUPPORT_RENDER_TARGET |
+ D3D11_FORMAT_SUPPORT_BLENDABLE;
+ // Typed UAVs are equivalent to images. RA only cares if they're storable.
+ static const UINT sup_store = D3D11_FORMAT_SUPPORT_TYPED_UNORDERED_ACCESS_VIEW;
+ static const UINT sup2_store = D3D11_FORMAT_SUPPORT2_UAV_TYPED_STORE;
+
+ struct ra_d3d11 *p = ra->priv;
+ HRESULT hr;
+
+ for (int i = 0; i < MP_ARRAY_SIZE(formats); i++) {
+ struct d3d_fmt *d3dfmt = &formats[i];
+ UINT support = 0;
+ hr = ID3D11Device_CheckFormatSupport(p->dev, d3dfmt->fmt, &support);
+ if (FAILED(hr))
+ continue;
+ if ((support & sup_basic) != sup_basic)
+ continue;
+
+ D3D11_FEATURE_DATA_FORMAT_SUPPORT2 sup2 = { .InFormat = d3dfmt->fmt };
+ ID3D11Device_CheckFeatureSupport(p->dev, D3D11_FEATURE_FORMAT_SUPPORT2,
+ &sup2, sizeof(sup2));
+ UINT support2 = sup2.OutFormatSupport2;
+
+ struct ra_format *fmt = talloc_zero(ra, struct ra_format);
+ *fmt = (struct ra_format) {
+ .name = d3dfmt->name,
+ .priv = d3dfmt,
+ .ctype = d3dfmt->ctype,
+ .ordered = !d3dfmt->unordered,
+ .num_components = d3dfmt->components,
+ .pixel_size = d3dfmt->bytes,
+ .linear_filter = (support & sup_filter) == sup_filter,
+ .renderable = (support & sup_render) == sup_render,
+ .storable = p->fl >= D3D_FEATURE_LEVEL_11_0 &&
+ (support & sup_store) == sup_store &&
+ (support2 & sup2_store) == sup2_store,
+ };
+
+ if (support & D3D11_FORMAT_SUPPORT_TEXTURE1D)
+ ra->caps |= RA_CAP_TEX_1D;
+
+ for (int j = 0; j < d3dfmt->components; j++)
+ fmt->component_size[j] = fmt->component_depth[j] = d3dfmt->bits[j];
+
+ fmt->glsl_format = ra_fmt_glsl_format(fmt);
+
+ MP_TARRAY_APPEND(ra, ra->formats, ra->num_formats, fmt);
+ }
+}
+
+static bool tex_init(struct ra *ra, struct ra_tex *tex)
+{
+ struct ra_d3d11 *p = ra->priv;
+ struct d3d_tex *tex_p = tex->priv;
+ struct ra_tex_params *params = &tex->params;
+ HRESULT hr;
+
+ // A SRV is required for renderpasses and blitting, since blitting can use
+ // a renderpass internally
+ if (params->render_src || params->blit_src) {
+ // Always specify the SRV format for simplicity. This will match the
+ // texture format for textures created with tex_create, but it can be
+ // different for wrapped planar video textures.
+ D3D11_SHADER_RESOURCE_VIEW_DESC srvdesc = {
+ .Format = ra_d3d11_get_format(params->format),
+ };
+ switch (params->dimensions) {
+ case 1:
+ if (tex_p->array_slice >= 0) {
+ srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1DARRAY;
+ srvdesc.Texture1DArray.MipLevels = 1;
+ srvdesc.Texture1DArray.FirstArraySlice = tex_p->array_slice;
+ srvdesc.Texture1DArray.ArraySize = 1;
+ } else {
+ srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D;
+ srvdesc.Texture1D.MipLevels = 1;
+ }
+ break;
+ case 2:
+ if (tex_p->array_slice >= 0) {
+ srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2DARRAY;
+ srvdesc.Texture2DArray.MipLevels = 1;
+ srvdesc.Texture2DArray.FirstArraySlice = tex_p->array_slice;
+ srvdesc.Texture2DArray.ArraySize = 1;
+ } else {
+ srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
+ srvdesc.Texture2D.MipLevels = 1;
+ }
+ break;
+ case 3:
+ // D3D11 does not have Texture3D arrays
+ srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D;
+ srvdesc.Texture3D.MipLevels = 1;
+ break;
+ }
+ hr = ID3D11Device_CreateShaderResourceView(p->dev, tex_p->res, &srvdesc,
+ &tex_p->srv);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create SRV: %s\n", mp_HRESULT_to_str(hr));
+ goto error;
+ }
+ }
+
+ // Samplers are required for renderpasses, but not blitting, since the blit
+ // code uses its own point sampler
+ if (params->render_src) {
+ D3D11_SAMPLER_DESC sdesc = {
+ .AddressU = D3D11_TEXTURE_ADDRESS_CLAMP,
+ .AddressV = D3D11_TEXTURE_ADDRESS_CLAMP,
+ .AddressW = D3D11_TEXTURE_ADDRESS_CLAMP,
+ .ComparisonFunc = D3D11_COMPARISON_NEVER,
+ .MinLOD = 0,
+ .MaxLOD = D3D11_FLOAT32_MAX,
+ .MaxAnisotropy = 1,
+ };
+ if (params->src_linear)
+ sdesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR;
+ if (params->src_repeat) {
+ sdesc.AddressU = sdesc.AddressV = sdesc.AddressW =
+ D3D11_TEXTURE_ADDRESS_WRAP;
+ }
+ // The runtime pools sampler state objects internally, so we don't have
+ // to worry about resource usage when creating one for every ra_tex
+ hr = ID3D11Device_CreateSamplerState(p->dev, &sdesc, &tex_p->sampler);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create sampler: %s\n", mp_HRESULT_to_str(hr));
+ goto error;
+ }
+ }
+
+ // Like SRVs, an RTV is required for renderpass output and blitting
+ if (params->render_dst || params->blit_dst) {
+ hr = ID3D11Device_CreateRenderTargetView(p->dev, tex_p->res, NULL,
+ &tex_p->rtv);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create RTV: %s\n", mp_HRESULT_to_str(hr));
+ goto error;
+ }
+ }
+
+ if (p->fl >= D3D_FEATURE_LEVEL_11_0 && params->storage_dst) {
+ hr = ID3D11Device_CreateUnorderedAccessView(p->dev, tex_p->res, NULL,
+ &tex_p->uav);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create UAV: %s\n", mp_HRESULT_to_str(hr));
+ goto error;
+ }
+ }
+
+ return true;
+error:
+ return false;
+}
+
+static void tex_destroy(struct ra *ra, struct ra_tex *tex)
+{
+ if (!tex)
+ return;
+ struct d3d_tex *tex_p = tex->priv;
+
+ SAFE_RELEASE(tex_p->srv);
+ SAFE_RELEASE(tex_p->rtv);
+ SAFE_RELEASE(tex_p->uav);
+ SAFE_RELEASE(tex_p->sampler);
+ SAFE_RELEASE(tex_p->res);
+ SAFE_RELEASE(tex_p->staging);
+ talloc_free(tex);
+}
+
+static struct ra_tex *tex_create(struct ra *ra,
+ const struct ra_tex_params *params)
+{
+ // Only 2D textures may be downloaded for now
+ if (params->downloadable && params->dimensions != 2)
+ return NULL;
+
+ struct ra_d3d11 *p = ra->priv;
+ HRESULT hr;
+
+ struct ra_tex *tex = talloc_zero(NULL, struct ra_tex);
+ tex->params = *params;
+ tex->params.initial_data = NULL;
+
+ struct d3d_tex *tex_p = tex->priv = talloc_zero(tex, struct d3d_tex);
+ DXGI_FORMAT fmt = ra_d3d11_get_format(params->format);
+
+ D3D11_SUBRESOURCE_DATA data;
+ D3D11_SUBRESOURCE_DATA *pdata = NULL;
+ if (params->initial_data) {
+ data = (D3D11_SUBRESOURCE_DATA) {
+ .pSysMem = params->initial_data,
+ .SysMemPitch = params->w * params->format->pixel_size,
+ };
+ if (params->dimensions >= 3)
+ data.SysMemSlicePitch = data.SysMemPitch * params->h;
+ pdata = &data;
+ }
+
+ D3D11_USAGE usage = D3D11_USAGE_DEFAULT;
+ D3D11_BIND_FLAG bind_flags = 0;
+
+ if (params->render_src || params->blit_src)
+ bind_flags |= D3D11_BIND_SHADER_RESOURCE;
+ if (params->render_dst || params->blit_dst)
+ bind_flags |= D3D11_BIND_RENDER_TARGET;
+ if (p->fl >= D3D_FEATURE_LEVEL_11_0 && params->storage_dst)
+ bind_flags |= D3D11_BIND_UNORDERED_ACCESS;
+
+ // Apparently IMMUTABLE textures are efficient, so try to infer whether we
+ // can use one
+ if (params->initial_data && !params->render_dst && !params->storage_dst &&
+ !params->blit_dst && !params->host_mutable)
+ usage = D3D11_USAGE_IMMUTABLE;
+
+ switch (params->dimensions) {
+ case 1:;
+ D3D11_TEXTURE1D_DESC desc1d = {
+ .Width = params->w,
+ .MipLevels = 1,
+ .ArraySize = 1,
+ .Format = fmt,
+ .Usage = usage,
+ .BindFlags = bind_flags,
+ };
+ hr = ID3D11Device_CreateTexture1D(p->dev, &desc1d, pdata, &tex_p->tex1d);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create Texture1D: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+ tex_p->res = (ID3D11Resource *)tex_p->tex1d;
+ break;
+ case 2:;
+ D3D11_TEXTURE2D_DESC desc2d = {
+ .Width = params->w,
+ .Height = params->h,
+ .MipLevels = 1,
+ .ArraySize = 1,
+ .SampleDesc.Count = 1,
+ .Format = fmt,
+ .Usage = usage,
+ .BindFlags = bind_flags,
+ };
+ hr = ID3D11Device_CreateTexture2D(p->dev, &desc2d, pdata, &tex_p->tex2d);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create Texture2D: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+ tex_p->res = (ID3D11Resource *)tex_p->tex2d;
+
+ // Create a staging texture with CPU access for tex_download()
+ if (params->downloadable) {
+ desc2d.BindFlags = 0;
+ desc2d.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
+ desc2d.Usage = D3D11_USAGE_STAGING;
+
+ hr = ID3D11Device_CreateTexture2D(p->dev, &desc2d, NULL,
+ &tex_p->staging);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to staging texture: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+ }
+ break;
+ case 3:;
+ D3D11_TEXTURE3D_DESC desc3d = {
+ .Width = params->w,
+ .Height = params->h,
+ .Depth = params->d,
+ .MipLevels = 1,
+ .Format = fmt,
+ .Usage = usage,
+ .BindFlags = bind_flags,
+ };
+ hr = ID3D11Device_CreateTexture3D(p->dev, &desc3d, pdata, &tex_p->tex3d);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create Texture3D: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+ tex_p->res = (ID3D11Resource *)tex_p->tex3d;
+ break;
+ default:
+ MP_ASSERT_UNREACHABLE();
+ }
+
+ tex_p->array_slice = -1;
+
+ if (!tex_init(ra, tex))
+ goto error;
+
+ return tex;
+
+error:
+ tex_destroy(ra, tex);
+ return NULL;
+}
+
+struct ra_tex *ra_d3d11_wrap_tex(struct ra *ra, ID3D11Resource *res)
+{
+ HRESULT hr;
+
+ struct ra_tex *tex = talloc_zero(NULL, struct ra_tex);
+ struct ra_tex_params *params = &tex->params;
+ struct d3d_tex *tex_p = tex->priv = talloc_zero(tex, struct d3d_tex);
+
+ DXGI_FORMAT fmt = DXGI_FORMAT_UNKNOWN;
+ D3D11_USAGE usage = D3D11_USAGE_DEFAULT;
+ D3D11_BIND_FLAG bind_flags = 0;
+
+ D3D11_RESOURCE_DIMENSION type;
+ ID3D11Resource_GetType(res, &type);
+ switch (type) {
+ case D3D11_RESOURCE_DIMENSION_TEXTURE2D:
+ hr = ID3D11Resource_QueryInterface(res, &IID_ID3D11Texture2D,
+ (void**)&tex_p->tex2d);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Resource is not a ID3D11Texture2D\n");
+ goto error;
+ }
+ tex_p->res = (ID3D11Resource *)tex_p->tex2d;
+
+ D3D11_TEXTURE2D_DESC desc2d;
+ ID3D11Texture2D_GetDesc(tex_p->tex2d, &desc2d);
+ if (desc2d.MipLevels != 1) {
+ MP_ERR(ra, "Mipmapped textures not supported for wrapping\n");
+ goto error;
+ }
+ if (desc2d.ArraySize != 1) {
+ MP_ERR(ra, "Texture arrays not supported for wrapping\n");
+ goto error;
+ }
+ if (desc2d.SampleDesc.Count != 1) {
+ MP_ERR(ra, "Multisampled textures not supported for wrapping\n");
+ goto error;
+ }
+
+ params->dimensions = 2;
+ params->w = desc2d.Width;
+ params->h = desc2d.Height;
+ params->d = 1;
+ usage = desc2d.Usage;
+ bind_flags = desc2d.BindFlags;
+ fmt = desc2d.Format;
+ break;
+ default:
+ // We could wrap Texture1D/3D as well, but keep it simple, since this
+ // function is only used for swapchain backbuffers at the moment
+ MP_ERR(ra, "Resource is not suitable to wrap\n");
+ goto error;
+ }
+
+ for (int i = 0; i < ra->num_formats; i++) {
+ DXGI_FORMAT target_fmt = ra_d3d11_get_format(ra->formats[i]);
+ if (fmt == target_fmt) {
+ params->format = ra->formats[i];
+ break;
+ }
+ }
+ if (!params->format) {
+ MP_ERR(ra, "Could not find a suitable RA format for wrapped resource\n");
+ goto error;
+ }
+
+ if (bind_flags & D3D11_BIND_SHADER_RESOURCE) {
+ params->render_src = params->blit_src = true;
+ params->src_linear = params->format->linear_filter;
+ }
+ if (bind_flags & D3D11_BIND_RENDER_TARGET)
+ params->render_dst = params->blit_dst = true;
+ if (bind_flags & D3D11_BIND_UNORDERED_ACCESS)
+ params->storage_dst = true;
+
+ if (usage != D3D11_USAGE_DEFAULT) {
+ MP_ERR(ra, "Resource is not D3D11_USAGE_DEFAULT\n");
+ goto error;
+ }
+
+ tex_p->array_slice = -1;
+
+ if (!tex_init(ra, tex))
+ goto error;
+
+ return tex;
+error:
+ tex_destroy(ra, tex);
+ return NULL;
+}
+
+struct ra_tex *ra_d3d11_wrap_tex_video(struct ra *ra, ID3D11Texture2D *res,
+ int w, int h, int array_slice,
+ const struct ra_format *fmt)
+{
+ struct ra_tex *tex = talloc_zero(NULL, struct ra_tex);
+ struct ra_tex_params *params = &tex->params;
+ struct d3d_tex *tex_p = tex->priv = talloc_zero(tex, struct d3d_tex);
+
+ tex_p->tex2d = res;
+ tex_p->res = (ID3D11Resource *)tex_p->tex2d;
+ ID3D11Texture2D_AddRef(res);
+
+ D3D11_TEXTURE2D_DESC desc2d;
+ ID3D11Texture2D_GetDesc(tex_p->tex2d, &desc2d);
+ if (!(desc2d.BindFlags & D3D11_BIND_SHADER_RESOURCE)) {
+ MP_ERR(ra, "Video resource is not bindable\n");
+ goto error;
+ }
+
+ params->dimensions = 2;
+ params->w = w;
+ params->h = h;
+ params->d = 1;
+ params->render_src = true;
+ params->src_linear = true;
+ // fmt can be different to the texture format for planar video textures
+ params->format = fmt;
+
+ if (desc2d.ArraySize > 1) {
+ tex_p->array_slice = array_slice;
+ } else {
+ tex_p->array_slice = -1;
+ }
+
+ if (!tex_init(ra, tex))
+ goto error;
+
+ return tex;
+error:
+ tex_destroy(ra, tex);
+ return NULL;
+}
+
+ID3D11Resource *ra_d3d11_get_raw_tex(struct ra *ra, struct ra_tex *tex,
+ int *array_slice)
+{
+ struct d3d_tex *tex_p = tex->priv;
+
+ ID3D11Resource_AddRef(tex_p->res);
+ if (array_slice)
+ *array_slice = tex_p->array_slice;
+ return tex_p->res;
+}
+
+static bool tex_upload(struct ra *ra, const struct ra_tex_upload_params *params)
+{
+ struct ra_d3d11 *p = ra->priv;
+ struct ra_tex *tex = params->tex;
+ struct d3d_tex *tex_p = tex->priv;
+
+ if (!params->src) {
+ MP_ERR(ra, "Pixel buffers are not supported\n");
+ return false;
+ }
+
+ const char *src = params->src;
+ ptrdiff_t stride = tex->params.dimensions >= 2 ? tex->params.w : 0;
+ ptrdiff_t pitch = tex->params.dimensions >= 3 ? stride * tex->params.h : 0;
+ bool invalidate = true;
+ D3D11_BOX rc;
+ D3D11_BOX *prc = NULL;
+
+ if (tex->params.dimensions == 2) {
+ stride = params->stride;
+
+ if (params->rc && (params->rc->x0 != 0 || params->rc->y0 != 0 ||
+ params->rc->x1 != tex->params.w || params->rc->y1 != tex->params.h))
+ {
+ rc = (D3D11_BOX) {
+ .left = params->rc->x0,
+ .top = params->rc->y0,
+ .front = 0,
+ .right = params->rc->x1,
+ .bottom = params->rc->y1,
+ .back = 1,
+ };
+ prc = &rc;
+ invalidate = params->invalidate;
+ }
+ }
+
+ int subresource = tex_p->array_slice >= 0 ? tex_p->array_slice : 0;
+ if (p->ctx1) {
+ ID3D11DeviceContext1_UpdateSubresource1(p->ctx1, tex_p->res,
+ subresource, prc, src, stride, pitch,
+ invalidate ? D3D11_COPY_DISCARD : 0);
+ } else {
+ ID3D11DeviceContext_UpdateSubresource(p->ctx, tex_p->res, subresource,
+ prc, src, stride, pitch);
+ }
+
+ return true;
+}
+
+static bool tex_download(struct ra *ra, struct ra_tex_download_params *params)
+{
+ struct ra_d3d11 *p = ra->priv;
+ struct ra_tex *tex = params->tex;
+ struct d3d_tex *tex_p = tex->priv;
+ HRESULT hr;
+
+ if (!tex_p->staging)
+ return false;
+
+ ID3D11DeviceContext_CopyResource(p->ctx, (ID3D11Resource*)tex_p->staging,
+ tex_p->res);
+
+ D3D11_MAPPED_SUBRESOURCE lock;
+ hr = ID3D11DeviceContext_Map(p->ctx, (ID3D11Resource*)tex_p->staging, 0,
+ D3D11_MAP_READ, 0, &lock);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to map staging texture: %s\n", mp_HRESULT_to_str(hr));
+ return false;
+ }
+
+ char *cdst = params->dst;
+ char *csrc = lock.pData;
+ for (int y = 0; y < tex->params.h; y++) {
+ memcpy(cdst + y * params->stride, csrc + y * lock.RowPitch,
+ MPMIN(params->stride, lock.RowPitch));
+ }
+
+ ID3D11DeviceContext_Unmap(p->ctx, (ID3D11Resource*)tex_p->staging, 0);
+
+ return true;
+}
+
+static void buf_destroy(struct ra *ra, struct ra_buf *buf)
+{
+ if (!buf)
+ return;
+ struct d3d_buf *buf_p = buf->priv;
+ SAFE_RELEASE(buf_p->buf);
+ SAFE_RELEASE(buf_p->uav);
+ talloc_free(buf);
+}
+
+static struct ra_buf *buf_create(struct ra *ra,
+ const struct ra_buf_params *params)
+{
+ // D3D11 does not support permanent mapping or pixel buffers
+ if (params->host_mapped || params->type == RA_BUF_TYPE_TEX_UPLOAD)
+ return NULL;
+
+ struct ra_d3d11 *p = ra->priv;
+ HRESULT hr;
+
+ struct ra_buf *buf = talloc_zero(NULL, struct ra_buf);
+ buf->params = *params;
+ buf->params.initial_data = NULL;
+
+ struct d3d_buf *buf_p = buf->priv = talloc_zero(buf, struct d3d_buf);
+
+ D3D11_SUBRESOURCE_DATA data;
+ D3D11_SUBRESOURCE_DATA *pdata = NULL;
+ if (params->initial_data) {
+ data = (D3D11_SUBRESOURCE_DATA) { .pSysMem = params->initial_data };
+ pdata = &data;
+ }
+
+ D3D11_BUFFER_DESC desc = { .ByteWidth = params->size };
+ switch (params->type) {
+ case RA_BUF_TYPE_SHADER_STORAGE:
+ desc.BindFlags = D3D11_BIND_UNORDERED_ACCESS;
+ desc.ByteWidth = MP_ALIGN_UP(desc.ByteWidth, sizeof(float));
+ desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS;
+ break;
+ case RA_BUF_TYPE_UNIFORM:
+ desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
+ desc.ByteWidth = MP_ALIGN_UP(desc.ByteWidth, sizeof(float[4]));
+ break;
+ }
+
+ hr = ID3D11Device_CreateBuffer(p->dev, &desc, pdata, &buf_p->buf);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create buffer: %s\n", mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ // D3D11 doesn't allow constant buffer updates that aren't aligned to a
+ // full constant boundary (vec4,) and some drivers don't allow partial
+ // constant buffer updates at all. To support partial buffer updates, keep
+ // a mirror of the buffer data in system memory and upload the whole thing
+ // before the buffer is used.
+ if (params->host_mutable)
+ buf_p->data = talloc_zero_size(buf, desc.ByteWidth);
+
+ if (params->type == RA_BUF_TYPE_SHADER_STORAGE) {
+ D3D11_UNORDERED_ACCESS_VIEW_DESC udesc = {
+ .Format = DXGI_FORMAT_R32_TYPELESS,
+ .ViewDimension = D3D11_UAV_DIMENSION_BUFFER,
+ .Buffer = {
+ .NumElements = desc.ByteWidth / sizeof(float),
+ .Flags = D3D11_BUFFER_UAV_FLAG_RAW,
+ },
+ };
+ hr = ID3D11Device_CreateUnorderedAccessView(p->dev,
+ (ID3D11Resource *)buf_p->buf, &udesc, &buf_p->uav);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create UAV: %s\n", mp_HRESULT_to_str(hr));
+ goto error;
+ }
+ }
+
+ return buf;
+error:
+ buf_destroy(ra, buf);
+ return NULL;
+}
+
+static void buf_resolve(struct ra *ra, struct ra_buf *buf)
+{
+ struct ra_d3d11 *p = ra->priv;
+ struct d3d_buf *buf_p = buf->priv;
+
+ if (!buf->params.host_mutable || !buf_p->dirty)
+ return;
+
+ // Synchronize the GPU buffer with the system-memory copy
+ ID3D11DeviceContext_UpdateSubresource(p->ctx, (ID3D11Resource *)buf_p->buf,
+ 0, NULL, buf_p->data, 0, 0);
+ buf_p->dirty = false;
+}
+
+static void buf_update(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset,
+ const void *data, size_t size)
+{
+ struct d3d_buf *buf_p = buf->priv;
+
+ char *cdata = buf_p->data;
+ memcpy(cdata + offset, data, size);
+ buf_p->dirty = true;
+}
+
+static const char *get_shader_target(struct ra *ra, enum glsl_shader type)
+{
+ struct ra_d3d11 *p = ra->priv;
+ switch (p->fl) {
+ default:
+ switch (type) {
+ case GLSL_SHADER_VERTEX: return "vs_5_0";
+ case GLSL_SHADER_FRAGMENT: return "ps_5_0";
+ case GLSL_SHADER_COMPUTE: return "cs_5_0";
+ }
+ break;
+ case D3D_FEATURE_LEVEL_10_1:
+ switch (type) {
+ case GLSL_SHADER_VERTEX: return "vs_4_1";
+ case GLSL_SHADER_FRAGMENT: return "ps_4_1";
+ case GLSL_SHADER_COMPUTE: return "cs_4_1";
+ }
+ break;
+ case D3D_FEATURE_LEVEL_10_0:
+ switch (type) {
+ case GLSL_SHADER_VERTEX: return "vs_4_0";
+ case GLSL_SHADER_FRAGMENT: return "ps_4_0";
+ case GLSL_SHADER_COMPUTE: return "cs_4_0";
+ }
+ break;
+ case D3D_FEATURE_LEVEL_9_3:
+ switch (type) {
+ case GLSL_SHADER_VERTEX: return "vs_4_0_level_9_3";
+ case GLSL_SHADER_FRAGMENT: return "ps_4_0_level_9_3";
+ }
+ break;
+ case D3D_FEATURE_LEVEL_9_2:
+ case D3D_FEATURE_LEVEL_9_1:
+ switch (type) {
+ case GLSL_SHADER_VERTEX: return "vs_4_0_level_9_1";
+ case GLSL_SHADER_FRAGMENT: return "ps_4_0_level_9_1";
+ }
+ break;
+ }
+ return NULL;
+}
+
+static const char *shader_type_name(enum glsl_shader type)
+{
+ switch (type) {
+ case GLSL_SHADER_VERTEX: return "vertex";
+ case GLSL_SHADER_FRAGMENT: return "fragment";
+ case GLSL_SHADER_COMPUTE: return "compute";
+ default: return "unknown";
+ }
+}
+
+static bool setup_clear_rpass(struct ra *ra)
+{
+ struct ra_d3d11 *p = ra->priv;
+ ID3DBlob *vs_blob = NULL;
+ ID3DBlob *ps_blob = NULL;
+ HRESULT hr;
+
+ hr = p->D3DCompile(clear_vs, sizeof(clear_vs), NULL, NULL, NULL, "main",
+ get_shader_target(ra, GLSL_SHADER_VERTEX),
+ D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &vs_blob, NULL);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to compile clear() vertex shader: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ hr = ID3D11Device_CreateVertexShader(p->dev,
+ ID3D10Blob_GetBufferPointer(vs_blob), ID3D10Blob_GetBufferSize(vs_blob),
+ NULL, &p->clear_vs);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create clear() vertex shader: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ hr = p->D3DCompile(clear_ps, sizeof(clear_ps), NULL, NULL, NULL, "main",
+ get_shader_target(ra, GLSL_SHADER_FRAGMENT),
+ D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &ps_blob, NULL);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to compile clear() pixel shader: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ hr = ID3D11Device_CreatePixelShader(p->dev,
+ ID3D10Blob_GetBufferPointer(ps_blob), ID3D10Blob_GetBufferSize(ps_blob),
+ NULL, &p->clear_ps);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create clear() pixel shader: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ D3D11_INPUT_ELEMENT_DESC in_descs[] = {
+ { "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0 },
+ };
+ hr = ID3D11Device_CreateInputLayout(p->dev, in_descs,
+ MP_ARRAY_SIZE(in_descs), ID3D10Blob_GetBufferPointer(vs_blob),
+ ID3D10Blob_GetBufferSize(vs_blob), &p->clear_layout);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create clear() IA layout: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ // clear() always draws to a quad covering the whole viewport
+ static const float verts[] = {
+ -1, -1,
+ 1, -1,
+ 1, 1,
+ -1, 1,
+ -1, -1,
+ 1, 1,
+ };
+ D3D11_BUFFER_DESC vdesc = {
+ .ByteWidth = sizeof(verts),
+ .Usage = D3D11_USAGE_IMMUTABLE,
+ .BindFlags = D3D11_BIND_VERTEX_BUFFER,
+ };
+ D3D11_SUBRESOURCE_DATA vdata = {
+ .pSysMem = verts,
+ };
+ hr = ID3D11Device_CreateBuffer(p->dev, &vdesc, &vdata, &p->clear_vbuf);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create clear() vertex buffer: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ D3D11_BUFFER_DESC cdesc = {
+ .ByteWidth = sizeof(float[4]),
+ .BindFlags = D3D11_BIND_CONSTANT_BUFFER,
+ };
+ hr = ID3D11Device_CreateBuffer(p->dev, &cdesc, NULL, &p->clear_cbuf);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create clear() constant buffer: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ SAFE_RELEASE(vs_blob);
+ SAFE_RELEASE(ps_blob);
+ return true;
+error:
+ SAFE_RELEASE(vs_blob);
+ SAFE_RELEASE(ps_blob);
+ return false;
+}
+
+static void clear_rpass(struct ra *ra, struct ra_tex *tex, float color[4],
+ struct mp_rect *rc)
+{
+ struct ra_d3d11 *p = ra->priv;
+ struct d3d_tex *tex_p = tex->priv;
+ struct ra_tex_params *params = &tex->params;
+
+ ID3D11DeviceContext_UpdateSubresource(p->ctx,
+ (ID3D11Resource *)p->clear_cbuf, 0, NULL, color, 0, 0);
+
+ ID3D11DeviceContext_IASetInputLayout(p->ctx, p->clear_layout);
+ ID3D11DeviceContext_IASetVertexBuffers(p->ctx, 0, 1, &p->clear_vbuf,
+ &(UINT) { sizeof(float[2]) }, &(UINT) { 0 });
+ ID3D11DeviceContext_IASetPrimitiveTopology(p->ctx,
+ D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
+
+ ID3D11DeviceContext_VSSetShader(p->ctx, p->clear_vs, NULL, 0);
+
+ ID3D11DeviceContext_RSSetViewports(p->ctx, 1, (&(D3D11_VIEWPORT) {
+ .Width = params->w,
+ .Height = params->h,
+ .MinDepth = 0,
+ .MaxDepth = 1,
+ }));
+ ID3D11DeviceContext_RSSetScissorRects(p->ctx, 1, (&(D3D11_RECT) {
+ .left = rc->x0,
+ .top = rc->y0,
+ .right = rc->x1,
+ .bottom = rc->y1,
+ }));
+ ID3D11DeviceContext_PSSetShader(p->ctx, p->clear_ps, NULL, 0);
+ ID3D11DeviceContext_PSSetConstantBuffers(p->ctx, 0, 1, &p->clear_cbuf);
+
+ ID3D11DeviceContext_OMSetRenderTargets(p->ctx, 1, &tex_p->rtv, NULL);
+ ID3D11DeviceContext_OMSetBlendState(p->ctx, NULL, NULL,
+ D3D11_DEFAULT_SAMPLE_MASK);
+
+ ID3D11DeviceContext_Draw(p->ctx, 6, 0);
+
+ ID3D11DeviceContext_PSSetConstantBuffers(p->ctx, 0, 1,
+ &(ID3D11Buffer *){ NULL });
+ ID3D11DeviceContext_OMSetRenderTargets(p->ctx, 0, NULL, NULL);
+}
+
+static void clear(struct ra *ra, struct ra_tex *tex, float color[4],
+ struct mp_rect *rc)
+{
+ struct ra_d3d11 *p = ra->priv;
+ struct d3d_tex *tex_p = tex->priv;
+ struct ra_tex_params *params = &tex->params;
+
+ if (!tex_p->rtv)
+ return;
+
+ if (rc->x0 || rc->y0 || rc->x1 != params->w || rc->y1 != params->h) {
+ if (p->has_clear_view) {
+ ID3D11DeviceContext1_ClearView(p->ctx1, (ID3D11View *)tex_p->rtv,
+ color, (&(D3D11_RECT) {
+ .left = rc->x0,
+ .top = rc->y0,
+ .right = rc->x1,
+ .bottom = rc->y1,
+ }), 1);
+ } else {
+ clear_rpass(ra, tex, color, rc);
+ }
+ } else {
+ ID3D11DeviceContext_ClearRenderTargetView(p->ctx, tex_p->rtv, color);
+ }
+}
+
+static bool setup_blit_rpass(struct ra *ra)
+{
+ struct ra_d3d11 *p = ra->priv;
+ ID3DBlob *vs_blob = NULL;
+ ID3DBlob *float_ps_blob = NULL;
+ HRESULT hr;
+
+ hr = p->D3DCompile(blit_vs, sizeof(blit_vs), NULL, NULL, NULL, "main",
+ get_shader_target(ra, GLSL_SHADER_VERTEX),
+ D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &vs_blob, NULL);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to compile blit() vertex shader: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ hr = ID3D11Device_CreateVertexShader(p->dev,
+ ID3D10Blob_GetBufferPointer(vs_blob), ID3D10Blob_GetBufferSize(vs_blob),
+ NULL, &p->blit_vs);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create blit() vertex shader: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ hr = p->D3DCompile(blit_float_ps, sizeof(blit_float_ps), NULL, NULL, NULL,
+ "main", get_shader_target(ra, GLSL_SHADER_FRAGMENT),
+ D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &float_ps_blob, NULL);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to compile blit() pixel shader: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ hr = ID3D11Device_CreatePixelShader(p->dev,
+ ID3D10Blob_GetBufferPointer(float_ps_blob),
+ ID3D10Blob_GetBufferSize(float_ps_blob),
+ NULL, &p->blit_float_ps);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create blit() pixel shader: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ D3D11_INPUT_ELEMENT_DESC in_descs[] = {
+ { "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0 },
+ { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 8 },
+ };
+ hr = ID3D11Device_CreateInputLayout(p->dev, in_descs,
+ MP_ARRAY_SIZE(in_descs), ID3D10Blob_GetBufferPointer(vs_blob),
+ ID3D10Blob_GetBufferSize(vs_blob), &p->blit_layout);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create blit() IA layout: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ D3D11_BUFFER_DESC vdesc = {
+ .ByteWidth = sizeof(struct blit_vert[6]),
+ .Usage = D3D11_USAGE_DEFAULT,
+ .BindFlags = D3D11_BIND_VERTEX_BUFFER,
+ };
+ hr = ID3D11Device_CreateBuffer(p->dev, &vdesc, NULL, &p->blit_vbuf);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create blit() vertex buffer: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ // Blit always uses point sampling, regardless of the source texture
+ D3D11_SAMPLER_DESC sdesc = {
+ .AddressU = D3D11_TEXTURE_ADDRESS_CLAMP,
+ .AddressV = D3D11_TEXTURE_ADDRESS_CLAMP,
+ .AddressW = D3D11_TEXTURE_ADDRESS_CLAMP,
+ .ComparisonFunc = D3D11_COMPARISON_NEVER,
+ .MinLOD = 0,
+ .MaxLOD = D3D11_FLOAT32_MAX,
+ .MaxAnisotropy = 1,
+ };
+ hr = ID3D11Device_CreateSamplerState(p->dev, &sdesc, &p->blit_sampler);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create blit() sampler: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ SAFE_RELEASE(vs_blob);
+ SAFE_RELEASE(float_ps_blob);
+ return true;
+error:
+ SAFE_RELEASE(vs_blob);
+ SAFE_RELEASE(float_ps_blob);
+ return false;
+}
+
+static void blit_rpass(struct ra *ra, struct ra_tex *dst, struct ra_tex *src,
+ struct mp_rect *dst_rc, struct mp_rect *src_rc)
+{
+ struct ra_d3d11 *p = ra->priv;
+ struct d3d_tex *dst_p = dst->priv;
+ struct d3d_tex *src_p = src->priv;
+
+ float u_min = (double)src_rc->x0 / src->params.w;
+ float u_max = (double)src_rc->x1 / src->params.w;
+ float v_min = (double)src_rc->y0 / src->params.h;
+ float v_max = (double)src_rc->y1 / src->params.h;
+
+ struct blit_vert verts[6] = {
+ { .x = -1, .y = -1, .u = u_min, .v = v_max },
+ { .x = 1, .y = -1, .u = u_max, .v = v_max },
+ { .x = 1, .y = 1, .u = u_max, .v = v_min },
+ { .x = -1, .y = 1, .u = u_min, .v = v_min },
+ };
+ verts[4] = verts[0];
+ verts[5] = verts[2];
+ ID3D11DeviceContext_UpdateSubresource(p->ctx,
+ (ID3D11Resource *)p->blit_vbuf, 0, NULL, verts, 0, 0);
+
+ ID3D11DeviceContext_IASetInputLayout(p->ctx, p->blit_layout);
+ ID3D11DeviceContext_IASetVertexBuffers(p->ctx, 0, 1, &p->blit_vbuf,
+ &(UINT) { sizeof(verts[0]) }, &(UINT) { 0 });
+ ID3D11DeviceContext_IASetPrimitiveTopology(p->ctx,
+ D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
+
+ ID3D11DeviceContext_VSSetShader(p->ctx, p->blit_vs, NULL, 0);
+
+ ID3D11DeviceContext_RSSetViewports(p->ctx, 1, (&(D3D11_VIEWPORT) {
+ .TopLeftX = dst_rc->x0,
+ .TopLeftY = dst_rc->y0,
+ .Width = mp_rect_w(*dst_rc),
+ .Height = mp_rect_h(*dst_rc),
+ .MinDepth = 0,
+ .MaxDepth = 1,
+ }));
+ ID3D11DeviceContext_RSSetScissorRects(p->ctx, 1, (&(D3D11_RECT) {
+ .left = dst_rc->x0,
+ .top = dst_rc->y0,
+ .right = dst_rc->x1,
+ .bottom = dst_rc->y1,
+ }));
+
+ ID3D11DeviceContext_PSSetShader(p->ctx, p->blit_float_ps, NULL, 0);
+ ID3D11DeviceContext_PSSetShaderResources(p->ctx, 0, 1, &src_p->srv);
+ ID3D11DeviceContext_PSSetSamplers(p->ctx, 0, 1, &p->blit_sampler);
+
+ ID3D11DeviceContext_OMSetRenderTargets(p->ctx, 1, &dst_p->rtv, NULL);
+ ID3D11DeviceContext_OMSetBlendState(p->ctx, NULL, NULL,
+ D3D11_DEFAULT_SAMPLE_MASK);
+
+ ID3D11DeviceContext_Draw(p->ctx, 6, 0);
+
+ ID3D11DeviceContext_PSSetShaderResources(p->ctx, 0, 1,
+ &(ID3D11ShaderResourceView *) { NULL });
+ ID3D11DeviceContext_PSSetSamplers(p->ctx, 0, 1,
+ &(ID3D11SamplerState *) { NULL });
+ ID3D11DeviceContext_OMSetRenderTargets(p->ctx, 0, NULL, NULL);
+}
+
+static void blit(struct ra *ra, struct ra_tex *dst, struct ra_tex *src,
+ struct mp_rect *dst_rc_ptr, struct mp_rect *src_rc_ptr)
+{
+ struct ra_d3d11 *p = ra->priv;
+ struct d3d_tex *dst_p = dst->priv;
+ struct d3d_tex *src_p = src->priv;
+ struct mp_rect dst_rc = *dst_rc_ptr;
+ struct mp_rect src_rc = *src_rc_ptr;
+
+ assert(dst->params.dimensions == 2);
+ assert(src->params.dimensions == 2);
+
+ // A zero-sized target rectangle is a no-op
+ if (!mp_rect_w(dst_rc) || !mp_rect_h(dst_rc))
+ return;
+
+ // ra.h seems to imply that both dst_rc and src_rc can be flipped, but it's
+ // easier for blit_rpass() if only src_rc can be flipped, so unflip dst_rc.
+ if (dst_rc.x0 > dst_rc.x1) {
+ MPSWAP(int, dst_rc.x0, dst_rc.x1);
+ MPSWAP(int, src_rc.x0, src_rc.x1);
+ }
+ if (dst_rc.y0 > dst_rc.y1) {
+ MPSWAP(int, dst_rc.y0, dst_rc.y1);
+ MPSWAP(int, src_rc.y0, src_rc.y1);
+ }
+
+ // If format conversion, stretching or flipping is required, a renderpass
+ // must be used
+ if (dst->params.format != src->params.format ||
+ mp_rect_w(dst_rc) != mp_rect_w(src_rc) ||
+ mp_rect_h(dst_rc) != mp_rect_h(src_rc))
+ {
+ blit_rpass(ra, dst, src, &dst_rc, &src_rc);
+ } else {
+ int dst_sr = dst_p->array_slice >= 0 ? dst_p->array_slice : 0;
+ int src_sr = src_p->array_slice >= 0 ? src_p->array_slice : 0;
+ ID3D11DeviceContext_CopySubresourceRegion(p->ctx, dst_p->res, dst_sr,
+ dst_rc.x0, dst_rc.y0, 0, src_p->res, src_sr, (&(D3D11_BOX) {
+ .left = src_rc.x0,
+ .top = src_rc.y0,
+ .front = 0,
+ .right = src_rc.x1,
+ .bottom = src_rc.y1,
+ .back = 1,
+ }));
+ }
+}
+
+static int desc_namespace(struct ra *ra, enum ra_vartype type)
+{
+ // Images and SSBOs both use UAV bindings
+ if (type == RA_VARTYPE_IMG_W)
+ type = RA_VARTYPE_BUF_RW;
+ return type;
+}
+
+static bool compile_glsl(struct ra *ra, enum glsl_shader type,
+ const char *glsl, ID3DBlob **out)
+{
+ struct ra_d3d11 *p = ra->priv;
+ struct spirv_compiler *spirv = p->spirv;
+ void *ta_ctx = talloc_new(NULL);
+ spvc_result sc_res = SPVC_SUCCESS;
+ spvc_context sc_ctx = NULL;
+ spvc_parsed_ir sc_ir = NULL;
+ spvc_compiler sc_compiler = NULL;
+ spvc_compiler_options sc_opts = NULL;
+ const char *hlsl = NULL;
+ ID3DBlob *errors = NULL;
+ bool success = false;
+ HRESULT hr;
+
+ int sc_shader_model;
+ if (p->fl >= D3D_FEATURE_LEVEL_11_0) {
+ sc_shader_model = 50;
+ } else if (p->fl >= D3D_FEATURE_LEVEL_10_1) {
+ sc_shader_model = 41;
+ } else {
+ sc_shader_model = 40;
+ }
+
+ int64_t start_ns = mp_time_ns();
+
+ bstr spv_module;
+ if (!spirv->fns->compile_glsl(spirv, ta_ctx, type, glsl, &spv_module))
+ goto done;
+
+ int64_t shaderc_ns = mp_time_ns();
+
+ sc_res = spvc_context_create(&sc_ctx);
+ if (sc_res != SPVC_SUCCESS)
+ goto done;
+
+ sc_res = spvc_context_parse_spirv(sc_ctx, (SpvId *)spv_module.start,
+ spv_module.len / sizeof(SpvId), &sc_ir);
+ if (sc_res != SPVC_SUCCESS)
+ goto done;
+
+ sc_res = spvc_context_create_compiler(sc_ctx, SPVC_BACKEND_HLSL, sc_ir,
+ SPVC_CAPTURE_MODE_TAKE_OWNERSHIP,
+ &sc_compiler);
+ if (sc_res != SPVC_SUCCESS)
+ goto done;
+
+ sc_res = spvc_compiler_create_compiler_options(sc_compiler, &sc_opts);
+ if (sc_res != SPVC_SUCCESS)
+ goto done;
+ sc_res = spvc_compiler_options_set_uint(sc_opts,
+ SPVC_COMPILER_OPTION_HLSL_SHADER_MODEL, sc_shader_model);
+ if (sc_res != SPVC_SUCCESS)
+ goto done;
+ if (type == GLSL_SHADER_VERTEX) {
+ // FLIP_VERTEX_Y is only valid for vertex shaders
+ sc_res = spvc_compiler_options_set_bool(sc_opts,
+ SPVC_COMPILER_OPTION_FLIP_VERTEX_Y, SPVC_TRUE);
+ if (sc_res != SPVC_SUCCESS)
+ goto done;
+ }
+ sc_res = spvc_compiler_install_compiler_options(sc_compiler, sc_opts);
+ if (sc_res != SPVC_SUCCESS)
+ goto done;
+
+ sc_res = spvc_compiler_compile(sc_compiler, &hlsl);
+ if (sc_res != SPVC_SUCCESS)
+ goto done;
+
+ int64_t cross_ns = mp_time_ns();
+
+ hr = p->D3DCompile(hlsl, strlen(hlsl), NULL, NULL, NULL, "main",
+ get_shader_target(ra, type), D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, out,
+ &errors);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "D3DCompile failed: %s\n%.*s", mp_HRESULT_to_str(hr),
+ (int)ID3D10Blob_GetBufferSize(errors),
+ (char*)ID3D10Blob_GetBufferPointer(errors));
+ goto done;
+ }
+
+ int64_t d3dcompile_ns = mp_time_ns();
+
+ MP_VERBOSE(ra, "Compiled a %s shader in %lldns\n", shader_type_name(type),
+ d3dcompile_ns - start_ns);
+ MP_VERBOSE(ra, "shaderc: %lldns, SPIRV-Cross: %lldns, D3DCompile: %lldns\n",
+ shaderc_ns - start_ns,
+ cross_ns - shaderc_ns,
+ d3dcompile_ns - cross_ns);
+
+ success = true;
+done:
+ if (sc_res != SPVC_SUCCESS) {
+ MP_MSG(ra, MSGL_ERR, "SPIRV-Cross failed: %s\n",
+ spvc_context_get_last_error_string(sc_ctx));
+ }
+ int level = success ? MSGL_DEBUG : MSGL_ERR;
+ MP_MSG(ra, level, "GLSL source:\n");
+ mp_log_source(ra->log, level, glsl);
+ if (hlsl) {
+ MP_MSG(ra, level, "HLSL source:\n");
+ mp_log_source(ra->log, level, hlsl);
+ }
+ SAFE_RELEASE(errors);
+ if (sc_ctx)
+ spvc_context_destroy(sc_ctx);
+ talloc_free(ta_ctx);
+ return success;
+}
+
+static void renderpass_destroy(struct ra *ra, struct ra_renderpass *pass)
+{
+ if (!pass)
+ return;
+ struct d3d_rpass *pass_p = pass->priv;
+
+ SAFE_RELEASE(pass_p->vs);
+ SAFE_RELEASE(pass_p->ps);
+ SAFE_RELEASE(pass_p->cs);
+ SAFE_RELEASE(pass_p->layout);
+ SAFE_RELEASE(pass_p->bstate);
+ talloc_free(pass);
+}
+
+static D3D11_BLEND map_ra_blend(enum ra_blend blend)
+{
+ switch (blend) {
+ default:
+ case RA_BLEND_ZERO: return D3D11_BLEND_ZERO;
+ case RA_BLEND_ONE: return D3D11_BLEND_ONE;
+ case RA_BLEND_SRC_ALPHA: return D3D11_BLEND_SRC_ALPHA;
+ case RA_BLEND_ONE_MINUS_SRC_ALPHA: return D3D11_BLEND_INV_SRC_ALPHA;
+ };
+}
+
+static size_t vbuf_upload(struct ra *ra, void *data, size_t size)
+{
+ struct ra_d3d11 *p = ra->priv;
+ HRESULT hr;
+
+ // Arbitrary size limit in case there is an insane number of vertices
+ if (size > 1e9) {
+ MP_ERR(ra, "Vertex buffer is too large\n");
+ return -1;
+ }
+
+ // If the vertex data doesn't fit, realloc the vertex buffer
+ if (size > p->vbuf_size) {
+ size_t new_size = p->vbuf_size;
+ // Arbitrary base size
+ if (!new_size)
+ new_size = 64 * 1024;
+ while (new_size < size)
+ new_size *= 2;
+
+ ID3D11Buffer *new_buf;
+ D3D11_BUFFER_DESC vbuf_desc = {
+ .ByteWidth = new_size,
+ .Usage = D3D11_USAGE_DYNAMIC,
+ .BindFlags = D3D11_BIND_VERTEX_BUFFER,
+ .CPUAccessFlags = D3D11_CPU_ACCESS_WRITE,
+ };
+ hr = ID3D11Device_CreateBuffer(p->dev, &vbuf_desc, NULL, &new_buf);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create vertex buffer: %s\n",
+ mp_HRESULT_to_str(hr));
+ return -1;
+ }
+
+ SAFE_RELEASE(p->vbuf);
+ p->vbuf = new_buf;
+ p->vbuf_size = new_size;
+ p->vbuf_used = 0;
+ }
+
+ bool discard = false;
+ size_t offset = p->vbuf_used;
+ if (offset + size > p->vbuf_size) {
+ // We reached the end of the buffer, so discard and wrap around
+ discard = true;
+ offset = 0;
+ }
+
+ D3D11_MAPPED_SUBRESOURCE map = { 0 };
+ hr = ID3D11DeviceContext_Map(p->ctx, (ID3D11Resource *)p->vbuf, 0,
+ discard ? D3D11_MAP_WRITE_DISCARD : D3D11_MAP_WRITE_NO_OVERWRITE,
+ 0, &map);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to map vertex buffer: %s\n", mp_HRESULT_to_str(hr));
+ return -1;
+ }
+
+ char *cdata = map.pData;
+ memcpy(cdata + offset, data, size);
+
+ ID3D11DeviceContext_Unmap(p->ctx, (ID3D11Resource *)p->vbuf, 0);
+
+ p->vbuf_used = offset + size;
+ return offset;
+}
+
+static const char cache_magic[4] = "RD11";
+static const int cache_version = 3;
+
+struct cache_header {
+ char magic[sizeof(cache_magic)];
+ int cache_version;
+ char compiler[SPIRV_NAME_MAX_LEN];
+ int spv_compiler_version;
+ unsigned spvc_compiler_major;
+ unsigned spvc_compiler_minor;
+ unsigned spvc_compiler_patch;
+ struct dll_version d3d_compiler_version;
+ int feature_level;
+ size_t vert_bytecode_len;
+ size_t frag_bytecode_len;
+ size_t comp_bytecode_len;
+};
+
+static void load_cached_program(struct ra *ra,
+ const struct ra_renderpass_params *params,
+ bstr *vert_bc,
+ bstr *frag_bc,
+ bstr *comp_bc)
+{
+ struct ra_d3d11 *p = ra->priv;
+ struct spirv_compiler *spirv = p->spirv;
+ bstr cache = params->cached_program;
+
+ if (cache.len < sizeof(struct cache_header))
+ return;
+
+ struct cache_header *header = (struct cache_header *)cache.start;
+ cache = bstr_cut(cache, sizeof(*header));
+
+ unsigned spvc_major, spvc_minor, spvc_patch;
+ spvc_get_version(&spvc_major, &spvc_minor, &spvc_patch);
+
+ if (strncmp(header->magic, cache_magic, sizeof(cache_magic)) != 0)
+ return;
+ if (header->cache_version != cache_version)
+ return;
+ if (strncmp(header->compiler, spirv->name, sizeof(header->compiler)) != 0)
+ return;
+ if (header->spv_compiler_version != spirv->compiler_version)
+ return;
+ if (header->spvc_compiler_major != spvc_major)
+ return;
+ if (header->spvc_compiler_minor != spvc_minor)
+ return;
+ if (header->spvc_compiler_patch != spvc_patch)
+ return;
+ if (!dll_version_equal(header->d3d_compiler_version, p->d3d_compiler_ver))
+ return;
+ if (header->feature_level != p->fl)
+ return;
+
+ if (header->vert_bytecode_len && vert_bc) {
+ *vert_bc = bstr_splice(cache, 0, header->vert_bytecode_len);
+ MP_VERBOSE(ra, "Using cached vertex shader\n");
+ }
+ cache = bstr_cut(cache, header->vert_bytecode_len);
+
+ if (header->frag_bytecode_len && frag_bc) {
+ *frag_bc = bstr_splice(cache, 0, header->frag_bytecode_len);
+ MP_VERBOSE(ra, "Using cached fragment shader\n");
+ }
+ cache = bstr_cut(cache, header->frag_bytecode_len);
+
+ if (header->comp_bytecode_len && comp_bc) {
+ *comp_bc = bstr_splice(cache, 0, header->comp_bytecode_len);
+ MP_VERBOSE(ra, "Using cached compute shader\n");
+ }
+ cache = bstr_cut(cache, header->comp_bytecode_len);
+}
+
+static void save_cached_program(struct ra *ra, struct ra_renderpass *pass,
+ bstr vert_bc,
+ bstr frag_bc,
+ bstr comp_bc)
+{
+ struct ra_d3d11 *p = ra->priv;
+ struct spirv_compiler *spirv = p->spirv;
+
+ unsigned spvc_major, spvc_minor, spvc_patch;
+ spvc_get_version(&spvc_major, &spvc_minor, &spvc_patch);
+
+ struct cache_header header = {
+ .cache_version = cache_version,
+ .spv_compiler_version = p->spirv->compiler_version,
+ .spvc_compiler_major = spvc_major,
+ .spvc_compiler_minor = spvc_minor,
+ .spvc_compiler_patch = spvc_patch,
+ .d3d_compiler_version = p->d3d_compiler_ver,
+ .feature_level = p->fl,
+ .vert_bytecode_len = vert_bc.len,
+ .frag_bytecode_len = frag_bc.len,
+ .comp_bytecode_len = comp_bc.len,
+ };
+ memcpy(header.magic, cache_magic, sizeof(header.magic));
+ strncpy(header.compiler, spirv->name, sizeof(header.compiler));
+
+ struct bstr *prog = &pass->params.cached_program;
+ bstr_xappend(pass, prog, (bstr){ (char *) &header, sizeof(header) });
+ bstr_xappend(pass, prog, vert_bc);
+ bstr_xappend(pass, prog, frag_bc);
+ bstr_xappend(pass, prog, comp_bc);
+}
+
+static struct ra_renderpass *renderpass_create_raster(struct ra *ra,
+ struct ra_renderpass *pass, const struct ra_renderpass_params *params)
+{
+ struct ra_d3d11 *p = ra->priv;
+ struct d3d_rpass *pass_p = pass->priv;
+ ID3DBlob *vs_blob = NULL;
+ ID3DBlob *ps_blob = NULL;
+ HRESULT hr;
+
+ // load_cached_program will load compiled shader bytecode into vert_bc and
+ // frag_bc if the cache is valid. If not, vert_bc/frag_bc will remain NULL.
+ bstr vert_bc = {0};
+ bstr frag_bc = {0};
+ load_cached_program(ra, params, &vert_bc, &frag_bc, NULL);
+
+ if (!vert_bc.start) {
+ if (!compile_glsl(ra, GLSL_SHADER_VERTEX, params->vertex_shader,
+ &vs_blob))
+ goto error;
+ vert_bc = (bstr){
+ ID3D10Blob_GetBufferPointer(vs_blob),
+ ID3D10Blob_GetBufferSize(vs_blob),
+ };
+ }
+
+ hr = ID3D11Device_CreateVertexShader(p->dev, vert_bc.start, vert_bc.len,
+ NULL, &pass_p->vs);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create vertex shader: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ if (!frag_bc.start) {
+ if (!compile_glsl(ra, GLSL_SHADER_FRAGMENT, params->frag_shader,
+ &ps_blob))
+ goto error;
+ frag_bc = (bstr){
+ ID3D10Blob_GetBufferPointer(ps_blob),
+ ID3D10Blob_GetBufferSize(ps_blob),
+ };
+ }
+
+ hr = ID3D11Device_CreatePixelShader(p->dev, frag_bc.start, frag_bc.len,
+ NULL, &pass_p->ps);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create pixel shader: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ D3D11_INPUT_ELEMENT_DESC *in_descs = talloc_array(pass,
+ D3D11_INPUT_ELEMENT_DESC, params->num_vertex_attribs);
+ for (int i = 0; i < params->num_vertex_attribs; i++) {
+ struct ra_renderpass_input *inp = &params->vertex_attribs[i];
+
+ DXGI_FORMAT fmt = DXGI_FORMAT_UNKNOWN;
+ switch (inp->type) {
+ case RA_VARTYPE_FLOAT:
+ switch (inp->dim_v) {
+ case 1: fmt = DXGI_FORMAT_R32_FLOAT; break;
+ case 2: fmt = DXGI_FORMAT_R32G32_FLOAT; break;
+ case 3: fmt = DXGI_FORMAT_R32G32B32_FLOAT; break;
+ case 4: fmt = DXGI_FORMAT_R32G32B32A32_FLOAT; break;
+ }
+ break;
+ case RA_VARTYPE_BYTE_UNORM:
+ switch (inp->dim_v) {
+ case 1: fmt = DXGI_FORMAT_R8_UNORM; break;
+ case 2: fmt = DXGI_FORMAT_R8G8_UNORM; break;
+ // There is no 3-component 8-bit DXGI format
+ case 4: fmt = DXGI_FORMAT_R8G8B8A8_UNORM; break;
+ }
+ break;
+ }
+ if (fmt == DXGI_FORMAT_UNKNOWN) {
+ MP_ERR(ra, "Could not find suitable vertex input format\n");
+ goto error;
+ }
+
+ in_descs[i] = (D3D11_INPUT_ELEMENT_DESC) {
+ // The semantic name doesn't mean much and is just used to verify
+ // the input description matches the shader. SPIRV-Cross always
+ // uses TEXCOORD, so we should too.
+ .SemanticName = "TEXCOORD",
+ .SemanticIndex = i,
+ .AlignedByteOffset = inp->offset,
+ .Format = fmt,
+ };
+ }
+
+ hr = ID3D11Device_CreateInputLayout(p->dev, in_descs,
+ params->num_vertex_attribs, vert_bc.start, vert_bc.len,
+ &pass_p->layout);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create IA layout: %s\n", mp_HRESULT_to_str(hr));
+ goto error;
+ }
+ talloc_free(in_descs);
+ in_descs = NULL;
+
+ D3D11_BLEND_DESC bdesc = {
+ .RenderTarget[0] = {
+ .BlendEnable = params->enable_blend,
+ .SrcBlend = map_ra_blend(params->blend_src_rgb),
+ .DestBlend = map_ra_blend(params->blend_dst_rgb),
+ .BlendOp = D3D11_BLEND_OP_ADD,
+ .SrcBlendAlpha = map_ra_blend(params->blend_src_alpha),
+ .DestBlendAlpha = map_ra_blend(params->blend_dst_alpha),
+ .BlendOpAlpha = D3D11_BLEND_OP_ADD,
+ .RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL,
+ },
+ };
+ hr = ID3D11Device_CreateBlendState(p->dev, &bdesc, &pass_p->bstate);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create blend state: %s\n", mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ save_cached_program(ra, pass, vert_bc, frag_bc, (bstr){0});
+
+ SAFE_RELEASE(vs_blob);
+ SAFE_RELEASE(ps_blob);
+ return pass;
+
+error:
+ renderpass_destroy(ra, pass);
+ SAFE_RELEASE(vs_blob);
+ SAFE_RELEASE(ps_blob);
+ return NULL;
+}
+
+static struct ra_renderpass *renderpass_create_compute(struct ra *ra,
+ struct ra_renderpass *pass, const struct ra_renderpass_params *params)
+{
+ struct ra_d3d11 *p = ra->priv;
+ struct d3d_rpass *pass_p = pass->priv;
+ ID3DBlob *cs_blob = NULL;
+ HRESULT hr;
+
+ bstr comp_bc = {0};
+ load_cached_program(ra, params, NULL, NULL, &comp_bc);
+
+ if (!comp_bc.start) {
+ if (!compile_glsl(ra, GLSL_SHADER_COMPUTE, params->compute_shader,
+ &cs_blob))
+ goto error;
+ comp_bc = (bstr){
+ ID3D10Blob_GetBufferPointer(cs_blob),
+ ID3D10Blob_GetBufferSize(cs_blob),
+ };
+ }
+ hr = ID3D11Device_CreateComputeShader(p->dev, comp_bc.start, comp_bc.len,
+ NULL, &pass_p->cs);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create compute shader: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ save_cached_program(ra, pass, (bstr){0}, (bstr){0}, comp_bc);
+
+ SAFE_RELEASE(cs_blob);
+ return pass;
+error:
+ renderpass_destroy(ra, pass);
+ SAFE_RELEASE(cs_blob);
+ return NULL;
+}
+
+static struct ra_renderpass *renderpass_create(struct ra *ra,
+ const struct ra_renderpass_params *params)
+{
+ struct ra_renderpass *pass = talloc_zero(NULL, struct ra_renderpass);
+ pass->params = *ra_renderpass_params_copy(pass, params);
+ pass->params.cached_program = (bstr){0};
+ pass->priv = talloc_zero(pass, struct d3d_rpass);
+
+ if (params->type == RA_RENDERPASS_TYPE_COMPUTE) {
+ return renderpass_create_compute(ra, pass, params);
+ } else {
+ return renderpass_create_raster(ra, pass, params);
+ }
+}
+
+static void renderpass_run_raster(struct ra *ra,
+ const struct ra_renderpass_run_params *params,
+ ID3D11Buffer *ubos[], int ubos_len,
+ ID3D11SamplerState *samplers[],
+ ID3D11ShaderResourceView *srvs[],
+ int samplers_len,
+ ID3D11UnorderedAccessView *uavs[],
+ int uavs_len)
+{
+ struct ra_d3d11 *p = ra->priv;
+ struct ra_renderpass *pass = params->pass;
+ struct d3d_rpass *pass_p = pass->priv;
+
+ UINT vbuf_offset = vbuf_upload(ra, params->vertex_data,
+ pass->params.vertex_stride * params->vertex_count);
+ if (vbuf_offset == (UINT)-1)
+ return;
+
+ ID3D11DeviceContext_IASetInputLayout(p->ctx, pass_p->layout);
+ ID3D11DeviceContext_IASetVertexBuffers(p->ctx, 0, 1, &p->vbuf,
+ &pass->params.vertex_stride, &vbuf_offset);
+ ID3D11DeviceContext_IASetPrimitiveTopology(p->ctx,
+ D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
+
+ ID3D11DeviceContext_VSSetShader(p->ctx, pass_p->vs, NULL, 0);
+
+ ID3D11DeviceContext_RSSetViewports(p->ctx, 1, (&(D3D11_VIEWPORT) {
+ .TopLeftX = params->viewport.x0,
+ .TopLeftY = params->viewport.y0,
+ .Width = mp_rect_w(params->viewport),
+ .Height = mp_rect_h(params->viewport),
+ .MinDepth = 0,
+ .MaxDepth = 1,
+ }));
+ ID3D11DeviceContext_RSSetScissorRects(p->ctx, 1, (&(D3D11_RECT) {
+ .left = params->scissors.x0,
+ .top = params->scissors.y0,
+ .right = params->scissors.x1,
+ .bottom = params->scissors.y1,
+ }));
+ ID3D11DeviceContext_PSSetShader(p->ctx, pass_p->ps, NULL, 0);
+ ID3D11DeviceContext_PSSetConstantBuffers(p->ctx, 0, ubos_len, ubos);
+ ID3D11DeviceContext_PSSetShaderResources(p->ctx, 0, samplers_len, srvs);
+ ID3D11DeviceContext_PSSetSamplers(p->ctx, 0, samplers_len, samplers);
+
+ struct ra_tex *target = params->target;
+ struct d3d_tex *target_p = target->priv;
+ ID3D11DeviceContext_OMSetRenderTargetsAndUnorderedAccessViews(p->ctx, 1,
+ &target_p->rtv, NULL, 1, uavs_len, uavs, NULL);
+ ID3D11DeviceContext_OMSetBlendState(p->ctx, pass_p->bstate, NULL,
+ D3D11_DEFAULT_SAMPLE_MASK);
+
+ ID3D11DeviceContext_Draw(p->ctx, params->vertex_count, 0);
+
+ // Unbind everything. It's easier to do this than to actually track state,
+ // and if we leave the RTV bound, it could trip up D3D's conflict checker.
+ for (int i = 0; i < ubos_len; i++)
+ ubos[i] = NULL;
+ for (int i = 0; i < samplers_len; i++) {
+ samplers[i] = NULL;
+ srvs[i] = NULL;
+ }
+ for (int i = 0; i < uavs_len; i++)
+ uavs[i] = NULL;
+ ID3D11DeviceContext_PSSetConstantBuffers(p->ctx, 0, ubos_len, ubos);
+ ID3D11DeviceContext_PSSetShaderResources(p->ctx, 0, samplers_len, srvs);
+ ID3D11DeviceContext_PSSetSamplers(p->ctx, 0, samplers_len, samplers);
+ ID3D11DeviceContext_OMSetRenderTargetsAndUnorderedAccessViews(p->ctx, 0,
+ NULL, NULL, 1, uavs_len, uavs, NULL);
+}
+
+static void renderpass_run_compute(struct ra *ra,
+ const struct ra_renderpass_run_params *params,
+ ID3D11Buffer *ubos[], int ubos_len,
+ ID3D11SamplerState *samplers[],
+ ID3D11ShaderResourceView *srvs[],
+ int samplers_len,
+ ID3D11UnorderedAccessView *uavs[],
+ int uavs_len)
+{
+ struct ra_d3d11 *p = ra->priv;
+ struct ra_renderpass *pass = params->pass;
+ struct d3d_rpass *pass_p = pass->priv;
+
+ ID3D11DeviceContext_CSSetShader(p->ctx, pass_p->cs, NULL, 0);
+ ID3D11DeviceContext_CSSetConstantBuffers(p->ctx, 0, ubos_len, ubos);
+ ID3D11DeviceContext_CSSetShaderResources(p->ctx, 0, samplers_len, srvs);
+ ID3D11DeviceContext_CSSetSamplers(p->ctx, 0, samplers_len, samplers);
+ ID3D11DeviceContext_CSSetUnorderedAccessViews(p->ctx, 0, uavs_len, uavs,
+ NULL);
+
+ ID3D11DeviceContext_Dispatch(p->ctx, params->compute_groups[0],
+ params->compute_groups[1],
+ params->compute_groups[2]);
+
+ for (int i = 0; i < ubos_len; i++)
+ ubos[i] = NULL;
+ for (int i = 0; i < samplers_len; i++) {
+ samplers[i] = NULL;
+ srvs[i] = NULL;
+ }
+ for (int i = 0; i < uavs_len; i++)
+ uavs[i] = NULL;
+ ID3D11DeviceContext_CSSetConstantBuffers(p->ctx, 0, ubos_len, ubos);
+ ID3D11DeviceContext_CSSetShaderResources(p->ctx, 0, samplers_len, srvs);
+ ID3D11DeviceContext_CSSetSamplers(p->ctx, 0, samplers_len, samplers);
+ ID3D11DeviceContext_CSSetUnorderedAccessViews(p->ctx, 0, uavs_len, uavs,
+ NULL);
+}
+
+static void renderpass_run(struct ra *ra,
+ const struct ra_renderpass_run_params *params)
+{
+ struct ra_d3d11 *p = ra->priv;
+ struct ra_renderpass *pass = params->pass;
+ enum ra_renderpass_type type = pass->params.type;
+
+ ID3D11Buffer *ubos[D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT] = {0};
+ int ubos_len = 0;
+
+ ID3D11SamplerState *samplers[D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT] = {0};
+ ID3D11ShaderResourceView *srvs[D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT] = {0};
+ int samplers_len = 0;
+
+ ID3D11UnorderedAccessView *uavs[D3D11_1_UAV_SLOT_COUNT] = {0};
+ int uavs_len = 0;
+
+ // In a raster pass, one of the UAV slots is used by the runtime for the RTV
+ int uavs_max = type == RA_RENDERPASS_TYPE_COMPUTE ? p->max_uavs
+ : p->max_uavs - 1;
+
+ // Gather the input variables used in this pass. These will be mapped to
+ // HLSL registers.
+ for (int i = 0; i < params->num_values; i++) {
+ struct ra_renderpass_input_val *val = &params->values[i];
+ int binding = pass->params.inputs[val->index].binding;
+ switch (pass->params.inputs[val->index].type) {
+ case RA_VARTYPE_BUF_RO:
+ if (binding >= MP_ARRAY_SIZE(ubos)) {
+ MP_ERR(ra, "Too many constant buffers in pass\n");
+ return;
+ }
+ struct ra_buf *buf_ro = *(struct ra_buf **)val->data;
+ buf_resolve(ra, buf_ro);
+ struct d3d_buf *buf_ro_p = buf_ro->priv;
+ ubos[binding] = buf_ro_p->buf;
+ ubos_len = MPMAX(ubos_len, binding + 1);
+ break;
+ case RA_VARTYPE_BUF_RW:
+ if (binding > uavs_max) {
+ MP_ERR(ra, "Too many UAVs in pass\n");
+ return;
+ }
+ struct ra_buf *buf_rw = *(struct ra_buf **)val->data;
+ buf_resolve(ra, buf_rw);
+ struct d3d_buf *buf_rw_p = buf_rw->priv;
+ uavs[binding] = buf_rw_p->uav;
+ uavs_len = MPMAX(uavs_len, binding + 1);
+ break;
+ case RA_VARTYPE_TEX:
+ if (binding >= MP_ARRAY_SIZE(samplers)) {
+ MP_ERR(ra, "Too many textures in pass\n");
+ return;
+ }
+ struct ra_tex *tex = *(struct ra_tex **)val->data;
+ struct d3d_tex *tex_p = tex->priv;
+ samplers[binding] = tex_p->sampler;
+ srvs[binding] = tex_p->srv;
+ samplers_len = MPMAX(samplers_len, binding + 1);
+ break;
+ case RA_VARTYPE_IMG_W:
+ if (binding > uavs_max) {
+ MP_ERR(ra, "Too many UAVs in pass\n");
+ return;
+ }
+ struct ra_tex *img = *(struct ra_tex **)val->data;
+ struct d3d_tex *img_p = img->priv;
+ uavs[binding] = img_p->uav;
+ uavs_len = MPMAX(uavs_len, binding + 1);
+ break;
+ }
+ }
+
+ if (type == RA_RENDERPASS_TYPE_COMPUTE) {
+ renderpass_run_compute(ra, params, ubos, ubos_len, samplers, srvs,
+ samplers_len, uavs, uavs_len);
+ } else {
+ renderpass_run_raster(ra, params, ubos, ubos_len, samplers, srvs,
+ samplers_len, uavs, uavs_len);
+ }
+}
+
+static void timer_destroy(struct ra *ra, ra_timer *ratimer)
+{
+ if (!ratimer)
+ return;
+ struct d3d_timer *timer = ratimer;
+
+ SAFE_RELEASE(timer->ts_start);
+ SAFE_RELEASE(timer->ts_end);
+ SAFE_RELEASE(timer->disjoint);
+ talloc_free(timer);
+}
+
+static ra_timer *timer_create(struct ra *ra)
+{
+ struct ra_d3d11 *p = ra->priv;
+ if (!p->has_timestamp_queries)
+ return NULL;
+
+ struct d3d_timer *timer = talloc_zero(NULL, struct d3d_timer);
+ HRESULT hr;
+
+ hr = ID3D11Device_CreateQuery(p->dev,
+ &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP }, &timer->ts_start);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create start query: %s\n", mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ hr = ID3D11Device_CreateQuery(p->dev,
+ &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP }, &timer->ts_end);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create end query: %s\n", mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ // Measuring duration in D3D11 requires three queries: start and end
+ // timestamps, and a disjoint query containing a flag which says whether
+ // the timestamps are usable or if a discontinuity occurred between them,
+ // like a change in power state or clock speed. The disjoint query also
+ // contains the timer frequency, so the timestamps are useless without it.
+ hr = ID3D11Device_CreateQuery(p->dev,
+ &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP_DISJOINT }, &timer->disjoint);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create timer query: %s\n", mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ return timer;
+error:
+ timer_destroy(ra, timer);
+ return NULL;
+}
+
+static uint64_t timestamp_to_ns(uint64_t timestamp, uint64_t freq)
+{
+ static const uint64_t ns_per_s = 1000000000llu;
+ return timestamp / freq * ns_per_s + timestamp % freq * ns_per_s / freq;
+}
+
+static uint64_t timer_get_result(struct ra *ra, ra_timer *ratimer)
+{
+ struct ra_d3d11 *p = ra->priv;
+ struct d3d_timer *timer = ratimer;
+ HRESULT hr;
+
+ UINT64 start, end;
+ D3D11_QUERY_DATA_TIMESTAMP_DISJOINT dj;
+
+ hr = ID3D11DeviceContext_GetData(p->ctx,
+ (ID3D11Asynchronous *)timer->ts_end, &end, sizeof(end),
+ D3D11_ASYNC_GETDATA_DONOTFLUSH);
+ if (FAILED(hr) || hr == S_FALSE)
+ return 0;
+ hr = ID3D11DeviceContext_GetData(p->ctx,
+ (ID3D11Asynchronous *)timer->ts_start, &start, sizeof(start),
+ D3D11_ASYNC_GETDATA_DONOTFLUSH);
+ if (FAILED(hr) || hr == S_FALSE)
+ return 0;
+ hr = ID3D11DeviceContext_GetData(p->ctx,
+ (ID3D11Asynchronous *)timer->disjoint, &dj, sizeof(dj),
+ D3D11_ASYNC_GETDATA_DONOTFLUSH);
+ if (FAILED(hr) || hr == S_FALSE || dj.Disjoint || !dj.Frequency)
+ return 0;
+
+ return timestamp_to_ns(end - start, dj.Frequency);
+}
+
+static void timer_start(struct ra *ra, ra_timer *ratimer)
+{
+ struct ra_d3d11 *p = ra->priv;
+ struct d3d_timer *timer = ratimer;
+
+ // Latch the last result of this ra_timer (returned by timer_stop)
+ timer->result = timer_get_result(ra, ratimer);
+
+ ID3D11DeviceContext_Begin(p->ctx, (ID3D11Asynchronous *)timer->disjoint);
+ ID3D11DeviceContext_End(p->ctx, (ID3D11Asynchronous *)timer->ts_start);
+}
+
+static uint64_t timer_stop(struct ra *ra, ra_timer *ratimer)
+{
+ struct ra_d3d11 *p = ra->priv;
+ struct d3d_timer *timer = ratimer;
+
+ ID3D11DeviceContext_End(p->ctx, (ID3D11Asynchronous *)timer->ts_end);
+ ID3D11DeviceContext_End(p->ctx, (ID3D11Asynchronous *)timer->disjoint);
+
+ return timer->result;
+}
+
+static int map_msg_severity(D3D11_MESSAGE_SEVERITY sev)
+{
+ switch (sev) {
+ case D3D11_MESSAGE_SEVERITY_CORRUPTION:
+ return MSGL_FATAL;
+ case D3D11_MESSAGE_SEVERITY_ERROR:
+ return MSGL_ERR;
+ case D3D11_MESSAGE_SEVERITY_WARNING:
+ return MSGL_WARN;
+ default:
+ case D3D11_MESSAGE_SEVERITY_INFO:
+ case D3D11_MESSAGE_SEVERITY_MESSAGE:
+ return MSGL_DEBUG;
+ }
+}
+
+static int map_msg_severity_by_id(D3D11_MESSAGE_ID id,
+ D3D11_MESSAGE_SEVERITY sev)
+{
+ switch (id) {
+ // These are normal. The RA timer queue habitually reuses timer objects
+ // without retrieving the results.
+ case D3D11_MESSAGE_ID_QUERY_BEGIN_ABANDONING_PREVIOUS_RESULTS:
+ case D3D11_MESSAGE_ID_QUERY_END_ABANDONING_PREVIOUS_RESULTS:
+ return MSGL_TRACE;
+
+ // D3D11 writes log messages every time an object is created or
+ // destroyed. That results in a lot of log spam, so force MSGL_TRACE.
+#define OBJ_LIFETIME_MESSAGES(obj) \
+ case D3D11_MESSAGE_ID_CREATE_ ## obj: \
+ case D3D11_MESSAGE_ID_DESTROY_ ## obj
+
+ OBJ_LIFETIME_MESSAGES(CONTEXT):
+ OBJ_LIFETIME_MESSAGES(BUFFER):
+ OBJ_LIFETIME_MESSAGES(TEXTURE1D):
+ OBJ_LIFETIME_MESSAGES(TEXTURE2D):
+ OBJ_LIFETIME_MESSAGES(TEXTURE3D):
+ OBJ_LIFETIME_MESSAGES(SHADERRESOURCEVIEW):
+ OBJ_LIFETIME_MESSAGES(RENDERTARGETVIEW):
+ OBJ_LIFETIME_MESSAGES(DEPTHSTENCILVIEW):
+ OBJ_LIFETIME_MESSAGES(VERTEXSHADER):
+ OBJ_LIFETIME_MESSAGES(HULLSHADER):
+ OBJ_LIFETIME_MESSAGES(DOMAINSHADER):
+ OBJ_LIFETIME_MESSAGES(GEOMETRYSHADER):
+ OBJ_LIFETIME_MESSAGES(PIXELSHADER):
+ OBJ_LIFETIME_MESSAGES(INPUTLAYOUT):
+ OBJ_LIFETIME_MESSAGES(SAMPLER):
+ OBJ_LIFETIME_MESSAGES(BLENDSTATE):
+ OBJ_LIFETIME_MESSAGES(DEPTHSTENCILSTATE):
+ OBJ_LIFETIME_MESSAGES(RASTERIZERSTATE):
+ OBJ_LIFETIME_MESSAGES(QUERY):
+ OBJ_LIFETIME_MESSAGES(PREDICATE):
+ OBJ_LIFETIME_MESSAGES(COUNTER):
+ OBJ_LIFETIME_MESSAGES(COMMANDLIST):
+ OBJ_LIFETIME_MESSAGES(CLASSINSTANCE):
+ OBJ_LIFETIME_MESSAGES(CLASSLINKAGE):
+ OBJ_LIFETIME_MESSAGES(COMPUTESHADER):
+ OBJ_LIFETIME_MESSAGES(UNORDEREDACCESSVIEW):
+ OBJ_LIFETIME_MESSAGES(VIDEODECODER):
+ OBJ_LIFETIME_MESSAGES(VIDEOPROCESSORENUM):
+ OBJ_LIFETIME_MESSAGES(VIDEOPROCESSOR):
+ OBJ_LIFETIME_MESSAGES(DECODEROUTPUTVIEW):
+ OBJ_LIFETIME_MESSAGES(PROCESSORINPUTVIEW):
+ OBJ_LIFETIME_MESSAGES(PROCESSOROUTPUTVIEW):
+ OBJ_LIFETIME_MESSAGES(DEVICECONTEXTSTATE):
+ OBJ_LIFETIME_MESSAGES(FENCE):
+ return MSGL_TRACE;
+
+#undef OBJ_LIFETIME_MESSAGES
+
+ default:
+ return map_msg_severity(sev);
+ }
+}
+
+static void debug_marker(struct ra *ra, const char *msg)
+{
+ struct ra_d3d11 *p = ra->priv;
+ void *talloc_ctx = talloc_new(NULL);
+ HRESULT hr;
+
+ if (!p->iqueue)
+ goto done;
+
+ // Copy debug-layer messages to mpv's log output
+ bool printed_header = false;
+ uint64_t messages = ID3D11InfoQueue_GetNumStoredMessages(p->iqueue);
+ for (uint64_t i = 0; i < messages; i++) {
+ SIZE_T len;
+ hr = ID3D11InfoQueue_GetMessage(p->iqueue, i, NULL, &len);
+ if (FAILED(hr) || !len)
+ goto done;
+
+ D3D11_MESSAGE *d3dmsg = talloc_size(talloc_ctx, len);
+ hr = ID3D11InfoQueue_GetMessage(p->iqueue, i, d3dmsg, &len);
+ if (FAILED(hr))
+ goto done;
+
+ int msgl = map_msg_severity_by_id(d3dmsg->ID, d3dmsg->Severity);
+ if (mp_msg_test(ra->log, msgl)) {
+ if (!printed_header)
+ MP_INFO(ra, "%s:\n", msg);
+ printed_header = true;
+
+ MP_MSG(ra, msgl, "%d: %.*s\n", (int)d3dmsg->ID,
+ (int)d3dmsg->DescriptionByteLength, d3dmsg->pDescription);
+ talloc_free(d3dmsg);
+ }
+ }
+
+ ID3D11InfoQueue_ClearStoredMessages(p->iqueue);
+done:
+ talloc_free(talloc_ctx);
+}
+
+static void destroy(struct ra *ra)
+{
+ struct ra_d3d11 *p = ra->priv;
+
+ // Release everything except the interfaces needed to perform leak checking
+ SAFE_RELEASE(p->clear_ps);
+ SAFE_RELEASE(p->clear_vs);
+ SAFE_RELEASE(p->clear_layout);
+ SAFE_RELEASE(p->clear_vbuf);
+ SAFE_RELEASE(p->clear_cbuf);
+ SAFE_RELEASE(p->blit_float_ps);
+ SAFE_RELEASE(p->blit_vs);
+ SAFE_RELEASE(p->blit_layout);
+ SAFE_RELEASE(p->blit_vbuf);
+ SAFE_RELEASE(p->blit_sampler);
+ SAFE_RELEASE(p->vbuf);
+ SAFE_RELEASE(p->ctx1);
+ SAFE_RELEASE(p->dev1);
+ SAFE_RELEASE(p->dev);
+
+ if (p->ctx) {
+ // Destroy the device context synchronously so referenced objects don't
+ // show up in the leak check
+ ID3D11DeviceContext_ClearState(p->ctx);
+ ID3D11DeviceContext_Flush(p->ctx);
+ }
+ SAFE_RELEASE(p->ctx);
+
+ if (p->debug) {
+ // Report any leaked objects
+ debug_marker(ra, "after destroy");
+ ID3D11Debug_ReportLiveDeviceObjects(p->debug, D3D11_RLDO_DETAIL);
+ debug_marker(ra, "after leak check");
+ ID3D11Debug_ReportLiveDeviceObjects(p->debug, D3D11_RLDO_SUMMARY);
+ debug_marker(ra, "after leak summary");
+ }
+ SAFE_RELEASE(p->debug);
+ SAFE_RELEASE(p->iqueue);
+
+ talloc_free(ra);
+}
+
+static struct ra_fns ra_fns_d3d11 = {
+ .destroy = destroy,
+ .tex_create = tex_create,
+ .tex_destroy = tex_destroy,
+ .tex_upload = tex_upload,
+ .tex_download = tex_download,
+ .buf_create = buf_create,
+ .buf_destroy = buf_destroy,
+ .buf_update = buf_update,
+ .clear = clear,
+ .blit = blit,
+ .uniform_layout = std140_layout,
+ .desc_namespace = desc_namespace,
+ .renderpass_create = renderpass_create,
+ .renderpass_destroy = renderpass_destroy,
+ .renderpass_run = renderpass_run,
+ .timer_create = timer_create,
+ .timer_destroy = timer_destroy,
+ .timer_start = timer_start,
+ .timer_stop = timer_stop,
+ .debug_marker = debug_marker,
+};
+
+void ra_d3d11_flush(struct ra *ra)
+{
+ struct ra_d3d11 *p = ra->priv;
+ ID3D11DeviceContext_Flush(p->ctx);
+}
+
+static void init_debug_layer(struct ra *ra)
+{
+ struct ra_d3d11 *p = ra->priv;
+ HRESULT hr;
+
+ hr = ID3D11Device_QueryInterface(p->dev, &IID_ID3D11Debug,
+ (void**)&p->debug);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to get debug device: %s\n", mp_HRESULT_to_str(hr));
+ return;
+ }
+
+ hr = ID3D11Device_QueryInterface(p->dev, &IID_ID3D11InfoQueue,
+ (void**)&p->iqueue);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to get info queue: %s\n", mp_HRESULT_to_str(hr));
+ return;
+ }
+
+ // Store an unlimited amount of messages in the buffer. This is fine
+ // because we flush stored messages regularly (in debug_marker.)
+ ID3D11InfoQueue_SetMessageCountLimit(p->iqueue, -1);
+
+ // Push empty filter to get everything
+ D3D11_INFO_QUEUE_FILTER filter = {0};
+ ID3D11InfoQueue_PushStorageFilter(p->iqueue, &filter);
+}
+
+static struct dll_version get_dll_version(HMODULE dll)
+{
+ void *ctx = talloc_new(NULL);
+ struct dll_version ret = { 0 };
+
+ HRSRC rsrc = FindResourceW(dll, MAKEINTRESOURCEW(VS_VERSION_INFO),
+ VS_FILE_INFO);
+ if (!rsrc)
+ goto done;
+ DWORD size = SizeofResource(dll, rsrc);
+ HGLOBAL res = LoadResource(dll, rsrc);
+ if (!res)
+ goto done;
+ void *ptr = LockResource(res);
+ if (!ptr)
+ goto done;
+ void *copy = talloc_memdup(ctx, ptr, size);
+
+ VS_FIXEDFILEINFO *ffi;
+ UINT ffi_len;
+ if (!VerQueryValueW(copy, L"\\", (void**)&ffi, &ffi_len))
+ goto done;
+ if (ffi_len < sizeof(*ffi))
+ goto done;
+
+ ret.major = HIWORD(ffi->dwFileVersionMS);
+ ret.minor = LOWORD(ffi->dwFileVersionMS);
+ ret.build = HIWORD(ffi->dwFileVersionLS);
+ ret.revision = LOWORD(ffi->dwFileVersionLS);
+
+done:
+ talloc_free(ctx);
+ return ret;
+}
+
+static bool load_d3d_compiler(struct ra *ra)
+{
+ struct ra_d3d11 *p = ra->priv;
+ HMODULE d3dcompiler = NULL;
+
+ // Try the inbox D3DCompiler first (Windows 8.1 and up)
+ if (IsWindows8Point1OrGreater()) {
+ d3dcompiler = LoadLibraryExW(L"d3dcompiler_47.dll", NULL,
+ LOAD_LIBRARY_SEARCH_SYSTEM32);
+ }
+ // Check for a packaged version of d3dcompiler_47.dll
+ if (!d3dcompiler)
+ d3dcompiler = LoadLibraryW(L"d3dcompiler_47.dll");
+ // Try d3dcompiler_46.dll from the Windows 8 SDK
+ if (!d3dcompiler)
+ d3dcompiler = LoadLibraryW(L"d3dcompiler_46.dll");
+ // Try d3dcompiler_43.dll from the June 2010 DirectX SDK
+ if (!d3dcompiler)
+ d3dcompiler = LoadLibraryW(L"d3dcompiler_43.dll");
+ // Can't find any compiler DLL, so give up
+ if (!d3dcompiler)
+ return false;
+
+ p->d3d_compiler_ver = get_dll_version(d3dcompiler);
+
+ p->D3DCompile = (pD3DCompile)GetProcAddress(d3dcompiler, "D3DCompile");
+ if (!p->D3DCompile)
+ return false;
+ return true;
+}
+
+static void find_max_texture_dimension(struct ra *ra)
+{
+ struct ra_d3d11 *p = ra->priv;
+
+ D3D11_TEXTURE2D_DESC desc = {
+ .Width = ra->max_texture_wh,
+ .Height = ra->max_texture_wh,
+ .MipLevels = 1,
+ .ArraySize = 1,
+ .SampleDesc.Count = 1,
+ .Format = DXGI_FORMAT_R8_UNORM,
+ .BindFlags = D3D11_BIND_SHADER_RESOURCE,
+ };
+ while (true) {
+ desc.Height = desc.Width *= 2;
+ if (desc.Width >= 0x8000000u)
+ return;
+ if (FAILED(ID3D11Device_CreateTexture2D(p->dev, &desc, NULL, NULL)))
+ return;
+ ra->max_texture_wh = desc.Width;
+ }
+}
+
+struct ra *ra_d3d11_create(ID3D11Device *dev, struct mp_log *log,
+ struct spirv_compiler *spirv)
+{
+ HRESULT hr;
+
+ struct ra *ra = talloc_zero(NULL, struct ra);
+ ra->log = log;
+ ra->fns = &ra_fns_d3d11;
+
+ // Even Direct3D 10level9 supports 3D textures
+ ra->caps = RA_CAP_TEX_3D | RA_CAP_DIRECT_UPLOAD | RA_CAP_BUF_RO |
+ RA_CAP_BLIT | spirv->ra_caps;
+
+ ra->glsl_version = spirv->glsl_version;
+ ra->glsl_vulkan = true;
+
+ struct ra_d3d11 *p = ra->priv = talloc_zero(ra, struct ra_d3d11);
+ p->spirv = spirv;
+
+ int minor = 0;
+ ID3D11Device_AddRef(dev);
+ p->dev = dev;
+ ID3D11Device_GetImmediateContext(p->dev, &p->ctx);
+ hr = ID3D11Device_QueryInterface(p->dev, &IID_ID3D11Device1,
+ (void**)&p->dev1);
+ if (SUCCEEDED(hr)) {
+ minor = 1;
+ ID3D11Device1_GetImmediateContext1(p->dev1, &p->ctx1);
+
+ D3D11_FEATURE_DATA_D3D11_OPTIONS fopts = { 0 };
+ hr = ID3D11Device_CheckFeatureSupport(p->dev,
+ D3D11_FEATURE_D3D11_OPTIONS, &fopts, sizeof(fopts));
+ if (SUCCEEDED(hr)) {
+ p->has_clear_view = fopts.ClearView;
+ }
+ }
+
+ MP_VERBOSE(ra, "Using Direct3D 11.%d runtime\n", minor);
+
+ p->fl = ID3D11Device_GetFeatureLevel(p->dev);
+ if (p->fl >= D3D_FEATURE_LEVEL_11_0) {
+ ra->max_texture_wh = D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION;
+ } else if (p->fl >= D3D_FEATURE_LEVEL_10_0) {
+ ra->max_texture_wh = D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION;
+ } else if (p->fl >= D3D_FEATURE_LEVEL_9_3) {
+ ra->max_texture_wh = D3D_FL9_3_REQ_TEXTURE2D_U_OR_V_DIMENSION;
+ } else {
+ ra->max_texture_wh = D3D_FL9_1_REQ_TEXTURE2D_U_OR_V_DIMENSION;
+ }
+
+ if (p->fl >= D3D_FEATURE_LEVEL_11_0)
+ ra->caps |= RA_CAP_GATHER;
+ if (p->fl >= D3D_FEATURE_LEVEL_10_0)
+ ra->caps |= RA_CAP_FRAGCOORD;
+
+ // Some 10_0 hardware has compute shaders, but only 11_0 has image load/store
+ if (p->fl >= D3D_FEATURE_LEVEL_11_0) {
+ ra->caps |= RA_CAP_COMPUTE | RA_CAP_BUF_RW;
+ ra->max_shmem = 32 * 1024;
+ ra->max_compute_group_threads =
+ D3D11_CS_THREAD_GROUP_MAX_THREADS_PER_GROUP;
+ }
+
+ if (p->fl >= D3D_FEATURE_LEVEL_11_1) {
+ p->max_uavs = D3D11_1_UAV_SLOT_COUNT;
+ } else {
+ p->max_uavs = D3D11_PS_CS_UAV_REGISTER_COUNT;
+ }
+
+ if (ID3D11Device_GetCreationFlags(p->dev) & D3D11_CREATE_DEVICE_DEBUG)
+ init_debug_layer(ra);
+
+ // Some level 9_x devices don't have timestamp queries
+ hr = ID3D11Device_CreateQuery(p->dev,
+ &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP }, NULL);
+ p->has_timestamp_queries = SUCCEEDED(hr);
+
+ debug_marker(ra, "before maximum Texture2D size lookup");
+
+ // According to MSDN, the above texture sizes are just minimums and drivers
+ // may support larger textures. See:
+ // https://msdn.microsoft.com/en-us/library/windows/desktop/ff476874.aspx
+ find_max_texture_dimension(ra);
+
+ // Ignore any messages during find_max_texture_dimension
+ if (p->iqueue)
+ ID3D11InfoQueue_ClearStoredMessages(p->iqueue);
+
+ MP_VERBOSE(ra, "Maximum Texture2D size: %dx%d\n", ra->max_texture_wh,
+ ra->max_texture_wh);
+
+ if (!load_d3d_compiler(ra)) {
+ MP_FATAL(ra, "Could not find D3DCompiler DLL\n");
+ goto error;
+ }
+
+ MP_VERBOSE(ra, "D3DCompiler version: %u.%u.%u.%u\n",
+ p->d3d_compiler_ver.major, p->d3d_compiler_ver.minor,
+ p->d3d_compiler_ver.build, p->d3d_compiler_ver.revision);
+
+ setup_formats(ra);
+
+ // The rasterizer state never changes, so set it up here
+ ID3D11RasterizerState *rstate;
+ D3D11_RASTERIZER_DESC rdesc = {
+ .FillMode = D3D11_FILL_SOLID,
+ .CullMode = D3D11_CULL_NONE,
+ .FrontCounterClockwise = FALSE,
+ .DepthClipEnable = TRUE, // Required for 10level9
+ .ScissorEnable = TRUE,
+ };
+ hr = ID3D11Device_CreateRasterizerState(p->dev, &rdesc, &rstate);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create rasterizer state: %s\n", mp_HRESULT_to_str(hr));
+ goto error;
+ }
+ ID3D11DeviceContext_RSSetState(p->ctx, rstate);
+ SAFE_RELEASE(rstate);
+
+ // If the device doesn't support ClearView, we have to set up a
+ // shader-based clear() implementation
+ if (!p->has_clear_view && !setup_clear_rpass(ra))
+ goto error;
+
+ if (!setup_blit_rpass(ra))
+ goto error;
+
+ return ra;
+
+error:
+ destroy(ra);
+ return NULL;
+}
+
+ID3D11Device *ra_d3d11_get_device(struct ra *ra)
+{
+ struct ra_d3d11 *p = ra->priv;
+ ID3D11Device_AddRef(p->dev);
+ return p->dev;
+}
+
+bool ra_is_d3d11(struct ra *ra)
+{
+ return ra->fns == &ra_fns_d3d11;
+}
diff --git a/video/out/d3d11/ra_d3d11.h b/video/out/d3d11/ra_d3d11.h
new file mode 100644
index 0000000..6f62a7f
--- /dev/null
+++ b/video/out/d3d11/ra_d3d11.h
@@ -0,0 +1,47 @@
+#pragma once
+
+#include <stdbool.h>
+#include <windows.h>
+#include <d3d11.h>
+#include <dxgi1_2.h>
+
+#include "video/out/gpu/ra.h"
+#include "video/out/gpu/spirv.h"
+
+// Get the underlying DXGI format from an RA format
+DXGI_FORMAT ra_d3d11_get_format(const struct ra_format *fmt);
+
+// Gets the matching ra_format for a given DXGI format.
+// Returns a nullptr in case of no known match.
+const struct ra_format *ra_d3d11_get_ra_format(struct ra *ra, DXGI_FORMAT fmt);
+
+// Create an RA instance from a D3D11 device. This takes a reference to the
+// device, which is released when the RA instance is destroyed.
+struct ra *ra_d3d11_create(ID3D11Device *device, struct mp_log *log,
+ struct spirv_compiler *spirv);
+
+// Flush the immediate context of the wrapped D3D11 device
+void ra_d3d11_flush(struct ra *ra);
+
+// Create an RA texture from a D3D11 resource. This takes a reference to the
+// texture, which is released when the RA texture is destroyed.
+struct ra_tex *ra_d3d11_wrap_tex(struct ra *ra, ID3D11Resource *res);
+
+// As above, but for a D3D11VA video resource. The fmt parameter selects which
+// plane of a planar format will be mapped when the RA texture is used.
+// array_slice should be set for texture arrays and is ignored for non-arrays.
+struct ra_tex *ra_d3d11_wrap_tex_video(struct ra *ra, ID3D11Texture2D *res,
+ int w, int h, int array_slice,
+ const struct ra_format *fmt);
+
+// Get the underlying D3D11 resource from an RA texture. The returned resource
+// is refcounted and must be released by the caller.
+ID3D11Resource *ra_d3d11_get_raw_tex(struct ra *ra, struct ra_tex *tex,
+ int *array_slice);
+
+// Get the underlying D3D11 device from an RA instance. The returned device is
+// refcounted and must be released by the caller.
+ID3D11Device *ra_d3d11_get_device(struct ra *ra);
+
+// True if the RA instance was created with ra_d3d11_create()
+bool ra_is_d3d11(struct ra *ra);