summaryrefslogtreecommitdiffstats
path: root/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_device.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_device.cpp')
-rw-r--r--src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_device.cpp7219
1 files changed, 7219 insertions, 0 deletions
diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_device.cpp b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_device.cpp
new file mode 100644
index 00000000..7af7f511
--- /dev/null
+++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_device.cpp
@@ -0,0 +1,7219 @@
+#include "d3d9_device.h"
+
+#include "d3d9_interface.h"
+#include "d3d9_swapchain.h"
+#include "d3d9_caps.h"
+#include "d3d9_util.h"
+#include "d3d9_texture.h"
+#include "d3d9_buffer.h"
+#include "d3d9_vertex_declaration.h"
+#include "d3d9_shader.h"
+#include "d3d9_query.h"
+#include "d3d9_stateblock.h"
+#include "d3d9_monitor.h"
+#include "d3d9_spec_constants.h"
+#include "d3d9_names.h"
+#include "d3d9_format_helpers.h"
+
+#include "../dxvk/dxvk_adapter.h"
+#include "../dxvk/dxvk_instance.h"
+
+#include "../util/util_bit.h"
+#include "../util/util_math.h"
+
+#include "d3d9_initializer.h"
+
+#include <algorithm>
+#include <cfloat>
+// MSVC predefines _MSC_VER (with a leading underscore). The unprefixed
+// MSC_VER is not defined by any compiler, so the pragma below used to be
+// compiled out everywhere and never protected the FPU setup in this file.
+#ifdef _MSC_VER
+#pragma fenv_access (on)
+#endif
+
+namespace dxvk {
+ /**
+  * Constructs the device. Stores the creation parameters, builds the
+  * shader module set, the D3D9 options, the CS thread (from a freshly
+  * created DXVK context) and the first CS chunk in the initializer list,
+  * then in the body: picks the constant layouts, creates the initializer
+  * and format helper, emits a CS command that begins command recording
+  * with logic ops disabled, calls SetupFPU() unless
+  * D3DCREATE_FPU_PRESERVE was passed, builds the DXSO options, creates
+  * the constant buffers and records the initial texture-memory estimate.
+  */
+ D3D9DeviceEx::D3D9DeviceEx(
+ D3D9InterfaceEx* pParent,
+ D3D9Adapter* pAdapter,
+ D3DDEVTYPE DeviceType,
+ HWND hFocusWindow,
+ DWORD BehaviorFlags,
+ Rc<DxvkDevice> dxvkDevice)
+ : m_parent ( pParent )
+ , m_deviceType ( DeviceType )
+ , m_window ( hFocusWindow )
+ , m_behaviorFlags ( BehaviorFlags )
+ , m_adapter ( pAdapter )
+ , m_dxvkDevice ( dxvkDevice )
+ , m_shaderModules ( new D3D9ShaderModuleSet )
+ , m_d3d9Options ( dxvkDevice, pParent->GetInstance()->config() )
+ , m_multithread ( BehaviorFlags & D3DCREATE_MULTITHREADED )
+ , m_isSWVP ( (BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING) ? true : false )
+ , m_csThread ( dxvkDevice->createContext() )
+ , m_csChunk ( AllocCsChunk() ) {
+ // If we can SWVP, then we use an extended constant set
+ // as SWVP has many more slots available than HWVP.
+ bool canSWVP = CanSWVP();
+ DetermineConstantLayouts(canSWVP);
+
+ if (canSWVP)
+ Logger::info("D3D9DeviceEx: Using extended constant set for software vertex processing.");
+
+ m_initializer = new D3D9Initializer(m_dxvkDevice); // released with delete in the destructor
+ m_converter = new D3D9FormatHelper(m_dxvkDevice); // released with delete in the destructor
+
+ EmitCs([
+ cDevice = m_dxvkDevice
+ ] (DxvkContext* ctx) {
+ ctx->beginRecording(cDevice->createCommandList());
+
+ DxvkLogicOpState loState;
+ loState.enableLogicOp = VK_FALSE;
+ loState.logicOp = VK_LOGIC_OP_CLEAR;
+ ctx->setLogicOpState(loState);
+ });
+
+ if (!(BehaviorFlags & D3DCREATE_FPU_PRESERVE)) // the app did not ask to keep its FPU state
+ SetupFPU();
+
+ m_dxsoOptions = DxsoOptions(this, m_d3d9Options);
+
+ CreateConstantBuffers();
+
+ m_availableMemory = DetermineInitialTextureMemory();
+ }
+
+ /**
+  * Tears the device down: flushes and synchronizes with the CS thread,
+  * frees the initializer and the format helper, and finally waits for
+  * the DXVK device to become idle.
+  */
+ D3D9DeviceEx::~D3D9DeviceEx() {
+ Flush();
+ SynchronizeCsThread();
+
+ delete m_initializer;
+ delete m_converter;
+
+ m_dxvkDevice->waitForIdle(); // Sync Device
+ }
+
+
+ /**
+  * Hands out this object for IUnknown and IDirect3DDevice9, and for
+  * IDirect3DDevice9Ex only when the parent interface is extended.
+  * Returns E_POINTER for a null output pointer and E_NOINTERFACE for
+  * every other IID; unknown IIDs are additionally logged.
+  */
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::QueryInterface(REFIID riid, void** ppvObject) {
+   if (ppvObject == nullptr)
+     return E_POINTER;
+
+   *ppvObject = nullptr;
+
+   const bool wantsEx = riid == __uuidof(IDirect3DDevice9Ex);
+
+   const bool supported = riid == __uuidof(IUnknown)
+                       || riid == __uuidof(IDirect3DDevice9)
+                       || (wantsEx && m_parent->IsExtended());
+
+   if (supported) {
+     *ppvObject = ref(this);
+     return S_OK;
+   }
+
+   // Asking a non-extended device for the Ex interface is an expected
+   // probe, so it is rejected without the warning below.
+   if (!wantsEx) {
+     Logger::warn("D3D9DeviceEx::QueryInterface: Unknown interface query");
+     Logger::warn(str::format(riid));
+   }
+
+   return E_NOINTERFACE;
+ }
+
+ /**
+  * Always reports D3D_OK.
+  */
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::TestCooperativeLevel() {
+ // Equivalent of D3D11/DXGI present tests. We can always present.
+ return D3D_OK;
+ }
+
+
+ /**
+  * Reports the tracked available-memory counter, floored at zero and
+  * masked down to megabyte granularity.
+  */
+ UINT STDMETHODCALLTYPE D3D9DeviceEx::GetAvailableTextureMem() {
+   // The figure is intentionally approximate: native d3d9 reports wildly
+   // inaccurate numbers too, yet some games depend on getting one.
+
+   // Per spec the result is expressed in whole megabytes.
+   constexpr UINT megabyteMask = 0xfff00000;
+
+   // The counter may have dipped below zero; never report that.
+   int64_t available = m_availableMemory.load();
+   if (available < 0)
+     available = 0;
+
+   return UINT(available) & megabyteMask;
+ }
+
+ /**
+  * Performs no work and always reports D3D_OK.
+  */
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::EvictManagedResources() {
+ return D3D_OK;
+ }
+
+
+ /**
+  * Returns a new reference to the parent interface through ppD3D9.
+  * A null output pointer yields D3DERR_INVALIDCALL.
+  */
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetDirect3D(IDirect3D9** ppD3D9) {
+   if (ppD3D9 != nullptr) {
+     *ppD3D9 = m_parent.ref();
+     return D3D_OK;
+   }
+
+   return D3DERR_INVALIDCALL;
+ }
+
+ /**
+  * Forwards to the adapter's GetDeviceCaps() using this device's type.
+  */
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetDeviceCaps(D3DCAPS9* pCaps) {
+ return m_adapter->GetDeviceCaps(m_deviceType, pCaps);
+ }
+
+
+ /**
+  * Queries the display mode of the implicit swapchain. Any swapchain
+  * index other than 0 is rejected with D3DERR_INVALIDCALL.
+  */
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetDisplayMode(UINT iSwapChain, D3DDISPLAYMODE* pMode) {
+   const bool isImplicit = (iSwapChain == 0);
+
+   if (unlikely(!isImplicit))
+     return D3DERR_INVALIDCALL;
+
+   return m_implicitSwapchain->GetDisplayMode(pMode);
+ }
+
+
+ /**
+  * Fills pParameters with the adapter ordinal, device type, focus window
+  * and behavior flags this device was created with.
+  * A null output pointer yields D3DERR_INVALIDCALL.
+  */
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetCreationParameters(D3DDEVICE_CREATION_PARAMETERS *pParameters) {
+   if (pParameters == nullptr)
+     return D3DERR_INVALIDCALL;
+
+   D3DDEVICE_CREATION_PARAMETERS& out = *pParameters;
+   out.hFocusWindow   = m_window;
+   out.DeviceType     = m_deviceType;
+   out.BehaviorFlags  = m_behaviorFlags;
+   out.AdapterOrdinal = m_adapter->GetOrdinal();
+
+   return D3D_OK;
+ }
+
+
+ /**
+  * Installs the given surface as the cursor image.
+  *
+  * XHotSpot / YHotSpot are passed through to the hardware cursor.
+  * pCursorBitmap must be non-null and use the A8R8G8B8 format, otherwise
+  * D3DERR_INVALIDCALL is returned. A failure to lock the surface is
+  * returned as-is. The software cursor path is not implemented: it only
+  * logs a warning and reports D3D_OK.
+  */
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetCursorProperties(
+         UINT               XHotSpot,
+         UINT               YHotSpot,
+         IDirect3DSurface9* pCursorBitmap) {
+   D3D9DeviceLock lock = LockDevice();
+
+   if (unlikely(pCursorBitmap == nullptr))
+     return D3DERR_INVALIDCALL;
+
+   auto* cursorTex = GetCommonTexture(pCursorBitmap);
+   if (unlikely(cursorTex->Desc()->Format != D3D9Format::A8R8G8B8))
+     return D3DERR_INVALIDCALL;
+
+   uint32_t inputWidth  = cursorTex->Desc()->Width;
+   uint32_t inputHeight = cursorTex->Desc()->Height;
+
+   // Always use a hardware cursor when windowed.
+   bool hwCursor = m_presentParams.Windowed;
+
+   // Always use a hardware cursor w/h <= 32 px
+   hwCursor |= inputWidth  <= HardwareCursorWidth
+            || inputHeight <= HardwareCursorHeight;
+
+   if (hwCursor) {
+     D3DLOCKED_BOX lockedBox;
+     HRESULT hr = LockImage(cursorTex, 0, 0, &lockedBox, nullptr, D3DLOCK_READONLY);
+     if (FAILED(hr))
+       return hr;
+
+     const uint8_t* data = reinterpret_cast<const uint8_t*>(lockedBox.pBits);
+
+     // Windows works with a stride of 128, lets respect that.
+     // Copy data to the bitmap...
+     CursorBitmap bitmap = { 0 };
+
+     // Clamp the copy to what the source surface really holds. A row of
+     // the source is only inputWidth pixels wide and there are only
+     // inputHeight rows; copying a full hardware-cursor rectangle out of a
+     // smaller surface would read past the locked data. Texels that are
+     // not copied keep the zero the bitmap was initialized with.
+     const size_t copyPitch = std::min<size_t>(
+       HardwareCursorPitch,
+       size_t(inputWidth) * HardwareCursorFormatSize);
+
+     const uint32_t copyRows = std::min<uint32_t>(HardwareCursorHeight, inputHeight);
+
+     for (uint32_t h = 0; h < copyRows; h++)
+       std::memcpy(&bitmap[h * HardwareCursorPitch], &data[h * lockedBox.RowPitch], copyPitch);
+
+     UnlockImage(cursorTex, 0, 0);
+
+     // Set this as our cursor.
+     return m_cursor.SetHardwareCursor(XHotSpot, YHotSpot, bitmap);
+   }
+
+   // Software Cursor...
+   Logger::warn("D3D9DeviceEx::SetCursorProperties: Software cursor not implemented.");
+   return D3D_OK;
+ }
+
+
+ /**
+  * Moves the cursor to (X, Y) while holding the device lock.
+  * Flags is accepted but not consulted.
+  */
+ void STDMETHODCALLTYPE D3D9DeviceEx::SetCursorPosition(int X, int Y, DWORD Flags) {
+   D3D9DeviceLock lock = LockDevice();
+
+   // No case was found, fullscreen or windowed, in which the cursor
+   // update was anything but immediate. D3DCURSOR_IMMEDIATE_UPDATE
+   // therefore makes no difference and is ignored.
+   m_cursor.UpdateCursor(X, Y);
+ }
+
+ /**
+  * Takes the device lock and forwards bShow to the cursor object,
+  * returning whatever that call returns.
+  */
+ BOOL STDMETHODCALLTYPE D3D9DeviceEx::ShowCursor(BOOL bShow) {
+ D3D9DeviceLock lock = LockDevice();
+
+ return m_cursor.ShowCursor(bShow);
+ }
+
+ /**
+  * Forwards to CreateAdditionalSwapChainEx() with a null second argument.
+  */
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateAdditionalSwapChain(
+ D3DPRESENT_PARAMETERS* pPresentationParameters,
+ IDirect3DSwapChain9** ppSwapChain) {
+ return CreateAdditionalSwapChainEx(pPresentationParameters, nullptr, ppSwapChain);
+ }
+
+
+ /**
+  * Returns a new reference to the implicit swapchain. Only index 0 is
+  * accepted; a null output pointer or any other index yields
+  * D3DERR_INVALIDCALL.
+  */
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetSwapChain(UINT iSwapChain, IDirect3DSwapChain9** pSwapChain) {
+   D3D9DeviceLock lock = LockDevice();
+
+   InitReturnPtr(pSwapChain);
+
+   // Additional swapchains are not reachable through this call.
+   const bool badArguments = pSwapChain == nullptr
+                          || iSwapChain != 0;
+
+   if (unlikely(badArguments))
+     return D3DERR_INVALIDCALL;
+
+   *pSwapChain = static_cast<IDirect3DSwapChain9*>(m_implicitSwapchain.ref());
+   return D3D_OK;
+ }
+
+ /**
+  * Reports the swapchain count; this implementation always answers 1.
+  */
+ UINT STDMETHODCALLTYPE D3D9DeviceEx::GetNumberOfSwapChains() {
+ // This only counts the implicit swapchain...
+
+ return 1;
+ }
+
+
+ /**
+  * Resets the swapchain and then the device state with the given present
+  * parameters. The first failing step's HRESULT is returned unchanged;
+  * on success the device is flushed and synchronized with the CS thread.
+  */
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::Reset(D3DPRESENT_PARAMETERS* pPresentationParameters) {
+   D3D9DeviceLock lock = LockDevice();
+
+   const HRESULT swapchainResult = ResetSwapChain(pPresentationParameters, nullptr);
+   if (FAILED(swapchainResult))
+     return swapchainResult;
+
+   const HRESULT stateResult = ResetState(pPresentationParameters);
+   if (FAILED(stateResult))
+     return stateResult;
+
+   Flush();
+   SynchronizeCsThread();
+
+   return D3D_OK;
+ }
+
+ /**
+  * Forwards to PresentEx() with the same arguments and a flags value of 0.
+  */
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::Present(
+ const RECT* pSourceRect,
+ const RECT* pDestRect,
+ HWND hDestWindowOverride,
+ const RGNDATA* pDirtyRegion) {
+ return PresentEx(
+ pSourceRect,
+ pDestRect,
+ hDestWindowOverride,
+ pDirtyRegion,
+ 0);
+ }
+
+
+ /**
+  * Fetches a back buffer of the implicit swapchain. The output pointer is
+  * cleared first; any swapchain index other than 0 yields
+  * D3DERR_INVALIDCALL.
+  */
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetBackBuffer(
+         UINT                iSwapChain,
+         UINT                iBackBuffer,
+         D3DBACKBUFFER_TYPE  Type,
+         IDirect3DSurface9** ppBackBuffer) {
+   InitReturnPtr(ppBackBuffer);
+
+   if (iSwapChain == 0)
+     return m_implicitSwapchain->GetBackBuffer(iBackBuffer, Type, ppBackBuffer);
+
+   return D3DERR_INVALIDCALL;
+ }
+
+
+ /**
+  * Queries the raster status of the implicit swapchain. Any swapchain
+  * index other than 0 yields D3DERR_INVALIDCALL.
+  */
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetRasterStatus(UINT iSwapChain, D3DRASTER_STATUS* pRasterStatus) {
+   if (iSwapChain == 0)
+     return m_implicitSwapchain->GetRasterStatus(pRasterStatus);
+
+   return D3DERR_INVALIDCALL;
+ }
+
+ /**
+  * Forwards bEnableDialogs to the implicit swapchain.
+  */
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetDialogBoxMode(BOOL bEnableDialogs) {
+ return m_implicitSwapchain->SetDialogBoxMode(bEnableDialogs);
+ }
+
+
+ /**
+  * Applies a gamma ramp to the implicit swapchain. Calls naming any
+  * swapchain index other than 0 are silently dropped.
+  */
+ void STDMETHODCALLTYPE D3D9DeviceEx::SetGammaRamp(
+         UINT                iSwapChain,
+         DWORD               Flags,
+         const D3DGAMMARAMP* pRamp) {
+   if (iSwapChain == 0)
+     m_implicitSwapchain->SetGammaRamp(Flags, pRamp);
+ }
+
+
+ /**
+  * Reads the gamma ramp of the implicit swapchain into pRamp. Calls
+  * naming any swapchain index other than 0 leave pRamp untouched.
+  */
+ void STDMETHODCALLTYPE D3D9DeviceEx::GetGammaRamp(UINT iSwapChain, D3DGAMMARAMP* pRamp) {
+   if (iSwapChain == 0)
+     m_implicitSwapchain->GetGammaRamp(pRamp);
+ }
+
+
+ /**
+  * Creates a 2D texture and returns it through ppTexture.
+  *
+  * Returns D3DERR_INVALIDCALL for a null output pointer or when the
+  * description fails normalization, and D3DERR_OUTOFVIDEOMEMORY when
+  * creation throws a DxvkError (the message is logged).
+  * For single-level D3DPOOL_SYSTEMMEM textures with a non-null
+  * pSharedHandle, the pointer stored in *pSharedHandle is handed to the
+  * initializer as initial data.
+  */
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateTexture(
+         UINT                Width,
+         UINT                Height,
+         UINT                Levels,
+         DWORD               Usage,
+         D3DFORMAT           Format,
+         D3DPOOL             Pool,
+         IDirect3DTexture9** ppTexture,
+         HANDLE*             pSharedHandle) {
+   InitReturnPtr(ppTexture);
+
+   if (unlikely(ppTexture == nullptr))
+     return D3DERR_INVALIDCALL;
+
+   D3D9_COMMON_TEXTURE_DESC texDesc;
+
+   // Caller-supplied properties.
+   texDesc.Format             = EnumerateFormat(Format);
+   texDesc.Width              = Width;
+   texDesc.Height             = Height;
+   texDesc.MipLevels          = Levels;
+   texDesc.Usage              = Usage;
+   texDesc.Pool               = Pool;
+
+   // Properties fixed for a plain 2D texture.
+   texDesc.Depth              = 1;
+   texDesc.ArraySize          = 1;
+   texDesc.Discard            = FALSE;
+   texDesc.MultiSample        = D3DMULTISAMPLE_NONE;
+   texDesc.MultisampleQuality = 0;
+   texDesc.IsBackBuffer       = FALSE;
+   texDesc.IsAttachmentOnly   = FALSE;
+
+   if (FAILED(D3D9CommonTexture::NormalizeTextureProperties(this, &texDesc)))
+     return D3DERR_INVALIDCALL;
+
+   try {
+     const Com<D3D9Texture2D> created = new D3D9Texture2D(this, &texDesc);
+
+     const bool hasInitialData = Pool == D3DPOOL_SYSTEMMEM
+                              && Levels == 1
+                              && pSharedHandle != nullptr;
+
+     void* initialData = hasInitialData
+       ? *(reinterpret_cast<void**>(pSharedHandle))
+       : nullptr;
+
+     m_initializer->InitTexture(created->GetCommonTexture(), initialData);
+     *ppTexture = created.ref();
+
+     return D3D_OK;
+   }
+   catch (const DxvkError& err) {
+     Logger::err(err.message());
+     return D3DERR_OUTOFVIDEOMEMORY;
+   }
+ }
+
+
+ /**
+  * Creates a volume (3D) texture and returns it through ppVolumeTexture.
+  *
+  * Returns D3DERR_INVALIDCALL for a null output pointer or when the
+  * description fails normalization, and D3DERR_OUTOFVIDEOMEMORY when
+  * creation throws a DxvkError (the message is logged).
+  * pSharedHandle is not consulted.
+  */
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateVolumeTexture(
+         UINT                      Width,
+         UINT                      Height,
+         UINT                      Depth,
+         UINT                      Levels,
+         DWORD                     Usage,
+         D3DFORMAT                 Format,
+         D3DPOOL                   Pool,
+         IDirect3DVolumeTexture9** ppVolumeTexture,
+         HANDLE*                   pSharedHandle) {
+   InitReturnPtr(ppVolumeTexture);
+
+   if (unlikely(ppVolumeTexture == nullptr))
+     return D3DERR_INVALIDCALL;
+
+   D3D9_COMMON_TEXTURE_DESC volumeDesc;
+
+   // Caller-supplied properties.
+   volumeDesc.Format             = EnumerateFormat(Format);
+   volumeDesc.Width              = Width;
+   volumeDesc.Height             = Height;
+   volumeDesc.Depth              = Depth;
+   volumeDesc.MipLevels          = Levels;
+   volumeDesc.Usage              = Usage;
+   volumeDesc.Pool               = Pool;
+
+   // Properties fixed for a volume texture.
+   volumeDesc.ArraySize          = 1;
+   volumeDesc.Discard            = FALSE;
+   volumeDesc.MultiSample        = D3DMULTISAMPLE_NONE;
+   volumeDesc.MultisampleQuality = 0;
+   volumeDesc.IsBackBuffer       = FALSE;
+   volumeDesc.IsAttachmentOnly   = FALSE;
+
+   if (FAILED(D3D9CommonTexture::NormalizeTextureProperties(this, &volumeDesc)))
+     return D3DERR_INVALIDCALL;
+
+   try {
+     const Com<D3D9Texture3D> created = new D3D9Texture3D(this, &volumeDesc);
+     m_initializer->InitTexture(created->GetCommonTexture());
+     *ppVolumeTexture = created.ref();
+
+     return D3D_OK;
+   }
+   catch (const DxvkError& err) {
+     Logger::err(err.message());
+     return D3DERR_OUTOFVIDEOMEMORY;
+   }
+ }
+
+
+ /**
+  * Creates a cube texture (six square faces of EdgeLength) and returns it
+  * through ppCubeTexture.
+  *
+  * Returns D3DERR_INVALIDCALL for a null output pointer or when the
+  * description fails normalization, and D3DERR_OUTOFVIDEOMEMORY when
+  * creation throws a DxvkError (the message is logged).
+  * pSharedHandle is not consulted.
+  */
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateCubeTexture(
+         UINT                    EdgeLength,
+         UINT                    Levels,
+         DWORD                   Usage,
+         D3DFORMAT               Format,
+         D3DPOOL                 Pool,
+         IDirect3DCubeTexture9** ppCubeTexture,
+         HANDLE*                 pSharedHandle) {
+   InitReturnPtr(ppCubeTexture);
+
+   if (unlikely(ppCubeTexture == nullptr))
+     return D3DERR_INVALIDCALL;
+
+   D3D9_COMMON_TEXTURE_DESC cubeDesc;
+
+   // Caller-supplied properties; both face dimensions are the edge length.
+   cubeDesc.Format             = EnumerateFormat(Format);
+   cubeDesc.Width              = EdgeLength;
+   cubeDesc.Height             = EdgeLength;
+   cubeDesc.MipLevels          = Levels;
+   cubeDesc.Usage              = Usage;
+   cubeDesc.Pool               = Pool;
+
+   // Properties fixed for a cube texture: one array slice per face.
+   cubeDesc.Depth              = 1;
+   cubeDesc.ArraySize          = 6;
+   cubeDesc.Discard            = FALSE;
+   cubeDesc.MultiSample        = D3DMULTISAMPLE_NONE;
+   cubeDesc.MultisampleQuality = 0;
+   cubeDesc.IsBackBuffer       = FALSE;
+   cubeDesc.IsAttachmentOnly   = FALSE;
+
+   if (FAILED(D3D9CommonTexture::NormalizeTextureProperties(this, &cubeDesc)))
+     return D3DERR_INVALIDCALL;
+
+   try {
+     const Com<D3D9TextureCube> created = new D3D9TextureCube(this, &cubeDesc);
+     m_initializer->InitTexture(created->GetCommonTexture());
+     *ppCubeTexture = created.ref();
+
+     return D3D_OK;
+   }
+   catch (const DxvkError& err) {
+     Logger::err(err.message());
+     return D3DERR_OUTOFVIDEOMEMORY;
+   }
+ }
+
+
+ /**
+  * Creates a vertex buffer of Length bytes and returns it through
+  * ppVertexBuffer.
+  *
+  * Returns D3DERR_INVALIDCALL for a null output pointer, when the
+  * description fails validation, or when creation throws a DxvkError
+  * (the message is logged). pSharedHandle is not consulted.
+  */
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateVertexBuffer(
+         UINT                     Length,
+         DWORD                    Usage,
+         DWORD                    FVF,
+         D3DPOOL                  Pool,
+         IDirect3DVertexBuffer9** ppVertexBuffer,
+         HANDLE*                  pSharedHandle) {
+   InitReturnPtr(ppVertexBuffer);
+
+   if (unlikely(ppVertexBuffer == nullptr))
+     return D3DERR_INVALIDCALL;
+
+   D3D9_BUFFER_DESC vbDesc;
+   vbDesc.Type   = D3DRTYPE_VERTEXBUFFER;
+   vbDesc.Format = D3D9Format::VERTEXDATA;
+   vbDesc.Size   = Length;
+   vbDesc.Usage  = Usage;
+   vbDesc.Pool   = Pool;
+   vbDesc.FVF    = FVF;
+
+   if (FAILED(D3D9CommonBuffer::ValidateBufferProperties(&vbDesc)))
+     return D3DERR_INVALIDCALL;
+
+   try {
+     const Com<D3D9VertexBuffer> created = new D3D9VertexBuffer(this, &vbDesc);
+     m_initializer->InitBuffer(created->GetCommonBuffer());
+     *ppVertexBuffer = created.ref();
+
+     return D3D_OK;
+   }
+   catch (const DxvkError& err) {
+     Logger::err(err.message());
+     return D3DERR_INVALIDCALL;
+   }
+ }
+
+
+ /**
+  * Creates an index buffer of Length bytes and returns it through
+  * ppIndexBuffer.
+  *
+  * Returns D3DERR_INVALIDCALL for a null output pointer, when the
+  * description fails validation, or when creation throws a DxvkError
+  * (the message is logged). pSharedHandle is not consulted.
+  */
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateIndexBuffer(
+         UINT                    Length,
+         DWORD                   Usage,
+         D3DFORMAT               Format,
+         D3DPOOL                 Pool,
+         IDirect3DIndexBuffer9** ppIndexBuffer,
+         HANDLE*                 pSharedHandle) {
+   InitReturnPtr(ppIndexBuffer);
+
+   if (unlikely(ppIndexBuffer == nullptr))
+     return D3DERR_INVALIDCALL;
+
+   D3D9_BUFFER_DESC ibDesc;
+   ibDesc.Type   = D3DRTYPE_INDEXBUFFER;
+   ibDesc.Format = EnumerateFormat(Format);
+   ibDesc.Size   = Length;
+   ibDesc.Usage  = Usage;
+   ibDesc.Pool   = Pool;
+
+   if (FAILED(D3D9CommonBuffer::ValidateBufferProperties(&ibDesc)))
+     return D3DERR_INVALIDCALL;
+
+   try {
+     const Com<D3D9IndexBuffer> created = new D3D9IndexBuffer(this, &ibDesc);
+     m_initializer->InitBuffer(created->GetCommonBuffer());
+     *ppIndexBuffer = created.ref();
+
+     return D3D_OK;
+   }
+   catch (const DxvkError& err) {
+     Logger::err(err.message());
+     return D3DERR_INVALIDCALL;
+   }
+ }
+
+ /**
+  * Forwards to CreateRenderTargetEx() with the same arguments and a
+  * trailing argument of 0.
+  */
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateRenderTarget(
+ UINT Width,
+ UINT Height,
+ D3DFORMAT Format,
+ D3DMULTISAMPLE_TYPE MultiSample,
+ DWORD MultisampleQuality,
+ BOOL Lockable,
+ IDirect3DSurface9** ppSurface,
+ HANDLE* pSharedHandle) {
+ return CreateRenderTargetEx(
+ Width,
+ Height,
+ Format,
+ MultiSample,
+ MultisampleQuality,
+ Lockable,
+ ppSurface,
+ pSharedHandle,
+ 0);
+ }
+
+ /**
+  * Forwards to CreateDepthStencilSurfaceEx() with the same arguments and
+  * a trailing argument of 0.
+  */
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateDepthStencilSurface(
+ UINT Width,
+ UINT Height,
+ D3DFORMAT Format,
+ D3DMULTISAMPLE_TYPE MultiSample,
+ DWORD MultisampleQuality,
+ BOOL Discard,
+ IDirect3DSurface9** ppSurface,
+ HANDLE* pSharedHandle) {
+ return CreateDepthStencilSurfaceEx(
+ Width,
+ Height,
+ Format,
+ MultiSample,
+ MultisampleQuality,
+ Discard,
+ ppSurface,
+ pSharedHandle,
+ 0);
+ }
+
+ /**
+  * Uploads a region of a D3DPOOL_SYSTEMMEM surface into a
+  * D3DPOOL_DEFAULT surface of the same format.
+  *
+  * pSourceRect (optional) selects the source region; without it the
+  * whole mip level is copied. pDestPoint (optional) is the destination
+  * offset; without it the copy lands at the origin.
+  *
+  * Returns D3DERR_INVALIDCALL when either surface is null, the pools or
+  * formats do not match the above, the rect is negative, empty or not
+  * block-aligned, or the destination point is negative or not
+  * block-aligned. Otherwise the source texels are packed into a
+  * temporary buffer, a buffer-to-image copy is emitted to the CS thread,
+  * the destination subresource is flagged as written by the GPU, and
+  * D3D_OK is returned.
+  */
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::UpdateSurface(
+ IDirect3DSurface9* pSourceSurface,
+ const RECT* pSourceRect,
+ IDirect3DSurface9* pDestinationSurface,
+ const POINT* pDestPoint) {
+ D3D9DeviceLock lock = LockDevice();
+
+ D3D9Surface* src = static_cast<D3D9Surface*>(pSourceSurface);
+ D3D9Surface* dst = static_cast<D3D9Surface*>(pDestinationSurface);
+
+ if (unlikely(src == nullptr || dst == nullptr))
+ return D3DERR_INVALIDCALL;
+
+ D3D9CommonTexture* srcTextureInfo = src->GetCommonTexture();
+ D3D9CommonTexture* dstTextureInfo = dst->GetCommonTexture();
+
+ if (unlikely(srcTextureInfo->Desc()->Pool != D3DPOOL_SYSTEMMEM || dstTextureInfo->Desc()->Pool != D3DPOOL_DEFAULT)) // only SYSTEMMEM -> DEFAULT is accepted
+ return D3DERR_INVALIDCALL;
+
+ if (unlikely(srcTextureInfo->Desc()->Format != dstTextureInfo->Desc()->Format))
+ return D3DERR_INVALIDCALL;
+
+ const DxvkFormatInfo* formatInfo = imageFormatInfo(dstTextureInfo->GetFormatMapping().FormatColor);
+
+ VkOffset3D srcBlockOffset = { 0u, 0u, 0u };
+ VkOffset3D dstOffset = { 0u, 0u, 0u };
+ VkExtent3D texLevelExtent = srcTextureInfo->GetExtentMip(src->GetSubresource());
+ VkExtent3D texLevelBlockCount = util::computeBlockCount(texLevelExtent, formatInfo->blockSize);
+
+ VkExtent3D copyExtent = texLevelExtent; // default: copy the whole source level
+
+ if (pSourceRect != nullptr) {
+ const VkExtent3D extent = { uint32_t(pSourceRect->right - pSourceRect->left), uint32_t(pSourceRect->bottom - pSourceRect->top), 1 };
+
+ const bool extentAligned = extent.width % formatInfo->blockSize.width == 0
+ && extent.height % formatInfo->blockSize.height == 0;
+
+ if (pSourceRect->left < 0
+ || pSourceRect->top < 0
+ || pSourceRect->right <= pSourceRect->left
+ || pSourceRect->bottom <= pSourceRect->top
+ || pSourceRect->left % formatInfo->blockSize.width != 0
+ || pSourceRect->top % formatInfo->blockSize.height != 0
+ || (extent != texLevelExtent && !extentAligned)) // an unaligned size is tolerated only when it equals the full level
+ return D3DERR_INVALIDCALL;
+
+ srcBlockOffset = { pSourceRect->left / int32_t(formatInfo->blockSize.width),
+ pSourceRect->top / int32_t(formatInfo->blockSize.height),
+ 0u };
+
+ copyExtent = { extent.width,
+ extent.height,
+ 1u };
+ }
+
+ if (pDestPoint != nullptr) {
+ if (pDestPoint->x % formatInfo->blockSize.width != 0
+ || pDestPoint->y % formatInfo->blockSize.height != 0
+ || pDestPoint->x < 0
+ || pDestPoint->y < 0)
+ return D3DERR_INVALIDCALL;
+
+ dstOffset = { pDestPoint->x,
+ pDestPoint->y,
+ 0u };
+ }
+
+ VkExtent3D copyBlockCount = util::computeBlockCount(copyExtent, formatInfo->blockSize);
+
+ const auto dstSubresource = vk::makeSubresourceLayers(
+ dstTextureInfo->GetSubresourceFromIndex(VK_IMAGE_ASPECT_COLOR_BIT, dst->GetSubresource()));
+
+ DxvkBufferSliceHandle srcSlice = srcTextureInfo->GetMappedSlice(src->GetSubresource());
+ VkDeviceSize dirtySize = copyBlockCount.width * copyBlockCount.height * formatInfo->elementSize; // bytes needed for the tightly packed copy
+ D3D9BufferSlice slice = AllocTempBuffer<false>(dirtySize);
+ VkDeviceSize pitch = align(texLevelBlockCount.width * formatInfo->elementSize, 4); // source row size in bytes, rounded up to a multiple of 4
+ VkDeviceSize copySrcOffset = srcBlockOffset.z * texLevelBlockCount.height * pitch
+ + srcBlockOffset.y * pitch
+ + srcBlockOffset.x * formatInfo->elementSize;
+
+ void* srcData = reinterpret_cast<uint8_t*>(srcSlice.mapPtr) + copySrcOffset;
+ util::packImageData(
+ slice.mapPtr, srcData, copyBlockCount, formatInfo->elementSize,
+ pitch, pitch * texLevelBlockCount.height);
+
+ Rc<DxvkImage> dstImage = dstTextureInfo->GetImage();
+
+ EmitCs([
+ cDstImage = std::move(dstImage),
+ cSrcSlice = slice.slice,
+ cDstLayers = dstSubresource,
+ cDstOffset = dstOffset,
+ cCopyExtent = copyExtent
+ ] (DxvkContext* ctx) {
+ ctx->copyBufferToImage(
+ cDstImage, cDstLayers, cDstOffset, cCopyExtent,
+ cSrcSlice.buffer(), cSrcSlice.offset(), 0, 0);
+ });
+
+ dstTextureInfo->SetWrittenByGPU(dst->GetSubresource(), true);
+
+ if (dstTextureInfo->IsAutomaticMip())
+ MarkTextureMipsDirty(dstTextureInfo);
+
+ return D3D_OK;
+ }
+
+ /**
+  * Uploads the dirty regions of a D3DPOOL_SYSTEMMEM texture into a
+  * D3DPOOL_DEFAULT texture.
+  *
+  * Returns D3DERR_INVALIDCALL when either texture is null, the pools do
+  * not match the above, or the source is an automatic-mip texture while
+  * the destination is not.
+  *
+  * For every array slice with a non-empty dirty box, each processed mip
+  * level has the box scaled to that level, its texels packed into a
+  * temporary buffer, and a buffer-to-image copy emitted to the CS
+  * thread. Afterwards the source's dirty boxes are cleared and an
+  * implicit flush is requested.
+  */
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::UpdateTexture(
+ IDirect3DBaseTexture9* pSourceTexture,
+ IDirect3DBaseTexture9* pDestinationTexture) {
+ D3D9DeviceLock lock = LockDevice();
+
+ if (!pDestinationTexture || !pSourceTexture)
+ return D3DERR_INVALIDCALL;
+
+ D3D9CommonTexture* dstTexInfo = GetCommonTexture(pDestinationTexture);
+ D3D9CommonTexture* srcTexInfo = GetCommonTexture(pSourceTexture);
+
+ if (unlikely(srcTexInfo->Desc()->Pool != D3DPOOL_SYSTEMMEM || dstTexInfo->Desc()->Pool != D3DPOOL_DEFAULT)) // only SYSTEMMEM -> DEFAULT is accepted
+ return D3DERR_INVALIDCALL;
+
+ const Rc<DxvkImage> dstImage = dstTexInfo->GetImage();
+ const DxvkFormatInfo* formatInfo = imageFormatInfo(dstTexInfo->GetFormatMapping().FormatColor);
+ uint32_t mipLevels = std::min(srcTexInfo->Desc()->MipLevels, dstTexInfo->Desc()->MipLevels);
+ uint32_t arraySlices = std::min(srcTexInfo->Desc()->ArraySize, dstTexInfo->Desc()->ArraySize);
+
+ if (unlikely(srcTexInfo->IsAutomaticMip() && !dstTexInfo->IsAutomaticMip()))
+ return D3DERR_INVALIDCALL;
+
+ if (dstTexInfo->IsAutomaticMip())
+ mipLevels = 1; // upload the top level only; the mips are marked dirty at the end
+
+ for (uint32_t a = 0; a < arraySlices; a++) {
+ const D3DBOX& box = srcTexInfo->GetDirtyBox(a);
+ if (box.Left >= box.Right || box.Top >= box.Bottom || box.Front >= box.Back) // empty dirty box: nothing to upload for this slice
+ continue;
+
+ for (uint32_t m = 0; m < mipLevels; m++) {
+ VkImageSubresourceLayers dstLayers = { VK_IMAGE_ASPECT_COLOR_BIT, m, a, 1 };
+
+ VkOffset3D scaledBoxOffset = {
+ int32_t(alignDown(box.Left >> m, formatInfo->blockSize.width)),
+ int32_t(alignDown(box.Top >> m, formatInfo->blockSize.height)),
+ int32_t(alignDown(box.Front >> m, formatInfo->blockSize.depth))
+ };
+ VkExtent3D scaledBoxExtent = util::computeMipLevelExtent({
+ uint32_t(box.Right - int32_t(alignDown(box.Left, formatInfo->blockSize.width))),
+ uint32_t(box.Bottom - int32_t(alignDown(box.Top, formatInfo->blockSize.height))),
+ uint32_t(box.Back - int32_t(alignDown(box.Front, formatInfo->blockSize.depth)))
+ }, m);
+ VkExtent3D scaledBoxExtentBlockCount = util::computeBlockCount(scaledBoxExtent, formatInfo->blockSize);
+ VkExtent3D scaledAlignedBoxExtent = util::computeBlockExtent(scaledBoxExtentBlockCount, formatInfo->blockSize);
+
+ VkExtent3D texLevelExtent = dstImage->mipLevelExtent(m);
+ VkExtent3D texLevelExtentBlockCount = util::computeBlockCount(texLevelExtent, formatInfo->blockSize);
+
+ scaledAlignedBoxExtent.width = std::min<uint32_t>(texLevelExtent.width - scaledBoxOffset.x, scaledAlignedBoxExtent.width); // keep offset + extent inside the level
+ scaledAlignedBoxExtent.height = std::min<uint32_t>(texLevelExtent.height - scaledBoxOffset.y, scaledAlignedBoxExtent.height);
+ scaledAlignedBoxExtent.depth = std::min<uint32_t>(texLevelExtent.depth - scaledBoxOffset.z, scaledAlignedBoxExtent.depth);
+
+ VkDeviceSize dirtySize = scaledBoxExtentBlockCount.width * scaledBoxExtentBlockCount.height * scaledBoxExtentBlockCount.depth * formatInfo->elementSize; // bytes needed for the tightly packed copy
+ D3D9BufferSlice slice = AllocTempBuffer<false>(dirtySize);
+ VkOffset3D boxOffsetBlockCount = util::computeBlockOffset(scaledBoxOffset, formatInfo->blockSize);
+ VkDeviceSize pitch = align(texLevelExtentBlockCount.width * formatInfo->elementSize, 4); // source row size in bytes, rounded up to a multiple of 4
+ VkDeviceSize copySrcOffset = boxOffsetBlockCount.z * texLevelExtentBlockCount.height * pitch
+ + boxOffsetBlockCount.y * pitch
+ + boxOffsetBlockCount.x * formatInfo->elementSize;
+
+ void* srcData = reinterpret_cast<uint8_t*>(srcTexInfo->GetMappedSlice(srcTexInfo->CalcSubresource(a, m)).mapPtr) + copySrcOffset;
+ util::packImageData(
+ slice.mapPtr, srcData, scaledBoxExtentBlockCount, formatInfo->elementSize,
+ pitch, pitch * texLevelExtentBlockCount.height);
+
+ scaledAlignedBoxExtent.width = std::min<uint32_t>(texLevelExtent.width, scaledAlignedBoxExtent.width);
+ scaledAlignedBoxExtent.height = std::min<uint32_t>(texLevelExtent.height, scaledAlignedBoxExtent.height);
+ scaledAlignedBoxExtent.depth = std::min<uint32_t>(texLevelExtent.depth, scaledAlignedBoxExtent.depth);
+
+ EmitCs([
+ cDstImage = dstImage,
+ cSrcSlice = slice.slice,
+ cDstLayers = dstLayers,
+ cExtent = scaledAlignedBoxExtent,
+ cOffset = scaledBoxOffset
+ ] (DxvkContext* ctx) {
+ ctx->copyBufferToImage(
+ cDstImage, cDstLayers,
+ cOffset, cExtent,
+ cSrcSlice.buffer(), cSrcSlice.offset(), 0, 0);
+ });
+
+ dstTexInfo->SetWrittenByGPU(dstTexInfo->CalcSubresource(a, m), true);
+ }
+ }
+
+ srcTexInfo->ClearDirtyBoxes();
+ if (dstTexInfo->IsAutomaticMip() && mipLevels != dstTexInfo->Desc()->MipLevels)
+ MarkTextureMipsDirty(dstTexInfo);
+
+ FlushImplicit(false);
+
+ return D3D_OK;
+ }
+
+
+ /**
+  * Copies the contents of a render target into another surface.
+  *
+  * Returns D3DERR_INVALIDCALL when either surface is null or the two
+  * formats differ, and D3D_OK without doing anything when both arguments
+  * are the same surface. A D3DPOOL_DEFAULT destination is delegated to
+  * StretchRect(). For any other pool an image-to-buffer copy into the
+  * destination's buffer is emitted to the CS thread and the destination
+  * subresource is flagged as written by the GPU.
+  */
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetRenderTargetData(
+         IDirect3DSurface9* pRenderTarget,
+         IDirect3DSurface9* pDestSurface) {
+   D3D9DeviceLock lock = LockDevice();
+
+   D3D9Surface* src = static_cast<D3D9Surface*>(pRenderTarget);
+   D3D9Surface* dst = static_cast<D3D9Surface*>(pDestSurface);
+
+   if (unlikely(src == nullptr || dst == nullptr))
+     return D3DERR_INVALIDCALL;
+
+   if (pRenderTarget == pDestSurface)
+     return D3D_OK;
+
+   D3D9CommonTexture* dstTexInfo = GetCommonTexture(dst);
+   D3D9CommonTexture* srcTexInfo = GetCommonTexture(src);
+
+   if (srcTexInfo->Desc()->Format != dstTexInfo->Desc()->Format)
+     return D3DERR_INVALIDCALL;
+
+   if (dstTexInfo->Desc()->Pool == D3DPOOL_DEFAULT)
+     return this->StretchRect(pRenderTarget, nullptr, pDestSurface, nullptr, D3DTEXF_NONE);
+
+   Rc<DxvkBuffer> dstBuffer = dstTexInfo->GetBuffer(dst->GetSubresource());
+
+   Rc<DxvkImage> srcImage = srcTexInfo->GetImage();
+   const DxvkFormatInfo* srcFormatInfo = imageFormatInfo(srcImage->info().format);
+
+   const VkImageSubresource srcSubresource = srcTexInfo->GetSubresourceFromIndex(srcFormatInfo->aspectMask, src->GetSubresource());
+   VkImageSubresourceLayers srcSubresourceLayers = {
+     srcSubresource.aspectMask,
+     srcSubresource.mipLevel,
+     srcSubresource.arrayLayer, 1 };
+
+   VkExtent3D srcExtent = srcTexInfo->GetExtentMip(src->GetMipLevel());
+
+   // The destination layout is fully described by the alignment arguments
+   // passed to copyImageToBuffer below, so no separate pitch/extent for
+   // the buffer needs to be computed or captured here.
+   EmitCs([
+     cBuffer       = dstBuffer,
+     cImage        = srcImage,
+     cSubresources = srcSubresourceLayers,
+     cLevelExtent  = srcExtent
+   ] (DxvkContext* ctx) {
+     ctx->copyImageToBuffer(cBuffer, 0, 4, 0,
+       cImage, cSubresources, VkOffset3D { 0, 0, 0 },
+       cLevelExtent);
+   });
+
+   dstTexInfo->SetWrittenByGPU(dst->GetSubresource(), true);
+
+   return D3D_OK;
+ }
+
+
+ /**
+  * Reads the front buffer of the implicit swapchain into pDestSurface.
+  * Any swapchain index other than 0 yields D3DERR_INVALIDCALL.
+  */
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetFrontBufferData(UINT iSwapChain, IDirect3DSurface9* pDestSurface) {
+   if (iSwapChain == 0)
+     return m_implicitSwapchain->GetFrontBufferData(pDestSurface);
+
+   return D3DERR_INVALIDCALL;
+ }
+
+
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::StretchRect(
+ IDirect3DSurface9* pSourceSurface,
+ const RECT* pSourceRect,
+ IDirect3DSurface9* pDestSurface,
+ const RECT* pDestRect,
+ D3DTEXTUREFILTERTYPE Filter) {
+ D3D9DeviceLock lock = LockDevice();
+
+ D3D9Surface* dst = static_cast<D3D9Surface*>(pDestSurface);
+ D3D9Surface* src = static_cast<D3D9Surface*>(pSourceSurface);
+
+ if (unlikely(src == nullptr || dst == nullptr))
+ return D3DERR_INVALIDCALL;
+
+ if (unlikely(src == dst))
+ return D3DERR_INVALIDCALL;
+
+ bool fastPath = true;
+
+ D3D9CommonTexture* dstTextureInfo = dst->GetCommonTexture();
+ D3D9CommonTexture* srcTextureInfo = src->GetCommonTexture();
+
+ if (unlikely(dstTextureInfo->Desc()->Pool != D3DPOOL_DEFAULT ||
+ srcTextureInfo->Desc()->Pool != D3DPOOL_DEFAULT))
+ return D3DERR_INVALIDCALL;
+
+ Rc<DxvkImage> dstImage = dstTextureInfo->GetImage();
+ Rc<DxvkImage> srcImage = srcTextureInfo->GetImage();
+
+ const DxvkFormatInfo* dstFormatInfo = imageFormatInfo(dstImage->info().format);
+ const DxvkFormatInfo* srcFormatInfo = imageFormatInfo(srcImage->info().format);
+
+ const VkImageSubresource dstSubresource = dstTextureInfo->GetSubresourceFromIndex(dstFormatInfo->aspectMask, dst->GetSubresource());
+ const VkImageSubresource srcSubresource = srcTextureInfo->GetSubresourceFromIndex(srcFormatInfo->aspectMask, src->GetSubresource());
+
+ if (unlikely((srcSubresource.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) && m_flags.test(D3D9DeviceFlag::InScene)))
+ return D3DERR_INVALIDCALL;
+
+ VkExtent3D srcExtent = srcImage->mipLevelExtent(srcSubresource.mipLevel);
+ VkExtent3D dstExtent = dstImage->mipLevelExtent(dstSubresource.mipLevel);
+
+ D3D9Format srcFormat = srcTextureInfo->Desc()->Format;
+ D3D9Format dstFormat = dstTextureInfo->Desc()->Format;
+
+ // We may only fast path copy non identicals one way!
+ // We don't know what garbage could be in the X8 data.
+ bool similar = AreFormatsSimilar(srcFormat, dstFormat);
+
+ // Copies are only supported on similar formats.
+ fastPath &= similar;
+
+ // Copies are only supported if the sample count matches,
+ // otherwise we need to resolve.
+ bool needsResolve = srcImage->info().sampleCount != VK_SAMPLE_COUNT_1_BIT;
+ bool fbBlit = dstImage->info().sampleCount != VK_SAMPLE_COUNT_1_BIT;
+ fastPath &= !fbBlit;
+
+ // Copies would only work if we are block aligned.
+ if (pSourceRect != nullptr) {
+ fastPath &= (pSourceRect->left % srcFormatInfo->blockSize.width == 0);
+ fastPath &= (pSourceRect->right % srcFormatInfo->blockSize.width == 0);
+ fastPath &= (pSourceRect->top % srcFormatInfo->blockSize.height == 0);
+ fastPath &= (pSourceRect->bottom % srcFormatInfo->blockSize.height == 0);
+ }
+
+ if (pDestRect != nullptr) {
+ fastPath &= (pDestRect->left % dstFormatInfo->blockSize.width == 0);
+ fastPath &= (pDestRect->top % dstFormatInfo->blockSize.height == 0);
+ }
+
+ VkImageSubresourceLayers dstSubresourceLayers = {
+ dstSubresource.aspectMask,
+ dstSubresource.mipLevel,
+ dstSubresource.arrayLayer, 1 };
+
+ VkImageSubresourceLayers srcSubresourceLayers = {
+ srcSubresource.aspectMask,
+ srcSubresource.mipLevel,
+ srcSubresource.arrayLayer, 1 };
+
+ VkImageBlit blitInfo;
+ blitInfo.dstSubresource = dstSubresourceLayers;
+ blitInfo.srcSubresource = srcSubresourceLayers;
+
+ blitInfo.dstOffsets[0] = pDestRect != nullptr
+ ? VkOffset3D{ int32_t(pDestRect->left), int32_t(pDestRect->top), 0 }
+ : VkOffset3D{ 0, 0, 0 };
+
+ blitInfo.dstOffsets[1] = pDestRect != nullptr
+ ? VkOffset3D{ int32_t(pDestRect->right), int32_t(pDestRect->bottom), 1 }
+ : VkOffset3D{ int32_t(dstExtent.width), int32_t(dstExtent.height), 1 };
+
+ blitInfo.srcOffsets[0] = pSourceRect != nullptr
+ ? VkOffset3D{ int32_t(pSourceRect->left), int32_t(pSourceRect->top), 0 }
+ : VkOffset3D{ 0, 0, 0 };
+
+ blitInfo.srcOffsets[1] = pSourceRect != nullptr
+ ? VkOffset3D{ int32_t(pSourceRect->right), int32_t(pSourceRect->bottom), 1 }
+ : VkOffset3D{ int32_t(srcExtent.width), int32_t(srcExtent.height), 1 };
+
+ if (unlikely(IsBlitRegionInvalid(blitInfo.srcOffsets, srcExtent)))
+ return D3DERR_INVALIDCALL;
+
+ if (unlikely(IsBlitRegionInvalid(blitInfo.dstOffsets, dstExtent)))
+ return D3DERR_INVALIDCALL;
+
+ VkExtent3D srcCopyExtent =
+ { uint32_t(blitInfo.srcOffsets[1].x - blitInfo.srcOffsets[0].x),
+ uint32_t(blitInfo.srcOffsets[1].y - blitInfo.srcOffsets[0].y),
+ uint32_t(blitInfo.srcOffsets[1].z - blitInfo.srcOffsets[0].z) };
+
+ VkExtent3D dstCopyExtent =
+ { uint32_t(blitInfo.dstOffsets[1].x - blitInfo.dstOffsets[0].x),
+ uint32_t(blitInfo.dstOffsets[1].y - blitInfo.dstOffsets[0].y),
+ uint32_t(blitInfo.dstOffsets[1].z - blitInfo.dstOffsets[0].z) };
+
+ // Copies would only work if the extents match. (ie. no stretching)
+ bool stretch = srcCopyExtent != dstCopyExtent;
+ fastPath &= !stretch;
+
+ if (!fastPath || needsResolve) {
+ // Compressed destination formats are forbidden for blits.
+ if (dstFormatInfo->flags.test(DxvkFormatFlag::BlockCompressed))
+ return D3DERR_INVALIDCALL;
+ }
+
+ auto EmitResolveCS = [&](const Rc<DxvkImage>& resolveDst, bool intermediate) {
+ VkImageResolve region;
+ region.srcSubresource = blitInfo.srcSubresource;
+ region.srcOffset = blitInfo.srcOffsets[0];
+ region.dstSubresource = intermediate ? blitInfo.srcSubresource : blitInfo.dstSubresource;
+ region.dstOffset = intermediate ? blitInfo.srcOffsets[0] : blitInfo.dstOffsets[0];
+ region.extent = srcCopyExtent;
+
+ EmitCs([
+ cDstImage = resolveDst,
+ cSrcImage = srcImage,
+ cRegion = region
+ ] (DxvkContext* ctx) {
+ if (cRegion.srcSubresource.aspectMask != (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
+ ctx->resolveImage(
+ cDstImage, cSrcImage, cRegion,
+ VK_FORMAT_UNDEFINED);
+ }
+ else {
+ ctx->resolveDepthStencilImage(
+ cDstImage, cSrcImage, cRegion,
+ VK_RESOLVE_MODE_AVERAGE_BIT_KHR,
+ VK_RESOLVE_MODE_AVERAGE_BIT_KHR);
+ }
+ });
+ };
+
+ if (fastPath) {
+ if (needsResolve) {
+ EmitResolveCS(dstImage, false);
+ } else {
+ EmitCs([
+ cDstImage = dstImage,
+ cSrcImage = srcImage,
+ cDstLayers = blitInfo.dstSubresource,
+ cSrcLayers = blitInfo.srcSubresource,
+ cDstOffset = blitInfo.dstOffsets[0],
+ cSrcOffset = blitInfo.srcOffsets[0],
+ cExtent = srcCopyExtent
+ ] (DxvkContext* ctx) {
+ ctx->copyImage(
+ cDstImage, cDstLayers, cDstOffset,
+ cSrcImage, cSrcLayers, cSrcOffset,
+ cExtent);
+ });
+ }
+ }
+ else {
+ if (needsResolve) {
+ auto resolveSrc = srcTextureInfo->GetResolveImage();
+
+ EmitResolveCS(resolveSrc, true);
+ srcImage = resolveSrc;
+ }
+
+ EmitCs([
+ cDstImage = dstImage,
+ cDstMap = dstTextureInfo->GetMapping().Swizzle,
+ cSrcImage = srcImage,
+ cSrcMap = srcTextureInfo->GetMapping().Swizzle,
+ cBlitInfo = blitInfo,
+ cFilter = stretch ? DecodeFilter(Filter) : VK_FILTER_NEAREST
+ ] (DxvkContext* ctx) {
+ ctx->blitImage(
+ cDstImage,
+ cDstMap,
+ cSrcImage,
+ cSrcMap,
+ cBlitInfo,
+ cFilter);
+ });
+ }
+
+ dstTextureInfo->SetWrittenByGPU(dst->GetSubresource(), true);
+
+ if (dstTextureInfo->IsAutomaticMip())
+ MarkTextureMipsDirty(dstTextureInfo);
+
+ return D3D_OK;
+ }
+
+
+ // Fills a surface, or the part of it described by pRect, with a single
+ // color. Only surfaces in D3DPOOL_DEFAULT are accepted. When the fill
+ // covers the whole subresource a render target clear is emitted,
+ // otherwise a clear restricted to the given offset/extent is emitted.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::ColorFill(
+     IDirect3DSurface9* pSurface,
+     const RECT* pRect,
+     D3DCOLOR Color) {
+   D3D9DeviceLock lock = LockDevice();
+
+   D3D9Surface* surface = static_cast<D3D9Surface*>(pSurface);
+   if (unlikely(surface == nullptr))
+     return D3DERR_INVALIDCALL;
+
+   D3D9CommonTexture* texture = surface->GetCommonTexture();
+   if (unlikely(texture->Desc()->Pool != D3DPOOL_DEFAULT))
+     return D3DERR_INVALIDCALL;
+
+   const VkExtent3D fullExtent = texture->GetExtentMip(surface->GetSubresource());
+
+   VkOffset3D fillOffset = VkOffset3D{ 0, 0, 0 };
+   VkExtent3D fillExtent = fullExtent;
+
+   // Without a rect the fill always covers the whole subresource.
+   bool coversSurface = pRect == nullptr;
+   if (!coversSurface) {
+     ConvertRect(*pRect, fillOffset, fillExtent);
+
+     coversSurface = fillOffset == VkOffset3D{ 0, 0, 0 }
+                  && fillExtent == fullExtent;
+   }
+
+   Rc<DxvkImageView> view = surface->GetRenderTargetView(false);
+
+   VkClearValue fillValue;
+   DecodeD3DCOLOR(Color, fillValue.color.float32);
+
+   // No render target view: nothing can be cleared. This is only
+   // reported as an error when the format is not the NULL format.
+   if (unlikely(view == nullptr)) {
+     const D3D9Format format = texture->Desc()->Format;
+     if (format != D3D9Format::NULL_FORMAT)
+       Logger::err(str::format("D3D9DeviceEx::ColorFill: Unsupported format ", format));
+
+     return D3D_OK;
+   }
+
+   if (coversSurface) {
+     // Fast path for games that may use this as an
+     // alternative to Clear on render targets.
+     EmitCs([
+       cView  = view,
+       cValue = fillValue
+     ] (DxvkContext* ctx) {
+       ctx->clearRenderTarget(
+         cView,
+         VK_IMAGE_ASPECT_COLOR_BIT,
+         cValue);
+     });
+   }
+   else {
+     EmitCs([
+       cView   = view,
+       cOrigin = fillOffset,
+       cSize   = fillExtent,
+       cValue  = fillValue
+     ] (DxvkContext* ctx) {
+       ctx->clearImageView(
+         cView,
+         cOrigin, cSize,
+         VK_IMAGE_ASPECT_COLOR_BIT,
+         cValue);
+     });
+   }
+
+   texture->SetWrittenByGPU(surface->GetSubresource(), true);
+
+   if (texture->IsAutomaticMip())
+     MarkTextureMipsDirty(texture);
+
+   return D3D_OK;
+ }
+
+
+ // Thin wrapper: forwards all arguments to CreateOffscreenPlainSurfaceEx
+ // and passes 0 for its additional trailing argument.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateOffscreenPlainSurface(
+     UINT Width,
+     UINT Height,
+     D3DFORMAT Format,
+     D3DPOOL Pool,
+     IDirect3DSurface9** ppSurface,
+     HANDLE* pSharedHandle) {
+   return CreateOffscreenPlainSurfaceEx(
+     Width, Height, Format, Pool, ppSurface, pSharedHandle, 0);
+ }
+
+
+ // Binds pRenderTarget to the given render target slot. Slot 0 may not be
+ // unbound, and any non-null surface must have D3DUSAGE_RENDERTARGET.
+ // Binding slot 0 also resets the viewport and scissor rect to cover the
+ // whole surface, even if the surface is already bound.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetRenderTarget(
+     DWORD RenderTargetIndex,
+     IDirect3DSurface9* pRenderTarget) {
+   D3D9DeviceLock lock = LockDevice();
+
+   const bool isPrimary = RenderTargetIndex == 0;
+
+   if (unlikely(RenderTargetIndex >= caps::MaxSimultaneousRenderTargets))
+     return D3DERR_INVALIDCALL;
+
+   if (unlikely(pRenderTarget == nullptr && isPrimary))
+     return D3DERR_INVALIDCALL;
+
+   D3D9Surface* surface = static_cast<D3D9Surface*>(pRenderTarget);
+
+   D3D9CommonTexture* texture = nullptr;
+   if (surface != nullptr)
+     texture = surface->GetCommonTexture();
+
+   if (unlikely(surface != nullptr && !(texture->Desc()->Usage & D3DUSAGE_RENDERTARGET)))
+     return D3DERR_INVALIDCALL;
+
+   if (isPrimary) {
+     // surface is guaranteed non-null here by the check above.
+     const auto size = surface->GetSurfaceExtent();
+
+     D3DVIEWPORT9 fullViewport;
+     fullViewport.X      = 0;
+     fullViewport.Y      = 0;
+     fullViewport.Width  = size.width;
+     fullViewport.Height = size.height;
+     fullViewport.MinZ   = 0.0f;
+     fullViewport.MaxZ   = 1.0f;
+
+     RECT fullScissor;
+     fullScissor.left   = 0;
+     fullScissor.top    = 0;
+     fullScissor.right  = size.width;
+     fullScissor.bottom = size.height;
+
+     if (m_state.viewport != fullViewport) {
+       m_flags.set(D3D9DeviceFlag::DirtyFFViewport);
+       m_flags.set(D3D9DeviceFlag::DirtyPointScale);
+       m_flags.set(D3D9DeviceFlag::DirtyViewportScissor);
+       m_state.viewport = fullViewport;
+     }
+
+     if (m_state.scissorRect != fullScissor) {
+       m_flags.set(D3D9DeviceFlag::DirtyViewportScissor);
+       m_state.scissorRect = fullScissor;
+     }
+   }
+
+   // Nothing else to do if the slot already holds this surface.
+   if (m_state.renderTargets[RenderTargetIndex] == surface)
+     return D3D_OK;
+
+   // Do a strong flush if the first render target is changed.
+   FlushImplicit(isPrimary ? TRUE : FALSE);
+   m_flags.set(D3D9DeviceFlag::DirtyFramebuffer);
+
+   m_state.renderTargets[RenderTargetIndex] = surface;
+
+   UpdateBoundRTs(RenderTargetIndex);
+   UpdateActiveRTs(RenderTargetIndex);
+
+   // Recompute this slot's bit in the alpha swizzle mask and flag the
+   // blend state if the mask changed as a result.
+   const uint32_t previousAlphaSwizzleRTs = m_alphaSwizzleRTs;
+
+   m_alphaSwizzleRTs &= ~(1 << RenderTargetIndex);
+
+   if (surface != nullptr) {
+     if (texture->GetMapping().Swizzle.a == VK_COMPONENT_SWIZZLE_ONE)
+       m_alphaSwizzleRTs |= 1 << RenderTargetIndex;
+
+     if (texture->IsAutomaticMip())
+       texture->SetNeedsMipGen(true);
+
+     texture->SetWrittenByGPU(surface->GetSubresource(), true);
+   }
+
+   if (previousAlphaSwizzleRTs != m_alphaSwizzleRTs)
+     m_flags.set(D3D9DeviceFlag::DirtyBlendState);
+
+   if (isPrimary) {
+     const bool sampleMaskValid = texture->Desc()->MultiSample > D3DMULTISAMPLE_NONMASKABLE;
+
+     if (sampleMaskValid != m_flags.test(D3D9DeviceFlag::ValidSampleMask)) {
+       if (sampleMaskValid)
+         m_flags.set(D3D9DeviceFlag::ValidSampleMask);
+       else
+         m_flags.clr(D3D9DeviceFlag::ValidSampleMask);
+
+       m_flags.set(D3D9DeviceFlag::DirtyMultiSampleState);
+     }
+   }
+
+   return D3D_OK;
+ }
+
+
+ // Returns the surface bound to the given render target slot through
+ // ppRenderTarget (obtained via ref()).
+ //
+ // Returns D3DERR_INVALIDCALL for a null output pointer or an
+ // out-of-range index, and D3DERR_NOTFOUND if the slot is empty.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetRenderTarget(
+     DWORD RenderTargetIndex,
+     IDirect3DSurface9** ppRenderTarget) {
+   D3D9DeviceLock lock = LockDevice();
+
+   InitReturnPtr(ppRenderTarget);
+
+   // Valid slots are [0, MaxSimultaneousRenderTargets). The comparison
+   // must be >= (as in SetRenderTarget); using > would let an index equal
+   // to the slot count read past the end of m_state.renderTargets.
+   if (unlikely(ppRenderTarget == nullptr || RenderTargetIndex >= caps::MaxSimultaneousRenderTargets))
+     return D3DERR_INVALIDCALL;
+
+   if (m_state.renderTargets[RenderTargetIndex] == nullptr)
+     return D3DERR_NOTFOUND;
+
+   *ppRenderTarget = m_state.renderTargets[RenderTargetIndex].ref();
+
+   return D3D_OK;
+ }
+
+
+ // Binds (or, with nullptr, unbinds) the depth-stencil surface. A non-null
+ // surface must have been created with D3DUSAGE_DEPTHSTENCIL. Rebinding
+ // the surface that is already bound is a no-op.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetDepthStencilSurface(IDirect3DSurface9* pNewZStencil) {
+   D3D9DeviceLock lock = LockDevice();
+
+   D3D9Surface* surface = static_cast<D3D9Surface*>(pNewZStencil);
+
+   if (unlikely(surface != nullptr
+             && !(surface->GetCommonTexture()->Desc()->Usage & D3DUSAGE_DEPTHSTENCIL)))
+     return D3DERR_INVALIDCALL;
+
+   if (m_state.depthStencil == surface)
+     return D3D_OK;
+
+   FlushImplicit(FALSE);
+   m_flags.set(D3D9DeviceFlag::DirtyFramebuffer);
+
+   if (surface != nullptr) {
+     // The depth bias scale depends on the depth format; refresh it and
+     // flag the depth bias as dirty when it changes.
+     const float biasScale = GetDepthBufferRValue(
+       surface->GetCommonTexture()->GetFormatMapping().FormatColor);
+
+     if (m_depthBiasScale != biasScale) {
+       m_depthBiasScale = biasScale;
+       m_flags.set(D3D9DeviceFlag::DirtyDepthBias);
+     }
+   }
+
+   m_state.depthStencil = surface;
+
+   UpdateActiveHazardsDS(UINT32_MAX);
+
+   return D3D_OK;
+ }
+
+
+ // Returns the currently bound depth-stencil surface through
+ // ppZStencilSurface (obtained via ref()), or D3DERR_NOTFOUND if no
+ // depth-stencil surface is bound.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetDepthStencilSurface(IDirect3DSurface9** ppZStencilSurface) {
+   D3D9DeviceLock lock = LockDevice();
+
+   InitReturnPtr(ppZStencilSurface);
+
+   if (unlikely(ppZStencilSurface == nullptr))
+     return D3DERR_INVALIDCALL;
+
+   auto& bound = m_state.depthStencil;
+
+   if (bound == nullptr)
+     return D3DERR_NOTFOUND;
+
+   *ppZStencilSurface = bound.ref();
+   return D3D_OK;
+ }
+
+ // The Begin/EndScene functions only track the InScene flag; EndScene
+ // additionally triggers an implicit flush. Some games don't even call them.
+
+ // Marks the device as being inside a scene. Fails with
+ // D3DERR_INVALIDCALL if a scene is already in progress.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::BeginScene() {
+   D3D9DeviceLock lock = LockDevice();
+
+   const bool alreadyInScene = m_flags.test(D3D9DeviceFlag::InScene);
+
+   if (unlikely(alreadyInScene))
+     return D3DERR_INVALIDCALL;
+
+   m_flags.set(D3D9DeviceFlag::InScene);
+   return D3D_OK;
+ }
+
+
+ // Ends the current scene: performs an implicit flush and clears the
+ // InScene flag. Fails with D3DERR_INVALIDCALL if no scene is in progress.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::EndScene() {
+   D3D9DeviceLock lock = LockDevice();
+
+   const bool inScene = m_flags.test(D3D9DeviceFlag::InScene);
+
+   if (unlikely(!inScene))
+     return D3DERR_INVALIDCALL;
+
+   FlushImplicit(true);
+   m_flags.clr(D3D9DeviceFlag::InScene);
+
+   return D3D_OK;
+ }
+
+
+ // Clears the bound render targets and/or the bound depth-stencil surface.
+ //
+ // Count/pRects: optional list of rectangles to clear. With no rects, or
+ //               when the first rect covers the whole clear region, the
+ //               region itself is cleared once.
+ // Flags:        D3DCLEAR_TARGET / D3DCLEAR_ZBUFFER / D3DCLEAR_STENCIL.
+ // Color, Z, Stencil: the clear values for the respective aspects.
+ //
+ // The clear region is the current viewport, further restricted by the
+ // scissor rect when D3DRS_SCISSORTESTENABLE is set. Always returns D3D_OK.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::Clear(
+     DWORD Count,
+     const D3DRECT* pRects,
+     DWORD Flags,
+     D3DCOLOR Color,
+     float Z,
+     DWORD Stencil) {
+   // A rect array with a count of zero results in no work at all.
+   if (unlikely(!Count && pRects))
+     return D3D_OK;
+
+   D3D9DeviceLock lock = LockDevice();
+
+   const auto& vp = m_state.viewport;
+   const auto& sc = m_state.scissorRect;
+
+   bool srgb    = m_state.renderStates[D3DRS_SRGBWRITEENABLE];
+   bool scissor = m_state.renderStates[D3DRS_SCISSORTESTENABLE];
+
+   VkOffset3D offset = { int32_t(vp.X), int32_t(vp.Y), 0 };
+   VkExtent3D extent = { vp.Width, vp.Height, 1u };
+
+   if (scissor) {
+     offset.x = std::max<int32_t> (offset.x, sc.left);
+     offset.y = std::max<int32_t> (offset.y, sc.top);
+
+     extent.width  = std::min<uint32_t>(extent.width,  sc.right  - offset.x);
+     extent.height = std::min<uint32_t>(extent.height, sc.bottom - offset.y);
+   }
+
+   // This becomes pretty unreadable in one singular if statement...
+   if (Count) {
+     // If pRects is null, or our first rect encompasses the viewport:
+     if (!pRects)
+       Count = 0;
+     else if (pRects[0].x1 <= offset.x && pRects[0].y1 <= offset.y
+           && pRects[0].x2 >= offset.x + int32_t(extent.width) && pRects[0].y2 >= offset.y + int32_t(extent.height))
+       Count = 0;
+   }
+
+   // Here, Count of 0 will denote whether or not to care about user rects.
+   VkClearValue clearValueDepth;
+   clearValueDepth.depthStencil.depth   = Z;
+   clearValueDepth.depthStencil.stencil = Stencil;
+
+   VkClearValue clearValueColor;
+   DecodeD3DCOLOR(Color, clearValueColor.color.float32);
+
+   // Only clear depth/stencil aspects that were requested AND that the
+   // bound depth-stencil format actually has.
+   VkImageAspectFlags depthAspectMask = 0;
+   if (m_state.depthStencil != nullptr) {
+     if (Flags & D3DCLEAR_ZBUFFER)
+       depthAspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT;
+
+     if (Flags & D3DCLEAR_STENCIL)
+       depthAspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
+
+     depthAspectMask &= imageFormatInfo(m_state.depthStencil->GetCommonTexture()->GetFormatMapping().FormatColor)->aspectMask;
+   }
+
+   // Clears one image view. Emits a full render target clear when the
+   // (alignment-rounded) region covers the whole image, and a partial
+   // clear otherwise.
+   auto ClearImageView = [this](
+     uint32_t                 alignment,
+     VkOffset3D               offset,
+     VkExtent3D               extent,
+     const Rc<DxvkImageView>& imageView,
+     VkImageAspectFlags       aspectMask,
+     VkClearValue             clearValue) {
+
+     VkExtent3D imageExtent = imageView->mipLevelExtent(0);
+     extent.width  = std::min(imageExtent.width,  extent.width);
+     extent.height = std::min(imageExtent.height, extent.height);
+
+     if (unlikely(uint32_t(offset.x) >= imageExtent.width || uint32_t(offset.y) >= imageExtent.height))
+       return;
+
+     const bool fullClear = align(extent.width,  alignment) == align(imageExtent.width,  alignment)
+                         && align(extent.height, alignment) == align(imageExtent.height, alignment)
+                         && offset.x == 0
+                         && offset.y == 0;
+
+     if (fullClear) {
+       EmitCs([
+         cClearValue = clearValue,
+         cAspectMask = aspectMask,
+         cImageView  = imageView
+       ] (DxvkContext* ctx) {
+         ctx->clearRenderTarget(
+           cImageView,
+           cAspectMask,
+           cClearValue);
+       });
+     }
+     else {
+       EmitCs([
+         cClearValue = clearValue,
+         cAspectMask = aspectMask,
+         cImageView  = imageView,
+         cOffset     = offset,
+         cExtent     = extent
+       ] (DxvkContext* ctx) {
+         ctx->clearImageView(
+           cImageView,
+           cOffset, cExtent,
+           cAspectMask,
+           cClearValue);
+       });
+     }
+   };
+
+   // Clears the given region on the depth-stencil view and on every
+   // bound render target, as selected by Flags.
+   auto ClearViewRect = [&](
+     uint32_t   alignment,
+     VkOffset3D offset,
+     VkExtent3D extent) {
+     // Clear depth if we need to.
+     if (depthAspectMask != 0)
+       ClearImageView(alignment, offset, extent, m_state.depthStencil->GetDepthStencilView(), depthAspectMask, clearValueDepth);
+
+     // Clear render targets if we need to.
+     if (Flags & D3DCLEAR_TARGET) {
+       for (uint32_t rt : bit::BitMask(m_boundRTs)) {
+         const auto& rts = m_state.renderTargets[rt];
+         const auto& rtv = rts->GetRenderTargetView(srgb);
+
+         if (likely(rtv != nullptr)) {
+           ClearImageView(alignment, offset, extent, rtv, VK_IMAGE_ASPECT_COLOR_BIT, clearValueColor);
+
+           D3D9CommonTexture* dstTexture = rts->GetCommonTexture();
+
+           if (dstTexture->IsAutomaticMip())
+             MarkTextureMipsDirty(dstTexture);
+         }
+       }
+     }
+   };
+
+   // A Hat in Time and other UE3 games only gets partial clears here
+   // because of an oversized rt height due to their weird alignment...
+   // This works around that.
+   uint32_t alignment = m_d3d9Options.lenientClear ? 8 : 1;
+
+   if (!Count) {
+     // Clear our viewport & scissor minified region in this rendertarget.
+     ClearViewRect(alignment, offset, extent);
+   }
+   else {
+     // Clear the application provided rects.
+     for (uint32_t i = 0; i < Count; i++) {
+       VkOffset3D rectOffset = {
+         std::max<int32_t>(pRects[i].x1, offset.x),
+         std::max<int32_t>(pRects[i].y1, offset.y),
+         0
+       };
+
+       // Skip rects that do not intersect the clear region. Without this
+       // check the unsigned subtractions below wrap around and yield a
+       // huge extent, which would then be clamped to the image size and
+       // clear pixels outside of the requested rect.
+       if (std::min<int32_t>(pRects[i].x2, offset.x + int32_t(extent.width))  <= rectOffset.x
+        || std::min<int32_t>(pRects[i].y2, offset.y + int32_t(extent.height)) <= rectOffset.y)
+         continue;
+
+       VkExtent3D rectExtent = {
+         std::min<uint32_t>(pRects[i].x2, offset.x + extent.width)  - rectOffset.x,
+         std::min<uint32_t>(pRects[i].y2, offset.y + extent.height) - rectOffset.y,
+         1u
+       };
+
+       ClearViewRect(alignment, rectOffset, rectExtent);
+     }
+   }
+
+   return D3D_OK;
+ }
+
+
+ // Maps the D3D transform state type to its internal index and forwards
+ // the matrix to SetStateTransform.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetTransform(D3DTRANSFORMSTATETYPE State, const D3DMATRIX* pMatrix) {
+   const auto transformIndex = GetTransformIndex(State);
+
+   return SetStateTransform(transformIndex, pMatrix);
+ }
+
+
+ // Copies the stored matrix for the given transform state into pMatrix.
+ // Returns D3DERR_INVALIDCALL if pMatrix is null.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetTransform(D3DTRANSFORMSTATETYPE State, D3DMATRIX* pMatrix) {
+   D3D9DeviceLock lock = LockDevice();
+
+   if (unlikely(pMatrix == nullptr))
+     return D3DERR_INVALIDCALL;
+
+   const auto& stored = m_state.transforms[GetTransformIndex(State)];
+   *pMatrix = bit::cast<D3DMATRIX>(stored);
+
+   return D3D_OK;
+ }
+
+
+ // Replaces the stored transform with (stored * pMatrix). While a state
+ // block is being recorded the call is forwarded to the recorder instead.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::MultiplyTransform(D3DTRANSFORMSTATETYPE TransformState, const D3DMATRIX* pMatrix) {
+   D3D9DeviceLock lock = LockDevice();
+
+   if (unlikely(ShouldRecord()))
+     return m_recorder->MultiplyStateTransform(TransformState, pMatrix);
+
+   const uint32_t index = GetTransformIndex(TransformState);
+
+   auto& target = m_state.transforms[index];
+   target = target * ConvertMatrix(pMatrix);
+
+   m_flags.set(D3D9DeviceFlag::DirtyFFVertexData);
+
+   // The view matrix and all world matrices also feed vertex blending.
+   const bool affectsVertexBlend = index == GetTransformIndex(D3DTS_VIEW)
+                                || index >= GetTransformIndex(D3DTS_WORLD);
+
+   if (affectsVertexBlend)
+     m_flags.set(D3D9DeviceFlag::DirtyFFVertexBlend);
+
+   return D3D_OK;
+ }
+
+
+ // Sets the current viewport. While a state block is being recorded the
+ // call is forwarded to the recorder instead.
+ //
+ // Returns D3DERR_INVALIDCALL if pViewport is null. Setting a viewport
+ // equal to the current one is a no-op.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetViewport(const D3DVIEWPORT9* pViewport) {
+   D3D9DeviceLock lock = LockDevice();
+
+   // Reject null before it is dereferenced below, matching the
+   // validation done by SetScissorRect and SetMaterial.
+   if (unlikely(pViewport == nullptr))
+     return D3DERR_INVALIDCALL;
+
+   if (unlikely(ShouldRecord()))
+     return m_recorder->SetViewport(pViewport);
+
+   if (m_state.viewport == *pViewport)
+     return D3D_OK;
+
+   m_state.viewport = *pViewport;
+
+   m_flags.set(D3D9DeviceFlag::DirtyViewportScissor);
+   m_flags.set(D3D9DeviceFlag::DirtyFFViewport);
+   m_flags.set(D3D9DeviceFlag::DirtyPointScale);
+
+   return D3D_OK;
+ }
+
+
+ // Copies the current viewport into pViewport.
+ // Returns D3DERR_INVALIDCALL if pViewport is null.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetViewport(D3DVIEWPORT9* pViewport) {
+   D3D9DeviceLock lock = LockDevice();
+
+   if (!pViewport)
+     return D3DERR_INVALIDCALL;
+
+   const auto& current = m_state.viewport;
+   *pViewport = current;
+
+   return D3D_OK;
+ }
+
+
+ // Stores the given material and flags the fixed-function vertex data as
+ // dirty. While a state block is being recorded the call is forwarded to
+ // the recorder instead. Returns D3DERR_INVALIDCALL if pMaterial is null.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetMaterial(const D3DMATERIAL9* pMaterial) {
+   D3D9DeviceLock lock = LockDevice();
+
+   if (unlikely(!pMaterial))
+     return D3DERR_INVALIDCALL;
+
+   if (unlikely(ShouldRecord()))
+     return m_recorder->SetMaterial(pMaterial);
+
+   const D3DMATERIAL9& incoming = *pMaterial;
+   m_state.material = incoming;
+
+   m_flags.set(D3D9DeviceFlag::DirtyFFVertexData);
+   return D3D_OK;
+ }
+
+
+ // Copies the current material into pMaterial.
+ // Returns D3DERR_INVALIDCALL if pMaterial is null.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetMaterial(D3DMATERIAL9* pMaterial) {
+   D3D9DeviceLock lock = LockDevice();
+
+   if (unlikely(!pMaterial))
+     return D3DERR_INVALIDCALL;
+
+   const auto& current = m_state.material;
+   *pMaterial = current;
+
+   return D3D_OK;
+ }
+
+
+ // Stores the light description at the given index, growing the light
+ // list if necessary. Recording lights into a state block is not
+ // implemented: in that case a warning is logged and nothing is stored.
+ // Returns D3DERR_INVALIDCALL if pLight is null.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetLight(DWORD Index, const D3DLIGHT9* pLight) {
+   D3D9DeviceLock lock = LockDevice();
+
+   if (unlikely(!pLight))
+     return D3DERR_INVALIDCALL;
+
+   if (unlikely(ShouldRecord())) {
+     Logger::warn("D3D9DeviceEx::SetLight: State block not implemented.");
+     return D3D_OK;
+   }
+
+   auto& lights = m_state.lights;
+
+   if (Index >= lights.size())
+     lights.resize(Index + 1);
+
+   lights[Index] = *pLight;
+
+   // Only an enabled light affects the fixed-function vertex data.
+   if (m_state.IsLightEnabled(Index))
+     m_flags.set(D3D9DeviceFlag::DirtyFFVertexData);
+
+   return D3D_OK;
+ }
+
+
+ // Copies the light stored at the given index into pLight. Returns
+ // D3DERR_INVALIDCALL if pLight is null, the index is beyond the light
+ // list, or no light has been stored at that index.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetLight(DWORD Index, D3DLIGHT9* pLight) {
+   D3D9DeviceLock lock = LockDevice();
+
+   if (unlikely(!pLight))
+     return D3DERR_INVALIDCALL;
+
+   const auto& lights = m_state.lights;
+
+   if (unlikely(Index >= lights.size()))
+     return D3DERR_INVALIDCALL;
+
+   const auto& slot = lights[Index];
+
+   if (unlikely(!slot))
+     return D3DERR_INVALIDCALL;
+
+   *pLight = slot.value();
+   return D3D_OK;
+ }
+
+
+ // Enables or disables the light at the given index. A light that has
+ // never been set is first initialized to DefaultLight. Enabling claims
+ // the first free entry (UINT32_MAX) in the enabled-light table;
+ // disabling frees the entry that holds this index.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::LightEnable(DWORD Index, BOOL Enable) {
+   D3D9DeviceLock lock = LockDevice();
+
+   auto& lights = m_state.lights;
+
+   if (unlikely(Index >= lights.size()))
+     lights.resize(Index + 1);
+
+   if (unlikely(!lights[Index]))
+     lights[Index] = DefaultLight;
+
+   // Nothing to do if the light is already in the requested state.
+   if (m_state.IsLightEnabled(Index) == !!Enable)
+     return D3D_OK;
+
+   // Entry value to look for, and the value to store in its place.
+   const uint32_t lookFor     = Enable ? UINT32_MAX : uint32_t(Index);
+   const uint32_t replaceWith = Enable ? uint32_t(Index) : UINT32_MAX;
+
+   for (auto& entry : m_state.enabledLightIndices) {
+     if (entry != lookFor)
+       continue;
+
+     entry = replaceWith;
+     m_flags.set(D3D9DeviceFlag::DirtyFFVertexData);
+     m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);
+     break;
+   }
+
+   return D3D_OK;
+ }
+
+
+ // Reports whether the light at the given index is enabled. Returns
+ // D3DERR_INVALIDCALL if pEnable is null, the index is beyond the light
+ // list, or no light has been stored at that index.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetLightEnable(DWORD Index, BOOL* pEnable) {
+   D3D9DeviceLock lock = LockDevice();
+
+   if (unlikely(!pEnable))
+     return D3DERR_INVALIDCALL;
+
+   const auto& lights = m_state.lights;
+
+   if (unlikely(Index >= lights.size() || !lights[Index]))
+     return D3DERR_INVALIDCALL;
+
+   // Enabled lights are deliberately reported as 128 rather than 1.
+   const bool enabled = m_state.IsLightEnabled(Index);
+   *pEnable = enabled ? 128 : 0;
+
+   return D3D_OK;
+ }
+
+
+ // Stores the four coefficients of a user clip plane. While a state
+ // block is being recorded the call is forwarded to the recorder
+ // instead. The clip planes are only flagged dirty when a coefficient
+ // changed and the plane is enabled in D3DRS_CLIPPLANEENABLE.
+ // Returns D3DERR_INVALIDCALL for an out-of-range index or null pPlane.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetClipPlane(DWORD Index, const float* pPlane) {
+   D3D9DeviceLock lock = LockDevice();
+
+   if (unlikely(Index >= caps::MaxClipPlanes || !pPlane))
+     return D3DERR_INVALIDCALL;
+
+   if (unlikely(ShouldRecord()))
+     return m_recorder->SetClipPlane(Index, pPlane);
+
+   auto& plane = m_state.clipPlanes[Index];
+
+   bool changed = false;
+
+   for (uint32_t c = 0; c < 4; c++) {
+     if (plane.coeff[c] != pPlane[c])
+       changed = true;
+
+     plane.coeff[c] = pPlane[c];
+   }
+
+   const bool enabled = m_state.renderStates[D3DRS_CLIPPLANEENABLE] & (1u << Index);
+
+   if (changed && enabled)
+     m_flags.set(D3D9DeviceFlag::DirtyClipPlanes);
+
+   return D3D_OK;
+ }
+
+
+ // Copies the four coefficients of a user clip plane into pPlane.
+ // Returns D3DERR_INVALIDCALL for an out-of-range index or null pPlane.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetClipPlane(DWORD Index, float* pPlane) {
+   D3D9DeviceLock lock = LockDevice();
+
+   if (unlikely(Index >= caps::MaxClipPlanes || !pPlane))
+     return D3DERR_INVALIDCALL;
+
+   const auto& plane = m_state.clipPlanes[Index];
+
+   for (uint32_t c = 0; c < 4; c++) {
+     pPlane[c] = plane.coeff[c];
+   }
+
+   return D3D_OK;
+ }
+
+
+ // Stores a render state value and marks the device state that depends
+ // on it as dirty. Out-of-range states are ignored and reported as
+ // D3D_OK. While a state block is being recorded the call is forwarded
+ // to the recorder instead. Also handles the vendor "driver hack" values
+ // written to D3DRS_POINTSIZE and D3DRS_ADAPTIVETESS_Y.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetRenderState(D3DRENDERSTATETYPE State, DWORD Value) {
+ D3D9DeviceLock lock = LockDevice();
+
+ // D3D9 only allows reading for values 0 and 7-255 so we don't need to do anything but return OK
+ if (unlikely(State > 255 || (State < D3DRS_ZENABLE && State != 0))) {
+ return D3D_OK;
+ }
+
+ if (unlikely(ShouldRecord()))
+ return m_recorder->SetRenderState(State, Value);
+
+ auto& states = m_state.renderStates;
+
+ bool changed = states[State] != Value;
+
+ // Nothing below runs when the value is unchanged.
+ if (likely(changed)) {
+ // Snapshot derived state before the write so the cases below can
+ // detect whether this change toggled it.
+ const bool oldClipPlaneEnabled = IsClipPlaneEnabled();
+
+ const bool oldDepthBiasEnabled = IsDepthBiasEnabled();
+
+ const bool oldATOC = IsAlphaToCoverageEnabled();
+ const bool oldNVDB = states[D3DRS_ADAPTIVETESS_X] == uint32_t(D3D9Format::NVDB);
+ const bool oldAlphaTest = IsAlphaTestEnabled();
+
+ states[State] = Value;
+
+ // AMD's driver hack for ATOC and RESZ
+ if (unlikely(State == D3DRS_POINTSIZE)) {
+ // ATOC
+ constexpr uint32_t AlphaToCoverageEnable = uint32_t(D3D9Format::A2M1);
+ constexpr uint32_t AlphaToCoverageDisable = uint32_t(D3D9Format::A2M0);
+
+ if (Value == AlphaToCoverageEnable
+ || Value == AlphaToCoverageDisable) {
+ m_amdATOC = Value == AlphaToCoverageEnable;
+
+ bool newATOC = IsAlphaToCoverageEnabled();
+ bool newAlphaTest = IsAlphaTestEnabled();
+
+ if (oldATOC != newATOC)
+ m_flags.set(D3D9DeviceFlag::DirtyMultiSampleState);
+
+ if (oldAlphaTest != newAlphaTest)
+ m_flags.set(D3D9DeviceFlag::DirtyAlphaTestState);
+
+ return D3D_OK;
+ }
+
+ // RESZ
+ constexpr uint32_t RESZ = 0x7fa05000;
+ if (Value == RESZ) {
+ ResolveZ();
+ return D3D_OK;
+ }
+ }
+
+ // NV's driver hack for ATOC.
+ if (unlikely(State == D3DRS_ADAPTIVETESS_Y)) {
+ constexpr uint32_t AlphaToCoverageEnable = uint32_t(D3D9Format::ATOC);
+ constexpr uint32_t AlphaToCoverageDisable = 0;
+
+ if (Value == AlphaToCoverageEnable
+ || Value == AlphaToCoverageDisable) {
+ m_nvATOC = Value == AlphaToCoverageEnable;
+
+ bool newATOC = IsAlphaToCoverageEnabled();
+ bool newAlphaTest = IsAlphaTestEnabled();
+
+ if (oldATOC != newATOC)
+ m_flags.set(D3D9DeviceFlag::DirtyMultiSampleState);
+
+ if (oldAlphaTest != newAlphaTest)
+ m_flags.set(D3D9DeviceFlag::DirtyAlphaTestState);
+
+ return D3D_OK;
+ }
+
+ if (unlikely(Value == uint32_t(D3D9Format::COPM))) {
+ // UE3 calls this MinimalNVIDIADriverShaderOptimization
+ Logger::info("D3D9DeviceEx::SetRenderState: MinimalNVIDIADriverShaderOptimization is unsupported");
+ return D3D_OK;
+ }
+ }
+
+ // Regular render states: flag whatever derived state depends on them.
+ switch (State) {
+ case D3DRS_SEPARATEALPHABLENDENABLE:
+ case D3DRS_ALPHABLENDENABLE:
+ case D3DRS_BLENDOP:
+ case D3DRS_BLENDOPALPHA:
+ case D3DRS_DESTBLEND:
+ case D3DRS_DESTBLENDALPHA:
+ case D3DRS_SRCBLEND:
+ case D3DRS_SRCBLENDALPHA:
+ m_flags.set(D3D9DeviceFlag::DirtyBlendState);
+ break;
+
+ case D3DRS_COLORWRITEENABLE:
+ UpdateActiveRTs(0);
+ m_flags.set(D3D9DeviceFlag::DirtyBlendState);
+ break;
+ case D3DRS_COLORWRITEENABLE1:
+ UpdateActiveRTs(1);
+ m_flags.set(D3D9DeviceFlag::DirtyBlendState);
+ break;
+ case D3DRS_COLORWRITEENABLE2:
+ UpdateActiveRTs(2);
+ m_flags.set(D3D9DeviceFlag::DirtyBlendState);
+ break;
+ case D3DRS_COLORWRITEENABLE3:
+ UpdateActiveRTs(3);
+ m_flags.set(D3D9DeviceFlag::DirtyBlendState);
+ break;
+
+ case D3DRS_ALPHATESTENABLE: {
+ bool newATOC = IsAlphaToCoverageEnabled();
+ bool newAlphaTest = IsAlphaTestEnabled();
+
+ if (oldATOC != newATOC)
+ m_flags.set(D3D9DeviceFlag::DirtyMultiSampleState);
+
+ if (oldAlphaTest != newAlphaTest)
+ m_flags.set(D3D9DeviceFlag::DirtyAlphaTestState);
+
+ break;
+ }
+
+ case D3DRS_ALPHAFUNC:
+ m_flags.set(D3D9DeviceFlag::DirtyAlphaTestState);
+ break;
+
+ case D3DRS_BLENDFACTOR:
+ BindBlendFactor();
+ break;
+
+ case D3DRS_MULTISAMPLEMASK:
+ if (m_flags.test(D3D9DeviceFlag::ValidSampleMask))
+ m_flags.set(D3D9DeviceFlag::DirtyMultiSampleState);
+ break;
+
+ case D3DRS_ZWRITEENABLE:
+ if (m_activeHazardsDS != 0)
+ m_flags.set(D3D9DeviceFlag::DirtyFramebuffer);
+
+ m_flags.set(D3D9DeviceFlag::DirtyDepthStencilState);
+ break;
+
+ case D3DRS_ZENABLE:
+ case D3DRS_ZFUNC:
+ case D3DRS_TWOSIDEDSTENCILMODE:
+ case D3DRS_STENCILENABLE:
+ case D3DRS_STENCILFAIL:
+ case D3DRS_STENCILZFAIL:
+ case D3DRS_STENCILPASS:
+ case D3DRS_STENCILFUNC:
+ case D3DRS_CCW_STENCILFAIL:
+ case D3DRS_CCW_STENCILZFAIL:
+ case D3DRS_CCW_STENCILPASS:
+ case D3DRS_CCW_STENCILFUNC:
+ case D3DRS_STENCILMASK:
+ case D3DRS_STENCILWRITEMASK:
+ m_flags.set(D3D9DeviceFlag::DirtyDepthStencilState);
+ break;
+
+ case D3DRS_STENCILREF:
+ BindDepthStencilRefrence();
+ break;
+
+ case D3DRS_SCISSORTESTENABLE:
+ m_flags.set(D3D9DeviceFlag::DirtyViewportScissor);
+ break;
+
+ case D3DRS_SRGBWRITEENABLE:
+ m_flags.set(D3D9DeviceFlag::DirtyFramebuffer);
+ break;
+
+ case D3DRS_DEPTHBIAS:
+ case D3DRS_SLOPESCALEDEPTHBIAS: {
+ const bool depthBiasEnabled = IsDepthBiasEnabled();
+
+ if (depthBiasEnabled != oldDepthBiasEnabled)
+ m_flags.set(D3D9DeviceFlag::DirtyRasterizerState);
+
+ if (depthBiasEnabled)
+ m_flags.set(D3D9DeviceFlag::DirtyDepthBias);
+
+ break;
+ }
+ case D3DRS_CULLMODE:
+ case D3DRS_FILLMODE:
+ m_flags.set(D3D9DeviceFlag::DirtyRasterizerState);
+ break;
+
+ case D3DRS_CLIPPLANEENABLE: {
+ const bool clipPlaneEnabled = IsClipPlaneEnabled();
+
+ if (clipPlaneEnabled != oldClipPlaneEnabled)
+ m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);
+
+ m_flags.set(D3D9DeviceFlag::DirtyClipPlanes);
+ break;
+ }
+
+ case D3DRS_ALPHAREF:
+ UpdatePushConstant<D3D9RenderStateItem::AlphaRef>();
+ break;
+
+ case D3DRS_TEXTUREFACTOR:
+ m_flags.set(D3D9DeviceFlag::DirtyFFPixelData);
+ break;
+
+ case D3DRS_DIFFUSEMATERIALSOURCE:
+ case D3DRS_AMBIENTMATERIALSOURCE:
+ case D3DRS_SPECULARMATERIALSOURCE:
+ case D3DRS_EMISSIVEMATERIALSOURCE:
+ case D3DRS_COLORVERTEX:
+ case D3DRS_LIGHTING:
+ case D3DRS_NORMALIZENORMALS:
+ case D3DRS_LOCALVIEWER:
+ m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);
+ break;
+
+ case D3DRS_AMBIENT:
+ m_flags.set(D3D9DeviceFlag::DirtyFFVertexData);
+ break;
+
+ case D3DRS_SPECULARENABLE:
+ m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader);
+ break;
+
+ case D3DRS_FOGENABLE:
+ case D3DRS_FOGVERTEXMODE:
+ case D3DRS_FOGTABLEMODE:
+ m_flags.set(D3D9DeviceFlag::DirtyFogState);
+ break;
+
+ case D3DRS_RANGEFOGENABLE:
+ m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);
+ break;
+
+ case D3DRS_FOGCOLOR:
+ m_flags.set(D3D9DeviceFlag::DirtyFogColor);
+ break;
+
+ case D3DRS_FOGSTART:
+ m_flags.set(D3D9DeviceFlag::DirtyFogScale);
+ break;
+
+ case D3DRS_FOGEND:
+ m_flags.set(D3D9DeviceFlag::DirtyFogScale);
+ m_flags.set(D3D9DeviceFlag::DirtyFogEnd);
+ break;
+
+ case D3DRS_FOGDENSITY:
+ m_flags.set(D3D9DeviceFlag::DirtyFogDensity);
+ break;
+
+ case D3DRS_POINTSIZE:
+ UpdatePushConstant<D3D9RenderStateItem::PointSize>();
+ break;
+
+ case D3DRS_POINTSIZE_MIN:
+ UpdatePushConstant<D3D9RenderStateItem::PointSizeMin>();
+ break;
+
+ case D3DRS_POINTSIZE_MAX:
+ UpdatePushConstant<D3D9RenderStateItem::PointSizeMax>();
+ break;
+
+ case D3DRS_POINTSCALE_A:
+ case D3DRS_POINTSCALE_B:
+ case D3DRS_POINTSCALE_C:
+ m_flags.set(D3D9DeviceFlag::DirtyPointScale);
+ break;
+
+ case D3DRS_POINTSCALEENABLE:
+ case D3DRS_POINTSPRITEENABLE:
+ // Nothing to do here!
+ // This is handled in UpdatePointMode.
+ break;
+
+ case D3DRS_SHADEMODE:
+ if (m_state.pixelShader != nullptr) {
+ BindShader<DxsoProgramType::PixelShader>(
+ GetCommonShader(m_state.pixelShader),
+ GetPixelShaderPermutation());
+ }
+
+ m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader);
+ break;
+
+ case D3DRS_TWEENFACTOR:
+ m_flags.set(D3D9DeviceFlag::DirtyFFVertexData);
+ break;
+
+ case D3DRS_VERTEXBLEND:
+ m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);
+ break;
+
+ case D3DRS_INDEXEDVERTEXBLENDENABLE:
+ if (CanSWVP() && Value)
+ m_flags.set(D3D9DeviceFlag::DirtyFFVertexBlend);
+
+ m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);
+ break;
+
+ case D3DRS_ADAPTIVETESS_X:
+ case D3DRS_ADAPTIVETESS_Z:
+ case D3DRS_ADAPTIVETESS_W:
+ if (states[D3DRS_ADAPTIVETESS_X] == uint32_t(D3D9Format::NVDB) || oldNVDB) {
+ m_flags.set(D3D9DeviceFlag::DirtyDepthBounds);
+ break;
+ }
+ // No break above when NVDB is not involved: control falls through
+ // to the default case and the state is reported as unhandled.
+
+ default:
+ // Indexed by State, which is at most 255 here; warns once per state.
+ static bool s_errorShown[256];
+
+ if (!std::exchange(s_errorShown[State], true))
+ Logger::warn(str::format("D3D9DeviceEx::SetRenderState: Unhandled render state ", State));
+ break;
+ }
+ }
+
+ return D3D_OK;
+ }
+
+
+ // Reads back a render state value. Returns D3DERR_INVALIDCALL for a
+ // null pValue or an out-of-range state. States outside of
+ // [D3DRS_ZENABLE, D3DRS_BLENDOPALPHA] read back as 0.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetRenderState(D3DRENDERSTATETYPE State, DWORD* pValue) {
+   D3D9DeviceLock lock = LockDevice();
+
+   if (unlikely(!pValue))
+     return D3DERR_INVALIDCALL;
+
+   const bool outOfRange = State > 255 || (State < D3DRS_ZENABLE && State != 0);
+
+   if (unlikely(outOfRange))
+     return D3DERR_INVALIDCALL;
+
+   const bool tracked = !(State < D3DRS_ZENABLE || State > D3DRS_BLENDOPALPHA);
+
+   if (tracked)
+     *pValue = m_state.renderStates[State];
+   else
+     *pValue = 0;
+
+   return D3D_OK;
+ }
+
+
+ // Creates a state block of the requested type and returns it through
+ // ppSB. A DxvkError thrown during creation is logged and reported as
+ // D3DERR_INVALIDCALL, as is a null ppSB.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateStateBlock(
+     D3DSTATEBLOCKTYPE Type,
+     IDirect3DStateBlock9** ppSB) {
+   D3D9DeviceLock lock = LockDevice();
+
+   InitReturnPtr(ppSB);
+
+   if (unlikely(!ppSB))
+     return D3DERR_INVALIDCALL;
+
+   try {
+     const Com<D3D9StateBlock> block = new D3D9StateBlock(this, ConvertStateBlockType(Type));
+
+     *ppSB = block.ref();
+     return D3D_OK;
+   }
+   catch (const DxvkError& error) {
+     Logger::err(error.message());
+     return D3DERR_INVALIDCALL;
+   }
+ }
+
+
+ // Starts recording into a new state block. Fails with
+ // D3DERR_INVALIDCALL if a recording is already in progress.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::BeginStateBlock() {
+   D3D9DeviceLock lock = LockDevice();
+
+   const bool alreadyRecording = m_recorder != nullptr;
+
+   if (unlikely(alreadyRecording))
+     return D3DERR_INVALIDCALL;
+
+   m_recorder = new D3D9StateBlock(this, D3D9StateBlockType::None);
+   return D3D_OK;
+ }
+
+
+ // Stops recording and hands the recorded state block to the caller
+ // through ppSB. Fails with D3DERR_INVALIDCALL if ppSB is null or no
+ // recording is in progress.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::EndStateBlock(IDirect3DStateBlock9** ppSB) {
+   D3D9DeviceLock lock = LockDevice();
+
+   InitReturnPtr(ppSB);
+
+   if (unlikely(!ppSB))
+     return D3DERR_INVALIDCALL;
+
+   if (unlikely(m_recorder == nullptr))
+     return D3DERR_INVALIDCALL;
+
+   *ppSB = m_recorder.ref();
+
+   // The device no longer records once the block has been handed out.
+   m_recorder = nullptr;
+
+   return D3D_OK;
+ }
+
+
+ // Stub: logs a warning, ignores pClipStatus and reports success.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetClipStatus(const D3DCLIPSTATUS9* pClipStatus) {
+   Logger::warn("D3D9DeviceEx::SetClipStatus: Stub");
+
+   return D3D_OK;
+ }
+
+
+ // Stub: logs a warning and reports success without writing to
+ // pClipStatus.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetClipStatus(D3DCLIPSTATUS9* pClipStatus) {
+   Logger::warn("D3D9DeviceEx::GetClipStatus: Stub");
+
+   return D3D_OK;
+ }
+
+
+ // Returns the texture bound to the given stage through ppTexture
+ // (obtained via ref()). An invalid stage yields a null texture together
+ // with D3D_OK. Returns D3DERR_INVALIDCALL if ppTexture is null.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetTexture(DWORD Stage, IDirect3DBaseTexture9** ppTexture) {
+   D3D9DeviceLock lock = LockDevice();
+
+   if (!ppTexture)
+     return D3DERR_INVALIDCALL;
+
+   *ppTexture = nullptr;
+
+   if (unlikely(InvalidSampler(Stage)))
+     return D3D_OK;
+
+   const DWORD slot = RemapSamplerState(Stage);
+   *ppTexture = ref(m_state.textures[slot]);
+
+   return D3D_OK;
+ }
+
+
+ // Binds a texture to the given stage by forwarding to SetStateTexture
+ // with the remapped sampler index. An invalid stage is silently
+ // accepted with D3D_OK.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetTexture(DWORD Stage, IDirect3DBaseTexture9* pTexture) {
+   if (unlikely(InvalidSampler(Stage)))
+     return D3D_OK;
+
+   const DWORD slot = RemapSamplerState(Stage);
+   return SetStateTexture(slot, pTexture);
+ }
+
+
+ // Reads back a texture stage state value. *pValue is zeroed before the
+ // stage and type are validated. Returns D3DERR_INVALIDCALL for a null
+ // pValue, an out-of-range stage or an out-of-range remapped type.
+ //
+ // NOTE(review): unlike GetSamplerState and GetTexture, this does not
+ // call LockDevice() - confirm whether that is intentional.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetTextureStageState(
+     DWORD Stage,
+     D3DTEXTURESTAGESTATETYPE Type,
+     DWORD* pValue) {
+   const auto remappedType = RemapTextureStageStateType(Type);
+
+   if (unlikely(!pValue))
+     return D3DERR_INVALIDCALL;
+
+   *pValue = 0;
+
+   const bool stageValid = !(Stage >= caps::TextureStageCount);
+   if (unlikely(!stageValid))
+     return D3DERR_INVALIDCALL;
+
+   const bool typeValid = !(remappedType >= TextureStageStateCount);
+   if (unlikely(!typeValid))
+     return D3DERR_INVALIDCALL;
+
+   *pValue = m_state.textureStages[Stage][remappedType];
+   return D3D_OK;
+ }
+
+ // Remaps the D3D texture stage state type and forwards the call to
+ // SetStateTextureStageState.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetTextureStageState(
+     DWORD Stage,
+     D3DTEXTURESTAGESTATETYPE Type,
+     DWORD Value) {
+   const auto remappedType = RemapTextureStageStateType(Type);
+
+   return SetStateTextureStageState(Stage, remappedType, Value);
+ }
+
+
+ // Reads back a sampler state value. An invalid sampler yields 0
+ // together with D3D_OK. Returns D3DERR_INVALIDCALL if pValue is null.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetSamplerState(
+     DWORD Sampler,
+     D3DSAMPLERSTATETYPE Type,
+     DWORD* pValue) {
+   D3D9DeviceLock lock = LockDevice();
+
+   if (unlikely(!pValue))
+     return D3DERR_INVALIDCALL;
+
+   *pValue = 0;
+
+   if (unlikely(InvalidSampler(Sampler)))
+     return D3D_OK;
+
+   const DWORD slot = RemapSamplerState(Sampler);
+   *pValue = m_state.samplerStates[slot][Type];
+
+   return D3D_OK;
+ }
+
+
+ // Sets a sampler state by forwarding to SetStateSamplerState with the
+ // remapped sampler index. An invalid sampler is silently accepted with
+ // D3D_OK.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetSamplerState(
+     DWORD Sampler,
+     D3DSAMPLERSTATETYPE Type,
+     DWORD Value) {
+   if (unlikely(InvalidSampler(Sampler)))
+     return D3D_OK;
+
+   const uint32_t slot = RemapSamplerState(Sampler);
+   return SetStateSamplerState(slot, Type, Value);
+ }
+
+
+ // Always reports success; when pNumPasses is provided it is set to 1.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::ValidateDevice(DWORD* pNumPasses) {
+   if (pNumPasses) {
+     *pNumPasses = 1;
+   }
+
+   return D3D_OK;
+ }
+
+
+ // No-op: both arguments are ignored.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetPaletteEntries(UINT PaletteNumber, const PALETTEENTRY* pEntries) {
+   // Palettes are not advertised as supported, but this call
+   // still has to succeed.
+   return D3D_OK;
+ }
+
+
+ // Palette stub: always fails with D3DERR_INVALIDCALL and never writes pEntries.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetPaletteEntries(UINT PaletteNumber, PALETTEENTRY* pEntries) {
+ // Don't advertise support for this...
+ return D3DERR_INVALIDCALL;
+ }
+
+
+ // Palette stub: PaletteNumber is ignored and D3D_OK is returned.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetCurrentTexturePalette(UINT PaletteNumber) {
+ // This succeeds even though we don't advertise support.
+ return D3D_OK;
+ }
+
+
+ // Palette stub: always fails with D3DERR_INVALIDCALL and never writes PaletteNumber.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetCurrentTexturePalette(UINT *PaletteNumber) {
+ // Don't advertise support for this...
+ return D3DERR_INVALIDCALL;
+ }
+
+
+ // Stores a new scissor rectangle. While recording, the call is forwarded
+ // to m_recorder instead. Setting the rectangle that is already stored is
+ // a no-op; otherwise the viewport/scissor state is flagged dirty.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetScissorRect(const RECT* pRect) {
+   auto lock = LockDevice();
+
+   if (unlikely(!pRect))
+     return D3DERR_INVALIDCALL;
+
+   if (unlikely(ShouldRecord()))
+     return m_recorder->SetScissorRect(pRect);
+
+   const RECT& requested = *pRect;
+
+   if (!(m_state.scissorRect == requested)) {
+     m_state.scissorRect = requested;
+     m_flags.set(D3D9DeviceFlag::DirtyViewportScissor);
+   }
+
+   return D3D_OK;
+ }
+
+
+ // Copies the stored scissor rectangle into *pRect.
+ // Returns D3DERR_INVALIDCALL when pRect is null.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetScissorRect(RECT* pRect) {
+   auto lock = LockDevice();
+
+   if (likely(pRect != nullptr)) {
+     *pRect = m_state.scissorRect;
+     return D3D_OK;
+   }
+
+   return D3DERR_INVALIDCALL;
+ }
+
+
+ // Toggles the software vertex processing flag. Enabling it is rejected
+ // with D3DERR_INVALIDCALL when CanSWVP() reports false.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetSoftwareVertexProcessing(BOOL bSoftware) {
+   D3D9DeviceLock lock = LockDevice();
+
+   if (bSoftware) {
+     if (!CanSWVP())
+       return D3DERR_INVALIDCALL;
+   }
+
+   m_isSWVP = bSoftware;
+
+   return D3D_OK;
+ }
+
+
+ // Returns the current software vertex processing flag, read under the device lock.
+ BOOL STDMETHODCALLTYPE D3D9DeviceEx::GetSoftwareVertexProcessing() {
+   D3D9DeviceLock lock = LockDevice();
+
+   const BOOL swvpEnabled = m_isSWVP;
+   return swvpEnabled;
+ }
+
+
+ // N-patch stub: nSegments is ignored and D3D_OK is returned.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetNPatchMode(float nSegments) {
+ return D3D_OK;
+ }
+
+
+ // N-patch stub: always reports 0.0f, regardless of earlier SetNPatchMode calls.
+ float STDMETHODCALLTYPE D3D9DeviceEx::GetNPatchMode() {
+ return 0.0f;
+ }
+
+
+ // Issues a non-indexed draw of PrimitiveCount primitives of PrimitiveType,
+ // starting at StartVertex. When config::FullValidation is enabled, a missing
+ // vertex declaration is rejected with D3DERR_INVALIDCALL and a zero
+ // primitive count returns S_OK without drawing.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DrawPrimitive(
+ D3DPRIMITIVETYPE PrimitiveType,
+ UINT StartVertex,
+ UINT PrimitiveCount) {
+ D3D9DeviceLock lock = LockDevice();
+
+ if (unlikely(config::FullValidation && m_state.vertexDecl == nullptr))
+ return D3DERR_INVALIDCALL;
+
+ if (unlikely(config::FullValidation && !PrimitiveCount))
+ return S_OK;
+
+ PrepareDraw(PrimitiveType);
+
+ // The draw parameters, including the current instance count, are captured
+ // by value in the lambda handed to EmitCs.
+ EmitCs([this,
+ cPrimType = PrimitiveType,
+ cPrimCount = PrimitiveCount,
+ cStartVertex = StartVertex,
+ cInstanceCount = GetInstanceCount()
+ ](DxvkContext* ctx) {
+ // Turn the primitive count into vertex and instance counts for the draw call.
+ auto drawInfo = GenerateDrawInfo(cPrimType, cPrimCount, cInstanceCount);
+
+ ApplyPrimitiveType(ctx, cPrimType);
+
+ ctx->draw(
+ drawInfo.vertexCount, drawInfo.instanceCount,
+ cStartVertex, 0);
+ });
+
+ return D3D_OK;
+ }
+
+
+ // Issues an indexed draw of PrimitiveCount primitives of PrimitiveType,
+ // reading indices from StartIndex and offsetting them by BaseVertexIndex.
+ // MinVertexIndex and NumVertices are not used by this implementation.
+ // When config::FullValidation is enabled, a missing vertex declaration is
+ // rejected with D3DERR_INVALIDCALL and a zero primitive count returns S_OK
+ // without drawing.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DrawIndexedPrimitive(
+ D3DPRIMITIVETYPE PrimitiveType,
+ INT BaseVertexIndex,
+ UINT MinVertexIndex,
+ UINT NumVertices,
+ UINT StartIndex,
+ UINT PrimitiveCount) {
+ D3D9DeviceLock lock = LockDevice();
+
+ if (unlikely(config::FullValidation && m_state.vertexDecl == nullptr))
+ return D3DERR_INVALIDCALL;
+
+ if (unlikely(config::FullValidation && !PrimitiveCount))
+ return S_OK;
+
+ PrepareDraw(PrimitiveType);
+
+ // The draw parameters, including the current instance count, are captured
+ // by value in the lambda handed to EmitCs.
+ EmitCs([this,
+ cPrimType = PrimitiveType,
+ cPrimCount = PrimitiveCount,
+ cStartIndex = StartIndex,
+ cBaseVertexIndex = BaseVertexIndex,
+ cInstanceCount = GetInstanceCount()
+ ](DxvkContext* ctx) {
+ // drawInfo.vertexCount is passed as the first argument of drawIndexed below.
+ auto drawInfo = GenerateDrawInfo(cPrimType, cPrimCount, cInstanceCount);
+
+ ApplyPrimitiveType(ctx, cPrimType);
+
+ ctx->drawIndexed(
+ drawInfo.vertexCount, drawInfo.instanceCount,
+ cStartIndex,
+ cBaseVertexIndex, 0);
+ });
+
+ return D3D_OK;
+ }
+
+
+ // Draws PrimitiveCount primitives from caller-provided vertex memory.
+ // The vertex data is copied into a temporary buffer slice that is bound
+ // as stream 0 for the draw and unbound afterwards. On return, the tracked
+ // state of stream 0 (buffer, offset, stride) has been cleared.
+ // When config::FullValidation is enabled, a missing vertex declaration is
+ // rejected with D3DERR_INVALIDCALL and a zero primitive count returns S_OK
+ // without drawing.
+ // NOTE(review): pVertexStreamZeroData is not null-checked in this function.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DrawPrimitiveUP(
+ D3DPRIMITIVETYPE PrimitiveType,
+ UINT PrimitiveCount,
+ const void* pVertexStreamZeroData,
+ UINT VertexStreamZeroStride) {
+ D3D9DeviceLock lock = LockDevice();
+
+ if (unlikely(config::FullValidation && m_state.vertexDecl == nullptr))
+ return D3DERR_INVALIDCALL;
+
+ if (unlikely(config::FullValidation && !PrimitiveCount))
+ return S_OK;
+
+ PrepareDraw(PrimitiveType);
+
+ // Only vertexCount is used from this result, to size the upload.
+ auto drawInfo = GenerateDrawInfo(PrimitiveType, PrimitiveCount, 0);
+
+ const uint32_t dataSize = GetUPDataSize(drawInfo.vertexCount, VertexStreamZeroStride);
+ const uint32_t bufferSize = GetUPBufferSize(drawInfo.vertexCount, VertexStreamZeroStride);
+
+ // Copy the user data into a temporary buffer before the lambda is emitted.
+ auto upSlice = AllocTempBuffer<true>(bufferSize);
+ FillUPVertexBuffer(upSlice.mapPtr, pVertexStreamZeroData, dataSize, bufferSize);
+
+ EmitCs([this,
+ cBufferSlice = std::move(upSlice.slice),
+ cPrimType = PrimitiveType,
+ cPrimCount = PrimitiveCount,
+ cInstanceCount = GetInstanceCount(),
+ cStride = VertexStreamZeroStride
+ ](DxvkContext* ctx) {
+ // Recomputed here with the captured instance count.
+ auto drawInfo = GenerateDrawInfo(cPrimType, cPrimCount, cInstanceCount);
+
+ ApplyPrimitiveType(ctx, cPrimType);
+
+ ctx->bindVertexBuffer(0, cBufferSlice, cStride);
+ ctx->draw(
+ drawInfo.vertexCount, drawInfo.instanceCount,
+ 0, 0);
+ // Unbind the temporary buffer from stream 0 again.
+ ctx->bindVertexBuffer(0, DxvkBufferSlice(), 0);
+ });
+
+ // Stream 0 no longer refers to an application buffer after this call.
+ m_state.vertexBuffers[0].vertexBuffer = nullptr;
+ m_state.vertexBuffers[0].offset = 0;
+ m_state.vertexBuffers[0].stride = 0;
+
+ return D3D_OK;
+ }
+
+
+ // Draws indexed primitives from caller-provided vertex and index memory.
+ // Both are copied into a single temporary buffer slice: vertex data first,
+ // index data directly after it. On return, the tracked state of stream 0
+ // and the tracked index buffer have been cleared.
+ // When config::FullValidation is enabled, a missing vertex declaration is
+ // rejected with D3DERR_INVALIDCALL and a zero primitive count returns S_OK
+ // without drawing.
+ // NOTE(review): pIndexData and pVertexStreamZeroData are not null-checked here.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DrawIndexedPrimitiveUP(
+ D3DPRIMITIVETYPE PrimitiveType,
+ UINT MinVertexIndex,
+ UINT NumVertices,
+ UINT PrimitiveCount,
+ const void* pIndexData,
+ D3DFORMAT IndexDataFormat,
+ const void* pVertexStreamZeroData,
+ UINT VertexStreamZeroStride) {
+ D3D9DeviceLock lock = LockDevice();
+
+ if (unlikely(config::FullValidation && m_state.vertexDecl == nullptr))
+ return D3DERR_INVALIDCALL;
+
+ if (unlikely(config::FullValidation && !PrimitiveCount))
+ return S_OK;
+
+ PrepareDraw(PrimitiveType);
+
+ // vertexCount from this result is used as the number of indices to copy.
+ auto drawInfo = GenerateDrawInfo(PrimitiveType, PrimitiveCount, 0);
+
+ // The vertex upload covers vertices [0, MinVertexIndex + NumVertices).
+ const uint32_t vertexDataSize = GetUPDataSize(MinVertexIndex + NumVertices, VertexStreamZeroStride);
+ const uint32_t vertexBufferSize = GetUPBufferSize(MinVertexIndex + NumVertices, VertexStreamZeroStride);
+
+ // Any format other than D3DFMT_INDEX16 is treated as 4-byte indices.
+ const uint32_t indexSize = IndexDataFormat == D3DFMT_INDEX16 ? 2 : 4;
+ const uint32_t indicesSize = drawInfo.vertexCount * indexSize;
+
+ const uint32_t upSize = vertexBufferSize + indicesSize;
+
+ auto upSlice = AllocTempBuffer<true>(upSize);
+ uint8_t* data = reinterpret_cast<uint8_t*>(upSlice.mapPtr);
+ FillUPVertexBuffer(data, pVertexStreamZeroData, vertexDataSize, vertexBufferSize);
+ // Indices are placed immediately after the vertex region.
+ std::memcpy(data + vertexBufferSize, pIndexData, indicesSize);
+
+ EmitCs([this,
+ cVertexSize = vertexBufferSize,
+ cBufferSlice = std::move(upSlice.slice),
+ cPrimType = PrimitiveType,
+ cPrimCount = PrimitiveCount,
+ cStride = VertexStreamZeroStride,
+ cInstanceCount = GetInstanceCount(),
+ cIndexType = DecodeIndexType(
+ static_cast<D3D9Format>(IndexDataFormat))
+ ](DxvkContext* ctx) {
+ auto drawInfo = GenerateDrawInfo(cPrimType, cPrimCount, cInstanceCount);
+
+ ApplyPrimitiveType(ctx, cPrimType);
+
+ // Split the temporary slice back into its vertex and index parts.
+ ctx->bindVertexBuffer(0, cBufferSlice.subSlice(0, cVertexSize), cStride);
+ ctx->bindIndexBuffer(cBufferSlice.subSlice(cVertexSize, cBufferSlice.length() - cVertexSize), cIndexType);
+ ctx->drawIndexed(
+ drawInfo.vertexCount, drawInfo.instanceCount,
+ 0,
+ 0, 0);
+ // Unbind the temporary vertex and index buffers again.
+ ctx->bindVertexBuffer(0, DxvkBufferSlice(), 0);
+ ctx->bindIndexBuffer(DxvkBufferSlice(), VK_INDEX_TYPE_UINT32);
+ });
+
+ // Neither stream 0 nor the index buffer refers to an application buffer now.
+ m_state.vertexBuffers[0].vertexBuffer = nullptr;
+ m_state.vertexBuffers[0].offset = 0;
+ m_state.vertexBuffers[0].stride = 0;
+
+ m_state.indices = nullptr;
+
+ return D3D_OK;
+ }
+
+
+ // Runs the current vertex pipeline over VertexCount vertices starting at
+ // SrcStartIndex and writes the results into pDestBuffer at DestIndex,
+ // using a geometry shader obtained from m_swvpEmulator.
+ //
+ // pVertexDecl describes the output layout. It may be null, in which case a
+ // declaration built from the destination buffer's FVF is used (cached in
+ // m_fvfTable). Returns D3DERR_INVALIDCALL when pDestBuffer is null. When
+ // SupportsSWVP() is false the call logs once and returns D3D_OK without
+ // doing any work. Flags is not used by this implementation.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::ProcessVertices(
+ UINT SrcStartIndex,
+ UINT DestIndex,
+ UINT VertexCount,
+ IDirect3DVertexBuffer9* pDestBuffer,
+ IDirect3DVertexDeclaration9* pVertexDecl,
+ DWORD Flags) {
+   D3D9DeviceLock lock = LockDevice();
+
+   // Only the destination buffer is mandatory. A null pVertexDecl is handled
+   // by the FVF fallback further down; rejecting it here made that fallback
+   // unreachable.
+   // NOTE(review): D3D9 additionally requires a declaration when a vs_3_0+
+   // shader is bound; that check is not implemented here.
+   if (unlikely(pDestBuffer == nullptr))
+     return D3DERR_INVALIDCALL;
+
+   if (!SupportsSWVP()) {
+     static bool s_errorShown = false;
+
+     if (!std::exchange(s_errorShown, true))
+       Logger::err("D3D9DeviceEx::ProcessVertices: SWVP emu unsupported (vertexPipelineStoresAndAtomics)");
+
+     return D3D_OK;
+   }
+
+   D3D9CommonBuffer* dst = static_cast<D3D9VertexBuffer*>(pDestBuffer)->GetCommonBuffer();
+   D3D9VertexDecl* decl = static_cast<D3D9VertexDecl*> (pVertexDecl);
+
+   PrepareDraw(D3DPT_FORCE_DWORD);
+
+   // No explicit output declaration: derive one from the destination
+   // buffer's FVF, reusing a cached declaration when there is one.
+   if (decl == nullptr) {
+     DWORD FVF = dst->Desc()->FVF;
+
+     auto iter = m_fvfTable.find(FVF);
+
+     if (iter == m_fvfTable.end()) {
+       decl = new D3D9VertexDecl(this, FVF);
+       m_fvfTable.insert(std::make_pair(FVF, decl));
+     }
+     else
+       decl = iter->second.ptr();
+   }
+
+   // Byte offset of the first output vertex in the destination buffer.
+   uint32_t offset = DestIndex * decl->GetSize();
+
+   auto slice = dst->GetBufferSlice<D3D9_COMMON_BUFFER_TYPE_REAL>();
+   slice = slice.subSlice(offset, slice.length() - offset);
+
+   EmitCs([this,
+     cDecl = ref(decl),
+     cVertexCount = VertexCount,
+     cStartIndex = SrcStartIndex,
+     cInstanceCount = GetInstanceCount(),
+     cBufferSlice = slice,
+     cIndexed = m_state.indices != nullptr
+   ](DxvkContext* ctx) {
+     Rc<DxvkShader> shader = m_swvpEmulator.GetShaderModule(this, cDecl);
+
+     // Each vertex is processed as one point.
+     auto drawInfo = GenerateDrawInfo(D3DPT_POINTLIST, cVertexCount, cInstanceCount);
+
+     if (drawInfo.instanceCount != 1) {
+       drawInfo.instanceCount = 1;
+
+       Logger::warn("D3D9DeviceEx::ProcessVertices: instancing unsupported");
+     }
+
+     ApplyPrimitiveType(ctx, D3DPT_POINTLIST);
+
+     // Unbind the pixel shader, we aren't drawing
+     // to avoid val errors / UB.
+     ctx->bindShader(VK_SHADER_STAGE_FRAGMENT_BIT, nullptr);
+
+     ctx->bindShader(VK_SHADER_STAGE_GEOMETRY_BIT, shader);
+     ctx->bindResourceBuffer(getSWVPBufferSlot(), cBufferSlice);
+     ctx->draw(
+       drawInfo.vertexCount, drawInfo.instanceCount,
+       cStartIndex, 0);
+     // Restore the bindings touched above.
+     ctx->bindResourceBuffer(getSWVPBufferSlot(), DxvkBufferSlice());
+     ctx->bindShader(VK_SHADER_STAGE_GEOMETRY_BIT, nullptr);
+   });
+
+   // We unbound the pixel shader before,
+   // let's make sure that gets rebound.
+   m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader);
+
+   if (m_state.pixelShader != nullptr) {
+     BindShader<DxsoProgramTypes::PixelShader>(
+       GetCommonShader(m_state.pixelShader),
+       GetPixelShaderPermutation());
+   }
+
+   // For buffer-mapped destinations, copy the written range from the real
+   // buffer into the mapping buffer as well.
+   if (dst->GetMapMode() == D3D9_COMMON_BUFFER_MAP_MODE_BUFFER) {
+     uint32_t copySize = VertexCount * decl->GetSize();
+
+     EmitCs([
+       cSrcBuffer = dst->GetBuffer<D3D9_COMMON_BUFFER_TYPE_REAL>(),
+       cDstBuffer = dst->GetBuffer<D3D9_COMMON_BUFFER_TYPE_MAPPING>(),
+       cOffset = offset,
+       cCopySize = copySize
+     ](DxvkContext* ctx) {
+       ctx->copyBuffer(cDstBuffer, cOffset, cSrcBuffer, cOffset, cCopySize);
+     });
+   }
+
+   dst->SetWrittenByGPU(true);
+
+   return D3D_OK;
+ }
+
+
+ // Creates a vertex declaration from an element array terminated by an
+ // entry whose Stream is 0xFF. Returns D3DERR_INVALIDCALL for null
+ // arguments or when construction throws a DxvkError.
+ // NOTE(review): the terminator scan has no upper bound.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateVertexDeclaration(
+     const D3DVERTEXELEMENT9* pVertexElements,
+     IDirect3DVertexDeclaration9** ppDecl) {
+   InitReturnPtr(ppDecl);
+
+   if (unlikely(!ppDecl || !pVertexElements))
+     return D3DERR_INVALIDCALL;
+
+   // Count the elements that precede the terminator.
+   uint32_t elementCount = 0;
+
+   while (pVertexElements[elementCount].Stream != 0xFF)
+     elementCount++;
+
+   try {
+     const Com<D3D9VertexDecl> created = new D3D9VertexDecl(this, pVertexElements, elementCount);
+     *ppDecl = created.ref();
+     return D3D_OK;
+   }
+   catch (const DxvkError& err) {
+     Logger::err(err.message());
+     return D3DERR_INVALIDCALL;
+   }
+ }
+
+
+ // Makes pDecl the current vertex declaration. While recording, the call is
+ // forwarded to m_recorder. Setting the declaration that is already current
+ // is a no-op. The fixed-function vertex shader is flagged dirty when either
+ // declaration is null or when the PositionT/Color0/Color1 flags or the
+ // texcoord mask differ; the input layout is always flagged dirty.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetVertexDeclaration(IDirect3DVertexDeclaration9* pDecl) {
+   auto lock = LockDevice();
+
+   D3D9VertexDecl* next = static_cast<D3D9VertexDecl*>(pDecl);
+
+   if (unlikely(ShouldRecord()))
+     return m_recorder->SetVertexDeclaration(next);
+
+   D3D9VertexDecl* prev = m_state.vertexDecl.ptr();
+
+   if (next == prev)
+     return D3D_OK;
+
+   // Going to or from "no declaration" always invalidates the FF shader.
+   bool ffShaderDirty = true;
+
+   if (next != nullptr && prev != nullptr) {
+     ffShaderDirty =
+          next->TestFlag(D3D9VertexDeclFlag::HasPositionT) != prev->TestFlag(D3D9VertexDeclFlag::HasPositionT)
+       || next->TestFlag(D3D9VertexDeclFlag::HasColor0)    != prev->TestFlag(D3D9VertexDeclFlag::HasColor0)
+       || next->TestFlag(D3D9VertexDeclFlag::HasColor1)    != prev->TestFlag(D3D9VertexDeclFlag::HasColor1)
+       || next->GetTexcoordMask()                          != prev->GetTexcoordMask();
+   }
+
+   if (ffShaderDirty)
+     m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);
+
+   m_state.vertexDecl = next;
+
+   m_flags.set(D3D9DeviceFlag::DirtyInputLayout);
+
+   return D3D_OK;
+ }
+
+
+ // Returns a new reference to the current vertex declaration in *ppDecl.
+ // Always reports D3D_OK: a null ppDecl is tolerated, and *ppDecl stays as
+ // initialised by InitReturnPtr when no declaration is set.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetVertexDeclaration(IDirect3DVertexDeclaration9** ppDecl) {
+   auto lock = LockDevice();
+
+   InitReturnPtr(ppDecl);
+
+   if (ppDecl != nullptr && m_state.vertexDecl != nullptr)
+     *ppDecl = m_state.vertexDecl.ref();
+
+   return D3D_OK;
+ }
+
+
+ // Selects the vertex declaration matching FVF, creating and caching it in
+ // m_fvfTable on first use. An FVF of 0 is a no-op that returns D3D_OK.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetFVF(DWORD FVF) {
+   auto lock = LockDevice();
+
+   if (FVF == 0)
+     return D3D_OK;
+
+   auto cached = m_fvfTable.find(FVF);
+
+   if (cached != m_fvfTable.end())
+     return this->SetVertexDeclaration(cached->second.ptr());
+
+   // First time this FVF is seen: build a declaration and remember it.
+   D3D9VertexDecl* created = new D3D9VertexDecl(this, FVF);
+   m_fvfTable.insert(std::make_pair(FVF, created));
+
+   return this->SetVertexDeclaration(created);
+ }
+
+
+ // Writes the FVF of the current vertex declaration to *pFVF, or 0 when no
+ // declaration is set. Returns D3DERR_INVALIDCALL when pFVF is null.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetFVF(DWORD* pFVF) {
+   auto lock = LockDevice();
+
+   if (!pFVF)
+     return D3DERR_INVALIDCALL;
+
+   if (m_state.vertexDecl != nullptr)
+     *pFVF = m_state.vertexDecl->GetFVF();
+   else
+     *pFVF = 0;
+
+   return D3D_OK;
+ }
+
+
+ // Compiles the bytecode at pFunction into a vertex shader object and
+ // returns it in *ppShader. Returns D3DERR_INVALIDCALL when ppShader is
+ // null or when CreateShaderModule fails.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateVertexShader(
+     const DWORD* pFunction,
+     IDirect3DVertexShader9** ppShader) {
+   // Unlike CreatePixelShader, this entry point deliberately leaves
+   // *ppShader untouched on the failure paths.
+   if (unlikely(!ppShader))
+     return D3DERR_INVALIDCALL;
+
+   DxsoModuleInfo info;
+   info.options = m_dxsoOptions;
+
+   D3D9CommonShader commonShader;
+
+   const HRESULT created = this->CreateShaderModule(
+     &commonShader, VK_SHADER_STAGE_VERTEX_BIT, pFunction, &info);
+
+   if (FAILED(created))
+     return D3DERR_INVALIDCALL;
+
+   *ppShader = ref(new D3D9VertexShader(this, commonShader));
+
+   return D3D_OK;
+ }
+
+
+ // Makes pShader the current vertex shader (null selects no programmable
+ // shader). While recording, the call is forwarded to m_recorder. Setting
+ // the shader that is already current is a no-op. Otherwise the vertex
+ // constant set's dirty flag and meta info, the shader masks and the
+ // relevant device dirty flags are updated.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetVertexShader(IDirect3DVertexShader9* pShader) {
+ D3D9DeviceLock lock = LockDevice();
+
+ D3D9VertexShader* shader = static_cast<D3D9VertexShader*>(pShader);
+
+ if (unlikely(ShouldRecord()))
+ return m_recorder->SetVertexShader(shader);
+
+ if (shader == m_state.vertexShader.ptr())
+ return D3D_OK;
+
+ auto* oldShader = GetCommonShader(m_state.vertexShader);
+ auto* newShader = GetCommonShader(shader);
+
+ bool oldCopies = oldShader && oldShader->GetMeta().needsConstantCopies;
+ bool newCopies = newShader && newShader->GetMeta().needsConstantCopies;
+
+ // Constants are marked dirty when either shader needs constant copies or
+ // when there was no previous shader.
+ m_consts[DxsoProgramTypes::VertexShader].dirty |= oldCopies || newCopies || !oldShader;
+ m_consts[DxsoProgramTypes::VertexShader].meta = newShader ? newShader->GetMeta() : DxsoShaderMetaInfo();
+
+ // Also dirty when the new shader reaches a higher float, int or bool
+ // constant index than the old one did.
+ if (newShader && oldShader) {
+ m_consts[DxsoProgramTypes::VertexShader].dirty
+ |= newShader->GetMeta().maxConstIndexF > oldShader->GetMeta().maxConstIndexF
+ || newShader->GetMeta().maxConstIndexI > oldShader->GetMeta().maxConstIndexI
+ || newShader->GetMeta().maxConstIndexB > oldShader->GetMeta().maxConstIndexB;
+ }
+
+ m_state.vertexShader = shader;
+
+ if (shader != nullptr) {
+ // The programmable shader is bound right here, so its dirty flag is
+ // cleared while the fixed-function one is set.
+ m_flags.clr(D3D9DeviceFlag::DirtyProgVertexShader);
+ m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);
+
+ BindShader<DxsoProgramTypes::VertexShader>(
+ GetCommonShader(shader),
+ GetVertexShaderPermutation());
+
+ m_vsShaderMasks = newShader->GetShaderMask();
+ }
+ else
+ // No programmable shader: reset the masks to their default value.
+ m_vsShaderMasks = D3D9ShaderMasks();
+
+ m_flags.set(D3D9DeviceFlag::DirtyInputLayout);
+
+ return D3D_OK;
+ }
+
+
+ // Returns a new reference to the current vertex shader in *ppShader.
+ // Returns D3DERR_INVALIDCALL when ppShader is null.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetVertexShader(IDirect3DVertexShader9** ppShader) {
+   auto lock = LockDevice();
+
+   InitReturnPtr(ppShader);
+
+   if (likely(ppShader != nullptr)) {
+     *ppShader = m_state.vertexShader.ref();
+     return D3D_OK;
+   }
+
+   return D3DERR_INVALIDCALL;
+ }
+
+
+ // Sets vertex shader float constants: forwards to SetShaderConstants for
+ // the VertexShader/Float combination while holding the device lock.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetVertexShaderConstantF(
+     UINT StartRegister,
+     const float* pConstantData,
+     UINT Vector4fCount) {
+   auto lock = LockDevice();
+
+   constexpr auto Program = DxsoProgramTypes::VertexShader;
+   constexpr auto Kind    = D3D9ConstantType::Float;
+
+   return SetShaderConstants<Program, Kind>(StartRegister, pConstantData, Vector4fCount);
+ }
+
+
+ // Reads vertex shader float constants: forwards to GetShaderConstants for
+ // the VertexShader/Float combination while holding the device lock.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetVertexShaderConstantF(
+     UINT StartRegister,
+     float* pConstantData,
+     UINT Vector4fCount) {
+   auto lock = LockDevice();
+
+   constexpr auto Program = DxsoProgramTypes::VertexShader;
+   constexpr auto Kind    = D3D9ConstantType::Float;
+
+   return GetShaderConstants<Program, Kind>(StartRegister, pConstantData, Vector4fCount);
+ }
+
+
+ // Sets vertex shader integer constants: forwards to SetShaderConstants for
+ // the VertexShader/Int combination while holding the device lock.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetVertexShaderConstantI(
+     UINT StartRegister,
+     const int* pConstantData,
+     UINT Vector4iCount) {
+   auto lock = LockDevice();
+
+   constexpr auto Program = DxsoProgramTypes::VertexShader;
+   constexpr auto Kind    = D3D9ConstantType::Int;
+
+   return SetShaderConstants<Program, Kind>(StartRegister, pConstantData, Vector4iCount);
+ }
+
+
+ // Reads vertex shader integer constants: forwards to GetShaderConstants for
+ // the VertexShader/Int combination while holding the device lock.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetVertexShaderConstantI(
+     UINT StartRegister,
+     int* pConstantData,
+     UINT Vector4iCount) {
+   auto lock = LockDevice();
+
+   constexpr auto Program = DxsoProgramTypes::VertexShader;
+   constexpr auto Kind    = D3D9ConstantType::Int;
+
+   return GetShaderConstants<Program, Kind>(StartRegister, pConstantData, Vector4iCount);
+ }
+
+
+ // Sets vertex shader boolean constants: forwards to SetShaderConstants for
+ // the VertexShader/Bool combination while holding the device lock.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetVertexShaderConstantB(
+     UINT StartRegister,
+     const BOOL* pConstantData,
+     UINT BoolCount) {
+   auto lock = LockDevice();
+
+   constexpr auto Program = DxsoProgramTypes::VertexShader;
+   constexpr auto Kind    = D3D9ConstantType::Bool;
+
+   return SetShaderConstants<Program, Kind>(StartRegister, pConstantData, BoolCount);
+ }
+
+
+ // Reads vertex shader boolean constants: forwards to GetShaderConstants for
+ // the VertexShader/Bool combination while holding the device lock.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetVertexShaderConstantB(
+     UINT StartRegister,
+     BOOL* pConstantData,
+     UINT BoolCount) {
+   auto lock = LockDevice();
+
+   constexpr auto Program = DxsoProgramTypes::VertexShader;
+   constexpr auto Kind    = D3D9ConstantType::Bool;
+
+   return GetShaderConstants<Program, Kind>(StartRegister, pConstantData, BoolCount);
+ }
+
+
+ // Binds pStreamData to vertex stream StreamNumber with the given offset and
+ // stride. Returns D3DERR_INVALIDCALL for a stream index at or above
+ // caps::MaxStreams. While recording, the call is forwarded to m_recorder.
+ // The tracked offset and stride are only updated for a non-null buffer, and
+ // the buffer is only rebound when something actually changed.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetStreamSource(
+     UINT StreamNumber,
+     IDirect3DVertexBuffer9* pStreamData,
+     UINT OffsetInBytes,
+     UINT Stride) {
+   auto lock = LockDevice();
+
+   if (unlikely(StreamNumber >= caps::MaxStreams))
+     return D3DERR_INVALIDCALL;
+
+   D3D9VertexBuffer* newBuffer = static_cast<D3D9VertexBuffer*>(pStreamData);
+
+   if (unlikely(ShouldRecord()))
+     return m_recorder->SetStreamSource(StreamNumber, newBuffer, OffsetInBytes, Stride);
+
+   auto& stream = m_state.vertexBuffers[StreamNumber];
+
+   bool changed = false;
+
+   if (stream.vertexBuffer != newBuffer) {
+     stream.vertexBuffer = newBuffer;
+     changed = true;
+   }
+
+   if (newBuffer != nullptr) {
+     if (stream.offset != OffsetInBytes || stream.stride != Stride)
+       changed = true;
+
+     stream.offset = OffsetInBytes;
+     stream.stride = Stride;
+   }
+
+   if (changed)
+     BindVertexBuffer(StreamNumber, newBuffer, OffsetInBytes, Stride);
+
+   return D3D_OK;
+ }
+
+
+ // Returns the buffer (as a new reference), offset and stride tracked for
+ // vertex stream StreamNumber. Every non-null output is cleared first.
+ // Returns D3DERR_INVALIDCALL when any output pointer is null or the stream
+ // index is at or above caps::MaxStreams.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetStreamSource(
+     UINT StreamNumber,
+     IDirect3DVertexBuffer9** ppStreamData,
+     UINT* pOffsetInBytes,
+     UINT* pStride) {
+   auto lock = LockDevice();
+
+   InitReturnPtr(ppStreamData);
+
+   if (likely(pOffsetInBytes != nullptr))
+     *pOffsetInBytes = 0;
+
+   if (likely(pStride != nullptr))
+     *pStride = 0;
+
+   const bool missingOutput = !ppStreamData || !pOffsetInBytes || !pStride;
+
+   if (unlikely(missingOutput))
+     return D3DERR_INVALIDCALL;
+
+   if (unlikely(StreamNumber >= caps::MaxStreams))
+     return D3DERR_INVALIDCALL;
+
+   const auto& stream = m_state.vertexBuffers[StreamNumber];
+
+   *ppStreamData   = stream.vertexBuffer.ref();
+   *pOffsetInBytes = stream.offset;
+   *pStride        = stream.stride;
+
+   return D3D_OK;
+ }
+
+
+ // Sets the frequency setting of vertex stream StreamNumber.
+ // Returns D3DERR_INVALIDCALL for: a stream index at or above
+ // caps::MaxStreams, instance data on stream 0, a setting that is both
+ // indexed and instanced, or a setting of 0. While recording, the call is
+ // forwarded to m_recorder. An unchanged setting is a no-op; otherwise the
+ // per-stream instancing bit is updated and the input layout flagged dirty.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetStreamSourceFreq(UINT StreamNumber, UINT Setting) {
+   auto lock = LockDevice();
+
+   if (unlikely(StreamNumber >= caps::MaxStreams))
+     return D3DERR_INVALIDCALL;
+
+   const bool isIndexed   = Setting & D3DSTREAMSOURCE_INDEXEDDATA;
+   const bool isInstanced = Setting & D3DSTREAMSOURCE_INSTANCEDATA;
+
+   const bool invalid = (StreamNumber == 0 && isInstanced)
+                     || (isInstanced && isIndexed)
+                     || (Setting == 0);
+
+   if (unlikely(invalid))
+     return D3DERR_INVALIDCALL;
+
+   if (unlikely(ShouldRecord()))
+     return m_recorder->SetStreamSourceFreq(StreamNumber, Setting);
+
+   if (m_state.streamFreq[StreamNumber] == Setting)
+     return D3D_OK;
+
+   m_state.streamFreq[StreamNumber] = Setting;
+
+   const auto streamBit = 1u << StreamNumber;
+
+   if (isInstanced)
+     m_instancedData |= streamBit;
+   else
+     m_instancedData &= ~streamBit;
+
+   m_flags.set(D3D9DeviceFlag::DirtyInputLayout);
+
+   return D3D_OK;
+ }
+
+
+ // Writes the frequency setting tracked for vertex stream StreamNumber to
+ // *pSetting. Returns D3DERR_INVALIDCALL for a stream index at or above
+ // caps::MaxStreams or a null pSetting.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetStreamSourceFreq(UINT StreamNumber, UINT* pSetting) {
+   auto lock = LockDevice();
+
+   if (unlikely(StreamNumber >= caps::MaxStreams || pSetting == nullptr))
+     return D3DERR_INVALIDCALL;
+
+   *pSetting = m_state.streamFreq[StreamNumber];
+
+   return D3D_OK;
+ }
+
+
+ // Makes pIndexData the current index buffer. While recording, the call is
+ // forwarded to m_recorder. The buffer is only rebound when it differs from
+ // the one already tracked.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetIndices(IDirect3DIndexBuffer9* pIndexData) {
+   auto lock = LockDevice();
+
+   D3D9IndexBuffer* newIndices = static_cast<D3D9IndexBuffer*>(pIndexData);
+
+   if (unlikely(ShouldRecord()))
+     return m_recorder->SetIndices(newIndices);
+
+   if (newIndices != m_state.indices.ptr()) {
+     m_state.indices = newIndices;
+     BindIndices();
+   }
+
+   return D3D_OK;
+ }
+
+
+ // Returns a new reference to the current index buffer in *ppIndexData.
+ // Returns D3DERR_INVALIDCALL when ppIndexData is null.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetIndices(IDirect3DIndexBuffer9** ppIndexData) {
+   auto lock = LockDevice();
+
+   InitReturnPtr(ppIndexData);
+
+   if (likely(ppIndexData != nullptr)) {
+     *ppIndexData = m_state.indices.ref();
+     return D3D_OK;
+   }
+
+   return D3DERR_INVALIDCALL;
+ }
+
+
+ // Compiles the bytecode at pFunction into a pixel shader object and returns
+ // it in *ppShader, which is initialised via InitReturnPtr up front.
+ // Returns D3DERR_INVALIDCALL when ppShader is null or when
+ // CreateShaderModule fails.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreatePixelShader(
+     const DWORD* pFunction,
+     IDirect3DPixelShader9** ppShader) {
+   InitReturnPtr(ppShader);
+
+   if (unlikely(!ppShader))
+     return D3DERR_INVALIDCALL;
+
+   DxsoModuleInfo info;
+   info.options = m_dxsoOptions;
+
+   D3D9CommonShader commonShader;
+
+   const HRESULT created = this->CreateShaderModule(
+     &commonShader, VK_SHADER_STAGE_FRAGMENT_BIT, pFunction, &info);
+
+   if (FAILED(created))
+     return D3DERR_INVALIDCALL;
+
+   *ppShader = ref(new D3D9PixelShader(this, commonShader));
+
+   return D3D_OK;
+ }
+
+
+ // Makes pShader the current pixel shader (null selects no programmable
+ // shader). While recording, the call is forwarded to m_recorder. Setting
+ // the shader that is already current is a no-op. Otherwise the pixel
+ // constant set's dirty flag and meta info and the shader masks are updated,
+ // and UpdateActiveHazardsRT is called.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetPixelShader(IDirect3DPixelShader9* pShader) {
+ D3D9DeviceLock lock = LockDevice();
+
+ D3D9PixelShader* shader = static_cast<D3D9PixelShader*>(pShader);
+
+ if (unlikely(ShouldRecord()))
+ return m_recorder->SetPixelShader(shader);
+
+ if (shader == m_state.pixelShader.ptr())
+ return D3D_OK;
+
+ auto* oldShader = GetCommonShader(m_state.pixelShader);
+ auto* newShader = GetCommonShader(shader);
+
+ bool oldCopies = oldShader && oldShader->GetMeta().needsConstantCopies;
+ bool newCopies = newShader && newShader->GetMeta().needsConstantCopies;
+
+ // Constants are marked dirty when either shader needs constant copies or
+ // when there was no previous shader.
+ m_consts[DxsoProgramTypes::PixelShader].dirty |= oldCopies || newCopies || !oldShader;
+ m_consts[DxsoProgramTypes::PixelShader].meta = newShader ? newShader->GetMeta() : DxsoShaderMetaInfo();
+
+ // Also dirty when the new shader reaches a higher float, int or bool
+ // constant index than the old one did.
+ if (newShader && oldShader) {
+ m_consts[DxsoProgramTypes::PixelShader].dirty
+ |= newShader->GetMeta().maxConstIndexF > oldShader->GetMeta().maxConstIndexF
+ || newShader->GetMeta().maxConstIndexI > oldShader->GetMeta().maxConstIndexI
+ || newShader->GetMeta().maxConstIndexB > oldShader->GetMeta().maxConstIndexB;
+ }
+
+ m_state.pixelShader = shader;
+
+ if (shader != nullptr) {
+ m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader);
+
+ BindShader<DxsoProgramTypes::PixelShader>(
+ GetCommonShader(shader),
+ GetPixelShaderPermutation());
+
+ m_psShaderMasks = newShader->GetShaderMask();
+ }
+ else {
+ // TODO: What fixed function textures are in use?
+ // Currently we are making all 8 of them as in use here.
+
+ // The RT output is always 0 for fixed function.
+ m_psShaderMasks = FixedFunctionMask;
+ }
+
+ // Called with UINT32_MAX after the shader masks changed.
+ UpdateActiveHazardsRT(UINT32_MAX);
+
+ return D3D_OK;
+ }
+
+
+ // Returns a new reference to the current pixel shader in *ppShader.
+ // Returns D3DERR_INVALIDCALL when ppShader is null.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetPixelShader(IDirect3DPixelShader9** ppShader) {
+   auto lock = LockDevice();
+
+   InitReturnPtr(ppShader);
+
+   if (likely(ppShader != nullptr)) {
+     *ppShader = m_state.pixelShader.ref();
+     return D3D_OK;
+   }
+
+   return D3DERR_INVALIDCALL;
+ }
+
+
+ // Sets pixel shader float constants: forwards to SetShaderConstants for
+ // the PixelShader/Float combination while holding the device lock.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetPixelShaderConstantF(
+     UINT StartRegister,
+     const float* pConstantData,
+     UINT Vector4fCount) {
+   auto lock = LockDevice();
+
+   constexpr auto Program = DxsoProgramTypes::PixelShader;
+   constexpr auto Kind    = D3D9ConstantType::Float;
+
+   return SetShaderConstants<Program, Kind>(StartRegister, pConstantData, Vector4fCount);
+ }
+
+
+ // Reads pixel shader float constants: forwards to GetShaderConstants for
+ // the PixelShader/Float combination while holding the device lock.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetPixelShaderConstantF(
+     UINT StartRegister,
+     float* pConstantData,
+     UINT Vector4fCount) {
+   auto lock = LockDevice();
+
+   constexpr auto Program = DxsoProgramTypes::PixelShader;
+   constexpr auto Kind    = D3D9ConstantType::Float;
+
+   return GetShaderConstants<Program, Kind>(StartRegister, pConstantData, Vector4fCount);
+ }
+
+
+ // Sets pixel shader integer constants: forwards to SetShaderConstants for
+ // the PixelShader/Int combination while holding the device lock.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetPixelShaderConstantI(
+     UINT StartRegister,
+     const int* pConstantData,
+     UINT Vector4iCount) {
+   auto lock = LockDevice();
+
+   constexpr auto Program = DxsoProgramTypes::PixelShader;
+   constexpr auto Kind    = D3D9ConstantType::Int;
+
+   return SetShaderConstants<Program, Kind>(StartRegister, pConstantData, Vector4iCount);
+ }
+
+
+ // Reads pixel shader integer constants: forwards to GetShaderConstants for
+ // the PixelShader/Int combination while holding the device lock.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetPixelShaderConstantI(
+     UINT StartRegister,
+     int* pConstantData,
+     UINT Vector4iCount) {
+   auto lock = LockDevice();
+
+   constexpr auto Program = DxsoProgramTypes::PixelShader;
+   constexpr auto Kind    = D3D9ConstantType::Int;
+
+   return GetShaderConstants<Program, Kind>(StartRegister, pConstantData, Vector4iCount);
+ }
+
+
+ // Sets pixel shader boolean constants: forwards to SetShaderConstants for
+ // the PixelShader/Bool combination while holding the device lock.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetPixelShaderConstantB(
+     UINT StartRegister,
+     const BOOL* pConstantData,
+     UINT BoolCount) {
+   auto lock = LockDevice();
+
+   constexpr auto Program = DxsoProgramTypes::PixelShader;
+   constexpr auto Kind    = D3D9ConstantType::Bool;
+
+   return SetShaderConstants<Program, Kind>(StartRegister, pConstantData, BoolCount);
+ }
+
+
+ // Reads pixel shader boolean constants: forwards to GetShaderConstants for
+ // the PixelShader/Bool combination while holding the device lock.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetPixelShaderConstantB(
+     UINT StartRegister,
+     BOOL* pConstantData,
+     UINT BoolCount) {
+   auto lock = LockDevice();
+
+   constexpr auto Program = DxsoProgramTypes::PixelShader;
+   constexpr auto Kind    = D3D9ConstantType::Bool;
+
+   return GetShaderConstants<Program, Kind>(StartRegister, pConstantData, BoolCount);
+ }
+
+
+ // Unimplemented: logs a warning on the first call only and always returns
+ // D3DERR_INVALIDCALL. All arguments are ignored.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DrawRectPatch(
+     UINT Handle,
+     const float* pNumSegs,
+     const D3DRECTPATCH_INFO* pRectPatchInfo) {
+   static bool s_errorShown = false;
+
+   const bool alreadyWarned = std::exchange(s_errorShown, true);
+
+   if (!alreadyWarned)
+     Logger::warn("D3D9DeviceEx::DrawRectPatch: Stub");
+
+   return D3DERR_INVALIDCALL;
+ }
+
+
+ // Unimplemented: logs a warning on the first call only and always returns
+ // D3DERR_INVALIDCALL. All arguments are ignored.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DrawTriPatch(
+     UINT Handle,
+     const float* pNumSegs,
+     const D3DTRIPATCH_INFO* pTriPatchInfo) {
+   static bool s_errorShown = false;
+
+   const bool alreadyWarned = std::exchange(s_errorShown, true);
+
+   if (!alreadyWarned)
+     Logger::warn("D3D9DeviceEx::DrawTriPatch: Stub");
+
+   return D3DERR_INVALIDCALL;
+ }
+
+
+ // Unimplemented: logs a warning on the first call only and always returns
+ // D3DERR_INVALIDCALL. Handle is ignored.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DeletePatch(UINT Handle) {
+   static bool s_errorShown = false;
+
+   const bool alreadyWarned = std::exchange(s_errorShown, true);
+
+   if (!alreadyWarned)
+     Logger::warn("D3D9DeviceEx::DeletePatch: Stub");
+
+   return D3DERR_INVALIDCALL;
+ }
+
+
+ // Creates a query of the given type. The result of
+ // D3D9Query::QuerySupported is returned as-is when it is not D3D_OK or
+ // when ppQuery is null, so a null ppQuery acts as a pure support check.
+ // A DxvkError during construction is logged and mapped to
+ // D3DERR_INVALIDCALL.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateQuery(D3DQUERYTYPE Type, IDirect3DQuery9** ppQuery) {
+   const HRESULT supported = D3D9Query::QuerySupported(this, Type);
+
+   if (supported != D3D_OK || ppQuery == nullptr)
+     return supported;
+
+   try {
+     *ppQuery = ref(new D3D9Query(this, Type));
+     return D3D_OK;
+   }
+   catch (const DxvkError& err) {
+     Logger::err(err.message());
+     return D3DERR_INVALIDCALL;
+   }
+ }
+
+
+ // Ex Methods
+
+
+ // Unsupported: all arguments are ignored and D3DERR_INVALIDCALL is returned.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetConvolutionMonoKernel(
+ UINT width,
+ UINT height,
+ float* rows,
+ float* columns) {
+ // We don't advertise support for this.
+ return D3DERR_INVALIDCALL;
+ }
+
+
+ // Unimplemented: logs a warning on every call, performs no composition and
+ // returns D3D_OK. All arguments are ignored.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::ComposeRects(
+ IDirect3DSurface9* pSrc,
+ IDirect3DSurface9* pDst,
+ IDirect3DVertexBuffer9* pSrcRectDescs,
+ UINT NumRects,
+ IDirect3DVertexBuffer9* pDstRectDescs,
+ D3DCOMPOSERECTSOP Operation,
+ int Xoffset,
+ int Yoffset) {
+ Logger::warn("D3D9DeviceEx::ComposeRects: Stub");
+ return D3D_OK;
+ }
+
+
+ // Unimplemented: logs a warning on every call and returns D3D_OK.
+ // No priority is tracked, so a non-null pPriority receives 0.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetGPUThreadPriority(INT* pPriority) {
+   Logger::warn("D3D9DeviceEx::GetGPUThreadPriority: Stub");
+
+   // The call reports success, so give the caller a defined value instead
+   // of leaving the out-parameter unwritten.
+   if (pPriority != nullptr)
+     *pPriority = 0;
+
+   return D3D_OK;
+ }
+
+
+ // Unimplemented: logs a warning on every call, ignores Priority and returns D3D_OK.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetGPUThreadPriority(INT Priority) {
+ Logger::warn("D3D9DeviceEx::SetGPUThreadPriority: Stub");
+ return D3D_OK;
+ }
+
+
+ // Forwards to the implicit swapchain's WaitForVBlank. Only swapchain index
+ // 0 is accepted; any other index yields D3DERR_INVALIDCALL.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::WaitForVBlank(UINT iSwapChain) {
+   if (likely(iSwapChain == 0))
+     return m_implicitSwapchain->WaitForVBlank();
+
+   return D3DERR_INVALIDCALL;
+ }
+
+
+ // Unimplemented: logs a warning on every call, ignores both arguments and
+ // returns D3D_OK.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CheckResourceResidency(IDirect3DResource9** pResourceArray, UINT32 NumResources) {
+ Logger::warn("D3D9DeviceEx::CheckResourceResidency: Stub");
+ return D3D_OK;
+ }
+
+
+ // Stores the maximum frame latency and asks the implicit swapchain to
+ // re-sync. A value of 0 selects DefaultFrameLatency; the result is capped
+ // at MaxFrameLatency.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetMaximumFrameLatency(UINT MaxLatency) {
+   auto lock = LockDevice();
+
+   UINT latency = MaxLatency;
+
+   if (latency == 0)
+     latency = DefaultFrameLatency;
+
+   if (latency > MaxFrameLatency)
+     latency = MaxFrameLatency;
+
+   m_frameLatency = latency;
+
+   m_implicitSwapchain->SyncFrameLatency();
+
+   return D3D_OK;
+ }
+
+
+ // Writes the stored maximum frame latency to *pMaxLatency.
+ // Returns D3DERR_INVALIDCALL when pMaxLatency is null.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetMaximumFrameLatency(UINT* pMaxLatency) {
+   auto lock = LockDevice();
+
+   if (likely(pMaxLatency != nullptr)) {
+     *pMaxLatency = m_frameLatency;
+     return D3D_OK;
+   }
+
+   return D3DERR_INVALIDCALL;
+ }
+
+
+ // Always reports D3D_OK; hDestinationWindow is ignored.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CheckDeviceState(HWND hDestinationWindow) {
+ return D3D_OK;
+ }
+
+
+ // Presents through the implicit swapchain, passing every argument on
+ // unchanged and returning its result.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::PresentEx(
+     const RECT* pSourceRect,
+     const RECT* pDestRect,
+     HWND hDestWindowOverride,
+     const RGNDATA* pDirtyRegion,
+     DWORD dwFlags) {
+   const HRESULT presented = m_implicitSwapchain->Present(
+     pSourceRect, pDestRect, hDestWindowOverride, pDirtyRegion, dwFlags);
+
+   return presented;
+ }
+
+
+ // Creates a single-level render target surface in D3DPOOL_DEFAULT.
+ // D3DUSAGE_RENDERTARGET is always added to Usage. Lockable and
+ // pSharedHandle are not used by this implementation.
+ // Returns D3DERR_INVALIDCALL when ppSurface is null or when
+ // NormalizeTextureProperties rejects the description, and
+ // D3DERR_OUTOFVIDEOMEMORY when creation throws a DxvkError.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateRenderTargetEx(
+     UINT Width,
+     UINT Height,
+     D3DFORMAT Format,
+     D3DMULTISAMPLE_TYPE MultiSample,
+     DWORD MultisampleQuality,
+     BOOL Lockable,
+     IDirect3DSurface9** ppSurface,
+     HANDLE* pSharedHandle,
+     DWORD Usage) {
+   InitReturnPtr(ppSurface);
+
+   if (unlikely(!ppSurface))
+     return D3DERR_INVALIDCALL;
+
+   D3D9_COMMON_TEXTURE_DESC texDesc;
+   texDesc.Width              = Width;
+   texDesc.Height             = Height;
+   texDesc.Depth              = 1;
+   texDesc.ArraySize          = 1;
+   texDesc.MipLevels          = 1;
+   texDesc.Usage              = Usage | D3DUSAGE_RENDERTARGET;
+   texDesc.Format             = EnumerateFormat(Format);
+   texDesc.Pool               = D3DPOOL_DEFAULT;
+   texDesc.Discard            = FALSE;
+   texDesc.MultiSample        = MultiSample;
+   texDesc.MultisampleQuality = MultisampleQuality;
+   texDesc.IsBackBuffer       = FALSE;
+   texDesc.IsAttachmentOnly   = TRUE;
+
+   const HRESULT normalized = D3D9CommonTexture::NormalizeTextureProperties(this, &texDesc);
+
+   if (FAILED(normalized))
+     return D3DERR_INVALIDCALL;
+
+   try {
+     const Com<D3D9Surface> created = new D3D9Surface(this, &texDesc, nullptr);
+     m_initializer->InitTexture(created->GetCommonTexture());
+     *ppSurface = created.ref();
+     return D3D_OK;
+   }
+   catch (const DxvkError& err) {
+     Logger::err(err.message());
+     return D3DERR_OUTOFVIDEOMEMORY;
+   }
+ }
+
+
+ // Creates a single-level, non-multisampled offscreen surface in the given
+ // pool. The surface is marked attachment-only exactly when Pool is
+ // D3DPOOL_DEFAULT. pSharedHandle is not used by this implementation.
+ // Returns D3DERR_INVALIDCALL when ppSurface is null or when
+ // NormalizeTextureProperties rejects the description, and
+ // D3DERR_OUTOFVIDEOMEMORY when creation throws a DxvkError.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateOffscreenPlainSurfaceEx(
+     UINT Width,
+     UINT Height,
+     D3DFORMAT Format,
+     D3DPOOL Pool,
+     IDirect3DSurface9** ppSurface,
+     HANDLE* pSharedHandle,
+     DWORD Usage) {
+   InitReturnPtr(ppSurface);
+
+   if (unlikely(!ppSurface))
+     return D3DERR_INVALIDCALL;
+
+   D3D9_COMMON_TEXTURE_DESC texDesc;
+   texDesc.Width              = Width;
+   texDesc.Height             = Height;
+   texDesc.Depth              = 1;
+   texDesc.ArraySize          = 1;
+   texDesc.MipLevels          = 1;
+   texDesc.Usage              = Usage;
+   texDesc.Format             = EnumerateFormat(Format);
+   texDesc.Pool               = Pool;
+   texDesc.Discard            = FALSE;
+   texDesc.MultiSample        = D3DMULTISAMPLE_NONE;
+   texDesc.MultisampleQuality = 0;
+   texDesc.IsBackBuffer       = FALSE;
+   texDesc.IsAttachmentOnly   = Pool == D3DPOOL_DEFAULT;
+
+   const HRESULT normalized = D3D9CommonTexture::NormalizeTextureProperties(this, &texDesc);
+
+   if (FAILED(normalized))
+     return D3DERR_INVALIDCALL;
+
+   try {
+     const Com<D3D9Surface> created = new D3D9Surface(this, &texDesc, nullptr);
+     m_initializer->InitTexture(created->GetCommonTexture());
+     *ppSurface = created.ref();
+     return D3D_OK;
+   }
+   catch (const DxvkError& err) {
+     Logger::err(err.message());
+     return D3DERR_OUTOFVIDEOMEMORY;
+   }
+ }
+
+
+ // Creates a single-level depth-stencil surface in D3DPOOL_DEFAULT.
+ // D3DUSAGE_DEPTHSTENCIL is always added to Usage and Discard is stored in
+ // the description. pSharedHandle is not used by this implementation.
+ // Returns D3DERR_INVALIDCALL when ppSurface is null or when
+ // NormalizeTextureProperties rejects the description, and
+ // D3DERR_OUTOFVIDEOMEMORY when creation throws a DxvkError.
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateDepthStencilSurfaceEx(
+     UINT Width,
+     UINT Height,
+     D3DFORMAT Format,
+     D3DMULTISAMPLE_TYPE MultiSample,
+     DWORD MultisampleQuality,
+     BOOL Discard,
+     IDirect3DSurface9** ppSurface,
+     HANDLE* pSharedHandle,
+     DWORD Usage) {
+   InitReturnPtr(ppSurface);
+
+   if (unlikely(!ppSurface))
+     return D3DERR_INVALIDCALL;
+
+   D3D9_COMMON_TEXTURE_DESC texDesc;
+   texDesc.Width              = Width;
+   texDesc.Height             = Height;
+   texDesc.Depth              = 1;
+   texDesc.ArraySize          = 1;
+   texDesc.MipLevels          = 1;
+   texDesc.Usage              = Usage | D3DUSAGE_DEPTHSTENCIL;
+   texDesc.Format             = EnumerateFormat(Format);
+   texDesc.Pool               = D3DPOOL_DEFAULT;
+   texDesc.Discard            = Discard;
+   texDesc.MultiSample        = MultiSample;
+   texDesc.MultisampleQuality = MultisampleQuality;
+   texDesc.IsBackBuffer       = FALSE;
+   texDesc.IsAttachmentOnly   = TRUE;
+
+   const HRESULT normalized = D3D9CommonTexture::NormalizeTextureProperties(this, &texDesc);
+
+   if (FAILED(normalized))
+     return D3DERR_INVALIDCALL;
+
+   try {
+     const Com<D3D9Surface> created = new D3D9Surface(this, &texDesc, nullptr);
+     m_initializer->InitTexture(created->GetCommonTexture());
+     *ppSurface = created.ref();
+     return D3D_OK;
+   }
+   catch (const DxvkError& err) {
+     Logger::err(err.message());
+     return D3DERR_OUTOFVIDEOMEMORY;
+   }
+ }
+
+
+  // Resets the device by resetting its swap chain while holding the
+  // device lock. A failure code from ResetSwapChain is passed through
+  // unchanged; any successful result is reported as D3D_OK.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::ResetEx(
+          D3DPRESENT_PARAMETERS* pPresentationParameters,
+          D3DDISPLAYMODEEX*      pFullscreenDisplayMode) {
+    D3D9DeviceLock lock = LockDevice();
+
+    const HRESULT status = ResetSwapChain(pPresentationParameters, pFullscreenDisplayMode);
+
+    return FAILED(status) ? status : D3D_OK;
+  }
+
+
+  // Queries the display mode and rotation from the implicit swap chain.
+  // Only swap chain index 0 is accepted; any other index yields
+  // D3DERR_INVALIDCALL.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetDisplayModeEx(
+          UINT                iSwapChain,
+          D3DDISPLAYMODEEX*   pMode,
+          D3DDISPLAYROTATION* pRotation) {
+    if (likely(iSwapChain == 0))
+      return m_implicitSwapchain->GetDisplayModeEx(pMode, pRotation);
+
+    return D3DERR_INVALIDCALL;
+  }
+
+
+  // Creates an additional windowed swap chain on this device.
+  //
+  // Returns D3DERR_INVALIDCALL if a required pointer is null, if the
+  // requested swap chain is not windowed, or if the implicit swap chain
+  // is currently not windowed. Returns D3DERR_NOTAVAILABLE if creating
+  // the swap chain throws a DxvkError, and D3D_OK on success.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateAdditionalSwapChainEx(
+          D3DPRESENT_PARAMETERS* pPresentationParameters,
+    const D3DDISPLAYMODEEX*      pFullscreenDisplayMode,
+          IDirect3DSwapChain9**  ppSwapChain) {
+    D3D9DeviceLock lock = LockDevice();
+
+    InitReturnPtr(ppSwapChain);
+
+    if (ppSwapChain == nullptr || pPresentationParameters == nullptr)
+      return D3DERR_INVALIDCALL;
+
+    // Additional fullscreen swap chains are forbidden, and no further
+    // swap chain may be created while the implicit one is fullscreen.
+    if (!pPresentationParameters->Windowed
+     || !m_implicitSwapchain->GetPresentParams()->Windowed)
+      return D3DERR_INVALIDCALL;
+
+    m_implicitSwapchain->Invalidate(pPresentationParameters->hDeviceWindow);
+
+    try {
+      *ppSwapChain = ref(new D3D9SwapChainEx(this, pPresentationParameters, pFullscreenDisplayMode));
+      return D3D_OK;
+    }
+    catch (const DxvkError& err) {
+      Logger::err(err.message());
+      return D3DERR_NOTAVAILABLE;
+    }
+  }
+
+
+  // Stores a sampler state value and marks the dependent state dirty.
+  //
+  // While a state block is being recorded the call is forwarded to the
+  // recorder instead. Writing a value that is already set is a no-op.
+  //
+  // Fetch4 is toggled through the magic FOURCC values 'GET4' / 'GET1'
+  // written to D3DSAMP_MIPMAPLODBIAS, and is only active for a sampler
+  // while its magnification filter is D3DTEXF_POINT.
+  HRESULT D3D9DeviceEx::SetStateSamplerState(
+          DWORD               StateSampler,
+          D3DSAMPLERSTATETYPE Type,
+          DWORD               Value) {
+    D3D9DeviceLock lock = LockDevice();
+
+    if (unlikely(ShouldRecord()))
+      return m_recorder->SetStateSamplerState(StateSampler, Type, Value);
+
+    auto& samplerStates = m_state.samplerStates;
+
+    if (samplerStates[StateSampler][Type] == Value)
+      return D3D_OK;
+
+    samplerStates[StateSampler][Type] = Value;
+
+    const uint32_t samplerBit = 1u << StateSampler;
+
+    switch (Type) {
+      // These states feed directly into the sampler object
+      case D3DSAMP_ADDRESSU:
+      case D3DSAMP_ADDRESSV:
+      case D3DSAMP_ADDRESSW:
+      case D3DSAMP_MAGFILTER:
+      case D3DSAMP_MINFILTER:
+      case D3DSAMP_MIPFILTER:
+      case D3DSAMP_MAXANISOTROPY:
+      case D3DSAMP_MIPMAPLODBIAS:
+      case D3DSAMP_MAXMIPLEVEL:
+      case D3DSAMP_BORDERCOLOR:
+        m_dirtySamplerStates |= samplerBit;
+        break;
+
+      // The sRGB toggle only matters if a texture is active on the slot
+      case D3DSAMP_SRGBTEXTURE:
+        if (m_activeTextures & samplerBit)
+          m_dirtyTextures |= samplerBit;
+        break;
+
+      default:
+        break;
+    }
+
+    constexpr DWORD Fetch4Enabled  = MAKEFOURCC('G', 'E', 'T', '4');
+    constexpr DWORD Fetch4Disabled = MAKEFOURCC('G', 'E', 'T', '1');
+
+    if (unlikely(Type == D3DSAMP_MIPMAPLODBIAS)) {
+      if (unlikely(Value == Fetch4Enabled)) {
+        m_fetch4Enabled |= samplerBit;
+
+        if (samplerStates[StateSampler][D3DSAMP_MAGFILTER] == D3DTEXF_POINT)
+          m_fetch4 |= samplerBit;
+      }
+      else if (unlikely(Value == Fetch4Disabled)) {
+        m_fetch4Enabled &= ~samplerBit;
+        m_fetch4        &= ~samplerBit;
+      }
+    }
+    else if (unlikely(Type == D3DSAMP_MAGFILTER && (m_fetch4Enabled & samplerBit))) {
+      // Fetch4 was requested earlier; it follows the mag filter from here on
+      if (Value == D3DTEXF_POINT)
+        m_fetch4 |= samplerBit;
+      else
+        m_fetch4 &= ~samplerBit;
+    }
+
+    return D3D_OK;
+  }
+
+
+  // Binds a texture to a sampler slot and updates all derived tracking
+  // state (texture type bits, depth texture mask, dirty masks and the
+  // active texture bookkeeping).
+  //
+  // While a state block is being recorded the call is forwarded to the
+  // recorder instead. Re-binding the texture that is already bound is a
+  // no-op.
+  HRESULT D3D9DeviceEx::SetStateTexture(DWORD StateSampler, IDirect3DBaseTexture9* pTexture) {
+    D3D9DeviceLock lock = LockDevice();
+
+    if (unlikely(ShouldRecord()))
+      return m_recorder->SetStateTexture(StateSampler, pTexture);
+
+    if (m_state.textures[StateSampler] == pTexture)
+      return D3D_OK;
+
+    auto previous = GetCommonTexture(m_state.textures[StateSampler]);
+    auto incoming = GetCommonTexture(pTexture);
+
+    const bool hasPrevious = previous != nullptr;
+    const bool hasIncoming = incoming != nullptr;
+
+    // m_textureTypes packs two bits per sampler for the first 16 samplers.
+    // Going from no texture to a texture (or back) on one of those samplers
+    // also requires the fixed-function pixel shader to be re-evaluated.
+    if (StateSampler < 16) {
+      const uint32_t shift    = StateSampler * 2;
+      const uint32_t typeBits = hasIncoming
+        ? uint32_t(incoming->GetType() - D3DRTYPE_TEXTURE)
+        : 0;
+
+      m_textureTypes &= ~(0b11u << shift);
+      m_textureTypes |= typeBits << shift;
+
+      if (!hasIncoming || !hasPrevious)
+        m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader);
+    }
+
+    const uint32_t samplerBit = 1u << StateSampler;
+
+    // Switching between a shadow and a non-shadow texture
+    // requires the sampler for this slot to be rebuilt.
+    if (hasIncoming) {
+      const bool wasShadow = (m_depthTextures & samplerBit) != 0;
+      const bool isShadow  = incoming->IsShadow();
+
+      if (wasShadow != isShadow) {
+        if (isShadow)
+          m_depthTextures |= samplerBit;
+        else
+          m_depthTextures &= ~samplerBit;
+
+        m_dirtySamplerStates |= samplerBit;
+      }
+    }
+
+    // Usage flags of both the outgoing and the incoming texture
+    // are relevant for the active texture bookkeeping below.
+    const DWORD previousUsage = hasPrevious ? previous->Desc()->Usage : 0;
+    const DWORD incomingUsage = hasIncoming ? incoming->Desc()->Usage : 0;
+
+    TextureChangePrivate(m_state.textures[StateSampler], pTexture);
+
+    m_dirtyTextures |= samplerBit;
+
+    UpdateActiveTextures(StateSampler, previousUsage | incomingUsage);
+
+    return D3D_OK;
+  }
+
+
+  // Stores a fixed-function transform matrix at the given internal
+  // transform index and flags the fixed-function vertex data as dirty.
+  // The view matrix and all world matrices additionally invalidate the
+  // vertex blend data.
+  //
+  // While a state block is being recorded the call is forwarded to the
+  // recorder instead.
+  HRESULT D3D9DeviceEx::SetStateTransform(uint32_t idx, const D3DMATRIX* pMatrix) {
+    D3D9DeviceLock lock = LockDevice();
+
+    if (unlikely(ShouldRecord()))
+      return m_recorder->SetStateTransform(idx, pMatrix);
+
+    m_state.transforms[idx] = ConvertMatrix(pMatrix);
+
+    const bool affectsVertexBlend =
+         idx == GetTransformIndex(D3DTS_VIEW)
+      || idx >= GetTransformIndex(D3DTS_WORLD);
+
+    m_flags.set(D3D9DeviceFlag::DirtyFFVertexData);
+
+    if (affectsVertexBlend)
+      m_flags.set(D3D9DeviceFlag::DirtyFFVertexBlend);
+
+    return D3D_OK;
+  }
+
+
+  // Stores a texture stage state value and flags whichever
+  // fixed-function state depends on it as dirty.
+  //
+  // Returns D3DERR_INVALIDCALL for an out-of-range stage or state type.
+  // While a state block is being recorded the call is forwarded to the
+  // recorder instead. Writing a value that is already set is a no-op.
+  HRESULT D3D9DeviceEx::SetStateTextureStageState(
+          DWORD                      Stage,
+          D3D9TextureStageStateTypes Type,
+          DWORD                      Value) {
+    D3D9DeviceLock lock = LockDevice();
+
+    if (unlikely(Stage >= caps::TextureStageCount
+              || Type  >= TextureStageStateCount))
+      return D3DERR_INVALIDCALL;
+
+    if (unlikely(ShouldRecord()))
+      return m_recorder->SetStateTextureStageState(Stage, Type, Value);
+
+    // Nothing to do for redundant writes
+    if (unlikely(m_state.textureStages[Stage][Type] == Value))
+      return D3D_OK;
+
+    m_state.textureStages[Stage][Type] = Value;
+
+    switch (Type) {
+      // Combiner setup is baked into the fixed-function pixel shader
+      case DXVK_TSS_COLOROP:
+      case DXVK_TSS_COLORARG0:
+      case DXVK_TSS_COLORARG1:
+      case DXVK_TSS_COLORARG2:
+      case DXVK_TSS_ALPHAOP:
+      case DXVK_TSS_ALPHAARG0:
+      case DXVK_TSS_ALPHAARG1:
+      case DXVK_TSS_ALPHAARG2:
+      case DXVK_TSS_RESULTARG:
+        m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader);
+        break;
+
+      case DXVK_TSS_TEXCOORDINDEX:
+        m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);
+        break;
+
+      // Track per-stage projection and invalidate both FF shaders
+      case DXVK_TSS_TEXTURETRANSFORMFLAGS:
+        if (Value & D3DTTFF_PROJECTED)
+          m_projectionBitfield |= 1 << Stage;
+        else
+          m_projectionBitfield &= ~(1 << Stage);
+
+        m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);
+        m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader);
+        break;
+
+      // Bump mapping parameters and the stage constant
+      // live in the shared pixel shader data
+      case DXVK_TSS_BUMPENVMAT00:
+      case DXVK_TSS_BUMPENVMAT01:
+      case DXVK_TSS_BUMPENVMAT10:
+      case DXVK_TSS_BUMPENVMAT11:
+      case DXVK_TSS_BUMPENVLSCALE:
+      case DXVK_TSS_BUMPENVLOFFSET:
+      case DXVK_TSS_CONSTANT:
+        m_flags.set(D3D9DeviceFlag::DirtySharedPixelShaderData);
+        break;
+
+      default:
+        break;
+    }
+
+    return D3D_OK;
+  }
+
+
+  // Reports whether the parent interface is an extended one;
+  // the answer is taken verbatim from the parent.
+  bool D3D9DeviceEx::IsExtended() {
+    const bool parentIsExtended = m_parent->IsExtended();
+    return parentIsExtended;
+  }
+
+
+  // Software vertex processing is reported as supported when the
+  // vertexPipelineStoresAndAtomics feature is enabled on the device.
+  bool D3D9DeviceEx::SupportsSWVP() {
+    const auto& coreFeatures = m_dxvkDevice->features().core.features;
+    return coreFeatures.vertexPipelineStoresAndAtomics;
+  }
+
+
+  // Returns the window handle stored in m_window
+  // (set from the focus window at construction).
+  HWND D3D9DeviceEx::GetWindow() {
+    HWND window = m_window;
+    return window;
+  }
+
+
+  // Builds the set of device features to enable for the D3D9 frontend.
+  //
+  // Features the frontend depends on are requested unconditionally;
+  // everything else is enabled only if the adapter reports support.
+  DxvkDeviceFeatures D3D9DeviceEx::GetDeviceFeatures(const Rc<DxvkAdapter>& adapter) {
+    const DxvkDeviceFeatures supported = adapter->features();
+    DxvkDeviceFeatures       enabled   = {};
+
+    auto&       core          = enabled.core.features;
+    const auto& supportedCore = supported.core.features;
+
+    // ---- Unconditionally requested core features ----
+
+    // Geometry shaders are used for some meta ops
+    core.geometryShader                       = VK_TRUE;
+    core.robustBufferAccess                   = VK_TRUE;
+
+    // DXVK Meta
+    core.shaderStorageImageWriteWithoutFormat = VK_TRUE;
+    core.imageCubeArray                       = VK_TRUE;
+
+    // SM1 level hardware
+    core.depthClamp                           = VK_TRUE;
+    core.depthBiasClamp                       = VK_TRUE;
+    core.fillModeNonSolid                     = VK_TRUE;
+    core.sampleRateShading                    = VK_TRUE;
+    core.shaderClipDistance                   = VK_TRUE;
+    core.shaderCullDistance                   = VK_TRUE;
+
+    // Ensure we support real BC formats and unofficial vendor ones.
+    core.textureCompressionBC                 = VK_TRUE;
+
+    // SM2 level hardware
+    core.occlusionQueryPrecise                = VK_TRUE;
+
+    // SM3 level hardware
+    core.multiViewport                        = VK_TRUE;
+    core.independentBlend                     = VK_TRUE;
+
+    // D3D10 level hardware supports this in D3D9 native.
+    core.fullDrawIndexUint32                  = VK_TRUE;
+
+    // ---- Core features enabled only when supported ----
+
+    // ProcessVertices
+    core.vertexPipelineStoresAndAtomics       = supportedCore.vertexPipelineStoresAndAtomics;
+
+    // SM1 level hardware (optional parts)
+    core.pipelineStatisticsQuery              = supportedCore.pipelineStatisticsQuery;
+    core.samplerAnisotropy                    = supportedCore.samplerAnisotropy;
+
+    // Enable depth bounds test if we support it.
+    core.depthBounds                          = supportedCore.depthBounds;
+
+    // ---- Extension features enabled only when supported ----
+
+    // Robustness, including null descriptors
+    enabled.extRobustness2.robustBufferAccess2 = supported.extRobustness2.robustBufferAccess2;
+    enabled.extRobustness2.nullDescriptor      = supported.extRobustness2.nullDescriptor;
+
+    enabled.extMemoryPriority.memoryPriority   = supported.extMemoryPriority.memoryPriority;
+
+    enabled.extShaderDemoteToHelperInvocation.shaderDemoteToHelperInvocation =
+      supported.extShaderDemoteToHelperInvocation.shaderDemoteToHelperInvocation;
+
+    enabled.extVertexAttributeDivisor.vertexAttributeInstanceRateDivisor =
+      supported.extVertexAttributeDivisor.vertexAttributeInstanceRateDivisor;
+    enabled.extVertexAttributeDivisor.vertexAttributeInstanceRateZeroDivisor =
+      supported.extVertexAttributeDivisor.vertexAttributeInstanceRateZeroDivisor;
+
+    enabled.extDepthClipEnable.depthClipEnable = supported.extDepthClipEnable.depthClipEnable;
+    enabled.extHostQueryReset.hostQueryReset   = supported.extHostQueryReset.hostQueryReset;
+
+    // Custom border colors are only useful to us without a format
+    if (supported.extCustomBorderColor.customBorderColorWithoutFormat) {
+      enabled.extCustomBorderColor.customBorderColors             = VK_TRUE;
+      enabled.extCustomBorderColor.customBorderColorWithoutFormat = VK_TRUE;
+    }
+
+    return enabled;
+  }
+
+
+  // Fills in the shader constant layouts for both shader stages.
+  //
+  // With software vertex processing available, the vertex shader limits
+  // come from the configured SWVP options; otherwise the regular caps
+  // apply. Pixel shader limits always come from the caps. Bool constants
+  // are packed 32 per bitmask word.
+  void D3D9DeviceEx::DetermineConstantLayouts(bool canSWVP) {
+    if (canSWVP) {
+      m_vsLayout.floatCount = uint32_t(m_d3d9Options.swvpFloatCount);
+      m_vsLayout.intCount   = uint32_t(m_d3d9Options.swvpIntCount);
+      m_vsLayout.boolCount  = uint32_t(m_d3d9Options.swvpBoolCount);
+    } else {
+      m_vsLayout.floatCount = caps::MaxFloatConstantsVS;
+      m_vsLayout.intCount   = caps::MaxOtherConstants;
+      m_vsLayout.boolCount  = caps::MaxOtherConstants;
+    }
+
+    m_psLayout.floatCount   = caps::MaxFloatConstantsPS;
+    m_psLayout.intCount     = caps::MaxOtherConstants;
+    m_psLayout.boolCount    = caps::MaxOtherConstants;
+
+    m_vsLayout.bitmaskCount = align(m_vsLayout.boolCount, 32) / 32;
+    m_psLayout.bitmaskCount = align(m_psLayout.boolCount, 32) / 32;
+  }
+
+
+  // Hands out a host-visible, host-coherent scratch slice of `size` bytes.
+  //
+  // UpBuffer = true serves from m_upBuffer: vertex/index buffer usage,
+  // additionally requested as device-local memory.
+  // UpBuffer = false serves from m_managedUploadBuffer: transfer source
+  // and uniform texel buffer usage.
+  //
+  // Requests of up to DefaultSize (1 MiB) are carved out of a shared
+  // buffer, advancing in CACHE_LINE_SIZE aligned steps. When the remaining
+  // part of the shared buffer is too small, a new backing slice is
+  // allocated for it and the buffer is invalidated on the CS thread.
+  // Larger requests receive a dedicated buffer of exactly `size` bytes.
+  //
+  // Returns the slice together with a CPU pointer to its first byte.
+  template<bool UpBuffer>
+  D3D9BufferSlice D3D9DeviceEx::AllocTempBuffer(VkDeviceSize size) {
+    constexpr VkDeviceSize DefaultSize = 1 << 20;
+
+    VkMemoryPropertyFlags memoryFlags
+      = VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
+      | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+
+    if constexpr (UpBuffer) {
+      memoryFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+    }
+
+    // Single source of truth for the buffer properties, so that the shared
+    // buffer and dedicated large buffers always agree. Dedicated upload
+    // buffers previously declared only transfer stage/access even though
+    // they carry uniform texel buffer usage, which left shader reads of
+    // large slices uncovered.
+    auto makeBufferInfo = [] (VkDeviceSize bufferSize) {
+      DxvkBufferCreateInfo info;
+      info.size = bufferSize;
+
+      if constexpr (UpBuffer) {
+        info.usage  = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT
+                    | VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
+        info.access = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT
+                    | VK_ACCESS_INDEX_READ_BIT;
+        info.stages = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
+      } else {
+        info.usage  = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT;
+        info.stages = VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+        info.access = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT;
+      }
+
+      return info;
+    };
+
+    if (size > DefaultSize) {
+      // Create a temporary buffer for very large allocations
+      D3D9BufferSlice result;
+      result.slice  = DxvkBufferSlice(m_dxvkDevice->createBuffer(makeBufferInfo(size), memoryFlags));
+      result.mapPtr = result.slice.mapPtr(0);
+      return result;
+    }
+
+    D3D9BufferSlice& currentSlice = UpBuffer ? m_upBuffer : m_managedUploadBuffer;
+
+    if (unlikely(!currentSlice.slice.defined())) {
+      // First use: create the shared buffer
+      currentSlice.slice  = DxvkBufferSlice(m_dxvkDevice->createBuffer(makeBufferInfo(DefaultSize), memoryFlags));
+      currentSlice.mapPtr = currentSlice.slice.mapPtr(0);
+    } else if (unlikely(currentSlice.slice.length() < size)) {
+      // Not enough room left: switch the buffer over to a new backing
+      // slice and start handing out memory from its beginning again.
+      auto physSlice = currentSlice.slice.buffer()->allocSlice();
+
+      currentSlice.slice  = DxvkBufferSlice(currentSlice.slice.buffer());
+      currentSlice.mapPtr = physSlice.mapPtr;
+
+      EmitCs([
+        cBuffer = currentSlice.slice.buffer(),
+        cSlice  = physSlice
+      ] (DxvkContext* ctx) {
+        ctx->invalidateBuffer(cBuffer, cSlice);
+      });
+    }
+
+    // mapPtr refers to the start of the backing slice, so the CPU pointer
+    // for the returned region is offset by the current position within it.
+    D3D9BufferSlice result;
+    result.slice  = currentSlice.slice.subSlice(0, size);
+    result.mapPtr = reinterpret_cast<char*>(currentSlice.mapPtr) + currentSlice.slice.offset();
+
+    // Advance past the region that was just handed out
+    VkDeviceSize adjust = align(size, CACHE_LINE_SIZE);
+    currentSlice.slice = currentSlice.slice.subSlice(adjust, currentSlice.slice.length() - adjust);
+    return result;
+  }
+
+  // State changes are redirected to the recorder only while one is set
+  // and it is not currently applying its own state to the device.
+  bool D3D9DeviceEx::ShouldRecord() {
+    if (m_recorder == nullptr)
+      return false;
+
+    return !m_recorder->IsApplying();
+  }
+
+
+  // Resolves the Vulkan format mapping for a D3D9 format
+  // by asking the adapter this device was created on.
+  D3D9_VK_FORMAT_MAPPING D3D9DeviceEx::LookupFormat(
+          D3D9Format            Format) const {
+    D3D9_VK_FORMAT_MAPPING mapping = m_adapter->GetFormatMapping(Format);
+    return mapping;
+  }
+
+  // Fetches the adapter's format description for a D3D9 format that
+  // has no valid Vulkan mapping. The pointer is returned as-is.
+  const DxvkFormatInfo* D3D9DeviceEx::UnsupportedFormatInfo(
+          D3D9Format            Format) const {
+    const DxvkFormatInfo* info = m_adapter->GetUnsupportedFormatInfo(Format);
+    return info;
+  }
+
+  // Makes sure a resource can be accessed by the CPU.
+  //
+  // Read-only maps wait on DxvkAccess::Write, all other maps wait on
+  // DxvkAccess::Read. Returns true once the resource is idle for that
+  // access type. Returns false, without blocking on the resource, if it
+  // is still in use and D3DLOCK_DONOTWAIT was requested.
+  bool D3D9DeviceEx::WaitForResource(
+    const Rc<DxvkResource>&                 Resource,
+          DWORD                             MapFlags) {
+    // Determine access type to wait for based on map mode
+    const DxvkAccess access = (MapFlags & D3DLOCK_READONLY)
+      ? DxvkAccess::Write
+      : DxvkAccess::Read;
+
+    // Wait for any pending D3D9 command to be executed
+    // on the CS thread so that we can determine whether
+    // the resource is currently in use or not.
+    if (!Resource->isInUse(access))
+      SynchronizeCsThread();
+
+    if (!Resource->isInUse(access))
+      return true;
+
+    if (MapFlags & D3DLOCK_DONOTWAIT) {
+      // We don't have to wait, but misbehaving games may
+      // still try to spin on `Map` until the resource is
+      // idle, so we should flush pending commands
+      FlushImplicit(FALSE);
+      return false;
+    }
+
+    // Make sure pending commands using the resource get
+    // executed on the GPU if we have to wait for it
+    Flush();
+    SynchronizeCsThread();
+
+    Resource->waitIdle(access);
+    return true;
+  }
+
+
+  // Computes the byte offset of the box origin within a mapped image.
+  //
+  // Without a box the offset is zero. With format info, the box origin
+  // is converted from texels to blocks and the format's element size is
+  // used per block; without format info the origin is used as-is with
+  // an element size of one byte.
+  uint32_t D3D9DeviceEx::CalcImageLockOffset(
+            uint32_t                SlicePitch,
+            uint32_t                RowPitch,
+      const DxvkFormatInfo*         FormatInfo,
+      const D3DBOX*                 pBox) {
+    if (pBox == nullptr)
+      return 0;
+
+    uint32_t slice       = pBox->Front;
+    uint32_t row         = pBox->Top;
+    uint32_t column      = pBox->Left;
+    uint32_t elementSize = 1;
+
+    if (FormatInfo != nullptr) {
+      elementSize = FormatInfo->elementSize;
+
+      slice  = slice  / FormatInfo->blockSize.depth;
+      row    = row    / FormatInfo->blockSize.height;
+      column = column / FormatInfo->blockSize.width;
+    }
+
+    return slice  * SlicePitch +
+           row    * RowPitch   +
+           column * elementSize;
+  }
+
+
+  // Maps one subresource of a texture for CPU access.
+  //
+  // On success, pLockedBox receives the row pitch, the slice pitch and a
+  // pointer to the first byte of the locked region inside the mapped
+  // buffer of the subresource.
+  //
+  // Returns D3DERR_INVALIDCALL if the subresource is already locked or if
+  // both D3DLOCK_DISCARD and D3DLOCK_READONLY are requested, and
+  // D3DERR_WASSTILLDRAWING if the resource is busy and D3DLOCK_DONOTWAIT
+  // is in effect.
+  //
+  // Depending on the pool and lock flags, the mapped buffer is either
+  // replaced (discard), handed out directly (managed / scratch / system
+  // memory), or refreshed from the GPU image first (everything else that
+  // was written by the GPU).
+  HRESULT D3D9DeviceEx::LockImage(
+            D3D9CommonTexture*      pResource,
+            UINT                    Face,
+            UINT                    MipLevel,
+            D3DLOCKED_BOX*          pLockedBox,
+      const D3DBOX*                 pBox,
+            DWORD                   Flags) {
+    D3D9DeviceLock lock = LockDevice();
+
+    UINT Subresource = pResource->CalcSubresource(Face, MipLevel);
+
+    // Don't allow multiple lockings.
+    if (unlikely(pResource->GetLocked(Subresource)))
+      return D3DERR_INVALIDCALL;
+
+    if (unlikely((Flags & (D3DLOCK_DISCARD | D3DLOCK_READONLY)) == (D3DLOCK_DISCARD | D3DLOCK_READONLY)))
+      return D3DERR_INVALIDCALL;
+
+    if (unlikely(!m_d3d9Options.allowDoNotWait))
+      Flags &= ~D3DLOCK_DONOTWAIT;
+
+    // NOOVERWRITE takes precedence over DISCARD if both are set
+    if (unlikely((Flags & (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE)) == (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE)))
+      Flags &= ~D3DLOCK_DISCARD;
+
+    auto& desc = *(pResource->Desc());
+
+    // True if the mapping buffer had to be created by this call
+    bool alloced = pResource->CreateBufferSubresource(Subresource);
+
+    const Rc<DxvkBuffer> mappedBuffer = pResource->GetBuffer(Subresource);
+
+    auto& formatMapping = pResource->GetFormatMapping();
+
+    const DxvkFormatInfo* formatInfo = formatMapping.IsValid()
+      ? imageFormatInfo(formatMapping.FormatColor) : UnsupportedFormatInfo(pResource->Desc()->Format);
+
+    auto subresource = pResource->GetSubresourceFromIndex(
+        formatInfo->aspectMask, Subresource);
+
+    VkExtent3D levelExtent = pResource->GetExtentMip(MipLevel);
+    VkExtent3D blockCount  = util::computeBlockCount(levelExtent, formatInfo->blockSize);
+
+    const bool systemmem = desc.Pool == D3DPOOL_SYSTEMMEM;
+    const bool managed   = IsPoolManaged(desc.Pool);
+    const bool scratch   = desc.Pool == D3DPOOL_SCRATCH;
+
+    // A box that covers the whole mip level counts as a full lock
+    bool fullResource = pBox == nullptr;
+    if (unlikely(!fullResource)) {
+      VkOffset3D lockOffset;
+      VkExtent3D lockExtent;
+
+      ConvertBox(*pBox, lockOffset, lockExtent);
+
+      fullResource = lockOffset == VkOffset3D{ 0, 0, 0 }
+                  && lockExtent.width  >= levelExtent.width
+                  && lockExtent.height >= levelExtent.height
+                  && lockExtent.depth  >= levelExtent.depth;
+    }
+
+    // If we are not locking the entire image
+    // a partial discard is meant to occur.
+    // We can't really implement that, so just ignore discard
+    // if we are not locking the full resource
+
+    // DISCARD is also ignored for MANAGED and SYSTEMMEM.
+    // DISCARD is not ignored for non-DYNAMIC unlike what the docs say.
+
+    if (!fullResource || desc.Pool != D3DPOOL_DEFAULT)
+      Flags &= ~D3DLOCK_DISCARD;
+
+    if (desc.Usage & D3DUSAGE_WRITEONLY)
+      Flags &= ~D3DLOCK_READONLY;
+
+    const bool readOnly = Flags & D3DLOCK_READONLY;
+    pResource->SetReadOnlyLocked(Subresource, readOnly);
+
+    bool renderable = desc.Usage & (D3DUSAGE_RENDERTARGET | D3DUSAGE_DEPTHSTENCIL);
+
+    // If we recently wrote to the texture on the gpu,
+    // then we need to copy -> buffer
+    // We are also always dirty if we are a render target,
+    // a depth stencil, or auto generate mipmaps.
+    bool wasWrittenByGPU = pResource->WasWrittenByGPU(Subresource) || renderable;
+    pResource->SetWrittenByGPU(Subresource, false);
+
+    DxvkBufferSliceHandle physSlice;
+
+    if (Flags & D3DLOCK_DISCARD) {
+      // We do not have to preserve the contents of the
+      // buffer if the entire image gets discarded.
+      physSlice = pResource->DiscardMapSlice(Subresource);
+
+      // mappedBuffer is const, so this capture is a plain copy
+      EmitCs([
+        cImageBuffer = mappedBuffer,
+        cBufferSlice = physSlice
+      ] (DxvkContext* ctx) {
+        ctx->invalidateBuffer(cImageBuffer, cBufferSlice);
+      });
+    }
+    else if ((managed && !m_d3d9Options.evictManagedOnUnlock) || scratch || systemmem) {
+      // Managed and scratch resources
+      // are meant to be able to provide readback without waiting.
+      // We always keep a copy of them in system memory for this reason.
+      // No need to wait as its not in use.
+      physSlice = pResource->GetMappedSlice(Subresource);
+
+      // We do not need to wait for the resource in the event the
+      // calling app promises not to overwrite data that is in use
+      // or is reading. Remember! This will only trigger for MANAGED resources
+      // that cannot get affected by GPU, therefore readonly is A-OK for NOT waiting.
+      const bool usesStagingBuffer = pResource->DoesStagingBufferUploads(Subresource);
+      const bool skipWait = (scratch || managed || (systemmem && !wasWrittenByGPU))
+        && (usesStagingBuffer || readOnly);
+
+      if (alloced) {
+        std::memset(physSlice.mapPtr, 0, physSlice.length);
+      }
+      else if (!skipWait) {
+        // If the buffer turns out to be busy, switch this subresource
+        // over to staging buffer uploads for subsequent flushes.
+        if (!(Flags & D3DLOCK_DONOTWAIT) && !WaitForResource(mappedBuffer, D3DLOCK_DONOTWAIT))
+          pResource->EnableStagingBufferUploads(Subresource);
+
+        if (!WaitForResource(mappedBuffer, Flags))
+          return D3DERR_WASSTILLDRAWING;
+      }
+    }
+    else {
+      physSlice = pResource->GetMappedSlice(Subresource);
+
+      if (!alloced || wasWrittenByGPU) {
+        if (unlikely(wasWrittenByGPU)) {
+          Rc<DxvkImage> resourceImage = pResource->GetImage();
+
+          // For single-sampled images the image itself is read back and
+          // resourceImage is moved from, which is what the null check
+          // below relies on to skip the resolve.
+          Rc<DxvkImage> mappedImage = resourceImage->info().sampleCount != 1
+            ? pResource->GetResolveImage()
+            : std::move(resourceImage);
+
+          // When using any map mode which requires the image contents
+          // to be preserved, and if the GPU has write access to the
+          // image, copy the current image contents into the buffer.
+          auto subresourceLayers = vk::makeSubresourceLayers(subresource);
+
+          // We need to resolve this, some games
+          // lock MSAA render targets even though
+          // that's entirely illegal and they explicitly
+          // tell us that they do NOT want to lock them...
+          if (resourceImage != nullptr) {
+            EmitCs([
+              cMainImage    = resourceImage,
+              cResolveImage = mappedImage,
+              cSubresource  = subresourceLayers
+            ] (DxvkContext* ctx) {
+              VkImageResolve region;
+              region.srcSubresource = cSubresource;
+              region.srcOffset      = VkOffset3D { 0, 0, 0 };
+              region.dstSubresource = cSubresource;
+              region.dstOffset      = VkOffset3D { 0, 0, 0 };
+              region.extent         = cMainImage->mipLevelExtent(cSubresource.mipLevel);
+
+              if (cSubresource.aspectMask != (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
+                ctx->resolveImage(
+                  cResolveImage, cMainImage, region,
+                  cMainImage->info().format);
+              }
+              else {
+                ctx->resolveDepthStencilImage(
+                  cResolveImage, cMainImage, region,
+                  VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,
+                  VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR);
+              }
+            });
+          }
+
+          VkFormat packedFormat = GetPackedDepthStencilFormat(desc.Format);
+
+          EmitCs([
+            cImageBuffer  = mappedBuffer,
+            cImage        = std::move(mappedImage),
+            cSubresources = subresourceLayers,
+            cLevelExtent  = levelExtent,
+            cPackedFormat = packedFormat
+          ] (DxvkContext* ctx) {
+            if (cSubresources.aspectMask != (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
+              ctx->copyImageToBuffer(cImageBuffer, 0, 4, 0,
+                cImage, cSubresources, VkOffset3D { 0, 0, 0 },
+                cLevelExtent);
+            } else {
+              // Copying DS to a packed buffer is only supported for D24S8 and D32S8
+              // right now so the 4 byte row alignment is guaranteed by the format size
+              ctx->copyDepthStencilImageToPackedBuffer(
+                cImageBuffer, 0,
+                VkOffset2D { 0, 0 },
+                VkExtent2D { cLevelExtent.width, cLevelExtent.height },
+                cImage, cSubresources,
+                VkOffset2D { 0, 0 },
+                VkExtent2D { cLevelExtent.width, cLevelExtent.height },
+                cPackedFormat);
+            }
+          });
+        }
+
+        if (!WaitForResource(mappedBuffer, Flags))
+          return D3DERR_WASSTILLDRAWING;
+      } else {
+        // If we are a new alloc, and we weren't written by the GPU
+        // that means that we are a newly initialized
+        // texture, and hence can just memset -> 0 and
+        // avoid a wait here.
+        std::memset(physSlice.mapPtr, 0, physSlice.length);
+      }
+    }
+
+    const bool atiHack = desc.Format == D3D9Format::ATI1 || desc.Format == D3D9Format::ATI2;
+    // Set up map pointer.
+    if (atiHack) {
+      // We need to lie here. The game is expected to use this info and do a workaround.
+      // It's stupid. I know.
+      pLockedBox->RowPitch   = align(std::max(desc.Width >> MipLevel, 1u), 4);
+      pLockedBox->SlicePitch = pLockedBox->RowPitch * std::max(desc.Height >> MipLevel, 1u);
+    }
+    else {
+      // Rows are aligned to 4 bytes within the mapped buffer.
+      pLockedBox->RowPitch   = align(formatInfo->elementSize * blockCount.width, 4);
+      pLockedBox->SlicePitch = pLockedBox->RowPitch * blockCount.height;
+    }
+
+    pResource->SetLocked(Subresource, true);
+
+    const bool noDirtyUpdate = Flags & D3DLOCK_NO_DIRTY_UPDATE;
+    if (likely((pResource->IsManaged() && m_d3d9Options.evictManagedOnUnlock)
+      || ((desc.Pool == D3DPOOL_DEFAULT || !noDirtyUpdate) && !readOnly))) {
+      if (pBox && MipLevel != 0) {
+        // Dirty boxes are tracked in mip 0 coordinates. The near edge of
+        // each axis is scaled up, the far edge is scaled up and clamped
+        // to the size of the top level. Front is the near edge and Back
+        // is the far edge of the depth axis.
+        D3DBOX scaledBox = *pBox;
+        scaledBox.Left   <<= MipLevel;
+        scaledBox.Right    = std::min(scaledBox.Right << MipLevel, pResource->Desc()->Width);
+        scaledBox.Top    <<= MipLevel;
+        scaledBox.Bottom   = std::min(scaledBox.Bottom << MipLevel, pResource->Desc()->Height);
+        scaledBox.Front  <<= MipLevel;
+        scaledBox.Back     = std::min(scaledBox.Back << MipLevel, pResource->Desc()->Depth);
+        pResource->AddDirtyBox(&scaledBox, Face);
+      } else {
+        pResource->AddDirtyBox(pBox, Face);
+      }
+    }
+
+    if (managed && !m_d3d9Options.evictManagedOnUnlock && !readOnly) {
+      pResource->SetNeedsUpload(Subresource, true);
+
+      // If the texture is currently bound, remember
+      // that the bound slot needs to be re-uploaded.
+      for (uint32_t i : bit::BitMask(m_activeTextures)) {
+        // Guaranteed to not be nullptr...
+        auto texInfo = GetCommonTexture(m_state.textures[i]);
+
+        if (texInfo == pResource) {
+          m_activeTexturesToUpload |= 1 << i;
+          // We can early out here, no need to add another index for this.
+          break;
+        }
+      }
+    }
+
+    // For the ATI formats the pitches above are not block based,
+    // so the offset is computed without format info as well.
+    const uint32_t offset = CalcImageLockOffset(
+      pLockedBox->SlicePitch,
+      pLockedBox->RowPitch,
+      (!atiHack) ? formatInfo : nullptr,
+      pBox);
+
+    uint8_t* data = reinterpret_cast<uint8_t*>(physSlice.mapPtr);
+    data += offset;
+    pLockedBox->pBits = data;
+    return D3D_OK;
+  }
+
+
+  // Ends a CPU mapping of one texture subresource.
+  //
+  // For buffer-backed textures that are not deferred managed resources,
+  // a non-empty dirty box is uploaded to the image right away, and the
+  // mapping buffer is released unless the texture is dynamic. Unlocking
+  // a subresource that is not locked succeeds without doing anything.
+  HRESULT D3D9DeviceEx::UnlockImage(
+          D3D9CommonTexture*      pResource,
+          UINT                    Face,
+          UINT                    MipLevel) {
+    D3D9DeviceLock lock = LockDevice();
+
+    const UINT subresourceIndex = pResource->CalcSubresource(Face, MipLevel);
+
+    // We weren't locked anyway!
+    if (unlikely(!pResource->GetLocked(subresourceIndex)))
+      return D3D_OK;
+
+    pResource->SetLocked(subresourceIndex, false);
+
+    const bool isBacked = pResource->GetMapMode() == D3D9_COMMON_TEXTURE_MAP_MODE_BACKED;
+
+    // Managed textures defer their upload unless
+    // they get evicted as soon as they are unlocked
+    const bool handledOnUnlock = !pResource->IsManaged() || m_d3d9Options.evictManagedOnUnlock;
+
+    // Flush image contents from staging if there is anything dirty
+    const D3DBOX& dirtyBox = pResource->GetDirtyBox(Face);
+
+    const bool hasDirtyRegion = dirtyBox.Left  < dirtyBox.Right
+                             && dirtyBox.Top   < dirtyBox.Bottom
+                             && dirtyBox.Front < dirtyBox.Back;
+
+    if (isBacked && hasDirtyRegion && handledOnUnlock) {
+      this->FlushImage(pResource, subresourceIndex);
+
+      if (!pResource->IsAnySubresourceLocked())
+        pResource->ClearDirtyBoxes();
+    }
+
+    // Toss our staging buffer if we're not dynamic
+    // and we aren't managed (for sysmem copy.)
+    if (isBacked && handledOnUnlock && !pResource->IsDynamic()) {
+      pResource->DestroyBufferSubresource(subresourceIndex);
+      pResource->SetWrittenByGPU(subresourceIndex, true);
+    }
+
+    return D3D_OK;
+  }
+
+
+  // Uploads the CPU-side mapping of one subresource into the GPU image.
+  //
+  // Formats without a conversion step copy the dirty box of the affected
+  // array layer, either straight from the mapped buffer or through a
+  // tightly packed staging slice. Formats that need conversion are always
+  // repacked into a staging slice and handed to the format converter.
+  // Always returns D3D_OK.
+  HRESULT D3D9DeviceEx::FlushImage(
+          D3D9CommonTexture*      pResource,
+          UINT                    Subresource) {
+    const Rc<DxvkImage> dstImage = pResource->GetImage();
+
+    // The mapped buffer holds what was written while the
+    // subresource was locked; this is the data to upload.
+    const DxvkBufferSliceHandle mappedSlice = pResource->GetMappedSlice(Subresource);
+
+    auto imageFormat      = imageFormatInfo(dstImage->info().format);
+    auto imageSubresource = pResource->GetSubresourceFromIndex(
+      imageFormat->aspectMask, Subresource);
+
+    auto conversion = pResource->GetFormatMapping().ConversionFormatInfo;
+
+    if (unlikely(conversion.FormatType != D3D9ConversionFormat_None)) {
+      VkImageSubresourceLayers convertLayers = {
+        imageSubresource.aspectMask,
+        imageSubresource.mipLevel,
+        imageSubresource.arrayLayer, 1 };
+
+      const DxvkFormatInfo* srcFormat = imageFormatInfo(pResource->GetFormatMapping().FormatColor);
+
+      VkExtent3D levelExtent = dstImage->mipLevelExtent(imageSubresource.mipLevel);
+      VkExtent3D levelBlocks = util::computeBlockCount(levelExtent, srcFormat->blockSize);
+
+      // Add more blocks for the other planes that we might have.
+      // TODO: PLEASE CLEAN ME
+      const auto planeFactor = std::min(conversion.PlaneCount, 2u);
+      levelBlocks.height *= planeFactor;
+
+      // the converter can not handle the 4 aligned pitch so we always repack into a staging buffer
+      D3D9BufferSlice staging = AllocTempBuffer<false>(mappedSlice.length);
+      VkDeviceSize srcPitch = align(levelBlocks.width * srcFormat->elementSize, 4);
+
+      util::packImageData(
+        staging.mapPtr, mappedSlice.mapPtr, levelBlocks, srcFormat->elementSize,
+        srcPitch, planeFactor * srcPitch * levelBlocks.height);
+
+      Flush();
+      SynchronizeCsThread();
+
+      m_converter->ConvertFormat(
+        conversion,
+        dstImage, convertLayers,
+        staging.slice);
+    }
+    else {
+      const auto& blockSize = imageFormat->blockSize;
+
+      VkImageSubresourceLayers copyLayers = {
+        VK_IMAGE_ASPECT_COLOR_BIT,
+        imageSubresource.mipLevel,
+        imageSubresource.arrayLayer, 1 };
+
+      // The dirty box is tracked in mip 0 coordinates, so scale it down
+      // to this mip level and snap its origin to block boundaries.
+      const D3DBOX& dirtyBox = pResource->GetDirtyBox(imageSubresource.arrayLayer);
+
+      VkOffset3D copyOffset = {
+        int32_t(alignDown(dirtyBox.Left  >> imageSubresource.mipLevel, blockSize.width)),
+        int32_t(alignDown(dirtyBox.Top   >> imageSubresource.mipLevel, blockSize.height)),
+        int32_t(alignDown(dirtyBox.Front >> imageSubresource.mipLevel, blockSize.depth))
+      };
+
+      VkExtent3D dirtyExtent = util::computeMipLevelExtent({
+        uint32_t(dirtyBox.Right  - int32_t(alignDown(dirtyBox.Left,  blockSize.width))),
+        uint32_t(dirtyBox.Bottom - int32_t(alignDown(dirtyBox.Top,   blockSize.height))),
+        uint32_t(dirtyBox.Back   - int32_t(alignDown(dirtyBox.Front, blockSize.depth)))
+      }, imageSubresource.mipLevel);
+
+      VkExtent3D dirtyBlocks = util::computeBlockCount(dirtyExtent, blockSize);
+      VkExtent3D copyExtent  = util::computeBlockExtent(dirtyBlocks, blockSize);
+
+      VkExtent3D levelExtent = dstImage->mipLevelExtent(imageSubresource.mipLevel);
+      VkExtent3D levelBlocks = util::computeBlockCount(levelExtent, blockSize);
+
+      // Never copy past the edges of the mip level
+      copyExtent.width  = std::min<uint32_t>(levelExtent.width  - copyOffset.x, copyExtent.width);
+      copyExtent.height = std::min<uint32_t>(levelExtent.height - copyOffset.y, copyExtent.height);
+      copyExtent.depth  = std::min<uint32_t>(levelExtent.depth  - copyOffset.z, copyExtent.depth);
+
+      // Locate the first dirty block inside the mapped buffer
+      VkOffset3D   firstBlock    = util::computeBlockOffset(copyOffset, blockSize);
+      VkDeviceSize rowPitch      = align(levelBlocks.width * imageFormat->elementSize, 4);
+      VkDeviceSize mappedOffset  = firstBlock.z * levelBlocks.height * rowPitch
+                                 + firstBlock.y * rowPitch
+                                 + firstBlock.x * imageFormat->elementSize;
+
+      VkDeviceSize copyRowAlignment = 0;
+      DxvkBufferSlice uploadSlice;
+
+      if (pResource->DoesStagingBufferUploads(Subresource)) {
+        // Pack only the dirty region into a temporary slice
+        VkDeviceSize packedSize = dirtyBlocks.width * dirtyBlocks.height * dirtyBlocks.depth * imageFormat->elementSize;
+        D3D9BufferSlice staging = AllocTempBuffer<false>(packedSize);
+        uploadSlice = staging.slice;
+
+        void* mappedData = reinterpret_cast<uint8_t*>(mappedSlice.mapPtr) + mappedOffset;
+        util::packImageData(
+          staging.mapPtr, mappedData, dirtyBlocks, imageFormat->elementSize,
+          rowPitch, rowPitch * levelBlocks.height);
+      } else {
+        // Copy straight out of the mapped buffer
+        uploadSlice = DxvkBufferSlice(pResource->GetBuffer(Subresource), mappedOffset, mappedSlice.length);
+        copyRowAlignment = rowPitch; // row alignment can act as the pitch parameter
+      }
+
+      EmitCs([
+        cUploadSlice  = std::move(uploadSlice),
+        cImage        = dstImage,
+        cLayers       = copyLayers,
+        cExtent       = copyExtent,
+        cImageOffset  = copyOffset,
+        cRowAlignment = copyRowAlignment
+      ] (DxvkContext* ctx) {
+        ctx->copyBufferToImage(
+          cImage, cLayers,
+          cImageOffset, cExtent,
+          cUploadSlice.buffer(), cUploadSlice.offset(),
+          cRowAlignment, 0);
+      });
+    }
+
+    if (pResource->IsAutomaticMip())
+      MarkTextureMipsDirty(pResource);
+
+    return D3D_OK;
+  }
+
+
+ // Records a mip chain generation for pResource on the CS thread.
+ // Managed textures are uploaded first through UploadManagedTexture.
+ void D3D9DeviceEx::EmitGenerateMips(
+         D3D9CommonTexture* pResource) {
+   if (pResource->IsManaged())
+     UploadManagedTexture(pResource);
+
+   // Fetch everything the CS lambda needs up front; the lambda only
+   // sees its own captured copies.
+   auto sampleView = pResource->GetSampleView(false);
+   auto mipFilter  = pResource->GetMipFilter();
+
+   EmitCs([
+     cView   = sampleView,
+     cFilter = mipFilter
+   ] (DxvkContext* ctx) {
+     ctx->generateMipmaps(cView, DecodeFilter(cFilter));
+   });
+ }
+
+
+ // Maps pResource for CPU access and writes the mapped address
+ // (map pointer + OffsetToLock) to *ppbData.
+ //
+ // Returns D3DERR_INVALIDCALL when ppbData is null,
+ // D3DERR_WASSTILLDRAWING when WaitForResource reports failure for the
+ // given flags, and D3D_OK otherwise.
+ HRESULT D3D9DeviceEx::LockBuffer(
+         D3D9CommonBuffer* pResource,
+         UINT OffsetToLock,
+         UINT SizeToLock,
+         void** ppbData,
+         DWORD Flags) {
+   D3D9DeviceLock lock = LockDevice();
+
+   if (unlikely(ppbData == nullptr))
+     return D3DERR_INVALIDCALL;
+
+   auto& desc = *pResource->Desc();
+
+   const DWORD discardAndNoOverwrite = D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE;
+
+   // ---- Flag sanitisation -------------------------------------------
+
+   // DISCARD can be turned off entirely through the options.
+   if (!m_d3d9Options.allowDiscard)
+     Flags &= ~D3DLOCK_DISCARD;
+
+   // When both are requested, NOOVERWRITE wins and DISCARD is dropped.
+   if (unlikely((Flags & discardAndNoOverwrite) == discardAndNoOverwrite))
+     Flags &= ~D3DLOCK_DISCARD;
+
+   // Outside of the DEFAULT pool neither flag is honoured (tests + Halo 2).
+   // The docs tie this to DYNAMIC usage instead, but tests say otherwise.
+   if (desc.Pool != D3DPOOL_DEFAULT)
+     Flags &= ~discardAndNoOverwrite;
+
+   // DONOTWAIT is dropped for DYNAMIC buffers.
+   if (desc.Usage & D3DUSAGE_DYNAMIC)
+     Flags &= ~D3DLOCK_DONOTWAIT;
+
+   // Flags is not modified past this point.
+   const bool isDiscard  = (Flags & D3DLOCK_DISCARD)  != 0;
+   const bool isReadOnly = (Flags & D3DLOCK_READONLY) != 0;
+
+   // ---- Range used for dirty tracking and overlap tests -------------
+
+   // The caller's range is only used for a non-DISCARD lock with a
+   // non-zero size; otherwise the whole buffer is assumed.
+   // (TODO from the original: bounds are apparently meant to be respected
+   //  for DYNAMIC too, unclear how given that is meant to be DIRECT access.)
+   const bool respectUserBounds = !isDiscard && SizeToLock != 0;
+
+   // The offset is taken as-is and may be out of range; it is not clamped.
+   uint32_t offset = 0;
+   uint32_t size   = desc.Size;
+
+   if (respectUserBounds) {
+     offset = OffsetToLock;
+     size   = std::min(SizeToLock, desc.Size - offset);
+   }
+
+   D3D9Range lockRange = D3D9Range(offset, offset + size);
+
+   const bool tracksDirtyRange = desc.Pool == D3DPOOL_DEFAULT
+                              || !(Flags & D3DLOCK_NO_DIRTY_UPDATE);
+
+   if (tracksDirtyRange && !isReadOnly)
+     pResource->DirtyRange().Conjoin(lockRange);
+
+   // ---- Acquire the backing slice -----------------------------------
+
+   Rc<DxvkBuffer> mappingBuffer = pResource->GetBuffer<D3D9_COMMON_BUFFER_TYPE_MAPPING>();
+
+   DxvkBufferSliceHandle physSlice;
+
+   if (isDiscard) {
+     // Hand out a fresh backing slice and make it the new mapped
+     // slice. This assumes that mapping is the only way a buffer
+     // gets invalidated.
+     physSlice = pResource->DiscardMapSlice();
+
+     EmitCs([
+       cBuffer      = std::move(mappingBuffer),
+       cBufferSlice = physSlice
+     ] (DxvkContext* ctx) {
+       ctx->invalidateBuffer(cBuffer, cBufferSlice);
+     });
+
+     pResource->SetWrittenByGPU(false);
+     pResource->GPUReadingRange().Clear();
+   }
+   else {
+     // Reuse the slice from the previous map so that a NOOVERWRITE
+     // lock never has to synchronize with the CS thread.
+     physSlice = pResource->GetMappedSlice();
+
+     const bool gpuWrote       = pResource->WasWrittenByGPU();
+     const bool rangeIsFree    = !pResource->GPUReadingRange().Overlaps(lockRange);
+     const bool noOverwrite    = (Flags & D3DLOCK_NOOVERWRITE) != 0;
+     const bool stagingUploads = pResource->DoesStagingBufferUploads();
+     const bool mapsDirectly   = pResource->GetMapMode() == D3D9_COMMON_BUFFER_MAP_MODE_DIRECT;
+
+     // Without a pending GPU write, the CPU may touch the memory if
+     // uploads go through a staging buffer, if the lock only reads, or
+     // if a non-direct mapping does not overlap the range being read.
+     const bool cpuAccessIsSafe = stagingUploads
+                               || isReadOnly
+                               || (rangeIsFree && !mapsDirectly);
+
+     // NOOVERWRITE is the application's promise not to touch data that
+     // is in use, so it never waits.
+     const bool mustWait = !noOverwrite && (gpuWrote || !cpuAccessIsSafe);
+
+     if (mustWait) {
+       // If a blocking lock finds the resource still busy, switch the
+       // buffer over to staging buffer uploads.
+       if (!(Flags & D3DLOCK_DONOTWAIT) && !WaitForResource(mappingBuffer, D3DLOCK_DONOTWAIT))
+         pResource->EnableStagingBufferUploads();
+
+       if (!WaitForResource(mappingBuffer, Flags))
+         return D3DERR_WASSTILLDRAWING;
+
+       pResource->SetWrittenByGPU(false);
+       pResource->GPUReadingRange().Clear();
+     }
+   }
+
+   // ---- Publish the pointer and record the lock ---------------------
+
+   // The returned pointer uses the caller's offset verbatim; it is
+   // neither clamped to nor otherwise affected by the buffer size.
+   uint8_t* mappedBytes = reinterpret_cast<uint8_t*>(physSlice.mapPtr);
+   mappedBytes += OffsetToLock;
+
+   *ppbData = reinterpret_cast<void*>(mappedBytes);
+
+   // READONLY only survives in the accumulated map flags as long as
+   // no lock without READONLY has been seen.
+   DWORD accumulatedFlags = pResource->GetMapFlags();
+
+   if (!isReadOnly)
+     accumulatedFlags &= ~D3DLOCK_READONLY;
+
+   pResource->SetMapFlags(Flags | accumulatedFlags);
+   pResource->IncrementLockCount();
+
+   return D3D_OK;
+ }
+
+
+ // Copies the dirty range of pResource from its mapped slice into the
+ // REAL buffer on the CS thread, then folds the dirty range into the
+ // GPU reading range and clears it. Always returns D3D_OK.
+ HRESULT D3D9DeviceEx::FlushBuffer(
+         D3D9CommonBuffer* pResource) {
+   auto realSlice   = pResource->GetBufferSlice<D3D9_COMMON_BUFFER_TYPE_REAL>();
+   auto mappedSlice = pResource->GetMappedSlice();
+
+   D3D9Range& dirty = pResource->DirtyRange();
+
+   const auto dirtyOffset = dirty.min;
+   const auto dirtyLength = dirty.max - dirty.min;
+
+   DxvkBufferSlice uploadSlice;
+
+   if (pResource->DoesStagingBufferUploads()) {
+     // Snapshot the dirty bytes into a temporary buffer and copy from there.
+     D3D9BufferSlice staging = AllocTempBuffer<false>(dirtyLength);
+     uploadSlice = staging.slice;
+
+     void* dirtyBytes = reinterpret_cast<uint8_t*>(mappedSlice.mapPtr) + dirtyOffset;
+     memcpy(staging.mapPtr, dirtyBytes, dirtyLength);
+   } else {
+     // Copy straight out of the mapping buffer.
+     uploadSlice = DxvkBufferSlice(
+       pResource->GetBuffer<D3D9_COMMON_BUFFER_TYPE_MAPPING>(),
+       dirtyOffset, dirtyLength);
+   }
+
+   EmitCs([
+     cDstSlice  = realSlice,
+     cSrcSlice  = uploadSlice,
+     cDstOffset = dirtyOffset,
+     cLength    = dirtyLength
+   ] (DxvkContext* ctx) {
+     ctx->copyBuffer(
+       cDstSlice.buffer(),
+       cDstSlice.offset() + cDstOffset,
+       cSrcSlice.buffer(),
+       cSrcSlice.offset(),
+       cLength);
+   });
+
+   // The copy recorded above reads this range until it has executed.
+   pResource->GPUReadingRange().Conjoin(dirty);
+   dirty.Clear();
+
+   return D3D_OK;
+ }
+
+
+ // Releases one lock on pResource. Once the lock count reaches zero, a
+ // BUFFER-mode resource with a non-degenerate dirty range has its map
+ // flags reset and, if it lives in the DEFAULT pool, is flushed.
+ // Always returns D3D_OK.
+ HRESULT D3D9DeviceEx::UnlockBuffer(
+         D3D9CommonBuffer* pResource) {
+   D3D9DeviceLock lock = LockDevice();
+
+   // Still locked by someone else: nothing to do yet.
+   if (pResource->DecrementLockCount() != 0)
+     return D3D_OK;
+
+   const bool usesBufferMode = pResource->GetMapMode() == D3D9_COMMON_BUFFER_MAP_MODE_BUFFER;
+
+   if (!usesBufferMode || pResource->DirtyRange().IsDegenerate())
+     return D3D_OK;
+
+   pResource->SetMapFlags(0);
+
+   // Only DEFAULT pool buffers are flushed right here.
+   if (pResource->Desc()->Pool == D3DPOOL_DEFAULT) {
+     FlushImplicit(FALSE);
+     FlushBuffer(pResource);
+   }
+
+   return D3D_OK;
+ }
+
+
+ // Dispatches a recorded chunk to the CS thread and marks the CS
+ // thread as busy (the flag is cleared again in Flush).
+ void D3D9DeviceEx::EmitCsChunk(DxvkCsChunkRef&& chunk) {
+   m_csThread.dispatchChunk(std::move(chunk));
+
+   m_csIsBusy = true;
+ }
+
+
+ // Opportunistic flush. Without a strong hint it only flushes while the
+ // number of pending submissions is at most MaxPendingSubmits (i.e. the
+ // GPU is about to go idle), to keep the number of submissions low. In
+ // either case it is rate limited by the time since the last flush.
+ void D3D9DeviceEx::FlushImplicit(BOOL StrongHint) {
+   const uint32_t pendingSubmits = m_dxvkDevice->pendingSubmissions();
+
+   if (!StrongHint && pendingSubmits > MaxPendingSubmits)
+     return;
+
+   // The minimum interval grows with the number of pending submissions.
+   const uint32_t minIntervalUs = MinFlushIntervalUs
+                                + IncFlushIntervalUs * pendingSubmits;
+
+   const auto sinceLastFlush = dxvk::high_resolution_clock::now() - m_lastFlush;
+
+   // Prevent flushing too often in short intervals.
+   if (sinceLastFlush >= std::chrono::microseconds(minIntervalUs))
+     Flush();
+ }
+
+
+ // Submits the chunk currently being recorded and, if the CS thread
+ // reports being busy, synchronizes with it.
+ void D3D9DeviceEx::SynchronizeCsThread() {
+   D3D9DeviceLock lock = LockDevice();
+
+   // Everything recorded before this call has to reach the
+   // CS thread before we synchronize with it.
+   FlushCsChunk();
+
+   const bool workerBusy = m_csThread.isBusy();
+
+   if (workerBusy)
+     m_csThread.synchronize();
+ }
+
+
+ // Puts the floating point unit into the state D3D9 expects:
+ // exceptions masked, 24 bit precision (MSVC: 32-bit builds only),
+ // round to nearest. Logs a warning where neither path is available.
+ void D3D9DeviceEx::SetupFPU() {
+#if defined(_MSC_VER)
+   // MSVC offers CRT helpers for this that are independent of the
+   // target architecture (x86, x64, ARM, ...).
+
+   // Drop any pending exception state.
+   _clearfp();
+
+   // Mask all exceptions.
+   _controlfp(_MCW_EM, _MCW_EM);
+
+#ifndef _WIN64
+   // 24 bit precision.
+   _controlfp(_PC_24, _MCW_PC);
+#endif
+
+   // Round to nearest.
+   _controlfp(_RC_NEAR, _MCW_RC);
+#elif (defined(__GNUC__) || defined(__MINGW32__)) && (defined(__i386__) || defined(__x86_64__) || defined(__ia64))
+   // GCC/MinGW: rewrite the x87 control word with inline asm.
+   // This path exists for x86 and x64 only.
+
+   // Bits to keep from the current control word.
+   constexpr uint16_t PreservedBits = 0xF0C0;
+
+   // Exceptions disabled; the precision and rounding fields stay
+   // zero, which selects 24 bit precision and round to nearest.
+   constexpr uint16_t RequiredBits  = 0x003F;
+
+   uint16_t fpuControlWord;
+
+   // Read the current control word.
+   __asm__ __volatile__("fnstcw %0" : "=m" (fpuControlWord));
+
+   fpuControlWord &= PreservedBits;
+   fpuControlWord |= RequiredBits;
+
+   // Write the updated control word back.
+   __asm__ __volatile__("fldcw %0" : : "m" (fpuControlWord));
+#else
+   Logger::warn("D3D9DeviceEx::SetupFPU: not supported on this arch.");
+#endif
+ }
+
+
+ // Returns the summed size of all memory heaps of the adapter, limited
+ // to (maxAvailableMemory megabytes - 1) bytes.
+ int64_t D3D9DeviceEx::DetermineInitialTextureMemory() {
+   auto memoryProp = m_adapter->GetDXVKAdapter()->memoryProperties();
+
+   VkDeviceSize heapTotal = 0;
+
+   for (uint32_t heap = 0; heap != memoryProp.memoryHeapCount; ++heap)
+     heapTotal += memoryProp.memoryHeaps[heap].size;
+
+   constexpr VkDeviceSize Megabytes = 1024 * 1024;
+
+   // The value reported to the application is a 32-bit value,
+   // so the total has to be capped.
+   const VkDeviceSize memoryCap = (VkDeviceSize(m_d3d9Options.maxAvailableMemory) * Megabytes) - 1;
+
+   if (memoryCap < heapTotal)
+     heapTotal = memoryCap;
+
+   return int64_t(heapTotal);
+ }
+
+
+ // Creates a host-visible, host-coherent buffer of the given size and
+ // binds all of it to the resource slot derived from ShaderStage and
+ // BufferType.
+ //
+ // SSBO selects storage buffer usage instead of uniform buffer usage.
+ // Any stage other than VertexShader is set up for the fragment stage.
+ Rc<DxvkBuffer> D3D9DeviceEx::CreateConstantBuffer(
+         bool SSBO,
+         VkDeviceSize Size,
+         DxsoProgramType ShaderStage,
+         DxsoConstantBuffers BufferType) {
+   DxvkBufferCreateInfo info = { };
+   info.size = Size;
+
+   if (SSBO) {
+     info.usage  = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
+     info.access = VK_ACCESS_SHADER_READ_BIT;
+   } else {
+     info.usage  = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
+     info.access = VK_ACCESS_UNIFORM_READ_BIT;
+   }
+
+   if (ShaderStage == DxsoProgramType::VertexShader)
+     info.stages = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT;
+   else
+     info.stages = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
+
+   VkMemoryPropertyFlags memoryFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
+                                     | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+
+   // Optionally ask for device-local memory as well.
+   if (m_d3d9Options.deviceLocalConstantBuffers)
+     memoryFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+
+   Rc<DxvkBuffer> buffer = m_dxvkDevice->createBuffer(info, memoryFlags);
+
+   const uint32_t bindingSlot = computeResourceSlotId(
+     ShaderStage, DxsoBindingType::ConstantBuffer,
+     BufferType);
+
+   EmitCs([
+     cSlotId = bindingSlot,
+     cBuffer = buffer
+   ] (DxvkContext* ctx) {
+     ctx->bindResourceBuffer(cSlotId,
+       DxvkBufferSlice(cBuffer, 0, cBuffer->info().size));
+   });
+
+   return buffer;
+ }
+
+
+ // Creates and binds every constant buffer the device uses: the shader
+ // constant sets for both stages, the clip planes, the fixed function
+ // blocks, the shared pixel shader data and the vertex blend data.
+ void D3D9DeviceEx::CreateConstantBuffers() {
+   constexpr bool AsUniformBuffer = false;
+   constexpr bool AsStorageBuffer = true;
+
+   // Shader constants. Only the vertex shader set may be an SSBO.
+   m_consts[DxsoProgramTypes::VertexShader].buffer = CreateConstantBuffer(
+     m_dxsoOptions.vertexConstantBufferAsSSBO,
+     m_vsLayout.totalSize(),
+     DxsoProgramType::VertexShader,
+     DxsoConstantBuffers::VSConstantBuffer);
+
+   m_consts[DxsoProgramTypes::PixelShader].buffer = CreateConstantBuffer(
+     AsUniformBuffer,
+     m_psLayout.totalSize(),
+     DxsoProgramType::PixelShader,
+     DxsoConstantBuffers::PSConstantBuffer);
+
+   // One entry per possible clip plane.
+   m_vsClipPlanes = CreateConstantBuffer(
+     AsUniformBuffer,
+     caps::MaxClipPlanes * sizeof(D3D9ClipPlane),
+     DxsoProgramType::VertexShader,
+     DxsoConstantBuffers::VSClipPlanes);
+
+   // Fixed function blocks.
+   m_vsFixedFunction = CreateConstantBuffer(
+     AsUniformBuffer,
+     sizeof(D3D9FixedFunctionVS),
+     DxsoProgramType::VertexShader,
+     DxsoConstantBuffers::VSFixedFunction);
+
+   m_psFixedFunction = CreateConstantBuffer(
+     AsUniformBuffer,
+     sizeof(D3D9FixedFunctionPS),
+     DxsoProgramType::PixelShader,
+     DxsoConstantBuffers::PSFixedFunction);
+
+   m_psShared = CreateConstantBuffer(
+     AsUniformBuffer,
+     sizeof(D3D9SharedPS),
+     DxsoProgramType::PixelShader,
+     DxsoConstantBuffers::PSShared);
+
+   // Vertex blend data is always a storage buffer; its layout depends
+   // on whether software vertex processing is possible.
+   m_vsVertexBlend = CreateConstantBuffer(
+     AsStorageBuffer,
+     CanSWVP()
+       ? sizeof(D3D9FixedFunctionVertexBlendDataSW)
+       : sizeof(D3D9FixedFunctionVertexBlendDataHW),
+     DxsoProgramType::VertexShader,
+     DxsoConstantBuffers::VSVertexBlendData);
+ }
+
+
+ // Copies the float and integer constants of Src into pData, which is
+ // interpreted as a HardwareLayoutType. The element counts come from
+ // the constant set metadata of ShaderStage; empty sets are skipped.
+ template <DxsoProgramType ShaderStage, typename HardwareLayoutType, typename SoftwareLayoutType, typename ShaderType>
+ inline void D3D9DeviceEx::UploadHardwareConstantSet(void* pData, const SoftwareLayoutType& Src, const ShaderType& Shader) {
+   const D3D9ConstantSets& constSet = m_consts[ShaderStage];
+
+   auto* target = reinterpret_cast<HardwareLayoutType*>(pData);
+
+   const auto floatCount = constSet.meta.maxConstIndexF;
+   const auto intCount   = constSet.meta.maxConstIndexI;
+
+   if (floatCount != 0)
+     std::memcpy(target->fConsts, Src.fConsts, floatCount * sizeof(Vector4));
+
+   if (intCount != 0)
+     std::memcpy(target->iConsts, Src.iConsts, intCount * sizeof(Vector4i));
+ }
+
+
+ // Copies the float, integer and boolean constants of Src into pData at
+ // the byte offsets given by Layout. Counts are taken from the vertex
+ // shader constant set metadata; the boolean block is copied with the
+ // full bitmask size whenever any boolean constant is in use.
+ template <typename SoftwareLayoutType, typename ShaderType>
+ inline void D3D9DeviceEx::UploadSoftwareConstantSet(void* pData, const SoftwareLayoutType& Src, const D3D9ConstantLayout& Layout, const ShaderType& Shader) {
+   const D3D9ConstantSets& constSet = m_consts[DxsoProgramType::VertexShader];
+
+   auto base = reinterpret_cast<uint8_t*>(pData);
+
+   const auto floatCount = constSet.meta.maxConstIndexF;
+   const auto intCount   = constSet.meta.maxConstIndexI;
+   const auto boolCount  = constSet.meta.maxConstIndexB;
+
+   if (floatCount != 0)
+     std::memcpy(base + Layout.floatOffset(), Src.fConsts, floatCount * sizeof(Vector4));
+
+   if (intCount != 0)
+     std::memcpy(base + Layout.intOffset(), Src.iConsts, intCount * sizeof(Vector4i));
+
+   if (boolCount != 0)
+     std::memcpy(base + Layout.bitmaskOffset(), Src.bConsts, Layout.bitmaskSize());
+ }
+
+
+ // Uploads the constants of one shader stage if they are marked dirty.
+ //
+ // A fresh slice of the constant buffer is allocated and an invalidate
+ // is recorded for it, then the slice is filled: the pixel stage always
+ // uses the hardware layout, the vertex stage uses it unless software
+ // vertex processing is possible. Finally, if the metadata asks for
+ // constant copies, constants defined by the shader itself are written
+ // over their slots.
+ template <DxsoProgramType ShaderStage, typename HardwareLayoutType, typename SoftwareLayoutType, typename ShaderType>
+ inline void D3D9DeviceEx::UploadConstantSet(const SoftwareLayoutType& Src, const D3D9ConstantLayout& Layout, const ShaderType& Shader) {
+   D3D9ConstantSets& constSet = m_consts[ShaderStage];
+
+   if (!constSet.dirty)
+     return;
+
+   constSet.dirty = false;
+
+   DxvkBufferSliceHandle freshSlice = constSet.buffer->allocSlice();
+
+   EmitCs([
+     cBuffer = constSet.buffer,
+     cSlice  = freshSlice
+   ] (DxvkContext* ctx) {
+     ctx->invalidateBuffer(cBuffer, cSlice);
+   });
+
+   void* mapped = freshSlice.mapPtr;
+
+   if constexpr (ShaderStage == DxsoProgramType::PixelShader)
+     UploadHardwareConstantSet<ShaderStage, HardwareLayoutType>(mapped, Src, Shader);
+   else if (likely(!CanSWVP()))
+     UploadHardwareConstantSet<ShaderStage, HardwareLayoutType>(mapped, Src, Shader);
+   else
+     UploadSoftwareConstantSet(mapped, Src, Layout, Shader);
+
+   if (constSet.meta.needsConstantCopies) {
+     Vector4* slots = reinterpret_cast<Vector4*>(mapped);
+
+     auto& shaderDefined = GetCommonShader(Shader)->GetConstants();
+
+     for (const auto& entry : shaderDefined)
+       slots[entry.uboIdx] = *reinterpret_cast<const Vector4*>(entry.float32);
+   }
+ }
+
+
+ // Uploads the constant set of ShaderStage, picking the matching
+ // software-side constants, layout and bound shader from device state.
+ template <DxsoProgramType ShaderStage>
+ void D3D9DeviceEx::UploadConstants() {
+   if constexpr (ShaderStage == DxsoProgramTypes::VertexShader) {
+     UploadConstantSet<ShaderStage, D3D9ShaderConstantsVSHardware>(
+       m_state.vsConsts, m_vsLayout, m_state.vertexShader);
+   }
+   else {
+     UploadConstantSet<ShaderStage, D3D9ShaderConstantsPS>(
+       m_state.psConsts, m_psLayout, m_state.pixelShader);
+   }
+ }
+
+
+ // Rewrites the clip plane buffer: planes enabled in
+ // D3DRS_CLIPPLANEENABLE are copied from device state, all others are
+ // written as a default-constructed plane. Clears DirtyClipPlanes.
+ void D3D9DeviceEx::UpdateClipPlanes() {
+   m_flags.clr(D3D9DeviceFlag::DirtyClipPlanes);
+
+   auto freshSlice = m_vsClipPlanes->allocSlice();
+   auto planes     = reinterpret_cast<D3D9ClipPlane*>(freshSlice.mapPtr);
+
+   for (uint32_t plane = 0; plane < caps::MaxClipPlanes; plane++) {
+     const bool enabled = (m_state.renderStates[D3DRS_CLIPPLANEENABLE] & (1 << plane)) != 0;
+
+     if (enabled)
+       planes[plane] = m_state.clipPlanes[plane];
+     else
+       planes[plane] = D3D9ClipPlane();
+   }
+
+   EmitCs([
+     cBuffer = m_vsClipPlanes,
+     cSlice  = freshSlice
+   ] (DxvkContext* ctx) {
+     ctx->invalidateBuffer(cBuffer, cSlice);
+   });
+ }
+
+
+ // Records a push constant update of Length bytes at byte offset Offset.
+ // The bytes behind pData are copied into the CS command, so the
+ // caller's storage does not have to outlive this call.
+ template <uint32_t Offset, uint32_t Length>
+ void D3D9DeviceEx::UpdatePushConstant(const void* pData) {
+   // Fixed-size blob so the payload can be captured by value.
+   struct ConstantData { uint8_t Data[Length]; };
+
+   const ConstantData& payload = *reinterpret_cast<const ConstantData*>(pData);
+
+   EmitCs([
+     cData = payload
+   ](DxvkContext* ctx) {
+     ctx->pushConstants(Offset, Length, &cData);
+   });
+ }
+
+
+ // Recomputes one member of D3D9RenderStateInfo from the current render
+ // states (and, for the point scale factors, the viewport height) and
+ // records the matching push constant update. An unknown Item only
+ // logs a warning.
+ template <D3D9RenderStateItem Item>
+ void D3D9DeviceEx::UpdatePushConstant() {
+   auto& states = m_state.renderStates;
+
+   if constexpr (Item == D3D9RenderStateItem::AlphaRef) {
+     // Low 8 bits of the reference value, normalized to [0, 1].
+     float alphaRef = float(states[D3DRS_ALPHAREF] & 0xFF) / 255.0f;
+     UpdatePushConstant<offsetof(D3D9RenderStateInfo, alphaRef), sizeof(float)>(&alphaRef);
+   }
+   else if constexpr (Item == D3D9RenderStateItem::FogColor) {
+     Vector4 fogColor;
+     DecodeD3DCOLOR(D3DCOLOR(states[D3DRS_FOGCOLOR]), fogColor.data);
+     UpdatePushConstant<offsetof(D3D9RenderStateInfo, fogColor), sizeof(D3D9RenderStateInfo::fogColor)>(&fogColor);
+   }
+   else if constexpr (Item == D3D9RenderStateItem::FogDensity) {
+     float fogDensity = bit::cast<float>(states[D3DRS_FOGDENSITY]);
+     UpdatePushConstant<offsetof(D3D9RenderStateInfo, fogDensity), sizeof(float)>(&fogDensity);
+   }
+   else if constexpr (Item == D3D9RenderStateItem::FogEnd) {
+     float fogEnd = bit::cast<float>(states[D3DRS_FOGEND]);
+     UpdatePushConstant<offsetof(D3D9RenderStateInfo, fogEnd), sizeof(float)>(&fogEnd);
+   }
+   else if constexpr (Item == D3D9RenderStateItem::FogScale) {
+     // Reciprocal of the fog range (end - start).
+     float fogEnd   = bit::cast<float>(states[D3DRS_FOGEND]);
+     float fogStart = bit::cast<float>(states[D3DRS_FOGSTART]);
+
+     float fogScale = 1.0f / (fogEnd - fogStart);
+     UpdatePushConstant<offsetof(D3D9RenderStateInfo, fogScale), sizeof(float)>(&fogScale);
+   }
+   else if constexpr (Item == D3D9RenderStateItem::PointSize) {
+     // The render state bits are forwarded unchanged.
+     UpdatePushConstant<offsetof(D3D9RenderStateInfo, pointSize), sizeof(float)>(&states[D3DRS_POINTSIZE]);
+   }
+   else if constexpr (Item == D3D9RenderStateItem::PointSizeMin) {
+     UpdatePushConstant<offsetof(D3D9RenderStateInfo, pointSizeMin), sizeof(float)>(&states[D3DRS_POINTSIZE_MIN]);
+   }
+   else if constexpr (Item == D3D9RenderStateItem::PointSizeMax) {
+     UpdatePushConstant<offsetof(D3D9RenderStateInfo, pointSizeMax), sizeof(float)>(&states[D3DRS_POINTSIZE_MAX]);
+   }
+   else if constexpr (Item == D3D9RenderStateItem::PointScaleA) {
+     // All three point scale factors are divided by the squared viewport height.
+     const float heightSquared = float(m_state.viewport.Height * m_state.viewport.Height);
+
+     float factor = bit::cast<float>(states[D3DRS_POINTSCALE_A]);
+     factor /= heightSquared;
+
+     UpdatePushConstant<offsetof(D3D9RenderStateInfo, pointScaleA), sizeof(float)>(&factor);
+   }
+   else if constexpr (Item == D3D9RenderStateItem::PointScaleB) {
+     const float heightSquared = float(m_state.viewport.Height * m_state.viewport.Height);
+
+     float factor = bit::cast<float>(states[D3DRS_POINTSCALE_B]);
+     factor /= heightSquared;
+
+     UpdatePushConstant<offsetof(D3D9RenderStateInfo, pointScaleB), sizeof(float)>(&factor);
+   }
+   else if constexpr (Item == D3D9RenderStateItem::PointScaleC) {
+     const float heightSquared = float(m_state.viewport.Height * m_state.viewport.Height);
+
+     float factor = bit::cast<float>(states[D3DRS_POINTSCALE_C]);
+     factor /= heightSquared;
+
+     UpdatePushConstant<offsetof(D3D9RenderStateInfo, pointScaleC), sizeof(float)>(&factor);
+   }
+   else
+     Logger::warn("D3D9: Invalid push constant set to update.");
+ }
+
+
+
+ // Flushes the initializer and the format converter, then, if the CS
+ // thread was handed work since the last flush or the current chunk is
+ // not empty, records a command list flush and submits the chunk.
+ void D3D9DeviceEx::Flush() {
+   D3D9DeviceLock lock = LockDevice();
+
+   m_initializer->Flush();
+   m_converter->Flush();
+
+   // Nothing was recorded since the last flush.
+   if (!m_csIsBusy && m_csChunk->empty())
+     return;
+
+   // Have the threaded context flush its command list,
+   // then push the chunk over to the CS thread.
+   EmitCs([](DxvkContext* ctx) {
+     ctx->flushCommandList();
+   });
+
+   FlushCsChunk();
+
+   // Restart the timer that rate limits implicit flushes.
+   m_lastFlush = dxvk::high_resolution_clock::now();
+   m_csIsBusy  = false;
+ }
+
+
+ // Keeps bit `index` of m_boundRTs in sync with the render target
+ // slot: set when a surface is bound there and it is not a null
+ // surface, cleared otherwise.
+ inline void D3D9DeviceEx::UpdateBoundRTs(uint32_t index) {
+   const uint32_t slotBit = 1 << index;
+
+   const bool isBound = m_state.renderTargets[index] != nullptr
+                     && !m_state.renderTargets[index]->IsNull();
+
+   if (isBound)
+     m_boundRTs |= slotBit;
+   else
+     m_boundRTs &= ~slotBit;
+ }
+
+
+ // Keeps bit `index` of m_activeRTs in sync and re-evaluates the render
+ // target hazards for that slot. A slot counts as active when it is
+ // bound, its surface has a base texture and its color write render
+ // state is non-zero. Does nothing when hazard tracking is disabled.
+ inline void D3D9DeviceEx::UpdateActiveRTs(uint32_t index) {
+   if (!config::HazardTrackingEnabled)
+     return;
+
+   const uint32_t slotBit = 1 << index;
+
+   const bool isActive = (m_boundRTs & slotBit) != 0
+                      && m_state.renderTargets[index]->GetBaseTexture() != nullptr
+                      && m_state.renderStates[ColorWriteIndex(index)];
+
+   if (isActive)
+     m_activeRTs |= slotBit;
+   else
+     m_activeRTs &= ~slotBit;
+
+   UpdateActiveHazardsRT(slotBit);
+ }
+
+
+ // Recomputes the per-slot tracking bits for texture slot `index`
+ // (active, render target, depth stencil, needs upload, needs mip
+ // generation) and refreshes the hazard masks that combinedUsage asks
+ // for. Does nothing when all tracking features are disabled.
+ inline void D3D9DeviceEx::UpdateActiveTextures(uint32_t index, DWORD combinedUsage) {
+   const bool anyTracking = config::ManagedUploadTrackingEnabled
+                         || config::HazardTrackingEnabled
+                         || config::MipGenTrackingEnabled;
+
+   if (!anyTracking)
+     return;
+
+   const uint32_t slotBit  = 1 << index;
+   const uint32_t keepMask = ~slotBit;
+
+   // Start from a clean slate for this slot.
+   if (config::HazardTrackingEnabled) {
+     m_activeRTTextures &= keepMask;
+     m_activeDSTextures &= keepMask;
+   }
+
+   m_activeTextures &= keepMask;
+
+   if (config::ManagedUploadTrackingEnabled)
+     m_activeTexturesToUpload &= keepMask;
+
+   if (config::MipGenTrackingEnabled)
+     m_activeTexturesToGen &= keepMask;
+
+   // Re-derive the bits from whatever is bound to the slot now.
+   auto boundTexture = GetCommonTexture(m_state.textures[index]);
+
+   if (boundTexture != nullptr) {
+     m_activeTextures |= slotBit;
+
+     if (unlikely(config::HazardTrackingEnabled && boundTexture->IsRenderTarget()))
+       m_activeRTTextures |= slotBit;
+
+     if (unlikely(config::HazardTrackingEnabled && boundTexture->IsDepthStencil()))
+       m_activeDSTextures |= slotBit;
+
+     if (unlikely(config::ManagedUploadTrackingEnabled && boundTexture->NeedsAnyUpload()))
+       m_activeTexturesToUpload |= slotBit;
+
+     if (unlikely(config::MipGenTrackingEnabled && boundTexture->NeedsMipGen()))
+       m_activeTexturesToGen |= slotBit;
+   }
+
+   if (!config::HazardTrackingEnabled)
+     return;
+
+   // Render target hazards are re-evaluated for every target,
+   // depth stencil hazards only for this slot.
+   if (unlikely(combinedUsage & D3DUSAGE_RENDERTARGET))
+     UpdateActiveHazardsRT(UINT32_MAX);
+
+   if (unlikely(combinedUsage & D3DUSAGE_DEPTHSTENCIL))
+     UpdateActiveHazardsDS(slotBit);
+ }
+
+
+ // Recomputes the bits of m_activeHazardsRT selected by rtMask. A render
+ // target is hazardous when it is active and used by the pixel shader,
+ // renders to mip level 0, and its base texture is also bound to a
+ // sampler that the pixel shader uses and that holds a render target
+ // texture. Does nothing when hazard tracking is disabled.
+ inline void D3D9DeviceEx::UpdateActiveHazardsRT(uint32_t rtMask) {
+   if (!config::HazardTrackingEnabled)
+     return;
+
+   // Restrict the shader's masks to what is actually relevant.
+   auto candidates = m_psShaderMasks;
+   candidates.rtMask      &= m_activeRTs & rtMask;
+   candidates.samplerMask &= m_activeRTTextures;
+
+   m_activeHazardsRT = m_activeHazardsRT & (~rtMask);
+
+   for (uint32_t rtIdx : bit::BitMask(candidates.rtMask)) {
+     D3D9Surface* rtSurf = m_state.renderTargets[rtIdx].ptr();
+
+     for (uint32_t samplerIdx : bit::BitMask(candidates.samplerMask)) {
+       IDirect3DBaseTexture9* rtBase  = rtSurf->GetBaseTexture();
+       IDirect3DBaseTexture9* texBase = m_state.textures[samplerIdx];
+
+       // HACK: Rendering to anything but mip 0 is never treated as a
+       // hazard. Some games run screenspace passes such as blurs that
+       // sample mip 0 of a texture while rendering to its mip 1. Taking
+       // the hazard path for those is unnecessary and would shove us
+       // into GENERAL and emitting readback barriers.
+       const bool isHazard = rtSurf->GetMipLevel() == 0 && rtBase == texBase;
+
+       if (unlikely(isHazard))
+         m_activeHazardsRT |= 1 << rtIdx;
+     }
+   }
+ }
+
+
+ // Recomputes the bits of m_activeHazardsDS selected by texMask. A
+ // sampler is hazardous when it holds a depth stencil texture that is
+ // also the base texture of the bound depth stencil surface.
+ // Does nothing when hazard tracking is disabled.
+ inline void D3D9DeviceEx::UpdateActiveHazardsDS(uint32_t texMask) {
+   if (!config::HazardTrackingEnabled)
+     return;
+
+   m_activeHazardsDS = m_activeHazardsDS & (~texMask);
+
+   // Without a depth stencil surface backed by a texture
+   // there is nothing a sampler could alias.
+   if (m_state.depthStencil == nullptr)
+     return;
+
+   IDirect3DBaseTexture9* dsBase = m_state.depthStencil->GetBaseTexture();
+
+   if (dsBase == nullptr)
+     return;
+
+   const uint32_t candidateSamplers = m_activeDSTextures & texMask;
+
+   for (uint32_t samplerIdx : bit::BitMask(candidateSamplers)) {
+     IDirect3DBaseTexture9* texBase = m_state.textures[samplerIdx];
+
+     if (unlikely(dsBase == texBase))
+       m_activeHazardsDS |= 1 << samplerIdx;
+   }
+ }
+
+
+ // Marks the texture behind every hazardous render target as hazardous.
+ // When MarkHazardous returns false, the image is transitioned to the
+ // GENERAL layout and the framebuffer is flagged dirty.
+ // Does nothing when hazard tracking is disabled.
+ void D3D9DeviceEx::MarkRenderHazards() {
+   if (!config::HazardTrackingEnabled)
+     return;
+
+   for (uint32_t rtIdx : bit::BitMask(m_activeHazardsRT)) {
+     // A hazard bit implies a bound render target, so this is never null.
+     auto hazardTexture = m_state.renderTargets[rtIdx]->GetCommonTexture();
+
+     if (likely(hazardTexture->MarkHazardous()))
+       continue;
+
+     TransitionImage(hazardTexture, VK_IMAGE_LAYOUT_GENERAL);
+     m_flags.set(D3D9DeviceFlag::DirtyFramebuffer);
+   }
+ }
+
+
+ // Flushes every subresource of pResource that needs an upload and has
+ // a buffer, then clears the texture's dirty boxes and upload flags.
+ void D3D9DeviceEx::UploadManagedTexture(D3D9CommonTexture* pResource) {
+   for (uint32_t sub = 0; sub < pResource->CountSubresources(); sub++) {
+     const bool shouldFlush = pResource->NeedsUpload(sub)
+                           && pResource->GetBuffer(sub) != nullptr;
+
+     if (shouldFlush)
+       this->FlushImage(pResource, sub);
+   }
+
+   pResource->ClearDirtyBoxes();
+   pResource->ClearNeedsUpload();
+ }
+
+
+ // Uploads the textures bound to every slot set in mask and clears
+ // those slots from the pending upload mask.
+ // Does nothing when managed upload tracking is disabled.
+ void D3D9DeviceEx::UploadManagedTextures(uint32_t mask) {
+   if (!config::ManagedUploadTrackingEnabled)
+     return;
+
+   for (uint32_t slot : bit::BitMask(mask)) {
+     // Slots in the mask are expected to hold a texture.
+     auto boundTexture = GetCommonTexture(m_state.textures[slot]);
+     UploadManagedTexture(boundTexture);
+   }
+
+   m_activeTexturesToUpload &= ~mask;
+ }
+
+
+ // Generates mips for the textures bound to every slot set in mask that
+ // still need it, and clears those slots from the pending mip
+ // generation mask.
+ void D3D9DeviceEx::GenerateTextureMips(uint32_t mask) {
+   for (uint32_t slot : bit::BitMask(mask)) {
+     // Slots in the mask are expected to hold a texture.
+     auto boundTexture = GetCommonTexture(m_state.textures[slot]);
+
+     // The same texture may sit in several slots; only generate once.
+     if (!boundTexture->NeedsMipGen())
+       continue;
+
+     this->EmitGenerateMips(boundTexture);
+     boundTexture->SetNeedsMipGen(false);
+   }
+
+   m_activeTexturesToGen &= ~mask;
+ }
+
+
+ // Flags pResource as needing mip generation and as written by the GPU,
+ // and schedules mip generation on the first active texture slot that
+ // holds it. Does nothing when mip generation tracking is disabled.
+ void D3D9DeviceEx::MarkTextureMipsDirty(D3D9CommonTexture* pResource) {
+   if (!config::MipGenTrackingEnabled)
+     return;
+
+   pResource->SetNeedsMipGen(true);
+   pResource->MarkAllWrittenByGPU();
+
+   for (uint32_t slot : bit::BitMask(m_activeTextures)) {
+     // Active slots always hold a texture.
+     auto boundTexture = GetCommonTexture(m_state.textures[slot]);
+
+     if (boundTexture != pResource)
+       continue;
+
+     // A single slot is enough to get the mips generated,
+     // so there is no point in looking for further ones.
+     m_activeTexturesToGen |= 1 << slot;
+     break;
+   }
+ }
+
+
+ // Clears the mip generation request of pResource and removes every
+ // active slot holding it from the pending mip generation mask.
+ // Does nothing when mip generation tracking is disabled.
+ void D3D9DeviceEx::MarkTextureMipsUnDirty(D3D9CommonTexture* pResource) {
+   if (!config::MipGenTrackingEnabled)
+     return;
+
+   pResource->SetNeedsMipGen(false);
+
+   for (uint32_t slot : bit::BitMask(m_activeTextures)) {
+     // Active slots always hold a texture.
+     auto boundTexture = GetCommonTexture(m_state.textures[slot]);
+
+     if (boundTexture != pResource)
+       continue;
+
+     m_activeTexturesToGen &= ~(1 << slot);
+   }
+ }
+
+
+ // Removes every active slot holding pResource from the pending upload
+ // mask. Does nothing when managed upload tracking is disabled.
+ void D3D9DeviceEx::MarkTextureUploaded(D3D9CommonTexture* pResource) {
+   if (!config::ManagedUploadTrackingEnabled)
+     return;
+
+   for (uint32_t slot : bit::BitMask(m_activeTextures)) {
+     // Active slots always hold a texture.
+     auto boundTexture = GetCommonTexture(m_state.textures[slot]);
+
+     if (boundTexture != pResource)
+       continue;
+
+     m_activeTexturesToUpload &= ~(1 << slot);
+   }
+ }
+
+
+ // Updates the PointMode spec constant.
+ //
+ // Points == false: the mode is unconditionally reset to 0.
+ // Points == true:  bit 0 is set when point scaling is enabled and no
+ //                  programmable vertex shader is in use, bit 1 when
+ //                  point sprites are enabled. Dirty point scale
+ //                  factors are pushed as well, and the spec constant
+ //                  is only re-emitted when the mode changed.
+ template <bool Points>
+ void D3D9DeviceEx::UpdatePointMode() {
+   if constexpr (!Points) {
+     m_lastPointMode = 0;
+
+     EmitCs([](DxvkContext* ctx) {
+       ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::PointMode, 0);
+     });
+   }
+   else {
+     auto& states = m_state.renderStates;
+
+     const bool scaleEnabled  = states[D3DRS_POINTSCALEENABLE] && !UseProgrammableVS();
+     const bool spriteEnabled = states[D3DRS_POINTSPRITEENABLE];
+
+     uint32_t pointMode = 0u;
+
+     if (scaleEnabled)
+       pointMode |= 1u;
+
+     if (spriteEnabled)
+       pointMode |= 2u;
+
+     // The scale factors follow the render state alone, regardless
+     // of whether a programmable vertex shader is bound.
+     if (states[D3DRS_POINTSCALEENABLE] && m_flags.test(D3D9DeviceFlag::DirtyPointScale)) {
+       m_flags.clr(D3D9DeviceFlag::DirtyPointScale);
+
+       UpdatePushConstant<D3D9RenderStateItem::PointScaleA>();
+       UpdatePushConstant<D3D9RenderStateItem::PointScaleB>();
+       UpdatePushConstant<D3D9RenderStateItem::PointScaleC>();
+     }
+
+     if (unlikely(pointMode != m_lastPointMode)) {
+       EmitCs([cMode = pointMode] (DxvkContext* ctx) {
+         ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::PointMode, cMode);
+       });
+
+       m_lastPointMode = pointMode;
+     }
+   }
+ }
+
+
+ // Brings the fog push constants and fog spec constants up to date.
+ //
+ // With fog enabled, table (pixel) fog takes precedence over vertex
+ // fog. Push constants are refreshed for the effective fog mode; the
+ // three spec constants (FogEnabled, VertexFogMode, PixelFogMode) are
+ // only re-emitted while DirtyFogState is set.
+ void D3D9DeviceEx::UpdateFog() {
+   auto& states = m_state.renderStates;
+
+   const bool fogEnabled = states[D3DRS_FOGENABLE];
+
+   const bool pixelFog  = states[D3DRS_FOGTABLEMODE]  != D3DFOG_NONE && fogEnabled;
+   const bool vertexFog = states[D3DRS_FOGVERTEXMODE] != D3DFOG_NONE && fogEnabled && !pixelFog;
+
+   // Pushes the dirty constants that the given fog mode depends on.
+   auto refreshConstants = [&](D3DFOGMODE FogMode) {
+     if (m_flags.test(D3D9DeviceFlag::DirtyFogColor)) {
+       m_flags.clr(D3D9DeviceFlag::DirtyFogColor);
+       UpdatePushConstant<D3D9RenderStateItem::FogColor>();
+     }
+
+     switch (FogMode) {
+       case D3DFOG_LINEAR:
+         if (m_flags.test(D3D9DeviceFlag::DirtyFogScale)) {
+           m_flags.clr(D3D9DeviceFlag::DirtyFogScale);
+           UpdatePushConstant<D3D9RenderStateItem::FogScale>();
+         }
+
+         if (m_flags.test(D3D9DeviceFlag::DirtyFogEnd)) {
+           m_flags.clr(D3D9DeviceFlag::DirtyFogEnd);
+           UpdatePushConstant<D3D9RenderStateItem::FogEnd>();
+         }
+         break;
+
+       case D3DFOG_EXP:
+       case D3DFOG_EXP2:
+         if (m_flags.test(D3D9DeviceFlag::DirtyFogDensity)) {
+           m_flags.clr(D3D9DeviceFlag::DirtyFogDensity);
+           UpdatePushConstant<D3D9RenderStateItem::FogDensity>();
+         }
+         break;
+
+       default:
+         break;
+     }
+   };
+
+   // At most one of the two modes ends up being something other than NONE.
+   D3DFOGMODE vertexMode = D3DFOG_NONE;
+   D3DFOGMODE pixelMode  = D3DFOG_NONE;
+
+   if (vertexFog) {
+     vertexMode = D3DFOGMODE(states[D3DRS_FOGVERTEXMODE]);
+     refreshConstants(vertexMode);
+   }
+   else if (pixelFog) {
+     pixelMode = D3DFOGMODE(states[D3DRS_FOGTABLEMODE]);
+     refreshConstants(pixelMode);
+   }
+   else if (fogEnabled) {
+     // Fog is enabled without a fog mode: only the color matters.
+     refreshConstants(D3DFOG_NONE);
+   }
+
+   if (!m_flags.test(D3D9DeviceFlag::DirtyFogState))
+     return;
+
+   m_flags.clr(D3D9DeviceFlag::DirtyFogState);
+
+   // fogEnabled is necessarily true whenever vertex or pixel fog is used.
+   EmitCs([
+     cEnabled    = fogEnabled,
+     cVertexMode = vertexMode,
+     cPixelMode  = pixelMode
+   ] (DxvkContext* ctx) {
+     ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::FogEnabled,    cEnabled);
+     ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::VertexFogMode, cVertexMode);
+     ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::PixelFogMode,  cPixelMode);
+   });
+ }
+
+
+ // Collects the bound render targets and the depth stencil surface into
+ // a DxvkRenderTargets set and binds it on the CS thread.
+ //
+ // The first bound render target determines the sample count; every
+ // other attachment (including depth stencil) with a different sample
+ // count is left out. Clears DirtyFramebuffer.
+ void D3D9DeviceEx::BindFramebuffer() {
+   m_flags.clr(D3D9DeviceFlag::DirtyFramebuffer);
+
+   DxvkRenderTargets attachments;
+
+   const bool srgbWrite = m_state.renderStates[D3DRS_SRGBWRITEENABLE];
+
+   // D3D9 has no framebuffer objects, so the attachment set is
+   // simply rebuilt whenever the bindings change.
+   // MAX_ENUM doubles as "no sample count chosen yet".
+   VkSampleCountFlagBits chosenSamples = VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM;
+
+   for (uint32_t rtIdx : bit::BitMask(m_boundRTs)) {
+     const DxvkImageCreateInfo& rtImageInfo = m_state.renderTargets[rtIdx]->GetCommonTexture()->GetImage()->info();
+
+     if (likely(chosenSamples == VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM))
+       chosenSamples = rtImageInfo.sampleCount;
+     else if (unlikely(chosenSamples != rtImageInfo.sampleCount))
+       continue;
+
+     attachments.color[rtIdx] = {
+       m_state.renderTargets[rtIdx]->GetRenderTargetView(srgbWrite),
+       m_state.renderTargets[rtIdx]->GetRenderTargetLayout() };
+   }
+
+   if (m_state.depthStencil != nullptr) {
+     const DxvkImageCreateInfo& dsImageInfo = m_state.depthStencil->GetCommonTexture()->GetImage()->info();
+     const bool depthWrite = m_state.renderStates[D3DRS_ZWRITEENABLE];
+
+     const bool samplesMatch = chosenSamples == VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM
+                            || chosenSamples == dsImageInfo.sampleCount;
+
+     if (likely(samplesMatch)) {
+       attachments.depth = {
+         m_state.depthStencil->GetDepthStencilView(),
+         m_state.depthStencil->GetDepthStencilLayout(depthWrite, m_activeHazardsDS != 0) };
+     }
+   }
+
+   // Hand the attachment set over to the context.
+   EmitCs([
+     cAttachments = std::move(attachments)
+   ] (DxvkContext* ctx) {
+     ctx->bindRenderTargets(cAttachments);
+   });
+ }
+
+
+ // Translates the D3D9 viewport and scissor state into a Vulkan
+ // viewport/scissor pair and sets it on the CS thread.
+ // Clears DirtyViewportScissor.
+ void D3D9DeviceEx::BindViewportAndScissor() {
+   m_flags.clr(D3D9DeviceFlag::DirtyViewportScissor);
+
+   const D3DVIEWPORT9& vp = m_state.viewport;
+
+   // Correction for the half texel offset, biased slightly to keep
+   // imprecise games happy. This used to be applied for power of two
+   // resolutions only, but has to be applied all the time now that
+   // NEAREST filtering is fixed to truncate.
+   const float cf = 0.5f - (1.0f / 128.0f);
+
+   // D3D9's coordinate system has its origin in the bottom left while
+   // viewport coordinates are aligned to the top-left corner, so the
+   // viewport is flipped: it starts at the bottom edge and has a
+   // negative height.
+   VkViewport viewport;
+   viewport.x        = float(vp.X) + cf;
+   viewport.y        = float(vp.Height + vp.Y) + cf;
+   viewport.width    = float(vp.Width);
+   viewport.height   = -float(vp.Height);
+   viewport.minDepth = vp.MinZ;
+   viewport.maxDepth = vp.MaxZ;
+
+   // Vulkan has no easy way to switch the scissor test off, so with
+   // the test disabled the scissor just covers the viewport.
+   VkRect2D scissor;
+
+   const bool scissorTestEnabled = m_state.renderStates[D3DRS_SCISSORTESTENABLE];
+
+   if (scissorTestEnabled) {
+     const RECT& sr = m_state.scissorRect;
+
+     // Top-left corner: not negative and not outside the viewport origin.
+     VkOffset2D topLeft;
+     topLeft.x = std::max<int32_t>(vp.X, std::max<int32_t>(0, sr.left));
+     topLeft.y = std::max<int32_t>(vp.Y, std::max<int32_t>(0, sr.top));
+
+     // Bottom-right corner: never before the top-left corner,
+     // then limited to the far edges of the viewport.
+     VkOffset2D bottomRight;
+     bottomRight.x = std::min<int32_t>(vp.X + vp.Width,  std::max<int32_t>(topLeft.x, sr.right));
+     bottomRight.y = std::min<int32_t>(vp.Y + vp.Height, std::max<int32_t>(topLeft.y, sr.bottom));
+
+     scissor.offset        = topLeft;
+     scissor.extent.width  = uint32_t(bottomRight.x - topLeft.x);
+     scissor.extent.height = uint32_t(bottomRight.y - topLeft.y);
+   }
+   else {
+     scissor.offset = VkOffset2D { int32_t(vp.X), int32_t(vp.Y) };
+     scissor.extent = VkExtent2D { vp.Width, vp.Height };
+   }
+
+   EmitCs([
+     cViewport = viewport,
+     cScissor  = scissor
+   ] (DxvkContext* ctx) {
+     ctx->setViewports(
+       1,
+       &cViewport,
+       &cScissor);
+   });
+ }
+
+
+ // Rebuilds the multisample state from the device flags and render
+ // states and forwards it to the CS thread.
+ void D3D9DeviceEx::BindMultiSampleState() {
+   m_flags.clr(D3D9DeviceFlag::DirtyMultiSampleState);
+
+   DxvkMultisampleState multisample;
+
+   // D3DRS_MULTISAMPLEMASK is only used while the ValidSampleMask flag
+   // is set; otherwise every sample bit is enabled.
+   if (m_flags.test(D3D9DeviceFlag::ValidSampleMask))
+     multisample.sampleMask = m_state.renderStates[D3DRS_MULTISAMPLEMASK];
+   else
+     multisample.sampleMask = 0xffffffff;
+
+   multisample.enableAlphaToCoverage = IsAlphaToCoverageEnabled();
+
+   EmitCs([cMultisample = multisample] (DxvkContext* ctx) {
+     ctx->setMultisampleState(cMultisample);
+   });
+ }
+
+
+ // Translates the D3D9 blend render states into one DxvkBlendMode per
+ // render target (four in total) and applies them on the CS thread.
+ void D3D9DeviceEx::BindBlendState() {
+   m_flags.clr(D3D9DeviceFlag::DirtyBlendState);
+
+   auto& rs = m_state.renderStates;
+
+   const bool useSeparateAlpha = rs[D3DRS_SEPARATEALPHABLENDENABLE];
+
+   DxvkBlendMode baseMode;
+   baseMode.enableBlending = rs[D3DRS_ALPHABLENDENABLE] != FALSE;
+
+   D3D9BlendState colorBlend, alphaBlend;
+
+   colorBlend.Src = D3DBLEND(rs[D3DRS_SRCBLEND]);
+   colorBlend.Dst = D3DBLEND(rs[D3DRS_DESTBLEND]);
+   colorBlend.Op = D3DBLENDOP(rs[D3DRS_BLENDOP]);
+   FixupBlendState(colorBlend);
+
+   // Without separate alpha blending the alpha channel simply
+   // reuses the (already fixed up) color blend setup.
+   if (!useSeparateAlpha) {
+     alphaBlend = colorBlend;
+   }
+   else {
+     alphaBlend.Src = D3DBLEND(rs[D3DRS_SRCBLENDALPHA]);
+     alphaBlend.Dst = D3DBLEND(rs[D3DRS_DESTBLENDALPHA]);
+     alphaBlend.Op = D3DBLENDOP(rs[D3DRS_BLENDOPALPHA]);
+     FixupBlendState(alphaBlend);
+   }
+
+   baseMode.colorSrcFactor = DecodeBlendFactor(colorBlend.Src, false);
+   baseMode.colorDstFactor = DecodeBlendFactor(colorBlend.Dst, false);
+   baseMode.colorBlendOp = DecodeBlendOp(colorBlend.Op);
+
+   baseMode.alphaSrcFactor = DecodeBlendFactor(alphaBlend.Src, true);
+   baseMode.alphaDstFactor = DecodeBlendFactor(alphaBlend.Dst, true);
+   baseMode.alphaBlendOp = DecodeBlendOp(alphaBlend.Op);
+
+   baseMode.writeMask = rs[ColorWriteIndex(0)];
+
+   // Color write masks of render targets 1..3; target 0 lives in baseMode.
+   std::array<VkColorComponentFlags, 3> rtWriteMasks;
+   for (uint32_t rt = 1; rt < 4; rt++)
+     rtWriteMasks[rt - 1] = rs[ColorWriteIndex(rt)];
+
+   EmitCs([
+     cBaseMode = baseMode,
+     cRtWriteMasks = rtWriteMasks,
+     cSwizzledRTs = m_alphaSwizzleRTs
+   ](DxvkContext* ctx) {
+     // Replaces factors that read destination alpha:
+     // DST_ALPHA becomes ONE, ONE_MINUS_DST_ALPHA becomes ZERO.
+     auto withoutDstAlpha = [](VkBlendFactor factor) -> VkBlendFactor {
+       switch (factor) {
+         case VK_BLEND_FACTOR_DST_ALPHA: return VK_BLEND_FACTOR_ONE;
+         case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: return VK_BLEND_FACTOR_ZERO;
+         default: return factor;
+       }
+     };
+
+     for (uint32_t rt = 0; rt < 4; rt++) {
+       DxvkBlendMode rtMode = cBaseMode;
+
+       if (rt != 0)
+         rtMode.writeMask = cRtWriteMasks[rt - 1];
+
+       // Only targets flagged in the alpha swizzle mask get their
+       // destination-alpha factors rewritten.
+       if (cSwizzledRTs & (1 << rt)) {
+         rtMode.colorSrcFactor = withoutDstAlpha(rtMode.colorSrcFactor);
+         rtMode.colorDstFactor = withoutDstAlpha(rtMode.colorDstFactor);
+         rtMode.alphaSrcFactor = withoutDstAlpha(rtMode.alphaSrcFactor);
+         rtMode.alphaDstFactor = withoutDstAlpha(rtMode.alphaDstFactor);
+       }
+
+       ctx->setBlendMode(rt, rtMode);
+     }
+   });
+ }
+
+
+ // Decodes D3DRS_BLENDFACTOR into floating point blend constants
+ // and sets them on the CS thread.
+ void D3D9DeviceEx::BindBlendFactor() {
+   const D3DCOLOR packedFactor = D3DCOLOR(m_state.renderStates[D3DRS_BLENDFACTOR]);
+
+   DxvkBlendConstants constants;
+   DecodeD3DCOLOR(packedFactor, reinterpret_cast<float*>(&constants));
+
+   EmitCs([cConstants = constants](DxvkContext* ctx) {
+     ctx->setBlendConstants(cConstants);
+   });
+ }
+
+
+ // Translates the depth and stencil render states into a
+ // DxvkDepthStencilState and applies it on the CS thread.
+ // The stencil reference is always written as 0 here.
+ void D3D9DeviceEx::BindDepthStencilState() {
+   m_flags.clr(D3D9DeviceFlag::DirtyDepthStencilState);
+
+   auto& renderStates = m_state.renderStates;
+
+   const bool stencilEnabled = renderStates[D3DRS_STENCILENABLE];
+   const bool twoSided = stencilEnabled && renderStates[D3DRS_TWOSIDEDSTENCILMODE];
+
+   DxvkDepthStencilState dsState;
+   dsState.enableDepthTest = renderStates[D3DRS_ZENABLE] != FALSE;
+   dsState.enableDepthWrite = renderStates[D3DRS_ZWRITEENABLE] != FALSE;
+   dsState.enableStencilTest = stencilEnabled;
+   dsState.depthCompareOp = DecodeCompareOp(D3DCMPFUNC(renderStates[D3DRS_ZFUNC]));
+
+   // Front face ops stay value-initialized while stencil is disabled.
+   VkStencilOpState frontOps = VkStencilOpState();
+
+   if (stencilEnabled) {
+     frontOps.failOp = DecodeStencilOp(D3DSTENCILOP(renderStates[D3DRS_STENCILFAIL]));
+     frontOps.passOp = DecodeStencilOp(D3DSTENCILOP(renderStates[D3DRS_STENCILPASS]));
+     frontOps.depthFailOp = DecodeStencilOp(D3DSTENCILOP(renderStates[D3DRS_STENCILZFAIL]));
+     frontOps.compareOp = DecodeCompareOp(D3DCMPFUNC(renderStates[D3DRS_STENCILFUNC]));
+     frontOps.compareMask = uint32_t(renderStates[D3DRS_STENCILMASK]);
+     frontOps.writeMask = uint32_t(renderStates[D3DRS_STENCILWRITEMASK]);
+     frontOps.reference = 0;
+   }
+
+   // Back face ops mirror the front face unless two-sided stencil is on,
+   // in which case only the ops and compare function come from the CCW
+   // states while both masks are still shared with the front face.
+   VkStencilOpState backOps = frontOps;
+
+   if (twoSided) {
+     backOps.failOp = DecodeStencilOp(D3DSTENCILOP(renderStates[D3DRS_CCW_STENCILFAIL]));
+     backOps.passOp = DecodeStencilOp(D3DSTENCILOP(renderStates[D3DRS_CCW_STENCILPASS]));
+     backOps.depthFailOp = DecodeStencilOp(D3DSTENCILOP(renderStates[D3DRS_CCW_STENCILZFAIL]));
+     backOps.compareOp = DecodeCompareOp(D3DCMPFUNC(renderStates[D3DRS_CCW_STENCILFUNC]));
+     backOps.compareMask = frontOps.compareMask;
+     backOps.writeMask = frontOps.writeMask;
+     backOps.reference = 0;
+   }
+
+   dsState.stencilOpFront = frontOps;
+   dsState.stencilOpBack = backOps;
+
+   EmitCs([cDsState = dsState](DxvkContext* ctx) {
+     ctx->setDepthStencilState(cDsState);
+   });
+ }
+
+
+ // Builds the rasterizer state from the cull / fill render states
+ // and applies it on the CS thread. Depth clipping is always enabled,
+ // front faces are clockwise and conservative rasterization is off.
+ void D3D9DeviceEx::BindRasterizerState() {
+   m_flags.clr(D3D9DeviceFlag::DirtyRasterizerState);
+
+   auto& renderStates = m_state.renderStates;
+
+   DxvkRasterizerState rasterizer;
+   rasterizer.cullMode = DecodeCullMode(D3DCULL(renderStates[D3DRS_CULLMODE]));
+   rasterizer.depthBiasEnable = IsDepthBiasEnabled();
+   rasterizer.depthClipEnable = true;
+   rasterizer.frontFace = VK_FRONT_FACE_CLOCKWISE;
+   rasterizer.polygonMode = DecodeFillMode(D3DFILLMODE(renderStates[D3DRS_FILLMODE]));
+   rasterizer.conservativeMode = VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT;
+   rasterizer.sampleCount = 0;
+
+   EmitCs([cRasterizer = rasterizer](DxvkContext* ctx) {
+     ctx->setRasterizerState(cRasterizer);
+   });
+ }
+
+
+ // Reinterprets the depth bias render states as floats and sets the
+ // resulting bias values on the CS thread. The constant bias is scaled
+ // by m_depthBiasScale; the clamp is always 0.
+ void D3D9DeviceEx::BindDepthBias() {
+   m_flags.clr(D3D9DeviceFlag::DirtyDepthBias);
+
+   auto& renderStates = m_state.renderStates;
+
+   const float constantBias = bit::cast<float>(renderStates[D3DRS_DEPTHBIAS]) * m_depthBiasScale;
+   const float slopeBias = bit::cast<float>(renderStates[D3DRS_SLOPESCALEDEPTHBIAS]);
+
+   DxvkDepthBias bias;
+   bias.depthBiasConstant = constantBias;
+   bias.depthBiasSlope = slopeBias;
+   bias.depthBiasClamp = 0.0f;
+
+   EmitCs([cBias = bias](DxvkContext* ctx) {
+     ctx->setDepthBias(cBias);
+   });
+ }
+
+
+ // Updates the alpha compare op spec constant: the decoded
+ // D3DRS_ALPHAFUNC while alpha testing is enabled, ALWAYS otherwise.
+ void D3D9DeviceEx::BindAlphaTestState() {
+   m_flags.clr(D3D9DeviceFlag::DirtyAlphaTestState);
+
+   auto& renderStates = m_state.renderStates;
+
+   VkCompareOp compareOp = VK_COMPARE_OP_ALWAYS;
+
+   if (IsAlphaTestEnabled())
+     compareOp = DecodeCompareOp(D3DCMPFUNC(renderStates[D3DRS_ALPHAFUNC]));
+
+   EmitCs([cCompareOp = compareOp] (DxvkContext* ctx) {
+     ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::AlphaCompareOp, cCompareOp);
+   });
+ }
+
+
+ // Sets the stencil reference value on the CS thread, using only
+ // the low eight bits of D3DRS_STENCILREF.
+ void D3D9DeviceEx::BindDepthStencilRefrence() {
+   const uint32_t stencilRef = uint32_t(m_state.renderStates[D3DRS_STENCILREF]) & 0xff;
+
+   EmitCs([cStencilRef = stencilRef] (DxvkContext* ctx) {
+     ctx->setStencilReference(cStencilRef);
+   });
+ }
+
+
+ void D3D9DeviceEx::BindSampler(DWORD Sampler) { // Builds a sampler key from the sampler states of index `Sampler` and binds a matching sampler (cached or newly created) on the CS thread.
+ auto& state = m_state.samplerStates[Sampler];
+
+ D3D9SamplerKey key;
+ key.AddressU = D3DTEXTUREADDRESS(state[D3DSAMP_ADDRESSU]);
+ key.AddressV = D3DTEXTUREADDRESS(state[D3DSAMP_ADDRESSV]);
+ key.AddressW = D3DTEXTUREADDRESS(state[D3DSAMP_ADDRESSW]);
+ key.MagFilter = D3DTEXTUREFILTERTYPE(state[D3DSAMP_MAGFILTER]);
+ key.MinFilter = D3DTEXTUREFILTERTYPE(state[D3DSAMP_MINFILTER]);
+ key.MipFilter = D3DTEXTUREFILTERTYPE(state[D3DSAMP_MIPFILTER]);
+ key.MaxAnisotropy = state[D3DSAMP_MAXANISOTROPY];
+ key.MipmapLodBias = bit::cast<float>(state[D3DSAMP_MIPMAPLODBIAS]);
+ key.MaxMipLevel = state[D3DSAMP_MAXMIPLEVEL];
+ key.BorderColor = D3DCOLOR(state[D3DSAMP_BORDERCOLOR]);
+ key.Depth = m_depthTextures & (1u << Sampler); // set when this sampler's bit is present in m_depthTextures
+
+ if (m_d3d9Options.samplerAnisotropy != -1) { // any option value other than -1 overrides the application's filtering below
+ if (key.MagFilter == D3DTEXF_LINEAR)
+ key.MagFilter = D3DTEXF_ANISOTROPIC;
+
+ if (key.MinFilter == D3DTEXF_LINEAR)
+ key.MinFilter = D3DTEXF_ANISOTROPIC;
+
+ key.MaxAnisotropy = m_d3d9Options.samplerAnisotropy;
+ }
+
+ NormalizeSamplerKey(key);
+
+ auto samplerInfo = RemapStateSamplerShader(Sampler);
+
+ const uint32_t slot = computeResourceSlotId(
+ samplerInfo.first, DxsoBindingType::Image,
+ samplerInfo.second);
+
+ EmitCs([this,
+ cSlot = slot,
+ cKey = key
+ ] (DxvkContext* ctx) {
+ auto pair = m_samplers.find(cKey); // reuse a sampler previously stored for an identical key
+ if (pair != m_samplers.end()) {
+ ctx->bindResourceSampler(cSlot, pair->second);
+ return;
+ }
+
+ auto mipFilter = DecodeMipFilter(cKey.MipFilter);
+
+ DxvkSamplerCreateInfo info;
+ info.addressModeU = DecodeAddressMode(cKey.AddressU);
+ info.addressModeV = DecodeAddressMode(cKey.AddressV);
+ info.addressModeW = DecodeAddressMode(cKey.AddressW);
+ info.compareToDepth = cKey.Depth;
+ info.compareOp = cKey.Depth ? VK_COMPARE_OP_LESS_OR_EQUAL : VK_COMPARE_OP_NEVER;
+ info.magFilter = DecodeFilter(cKey.MagFilter);
+ info.minFilter = DecodeFilter(cKey.MinFilter);
+ info.mipmapMode = mipFilter.MipFilter;
+ info.maxAnisotropy = float(cKey.MaxAnisotropy);
+ info.useAnisotropy = cKey.MaxAnisotropy > 1;
+ info.mipmapLodBias = cKey.MipmapLodBias;
+ info.mipmapLodMin = mipFilter.MipsEnabled ? float(cKey.MaxMipLevel) : 0; // MaxMipLevel is used as the minimum LOD when mips are enabled
+ info.mipmapLodMax = mipFilter.MipsEnabled ? FLT_MAX : 0; // LOD range collapses to [0, 0] when mips are disabled
+ info.usePixelCoord = VK_FALSE;
+
+ DecodeD3DCOLOR(cKey.BorderColor, info.borderColor.float32);
+
+ if (!m_dxvkDevice->features().extCustomBorderColor.customBorderColorWithoutFormat) {
+ // HACK: Let's get OPAQUE_WHITE border color over
+ // TRANSPARENT_BLACK if the border RGB is white.
+ if (info.borderColor.float32[0] == 1.0f
+ && info.borderColor.float32[1] == 1.0f
+ && info.borderColor.float32[2] == 1.0f
+ && !m_dxvkDevice->features().extCustomBorderColor.customBorderColors) {
+ // Then set the alpha to 1.
+ info.borderColor.float32[3] = 1.0f;
+ }
+ }
+
+ try {
+ auto sampler = m_dxvkDevice->createSampler(info);
+
+ m_samplers.insert(std::make_pair(cKey, sampler)); // remember the new sampler under its key for later lookups
+ ctx->bindResourceSampler(cSlot, std::move(sampler));
+
+ m_samplerCount++;
+ }
+ catch (const DxvkError& e) { // on failure the error is logged and nothing is bound or cached for this key
+ Logger::err(e.message());
+ }
+ });
+ }
+
+
+ // Binds the sample view of the texture set on state sampler
+ // `StateSampler` to the matching shader resource slot. The sRGB
+ // view is picked when bit 0 of D3DSAMP_SRGBTEXTURE is set.
+ void D3D9DeviceEx::BindTexture(DWORD StateSampler) {
+   auto remapped = RemapStateSamplerShader(StateSampler);
+
+   uint32_t resourceSlot = computeResourceSlotId(
+     remapped.first,
+     DxsoBindingType::Image,
+     uint32_t(remapped.second));
+
+   const bool useSrgbView =
+     m_state.samplerStates[StateSampler][D3DSAMP_SRGBTEXTURE] & 0x1;
+
+   D3D9CommonTexture* texture =
+     GetCommonTexture(m_state.textures[StateSampler]);
+
+   auto sampleView = texture->GetSampleView(useSrgbView);
+
+   EmitCs([
+     cResourceSlot = resourceSlot,
+     cSampleView = std::move(sampleView)
+   ](DxvkContext* ctx) {
+     ctx->bindResourceView(cResourceSlot, cSampleView, nullptr);
+   });
+ }
+
+
+ // Binds a null view to the resource slot of every state sampler
+ // whose bit is set in `mask`. The work happens on the CS thread.
+ void D3D9DeviceEx::UnbindTextures(uint32_t mask) {
+   EmitCs([cSamplerMask = mask](DxvkContext* ctx) {
+     for (uint32_t stateSampler : bit::BitMask(cSamplerMask)) {
+       auto remapped = RemapStateSamplerShader(stateSampler);
+
+       uint32_t resourceSlot = computeResourceSlotId(
+         remapped.first,
+         DxsoBindingType::Image,
+         uint32_t(remapped.second));
+
+       ctx->bindResourceView(resourceSlot, nullptr, nullptr);
+     }
+   });
+ }
+
+
+ // Rebinds every sampler selected by `mask` and then
+ // removes those bits from the dirty sampler mask.
+ void D3D9DeviceEx::UndirtySamplers(uint32_t mask) {
+   for (uint32_t samplerIdx : bit::BitMask(mask)) {
+     BindSampler(samplerIdx);
+   }
+
+   m_dirtySamplerStates = m_dirtySamplerStates & ~mask;
+ }
+
+
+ // Refreshes the texture bindings selected by `usedMask`: samplers
+ // that are in m_activeTextures get their texture bound, all others
+ // are unbound. Afterwards the bits are removed from the dirty mask.
+ void D3D9DeviceEx::UndirtyTextures(uint32_t usedMask) {
+   const uint32_t toBind = usedMask & m_activeTextures;
+   const uint32_t toUnbind = usedMask & ~m_activeTextures;
+
+   for (uint32_t samplerIdx : bit::BitMask(toBind)) {
+     BindTexture(samplerIdx);
+   }
+
+   if (toUnbind != 0) {
+     UnbindTextures(toUnbind);
+   }
+
+   m_dirtyTextures = m_dirtyTextures & ~usedMask;
+ }
+
+ // Flags every active sampler that currently has `texture` bound
+ // as dirty. Takes the device lock for the duration of the call.
+ void D3D9DeviceEx::MarkTextureBindingDirty(IDirect3DBaseTexture9* texture) {
+   D3D9DeviceLock lock = LockDevice();
+
+   // Collect the matching sampler bits first, then merge them in one go.
+   uint32_t matchingSamplers = 0;
+
+   for (uint32_t samplerIdx : bit::BitMask(m_activeTextures)) {
+     if (m_state.textures[samplerIdx] == texture)
+       matchingSamplers |= 1u << samplerIdx;
+   }
+
+   m_dirtyTextures |= matchingSamplers;
+ }
+
+
+ // Computes the vertex and instance count of a draw.
+ // `InstanceCount` is only used when at least one stream is both
+ // instanced and used by the input layout; otherwise the count is 1.
+ D3D9DrawInfo D3D9DeviceEx::GenerateDrawInfo(
+     D3DPRIMITIVETYPE PrimitiveType,
+     UINT PrimitiveCount,
+     UINT InstanceCount) {
+   D3D9DrawInfo info;
+   info.vertexCount = GetVertexCount(PrimitiveType, PrimitiveCount);
+
+   const bool drawsInstanced =
+     (m_iaState.streamsInstanced & m_iaState.streamsUsed) != 0;
+
+   if (drawsInstanced)
+     info.instanceCount = InstanceCount;
+   else
+     info.instanceCount = 1u;
+
+   return info;
+ }
+
+
+ // Returns the low 23 bits of the stream 0 frequency value,
+ // but never less than 1.
+ uint32_t D3D9DeviceEx::GetInstanceCount() const {
+   const uint32_t frequency = m_state.streamFreq[0] & 0x7FFFFFu;
+   return frequency != 0u ? frequency : 1u;
+ }
+
+
+ void D3D9DeviceEx::PrepareDraw(D3DPRIMITIVETYPE PrimitiveType) { // Flushes pending uploads and every piece of dirty state before a draw; PrimitiveType only affects the point mode update.
+ if (config::HazardTrackingEnabled) {
+ if (unlikely(m_activeHazardsRT != 0)) {
+ EmitCs([](DxvkContext* ctx) {
+ ctx->emitRenderTargetReadbackBarrier();
+ });
+
+ if (m_d3d9Options.generalHazards)
+ MarkRenderHazards();
+ }
+
+ if (unlikely((m_lastHazardsDS == 0) != (m_activeHazardsDS == 0))) { // switching between "no" and "some" depth-stencil hazards forces a framebuffer rebind
+ m_flags.set(D3D9DeviceFlag::DirtyFramebuffer);
+ m_lastHazardsDS = m_activeHazardsDS;
+ }
+ }
+
+ for (uint32_t i = 0; i < caps::MaxStreams; i++) { // flush every bound vertex buffer that reports a pending upload
+ auto* vbo = GetCommonBuffer(m_state.vertexBuffers[i].vertexBuffer);
+ if (vbo != nullptr && vbo->NeedsUpload())
+ FlushBuffer(vbo);
+ }
+
+ const uint32_t usedSamplerMask = m_psShaderMasks.samplerMask | m_vsShaderMasks.samplerMask; // samplers referenced by either shader stage
+ const uint32_t usedTextureMask = m_activeTextures & usedSamplerMask; // of those, the ones that have an active texture
+
+ if (config::ManagedUploadTrackingEnabled) {
+ const uint32_t texturesToUpload = m_activeTexturesToUpload & usedTextureMask;
+ if (unlikely(texturesToUpload != 0))
+ UploadManagedTextures(texturesToUpload);
+ }
+
+ if (config::MipGenTrackingEnabled) {
+ const uint32_t texturesToGen = m_activeTexturesToGen & usedTextureMask;
+ if (unlikely(texturesToGen != 0))
+ GenerateTextureMips(texturesToGen);
+ }
+
+ auto* ibo = GetCommonBuffer(m_state.indices);
+ if (ibo != nullptr && ibo->NeedsUpload())
+ FlushBuffer(ibo);
+
+ UpdateFog();
+
+ if (m_flags.test(D3D9DeviceFlag::DirtyFramebuffer))
+ BindFramebuffer();
+
+ if (m_flags.test(D3D9DeviceFlag::DirtyViewportScissor))
+ BindViewportAndScissor();
+
+ const uint32_t activeDirtySamplers = m_dirtySamplerStates & usedTextureMask; // dirty samplers are only rebound when used with an active texture
+ if (activeDirtySamplers)
+ UndirtySamplers(activeDirtySamplers);
+
+ const uint32_t usedDirtyTextures = m_dirtyTextures & usedSamplerMask; // uses the sampler mask so that inactive-but-used slots get unbound as well
+ if (usedDirtyTextures)
+ UndirtyTextures(usedDirtyTextures);
+
+ if (m_flags.test(D3D9DeviceFlag::DirtyBlendState))
+ BindBlendState();
+
+ if (m_flags.test(D3D9DeviceFlag::DirtyDepthStencilState))
+ BindDepthStencilState();
+
+ if (m_flags.test(D3D9DeviceFlag::DirtyRasterizerState))
+ BindRasterizerState();
+
+ if (m_flags.test(D3D9DeviceFlag::DirtyDepthBias))
+ BindDepthBias();
+
+ if (m_flags.test(D3D9DeviceFlag::DirtyMultiSampleState))
+ BindMultiSampleState();
+
+ if (m_flags.test(D3D9DeviceFlag::DirtyAlphaTestState))
+ BindAlphaTestState();
+
+ if (m_flags.test(D3D9DeviceFlag::DirtyClipPlanes))
+ UpdateClipPlanes();
+
+ if (PrimitiveType == D3DPT_POINTLIST) // point lists always refresh the point mode
+ UpdatePointMode<true>();
+ else if (m_lastPointMode != 0) // other primitives only do so while a non-zero point mode is still recorded
+ UpdatePointMode<false>();
+
+ if (likely(UseProgrammableVS())) {
+ if (unlikely(m_flags.test(D3D9DeviceFlag::DirtyProgVertexShader))) {
+ m_flags.set(D3D9DeviceFlag::DirtyInputLayout); // a new vertex shader also requires rebuilding the input layout
+
+ BindShader<DxsoProgramType::VertexShader>(
+ GetCommonShader(m_state.vertexShader),
+ GetVertexShaderPermutation());
+ }
+ UploadConstants<DxsoProgramTypes::VertexShader>();
+
+ if (likely(!CanSWVP())) {
+ UpdateBoolSpecConstantVertex(
+ m_state.vsConsts.bConsts[0] &
+ m_consts[DxsoProgramType::VertexShader].meta.boolConstantMask);
+ } else
+ UpdateBoolSpecConstantVertex(0);
+ }
+ else {
+ UpdateBoolSpecConstantVertex(0);
+ UpdateFixedFunctionVS();
+ }
+
+ if (m_flags.test(D3D9DeviceFlag::DirtyInputLayout))
+ BindInputLayout();
+
+ auto UpdateSamplerTypes = [&](uint32_t types, uint32_t projections, uint32_t fetch4) { // updates each spec constant only when it differs from the last recorded value
+ if (m_lastSamplerTypes != types)
+ UpdateSamplerSpecConsant(types);
+
+ if (m_lastProjectionBitfield != projections)
+ UpdateProjectionSpecConstant(projections);
+
+ if (m_lastFetch4 != fetch4)
+ UpdateFetch4SpecConstant(fetch4);
+ };
+
+ if (likely(UseProgrammablePS())) {
+ UploadConstants<DxsoProgramTypes::PixelShader>();
+
+ const uint32_t psTextureMask = usedTextureMask & ((1u << 16u) - 1u); // keeps only the low 16 sampler bits
+ const uint32_t fetch4 = m_fetch4 & psTextureMask;
+ const uint32_t projected = m_projectionBitfield & psTextureMask;
+
+ const auto& programInfo = GetCommonShader(m_state.pixelShader)->GetInfo();
+
+ if (programInfo.majorVersion() >= 2)
+ UpdateSamplerTypes(m_d3d9Options.forceSamplerTypeSpecConstants ? m_textureTypes : 0u, 0u, fetch4);
+ else
+ UpdateSamplerTypes(m_textureTypes, programInfo.minorVersion() >= 4 ? 0u : projected, fetch4); // For implicit samplers...
+
+ UpdateBoolSpecConstantPixel(
+ m_state.psConsts.bConsts[0] &
+ m_consts[DxsoProgramType::PixelShader].meta.boolConstantMask);
+ }
+ else {
+ UpdateBoolSpecConstantPixel(0);
+ UpdateSamplerTypes(0u, 0u, 0u);
+
+ UpdateFixedFunctionPS();
+ }
+
+ const uint32_t depthTextureMask = m_depthTextures & usedTextureMask;
+ if (depthTextureMask != m_lastSamplerDepthMode)
+ UpdateSamplerDepthModeSpecConstant(depthTextureMask);
+
+ if (m_flags.test(D3D9DeviceFlag::DirtySharedPixelShaderData) && config::FixedFunctionEnabled) {
+ m_flags.clr(D3D9DeviceFlag::DirtySharedPixelShaderData);
+
+ DxvkBufferSliceHandle slice = m_psShared->allocSlice(); // fresh slice; the CS thread switches the buffer to it while the data is written below
+
+ EmitCs([
+ cBuffer = m_psShared,
+ cSlice = slice
+ ] (DxvkContext* ctx) {
+ ctx->invalidateBuffer(cBuffer, cSlice);
+ });
+
+ D3D9SharedPS* data = reinterpret_cast<D3D9SharedPS*>(slice.mapPtr);
+
+ for (uint32_t i = 0; i < caps::TextureStageCount; i++) {
+ DecodeD3DCOLOR(D3DCOLOR(m_state.textureStages[i][DXVK_TSS_CONSTANT]), data->Stages[i].Constant);
+
+ // Flip major-ness so we can get away with a nice easy
+ // dot in the shader without complex access
+ data->Stages[i].BumpEnvMat[0][0] = bit::cast<float>(m_state.textureStages[i][DXVK_TSS_BUMPENVMAT00]);
+ data->Stages[i].BumpEnvMat[1][0] = bit::cast<float>(m_state.textureStages[i][DXVK_TSS_BUMPENVMAT01]);
+ data->Stages[i].BumpEnvMat[0][1] = bit::cast<float>(m_state.textureStages[i][DXVK_TSS_BUMPENVMAT10]);
+ data->Stages[i].BumpEnvMat[1][1] = bit::cast<float>(m_state.textureStages[i][DXVK_TSS_BUMPENVMAT11]);
+
+ data->Stages[i].BumpEnvLScale = bit::cast<float>(m_state.textureStages[i][DXVK_TSS_BUMPENVLSCALE]);
+ data->Stages[i].BumpEnvLOffset = bit::cast<float>(m_state.textureStages[i][DXVK_TSS_BUMPENVLOFFSET]);
+ }
+ }
+
+ if (m_flags.test(D3D9DeviceFlag::DirtyDepthBounds)) {
+ m_flags.clr(D3D9DeviceFlag::DirtyDepthBounds);
+
+ DxvkDepthBounds db;
+ db.enableDepthBounds = (m_state.renderStates[D3DRS_ADAPTIVETESS_X] == uint32_t(D3D9Format::NVDB)); // depth bounds are on when ADAPTIVETESS_X holds the NVDB format code
+ db.minDepthBounds = bit::cast<float>(m_state.renderStates[D3DRS_ADAPTIVETESS_Z]); // bounds are read as raw float bits from ADAPTIVETESS_Z / _W
+ db.maxDepthBounds = bit::cast<float>(m_state.renderStates[D3DRS_ADAPTIVETESS_W]);
+
+ EmitCs([
+ cDepthBounds = db
+ ] (DxvkContext* ctx) {
+ ctx->setDepthBounds(cDepthBounds);
+ });
+ }
+ }
+
+
+ // Binds the given permutation of `pShaderModule` to the Vulkan
+ // shader stage that corresponds to the `ShaderStage` template argument.
+ template <DxsoProgramType ShaderStage>
+ void D3D9DeviceEx::BindShader(
+     const D3D9CommonShader* pShaderModule,
+     D3D9ShaderPermutation Permutation) {
+   // Resolve the permutation now so that the CS thread only sees the shader.
+   auto shader = pShaderModule->GetShader(Permutation);
+
+   EmitCs([cBoundShader = std::move(shader)] (DxvkContext* ctx) {
+     ctx->bindShader(GetShaderStage(ShaderStage), cBoundShader);
+   });
+ }
+
+
+ void D3D9DeviceEx::BindInputLayout() { // Builds the vertex attribute and binding lists from the vertex declaration and the shader input signature, then sets them on the CS thread.
+ m_flags.clr(D3D9DeviceFlag::DirtyInputLayout);
+
+ if (m_state.vertexDecl == nullptr) { // no declaration: clear the used-stream mask and set an empty layout
+ EmitCs([&cIaState = m_iaState] (DxvkContext* ctx) {
+ cIaState.streamsUsed = 0;
+ ctx->setInputLayout(0, nullptr, 0, nullptr);
+ });
+ }
+ else {
+ std::array<uint32_t, caps::MaxStreams> streamFreq; // snapshot of the stream frequencies, copied by value into the lambda
+
+ for (uint32_t i = 0; i < caps::MaxStreams; i++)
+ streamFreq[i] = m_state.streamFreq[i];
+
+ Com<D3D9VertexDecl, false> vertexDecl = m_state.vertexDecl;
+ Com<D3D9VertexShader, false> vertexShader; // stays null when the programmable vertex shader path is not in use
+
+ if (UseProgrammableVS())
+ vertexShader = m_state.vertexShader;
+
+ EmitCs([
+ &cIaState = m_iaState, // captured by reference: m_iaState is updated from inside the lambda
+ cVertexDecl = std::move(vertexDecl),
+ cVertexShader = std::move(vertexShader),
+ cStreamsInstanced = m_instancedData,
+ cStreamFreq = streamFreq
+ ] (DxvkContext* ctx) {
+ cIaState.streamsInstanced = cStreamsInstanced;
+ cIaState.streamsUsed = 0;
+
+ const auto& elements = cVertexDecl->GetElements();
+
+ std::array<DxvkVertexAttribute, 2 * caps::InputRegisterCount> attrList;
+ std::array<DxvkVertexBinding, 2 * caps::InputRegisterCount> bindList;
+
+ uint32_t attrMask = 0;
+ uint32_t bindMask = 0;
+
+ const auto& isgn = cVertexShader != nullptr // input signature: the shader's own, or the fixed-function one without a shader
+ ? GetCommonShader(cVertexShader)->GetIsgn()
+ : GetFixedFunctionIsgn();
+
+ for (uint32_t i = 0; i < isgn.elemCount; i++) {
+ const auto& decl = isgn.elems[i];
+
+ DxvkVertexAttribute attrib; // defaults below stay in place when no declaration element matches
+ attrib.location = i;
+ attrib.binding = NullStreamIdx;
+ attrib.format = VK_FORMAT_R32G32B32A32_SFLOAT;
+ attrib.offset = 0;
+
+ for (const auto& element : elements) {
+ DxsoSemantic elementSemantic = { static_cast<DxsoUsage>(element.Usage), element.UsageIndex };
+ if (elementSemantic.usage == DxsoUsage::PositionT) // PositionT elements are matched as Position
+ elementSemantic.usage = DxsoUsage::Position;
+
+ if (elementSemantic == decl.semantic) { // first matching element wins
+ attrib.binding = uint32_t(element.Stream);
+ attrib.format = DecodeDecltype(D3DDECLTYPE(element.Type));
+ attrib.offset = element.Offset;
+
+ cIaState.streamsUsed |= 1u << attrib.binding;
+ break;
+ }
+ }
+
+ attrList[i] = attrib;
+
+ DxvkVertexBinding binding;
+ binding.binding = attrib.binding;
+
+ uint32_t instanceData = cStreamFreq[binding.binding % caps::MaxStreams]; // index is wrapped into [0, MaxStreams) before the lookup
+ if (instanceData & D3DSTREAMSOURCE_INSTANCEDATA) {
+ binding.fetchRate = instanceData & 0x7FFFFF; // Remove instance packed-in flags in the data.
+ binding.inputRate = VK_VERTEX_INPUT_RATE_INSTANCE;
+ }
+ else {
+ binding.fetchRate = 0;
+ binding.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
+ }
+
+ // Check if the binding was already defined.
+ bool bindingDefined = false;
+
+ for (uint32_t j = 0; j < i; j++) {
+ uint32_t bindingId = attrList.at(j).binding;
+
+ if (binding.binding == bindingId) {
+ bindingDefined = true;
+ }
+ }
+
+ if (!bindingDefined) // a binding already used by an earlier attribute is not overwritten
+ bindList.at(binding.binding) = binding;
+
+ attrMask |= 1u << i;
+ bindMask |= 1u << binding.binding;
+ }
+
+ // Compact the attribute and binding lists to filter
+ // out attributes and bindings not used by the shader
+ uint32_t attrCount = CompactSparseList(attrList.data(), attrMask);
+ uint32_t bindCount = CompactSparseList(bindList.data(), bindMask);
+
+ ctx->setInputLayout(
+ attrCount, attrList.data(),
+ bindCount, bindList.data());
+ });
+ }
+ }
+
+
+ // Binds `pBuffer` (starting at `Offset`, with the given `Stride`) to
+ // vertex buffer slot `Slot`. A null buffer binds an empty slice with
+ // a stride of 0.
+ void D3D9DeviceEx::BindVertexBuffer(
+     UINT Slot,
+     D3D9VertexBuffer* pBuffer,
+     UINT Offset,
+     UINT Stride) {
+   const bool hasBuffer = pBuffer != nullptr;
+
+   DxvkBufferSlice slice = hasBuffer
+     ? pBuffer->GetCommonBuffer()->GetBufferSlice<D3D9_COMMON_BUFFER_TYPE_REAL>(Offset)
+     : DxvkBufferSlice();
+
+   const UINT stride = hasBuffer ? Stride : 0;
+
+   EmitCs([
+     cSlot = Slot,
+     cSlice = std::move(slice),
+     cVertexStride = stride
+   ] (DxvkContext* ctx) {
+     ctx->bindVertexBuffer(cSlot, cSlice, cVertexStride);
+   });
+ }
+
+ // Binds the current index buffer on the CS thread. Without an index
+ // buffer an empty slice is bound and INDEX32 is used as the format.
+ void D3D9DeviceEx::BindIndices() {
+   D3D9CommonBuffer* indexBuffer = GetCommonBuffer(m_state.indices);
+   const bool hasBuffer = indexBuffer != nullptr;
+
+   D3D9Format indexFormat = D3D9Format::INDEX32;
+
+   if (hasBuffer)
+     indexFormat = indexBuffer->Desc()->Format;
+
+   const VkIndexType vkIndexType = DecodeIndexType(indexFormat);
+
+   DxvkBufferSlice slice = hasBuffer
+     ? indexBuffer->GetBufferSlice<D3D9_COMMON_BUFFER_TYPE_REAL>()
+     : DxvkBufferSlice();
+
+   EmitCs([
+     cSlice = std::move(slice),
+     cVkIndexType = vkIndexType
+   ](DxvkContext* ctx) {
+     ctx->bindIndexBuffer(cSlice, cVkIndexType);
+   });
+ }
+
+
+ // Begins `pQuery` on the CS thread while holding the device lock.
+ // The lambda keeps its own reference to the query.
+ void D3D9DeviceEx::Begin(D3D9Query* pQuery) {
+   D3D9DeviceLock lock = LockDevice();
+
+   EmitCs([
+     cQueryRef = Com<D3D9Query, false>(pQuery)
+   ](DxvkContext* ctx) {
+     cQueryRef->Begin(ctx);
+   });
+ }
+
+
+ // Ends `pQuery` on the CS thread, notifies the query and then
+ // flushes depending on the query kind:
+ //   event + stalling      -> Flush()
+ //   event, not stalling   -> FlushImplicit(TRUE)
+ //   non-event + stalling  -> FlushImplicit(FALSE)
+ void D3D9DeviceEx::End(D3D9Query* pQuery) {
+   D3D9DeviceLock lock = LockDevice();
+
+   EmitCs([
+     cQueryRef = Com<D3D9Query, false>(pQuery)
+   ](DxvkContext* ctx) {
+     cQueryRef->End(ctx);
+   });
+
+   pQuery->NotifyEnd();
+
+   if (unlikely(pQuery->IsEvent())) {
+     if (pQuery->IsStalling())
+       Flush();
+     else
+       FlushImplicit(TRUE);
+
+     return;
+   }
+
+   if (pQuery->IsStalling())
+     FlushImplicit(FALSE);
+ }
+
+
+ // Replaces the bits selected by `mask` in vertex shader bool constant
+ // word `idx` with the corresponding bits of `bits`, then marks the
+ // vertex shader constants dirty.
+ void D3D9DeviceEx::SetVertexBoolBitfield(uint32_t idx, uint32_t mask, uint32_t bits) {
+   auto& word = m_state.vsConsts.bConsts[idx];
+   word = (word & ~mask) | (bits & mask);
+
+   m_consts[DxsoProgramTypes::VertexShader].dirty = true;
+ }
+
+
+ // Replaces the bits selected by `mask` in pixel shader bool constant
+ // word `idx` with the corresponding bits of `bits`, then marks the
+ // pixel shader constants dirty.
+ void D3D9DeviceEx::SetPixelBoolBitfield(uint32_t idx, uint32_t mask, uint32_t bits) {
+   auto& word = m_state.psConsts.bConsts[idx];
+   word = (word & ~mask) | (bits & mask);
+
+   m_consts[DxsoProgramTypes::PixelShader].dirty = true;
+ }
+
+
+ // Looks up or creates the shader module for the given bytecode through
+ // the shared module set. Returns D3D_OK on success; a DxvkError is
+ // logged and reported as D3DERR_INVALIDCALL.
+ HRESULT D3D9DeviceEx::CreateShaderModule(
+     D3D9CommonShader* pShaderModule,
+     VkShaderStageFlagBits ShaderStage,
+     const DWORD* pShaderBytecode,
+     const DxsoModuleInfo* pModuleInfo) {
+   HRESULT result = D3D_OK;
+
+   try {
+     m_shaderModules->GetShaderModule(
+       this, pShaderModule, ShaderStage,
+       pModuleInfo, pShaderBytecode);
+   }
+   catch (const DxvkError& err) {
+     Logger::err(err.message());
+     result = D3DERR_INVALIDCALL;
+   }
+
+   return result;
+ }
+
+
+ // Validates a shader constant range and stores it in the device state
+ // (or in the state block recorder while recording).
+ //
+ // StartRegister: index of the first register to write.
+ // pConstantData: source data for `Count` registers.
+ // Count:         number of registers to write.
+ //
+ // Returns D3DERR_INVALIDCALL when the range does not fit into the
+ // software register count, or when pConstantData is null while at least
+ // one register falls into the hardware range. Registers beyond the
+ // hardware register count are silently dropped. Otherwise returns D3D_OK
+ // (or the recorder's result while recording).
+ template <
+     DxsoProgramType ProgramType,
+     D3D9ConstantType ConstantType,
+     typename T>
+ HRESULT D3D9DeviceEx::SetShaderConstants(
+     UINT StartRegister,
+     const T* pConstantData,
+     UINT Count) {
+   const uint32_t regCountHardware = DetermineHardwareRegCount<ProgramType, ConstantType>();
+   constexpr uint32_t regCountSoftware = DetermineSoftwareRegCount<ProgramType, ConstantType>();
+
+   // Range check written without computing StartRegister + Count first:
+   // that unsigned sum can wrap around and let a huge StartRegister slip
+   // through, leading to out-of-bounds writes further down.
+   if (unlikely(StartRegister > regCountSoftware
+             || Count > regCountSoftware - StartRegister))
+     return D3DERR_INVALIDCALL;
+
+   // Trim the range to what the hardware register file can hold.
+   // The sum cannot overflow anymore thanks to the check above.
+   Count = UINT(
+     std::max<INT>(
+       std::clamp<INT>(Count + StartRegister, 0, regCountHardware) - INT(StartRegister),
+       0));
+
+   if (unlikely(Count == 0))
+     return D3D_OK;
+
+   if (unlikely(pConstantData == nullptr))
+     return D3DERR_INVALIDCALL;
+
+   if (unlikely(ShouldRecord()))
+     return m_recorder->SetShaderConstants<ProgramType, ConstantType, T>(
+       StartRegister,
+       pConstantData,
+       Count);
+
+   // Bool constants are not tracked through the dirty flag here.
+   if constexpr (ConstantType != D3D9ConstantType::Bool) {
+     uint32_t maxCount = ConstantType == D3D9ConstantType::Float
+       ? m_consts[ProgramType].meta.maxConstIndexF
+       : m_consts[ProgramType].meta.maxConstIndexI;
+
+     // Only mark dirty when the write starts below the recorded maximum index.
+     m_consts[ProgramType].dirty |= StartRegister < maxCount;
+   }
+
+   UpdateStateConstants<ProgramType, ConstantType, T>(
+     &m_state,
+     StartRegister,
+     pConstantData,
+     Count,
+     m_d3d9Options.d3d9FloatEmulation);
+
+   return D3D_OK;
+ }
+
+
+ void D3D9DeviceEx::UpdateFixedFunctionVS() { // Refreshes the fixed-function vertex shader binding and the constant buffers it reads, driven by the dirty flags.
+ // Shader...
+ bool hasPositionT = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasPositionT) : false; // all declaration flags default to false without a vertex declaration
+ bool hasBlendWeight = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasBlendWeight) : false;
+ bool hasBlendIndices = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasBlendIndices) : false;
+
+ bool indexedVertexBlend = hasBlendIndices && m_state.renderStates[D3DRS_INDEXEDVERTEXBLENDENABLE];
+
+ D3D9FF_VertexBlendMode vertexBlendMode = D3D9FF_VertexBlendMode_Disabled;
+
+ if (m_state.renderStates[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE && !hasPositionT) { // vertex blending is never used with PositionT declarations
+ vertexBlendMode = m_state.renderStates[D3DRS_VERTEXBLEND] == D3DVBF_TWEENING
+ ? D3D9FF_VertexBlendMode_Tween
+ : D3D9FF_VertexBlendMode_Normal;
+
+ if (m_state.renderStates[D3DRS_VERTEXBLEND] != D3DVBF_0WEIGHTS) { // every mode except 0WEIGHTS needs blend weights in the declaration
+ if (!hasBlendWeight)
+ vertexBlendMode = D3D9FF_VertexBlendMode_Disabled;
+ }
+ else if (!indexedVertexBlend) // 0WEIGHTS only works together with indexed vertex blending
+ vertexBlendMode = D3D9FF_VertexBlendMode_Disabled;
+ }
+
+ if (unlikely(hasPositionT && m_state.vertexShader != nullptr && !m_flags.test(D3D9DeviceFlag::DirtyProgVertexShader))) { // PositionT while a vertex shader is still set: mark layout and both shader paths dirty
+ m_flags.set(D3D9DeviceFlag::DirtyInputLayout);
+ m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);
+ m_flags.set(D3D9DeviceFlag::DirtyProgVertexShader);
+ }
+
+ if (m_flags.test(D3D9DeviceFlag::DirtyFFVertexShader)) {
+ m_flags.clr(D3D9DeviceFlag::DirtyFFVertexShader);
+
+ D3D9FFShaderKeyVS key; // fields are OR-ed into below, so the key is presumably zero-initialized by its constructor - TODO confirm
+ key.Data.Contents.HasPositionT = hasPositionT;
+ key.Data.Contents.HasColor0 = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasColor0) : false;
+ key.Data.Contents.HasColor1 = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasColor1) : false;
+ key.Data.Contents.HasPointSize = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasPointSize) : false;
+ key.Data.Contents.HasFog = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasFog) : false;
+
+ bool lighting = m_state.renderStates[D3DRS_LIGHTING] != 0 && !key.Data.Contents.HasPositionT; // lighting is disabled for PositionT vertices
+ bool colorVertex = m_state.renderStates[D3DRS_COLORVERTEX] != 0;
+ uint32_t mask = (lighting && colorVertex) // material source mask: zero unless both lighting and COLORVERTEX are on
+ ? (key.Data.Contents.HasColor0 ? D3DMCS_COLOR1 : D3DMCS_MATERIAL)
+ | (key.Data.Contents.HasColor1 ? D3DMCS_COLOR2 : D3DMCS_MATERIAL)
+ : 0;
+
+ key.Data.Contents.UseLighting = lighting;
+ key.Data.Contents.NormalizeNormals = m_state.renderStates[D3DRS_NORMALIZENORMALS];
+ key.Data.Contents.LocalViewer = m_state.renderStates[D3DRS_LOCALVIEWER] && lighting;
+
+ key.Data.Contents.RangeFog = m_state.renderStates[D3DRS_RANGEFOGENABLE];
+
+ key.Data.Contents.DiffuseSource = m_state.renderStates[D3DRS_DIFFUSEMATERIALSOURCE] & mask;
+ key.Data.Contents.AmbientSource = m_state.renderStates[D3DRS_AMBIENTMATERIALSOURCE] & mask;
+ key.Data.Contents.SpecularSource = m_state.renderStates[D3DRS_SPECULARMATERIALSOURCE] & mask;
+ key.Data.Contents.EmissiveSource = m_state.renderStates[D3DRS_EMISSIVEMATERIALSOURCE] & mask;
+
+ uint32_t lightCount = 0;
+
+ if (key.Data.Contents.UseLighting) {
+ for (uint32_t i = 0; i < caps::MaxEnabledLights; i++) {
+ if (m_state.enabledLightIndices[i] != UINT32_MAX) // UINT32_MAX entries are skipped, i.e. not counted as enabled lights
+ lightCount++;
+ }
+ }
+
+ key.Data.Contents.LightCount = lightCount;
+
+ for (uint32_t i = 0; i < caps::MaxTextureBlendStages; i++) { // packs three bits per texture stage into each of the three key fields
+ uint32_t transformFlags = m_state.textureStages[i][DXVK_TSS_TEXTURETRANSFORMFLAGS] & ~(D3DTTFF_PROJECTED); // the projected flag is stripped here
+ uint32_t index = m_state.textureStages[i][DXVK_TSS_TEXCOORDINDEX];
+ uint32_t indexFlags = (index & TCIMask) >> TCIOffset;
+
+ transformFlags &= 0b111;
+ index &= 0b111;
+
+ key.Data.Contents.TransformFlags |= transformFlags << (i * 3);
+ key.Data.Contents.TexcoordFlags |= indexFlags << (i * 3);
+ key.Data.Contents.TexcoordIndices |= index << (i * 3);
+ }
+
+ key.Data.Contents.TexcoordDeclMask = m_state.vertexDecl != nullptr ? m_state.vertexDecl->GetTexcoordMask() : 0;
+
+ key.Data.Contents.VertexBlendMode = uint32_t(vertexBlendMode);
+
+ if (vertexBlendMode == D3D9FF_VertexBlendMode_Normal) {
+ key.Data.Contents.VertexBlendIndexed = indexedVertexBlend;
+ key.Data.Contents.VertexBlendCount = m_state.renderStates[D3DRS_VERTEXBLEND] & 0xff;
+ }
+
+ key.Data.Contents.VertexClipping = IsClipPlaneEnabled();
+
+ EmitCs([
+ this,
+ cKey = key,
+ &cShaders = m_ffModules
+ ](DxvkContext* ctx) {
+ auto shader = cShaders.GetShaderModule(this, cKey); // module lookup by key happens on the CS thread
+ ctx->bindShader(VK_SHADER_STAGE_VERTEX_BIT, shader.GetShader());
+ });
+ }
+
+ if (hasPositionT && (m_flags.test(D3D9DeviceFlag::DirtyFFViewport) || m_ffZTest != IsZTestEnabled())) { // viewport info is only recomputed for PositionT vertices
+ m_flags.clr(D3D9DeviceFlag::DirtyFFViewport);
+ m_flags.set(D3D9DeviceFlag::DirtyFFVertexData); // forces the constant upload below to pick up the new viewport info
+
+ const auto& vp = m_state.viewport;
+ // For us to account for the Vulkan viewport rules
+ // when translating Window Coords -> Real Coords:
+ // We need to negate the inverse extent we multiply by,
+ // this follows through to the offset when that gets
+ // timesed by it.
+ // The 1.0f additional offset however does not,
+ // so we account for that there manually.
+
+ m_ffZTest = IsZTestEnabled();
+
+ m_viewportInfo.inverseExtent = Vector4(
+ 2.0f / float(vp.Width),
+ -2.0f / float(vp.Height),
+ m_ffZTest ? 1.0f : 0.0f,
+ 1.0f);
+
+ m_viewportInfo.inverseOffset = Vector4(
+ -float(vp.X), -float(vp.Y),
+ 0.0f, 0.0f);
+
+ m_viewportInfo.inverseOffset = m_viewportInfo.inverseOffset * m_viewportInfo.inverseExtent;
+
+ m_viewportInfo.inverseOffset = m_viewportInfo.inverseOffset + Vector4(-1.0f, 1.0f, 0.0f, 0.0f);
+ }
+
+ // Constants...
+ if (m_flags.test(D3D9DeviceFlag::DirtyFFVertexData)) {
+ m_flags.clr(D3D9DeviceFlag::DirtyFFVertexData);
+
+ DxvkBufferSliceHandle slice = m_vsFixedFunction->allocSlice(); // fresh slice; the CS thread switches the buffer to it while the data is written below
+
+ EmitCs([
+ cBuffer = m_vsFixedFunction,
+ cSlice = slice
+ ] (DxvkContext* ctx) {
+ ctx->invalidateBuffer(cBuffer, cSlice);
+ });
+
+ auto WorldView = m_state.transforms[GetTransformIndex(D3DTS_VIEW)] * m_state.transforms[GetTransformIndex(D3DTS_WORLD)];
+ auto NormalMatrix = inverse(WorldView);
+
+ D3D9FixedFunctionVS* data = reinterpret_cast<D3D9FixedFunctionVS*>(slice.mapPtr);
+ data->WorldView = WorldView;
+ data->NormalMatrix = NormalMatrix;
+ data->InverseView = transpose(inverse(m_state.transforms[GetTransformIndex(D3DTS_VIEW)]));
+ data->Projection = m_state.transforms[GetTransformIndex(D3DTS_PROJECTION)];
+
+ for (uint32_t i = 0; i < data->TexcoordMatrices.size(); i++)
+ data->TexcoordMatrices[i] = m_state.transforms[GetTransformIndex(D3DTS_TEXTURE0) + i];
+
+ data->ViewportInfo = m_viewportInfo;
+
+ DecodeD3DCOLOR(m_state.renderStates[D3DRS_AMBIENT], data->GlobalAmbient.data);
+
+ uint32_t lightIdx = 0;
+ for (uint32_t i = 0; i < caps::MaxEnabledLights; i++) { // enabled lights are packed densely into data->Lights
+ auto idx = m_state.enabledLightIndices[i];
+ if (idx == UINT32_MAX)
+ continue;
+
+ data->Lights[lightIdx++] = D3D9Light(m_state.lights[idx].value(), m_state.transforms[GetTransformIndex(D3DTS_VIEW)]);
+ }
+
+ data->Material = m_state.material;
+ data->TweenFactor = bit::cast<float>(m_state.renderStates[D3DRS_TWEENFACTOR]);
+ }
+
+ if (m_flags.test(D3D9DeviceFlag::DirtyFFVertexBlend) && vertexBlendMode == D3D9FF_VertexBlendMode_Normal) { // blend matrices are only uploaded in Normal blend mode; the dirty flag stays set otherwise
+ m_flags.clr(D3D9DeviceFlag::DirtyFFVertexBlend);
+
+ DxvkBufferSliceHandle slice = m_vsVertexBlend->allocSlice();
+
+ EmitCs([
+ cBuffer = m_vsVertexBlend,
+ cSlice = slice
+ ] (DxvkContext* ctx) {
+ ctx->invalidateBuffer(cBuffer, cSlice);
+ });
+
+ auto UploadVertexBlendData = [&](auto data) { // generic over the SW / HW data layouts; fills one view * world matrix per entry
+ for (uint32_t i = 0; i < std::size(data->WorldView); i++)
+ data->WorldView[i] = m_state.transforms[GetTransformIndex(D3DTS_VIEW)] * m_state.transforms[GetTransformIndex(D3DTS_WORLDMATRIX(i))];
+ };
+
+ (m_isSWVP && indexedVertexBlend) // SWVP with indexed blending writes the SW layout, everything else the HW layout
+ ? UploadVertexBlendData(reinterpret_cast<D3D9FixedFunctionVertexBlendDataSW*>(slice.mapPtr))
+ : UploadVertexBlendData(reinterpret_cast<D3D9FixedFunctionVertexBlendDataHW*>(slice.mapPtr));
+ }
+ }
+
+
+ // Refreshes the fixed-function fragment stage. The shader is rebuilt and
+ // bound when its key is flagged dirty or the bound texture types changed;
+ // the constant data is re-uploaded when it is flagged dirty.
+ void D3D9DeviceEx::UpdateFixedFunctionPS() {
+   const bool shaderOutdated =
+        m_flags.test(D3D9DeviceFlag::DirtyFFPixelShader)
+     || m_lastSamplerTypesFF != m_textureTypes;
+
+   if (shaderOutdated) {
+     m_flags.clr(D3D9DeviceFlag::DirtyFFPixelShader);
+     m_lastSamplerTypesFF = m_textureTypes;
+
+     // Returns a bitmask of the arguments read by a texture operation:
+     // bit 0 = arg 0, bit 1 = arg 1, bit 2 = arg 2.
+     auto usedArgs = [](DWORD op) {
+       if (op == D3DTOP_DISABLE)
+         return 0b000u; // No Args
+
+       if (op == D3DTOP_SELECTARG1 || op == D3DTOP_PREMODULATE)
+         return 0b010u; // Arg 1
+
+       if (op == D3DTOP_SELECTARG2)
+         return 0b100u; // Arg 2
+
+       if (op == D3DTOP_MULTIPLYADD || op == D3DTOP_LERP)
+         return 0b111u; // Arg 0, 1, 2
+
+       return 0b110u;   // Arg 1, 2
+     };
+
+     D3D9FFShaderKeyFS shaderKey;
+
+     // Counts the stages that end up in the key; the walk stops at the
+     // first stage that is disabled or that cannot be evaluated.
+     uint32_t stageCount = 0;
+
+     while (stageCount < caps::TextureStageCount) {
+       auto& tss   = m_state.textureStages[stageCount];
+       auto& entry = shaderKey.Stages[stageCount].Contents;
+
+       const auto colorOp = tss[DXVK_TSS_COLOROP];
+
+       // A disabled color op terminates the stage cascade.
+       if (colorOp == D3DTOP_DISABLE)
+         break;
+
+       // Without a bound texture the stage is only usable if none of
+       // the arguments its color op actually reads select the texture.
+       if (m_state.textures[stageCount] == nullptr) {
+         const auto argMask = usedArgs(colorOp);
+
+         const bool arg0IsTexture = (tss[DXVK_TSS_COLORARG0] & D3DTA_SELECTMASK) == D3DTA_TEXTURE;
+         const bool arg1IsTexture = (tss[DXVK_TSS_COLORARG1] & D3DTA_SELECTMASK) == D3DTA_TEXTURE;
+         const bool arg2IsTexture = (tss[DXVK_TSS_COLORARG2] & D3DTA_SELECTMASK) == D3DTA_TEXTURE;
+
+         if ((arg0IsTexture && (argMask & (1 << 0u)))
+          || (arg1IsTexture && (argMask & (1 << 1u)))
+          || (arg2IsTexture && (argMask & (1 << 2u))))
+           break;
+       }
+
+       entry.ColorOp   = colorOp;
+       entry.ColorArg0 = tss[DXVK_TSS_COLORARG0];
+       entry.ColorArg1 = tss[DXVK_TSS_COLORARG1];
+       entry.ColorArg2 = tss[DXVK_TSS_COLORARG2];
+
+       entry.AlphaOp   = tss[DXVK_TSS_ALPHAOP];
+       entry.AlphaArg0 = tss[DXVK_TSS_ALPHAARG0];
+       entry.AlphaArg1 = tss[DXVK_TSS_ALPHAARG1];
+       entry.AlphaArg2 = tss[DXVK_TSS_ALPHAARG2];
+
+       // The texture type bitfield is shifted by two bits per stage.
+       const uint32_t typeShift = stageCount * 2;
+       entry.Type         = (m_textureTypes >> typeShift) & 0xffu;
+       entry.ResultIsTemp = tss[DXVK_TSS_RESULTARG] == D3DTA_TEMP;
+
+       const uint32_t transformFlags = tss[DXVK_TSS_TEXTURETRANSFORMFLAGS];
+       const bool     isProjected    = (transformFlags & D3DTTFF_PROJECTED) != 0;
+
+       entry.Projected      = isProjected ? 1 : 0;
+       entry.ProjectedCount = isProjected ? (transformFlags & ~D3DTTFF_PROJECTED) : 0;
+
+       stageCount++;
+     }
+
+     auto& firstStage = shaderKey.Stages[0].Contents;
+
+     // When stage 0 writes to temp with a color op but no alpha op,
+     // alpha is taken from the diffuse argument instead.
+     const bool needsAlphaFallback = firstStage.ResultIsTemp
+                                  && firstStage.ColorOp != D3DTOP_DISABLE
+                                  && firstStage.AlphaOp == D3DTOP_DISABLE;
+
+     if (needsAlphaFallback) {
+       firstStage.AlphaOp   = D3DTOP_SELECTARG1;
+       firstStage.AlphaArg1 = D3DTA_DIFFUSE;
+     }
+
+     // Global settings are stored in the stage 0 entry of the key.
+     firstStage.GlobalSpecularEnable = m_state.renderStates[D3DRS_SPECULARENABLE];
+     firstStage.GlobalFlatShade      = m_state.renderStates[D3DRS_SHADEMODE] == D3DSHADE_FLAT;
+
+     // The last stage *always* writes to current.
+     if (stageCount >= 1)
+       shaderKey.Stages[stageCount - 1].Contents.ResultIsTemp = false;
+
+     EmitCs([
+       this,
+       cShaderKey = shaderKey,
+       &cModules  = m_ffModules
+     ](DxvkContext* ctx) {
+       auto module = cModules.GetShaderModule(this, cShaderKey);
+       ctx->bindShader(VK_SHADER_STAGE_FRAGMENT_BIT, module.GetShader());
+     });
+   }
+
+   // Constant data
+   if (m_flags.test(D3D9DeviceFlag::DirtyFFPixelData)) {
+     m_flags.clr(D3D9DeviceFlag::DirtyFFPixelData);
+
+     DxvkBufferSliceHandle newSlice = m_psFixedFunction->allocSlice();
+
+     EmitCs([
+       cConstants = m_psFixedFunction,
+       cNewSlice  = newSlice
+     ] (DxvkContext* ctx) {
+       ctx->invalidateBuffer(cConstants, cNewSlice);
+     });
+
+     // Write the texture factor color into the freshly allocated slice.
+     auto* constants = reinterpret_cast<D3D9FixedFunctionPS*>(newSlice.mapPtr);
+
+     DecodeD3DCOLOR(
+       (D3DCOLOR)m_state.renderStates[D3DRS_TEXTUREFACTOR],
+       constants->textureFactor.data);
+   }
+ }
+
+
+ // Tells whether the programmable vertex path is used. That is always the
+ // case when fixed function is compiled out; otherwise it requires a bound
+ // vertex shader and a vertex declaration without the HasPositionT flag.
+ bool D3D9DeviceEx::UseProgrammableVS() {
+   if (!config::FixedFunctionEnabled)
+     return true;
+
+   const bool hasShader = m_state.vertexShader != nullptr;
+   const bool hasDecl   = m_state.vertexDecl   != nullptr;
+
+   if (!hasShader || !hasDecl)
+     return false;
+
+   return !m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasPositionT);
+ }
+
+
+ // Tells whether the programmable pixel path is used: always when fixed
+ // function is compiled out, otherwise only while a pixel shader is bound.
+ bool D3D9DeviceEx::UseProgrammablePS() {
+   return !config::FixedFunctionEnabled
+       || m_state.pixelShader != nullptr;
+ }
+
+
+ // Sends the vertex shader bool bitfield as a graphics spec constant.
+ // Nothing is emitted when the value equals the one sent last time.
+ void D3D9DeviceEx::UpdateBoolSpecConstantVertex(uint32_t value) {
+   if (m_lastBoolSpecConstantVertex != value) {
+     EmitCs([cBools = value] (DxvkContext* ctx) {
+       ctx->setSpecConstant(
+         VK_PIPELINE_BIND_POINT_GRAPHICS,
+         D3D9SpecConstantId::VertexShaderBools,
+         cBools);
+     });
+
+     m_lastBoolSpecConstantVertex = value;
+   }
+ }
+
+
+ // Sends the pixel shader bool bitfield as a graphics spec constant.
+ // Nothing is emitted when the value equals the one sent last time.
+ void D3D9DeviceEx::UpdateBoolSpecConstantPixel(uint32_t value) {
+   if (m_lastBoolSpecConstantPixel != value) {
+     EmitCs([cBools = value] (DxvkContext* ctx) {
+       ctx->setSpecConstant(
+         VK_PIPELINE_BIND_POINT_GRAPHICS,
+         D3D9SpecConstantId::PixelShaderBools,
+         cBools);
+     });
+
+     m_lastBoolSpecConstantPixel = value;
+   }
+ }
+
+
+ // Unconditionally sends the sampler type bitfield as a graphics spec
+ // constant and remembers it in m_lastSamplerTypes.
+ void D3D9DeviceEx::UpdateSamplerSpecConsant(uint32_t value) {
+   EmitCs([cSamplerTypes = value] (DxvkContext* ctx) {
+     ctx->setSpecConstant(
+       VK_PIPELINE_BIND_POINT_GRAPHICS,
+       D3D9SpecConstantId::SamplerType,
+       cSamplerTypes);
+   });
+
+   m_lastSamplerTypes = value;
+ }
+
+
+ // Unconditionally sends the projection bitfield as a graphics spec
+ // constant and remembers it in m_lastProjectionBitfield.
+ void D3D9DeviceEx::UpdateProjectionSpecConstant(uint32_t value) {
+   EmitCs([cProjection = value] (DxvkContext* ctx) {
+     ctx->setSpecConstant(
+       VK_PIPELINE_BIND_POINT_GRAPHICS,
+       D3D9SpecConstantId::ProjectionType,
+       cProjection);
+   });
+
+   m_lastProjectionBitfield = value;
+ }
+
+
+ // Unconditionally sends the Fetch4 bitfield as a graphics spec
+ // constant and remembers it in m_lastFetch4.
+ void D3D9DeviceEx::UpdateFetch4SpecConstant(uint32_t value) {
+   EmitCs([cFetch4 = value] (DxvkContext* ctx) {
+     ctx->setSpecConstant(
+       VK_PIPELINE_BIND_POINT_GRAPHICS,
+       D3D9SpecConstantId::Fetch4,
+       cFetch4);
+   });
+
+   m_lastFetch4 = value;
+ }
+
+
+ // Unconditionally sends the sampler depth mode bitfield as a graphics
+ // spec constant and remembers it in m_lastSamplerDepthMode.
+ void D3D9DeviceEx::UpdateSamplerDepthModeSpecConstant(uint32_t value) {
+   EmitCs([cDepthModes = value] (DxvkContext* ctx) {
+     ctx->setSpecConstant(
+       VK_PIPELINE_BIND_POINT_GRAPHICS,
+       D3D9SpecConstantId::SamplerDepthMode,
+       cDepthModes);
+   });
+
+   m_lastSamplerDepthMode = value;
+ }
+
+
+ // Sets the input assembly state on the given context for PrimType.
+ // The call is skipped when PrimType matches the cached primitive type.
+ void D3D9DeviceEx::ApplyPrimitiveType(
+   DxvkContext* pContext,
+   D3DPRIMITIVETYPE PrimType) {
+   if (m_iaState.primitiveType == PrimType)
+     return;
+
+   m_iaState.primitiveType = PrimType;
+   pContext->setInputAssemblyState(DecodeInputAssemblyState(PrimType));
+ }
+
+
+ // Transfers the contents of the bound depth-stencil surface into the
+ // texture bound at stage 0. Returns early if either one is missing, and
+ // logs a warning and returns if the destination is multisampled. A
+ // single-sampled source is copied; a multisampled source is resolved.
+ void D3D9DeviceEx::ResolveZ() {
+   D3D9Surface*           depthSurface  = m_state.depthStencil.ptr();
+   IDirect3DBaseTexture9* targetTexture = m_state.textures[0];
+
+   if (unlikely(!depthSurface || !targetTexture))
+     return;
+
+   D3D9CommonTexture* srcTexture = GetCommonTexture(depthSurface);
+   D3D9CommonTexture* dstTexture = GetCommonTexture(targetTexture);
+
+   const D3D9_COMMON_TEXTURE_DESC* srcDesc = srcTexture->Desc();
+   const D3D9_COMMON_TEXTURE_DESC* dstDesc = dstTexture->Desc();
+
+   // The destination has to be single-sampled.
+   VkSampleCountFlagBits dstSamples;
+   DecodeMultiSampleType(dstDesc->MultiSample, dstDesc->MultisampleQuality, &dstSamples);
+
+   if (unlikely(dstSamples != VK_SAMPLE_COUNT_1_BIT)) {
+     Logger::warn("D3D9DeviceEx::ResolveZ: dstSampleCount != 1. Discarding.");
+     return;
+   }
+
+   const D3D9_VK_FORMAT_MAPPING srcMapping = LookupFormat(srcDesc->Format);
+   const D3D9_VK_FORMAT_MAPPING dstMapping = LookupFormat(dstDesc->Format);
+
+   auto srcFormat = imageFormatInfo(srcMapping.FormatColor);
+   auto dstFormat = imageFormatInfo(dstMapping.FormatColor);
+
+   // Destination is subresource 0 of the texture, the source
+   // is whichever subresource the depth surface refers to.
+   const VkImageSubresource dstSubres =
+     dstTexture->GetSubresourceFromIndex(dstFormat->aspectMask, 0);
+
+   const VkImageSubresource srcSubres =
+     srcTexture->GetSubresourceFromIndex(srcFormat->aspectMask, depthSurface->GetSubresource());
+
+   // Turns a single subresource into a one-layer subresource range.
+   auto toSingleLayer = [](const VkImageSubresource& subres) {
+     return VkImageSubresourceLayers {
+       subres.aspectMask,
+       subres.mipLevel,
+       subres.arrayLayer, 1 };
+   };
+
+   const VkImageSubresourceLayers dstLayers = toSingleLayer(dstSubres);
+   const VkImageSubresourceLayers srcLayers = toSingleLayer(srcSubres);
+
+   VkSampleCountFlagBits srcSamples;
+   DecodeMultiSampleType(srcDesc->MultiSample, srcDesc->MultisampleQuality, &srcSamples);
+
+   if (srcSamples != VK_SAMPLE_COUNT_1_BIT) {
+     EmitCs([
+       cDst       = dstTexture->GetImage(),
+       cSrc       = srcTexture->GetImage(),
+       cDstLayers = dstLayers,
+       cSrcLayers = srcLayers
+     ] (DxvkContext* ctx) {
+       // We should resolve using the first sample according to
+       // http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/Advanced-DX9-Capabilities-for-ATI-Radeon-Cards_v2.pdf
+       // "The resolve operation copies the depth value from the *first sample only* into the resolved depth stencil texture."
+       constexpr auto firstSampleOnly = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR;
+
+       // Members in declaration order: srcSubresource, srcOffset,
+       // dstSubresource, dstOffset, extent.
+       VkImageResolve region = {
+         cSrcLayers, VkOffset3D { 0, 0, 0 },
+         cDstLayers, VkOffset3D { 0, 0, 0 },
+         cDst->mipLevelExtent(cDstLayers.mipLevel) };
+
+       ctx->resolveDepthStencilImage(cDst, cSrc, region, firstSampleOnly, firstSampleOnly);
+     });
+   } else {
+     // Sample counts match, so a plain image copy is enough.
+     EmitCs([
+       cDst       = dstTexture->GetImage(),
+       cSrc       = srcTexture->GetImage(),
+       cDstLayers = dstLayers,
+       cSrcLayers = srcLayers
+     ] (DxvkContext* ctx) {
+       ctx->copyImage(
+         cDst, cDstLayers, VkOffset3D { 0, 0, 0 },
+         cSrc, cSrcLayers, VkOffset3D { 0, 0, 0 },
+         cDst->mipLevelExtent(cDstLayers.mipLevel));
+     });
+   }
+
+   dstTexture->MarkAllWrittenByGPU();
+ }
+
+
+ // Emits a CS command that calls changeImageLayout on the
+ // resource's image with the requested layout.
+ void D3D9DeviceEx::TransitionImage(D3D9CommonTexture* pResource, VkImageLayout NewLayout) {
+   EmitCs([cTarget = pResource->GetImage(), cLayout = NewLayout] (DxvkContext* ctx) {
+     ctx->changeImageLayout(cTarget, cLayout);
+   });
+ }
+
+
+ // Emits a CS command that calls transformImage on the resource's image
+ // for the given subresource range, going from OldLayout to NewLayout.
+ // The range is copied at call time, so pSubresources is only read here.
+ void D3D9DeviceEx::TransformImage(
+   D3D9CommonTexture* pResource,
+   const VkImageSubresourceRange* pSubresources,
+   VkImageLayout OldLayout,
+   VkImageLayout NewLayout) {
+   EmitCs([
+     cTarget = pResource->GetImage(),
+     cRange  = *pSubresources,
+     cFrom   = OldLayout,
+     cTo     = NewLayout
+   ] (DxvkContext* ctx) {
+     ctx->transformImage(cTarget, cRange, cFrom, cTo);
+   });
+ }
+
+ /**
+  * Resets the device state to default values.
+  *
+  * Unbinds the depth-stencil surface (unless an auto depth-stencil is
+  * requested) and render targets 1 and up, then writes default values
+  * for the render states, texture stage states, stream frequencies,
+  * textures, sampler states and clip planes. Each group is either bound
+  * right away through the matching Bind* / Update* helper or marked with
+  * a dirty flag. Finally the sampler type, vertex/pixel bool and sampler
+  * depth mode spec constants are set to zero.
+  *
+  * pPresentationParameters is only read for EnableAutoDepthStencil.
+  * Every visible path returns D3D_OK.
+  */
+ HRESULT D3D9DeviceEx::ResetState(D3DPRESENT_PARAMETERS* pPresentationParameters) {
+ if (!pPresentationParameters->EnableAutoDepthStencil)
+ SetDepthStencilSurface(nullptr);
+ // The loop starts at 1: render target 0 is not touched here.
+ for (uint32_t i = 1; i < caps::MaxSimultaneousRenderTargets; i++)
+ SetRenderTarget(i, nullptr);
+
+ auto& rs = m_state.renderStates;
+ // Blend defaults, applied by the BindBlendState() call that follows them.
+ rs[D3DRS_SEPARATEALPHABLENDENABLE] = FALSE;
+ rs[D3DRS_ALPHABLENDENABLE] = FALSE;
+ rs[D3DRS_BLENDOP] = D3DBLENDOP_ADD;
+ rs[D3DRS_BLENDOPALPHA] = D3DBLENDOP_ADD;
+ rs[D3DRS_DESTBLEND] = D3DBLEND_ZERO;
+ rs[D3DRS_DESTBLENDALPHA] = D3DBLEND_ZERO;
+ rs[D3DRS_COLORWRITEENABLE] = 0x0000000f;
+ rs[D3DRS_COLORWRITEENABLE1] = 0x0000000f;
+ rs[D3DRS_COLORWRITEENABLE2] = 0x0000000f;
+ rs[D3DRS_COLORWRITEENABLE3] = 0x0000000f;
+ rs[D3DRS_SRCBLEND] = D3DBLEND_ONE;
+ rs[D3DRS_SRCBLENDALPHA] = D3DBLEND_ONE;
+ BindBlendState();
+
+ rs[D3DRS_BLENDFACTOR] = 0xffffffff;
+ BindBlendFactor();
+ // Depth/stencil defaults. ZENABLE follows EnableAutoDepthStencil.
+ rs[D3DRS_ZENABLE] = pPresentationParameters->EnableAutoDepthStencil
+ ? D3DZB_TRUE
+ : D3DZB_FALSE;
+ rs[D3DRS_ZFUNC] = D3DCMP_LESSEQUAL;
+ rs[D3DRS_TWOSIDEDSTENCILMODE] = FALSE;
+ rs[D3DRS_ZWRITEENABLE] = TRUE;
+ rs[D3DRS_STENCILENABLE] = FALSE;
+ rs[D3DRS_STENCILFAIL] = D3DSTENCILOP_KEEP;
+ rs[D3DRS_STENCILZFAIL] = D3DSTENCILOP_KEEP;
+ rs[D3DRS_STENCILPASS] = D3DSTENCILOP_KEEP;
+ rs[D3DRS_STENCILFUNC] = D3DCMP_ALWAYS;
+ rs[D3DRS_CCW_STENCILFAIL] = D3DSTENCILOP_KEEP;
+ rs[D3DRS_CCW_STENCILZFAIL] = D3DSTENCILOP_KEEP;
+ rs[D3DRS_CCW_STENCILPASS] = D3DSTENCILOP_KEEP;
+ rs[D3DRS_CCW_STENCILFUNC] = D3DCMP_ALWAYS;
+ rs[D3DRS_STENCILMASK] = 0xFFFFFFFF;
+ rs[D3DRS_STENCILWRITEMASK] = 0xFFFFFFFF;
+ BindDepthStencilState();
+
+ rs[D3DRS_STENCILREF] = 0;
+ BindDepthStencilRefrence();
+ // Rasterizer and depth bias defaults. Float-valued states are stored as their DWORD bit pattern.
+ rs[D3DRS_FILLMODE] = D3DFILL_SOLID;
+ rs[D3DRS_CULLMODE] = D3DCULL_CCW;
+ rs[D3DRS_DEPTHBIAS] = bit::cast<DWORD>(0.0f);
+ rs[D3DRS_SLOPESCALEDEPTHBIAS] = bit::cast<DWORD>(0.0f);
+ BindRasterizerState();
+ BindDepthBias();
+
+ rs[D3DRS_SCISSORTESTENABLE] = FALSE;
+ // Alpha test defaults; the reference value goes through a push constant update.
+ rs[D3DRS_ALPHATESTENABLE] = FALSE;
+ rs[D3DRS_ALPHAFUNC] = D3DCMP_ALWAYS;
+ BindAlphaTestState();
+ rs[D3DRS_ALPHAREF] = 0;
+ UpdatePushConstant<D3D9RenderStateItem::AlphaRef>();
+
+ rs[D3DRS_MULTISAMPLEMASK] = 0xffffffff;
+ BindMultiSampleState();
+
+ rs[D3DRS_TEXTUREFACTOR] = 0xffffffff;
+ m_flags.set(D3D9DeviceFlag::DirtyFFPixelData);
+ // Lighting and material source defaults; these only raise DirtyFFVertexShader.
+ rs[D3DRS_DIFFUSEMATERIALSOURCE] = D3DMCS_COLOR1;
+ rs[D3DRS_SPECULARMATERIALSOURCE] = D3DMCS_COLOR2;
+ rs[D3DRS_AMBIENTMATERIALSOURCE] = D3DMCS_MATERIAL;
+ rs[D3DRS_EMISSIVEMATERIALSOURCE] = D3DMCS_MATERIAL;
+ rs[D3DRS_LIGHTING] = TRUE;
+ rs[D3DRS_COLORVERTEX] = TRUE;
+ rs[D3DRS_LOCALVIEWER] = TRUE;
+ rs[D3DRS_RANGEFOGENABLE] = FALSE;
+ rs[D3DRS_NORMALIZENORMALS] = FALSE;
+ m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);
+
+ // PS
+ rs[D3DRS_SPECULARENABLE] = FALSE;
+
+ rs[D3DRS_AMBIENT] = 0;
+ m_flags.set(D3D9DeviceFlag::DirtyFFVertexData);
+ // Fog defaults; the five fog dirty flags below are raised afterwards.
+ rs[D3DRS_FOGENABLE] = FALSE;
+ rs[D3DRS_FOGCOLOR] = 0;
+ rs[D3DRS_FOGTABLEMODE] = D3DFOG_NONE;
+ rs[D3DRS_FOGSTART] = bit::cast<DWORD>(0.0f);
+ rs[D3DRS_FOGEND] = bit::cast<DWORD>(1.0f);
+ rs[D3DRS_FOGDENSITY] = bit::cast<DWORD>(1.0f);
+ rs[D3DRS_FOGVERTEXMODE] = D3DFOG_NONE;
+ m_flags.set(D3D9DeviceFlag::DirtyFogColor);
+ m_flags.set(D3D9DeviceFlag::DirtyFogDensity);
+ m_flags.set(D3D9DeviceFlag::DirtyFogEnd);
+ m_flags.set(D3D9DeviceFlag::DirtyFogScale);
+ m_flags.set(D3D9DeviceFlag::DirtyFogState);
+
+ rs[D3DRS_CLIPPLANEENABLE] = 0;
+ m_flags.set(D3D9DeviceFlag::DirtyClipPlanes);
+ // Point sprite and point size defaults.
+ rs[D3DRS_POINTSPRITEENABLE] = FALSE;
+ rs[D3DRS_POINTSCALEENABLE] = FALSE;
+ rs[D3DRS_POINTSCALE_A] = bit::cast<DWORD>(1.0f);
+ rs[D3DRS_POINTSCALE_B] = bit::cast<DWORD>(0.0f);
+ rs[D3DRS_POINTSCALE_C] = bit::cast<DWORD>(0.0f);
+ rs[D3DRS_POINTSIZE] = bit::cast<DWORD>(1.0f);
+ rs[D3DRS_POINTSIZE_MIN] = bit::cast<DWORD>(1.0f);
+ rs[D3DRS_POINTSIZE_MAX] = bit::cast<DWORD>(64.0f);
+ UpdatePushConstant<D3D9RenderStateItem::PointSize>();
+ UpdatePushConstant<D3D9RenderStateItem::PointSizeMin>();
+ UpdatePushConstant<D3D9RenderStateItem::PointSizeMax>();
+ m_flags.set(D3D9DeviceFlag::DirtyPointScale);
+ UpdatePointMode<false>();
+
+ rs[D3DRS_SRGBWRITEENABLE] = 0;
+
+ rs[D3DRS_SHADEMODE] = D3DSHADE_GOURAUD;
+ // Vertex blending defaults.
+ rs[D3DRS_VERTEXBLEND] = D3DVBF_DISABLE;
+ rs[D3DRS_INDEXEDVERTEXBLENDENABLE] = FALSE;
+ rs[D3DRS_TWEENFACTOR] = bit::cast<DWORD>(0.0f);
+ m_flags.set(D3D9DeviceFlag::DirtyFFVertexBlend);
+
+ // Render States not implemented beyond this point.
+ rs[D3DRS_LASTPIXEL] = TRUE;
+ rs[D3DRS_DITHERENABLE] = FALSE;
+ rs[D3DRS_WRAP0] = 0;
+ rs[D3DRS_WRAP1] = 0;
+ rs[D3DRS_WRAP2] = 0;
+ rs[D3DRS_WRAP3] = 0;
+ rs[D3DRS_WRAP4] = 0;
+ rs[D3DRS_WRAP5] = 0;
+ rs[D3DRS_WRAP6] = 0;
+ rs[D3DRS_WRAP7] = 0;
+ rs[D3DRS_CLIPPING] = TRUE;
+ rs[D3DRS_MULTISAMPLEANTIALIAS] = TRUE;
+ rs[D3DRS_PATCHEDGESTYLE] = D3DPATCHEDGE_DISCRETE;
+ rs[D3DRS_DEBUGMONITORTOKEN] = D3DDMT_ENABLE;
+ rs[D3DRS_POSITIONDEGREE] = D3DDEGREE_CUBIC;
+ rs[D3DRS_NORMALDEGREE] = D3DDEGREE_LINEAR;
+ rs[D3DRS_ANTIALIASEDLINEENABLE] = FALSE;
+ rs[D3DRS_MINTESSELLATIONLEVEL] = bit::cast<DWORD>(1.0f);
+ rs[D3DRS_MAXTESSELLATIONLEVEL] = bit::cast<DWORD>(1.0f);
+ rs[D3DRS_ADAPTIVETESS_X] = bit::cast<DWORD>(0.0f);
+ rs[D3DRS_ADAPTIVETESS_Y] = bit::cast<DWORD>(0.0f);
+ rs[D3DRS_ADAPTIVETESS_Z] = bit::cast<DWORD>(1.0f);
+ rs[D3DRS_ADAPTIVETESS_W] = bit::cast<DWORD>(0.0f);
+ rs[D3DRS_ENABLEADAPTIVETESSELLATION] = FALSE;
+ rs[D3DRS_WRAP8] = 0;
+ rs[D3DRS_WRAP9] = 0;
+ rs[D3DRS_WRAP10] = 0;
+ rs[D3DRS_WRAP11] = 0;
+ rs[D3DRS_WRAP12] = 0;
+ rs[D3DRS_WRAP13] = 0;
+ rs[D3DRS_WRAP14] = 0;
+ rs[D3DRS_WRAP15] = 0;
+ // End Unimplemented Render States
+ // Texture stage defaults: only stage 0 gets non-disabled color and alpha ops.
+ for (uint32_t i = 0; i < caps::TextureStageCount; i++) {
+ auto& stage = m_state.textureStages[i];
+
+ stage[DXVK_TSS_COLOROP] = i == 0 ? D3DTOP_MODULATE : D3DTOP_DISABLE;
+ stage[DXVK_TSS_COLORARG1] = D3DTA_TEXTURE;
+ stage[DXVK_TSS_COLORARG2] = D3DTA_CURRENT;
+ stage[DXVK_TSS_ALPHAOP] = i == 0 ? D3DTOP_SELECTARG1 : D3DTOP_DISABLE;
+ stage[DXVK_TSS_ALPHAARG1] = D3DTA_TEXTURE;
+ stage[DXVK_TSS_ALPHAARG2] = D3DTA_CURRENT;
+ stage[DXVK_TSS_BUMPENVMAT00] = bit::cast<DWORD>(0.0f);
+ stage[DXVK_TSS_BUMPENVMAT01] = bit::cast<DWORD>(0.0f);
+ stage[DXVK_TSS_BUMPENVMAT10] = bit::cast<DWORD>(0.0f);
+ stage[DXVK_TSS_BUMPENVMAT11] = bit::cast<DWORD>(0.0f);
+ stage[DXVK_TSS_TEXCOORDINDEX] = i;
+ stage[DXVK_TSS_BUMPENVLSCALE] = bit::cast<DWORD>(0.0f);
+ stage[DXVK_TSS_BUMPENVLOFFSET] = bit::cast<DWORD>(0.0f);
+ stage[DXVK_TSS_TEXTURETRANSFORMFLAGS] = D3DTTFF_DISABLE;
+ stage[DXVK_TSS_COLORARG0] = D3DTA_CURRENT;
+ stage[DXVK_TSS_ALPHAARG0] = D3DTA_CURRENT;
+ stage[DXVK_TSS_RESULTARG] = D3DTA_CURRENT;
+ stage[DXVK_TSS_CONSTANT] = 0x00000000;
+ }
+ m_flags.set(D3D9DeviceFlag::DirtySharedPixelShaderData);
+ m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader);
+ // Every stream frequency is set to 1.
+ for (uint32_t i = 0; i < caps::MaxStreams; i++)
+ m_state.streamFreq[i] = 1;
+ // Replace every texture slot with null, then bind null resource views for the matching image slots inside an EmitCs lambda.
+ for (uint32_t i = 0; i < m_state.textures.size(); i++)
+ TextureChangePrivate(m_state.textures[i], nullptr);
+
+ EmitCs([
+ cSize = m_state.textures.size()
+ ](DxvkContext* ctx) {
+ for (uint32_t i = 0; i < cSize; i++) {
+ auto samplerInfo = RemapStateSamplerShader(DWORD(i));
+ uint32_t slot = computeResourceSlotId(samplerInfo.first, DxsoBindingType::Image, uint32_t(samplerInfo.second));
+ ctx->bindResourceView(slot, nullptr, nullptr);
+ }
+ });
+
+ m_dirtyTextures = 0;
+ m_depthTextures = 0;
+ // Sampler state defaults; BindSampler(i) is called for each sampler, then the dirty sampler mask is cleared.
+ auto& ss = m_state.samplerStates;
+ for (uint32_t i = 0; i < ss.size(); i++) {
+ auto& state = ss[i];
+ state[D3DSAMP_ADDRESSU] = D3DTADDRESS_WRAP;
+ state[D3DSAMP_ADDRESSV] = D3DTADDRESS_WRAP;
+ state[D3DSAMP_ADDRESSW] = D3DTADDRESS_WRAP;
+ state[D3DSAMP_BORDERCOLOR] = 0x00000000;
+ state[D3DSAMP_MAGFILTER] = D3DTEXF_POINT;
+ state[D3DSAMP_MINFILTER] = D3DTEXF_POINT;
+ state[D3DSAMP_MIPFILTER] = D3DTEXF_NONE;
+ state[D3DSAMP_MIPMAPLODBIAS] = bit::cast<DWORD>(0.0f);
+ state[D3DSAMP_MAXMIPLEVEL] = 0;
+ state[D3DSAMP_MAXANISOTROPY] = 1;
+ state[D3DSAMP_SRGBTEXTURE] = 0;
+ state[D3DSAMP_ELEMENTINDEX] = 0;
+ state[D3DSAMP_DMAPOFFSET] = 0;
+
+ BindSampler(i);
+ }
+
+ m_dirtySamplerStates = 0;
+ // All clip planes are set to (0, 0, 0, 0).
+ for (uint32_t i = 0; i < caps::MaxClipPlanes; i++) {
+ float plane[4] = { 0, 0, 0, 0 };
+ SetClipPlane(i, plane);
+ }
+
+ // We should do this...
+ m_flags.set(D3D9DeviceFlag::DirtyInputLayout);
+ // Zero the sampler type, vertex/pixel bool and sampler depth mode spec constants.
+ UpdateSamplerSpecConsant(0u);
+ UpdateBoolSpecConstantVertex(0u);
+ UpdateBoolSpecConstantPixel(0u);
+ UpdateSamplerDepthModeSpecConstant(0u);
+
+ return D3D_OK;
+ }
+
+
+ // Logs the requested presentation parameters, rejects unsupported back
+ // buffer formats, then resets the implicit swap chain (creating it if it
+ // does not exist yet). When an auto depth-stencil is requested, a new
+ // depth-stencil surface is created and bound. Back buffer 0 is bound as
+ // render target 0 and the viewport/scissor state is rebound.
+ //
+ // Returns D3DERR_INVALIDCALL for an unsupported format or a failed swap
+ // chain reset, D3DERR_NOTAVAILABLE if the depth-stencil description
+ // cannot be normalized, and D3D_OK otherwise.
+ HRESULT D3D9DeviceEx::ResetSwapChain(D3DPRESENT_PARAMETERS* pPresentationParameters, D3DDISPLAYMODEEX* pFullscreenDisplayMode) {
+   D3D9Format requestedFormat = EnumerateFormat(pPresentationParameters->BackBufferFormat);
+
+   auto boolName = [](auto flag) -> const char* {
+     return flag ? "true" : "false";
+   };
+
+   Logger::info(str::format(
+     "D3D9DeviceEx::ResetSwapChain:\n",
+     " Requested Presentation Parameters\n",
+     " - Width: ", pPresentationParameters->BackBufferWidth, "\n",
+     " - Height: ", pPresentationParameters->BackBufferHeight, "\n",
+     " - Format: ", requestedFormat, "\n",
+     " - Auto Depth Stencil: ", boolName(pPresentationParameters->EnableAutoDepthStencil), "\n",
+     " ^ Format: ", EnumerateFormat(pPresentationParameters->AutoDepthStencilFormat), "\n",
+     " - Windowed: ", boolName(pPresentationParameters->Windowed), "\n"));
+
+   // An unknown format is let through; anything else must be supported.
+   const bool formatRejected = requestedFormat != D3D9Format::Unknown
+                            && !IsSupportedBackBufferFormat(requestedFormat);
+
+   if (formatRejected) {
+     Logger::err(str::format("D3D9DeviceEx::ResetSwapChain: Unsupported backbuffer format: ",
+       EnumerateFormat(pPresentationParameters->BackBufferFormat)));
+     return D3DERR_INVALIDCALL;
+   }
+
+   if (m_implicitSwapchain != nullptr) {
+     const HRESULT resetResult = m_implicitSwapchain->Reset(pPresentationParameters, pFullscreenDisplayMode);
+
+     if (FAILED(resetResult))
+       return D3DERR_INVALIDCALL;
+   } else {
+     m_implicitSwapchain = new D3D9SwapChainEx(this, pPresentationParameters, pFullscreenDisplayMode);
+   }
+
+   if (pPresentationParameters->EnableAutoDepthStencil) {
+     // Attachment-only depth-stencil surface matching the back buffer
+     // size and multisample settings.
+     D3D9_COMMON_TEXTURE_DESC depthDesc;
+     depthDesc.Width              = pPresentationParameters->BackBufferWidth;
+     depthDesc.Height             = pPresentationParameters->BackBufferHeight;
+     depthDesc.Depth              = 1;
+     depthDesc.ArraySize          = 1;
+     depthDesc.MipLevels          = 1;
+     depthDesc.Usage              = D3DUSAGE_DEPTHSTENCIL;
+     depthDesc.Format             = EnumerateFormat(pPresentationParameters->AutoDepthStencilFormat);
+     depthDesc.Pool               = D3DPOOL_DEFAULT;
+     depthDesc.Discard            = FALSE;
+     depthDesc.MultiSample        = pPresentationParameters->MultiSampleType;
+     depthDesc.MultisampleQuality = pPresentationParameters->MultiSampleQuality;
+     depthDesc.IsBackBuffer       = FALSE;
+     depthDesc.IsAttachmentOnly   = TRUE;
+
+     const HRESULT normalizeResult = D3D9CommonTexture::NormalizeTextureProperties(this, &depthDesc);
+
+     if (FAILED(normalizeResult))
+       return D3DERR_NOTAVAILABLE;
+
+     m_autoDepthStencil = new D3D9Surface(this, &depthDesc, nullptr);
+     m_initializer->InitTexture(m_autoDepthStencil->GetCommonTexture());
+     SetDepthStencilSurface(m_autoDepthStencil.ptr());
+   }
+
+   SetRenderTarget(0, m_implicitSwapchain->GetBackBuffer(0));
+
+   // Force this if we end up binding the same RT to make scissor change go into effect.
+   BindViewportAndScissor();
+
+   return D3D_OK;
+ }
+
+
+ // Runs the swap chain reset followed by the state reset, then flushes
+ // and synchronizes with the CS thread. The first failing HRESULT is
+ // returned as-is; D3D_OK is returned when both steps succeed.
+ HRESULT D3D9DeviceEx::InitialReset(D3DPRESENT_PARAMETERS* pPresentationParameters, D3DDISPLAYMODEEX* pFullscreenDisplayMode) {
+   HRESULT status = ResetSwapChain(pPresentationParameters, pFullscreenDisplayMode);
+
+   // The state reset only runs when the swap chain reset succeeded.
+   if (!FAILED(status))
+     status = ResetState(pPresentationParameters);
+
+   if (FAILED(status))
+     return status;
+
+   Flush();
+   SynchronizeCsThread();
+
+   return D3D_OK;
+ }
+
+}