diff options
Diffstat (limited to 'src/libs/dxvk-native-1.9.2a/src/d3d9')
78 files changed, 23513 insertions, 0 deletions
diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9.def b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9.def new file mode 100644 index 00000000..6829b194 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9.def @@ -0,0 +1,22 @@ +LIBRARY D3D9.DLL +EXPORTS + Direct3DShaderValidatorCreate9 @24 + + PSGPError @25 + PSGPSampleTexture @26 + + D3DPERF_BeginEvent @27 + D3DPERF_EndEvent @28 + D3DPERF_GetStatus @29 + D3DPERF_QueryRepeatFrame @30 + D3DPERF_SetMarker @31 + D3DPERF_SetOptions @32 + D3DPERF_SetRegion @33 + + DebugSetLevel @34 + DebugSetMute @35 + + Direct3D9EnableMaximizedWindowedModeShim @36 + + Direct3DCreate9 @37 + Direct3DCreate9Ex @38 diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_adapter.cpp b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_adapter.cpp new file mode 100644 index 00000000..7f67839f --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_adapter.cpp @@ -0,0 +1,834 @@ +#include "d3d9_adapter.h" + +#include "d3d9_interface.h" +#include "d3d9_monitor.h" +#include "d3d9_caps.h" +#include "d3d9_util.h" + +#include "../wsi/wsi_mode.h" +#include "../wsi/wsi_monitor.h" + +#include "../util/util_bit.h" +#include "../util/util_luid.h" +#include "../util/util_ratio.h" +#include "../util/util_string.h" + +#include <cfloat> + +namespace dxvk { + + const char* GetDriverDLL(DxvkGpuVendor vendor) { + switch (vendor) { + default: + case DxvkGpuVendor::Nvidia: return "nvd3dum.dll"; + +#if defined(__x86_64__) || defined(_M_X64) + case DxvkGpuVendor::Amd: return "aticfx64.dll"; + case DxvkGpuVendor::Intel: return "igdumd64.dll"; +#else + case DxvkGpuVendor::Amd: return "aticfx32.dll"; + case DxvkGpuVendor::Intel: return "igdumd32.dll"; +#endif + } + } + + + D3D9Adapter::D3D9Adapter( + D3D9InterfaceEx* pParent, + Rc<DxvkAdapter> Adapter, + UINT Ordinal, + UINT DisplayIndex) + : m_parent (pParent), + m_adapter (Adapter), + m_ordinal (Ordinal), + m_displayIndex (DisplayIndex), + m_modeCacheFormat (D3D9Format::Unknown), + m_d3d9Formats (Adapter, 
m_parent->GetOptions()) { + m_adapter->logAdapterInfo(); + } + + template <size_t N> + static void copyToStringArray(char (&dst)[N], const char* src) { + std::strncpy(dst, src, N); + dst[N - 1] = '\0'; + } + + + HRESULT D3D9Adapter::GetAdapterIdentifier( + DWORD Flags, + D3DADAPTER_IDENTIFIER9* pIdentifier) { + if (unlikely(pIdentifier == nullptr)) + return D3DERR_INVALIDCALL; + + auto& options = m_parent->GetOptions(); + + const auto& props = m_adapter->deviceProperties(); + + // TODO: Consolidate this! +#ifndef DXVK_NATIVE + DISPLAY_DEVICEA device = { }; + device.cb = sizeof(device); + + if (!::EnumDisplayDevicesA(nullptr, m_displayIndex, &device, 0)) { + Logger::err("D3D9Adapter::GetAdapterIdentifier: Failed to query display info"); + return D3DERR_INVALIDCALL; + } + + const char* displayName = device.DeviceName; +#else + WCHAR wideDisplayName[32] = { }; + if (!wsi::getDisplayName(GetDefaultMonitor(), wideDisplayName)) { + Logger::err("D3D9Adapter::GetAdapterIdentifier: Failed to query monitor info"); + return D3DERR_INVALIDCALL; + } + + std::string displayNameStr = str::fromws(wideDisplayName); + const char* displayName = displayNameStr.c_str(); +#endif + + + GUID guid = bit::cast<GUID>(m_adapter->devicePropertiesExt().coreDeviceId.deviceUUID); + + uint32_t vendorId = options.customVendorId == -1 ? props.vendorID : uint32_t(options.customVendorId); + uint32_t deviceId = options.customDeviceId == -1 ? props.deviceID : uint32_t(options.customDeviceId); + const char* desc = options.customDeviceDesc.empty() ? props.deviceName : options.customDeviceDesc.c_str(); + const char* driver = GetDriverDLL(DxvkGpuVendor(vendorId)); + + copyToStringArray(pIdentifier->Description, desc); + copyToStringArray(pIdentifier->DeviceName, displayName); // The GDI device name. Not the actual device name. + copyToStringArray(pIdentifier->Driver, driver); // This is the driver's dll. 
+ + pIdentifier->DeviceIdentifier = guid; + pIdentifier->DeviceId = deviceId; + pIdentifier->VendorId = vendorId; + pIdentifier->Revision = 0; + pIdentifier->SubSysId = 0; + pIdentifier->WHQLLevel = m_parent->IsExtended() ? 1 : 0; // This doesn't check with the driver on Direct3D9Ex and is always 1. + pIdentifier->DriverVersion.QuadPart = INT64_MAX; + + return D3D_OK; + } + + + HRESULT D3D9Adapter::CheckDeviceType( + D3DDEVTYPE DevType, + D3D9Format AdapterFormat, + D3D9Format BackBufferFormat, + BOOL bWindowed) { + if (!IsSupportedAdapterFormat(AdapterFormat)) + return D3DERR_NOTAVAILABLE; + + if (!IsSupportedBackBufferFormat(AdapterFormat, BackBufferFormat, bWindowed)) + return D3DERR_NOTAVAILABLE; + + return D3D_OK; + } + + + HRESULT D3D9Adapter::CheckDeviceFormat( + D3DDEVTYPE DeviceType, + D3D9Format AdapterFormat, + DWORD Usage, + D3DRESOURCETYPE RType, + D3D9Format CheckFormat) { + if (!IsSupportedAdapterFormat(AdapterFormat)) + return D3DERR_NOTAVAILABLE; + + const bool dmap = Usage & D3DUSAGE_DMAP; + const bool rt = Usage & D3DUSAGE_RENDERTARGET; + const bool ds = Usage & D3DUSAGE_DEPTHSTENCIL; + + const bool surface = RType == D3DRTYPE_SURFACE; + const bool texture = RType == D3DRTYPE_TEXTURE; + + const bool twoDimensional = surface || texture; + + const bool srgb = (Usage & (D3DUSAGE_QUERY_SRGBREAD | D3DUSAGE_QUERY_SRGBWRITE)) != 0; + + if (CheckFormat == D3D9Format::INST) + return D3D_OK; + + if (rt && CheckFormat == D3D9Format::A8 && m_parent->GetOptions().disableA8RT) + return D3DERR_NOTAVAILABLE; + + if (ds && !IsDepthFormat(CheckFormat)) + return D3DERR_NOTAVAILABLE; + + if (rt && CheckFormat == D3D9Format::NULL_FORMAT && twoDimensional) + return D3D_OK; + + if (rt && CheckFormat == D3D9Format::RESZ && surface) + return D3D_OK; + + if (CheckFormat == D3D9Format::ATOC && surface) + return D3D_OK; + + if (CheckFormat == D3D9Format::NVDB && surface) + return m_adapter->features().core.features.depthBounds + ? 
D3D_OK + : D3DERR_NOTAVAILABLE; + + // I really don't want to support this... + if (dmap) + return D3DERR_NOTAVAILABLE; + + auto mapping = m_d3d9Formats.GetFormatMapping(CheckFormat); + if (mapping.FormatColor == VK_FORMAT_UNDEFINED) + return D3DERR_NOTAVAILABLE; + + if (mapping.FormatSrgb == VK_FORMAT_UNDEFINED && srgb) + return D3DERR_NOTAVAILABLE; + + if (RType == D3DRTYPE_VERTEXBUFFER || RType == D3DRTYPE_INDEXBUFFER) + return D3D_OK; + + // Let's actually ask Vulkan now that we got some quirks out the way! + return CheckDeviceVkFormat(mapping.FormatColor, Usage, RType); + } + + + HRESULT D3D9Adapter::CheckDeviceMultiSampleType( + D3DDEVTYPE DeviceType, + D3D9Format SurfaceFormat, + BOOL Windowed, + D3DMULTISAMPLE_TYPE MultiSampleType, + DWORD* pQualityLevels) { + if (pQualityLevels != nullptr) + *pQualityLevels = 1; + + auto dst = ConvertFormatUnfixed(SurfaceFormat); + if (dst.FormatColor == VK_FORMAT_UNDEFINED) + return D3DERR_NOTAVAILABLE; + + if (MultiSampleType != D3DMULTISAMPLE_NONE + && (SurfaceFormat == D3D9Format::D32_LOCKABLE + || SurfaceFormat == D3D9Format::D32F_LOCKABLE + || SurfaceFormat == D3D9Format::D16_LOCKABLE)) + return D3DERR_NOTAVAILABLE; + + uint32_t sampleCount = std::max<uint32_t>(MultiSampleType, 1u); + + // Check if this is a power of two... + if (sampleCount & (sampleCount - 1)) + return D3DERR_NOTAVAILABLE; + + // Therefore... + VkSampleCountFlags sampleFlags = VkSampleCountFlags(sampleCount); + + auto availableFlags = !IsDepthFormat(SurfaceFormat) + ? 
m_adapter->deviceProperties().limits.framebufferColorSampleCounts + : m_adapter->deviceProperties().limits.framebufferDepthSampleCounts; + + if (!(availableFlags & sampleFlags)) + return D3DERR_NOTAVAILABLE; + + if (pQualityLevels != nullptr) { + if (MultiSampleType == D3DMULTISAMPLE_NONMASKABLE) + *pQualityLevels = 32 - bit::lzcnt(availableFlags); + else + *pQualityLevels = 1; + } + + return D3D_OK; + } + + + HRESULT D3D9Adapter::CheckDepthStencilMatch( + D3DDEVTYPE DeviceType, + D3D9Format AdapterFormat, + D3D9Format RenderTargetFormat, + D3D9Format DepthStencilFormat) { + if (!IsDepthFormat(DepthStencilFormat)) + return D3DERR_NOTAVAILABLE; + + if (RenderTargetFormat == dxvk::D3D9Format::NULL_FORMAT) + return D3D_OK; + + auto mapping = ConvertFormatUnfixed(RenderTargetFormat); + if (mapping.FormatColor == VK_FORMAT_UNDEFINED) + return D3DERR_NOTAVAILABLE; + + return D3D_OK; + } + + + HRESULT D3D9Adapter::CheckDeviceFormatConversion( + D3DDEVTYPE DeviceType, + D3D9Format SourceFormat, + D3D9Format TargetFormat) { + bool sourceSupported = IsSupportedBackBufferFormat(D3D9Format::Unknown, SourceFormat, TRUE); + bool targetSupported = TargetFormat == D3D9Format::X1R5G5B5 + || TargetFormat == D3D9Format::A1R5G5B5 + || TargetFormat == D3D9Format::R5G6B5 + // || TargetFormat == D3D9Format::R8G8B8 <-- We don't support R8G8B8 + || TargetFormat == D3D9Format::X8R8G8B8 + || TargetFormat == D3D9Format::A8R8G8B8 + || TargetFormat == D3D9Format::A2R10G10B10 + || TargetFormat == D3D9Format::A16B16G16R16 + || TargetFormat == D3D9Format::A2B10G10R10 + || TargetFormat == D3D9Format::A8B8G8R8 + || TargetFormat == D3D9Format::X8B8G8R8 + || TargetFormat == D3D9Format::A16B16G16R16F + || TargetFormat == D3D9Format::A32B32G32R32F; + + return (sourceSupported && targetSupported) + ? 
D3D_OK + : D3DERR_NOTAVAILABLE; + } + + + HRESULT D3D9Adapter::GetDeviceCaps( + D3DDEVTYPE DeviceType, + D3DCAPS9* pCaps) { + using namespace dxvk::caps; + + if (pCaps == nullptr) + return D3DERR_INVALIDCALL; + + auto& options = m_parent->GetOptions(); + + // TODO: Actually care about what the adapter supports here. + // ^ For Intel and older cards most likely here. + + // Device Type + pCaps->DeviceType = DeviceType; + // Adapter Id + pCaps->AdapterOrdinal = m_ordinal; + // Caps 1 + pCaps->Caps = D3DCAPS_READ_SCANLINE; + // Caps 2 + pCaps->Caps2 = D3DCAPS2_FULLSCREENGAMMA + /* | D3DCAPS2_CANCALIBRATEGAMMA */ + /* | D3DCAPS2_RESERVED */ + /* | D3DCAPS2_CANMANAGERESOURCE */ + | D3DCAPS2_DYNAMICTEXTURES + | D3DCAPS2_CANAUTOGENMIPMAP + /* | D3DCAPS2_CANSHARERESOURCE */; + // Caps 3 + pCaps->Caps3 = D3DCAPS3_ALPHA_FULLSCREEN_FLIP_OR_DISCARD + | D3DCAPS3_LINEAR_TO_SRGB_PRESENTATION + | D3DCAPS3_COPY_TO_VIDMEM + | D3DCAPS3_COPY_TO_SYSTEMMEM + /* | D3DCAPS3_DXVAHD */ + /* | D3DCAPS3_DXVAHD_LIMITED */; + // Presentation Intervals + pCaps->PresentationIntervals = D3DPRESENT_INTERVAL_DEFAULT + | D3DPRESENT_INTERVAL_ONE + | D3DPRESENT_INTERVAL_TWO + | D3DPRESENT_INTERVAL_THREE + | D3DPRESENT_INTERVAL_FOUR + | D3DPRESENT_INTERVAL_IMMEDIATE; + // Cursor + pCaps->CursorCaps = D3DCURSORCAPS_COLOR; // I do not support Cursor yet, but I don't want to say I don't support it for compatibility reasons. 
+ // Dev Caps + pCaps->DevCaps = D3DDEVCAPS_EXECUTESYSTEMMEMORY + | D3DDEVCAPS_EXECUTEVIDEOMEMORY + | D3DDEVCAPS_TLVERTEXSYSTEMMEMORY + | D3DDEVCAPS_TLVERTEXVIDEOMEMORY + /* | D3DDEVCAPS_TEXTURESYSTEMMEMORY */ + | D3DDEVCAPS_TEXTUREVIDEOMEMORY + | D3DDEVCAPS_DRAWPRIMTLVERTEX + | D3DDEVCAPS_CANRENDERAFTERFLIP + | D3DDEVCAPS_TEXTURENONLOCALVIDMEM + | D3DDEVCAPS_DRAWPRIMITIVES2 + /* | D3DDEVCAPS_SEPARATETEXTUREMEMORIES */ + | D3DDEVCAPS_DRAWPRIMITIVES2EX + | D3DDEVCAPS_HWTRANSFORMANDLIGHT + | D3DDEVCAPS_CANBLTSYSTONONLOCAL + | D3DDEVCAPS_HWRASTERIZATION + | D3DDEVCAPS_PUREDEVICE + /* | D3DDEVCAPS_QUINTICRTPATCHES */ + /* | D3DDEVCAPS_RTPATCHES */ + /* | D3DDEVCAPS_RTPATCHHANDLEZERO */ + /* | D3DDEVCAPS_NPATCHES */; + // Primitive Misc. Caps + pCaps->PrimitiveMiscCaps = D3DPMISCCAPS_MASKZ + | D3DPMISCCAPS_CULLNONE + | D3DPMISCCAPS_CULLCW + | D3DPMISCCAPS_CULLCCW + | D3DPMISCCAPS_COLORWRITEENABLE + | D3DPMISCCAPS_CLIPPLANESCALEDPOINTS + | D3DPMISCCAPS_CLIPTLVERTS + | D3DPMISCCAPS_TSSARGTEMP + | D3DPMISCCAPS_BLENDOP + /* | D3DPMISCCAPS_NULLREFERENCE */ + | D3DPMISCCAPS_INDEPENDENTWRITEMASKS + | D3DPMISCCAPS_PERSTAGECONSTANT + | D3DPMISCCAPS_FOGANDSPECULARALPHA + | D3DPMISCCAPS_SEPARATEALPHABLEND + | D3DPMISCCAPS_MRTINDEPENDENTBITDEPTHS + | D3DPMISCCAPS_MRTPOSTPIXELSHADERBLENDING + | D3DPMISCCAPS_FOGVERTEXCLAMPED + | D3DPMISCCAPS_POSTBLENDSRGBCONVERT; + // Raster Caps + pCaps->RasterCaps = D3DPRASTERCAPS_DITHER + | D3DPRASTERCAPS_ZTEST + | D3DPRASTERCAPS_FOGVERTEX + | D3DPRASTERCAPS_FOGTABLE + | D3DPRASTERCAPS_MIPMAPLODBIAS + /* | D3DPRASTERCAPS_ZBUFFERLESSHSR */ + | D3DPRASTERCAPS_FOGRANGE + | D3DPRASTERCAPS_ANISOTROPY + /* | D3DPRASTERCAPS_WBUFFER */ + | D3DPRASTERCAPS_WFOG + | D3DPRASTERCAPS_ZFOG + | D3DPRASTERCAPS_COLORPERSPECTIVE + | D3DPRASTERCAPS_SCISSORTEST + | D3DPRASTERCAPS_SLOPESCALEDEPTHBIAS + | D3DPRASTERCAPS_DEPTHBIAS + | D3DPRASTERCAPS_MULTISAMPLE_TOGGLE; // <-- TODO! 
(but difficult in Vk) + // Z Comparison Caps + pCaps->ZCmpCaps = D3DPCMPCAPS_NEVER + | D3DPCMPCAPS_LESS + | D3DPCMPCAPS_EQUAL + | D3DPCMPCAPS_LESSEQUAL + | D3DPCMPCAPS_GREATER + | D3DPCMPCAPS_NOTEQUAL + | D3DPCMPCAPS_GREATEREQUAL + | D3DPCMPCAPS_ALWAYS; + // Source Blend Caps + pCaps->SrcBlendCaps = D3DPBLENDCAPS_ZERO + | D3DPBLENDCAPS_ONE + | D3DPBLENDCAPS_SRCCOLOR + | D3DPBLENDCAPS_INVSRCCOLOR + | D3DPBLENDCAPS_SRCALPHA + | D3DPBLENDCAPS_INVSRCALPHA + | D3DPBLENDCAPS_DESTALPHA + | D3DPBLENDCAPS_INVDESTALPHA + | D3DPBLENDCAPS_DESTCOLOR + | D3DPBLENDCAPS_INVDESTCOLOR + | D3DPBLENDCAPS_SRCALPHASAT + | D3DPBLENDCAPS_BOTHSRCALPHA + | D3DPBLENDCAPS_BOTHINVSRCALPHA + | D3DPBLENDCAPS_BLENDFACTOR + | D3DPBLENDCAPS_INVSRCCOLOR2 + | D3DPBLENDCAPS_SRCCOLOR2; + // Destination Blend Caps + pCaps->DestBlendCaps = pCaps->SrcBlendCaps; + // Alpha Comparison Caps + pCaps->AlphaCmpCaps = pCaps->ZCmpCaps; + // Shade Caps + pCaps->ShadeCaps = D3DPSHADECAPS_COLORGOURAUDRGB + | D3DPSHADECAPS_SPECULARGOURAUDRGB + | D3DPSHADECAPS_ALPHAGOURAUDBLEND + | D3DPSHADECAPS_FOGGOURAUD; + // Texture Caps + pCaps->TextureCaps = D3DPTEXTURECAPS_PERSPECTIVE + /* | D3DPTEXTURECAPS_POW2 */ + | D3DPTEXTURECAPS_ALPHA + /* | D3DPTEXTURECAPS_SQUAREONLY */ + | D3DPTEXTURECAPS_TEXREPEATNOTSCALEDBYSIZE + | D3DPTEXTURECAPS_ALPHAPALETTE + /* | D3DPTEXTURECAPS_NONPOW2CONDITIONAL */ + | D3DPTEXTURECAPS_PROJECTED + | D3DPTEXTURECAPS_CUBEMAP + | D3DPTEXTURECAPS_VOLUMEMAP + | D3DPTEXTURECAPS_MIPMAP + | D3DPTEXTURECAPS_MIPVOLUMEMAP + | D3DPTEXTURECAPS_MIPCUBEMAP + /* | D3DPTEXTURECAPS_CUBEMAP_POW2 */ + /* | D3DPTEXTURECAPS_VOLUMEMAP_POW2 */ + /* | D3DPTEXTURECAPS_NOPROJECTEDBUMPENV */; + // Texture Filter Caps + pCaps->TextureFilterCaps = D3DPTFILTERCAPS_MINFPOINT + | D3DPTFILTERCAPS_MINFLINEAR + | D3DPTFILTERCAPS_MINFANISOTROPIC + /* | D3DPTFILTERCAPS_MINFPYRAMIDALQUAD */ + /* | D3DPTFILTERCAPS_MINFGAUSSIANQUAD */ + | D3DPTFILTERCAPS_MIPFPOINT + | D3DPTFILTERCAPS_MIPFLINEAR + /* | D3DPTFILTERCAPS_CONVOLUTIONMONO */ 
+ | D3DPTFILTERCAPS_MAGFPOINT + | D3DPTFILTERCAPS_MAGFLINEAR + | D3DPTFILTERCAPS_MAGFANISOTROPIC + /* | D3DPTFILTERCAPS_MAGFPYRAMIDALQUAD */ + /* | D3DPTFILTERCAPS_MAGFGAUSSIANQUAD */; + // Cube Texture Filter Caps + pCaps->CubeTextureFilterCaps = pCaps->TextureFilterCaps; + // Volume Texture Filter Caps + pCaps->VolumeTextureFilterCaps = pCaps->TextureFilterCaps; + // Texture Address Caps + pCaps->TextureAddressCaps = D3DPTADDRESSCAPS_WRAP + | D3DPTADDRESSCAPS_MIRROR + | D3DPTADDRESSCAPS_CLAMP + | D3DPTADDRESSCAPS_BORDER + | D3DPTADDRESSCAPS_INDEPENDENTUV + | D3DPTADDRESSCAPS_MIRRORONCE; + // Volume Texture Address Caps + pCaps->VolumeTextureAddressCaps = pCaps->TextureAddressCaps; + // Line Caps + pCaps->LineCaps = D3DLINECAPS_TEXTURE + | D3DLINECAPS_ZTEST + | D3DLINECAPS_BLEND + | D3DLINECAPS_ALPHACMP + | D3DLINECAPS_FOG + | D3DLINECAPS_ANTIALIAS; //<-- Lying about doing AA lines here, we don't *fully* support that. + // Max Texture Width + pCaps->MaxTextureWidth = MaxTextureDimension; + // Max Texture Height + pCaps->MaxTextureHeight = MaxTextureDimension; + // Max Volume Extent + pCaps->MaxVolumeExtent = 8192; + // Max Texture Repeat + pCaps->MaxTextureRepeat = 8192; + // Max Texture Aspect Ratio + pCaps->MaxTextureAspectRatio = 8192; + // Max Anisotropy + pCaps->MaxAnisotropy = 16; + // Max Vertex W + pCaps->MaxVertexW = 1e10f; + // Guard Bands + pCaps->GuardBandLeft = -32768.0f; + pCaps->GuardBandTop = -32768.0f; + pCaps->GuardBandRight = 32768.0f; + pCaps->GuardBandBottom = 32768.0f; + // Extents Adjust + pCaps->ExtentsAdjust = 0.0f; + // Stencil Caps + pCaps->StencilCaps = D3DSTENCILCAPS_KEEP + | D3DSTENCILCAPS_ZERO + | D3DSTENCILCAPS_REPLACE + | D3DSTENCILCAPS_INCRSAT + | D3DSTENCILCAPS_DECRSAT + | D3DSTENCILCAPS_INVERT + | D3DSTENCILCAPS_INCR + | D3DSTENCILCAPS_DECR + | D3DSTENCILCAPS_TWOSIDED; + // FVF Caps + pCaps->FVFCaps = (MaxSimultaneousTextures & D3DFVFCAPS_TEXCOORDCOUNTMASK) + /* | D3DFVFCAPS_DONOTSTRIPELEMENTS */ + | D3DFVFCAPS_PSIZE; + // 
Texture Op Caps + pCaps->TextureOpCaps = D3DTEXOPCAPS_DISABLE + | D3DTEXOPCAPS_SELECTARG1 + | D3DTEXOPCAPS_SELECTARG2 + | D3DTEXOPCAPS_MODULATE + | D3DTEXOPCAPS_MODULATE2X + | D3DTEXOPCAPS_MODULATE4X + | D3DTEXOPCAPS_ADD + | D3DTEXOPCAPS_ADDSIGNED + | D3DTEXOPCAPS_ADDSIGNED2X + | D3DTEXOPCAPS_SUBTRACT + | D3DTEXOPCAPS_ADDSMOOTH + | D3DTEXOPCAPS_BLENDDIFFUSEALPHA + | D3DTEXOPCAPS_BLENDTEXTUREALPHA + | D3DTEXOPCAPS_BLENDFACTORALPHA + | D3DTEXOPCAPS_BLENDTEXTUREALPHAPM + | D3DTEXOPCAPS_BLENDCURRENTALPHA + | D3DTEXOPCAPS_PREMODULATE + | D3DTEXOPCAPS_MODULATEALPHA_ADDCOLOR + | D3DTEXOPCAPS_MODULATECOLOR_ADDALPHA + | D3DTEXOPCAPS_MODULATEINVALPHA_ADDCOLOR + | D3DTEXOPCAPS_MODULATEINVCOLOR_ADDALPHA + | D3DTEXOPCAPS_BUMPENVMAP + | D3DTEXOPCAPS_BUMPENVMAPLUMINANCE + | D3DTEXOPCAPS_DOTPRODUCT3 + | D3DTEXOPCAPS_MULTIPLYADD + | D3DTEXOPCAPS_LERP; + // Max Texture Blend Stages + pCaps->MaxTextureBlendStages = MaxTextureBlendStages; + // Max Simultaneous Textures + pCaps->MaxSimultaneousTextures = MaxSimultaneousTextures; + // Vertex Processing Caps + pCaps->VertexProcessingCaps = D3DVTXPCAPS_TEXGEN + | D3DVTXPCAPS_MATERIALSOURCE7 + | D3DVTXPCAPS_DIRECTIONALLIGHTS + | D3DVTXPCAPS_POSITIONALLIGHTS + | D3DVTXPCAPS_LOCALVIEWER + | D3DVTXPCAPS_TWEENING + | D3DVTXPCAPS_TEXGEN_SPHEREMAP + /* | D3DVTXPCAPS_NO_TEXGEN_NONLOCALVIEWER*/; + // Max Active Lights + pCaps->MaxActiveLights = caps::MaxEnabledLights; + // Max User Clip Planes + pCaps->MaxUserClipPlanes = MaxClipPlanes; + // Max Vertex Blend Matrices + pCaps->MaxVertexBlendMatrices = 4; + // Max Vertex Blend Matrix Index + pCaps->MaxVertexBlendMatrixIndex = 8; + // Max Point Size + pCaps->MaxPointSize = 256.0f; + // Max Primitive Count + pCaps->MaxPrimitiveCount = 0x00555555; + // Max Vertex Index + pCaps->MaxVertexIndex = 0x00ffffff; + // Max Streams + pCaps->MaxStreams = MaxStreams; + // Max Stream Stride + pCaps->MaxStreamStride = 508; // bytes + + const uint32_t majorVersion = options.shaderModel; + const uint32_t minorVersion 
= options.shaderModel != 1 ? 0 : 4; + + // Shader Versions + pCaps->VertexShaderVersion = D3DVS_VERSION(majorVersion, minorVersion); + pCaps->PixelShaderVersion = D3DPS_VERSION(majorVersion, minorVersion); + + // Max Vertex Shader Const + pCaps->MaxVertexShaderConst = MaxFloatConstantsVS; + // Max PS1 Value + pCaps->PixelShader1xMaxValue = FLT_MAX; + // Dev Caps 2 + pCaps->DevCaps2 = D3DDEVCAPS2_STREAMOFFSET + /* | D3DDEVCAPS2_DMAPNPATCH */ + /* | D3DDEVCAPS2_ADAPTIVETESSRTPATCH */ + /* | D3DDEVCAPS2_ADAPTIVETESSNPATCH */ + | D3DDEVCAPS2_CAN_STRETCHRECT_FROM_TEXTURES + /* | D3DDEVCAPS2_PRESAMPLEDDMAPNPATCH */ + | D3DDEVCAPS2_VERTEXELEMENTSCANSHARESTREAMOFFSET; + // Max N Patch Tesselation Level + pCaps->MaxNpatchTessellationLevel = 0.0f; + // Reserved for... something + pCaps->Reserved5 = 0; + // Master adapter for us is adapter 0, atm... + pCaps->MasterAdapterOrdinal = 0; + // The group of adapters this one is in + pCaps->AdapterOrdinalInGroup = 0; + // Number of adapters in current group + pCaps->NumberOfAdaptersInGroup = 1; + // Decl Type Caps + pCaps->DeclTypes = D3DDTCAPS_UBYTE4 + | D3DDTCAPS_UBYTE4N + | D3DDTCAPS_SHORT2N + | D3DDTCAPS_SHORT4N + | D3DDTCAPS_USHORT2N + | D3DDTCAPS_USHORT4N + | D3DDTCAPS_UDEC3 + | D3DDTCAPS_DEC3N + | D3DDTCAPS_FLOAT16_2 + | D3DDTCAPS_FLOAT16_4; + // Number of simultaneous RTs + pCaps->NumSimultaneousRTs = MaxSimultaneousRenderTargets; + // Possible StretchRect filters + pCaps->StretchRectFilterCaps = D3DPTFILTERCAPS_MINFPOINT + | D3DPTFILTERCAPS_MINFLINEAR + /* | D3DPTFILTERCAPS_MINFANISOTROPIC */ + /* | D3DPTFILTERCAPS_MINFPYRAMIDALQUAD */ + /* | D3DPTFILTERCAPS_MINFGAUSSIANQUAD */ + /* | D3DPTFILTERCAPS_MIPFPOINT */ + /* | D3DPTFILTERCAPS_MIPFLINEAR */ + /* | D3DPTFILTERCAPS_CONVOLUTIONMONO */ + | D3DPTFILTERCAPS_MAGFPOINT + | D3DPTFILTERCAPS_MAGFLINEAR + /* | D3DPTFILTERCAPS_MAGFANISOTROPIC */ + /* | D3DPTFILTERCAPS_MAGFPYRAMIDALQUAD */ + /* | D3DPTFILTERCAPS_MAGFGAUSSIANQUAD */; + + // Not too bothered about doing these 
longhand + // We should match whatever my AMD hardware reports here + // methinks for the best chance of stuff working. + pCaps->VS20Caps.Caps = 1; + pCaps->VS20Caps.DynamicFlowControlDepth = 24; + pCaps->VS20Caps.NumTemps = 32; + pCaps->VS20Caps.StaticFlowControlDepth = 4; + + pCaps->PS20Caps.Caps = 31; + pCaps->PS20Caps.DynamicFlowControlDepth = 24; + pCaps->PS20Caps.NumTemps = 32; + pCaps->PS20Caps.StaticFlowControlDepth = 4; + + pCaps->PS20Caps.NumInstructionSlots = options.shaderModel >= 2 ? 512 : 256; + + pCaps->VertexTextureFilterCaps = 50332416; + pCaps->MaxVShaderInstructionsExecuted = 4294967295; + pCaps->MaxPShaderInstructionsExecuted = 4294967295; + + pCaps->MaxVertexShader30InstructionSlots = options.shaderModel == 3 ? 32768 : 0; + pCaps->MaxPixelShader30InstructionSlots = options.shaderModel == 3 ? 32768 : 0; + + return D3D_OK; + } + + + HMONITOR D3D9Adapter::GetMonitor() { + return GetDefaultMonitor(); + } + + + UINT D3D9Adapter::GetAdapterModeCountEx(const D3DDISPLAYMODEFILTER* pFilter) { + if (pFilter == nullptr) + return 0; + + // We don't offer any interlaced formats here so early out and avoid destroying mode cache. + if (pFilter->ScanLineOrdering == D3DSCANLINEORDERING_INTERLACED) + return 0; + + CacheModes(EnumerateFormat(pFilter->Format)); + return m_modes.size(); + } + + + HRESULT D3D9Adapter::EnumAdapterModesEx( + const D3DDISPLAYMODEFILTER* pFilter, + UINT Mode, + D3DDISPLAYMODEEX* pMode) { + if (pMode == nullptr || pFilter == nullptr) + return D3DERR_INVALIDCALL; + + const D3D9Format format = + EnumerateFormat(pFilter->Format); + + if (FAILED(CheckDeviceFormat( + D3DDEVTYPE_HAL, EnumerateFormat(pFilter->Format), + D3DUSAGE_RENDERTARGET, D3DRTYPE_SURFACE, + EnumerateFormat(pFilter->Format)))) + return D3DERR_INVALIDCALL; + + CacheModes(format); + + // We don't return any scanline orderings that aren't progressive, + // The format filtering is already handled for us by cache modes + // So we can early out here and then just index. 
+ if (pFilter->ScanLineOrdering == D3DSCANLINEORDERING_INTERLACED) + return D3DERR_INVALIDCALL; + + if (Mode >= m_modes.size()) + return D3DERR_INVALIDCALL; + + *pMode = m_modes[Mode]; + + return D3D_OK; + } + + + HRESULT D3D9Adapter::GetAdapterDisplayModeEx( + D3DDISPLAYMODEEX* pMode, + D3DDISPLAYROTATION* pRotation) { + if (pMode == nullptr) + return D3DERR_INVALIDCALL; + + if (pRotation != nullptr) + *pRotation = D3DDISPLAYROTATION_IDENTITY; + + wsi::WsiMode mode = { }; + + if (!wsi::getCurrentDisplayMode(GetDefaultMonitor(), &mode)) { + Logger::err("D3D9Adapter::GetAdapterDisplayModeEx: Failed to enum display settings"); + return D3DERR_INVALIDCALL; + } + + pMode->Size = sizeof(D3DDISPLAYMODEEX); + pMode->Width = mode.width; + pMode->Height = mode.height; + pMode->RefreshRate = mode.refreshRate.numerator / mode.refreshRate.denominator; + pMode->Format = D3DFMT_X8R8G8B8; + pMode->ScanLineOrdering = D3DSCANLINEORDERING_PROGRESSIVE; + + return D3D_OK; + } + + + HRESULT D3D9Adapter::GetAdapterLUID(LUID* pLUID) { + if (pLUID == nullptr) + return D3DERR_INVALIDCALL; + + auto& deviceId = m_adapter->devicePropertiesExt().coreDeviceId; + + if (deviceId.deviceLUIDValid) + *pLUID = bit::cast<LUID>(deviceId.deviceLUID); + else + *pLUID = dxvk::GetAdapterLUID(m_ordinal); + + return D3D_OK; + } + + + HRESULT D3D9Adapter::CheckDeviceVkFormat( + VkFormat Format, + DWORD Usage, + D3DRESOURCETYPE RType) { + VkFormatFeatureFlags checkFlags = 0; + + if (RType != D3DRTYPE_SURFACE) + checkFlags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; + + if (Usage & D3DUSAGE_RENDERTARGET) { + checkFlags |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; + + if (Usage & D3DUSAGE_QUERY_POSTPIXELSHADER_BLENDING) + checkFlags |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; + } + + if (Usage & D3DUSAGE_DEPTHSTENCIL) + checkFlags |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT; + else + checkFlags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; + + VkFormatFeatureFlags checkFlagsMipGen = checkFlags; + + if (Usage & 
D3DUSAGE_AUTOGENMIPMAP) { + checkFlagsMipGen |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; + checkFlagsMipGen |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; + } + + VkFormatProperties fmtSupport = m_adapter->formatProperties(Format); + VkFormatFeatureFlags imgFeatures = fmtSupport.optimalTilingFeatures | fmtSupport.linearTilingFeatures; + + if ((imgFeatures & checkFlags) != checkFlags) + return D3DERR_NOTAVAILABLE; + + return ((imgFeatures & checkFlagsMipGen) != checkFlagsMipGen) + ? D3DOK_NOAUTOGEN + : D3D_OK; + } + + + void D3D9Adapter::CacheModes(D3D9Format Format) { + if (!m_modes.empty() && m_modeCacheFormat == Format) + return; // We already cached the modes for this format. No need to do it again. + + m_modes.clear(); + m_modeCacheFormat = Format; + + // Skip unsupported formats + if (!IsSupportedAdapterFormat(Format)) + return; + + auto& options = m_parent->GetOptions(); + + // Walk over all modes that the display supports and + // return those that match the requested format etc. + wsi::WsiMode devMode = { }; + + uint32_t modeIndex = 0; + + const auto forcedRatio = Ratio<DWORD>(options.forceAspectRatio); + + while (wsi::getDisplayMode(GetDefaultMonitor(), modeIndex++, &devMode)) { + // Skip interlaced modes altogether + if (devMode.interlaced) + continue; + + // Skip modes with incompatible formats + if (devMode.bitsPerPixel != GetMonitorFormatBpp(Format)) + continue; + + if (!forcedRatio.undefined() && Ratio<DWORD>(devMode.width, devMode.height) != forcedRatio) + continue; + + D3DDISPLAYMODEEX mode; + mode.Size = sizeof(D3DDISPLAYMODEEX); + mode.Width = devMode.width; + mode.Height = devMode.height; + mode.RefreshRate = devMode.refreshRate.numerator / devMode.refreshRate.denominator; + mode.Format = static_cast<D3DFORMAT>(Format); + mode.ScanLineOrdering = D3DSCANLINEORDERING_PROGRESSIVE; + + m_modes.push_back(mode); + } + + // Sort display modes by width, height and refresh rate, + // in that order. Some games rely on correct ordering. 
+ std::sort(m_modes.begin(), m_modes.end(), + [](const D3DDISPLAYMODEEX& a, const D3DDISPLAYMODEEX& b) { + if (a.Width < b.Width) return true; + if (a.Width > b.Width) return false; + + if (a.Height < b.Height) return true; + if (a.Height > b.Height) return false; + + return a.RefreshRate < b.RefreshRate; + }); + } + +} diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_adapter.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_adapter.h new file mode 100644 index 00000000..0cf74981 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_adapter.h @@ -0,0 +1,112 @@ +#pragma once + +#include "d3d9_include.h" + +#include "d3d9_options.h" +#include "d3d9_format.h" + +#include "../dxvk/dxvk_adapter.h" + +namespace dxvk { + + class D3D9InterfaceEx; + + class D3D9Adapter { + + public: + + D3D9Adapter( + D3D9InterfaceEx* pParent, + Rc<DxvkAdapter> Adapter, + UINT Ordinal, + UINT DisplayIndex); + + HRESULT GetAdapterIdentifier( + DWORD Flags, + D3DADAPTER_IDENTIFIER9* pIdentifier); + + HRESULT CheckDeviceType( + D3DDEVTYPE DevType, + D3D9Format AdapterFormat, + D3D9Format BackBufferFormat, + BOOL bWindowed); + + HRESULT CheckDeviceFormat( + D3DDEVTYPE DeviceType, + D3D9Format AdapterFormat, + DWORD Usage, + D3DRESOURCETYPE RType, + D3D9Format CheckFormat); + + HRESULT CheckDeviceMultiSampleType( + D3DDEVTYPE DeviceType, + D3D9Format SurfaceFormat, + BOOL Windowed, + D3DMULTISAMPLE_TYPE MultiSampleType, + DWORD* pQualityLevels); + + HRESULT CheckDepthStencilMatch( + D3DDEVTYPE DeviceType, + D3D9Format AdapterFormat, + D3D9Format RenderTargetFormat, + D3D9Format DepthStencilFormat); + + HRESULT CheckDeviceFormatConversion( + D3DDEVTYPE DeviceType, + D3D9Format SourceFormat, + D3D9Format TargetFormat); + + HRESULT GetDeviceCaps( + D3DDEVTYPE DeviceType, + D3DCAPS9* pCaps); + + HMONITOR GetMonitor(); + + UINT GetAdapterModeCountEx(const D3DDISPLAYMODEFILTER* pFilter); + + HRESULT EnumAdapterModesEx( + const D3DDISPLAYMODEFILTER* pFilter, + UINT Mode, + D3DDISPLAYMODEEX* 
pMode); + + HRESULT GetAdapterDisplayModeEx( + D3DDISPLAYMODEEX* pMode, + D3DDISPLAYROTATION* pRotation); + + HRESULT GetAdapterLUID(LUID* pLUID); + + UINT GetOrdinal() { return m_ordinal; } + + Rc<DxvkAdapter> GetDXVKAdapter() { return m_adapter; } + + D3D9_VK_FORMAT_MAPPING GetFormatMapping(D3D9Format Format) const { + return m_d3d9Formats.GetFormatMapping(Format); + } + + const DxvkFormatInfo* GetUnsupportedFormatInfo(D3D9Format Format) const { + return m_d3d9Formats.GetUnsupportedFormatInfo(Format); + } + + private: + + HRESULT CheckDeviceVkFormat( + VkFormat Format, + DWORD Usage, + D3DRESOURCETYPE RType); + + void CacheModes(D3D9Format Format); + + D3D9InterfaceEx* m_parent; + + Rc<DxvkAdapter> m_adapter; + UINT m_ordinal; + UINT m_displayIndex; + + std::vector<D3DDISPLAYMODEEX> m_modes; + D3D9Format m_modeCacheFormat; + + const D3D9VkFormatTable m_d3d9Formats; + + }; + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_buffer.cpp b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_buffer.cpp new file mode 100644 index 00000000..c0b72c46 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_buffer.cpp @@ -0,0 +1,116 @@ +#include "d3d9_buffer.h" + +namespace dxvk { + + //////////////////////// + // D3D9VertexBuffer + //////////////////////// + + D3D9VertexBuffer::D3D9VertexBuffer( + D3D9DeviceEx* pDevice, + const D3D9_BUFFER_DESC* pDesc) + : D3D9VertexBufferBase(pDevice, pDesc) { + + } + + + HRESULT STDMETHODCALLTYPE D3D9VertexBuffer::QueryInterface( + REFIID riid, + void** ppvObject) { + if (ppvObject == nullptr) + return E_POINTER; + + *ppvObject = nullptr; + + if (riid == __uuidof(IUnknown) + || riid == __uuidof(IDirect3DResource9) + || riid == __uuidof(IDirect3DVertexBuffer9)) { + *ppvObject = ref(this); + return S_OK; + } + + Logger::warn("D3D9VertexBuffer::QueryInterface: Unknown interface query"); + Logger::warn(str::format(riid)); + return E_NOINTERFACE; + } + + + D3DRESOURCETYPE STDMETHODCALLTYPE D3D9VertexBuffer::GetType() { + return D3DRTYPE_VERTEXBUFFER; + } + + + HRESULT STDMETHODCALLTYPE D3D9VertexBuffer::GetDesc( + D3DVERTEXBUFFER_DESC* pDesc) { + if (pDesc == nullptr) + return D3DERR_INVALIDCALL; + + const D3D9_BUFFER_DESC* desc = m_buffer.Desc(); + + pDesc->Format = static_cast<D3DFORMAT>(desc->Format); + pDesc->Type = desc->Type; + pDesc->Usage = desc->Usage; + pDesc->Pool = desc->Pool; + pDesc->Size = desc->Size; + pDesc->FVF = desc->FVF; + + return D3D_OK; + } + + + ////////////////////// + // D3D9IndexBuffer + ////////////////////// + + + D3D9IndexBuffer::D3D9IndexBuffer( + D3D9DeviceEx* pDevice, + const D3D9_BUFFER_DESC* pDesc) + : D3D9IndexBufferBase(pDevice, pDesc) { + + } + + + HRESULT STDMETHODCALLTYPE D3D9IndexBuffer::QueryInterface( + REFIID riid, + void** ppvObject) { + if (ppvObject == nullptr) + return E_POINTER; + + *ppvObject = nullptr; + + if (riid == 
__uuidof(IUnknown) + || riid == __uuidof(IDirect3DResource9) + || riid == __uuidof(IDirect3DIndexBuffer9)) { + *ppvObject = ref(this); + return S_OK; + } + + Logger::warn("D3D9IndexBuffer::QueryInterface: Unknown interface query"); + Logger::warn(str::format(riid)); + return E_NOINTERFACE; + } + + + D3DRESOURCETYPE STDMETHODCALLTYPE D3D9IndexBuffer::GetType() { + return D3DRTYPE_INDEXBUFFER; + } + + + HRESULT STDMETHODCALLTYPE D3D9IndexBuffer::GetDesc( + D3DINDEXBUFFER_DESC* pDesc) { + if (pDesc == nullptr) + return D3DERR_INVALIDCALL; + + const D3D9_BUFFER_DESC* desc = m_buffer.Desc(); + + pDesc->Format = static_cast<D3DFORMAT>(desc->Format); + pDesc->Type = desc->Type; + pDesc->Usage = desc->Usage; + pDesc->Pool = desc->Pool; + pDesc->Size = desc->Size; + + return D3D_OK; + } + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_buffer.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_buffer.h new file mode 100644 index 00000000..0600334c --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_buffer.h @@ -0,0 +1,96 @@ +#pragma once + +#include "d3d9_resource.h" + +#include "d3d9_common_buffer.h" + +namespace dxvk { + + template <typename... Type> + class D3D9Buffer : public D3D9Resource<Type...> { + + public: + + D3D9Buffer( + D3D9DeviceEx* pDevice, + const D3D9_BUFFER_DESC* pDesc) + : D3D9Resource<Type...> (pDevice), + m_buffer (pDevice, pDesc) { + + } + + HRESULT STDMETHODCALLTYPE Lock( + UINT OffsetToLock, + UINT SizeToLock, + void** ppbData, + DWORD Flags) final { + return m_buffer.Lock( + OffsetToLock, + SizeToLock, + ppbData, + Flags); + } + + HRESULT STDMETHODCALLTYPE Unlock() final { + return m_buffer.Unlock(); + } + + void STDMETHODCALLTYPE PreLoad() final { + m_buffer.PreLoad(); + } + + D3D9CommonBuffer* GetCommonBuffer() { + return &m_buffer; + } + + protected: + + D3D9CommonBuffer m_buffer; + + }; + + + using D3D9VertexBufferBase = D3D9Buffer<IDirect3DVertexBuffer9>; + class D3D9VertexBuffer final : public D3D9VertexBufferBase { + + public: + + D3D9VertexBuffer( + D3D9DeviceEx* pDevice, + const D3D9_BUFFER_DESC* pDesc); + + HRESULT STDMETHODCALLTYPE QueryInterface( + REFIID riid, + void** ppvObject); + + D3DRESOURCETYPE STDMETHODCALLTYPE GetType(); + + HRESULT STDMETHODCALLTYPE GetDesc(D3DVERTEXBUFFER_DESC* pDesc); + + }; + + using D3D9IndexBufferBase = D3D9Buffer<IDirect3DIndexBuffer9>; + class D3D9IndexBuffer final : public D3D9IndexBufferBase { + + public: + + D3D9IndexBuffer( + D3D9DeviceEx* pDevice, + const D3D9_BUFFER_DESC* pDesc); + + HRESULT STDMETHODCALLTYPE QueryInterface( + REFIID riid, + void** ppvObject); + + D3DRESOURCETYPE STDMETHODCALLTYPE GetType(); + + HRESULT STDMETHODCALLTYPE GetDesc(D3DINDEXBUFFER_DESC* pDesc); + + }; + + template <typename T> + inline D3D9CommonBuffer* 
GetCommonBuffer(const T& pResource) { + return pResource != nullptr ? pResource->GetCommonBuffer() : nullptr; + } + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_caps.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_caps.h new file mode 100644 index 00000000..0b008757 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_caps.h @@ -0,0 +1,32 @@ +#pragma once + +#include "d3d9_include.h" + +namespace dxvk::caps { + + constexpr uint32_t MaxClipPlanes = 6; + constexpr uint32_t MaxSamplers = 16; + constexpr uint32_t MaxStreams = 16; + constexpr uint32_t MaxSimultaneousTextures = 8; + constexpr uint32_t MaxTextureBlendStages = MaxSimultaneousTextures; + constexpr uint32_t MaxSimultaneousRenderTargets = D3D_MAX_SIMULTANEOUS_RENDERTARGETS; + + constexpr uint32_t MaxFloatConstantsVS = 256; + constexpr uint32_t MaxFloatConstantsPS = 224; + constexpr uint32_t MaxOtherConstants = 16; + constexpr uint32_t MaxFloatConstantsSoftware = 8192; + constexpr uint32_t MaxOtherConstantsSoftware = 2048; + + constexpr uint32_t InputRegisterCount = 16; + + constexpr uint32_t MaxTextureDimension = 16384; + constexpr uint32_t MaxMipLevels = 15; + constexpr uint32_t MaxSubresources = 15 * 6; + + constexpr uint32_t MaxTransforms = 10 + 256; + + constexpr uint32_t TextureStageCount = MaxSimultaneousTextures; + + constexpr uint32_t MaxEnabledLights = 8; + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_common_buffer.cpp b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_common_buffer.cpp new file mode 100644 index 00000000..509f0cbf --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_common_buffer.cpp @@ -0,0 +1,136 @@ +#include "d3d9_common_buffer.h" + +#include "d3d9_device.h" +#include "d3d9_util.h" + +namespace dxvk { + + D3D9CommonBuffer::D3D9CommonBuffer( + D3D9DeviceEx* pDevice, + const D3D9_BUFFER_DESC* pDesc) + : m_parent ( pDevice ), m_desc ( *pDesc ) { + m_buffer = CreateBuffer(); + if (GetMapMode() == D3D9_COMMON_BUFFER_MAP_MODE_BUFFER) + m_stagingBuffer = CreateStagingBuffer(); + + m_sliceHandle = GetMapBuffer()->getSliceHandle(); + + if (m_desc.Pool != D3DPOOL_DEFAULT) + m_dirtyRange = D3D9Range(0, m_desc.Size); + } + + + HRESULT D3D9CommonBuffer::Lock( + UINT OffsetToLock, + UINT SizeToLock, + void** ppbData, + DWORD Flags) { + return m_parent->LockBuffer( + this, + OffsetToLock, + SizeToLock, + ppbData, + Flags); + } + + + HRESULT D3D9CommonBuffer::Unlock() { + return m_parent->UnlockBuffer(this); + } + + + HRESULT D3D9CommonBuffer::ValidateBufferProperties(const D3D9_BUFFER_DESC* pDesc) { + if (pDesc->Size == 0) + return D3DERR_INVALIDCALL; + + return D3D_OK; + } + + + void D3D9CommonBuffer::PreLoad() { + if (IsPoolManaged(m_desc.Pool)) { + auto lock = m_parent->LockDevice(); + + if (NeedsUpload()) + m_parent->FlushBuffer(this); + } + } + + + Rc<DxvkBuffer> D3D9CommonBuffer::CreateBuffer() const { + DxvkBufferCreateInfo info; + info.size = m_desc.Size; + info.usage = 0; + info.stages = 0; + info.access = 0; + + VkMemoryPropertyFlags memoryFlags = 0; + + if (m_desc.Type == D3DRTYPE_VERTEXBUFFER) { + info.usage |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; + info.stages |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + info.access |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; + + if (m_parent->SupportsSWVP()) { + info.usage |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + info.stages |= 
VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT; + info.access |= VK_ACCESS_SHADER_WRITE_BIT; + } + } + else if (m_desc.Type == D3DRTYPE_INDEXBUFFER) { + info.usage |= VK_BUFFER_USAGE_INDEX_BUFFER_BIT; + info.stages |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + info.access |= VK_ACCESS_INDEX_READ_BIT; + } + + if (GetMapMode() == D3D9_COMMON_BUFFER_MAP_MODE_DIRECT) { + info.stages |= VK_PIPELINE_STAGE_HOST_BIT; + info.access |= VK_ACCESS_HOST_WRITE_BIT; + + if (!(m_desc.Usage & D3DUSAGE_WRITEONLY)) + info.access |= VK_ACCESS_HOST_READ_BIT; + + memoryFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT + | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + else { + info.stages |= VK_PIPELINE_STAGE_TRANSFER_BIT; + info.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT; + info.access |= VK_ACCESS_TRANSFER_WRITE_BIT; + + memoryFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + + if (memoryFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT && m_parent->GetOptions()->apitraceMode) { + memoryFlags |= VK_MEMORY_PROPERTY_HOST_COHERENT_BIT + | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + } + + return m_parent->GetDXVKDevice()->createBuffer(info, memoryFlags); + } + + + Rc<DxvkBuffer> D3D9CommonBuffer::CreateStagingBuffer() const { + DxvkBufferCreateInfo info; + info.size = m_desc.Size; + info.stages = VK_PIPELINE_STAGE_HOST_BIT + | VK_PIPELINE_STAGE_TRANSFER_BIT; + + info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + + info.access = VK_ACCESS_HOST_WRITE_BIT + | VK_ACCESS_TRANSFER_READ_BIT; + + if (!(m_desc.Usage & D3DUSAGE_WRITEONLY)) + info.access |= VK_ACCESS_HOST_READ_BIT; + + VkMemoryPropertyFlags memoryFlags = + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT + | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + + return m_parent->GetDXVKDevice()->createBuffer(info, memoryFlags); + } + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_common_buffer.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_common_buffer.h new file mode 100644 index 00000000..8f24d9c8 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_common_buffer.h @@ -0,0 +1,225 @@ +#pragma once + +#include "../dxvk/dxvk_device.h" + +#include "d3d9_device_child.h" +#include "d3d9_format.h" + +namespace dxvk { + + /** + * \brief Buffer map mode + */ + enum D3D9_COMMON_BUFFER_MAP_MODE { + D3D9_COMMON_BUFFER_MAP_MODE_BUFFER, + D3D9_COMMON_BUFFER_MAP_MODE_DIRECT + }; + + /** + * \brief Common buffer descriptor + */ + struct D3D9_BUFFER_DESC { + D3DRESOURCETYPE Type; + UINT Size; + DWORD Usage; + D3D9Format Format; + D3DPOOL Pool; + DWORD FVF; + }; + + /** + * \brief The type of buffer you want to use + */ + enum D3D9_COMMON_BUFFER_TYPE { + D3D9_COMMON_BUFFER_TYPE_MAPPING, + D3D9_COMMON_BUFFER_TYPE_STAGING, + D3D9_COMMON_BUFFER_TYPE_REAL + }; + + struct D3D9Range { + D3D9Range() { Clear(); } + + D3D9Range(uint32_t min, uint32_t max) + : min(min), + max(max) { + + } + + inline bool IsDegenerate() { return min == max; } + + inline void Conjoin(D3D9Range range) { + if (IsDegenerate()) + *this = range; + else { + min = std::min(range.min, min); + max = std::max(range.max, max); + } + } + + inline bool Overlaps(D3D9Range range) { + if (IsDegenerate()) + return false; + + return range.max > min && range.min < max; + } + + inline void Clear() { min = 0; max = 0; } + + uint32_t min = 0; + uint32_t max = 0; + }; + + class D3D9CommonBuffer { + static constexpr VkDeviceSize BufferSliceAlignment = 64; + public: + + D3D9CommonBuffer( + D3D9DeviceEx* pDevice, + const D3D9_BUFFER_DESC* pDesc); + + HRESULT Lock( + UINT OffsetToLock, + UINT SizeToLock, + void** ppbData, + DWORD Flags); + + HRESULT Unlock(); + + /** + * \brief Determine the mapping mode of the buffer, (ie. 
direct mapping or backed) + */ + inline D3D9_COMMON_BUFFER_MAP_MODE GetMapMode() const { + return (m_desc.Pool == D3DPOOL_DEFAULT && (m_desc.Usage & (D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY))) + ? D3D9_COMMON_BUFFER_MAP_MODE_DIRECT + : D3D9_COMMON_BUFFER_MAP_MODE_BUFFER; + } + + /** + * \brief Abstraction for getting a type of buffer (mapping/staging/the real buffer) across mapping modes. + */ + template <D3D9_COMMON_BUFFER_TYPE Type> + inline const Rc<DxvkBuffer>& GetBuffer() const { + if constexpr (Type == D3D9_COMMON_BUFFER_TYPE_MAPPING) + return GetMapBuffer(); + else if constexpr (Type == D3D9_COMMON_BUFFER_TYPE_STAGING) + return GetStagingBuffer(); + else //if constexpr (Type == D3D9_COMMON_BUFFER_TYPE_REAL) + return GetRealBuffer(); + } + + template <D3D9_COMMON_BUFFER_TYPE Type> + inline DxvkBufferSlice GetBufferSlice() const { + return GetBufferSlice<Type>(0, m_desc.Size); + } + + template <D3D9_COMMON_BUFFER_TYPE Type> + inline DxvkBufferSlice GetBufferSlice(VkDeviceSize offset) const { + return GetBufferSlice<Type>(offset, m_desc.Size - offset); + } + + template <D3D9_COMMON_BUFFER_TYPE Type> + inline DxvkBufferSlice GetBufferSlice(VkDeviceSize offset, VkDeviceSize length) const { + return DxvkBufferSlice(GetBuffer<Type>(), offset, length); + } + + inline DxvkBufferSliceHandle AllocMapSlice() { + return GetMapBuffer()->allocSlice(); + } + + inline DxvkBufferSliceHandle DiscardMapSlice() { + m_sliceHandle = GetMapBuffer()->allocSlice(); + return m_sliceHandle; + } + + inline DxvkBufferSliceHandle GetMappedSlice() const { + return m_sliceHandle; + } + + inline DWORD GetMapFlags() const { return m_mapFlags; } + inline void SetMapFlags(DWORD Flags) { m_mapFlags = Flags; } + + inline const D3D9_BUFFER_DESC* Desc() const { return &m_desc; } + + static HRESULT ValidateBufferProperties(const D3D9_BUFFER_DESC* pDesc); + + /** + * \brief The range of the buffer that was changed using Lock calls + */ + inline D3D9Range& DirtyRange() { return m_dirtyRange; } + + /** + 
* \brief The range of the buffer that might currently be read by the GPU + */ + inline D3D9Range& GPUReadingRange() { return m_gpuReadingRange; } + + /** + * \brief Whether or not the buffer was written to by the GPU (in IDirect3DDevice9::ProcessVertices) + */ + inline bool WasWrittenByGPU() const { return m_wasWrittenByGPU; } + + /** + * \brief Sets whether or not the buffer was written to by the GPU + */ + inline void SetWrittenByGPU(bool state) { m_wasWrittenByGPU = state; } + + inline uint32_t IncrementLockCount() { return ++m_lockCount; } + inline uint32_t DecrementLockCount() { + if (m_lockCount == 0) + return 0; + + return --m_lockCount; + } + inline uint32_t GetLockCount() const { return m_lockCount; } + + /** + * \brief Whether or not the staging buffer needs to be copied to the actual buffer + */ + inline bool NeedsUpload() { return m_desc.Pool != D3DPOOL_DEFAULT && !m_dirtyRange.IsDegenerate(); } + + inline bool DoesStagingBufferUploads() const { return m_uploadUsingStaging; } + + inline void EnableStagingBufferUploads() { + if (GetMapMode() != D3D9_COMMON_BUFFER_MAP_MODE_BUFFER) + return; + + m_uploadUsingStaging = true; + } + + void PreLoad(); + + private: + + Rc<DxvkBuffer> CreateBuffer() const; + Rc<DxvkBuffer> CreateStagingBuffer() const; + + inline const Rc<DxvkBuffer>& GetMapBuffer() const { + return m_stagingBuffer != nullptr ? m_stagingBuffer : m_buffer; + } + + inline const Rc<DxvkBuffer>& GetStagingBuffer() const { + return m_stagingBuffer; + } + + inline const Rc<DxvkBuffer>& GetRealBuffer() const { + return m_buffer; + } + + D3D9DeviceEx* m_parent; + const D3D9_BUFFER_DESC m_desc; + DWORD m_mapFlags; + bool m_wasWrittenByGPU = false; + bool m_uploadUsingStaging = false; + + Rc<DxvkBuffer> m_buffer; + Rc<DxvkBuffer> m_stagingBuffer; + + DxvkBufferSliceHandle m_sliceHandle; + + D3D9Range m_dirtyRange; + D3D9Range m_gpuReadingRange; + + uint32_t m_lockCount = 0; + + }; + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_common_texture.cpp b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_common_texture.cpp new file mode 100644 index 00000000..a44bb459 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_common_texture.cpp @@ -0,0 +1,522 @@ +#include "d3d9_common_texture.h" + +#include "d3d9_util.h" +#include "d3d9_device.h" + +#include <algorithm> + +namespace dxvk { + + D3D9CommonTexture::D3D9CommonTexture( + D3D9DeviceEx* pDevice, + const D3D9_COMMON_TEXTURE_DESC* pDesc, + D3DRESOURCETYPE ResourceType) + : m_device(pDevice), m_desc(*pDesc), m_type(ResourceType) { + if (m_desc.Format == D3D9Format::Unknown) + m_desc.Format = (m_desc.Usage & D3DUSAGE_DEPTHSTENCIL) + ? D3D9Format::D32 + : D3D9Format::X8R8G8B8; + + for (uint32_t i = 0; i < m_dirtyBoxes.size(); i++) { + AddDirtyBox(nullptr, i); + } + + if (m_desc.Pool != D3DPOOL_DEFAULT) { + const uint32_t subresources = CountSubresources(); + for (uint32_t i = 0; i < subresources; i++) { + SetNeedsUpload(i, true); + } + } + + m_mapping = pDevice->LookupFormat(m_desc.Format); + + m_mapMode = DetermineMapMode(); + m_shadow = DetermineShadowState(); + + if (m_mapMode == D3D9_COMMON_TEXTURE_MAP_MODE_BACKED) { + bool plainSurface = m_type == D3DRTYPE_SURFACE && + !(m_desc.Usage & (D3DUSAGE_RENDERTARGET | D3DUSAGE_DEPTHSTENCIL)); + + try { + m_image = CreatePrimaryImage(ResourceType, plainSurface); + } + catch (const DxvkError& e) { + // D3DUSAGE_AUTOGENMIPMAP and offscreen plain is mutually exclusive + // so we can combine their retry this way. 
+ if (m_desc.Usage & D3DUSAGE_AUTOGENMIPMAP || plainSurface) { + m_desc.Usage &= ~D3DUSAGE_AUTOGENMIPMAP; + m_desc.MipLevels = 1; + m_image = CreatePrimaryImage(ResourceType, false); + } + else + throw e; + } + + CreateSampleView(0); + + if (!IsManaged()) { + m_size = m_image->memSize(); + if (!m_device->ChangeReportedMemory(-m_size)) + throw DxvkError("D3D9: Reporting out of memory from tracking."); + } + } + + if (m_mapMode == D3D9_COMMON_TEXTURE_MAP_MODE_SYSTEMMEM) + CreateBuffers(); + + m_exposedMipLevels = m_desc.MipLevels; + + if (m_desc.Usage & D3DUSAGE_AUTOGENMIPMAP) + m_exposedMipLevels = 1; + } + + + D3D9CommonTexture::~D3D9CommonTexture() { + if (m_size != 0) + m_device->ChangeReportedMemory(m_size); + } + + + VkImageSubresource D3D9CommonTexture::GetSubresourceFromIndex( + VkImageAspectFlags Aspect, + UINT Subresource) const { + VkImageSubresource result; + result.aspectMask = Aspect; + result.mipLevel = Subresource % m_desc.MipLevels; + result.arrayLayer = Subresource / m_desc.MipLevels; + return result; + } + + + HRESULT D3D9CommonTexture::NormalizeTextureProperties( + D3D9DeviceEx* pDevice, + D3D9_COMMON_TEXTURE_DESC* pDesc) { + auto* options = pDevice->GetOptions(); + + ////////////////////// + // Mapping Validation + auto mapping = pDevice->LookupFormat(pDesc->Format); + + // Handle DisableA8RT hack for The Sims 2 + if (pDesc->Format == D3D9Format::A8 && + (pDesc->Usage & D3DUSAGE_RENDERTARGET) && + options->disableA8RT) + return D3DERR_INVALIDCALL; + + // If the mapping is invalid then lets return invalid + // Some edge cases: + // NULL format does not map to anything, but should succeed + // SCRATCH textures can still be made if the device does not support + // the format at all. 
+ + if (!mapping.IsValid() && pDesc->Format != D3D9Format::NULL_FORMAT) { + auto info = pDevice->UnsupportedFormatInfo(pDesc->Format); + + if (pDesc->Pool != D3DPOOL_SCRATCH || info->elementSize == 0) + return D3DERR_INVALIDCALL; + } + + /////////////////// + // Desc Validation + + if (pDesc->Width == 0 || pDesc->Height == 0 || pDesc->Depth == 0) + return D3DERR_INVALIDCALL; + + if (FAILED(DecodeMultiSampleType(pDesc->MultiSample, pDesc->MultisampleQuality, nullptr))) + return D3DERR_INVALIDCALL; + + // Using MANAGED pool with DYNAMIC usage is illegal + if (IsPoolManaged(pDesc->Pool) && (pDesc->Usage & D3DUSAGE_DYNAMIC)) + return D3DERR_INVALIDCALL; + + // D3DUSAGE_WRITEONLY doesn't apply to textures. + if (pDesc->Usage & D3DUSAGE_WRITEONLY) + return D3DERR_INVALIDCALL; + + // RENDERTARGET and DEPTHSTENCIL must be default pool + constexpr DWORD incompatibleUsages = D3DUSAGE_RENDERTARGET | D3DUSAGE_DEPTHSTENCIL; + if (pDesc->Pool != D3DPOOL_DEFAULT && (pDesc->Usage & incompatibleUsages)) + return D3DERR_INVALIDCALL; + + // Use the maximum possible mip level count if the supplied + // mip level count is either unspecified (0) or invalid + const uint32_t maxMipLevelCount = pDesc->MultiSample <= D3DMULTISAMPLE_NONMASKABLE + ? 
util::computeMipLevelCount({ pDesc->Width, pDesc->Height, pDesc->Depth }) + : 1u; + + if (pDesc->Usage & D3DUSAGE_AUTOGENMIPMAP) + pDesc->MipLevels = 0; + + if (pDesc->MipLevels == 0 || pDesc->MipLevels > maxMipLevelCount) + pDesc->MipLevels = maxMipLevelCount; + + return D3D_OK; + } + + + bool D3D9CommonTexture::CreateBufferSubresource(UINT Subresource) { + if (m_buffers[Subresource] != nullptr) + return false; + + DxvkBufferCreateInfo info; + info.size = GetMipSize(Subresource); + info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT + | VK_BUFFER_USAGE_TRANSFER_DST_BIT + | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + info.stages = VK_PIPELINE_STAGE_TRANSFER_BIT; + info.access = VK_ACCESS_TRANSFER_READ_BIT + | VK_ACCESS_TRANSFER_WRITE_BIT; + + if (m_mapping.ConversionFormatInfo.FormatType != D3D9ConversionFormat_None) { + info.usage |= VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT; + info.stages |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + } + + VkMemoryPropertyFlags memType = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT + | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + + m_buffers[Subresource] = m_device->GetDXVKDevice()->createBuffer(info, memType); + m_mappedSlices[Subresource] = m_buffers[Subresource]->getSliceHandle(); + + return true; + } + + + VkDeviceSize D3D9CommonTexture::GetMipSize(UINT Subresource) const { + const UINT MipLevel = Subresource % m_desc.MipLevels; + + const DxvkFormatInfo* formatInfo = m_mapping.FormatColor != VK_FORMAT_UNDEFINED + ? 
imageFormatInfo(m_mapping.FormatColor) + : m_device->UnsupportedFormatInfo(m_desc.Format); + + const VkExtent3D mipExtent = util::computeMipLevelExtent( + GetExtent(), MipLevel); + + const VkExtent3D blockCount = util::computeBlockCount( + mipExtent, formatInfo->blockSize); + + const uint32_t planeCount = m_mapping.ConversionFormatInfo.PlaneCount; + + return std::min(planeCount, 2u) + * align(formatInfo->elementSize * blockCount.width, 4) + * blockCount.height + * blockCount.depth; + } + + + Rc<DxvkImage> D3D9CommonTexture::CreatePrimaryImage(D3DRESOURCETYPE ResourceType, bool TryOffscreenRT) const { + DxvkImageCreateInfo imageInfo; + imageInfo.type = GetImageTypeFromResourceType(ResourceType); + imageInfo.format = m_mapping.ConversionFormatInfo.FormatColor != VK_FORMAT_UNDEFINED + ? m_mapping.ConversionFormatInfo.FormatColor + : m_mapping.FormatColor; + imageInfo.flags = 0; + imageInfo.sampleCount = VK_SAMPLE_COUNT_1_BIT; + imageInfo.extent.width = m_desc.Width; + imageInfo.extent.height = m_desc.Height; + imageInfo.extent.depth = m_desc.Depth; + imageInfo.numLayers = m_desc.ArraySize; + imageInfo.mipLevels = m_desc.MipLevels; + imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT + | VK_IMAGE_USAGE_TRANSFER_DST_BIT + | VK_IMAGE_USAGE_SAMPLED_BIT; + imageInfo.stages = VK_PIPELINE_STAGE_TRANSFER_BIT + | m_device->GetEnabledShaderStages(); + imageInfo.access = VK_ACCESS_TRANSFER_READ_BIT + | VK_ACCESS_TRANSFER_WRITE_BIT + | VK_ACCESS_SHADER_READ_BIT; + imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL; + imageInfo.layout = VK_IMAGE_LAYOUT_GENERAL; + imageInfo.shared = m_desc.IsBackBuffer; + + if (m_mapping.ConversionFormatInfo.FormatType != D3D9ConversionFormat_None) { + imageInfo.usage |= VK_IMAGE_USAGE_STORAGE_BIT; + imageInfo.stages |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + } + + DecodeMultiSampleType(m_desc.MultiSample, m_desc.MultisampleQuality, &imageInfo.sampleCount); + + // The image must be marked as mutable if it can be reinterpreted + // by a view with a different 
format. Depth-stencil formats cannot + // be reinterpreted in Vulkan, so we'll ignore those. + auto formatProperties = imageFormatInfo(m_mapping.FormatColor); + + bool isMutable = m_mapping.FormatSrgb != VK_FORMAT_UNDEFINED; + bool isColorFormat = (formatProperties->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0; + + if (isMutable && isColorFormat) { + imageInfo.flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; + + imageInfo.viewFormatCount = 2; + imageInfo.viewFormats = m_mapping.Formats; + } + + // Are we an RT, need to gen mips or an offscreen plain surface? + if (m_desc.Usage & (D3DUSAGE_RENDERTARGET | D3DUSAGE_AUTOGENMIPMAP) || TryOffscreenRT) { + imageInfo.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + imageInfo.stages |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + imageInfo.access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT + | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + } + + if (m_desc.Usage & D3DUSAGE_DEPTHSTENCIL) { + imageInfo.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + imageInfo.stages |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT + | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + imageInfo.access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT + | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + } + + if (ResourceType == D3DRTYPE_CUBETEXTURE) + imageInfo.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; + + // Some image formats (i.e. the R32G32B32 ones) are + // only supported with linear tiling on most GPUs + if (!CheckImageSupport(&imageInfo, VK_IMAGE_TILING_OPTIMAL)) + imageInfo.tiling = VK_IMAGE_TILING_LINEAR; + + // We must keep LINEAR images in GENERAL layout, but we + // can choose a better layout for the image based on how + // it is going to be used by the game. 
+ if (imageInfo.tiling == VK_IMAGE_TILING_OPTIMAL) + imageInfo.layout = OptimizeLayout(imageInfo.usage); + + // For some formats, we need to enable render target + // capabilities if available, but these should + // in no way affect the default image layout + imageInfo.usage |= EnableMetaCopyUsage(imageInfo.format, imageInfo.tiling); + + // Check if we can actually create the image + if (!CheckImageSupport(&imageInfo, imageInfo.tiling)) { + throw DxvkError(str::format( + "D3D9: Cannot create texture:", + "\n Type: ", std::hex, ResourceType, + "\n Format: ", m_desc.Format, + "\n Extent: ", m_desc.Width, + "x", m_desc.Height, + "x", m_desc.Depth, + "\n Samples: ", m_desc.MultiSample, + "\n Layers: ", m_desc.ArraySize, + "\n Levels: ", m_desc.MipLevels, + "\n Usage: ", std::hex, m_desc.Usage, + "\n Pool: ", std::hex, m_desc.Pool)); + } + + return m_device->GetDXVKDevice()->createImage(imageInfo, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + } + + + Rc<DxvkImage> D3D9CommonTexture::CreateResolveImage() const { + DxvkImageCreateInfo imageInfo = m_image->info(); + imageInfo.sampleCount = VK_SAMPLE_COUNT_1_BIT; + + return m_device->GetDXVKDevice()->createImage(imageInfo, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + } + + + BOOL D3D9CommonTexture::DetermineShadowState() const { + static std::array<D3D9Format, 3> blacklist = { + D3D9Format::INTZ, D3D9Format::DF16, D3D9Format::DF24 + }; + + return IsDepthFormat(m_desc.Format) + && std::find(blacklist.begin(), blacklist.end(), m_desc.Format) == blacklist.end(); + } + + + BOOL D3D9CommonTexture::CheckImageSupport( + const DxvkImageCreateInfo* pImageInfo, + VkImageTiling Tiling) const { + const Rc<DxvkAdapter> adapter = m_device->GetDXVKDevice()->adapter(); + + VkImageFormatProperties formatProps = { }; + + VkResult status = adapter->imageFormatProperties( + pImageInfo->format, pImageInfo->type, Tiling, + pImageInfo->usage, pImageInfo->flags, formatProps); + + if (status != VK_SUCCESS) + return FALSE; + + return (pImageInfo->extent.width 
<= formatProps.maxExtent.width) + && (pImageInfo->extent.height <= formatProps.maxExtent.height) + && (pImageInfo->extent.depth <= formatProps.maxExtent.depth) + && (pImageInfo->numLayers <= formatProps.maxArrayLayers) + && (pImageInfo->mipLevels <= formatProps.maxMipLevels) + && (pImageInfo->sampleCount & formatProps.sampleCounts); + } + + + VkImageUsageFlags D3D9CommonTexture::EnableMetaCopyUsage( + VkFormat Format, + VkImageTiling Tiling) const { + VkFormatFeatureFlags requestedFeatures = 0; + + if (Format == VK_FORMAT_D16_UNORM || Format == VK_FORMAT_D32_SFLOAT) + requestedFeatures |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT; + + if (Format == VK_FORMAT_R16_UNORM || Format == VK_FORMAT_R32_SFLOAT) + requestedFeatures |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; + + if (requestedFeatures == 0) + return 0; + + // Enable usage flags for all supported and requested features + VkFormatProperties properties = m_device->GetDXVKDevice()->adapter()->formatProperties(Format); + + requestedFeatures &= Tiling == VK_IMAGE_TILING_OPTIMAL + ? 
properties.optimalTilingFeatures + : properties.linearTilingFeatures; + + VkImageUsageFlags requestedUsage = 0; + + if (requestedFeatures & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) + requestedUsage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + + if (requestedFeatures & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) + requestedUsage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + + return requestedUsage; + } + + + VkImageType D3D9CommonTexture::GetImageTypeFromResourceType(D3DRESOURCETYPE Type) { + switch (Type) { + case D3DRTYPE_SURFACE: + case D3DRTYPE_TEXTURE: return VK_IMAGE_TYPE_2D; + case D3DRTYPE_VOLUMETEXTURE: return VK_IMAGE_TYPE_3D; + case D3DRTYPE_CUBETEXTURE: return VK_IMAGE_TYPE_2D; + default: throw DxvkError("D3D9CommonTexture: Unhandled resource type"); + } + } + + + VkImageViewType D3D9CommonTexture::GetImageViewTypeFromResourceType( + D3DRESOURCETYPE Dimension, + UINT Layer) { + switch (Dimension) { + case D3DRTYPE_SURFACE: + case D3DRTYPE_TEXTURE: return VK_IMAGE_VIEW_TYPE_2D; + case D3DRTYPE_VOLUMETEXTURE: return VK_IMAGE_VIEW_TYPE_3D; + case D3DRTYPE_CUBETEXTURE: return Layer == AllLayers + ? VK_IMAGE_VIEW_TYPE_CUBE + : VK_IMAGE_VIEW_TYPE_2D; + default: throw DxvkError("D3D9CommonTexture: Unhandled resource type"); + } + } + + + VkImageLayout D3D9CommonTexture::OptimizeLayout(VkImageUsageFlags Usage) const { + const VkImageUsageFlags usageFlags = Usage; + + // Filter out unnecessary flags. Transfer operations + // are handled by the backend in a transparent manner. 
+ Usage &= ~(VK_IMAGE_USAGE_TRANSFER_DST_BIT + | VK_IMAGE_USAGE_TRANSFER_SRC_BIT); + + // Ignore sampled bit in case the image was created with + // an image flag that only allows attachment usage + if (m_desc.IsAttachmentOnly) + Usage &= ~VK_IMAGE_USAGE_SAMPLED_BIT; + + // If the image is used only as an attachment, we never + // have to transform the image back to a different layout + if (Usage == VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) + return VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + + if (Usage == VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) + return VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + + Usage &= ~(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT + | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT); + + // If the image is used for reading but not as a storage + // image, we can optimize the image for texture access + if (Usage == VK_IMAGE_USAGE_SAMPLED_BIT) { + return usageFlags & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT + ? VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL + : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + } + + // Otherwise, we have to stick with the default layout + return VK_IMAGE_LAYOUT_GENERAL; + } + + + Rc<DxvkImageView> D3D9CommonTexture::CreateView( + UINT Layer, + UINT Lod, + VkImageUsageFlags UsageFlags, + bool Srgb) { + DxvkImageViewCreateInfo viewInfo; + viewInfo.format = m_mapping.ConversionFormatInfo.FormatColor != VK_FORMAT_UNDEFINED + ? PickSRGB(m_mapping.ConversionFormatInfo.FormatColor, m_mapping.ConversionFormatInfo.FormatSrgb, Srgb) + : PickSRGB(m_mapping.FormatColor, m_mapping.FormatSrgb, Srgb); + viewInfo.aspect = imageFormatInfo(viewInfo.format)->aspectMask; + viewInfo.swizzle = m_mapping.Swizzle; + viewInfo.usage = UsageFlags; + viewInfo.type = GetImageViewTypeFromResourceType(m_type, Layer); + viewInfo.minLevel = Lod; + viewInfo.numLevels = m_desc.MipLevels - Lod; + viewInfo.minLayer = Layer == AllLayers ? 0 : Layer; + viewInfo.numLayers = Layer == AllLayers ? 
m_desc.ArraySize : 1; + + // Remove the stencil aspect if we are trying to create a regular image + // view of a depth stencil format + if (UsageFlags != VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) + viewInfo.aspect &= ~VK_IMAGE_ASPECT_STENCIL_BIT; + + if (UsageFlags == VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT || + UsageFlags == VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) + viewInfo.numLevels = 1; + + // Remove swizzle on depth views. + if (UsageFlags == VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) + viewInfo.swizzle = { VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, + VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY }; + + // Create the underlying image view object + return m_device->GetDXVKDevice()->createImageView(GetImage(), viewInfo); + } + + + void D3D9CommonTexture::PreLoadAll() { + if (!IsManaged()) + return; + + auto lock = m_device->LockDevice(); + m_device->UploadManagedTexture(this); + m_device->MarkTextureUploaded(this); + } + + + void D3D9CommonTexture::PreLoadSubresource(UINT Subresource) { + if (IsManaged()) { + auto lock = m_device->LockDevice(); + + if (NeedsUpload(Subresource)) { + m_device->FlushImage(this, Subresource); + SetNeedsUpload(Subresource, false); + + if (!NeedsAnyUpload()) + m_device->MarkTextureUploaded(this); + } + } + } + + + void D3D9CommonTexture::CreateSampleView(UINT Lod) { + // This will be a no-op for SYSTEMMEM types given we + // don't expose the cap to allow texturing with them. + if (unlikely(m_mapMode == D3D9_COMMON_TEXTURE_MAP_MODE_SYSTEMMEM)) + return; + + m_sampleView.Color = CreateView(AllLayers, Lod, VK_IMAGE_USAGE_SAMPLED_BIT, false); + + if (IsSrgbCompatible()) + m_sampleView.Srgb = CreateView(AllLayers, Lod, VK_IMAGE_USAGE_SAMPLED_BIT, true); + } + + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_common_texture.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_common_texture.h new file mode 100644 index 00000000..3a6ac45e --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_common_texture.h @@ -0,0 +1,512 @@ +#pragma once + +#include "d3d9_format.h" +#include "d3d9_util.h" +#include "d3d9_caps.h" + +#include "../dxvk/dxvk_device.h" + +#include "../util/util_bit.h" + +namespace dxvk { + + class D3D9DeviceEx; + + /** + * \brief Image memory mapping mode + * + * Determines how exactly \c LockBox will + * behave when mapping an image. + */ + enum D3D9_COMMON_TEXTURE_MAP_MODE { + D3D9_COMMON_TEXTURE_MAP_MODE_NONE, ///< No mapping available + D3D9_COMMON_TEXTURE_MAP_MODE_BACKED, ///< Mapped image through buffer + D3D9_COMMON_TEXTURE_MAP_MODE_SYSTEMMEM, ///< Only a buffer - no image + }; + + /** + * \brief Common texture description + * + * Contains all members that can be + * defined for 2D, Cube and 3D textures. + */ + struct D3D9_COMMON_TEXTURE_DESC { + UINT Width; + UINT Height; + UINT Depth; + UINT ArraySize; + UINT MipLevels; + DWORD Usage; + D3D9Format Format; + D3DPOOL Pool; + D3DMULTISAMPLE_TYPE MultiSample; + DWORD MultisampleQuality; + bool Discard; + bool IsBackBuffer; + bool IsAttachmentOnly; + }; + + struct D3D9ColorView { + inline Rc<DxvkImageView>& Pick(bool Srgb) { + return Srgb ? this->Srgb : this->Color; + } + + inline const Rc<DxvkImageView>& Pick(bool Srgb) const { + return Srgb ? 
this->Srgb : this->Color; + } + + Rc<DxvkImageView> Color; + Rc<DxvkImageView> Srgb; + }; + + template <typename T> + using D3D9SubresourceArray = std::array<T, caps::MaxSubresources>; + + using D3D9SubresourceBitset = bit::bitset<caps::MaxSubresources>; + + class D3D9CommonTexture { + + public: + + D3D9CommonTexture( + D3D9DeviceEx* pDevice, + const D3D9_COMMON_TEXTURE_DESC* pDesc, + D3DRESOURCETYPE ResourceType); + + ~D3D9CommonTexture(); + + /** + * \brief Device + * \returns The parent device + */ + D3D9DeviceEx* Device() const { + return m_device; + } + + /** + * \brief Texture properties + * + * The returned data can be used to fill in + * \c D3D11_TEXTURE2D_DESC and similar structs. + * \returns Pointer to texture description + */ + const D3D9_COMMON_TEXTURE_DESC* Desc() const { + return &m_desc; + } + + /** + * \brief Vulkan Format + * \returns The Vulkan format of the resource + */ + const D3D9_VK_FORMAT_MAPPING& GetFormatMapping() const { + return m_mapping; + } + + /** + * \brief Counts number of subresources + * \returns Number of subresources + */ + UINT CountSubresources() const { + return m_desc.ArraySize * m_desc.MipLevels; + } + + /** + * \brief Map mode + * \returns Map mode + */ + D3D9_COMMON_TEXTURE_MAP_MODE GetMapMode() const { + return m_mapMode; + } + + /** + * \brief The DXVK image + * Note, this will be nullptr if the map mode is D3D9_COMMON_TEXTURE_MAP_MODE_SYSTEMMEM + * \returns The DXVK image + */ + const Rc<DxvkImage>& GetImage() const { + return m_image; + } + + /** + * \brief Get a copy of the main image, but with a single sample + * This function will allocate/reuse an image with the same info + * as the main image + * \returns An image with identical info, but 1 sample + */ + const Rc<DxvkImage>& GetResolveImage() { + if (unlikely(m_resolveImage == nullptr)) + m_resolveImage = CreateResolveImage(); + + return m_resolveImage; + } + + const Rc<DxvkBuffer>& GetBuffer(UINT Subresource) { + return m_buffers[Subresource]; + } + + + 
DxvkBufferSliceHandle GetMappedSlice(UINT Subresource) { + return m_mappedSlices[Subresource]; + } + + + DxvkBufferSliceHandle DiscardMapSlice(UINT Subresource) { + DxvkBufferSliceHandle handle = m_buffers[Subresource]->allocSlice(); + m_mappedSlices[Subresource] = handle; + return handle; + } + + /** + * \brief Computes subresource from the subresource index + * + * Used by some functions that operate on only + * one subresource, such as \c UpdateSurface. + * \param [in] Aspect The image aspect + * \param [in] Subresource Subresource index + * \returns The Vulkan image subresource + */ + VkImageSubresource GetSubresourceFromIndex( + VkImageAspectFlags Aspect, + UINT Subresource) const; + + /** + * \brief Normalizes and validates texture description + * + * Fills in undefined values and validates the texture + * parameters. Any error returned by this method should + * be forwarded to the application. + * \param [in,out] pDesc Texture description + * \returns \c S_OK if the parameters are valid + */ + static HRESULT NormalizeTextureProperties( + D3D9DeviceEx* pDevice, + D3D9_COMMON_TEXTURE_DESC* pDesc); + + /** + * \brief Shadow + * \returns Whether the texture is to be depth compared + */ + bool IsShadow() const { + return m_shadow; + } + + /** + * \brief Null + * \returns Whether the texture is D3DFMT_NULL or not + */ + bool IsNull() const { + return m_desc.Format == D3D9Format::NULL_FORMAT; + } + + /** + * \brief Subresource + * \returns The subresource idx of a given face and mip level + */ + UINT CalcSubresource(UINT Face, UINT MipLevel) const { + return Face * m_desc.MipLevels + MipLevel; + } + + /** + * \brief Creates buffers + * Creates mapping and staging buffers for all subresources + * allocates new buffers if necessary + */ + void CreateBuffers() { + const uint32_t count = CountSubresources(); + for (uint32_t i = 0; i < count; i++) + CreateBufferSubresource(i); + } + + /** + * \brief Creates a buffer + * Creates mapping and staging buffers for a given 
subresource + * allocates new buffers if necessary + * \returns Whether an allocation happened + */ + bool CreateBufferSubresource(UINT Subresource); + + /** + * \brief Destroys a buffer + * Destroys mapping and staging buffers for a given subresource + */ + void DestroyBufferSubresource(UINT Subresource) { + m_buffers[Subresource] = nullptr; + SetWrittenByGPU(Subresource, true); + } + + bool IsDynamic() const { + return m_desc.Usage & D3DUSAGE_DYNAMIC; + } + + /** + * \brief Managed + * \returns Whether a resource is managed (pool) or not + */ + bool IsManaged() const { + return IsPoolManaged(m_desc.Pool); + } + + /** + * \brief Render Target + * \returns Whether a resource is a render target or not + */ + bool IsRenderTarget() const { + return m_desc.Usage & D3DUSAGE_RENDERTARGET; + } + + /** + * \brief Depth stencil + * \returns Whether a resource is a depth stencil or not + */ + bool IsDepthStencil() const { + return m_desc.Usage & D3DUSAGE_DEPTHSTENCIL; + } + + /** + * \brief Autogen Mipmap + * \returns Whether the texture is to have automatic mip generation + */ + bool IsAutomaticMip() const { + return m_desc.Usage & D3DUSAGE_AUTOGENMIPMAP; + } + + /** + * \brief Checks whether sRGB views can be created + * \returns Whether the format is sRGB compatible. + */ + bool IsSrgbCompatible() const { + return m_mapping.FormatSrgb; + } + + /** + * \brief Recreate main image view + * Recreates the main view of the sampler w/ a specific LOD. + * SetLOD only works on MANAGED textures so this is A-okay. 
+ */ + void CreateSampleView(UINT Lod); + + /** + * \brief Extent + * \returns The extent of the top-level mip + */ + VkExtent3D GetExtent() const { + return VkExtent3D{ m_desc.Width, m_desc.Height, m_desc.Depth }; + } + + /** + * \brief Mip Extent + * \returns The extent of a mip or subresource + */ + VkExtent3D GetExtentMip(UINT Subresource) const { + UINT MipLevel = Subresource % m_desc.MipLevels; + return util::computeMipLevelExtent(GetExtent(), MipLevel); + } + + bool MarkHazardous() { + return std::exchange(m_hazardous, true); + } + + D3DRESOURCETYPE GetType() { + return m_type; + } + + const D3D9_VK_FORMAT_MAPPING& GetMapping() { return m_mapping; } + + void SetLocked(UINT Subresource, bool value) { m_locked.set(Subresource, value); } + + bool GetLocked(UINT Subresource) const { return m_locked.get(Subresource); } + + bool IsAnySubresourceLocked() const { return m_locked.any(); } + + void SetWrittenByGPU(UINT Subresource, bool value) { m_wasWrittenByGPU.set(Subresource, value); } + + bool WasWrittenByGPU(UINT Subresource) const { return m_wasWrittenByGPU.get(Subresource); } + + void MarkAllWrittenByGPU() { m_wasWrittenByGPU.setAll(); } + + void SetReadOnlyLocked(UINT Subresource, bool readOnly) { return m_readOnly.set(Subresource, readOnly); } + + bool GetReadOnlyLocked(UINT Subresource) const { return m_readOnly.get(Subresource); } + + const Rc<DxvkImageView>& GetSampleView(bool srgb) const { + return m_sampleView.Pick(srgb && IsSrgbCompatible()); + } + + VkImageLayout DetermineRenderTargetLayout() const { + return m_image != nullptr && + m_image->info().tiling == VK_IMAGE_TILING_OPTIMAL && + !m_hazardous + ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL + : VK_IMAGE_LAYOUT_GENERAL; + } + + VkImageLayout DetermineDepthStencilLayout(bool write, bool hazardous) const { + VkImageLayout layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + + if (unlikely(hazardous)) { + layout = write + ? 
VK_IMAGE_LAYOUT_GENERAL + : VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL; + } + + if (unlikely(m_image->info().tiling != VK_IMAGE_TILING_OPTIMAL)) + layout = VK_IMAGE_LAYOUT_GENERAL; + + return layout; + } + + Rc<DxvkImageView> CreateView( + UINT Layer, + UINT Lod, + VkImageUsageFlags UsageFlags, + bool Srgb); + D3D9SubresourceBitset& GetUploadBitmask() { return m_needsUpload; } + + void SetNeedsUpload(UINT Subresource, bool upload) { m_needsUpload.set(Subresource, upload); } + bool NeedsUpload(UINT Subresource) const { return m_needsUpload.get(Subresource); } + bool NeedsAnyUpload() { return m_needsUpload.any(); } + void ClearNeedsUpload() { return m_needsUpload.clearAll(); } + bool DoesStagingBufferUploads(UINT Subresource) const { return m_uploadUsingStaging.get(Subresource); } + + void EnableStagingBufferUploads(UINT Subresource) { + m_uploadUsingStaging.set(Subresource, true); + } + + void SetNeedsMipGen(bool value) { m_needsMipGen = value; } + bool NeedsMipGen() const { return m_needsMipGen; } + + DWORD ExposedMipLevels() { return m_exposedMipLevels; } + + void SetMipFilter(D3DTEXTUREFILTERTYPE filter) { m_mipFilter = filter; } + D3DTEXTUREFILTERTYPE GetMipFilter() const { return m_mipFilter; } + + void PreLoadAll(); + void PreLoadSubresource(UINT Subresource); + + void AddDirtyBox(CONST D3DBOX* pDirtyBox, uint32_t layer) { + if (pDirtyBox) { + D3DBOX box = *pDirtyBox; + if (box.Right <= box.Left + || box.Bottom <= box.Top + || box.Back <= box.Front) + return; + + box.Right = std::min(box.Right, m_desc.Width); + box.Bottom = std::min(box.Bottom, m_desc.Height); + box.Back = std::min(box.Back, m_desc.Depth); + + D3DBOX& dirtyBox = m_dirtyBoxes[layer]; + if (dirtyBox.Left == dirtyBox.Right) { + dirtyBox = box; + } else { + dirtyBox.Left = std::min(dirtyBox.Left, box.Left); + dirtyBox.Right = std::max(dirtyBox.Right, box.Right); + dirtyBox.Top = std::min(dirtyBox.Top, box.Top); + dirtyBox.Bottom = std::max(dirtyBox.Bottom, box.Bottom); + 
dirtyBox.Front = std::min(dirtyBox.Front, box.Front); + dirtyBox.Back = std::max(dirtyBox.Back, box.Back); + } + } else { + m_dirtyBoxes[layer] = { 0, 0, m_desc.Width, m_desc.Height, 0, m_desc.Depth }; + } + } + + void ClearDirtyBoxes() { + for (uint32_t i = 0; i < m_dirtyBoxes.size(); i++) { + m_dirtyBoxes[i] = { 0, 0, 0, 0, 0, 0 }; + } + } + + const D3DBOX& GetDirtyBox(uint32_t layer) const { + return m_dirtyBoxes[layer]; + } + + static VkImageType GetImageTypeFromResourceType( + D3DRESOURCETYPE Dimension); + + private: + + D3D9DeviceEx* m_device; + D3D9_COMMON_TEXTURE_DESC m_desc; + D3DRESOURCETYPE m_type; + D3D9_COMMON_TEXTURE_MAP_MODE m_mapMode; + + Rc<DxvkImage> m_image; + Rc<DxvkImage> m_resolveImage; + D3D9SubresourceArray< + Rc<DxvkBuffer>> m_buffers; + D3D9SubresourceArray< + DxvkBufferSliceHandle> m_mappedSlices; + + D3D9_VK_FORMAT_MAPPING m_mapping; + + bool m_shadow; //< Depth Compare-ness + + int64_t m_size = 0; + + bool m_systemmemModified = false; + + bool m_hazardous = false; + + D3D9ColorView m_sampleView; + + D3D9SubresourceBitset m_locked = { }; + + D3D9SubresourceBitset m_readOnly = { }; + + D3D9SubresourceBitset m_wasWrittenByGPU = { }; + + D3D9SubresourceBitset m_needsUpload = { }; + + D3D9SubresourceBitset m_uploadUsingStaging = { }; + + DWORD m_exposedMipLevels = 0; + + bool m_needsMipGen = false; + + D3DTEXTUREFILTERTYPE m_mipFilter = D3DTEXF_LINEAR; + + std::array<D3DBOX, 6> m_dirtyBoxes; + + /** + * \brief Mip level + * \returns Size of packed mip level in bytes + */ + VkDeviceSize GetMipSize(UINT Subresource) const; + + Rc<DxvkImage> CreatePrimaryImage(D3DRESOURCETYPE ResourceType, bool TryOffscreenRT) const; + + Rc<DxvkImage> CreateResolveImage() const; + + BOOL DetermineShadowState() const; + + BOOL CheckImageSupport( + const DxvkImageCreateInfo* pImageInfo, + VkImageTiling Tiling) const; + + VkImageUsageFlags EnableMetaCopyUsage( + VkFormat Format, + VkImageTiling Tiling) const; + + D3D9_COMMON_TEXTURE_MAP_MODE DetermineMapMode() 
const { + if (m_desc.Format == D3D9Format::NULL_FORMAT) + return D3D9_COMMON_TEXTURE_MAP_MODE_NONE; + + if (m_desc.Pool == D3DPOOL_SYSTEMMEM || m_desc.Pool == D3DPOOL_SCRATCH) + return D3D9_COMMON_TEXTURE_MAP_MODE_SYSTEMMEM; + + return D3D9_COMMON_TEXTURE_MAP_MODE_BACKED; + } + + VkImageLayout OptimizeLayout( + VkImageUsageFlags Usage) const; + + static VkImageViewType GetImageViewTypeFromResourceType( + D3DRESOURCETYPE Dimension, + UINT Layer); + + static constexpr UINT AllLayers = UINT32_MAX; + + }; + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_config.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_config.h new file mode 100644 index 00000000..4a882e9c --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_config.h @@ -0,0 +1,13 @@ +#pragma once + +namespace dxvk::config { + + constexpr bool FullValidation = false; + constexpr bool FloatEmulationByDefault = false; + constexpr bool FixedFunctionEnabled = true; + constexpr bool HazardTrackingEnabled = false; + constexpr bool ManagedUploadTrackingEnabled = true; + constexpr bool MipGenTrackingEnabled = false; + constexpr bool SWVPEnabled = false; + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_constant_layout.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_constant_layout.h new file mode 100644 index 00000000..df4120bc --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_constant_layout.h @@ -0,0 +1,31 @@ +#pragma once + +#include <cstdint> + +#include "d3d9_caps.h" + +namespace dxvk { + + struct D3D9ConstantLayout { + uint32_t floatCount; + uint32_t intCount; + uint32_t boolCount; + uint32_t bitmaskCount; + + uint32_t floatSize() const { return floatCount * 4 * sizeof(float); } + uint32_t intSize() const { return intCount * 4 * sizeof(int); } + uint32_t bitmaskSize() const { + // Account for SWVP (non SWVP uses a spec constant) + return bitmaskCount != 1 + ? bitmaskCount * 1 * sizeof(uint32_t) + : 0; + } + + uint32_t floatOffset() const { return 0; } + uint32_t intOffset() const { return floatOffset() + floatSize(); } + uint32_t bitmaskOffset() const { return intOffset() + intSize(); } + + uint32_t totalSize() const { return floatSize() + intSize() + bitmaskSize(); } + }; + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_constant_set.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_constant_set.h new file mode 100644 index 00000000..16b4e7e9 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_constant_set.h @@ -0,0 +1,47 @@ +#pragma once + +#include "d3d9_caps.h" + +#include "../dxvk/dxvk_buffer.h" + +#include "../dxso/dxso_isgn.h" + +#include "../util/util_math.h" +#include "../util/util_vector.h" + +#include <cstdint> + +namespace dxvk { + + enum class D3D9ConstantType { + Float, + Int, + Bool + }; + + // We make an assumption later based on the packing of this struct for copying. + struct D3D9ShaderConstantsVSSoftware { + Vector4 fConsts[caps::MaxFloatConstantsSoftware]; + Vector4i iConsts[caps::MaxOtherConstantsSoftware]; + uint32_t bConsts[caps::MaxOtherConstantsSoftware / 32]; + }; + + struct D3D9ShaderConstantsVSHardware { + Vector4 fConsts[caps::MaxFloatConstantsVS]; + Vector4i iConsts[caps::MaxOtherConstants]; + uint32_t bConsts[1]; + }; + + struct D3D9ShaderConstantsPS { + Vector4 fConsts[caps::MaxFloatConstantsPS]; + Vector4i iConsts[caps::MaxOtherConstants]; + uint32_t bConsts[1]; + }; + + struct D3D9ConstantSets { + Rc<DxvkBuffer> buffer; + DxsoShaderMetaInfo meta = {}; + bool dirty = true; + }; + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_cursor.cpp b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_cursor.cpp new file mode 100644 index 00000000..510bbbc9 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_cursor.cpp @@ -0,0 +1,67 @@ +#include "d3d9_cursor.h" +#include "d3d9_util.h" + +#include <utility> + +namespace dxvk { + +#ifndef DXVK_NATIVE + void D3D9Cursor::UpdateCursor(int X, int Y) { + POINT currentPos = { }; + if (::GetCursorPos(&currentPos) && currentPos == POINT{ X, Y }) + return; + + ::SetCursorPos(X, Y); + } + + + BOOL D3D9Cursor::ShowCursor(BOOL bShow) { + ::SetCursor(bShow ? m_hCursor : nullptr); + return std::exchange(m_visible, bShow); + } + + + HRESULT D3D9Cursor::SetHardwareCursor(UINT XHotSpot, UINT YHotSpot, const CursorBitmap& bitmap) { + DWORD mask[32]; + std::memset(mask, ~0, sizeof(mask)); + + ICONINFO info; + info.fIcon = FALSE; + info.xHotspot = XHotSpot; + info.yHotspot = YHotSpot; + info.hbmMask = ::CreateBitmap(HardwareCursorWidth, HardwareCursorHeight, 1, 1, mask); + info.hbmColor = ::CreateBitmap(HardwareCursorWidth, HardwareCursorHeight, 1, 32, &bitmap[0]); + + if (m_hCursor != nullptr) + ::DestroyCursor(m_hCursor); + + m_hCursor = ::CreateIconIndirect(&info); + + ::DeleteObject(info.hbmMask); + ::DeleteObject(info.hbmColor); + + ShowCursor(m_visible); + + return D3D_OK; + } +#else + void D3D9Cursor::UpdateCursor(int X, int Y) { + Logger::warn("D3D9Cursor::UpdateCursor: Not supported on native"); + } + + + BOOL D3D9Cursor::ShowCursor(BOOL bShow) { + Logger::warn("D3D9Cursor::ShowCursor: Not supported on native"); + + return std::exchange(m_visible, bShow); + } + + + HRESULT D3D9Cursor::SetHardwareCursor(UINT XHotSpot, UINT YHotSpot, const CursorBitmap& bitmap) { + Logger::warn("D3D9Cursor::SetHardwareCursor: Not supported on native"); + + return D3DERR_INVALIDCALL; + } +#endif + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_cursor.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_cursor.h new file mode 100644 index 00000000..1b541d14 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_cursor.h @@ -0,0 +1,35 @@ +#pragma once + +#include "d3d9_include.h" + +namespace dxvk { + + constexpr uint32_t HardwareCursorWidth = 32u; + constexpr uint32_t HardwareCursorHeight = 32u; + constexpr uint32_t HardwareCursorFormatSize = 4u; + constexpr uint32_t HardwareCursorPitch = HardwareCursorWidth * HardwareCursorFormatSize; + + // Format Size of 4 bytes (ARGB) + using CursorBitmap = uint8_t[HardwareCursorHeight * HardwareCursorPitch]; + + class D3D9Cursor { + + public: + + void UpdateCursor(int X, int Y); + + BOOL ShowCursor(BOOL bShow); + + HRESULT SetHardwareCursor(UINT XHotSpot, UINT YHotSpot, const CursorBitmap& bitmap); + + private: + + BOOL m_visible = FALSE; + +#ifndef DXVK_NATIVE + HCURSOR m_hCursor = nullptr; +#endif + + }; + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_device.cpp b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_device.cpp new file mode 100644 index 00000000..7af7f511 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_device.cpp @@ -0,0 +1,7219 @@ +#include "d3d9_device.h" + +#include "d3d9_interface.h" +#include "d3d9_swapchain.h" +#include "d3d9_caps.h" +#include "d3d9_util.h" +#include "d3d9_texture.h" +#include "d3d9_buffer.h" +#include "d3d9_vertex_declaration.h" +#include "d3d9_shader.h" +#include "d3d9_query.h" +#include "d3d9_stateblock.h" +#include "d3d9_monitor.h" +#include "d3d9_spec_constants.h" +#include "d3d9_names.h" +#include "d3d9_format_helpers.h" + +#include "../dxvk/dxvk_adapter.h" +#include "../dxvk/dxvk_instance.h" + +#include "../util/util_bit.h" +#include "../util/util_math.h" + +#include "d3d9_initializer.h" + +#include <algorithm> +#include <cfloat> +#ifdef MSC_VER +#pragma fenv_access (on) +#endif + +namespace dxvk { + + D3D9DeviceEx::D3D9DeviceEx( + D3D9InterfaceEx* pParent, + D3D9Adapter* pAdapter, + D3DDEVTYPE DeviceType, + HWND hFocusWindow, + DWORD BehaviorFlags, + Rc<DxvkDevice> dxvkDevice) + : m_parent ( pParent ) + , m_deviceType ( DeviceType ) + , m_window ( hFocusWindow ) + , m_behaviorFlags ( BehaviorFlags ) + , m_adapter ( pAdapter ) + , m_dxvkDevice ( dxvkDevice ) + , m_shaderModules ( new D3D9ShaderModuleSet ) + , m_d3d9Options ( dxvkDevice, pParent->GetInstance()->config() ) + , m_multithread ( BehaviorFlags & D3DCREATE_MULTITHREADED ) + , m_isSWVP ( (BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING) ? true : false ) + , m_csThread ( dxvkDevice->createContext() ) + , m_csChunk ( AllocCsChunk() ) { + // If we can SWVP, then we use an extended constant set + // as SWVP has many more slots available than HWVP. 
+ bool canSWVP = CanSWVP(); + DetermineConstantLayouts(canSWVP); + + if (canSWVP) + Logger::info("D3D9DeviceEx: Using extended constant set for software vertex processing."); + + m_initializer = new D3D9Initializer(m_dxvkDevice); + m_converter = new D3D9FormatHelper(m_dxvkDevice); + + EmitCs([ + cDevice = m_dxvkDevice + ] (DxvkContext* ctx) { + ctx->beginRecording(cDevice->createCommandList()); + + DxvkLogicOpState loState; + loState.enableLogicOp = VK_FALSE; + loState.logicOp = VK_LOGIC_OP_CLEAR; + ctx->setLogicOpState(loState); + }); + + if (!(BehaviorFlags & D3DCREATE_FPU_PRESERVE)) + SetupFPU(); + + m_dxsoOptions = DxsoOptions(this, m_d3d9Options); + + CreateConstantBuffers(); + + m_availableMemory = DetermineInitialTextureMemory(); + } + + + D3D9DeviceEx::~D3D9DeviceEx() { + Flush(); + SynchronizeCsThread(); + + delete m_initializer; + delete m_converter; + + m_dxvkDevice->waitForIdle(); // Sync Device + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::QueryInterface(REFIID riid, void** ppvObject) { + if (ppvObject == nullptr) + return E_POINTER; + + *ppvObject = nullptr; + + bool extended = m_parent->IsExtended() + && riid == __uuidof(IDirect3DDevice9Ex); + + if (riid == __uuidof(IUnknown) + || riid == __uuidof(IDirect3DDevice9) + || extended) { + *ppvObject = ref(this); + return S_OK; + } + + // We want to ignore this if the extended device is queried and we weren't made extended. + if (riid == __uuidof(IDirect3DDevice9Ex)) + return E_NOINTERFACE; + + Logger::warn("D3D9DeviceEx::QueryInterface: Unknown interface query"); + Logger::warn(str::format(riid)); + return E_NOINTERFACE; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::TestCooperativeLevel() { + // Equivelant of D3D11/DXGI present tests. We can always present. + return D3D_OK; + } + + + UINT STDMETHODCALLTYPE D3D9DeviceEx::GetAvailableTextureMem() { + // This is not meant to be accurate. + // The values are also wildly incorrect in d3d9... But some games rely + // on this inaccurate value... 
+ + // Clamp to megabyte range, as per spec. + constexpr UINT range = 0xfff00000; + + // Can't have negative memory! + int64_t memory = std::max<int64_t>(m_availableMemory.load(), 0); + + return UINT(memory) & range; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::EvictManagedResources() { + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetDirect3D(IDirect3D9** ppD3D9) { + if (ppD3D9 == nullptr) + return D3DERR_INVALIDCALL; + + *ppD3D9 = m_parent.ref(); + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetDeviceCaps(D3DCAPS9* pCaps) { + return m_adapter->GetDeviceCaps(m_deviceType, pCaps); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetDisplayMode(UINT iSwapChain, D3DDISPLAYMODE* pMode) { + if (unlikely(iSwapChain != 0)) + return D3DERR_INVALIDCALL; + + return m_implicitSwapchain->GetDisplayMode(pMode); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetCreationParameters(D3DDEVICE_CREATION_PARAMETERS *pParameters) { + if (pParameters == nullptr) + return D3DERR_INVALIDCALL; + + pParameters->AdapterOrdinal = m_adapter->GetOrdinal(); + pParameters->BehaviorFlags = m_behaviorFlags; + pParameters->DeviceType = m_deviceType; + pParameters->hFocusWindow = m_window; + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetCursorProperties( + UINT XHotSpot, + UINT YHotSpot, + IDirect3DSurface9* pCursorBitmap) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(pCursorBitmap == nullptr)) + return D3DERR_INVALIDCALL; + + auto* cursorTex = GetCommonTexture(pCursorBitmap); + if (unlikely(cursorTex->Desc()->Format != D3D9Format::A8R8G8B8)) + return D3DERR_INVALIDCALL; + + uint32_t inputWidth = cursorTex->Desc()->Width; + uint32_t inputHeight = cursorTex->Desc()->Height; + + // Always use a hardware cursor when windowed. 
+ bool hwCursor = m_presentParams.Windowed; + + // Always use a hardware cursor w/h <= 32 px + hwCursor |= inputWidth <= HardwareCursorWidth + || inputHeight <= HardwareCursorHeight; + + if (hwCursor) { + D3DLOCKED_BOX lockedBox; + HRESULT hr = LockImage(cursorTex, 0, 0, &lockedBox, nullptr, D3DLOCK_READONLY); + if (FAILED(hr)) + return hr; + + const uint8_t* data = reinterpret_cast<const uint8_t*>(lockedBox.pBits); + + // Windows works with a stride of 128, lets respect that. + // Copy data to the bitmap... + CursorBitmap bitmap = { 0 }; + size_t copyPitch = std::min<size_t>( + HardwareCursorPitch, + inputWidth * inputHeight * HardwareCursorFormatSize); + + for (uint32_t h = 0; h < HardwareCursorHeight; h++) + std::memcpy(&bitmap[h * HardwareCursorPitch], &data[h * lockedBox.RowPitch], copyPitch); + + UnlockImage(cursorTex, 0, 0); + + // Set this as our cursor. + return m_cursor.SetHardwareCursor(XHotSpot, YHotSpot, bitmap); + } + + // Software Cursor... + Logger::warn("D3D9DeviceEx::SetCursorProperties: Software cursor not implemented."); + return D3D_OK; + } + + + void STDMETHODCALLTYPE D3D9DeviceEx::SetCursorPosition(int X, int Y, DWORD Flags) { + D3D9DeviceLock lock = LockDevice(); + + // I was not able to find an instance + // where the cursor update was not immediate. + + // Fullscreen + Windowed seem to have the same + // behaviour here. + + // Hence we ignore the flag D3DCURSOR_IMMEDIATE_UPDATE. 
+ + m_cursor.UpdateCursor(X, Y); + } + + + BOOL STDMETHODCALLTYPE D3D9DeviceEx::ShowCursor(BOOL bShow) { + D3D9DeviceLock lock = LockDevice(); + + return m_cursor.ShowCursor(bShow); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateAdditionalSwapChain( + D3DPRESENT_PARAMETERS* pPresentationParameters, + IDirect3DSwapChain9** ppSwapChain) { + return CreateAdditionalSwapChainEx(pPresentationParameters, nullptr, ppSwapChain); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetSwapChain(UINT iSwapChain, IDirect3DSwapChain9** pSwapChain) { + D3D9DeviceLock lock = LockDevice(); + + InitReturnPtr(pSwapChain); + + if (unlikely(pSwapChain == nullptr)) + return D3DERR_INVALIDCALL; + + // This only returns the implicit swapchain... + + if (unlikely(iSwapChain != 0)) + return D3DERR_INVALIDCALL; + + *pSwapChain = static_cast<IDirect3DSwapChain9*>(m_implicitSwapchain.ref()); + + return D3D_OK; + } + + + UINT STDMETHODCALLTYPE D3D9DeviceEx::GetNumberOfSwapChains() { + // This only counts the implicit swapchain... 
+ + return 1; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::Reset(D3DPRESENT_PARAMETERS* pPresentationParameters) { + D3D9DeviceLock lock = LockDevice(); + + HRESULT hr = ResetSwapChain(pPresentationParameters, nullptr); + if (FAILED(hr)) + return hr; + + hr = ResetState(pPresentationParameters); + if (FAILED(hr)) + return hr; + + Flush(); + SynchronizeCsThread(); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::Present( + const RECT* pSourceRect, + const RECT* pDestRect, + HWND hDestWindowOverride, + const RGNDATA* pDirtyRegion) { + return PresentEx( + pSourceRect, + pDestRect, + hDestWindowOverride, + pDirtyRegion, + 0); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetBackBuffer( + UINT iSwapChain, + UINT iBackBuffer, + D3DBACKBUFFER_TYPE Type, + IDirect3DSurface9** ppBackBuffer) { + InitReturnPtr(ppBackBuffer); + + if (unlikely(iSwapChain != 0)) + return D3DERR_INVALIDCALL; + + return m_implicitSwapchain->GetBackBuffer(iBackBuffer, Type, ppBackBuffer); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetRasterStatus(UINT iSwapChain, D3DRASTER_STATUS* pRasterStatus) { + if (unlikely(iSwapChain != 0)) + return D3DERR_INVALIDCALL; + + return m_implicitSwapchain->GetRasterStatus(pRasterStatus); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetDialogBoxMode(BOOL bEnableDialogs) { + return m_implicitSwapchain->SetDialogBoxMode(bEnableDialogs); + } + + + void STDMETHODCALLTYPE D3D9DeviceEx::SetGammaRamp( + UINT iSwapChain, + DWORD Flags, + const D3DGAMMARAMP* pRamp) { + if (unlikely(iSwapChain != 0)) + return; + + m_implicitSwapchain->SetGammaRamp(Flags, pRamp); + } + + + void STDMETHODCALLTYPE D3D9DeviceEx::GetGammaRamp(UINT iSwapChain, D3DGAMMARAMP* pRamp) { + if (unlikely(iSwapChain != 0)) + return; + + m_implicitSwapchain->GetGammaRamp(pRamp); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateTexture( + UINT Width, + UINT Height, + UINT Levels, + DWORD Usage, + D3DFORMAT Format, + D3DPOOL Pool, + IDirect3DTexture9** 
ppTexture, + HANDLE* pSharedHandle) { + InitReturnPtr(ppTexture); + + if (unlikely(ppTexture == nullptr)) + return D3DERR_INVALIDCALL; + + D3D9_COMMON_TEXTURE_DESC desc; + desc.Width = Width; + desc.Height = Height; + desc.Depth = 1; + desc.ArraySize = 1; + desc.MipLevels = Levels; + desc.Usage = Usage; + desc.Format = EnumerateFormat(Format); + desc.Pool = Pool; + desc.Discard = FALSE; + desc.MultiSample = D3DMULTISAMPLE_NONE; + desc.MultisampleQuality = 0; + desc.IsBackBuffer = FALSE; + desc.IsAttachmentOnly = FALSE; + + if (FAILED(D3D9CommonTexture::NormalizeTextureProperties(this, &desc))) + return D3DERR_INVALIDCALL; + + try { + const Com<D3D9Texture2D> texture = new D3D9Texture2D(this, &desc); + + void* initialData = nullptr; + + if (Pool == D3DPOOL_SYSTEMMEM && Levels == 1 && pSharedHandle != nullptr) + initialData = *(reinterpret_cast<void**>(pSharedHandle)); + + m_initializer->InitTexture(texture->GetCommonTexture(), initialData); + *ppTexture = texture.ref(); + + return D3D_OK; + } + catch (const DxvkError& e) { + Logger::err(e.message()); + return D3DERR_OUTOFVIDEOMEMORY; + } + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateVolumeTexture( + UINT Width, + UINT Height, + UINT Depth, + UINT Levels, + DWORD Usage, + D3DFORMAT Format, + D3DPOOL Pool, + IDirect3DVolumeTexture9** ppVolumeTexture, + HANDLE* pSharedHandle) { + InitReturnPtr(ppVolumeTexture); + + if (unlikely(ppVolumeTexture == nullptr)) + return D3DERR_INVALIDCALL; + + D3D9_COMMON_TEXTURE_DESC desc; + desc.Width = Width; + desc.Height = Height; + desc.Depth = Depth; + desc.ArraySize = 1; + desc.MipLevels = Levels; + desc.Usage = Usage; + desc.Format = EnumerateFormat(Format); + desc.Pool = Pool; + desc.Discard = FALSE; + desc.MultiSample = D3DMULTISAMPLE_NONE; + desc.MultisampleQuality = 0; + desc.IsBackBuffer = FALSE; + desc.IsAttachmentOnly = FALSE; + + if (FAILED(D3D9CommonTexture::NormalizeTextureProperties(this, &desc))) + return D3DERR_INVALIDCALL; + + try { + const 
Com<D3D9Texture3D> texture = new D3D9Texture3D(this, &desc); + m_initializer->InitTexture(texture->GetCommonTexture()); + *ppVolumeTexture = texture.ref(); + + return D3D_OK; + } + catch (const DxvkError& e) { + Logger::err(e.message()); + return D3DERR_OUTOFVIDEOMEMORY; + } + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateCubeTexture( + UINT EdgeLength, + UINT Levels, + DWORD Usage, + D3DFORMAT Format, + D3DPOOL Pool, + IDirect3DCubeTexture9** ppCubeTexture, + HANDLE* pSharedHandle) { + InitReturnPtr(ppCubeTexture); + + if (unlikely(ppCubeTexture == nullptr)) + return D3DERR_INVALIDCALL; + + D3D9_COMMON_TEXTURE_DESC desc; + desc.Width = EdgeLength; + desc.Height = EdgeLength; + desc.Depth = 1; + desc.ArraySize = 6; // A cube has 6 faces, wowwie! + desc.MipLevels = Levels; + desc.Usage = Usage; + desc.Format = EnumerateFormat(Format); + desc.Pool = Pool; + desc.Discard = FALSE; + desc.MultiSample = D3DMULTISAMPLE_NONE; + desc.MultisampleQuality = 0; + desc.IsBackBuffer = FALSE; + desc.IsAttachmentOnly = FALSE; + + if (FAILED(D3D9CommonTexture::NormalizeTextureProperties(this, &desc))) + return D3DERR_INVALIDCALL; + + try { + const Com<D3D9TextureCube> texture = new D3D9TextureCube(this, &desc); + m_initializer->InitTexture(texture->GetCommonTexture()); + *ppCubeTexture = texture.ref(); + + return D3D_OK; + } + catch (const DxvkError& e) { + Logger::err(e.message()); + return D3DERR_OUTOFVIDEOMEMORY; + } + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateVertexBuffer( + UINT Length, + DWORD Usage, + DWORD FVF, + D3DPOOL Pool, + IDirect3DVertexBuffer9** ppVertexBuffer, + HANDLE* pSharedHandle) { + InitReturnPtr(ppVertexBuffer); + + if (unlikely(ppVertexBuffer == nullptr)) + return D3DERR_INVALIDCALL; + + D3D9_BUFFER_DESC desc; + desc.Format = D3D9Format::VERTEXDATA; + desc.FVF = FVF; + desc.Pool = Pool; + desc.Size = Length; + desc.Type = D3DRTYPE_VERTEXBUFFER; + desc.Usage = Usage; + + if (FAILED(D3D9CommonBuffer::ValidateBufferProperties(&desc))) + 
return D3DERR_INVALIDCALL; + + try { + const Com<D3D9VertexBuffer> buffer = new D3D9VertexBuffer(this, &desc); + m_initializer->InitBuffer(buffer->GetCommonBuffer()); + *ppVertexBuffer = buffer.ref(); + return D3D_OK; + } + catch (const DxvkError & e) { + Logger::err(e.message()); + return D3DERR_INVALIDCALL; + } + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateIndexBuffer( + UINT Length, + DWORD Usage, + D3DFORMAT Format, + D3DPOOL Pool, + IDirect3DIndexBuffer9** ppIndexBuffer, + HANDLE* pSharedHandle) { + InitReturnPtr(ppIndexBuffer); + + if (unlikely(ppIndexBuffer == nullptr)) + return D3DERR_INVALIDCALL; + + D3D9_BUFFER_DESC desc; + desc.Format = EnumerateFormat(Format); + desc.Pool = Pool; + desc.Size = Length; + desc.Type = D3DRTYPE_INDEXBUFFER; + desc.Usage = Usage; + + if (FAILED(D3D9CommonBuffer::ValidateBufferProperties(&desc))) + return D3DERR_INVALIDCALL; + + try { + const Com<D3D9IndexBuffer> buffer = new D3D9IndexBuffer(this, &desc); + m_initializer->InitBuffer(buffer->GetCommonBuffer()); + *ppIndexBuffer = buffer.ref(); + return D3D_OK; + } + catch (const DxvkError & e) { + Logger::err(e.message()); + return D3DERR_INVALIDCALL; + } + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateRenderTarget( + UINT Width, + UINT Height, + D3DFORMAT Format, + D3DMULTISAMPLE_TYPE MultiSample, + DWORD MultisampleQuality, + BOOL Lockable, + IDirect3DSurface9** ppSurface, + HANDLE* pSharedHandle) { + return CreateRenderTargetEx( + Width, + Height, + Format, + MultiSample, + MultisampleQuality, + Lockable, + ppSurface, + pSharedHandle, + 0); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateDepthStencilSurface( + UINT Width, + UINT Height, + D3DFORMAT Format, + D3DMULTISAMPLE_TYPE MultiSample, + DWORD MultisampleQuality, + BOOL Discard, + IDirect3DSurface9** ppSurface, + HANDLE* pSharedHandle) { + return CreateDepthStencilSurfaceEx( + Width, + Height, + Format, + MultiSample, + MultisampleQuality, + Discard, + ppSurface, + pSharedHandle, + 0); + } + + 
+ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::UpdateSurface( + IDirect3DSurface9* pSourceSurface, + const RECT* pSourceRect, + IDirect3DSurface9* pDestinationSurface, + const POINT* pDestPoint) { + D3D9DeviceLock lock = LockDevice(); + + D3D9Surface* src = static_cast<D3D9Surface*>(pSourceSurface); + D3D9Surface* dst = static_cast<D3D9Surface*>(pDestinationSurface); + + if (unlikely(src == nullptr || dst == nullptr)) + return D3DERR_INVALIDCALL; + + D3D9CommonTexture* srcTextureInfo = src->GetCommonTexture(); + D3D9CommonTexture* dstTextureInfo = dst->GetCommonTexture(); + + if (unlikely(srcTextureInfo->Desc()->Pool != D3DPOOL_SYSTEMMEM || dstTextureInfo->Desc()->Pool != D3DPOOL_DEFAULT)) + return D3DERR_INVALIDCALL; + + if (unlikely(srcTextureInfo->Desc()->Format != dstTextureInfo->Desc()->Format)) + return D3DERR_INVALIDCALL; + + const DxvkFormatInfo* formatInfo = imageFormatInfo(dstTextureInfo->GetFormatMapping().FormatColor); + + VkOffset3D srcBlockOffset = { 0u, 0u, 0u }; + VkOffset3D dstOffset = { 0u, 0u, 0u }; + VkExtent3D texLevelExtent = srcTextureInfo->GetExtentMip(src->GetSubresource()); + VkExtent3D texLevelBlockCount = util::computeBlockCount(texLevelExtent, formatInfo->blockSize); + + VkExtent3D copyExtent = texLevelExtent; + + if (pSourceRect != nullptr) { + const VkExtent3D extent = { uint32_t(pSourceRect->right - pSourceRect->left), uint32_t(pSourceRect->bottom - pSourceRect->top), 1 }; + + const bool extentAligned = extent.width % formatInfo->blockSize.width == 0 + && extent.height % formatInfo->blockSize.height == 0; + + if (pSourceRect->left < 0 + || pSourceRect->top < 0 + || pSourceRect->right <= pSourceRect->left + || pSourceRect->bottom <= pSourceRect->top + || pSourceRect->left % formatInfo->blockSize.width != 0 + || pSourceRect->top % formatInfo->blockSize.height != 0 + || (extent != texLevelExtent && !extentAligned)) + return D3DERR_INVALIDCALL; + + srcBlockOffset = { pSourceRect->left / int32_t(formatInfo->blockSize.width), + pSourceRect->top 
/ int32_t(formatInfo->blockSize.height), + 0u }; + + copyExtent = { extent.width, + extent.height, + 1u }; + } + + if (pDestPoint != nullptr) { + if (pDestPoint->x % formatInfo->blockSize.width != 0 + || pDestPoint->y % formatInfo->blockSize.height != 0 + || pDestPoint->x < 0 + || pDestPoint->y < 0) + return D3DERR_INVALIDCALL; + + dstOffset = { pDestPoint->x, + pDestPoint->y, + 0u }; + } + + VkExtent3D copyBlockCount = util::computeBlockCount(copyExtent, formatInfo->blockSize); + + const auto dstSubresource = vk::makeSubresourceLayers( + dstTextureInfo->GetSubresourceFromIndex(VK_IMAGE_ASPECT_COLOR_BIT, dst->GetSubresource())); + + DxvkBufferSliceHandle srcSlice = srcTextureInfo->GetMappedSlice(src->GetSubresource()); + VkDeviceSize dirtySize = copyBlockCount.width * copyBlockCount.height * formatInfo->elementSize; + D3D9BufferSlice slice = AllocTempBuffer<false>(dirtySize); + VkDeviceSize pitch = align(texLevelBlockCount.width * formatInfo->elementSize, 4); + VkDeviceSize copySrcOffset = srcBlockOffset.z * texLevelBlockCount.height * pitch + + srcBlockOffset.y * pitch + + srcBlockOffset.x * formatInfo->elementSize; + + void* srcData = reinterpret_cast<uint8_t*>(srcSlice.mapPtr) + copySrcOffset; + util::packImageData( + slice.mapPtr, srcData, copyBlockCount, formatInfo->elementSize, + pitch, pitch * texLevelBlockCount.height); + + Rc<DxvkImage> dstImage = dstTextureInfo->GetImage(); + + EmitCs([ + cDstImage = std::move(dstImage), + cSrcSlice = slice.slice, + cDstLayers = dstSubresource, + cDstOffset = dstOffset, + cCopyExtent = copyExtent + ] (DxvkContext* ctx) { + ctx->copyBufferToImage( + cDstImage, cDstLayers, cDstOffset, cCopyExtent, + cSrcSlice.buffer(), cSrcSlice.offset(), 0, 0); + }); + + dstTextureInfo->SetWrittenByGPU(dst->GetSubresource(), true); + + if (dstTextureInfo->IsAutomaticMip()) + MarkTextureMipsDirty(dstTextureInfo); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::UpdateTexture( + IDirect3DBaseTexture9* pSourceTexture, + 
IDirect3DBaseTexture9* pDestinationTexture) { + D3D9DeviceLock lock = LockDevice(); + + if (!pDestinationTexture || !pSourceTexture) + return D3DERR_INVALIDCALL; + + D3D9CommonTexture* dstTexInfo = GetCommonTexture(pDestinationTexture); + D3D9CommonTexture* srcTexInfo = GetCommonTexture(pSourceTexture); + + if (unlikely(srcTexInfo->Desc()->Pool != D3DPOOL_SYSTEMMEM || dstTexInfo->Desc()->Pool != D3DPOOL_DEFAULT)) + return D3DERR_INVALIDCALL; + + const Rc<DxvkImage> dstImage = dstTexInfo->GetImage(); + const DxvkFormatInfo* formatInfo = imageFormatInfo(dstTexInfo->GetFormatMapping().FormatColor); + uint32_t mipLevels = std::min(srcTexInfo->Desc()->MipLevels, dstTexInfo->Desc()->MipLevels); + uint32_t arraySlices = std::min(srcTexInfo->Desc()->ArraySize, dstTexInfo->Desc()->ArraySize); + + if (unlikely(srcTexInfo->IsAutomaticMip() && !dstTexInfo->IsAutomaticMip())) + return D3DERR_INVALIDCALL; + + if (dstTexInfo->IsAutomaticMip()) + mipLevels = 1; + + for (uint32_t a = 0; a < arraySlices; a++) { + const D3DBOX& box = srcTexInfo->GetDirtyBox(a); + if (box.Left >= box.Right || box.Top >= box.Bottom || box.Front >= box.Back) + continue; + + for (uint32_t m = 0; m < mipLevels; m++) { + VkImageSubresourceLayers dstLayers = { VK_IMAGE_ASPECT_COLOR_BIT, m, a, 1 }; + + VkOffset3D scaledBoxOffset = { + int32_t(alignDown(box.Left >> m, formatInfo->blockSize.width)), + int32_t(alignDown(box.Top >> m, formatInfo->blockSize.height)), + int32_t(alignDown(box.Front >> m, formatInfo->blockSize.depth)) + }; + VkExtent3D scaledBoxExtent = util::computeMipLevelExtent({ + uint32_t(box.Right - int32_t(alignDown(box.Left, formatInfo->blockSize.width))), + uint32_t(box.Bottom - int32_t(alignDown(box.Top, formatInfo->blockSize.height))), + uint32_t(box.Back - int32_t(alignDown(box.Front, formatInfo->blockSize.depth))) + }, m); + VkExtent3D scaledBoxExtentBlockCount = util::computeBlockCount(scaledBoxExtent, formatInfo->blockSize); + VkExtent3D scaledAlignedBoxExtent = 
util::computeBlockExtent(scaledBoxExtentBlockCount, formatInfo->blockSize); + + VkExtent3D texLevelExtent = dstImage->mipLevelExtent(m); + VkExtent3D texLevelExtentBlockCount = util::computeBlockCount(texLevelExtent, formatInfo->blockSize); + + scaledAlignedBoxExtent.width = std::min<uint32_t>(texLevelExtent.width - scaledBoxOffset.x, scaledAlignedBoxExtent.width); + scaledAlignedBoxExtent.height = std::min<uint32_t>(texLevelExtent.height - scaledBoxOffset.y, scaledAlignedBoxExtent.height); + scaledAlignedBoxExtent.depth = std::min<uint32_t>(texLevelExtent.depth - scaledBoxOffset.z, scaledAlignedBoxExtent.depth); + + VkDeviceSize dirtySize = scaledBoxExtentBlockCount.width * scaledBoxExtentBlockCount.height * scaledBoxExtentBlockCount.depth * formatInfo->elementSize; + D3D9BufferSlice slice = AllocTempBuffer<false>(dirtySize); + VkOffset3D boxOffsetBlockCount = util::computeBlockOffset(scaledBoxOffset, formatInfo->blockSize); + VkDeviceSize pitch = align(texLevelExtentBlockCount.width * formatInfo->elementSize, 4); + VkDeviceSize copySrcOffset = boxOffsetBlockCount.z * texLevelExtentBlockCount.height * pitch + + boxOffsetBlockCount.y * pitch + + boxOffsetBlockCount.x * formatInfo->elementSize; + + void* srcData = reinterpret_cast<uint8_t*>(srcTexInfo->GetMappedSlice(srcTexInfo->CalcSubresource(a, m)).mapPtr) + copySrcOffset; + util::packImageData( + slice.mapPtr, srcData, scaledBoxExtentBlockCount, formatInfo->elementSize, + pitch, pitch * texLevelExtentBlockCount.height); + + scaledAlignedBoxExtent.width = std::min<uint32_t>(texLevelExtent.width, scaledAlignedBoxExtent.width); + scaledAlignedBoxExtent.height = std::min<uint32_t>(texLevelExtent.height, scaledAlignedBoxExtent.height); + scaledAlignedBoxExtent.depth = std::min<uint32_t>(texLevelExtent.depth, scaledAlignedBoxExtent.depth); + + EmitCs([ + cDstImage = dstImage, + cSrcSlice = slice.slice, + cDstLayers = dstLayers, + cExtent = scaledAlignedBoxExtent, + cOffset = scaledBoxOffset + ] (DxvkContext* ctx) { + 
ctx->copyBufferToImage( + cDstImage, cDstLayers, + cOffset, cExtent, + cSrcSlice.buffer(), cSrcSlice.offset(), 0, 0); + }); + + dstTexInfo->SetWrittenByGPU(dstTexInfo->CalcSubresource(a, m), true); + } + } + + srcTexInfo->ClearDirtyBoxes(); + if (dstTexInfo->IsAutomaticMip() && mipLevels != dstTexInfo->Desc()->MipLevels) + MarkTextureMipsDirty(dstTexInfo); + + FlushImplicit(false); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetRenderTargetData( + IDirect3DSurface9* pRenderTarget, + IDirect3DSurface9* pDestSurface) { + D3D9DeviceLock lock = LockDevice(); + + D3D9Surface* src = static_cast<D3D9Surface*>(pRenderTarget); + D3D9Surface* dst = static_cast<D3D9Surface*>(pDestSurface); + + if (unlikely(src == nullptr || dst == nullptr)) + return D3DERR_INVALIDCALL; + + if (pRenderTarget == pDestSurface) + return D3D_OK; + + D3D9CommonTexture* dstTexInfo = GetCommonTexture(dst); + D3D9CommonTexture* srcTexInfo = GetCommonTexture(src); + + if (srcTexInfo->Desc()->Format != dstTexInfo->Desc()->Format) + return D3DERR_INVALIDCALL; + + if (dstTexInfo->Desc()->Pool == D3DPOOL_DEFAULT) + return this->StretchRect(pRenderTarget, nullptr, pDestSurface, nullptr, D3DTEXF_NONE); + + Rc<DxvkBuffer> dstBuffer = dstTexInfo->GetBuffer(dst->GetSubresource()); + + Rc<DxvkImage> srcImage = srcTexInfo->GetImage(); + const DxvkFormatInfo* srcFormatInfo = imageFormatInfo(srcImage->info().format); + + const VkImageSubresource srcSubresource = srcTexInfo->GetSubresourceFromIndex(srcFormatInfo->aspectMask, src->GetSubresource()); + VkImageSubresourceLayers srcSubresourceLayers = { + srcSubresource.aspectMask, + srcSubresource.mipLevel, + srcSubresource.arrayLayer, 1 }; + + VkExtent3D srcExtent = srcTexInfo->GetExtentMip(src->GetMipLevel()); + + VkExtent3D texLevelExtentBlockCount = util::computeBlockCount(srcExtent, srcFormatInfo->blockSize); + VkDeviceSize pitch = align(texLevelExtentBlockCount.width * uint32_t(srcFormatInfo->elementSize), 4); + uint32_t pitchBlocks = 
uint32_t(pitch / srcFormatInfo->elementSize); + VkExtent2D dstExtent = VkExtent2D{ pitchBlocks, + texLevelExtentBlockCount.height * pitchBlocks }; + + EmitCs([ + cBuffer = dstBuffer, + cImage = srcImage, + cSubresources = srcSubresourceLayers, + cLevelExtent = srcExtent, + cDstExtent = dstExtent + ] (DxvkContext* ctx) { + ctx->copyImageToBuffer(cBuffer, 0, 4, 0, + cImage, cSubresources, VkOffset3D { 0, 0, 0 }, + cLevelExtent); + }); + + dstTexInfo->SetWrittenByGPU(dst->GetSubresource(), true); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetFrontBufferData(UINT iSwapChain, IDirect3DSurface9* pDestSurface) { + if (unlikely(iSwapChain != 0)) + return D3DERR_INVALIDCALL; + + return m_implicitSwapchain->GetFrontBufferData(pDestSurface); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::StretchRect( + IDirect3DSurface9* pSourceSurface, + const RECT* pSourceRect, + IDirect3DSurface9* pDestSurface, + const RECT* pDestRect, + D3DTEXTUREFILTERTYPE Filter) { + D3D9DeviceLock lock = LockDevice(); + + D3D9Surface* dst = static_cast<D3D9Surface*>(pDestSurface); + D3D9Surface* src = static_cast<D3D9Surface*>(pSourceSurface); + + if (unlikely(src == nullptr || dst == nullptr)) + return D3DERR_INVALIDCALL; + + if (unlikely(src == dst)) + return D3DERR_INVALIDCALL; + + bool fastPath = true; + + D3D9CommonTexture* dstTextureInfo = dst->GetCommonTexture(); + D3D9CommonTexture* srcTextureInfo = src->GetCommonTexture(); + + if (unlikely(dstTextureInfo->Desc()->Pool != D3DPOOL_DEFAULT || + srcTextureInfo->Desc()->Pool != D3DPOOL_DEFAULT)) + return D3DERR_INVALIDCALL; + + Rc<DxvkImage> dstImage = dstTextureInfo->GetImage(); + Rc<DxvkImage> srcImage = srcTextureInfo->GetImage(); + + const DxvkFormatInfo* dstFormatInfo = imageFormatInfo(dstImage->info().format); + const DxvkFormatInfo* srcFormatInfo = imageFormatInfo(srcImage->info().format); + + const VkImageSubresource dstSubresource = dstTextureInfo->GetSubresourceFromIndex(dstFormatInfo->aspectMask, 
dst->GetSubresource()); + const VkImageSubresource srcSubresource = srcTextureInfo->GetSubresourceFromIndex(srcFormatInfo->aspectMask, src->GetSubresource()); + + if (unlikely((srcSubresource.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) && m_flags.test(D3D9DeviceFlag::InScene))) + return D3DERR_INVALIDCALL; + + VkExtent3D srcExtent = srcImage->mipLevelExtent(srcSubresource.mipLevel); + VkExtent3D dstExtent = dstImage->mipLevelExtent(dstSubresource.mipLevel); + + D3D9Format srcFormat = srcTextureInfo->Desc()->Format; + D3D9Format dstFormat = dstTextureInfo->Desc()->Format; + + // We may only fast path copy non identicals one way! + // We don't know what garbage could be in the X8 data. + bool similar = AreFormatsSimilar(srcFormat, dstFormat); + + // Copies are only supported on similar formats. + fastPath &= similar; + + // Copies are only supported if the sample count matches, + // otherwise we need to resolve. + bool needsResolve = srcImage->info().sampleCount != VK_SAMPLE_COUNT_1_BIT; + bool fbBlit = dstImage->info().sampleCount != VK_SAMPLE_COUNT_1_BIT; + fastPath &= !fbBlit; + + // Copies would only work if we are block aligned. 
+ if (pSourceRect != nullptr) { + fastPath &= (pSourceRect->left % srcFormatInfo->blockSize.width == 0); + fastPath &= (pSourceRect->right % srcFormatInfo->blockSize.width == 0); + fastPath &= (pSourceRect->top % srcFormatInfo->blockSize.height == 0); + fastPath &= (pSourceRect->bottom % srcFormatInfo->blockSize.height == 0); + } + + if (pDestRect != nullptr) { + fastPath &= (pDestRect->left % dstFormatInfo->blockSize.width == 0); + fastPath &= (pDestRect->top % dstFormatInfo->blockSize.height == 0); + } + + VkImageSubresourceLayers dstSubresourceLayers = { + dstSubresource.aspectMask, + dstSubresource.mipLevel, + dstSubresource.arrayLayer, 1 }; + + VkImageSubresourceLayers srcSubresourceLayers = { + srcSubresource.aspectMask, + srcSubresource.mipLevel, + srcSubresource.arrayLayer, 1 }; + + VkImageBlit blitInfo; + blitInfo.dstSubresource = dstSubresourceLayers; + blitInfo.srcSubresource = srcSubresourceLayers; + + blitInfo.dstOffsets[0] = pDestRect != nullptr + ? VkOffset3D{ int32_t(pDestRect->left), int32_t(pDestRect->top), 0 } + : VkOffset3D{ 0, 0, 0 }; + + blitInfo.dstOffsets[1] = pDestRect != nullptr + ? VkOffset3D{ int32_t(pDestRect->right), int32_t(pDestRect->bottom), 1 } + : VkOffset3D{ int32_t(dstExtent.width), int32_t(dstExtent.height), 1 }; + + blitInfo.srcOffsets[0] = pSourceRect != nullptr + ? VkOffset3D{ int32_t(pSourceRect->left), int32_t(pSourceRect->top), 0 } + : VkOffset3D{ 0, 0, 0 }; + + blitInfo.srcOffsets[1] = pSourceRect != nullptr + ? 
VkOffset3D{ int32_t(pSourceRect->right), int32_t(pSourceRect->bottom), 1 } + : VkOffset3D{ int32_t(srcExtent.width), int32_t(srcExtent.height), 1 }; + + if (unlikely(IsBlitRegionInvalid(blitInfo.srcOffsets, srcExtent))) + return D3DERR_INVALIDCALL; + + if (unlikely(IsBlitRegionInvalid(blitInfo.dstOffsets, dstExtent))) + return D3DERR_INVALIDCALL; + + VkExtent3D srcCopyExtent = + { uint32_t(blitInfo.srcOffsets[1].x - blitInfo.srcOffsets[0].x), + uint32_t(blitInfo.srcOffsets[1].y - blitInfo.srcOffsets[0].y), + uint32_t(blitInfo.srcOffsets[1].z - blitInfo.srcOffsets[0].z) }; + + VkExtent3D dstCopyExtent = + { uint32_t(blitInfo.dstOffsets[1].x - blitInfo.dstOffsets[0].x), + uint32_t(blitInfo.dstOffsets[1].y - blitInfo.dstOffsets[0].y), + uint32_t(blitInfo.dstOffsets[1].z - blitInfo.dstOffsets[0].z) }; + + // Copies would only work if the extents match. (ie. no stretching) + bool stretch = srcCopyExtent != dstCopyExtent; + fastPath &= !stretch; + + if (!fastPath || needsResolve) { + // Compressed destination formats are forbidden for blits. + if (dstFormatInfo->flags.test(DxvkFormatFlag::BlockCompressed)) + return D3DERR_INVALIDCALL; + } + + auto EmitResolveCS = [&](const Rc<DxvkImage>& resolveDst, bool intermediate) { + VkImageResolve region; + region.srcSubresource = blitInfo.srcSubresource; + region.srcOffset = blitInfo.srcOffsets[0]; + region.dstSubresource = intermediate ? blitInfo.srcSubresource : blitInfo.dstSubresource; + region.dstOffset = intermediate ? 
blitInfo.srcOffsets[0] : blitInfo.dstOffsets[0]; + region.extent = srcCopyExtent; + + EmitCs([ + cDstImage = resolveDst, + cSrcImage = srcImage, + cRegion = region + ] (DxvkContext* ctx) { + if (cRegion.srcSubresource.aspectMask != (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { + ctx->resolveImage( + cDstImage, cSrcImage, cRegion, + VK_FORMAT_UNDEFINED); + } + else { + ctx->resolveDepthStencilImage( + cDstImage, cSrcImage, cRegion, + VK_RESOLVE_MODE_AVERAGE_BIT_KHR, + VK_RESOLVE_MODE_AVERAGE_BIT_KHR); + } + }); + }; + + if (fastPath) { + if (needsResolve) { + EmitResolveCS(dstImage, false); + } else { + EmitCs([ + cDstImage = dstImage, + cSrcImage = srcImage, + cDstLayers = blitInfo.dstSubresource, + cSrcLayers = blitInfo.srcSubresource, + cDstOffset = blitInfo.dstOffsets[0], + cSrcOffset = blitInfo.srcOffsets[0], + cExtent = srcCopyExtent + ] (DxvkContext* ctx) { + ctx->copyImage( + cDstImage, cDstLayers, cDstOffset, + cSrcImage, cSrcLayers, cSrcOffset, + cExtent); + }); + } + } + else { + if (needsResolve) { + auto resolveSrc = srcTextureInfo->GetResolveImage(); + + EmitResolveCS(resolveSrc, true); + srcImage = resolveSrc; + } + + EmitCs([ + cDstImage = dstImage, + cDstMap = dstTextureInfo->GetMapping().Swizzle, + cSrcImage = srcImage, + cSrcMap = srcTextureInfo->GetMapping().Swizzle, + cBlitInfo = blitInfo, + cFilter = stretch ? 
DecodeFilter(Filter) : VK_FILTER_NEAREST + ] (DxvkContext* ctx) { + ctx->blitImage( + cDstImage, + cDstMap, + cSrcImage, + cSrcMap, + cBlitInfo, + cFilter); + }); + } + + dstTextureInfo->SetWrittenByGPU(dst->GetSubresource(), true); + + if (dstTextureInfo->IsAutomaticMip()) + MarkTextureMipsDirty(dstTextureInfo); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::ColorFill( + IDirect3DSurface9* pSurface, + const RECT* pRect, + D3DCOLOR Color) { + D3D9DeviceLock lock = LockDevice(); + + D3D9Surface* dst = static_cast<D3D9Surface*>(pSurface); + + if (unlikely(dst == nullptr)) + return D3DERR_INVALIDCALL; + + D3D9CommonTexture* dstTextureInfo = dst->GetCommonTexture(); + + if (unlikely(dstTextureInfo->Desc()->Pool != D3DPOOL_DEFAULT)) + return D3DERR_INVALIDCALL; + + VkExtent3D mipExtent = dstTextureInfo->GetExtentMip(dst->GetSubresource()); + + VkOffset3D offset = VkOffset3D{ 0u, 0u, 0u }; + VkExtent3D extent = mipExtent; + + bool isFullExtent = true; + if (pRect != nullptr) { + ConvertRect(*pRect, offset, extent); + + isFullExtent = offset == VkOffset3D{ 0u, 0u, 0u } + && extent == mipExtent; + } + + Rc<DxvkImageView> rtView = dst->GetRenderTargetView(false); + + VkClearValue clearValue; + DecodeD3DCOLOR(Color, clearValue.color.float32); + + // Fast path for games that may use this as an + // alternative to Clear on render targets. 
+ if (isFullExtent && rtView != nullptr) { + EmitCs([ + cImageView = rtView, + cClearValue = clearValue + ] (DxvkContext* ctx) { + ctx->clearRenderTarget( + cImageView, + VK_IMAGE_ASPECT_COLOR_BIT, + cClearValue); + }); + } else { + if (unlikely(rtView == nullptr)) { + const D3D9Format format = dstTextureInfo->Desc()->Format; + if (format != D3D9Format::NULL_FORMAT) + Logger::err(str::format("D3D9DeviceEx::ColorFill: Unsupported format ", format)); + + return D3D_OK; + } + + EmitCs([ + cImageView = rtView, + cOffset = offset, + cExtent = extent, + cClearValue = clearValue + ] (DxvkContext* ctx) { + ctx->clearImageView( + cImageView, + cOffset, cExtent, + VK_IMAGE_ASPECT_COLOR_BIT, + cClearValue); + }); + } + + dstTextureInfo->SetWrittenByGPU(dst->GetSubresource(), true); + + if (dstTextureInfo->IsAutomaticMip()) + MarkTextureMipsDirty(dstTextureInfo); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateOffscreenPlainSurface( + UINT Width, + UINT Height, + D3DFORMAT Format, + D3DPOOL Pool, + IDirect3DSurface9** ppSurface, + HANDLE* pSharedHandle) { + return CreateOffscreenPlainSurfaceEx( + Width, Height, + Format, Pool, + ppSurface, pSharedHandle, + 0); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetRenderTarget( + DWORD RenderTargetIndex, + IDirect3DSurface9* pRenderTarget) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(RenderTargetIndex >= caps::MaxSimultaneousRenderTargets + || (pRenderTarget == nullptr && RenderTargetIndex == 0))) + return D3DERR_INVALIDCALL; + + D3D9Surface* rt = static_cast<D3D9Surface*>(pRenderTarget); + D3D9CommonTexture* texInfo = rt != nullptr + ? 
rt->GetCommonTexture() + : nullptr; + + if (unlikely(rt != nullptr && !(texInfo->Desc()->Usage & D3DUSAGE_RENDERTARGET))) + return D3DERR_INVALIDCALL; + + if (RenderTargetIndex == 0) { + auto rtSize = rt->GetSurfaceExtent(); + + D3DVIEWPORT9 viewport; + viewport.X = 0; + viewport.Y = 0; + viewport.Width = rtSize.width; + viewport.Height = rtSize.height; + viewport.MinZ = 0.0f; + viewport.MaxZ = 1.0f; + + RECT scissorRect; + scissorRect.left = 0; + scissorRect.top = 0; + scissorRect.right = rtSize.width; + scissorRect.bottom = rtSize.height; + + if (m_state.viewport != viewport) { + m_flags.set(D3D9DeviceFlag::DirtyFFViewport); + m_flags.set(D3D9DeviceFlag::DirtyPointScale); + m_flags.set(D3D9DeviceFlag::DirtyViewportScissor); + m_state.viewport = viewport; + } + + if (m_state.scissorRect != scissorRect) { + m_flags.set(D3D9DeviceFlag::DirtyViewportScissor); + m_state.scissorRect = scissorRect; + } + } + + if (m_state.renderTargets[RenderTargetIndex] == rt) + return D3D_OK; + + // Do a strong flush if the first render target is changed. + FlushImplicit(RenderTargetIndex == 0 ? 
TRUE : FALSE); + m_flags.set(D3D9DeviceFlag::DirtyFramebuffer); + + m_state.renderTargets[RenderTargetIndex] = rt; + + UpdateBoundRTs(RenderTargetIndex); + UpdateActiveRTs(RenderTargetIndex); + + uint32_t originalAlphaSwizzleRTs = m_alphaSwizzleRTs; + + m_alphaSwizzleRTs &= ~(1 << RenderTargetIndex); + + if (rt != nullptr) { + if (texInfo->GetMapping().Swizzle.a == VK_COMPONENT_SWIZZLE_ONE) + m_alphaSwizzleRTs |= 1 << RenderTargetIndex; + + if (texInfo->IsAutomaticMip()) + texInfo->SetNeedsMipGen(true); + + texInfo->SetWrittenByGPU(rt->GetSubresource(), true); + } + + if (originalAlphaSwizzleRTs != m_alphaSwizzleRTs) + m_flags.set(D3D9DeviceFlag::DirtyBlendState); + + if (RenderTargetIndex == 0) { + bool validSampleMask = texInfo->Desc()->MultiSample > D3DMULTISAMPLE_NONMASKABLE; + + if (validSampleMask != m_flags.test(D3D9DeviceFlag::ValidSampleMask)) { + m_flags.clr(D3D9DeviceFlag::ValidSampleMask); + if (validSampleMask) + m_flags.set(D3D9DeviceFlag::ValidSampleMask); + + m_flags.set(D3D9DeviceFlag::DirtyMultiSampleState); + } + } + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetRenderTarget( + DWORD RenderTargetIndex, + IDirect3DSurface9** ppRenderTarget) { + D3D9DeviceLock lock = LockDevice(); + + InitReturnPtr(ppRenderTarget); + + if (unlikely(ppRenderTarget == nullptr || RenderTargetIndex > caps::MaxSimultaneousRenderTargets)) + return D3DERR_INVALIDCALL; + + if (m_state.renderTargets[RenderTargetIndex] == nullptr) + return D3DERR_NOTFOUND; + + *ppRenderTarget = m_state.renderTargets[RenderTargetIndex].ref(); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetDepthStencilSurface(IDirect3DSurface9* pNewZStencil) { + D3D9DeviceLock lock = LockDevice(); + + D3D9Surface* ds = static_cast<D3D9Surface*>(pNewZStencil); + + if (unlikely(ds && !(ds->GetCommonTexture()->Desc()->Usage & D3DUSAGE_DEPTHSTENCIL))) + return D3DERR_INVALIDCALL; + + if (m_state.depthStencil == ds) + return D3D_OK; + + FlushImplicit(FALSE); + 
m_flags.set(D3D9DeviceFlag::DirtyFramebuffer); + + if (ds != nullptr) { + float rValue = GetDepthBufferRValue(ds->GetCommonTexture()->GetFormatMapping().FormatColor); + if (m_depthBiasScale != rValue) { + m_depthBiasScale = rValue; + m_flags.set(D3D9DeviceFlag::DirtyDepthBias); + } + } + + m_state.depthStencil = ds; + + UpdateActiveHazardsDS(UINT32_MAX); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetDepthStencilSurface(IDirect3DSurface9** ppZStencilSurface) { + D3D9DeviceLock lock = LockDevice(); + + InitReturnPtr(ppZStencilSurface); + + if (unlikely(ppZStencilSurface == nullptr)) + return D3DERR_INVALIDCALL; + + if (m_state.depthStencil == nullptr) + return D3DERR_NOTFOUND; + + *ppZStencilSurface = m_state.depthStencil.ref(); + + return D3D_OK; + } + + // The Begin/EndScene functions actually do nothing. + // Some games don't even call them. + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::BeginScene() { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(m_flags.test(D3D9DeviceFlag::InScene))) + return D3DERR_INVALIDCALL; + + m_flags.set(D3D9DeviceFlag::InScene); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::EndScene() { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(!m_flags.test(D3D9DeviceFlag::InScene))) + return D3DERR_INVALIDCALL; + + FlushImplicit(true); + + m_flags.clr(D3D9DeviceFlag::InScene); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::Clear( + DWORD Count, + const D3DRECT* pRects, + DWORD Flags, + D3DCOLOR Color, + float Z, + DWORD Stencil) { + if (unlikely(!Count && pRects)) + return D3D_OK; + + D3D9DeviceLock lock = LockDevice(); + + const auto& vp = m_state.viewport; + const auto& sc = m_state.scissorRect; + + bool srgb = m_state.renderStates[D3DRS_SRGBWRITEENABLE]; + bool scissor = m_state.renderStates[D3DRS_SCISSORTESTENABLE]; + + VkOffset3D offset = { int32_t(vp.X), int32_t(vp.Y), 0 }; + VkExtent3D extent = { vp.Width, vp.Height, 1u }; + + if (scissor) { + offset.x = 
std::max<int32_t> (offset.x, sc.left); + offset.y = std::max<int32_t> (offset.y, sc.top); + + extent.width = std::min<uint32_t>(extent.width, sc.right - offset.x); + extent.height = std::min<uint32_t>(extent.height, sc.bottom - offset.y); + } + + // This becomes pretty unreadable in one singular if statement... + if (Count) { + // If pRects is null, or our first rect encompasses the viewport: + if (!pRects) + Count = 0; + else if (pRects[0].x1 <= offset.x && pRects[0].y1 <= offset.y + && pRects[0].x2 >= offset.x + int32_t(extent.width) && pRects[0].y2 >= offset.y + int32_t(extent.height)) + Count = 0; + } + + // Here, Count of 0 will denote whether or not to care about user rects. + VkClearValue clearValueDepth; + clearValueDepth.depthStencil.depth = Z; + clearValueDepth.depthStencil.stencil = Stencil; + + VkClearValue clearValueColor; + DecodeD3DCOLOR(Color, clearValueColor.color.float32); + + VkImageAspectFlags depthAspectMask = 0; + if (m_state.depthStencil != nullptr) { + if (Flags & D3DCLEAR_ZBUFFER) + depthAspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT; + + if (Flags & D3DCLEAR_STENCIL) + depthAspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT; + + depthAspectMask &= imageFormatInfo(m_state.depthStencil->GetCommonTexture()->GetFormatMapping().FormatColor)->aspectMask; + } + + auto ClearImageView = [this]( + uint32_t alignment, + VkOffset3D offset, + VkExtent3D extent, + const Rc<DxvkImageView>& imageView, + VkImageAspectFlags aspectMask, + VkClearValue clearValue) { + + VkExtent3D imageExtent = imageView->mipLevelExtent(0); + extent.width = std::min(imageExtent.width, extent.width); + extent.height = std::min(imageExtent.height, extent.height); + + if (unlikely(uint32_t(offset.x) >= imageExtent.width || uint32_t(offset.y) >= imageExtent.height)) + return; + + const bool fullClear = align(extent.width, alignment) == align(imageExtent.width, alignment) + && align(extent.height, alignment) == align(imageExtent.height, alignment) + && offset.x == 0 + && offset.y == 0; + + if 
(fullClear) { + EmitCs([ + cClearValue = clearValue, + cAspectMask = aspectMask, + cImageView = imageView + ] (DxvkContext* ctx) { + ctx->clearRenderTarget( + cImageView, + cAspectMask, + cClearValue); + }); + } + else { + EmitCs([ + cClearValue = clearValue, + cAspectMask = aspectMask, + cImageView = imageView, + cOffset = offset, + cExtent = extent + ] (DxvkContext* ctx) { + ctx->clearImageView( + cImageView, + cOffset, cExtent, + cAspectMask, + cClearValue); + }); + } + }; + + auto ClearViewRect = [&]( + uint32_t alignment, + VkOffset3D offset, + VkExtent3D extent) { + // Clear depth if we need to. + if (depthAspectMask != 0) + ClearImageView(alignment, offset, extent, m_state.depthStencil->GetDepthStencilView(), depthAspectMask, clearValueDepth); + + // Clear render targets if we need to. + if (Flags & D3DCLEAR_TARGET) { + for (uint32_t rt : bit::BitMask(m_boundRTs)) { + const auto& rts = m_state.renderTargets[rt]; + const auto& rtv = rts->GetRenderTargetView(srgb); + + if (likely(rtv != nullptr)) { + ClearImageView(alignment, offset, extent, rtv, VK_IMAGE_ASPECT_COLOR_BIT, clearValueColor); + + D3D9CommonTexture* dstTexture = rts->GetCommonTexture(); + + if (dstTexture->IsAutomaticMip()) + MarkTextureMipsDirty(dstTexture); + } + } + } + }; + + // A Hat in Time and other UE3 games only gets partial clears here + // because of an oversized rt height due to their weird alignment... + // This works around that. + uint32_t alignment = m_d3d9Options.lenientClear ? 8 : 1; + + if (!Count) { + // Clear our viewport & scissor minified region in this rendertarget. + ClearViewRect(alignment, offset, extent); + } + else { + // Clear the application provided rects. 
+ for (uint32_t i = 0; i < Count; i++) { + VkOffset3D rectOffset = { + std::max<int32_t>(pRects[i].x1, offset.x), + std::max<int32_t>(pRects[i].y1, offset.y), + 0 + }; + + VkExtent3D rectExtent = { + std::min<uint32_t>(pRects[i].x2, offset.x + extent.width) - rectOffset.x, + std::min<uint32_t>(pRects[i].y2, offset.y + extent.height) - rectOffset.y, + 1u + }; + + ClearViewRect(alignment, rectOffset, rectExtent); + } + } + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetTransform(D3DTRANSFORMSTATETYPE State, const D3DMATRIX* pMatrix) { + return SetStateTransform(GetTransformIndex(State), pMatrix); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetTransform(D3DTRANSFORMSTATETYPE State, D3DMATRIX* pMatrix) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(pMatrix == nullptr)) + return D3DERR_INVALIDCALL; + + *pMatrix = bit::cast<D3DMATRIX>(m_state.transforms[GetTransformIndex(State)]); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::MultiplyTransform(D3DTRANSFORMSTATETYPE TransformState, const D3DMATRIX* pMatrix) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(ShouldRecord())) + return m_recorder->MultiplyStateTransform(TransformState, pMatrix); + + uint32_t idx = GetTransformIndex(TransformState); + + m_state.transforms[idx] = m_state.transforms[idx] * ConvertMatrix(pMatrix); + + m_flags.set(D3D9DeviceFlag::DirtyFFVertexData); + + if (idx == GetTransformIndex(D3DTS_VIEW) || idx >= GetTransformIndex(D3DTS_WORLD)) + m_flags.set(D3D9DeviceFlag::DirtyFFVertexBlend); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetViewport(const D3DVIEWPORT9* pViewport) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(ShouldRecord())) + return m_recorder->SetViewport(pViewport); + + if (m_state.viewport == *pViewport) + return D3D_OK; + + m_state.viewport = *pViewport; + + m_flags.set(D3D9DeviceFlag::DirtyViewportScissor); + m_flags.set(D3D9DeviceFlag::DirtyFFViewport); + 
m_flags.set(D3D9DeviceFlag::DirtyPointScale); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetViewport(D3DVIEWPORT9* pViewport) { + D3D9DeviceLock lock = LockDevice(); + + if (pViewport == nullptr) + return D3DERR_INVALIDCALL; + + *pViewport = m_state.viewport; + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetMaterial(const D3DMATERIAL9* pMaterial) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(pMaterial == nullptr)) + return D3DERR_INVALIDCALL; + + if (unlikely(ShouldRecord())) + return m_recorder->SetMaterial(pMaterial); + + m_state.material = *pMaterial; + m_flags.set(D3D9DeviceFlag::DirtyFFVertexData); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetMaterial(D3DMATERIAL9* pMaterial) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(pMaterial == nullptr)) + return D3DERR_INVALIDCALL; + + *pMaterial = m_state.material; + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetLight(DWORD Index, const D3DLIGHT9* pLight) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(pLight == nullptr)) + return D3DERR_INVALIDCALL; + + if (unlikely(ShouldRecord())) { + Logger::warn("D3D9DeviceEx::SetLight: State block not implemented."); + return D3D_OK; + } + + if (Index >= m_state.lights.size()) + m_state.lights.resize(Index + 1); + + m_state.lights[Index] = *pLight; + + if (m_state.IsLightEnabled(Index)) + m_flags.set(D3D9DeviceFlag::DirtyFFVertexData); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetLight(DWORD Index, D3DLIGHT9* pLight) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(pLight == nullptr)) + return D3DERR_INVALIDCALL; + + if (unlikely(Index >= m_state.lights.size() || !m_state.lights[Index])) + return D3DERR_INVALIDCALL; + + *pLight = m_state.lights[Index].value(); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::LightEnable(DWORD Index, BOOL Enable) { + D3D9DeviceLock lock = LockDevice(); + + if 
(unlikely(Index >= m_state.lights.size()))
+      m_state.lights.resize(Index + 1);
+
+    if (unlikely(!m_state.lights[Index])) // a light that was never set starts out as DefaultLight
+      m_state.lights[Index] = DefaultLight;
+
+    if (m_state.IsLightEnabled(Index) == !!Enable)
+      return D3D_OK;
+
+    uint32_t searchIndex = UINT32_MAX; // enabling: look for a slot holding UINT32_MAX ...
+    uint32_t setIndex = Index; // ... and store Index in it
+
+    if (!Enable)
+      std::swap(searchIndex, setIndex); // disabling: look for Index and write UINT32_MAX back
+
+    for (auto& idx : m_state.enabledLightIndices) {
+      if (idx == searchIndex) {
+        idx = setIndex;
+        m_flags.set(D3D9DeviceFlag::DirtyFFVertexData);
+        m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);
+        break;
+      }
+    }
+
+    return D3D_OK;
+  }
+
+  // Reports whether the light at Index is enabled; fails if the light was never created.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetLightEnable(DWORD Index, BOOL* pEnable) {
+    D3D9DeviceLock lock = LockDevice();
+
+    if (unlikely(pEnable == nullptr))
+      return D3DERR_INVALIDCALL;
+
+    if (unlikely(Index >= m_state.lights.size() || !m_state.lights[Index]))
+      return D3DERR_INVALIDCALL;
+
+    *pEnable = m_state.IsLightEnabled(Index) ? 128 : 0; // Enabled lights are reported as 128 rather than TRUE (weird quirk but OK).
+
+    return D3D_OK;
+  }
+
+  // Stores four plane coefficients; marks clip planes dirty only if a value changed on an enabled plane.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetClipPlane(DWORD Index, const float* pPlane) {
+    D3D9DeviceLock lock = LockDevice();
+
+    if (unlikely(Index >= caps::MaxClipPlanes || !pPlane))
+      return D3DERR_INVALIDCALL;
+
+    if (unlikely(ShouldRecord()))
+      return m_recorder->SetClipPlane(Index, pPlane);
+
+    bool dirty = false;
+
+    for (uint32_t i = 0; i < 4; i++) {
+      dirty |= m_state.clipPlanes[Index].coeff[i] != pPlane[i];
+      m_state.clipPlanes[Index].coeff[i] = pPlane[i];
+    }
+
+    bool enabled = m_state.renderStates[D3DRS_CLIPPLANEENABLE] & (1u << Index); // bit Index of the enable mask
+    dirty &= enabled;
+
+    if (dirty)
+      m_flags.set(D3D9DeviceFlag::DirtyClipPlanes);
+
+    return D3D_OK;
+  }
+
+  // Copies the four stored coefficients of clip plane Index into pPlane.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetClipPlane(DWORD Index, float* pPlane) {
+    D3D9DeviceLock lock = LockDevice();
+
+    if (unlikely(Index >= caps::MaxClipPlanes || !pPlane))
+      return D3DERR_INVALIDCALL;
+
+    for (uint32_t i = 0; i < 4; i++)
+      pPlane[i] = m_state.clipPlanes[Index].coeff[i];
+
+    return D3D_OK;
+  }
+
+  // Stores one render state value and flags or rebinds whatever depends on it.
+  HRESULT
STDMETHODCALLTYPE D3D9DeviceEx::SetRenderState(D3DRENDERSTATETYPE State, DWORD Value) {
+    D3D9DeviceLock lock = LockDevice();
+
+    // D3D9 only allows reading for values 0 and 7-255 so we don't need to do anything but return OK
+    if (unlikely(State > 255 || (State < D3DRS_ZENABLE && State != 0))) {
+      return D3D_OK;
+    }
+
+    if (unlikely(ShouldRecord()))
+      return m_recorder->SetRenderState(State, Value);
+
+    auto& states = m_state.renderStates;
+
+    bool changed = states[State] != Value; // nothing below runs when the value is unchanged
+
+    if (likely(changed)) {
+      const bool oldClipPlaneEnabled = IsClipPlaneEnabled(); // the old* values are sampled before the write below
+
+      const bool oldDepthBiasEnabled = IsDepthBiasEnabled();
+
+      const bool oldATOC = IsAlphaToCoverageEnabled();
+      const bool oldNVDB = states[D3DRS_ADAPTIVETESS_X] == uint32_t(D3D9Format::NVDB);
+      const bool oldAlphaTest = IsAlphaTestEnabled();
+
+      states[State] = Value;
+
+      // AMD's driver hack for ATOC and RESZ: magic values written to D3DRS_POINTSIZE
+      if (unlikely(State == D3DRS_POINTSIZE)) {
+        // ATOC
+        constexpr uint32_t AlphaToCoverageEnable = uint32_t(D3D9Format::A2M1);
+        constexpr uint32_t AlphaToCoverageDisable = uint32_t(D3D9Format::A2M0);
+
+        if (Value == AlphaToCoverageEnable
+         || Value == AlphaToCoverageDisable) {
+          m_amdATOC = Value == AlphaToCoverageEnable;
+
+          bool newATOC = IsAlphaToCoverageEnabled();
+          bool newAlphaTest = IsAlphaTestEnabled();
+
+          if (oldATOC != newATOC)
+            m_flags.set(D3D9DeviceFlag::DirtyMultiSampleState);
+
+          if (oldAlphaTest != newAlphaTest)
+            m_flags.set(D3D9DeviceFlag::DirtyAlphaTestState);
+
+          return D3D_OK; // returns before the generic switch on State
+        }
+
+        // RESZ
+        constexpr uint32_t RESZ = 0x7fa05000;
+        if (Value == RESZ) {
+          ResolveZ();
+          return D3D_OK;
+        }
+      }
+
+      // NV's driver hack for ATOC.
+      if (unlikely(State == D3DRS_ADAPTIVETESS_Y)) {
+        constexpr uint32_t AlphaToCoverageEnable = uint32_t(D3D9Format::ATOC);
+        constexpr uint32_t AlphaToCoverageDisable = 0;
+
+        if (Value == AlphaToCoverageEnable
+         || Value == AlphaToCoverageDisable) {
+          m_nvATOC = Value == AlphaToCoverageEnable;
+
+          bool newATOC = IsAlphaToCoverageEnabled();
+          bool newAlphaTest = IsAlphaTestEnabled();
+
+          if (oldATOC != newATOC)
+            m_flags.set(D3D9DeviceFlag::DirtyMultiSampleState);
+
+          if (oldAlphaTest != newAlphaTest)
+            m_flags.set(D3D9DeviceFlag::DirtyAlphaTestState);
+
+          return D3D_OK;
+        }
+
+        if (unlikely(Value == uint32_t(D3D9Format::COPM))) {
+          // UE3 calls this MinimalNVIDIADriverShaderOptimization
+          Logger::info("D3D9DeviceEx::SetRenderState: MinimalNVIDIADriverShaderOptimization is unsupported");
+          return D3D_OK;
+        }
+      }
+
+      switch (State) { // generic handling: map the changed state to dirty flags or rebinds
+        case D3DRS_SEPARATEALPHABLENDENABLE:
+        case D3DRS_ALPHABLENDENABLE:
+        case D3DRS_BLENDOP:
+        case D3DRS_BLENDOPALPHA:
+        case D3DRS_DESTBLEND:
+        case D3DRS_DESTBLENDALPHA:
+        case D3DRS_SRCBLEND:
+        case D3DRS_SRCBLENDALPHA:
+          m_flags.set(D3D9DeviceFlag::DirtyBlendState);
+          break;
+
+        case D3DRS_COLORWRITEENABLE: // each write mask calls UpdateActiveRTs with its render target index
+          UpdateActiveRTs(0);
+          m_flags.set(D3D9DeviceFlag::DirtyBlendState);
+          break;
+        case D3DRS_COLORWRITEENABLE1:
+          UpdateActiveRTs(1);
+          m_flags.set(D3D9DeviceFlag::DirtyBlendState);
+          break;
+        case D3DRS_COLORWRITEENABLE2:
+          UpdateActiveRTs(2);
+          m_flags.set(D3D9DeviceFlag::DirtyBlendState);
+          break;
+        case D3DRS_COLORWRITEENABLE3:
+          UpdateActiveRTs(3);
+          m_flags.set(D3D9DeviceFlag::DirtyBlendState);
+          break;
+
+        case D3DRS_ALPHATESTENABLE: {
+          bool newATOC = IsAlphaToCoverageEnabled(); // compared against the values sampled before the write
+          bool newAlphaTest = IsAlphaTestEnabled();
+
+          if (oldATOC != newATOC)
+            m_flags.set(D3D9DeviceFlag::DirtyMultiSampleState);
+
+          if (oldAlphaTest != newAlphaTest)
+            m_flags.set(D3D9DeviceFlag::DirtyAlphaTestState);
+
+          break;
+        }
+
+        case D3DRS_ALPHAFUNC:
+          m_flags.set(D3D9DeviceFlag::DirtyAlphaTestState);
+          break;
+
+        case D3DRS_BLENDFACTOR:
+
BindBlendFactor();
+          break;
+
+        case D3DRS_MULTISAMPLEMASK:
+          if (m_flags.test(D3D9DeviceFlag::ValidSampleMask)) // ignored unless the ValidSampleMask flag is set
+            m_flags.set(D3D9DeviceFlag::DirtyMultiSampleState);
+          break;
+
+        case D3DRS_ZWRITEENABLE:
+          if (m_activeHazardsDS != 0) // with active depth-stencil hazards the framebuffer is invalidated too
+            m_flags.set(D3D9DeviceFlag::DirtyFramebuffer);
+
+          m_flags.set(D3D9DeviceFlag::DirtyDepthStencilState);
+          break;
+
+        case D3DRS_ZENABLE:
+        case D3DRS_ZFUNC:
+        case D3DRS_TWOSIDEDSTENCILMODE:
+        case D3DRS_STENCILENABLE:
+        case D3DRS_STENCILFAIL:
+        case D3DRS_STENCILZFAIL:
+        case D3DRS_STENCILPASS:
+        case D3DRS_STENCILFUNC:
+        case D3DRS_CCW_STENCILFAIL:
+        case D3DRS_CCW_STENCILZFAIL:
+        case D3DRS_CCW_STENCILPASS:
+        case D3DRS_CCW_STENCILFUNC:
+        case D3DRS_STENCILMASK:
+        case D3DRS_STENCILWRITEMASK:
+          m_flags.set(D3D9DeviceFlag::DirtyDepthStencilState);
+          break;
+
+        case D3DRS_STENCILREF:
+          BindDepthStencilRefrence(); // helper name is spelled this way in the project
+          break;
+
+        case D3DRS_SCISSORTESTENABLE:
+          m_flags.set(D3D9DeviceFlag::DirtyViewportScissor);
+          break;
+
+        case D3DRS_SRGBWRITEENABLE:
+          m_flags.set(D3D9DeviceFlag::DirtyFramebuffer);
+          break;
+
+        case D3DRS_DEPTHBIAS:
+        case D3DRS_SLOPESCALEDEPTHBIAS: {
+          const bool depthBiasEnabled = IsDepthBiasEnabled();
+
+          if (depthBiasEnabled != oldDepthBiasEnabled) // rasterizer state only changes when bias toggles on/off
+            m_flags.set(D3D9DeviceFlag::DirtyRasterizerState);
+
+          if (depthBiasEnabled)
+            m_flags.set(D3D9DeviceFlag::DirtyDepthBias);
+
+          break;
+        }
+        case D3DRS_CULLMODE:
+        case D3DRS_FILLMODE:
+          m_flags.set(D3D9DeviceFlag::DirtyRasterizerState);
+          break;
+
+        case D3DRS_CLIPPLANEENABLE: {
+          const bool clipPlaneEnabled = IsClipPlaneEnabled();
+
+          if (clipPlaneEnabled != oldClipPlaneEnabled)
+            m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);
+
+          m_flags.set(D3D9DeviceFlag::DirtyClipPlanes);
+          break;
+        }
+
+        case D3DRS_ALPHAREF:
+          UpdatePushConstant<D3D9RenderStateItem::AlphaRef>();
+          break;
+
+        case D3DRS_TEXTUREFACTOR:
+          m_flags.set(D3D9DeviceFlag::DirtyFFPixelData);
+          break;
+
+        case D3DRS_DIFFUSEMATERIALSOURCE:
+        case D3DRS_AMBIENTMATERIALSOURCE:
+        case
D3DRS_SPECULARMATERIALSOURCE:
+        case D3DRS_EMISSIVEMATERIALSOURCE:
+        case D3DRS_COLORVERTEX:
+        case D3DRS_LIGHTING:
+        case D3DRS_NORMALIZENORMALS:
+        case D3DRS_LOCALVIEWER:
+          m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);
+          break;
+
+        case D3DRS_AMBIENT:
+          m_flags.set(D3D9DeviceFlag::DirtyFFVertexData);
+          break;
+
+        case D3DRS_SPECULARENABLE:
+          m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader);
+          break;
+
+        case D3DRS_FOGENABLE:
+        case D3DRS_FOGVERTEXMODE:
+        case D3DRS_FOGTABLEMODE:
+          m_flags.set(D3D9DeviceFlag::DirtyFogState);
+          break;
+
+        case D3DRS_RANGEFOGENABLE:
+          m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);
+          break;
+
+        case D3DRS_FOGCOLOR:
+          m_flags.set(D3D9DeviceFlag::DirtyFogColor);
+          break;
+
+        case D3DRS_FOGSTART:
+          m_flags.set(D3D9DeviceFlag::DirtyFogScale);
+          break;
+
+        case D3DRS_FOGEND: // the fog end value dirties both the scale and the end flag
+          m_flags.set(D3D9DeviceFlag::DirtyFogScale);
+          m_flags.set(D3D9DeviceFlag::DirtyFogEnd);
+          break;
+
+        case D3DRS_FOGDENSITY:
+          m_flags.set(D3D9DeviceFlag::DirtyFogDensity);
+          break;
+
+        case D3DRS_POINTSIZE: // reached only for values not consumed by the AMD ATOC/RESZ checks earlier
+          UpdatePushConstant<D3D9RenderStateItem::PointSize>();
+          break;
+
+        case D3DRS_POINTSIZE_MIN:
+          UpdatePushConstant<D3D9RenderStateItem::PointSizeMin>();
+          break;
+
+        case D3DRS_POINTSIZE_MAX:
+          UpdatePushConstant<D3D9RenderStateItem::PointSizeMax>();
+          break;
+
+        case D3DRS_POINTSCALE_A:
+        case D3DRS_POINTSCALE_B:
+        case D3DRS_POINTSCALE_C:
+          m_flags.set(D3D9DeviceFlag::DirtyPointScale);
+          break;
+
+        case D3DRS_POINTSCALEENABLE:
+        case D3DRS_POINTSPRITEENABLE:
+          // Nothing to do here!
+          // This is handled in UpdatePointMode.
+          break;
+
+        case D3DRS_SHADEMODE:
+          if (m_state.pixelShader != nullptr) { // rebind the bound pixel shader with the current permutation
+            BindShader<DxsoProgramType::PixelShader>(
+              GetCommonShader(m_state.pixelShader),
+              GetPixelShaderPermutation());
+          }
+
+          m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader);
+          break;
+
+        case D3DRS_TWEENFACTOR:
+          m_flags.set(D3D9DeviceFlag::DirtyFFVertexData);
+          break;
+
+        case D3DRS_VERTEXBLEND:
+          m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);
+          break;
+
+        case D3DRS_INDEXEDVERTEXBLENDENABLE:
+          if (CanSWVP() && Value)
+            m_flags.set(D3D9DeviceFlag::DirtyFFVertexBlend);
+
+          m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);
+          break;
+
+        case D3DRS_ADAPTIVETESS_X:
+        case D3DRS_ADAPTIVETESS_Z:
+        case D3DRS_ADAPTIVETESS_W:
+          if (states[D3DRS_ADAPTIVETESS_X] == uint32_t(D3D9Format::NVDB) || oldNVDB) {
+            m_flags.set(D3D9DeviceFlag::DirtyDepthBounds);
+            break;
+          }
+          // No break here: when NVDB is not (and was not) set, control falls through to default.
+        default:
+          static bool s_errorShown[256]; // one entry per state so each unhandled state warns only once
+
+          if (!std::exchange(s_errorShown[State], true))
+            Logger::warn(str::format("D3D9DeviceEx::SetRenderState: Unhandled render state ", State));
+          break;
+      }
+    }
+
+    return D3D_OK;
+  }
+
+  // Reads a render state; state 0 and states above D3DRS_BLENDOPALPHA read back as 0.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetRenderState(D3DRENDERSTATETYPE State, DWORD* pValue) {
+    D3D9DeviceLock lock = LockDevice();
+
+    if (unlikely(pValue == nullptr))
+      return D3DERR_INVALIDCALL;
+
+    if (unlikely(State > 255 || (State < D3DRS_ZENABLE && State != 0))) {
+      return D3DERR_INVALIDCALL;
+    }
+
+    if (State < D3DRS_ZENABLE || State > D3DRS_BLENDOPALPHA)
+      *pValue = 0;
+    else
+      *pValue = m_state.renderStates[State];
+
+    return D3D_OK;
+  }
+
+  // Creates a state block of the given type; a DxvkError is logged and becomes D3DERR_INVALIDCALL.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateStateBlock(
+          D3DSTATEBLOCKTYPE Type,
+          IDirect3DStateBlock9** ppSB) {
+    D3D9DeviceLock lock = LockDevice();
+
+    InitReturnPtr(ppSB);
+
+    if (unlikely(ppSB == nullptr))
+      return D3DERR_INVALIDCALL;
+
+    try {
+      const Com<D3D9StateBlock> sb = new D3D9StateBlock(this, ConvertStateBlockType(Type));
+      *ppSB = sb.ref();
+      return D3D_OK;
+    }
+    catch (const DxvkError & e) {
+      Logger::err(e.message());
+      return
D3DERR_INVALIDCALL;
+    }
+  }
+
+  // Starts recording into a new state block; fails if a recording is already in progress.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::BeginStateBlock() {
+    D3D9DeviceLock lock = LockDevice();
+
+    if (unlikely(m_recorder != nullptr))
+      return D3DERR_INVALIDCALL;
+
+    m_recorder = new D3D9StateBlock(this, D3D9StateBlockType::None);
+
+    return D3D_OK;
+  }
+
+  // Returns the recorded state block through ppSB and clears the recorder.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::EndStateBlock(IDirect3DStateBlock9** ppSB) {
+    D3D9DeviceLock lock = LockDevice();
+
+    InitReturnPtr(ppSB);
+
+    if (unlikely(ppSB == nullptr || m_recorder == nullptr))
+      return D3DERR_INVALIDCALL;
+
+    *ppSB = m_recorder.ref();
+    m_recorder = nullptr;
+
+    return D3D_OK;
+  }
+
+  // Stub: logs a warning, ignores pClipStatus and reports success.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetClipStatus(const D3DCLIPSTATUS9* pClipStatus) {
+    Logger::warn("D3D9DeviceEx::SetClipStatus: Stub");
+    return D3D_OK;
+  }
+
+  // Stub: logs a warning and reports success without writing to pClipStatus.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetClipStatus(D3DCLIPSTATUS9* pClipStatus) {
+    Logger::warn("D3D9DeviceEx::GetClipStatus: Stub");
+    return D3D_OK;
+  }
+
+  // Returns the texture stored for Stage; an invalid sampler yields D3D_OK with a null texture.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetTexture(DWORD Stage, IDirect3DBaseTexture9** ppTexture) {
+    D3D9DeviceLock lock = LockDevice();
+
+    if (ppTexture == nullptr)
+      return D3DERR_INVALIDCALL;
+
+    *ppTexture = nullptr;
+
+    if (unlikely(InvalidSampler(Stage)))
+      return D3D_OK;
+
+    DWORD stateSampler = RemapSamplerState(Stage);
+
+    *ppTexture = ref(m_state.textures[stateSampler]);
+
+    return D3D_OK;
+  }
+
+  // Remaps Stage and forwards to SetStateTexture; invalid samplers are ignored with D3D_OK.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetTexture(DWORD Stage, IDirect3DBaseTexture9* pTexture) {
+    if (unlikely(InvalidSampler(Stage)))
+      return D3D_OK;
+
+    DWORD stateSampler = RemapSamplerState(Stage);
+
+    return SetStateTexture(stateSampler, pTexture);
+  }
+
+  // Reads one texture stage state after remapping Type; *pValue is zeroed before the range checks.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetTextureStageState(
+          DWORD Stage,
+          D3DTEXTURESTAGESTATETYPE Type,
+          DWORD* pValue) {
+    auto dxvkType = RemapTextureStageStateType(Type);
+
+    if (unlikely(pValue == nullptr))
+      return D3DERR_INVALIDCALL;
+
+    *pValue = 0;
+
+    if (unlikely(Stage >= caps::TextureStageCount))
+      return D3DERR_INVALIDCALL;
+
+    if
(unlikely(dxvkType >= TextureStageStateCount))
+      return D3DERR_INVALIDCALL;
+
+    *pValue = m_state.textureStages[Stage][dxvkType];
+
+    return D3D_OK;
+  }
+  // Remaps Type and forwards to SetStateTextureStageState.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetTextureStageState(
+          DWORD Stage,
+          D3DTEXTURESTAGESTATETYPE Type,
+          DWORD Value) {
+    return SetStateTextureStageState(Stage, RemapTextureStageStateType(Type), Value);
+  }
+
+  // Reads one sampler state; an invalid sampler yields D3D_OK with *pValue set to 0.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetSamplerState(
+          DWORD Sampler,
+          D3DSAMPLERSTATETYPE Type,
+          DWORD* pValue) {
+    D3D9DeviceLock lock = LockDevice();
+
+    if (unlikely(pValue == nullptr))
+      return D3DERR_INVALIDCALL;
+
+    *pValue = 0;
+
+    if (unlikely(InvalidSampler(Sampler)))
+      return D3D_OK;
+
+    Sampler = RemapSamplerState(Sampler);
+
+    *pValue = m_state.samplerStates[Sampler][Type];
+
+    return D3D_OK;
+  }
+
+  // Remaps Sampler and forwards to SetStateSamplerState; invalid samplers are ignored with D3D_OK.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetSamplerState(
+          DWORD Sampler,
+          D3DSAMPLERSTATETYPE Type,
+          DWORD Value) {
+    if (unlikely(InvalidSampler(Sampler)))
+      return D3D_OK;
+
+    uint32_t stateSampler = RemapSamplerState(Sampler);
+
+    return SetStateSamplerState(stateSampler, Type, Value);
+  }
+
+  // Always succeeds and reports a single pass when pNumPasses is provided.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::ValidateDevice(DWORD* pNumPasses) {
+    if (pNumPasses != nullptr)
+      *pNumPasses = 1;
+
+    return D3D_OK;
+  }
+
+  // No-op: the palette setters return D3D_OK, the palette getters return D3DERR_INVALIDCALL.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetPaletteEntries(UINT PaletteNumber, const PALETTEENTRY* pEntries) {
+    // This succeeds even though we don't advertise support.
+    return D3D_OK;
+  }
+
+
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetPaletteEntries(UINT PaletteNumber, PALETTEENTRY* pEntries) {
+    // Don't advertise support for this...
+    return D3DERR_INVALIDCALL;
+  }
+
+
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetCurrentTexturePalette(UINT PaletteNumber) {
+    // This succeeds even though we don't advertise support.
+    return D3D_OK;
+  }
+
+
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetCurrentTexturePalette(UINT *PaletteNumber) {
+    // Don't advertise support for this...
+    return D3DERR_INVALIDCALL;
+  }
+
+  // Stores the scissor rectangle and marks viewport/scissor state dirty when it changed.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetScissorRect(const RECT* pRect) {
+    D3D9DeviceLock lock = LockDevice();
+
+    if (unlikely(pRect == nullptr))
+      return D3DERR_INVALIDCALL;
+
+    if (unlikely(ShouldRecord()))
+      return m_recorder->SetScissorRect(pRect);
+
+    if (m_state.scissorRect == *pRect)
+      return D3D_OK;
+
+    m_state.scissorRect = *pRect;
+
+    m_flags.set(D3D9DeviceFlag::DirtyViewportScissor);
+
+    return D3D_OK;
+  }
+
+  // Copies the stored scissor rectangle into pRect.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetScissorRect(RECT* pRect) {
+    D3D9DeviceLock lock = LockDevice();
+
+    if (unlikely(pRect == nullptr))
+      return D3DERR_INVALIDCALL;
+
+    *pRect = m_state.scissorRect;
+
+    return D3D_OK;
+  }
+
+  // Stores the SWVP toggle; enabling it fails with D3DERR_INVALIDCALL when CanSWVP() is false.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetSoftwareVertexProcessing(BOOL bSoftware) {
+    auto lock = LockDevice();
+
+    if (bSoftware && !CanSWVP())
+      return D3DERR_INVALIDCALL;
+
+    m_isSWVP = bSoftware;
+
+    return D3D_OK;
+  }
+
+  // Returns the stored SWVP toggle.
+  BOOL STDMETHODCALLTYPE D3D9DeviceEx::GetSoftwareVertexProcessing() {
+    auto lock = LockDevice();
+
+    return m_isSWVP;
+  }
+
+  // No-op: nSegments is ignored and D3D_OK is returned.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetNPatchMode(float nSegments) {
+    return D3D_OK;
+  }
+
+  // Always reports 0.0f.
+  float STDMETHODCALLTYPE D3D9DeviceEx::GetNPatchMode() {
+    return 0.0f;
+  }
+
+  // Records a non-indexed draw: prepares state, then emits the draw through EmitCs.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DrawPrimitive(
+          D3DPRIMITIVETYPE PrimitiveType,
+          UINT StartVertex,
+          UINT PrimitiveCount) {
+    D3D9DeviceLock lock = LockDevice();
+
+    if (unlikely(config::FullValidation && m_state.vertexDecl == nullptr)) // both checks apply only with FullValidation
+      return D3DERR_INVALIDCALL;
+
+    if (unlikely(config::FullValidation && !PrimitiveCount))
+      return S_OK;
+
+    PrepareDraw(PrimitiveType);
+
+    EmitCs([this,
+      cPrimType = PrimitiveType,
+      cPrimCount = PrimitiveCount,
+      cStartVertex = StartVertex,
+      cInstanceCount = GetInstanceCount()
+    ](DxvkContext* ctx) {
+      auto drawInfo = GenerateDrawInfo(cPrimType, cPrimCount, cInstanceCount);
+
+      ApplyPrimitiveType(ctx, cPrimType);
+
+      ctx->draw(
+        drawInfo.vertexCount, drawInfo.instanceCount,
+        cStartVertex, 0);
+
});
+
+    return D3D_OK;
+  }
+
+  // Records an indexed draw; MinVertexIndex and NumVertices are not used by this implementation.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DrawIndexedPrimitive(
+          D3DPRIMITIVETYPE PrimitiveType,
+          INT BaseVertexIndex,
+          UINT MinVertexIndex,
+          UINT NumVertices,
+          UINT StartIndex,
+          UINT PrimitiveCount) {
+    D3D9DeviceLock lock = LockDevice();
+
+    if (unlikely(config::FullValidation && m_state.vertexDecl == nullptr))
+      return D3DERR_INVALIDCALL;
+
+    if (unlikely(config::FullValidation && !PrimitiveCount))
+      return S_OK;
+
+    PrepareDraw(PrimitiveType);
+
+    EmitCs([this,
+      cPrimType = PrimitiveType,
+      cPrimCount = PrimitiveCount,
+      cStartIndex = StartIndex,
+      cBaseVertexIndex = BaseVertexIndex,
+      cInstanceCount = GetInstanceCount()
+    ](DxvkContext* ctx) {
+      auto drawInfo = GenerateDrawInfo(cPrimType, cPrimCount, cInstanceCount);
+
+      ApplyPrimitiveType(ctx, cPrimType);
+
+      ctx->drawIndexed(
+        drawInfo.vertexCount, drawInfo.instanceCount,
+        cStartIndex,
+        cBaseVertexIndex, 0);
+    });
+
+    return D3D_OK;
+  }
+
+  // Draws from caller memory: vertex data is copied to a temporary buffer bound as stream 0.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DrawPrimitiveUP(
+          D3DPRIMITIVETYPE PrimitiveType,
+          UINT PrimitiveCount,
+    const void* pVertexStreamZeroData,
+          UINT VertexStreamZeroStride) {
+    D3D9DeviceLock lock = LockDevice();
+
+    if (unlikely(config::FullValidation && m_state.vertexDecl == nullptr))
+      return D3DERR_INVALIDCALL;
+
+    if (unlikely(config::FullValidation && !PrimitiveCount))
+      return S_OK;
+
+    PrepareDraw(PrimitiveType);
+
+    auto drawInfo = GenerateDrawInfo(PrimitiveType, PrimitiveCount, 0); // used here only for vertexCount when sizing the copy
+
+    const uint32_t dataSize = GetUPDataSize(drawInfo.vertexCount, VertexStreamZeroStride);
+    const uint32_t bufferSize = GetUPBufferSize(drawInfo.vertexCount, VertexStreamZeroStride);
+
+    auto upSlice = AllocTempBuffer<true>(bufferSize);
+    FillUPVertexBuffer(upSlice.mapPtr, pVertexStreamZeroData, dataSize, bufferSize);
+
+    EmitCs([this,
+      cBufferSlice = std::move(upSlice.slice),
+      cPrimType = PrimitiveType,
+      cPrimCount = PrimitiveCount,
+      cInstanceCount = GetInstanceCount(),
+      cStride = VertexStreamZeroStride
+    ](DxvkContext* ctx) {
+      auto drawInfo = GenerateDrawInfo(cPrimType, cPrimCount, cInstanceCount);
+
+      ApplyPrimitiveType(ctx, cPrimType);
+
+      ctx->bindVertexBuffer(0, cBufferSlice, cStride);
+      ctx->draw(
+        drawInfo.vertexCount, drawInfo.instanceCount,
+        0, 0);
+      ctx->bindVertexBuffer(0, DxvkBufferSlice(), 0); // unbind the temporary buffer after the draw
+    });
+
+    m_state.vertexBuffers[0].vertexBuffer = nullptr; // stream 0 is reset in the tracked device state
+    m_state.vertexBuffers[0].offset = 0;
+    m_state.vertexBuffers[0].stride = 0;
+
+    return D3D_OK;
+  }
+
+  // Indexed draw from caller memory: vertices and indices are packed into one temporary buffer.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DrawIndexedPrimitiveUP(
+          D3DPRIMITIVETYPE PrimitiveType,
+          UINT MinVertexIndex,
+          UINT NumVertices,
+          UINT PrimitiveCount,
+    const void* pIndexData,
+          D3DFORMAT IndexDataFormat,
+    const void* pVertexStreamZeroData,
+          UINT VertexStreamZeroStride) {
+    D3D9DeviceLock lock = LockDevice();
+
+    if (unlikely(config::FullValidation && m_state.vertexDecl == nullptr))
+      return D3DERR_INVALIDCALL;
+
+    if (unlikely(config::FullValidation && !PrimitiveCount))
+      return S_OK;
+
+    PrepareDraw(PrimitiveType);
+
+    auto drawInfo = GenerateDrawInfo(PrimitiveType, PrimitiveCount, 0);
+
+    const uint32_t vertexDataSize = GetUPDataSize(MinVertexIndex + NumVertices, VertexStreamZeroStride); // vertex sizes cover MinVertexIndex + NumVertices vertices
+    const uint32_t vertexBufferSize = GetUPBufferSize(MinVertexIndex + NumVertices, VertexStreamZeroStride);
+
+    const uint32_t indexSize = IndexDataFormat == D3DFMT_INDEX16 ?
2 : 4;
+    const uint32_t indicesSize = drawInfo.vertexCount * indexSize;
+
+    const uint32_t upSize = vertexBufferSize + indicesSize; // layout: [vertex data | index data]
+
+    auto upSlice = AllocTempBuffer<true>(upSize);
+    uint8_t* data = reinterpret_cast<uint8_t*>(upSlice.mapPtr);
+    FillUPVertexBuffer(data, pVertexStreamZeroData, vertexDataSize, vertexBufferSize);
+    std::memcpy(data + vertexBufferSize, pIndexData, indicesSize);
+
+    EmitCs([this,
+      cVertexSize = vertexBufferSize,
+      cBufferSlice = std::move(upSlice.slice),
+      cPrimType = PrimitiveType,
+      cPrimCount = PrimitiveCount,
+      cStride = VertexStreamZeroStride,
+      cInstanceCount = GetInstanceCount(),
+      cIndexType = DecodeIndexType(
+                            static_cast<D3D9Format>(IndexDataFormat))
+    ](DxvkContext* ctx) {
+      auto drawInfo = GenerateDrawInfo(cPrimType, cPrimCount, cInstanceCount);
+
+      ApplyPrimitiveType(ctx, cPrimType);
+
+      ctx->bindVertexBuffer(0, cBufferSlice.subSlice(0, cVertexSize), cStride); // first cVertexSize bytes hold the vertices
+      ctx->bindIndexBuffer(cBufferSlice.subSlice(cVertexSize, cBufferSlice.length() - cVertexSize), cIndexType); // the remainder holds the indices
+      ctx->drawIndexed(
+        drawInfo.vertexCount, drawInfo.instanceCount,
+        0,
+        0, 0);
+      ctx->bindVertexBuffer(0, DxvkBufferSlice(), 0);
+      ctx->bindIndexBuffer(DxvkBufferSlice(), VK_INDEX_TYPE_UINT32);
+    });
+
+    m_state.vertexBuffers[0].vertexBuffer = nullptr; // stream 0 and the index buffer are reset in tracked state
+    m_state.vertexBuffers[0].offset = 0;
+    m_state.vertexBuffers[0].stride = 0;
+
+    m_state.indices = nullptr;
+
+    return D3D_OK;
+  }
+
+  // Draws VertexCount points with the SWVP emulation shader bound to the geometry stage, targeting pDestBuffer.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::ProcessVertices(
+          UINT SrcStartIndex,
+          UINT DestIndex,
+          UINT VertexCount,
+          IDirect3DVertexBuffer9* pDestBuffer,
+          IDirect3DVertexDeclaration9* pVertexDecl,
+          DWORD Flags) {
+    D3D9DeviceLock lock = LockDevice();
+
+    if (unlikely(pDestBuffer == nullptr || pVertexDecl == nullptr))
+      return D3DERR_INVALIDCALL;
+
+    if (!SupportsSWVP()) { // unsupported: log once and report success without processing
+      static bool s_errorShown = false;
+
+      if (!std::exchange(s_errorShown, true))
+        Logger::err("D3D9DeviceEx::ProcessVertices: SWVP emu unsupported (vertexPipelineStoresAndAtomics)");
+
+      return
D3D_OK;
+    }
+
+    D3D9CommonBuffer* dst = static_cast<D3D9VertexBuffer*>(pDestBuffer)->GetCommonBuffer();
+    D3D9VertexDecl* decl = static_cast<D3D9VertexDecl*> (pVertexDecl);
+
+    PrepareDraw(D3DPT_FORCE_DWORD);
+
+    if (decl == nullptr) { // NOTE(review): a null pVertexDecl was already rejected earlier, so this branch looks unreachable - confirm
+      DWORD FVF = dst->Desc()->FVF;
+
+      auto iter = m_fvfTable.find(FVF);
+
+      if (iter == m_fvfTable.end()) {
+        decl = new D3D9VertexDecl(this, FVF);
+        m_fvfTable.insert(std::make_pair(FVF, decl));
+      }
+      else
+        decl = iter->second.ptr();
+    }
+
+    uint32_t offset = DestIndex * decl->GetSize(); // byte offset of the first destination vertex
+
+    auto slice = dst->GetBufferSlice<D3D9_COMMON_BUFFER_TYPE_REAL>();
+         slice = slice.subSlice(offset, slice.length() - offset);
+
+    EmitCs([this,
+      cDecl = ref(decl),
+      cVertexCount = VertexCount,
+      cStartIndex = SrcStartIndex,
+      cInstanceCount = GetInstanceCount(),
+      cBufferSlice = slice,
+      cIndexed = m_state.indices != nullptr
+    ](DxvkContext* ctx) {
+      Rc<DxvkShader> shader = m_swvpEmulator.GetShaderModule(this, cDecl);
+
+      auto drawInfo = GenerateDrawInfo(D3DPT_POINTLIST, cVertexCount, cInstanceCount);
+
+      if (drawInfo.instanceCount != 1) { // instancing is forced off and a warning is logged
+        drawInfo.instanceCount = 1;
+
+        Logger::warn("D3D9DeviceEx::ProcessVertices: instancing unsupported");
+      }
+
+      ApplyPrimitiveType(ctx, D3DPT_POINTLIST);
+
+      // Unbind the pixel shader since nothing is rendered here;
+      // this avoids validation errors / UB.
+      ctx->bindShader(VK_SHADER_STAGE_FRAGMENT_BIT, nullptr);
+
+      ctx->bindShader(VK_SHADER_STAGE_GEOMETRY_BIT, shader);
+      ctx->bindResourceBuffer(getSWVPBufferSlot(), cBufferSlice);
+      ctx->draw(
+        drawInfo.vertexCount, drawInfo.instanceCount,
+        cStartIndex, 0);
+      ctx->bindResourceBuffer(getSWVPBufferSlot(), DxvkBufferSlice());
+      ctx->bindShader(VK_SHADER_STAGE_GEOMETRY_BIT, nullptr);
+    });
+
+    // We unbound the pixel shader before,
+    // let's make sure that gets rebound.
+    m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader);
+
+    if (m_state.pixelShader != nullptr) {
+      BindShader<DxsoProgramTypes::PixelShader>(
+        GetCommonShader(m_state.pixelShader),
+        GetPixelShaderPermutation());
+    }
+
+    if (dst->GetMapMode() == D3D9_COMMON_BUFFER_MAP_MODE_BUFFER) { // in this map mode the written range is also copied into the MAPPING buffer
+      uint32_t copySize = VertexCount * decl->GetSize();
+
+      EmitCs([
+        cSrcBuffer = dst->GetBuffer<D3D9_COMMON_BUFFER_TYPE_REAL>(),
+        cDstBuffer = dst->GetBuffer<D3D9_COMMON_BUFFER_TYPE_MAPPING>(),
+        cOffset = offset,
+        cCopySize = copySize
+      ](DxvkContext* ctx) {
+        ctx->copyBuffer(cDstBuffer, cOffset, cSrcBuffer, cOffset, cCopySize);
+      });
+    }
+
+    dst->SetWrittenByGPU(true);
+
+    return D3D_OK;
+  }
+
+  // Builds a vertex declaration from an element array that ends at the first element with Stream == 0xFF.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateVertexDeclaration(
+    const D3DVERTEXELEMENT9* pVertexElements,
+          IDirect3DVertexDeclaration9** ppDecl) {
+    InitReturnPtr(ppDecl);
+
+    if (unlikely(ppDecl == nullptr || pVertexElements == nullptr))
+      return D3DERR_INVALIDCALL;
+
+    const D3DVERTEXELEMENT9* counter = pVertexElements;
+    while (counter->Stream != 0xFF) // walk to the terminator; it is not included in declCount
+      counter++;
+
+    const uint32_t declCount = uint32_t(counter - pVertexElements);
+
+    try {
+      const Com<D3D9VertexDecl> decl = new D3D9VertexDecl(this, pVertexElements, declCount);
+      *ppDecl = decl.ref();
+      return D3D_OK;
+    }
+    catch (const DxvkError & e) {
+      Logger::err(e.message());
+      return D3DERR_INVALIDCALL;
+    }
+  }
+
+  // Stores the vertex declaration; dirties the FF vertex shader only when relevant declaration features differ.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetVertexDeclaration(IDirect3DVertexDeclaration9* pDecl) {
+    D3D9DeviceLock lock = LockDevice();
+
+    D3D9VertexDecl* decl = static_cast<D3D9VertexDecl*>(pDecl);
+
+    if (unlikely(ShouldRecord()))
+      return m_recorder->SetVertexDeclaration(decl);
+
+    if (decl == m_state.vertexDecl.ptr())
+      return D3D_OK;
+
+    bool dirtyFFShader = decl == nullptr || m_state.vertexDecl == nullptr; // a null old or new declaration always dirties
+    if (!dirtyFFShader)
+      dirtyFFShader |= decl->TestFlag(D3D9VertexDeclFlag::HasPositionT) != m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasPositionT)
+                    ||
decl->TestFlag(D3D9VertexDeclFlag::HasColor0) != m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasColor0)
+                    || decl->TestFlag(D3D9VertexDeclFlag::HasColor1) != m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasColor1)
+                    || decl->GetTexcoordMask() != m_state.vertexDecl->GetTexcoordMask();
+
+    if (dirtyFFShader)
+      m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);
+
+    m_state.vertexDecl = decl;
+
+    m_flags.set(D3D9DeviceFlag::DirtyInputLayout); // the input layout is dirtied on every declaration change
+
+    return D3D_OK;
+  }
+
+  // Returns the stored vertex declaration; always D3D_OK, even for a null ppDecl or when none is set.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetVertexDeclaration(IDirect3DVertexDeclaration9** ppDecl) {
+    D3D9DeviceLock lock = LockDevice();
+
+    InitReturnPtr(ppDecl);
+
+    if (ppDecl == nullptr)
+      return D3D_OK;
+
+    if (m_state.vertexDecl == nullptr)
+      return D3D_OK;
+
+    *ppDecl = m_state.vertexDecl.ref();
+
+    return D3D_OK;
+  }
+
+  // Sets the vertex declaration for an FVF code, creating and caching it in m_fvfTable on first use.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetFVF(DWORD FVF) {
+    D3D9DeviceLock lock = LockDevice();
+
+    if (FVF == 0) // an FVF of 0 is ignored and the current declaration is kept
+      return D3D_OK;
+
+    D3D9VertexDecl* decl = nullptr;
+
+    auto iter = m_fvfTable.find(FVF);
+
+    if (iter == m_fvfTable.end()) {
+      decl = new D3D9VertexDecl(this, FVF);
+      m_fvfTable.insert(std::make_pair(FVF, decl));
+    }
+    else
+      decl = iter->second.ptr();
+
+    return this->SetVertexDeclaration(decl);
+  }
+
+  // Reports GetFVF() of the stored vertex declaration, or 0 when none is set.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetFVF(DWORD* pFVF) {
+    D3D9DeviceLock lock = LockDevice();
+
+    if (pFVF == nullptr)
+      return D3DERR_INVALIDCALL;
+
+    *pFVF = m_state.vertexDecl != nullptr
+      ?
m_state.vertexDecl->GetFVF()
+      : 0;
+
+    return D3D_OK;
+  }
+
+  // Creates a vertex shader from pFunction; any CreateShaderModule failure becomes D3DERR_INVALIDCALL.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateVertexShader(
+    const DWORD* pFunction,
+          IDirect3DVertexShader9** ppShader) {
+    // CreateVertexShader does not init the
+    // return ptr unlike CreatePixelShader
+
+    if (unlikely(ppShader == nullptr))
+      return D3DERR_INVALIDCALL;
+
+    DxsoModuleInfo moduleInfo;
+    moduleInfo.options = m_dxsoOptions;
+
+    D3D9CommonShader module;
+
+    if (FAILED(this->CreateShaderModule(&module,
+      VK_SHADER_STAGE_VERTEX_BIT,
+      pFunction,
+      &moduleInfo)))
+      return D3DERR_INVALIDCALL;
+
+    *ppShader = ref(new D3D9VertexShader(this, module));
+
+    return D3D_OK;
+  }
+
+  // Stores the vertex shader, updates constant-buffer dirty tracking and rebinds when non-null.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetVertexShader(IDirect3DVertexShader9* pShader) {
+    D3D9DeviceLock lock = LockDevice();
+
+    D3D9VertexShader* shader = static_cast<D3D9VertexShader*>(pShader);
+
+    if (unlikely(ShouldRecord()))
+      return m_recorder->SetVertexShader(shader);
+
+    if (shader == m_state.vertexShader.ptr())
+      return D3D_OK;
+
+    auto* oldShader = GetCommonShader(m_state.vertexShader);
+    auto* newShader = GetCommonShader(shader);
+
+    bool oldCopies = oldShader && oldShader->GetMeta().needsConstantCopies;
+    bool newCopies = newShader && newShader->GetMeta().needsConstantCopies;
+
+    m_consts[DxsoProgramTypes::VertexShader].dirty |= oldCopies || newCopies || !oldShader;
+    m_consts[DxsoProgramTypes::VertexShader].meta = newShader ?
newShader->GetMeta() : DxsoShaderMetaInfo();
+
+    if (newShader && oldShader) { // also dirty when the new shader's max constant index exceeds the old one's
+      m_consts[DxsoProgramTypes::VertexShader].dirty
+        |= newShader->GetMeta().maxConstIndexF > oldShader->GetMeta().maxConstIndexF
+        || newShader->GetMeta().maxConstIndexI > oldShader->GetMeta().maxConstIndexI
+        || newShader->GetMeta().maxConstIndexB > oldShader->GetMeta().maxConstIndexB;
+    }
+
+    m_state.vertexShader = shader;
+
+    if (shader != nullptr) {
+      m_flags.clr(D3D9DeviceFlag::DirtyProgVertexShader);
+      m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);
+
+      BindShader<DxsoProgramTypes::VertexShader>(
+        GetCommonShader(shader),
+        GetVertexShaderPermutation());
+
+      m_vsShaderMasks = newShader->GetShaderMask();
+    }
+    else
+      m_vsShaderMasks = D3D9ShaderMasks(); // no shader bound: reset to default-constructed masks
+
+    m_flags.set(D3D9DeviceFlag::DirtyInputLayout);
+
+    return D3D_OK;
+  }
+
+  // Returns the stored vertex shader through ppShader.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetVertexShader(IDirect3DVertexShader9** ppShader) {
+    D3D9DeviceLock lock = LockDevice();
+
+    InitReturnPtr(ppShader);
+
+    if (unlikely(ppShader == nullptr))
+      return D3DERR_INVALIDCALL;
+
+    *ppShader = m_state.vertexShader.ref();
+
+    return D3D_OK;
+  }
+
+  // The vertex shader constant accessors lock the device and forward to Set/GetShaderConstants.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetVertexShaderConstantF(
+          UINT StartRegister,
+    const float* pConstantData,
+          UINT Vector4fCount) {
+    D3D9DeviceLock lock = LockDevice();
+
+    return SetShaderConstants<
+      DxsoProgramTypes::VertexShader,
+      D3D9ConstantType::Float>(
+        StartRegister,
+        pConstantData,
+        Vector4fCount);
+  }
+
+
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetVertexShaderConstantF(
+    UINT StartRegister,
+    float* pConstantData,
+    UINT Vector4fCount) {
+    D3D9DeviceLock lock = LockDevice();
+
+    return GetShaderConstants<
+      DxsoProgramTypes::VertexShader,
+      D3D9ConstantType::Float>(
+        StartRegister,
+        pConstantData,
+        Vector4fCount);
+  }
+
+
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetVertexShaderConstantI(
+          UINT StartRegister,
+    const int* pConstantData,
+          UINT Vector4iCount) {
+    D3D9DeviceLock lock = LockDevice();
+
+    return
SetShaderConstants<
+      DxsoProgramTypes::VertexShader,
+      D3D9ConstantType::Int>(
+        StartRegister,
+        pConstantData,
+        Vector4iCount);
+  }
+
+  // Integer and boolean constant accessors forward to Set/GetShaderConstants in the same way.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetVertexShaderConstantI(
+    UINT StartRegister,
+    int* pConstantData,
+    UINT Vector4iCount) {
+    D3D9DeviceLock lock = LockDevice();
+
+    return GetShaderConstants<
+      DxsoProgramTypes::VertexShader,
+      D3D9ConstantType::Int>(
+        StartRegister,
+        pConstantData,
+        Vector4iCount);
+  }
+
+
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetVertexShaderConstantB(
+          UINT StartRegister,
+    const BOOL* pConstantData,
+          UINT BoolCount) {
+    D3D9DeviceLock lock = LockDevice();
+
+    return SetShaderConstants<
+      DxsoProgramTypes::VertexShader,
+      D3D9ConstantType::Bool>(
+        StartRegister,
+        pConstantData,
+        BoolCount);
+  }
+
+
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetVertexShaderConstantB(
+    UINT StartRegister,
+    BOOL* pConstantData,
+    UINT BoolCount) {
+    D3D9DeviceLock lock = LockDevice();
+
+    return GetShaderConstants<
+      DxsoProgramTypes::VertexShader,
+      D3D9ConstantType::Bool>(
+        StartRegister,
+        pConstantData,
+        BoolCount);
+  }
+
+  // Binds a vertex buffer to a stream; rebinds only when the buffer, offset or stride changed.
+  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetStreamSource(
+          UINT StreamNumber,
+          IDirect3DVertexBuffer9* pStreamData,
+          UINT OffsetInBytes,
+          UINT Stride) {
+    D3D9DeviceLock lock = LockDevice();
+
+    if (unlikely(StreamNumber >= caps::MaxStreams))
+      return D3DERR_INVALIDCALL;
+
+    D3D9VertexBuffer* buffer = static_cast<D3D9VertexBuffer*>(pStreamData);
+
+    if (unlikely(ShouldRecord()))
+      return m_recorder->SetStreamSource(
+        StreamNumber,
+        buffer,
+        OffsetInBytes,
+        Stride);
+
+    auto& vbo = m_state.vertexBuffers[StreamNumber];
+    bool needsUpdate = vbo.vertexBuffer != buffer;
+
+    if (needsUpdate)
+      vbo.vertexBuffer = buffer;
+
+    if (buffer != nullptr) { // offset and stride are only stored for a non-null buffer
+      needsUpdate |= vbo.offset != OffsetInBytes
+                  || vbo.stride != Stride;
+
+      vbo.offset = OffsetInBytes;
+      vbo.stride = Stride;
+    }
+
+    if (needsUpdate)
+      BindVertexBuffer(StreamNumber, buffer, OffsetInBytes, Stride);
+
+    return
D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetStreamSource( + UINT StreamNumber, + IDirect3DVertexBuffer9** ppStreamData, + UINT* pOffsetInBytes, + UINT* pStride) { + D3D9DeviceLock lock = LockDevice(); + + InitReturnPtr(ppStreamData); + + if (likely(pOffsetInBytes != nullptr)) + *pOffsetInBytes = 0; + + if (likely(pStride != nullptr)) + *pStride = 0; + + if (unlikely(ppStreamData == nullptr || pOffsetInBytes == nullptr || pStride == nullptr)) + return D3DERR_INVALIDCALL; + + if (unlikely(StreamNumber >= caps::MaxStreams)) + return D3DERR_INVALIDCALL; + + const auto& vbo = m_state.vertexBuffers[StreamNumber]; + + *ppStreamData = vbo.vertexBuffer.ref(); + *pOffsetInBytes = vbo.offset; + *pStride = vbo.stride; + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetStreamSourceFreq(UINT StreamNumber, UINT Setting) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(StreamNumber >= caps::MaxStreams)) + return D3DERR_INVALIDCALL; + + const bool indexed = Setting & D3DSTREAMSOURCE_INDEXEDDATA; + const bool instanced = Setting & D3DSTREAMSOURCE_INSTANCEDATA; + + if (unlikely(StreamNumber == 0 && instanced)) + return D3DERR_INVALIDCALL; + + if (unlikely(instanced && indexed)) + return D3DERR_INVALIDCALL; + + if (unlikely(Setting == 0)) + return D3DERR_INVALIDCALL; + + if (unlikely(ShouldRecord())) + return m_recorder->SetStreamSourceFreq(StreamNumber, Setting); + + if (m_state.streamFreq[StreamNumber] == Setting) + return D3D_OK; + + m_state.streamFreq[StreamNumber] = Setting; + + if (instanced) + m_instancedData |= 1u << StreamNumber; + else + m_instancedData &= ~(1u << StreamNumber); + + m_flags.set(D3D9DeviceFlag::DirtyInputLayout); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetStreamSourceFreq(UINT StreamNumber, UINT* pSetting) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(StreamNumber >= caps::MaxStreams)) + return D3DERR_INVALIDCALL; + + if (unlikely(pSetting == nullptr)) + return 
D3DERR_INVALIDCALL; + + *pSetting = m_state.streamFreq[StreamNumber]; + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetIndices(IDirect3DIndexBuffer9* pIndexData) { + D3D9DeviceLock lock = LockDevice(); + + D3D9IndexBuffer* buffer = static_cast<D3D9IndexBuffer*>(pIndexData); + + if (unlikely(ShouldRecord())) + return m_recorder->SetIndices(buffer); + + if (buffer == m_state.indices.ptr()) + return D3D_OK; + + m_state.indices = buffer; + + BindIndices(); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetIndices(IDirect3DIndexBuffer9** ppIndexData) { + D3D9DeviceLock lock = LockDevice(); + InitReturnPtr(ppIndexData); + + if (unlikely(ppIndexData == nullptr)) + return D3DERR_INVALIDCALL; + + *ppIndexData = m_state.indices.ref(); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreatePixelShader( + const DWORD* pFunction, + IDirect3DPixelShader9** ppShader) { + InitReturnPtr(ppShader); + + if (unlikely(ppShader == nullptr)) + return D3DERR_INVALIDCALL; + + DxsoModuleInfo moduleInfo; + moduleInfo.options = m_dxsoOptions; + + D3D9CommonShader module; + + if (FAILED(this->CreateShaderModule(&module, + VK_SHADER_STAGE_FRAGMENT_BIT, + pFunction, + &moduleInfo))) + return D3DERR_INVALIDCALL; + + *ppShader = ref(new D3D9PixelShader(this, module)); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetPixelShader(IDirect3DPixelShader9* pShader) { + D3D9DeviceLock lock = LockDevice(); + + D3D9PixelShader* shader = static_cast<D3D9PixelShader*>(pShader); + + if (unlikely(ShouldRecord())) + return m_recorder->SetPixelShader(shader); + + if (shader == m_state.pixelShader.ptr()) + return D3D_OK; + + auto* oldShader = GetCommonShader(m_state.pixelShader); + auto* newShader = GetCommonShader(shader); + + bool oldCopies = oldShader && oldShader->GetMeta().needsConstantCopies; + bool newCopies = newShader && newShader->GetMeta().needsConstantCopies; + + m_consts[DxsoProgramTypes::PixelShader].dirty |= 
oldCopies || newCopies || !oldShader; + m_consts[DxsoProgramTypes::PixelShader].meta = newShader ? newShader->GetMeta() : DxsoShaderMetaInfo(); + + if (newShader && oldShader) { + m_consts[DxsoProgramTypes::PixelShader].dirty + |= newShader->GetMeta().maxConstIndexF > oldShader->GetMeta().maxConstIndexF + || newShader->GetMeta().maxConstIndexI > oldShader->GetMeta().maxConstIndexI + || newShader->GetMeta().maxConstIndexB > oldShader->GetMeta().maxConstIndexB; + } + + m_state.pixelShader = shader; + + if (shader != nullptr) { + m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader); + + BindShader<DxsoProgramTypes::PixelShader>( + GetCommonShader(shader), + GetPixelShaderPermutation()); + + m_psShaderMasks = newShader->GetShaderMask(); + } + else { + // TODO: What fixed function textures are in use? + // Currently we are making all 8 of them as in use here. + + // The RT output is always 0 for fixed function. + m_psShaderMasks = FixedFunctionMask; + } + + UpdateActiveHazardsRT(UINT32_MAX); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetPixelShader(IDirect3DPixelShader9** ppShader) { + D3D9DeviceLock lock = LockDevice(); + + InitReturnPtr(ppShader); + + if (unlikely(ppShader == nullptr)) + return D3DERR_INVALIDCALL; + + *ppShader = m_state.pixelShader.ref(); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetPixelShaderConstantF( + UINT StartRegister, + const float* pConstantData, + UINT Vector4fCount) { + D3D9DeviceLock lock = LockDevice(); + + return SetShaderConstants < + DxsoProgramTypes::PixelShader, + D3D9ConstantType::Float>( + StartRegister, + pConstantData, + Vector4fCount); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetPixelShaderConstantF( + UINT StartRegister, + float* pConstantData, + UINT Vector4fCount) { + D3D9DeviceLock lock = LockDevice(); + + return GetShaderConstants< + DxsoProgramTypes::PixelShader, + D3D9ConstantType::Float>( + StartRegister, + pConstantData, + Vector4fCount); + } + + + HRESULT 
STDMETHODCALLTYPE D3D9DeviceEx::SetPixelShaderConstantI( + UINT StartRegister, + const int* pConstantData, + UINT Vector4iCount) { + D3D9DeviceLock lock = LockDevice(); + + return SetShaderConstants< + DxsoProgramTypes::PixelShader, + D3D9ConstantType::Int>( + StartRegister, + pConstantData, + Vector4iCount); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetPixelShaderConstantI( + UINT StartRegister, + int* pConstantData, + UINT Vector4iCount) { + D3D9DeviceLock lock = LockDevice(); + + return GetShaderConstants< + DxsoProgramTypes::PixelShader, + D3D9ConstantType::Int>( + StartRegister, + pConstantData, + Vector4iCount); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetPixelShaderConstantB( + UINT StartRegister, + const BOOL* pConstantData, + UINT BoolCount) { + D3D9DeviceLock lock = LockDevice(); + + return SetShaderConstants< + DxsoProgramTypes::PixelShader, + D3D9ConstantType::Bool>( + StartRegister, + pConstantData, + BoolCount); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetPixelShaderConstantB( + UINT StartRegister, + BOOL* pConstantData, + UINT BoolCount) { + D3D9DeviceLock lock = LockDevice(); + + return GetShaderConstants< + DxsoProgramTypes::PixelShader, + D3D9ConstantType::Bool>( + StartRegister, + pConstantData, + BoolCount); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DrawRectPatch( + UINT Handle, + const float* pNumSegs, + const D3DRECTPATCH_INFO* pRectPatchInfo) { + static bool s_errorShown = false; + + if (!std::exchange(s_errorShown, true)) + Logger::warn("D3D9DeviceEx::DrawRectPatch: Stub"); + return D3DERR_INVALIDCALL; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DrawTriPatch( + UINT Handle, + const float* pNumSegs, + const D3DTRIPATCH_INFO* pTriPatchInfo) { + static bool s_errorShown = false; + + if (!std::exchange(s_errorShown, true)) + Logger::warn("D3D9DeviceEx::DrawTriPatch: Stub"); + return D3DERR_INVALIDCALL; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DeletePatch(UINT Handle) { + static bool s_errorShown = 
false; + + if (!std::exchange(s_errorShown, true)) + Logger::warn("D3D9DeviceEx::DeletePatch: Stub"); + return D3DERR_INVALIDCALL; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateQuery(D3DQUERYTYPE Type, IDirect3DQuery9** ppQuery) { + HRESULT hr = D3D9Query::QuerySupported(this, Type); + + if (ppQuery == nullptr || hr != D3D_OK) + return hr; + + try { + *ppQuery = ref(new D3D9Query(this, Type)); + return D3D_OK; + } + catch (const DxvkError & e) { + Logger::err(e.message()); + return D3DERR_INVALIDCALL; + } + } + + + // Ex Methods + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetConvolutionMonoKernel( + UINT width, + UINT height, + float* rows, + float* columns) { + // We don't advertise support for this. + return D3DERR_INVALIDCALL; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::ComposeRects( + IDirect3DSurface9* pSrc, + IDirect3DSurface9* pDst, + IDirect3DVertexBuffer9* pSrcRectDescs, + UINT NumRects, + IDirect3DVertexBuffer9* pDstRectDescs, + D3DCOMPOSERECTSOP Operation, + int Xoffset, + int Yoffset) { + Logger::warn("D3D9DeviceEx::ComposeRects: Stub"); + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetGPUThreadPriority(INT* pPriority) { + Logger::warn("D3D9DeviceEx::GetGPUThreadPriority: Stub"); + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetGPUThreadPriority(INT Priority) { + Logger::warn("D3D9DeviceEx::SetGPUThreadPriority: Stub"); + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::WaitForVBlank(UINT iSwapChain) { + if (unlikely(iSwapChain != 0)) + return D3DERR_INVALIDCALL; + + return m_implicitSwapchain->WaitForVBlank(); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CheckResourceResidency(IDirect3DResource9** pResourceArray, UINT32 NumResources) { + Logger::warn("D3D9DeviceEx::CheckResourceResidency: Stub"); + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetMaximumFrameLatency(UINT MaxLatency) { + D3D9DeviceLock lock = LockDevice(); + + if (MaxLatency == 0) + 
MaxLatency = DefaultFrameLatency; + + if (MaxLatency > MaxFrameLatency) + MaxLatency = MaxFrameLatency; + + m_frameLatency = MaxLatency; + + m_implicitSwapchain->SyncFrameLatency(); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetMaximumFrameLatency(UINT* pMaxLatency) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(pMaxLatency == nullptr)) + return D3DERR_INVALIDCALL; + + *pMaxLatency = m_frameLatency; + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CheckDeviceState(HWND hDestinationWindow) { + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::PresentEx( + const RECT* pSourceRect, + const RECT* pDestRect, + HWND hDestWindowOverride, + const RGNDATA* pDirtyRegion, + DWORD dwFlags) { + return m_implicitSwapchain->Present( + pSourceRect, + pDestRect, + hDestWindowOverride, + pDirtyRegion, + dwFlags); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateRenderTargetEx( + UINT Width, + UINT Height, + D3DFORMAT Format, + D3DMULTISAMPLE_TYPE MultiSample, + DWORD MultisampleQuality, + BOOL Lockable, + IDirect3DSurface9** ppSurface, + HANDLE* pSharedHandle, + DWORD Usage) { + InitReturnPtr(ppSurface); + + if (unlikely(ppSurface == nullptr)) + return D3DERR_INVALIDCALL; + + D3D9_COMMON_TEXTURE_DESC desc; + desc.Width = Width; + desc.Height = Height; + desc.Depth = 1; + desc.ArraySize = 1; + desc.MipLevels = 1; + desc.Usage = Usage | D3DUSAGE_RENDERTARGET; + desc.Format = EnumerateFormat(Format); + desc.Pool = D3DPOOL_DEFAULT; + desc.Discard = FALSE; + desc.MultiSample = MultiSample; + desc.MultisampleQuality = MultisampleQuality; + desc.IsBackBuffer = FALSE; + desc.IsAttachmentOnly = TRUE; + + if (FAILED(D3D9CommonTexture::NormalizeTextureProperties(this, &desc))) + return D3DERR_INVALIDCALL; + + try { + const Com<D3D9Surface> surface = new D3D9Surface(this, &desc, nullptr); + m_initializer->InitTexture(surface->GetCommonTexture()); + *ppSurface = surface.ref(); + return D3D_OK; + } + catch (const 
DxvkError& e) { + Logger::err(e.message()); + return D3DERR_OUTOFVIDEOMEMORY; + } + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateOffscreenPlainSurfaceEx( + UINT Width, + UINT Height, + D3DFORMAT Format, + D3DPOOL Pool, + IDirect3DSurface9** ppSurface, + HANDLE* pSharedHandle, + DWORD Usage) { + InitReturnPtr(ppSurface); + + if (unlikely(ppSurface == nullptr)) + return D3DERR_INVALIDCALL; + + D3D9_COMMON_TEXTURE_DESC desc; + desc.Width = Width; + desc.Height = Height; + desc.Depth = 1; + desc.ArraySize = 1; + desc.MipLevels = 1; + desc.Usage = Usage; + desc.Format = EnumerateFormat(Format); + desc.Pool = Pool; + desc.Discard = FALSE; + desc.MultiSample = D3DMULTISAMPLE_NONE; + desc.MultisampleQuality = 0; + desc.IsBackBuffer = FALSE; + desc.IsAttachmentOnly = Pool == D3DPOOL_DEFAULT; + + if (FAILED(D3D9CommonTexture::NormalizeTextureProperties(this, &desc))) + return D3DERR_INVALIDCALL; + + try { + const Com<D3D9Surface> surface = new D3D9Surface(this, &desc, nullptr); + m_initializer->InitTexture(surface->GetCommonTexture()); + *ppSurface = surface.ref(); + return D3D_OK; + } + catch (const DxvkError& e) { + Logger::err(e.message()); + return D3DERR_OUTOFVIDEOMEMORY; + } + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateDepthStencilSurfaceEx( + UINT Width, + UINT Height, + D3DFORMAT Format, + D3DMULTISAMPLE_TYPE MultiSample, + DWORD MultisampleQuality, + BOOL Discard, + IDirect3DSurface9** ppSurface, + HANDLE* pSharedHandle, + DWORD Usage) { + InitReturnPtr(ppSurface); + + if (unlikely(ppSurface == nullptr)) + return D3DERR_INVALIDCALL; + + D3D9_COMMON_TEXTURE_DESC desc; + desc.Width = Width; + desc.Height = Height; + desc.Depth = 1; + desc.ArraySize = 1; + desc.MipLevels = 1; + desc.Usage = Usage | D3DUSAGE_DEPTHSTENCIL; + desc.Format = EnumerateFormat(Format); + desc.Pool = D3DPOOL_DEFAULT; + desc.Discard = Discard; + desc.MultiSample = MultiSample; + desc.MultisampleQuality = MultisampleQuality; + desc.IsBackBuffer = FALSE; + desc.IsAttachmentOnly 
= TRUE; + + if (FAILED(D3D9CommonTexture::NormalizeTextureProperties(this, &desc))) + return D3DERR_INVALIDCALL; + + try { + const Com<D3D9Surface> surface = new D3D9Surface(this, &desc, nullptr); + m_initializer->InitTexture(surface->GetCommonTexture()); + *ppSurface = surface.ref(); + return D3D_OK; + } + catch (const DxvkError& e) { + Logger::err(e.message()); + return D3DERR_OUTOFVIDEOMEMORY; + } + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::ResetEx( + D3DPRESENT_PARAMETERS* pPresentationParameters, + D3DDISPLAYMODEEX* pFullscreenDisplayMode) { + D3D9DeviceLock lock = LockDevice(); + + HRESULT hr = ResetSwapChain(pPresentationParameters, pFullscreenDisplayMode); + if (FAILED(hr)) + return hr; + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetDisplayModeEx( + UINT iSwapChain, + D3DDISPLAYMODEEX* pMode, + D3DDISPLAYROTATION* pRotation) { + if (unlikely(iSwapChain != 0)) + return D3DERR_INVALIDCALL; + + return m_implicitSwapchain->GetDisplayModeEx(pMode, pRotation); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateAdditionalSwapChainEx( + D3DPRESENT_PARAMETERS* pPresentationParameters, + const D3DDISPLAYMODEEX* pFullscreenDisplayMode, + IDirect3DSwapChain9** ppSwapChain) { + D3D9DeviceLock lock = LockDevice(); + + InitReturnPtr(ppSwapChain); + + if (ppSwapChain == nullptr || pPresentationParameters == nullptr) + return D3DERR_INVALIDCALL; + + // Additional fullscreen swapchains are forbidden. + if (!pPresentationParameters->Windowed) + return D3DERR_INVALIDCALL; + + // We can't make another swapchain if we are fullscreen. 
+ if (!m_implicitSwapchain->GetPresentParams()->Windowed) + return D3DERR_INVALIDCALL; + + m_implicitSwapchain->Invalidate(pPresentationParameters->hDeviceWindow); + + try { + auto* swapchain = new D3D9SwapChainEx(this, pPresentationParameters, pFullscreenDisplayMode); + *ppSwapChain = ref(swapchain); + } + catch (const DxvkError & e) { + Logger::err(e.message()); + return D3DERR_NOTAVAILABLE; + } + + return D3D_OK; + } + + + HRESULT D3D9DeviceEx::SetStateSamplerState( + DWORD StateSampler, + D3DSAMPLERSTATETYPE Type, + DWORD Value) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(ShouldRecord())) + return m_recorder->SetStateSamplerState(StateSampler, Type, Value); + + auto& state = m_state.samplerStates; + + if (state[StateSampler][Type] == Value) + return D3D_OK; + + state[StateSampler][Type] = Value; + + if (Type == D3DSAMP_ADDRESSU + || Type == D3DSAMP_ADDRESSV + || Type == D3DSAMP_ADDRESSW + || Type == D3DSAMP_MAGFILTER + || Type == D3DSAMP_MINFILTER + || Type == D3DSAMP_MIPFILTER + || Type == D3DSAMP_MAXANISOTROPY + || Type == D3DSAMP_MIPMAPLODBIAS + || Type == D3DSAMP_MAXMIPLEVEL + || Type == D3DSAMP_BORDERCOLOR) + m_dirtySamplerStates |= 1u << StateSampler; + else if (Type == D3DSAMP_SRGBTEXTURE && (m_activeTextures & (1u << StateSampler))) + m_dirtyTextures |= 1u << StateSampler; + + constexpr DWORD Fetch4Enabled = MAKEFOURCC('G', 'E', 'T', '4'); + constexpr DWORD Fetch4Disabled = MAKEFOURCC('G', 'E', 'T', '1'); + + if (unlikely(Type == D3DSAMP_MIPMAPLODBIAS)) { + if (unlikely(Value == Fetch4Enabled)) { + m_fetch4Enabled |= 1u << StateSampler; + if (state[StateSampler][D3DSAMP_MAGFILTER] == D3DTEXF_POINT) + m_fetch4 |= 1u << StateSampler; + } + else if (unlikely(Value == Fetch4Disabled)) { + m_fetch4Enabled &= ~(1u << StateSampler); + m_fetch4 &= ~(1u << StateSampler); + } + } + + if (unlikely(Type == D3DSAMP_MAGFILTER && (m_fetch4Enabled & (1u << StateSampler)))) { + if (Value == D3DTEXF_POINT) + m_fetch4 |= 1u << StateSampler; + else + m_fetch4 
&= ~(1u << StateSampler); + } + + return D3D_OK; + } + + + HRESULT D3D9DeviceEx::SetStateTexture(DWORD StateSampler, IDirect3DBaseTexture9* pTexture) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(ShouldRecord())) + return m_recorder->SetStateTexture(StateSampler, pTexture); + + if (m_state.textures[StateSampler] == pTexture) + return D3D_OK; + + auto oldTexture = GetCommonTexture(m_state.textures[StateSampler]); + auto newTexture = GetCommonTexture(pTexture); + + // We need to check our ops and disable respective stages. + // Given we have transition from a null resource to + // a valid resource or vice versa. + if (StateSampler < 16) { + const uint32_t offset = StateSampler * 2; + const uint32_t textureType = newTexture != nullptr + ? uint32_t(newTexture->GetType() - D3DRTYPE_TEXTURE) + : 0; + const uint32_t textureBitMask = 0b11u << offset; + const uint32_t textureBits = textureType << offset; + + m_textureTypes &= ~textureBitMask; + m_textureTypes |= textureBits; + + if (newTexture == nullptr || oldTexture == nullptr) + m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader); + } + + DWORD oldUsage = oldTexture != nullptr ? oldTexture->Desc()->Usage : 0; + DWORD newUsage = newTexture != nullptr ? 
newTexture->Desc()->Usage : 0; + + if (newTexture != nullptr) { + const bool oldDepth = m_depthTextures & (1u << StateSampler); + const bool newDepth = newTexture->IsShadow(); + + if (oldDepth != newDepth) { + m_depthTextures &= ~(1u << StateSampler); + if (newDepth) + m_depthTextures |= 1u << StateSampler; + + m_dirtySamplerStates |= 1u << StateSampler; + } + } + + DWORD combinedUsage = oldUsage | newUsage; + + TextureChangePrivate(m_state.textures[StateSampler], pTexture); + + m_dirtyTextures |= 1u << StateSampler; + + UpdateActiveTextures(StateSampler, combinedUsage); + + return D3D_OK; + } + + + HRESULT D3D9DeviceEx::SetStateTransform(uint32_t idx, const D3DMATRIX* pMatrix) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(ShouldRecord())) + return m_recorder->SetStateTransform(idx, pMatrix); + + m_state.transforms[idx] = ConvertMatrix(pMatrix); + + m_flags.set(D3D9DeviceFlag::DirtyFFVertexData); + + if (idx == GetTransformIndex(D3DTS_VIEW) || idx >= GetTransformIndex(D3DTS_WORLD)) + m_flags.set(D3D9DeviceFlag::DirtyFFVertexBlend); + + return D3D_OK; + } + + + HRESULT D3D9DeviceEx::SetStateTextureStageState( + DWORD Stage, + D3D9TextureStageStateTypes Type, + DWORD Value) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(Stage >= caps::TextureStageCount)) + return D3DERR_INVALIDCALL; + + if (unlikely(Type >= TextureStageStateCount)) + return D3DERR_INVALIDCALL; + + if (unlikely(ShouldRecord())) + return m_recorder->SetStateTextureStageState(Stage, Type, Value); + + if (likely(m_state.textureStages[Stage][Type] != Value)) { + m_state.textureStages[Stage][Type] = Value; + + switch (Type) { + case DXVK_TSS_COLOROP: + case DXVK_TSS_COLORARG0: + case DXVK_TSS_COLORARG1: + case DXVK_TSS_COLORARG2: + case DXVK_TSS_ALPHAOP: + case DXVK_TSS_ALPHAARG0: + case DXVK_TSS_ALPHAARG1: + case DXVK_TSS_ALPHAARG2: + case DXVK_TSS_RESULTARG: + m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader); + break; + + case DXVK_TSS_TEXCOORDINDEX: + 
m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader); + break; + + case DXVK_TSS_TEXTURETRANSFORMFLAGS: + m_projectionBitfield &= ~(1 << Stage); + if (Value & D3DTTFF_PROJECTED) + m_projectionBitfield |= 1 << Stage; + + m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader); + m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader); + break; + + case DXVK_TSS_BUMPENVMAT00: + case DXVK_TSS_BUMPENVMAT01: + case DXVK_TSS_BUMPENVMAT10: + case DXVK_TSS_BUMPENVMAT11: + case DXVK_TSS_BUMPENVLSCALE: + case DXVK_TSS_BUMPENVLOFFSET: + case DXVK_TSS_CONSTANT: + m_flags.set(D3D9DeviceFlag::DirtySharedPixelShaderData); + break; + + default: break; + } + } + + return D3D_OK; + } + + + bool D3D9DeviceEx::IsExtended() { + return m_parent->IsExtended(); + } + + + bool D3D9DeviceEx::SupportsSWVP() { + return m_dxvkDevice->features().core.features.vertexPipelineStoresAndAtomics; + } + + + HWND D3D9DeviceEx::GetWindow() { + return m_window; + } + + + DxvkDeviceFeatures D3D9DeviceEx::GetDeviceFeatures(const Rc<DxvkAdapter>& adapter) { + DxvkDeviceFeatures supported = adapter->features(); + DxvkDeviceFeatures enabled = {}; + + // Geometry shaders are used for some meta ops + enabled.core.features.geometryShader = VK_TRUE; + enabled.core.features.robustBufferAccess = VK_TRUE; + enabled.extRobustness2.robustBufferAccess2 = supported.extRobustness2.robustBufferAccess2; + + enabled.extMemoryPriority.memoryPriority = supported.extMemoryPriority.memoryPriority; + + enabled.extShaderDemoteToHelperInvocation.shaderDemoteToHelperInvocation = supported.extShaderDemoteToHelperInvocation.shaderDemoteToHelperInvocation; + + enabled.extVertexAttributeDivisor.vertexAttributeInstanceRateDivisor = supported.extVertexAttributeDivisor.vertexAttributeInstanceRateDivisor; + enabled.extVertexAttributeDivisor.vertexAttributeInstanceRateZeroDivisor = supported.extVertexAttributeDivisor.vertexAttributeInstanceRateZeroDivisor; + + // Null Descriptors + enabled.extRobustness2.nullDescriptor = 
supported.extRobustness2.nullDescriptor; + + // ProcessVertices + enabled.core.features.vertexPipelineStoresAndAtomics = supported.core.features.vertexPipelineStoresAndAtomics; + + // DXVK Meta + enabled.core.features.shaderStorageImageWriteWithoutFormat = VK_TRUE; + enabled.core.features.imageCubeArray = VK_TRUE; + + // SM1 level hardware + enabled.core.features.depthClamp = VK_TRUE; + enabled.core.features.depthBiasClamp = VK_TRUE; + enabled.core.features.fillModeNonSolid = VK_TRUE; + enabled.core.features.pipelineStatisticsQuery = supported.core.features.pipelineStatisticsQuery; + enabled.core.features.sampleRateShading = VK_TRUE; + enabled.core.features.samplerAnisotropy = supported.core.features.samplerAnisotropy; + enabled.core.features.shaderClipDistance = VK_TRUE; + enabled.core.features.shaderCullDistance = VK_TRUE; + + // Ensure we support real BC formats and unofficial vendor ones. + enabled.core.features.textureCompressionBC = VK_TRUE; + + enabled.extDepthClipEnable.depthClipEnable = supported.extDepthClipEnable.depthClipEnable; + enabled.extHostQueryReset.hostQueryReset = supported.extHostQueryReset.hostQueryReset; + + // SM2 level hardware + enabled.core.features.occlusionQueryPrecise = VK_TRUE; + + // SM3 level hardware + enabled.core.features.multiViewport = VK_TRUE; + enabled.core.features.independentBlend = VK_TRUE; + + // D3D10 level hardware supports this in D3D9 native. + enabled.core.features.fullDrawIndexUint32 = VK_TRUE; + + // Enable depth bounds test if we support it. + enabled.core.features.depthBounds = supported.core.features.depthBounds; + + if (supported.extCustomBorderColor.customBorderColorWithoutFormat) { + enabled.extCustomBorderColor.customBorderColors = VK_TRUE; + enabled.extCustomBorderColor.customBorderColorWithoutFormat = VK_TRUE; + } + + return enabled; + } + + + void D3D9DeviceEx::DetermineConstantLayouts(bool canSWVP) { + m_vsLayout.floatCount = canSWVP ? 
uint32_t(m_d3d9Options.swvpFloatCount) : caps::MaxFloatConstantsVS; + m_vsLayout.intCount = canSWVP ? uint32_t(m_d3d9Options.swvpIntCount) : caps::MaxOtherConstants; + m_vsLayout.boolCount = canSWVP ? uint32_t(m_d3d9Options.swvpBoolCount) : caps::MaxOtherConstants; + m_vsLayout.bitmaskCount = align(m_vsLayout.boolCount, 32) / 32; + + m_psLayout.floatCount = caps::MaxFloatConstantsPS; + m_psLayout.intCount = caps::MaxOtherConstants; + m_psLayout.boolCount = caps::MaxOtherConstants; + m_psLayout.bitmaskCount = align(m_psLayout.boolCount, 32) / 32; + } + + + template<bool UpBuffer> + D3D9BufferSlice D3D9DeviceEx::AllocTempBuffer(VkDeviceSize size) { + constexpr VkDeviceSize DefaultSize = 1 << 20; + + VkMemoryPropertyFlags memoryFlags + = VK_MEMORY_PROPERTY_HOST_COHERENT_BIT + | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + + if constexpr (UpBuffer) { + memoryFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + + D3D9BufferSlice& currentSlice = UpBuffer ? m_upBuffer : m_managedUploadBuffer; + + if (size <= DefaultSize) { + if (unlikely(!currentSlice.slice.defined())) { + DxvkBufferCreateInfo info; + info.size = DefaultSize; + if constexpr (UpBuffer) { + info.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT + | VK_BUFFER_USAGE_INDEX_BUFFER_BIT; + info.access = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT + | VK_ACCESS_INDEX_READ_BIT; + info.stages = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + } else { + info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT; + info.stages = VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + info.access = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT; + } + + currentSlice.slice = DxvkBufferSlice(m_dxvkDevice->createBuffer(info, memoryFlags)); + currentSlice.mapPtr = currentSlice.slice.mapPtr(0); + } else if (unlikely(currentSlice.slice.length() < size)) { + auto physSlice = currentSlice.slice.buffer()->allocSlice(); + + currentSlice.slice = DxvkBufferSlice(currentSlice.slice.buffer()); + 
currentSlice.mapPtr = physSlice.mapPtr; + + EmitCs([ + cBuffer = currentSlice.slice.buffer(), + cSlice = physSlice + ] (DxvkContext* ctx) { + ctx->invalidateBuffer(cBuffer, cSlice); + }); + } + + D3D9BufferSlice result; + result.slice = currentSlice.slice.subSlice(0, size); + result.mapPtr = reinterpret_cast<char*>(currentSlice.mapPtr) + currentSlice.slice.offset(); + + VkDeviceSize adjust = align(size, CACHE_LINE_SIZE); + currentSlice.slice = currentSlice.slice.subSlice(adjust, currentSlice.slice.length() - adjust); + return result; + } else { + // Create a temporary buffer for very large allocations + DxvkBufferCreateInfo info; + info.size = size; + if constexpr (UpBuffer) { + info.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT + | VK_BUFFER_USAGE_INDEX_BUFFER_BIT; + info.access = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT + | VK_ACCESS_INDEX_READ_BIT; + info.stages = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + } else { + info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT; + info.stages = VK_PIPELINE_STAGE_TRANSFER_BIT; + info.access = VK_ACCESS_TRANSFER_READ_BIT; + } + + D3D9BufferSlice result; + result.slice = DxvkBufferSlice(m_dxvkDevice->createBuffer(info, memoryFlags)); + result.mapPtr = result.slice.mapPtr(0); + return result; + } + } + + bool D3D9DeviceEx::ShouldRecord() { + return m_recorder != nullptr && !m_recorder->IsApplying(); + } + + + D3D9_VK_FORMAT_MAPPING D3D9DeviceEx::LookupFormat( + D3D9Format Format) const { + return m_adapter->GetFormatMapping(Format); + } + + const DxvkFormatInfo* D3D9DeviceEx::UnsupportedFormatInfo( + D3D9Format Format) const { + return m_adapter->GetUnsupportedFormatInfo(Format); + } + + bool D3D9DeviceEx::WaitForResource( + const Rc<DxvkResource>& Resource, + DWORD MapFlags) { + // Wait for the any pending D3D9 command to be executed + // on the CS thread so that we can determine whether the + // resource is currently in use or not. 
+ + // Determine access type to wait for based on map mode + DxvkAccess access = (MapFlags & D3DLOCK_READONLY) + ? DxvkAccess::Write + : DxvkAccess::Read; + + if (!Resource->isInUse(access)) + SynchronizeCsThread(); + + if (Resource->isInUse(access)) { + if (MapFlags & D3DLOCK_DONOTWAIT) { + // We don't have to wait, but misbehaving games may + // still try to spin on `Map` until the resource is + // idle, so we should flush pending commands + FlushImplicit(FALSE); + return false; + } + else { + // Make sure pending commands using the resource get + // executed on the the GPU if we have to wait for it + Flush(); + SynchronizeCsThread(); + + Resource->waitIdle(access); + } + } + + return true; + } + + + uint32_t D3D9DeviceEx::CalcImageLockOffset( + uint32_t SlicePitch, + uint32_t RowPitch, + const DxvkFormatInfo* FormatInfo, + const D3DBOX* pBox) { + if (pBox == nullptr) + return 0; + + std::array<uint32_t, 3> offsets = { pBox->Front, pBox->Top, pBox->Left }; + + uint32_t elementSize = 1; + + if (FormatInfo != nullptr) { + elementSize = FormatInfo->elementSize; + + offsets[0] = offsets[0] / FormatInfo->blockSize.depth; + offsets[1] = offsets[1] / FormatInfo->blockSize.height; + offsets[2] = offsets[2] / FormatInfo->blockSize.width; + } + + return offsets[0] * SlicePitch + + offsets[1] * RowPitch + + offsets[2] * elementSize; + } + + + HRESULT D3D9DeviceEx::LockImage( + D3D9CommonTexture* pResource, + UINT Face, + UINT MipLevel, + D3DLOCKED_BOX* pLockedBox, + const D3DBOX* pBox, + DWORD Flags) { + D3D9DeviceLock lock = LockDevice(); + + UINT Subresource = pResource->CalcSubresource(Face, MipLevel); + + // Don't allow multiple lockings. 
+ if (unlikely(pResource->GetLocked(Subresource))) + return D3DERR_INVALIDCALL; + + if (unlikely((Flags & (D3DLOCK_DISCARD | D3DLOCK_READONLY)) == (D3DLOCK_DISCARD | D3DLOCK_READONLY))) + return D3DERR_INVALIDCALL; + + if (unlikely(!m_d3d9Options.allowDoNotWait)) + Flags &= ~D3DLOCK_DONOTWAIT; + + if (unlikely((Flags & (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE)) == (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE))) + Flags &= ~D3DLOCK_DISCARD; + + auto& desc = *(pResource->Desc()); + + bool alloced = pResource->CreateBufferSubresource(Subresource); + + const Rc<DxvkBuffer> mappedBuffer = pResource->GetBuffer(Subresource); + + auto& formatMapping = pResource->GetFormatMapping(); + + const DxvkFormatInfo* formatInfo = formatMapping.IsValid() + ? imageFormatInfo(formatMapping.FormatColor) : UnsupportedFormatInfo(pResource->Desc()->Format); + + auto subresource = pResource->GetSubresourceFromIndex( + formatInfo->aspectMask, Subresource); + + VkExtent3D levelExtent = pResource->GetExtentMip(MipLevel); + VkExtent3D blockCount = util::computeBlockCount(levelExtent, formatInfo->blockSize); + + const bool systemmem = desc.Pool == D3DPOOL_SYSTEMMEM; + const bool managed = IsPoolManaged(desc.Pool); + const bool scratch = desc.Pool == D3DPOOL_SCRATCH; + + bool fullResource = pBox == nullptr; + if (unlikely(!fullResource)) { + VkOffset3D lockOffset; + VkExtent3D lockExtent; + + ConvertBox(*pBox, lockOffset, lockExtent); + + fullResource = lockOffset == VkOffset3D{ 0, 0, 0 } + && lockExtent.width >= levelExtent.width + && lockExtent.height >= levelExtent.height + && lockExtent.depth >= levelExtent.depth; + } + + // If we are not locking the entire image + // a partial discard is meant to occur. + // We can't really implement that, so just ignore discard + // if we are not locking the full resource + + // DISCARD is also ignored for MANAGED and SYSTEMEM. + // DISCARD is not ignored for non-DYNAMIC unlike what the docs say. 
+ + if (!fullResource || desc.Pool != D3DPOOL_DEFAULT) + Flags &= ~D3DLOCK_DISCARD; + + if (desc.Usage & D3DUSAGE_WRITEONLY) + Flags &= ~D3DLOCK_READONLY; + + const bool readOnly = Flags & D3DLOCK_READONLY; + pResource->SetReadOnlyLocked(Subresource, readOnly); + + bool renderable = desc.Usage & (D3DUSAGE_RENDERTARGET | D3DUSAGE_DEPTHSTENCIL); + + // If we recently wrote to the texture on the gpu, + // then we need to copy -> buffer + // We are also always dirty if we are a render target, + // a depth stencil, or auto generate mipmaps. + bool wasWrittenByGPU = pResource->WasWrittenByGPU(Subresource) || renderable; + pResource->SetWrittenByGPU(Subresource, false); + + DxvkBufferSliceHandle physSlice; + + if (Flags & D3DLOCK_DISCARD) { + // We do not have to preserve the contents of the + // buffer if the entire image gets discarded. + physSlice = pResource->DiscardMapSlice(Subresource); + + EmitCs([ + cImageBuffer = std::move(mappedBuffer), + cBufferSlice = physSlice + ] (DxvkContext* ctx) { + ctx->invalidateBuffer(cImageBuffer, cBufferSlice); + }); + } + else if ((managed && !m_d3d9Options.evictManagedOnUnlock) || scratch || systemmem) { + // Managed and scratch resources + // are meant to be able to provide readback without waiting. + // We always keep a copy of them in system memory for this reason. + // No need to wait as its not in use. + physSlice = pResource->GetMappedSlice(Subresource); + + // We do not need to wait for the resource in the event the + // calling app promises not to overwrite data that is in use + // or is reading. Remember! This will only trigger for MANAGED resources + // that cannot get affected by GPU, therefore readonly is A-OK for NOT waiting. 
+ const bool usesStagingBuffer = pResource->DoesStagingBufferUploads(Subresource); + const bool skipWait = (scratch || managed || (systemmem && !wasWrittenByGPU)) + && (usesStagingBuffer || readOnly); + + if (alloced) { + std::memset(physSlice.mapPtr, 0, physSlice.length); + } + else if (!skipWait) { + if (!(Flags & D3DLOCK_DONOTWAIT) && !WaitForResource(mappedBuffer, D3DLOCK_DONOTWAIT)) + pResource->EnableStagingBufferUploads(Subresource); + + if (!WaitForResource(mappedBuffer, Flags)) + return D3DERR_WASSTILLDRAWING; + } + } + else { + physSlice = pResource->GetMappedSlice(Subresource); + + if (!alloced || wasWrittenByGPU) { + if (unlikely(wasWrittenByGPU)) { + Rc<DxvkImage> resourceImage = pResource->GetImage(); + + Rc<DxvkImage> mappedImage = resourceImage->info().sampleCount != 1 + ? pResource->GetResolveImage() + : std::move(resourceImage); + + // When using any map mode which requires the image contents + // to be preserved, and if the GPU has write access to the + // image, copy the current image contents into the buffer. + auto subresourceLayers = vk::makeSubresourceLayers(subresource); + + // We need to resolve this, some games + // lock MSAA render targets even though + // that's entirely illegal and they explicitly + // tell us that they do NOT want to lock them... 
+ if (resourceImage != nullptr) { + EmitCs([ + cMainImage = resourceImage, + cResolveImage = mappedImage, + cSubresource = subresourceLayers + ] (DxvkContext* ctx) { + VkImageResolve region; + region.srcSubresource = cSubresource; + region.srcOffset = VkOffset3D { 0, 0, 0 }; + region.dstSubresource = cSubresource; + region.dstOffset = VkOffset3D { 0, 0, 0 }; + region.extent = cMainImage->mipLevelExtent(cSubresource.mipLevel); + + if (cSubresource.aspectMask != (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { + ctx->resolveImage( + cResolveImage, cMainImage, region, + cMainImage->info().format); + } + else { + ctx->resolveDepthStencilImage( + cResolveImage, cMainImage, region, + VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR, + VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR); + } + }); + } + + VkFormat packedFormat = GetPackedDepthStencilFormat(desc.Format); + + EmitCs([ + cImageBuffer = mappedBuffer, + cImage = std::move(mappedImage), + cSubresources = subresourceLayers, + cLevelExtent = levelExtent, + cPackedFormat = packedFormat + ] (DxvkContext* ctx) { + if (cSubresources.aspectMask != (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { + ctx->copyImageToBuffer(cImageBuffer, 0, 4, 0, + cImage, cSubresources, VkOffset3D { 0, 0, 0 }, + cLevelExtent); + } else { + // Copying DS to a packed buffer is only supported for D24S8 and D32S8 + // right now so the 4 byte row alignment is guaranteed by the format size + ctx->copyDepthStencilImageToPackedBuffer( + cImageBuffer, 0, + VkOffset2D { 0, 0 }, + VkExtent2D { cLevelExtent.width, cLevelExtent.height }, + cImage, cSubresources, + VkOffset2D { 0, 0 }, + VkExtent2D { cLevelExtent.width, cLevelExtent.height }, + cPackedFormat); + } + }); + } + + if (!WaitForResource(mappedBuffer, Flags)) + return D3DERR_WASSTILLDRAWING; + } else { + // If we are a new alloc, and we weren't written by the GPU + // that means that we are a newly initialized + // texture, and hence can just memset -> 0 and + // avoid a wait here. 
+ std::memset(physSlice.mapPtr, 0, physSlice.length); + } + } + + const bool atiHack = desc.Format == D3D9Format::ATI1 || desc.Format == D3D9Format::ATI2; + // Set up map pointer. + if (atiHack) { + // We need to lie here. The game is expected to use this info and do a workaround. + // It's stupid. I know. + pLockedBox->RowPitch = align(std::max(desc.Width >> MipLevel, 1u), 4); + pLockedBox->SlicePitch = pLockedBox->RowPitch * std::max(desc.Height >> MipLevel, 1u); + } + else { + // Data is tightly packed within the mapped buffer. + pLockedBox->RowPitch = align(formatInfo->elementSize * blockCount.width, 4); + pLockedBox->SlicePitch = pLockedBox->RowPitch * blockCount.height; + } + + pResource->SetLocked(Subresource, true); + + const bool noDirtyUpdate = Flags & D3DLOCK_NO_DIRTY_UPDATE; + if (likely((pResource->IsManaged() && m_d3d9Options.evictManagedOnUnlock) + || ((desc.Pool == D3DPOOL_DEFAULT || !noDirtyUpdate) && !readOnly))) { + if (pBox && MipLevel != 0) { + D3DBOX scaledBox = *pBox; + scaledBox.Left <<= MipLevel; + scaledBox.Right = std::min(scaledBox.Right << MipLevel, pResource->Desc()->Width); + scaledBox.Top <<= MipLevel; + scaledBox.Bottom = std::min(scaledBox.Bottom << MipLevel, pResource->Desc()->Height); + scaledBox.Back <<= MipLevel; + scaledBox.Front = std::min(scaledBox.Front << MipLevel, pResource->Desc()->Depth); + pResource->AddDirtyBox(&scaledBox, Face); + } else { + pResource->AddDirtyBox(pBox, Face); + } + } + + if (managed && !m_d3d9Options.evictManagedOnUnlock && !readOnly) { + pResource->SetNeedsUpload(Subresource, true); + + for (uint32_t i : bit::BitMask(m_activeTextures)) { + // Guaranteed to not be nullptr... + auto texInfo = GetCommonTexture(m_state.textures[i]); + + if (texInfo == pResource) { + m_activeTexturesToUpload |= 1 << i; + // We can early out here, no need to add another index for this. + break; + } + } + } + + const uint32_t offset = CalcImageLockOffset( + pLockedBox->SlicePitch, + pLockedBox->RowPitch, + (!atiHack) ? 
formatInfo : nullptr, + pBox); + + + uint8_t* data = reinterpret_cast<uint8_t*>(physSlice.mapPtr); + data += offset; + pLockedBox->pBits = data; + return D3D_OK; + } + + + HRESULT D3D9DeviceEx::UnlockImage( + D3D9CommonTexture* pResource, + UINT Face, + UINT MipLevel) { + D3D9DeviceLock lock = LockDevice(); + + UINT Subresource = pResource->CalcSubresource(Face, MipLevel); + + // We weren't locked anyway! + if (unlikely(!pResource->GetLocked(Subresource))) + return D3D_OK; + + pResource->SetLocked(Subresource, false); + + // Flush image contents from staging if we aren't read only + // and we aren't deferring for managed. + const D3DBOX& box = pResource->GetDirtyBox(Face); + bool shouldFlush = pResource->GetMapMode() == D3D9_COMMON_TEXTURE_MAP_MODE_BACKED; + shouldFlush &= box.Left < box.Right && box.Top < box.Bottom && box.Front < box.Back; + shouldFlush &= !pResource->IsManaged() || m_d3d9Options.evictManagedOnUnlock; + + if (shouldFlush) { + this->FlushImage(pResource, Subresource); + if (!pResource->IsAnySubresourceLocked()) + pResource->ClearDirtyBoxes(); + } + + // Toss our staging buffer if we're not dynamic + // and we aren't managed (for sysmem copy.) 
    bool shouldToss  = pResource->GetMapMode() == D3D9_COMMON_TEXTURE_MAP_MODE_BACKED;
         shouldToss &= !pResource->IsDynamic();
         shouldToss &= !pResource->IsManaged() || m_d3d9Options.evictManagedOnUnlock;

    if (shouldToss) {
      pResource->DestroyBufferSubresource(Subresource);
      // The CPU-side copy is gone, so the next lock has to read
      // the contents back from the image.
      pResource->SetWrittenByGPU(Subresource, true);
    }

    return D3D_OK;
  }


  // Uploads the dirty region of a subresource from its mapped buffer
  // into the image. Formats without a conversion are copied with
  // copyBufferToImage; formats that need conversion are repacked into
  // a temporary buffer and handed to m_converter. Always returns D3D_OK.
  HRESULT D3D9DeviceEx::FlushImage(
        D3D9CommonTexture*      pResource,
        UINT                    Subresource) {
    const Rc<DxvkImage> image = pResource->GetImage();

    // Now that data has been written into the buffer,
    // we need to copy its contents into the image
    const DxvkBufferSliceHandle srcSlice = pResource->GetMappedSlice(Subresource);

    auto formatInfo  = imageFormatInfo(image->info().format);
    auto subresource = pResource->GetSubresourceFromIndex(
      formatInfo->aspectMask, Subresource);

    VkImageSubresourceLayers subresourceLayers = {
      subresource.aspectMask,
      subresource.mipLevel,
      subresource.arrayLayer, 1 };

    auto convertFormat = pResource->GetFormatMapping().ConversionFormatInfo;

    if (likely(convertFormat.FormatType == D3D9ConversionFormat_None)) {
      VkImageSubresourceLayers dstLayers = { VK_IMAGE_ASPECT_COLOR_BIT, subresource.mipLevel, subresource.arrayLayer, 1 };

      // The dirty box is scaled down to this mip level and snapped
      // to the format's block grid.
      const D3DBOX& box = pResource->GetDirtyBox(subresource.arrayLayer);
      VkOffset3D scaledBoxOffset = {
        int32_t(alignDown(box.Left  >> subresource.mipLevel, formatInfo->blockSize.width)),
        int32_t(alignDown(box.Top   >> subresource.mipLevel, formatInfo->blockSize.height)),
        int32_t(alignDown(box.Front >> subresource.mipLevel, formatInfo->blockSize.depth))
      };
      VkExtent3D scaledBoxExtent = util::computeMipLevelExtent({
        uint32_t(box.Right  - int32_t(alignDown(box.Left,  formatInfo->blockSize.width))),
        uint32_t(box.Bottom - int32_t(alignDown(box.Top,   formatInfo->blockSize.height))),
        uint32_t(box.Back   - int32_t(alignDown(box.Front, formatInfo->blockSize.depth)))
      }, subresource.mipLevel);
      VkExtent3D scaledBoxExtentBlockCount = util::computeBlockCount(scaledBoxExtent, formatInfo->blockSize);
      VkExtent3D scaledAlignedBoxExtent = util::computeBlockExtent(scaledBoxExtentBlockCount, formatInfo->blockSize);

      VkExtent3D texLevelExtent = image->mipLevelExtent(subresource.mipLevel);
      VkExtent3D texLevelExtentBlockCount = util::computeBlockCount(texLevelExtent, formatInfo->blockSize);

      // Keep the block-aligned extent inside the mip level.
      scaledAlignedBoxExtent.width  = std::min<uint32_t>(texLevelExtent.width  - scaledBoxOffset.x, scaledAlignedBoxExtent.width);
      scaledAlignedBoxExtent.height = std::min<uint32_t>(texLevelExtent.height - scaledBoxOffset.y, scaledAlignedBoxExtent.height);
      scaledAlignedBoxExtent.depth  = std::min<uint32_t>(texLevelExtent.depth  - scaledBoxOffset.z, scaledAlignedBoxExtent.depth);

      // Byte offset of the box origin inside the mapped buffer,
      // whose rows are padded to a multiple of 4 bytes.
      VkOffset3D boxOffsetBlockCount = util::computeBlockOffset(scaledBoxOffset, formatInfo->blockSize);
      VkDeviceSize pitch = align(texLevelExtentBlockCount.width * formatInfo->elementSize, 4);
      VkDeviceSize copySrcOffset = boxOffsetBlockCount.z * texLevelExtentBlockCount.height * pitch
                                 + boxOffsetBlockCount.y * pitch
                                 + boxOffsetBlockCount.x * formatInfo->elementSize;

      VkDeviceSize rowAlignment = 0;
      DxvkBufferSlice copySrcSlice;
      if (pResource->DoesStagingBufferUploads(Subresource)) {
        // Pack only the dirty blocks into a temporary buffer and copy from there.
        VkDeviceSize dirtySize = scaledBoxExtentBlockCount.width * scaledBoxExtentBlockCount.height * scaledBoxExtentBlockCount.depth * formatInfo->elementSize;
        D3D9BufferSlice slice = AllocTempBuffer<false>(dirtySize);
        copySrcSlice = slice.slice;
        void* srcData = reinterpret_cast<uint8_t*>(srcSlice.mapPtr) + copySrcOffset;
        util::packImageData(
          slice.mapPtr, srcData, scaledBoxExtentBlockCount, formatInfo->elementSize,
          pitch, pitch * texLevelExtentBlockCount.height);
      } else {
        // Copy straight out of the mapped buffer.
        copySrcSlice = DxvkBufferSlice(pResource->GetBuffer(Subresource), copySrcOffset, srcSlice.length);
        rowAlignment = pitch; // row alignment can act as the pitch parameter
      }

      EmitCs([
        cSrcSlice       = std::move(copySrcSlice),
        cDstImage       = image,
        cDstLayers      = dstLayers,
        cDstLevelExtent = scaledAlignedBoxExtent,
        cOffset         = scaledBoxOffset,
        cRowAlignment   = rowAlignment
      ] (DxvkContext* ctx) {
        ctx->copyBufferToImage(
          cDstImage,  cDstLayers,
          cOffset, cDstLevelExtent,
          cSrcSlice.buffer(), cSrcSlice.offset(),
          cRowAlignment, 0);
      });
    }
    else {
      // Note: this formatInfo shadows the one declared above and is
      // looked up from the mapped color format instead of the image format.
      const DxvkFormatInfo* formatInfo = imageFormatInfo(pResource->GetFormatMapping().FormatColor);
      VkExtent3D texLevelExtent = image->mipLevelExtent(subresource.mipLevel);
      VkExtent3D texLevelExtentBlockCount = util::computeBlockCount(texLevelExtent, formatInfo->blockSize);
      // Add more blocks for the other planes that we might have.
      // TODO: PLEASE CLEAN ME
      texLevelExtentBlockCount.height *= std::min(convertFormat.PlaneCount, 2u);

      // the converter can not handle the 4 aligned pitch so we always repack into a staging buffer
      D3D9BufferSlice slice = AllocTempBuffer<false>(srcSlice.length);
      VkDeviceSize pitch = align(texLevelExtentBlockCount.width * formatInfo->elementSize, 4);

      util::packImageData(
        slice.mapPtr, srcSlice.mapPtr, texLevelExtentBlockCount, formatInfo->elementSize,
        pitch, std::min(convertFormat.PlaneCount, 2u) * pitch * texLevelExtentBlockCount.height);

      Flush();
      SynchronizeCsThread();

      m_converter->ConvertFormat(
        convertFormat,
        image, subresourceLayers,
        slice.slice);
    }

    if (pResource->IsAutomaticMip())
      MarkTextureMipsDirty(pResource);

    return D3D_OK;
  }


  // Records a mipmap generation command for the texture's sample view.
  // Managed textures are uploaded first.
  void D3D9DeviceEx::EmitGenerateMips(
    D3D9CommonTexture* pResource) {
    if (pResource->IsManaged())
      UploadManagedTexture(pResource);

    EmitCs([
      cImageView = pResource->GetSampleView(false),
      cFilter    = pResource->GetMipFilter()
    ] (DxvkContext* ctx) {
      ctx->generateMipmaps(cImageView, DecodeFilter(cFilter));
    });
  }


  // Maps a buffer for CPU access and stores the pointer in *ppbData.
  //
  // Returns D3DERR_INVALIDCALL if ppbData is null, D3DERR_WASSTILLDRAWING
  // if WaitForResource reports the mapping buffer is still in use, and
  // D3D_OK otherwise. The returned pointer is the start of the mapped
  // slice plus OffsetToLock; the offset is not clamped to the buffer size.
  HRESULT D3D9DeviceEx::LockBuffer(
          D3D9CommonBuffer*       pResource,
          UINT                    OffsetToLock,
          UINT                    SizeToLock,
          void**                  ppbData,
          DWORD                   Flags) {
    D3D9DeviceLock lock = LockDevice();

    if (unlikely(ppbData == nullptr))
      return D3DERR_INVALIDCALL;

    if (!m_d3d9Options.allowDiscard)
      Flags &= ~D3DLOCK_DISCARD;

    auto& desc = *pResource->Desc();

    // Ignore DISCARD if NOOVERWRITE is set
    if (unlikely((Flags & (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE)) == (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE)))
      Flags &= ~D3DLOCK_DISCARD;

    // Ignore DISCARD and NOOVERWRITE if the buffer is not DEFAULT pool (tests + Halo 2)
    // The docs say DISCARD and NOOVERWRITE are ignored if the buffer is not DYNAMIC
    // but tests say otherwise!
    if (desc.Pool != D3DPOOL_DEFAULT)
      Flags &= ~(D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE);

    // Ignore DONOTWAIT if we are DYNAMIC
    // Yes... D3D9 is a good API.
    if (desc.Usage & D3DUSAGE_DYNAMIC)
      Flags &= ~D3DLOCK_DONOTWAIT;

    // We only bounds check for MANAGED.
    // (TODO: Apparently this is meant to happen for DYNAMIC too but I am not sure
    //  how that works given it is meant to be a DIRECT access..?)
    const bool respectUserBounds = !(Flags & D3DLOCK_DISCARD) &&
                                    SizeToLock != 0;

    // If we don't respect the bounds, encompass it all in our tests/checks
    // These values may be out of range and don't get clamped.
    // NOTE(review): desc.Size - offset wraps around if OffsetToLock is
    // larger than the buffer size - confirm callers never pass that.
    uint32_t offset = respectUserBounds ? OffsetToLock : 0;
    uint32_t size   = respectUserBounds ? std::min(SizeToLock, desc.Size - offset) : desc.Size;
    D3D9Range lockRange = D3D9Range(offset, offset + size);

    if ((desc.Pool == D3DPOOL_DEFAULT || !(Flags & D3DLOCK_NO_DIRTY_UPDATE)) && !(Flags & D3DLOCK_READONLY))
      pResource->DirtyRange().Conjoin(lockRange);

    Rc<DxvkBuffer> mappingBuffer = pResource->GetBuffer<D3D9_COMMON_BUFFER_TYPE_MAPPING>();

    DxvkBufferSliceHandle physSlice;

    if (Flags & D3DLOCK_DISCARD) {
      // Allocate a new backing slice for the buffer and set
      // it as the 'new' mapped slice. This assumes that the
      // only way to invalidate a buffer is by mapping it.
      physSlice = pResource->DiscardMapSlice();

      EmitCs([
        cBuffer      = std::move(mappingBuffer),
        cBufferSlice = physSlice
      ] (DxvkContext* ctx) {
        ctx->invalidateBuffer(cBuffer, cBufferSlice);
      });

      pResource->SetWrittenByGPU(false);
      pResource->GPUReadingRange().Clear();
    }
    else {
      // Use map pointer from previous map operation. This
      // way we don't have to synchronize with the CS thread
      // if the map mode is D3DLOCK_NOOVERWRITE.
      physSlice = pResource->GetMappedSlice();

      // NOOVERWRITE promises that they will not write in a currently used area.
      // Therefore we can skip waiting for these two cases.
      // We can also skip waiting if there is not dirty range overlap, if we are one of those resources.

      // If we are respecting the bounds ie. (MANAGED) we can test overlap
      // of our bounds, otherwise we just ignore this and go for it all the time.
      const bool wasWrittenByGPU = pResource->WasWrittenByGPU();
      const bool readOnly = Flags & D3DLOCK_READONLY;
      const bool noOverlap = !pResource->GPUReadingRange().Overlaps(lockRange);
      const bool noOverwrite = Flags & D3DLOCK_NOOVERWRITE;
      const bool usesStagingBuffer = pResource->DoesStagingBufferUploads();
      const bool directMapping = pResource->GetMapMode() == D3D9_COMMON_BUFFER_MAP_MODE_DIRECT;
      const bool skipWait = (!wasWrittenByGPU && (usesStagingBuffer || readOnly || (noOverlap && !directMapping))) || noOverwrite;
      if (!skipWait) {
        // Probe without blocking first: if the buffer is busy, switch it
        // to staging-buffer uploads before the real wait below.
        if (!(Flags & D3DLOCK_DONOTWAIT) && !WaitForResource(mappingBuffer, D3DLOCK_DONOTWAIT))
          pResource->EnableStagingBufferUploads();

        if (!WaitForResource(mappingBuffer, Flags))
          return D3DERR_WASSTILLDRAWING;

        pResource->SetWrittenByGPU(false);
        pResource->GPUReadingRange().Clear();
      }
    }

    uint8_t* data  = reinterpret_cast<uint8_t*>(physSlice.mapPtr);
    // The offset/size is not clamped to or affected by the desc size.
    data += OffsetToLock;

    *ppbData = reinterpret_cast<void*>(data);

    DWORD oldFlags = pResource->GetMapFlags();

    // We need to remove the READONLY flags from the map flags
    // if there was ever a non-readonly upload.
    if (!(Flags & D3DLOCK_READONLY))
      oldFlags &= ~D3DLOCK_READONLY;

    pResource->SetMapFlags(Flags | oldFlags);
    pResource->IncrementLockCount();

    return D3D_OK;
  }


  // Copies the buffer's dirty range from the mapping buffer (or from a
  // temporary copy when staging uploads are enabled) into the real
  // buffer, then merges the dirty range into the GPU reading range and
  // clears it. Always returns D3D_OK.
  HRESULT D3D9DeviceEx::FlushBuffer(
        D3D9CommonBuffer*       pResource) {
    auto dstBuffer = pResource->GetBufferSlice<D3D9_COMMON_BUFFER_TYPE_REAL>();
    auto srcSlice = pResource->GetMappedSlice();

    D3D9Range& range = pResource->DirtyRange();

    DxvkBufferSlice copySrcSlice;
    if (pResource->DoesStagingBufferUploads()) {
      D3D9BufferSlice slice = AllocTempBuffer<false>(range.max - range.min);
      copySrcSlice = slice.slice;
      void* srcData = reinterpret_cast<uint8_t*>(srcSlice.mapPtr) + range.min;
      memcpy(slice.mapPtr, srcData, range.max - range.min);
    } else {
      copySrcSlice = DxvkBufferSlice(pResource->GetBuffer<D3D9_COMMON_BUFFER_TYPE_MAPPING>(), range.min, range.max - range.min);
    }

    // range is a reference to the dirty range, which is cleared
    // below, so its bounds are captured by value here.
    EmitCs([
      cDstSlice  = dstBuffer,
      cSrcSlice  = copySrcSlice,
      cDstOffset = range.min,
      cLength    = range.max - range.min
    ] (DxvkContext* ctx) {
      ctx->copyBuffer(
        cDstSlice.buffer(),
        cDstSlice.offset() + cDstOffset,
        cSrcSlice.buffer(),
        cSrcSlice.offset(),
        cLength);
    });

    pResource->GPUReadingRange().Conjoin(pResource->DirtyRange());
    pResource->DirtyRange().Clear();

    return D3D_OK;
  }


  // Releases one lock on a buffer. The dirty range is flushed only when
  // the last lock is released, the buffer uses the BUFFER map mode, the
  // dirty range is not degenerate and the buffer is in the DEFAULT pool.
  HRESULT D3D9DeviceEx::UnlockBuffer(
        D3D9CommonBuffer*       pResource) {
    D3D9DeviceLock lock = LockDevice();

    if (pResource->DecrementLockCount() != 0)
      return D3D_OK;

    if (pResource->GetMapMode() != D3D9_COMMON_BUFFER_MAP_MODE_BUFFER)
      return D3D_OK;

    if (pResource->DirtyRange().IsDegenerate())
      return D3D_OK;

    pResource->SetMapFlags(0);

    if (pResource->Desc()->Pool != D3DPOOL_DEFAULT)
      return D3D_OK;

    FlushImplicit(FALSE);

    FlushBuffer(pResource);
+ return D3D_OK; + } + + + void D3D9DeviceEx::EmitCsChunk(DxvkCsChunkRef&& chunk) { + m_csThread.dispatchChunk(std::move(chunk)); + m_csIsBusy = true; + } + + + void D3D9DeviceEx::FlushImplicit(BOOL StrongHint) { + // Flush only if the GPU is about to go idle, in + // order to keep the number of submissions low. + uint32_t pending = m_dxvkDevice->pendingSubmissions(); + + if (StrongHint || pending <= MaxPendingSubmits) { + auto now = dxvk::high_resolution_clock::now(); + + uint32_t delay = MinFlushIntervalUs + + IncFlushIntervalUs * pending; + + // Prevent flushing too often in short intervals. + if (now - m_lastFlush >= std::chrono::microseconds(delay)) + Flush(); + } + } + + + void D3D9DeviceEx::SynchronizeCsThread() { + D3D9DeviceLock lock = LockDevice(); + + // Dispatch current chunk so that all commands + // recorded prior to this function will be run + FlushCsChunk(); + + if (m_csThread.isBusy()) + m_csThread.synchronize(); + } + + + void D3D9DeviceEx::SetupFPU() { + // Should match d3d9 float behaviour. + +#if defined(_MSC_VER) + // For MSVC we can use these cross arch and platform funcs to set the FPU. + // This will work on any platform, x86, x64, ARM, etc. + + // Clear exceptions. + _clearfp(); + + // Disable exceptions + _controlfp(_MCW_EM, _MCW_EM); + +#ifndef _WIN64 + // Use 24 bit precision + _controlfp(_PC_24, _MCW_PC); +#endif + + // Round to nearest + _controlfp(_RC_NEAR, _MCW_RC); +#elif (defined(__GNUC__) || defined(__MINGW32__)) && (defined(__i386__) || defined(__x86_64__) || defined(__ia64)) + // For GCC/MinGW we can use inline asm to set it. + // This only works for x86 and x64 processors however. + + uint16_t control; + + // Get current control word. + __asm__ __volatile__("fnstcw %0" : "=m" (*&control)); + + // Clear existing settings. + control &= 0xF0C0; + + // Disable exceptions + // Use 24 bit precision + // Round to nearest + control |= 0x003F; + + // Set new control word. 
    __asm__ __volatile__("fldcw %0" : : "m" (*&control));
#else
    Logger::warn("D3D9DeviceEx::SetupFPU: not supported on this arch.");
#endif
  }


  // Sums the sizes of all memory heaps reported by the adapter and
  // clamps the result to one byte below maxAvailableMemory megabytes.
  int64_t D3D9DeviceEx::DetermineInitialTextureMemory() {
    auto memoryProp = m_adapter->GetDXVKAdapter()->memoryProperties();

    VkDeviceSize availableTextureMemory = 0;

    for (uint32_t i = 0; i < memoryProp.memoryHeapCount; i++)
      availableTextureMemory += memoryProp.memoryHeaps[i].size;

    constexpr VkDeviceSize Megabytes = 1024 * 1024;

    // The value returned is a 32-bit value, so we need to clamp it.
    VkDeviceSize maxMemory = (VkDeviceSize(m_d3d9Options.maxAvailableMemory) * Megabytes) - 1;
    availableTextureMemory = std::min(availableTextureMemory, maxMemory);

    return int64_t(availableTextureMemory);
  }


  // Creates a host-visible, host-coherent buffer of the given size for
  // shader constants and binds it to the resource slot computed from
  // ShaderStage and BufferType.
  //
  // SSBO selects storage-buffer usage/access instead of uniform-buffer.
  // Returns the newly created buffer.
  Rc<DxvkBuffer> D3D9DeviceEx::CreateConstantBuffer(
          bool                SSBO,
          VkDeviceSize        Size,
          DxsoProgramType     ShaderStage,
          DxsoConstantBuffers BufferType) {
    DxvkBufferCreateInfo info = { };
    info.usage    = SSBO ? VK_BUFFER_USAGE_STORAGE_BUFFER_BIT : VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
    info.access   = SSBO ? VK_ACCESS_SHADER_READ_BIT : VK_ACCESS_UNIFORM_READ_BIT;
    info.size     = Size;
    info.stages   = ShaderStage == DxsoProgramType::VertexShader
      ? VK_PIPELINE_STAGE_VERTEX_SHADER_BIT
      : VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;

    VkMemoryPropertyFlags memoryFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
                                      | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;

    if (m_d3d9Options.deviceLocalConstantBuffers)
      memoryFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;

    Rc<DxvkBuffer> buffer = m_dxvkDevice->createBuffer(info, memoryFlags);

    const uint32_t slotId = computeResourceSlotId(
      ShaderStage, DxsoBindingType::ConstantBuffer,
      BufferType);

    EmitCs([
      cSlotId = slotId,
      cBuffer = buffer
    ] (DxvkContext* ctx) {
      ctx->bindResourceBuffer(cSlotId,
        DxvkBufferSlice(cBuffer, 0, cBuffer->info().size));
    });

    return buffer;
  }


  // Creates and binds every constant buffer the device uses: the VS/PS
  // shader constants, clip planes, fixed-function VS/PS data, shared PS
  // data and vertex blend data.
  void D3D9DeviceEx::CreateConstantBuffers() {
    m_consts[DxsoProgramTypes::VertexShader].buffer =
      CreateConstantBuffer(m_dxsoOptions.vertexConstantBufferAsSSBO,
                           m_vsLayout.totalSize(),
                           DxsoProgramType::VertexShader,
                           DxsoConstantBuffers::VSConstantBuffer);

    m_consts[DxsoProgramTypes::PixelShader].buffer =
      CreateConstantBuffer(false,
                           m_psLayout.totalSize(),
                           DxsoProgramType::PixelShader,
                           DxsoConstantBuffers::PSConstantBuffer);

    m_vsClipPlanes =
      CreateConstantBuffer(false,
                           caps::MaxClipPlanes * sizeof(D3D9ClipPlane),
                           DxsoProgramType::VertexShader,
                           DxsoConstantBuffers::VSClipPlanes);

    m_vsFixedFunction =
      CreateConstantBuffer(false,
                           sizeof(D3D9FixedFunctionVS),
                           DxsoProgramType::VertexShader,
                           DxsoConstantBuffers::VSFixedFunction);

    m_psFixedFunction =
      CreateConstantBuffer(false,
                           sizeof(D3D9FixedFunctionPS),
                           DxsoProgramType::PixelShader,
                           DxsoConstantBuffers::PSFixedFunction);

    m_psShared =
      CreateConstantBuffer(false,
                           sizeof(D3D9SharedPS),
                           DxsoProgramType::PixelShader,
                           DxsoConstantBuffers::PSShared);

    // The vertex blend buffer is always created as an SSBO; its
    // size depends on whether software vertex processing is possible.
    m_vsVertexBlend =
      CreateConstantBuffer(true,
                           CanSWVP()
                           ? sizeof(D3D9FixedFunctionVertexBlendDataSW)
                           : sizeof(D3D9FixedFunctionVertexBlendDataHW),
                           DxsoProgramType::VertexShader,
                           DxsoConstantBuffers::VSVertexBlendData);
  }


  // Copies the float and int constants that are in use (according to the
  // stage's constant set metadata) from Src into pData, which is
  // interpreted as a HardwareLayoutType. Shader is unused here.
  template <DxsoProgramType ShaderStage, typename HardwareLayoutType, typename SoftwareLayoutType, typename ShaderType>
  inline void D3D9DeviceEx::UploadHardwareConstantSet(void* pData, const SoftwareLayoutType& Src, const ShaderType& Shader) {
    const D3D9ConstantSets& constSet = m_consts[ShaderStage];

    auto* dst = reinterpret_cast<HardwareLayoutType*>(pData);

    if (constSet.meta.maxConstIndexF)
      std::memcpy(dst->fConsts, Src.fConsts, constSet.meta.maxConstIndexF * sizeof(Vector4));
    if (constSet.meta.maxConstIndexI)
      std::memcpy(dst->iConsts, Src.iConsts, constSet.meta.maxConstIndexI * sizeof(Vector4i));
  }


  // Copies the float, int and bool constants that are in use from Src
  // into pData at the byte offsets given by Layout. Always uses the
  // vertex shader constant set. Shader is unused here.
  template <typename SoftwareLayoutType, typename ShaderType>
  inline void D3D9DeviceEx::UploadSoftwareConstantSet(void* pData, const SoftwareLayoutType& Src, const D3D9ConstantLayout& Layout, const ShaderType& Shader) {
    const D3D9ConstantSets& constSet = m_consts[DxsoProgramType::VertexShader];

    auto dst = reinterpret_cast<uint8_t*>(pData);

    if (constSet.meta.maxConstIndexF)
      std::memcpy(dst + Layout.floatOffset(), Src.fConsts, constSet.meta.maxConstIndexF * sizeof(Vector4));
    if (constSet.meta.maxConstIndexI)
      std::memcpy(dst + Layout.intOffset(), Src.iConsts, constSet.meta.maxConstIndexI * sizeof(Vector4i));
    if (constSet.meta.maxConstIndexB)
      std::memcpy(dst + Layout.bitmaskOffset(), Src.bConsts, Layout.bitmaskSize());
  }


  // Uploads the constants for one shader stage if they are marked dirty:
  // allocates a fresh slice of the stage's constant buffer, swaps it in
  // on the CS thread and fills it through the hardware or software path.
  template <DxsoProgramType ShaderStage, typename HardwareLayoutType, typename SoftwareLayoutType, typename ShaderType>
  inline void D3D9DeviceEx::UploadConstantSet(const SoftwareLayoutType& Src, const D3D9ConstantLayout& Layout, const ShaderType& Shader) {
    D3D9ConstantSets& constSet = m_consts[ShaderStage];

    if (!constSet.dirty)
      return;

    constSet.dirty = false;

    DxvkBufferSliceHandle slice = constSet.buffer->allocSlice();

    EmitCs([
      cBuffer = constSet.buffer,
      cSlice  = slice
    ] (DxvkContext* ctx) {
      ctx->invalidateBuffer(cBuffer, cSlice);
    });

    // Pixel shaders always use the hardware layout; vertex shaders
    // only use the software layout when CanSWVP() is true.
    if constexpr (ShaderStage == DxsoProgramType::PixelShader)
      UploadHardwareConstantSet<ShaderStage, HardwareLayoutType>(slice.mapPtr, Src, Shader);
    else if (likely(!CanSWVP()))
      UploadHardwareConstantSet<ShaderStage, HardwareLayoutType>(slice.mapPtr, Src, Shader);
    else
      UploadSoftwareConstantSet(slice.mapPtr, Src, Layout, Shader);

    // Constants defined by the shader itself are written
    // on top of the values that were just uploaded.
    if (constSet.meta.needsConstantCopies) {
      Vector4* data = reinterpret_cast<Vector4*>(slice.mapPtr);

      auto& shaderConsts = GetCommonShader(Shader)->GetConstants();

      for (const auto& constant : shaderConsts)
        data[constant.uboIdx] = *reinterpret_cast<const Vector4*>(constant.float32);
    }
  }


  // Uploads the constants of the given stage from the current device state.
  template <DxsoProgramType ShaderStage>
  void D3D9DeviceEx::UploadConstants() {
    if constexpr (ShaderStage == DxsoProgramTypes::VertexShader)
      return UploadConstantSet<ShaderStage, D3D9ShaderConstantsVSHardware>(m_state.vsConsts, m_vsLayout, m_state.vertexShader);
    else
      return UploadConstantSet<ShaderStage, D3D9ShaderConstantsPS>        (m_state.psConsts, m_psLayout, m_state.pixelShader);
  }


  // Writes the clip planes into a fresh slice of the clip plane buffer
  // and clears the DirtyClipPlanes flag. Planes that are not enabled in
  // D3DRS_CLIPPLANEENABLE are written as default-constructed planes.
  void D3D9DeviceEx::UpdateClipPlanes() {
    m_flags.clr(D3D9DeviceFlag::DirtyClipPlanes);

    auto slice = m_vsClipPlanes->allocSlice();
    auto dst = reinterpret_cast<D3D9ClipPlane*>(slice.mapPtr);

    for (uint32_t i = 0; i < caps::MaxClipPlanes; i++) {
      dst[i] = (m_state.renderStates[D3DRS_CLIPPLANEENABLE] & (1 << i))
        ? m_state.clipPlanes[i]
        : D3D9ClipPlane();
    }

    EmitCs([
      cBuffer = m_vsClipPlanes,
      cSlice  = slice
    ] (DxvkContext* ctx) {
      ctx->invalidateBuffer(cBuffer, cSlice);
    });
  }


  // Records a push constant update of Length bytes at byte offset Offset.
  // The data behind pData is copied into the command, so the pointer
  // only has to stay valid for the duration of this call.
  template <uint32_t Offset, uint32_t Length>
  void D3D9DeviceEx::UpdatePushConstant(const void* pData) {
    struct ConstantData { uint8_t Data[Length]; };

    auto* constData = reinterpret_cast<const ConstantData*>(pData);

    EmitCs([
      cData = *constData
    ](DxvkContext* ctx) {
      ctx->pushConstants(Offset, Length, &cData);
    });
  }


  // Recomputes one render-state derived push constant from the current
  // render states (and viewport height for the point scale factors) and
  // records the update at the matching D3D9RenderStateInfo offset.
  template <D3D9RenderStateItem Item>
  void D3D9DeviceEx::UpdatePushConstant() {
    auto& rs = m_state.renderStates;

    if constexpr (Item == D3D9RenderStateItem::AlphaRef) {
      float alpha = float(rs[D3DRS_ALPHAREF] & 0xFF) / 255.0f;
      UpdatePushConstant<offsetof(D3D9RenderStateInfo, alphaRef), sizeof(float)>(&alpha);
    }
    else if constexpr (Item == D3D9RenderStateItem::FogColor) {
      Vector4 color;
      DecodeD3DCOLOR(D3DCOLOR(rs[D3DRS_FOGCOLOR]), color.data);
      UpdatePushConstant<offsetof(D3D9RenderStateInfo, fogColor), sizeof(D3D9RenderStateInfo::fogColor)>(&color);
    }
    else if constexpr (Item == D3D9RenderStateItem::FogDensity) {
      float density = bit::cast<float>(rs[D3DRS_FOGDENSITY]);
      UpdatePushConstant<offsetof(D3D9RenderStateInfo, fogDensity), sizeof(float)>(&density);
    }
    else if constexpr (Item == D3D9RenderStateItem::FogEnd) {
      float end = bit::cast<float>(rs[D3DRS_FOGEND]);
      UpdatePushConstant<offsetof(D3D9RenderStateInfo, fogEnd), sizeof(float)>(&end);
    }
    else if constexpr (Item == D3D9RenderStateItem::FogScale) {
      float end = bit::cast<float>(rs[D3DRS_FOGEND]);
      float start = bit::cast<float>(rs[D3DRS_FOGSTART]);

      // NOTE(review): no guard for end == start here.
      float scale = 1.0f / (end - start);
      UpdatePushConstant<offsetof(D3D9RenderStateInfo, fogScale), sizeof(float)>(&scale);
    }
    else if constexpr (Item == D3D9RenderStateItem::PointSize) {
      // The render state's raw bits are pushed unchanged as a float-sized value.
      UpdatePushConstant<offsetof(D3D9RenderStateInfo, pointSize), sizeof(float)>(&rs[D3DRS_POINTSIZE]);
    }
    else if constexpr (Item == D3D9RenderStateItem::PointSizeMin) {
      UpdatePushConstant<offsetof(D3D9RenderStateInfo, pointSizeMin), sizeof(float)>(&rs[D3DRS_POINTSIZE_MIN]);
    }
    else if constexpr (Item == D3D9RenderStateItem::PointSizeMax) {
      UpdatePushConstant<offsetof(D3D9RenderStateInfo, pointSizeMax), sizeof(float)>(&rs[D3DRS_POINTSIZE_MAX]);
    }
    else if constexpr (Item == D3D9RenderStateItem::PointScaleA) {
      float scale = bit::cast<float>(rs[D3DRS_POINTSCALE_A]);
      scale /= float(m_state.viewport.Height * m_state.viewport.Height);

      UpdatePushConstant<offsetof(D3D9RenderStateInfo, pointScaleA), sizeof(float)>(&scale);
    }
    else if constexpr (Item == D3D9RenderStateItem::PointScaleB) {
      float scale = bit::cast<float>(rs[D3DRS_POINTSCALE_B]);
      scale /= float(m_state.viewport.Height * m_state.viewport.Height);

      UpdatePushConstant<offsetof(D3D9RenderStateInfo, pointScaleB), sizeof(float)>(&scale);
    }
    else if constexpr (Item == D3D9RenderStateItem::PointScaleC) {
      float scale = bit::cast<float>(rs[D3DRS_POINTSCALE_C]);
      scale /= float(m_state.viewport.Height * m_state.viewport.Height);

      UpdatePushConstant<offsetof(D3D9RenderStateInfo, pointScaleC), sizeof(float)>(&scale);
    }
    else
      Logger::warn("D3D9: Invalid push constant set to update.");
  }



  // Flushes the initializer and converter, then, if any commands were
  // dispatched or recorded since the last flush, records a command list
  // flush, submits the current chunk and resets the implicit flush timer.
  void D3D9DeviceEx::Flush() {
    D3D9DeviceLock lock = LockDevice();

    m_initializer->Flush();
    m_converter->Flush();

    if (m_csIsBusy || !m_csChunk->empty()) {
      // Add commands to flush the threaded
      // context, then flush the command list
      EmitCs([](DxvkContext* ctx) {
        ctx->flushCommandList();
      });

      FlushCsChunk();

      // Reset flush timer used for implicit flushes
      m_lastFlush = dxvk::high_resolution_clock::now();
      m_csIsBusy  = false;
    }
  }


  // Recomputes the bit for render target slot `index` in m_boundRTs:
  // set when a surface is bound to the slot and it is not a null surface.
  inline void D3D9DeviceEx::UpdateBoundRTs(uint32_t index) {
    const uint32_t bit = 1 << index;

    m_boundRTs &= ~bit;

    if (m_state.renderTargets[index] != nullptr &&
       !m_state.renderTargets[index]->IsNull())
      m_boundRTs |= bit;
  }

+ inline void D3D9DeviceEx::UpdateActiveRTs(uint32_t index) { + if (!config::HazardTrackingEnabled) + return; + + const uint32_t bit = 1 << index; + + m_activeRTs &= ~bit; + + if ((m_boundRTs & bit) != 0 && + m_state.renderTargets[index]->GetBaseTexture() != nullptr && + m_state.renderStates[ColorWriteIndex(index)]) + m_activeRTs |= bit; + + UpdateActiveHazardsRT(bit); + } + + + inline void D3D9DeviceEx::UpdateActiveTextures(uint32_t index, DWORD combinedUsage) { + if (!config::ManagedUploadTrackingEnabled && !config::HazardTrackingEnabled && !config::MipGenTrackingEnabled) + return; + + const uint32_t bit = 1 << index; + + if (config::HazardTrackingEnabled) { + m_activeRTTextures &= ~bit; + m_activeDSTextures &= ~bit; + } + m_activeTextures &= ~bit; + if (config::ManagedUploadTrackingEnabled) + m_activeTexturesToUpload &= ~bit; + if (config::MipGenTrackingEnabled) + m_activeTexturesToGen &= ~bit; + + auto tex = GetCommonTexture(m_state.textures[index]); + if (tex != nullptr) { + m_activeTextures |= bit; + + if (unlikely(config::HazardTrackingEnabled && tex->IsRenderTarget())) + m_activeRTTextures |= bit; + + if (unlikely(config::HazardTrackingEnabled && tex->IsDepthStencil())) + m_activeDSTextures |= bit; + + if (unlikely(config::ManagedUploadTrackingEnabled && tex->NeedsAnyUpload())) + m_activeTexturesToUpload |= bit; + + if (unlikely(config::MipGenTrackingEnabled && tex->NeedsMipGen())) + m_activeTexturesToGen |= bit; + } + + if (config::HazardTrackingEnabled) { + if (unlikely(combinedUsage & D3DUSAGE_RENDERTARGET)) + UpdateActiveHazardsRT(UINT32_MAX); + + if (unlikely(combinedUsage & D3DUSAGE_DEPTHSTENCIL)) + UpdateActiveHazardsDS(bit); + } + } + + + inline void D3D9DeviceEx::UpdateActiveHazardsRT(uint32_t rtMask) { + if (!config::HazardTrackingEnabled) + return; + + auto masks = m_psShaderMasks; + masks.rtMask &= m_activeRTs & rtMask; + masks.samplerMask &= m_activeRTTextures; + + m_activeHazardsRT = m_activeHazardsRT & (~rtMask); + for (uint32_t rtIdx : 
bit::BitMask(masks.rtMask)) { + for (uint32_t samplerIdx : bit::BitMask(masks.samplerMask)) { + D3D9Surface* rtSurf = m_state.renderTargets[rtIdx].ptr(); + + IDirect3DBaseTexture9* rtBase = rtSurf->GetBaseTexture(); + IDirect3DBaseTexture9* texBase = m_state.textures[samplerIdx]; + + // HACK: Don't mark for hazards if we aren't rendering to mip 0! + // Some games use screenspace passes like this for blurring + // Sampling from mip 0 (texture) -> mip 1 (rt) + // and we'd trigger the hazard path otherwise which is unnecessary, + // and would shove us into GENERAL and emitting readback barriers. + if (likely(rtSurf->GetMipLevel() != 0 || rtBase != texBase)) + continue; + + m_activeHazardsRT |= 1 << rtIdx; + } + } + } + + + inline void D3D9DeviceEx::UpdateActiveHazardsDS(uint32_t texMask) { + if (!config::HazardTrackingEnabled) + return; + + m_activeHazardsDS = m_activeHazardsDS & (~texMask); + if (m_state.depthStencil != nullptr && + m_state.depthStencil->GetBaseTexture() != nullptr) { + uint32_t samplerMask = m_activeDSTextures & texMask; + for (uint32_t samplerIdx : bit::BitMask(samplerMask)) { + IDirect3DBaseTexture9* dsBase = m_state.depthStencil->GetBaseTexture(); + IDirect3DBaseTexture9* texBase = m_state.textures[samplerIdx]; + + if (likely(dsBase != texBase)) + continue; + + m_activeHazardsDS |= 1 << samplerIdx; + } + } + } + + + void D3D9DeviceEx::MarkRenderHazards() { + if (!config::HazardTrackingEnabled) + return; + + for (uint32_t rtIdx : bit::BitMask(m_activeHazardsRT)) { + // Guaranteed to not be nullptr... 
+ auto tex = m_state.renderTargets[rtIdx]->GetCommonTexture(); + if (unlikely(!tex->MarkHazardous())) { + TransitionImage(tex, VK_IMAGE_LAYOUT_GENERAL); + m_flags.set(D3D9DeviceFlag::DirtyFramebuffer); + } + } + } + + + void D3D9DeviceEx::UploadManagedTexture(D3D9CommonTexture* pResource) { + for (uint32_t subresource = 0; subresource < pResource->CountSubresources(); subresource++) { + if (!pResource->NeedsUpload(subresource) || pResource->GetBuffer(subresource) == nullptr) + continue; + + this->FlushImage(pResource, subresource); + } + + pResource->ClearDirtyBoxes(); + pResource->ClearNeedsUpload(); + } + + + void D3D9DeviceEx::UploadManagedTextures(uint32_t mask) { + if (!config::ManagedUploadTrackingEnabled) + return; + + // Guaranteed to not be nullptr... + for (uint32_t texIdx : bit::BitMask(mask)) + UploadManagedTexture(GetCommonTexture(m_state.textures[texIdx])); + + m_activeTexturesToUpload &= ~mask; + } + + + void D3D9DeviceEx::GenerateTextureMips(uint32_t mask) { + for (uint32_t texIdx : bit::BitMask(mask)) { + // Guaranteed to not be nullptr... + auto texInfo = GetCommonTexture(m_state.textures[texIdx]); + + if (texInfo->NeedsMipGen()) { + this->EmitGenerateMips(texInfo); + texInfo->SetNeedsMipGen(false); + } + } + + m_activeTexturesToGen &= ~mask; + } + + + void D3D9DeviceEx::MarkTextureMipsDirty(D3D9CommonTexture* pResource) { + if (!config::MipGenTrackingEnabled) + return; + + pResource->SetNeedsMipGen(true); + pResource->MarkAllWrittenByGPU(); + + for (uint32_t i : bit::BitMask(m_activeTextures)) { + // Guaranteed to not be nullptr... + auto texInfo = GetCommonTexture(m_state.textures[i]); + + if (texInfo == pResource) { + m_activeTexturesToGen |= 1 << i; + // We can early out here, no need to add another index for this. 
+ break; + } + } + } + + + void D3D9DeviceEx::MarkTextureMipsUnDirty(D3D9CommonTexture* pResource) { + if (!config::MipGenTrackingEnabled) + return; + + pResource->SetNeedsMipGen(false); + + for (uint32_t i : bit::BitMask(m_activeTextures)) { + // Guaranteed to not be nullptr... + auto texInfo = GetCommonTexture(m_state.textures[i]); + + if (texInfo == pResource) + m_activeTexturesToGen &= ~(1 << i); + } + } + + + void D3D9DeviceEx::MarkTextureUploaded(D3D9CommonTexture* pResource) { + if (!config::ManagedUploadTrackingEnabled) + return; + + for (uint32_t i : bit::BitMask(m_activeTextures)) { + // Guaranteed to not be nullptr... + auto texInfo = GetCommonTexture(m_state.textures[i]); + + if (texInfo == pResource) + m_activeTexturesToUpload &= ~(1 << i); + } + } + + + template <bool Points> + void D3D9DeviceEx::UpdatePointMode() { + if constexpr (!Points) { + m_lastPointMode = 0; + + EmitCs([](DxvkContext* ctx) { + ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::PointMode, 0); + }); + } + else { + auto& rs = m_state.renderStates; + + const bool scale = rs[D3DRS_POINTSCALEENABLE] && !UseProgrammableVS(); + const bool sprite = rs[D3DRS_POINTSPRITEENABLE]; + + const uint32_t scaleBit = scale ? 1u : 0u; + const uint32_t spriteBit = sprite ? 
2u : 0u; + + uint32_t mode = scaleBit | spriteBit; + + if (rs[D3DRS_POINTSCALEENABLE] && m_flags.test(D3D9DeviceFlag::DirtyPointScale)) { + m_flags.clr(D3D9DeviceFlag::DirtyPointScale); + + UpdatePushConstant<D3D9RenderStateItem::PointScaleA>(); + UpdatePushConstant<D3D9RenderStateItem::PointScaleB>(); + UpdatePushConstant<D3D9RenderStateItem::PointScaleC>(); + } + + if (unlikely(mode != m_lastPointMode)) { + EmitCs([cMode = mode] (DxvkContext* ctx) { + ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::PointMode, cMode); + }); + + m_lastPointMode = mode; + } + } + } + + + void D3D9DeviceEx::UpdateFog() { + auto& rs = m_state.renderStates; + + bool fogEnabled = rs[D3DRS_FOGENABLE]; + + bool pixelFog = rs[D3DRS_FOGTABLEMODE] != D3DFOG_NONE && fogEnabled; + bool vertexFog = rs[D3DRS_FOGVERTEXMODE] != D3DFOG_NONE && fogEnabled && !pixelFog; + + auto UpdateFogConstants = [&](D3DFOGMODE FogMode) { + if (m_flags.test(D3D9DeviceFlag::DirtyFogColor)) { + m_flags.clr(D3D9DeviceFlag::DirtyFogColor); + UpdatePushConstant<D3D9RenderStateItem::FogColor>(); + } + + if (FogMode == D3DFOG_LINEAR) { + if (m_flags.test(D3D9DeviceFlag::DirtyFogScale)) { + m_flags.clr(D3D9DeviceFlag::DirtyFogScale); + UpdatePushConstant<D3D9RenderStateItem::FogScale>(); + } + + if (m_flags.test(D3D9DeviceFlag::DirtyFogEnd)) { + m_flags.clr(D3D9DeviceFlag::DirtyFogEnd); + UpdatePushConstant<D3D9RenderStateItem::FogEnd>(); + } + } + else if (FogMode == D3DFOG_EXP || FogMode == D3DFOG_EXP2) { + if (m_flags.test(D3D9DeviceFlag::DirtyFogDensity)) { + m_flags.clr(D3D9DeviceFlag::DirtyFogDensity); + UpdatePushConstant<D3D9RenderStateItem::FogDensity>(); + } + } + }; + + if (vertexFog) { + D3DFOGMODE mode = D3DFOGMODE(rs[D3DRS_FOGVERTEXMODE]); + + UpdateFogConstants(mode); + + if (m_flags.test(D3D9DeviceFlag::DirtyFogState)) { + m_flags.clr(D3D9DeviceFlag::DirtyFogState); + + EmitCs([cMode = mode] (DxvkContext* ctx) { + ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, 
D3D9SpecConstantId::FogEnabled, true); + ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::VertexFogMode, cMode); + ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::PixelFogMode, D3DFOG_NONE); + }); + } + } + else if (pixelFog) { + D3DFOGMODE mode = D3DFOGMODE(rs[D3DRS_FOGTABLEMODE]); + + UpdateFogConstants(mode); + + if (m_flags.test(D3D9DeviceFlag::DirtyFogState)) { + m_flags.clr(D3D9DeviceFlag::DirtyFogState); + + EmitCs([cMode = mode] (DxvkContext* ctx) { + ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::FogEnabled, true); + ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::VertexFogMode, D3DFOG_NONE); + ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::PixelFogMode, cMode); + }); + } + } + else { + if (fogEnabled) + UpdateFogConstants(D3DFOG_NONE); + + if (m_flags.test(D3D9DeviceFlag::DirtyFogState)) { + m_flags.clr(D3D9DeviceFlag::DirtyFogState); + + EmitCs([cEnabled = fogEnabled] (DxvkContext* ctx) { + ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::FogEnabled, cEnabled); + ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::VertexFogMode, D3DFOG_NONE); + ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::PixelFogMode, D3DFOG_NONE); + }); + } + } + } + + + void D3D9DeviceEx::BindFramebuffer() { + m_flags.clr(D3D9DeviceFlag::DirtyFramebuffer); + + DxvkRenderTargets attachments; + + bool srgb = m_state.renderStates[D3DRS_SRGBWRITEENABLE]; + + // D3D9 doesn't have the concept of a framebuffer object, + // so we'll just create a new one every time the render + // target bindings are updated. Set up the attachments. 
+ VkSampleCountFlagBits sampleCount = VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM; + + for (uint32_t i : bit::BitMask(m_boundRTs)) { + const DxvkImageCreateInfo& rtImageInfo = m_state.renderTargets[i]->GetCommonTexture()->GetImage()->info(); + + if (likely(sampleCount == VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM)) + sampleCount = rtImageInfo.sampleCount; + else if (unlikely(sampleCount != rtImageInfo.sampleCount)) + continue; + + attachments.color[i] = { + m_state.renderTargets[i]->GetRenderTargetView(srgb), + m_state.renderTargets[i]->GetRenderTargetLayout() }; + } + + if (m_state.depthStencil != nullptr) { + const DxvkImageCreateInfo& dsImageInfo = m_state.depthStencil->GetCommonTexture()->GetImage()->info(); + const bool depthWrite = m_state.renderStates[D3DRS_ZWRITEENABLE]; + + if (likely(sampleCount == VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM || sampleCount == dsImageInfo.sampleCount)) { + attachments.depth = { + m_state.depthStencil->GetDepthStencilView(), + m_state.depthStencil->GetDepthStencilLayout(depthWrite, m_activeHazardsDS != 0) }; + } + } + + // Create and bind the framebuffer object to the context + EmitCs([ + cAttachments = std::move(attachments) + ] (DxvkContext* ctx) { + ctx->bindRenderTargets(cAttachments); + }); + } + + +

/**
 * \brief Translates the D3D9 viewport and scissor rect
 *
 * Clears the dirty flag, builds a flipped \c VkViewport from
 * the current D3D9 viewport, builds a scissor rect that is the
 * intersection of the application scissor rect and the viewport
 * (or the whole viewport if the scissor test is off), and
 * queues both for the context.
 */
void D3D9DeviceEx::BindViewportAndScissor() {
  m_flags.clr(D3D9DeviceFlag::DirtyViewportScissor);

  VkViewport viewport;
  VkRect2D   scissor;

  // D3D9's coordinate system has its origin in the bottom left,
  // but the viewport coordinates are aligned to the top-left
  // corner so we can get away with flipping the viewport.
  const D3DVIEWPORT9& vp = m_state.viewport;

  // Correctness Factor for 1/2 texel offset
  // We need to bias this slightly to make
  // imprecision in games happy.
  // Originally we did this only for powers of two
  // resolutions but since NEAREST filtering fixed to
  // truncate, we need to do this all the time now.
  float cf = 0.5f - (1.0f / 128.0f);

  viewport = VkViewport{
    float(vp.X)     + cf,    float(vp.Height + vp.Y) + cf,
    float(vp.Width),        -float(vp.Height),
    vp.MinZ,                 vp.MaxZ,
  };

  // Scissor rectangles. Vulkan does not provide an easy way
  // to disable the scissor test, so we'll have to set scissor
  // rects that are at least as large as the framebuffer.
  bool enableScissorTest = m_state.renderStates[D3DRS_SCISSORTESTENABLE];

  if (enableScissorTest) {
    RECT sr = m_state.scissorRect;

    // Top-left corner: never negative, never outside
    // the near edges of the viewport.
    VkOffset2D srPosA;
    srPosA.x = std::max<int32_t>(0,    sr.left);
    srPosA.x = std::max<int32_t>(vp.X, srPosA.x);
    srPosA.y = std::max<int32_t>(0,    sr.top);
    srPosA.y = std::max<int32_t>(vp.Y, srPosA.y);

    // Bottom-right corner: clamp to the far edges of the
    // viewport first and to the top-left corner last. Doing
    // it the other way round lets the corner end up in front
    // of srPosA when the scissor rect lies entirely beyond
    // the viewport, and the negative difference would then
    // wrap around to a gigantic unsigned extent.
    VkOffset2D srPosB;
    srPosB.x = std::min<int32_t>(vp.X + vp.Width,  sr.right);
    srPosB.x = std::max<int32_t>(srPosA.x,         srPosB.x);
    srPosB.y = std::min<int32_t>(vp.Y + vp.Height, sr.bottom);
    srPosB.y = std::max<int32_t>(srPosA.y,         srPosB.y);

    // Guaranteed non-negative by the clamping order above.
    VkExtent2D srSize;
    srSize.width  = uint32_t(srPosB.x - srPosA.x);
    srSize.height = uint32_t(srPosB.y - srPosA.y);

    scissor = VkRect2D{ srPosA, srSize };
  }
  else {
    scissor = VkRect2D{
      VkOffset2D { int32_t(vp.X), int32_t(vp.Y) },
      VkExtent2D { vp.Width,      vp.Height     }};
  }

  EmitCs([
    cViewport = viewport,
    cScissor  = scissor
  ] (DxvkContext* ctx) {
    ctx->setViewports(
      1,
      &cViewport,
      &cScissor);
  });
}


+ void D3D9DeviceEx::BindMultiSampleState() { + m_flags.clr(D3D9DeviceFlag::DirtyMultiSampleState); + + DxvkMultisampleState msState; + msState.sampleMask = m_flags.test(D3D9DeviceFlag::ValidSampleMask) + ? 
m_state.renderStates[D3DRS_MULTISAMPLEMASK] + : 0xffffffff; + msState.enableAlphaToCoverage = IsAlphaToCoverageEnabled(); + + EmitCs([ + cState = msState + ] (DxvkContext* ctx) { + ctx->setMultisampleState(cState); + }); + } + + + void D3D9DeviceEx::BindBlendState() { + m_flags.clr(D3D9DeviceFlag::DirtyBlendState); + + auto& state = m_state.renderStates; + + bool separateAlpha = state[D3DRS_SEPARATEALPHABLENDENABLE]; + + DxvkBlendMode mode; + mode.enableBlending = state[D3DRS_ALPHABLENDENABLE] != FALSE; + + D3D9BlendState color, alpha; + + color.Src = D3DBLEND(state[D3DRS_SRCBLEND]); + color.Dst = D3DBLEND(state[D3DRS_DESTBLEND]); + color.Op = D3DBLENDOP(state[D3DRS_BLENDOP]); + FixupBlendState(color); + + if (separateAlpha) { + alpha.Src = D3DBLEND(state[D3DRS_SRCBLENDALPHA]); + alpha.Dst = D3DBLEND(state[D3DRS_DESTBLENDALPHA]); + alpha.Op = D3DBLENDOP(state[D3DRS_BLENDOPALPHA]); + FixupBlendState(alpha); + } + else + alpha = color; + + mode.colorSrcFactor = DecodeBlendFactor(color.Src, false); + mode.colorDstFactor = DecodeBlendFactor(color.Dst, false); + mode.colorBlendOp = DecodeBlendOp (color.Op); + + mode.alphaSrcFactor = DecodeBlendFactor(alpha.Src, true); + mode.alphaDstFactor = DecodeBlendFactor(alpha.Dst, true); + mode.alphaBlendOp = DecodeBlendOp (alpha.Op); + + mode.writeMask = state[ColorWriteIndex(0)]; + + std::array<VkColorComponentFlags, 3> extraWriteMasks; + for (uint32_t i = 0; i < 3; i++) + extraWriteMasks[i] = state[ColorWriteIndex(i + 1)]; + + EmitCs([ + cMode = mode, + cWriteMasks = extraWriteMasks, + cAlphaMasks = m_alphaSwizzleRTs + ](DxvkContext* ctx) { + for (uint32_t i = 0; i < 4; i++) { + DxvkBlendMode mode = cMode; + if (i != 0) + mode.writeMask = cWriteMasks[i - 1]; + + const bool alphaSwizzle = cAlphaMasks & (1 << i); + + auto NormalizeFactor = [alphaSwizzle](VkBlendFactor Factor) { + if (alphaSwizzle) { + if (Factor == VK_BLEND_FACTOR_DST_ALPHA) + return VK_BLEND_FACTOR_ONE; + else if (Factor == VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA) + 
return VK_BLEND_FACTOR_ZERO; + } + + return Factor; + }; + + mode.colorSrcFactor = NormalizeFactor(mode.colorSrcFactor); + mode.colorDstFactor = NormalizeFactor(mode.colorDstFactor); + mode.alphaSrcFactor = NormalizeFactor(mode.alphaSrcFactor); + mode.alphaDstFactor = NormalizeFactor(mode.alphaDstFactor); + + ctx->setBlendMode(i, mode); + } + }); + } + + + void D3D9DeviceEx::BindBlendFactor() { + DxvkBlendConstants blendConstants; + DecodeD3DCOLOR( + D3DCOLOR(m_state.renderStates[D3DRS_BLENDFACTOR]), + reinterpret_cast<float*>(&blendConstants)); + + EmitCs([ + cBlendConstants = blendConstants + ](DxvkContext* ctx) { + ctx->setBlendConstants(cBlendConstants); + }); + } + + + void D3D9DeviceEx::BindDepthStencilState() { + m_flags.clr(D3D9DeviceFlag::DirtyDepthStencilState); + + auto& rs = m_state.renderStates; + + bool stencil = rs[D3DRS_STENCILENABLE]; + bool twoSidedStencil = stencil && rs[D3DRS_TWOSIDEDSTENCILMODE]; + + DxvkDepthStencilState state; + state.enableDepthTest = rs[D3DRS_ZENABLE] != FALSE; + state.enableDepthWrite = rs[D3DRS_ZWRITEENABLE] != FALSE; + state.enableStencilTest = stencil; + state.depthCompareOp = DecodeCompareOp(D3DCMPFUNC(rs[D3DRS_ZFUNC])); + + if (stencil) { + state.stencilOpFront.failOp = DecodeStencilOp(D3DSTENCILOP(rs[D3DRS_STENCILFAIL])); + state.stencilOpFront.passOp = DecodeStencilOp(D3DSTENCILOP(rs[D3DRS_STENCILPASS])); + state.stencilOpFront.depthFailOp = DecodeStencilOp(D3DSTENCILOP(rs[D3DRS_STENCILZFAIL])); + state.stencilOpFront.compareOp = DecodeCompareOp(D3DCMPFUNC (rs[D3DRS_STENCILFUNC])); + state.stencilOpFront.compareMask = uint32_t(rs[D3DRS_STENCILMASK]); + state.stencilOpFront.writeMask = uint32_t(rs[D3DRS_STENCILWRITEMASK]); + state.stencilOpFront.reference = 0; + } + else + state.stencilOpFront = VkStencilOpState(); + + if (twoSidedStencil) { + state.stencilOpBack.failOp = DecodeStencilOp(D3DSTENCILOP(rs[D3DRS_CCW_STENCILFAIL])); + state.stencilOpBack.passOp = DecodeStencilOp(D3DSTENCILOP(rs[D3DRS_CCW_STENCILPASS])); 
+ state.stencilOpBack.depthFailOp = DecodeStencilOp(D3DSTENCILOP(rs[D3DRS_CCW_STENCILZFAIL])); + state.stencilOpBack.compareOp = DecodeCompareOp(D3DCMPFUNC (rs[D3DRS_CCW_STENCILFUNC])); + state.stencilOpBack.compareMask = state.stencilOpFront.compareMask; + state.stencilOpBack.writeMask = state.stencilOpFront.writeMask; + state.stencilOpBack.reference = 0; + } + else + state.stencilOpBack = state.stencilOpFront; + + EmitCs([ + cState = state + ](DxvkContext* ctx) { + ctx->setDepthStencilState(cState); + }); + } + + + void D3D9DeviceEx::BindRasterizerState() { + m_flags.clr(D3D9DeviceFlag::DirtyRasterizerState); + + auto& rs = m_state.renderStates; + + DxvkRasterizerState state; + state.cullMode = DecodeCullMode(D3DCULL(rs[D3DRS_CULLMODE])); + state.depthBiasEnable = IsDepthBiasEnabled(); + state.depthClipEnable = true; + state.frontFace = VK_FRONT_FACE_CLOCKWISE; + state.polygonMode = DecodeFillMode(D3DFILLMODE(rs[D3DRS_FILLMODE])); + state.conservativeMode = VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT; + state.sampleCount = 0; + + EmitCs([ + cState = state + ](DxvkContext* ctx) { + ctx->setRasterizerState(cState); + }); + } + + + void D3D9DeviceEx::BindDepthBias() { + m_flags.clr(D3D9DeviceFlag::DirtyDepthBias); + + auto& rs = m_state.renderStates; + + float depthBias = bit::cast<float>(rs[D3DRS_DEPTHBIAS]) * m_depthBiasScale; + float slopeScaledDepthBias = bit::cast<float>(rs[D3DRS_SLOPESCALEDEPTHBIAS]); + + DxvkDepthBias biases; + biases.depthBiasConstant = depthBias; + biases.depthBiasSlope = slopeScaledDepthBias; + biases.depthBiasClamp = 0.0f; + + EmitCs([ + cBiases = biases + ](DxvkContext* ctx) { + ctx->setDepthBias(cBiases); + }); + } + + + void D3D9DeviceEx::BindAlphaTestState() { + m_flags.clr(D3D9DeviceFlag::DirtyAlphaTestState); + + auto& rs = m_state.renderStates; + + VkCompareOp alphaOp = IsAlphaTestEnabled() + ? 
DecodeCompareOp(D3DCMPFUNC(rs[D3DRS_ALPHAFUNC])) + : VK_COMPARE_OP_ALWAYS; + + EmitCs([cAlphaOp = alphaOp] (DxvkContext* ctx) { + ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::AlphaCompareOp, cAlphaOp); + }); + } + + + void D3D9DeviceEx::BindDepthStencilRefrence() { + auto& rs = m_state.renderStates; + + uint32_t ref = uint32_t(rs[D3DRS_STENCILREF]) & 0xff; + + EmitCs([cRef = ref] (DxvkContext* ctx) { + ctx->setStencilReference(cRef); + }); + } + + + void D3D9DeviceEx::BindSampler(DWORD Sampler) { + auto& state = m_state.samplerStates[Sampler]; + + D3D9SamplerKey key; + key.AddressU = D3DTEXTUREADDRESS(state[D3DSAMP_ADDRESSU]); + key.AddressV = D3DTEXTUREADDRESS(state[D3DSAMP_ADDRESSV]); + key.AddressW = D3DTEXTUREADDRESS(state[D3DSAMP_ADDRESSW]); + key.MagFilter = D3DTEXTUREFILTERTYPE(state[D3DSAMP_MAGFILTER]); + key.MinFilter = D3DTEXTUREFILTERTYPE(state[D3DSAMP_MINFILTER]); + key.MipFilter = D3DTEXTUREFILTERTYPE(state[D3DSAMP_MIPFILTER]); + key.MaxAnisotropy = state[D3DSAMP_MAXANISOTROPY]; + key.MipmapLodBias = bit::cast<float>(state[D3DSAMP_MIPMAPLODBIAS]); + key.MaxMipLevel = state[D3DSAMP_MAXMIPLEVEL]; + key.BorderColor = D3DCOLOR(state[D3DSAMP_BORDERCOLOR]); + key.Depth = m_depthTextures & (1u << Sampler); + + if (m_d3d9Options.samplerAnisotropy != -1) { + if (key.MagFilter == D3DTEXF_LINEAR) + key.MagFilter = D3DTEXF_ANISOTROPIC; + + if (key.MinFilter == D3DTEXF_LINEAR) + key.MinFilter = D3DTEXF_ANISOTROPIC; + + key.MaxAnisotropy = m_d3d9Options.samplerAnisotropy; + } + + NormalizeSamplerKey(key); + + auto samplerInfo = RemapStateSamplerShader(Sampler); + + const uint32_t slot = computeResourceSlotId( + samplerInfo.first, DxsoBindingType::Image, + samplerInfo.second); + + EmitCs([this, + cSlot = slot, + cKey = key + ] (DxvkContext* ctx) { + auto pair = m_samplers.find(cKey); + if (pair != m_samplers.end()) { + ctx->bindResourceSampler(cSlot, pair->second); + return; + } + + auto mipFilter = DecodeMipFilter(cKey.MipFilter); + + 
DxvkSamplerCreateInfo info; + info.addressModeU = DecodeAddressMode(cKey.AddressU); + info.addressModeV = DecodeAddressMode(cKey.AddressV); + info.addressModeW = DecodeAddressMode(cKey.AddressW); + info.compareToDepth = cKey.Depth; + info.compareOp = cKey.Depth ? VK_COMPARE_OP_LESS_OR_EQUAL : VK_COMPARE_OP_NEVER; + info.magFilter = DecodeFilter(cKey.MagFilter); + info.minFilter = DecodeFilter(cKey.MinFilter); + info.mipmapMode = mipFilter.MipFilter; + info.maxAnisotropy = float(cKey.MaxAnisotropy); + info.useAnisotropy = cKey.MaxAnisotropy > 1; + info.mipmapLodBias = cKey.MipmapLodBias; + info.mipmapLodMin = mipFilter.MipsEnabled ? float(cKey.MaxMipLevel) : 0; + info.mipmapLodMax = mipFilter.MipsEnabled ? FLT_MAX : 0; + info.usePixelCoord = VK_FALSE; + + DecodeD3DCOLOR(cKey.BorderColor, info.borderColor.float32); + + if (!m_dxvkDevice->features().extCustomBorderColor.customBorderColorWithoutFormat) { + // HACK: Let's get OPAQUE_WHITE border color over + // TRANSPARENT_BLACK if the border RGB is white. + if (info.borderColor.float32[0] == 1.0f + && info.borderColor.float32[1] == 1.0f + && info.borderColor.float32[2] == 1.0f + && !m_dxvkDevice->features().extCustomBorderColor.customBorderColors) { + // Then set the alpha to 1. 
+ info.borderColor.float32[3] = 1.0f; + } + } + + try { + auto sampler = m_dxvkDevice->createSampler(info); + + m_samplers.insert(std::make_pair(cKey, sampler)); + ctx->bindResourceSampler(cSlot, std::move(sampler)); + + m_samplerCount++; + } + catch (const DxvkError& e) { + Logger::err(e.message()); + } + }); + } + + + void D3D9DeviceEx::BindTexture(DWORD StateSampler) { + auto shaderSampler = RemapStateSamplerShader(StateSampler); + + uint32_t slot = computeResourceSlotId(shaderSampler.first, + DxsoBindingType::Image, uint32_t(shaderSampler.second)); + + const bool srgb = + m_state.samplerStates[StateSampler][D3DSAMP_SRGBTEXTURE] & 0x1; + + D3D9CommonTexture* commonTex = + GetCommonTexture(m_state.textures[StateSampler]); + + EmitCs([ + cSlot = slot, + cImageView = commonTex->GetSampleView(srgb) + ](DxvkContext* ctx) { + ctx->bindResourceView(cSlot, cImageView, nullptr); + }); + } + + + void D3D9DeviceEx::UnbindTextures(uint32_t mask) { + EmitCs([ + cMask = mask + ](DxvkContext* ctx) { + for (uint32_t i : bit::BitMask(cMask)) { + auto shaderSampler = RemapStateSamplerShader(i); + + uint32_t slot = computeResourceSlotId(shaderSampler.first, + DxsoBindingType::Image, uint32_t(shaderSampler.second)); + + ctx->bindResourceView(slot, nullptr, nullptr); + } + }); + } + + + void D3D9DeviceEx::UndirtySamplers(uint32_t mask) { + for (uint32_t i : bit::BitMask(mask)) + BindSampler(i); + + m_dirtySamplerStates &= ~mask; + } + + + void D3D9DeviceEx::UndirtyTextures(uint32_t usedMask) { + const uint32_t activeMask = usedMask & m_activeTextures; + const uint32_t inactiveMask = usedMask & ~m_activeTextures; + + for (uint32_t i : bit::BitMask(activeMask)) + BindTexture(i); + + if (inactiveMask) + UnbindTextures(inactiveMask); + + m_dirtyTextures &= ~usedMask; + } + + void D3D9DeviceEx::MarkTextureBindingDirty(IDirect3DBaseTexture9* texture) { + D3D9DeviceLock lock = LockDevice(); + + for (uint32_t i : bit::BitMask(m_activeTextures)) { + if (m_state.textures[i] == texture) + 
m_dirtyTextures |= 1u << i; + } + } + + + D3D9DrawInfo D3D9DeviceEx::GenerateDrawInfo( + D3DPRIMITIVETYPE PrimitiveType, + UINT PrimitiveCount, + UINT InstanceCount) { + D3D9DrawInfo drawInfo; + drawInfo.vertexCount = GetVertexCount(PrimitiveType, PrimitiveCount); + drawInfo.instanceCount = m_iaState.streamsInstanced & m_iaState.streamsUsed + ? InstanceCount + : 1u; + return drawInfo; + } + + + uint32_t D3D9DeviceEx::GetInstanceCount() const { + return std::max(m_state.streamFreq[0] & 0x7FFFFFu, 1u); + } + + + void D3D9DeviceEx::PrepareDraw(D3DPRIMITIVETYPE PrimitiveType) { + if (config::HazardTrackingEnabled) { + if (unlikely(m_activeHazardsRT != 0)) { + EmitCs([](DxvkContext* ctx) { + ctx->emitRenderTargetReadbackBarrier(); + }); + + if (m_d3d9Options.generalHazards) + MarkRenderHazards(); + } + + if (unlikely((m_lastHazardsDS == 0) != (m_activeHazardsDS == 0))) { + m_flags.set(D3D9DeviceFlag::DirtyFramebuffer); + m_lastHazardsDS = m_activeHazardsDS; + } + } + + for (uint32_t i = 0; i < caps::MaxStreams; i++) { + auto* vbo = GetCommonBuffer(m_state.vertexBuffers[i].vertexBuffer); + if (vbo != nullptr && vbo->NeedsUpload()) + FlushBuffer(vbo); + } + + const uint32_t usedSamplerMask = m_psShaderMasks.samplerMask | m_vsShaderMasks.samplerMask; + const uint32_t usedTextureMask = m_activeTextures & usedSamplerMask; + + if (config::ManagedUploadTrackingEnabled) { + const uint32_t texturesToUpload = m_activeTexturesToUpload & usedTextureMask; + if (unlikely(texturesToUpload != 0)) + UploadManagedTextures(texturesToUpload); + } + + if (config::MipGenTrackingEnabled) { + const uint32_t texturesToGen = m_activeTexturesToGen & usedTextureMask; + if (unlikely(texturesToGen != 0)) + GenerateTextureMips(texturesToGen); + } + + auto* ibo = GetCommonBuffer(m_state.indices); + if (ibo != nullptr && ibo->NeedsUpload()) + FlushBuffer(ibo); + + UpdateFog(); + + if (m_flags.test(D3D9DeviceFlag::DirtyFramebuffer)) + BindFramebuffer(); + + if 
(m_flags.test(D3D9DeviceFlag::DirtyViewportScissor)) + BindViewportAndScissor(); + + const uint32_t activeDirtySamplers = m_dirtySamplerStates & usedTextureMask; + if (activeDirtySamplers) + UndirtySamplers(activeDirtySamplers); + + const uint32_t usedDirtyTextures = m_dirtyTextures & usedSamplerMask; + if (usedDirtyTextures) + UndirtyTextures(usedDirtyTextures); + + if (m_flags.test(D3D9DeviceFlag::DirtyBlendState)) + BindBlendState(); + + if (m_flags.test(D3D9DeviceFlag::DirtyDepthStencilState)) + BindDepthStencilState(); + + if (m_flags.test(D3D9DeviceFlag::DirtyRasterizerState)) + BindRasterizerState(); + + if (m_flags.test(D3D9DeviceFlag::DirtyDepthBias)) + BindDepthBias(); + + if (m_flags.test(D3D9DeviceFlag::DirtyMultiSampleState)) + BindMultiSampleState(); + + if (m_flags.test(D3D9DeviceFlag::DirtyAlphaTestState)) + BindAlphaTestState(); + + if (m_flags.test(D3D9DeviceFlag::DirtyClipPlanes)) + UpdateClipPlanes(); + + if (PrimitiveType == D3DPT_POINTLIST) + UpdatePointMode<true>(); + else if (m_lastPointMode != 0) + UpdatePointMode<false>(); + + if (likely(UseProgrammableVS())) { + if (unlikely(m_flags.test(D3D9DeviceFlag::DirtyProgVertexShader))) { + m_flags.set(D3D9DeviceFlag::DirtyInputLayout); + + BindShader<DxsoProgramType::VertexShader>( + GetCommonShader(m_state.vertexShader), + GetVertexShaderPermutation()); + } + UploadConstants<DxsoProgramTypes::VertexShader>(); + + if (likely(!CanSWVP())) { + UpdateBoolSpecConstantVertex( + m_state.vsConsts.bConsts[0] & + m_consts[DxsoProgramType::VertexShader].meta.boolConstantMask); + } else + UpdateBoolSpecConstantVertex(0); + } + else { + UpdateBoolSpecConstantVertex(0); + UpdateFixedFunctionVS(); + } + + if (m_flags.test(D3D9DeviceFlag::DirtyInputLayout)) + BindInputLayout(); + + auto UpdateSamplerTypes = [&](uint32_t types, uint32_t projections, uint32_t fetch4) { + if (m_lastSamplerTypes != types) + UpdateSamplerSpecConsant(types); + + if (m_lastProjectionBitfield != projections) + 
UpdateProjectionSpecConstant(projections); + + if (m_lastFetch4 != fetch4) + UpdateFetch4SpecConstant(fetch4); + }; + + if (likely(UseProgrammablePS())) { + UploadConstants<DxsoProgramTypes::PixelShader>(); + + const uint32_t psTextureMask = usedTextureMask & ((1u << 16u) - 1u); + const uint32_t fetch4 = m_fetch4 & psTextureMask; + const uint32_t projected = m_projectionBitfield & psTextureMask; + + const auto& programInfo = GetCommonShader(m_state.pixelShader)->GetInfo(); + + if (programInfo.majorVersion() >= 2) + UpdateSamplerTypes(m_d3d9Options.forceSamplerTypeSpecConstants ? m_textureTypes : 0u, 0u, fetch4); + else + UpdateSamplerTypes(m_textureTypes, programInfo.minorVersion() >= 4 ? 0u : projected, fetch4); // For implicit samplers... + + UpdateBoolSpecConstantPixel( + m_state.psConsts.bConsts[0] & + m_consts[DxsoProgramType::PixelShader].meta.boolConstantMask); + } + else { + UpdateBoolSpecConstantPixel(0); + UpdateSamplerTypes(0u, 0u, 0u); + + UpdateFixedFunctionPS(); + } + + const uint32_t depthTextureMask = m_depthTextures & usedTextureMask; + if (depthTextureMask != m_lastSamplerDepthMode) + UpdateSamplerDepthModeSpecConstant(depthTextureMask); + + if (m_flags.test(D3D9DeviceFlag::DirtySharedPixelShaderData) && config::FixedFunctionEnabled) { + m_flags.clr(D3D9DeviceFlag::DirtySharedPixelShaderData); + + DxvkBufferSliceHandle slice = m_psShared->allocSlice(); + + EmitCs([ + cBuffer = m_psShared, + cSlice = slice + ] (DxvkContext* ctx) { + ctx->invalidateBuffer(cBuffer, cSlice); + }); + + D3D9SharedPS* data = reinterpret_cast<D3D9SharedPS*>(slice.mapPtr); + + for (uint32_t i = 0; i < caps::TextureStageCount; i++) { + DecodeD3DCOLOR(D3DCOLOR(m_state.textureStages[i][DXVK_TSS_CONSTANT]), data->Stages[i].Constant); + + // Flip major-ness so we can get away with a nice easy + // dot in the shader without complex access + data->Stages[i].BumpEnvMat[0][0] = bit::cast<float>(m_state.textureStages[i][DXVK_TSS_BUMPENVMAT00]); + data->Stages[i].BumpEnvMat[1][0] = 
bit::cast<float>(m_state.textureStages[i][DXVK_TSS_BUMPENVMAT01]); + data->Stages[i].BumpEnvMat[0][1] = bit::cast<float>(m_state.textureStages[i][DXVK_TSS_BUMPENVMAT10]); + data->Stages[i].BumpEnvMat[1][1] = bit::cast<float>(m_state.textureStages[i][DXVK_TSS_BUMPENVMAT11]); + + data->Stages[i].BumpEnvLScale = bit::cast<float>(m_state.textureStages[i][DXVK_TSS_BUMPENVLSCALE]); + data->Stages[i].BumpEnvLOffset = bit::cast<float>(m_state.textureStages[i][DXVK_TSS_BUMPENVLOFFSET]); + } + } + + if (m_flags.test(D3D9DeviceFlag::DirtyDepthBounds)) { + m_flags.clr(D3D9DeviceFlag::DirtyDepthBounds); + + DxvkDepthBounds db; + db.enableDepthBounds = (m_state.renderStates[D3DRS_ADAPTIVETESS_X] == uint32_t(D3D9Format::NVDB)); + db.minDepthBounds = bit::cast<float>(m_state.renderStates[D3DRS_ADAPTIVETESS_Z]); + db.maxDepthBounds = bit::cast<float>(m_state.renderStates[D3DRS_ADAPTIVETESS_W]); + + EmitCs([ + cDepthBounds = db + ] (DxvkContext* ctx) { + ctx->setDepthBounds(cDepthBounds); + }); + } + } + + + template <DxsoProgramType ShaderStage> + void D3D9DeviceEx::BindShader( + const D3D9CommonShader* pShaderModule, + D3D9ShaderPermutation Permutation) { + EmitCs([ + cShader = pShaderModule->GetShader(Permutation) + ] (DxvkContext* ctx) { + ctx->bindShader(GetShaderStage(ShaderStage), cShader); + }); + } + + + void D3D9DeviceEx::BindInputLayout() { + m_flags.clr(D3D9DeviceFlag::DirtyInputLayout); + + if (m_state.vertexDecl == nullptr) { + EmitCs([&cIaState = m_iaState] (DxvkContext* ctx) { + cIaState.streamsUsed = 0; + ctx->setInputLayout(0, nullptr, 0, nullptr); + }); + } + else { + std::array<uint32_t, caps::MaxStreams> streamFreq; + + for (uint32_t i = 0; i < caps::MaxStreams; i++) + streamFreq[i] = m_state.streamFreq[i]; + + Com<D3D9VertexDecl, false> vertexDecl = m_state.vertexDecl; + Com<D3D9VertexShader, false> vertexShader; + + if (UseProgrammableVS()) + vertexShader = m_state.vertexShader; + + EmitCs([ + &cIaState = m_iaState, + cVertexDecl = std::move(vertexDecl), + 
cVertexShader = std::move(vertexShader), + cStreamsInstanced = m_instancedData, + cStreamFreq = streamFreq + ] (DxvkContext* ctx) { + cIaState.streamsInstanced = cStreamsInstanced; + cIaState.streamsUsed = 0; + + const auto& elements = cVertexDecl->GetElements(); + + std::array<DxvkVertexAttribute, 2 * caps::InputRegisterCount> attrList; + std::array<DxvkVertexBinding, 2 * caps::InputRegisterCount> bindList; + + uint32_t attrMask = 0; + uint32_t bindMask = 0; + + const auto& isgn = cVertexShader != nullptr + ? GetCommonShader(cVertexShader)->GetIsgn() + : GetFixedFunctionIsgn(); + + for (uint32_t i = 0; i < isgn.elemCount; i++) { + const auto& decl = isgn.elems[i]; + + DxvkVertexAttribute attrib; + attrib.location = i; + attrib.binding = NullStreamIdx; + attrib.format = VK_FORMAT_R32G32B32A32_SFLOAT; + attrib.offset = 0; + + for (const auto& element : elements) { + DxsoSemantic elementSemantic = { static_cast<DxsoUsage>(element.Usage), element.UsageIndex }; + if (elementSemantic.usage == DxsoUsage::PositionT) + elementSemantic.usage = DxsoUsage::Position; + + if (elementSemantic == decl.semantic) { + attrib.binding = uint32_t(element.Stream); + attrib.format = DecodeDecltype(D3DDECLTYPE(element.Type)); + attrib.offset = element.Offset; + + cIaState.streamsUsed |= 1u << attrib.binding; + break; + } + } + + attrList[i] = attrib; + + DxvkVertexBinding binding; + binding.binding = attrib.binding; + + uint32_t instanceData = cStreamFreq[binding.binding % caps::MaxStreams]; + if (instanceData & D3DSTREAMSOURCE_INSTANCEDATA) { + binding.fetchRate = instanceData & 0x7FFFFF; // Remove instance packed-in flags in the data. + binding.inputRate = VK_VERTEX_INPUT_RATE_INSTANCE; + } + else { + binding.fetchRate = 0; + binding.inputRate = VK_VERTEX_INPUT_RATE_VERTEX; + } + + // Check if the binding was already defined. 
+ bool bindingDefined = false; + + for (uint32_t j = 0; j < i; j++) { + uint32_t bindingId = attrList.at(j).binding; + + if (binding.binding == bindingId) { + bindingDefined = true; + } + } + + if (!bindingDefined) + bindList.at(binding.binding) = binding; + + attrMask |= 1u << i; + bindMask |= 1u << binding.binding; + } + + // Compact the attribute and binding lists to filter + // out attributes and bindings not used by the shader + uint32_t attrCount = CompactSparseList(attrList.data(), attrMask); + uint32_t bindCount = CompactSparseList(bindList.data(), bindMask); + + ctx->setInputLayout( + attrCount, attrList.data(), + bindCount, bindList.data()); + }); + } + } + + + void D3D9DeviceEx::BindVertexBuffer( + UINT Slot, + D3D9VertexBuffer* pBuffer, + UINT Offset, + UINT Stride) { + EmitCs([ + cSlotId = Slot, + cBufferSlice = pBuffer != nullptr ? + pBuffer->GetCommonBuffer()->GetBufferSlice<D3D9_COMMON_BUFFER_TYPE_REAL>(Offset) + : DxvkBufferSlice(), + cStride = pBuffer != nullptr ? Stride : 0 + ] (DxvkContext* ctx) { + ctx->bindVertexBuffer(cSlotId, cBufferSlice, cStride); + }); + } + + void D3D9DeviceEx::BindIndices() { + D3D9CommonBuffer* buffer = GetCommonBuffer(m_state.indices); + + D3D9Format format = buffer != nullptr + ? buffer->Desc()->Format + : D3D9Format::INDEX32; + + const VkIndexType indexType = DecodeIndexType(format); + + EmitCs([ + cBufferSlice = buffer != nullptr ? 
buffer->GetBufferSlice<D3D9_COMMON_BUFFER_TYPE_REAL>() : DxvkBufferSlice(), + cIndexType = indexType + ](DxvkContext* ctx) { + ctx->bindIndexBuffer(cBufferSlice, cIndexType); + }); + } + + + void D3D9DeviceEx::Begin(D3D9Query* pQuery) { + D3D9DeviceLock lock = LockDevice(); + + EmitCs([cQuery = Com<D3D9Query, false>(pQuery)](DxvkContext* ctx) { + cQuery->Begin(ctx); + }); + } + + + void D3D9DeviceEx::End(D3D9Query* pQuery) { + D3D9DeviceLock lock = LockDevice(); + + EmitCs([cQuery = Com<D3D9Query, false>(pQuery)](DxvkContext* ctx) { + cQuery->End(ctx); + }); + + pQuery->NotifyEnd(); + if (unlikely(pQuery->IsEvent())) { + pQuery->IsStalling() + ? Flush() + : FlushImplicit(TRUE); + } else if (pQuery->IsStalling()) { + FlushImplicit(FALSE); + } + } + + + void D3D9DeviceEx::SetVertexBoolBitfield(uint32_t idx, uint32_t mask, uint32_t bits) { + m_state.vsConsts.bConsts[idx] &= ~mask; + m_state.vsConsts.bConsts[idx] |= bits & mask; + + m_consts[DxsoProgramTypes::VertexShader].dirty = true; + } + + + void D3D9DeviceEx::SetPixelBoolBitfield(uint32_t idx, uint32_t mask, uint32_t bits) { + m_state.psConsts.bConsts[idx] &= ~mask; + m_state.psConsts.bConsts[idx] |= bits & mask; + + m_consts[DxsoProgramTypes::PixelShader].dirty = true; + } + + + HRESULT D3D9DeviceEx::CreateShaderModule( + D3D9CommonShader* pShaderModule, + VkShaderStageFlagBits ShaderStage, + const DWORD* pShaderBytecode, + const DxsoModuleInfo* pModuleInfo) { + try { + m_shaderModules->GetShaderModule(this, pShaderModule, + ShaderStage, pModuleInfo, pShaderBytecode); + + return D3D_OK; + } + catch (const DxvkError& e) { + Logger::err(e.message()); + return D3DERR_INVALIDCALL; + } + } + + + template < + DxsoProgramType ProgramType, + D3D9ConstantType ConstantType, + typename T> + HRESULT D3D9DeviceEx::SetShaderConstants( + UINT StartRegister, + const T* pConstantData, + UINT Count) { + const uint32_t regCountHardware = DetermineHardwareRegCount<ProgramType, ConstantType>(); + constexpr uint32_t regCountSoftware 
= DetermineSoftwareRegCount<ProgramType, ConstantType>(); + + if (unlikely(StartRegister + Count > regCountSoftware)) + return D3DERR_INVALIDCALL; + + Count = UINT( + std::max<INT>( + std::clamp<INT>(Count + StartRegister, 0, regCountHardware) - INT(StartRegister), + 0)); + + if (unlikely(Count == 0)) + return D3D_OK; + + if (unlikely(pConstantData == nullptr)) + return D3DERR_INVALIDCALL; + + if (unlikely(ShouldRecord())) + return m_recorder->SetShaderConstants<ProgramType, ConstantType, T>( + StartRegister, + pConstantData, + Count); + + if constexpr (ConstantType != D3D9ConstantType::Bool) { + uint32_t maxCount = ConstantType == D3D9ConstantType::Float + ? m_consts[ProgramType].meta.maxConstIndexF + : m_consts[ProgramType].meta.maxConstIndexI; + + m_consts[ProgramType].dirty |= StartRegister < maxCount; + } + + UpdateStateConstants<ProgramType, ConstantType, T>( + &m_state, + StartRegister, + pConstantData, + Count, + m_d3d9Options.d3d9FloatEmulation); + + return D3D_OK; + } + + + void D3D9DeviceEx::UpdateFixedFunctionVS() { + // Shader... + bool hasPositionT = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasPositionT) : false; + bool hasBlendWeight = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasBlendWeight) : false; + bool hasBlendIndices = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasBlendIndices) : false; + + bool indexedVertexBlend = hasBlendIndices && m_state.renderStates[D3DRS_INDEXEDVERTEXBLENDENABLE]; + + D3D9FF_VertexBlendMode vertexBlendMode = D3D9FF_VertexBlendMode_Disabled; + + if (m_state.renderStates[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE && !hasPositionT) { + vertexBlendMode = m_state.renderStates[D3DRS_VERTEXBLEND] == D3DVBF_TWEENING + ? 
D3D9FF_VertexBlendMode_Tween + : D3D9FF_VertexBlendMode_Normal; + + if (m_state.renderStates[D3DRS_VERTEXBLEND] != D3DVBF_0WEIGHTS) { + if (!hasBlendWeight) + vertexBlendMode = D3D9FF_VertexBlendMode_Disabled; + } + else if (!indexedVertexBlend) + vertexBlendMode = D3D9FF_VertexBlendMode_Disabled; + } + + if (unlikely(hasPositionT && m_state.vertexShader != nullptr && !m_flags.test(D3D9DeviceFlag::DirtyProgVertexShader))) { + m_flags.set(D3D9DeviceFlag::DirtyInputLayout); + m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader); + m_flags.set(D3D9DeviceFlag::DirtyProgVertexShader); + } + + if (m_flags.test(D3D9DeviceFlag::DirtyFFVertexShader)) { + m_flags.clr(D3D9DeviceFlag::DirtyFFVertexShader); + + D3D9FFShaderKeyVS key; + key.Data.Contents.HasPositionT = hasPositionT; + key.Data.Contents.HasColor0 = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasColor0) : false; + key.Data.Contents.HasColor1 = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasColor1) : false; + key.Data.Contents.HasPointSize = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasPointSize) : false; + key.Data.Contents.HasFog = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasFog) : false; + + bool lighting = m_state.renderStates[D3DRS_LIGHTING] != 0 && !key.Data.Contents.HasPositionT; + bool colorVertex = m_state.renderStates[D3DRS_COLORVERTEX] != 0; + uint32_t mask = (lighting && colorVertex) + ? (key.Data.Contents.HasColor0 ? D3DMCS_COLOR1 : D3DMCS_MATERIAL) + | (key.Data.Contents.HasColor1 ? 
D3DMCS_COLOR2 : D3DMCS_MATERIAL) + : 0; + + key.Data.Contents.UseLighting = lighting; + key.Data.Contents.NormalizeNormals = m_state.renderStates[D3DRS_NORMALIZENORMALS]; + key.Data.Contents.LocalViewer = m_state.renderStates[D3DRS_LOCALVIEWER] && lighting; + + key.Data.Contents.RangeFog = m_state.renderStates[D3DRS_RANGEFOGENABLE]; + + key.Data.Contents.DiffuseSource = m_state.renderStates[D3DRS_DIFFUSEMATERIALSOURCE] & mask; + key.Data.Contents.AmbientSource = m_state.renderStates[D3DRS_AMBIENTMATERIALSOURCE] & mask; + key.Data.Contents.SpecularSource = m_state.renderStates[D3DRS_SPECULARMATERIALSOURCE] & mask; + key.Data.Contents.EmissiveSource = m_state.renderStates[D3DRS_EMISSIVEMATERIALSOURCE] & mask; + + uint32_t lightCount = 0; + + if (key.Data.Contents.UseLighting) { + for (uint32_t i = 0; i < caps::MaxEnabledLights; i++) { + if (m_state.enabledLightIndices[i] != UINT32_MAX) + lightCount++; + } + } + + key.Data.Contents.LightCount = lightCount; + + for (uint32_t i = 0; i < caps::MaxTextureBlendStages; i++) { + uint32_t transformFlags = m_state.textureStages[i][DXVK_TSS_TEXTURETRANSFORMFLAGS] & ~(D3DTTFF_PROJECTED); + uint32_t index = m_state.textureStages[i][DXVK_TSS_TEXCOORDINDEX]; + uint32_t indexFlags = (index & TCIMask) >> TCIOffset; + + transformFlags &= 0b111; + index &= 0b111; + + key.Data.Contents.TransformFlags |= transformFlags << (i * 3); + key.Data.Contents.TexcoordFlags |= indexFlags << (i * 3); + key.Data.Contents.TexcoordIndices |= index << (i * 3); + } + + key.Data.Contents.TexcoordDeclMask = m_state.vertexDecl != nullptr ? 
m_state.vertexDecl->GetTexcoordMask() : 0; + + key.Data.Contents.VertexBlendMode = uint32_t(vertexBlendMode); + + if (vertexBlendMode == D3D9FF_VertexBlendMode_Normal) { + key.Data.Contents.VertexBlendIndexed = indexedVertexBlend; + key.Data.Contents.VertexBlendCount = m_state.renderStates[D3DRS_VERTEXBLEND] & 0xff; + } + + key.Data.Contents.VertexClipping = IsClipPlaneEnabled(); + + EmitCs([ + this, + cKey = key, + &cShaders = m_ffModules + ](DxvkContext* ctx) { + auto shader = cShaders.GetShaderModule(this, cKey); + ctx->bindShader(VK_SHADER_STAGE_VERTEX_BIT, shader.GetShader()); + }); + } + + if (hasPositionT && (m_flags.test(D3D9DeviceFlag::DirtyFFViewport) || m_ffZTest != IsZTestEnabled())) { + m_flags.clr(D3D9DeviceFlag::DirtyFFViewport); + m_flags.set(D3D9DeviceFlag::DirtyFFVertexData); + + const auto& vp = m_state.viewport; + // For us to account for the Vulkan viewport rules + // when translating Window Coords -> Real Coords: + // We need to negate the inverse extent we multiply by, + // this follows through to the offset when that gets + // timesed by it. + // The 1.0f additional offset however does not, + // so we account for that there manually. + + m_ffZTest = IsZTestEnabled(); + + m_viewportInfo.inverseExtent = Vector4( + 2.0f / float(vp.Width), + -2.0f / float(vp.Height), + m_ffZTest ? 1.0f : 0.0f, + 1.0f); + + m_viewportInfo.inverseOffset = Vector4( + -float(vp.X), -float(vp.Y), + 0.0f, 0.0f); + + m_viewportInfo.inverseOffset = m_viewportInfo.inverseOffset * m_viewportInfo.inverseExtent; + + m_viewportInfo.inverseOffset = m_viewportInfo.inverseOffset + Vector4(-1.0f, 1.0f, 0.0f, 0.0f); + } + + // Constants... 
+ if (m_flags.test(D3D9DeviceFlag::DirtyFFVertexData)) { + m_flags.clr(D3D9DeviceFlag::DirtyFFVertexData); + + DxvkBufferSliceHandle slice = m_vsFixedFunction->allocSlice(); + + EmitCs([ + cBuffer = m_vsFixedFunction, + cSlice = slice + ] (DxvkContext* ctx) { + ctx->invalidateBuffer(cBuffer, cSlice); + }); + + auto WorldView = m_state.transforms[GetTransformIndex(D3DTS_VIEW)] * m_state.transforms[GetTransformIndex(D3DTS_WORLD)]; + auto NormalMatrix = inverse(WorldView); + + D3D9FixedFunctionVS* data = reinterpret_cast<D3D9FixedFunctionVS*>(slice.mapPtr); + data->WorldView = WorldView; + data->NormalMatrix = NormalMatrix; + data->InverseView = transpose(inverse(m_state.transforms[GetTransformIndex(D3DTS_VIEW)])); + data->Projection = m_state.transforms[GetTransformIndex(D3DTS_PROJECTION)]; + + for (uint32_t i = 0; i < data->TexcoordMatrices.size(); i++) + data->TexcoordMatrices[i] = m_state.transforms[GetTransformIndex(D3DTS_TEXTURE0) + i]; + + data->ViewportInfo = m_viewportInfo; + + DecodeD3DCOLOR(m_state.renderStates[D3DRS_AMBIENT], data->GlobalAmbient.data); + + uint32_t lightIdx = 0; + for (uint32_t i = 0; i < caps::MaxEnabledLights; i++) { + auto idx = m_state.enabledLightIndices[i]; + if (idx == UINT32_MAX) + continue; + + data->Lights[lightIdx++] = D3D9Light(m_state.lights[idx].value(), m_state.transforms[GetTransformIndex(D3DTS_VIEW)]); + } + + data->Material = m_state.material; + data->TweenFactor = bit::cast<float>(m_state.renderStates[D3DRS_TWEENFACTOR]); + } + + if (m_flags.test(D3D9DeviceFlag::DirtyFFVertexBlend) && vertexBlendMode == D3D9FF_VertexBlendMode_Normal) { + m_flags.clr(D3D9DeviceFlag::DirtyFFVertexBlend); + + DxvkBufferSliceHandle slice = m_vsVertexBlend->allocSlice(); + + EmitCs([ + cBuffer = m_vsVertexBlend, + cSlice = slice + ] (DxvkContext* ctx) { + ctx->invalidateBuffer(cBuffer, cSlice); + }); + + auto UploadVertexBlendData = [&](auto data) { + for (uint32_t i = 0; i < std::size(data->WorldView); i++) + data->WorldView[i] = 
m_state.transforms[GetTransformIndex(D3DTS_VIEW)] * m_state.transforms[GetTransformIndex(D3DTS_WORLDMATRIX(i))]; + }; + + (m_isSWVP && indexedVertexBlend) + ? UploadVertexBlendData(reinterpret_cast<D3D9FixedFunctionVertexBlendDataSW*>(slice.mapPtr)) + : UploadVertexBlendData(reinterpret_cast<D3D9FixedFunctionVertexBlendDataHW*>(slice.mapPtr)); + } + } + + + void D3D9DeviceEx::UpdateFixedFunctionPS() { + // Shader... + if (m_flags.test(D3D9DeviceFlag::DirtyFFPixelShader) || m_lastSamplerTypesFF != m_textureTypes) { + m_flags.clr(D3D9DeviceFlag::DirtyFFPixelShader); + m_lastSamplerTypesFF = m_textureTypes; + + // Used args for a given operation. + auto ArgsMask = [](DWORD Op) { + switch (Op) { + case D3DTOP_DISABLE: + return 0b000u; // No Args + case D3DTOP_SELECTARG1: + case D3DTOP_PREMODULATE: + return 0b010u; // Arg 1 + case D3DTOP_SELECTARG2: + return 0b100u; // Arg 2 + case D3DTOP_MULTIPLYADD: + case D3DTOP_LERP: + return 0b111u; // Arg 0, 1, 2 + default: + return 0b110u; // Arg 1, 2 + } + }; + + D3D9FFShaderKeyFS key; + + uint32_t idx; + for (idx = 0; idx < caps::TextureStageCount; idx++) { + auto& stage = key.Stages[idx].Contents; + auto& data = m_state.textureStages[idx]; + + // Subsequent stages do not occur if this is true. + if (data[DXVK_TSS_COLOROP] == D3DTOP_DISABLE) + break; + + // If the stage is invalid (ie. no texture bound), + // this and all subsequent stages get disabled. 
+ if (m_state.textures[idx] == nullptr) { + if (((data[DXVK_TSS_COLORARG0] & D3DTA_SELECTMASK) == D3DTA_TEXTURE && (ArgsMask(data[DXVK_TSS_COLOROP]) & (1 << 0u))) + || ((data[DXVK_TSS_COLORARG1] & D3DTA_SELECTMASK) == D3DTA_TEXTURE && (ArgsMask(data[DXVK_TSS_COLOROP]) & (1 << 1u))) + || ((data[DXVK_TSS_COLORARG2] & D3DTA_SELECTMASK) == D3DTA_TEXTURE && (ArgsMask(data[DXVK_TSS_COLOROP]) & (1 << 2u)))) + break; + } + + stage.ColorOp = data[DXVK_TSS_COLOROP]; + stage.AlphaOp = data[DXVK_TSS_ALPHAOP]; + + stage.ColorArg0 = data[DXVK_TSS_COLORARG0]; + stage.ColorArg1 = data[DXVK_TSS_COLORARG1]; + stage.ColorArg2 = data[DXVK_TSS_COLORARG2]; + + stage.AlphaArg0 = data[DXVK_TSS_ALPHAARG0]; + stage.AlphaArg1 = data[DXVK_TSS_ALPHAARG1]; + stage.AlphaArg2 = data[DXVK_TSS_ALPHAARG2]; + + const uint32_t samplerOffset = idx * 2; + stage.Type = (m_textureTypes >> samplerOffset) & 0xffu; + stage.ResultIsTemp = data[DXVK_TSS_RESULTARG] == D3DTA_TEMP; + + uint32_t ttff = data[DXVK_TSS_TEXTURETRANSFORMFLAGS]; + uint32_t count = ttff & ~D3DTTFF_PROJECTED; + + stage.Projected = (ttff & D3DTTFF_PROJECTED) ? 1 : 0; + stage.ProjectedCount = (ttff & D3DTTFF_PROJECTED) ? count : 0; + } + + auto& stage0 = key.Stages[0].Contents; + + if (stage0.ResultIsTemp && + stage0.ColorOp != D3DTOP_DISABLE && + stage0.AlphaOp == D3DTOP_DISABLE) { + stage0.AlphaOp = D3DTOP_SELECTARG1; + stage0.AlphaArg1 = D3DTA_DIFFUSE; + } + + stage0.GlobalSpecularEnable = m_state.renderStates[D3DRS_SPECULARENABLE]; + stage0.GlobalFlatShade = m_state.renderStates[D3DRS_SHADEMODE] == D3DSHADE_FLAT; + + // The last stage *always* writes to current. 
+ if (idx >= 1) + key.Stages[idx - 1].Contents.ResultIsTemp = false; + + EmitCs([ + this, + cKey = key, + &cShaders = m_ffModules + ](DxvkContext* ctx) { + auto shader = cShaders.GetShaderModule(this, cKey); + ctx->bindShader(VK_SHADER_STAGE_FRAGMENT_BIT, shader.GetShader()); + }); + } + + // Constants + + if (m_flags.test(D3D9DeviceFlag::DirtyFFPixelData)) { + m_flags.clr(D3D9DeviceFlag::DirtyFFPixelData); + + DxvkBufferSliceHandle slice = m_psFixedFunction->allocSlice(); + + EmitCs([ + cBuffer = m_psFixedFunction, + cSlice = slice + ] (DxvkContext* ctx) { + ctx->invalidateBuffer(cBuffer, cSlice); + }); + + auto& rs = m_state.renderStates; + + D3D9FixedFunctionPS* data = reinterpret_cast<D3D9FixedFunctionPS*>(slice.mapPtr); + DecodeD3DCOLOR((D3DCOLOR)rs[D3DRS_TEXTUREFACTOR], data->textureFactor.data); + } + } + + + bool D3D9DeviceEx::UseProgrammableVS() { + if (!config::FixedFunctionEnabled) + return true; + + return m_state.vertexShader != nullptr + && m_state.vertexDecl != nullptr + && !m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasPositionT); + } + + + bool D3D9DeviceEx::UseProgrammablePS() { + if (!config::FixedFunctionEnabled) + return true; + + return m_state.pixelShader != nullptr; + } + + + void D3D9DeviceEx::UpdateBoolSpecConstantVertex(uint32_t value) { + if (value == m_lastBoolSpecConstantVertex) + return; + + EmitCs([cBitfield = value](DxvkContext* ctx) { + ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::VertexShaderBools, cBitfield); + }); + + m_lastBoolSpecConstantVertex = value; + } + + + void D3D9DeviceEx::UpdateBoolSpecConstantPixel(uint32_t value) { + if (value == m_lastBoolSpecConstantPixel) + return; + + EmitCs([cBitfield = value](DxvkContext* ctx) { + ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::PixelShaderBools, cBitfield); + }); + + m_lastBoolSpecConstantPixel = value; + } + + + void D3D9DeviceEx::UpdateSamplerSpecConsant(uint32_t value) { + EmitCs([cBitfield = value](DxvkContext* 
ctx) { + ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::SamplerType, cBitfield); + }); + + m_lastSamplerTypes = value; + } + + + void D3D9DeviceEx::UpdateProjectionSpecConstant(uint32_t value) { + EmitCs([cBitfield = value](DxvkContext* ctx) { + ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::ProjectionType, cBitfield); + }); + + m_lastProjectionBitfield = value; + } + + + void D3D9DeviceEx::UpdateFetch4SpecConstant(uint32_t value) { + EmitCs([cBitfield = value](DxvkContext* ctx) { + ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::Fetch4, cBitfield); + }); + + m_lastFetch4 = value; + } + + + void D3D9DeviceEx::UpdateSamplerDepthModeSpecConstant(uint32_t value) { + EmitCs([cBitfield = value](DxvkContext* ctx) { + ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::SamplerDepthMode, cBitfield); + }); + + m_lastSamplerDepthMode = value; + } + + + void D3D9DeviceEx::ApplyPrimitiveType( + DxvkContext* pContext, + D3DPRIMITIVETYPE PrimType) { + if (m_iaState.primitiveType != PrimType) { + m_iaState.primitiveType = PrimType; + + auto iaState = DecodeInputAssemblyState(PrimType); + pContext->setInputAssemblyState(iaState); + } + } + + + void D3D9DeviceEx::ResolveZ() { + D3D9Surface* src = m_state.depthStencil.ptr(); + IDirect3DBaseTexture9* dst = m_state.textures[0]; + + if (unlikely(!src || !dst)) + return; + + D3D9CommonTexture* srcTextureInfo = GetCommonTexture(src); + D3D9CommonTexture* dstTextureInfo = GetCommonTexture(dst); + + const D3D9_COMMON_TEXTURE_DESC* srcDesc = srcTextureInfo->Desc(); + const D3D9_COMMON_TEXTURE_DESC* dstDesc = dstTextureInfo->Desc(); + + VkSampleCountFlagBits dstSampleCount; + DecodeMultiSampleType(dstDesc->MultiSample, dstDesc->MultisampleQuality, &dstSampleCount); + + if (unlikely(dstSampleCount != VK_SAMPLE_COUNT_1_BIT)) { + Logger::warn("D3D9DeviceEx::ResolveZ: dstSampleCount != 1. 
Discarding."); + return; + } + + const D3D9_VK_FORMAT_MAPPING srcFormatInfo = LookupFormat(srcDesc->Format); + const D3D9_VK_FORMAT_MAPPING dstFormatInfo = LookupFormat(dstDesc->Format); + + auto srcVulkanFormatInfo = imageFormatInfo(srcFormatInfo.FormatColor); + auto dstVulkanFormatInfo = imageFormatInfo(dstFormatInfo.FormatColor); + + const VkImageSubresource dstSubresource = + dstTextureInfo->GetSubresourceFromIndex( + dstVulkanFormatInfo->aspectMask, 0); + + const VkImageSubresource srcSubresource = + srcTextureInfo->GetSubresourceFromIndex( + srcVulkanFormatInfo->aspectMask, src->GetSubresource()); + + const VkImageSubresourceLayers dstSubresourceLayers = { + dstSubresource.aspectMask, + dstSubresource.mipLevel, + dstSubresource.arrayLayer, 1 }; + + const VkImageSubresourceLayers srcSubresourceLayers = { + srcSubresource.aspectMask, + srcSubresource.mipLevel, + srcSubresource.arrayLayer, 1 }; + + VkSampleCountFlagBits srcSampleCount; + DecodeMultiSampleType(srcDesc->MultiSample, srcDesc->MultisampleQuality, &srcSampleCount); + + if (srcSampleCount == VK_SAMPLE_COUNT_1_BIT) { + EmitCs([ + cDstImage = dstTextureInfo->GetImage(), + cSrcImage = srcTextureInfo->GetImage(), + cDstLayers = dstSubresourceLayers, + cSrcLayers = srcSubresourceLayers + ] (DxvkContext* ctx) { + ctx->copyImage( + cDstImage, cDstLayers, VkOffset3D { 0, 0, 0 }, + cSrcImage, cSrcLayers, VkOffset3D { 0, 0, 0 }, + cDstImage->mipLevelExtent(cDstLayers.mipLevel)); + }); + } else { + EmitCs([ + cDstImage = dstTextureInfo->GetImage(), + cSrcImage = srcTextureInfo->GetImage(), + cDstSubres = dstSubresourceLayers, + cSrcSubres = srcSubresourceLayers + ] (DxvkContext* ctx) { + // We should resolve using the first sample according to + // http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/Advanced-DX9-Capabilities-for-ATI-Radeon-Cards_v2.pdf + // "The resolve operation copies the depth value from the *first sample only* into the resolved depth stencil texture." 
+ constexpr auto resolveMode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR; + + VkImageResolve region; + region.srcSubresource = cSrcSubres; + region.srcOffset = VkOffset3D { 0, 0, 0 }; + region.dstSubresource = cDstSubres; + region.dstOffset = VkOffset3D { 0, 0, 0 }; + region.extent = cDstImage->mipLevelExtent(cDstSubres.mipLevel); + + ctx->resolveDepthStencilImage(cDstImage, cSrcImage, region, resolveMode, resolveMode); + }); + } + + dstTextureInfo->MarkAllWrittenByGPU(); + } + + + void D3D9DeviceEx::TransitionImage(D3D9CommonTexture* pResource, VkImageLayout NewLayout) { + EmitCs([ + cImage = pResource->GetImage(), + cNewLayout = NewLayout + ] (DxvkContext* ctx) { + ctx->changeImageLayout( + cImage, cNewLayout); + }); + } + + + void D3D9DeviceEx::TransformImage( + D3D9CommonTexture* pResource, + const VkImageSubresourceRange* pSubresources, + VkImageLayout OldLayout, + VkImageLayout NewLayout) { + EmitCs([ + cImage = pResource->GetImage(), + cSubresources = *pSubresources, + cOldLayout = OldLayout, + cNewLayout = NewLayout + ] (DxvkContext* ctx) { + ctx->transformImage( + cImage, cSubresources, + cOldLayout, cNewLayout); + }); + } + + + HRESULT D3D9DeviceEx::ResetState(D3DPRESENT_PARAMETERS* pPresentationParameters) { + if (!pPresentationParameters->EnableAutoDepthStencil) + SetDepthStencilSurface(nullptr); + + for (uint32_t i = 1; i < caps::MaxSimultaneousRenderTargets; i++) + SetRenderTarget(i, nullptr); + + auto& rs = m_state.renderStates; + + rs[D3DRS_SEPARATEALPHABLENDENABLE] = FALSE; + rs[D3DRS_ALPHABLENDENABLE] = FALSE; + rs[D3DRS_BLENDOP] = D3DBLENDOP_ADD; + rs[D3DRS_BLENDOPALPHA] = D3DBLENDOP_ADD; + rs[D3DRS_DESTBLEND] = D3DBLEND_ZERO; + rs[D3DRS_DESTBLENDALPHA] = D3DBLEND_ZERO; + rs[D3DRS_COLORWRITEENABLE] = 0x0000000f; + rs[D3DRS_COLORWRITEENABLE1] = 0x0000000f; + rs[D3DRS_COLORWRITEENABLE2] = 0x0000000f; + rs[D3DRS_COLORWRITEENABLE3] = 0x0000000f; + rs[D3DRS_SRCBLEND] = D3DBLEND_ONE; + rs[D3DRS_SRCBLENDALPHA] = D3DBLEND_ONE; + BindBlendState(); + + 
rs[D3DRS_BLENDFACTOR] = 0xffffffff; + BindBlendFactor(); + + rs[D3DRS_ZENABLE] = pPresentationParameters->EnableAutoDepthStencil + ? D3DZB_TRUE + : D3DZB_FALSE; + rs[D3DRS_ZFUNC] = D3DCMP_LESSEQUAL; + rs[D3DRS_TWOSIDEDSTENCILMODE] = FALSE; + rs[D3DRS_ZWRITEENABLE] = TRUE; + rs[D3DRS_STENCILENABLE] = FALSE; + rs[D3DRS_STENCILFAIL] = D3DSTENCILOP_KEEP; + rs[D3DRS_STENCILZFAIL] = D3DSTENCILOP_KEEP; + rs[D3DRS_STENCILPASS] = D3DSTENCILOP_KEEP; + rs[D3DRS_STENCILFUNC] = D3DCMP_ALWAYS; + rs[D3DRS_CCW_STENCILFAIL] = D3DSTENCILOP_KEEP; + rs[D3DRS_CCW_STENCILZFAIL] = D3DSTENCILOP_KEEP; + rs[D3DRS_CCW_STENCILPASS] = D3DSTENCILOP_KEEP; + rs[D3DRS_CCW_STENCILFUNC] = D3DCMP_ALWAYS; + rs[D3DRS_STENCILMASK] = 0xFFFFFFFF; + rs[D3DRS_STENCILWRITEMASK] = 0xFFFFFFFF; + BindDepthStencilState(); + + rs[D3DRS_STENCILREF] = 0; + BindDepthStencilRefrence(); + + rs[D3DRS_FILLMODE] = D3DFILL_SOLID; + rs[D3DRS_CULLMODE] = D3DCULL_CCW; + rs[D3DRS_DEPTHBIAS] = bit::cast<DWORD>(0.0f); + rs[D3DRS_SLOPESCALEDEPTHBIAS] = bit::cast<DWORD>(0.0f); + BindRasterizerState(); + BindDepthBias(); + + rs[D3DRS_SCISSORTESTENABLE] = FALSE; + + rs[D3DRS_ALPHATESTENABLE] = FALSE; + rs[D3DRS_ALPHAFUNC] = D3DCMP_ALWAYS; + BindAlphaTestState(); + rs[D3DRS_ALPHAREF] = 0; + UpdatePushConstant<D3D9RenderStateItem::AlphaRef>(); + + rs[D3DRS_MULTISAMPLEMASK] = 0xffffffff; + BindMultiSampleState(); + + rs[D3DRS_TEXTUREFACTOR] = 0xffffffff; + m_flags.set(D3D9DeviceFlag::DirtyFFPixelData); + + rs[D3DRS_DIFFUSEMATERIALSOURCE] = D3DMCS_COLOR1; + rs[D3DRS_SPECULARMATERIALSOURCE] = D3DMCS_COLOR2; + rs[D3DRS_AMBIENTMATERIALSOURCE] = D3DMCS_MATERIAL; + rs[D3DRS_EMISSIVEMATERIALSOURCE] = D3DMCS_MATERIAL; + rs[D3DRS_LIGHTING] = TRUE; + rs[D3DRS_COLORVERTEX] = TRUE; + rs[D3DRS_LOCALVIEWER] = TRUE; + rs[D3DRS_RANGEFOGENABLE] = FALSE; + rs[D3DRS_NORMALIZENORMALS] = FALSE; + m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader); + + // PS + rs[D3DRS_SPECULARENABLE] = FALSE; + + rs[D3DRS_AMBIENT] = 0; + 
m_flags.set(D3D9DeviceFlag::DirtyFFVertexData); + + rs[D3DRS_FOGENABLE] = FALSE; + rs[D3DRS_FOGCOLOR] = 0; + rs[D3DRS_FOGTABLEMODE] = D3DFOG_NONE; + rs[D3DRS_FOGSTART] = bit::cast<DWORD>(0.0f); + rs[D3DRS_FOGEND] = bit::cast<DWORD>(1.0f); + rs[D3DRS_FOGDENSITY] = bit::cast<DWORD>(1.0f); + rs[D3DRS_FOGVERTEXMODE] = D3DFOG_NONE; + m_flags.set(D3D9DeviceFlag::DirtyFogColor); + m_flags.set(D3D9DeviceFlag::DirtyFogDensity); + m_flags.set(D3D9DeviceFlag::DirtyFogEnd); + m_flags.set(D3D9DeviceFlag::DirtyFogScale); + m_flags.set(D3D9DeviceFlag::DirtyFogState); + + rs[D3DRS_CLIPPLANEENABLE] = 0; + m_flags.set(D3D9DeviceFlag::DirtyClipPlanes); + + rs[D3DRS_POINTSPRITEENABLE] = FALSE; + rs[D3DRS_POINTSCALEENABLE] = FALSE; + rs[D3DRS_POINTSCALE_A] = bit::cast<DWORD>(1.0f); + rs[D3DRS_POINTSCALE_B] = bit::cast<DWORD>(0.0f); + rs[D3DRS_POINTSCALE_C] = bit::cast<DWORD>(0.0f); + rs[D3DRS_POINTSIZE] = bit::cast<DWORD>(1.0f); + rs[D3DRS_POINTSIZE_MIN] = bit::cast<DWORD>(1.0f); + rs[D3DRS_POINTSIZE_MAX] = bit::cast<DWORD>(64.0f); + UpdatePushConstant<D3D9RenderStateItem::PointSize>(); + UpdatePushConstant<D3D9RenderStateItem::PointSizeMin>(); + UpdatePushConstant<D3D9RenderStateItem::PointSizeMax>(); + m_flags.set(D3D9DeviceFlag::DirtyPointScale); + UpdatePointMode<false>(); + + rs[D3DRS_SRGBWRITEENABLE] = 0; + + rs[D3DRS_SHADEMODE] = D3DSHADE_GOURAUD; + + rs[D3DRS_VERTEXBLEND] = D3DVBF_DISABLE; + rs[D3DRS_INDEXEDVERTEXBLENDENABLE] = FALSE; + rs[D3DRS_TWEENFACTOR] = bit::cast<DWORD>(0.0f); + m_flags.set(D3D9DeviceFlag::DirtyFFVertexBlend); + + // Render States not implemented beyond this point. 
+ rs[D3DRS_LASTPIXEL] = TRUE; + rs[D3DRS_DITHERENABLE] = FALSE; + rs[D3DRS_WRAP0] = 0; + rs[D3DRS_WRAP1] = 0; + rs[D3DRS_WRAP2] = 0; + rs[D3DRS_WRAP3] = 0; + rs[D3DRS_WRAP4] = 0; + rs[D3DRS_WRAP5] = 0; + rs[D3DRS_WRAP6] = 0; + rs[D3DRS_WRAP7] = 0; + rs[D3DRS_CLIPPING] = TRUE; + rs[D3DRS_MULTISAMPLEANTIALIAS] = TRUE; + rs[D3DRS_PATCHEDGESTYLE] = D3DPATCHEDGE_DISCRETE; + rs[D3DRS_DEBUGMONITORTOKEN] = D3DDMT_ENABLE; + rs[D3DRS_POSITIONDEGREE] = D3DDEGREE_CUBIC; + rs[D3DRS_NORMALDEGREE] = D3DDEGREE_LINEAR; + rs[D3DRS_ANTIALIASEDLINEENABLE] = FALSE; + rs[D3DRS_MINTESSELLATIONLEVEL] = bit::cast<DWORD>(1.0f); + rs[D3DRS_MAXTESSELLATIONLEVEL] = bit::cast<DWORD>(1.0f); + rs[D3DRS_ADAPTIVETESS_X] = bit::cast<DWORD>(0.0f); + rs[D3DRS_ADAPTIVETESS_Y] = bit::cast<DWORD>(0.0f); + rs[D3DRS_ADAPTIVETESS_Z] = bit::cast<DWORD>(1.0f); + rs[D3DRS_ADAPTIVETESS_W] = bit::cast<DWORD>(0.0f); + rs[D3DRS_ENABLEADAPTIVETESSELLATION] = FALSE; + rs[D3DRS_WRAP8] = 0; + rs[D3DRS_WRAP9] = 0; + rs[D3DRS_WRAP10] = 0; + rs[D3DRS_WRAP11] = 0; + rs[D3DRS_WRAP12] = 0; + rs[D3DRS_WRAP13] = 0; + rs[D3DRS_WRAP14] = 0; + rs[D3DRS_WRAP15] = 0; + // End Unimplemented Render States + + for (uint32_t i = 0; i < caps::TextureStageCount; i++) { + auto& stage = m_state.textureStages[i]; + + stage[DXVK_TSS_COLOROP] = i == 0 ? D3DTOP_MODULATE : D3DTOP_DISABLE; + stage[DXVK_TSS_COLORARG1] = D3DTA_TEXTURE; + stage[DXVK_TSS_COLORARG2] = D3DTA_CURRENT; + stage[DXVK_TSS_ALPHAOP] = i == 0 ? 
D3DTOP_SELECTARG1 : D3DTOP_DISABLE; + stage[DXVK_TSS_ALPHAARG1] = D3DTA_TEXTURE; + stage[DXVK_TSS_ALPHAARG2] = D3DTA_CURRENT; + stage[DXVK_TSS_BUMPENVMAT00] = bit::cast<DWORD>(0.0f); + stage[DXVK_TSS_BUMPENVMAT01] = bit::cast<DWORD>(0.0f); + stage[DXVK_TSS_BUMPENVMAT10] = bit::cast<DWORD>(0.0f); + stage[DXVK_TSS_BUMPENVMAT11] = bit::cast<DWORD>(0.0f); + stage[DXVK_TSS_TEXCOORDINDEX] = i; + stage[DXVK_TSS_BUMPENVLSCALE] = bit::cast<DWORD>(0.0f); + stage[DXVK_TSS_BUMPENVLOFFSET] = bit::cast<DWORD>(0.0f); + stage[DXVK_TSS_TEXTURETRANSFORMFLAGS] = D3DTTFF_DISABLE; + stage[DXVK_TSS_COLORARG0] = D3DTA_CURRENT; + stage[DXVK_TSS_ALPHAARG0] = D3DTA_CURRENT; + stage[DXVK_TSS_RESULTARG] = D3DTA_CURRENT; + stage[DXVK_TSS_CONSTANT] = 0x00000000; + } + m_flags.set(D3D9DeviceFlag::DirtySharedPixelShaderData); + m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader); + + for (uint32_t i = 0; i < caps::MaxStreams; i++) + m_state.streamFreq[i] = 1; + + for (uint32_t i = 0; i < m_state.textures.size(); i++) + TextureChangePrivate(m_state.textures[i], nullptr); + + EmitCs([ + cSize = m_state.textures.size() + ](DxvkContext* ctx) { + for (uint32_t i = 0; i < cSize; i++) { + auto samplerInfo = RemapStateSamplerShader(DWORD(i)); + uint32_t slot = computeResourceSlotId(samplerInfo.first, DxsoBindingType::Image, uint32_t(samplerInfo.second)); + ctx->bindResourceView(slot, nullptr, nullptr); + } + }); + + m_dirtyTextures = 0; + m_depthTextures = 0; + + auto& ss = m_state.samplerStates; + for (uint32_t i = 0; i < ss.size(); i++) { + auto& state = ss[i]; + state[D3DSAMP_ADDRESSU] = D3DTADDRESS_WRAP; + state[D3DSAMP_ADDRESSV] = D3DTADDRESS_WRAP; + state[D3DSAMP_ADDRESSW] = D3DTADDRESS_WRAP; + state[D3DSAMP_BORDERCOLOR] = 0x00000000; + state[D3DSAMP_MAGFILTER] = D3DTEXF_POINT; + state[D3DSAMP_MINFILTER] = D3DTEXF_POINT; + state[D3DSAMP_MIPFILTER] = D3DTEXF_NONE; + state[D3DSAMP_MIPMAPLODBIAS] = bit::cast<DWORD>(0.0f); + state[D3DSAMP_MAXMIPLEVEL] = 0; + state[D3DSAMP_MAXANISOTROPY] = 1; + 
state[D3DSAMP_SRGBTEXTURE] = 0; + state[D3DSAMP_ELEMENTINDEX] = 0; + state[D3DSAMP_DMAPOFFSET] = 0; + + BindSampler(i); + } + + m_dirtySamplerStates = 0; + + for (uint32_t i = 0; i < caps::MaxClipPlanes; i++) { + float plane[4] = { 0, 0, 0, 0 }; + SetClipPlane(i, plane); + } + + // We should do this... + m_flags.set(D3D9DeviceFlag::DirtyInputLayout); + + UpdateSamplerSpecConsant(0u); + UpdateBoolSpecConstantVertex(0u); + UpdateBoolSpecConstantPixel(0u); + UpdateSamplerDepthModeSpecConstant(0u); + + return D3D_OK; + } + + + HRESULT D3D9DeviceEx::ResetSwapChain(D3DPRESENT_PARAMETERS* pPresentationParameters, D3DDISPLAYMODEEX* pFullscreenDisplayMode) { + D3D9Format backBufferFmt = EnumerateFormat(pPresentationParameters->BackBufferFormat); + + Logger::info(str::format( + "D3D9DeviceEx::ResetSwapChain:\n", + " Requested Presentation Parameters\n", + " - Width: ", pPresentationParameters->BackBufferWidth, "\n", + " - Height: ", pPresentationParameters->BackBufferHeight, "\n", + " - Format: ", backBufferFmt, "\n" + " - Auto Depth Stencil: ", pPresentationParameters->EnableAutoDepthStencil ? "true" : "false", "\n", + " ^ Format: ", EnumerateFormat(pPresentationParameters->AutoDepthStencilFormat), "\n", + " - Windowed: ", pPresentationParameters->Windowed ? 
"true" : "false", "\n")); + + if (backBufferFmt != D3D9Format::Unknown) { + if (!IsSupportedBackBufferFormat(backBufferFmt)) { + Logger::err(str::format("D3D9DeviceEx::ResetSwapChain: Unsupported backbuffer format: ", + EnumerateFormat(pPresentationParameters->BackBufferFormat))); + return D3DERR_INVALIDCALL; + } + } + + if (m_implicitSwapchain != nullptr) { + if (FAILED(m_implicitSwapchain->Reset(pPresentationParameters, pFullscreenDisplayMode))) + return D3DERR_INVALIDCALL; + } + else + m_implicitSwapchain = new D3D9SwapChainEx(this, pPresentationParameters, pFullscreenDisplayMode); + + if (pPresentationParameters->EnableAutoDepthStencil) { + D3D9_COMMON_TEXTURE_DESC desc; + desc.Width = pPresentationParameters->BackBufferWidth; + desc.Height = pPresentationParameters->BackBufferHeight; + desc.Depth = 1; + desc.ArraySize = 1; + desc.MipLevels = 1; + desc.Usage = D3DUSAGE_DEPTHSTENCIL; + desc.Format = EnumerateFormat(pPresentationParameters->AutoDepthStencilFormat); + desc.Pool = D3DPOOL_DEFAULT; + desc.Discard = FALSE; + desc.MultiSample = pPresentationParameters->MultiSampleType; + desc.MultisampleQuality = pPresentationParameters->MultiSampleQuality; + desc.IsBackBuffer = FALSE; + desc.IsAttachmentOnly = TRUE; + + if (FAILED(D3D9CommonTexture::NormalizeTextureProperties(this, &desc))) + return D3DERR_NOTAVAILABLE; + + m_autoDepthStencil = new D3D9Surface(this, &desc, nullptr); + m_initializer->InitTexture(m_autoDepthStencil->GetCommonTexture()); + SetDepthStencilSurface(m_autoDepthStencil.ptr()); + } + + SetRenderTarget(0, m_implicitSwapchain->GetBackBuffer(0)); + + // Force this if we end up binding the same RT to make scissor change go into effect. 
+ BindViewportAndScissor(); + + return D3D_OK; + } + + + HRESULT D3D9DeviceEx::InitialReset(D3DPRESENT_PARAMETERS* pPresentationParameters, D3DDISPLAYMODEEX* pFullscreenDisplayMode) { + HRESULT hr = ResetSwapChain(pPresentationParameters, pFullscreenDisplayMode); + if (FAILED(hr)) + return hr; + + hr = ResetState(pPresentationParameters); + if (FAILED(hr)) + return hr; + + Flush(); + SynchronizeCsThread(); + + return D3D_OK; + } + +} diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_device.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_device.h new file mode 100644 index 00000000..57260419 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_device.h @@ -0,0 +1,1250 @@ +#pragma once + +#include "../dxvk/dxvk_device.h" +#include "../dxvk/dxvk_cs.h" + +#include "d3d9_include.h" +#include "d3d9_cursor.h" +#include "d3d9_format.h" +#include "d3d9_multithread.h" +#include "d3d9_adapter.h" +#include "d3d9_constant_set.h" + +#include "d3d9_state.h" + +#include "d3d9_options.h" + +#include "../dxso/dxso_module.h" +#include "../dxso/dxso_util.h" +#include "../dxso/dxso_options.h" +#include "../dxso/dxso_modinfo.h" + +#include "d3d9_sampler.h" +#include "d3d9_fixed_function.h" +#include "d3d9_swvp_emu.h" +#include "d3d9_config.h" + +#include "d3d9_shader_permutations.h" + +#include <vector> +#include <type_traits> +#include <unordered_map> + +namespace dxvk { + + class D3D9InterfaceEx; + class D3D9SwapChainEx; + class D3D9CommonTexture; + class D3D9CommonBuffer; + class D3D9CommonShader; + class D3D9ShaderModuleSet; + class D3D9Initializer; + class D3D9Query; + class D3D9StateBlock; + class D3D9FormatHelper; + + enum class D3D9DeviceFlag : uint32_t { + DirtyFramebuffer, + DirtyClipPlanes, + DirtyDepthStencilState, + DirtyBlendState, + DirtyRasterizerState, + DirtyDepthBias, + DirtyAlphaTestState, + DirtyInputLayout, + DirtyViewportScissor, + DirtyMultiSampleState, + + DirtyFogState, + DirtyFogColor, + DirtyFogDensity, + DirtyFogScale, + DirtyFogEnd, + + 
DirtyFFVertexData, + DirtyFFVertexBlend, + DirtyFFVertexShader, + DirtyFFPixelShader, + DirtyFFViewport, + DirtyFFPixelData, + DirtyProgVertexShader, + DirtySharedPixelShaderData, + ValidSampleMask, + DirtyDepthBounds, + DirtyPointScale, + + InScene, + }; + + using D3D9DeviceFlags = Flags<D3D9DeviceFlag>; + + struct D3D9DrawInfo { + uint32_t vertexCount; + uint32_t instanceCount; + }; + + struct D3D9BufferSlice { + DxvkBufferSlice slice = {}; + void* mapPtr = nullptr; + }; + + class D3D9DeviceEx final : public ComObjectClamp<IDirect3DDevice9Ex> { + constexpr static uint32_t DefaultFrameLatency = 3; + constexpr static uint32_t MaxFrameLatency = 20; + + constexpr static uint32_t MinFlushIntervalUs = 750; + constexpr static uint32_t IncFlushIntervalUs = 250; + constexpr static uint32_t MaxPendingSubmits = 6; + + constexpr static uint32_t NullStreamIdx = caps::MaxStreams; + + friend class D3D9SwapChainEx; + public: + + D3D9DeviceEx( + D3D9InterfaceEx* pParent, + D3D9Adapter* pAdapter, + D3DDEVTYPE DeviceType, + HWND hFocusWindow, + DWORD BehaviorFlags, + Rc<DxvkDevice> dxvkDevice); + + ~D3D9DeviceEx(); + + HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, void** ppvObject); + + HRESULT STDMETHODCALLTYPE TestCooperativeLevel(); + + UINT STDMETHODCALLTYPE GetAvailableTextureMem(); + + HRESULT STDMETHODCALLTYPE EvictManagedResources(); + + HRESULT STDMETHODCALLTYPE GetDirect3D(IDirect3D9** ppD3D9); + + HRESULT STDMETHODCALLTYPE GetDeviceCaps(D3DCAPS9* pCaps); + + HRESULT STDMETHODCALLTYPE GetDisplayMode(UINT iSwapChain, D3DDISPLAYMODE* pMode); + + HRESULT STDMETHODCALLTYPE GetCreationParameters(D3DDEVICE_CREATION_PARAMETERS *pParameters); + + HRESULT STDMETHODCALLTYPE SetCursorProperties( + UINT XHotSpot, + UINT YHotSpot, + IDirect3DSurface9* pCursorBitmap); + + void STDMETHODCALLTYPE SetCursorPosition(int X, int Y, DWORD Flags); + + BOOL STDMETHODCALLTYPE ShowCursor(BOOL bShow); + + HRESULT STDMETHODCALLTYPE CreateAdditionalSwapChain( + D3DPRESENT_PARAMETERS* 
pPresentationParameters, + IDirect3DSwapChain9** ppSwapChain); + + HRESULT STDMETHODCALLTYPE GetSwapChain(UINT iSwapChain, IDirect3DSwapChain9** pSwapChain); + + UINT STDMETHODCALLTYPE GetNumberOfSwapChains(); + + HRESULT STDMETHODCALLTYPE Reset(D3DPRESENT_PARAMETERS* pPresentationParameters); + + HRESULT STDMETHODCALLTYPE Present( + const RECT* pSourceRect, + const RECT* pDestRect, HWND hDestWindowOverride, + const RGNDATA* pDirtyRegion); + + HRESULT STDMETHODCALLTYPE GetBackBuffer( + UINT iSwapChain, + UINT iBackBuffer, + D3DBACKBUFFER_TYPE Type, + IDirect3DSurface9** ppBackBuffer); + + HRESULT STDMETHODCALLTYPE GetRasterStatus(UINT iSwapChain, D3DRASTER_STATUS* pRasterStatus); + + HRESULT STDMETHODCALLTYPE SetDialogBoxMode(BOOL bEnableDialogs); + + void STDMETHODCALLTYPE SetGammaRamp( + UINT iSwapChain, + DWORD Flags, + const D3DGAMMARAMP* pRamp); + + void STDMETHODCALLTYPE GetGammaRamp(UINT iSwapChain, D3DGAMMARAMP* pRamp); + + HRESULT STDMETHODCALLTYPE CreateTexture( + UINT Width, + UINT Height, + UINT Levels, + DWORD Usage, + D3DFORMAT Format, + D3DPOOL Pool, + IDirect3DTexture9** ppTexture, + HANDLE* pSharedHandle); + + HRESULT STDMETHODCALLTYPE CreateVolumeTexture( + UINT Width, + UINT Height, + UINT Depth, + UINT Levels, + DWORD Usage, + D3DFORMAT Format, + D3DPOOL Pool, + IDirect3DVolumeTexture9** ppVolumeTexture, + HANDLE* pSharedHandle); + + HRESULT STDMETHODCALLTYPE CreateCubeTexture( + UINT EdgeLength, + UINT Levels, + DWORD Usage, + D3DFORMAT Format, + D3DPOOL Pool, + IDirect3DCubeTexture9** ppCubeTexture, + HANDLE* pSharedHandle); + + HRESULT STDMETHODCALLTYPE CreateVertexBuffer( + UINT Length, + DWORD Usage, + DWORD FVF, + D3DPOOL Pool, + IDirect3DVertexBuffer9** ppVertexBuffer, + HANDLE* pSharedHandle); + + HRESULT STDMETHODCALLTYPE CreateIndexBuffer( + UINT Length, + DWORD Usage, + D3DFORMAT Format, + D3DPOOL Pool, + IDirect3DIndexBuffer9** ppIndexBuffer, + HANDLE* pSharedHandle); + + HRESULT STDMETHODCALLTYPE CreateRenderTarget( + UINT Width, + 
UINT Height, + D3DFORMAT Format, + D3DMULTISAMPLE_TYPE MultiSample, + DWORD MultisampleQuality, + BOOL Lockable, + IDirect3DSurface9** ppSurface, + HANDLE* pSharedHandle); + + HRESULT STDMETHODCALLTYPE CreateDepthStencilSurface( + UINT Width, + UINT Height, + D3DFORMAT Format, + D3DMULTISAMPLE_TYPE MultiSample, + DWORD MultisampleQuality, + BOOL Discard, + IDirect3DSurface9** ppSurface, + HANDLE* pSharedHandle); + + HRESULT STDMETHODCALLTYPE UpdateSurface( + IDirect3DSurface9* pSourceSurface, + const RECT* pSourceRect, + IDirect3DSurface9* pDestinationSurface, + const POINT* pDestPoint); + + HRESULT STDMETHODCALLTYPE UpdateTexture( + IDirect3DBaseTexture9* pSourceTexture, + IDirect3DBaseTexture9* pDestinationTexture); + + HRESULT STDMETHODCALLTYPE GetRenderTargetData( + IDirect3DSurface9* pRenderTarget, + IDirect3DSurface9* pDestSurface); + + HRESULT STDMETHODCALLTYPE GetFrontBufferData(UINT iSwapChain, IDirect3DSurface9* pDestSurface); + + HRESULT STDMETHODCALLTYPE StretchRect( + IDirect3DSurface9* pSourceSurface, + const RECT* pSourceRect, + IDirect3DSurface9* pDestSurface, + const RECT* pDestRect, + D3DTEXTUREFILTERTYPE Filter); + + HRESULT STDMETHODCALLTYPE ColorFill( + IDirect3DSurface9* pSurface, + const RECT* pRect, + D3DCOLOR Color); + + HRESULT STDMETHODCALLTYPE CreateOffscreenPlainSurface( + UINT Width, + UINT Height, + D3DFORMAT Format, + D3DPOOL Pool, + IDirect3DSurface9** ppSurface, + HANDLE* pSharedHandle); + + HRESULT STDMETHODCALLTYPE SetRenderTarget( + DWORD RenderTargetIndex, + IDirect3DSurface9* pRenderTarget); + + HRESULT STDMETHODCALLTYPE GetRenderTarget( + DWORD RenderTargetIndex, + IDirect3DSurface9** ppRenderTarget); + + HRESULT STDMETHODCALLTYPE SetDepthStencilSurface(IDirect3DSurface9* pNewZStencil); + + HRESULT STDMETHODCALLTYPE GetDepthStencilSurface(IDirect3DSurface9** ppZStencilSurface); + + HRESULT STDMETHODCALLTYPE BeginScene(); + + HRESULT STDMETHODCALLTYPE EndScene(); + + HRESULT STDMETHODCALLTYPE Clear( + DWORD Count, + const 
D3DRECT* pRects, + DWORD Flags, + D3DCOLOR Color, + float Z, + DWORD Stencil); + + HRESULT STDMETHODCALLTYPE SetTransform(D3DTRANSFORMSTATETYPE State, const D3DMATRIX* pMatrix); + + HRESULT STDMETHODCALLTYPE GetTransform(D3DTRANSFORMSTATETYPE State, D3DMATRIX* pMatrix); + + HRESULT STDMETHODCALLTYPE MultiplyTransform(D3DTRANSFORMSTATETYPE TransformState, const D3DMATRIX* pMatrix); + + HRESULT STDMETHODCALLTYPE SetViewport(const D3DVIEWPORT9* pViewport); + + HRESULT STDMETHODCALLTYPE GetViewport(D3DVIEWPORT9* pViewport); + + HRESULT STDMETHODCALLTYPE SetMaterial(const D3DMATERIAL9* pMaterial); + + HRESULT STDMETHODCALLTYPE GetMaterial(D3DMATERIAL9* pMaterial); + + HRESULT STDMETHODCALLTYPE SetLight(DWORD Index, const D3DLIGHT9* pLight); + + HRESULT STDMETHODCALLTYPE GetLight(DWORD Index, D3DLIGHT9* pLight); + + HRESULT STDMETHODCALLTYPE LightEnable(DWORD Index, BOOL Enable); + + HRESULT STDMETHODCALLTYPE GetLightEnable(DWORD Index, BOOL* pEnable); + + HRESULT STDMETHODCALLTYPE SetClipPlane(DWORD Index, const float* pPlane); + + HRESULT STDMETHODCALLTYPE GetClipPlane(DWORD Index, float* pPlane); + + HRESULT STDMETHODCALLTYPE SetRenderState(D3DRENDERSTATETYPE State, DWORD Value); + + HRESULT STDMETHODCALLTYPE GetRenderState(D3DRENDERSTATETYPE State, DWORD* pValue); + + HRESULT STDMETHODCALLTYPE CreateStateBlock( + D3DSTATEBLOCKTYPE Type, + IDirect3DStateBlock9** ppSB); + + HRESULT STDMETHODCALLTYPE BeginStateBlock(); + + HRESULT STDMETHODCALLTYPE EndStateBlock(IDirect3DStateBlock9** ppSB); + + HRESULT STDMETHODCALLTYPE SetClipStatus(const D3DCLIPSTATUS9* pClipStatus); + + HRESULT STDMETHODCALLTYPE GetClipStatus(D3DCLIPSTATUS9* pClipStatus); + + HRESULT STDMETHODCALLTYPE GetTexture(DWORD Stage, IDirect3DBaseTexture9** ppTexture); + + HRESULT STDMETHODCALLTYPE SetTexture(DWORD Stage, IDirect3DBaseTexture9* pTexture); + + HRESULT STDMETHODCALLTYPE GetTextureStageState( + DWORD Stage, + D3DTEXTURESTAGESTATETYPE Type, + DWORD* pValue); + + HRESULT STDMETHODCALLTYPE 
SetTextureStageState( + DWORD Stage, + D3DTEXTURESTAGESTATETYPE Type, + DWORD Value); + + HRESULT STDMETHODCALLTYPE GetSamplerState( + DWORD Sampler, + D3DSAMPLERSTATETYPE Type, + DWORD* pValue); + + HRESULT STDMETHODCALLTYPE SetSamplerState( + DWORD Sampler, + D3DSAMPLERSTATETYPE Type, + DWORD Value); + + HRESULT STDMETHODCALLTYPE ValidateDevice(DWORD* pNumPasses); + + HRESULT STDMETHODCALLTYPE SetPaletteEntries(UINT PaletteNumber, const PALETTEENTRY* pEntries); + + HRESULT STDMETHODCALLTYPE GetPaletteEntries(UINT PaletteNumber, PALETTEENTRY* pEntries); + + HRESULT STDMETHODCALLTYPE SetCurrentTexturePalette(UINT PaletteNumber); + + HRESULT STDMETHODCALLTYPE GetCurrentTexturePalette(UINT *PaletteNumber); + + HRESULT STDMETHODCALLTYPE SetScissorRect(const RECT* pRect); + + HRESULT STDMETHODCALLTYPE GetScissorRect(RECT* pRect); + + HRESULT STDMETHODCALLTYPE SetSoftwareVertexProcessing(BOOL bSoftware); + + BOOL STDMETHODCALLTYPE GetSoftwareVertexProcessing(); + + HRESULT STDMETHODCALLTYPE SetNPatchMode(float nSegments); + + float STDMETHODCALLTYPE GetNPatchMode(); + + HRESULT STDMETHODCALLTYPE DrawPrimitive( + D3DPRIMITIVETYPE PrimitiveType, + UINT StartVertex, + UINT PrimitiveCount); + + HRESULT STDMETHODCALLTYPE DrawIndexedPrimitive( + D3DPRIMITIVETYPE PrimitiveType, + INT BaseVertexIndex, + UINT MinVertexIndex, + UINT NumVertices, + UINT StartIndex, + UINT PrimitiveCount); + + HRESULT STDMETHODCALLTYPE DrawPrimitiveUP( + D3DPRIMITIVETYPE PrimitiveType, + UINT PrimitiveCount, + const void* pVertexStreamZeroData, + UINT VertexStreamZeroStride); + + HRESULT STDMETHODCALLTYPE DrawIndexedPrimitiveUP( + D3DPRIMITIVETYPE PrimitiveType, + UINT MinVertexIndex, + UINT NumVertices, + UINT PrimitiveCount, + const void* pIndexData, + D3DFORMAT IndexDataFormat, + const void* pVertexStreamZeroData, + UINT VertexStreamZeroStride); + + HRESULT STDMETHODCALLTYPE ProcessVertices( + UINT SrcStartIndex, + UINT DestIndex, + UINT VertexCount, + IDirect3DVertexBuffer9* pDestBuffer, + 
IDirect3DVertexDeclaration9* pVertexDecl, + DWORD Flags); + + HRESULT STDMETHODCALLTYPE CreateVertexDeclaration( + const D3DVERTEXELEMENT9* pVertexElements, + IDirect3DVertexDeclaration9** ppDecl); + + HRESULT STDMETHODCALLTYPE SetVertexDeclaration(IDirect3DVertexDeclaration9* pDecl); + + HRESULT STDMETHODCALLTYPE GetVertexDeclaration(IDirect3DVertexDeclaration9** ppDecl); + + HRESULT STDMETHODCALLTYPE SetFVF(DWORD FVF); + + HRESULT STDMETHODCALLTYPE GetFVF(DWORD* pFVF); + + HRESULT STDMETHODCALLTYPE CreateVertexShader( + const DWORD* pFunction, + IDirect3DVertexShader9** ppShader); + + HRESULT STDMETHODCALLTYPE SetVertexShader(IDirect3DVertexShader9* pShader); + + HRESULT STDMETHODCALLTYPE GetVertexShader(IDirect3DVertexShader9** ppShader); + + HRESULT STDMETHODCALLTYPE SetVertexShaderConstantF( + UINT StartRegister, + const float* pConstantData, + UINT Vector4fCount); + + HRESULT STDMETHODCALLTYPE GetVertexShaderConstantF( + UINT StartRegister, + float* pConstantData, + UINT Vector4fCount); + + HRESULT STDMETHODCALLTYPE SetVertexShaderConstantI( + UINT StartRegister, + const int* pConstantData, + UINT Vector4iCount); + + HRESULT STDMETHODCALLTYPE GetVertexShaderConstantI( + UINT StartRegister, + int* pConstantData, + UINT Vector4iCount); + + HRESULT STDMETHODCALLTYPE SetVertexShaderConstantB( + UINT StartRegister, + const BOOL* pConstantData, + UINT BoolCount); + + HRESULT STDMETHODCALLTYPE GetVertexShaderConstantB( + UINT StartRegister, + BOOL* pConstantData, + UINT BoolCount); + + HRESULT STDMETHODCALLTYPE SetStreamSource( + UINT StreamNumber, + IDirect3DVertexBuffer9* pStreamData, + UINT OffsetInBytes, + UINT Stride); + + HRESULT STDMETHODCALLTYPE GetStreamSource( + UINT StreamNumber, + IDirect3DVertexBuffer9** ppStreamData, + UINT* pOffsetInBytes, + UINT* pStride); + + HRESULT STDMETHODCALLTYPE SetStreamSourceFreq(UINT StreamNumber, UINT Setting); + + HRESULT STDMETHODCALLTYPE GetStreamSourceFreq(UINT StreamNumber, UINT* pSetting); + + HRESULT 
STDMETHODCALLTYPE SetIndices(IDirect3DIndexBuffer9* pIndexData); + + HRESULT STDMETHODCALLTYPE GetIndices(IDirect3DIndexBuffer9** ppIndexData); + + HRESULT STDMETHODCALLTYPE CreatePixelShader( + const DWORD* pFunction, + IDirect3DPixelShader9** ppShader); + + HRESULT STDMETHODCALLTYPE SetPixelShader(IDirect3DPixelShader9* pShader); + + HRESULT STDMETHODCALLTYPE GetPixelShader(IDirect3DPixelShader9** ppShader); + + HRESULT STDMETHODCALLTYPE SetPixelShaderConstantF( + UINT StartRegister, + const float* pConstantData, + UINT Vector4fCount); + + HRESULT STDMETHODCALLTYPE GetPixelShaderConstantF( + UINT StartRegister, + float* pConstantData, + UINT Vector4fCount); + + HRESULT STDMETHODCALLTYPE SetPixelShaderConstantI( + UINT StartRegister, + const int* pConstantData, + UINT Vector4iCount); + + HRESULT STDMETHODCALLTYPE GetPixelShaderConstantI( + UINT StartRegister, + int* pConstantData, + UINT Vector4iCount); + + HRESULT STDMETHODCALLTYPE SetPixelShaderConstantB( + UINT StartRegister, + const BOOL* pConstantData, + UINT BoolCount); + + HRESULT STDMETHODCALLTYPE GetPixelShaderConstantB( + UINT StartRegister, + BOOL* pConstantData, + UINT BoolCount); + + HRESULT STDMETHODCALLTYPE DrawRectPatch( + UINT Handle, + const float* pNumSegs, + const D3DRECTPATCH_INFO* pRectPatchInfo); + + HRESULT STDMETHODCALLTYPE DrawTriPatch( + UINT Handle, + const float* pNumSegs, + const D3DTRIPATCH_INFO* pTriPatchInfo); + + HRESULT STDMETHODCALLTYPE DeletePatch(UINT Handle); + + HRESULT STDMETHODCALLTYPE CreateQuery(D3DQUERYTYPE Type, IDirect3DQuery9** ppQuery); + + // Ex Methods + + HRESULT STDMETHODCALLTYPE SetConvolutionMonoKernel( + UINT width, + UINT height, + float* rows, + float* columns); + + HRESULT STDMETHODCALLTYPE ComposeRects( + IDirect3DSurface9* pSrc, + IDirect3DSurface9* pDst, + IDirect3DVertexBuffer9* pSrcRectDescs, + UINT NumRects, + IDirect3DVertexBuffer9* pDstRectDescs, + D3DCOMPOSERECTSOP Operation, + int Xoffset, + int Yoffset); + + HRESULT STDMETHODCALLTYPE 
GetGPUThreadPriority(INT* pPriority); + + HRESULT STDMETHODCALLTYPE SetGPUThreadPriority(INT Priority); + + HRESULT STDMETHODCALLTYPE WaitForVBlank(UINT iSwapChain); + + HRESULT STDMETHODCALLTYPE CheckResourceResidency(IDirect3DResource9** pResourceArray, UINT32 NumResources); + + HRESULT STDMETHODCALLTYPE SetMaximumFrameLatency(UINT MaxLatency); + + HRESULT STDMETHODCALLTYPE GetMaximumFrameLatency(UINT* pMaxLatency); + + HRESULT STDMETHODCALLTYPE CheckDeviceState(HWND hDestinationWindow); + + HRESULT STDMETHODCALLTYPE PresentEx( + const RECT* pSourceRect, + const RECT* pDestRect, + HWND hDestWindowOverride, + const RGNDATA* pDirtyRegion, + DWORD dwFlags); + + HRESULT STDMETHODCALLTYPE CreateRenderTargetEx( + UINT Width, + UINT Height, + D3DFORMAT Format, + D3DMULTISAMPLE_TYPE MultiSample, + DWORD MultisampleQuality, + BOOL Lockable, + IDirect3DSurface9** ppSurface, + HANDLE* pSharedHandle, + DWORD Usage); + + HRESULT STDMETHODCALLTYPE CreateOffscreenPlainSurfaceEx( + UINT Width, + UINT Height, + D3DFORMAT Format, + D3DPOOL Pool, + IDirect3DSurface9** ppSurface, + HANDLE* pSharedHandle, + DWORD Usage); + + HRESULT STDMETHODCALLTYPE CreateDepthStencilSurfaceEx( + UINT Width, + UINT Height, + D3DFORMAT Format, + D3DMULTISAMPLE_TYPE MultiSample, + DWORD MultisampleQuality, + BOOL Discard, + IDirect3DSurface9** ppSurface, + HANDLE* pSharedHandle, + DWORD Usage); + + HRESULT STDMETHODCALLTYPE ResetEx( + D3DPRESENT_PARAMETERS* pPresentationParameters, + D3DDISPLAYMODEEX* pFullscreenDisplayMode); + + HRESULT STDMETHODCALLTYPE GetDisplayModeEx( + UINT iSwapChain, + D3DDISPLAYMODEEX* pMode, + D3DDISPLAYROTATION* pRotation); + + HRESULT STDMETHODCALLTYPE CreateAdditionalSwapChainEx( + D3DPRESENT_PARAMETERS* pPresentationParameters, + const D3DDISPLAYMODEEX* pFullscreenDisplayMode, + IDirect3DSwapChain9** ppSwapChain); + + HRESULT SetStateSamplerState( + DWORD StateSampler, + D3DSAMPLERSTATETYPE Type, + DWORD Value); + + HRESULT SetStateTexture(DWORD StateSampler, 
    /**
     * \brief Locks a subresource of an image
     *
     * The subresource is selected by the \c Face and \c Mip arguments.
     *
     * \param [in] pResource The texture whose subresource is locked
     * \param [in] Face Face index of the subresource
     *             (NOTE(review): presumably the cube face / array layer - confirm against the implementation)
     * \param [in] Mip Mip level of the subresource
     * \param [out] pLockedBox The returned locked box of the image, containing data ptr and strides
     * \param [in] pBox The region of the subresource to lock. This offsets the returned data ptr
     * \param [in] Flags The D3DLOCK_* flags to lock the image with
     * \returns \c D3D_OK if the parameters are valid or D3DERR_INVALIDCALL if it fails.
     */
    HRESULT LockImage(
            D3D9CommonTexture* pResource,
            UINT Face,
            UINT Mip,
            D3DLOCKED_BOX* pLockedBox,
      const D3DBOX* pBox,
            DWORD Flags);
+ */ + HRESULT UnlockImage( + D3D9CommonTexture* pResource, + UINT Face, + UINT MipLevel); + + HRESULT FlushImage( + D3D9CommonTexture* pResource, + UINT Subresource); + + void EmitGenerateMips( + D3D9CommonTexture* pResource); + + HRESULT LockBuffer( + D3D9CommonBuffer* pResource, + UINT OffsetToLock, + UINT SizeToLock, + void** ppbData, + DWORD Flags); + + HRESULT FlushBuffer( + D3D9CommonBuffer* pResource); + + HRESULT UnlockBuffer( + D3D9CommonBuffer* pResource); + + void SetupFPU(); + + int64_t DetermineInitialTextureMemory(); + + Rc<DxvkBuffer> CreateConstantBuffer( + bool SSBO, + VkDeviceSize Size, + DxsoProgramType ShaderStage, + DxsoConstantBuffers BufferType); + + void CreateConstantBuffers(); + + void SynchronizeCsThread(); + + void Flush(); + + void UpdateBoundRTs(uint32_t index); + + void UpdateActiveRTs(uint32_t index); + + void UpdateActiveTextures(uint32_t index, DWORD combinedUsage); + + void UpdateActiveHazardsRT(uint32_t rtMask); + + void UpdateActiveHazardsDS(uint32_t texMask); + + void MarkRenderHazards(); + + void UploadManagedTexture(D3D9CommonTexture* pResource); + + void UploadManagedTextures(uint32_t mask); + + void GenerateTextureMips(uint32_t mask); + + void MarkTextureMipsDirty(D3D9CommonTexture* pResource); + + void MarkTextureMipsUnDirty(D3D9CommonTexture* pResource); + + void MarkTextureUploaded(D3D9CommonTexture* pResource); + + template <bool Points> + void UpdatePointMode(); + + void UpdateFog(); + + void BindFramebuffer(); + + void BindViewportAndScissor(); + + inline bool IsAlphaToCoverageEnabled() { + const bool alphaTest = m_state.renderStates[D3DRS_ALPHATESTENABLE] != 0; + + return m_amdATOC || (m_nvATOC && alphaTest); + } + + inline bool IsDepthBiasEnabled() { + const auto& rs = m_state.renderStates; + + float depthBias = bit::cast<float>(rs[D3DRS_DEPTHBIAS]); + float slopeScaledDepthBias = bit::cast<float>(rs[D3DRS_SLOPESCALEDEPTHBIAS]); + + return depthBias != 0.0f || slopeScaledDepthBias != 0.0f; + } + + inline bool 
IsAlphaTestEnabled() { + return m_state.renderStates[D3DRS_ALPHATESTENABLE] && !IsAlphaToCoverageEnabled(); + } + + inline bool IsZTestEnabled() { + return m_state.renderStates[D3DRS_ZENABLE] && m_state.depthStencil != nullptr; + } + + inline bool IsClipPlaneEnabled() { + return m_state.renderStates[D3DRS_CLIPPLANEENABLE] != 0; + } + + void BindMultiSampleState(); + + void BindBlendState(); + + void BindBlendFactor(); + + void BindDepthStencilState(); + + void BindDepthStencilRefrence(); + + void BindRasterizerState(); + + void BindDepthBias(); + + void BindAlphaTestState(); + + template <DxsoProgramType ShaderStage, typename HardwareLayoutType, typename SoftwareLayoutType, typename ShaderType> + inline void UploadHardwareConstantSet(void* pData, const SoftwareLayoutType& Src, const ShaderType& Shader); + + template <typename SoftwareLayoutType, typename ShaderType> + inline void UploadSoftwareConstantSet(void* pData, const SoftwareLayoutType& Src, const D3D9ConstantLayout& Layout, const ShaderType& Shader); + + template <DxsoProgramType ShaderStage, typename HardwareLayoutType, typename SoftwareLayoutType, typename ShaderType> + inline void UploadConstantSet(const SoftwareLayoutType& Src, const D3D9ConstantLayout& Layout, const ShaderType& Shader); + + template <DxsoProgramType ShaderStage> + void UploadConstants(); + + void UpdateClipPlanes(); + + template <uint32_t Offset, uint32_t Length> + void UpdatePushConstant(const void* pData); + + template <D3D9RenderStateItem Item> + void UpdatePushConstant(); + + void BindSampler(DWORD Sampler); + + void BindTexture(DWORD SamplerSampler); + + void UnbindTextures(uint32_t mask); + + void UndirtySamplers(uint32_t mask); + + void UndirtyTextures(uint32_t usedMask); + + void MarkTextureBindingDirty(IDirect3DBaseTexture9* texture); + + D3D9DrawInfo GenerateDrawInfo( + D3DPRIMITIVETYPE PrimitiveType, + UINT PrimitiveCount, + UINT InstanceCount); + + uint32_t GetInstanceCount() const; + + void PrepareDraw(D3DPRIMITIVETYPE 
PrimitiveType); + + template <DxsoProgramType ShaderStage> + void BindShader( + const D3D9CommonShader* pShaderModule, + D3D9ShaderPermutation Permutation); + + void BindInputLayout(); + + void BindVertexBuffer( + UINT Slot, + D3D9VertexBuffer* pBuffer, + UINT Offset, + UINT Stride); + + void BindIndices(); + + D3D9DeviceLock LockDevice() { + return m_multithread.AcquireLock(); + } + + const D3D9Options* GetOptions() const { + return &m_d3d9Options; + } + + Direct3DState9* GetRawState() { + return &m_state; + } + + void Begin(D3D9Query* pQuery); + void End(D3D9Query* pQuery); + + void SetVertexBoolBitfield(uint32_t idx, uint32_t mask, uint32_t bits); + void SetPixelBoolBitfield (uint32_t idx, uint32_t mask, uint32_t bits); + + void FlushImplicit(BOOL StrongHint); + + bool ChangeReportedMemory(int64_t delta) { + if (IsExtended()) + return true; + + int64_t availableMemory = m_availableMemory.fetch_add(delta); + + return !m_d3d9Options.memoryTrackTest || availableMemory >= delta; + } + + void ResolveZ(); + + void TransitionImage(D3D9CommonTexture* pResource, VkImageLayout NewLayout); + + void TransformImage( + D3D9CommonTexture* pResource, + const VkImageSubresourceRange* pSubresources, + VkImageLayout OldLayout, + VkImageLayout NewLayout); + + const D3D9ConstantLayout& GetVertexConstantLayout() { return m_vsLayout; } + const D3D9ConstantLayout& GetPixelConstantLayout() { return m_psLayout; } + + HRESULT ResetState(D3DPRESENT_PARAMETERS* pPresentationParameters); + HRESULT ResetSwapChain(D3DPRESENT_PARAMETERS* pPresentationParameters, D3DDISPLAYMODEEX* pFullscreenDisplayMode); + + HRESULT InitialReset(D3DPRESENT_PARAMETERS* pPresentationParameters, D3DDISPLAYMODEEX* pFullscreenDisplayMode); + + UINT GetSamplerCount() const { + return m_samplerCount.load(); + } + + private: + + DxvkCsChunkRef AllocCsChunk() { + DxvkCsChunk* chunk = m_csChunkPool.allocChunk(DxvkCsChunkFlag::SingleUse); + return DxvkCsChunkRef(chunk, &m_csChunkPool); + } + + template<typename Cmd> + 
void EmitCs(Cmd&& command) { + if (unlikely(!m_csChunk->push(command))) { + EmitCsChunk(std::move(m_csChunk)); + + m_csChunk = AllocCsChunk(); + m_csChunk->push(command); + } + } + + void EmitCsChunk(DxvkCsChunkRef&& chunk); + + void FlushCsChunk() { + if (likely(!m_csChunk->empty())) { + EmitCsChunk(std::move(m_csChunk)); + m_csChunk = AllocCsChunk(); + } + } + + bool CanSWVP() { + if (!config::SWVPEnabled) + return false; + + return m_behaviorFlags & (D3DCREATE_MIXED_VERTEXPROCESSING | D3DCREATE_SOFTWARE_VERTEXPROCESSING); + } + + inline constexpr D3D9ShaderPermutation GetVertexShaderPermutation() { + return D3D9ShaderPermutations::None; + } + + inline D3D9ShaderPermutation GetPixelShaderPermutation() { + if (unlikely(m_state.renderStates[D3DRS_SHADEMODE] == D3DSHADE_FLAT)) + return D3D9ShaderPermutations::FlatShade; + + return D3D9ShaderPermutations::None; + } + + void DetermineConstantLayouts(bool canSWVP); + + template<bool UpBuffer> + D3D9BufferSlice AllocTempBuffer(VkDeviceSize size); + + bool ShouldRecord(); + + HRESULT CreateShaderModule( + D3D9CommonShader* pShaderModule, + VkShaderStageFlagBits ShaderStage, + const DWORD* pShaderBytecode, + const DxsoModuleInfo* pModuleInfo); + + inline uint32_t GetUPDataSize(uint32_t vertexCount, uint32_t stride) { + return vertexCount * stride; + } + + inline uint32_t GetUPBufferSize(uint32_t vertexCount, uint32_t stride) { + return (vertexCount - 1) * stride + std::max(m_state.vertexDecl->GetSize(), stride); + } + + inline void FillUPVertexBuffer(void* buffer, const void* userData, uint32_t dataSize, uint32_t bufferSize) { + uint8_t* data = reinterpret_cast<uint8_t*>(buffer); + // Don't copy excess data if we don't end up needing it. + dataSize = std::min(dataSize, bufferSize); + std::memcpy(data, userData, dataSize); + // Pad out with 0 to make buffer range checks happy + // Some games have components out of range in the vertex decl + // that they don't read from the shader. 
+ // My tests show that these are read back as 0 always if out of range of + // the dataSize. + // + // So... make the actual buffer the range that satisfies the range of the vertex + // declaration and pad with 0s outside of it. + if (dataSize < bufferSize) + std::memset(data + dataSize, 0, bufferSize - dataSize); + } + + // So we don't do OOB. + template <DxsoProgramType ProgramType, + D3D9ConstantType ConstantType> + inline static constexpr uint32_t DetermineSoftwareRegCount() { + constexpr bool isVS = ProgramType == DxsoProgramType::VertexShader; + + switch (ConstantType) { + default: + case D3D9ConstantType::Float: return isVS ? caps::MaxFloatConstantsSoftware : caps::MaxFloatConstantsPS; + case D3D9ConstantType::Int: return isVS ? caps::MaxOtherConstantsSoftware : caps::MaxOtherConstants; + case D3D9ConstantType::Bool: return isVS ? caps::MaxOtherConstantsSoftware : caps::MaxOtherConstants; + } + } + + // So we don't copy more than we need. + template <DxsoProgramType ProgramType, + D3D9ConstantType ConstantType> + inline uint32_t DetermineHardwareRegCount() const { + const auto& layout = ProgramType == DxsoProgramType::VertexShader + ? 
m_vsLayout : m_psLayout; + + switch (ConstantType) { + default: + case D3D9ConstantType::Float: return layout.floatCount; + case D3D9ConstantType::Int: return layout.intCount; + case D3D9ConstantType::Bool: return layout.boolCount; + } + } + + inline uint32_t GetFrameLatency() { + return m_frameLatency; + } + + template < + DxsoProgramType ProgramType, + D3D9ConstantType ConstantType, + typename T> + HRESULT SetShaderConstants( + UINT StartRegister, + const T* pConstantData, + UINT Count); + + template < + DxsoProgramType ProgramType, + D3D9ConstantType ConstantType, + typename T> + HRESULT GetShaderConstants( + UINT StartRegister, + T* pConstantData, + UINT Count) { + auto GetHelper = [&] (const auto& set) { + const uint32_t regCountHardware = DetermineHardwareRegCount<ProgramType, ConstantType>(); + constexpr uint32_t regCountSoftware = DetermineSoftwareRegCount<ProgramType, ConstantType>(); + + if (StartRegister + Count > regCountSoftware) + return D3DERR_INVALIDCALL; + + Count = UINT( + std::max<INT>( + std::clamp<INT>(Count + StartRegister, 0, regCountHardware) - INT(StartRegister), + 0)); + + if (Count == 0) + return D3D_OK; + + if (pConstantData == nullptr) + return D3DERR_INVALIDCALL; + + if constexpr (ConstantType == D3D9ConstantType::Float) { + const float* source = set.fConsts[StartRegister].data; + const size_t size = Count * sizeof(Vector4); + + std::memcpy(pConstantData, source, size); + } + else if constexpr (ConstantType == D3D9ConstantType::Int) { + const int* source = set.iConsts[StartRegister].data; + const size_t size = Count * sizeof(Vector4i); + + std::memcpy(pConstantData, source, size); + } + else { + for (uint32_t i = 0; i < Count; i++) { + const uint32_t constantIdx = StartRegister + i; + const uint32_t arrayIdx = constantIdx / 32; + const uint32_t bitIdx = constantIdx % 32; + + const uint32_t bit = (1u << bitIdx); + + bool constValue = set.bConsts[arrayIdx] & bit; + pConstantData[i] = constValue ? 
TRUE : FALSE; + } + } + + return D3D_OK; + }; + + return ProgramType == DxsoProgramTypes::VertexShader + ? GetHelper(m_state.vsConsts) + : GetHelper(m_state.psConsts); + } + + void UpdateFixedFunctionVS(); + + void UpdateFixedFunctionPS(); + + void ApplyPrimitiveType( + DxvkContext* pContext, + D3DPRIMITIVETYPE PrimType); + + bool UseProgrammableVS(); + + bool UseProgrammablePS(); + + void UpdateBoolSpecConstantVertex(uint32_t value); + + void UpdateBoolSpecConstantPixel(uint32_t value); + + void UpdateSamplerSpecConsant(uint32_t value); + + void UpdateProjectionSpecConstant(uint32_t value); + + void UpdateFetch4SpecConstant(uint32_t value); + + void UpdateSamplerDepthModeSpecConstant(uint32_t value); + + Com<D3D9InterfaceEx> m_parent; + D3DDEVTYPE m_deviceType; + HWND m_window; + WORD m_behaviorFlags; + D3DPRESENT_PARAMETERS m_presentParams; + + D3D9Adapter* m_adapter; + Rc<DxvkDevice> m_dxvkDevice; + + uint32_t m_frameLatency = DefaultFrameLatency; + + D3D9Initializer* m_initializer = nullptr; + D3D9FormatHelper* m_converter = nullptr; + + D3D9FFShaderModuleSet m_ffModules; + D3D9SWVPEmulator m_swvpEmulator; + + Com<D3D9StateBlock, false> m_recorder; + + Rc<D3D9ShaderModuleSet> m_shaderModules; + + Rc<DxvkBuffer> m_vsClipPlanes; + + Rc<DxvkBuffer> m_vsFixedFunction; + Rc<DxvkBuffer> m_vsVertexBlend; + Rc<DxvkBuffer> m_psFixedFunction; + Rc<DxvkBuffer> m_psShared; + + D3D9BufferSlice m_upBuffer; + D3D9BufferSlice m_managedUploadBuffer; + + D3D9Cursor m_cursor; + + Com<D3D9Surface, false> m_autoDepthStencil; + + Com<D3D9SwapChainEx, false> m_implicitSwapchain; + + const D3D9Options m_d3d9Options; + DxsoOptions m_dxsoOptions; + + std::unordered_map< + D3D9SamplerKey, + Rc<DxvkSampler>, + D3D9SamplerKeyHash, + D3D9SamplerKeyEq> m_samplers; + + std::unordered_map< + DWORD, + Com<D3D9VertexDecl, + false>> m_fvfTable; + + D3D9Multithread m_multithread; + D3D9InputAssemblyState m_iaState; + + D3D9DeviceFlags m_flags; + // Last state of depth textures. 
Doesn't update when NULL is bound. + // & with m_activeTextures to normalize. + uint32_t m_instancedData = 0; + + uint32_t m_depthTextures = 0; + uint32_t m_textureTypes = 0; + uint32_t m_projectionBitfield = 0; + + uint32_t m_dirtySamplerStates = 0; + uint32_t m_dirtyTextures = 0; + + uint32_t m_boundRTs = 0; + + uint32_t m_activeRTs = 0; + uint32_t m_activeRTTextures = 0; + uint32_t m_activeDSTextures = 0; + uint32_t m_activeHazardsRT = 0; + uint32_t m_activeHazardsDS = 0; + uint32_t m_alphaSwizzleRTs = 0; + uint32_t m_activeTextures = 0; + uint32_t m_activeTexturesToUpload = 0; + uint32_t m_activeTexturesToGen = 0; + + uint32_t m_fetch4Enabled = 0; + uint32_t m_fetch4 = 0; + + uint32_t m_lastBoolSpecConstantVertex = 0; + uint32_t m_lastBoolSpecConstantPixel = 0; + uint32_t m_lastSamplerDepthMode = 0; + uint32_t m_lastProjectionBitfield = 0; + uint32_t m_lastSamplerTypes = 0; + uint32_t m_lastPointMode = 0; + uint32_t m_lastFetch4 = 0; + uint32_t m_lastHazardsDS = 0; + uint32_t m_lastSamplerTypesFF = 0; + + D3D9ShaderMasks m_vsShaderMasks = D3D9ShaderMasks(); + D3D9ShaderMasks m_psShaderMasks = FixedFunctionMask; + + bool m_isSWVP; + bool m_amdATOC = false; + bool m_nvATOC = false; + bool m_ffZTest = false; + + float m_depthBiasScale = 0.0f; + + D3D9ConstantLayout m_vsLayout; + D3D9ConstantLayout m_psLayout; + D3D9ConstantSets m_consts[DxsoProgramTypes::Count]; + + D3D9ViewportInfo m_viewportInfo; + + DxvkCsChunkPool m_csChunkPool; + dxvk::high_resolution_clock::time_point m_lastFlush + = dxvk::high_resolution_clock::now(); + DxvkCsThread m_csThread; + DxvkCsChunkRef m_csChunk; + bool m_csIsBusy = false; + + std::atomic<int64_t> m_availableMemory = { 0 }; + std::atomic<int32_t> m_samplerCount = { 0 }; + + Direct3DState9 m_state; + + }; + +} diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_device_child.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_device_child.h new file mode 100644 index 00000000..433a269a --- /dev/null +++ 
b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_device_child.h @@ -0,0 +1,61 @@ +#pragma once + +#include "d3d9_include.h" + +namespace dxvk { + + class D3D9DeviceEx; + + template <typename Base> + class D3D9DeviceChild : public ComObjectClamp<Base> { + + public: + + D3D9DeviceChild(D3D9DeviceEx* pDevice) + : m_parent( pDevice ) { } + + ULONG STDMETHODCALLTYPE AddRef() { + uint32_t refCount = this->m_refCount++; + if (unlikely(!refCount)) { + this->AddRefPrivate(); + GetDevice()->AddRef(); + } + + return refCount + 1; + } + + ULONG STDMETHODCALLTYPE Release() { + uint32_t refCount = --this->m_refCount; + if (unlikely(!refCount)) { + auto* pDevice = GetDevice(); + this->ReleasePrivate(); + pDevice->Release(); + } + return refCount; + } + + HRESULT STDMETHODCALLTYPE GetDevice(IDirect3DDevice9** ppDevice) { + InitReturnPtr(ppDevice); + + if (ppDevice == nullptr) + return D3DERR_INVALIDCALL; + + *ppDevice = ref(GetDevice()); + return D3D_OK; + } + + IDirect3DDevice9Ex* GetDevice() { + return reinterpret_cast<IDirect3DDevice9Ex*>(m_parent); + } + + D3D9DeviceEx* GetParent() { + return m_parent; + } + + protected: + + D3D9DeviceEx* m_parent; + + }; + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_fixed_function.cpp b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_fixed_function.cpp new file mode 100644 index 00000000..e1f9b536 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_fixed_function.cpp @@ -0,0 +1,2486 @@ +#include "d3d9_fixed_function.h" + +#include "d3d9_device.h" +#include "d3d9_util.h" +#include "d3d9_spec_constants.h" + +#include "../dxvk/dxvk_hash.h" +#include "../dxvk/dxvk_spec_const.h" + +#include "../spirv/spirv_module.h" + +#include <cfloat> + +namespace dxvk { + + D3D9FixedFunctionOptions::D3D9FixedFunctionOptions(const D3D9Options* options) { + invariantPosition = options->invariantPosition; + } + + uint32_t DoFixedFunctionFog(SpirvModule& spvModule, const D3D9FogContext& fogCtx) { + uint32_t floatType = spvModule.defFloatType(32); + uint32_t uint32Type = spvModule.defIntType(32, 0); + uint32_t vec3Type = spvModule.defVectorType(floatType, 3); + uint32_t vec4Type = spvModule.defVectorType(floatType, 4); + uint32_t floatPtr = spvModule.defPointerType(floatType, spv::StorageClassPushConstant); + uint32_t vec3Ptr = spvModule.defPointerType(vec3Type, spv::StorageClassPushConstant); + + uint32_t fogColorMember = spvModule.constu32(uint32_t(D3D9RenderStateItem::FogColor)); + uint32_t fogColor = spvModule.opLoad(vec3Type, + spvModule.opAccessChain(vec3Ptr, fogCtx.RenderState, 1, &fogColorMember)); + + uint32_t fogScaleMember = spvModule.constu32(uint32_t(D3D9RenderStateItem::FogScale)); + uint32_t fogScale = spvModule.opLoad(floatType, + spvModule.opAccessChain(floatPtr, fogCtx.RenderState, 1, &fogScaleMember)); + + uint32_t fogEndMember = spvModule.constu32(uint32_t(D3D9RenderStateItem::FogEnd)); + uint32_t fogEnd = spvModule.opLoad(floatType, + spvModule.opAccessChain(floatPtr, fogCtx.RenderState, 1, &fogEndMember)); + + uint32_t fogDensityMember = spvModule.constu32(uint32_t(D3D9RenderStateItem::FogDensity)); + uint32_t fogDensity = 
spvModule.opLoad(floatType, + spvModule.opAccessChain(floatPtr, fogCtx.RenderState, 1, &fogDensityMember)); + + uint32_t fogMode = spvModule.specConst32(uint32Type, 0); + + if (!fogCtx.IsPixel) { + spvModule.setDebugName(fogMode, "vertex_fog_mode"); + spvModule.decorateSpecId(fogMode, getSpecId(D3D9SpecConstantId::VertexFogMode)); + } + else { + spvModule.setDebugName(fogMode, "pixel_fog_mode"); + spvModule.decorateSpecId(fogMode, getSpecId(D3D9SpecConstantId::PixelFogMode)); + } + + uint32_t fogEnabled = spvModule.specConstBool(false); + spvModule.setDebugName(fogEnabled, "fog_enabled"); + spvModule.decorateSpecId(fogEnabled, getSpecId(D3D9SpecConstantId::FogEnabled)); + + uint32_t doFog = spvModule.allocateId(); + uint32_t skipFog = spvModule.allocateId(); + + uint32_t returnType = fogCtx.IsPixel ? vec4Type : floatType; + uint32_t returnTypePtr = spvModule.defPointerType(returnType, spv::StorageClassPrivate); + uint32_t returnValuePtr = spvModule.newVar(returnTypePtr, spv::StorageClassPrivate); + spvModule.opStore(returnValuePtr, fogCtx.IsPixel ? fogCtx.oColor : spvModule.constf32(0.0f)); + + // Actually do the fog now we have all the vars in-place. + + spvModule.opSelectionMerge(skipFog, spv::SelectionControlMaskNone); + spvModule.opBranchConditional(fogEnabled, doFog, skipFog); + + spvModule.opLabel(doFog); + + uint32_t wIndex = 3; + uint32_t zIndex = 2; + + uint32_t w = spvModule.opCompositeExtract(floatType, fogCtx.vPos, 1, &wIndex); + uint32_t z = spvModule.opCompositeExtract(floatType, fogCtx.vPos, 1, &zIndex); + + uint32_t depth = 0; + if (fogCtx.IsPixel) + depth = spvModule.opFMul(floatType, z, spvModule.opFDiv(floatType, spvModule.constf32(1.0f), w)); + else { + if (fogCtx.RangeFog) { + std::array<uint32_t, 3> indices = { 0, 1, 2 }; + uint32_t pos3 = spvModule.opVectorShuffle(vec3Type, fogCtx.vPos, fogCtx.vPos, indices.size(), indices.data()); + depth = spvModule.opLength(floatType, pos3); + } + else + depth = fogCtx.HasFogInput + ? 
fogCtx.vFog + : spvModule.opFAbs(floatType, z); + } + uint32_t fogFactor; + if (!fogCtx.IsPixel && fogCtx.IsFixedFunction && fogCtx.IsPositionT) { + fogFactor = fogCtx.HasSpecular + ? spvModule.opCompositeExtract(floatType, fogCtx.Specular, 1, &wIndex) + : spvModule.constf32(1.0f); + } else { + uint32_t applyFogFactor = spvModule.allocateId(); + + std::array<SpirvPhiLabel, 4> fogVariables; + + std::array<SpirvSwitchCaseLabel, 4> fogCaseLabels = { { + { uint32_t(D3DFOG_NONE), spvModule.allocateId() }, + { uint32_t(D3DFOG_EXP), spvModule.allocateId() }, + { uint32_t(D3DFOG_EXP2), spvModule.allocateId() }, + { uint32_t(D3DFOG_LINEAR), spvModule.allocateId() }, + } }; + + spvModule.opSelectionMerge(applyFogFactor, spv::SelectionControlMaskNone); + spvModule.opSwitch(fogMode, + fogCaseLabels[D3DFOG_NONE].labelId, + fogCaseLabels.size(), + fogCaseLabels.data()); + + for (uint32_t i = 0; i < fogCaseLabels.size(); i++) { + spvModule.opLabel(fogCaseLabels[i].labelId); + + fogVariables[i].labelId = fogCaseLabels[i].labelId; + fogVariables[i].varId = [&] { + auto mode = D3DFOGMODE(fogCaseLabels[i].literal); + switch (mode) { + default: + // vFog + case D3DFOG_NONE: { + if (fogCtx.IsPixel) + return fogCtx.vFog; + + if (fogCtx.IsFixedFunction && fogCtx.HasSpecular) + return spvModule.opCompositeExtract(floatType, fogCtx.Specular, 1, &wIndex); + + return spvModule.constf32(1.0f); + } + + // (end - d) / (end - start) + case D3DFOG_LINEAR: { + uint32_t fogFactor = spvModule.opFSub(floatType, fogEnd, depth); + fogFactor = spvModule.opFMul(floatType, fogFactor, fogScale); + fogFactor = spvModule.opNClamp(floatType, fogFactor, spvModule.constf32(0.0f), spvModule.constf32(1.0f)); + return fogFactor; + } + + // 1 / (e^[d * density])^2 + case D3DFOG_EXP2: + // 1 / (e^[d * density]) + case D3DFOG_EXP: { + uint32_t fogFactor = spvModule.opFMul(floatType, depth, fogDensity); + + if (mode == D3DFOG_EXP2) + fogFactor = spvModule.opFMul(floatType, fogFactor, fogFactor); + + // Provides the 
rcp. + fogFactor = spvModule.opFNegate(floatType, fogFactor); + fogFactor = spvModule.opExp(floatType, fogFactor); + return fogFactor; + } + } + }(); + + spvModule.opBranch(applyFogFactor); + } + + spvModule.opLabel(applyFogFactor); + + fogFactor = spvModule.opPhi(floatType, + fogVariables.size(), + fogVariables.data()); + } + + uint32_t fogRetValue = 0; + + // Return the new color if we are doing this in PS + // or just the fog factor for oFog in VS + if (fogCtx.IsPixel) { + std::array<uint32_t, 4> indices = { 0, 1, 2, 6 }; + + uint32_t color = fogCtx.oColor; + + uint32_t color3 = spvModule.opVectorShuffle(vec3Type, color, color, 3, indices.data()); + + std::array<uint32_t, 3> fogFacIndices = { fogFactor, fogFactor, fogFactor }; + uint32_t fogFact3 = spvModule.opCompositeConstruct(vec3Type, fogFacIndices.size(), fogFacIndices.data()); + + uint32_t lerpedFrog = spvModule.opFMix(vec3Type, fogColor, color3, fogFact3); + + fogRetValue = spvModule.opVectorShuffle(vec4Type, lerpedFrog, color, indices.size(), indices.data()); + } + else + fogRetValue = fogFactor; + + spvModule.opStore(returnValuePtr, fogRetValue); + + spvModule.opBranch(skipFog); + + spvModule.opLabel(skipFog); + + return spvModule.opLoad(returnType, returnValuePtr); + } + + + uint32_t SetupRenderStateBlock(SpirvModule& spvModule, uint32_t count) { + uint32_t floatType = spvModule.defFloatType(32); + uint32_t vec3Type = spvModule.defVectorType(floatType, 3); + + std::array<uint32_t, 11> rsMembers = {{ + vec3Type, + floatType, + floatType, + floatType, + floatType, + + floatType, + floatType, + floatType, + floatType, + floatType, + floatType, + }}; + + uint32_t rsStruct = spvModule.defStructTypeUnique(count, rsMembers.data()); + uint32_t rsBlock = spvModule.newVar( + spvModule.defPointerType(rsStruct, spv::StorageClassPushConstant), + spv::StorageClassPushConstant); + + spvModule.setDebugName (rsBlock, "render_state"); + + spvModule.setDebugName (rsStruct, "render_state_t"); + spvModule.decorate 
(rsStruct, spv::DecorationBlock); + + uint32_t memberIdx = 0; + auto SetMemberName = [&](const char* name, uint32_t offset) { + if (memberIdx >= count) + return; + + spvModule.setDebugMemberName (rsStruct, memberIdx, name); + spvModule.memberDecorateOffset (rsStruct, memberIdx, offset); + memberIdx++; + }; + + SetMemberName("fog_color", offsetof(D3D9RenderStateInfo, fogColor)); + SetMemberName("fog_scale", offsetof(D3D9RenderStateInfo, fogScale)); + SetMemberName("fog_end", offsetof(D3D9RenderStateInfo, fogEnd)); + SetMemberName("fog_density", offsetof(D3D9RenderStateInfo, fogDensity)); + SetMemberName("alpha_ref", offsetof(D3D9RenderStateInfo, alphaRef)); + SetMemberName("point_size", offsetof(D3D9RenderStateInfo, pointSize)); + SetMemberName("point_size_min", offsetof(D3D9RenderStateInfo, pointSizeMin)); + SetMemberName("point_size_max", offsetof(D3D9RenderStateInfo, pointSizeMax)); + SetMemberName("point_scale_a", offsetof(D3D9RenderStateInfo, pointScaleA)); + SetMemberName("point_scale_b", offsetof(D3D9RenderStateInfo, pointScaleB)); + SetMemberName("point_scale_c", offsetof(D3D9RenderStateInfo, pointScaleC)); + + return rsBlock; + } + + + D3D9PointSizeInfoVS GetPointSizeInfoVS(SpirvModule& spvModule, uint32_t vPos, uint32_t vtx, uint32_t perVertPointSize, uint32_t rsBlock, bool isFixedFunction) { + uint32_t floatType = spvModule.defFloatType(32); + uint32_t floatPtr = spvModule.defPointerType(floatType, spv::StorageClassPushConstant); + uint32_t vec3Type = spvModule.defVectorType(floatType, 3); + uint32_t vec4Type = spvModule.defVectorType(floatType, 4); + uint32_t uint32Type = spvModule.defIntType(32, 0); + uint32_t boolType = spvModule.defBoolType(); + + auto LoadFloat = [&](D3D9RenderStateItem item) { + uint32_t index = spvModule.constu32(uint32_t(item)); + return spvModule.opLoad(floatType, spvModule.opAccessChain(floatPtr, rsBlock, 1, &index)); + }; + + uint32_t value = perVertPointSize != 0 ? 
perVertPointSize : LoadFloat(D3D9RenderStateItem::PointSize); + + if (isFixedFunction) { + uint32_t pointMode = spvModule.specConst32(uint32Type, 0); + spvModule.setDebugName(pointMode, "point_mode"); + spvModule.decorateSpecId(pointMode, getSpecId(D3D9SpecConstantId::PointMode)); + + uint32_t scaleBit = spvModule.opBitFieldUExtract(uint32Type, pointMode, spvModule.consti32(0), spvModule.consti32(1)); + uint32_t isScale = spvModule.opIEqual(boolType, scaleBit, spvModule.constu32(1)); + + uint32_t scaleC = LoadFloat(D3D9RenderStateItem::PointScaleC); + uint32_t scaleB = LoadFloat(D3D9RenderStateItem::PointScaleB); + uint32_t scaleA = LoadFloat(D3D9RenderStateItem::PointScaleA); + + std::array<uint32_t, 4> indices = { 0, 1, 2, 3 }; + + uint32_t vtx3; + if (vPos != 0) { + vPos = spvModule.opLoad(vec4Type, vPos); + + uint32_t rhw = spvModule.opCompositeExtract(floatType, vPos, 1, &indices[3]); + rhw = spvModule.opFDiv(floatType, spvModule.constf32(1.0f), rhw); + uint32_t pos3 = spvModule.opVectorShuffle(vec3Type, vPos, vPos, 3, indices.data()); + vtx3 = spvModule.opVectorTimesScalar(vec3Type, pos3, rhw); + } else { + vtx3 = spvModule.opVectorShuffle(vec3Type, vtx, vtx, 3, indices.data()); + } + + uint32_t DeSqr = spvModule.opDot (floatType, vtx3, vtx3); + uint32_t De = spvModule.opSqrt(floatType, DeSqr); + uint32_t scaleValue = spvModule.opFMul(floatType, scaleC, DeSqr); + scaleValue = spvModule.opFFma(floatType, scaleB, De, scaleValue); + scaleValue = spvModule.opFAdd(floatType, scaleA, scaleValue); + scaleValue = spvModule.opSqrt(floatType, scaleValue); + scaleValue = spvModule.opFDiv(floatType, value, scaleValue); + + value = spvModule.opSelect(floatType, isScale, scaleValue, value); + } + + uint32_t min = LoadFloat(D3D9RenderStateItem::PointSizeMin); + uint32_t max = LoadFloat(D3D9RenderStateItem::PointSizeMax); + + D3D9PointSizeInfoVS info; + info.defaultValue = value; + info.min = min; + info.max = max; + + return info; + } + + + D3D9PointSizeInfoPS 
GetPointSizeInfoPS(SpirvModule& spvModule, uint32_t rsBlock) { + uint32_t uint32Type = spvModule.defIntType(32, 0); + uint32_t boolType = spvModule.defBoolType(); + uint32_t boolVec4 = spvModule.defVectorType(boolType, 4); + + uint32_t pointMode = spvModule.specConst32(uint32Type, 0); + spvModule.setDebugName(pointMode, "point_mode"); + spvModule.decorateSpecId(pointMode, getSpecId(D3D9SpecConstantId::PointMode)); + + uint32_t spriteBit = spvModule.opBitFieldUExtract(uint32Type, pointMode, spvModule.consti32(1), spvModule.consti32(1)); + uint32_t isSprite = spvModule.opIEqual(boolType, spriteBit, spvModule.constu32(1)); + + std::array<uint32_t, 4> isSpriteIndices; + for (uint32_t i = 0; i < isSpriteIndices.size(); i++) + isSpriteIndices[i] = isSprite; + + isSprite = spvModule.opCompositeConstruct(boolVec4, isSpriteIndices.size(), isSpriteIndices.data()); + + D3D9PointSizeInfoPS info; + info.isSprite = isSprite; + + return info; + } + + + uint32_t GetPointCoord(SpirvModule& spvModule, std::vector<uint32_t>& entryPointInterfaces) { + uint32_t floatType = spvModule.defFloatType(32); + uint32_t vec2Type = spvModule.defVectorType(floatType, 2); + uint32_t vec4Type = spvModule.defVectorType(floatType, 4); + uint32_t vec2Ptr = spvModule.defPointerType(vec2Type, spv::StorageClassInput); + + uint32_t pointCoordPtr = spvModule.newVar(vec2Ptr, spv::StorageClassInput); + + spvModule.decorateBuiltIn(pointCoordPtr, spv::BuiltInPointCoord); + entryPointInterfaces.push_back(pointCoordPtr); + + uint32_t pointCoord = spvModule.opLoad(vec2Type, pointCoordPtr); + + std::array<uint32_t, 4> indices = { 0, 1, 2, 3 }; + + std::array<uint32_t, 4> pointCoordIndices = { + spvModule.opCompositeExtract(floatType, pointCoord, 1, &indices[0]), + spvModule.opCompositeExtract(floatType, pointCoord, 1, &indices[1]), + spvModule.constf32(0.0f), + spvModule.constf32(0.0f) + }; + + return spvModule.opCompositeConstruct(vec4Type, pointCoordIndices.size(), pointCoordIndices.data()); + } + + + uint32_t 
GetSharedConstants(SpirvModule& spvModule) { + uint32_t float_t = spvModule.defFloatType(32); + uint32_t vec2_t = spvModule.defVectorType(float_t, 2); + uint32_t vec4_t = spvModule.defVectorType(float_t, 4); + + std::array<uint32_t, D3D9SharedPSStages_Count> stageMembers = { + vec4_t, + + vec2_t, + vec2_t, + + float_t, + float_t, + }; + + std::array<decltype(stageMembers), caps::TextureStageCount> members; + + for (auto& member : members) + member = stageMembers; + + const uint32_t structType = + spvModule.defStructType(members.size() * stageMembers.size(), members[0].data()); + + spvModule.decorateBlock(structType); + + uint32_t offset = 0; + for (uint32_t stage = 0; stage < caps::TextureStageCount; stage++) { + spvModule.memberDecorateOffset(structType, stage * D3D9SharedPSStages_Count + D3D9SharedPSStages_Constant, offset); + offset += sizeof(float) * 4; + + spvModule.memberDecorateOffset(structType, stage * D3D9SharedPSStages_Count + D3D9SharedPSStages_BumpEnvMat0, offset); + offset += sizeof(float) * 2; + + spvModule.memberDecorateOffset(structType, stage * D3D9SharedPSStages_Count + D3D9SharedPSStages_BumpEnvMat1, offset); + offset += sizeof(float) * 2; + + spvModule.memberDecorateOffset(structType, stage * D3D9SharedPSStages_Count + D3D9SharedPSStages_BumpEnvLScale, offset); + offset += sizeof(float); + + spvModule.memberDecorateOffset(structType, stage * D3D9SharedPSStages_Count + D3D9SharedPSStages_BumpEnvLOffset, offset); + offset += sizeof(float); + + // Padding... 
+ offset += sizeof(float) * 2; + } + + uint32_t sharedState = spvModule.newVar( + spvModule.defPointerType(structType, spv::StorageClassUniform), + spv::StorageClassUniform); + + spvModule.setDebugName(sharedState, "D3D9SharedPS"); + + return sharedState; + } + + + enum class D3D9FFVSMembers { + WorldViewMatrix, + NormalMatrix, + InverseViewMatrix, + ProjMatrix, + + Texcoord0, + Texcoord1, + Texcoord2, + Texcoord3, + Texcoord4, + Texcoord5, + Texcoord6, + Texcoord7, + + InverseOffset, + InverseExtent, + + GlobalAmbient, + + Light0, + Light1, + Light2, + Light3, + Light4, + Light5, + Light6, + Light7, + + MaterialDiffuse, + MaterialAmbient, + MaterialSpecular, + MaterialEmissive, + MaterialPower, + + TweenFactor, + + MemberCount + }; + + struct D3D9FFVertexData { + uint32_t constantBuffer; + uint32_t vertexBlendData; + uint32_t lightType; + + struct { + uint32_t worldview; + uint32_t normal; + uint32_t inverseView; + uint32_t proj; + + uint32_t texcoord[8]; + + uint32_t invOffset; + uint32_t invExtent; + + uint32_t globalAmbient; + + uint32_t materialDiffuse; + uint32_t materialSpecular; + uint32_t materialAmbient; + uint32_t materialEmissive; + uint32_t materialPower; + uint32_t tweenFactor; + } constants; + + struct { + uint32_t POSITION; + uint32_t POSITION1; + uint32_t POINTSIZE; + uint32_t NORMAL; + uint32_t NORMAL1; + uint32_t TEXCOORD[8]; + uint32_t COLOR[2]; + uint32_t FOG; + + uint32_t BLENDWEIGHT; + uint32_t BLENDINDICES; + } in; + + struct { + uint32_t POSITION; + uint32_t POINTSIZE; + uint32_t NORMAL; + uint32_t TEXCOORD[8]; + uint32_t COLOR[2]; + uint32_t FOG; + } out; + }; + + enum D3D9FFPSMembers { + TextureFactor = 0, + + MemberCount + }; + + struct D3D9FFPixelData { + uint32_t constantBuffer; + uint32_t sharedState; + + struct { + uint32_t textureFactor; + } constants; + + struct { + uint32_t TEXCOORD[8]; + uint32_t COLOR[2]; + uint32_t FOG; + uint32_t POS; + } in; + + struct { + uint32_t texcoordCnt; + uint32_t typeId; + uint32_t varId; + uint32_t 
bound; + } samplers[8]; + + struct { + uint32_t COLOR; + } out; + }; + + class D3D9FFShaderCompiler { + + public: + + D3D9FFShaderCompiler( + Rc<DxvkDevice> Device, + const D3D9FFShaderKeyVS& Key, + const std::string& Name, + D3D9FixedFunctionOptions Options); + + D3D9FFShaderCompiler( + Rc<DxvkDevice> Device, + const D3D9FFShaderKeyFS& Key, + const std::string& Name, + D3D9FixedFunctionOptions Options); + + Rc<DxvkShader> compile(); + + DxsoIsgn isgn() { return m_isgn; } + + private: + + // Returns value for inputs + // Returns ptr for outputs + uint32_t declareIO(bool input, DxsoSemantic semantic, spv::BuiltIn builtin = spv::BuiltInMax); + + void compileVS(); + + void setupRenderStateInfo(); + + void emitLightTypeDecl(); + + void emitBaseBufferDecl(); + + void emitVertexBlendDecl(); + + void setupVS(); + + void compilePS(); + + void setupPS(); + + void emitPsSharedConstants(); + + void emitVsClipping(uint32_t vtx); + + void alphaTestPS(); + + bool isVS() { return m_programType == DxsoProgramType::VertexShader; } + bool isPS() { return !isVS(); } + + std::string m_filename; + + SpirvModule m_module; + std::vector + <DxvkResourceSlot> m_resourceSlots; + DxvkInterfaceSlots m_interfaceSlots; + std::vector<uint32_t> m_entryPointInterfaces; + + DxsoProgramType m_programType; + D3D9FFShaderKeyVS m_vsKey; + D3D9FFShaderKeyFS m_fsKey; + + D3D9FFVertexData m_vs = { }; + D3D9FFPixelData m_ps = { }; + + DxsoIsgn m_isgn; + DxsoIsgn m_osgn; + + uint32_t m_floatType; + uint32_t m_uint32Type; + uint32_t m_vec4Type; + uint32_t m_vec3Type; + uint32_t m_vec2Type; + uint32_t m_mat3Type; + uint32_t m_mat4Type; + + uint32_t m_entryPointId; + + uint32_t m_rsBlock; + uint32_t m_mainFuncLabel; + + D3D9FixedFunctionOptions m_options; + }; + + D3D9FFShaderCompiler::D3D9FFShaderCompiler( + Rc<DxvkDevice> Device, + const D3D9FFShaderKeyVS& Key, + const std::string& Name, + D3D9FixedFunctionOptions Options) + : m_module(spvVersion(1, 3)), m_options(Options) { + m_programType = 
DxsoProgramTypes::VertexShader; + m_vsKey = Key; + m_filename = Name; + } + + + D3D9FFShaderCompiler::D3D9FFShaderCompiler( + Rc<DxvkDevice> Device, + const D3D9FFShaderKeyFS& Key, + const std::string& Name, + D3D9FixedFunctionOptions Options) + : m_module(spvVersion(1, 3)), m_options(Options) { + m_programType = DxsoProgramTypes::PixelShader; + m_fsKey = Key; + m_filename = Name; + } + + + Rc<DxvkShader> D3D9FFShaderCompiler::compile() { + m_floatType = m_module.defFloatType(32); + m_uint32Type = m_module.defIntType(32, 0); + m_vec4Type = m_module.defVectorType(m_floatType, 4); + m_vec3Type = m_module.defVectorType(m_floatType, 3); + m_vec2Type = m_module.defVectorType(m_floatType, 2); + m_mat3Type = m_module.defMatrixType(m_vec3Type, 3); + m_mat4Type = m_module.defMatrixType(m_vec4Type, 4); + + m_entryPointId = m_module.allocateId(); + + // Set the shader name so that we recognize it in renderdoc + m_module.setDebugSource( + spv::SourceLanguageUnknown, 0, + m_module.addDebugString(m_filename.c_str()), + nullptr); + + // Set the memory model. This is the same for all shaders. + m_module.setMemoryModel( + spv::AddressingModelLogical, + spv::MemoryModelGLSL450); + + m_module.enableCapability(spv::CapabilityShader); + m_module.enableCapability(spv::CapabilityImageQuery); + + m_module.functionBegin( + m_module.defVoidType(), m_entryPointId, m_module.defFunctionType( + m_module.defVoidType(), 0, nullptr), + spv::FunctionControlMaskNone); + m_module.setDebugName(m_entryPointId, "main"); + + m_mainFuncLabel = m_module.allocateId(); + m_module.opLabel(m_mainFuncLabel); + + if (isVS()) + compileVS(); + else + compilePS(); + + m_module.opReturn(); + m_module.functionEnd(); + + // Declare the entry point, we now have all the + // information we need, including the interfaces + m_module.addEntryPoint(m_entryPointId, + isVS() ? 
spv::ExecutionModelVertex : spv::ExecutionModelFragment, "main", + m_entryPointInterfaces.size(), + m_entryPointInterfaces.data()); + + DxvkShaderOptions shaderOptions = { }; + + DxvkShaderConstData constData = { }; + + // Create the shader module object + return new DxvkShader( + isVS() ? VK_SHADER_STAGE_VERTEX_BIT : VK_SHADER_STAGE_FRAGMENT_BIT, + m_resourceSlots.size(), + m_resourceSlots.data(), + m_interfaceSlots, + m_module.compile(), + shaderOptions, + std::move(constData)); + } + + + uint32_t D3D9FFShaderCompiler::declareIO(bool input, DxsoSemantic semantic, spv::BuiltIn builtin) { + // Declare in ISGN and do linkage + auto& sgn = input + ? m_isgn : m_osgn; + + uint32_t& slots = input + ? m_interfaceSlots.inputSlots + : m_interfaceSlots.outputSlots; + + uint32_t i = sgn.elemCount++; + + uint32_t slot = i; + + if (builtin == spv::BuiltInMax) { + if (input != isVS()) { + slot = RegisterLinkerSlot(semantic); // Requires linkage... + } + + slots |= 1u << slot; + } + + auto& elem = sgn.elems[i]; + elem.slot = slot; + elem.semantic = semantic; + + // Declare variable + spv::StorageClass storageClass = input ? + spv::StorageClassInput : spv::StorageClassOutput; + + const bool scalar = semantic.usage == DxsoUsage::Fog || semantic.usage == DxsoUsage::PointSize; + uint32_t type = scalar ? m_floatType : m_vec4Type; + + uint32_t ptrType = m_module.defPointerType(type, storageClass); + + uint32_t ptr = m_module.newVar(ptrType, storageClass); + + if (builtin == spv::BuiltInMax) + m_module.decorateLocation(ptr, slot); + else + m_module.decorateBuiltIn(ptr, builtin); + + bool diffuseOrSpec = semantic == DxsoSemantic{ DxsoUsage::Color, 0 } + || semantic == DxsoSemantic{ DxsoUsage::Color, 1 }; + + if (diffuseOrSpec && m_fsKey.Stages[0].Contents.GlobalFlatShade) + m_module.decorate(ptr, spv::DecorationFlat); + + std::string name = str::format(input ? 
"in_" : "out_", semantic.usage, semantic.usageIndex); + m_module.setDebugName(ptr, name.c_str()); + + m_entryPointInterfaces.push_back(ptr); + + if (input) + return m_module.opLoad(type, ptr); + + return ptr; + } + + + void D3D9FFShaderCompiler::compileVS() { + setupVS(); + + std::array<uint32_t, 4> indices = { 0, 1, 2, 3 }; + + uint32_t gl_Position = m_vs.in.POSITION; + uint32_t vtx = m_vs.in.POSITION; + uint32_t normal = m_module.opVectorShuffle(m_vec3Type, m_vs.in.NORMAL, m_vs.in.NORMAL, 3, indices.data()); + + if (m_vsKey.Data.Contents.VertexBlendMode == D3D9FF_VertexBlendMode_Tween) { + uint32_t vtx1 = m_vs.in.POSITION1; + uint32_t normal1 = m_module.opVectorShuffle(m_vec3Type, m_vs.in.NORMAL1, m_vs.in.NORMAL1, 3, indices.data()); + + vtx = m_module.opFMix(m_vec3Type, vtx, vtx1, m_vs.constants.tweenFactor); + normal = m_module.opFMix(m_vec3Type, normal, normal1, m_vs.constants.tweenFactor); + } + + const uint32_t wIndex = 3; + + if (!m_vsKey.Data.Contents.HasPositionT) { + if (m_vsKey.Data.Contents.VertexBlendMode == D3D9FF_VertexBlendMode_Normal) { + uint32_t blendWeightRemaining = m_module.constf32(1); + uint32_t vtxSum = m_module.constvec4f32(0, 0, 0, 0); + uint32_t nrmSum = m_module.constvec3f32(0, 0, 0); + + for (uint32_t i = 0; i <= m_vsKey.Data.Contents.VertexBlendCount; i++) { + std::array<uint32_t, 2> arrayIndices; + + if (m_vsKey.Data.Contents.VertexBlendIndexed) { + uint32_t index = m_module.opCompositeExtract(m_floatType, m_vs.in.BLENDINDICES, 1, &i); + index = m_module.opConvertFtoU(m_uint32Type, m_module.opRound(m_floatType, index)); + + arrayIndices = { m_module.constu32(0), index }; + } + else + arrayIndices = { m_module.constu32(0), m_module.constu32(i) }; + + uint32_t worldview = m_module.opLoad(m_mat4Type, + m_module.opAccessChain( + m_module.defPointerType(m_mat4Type, spv::StorageClassUniform), m_vs.vertexBlendData, arrayIndices.size(), arrayIndices.data())); + + uint32_t nrmMtx = worldview; + + std::array<uint32_t, 3> mtxIndices; + for 
(uint32_t i = 0; i < 3; i++) { + mtxIndices[i] = m_module.opCompositeExtract(m_vec4Type, nrmMtx, 1, &i); + mtxIndices[i] = m_module.opVectorShuffle(m_vec3Type, mtxIndices[i], mtxIndices[i], 3, indices.data()); + } + nrmMtx = m_module.opCompositeConstruct(m_mat3Type, mtxIndices.size(), mtxIndices.data()); + + uint32_t vtxResult = m_module.opVectorTimesMatrix(m_vec4Type, vtx, worldview); + uint32_t nrmResult = m_module.opVectorTimesMatrix(m_vec3Type, normal, nrmMtx); + + uint32_t weight; + if (i != m_vsKey.Data.Contents.VertexBlendCount) { + weight = m_module.opCompositeExtract(m_floatType, m_vs.in.BLENDWEIGHT, 1, &i); + blendWeightRemaining = m_module.opFSub(m_floatType, blendWeightRemaining, weight); + } + else + weight = blendWeightRemaining; + + vtxResult = m_module.opVectorTimesScalar(m_vec4Type, vtxResult, weight); + nrmResult = m_module.opVectorTimesScalar(m_vec3Type, nrmResult, weight); + + vtxSum = m_module.opFAdd(m_vec4Type, vtxSum, vtxResult); + nrmSum = m_module.opFAdd(m_vec3Type, nrmSum, nrmResult); + } + + vtx = vtxSum; + normal = nrmSum; + } + else { + vtx = m_module.opVectorTimesMatrix(m_vec4Type, vtx, m_vs.constants.worldview); + + uint32_t nrmMtx = m_vs.constants.normal; + + std::array<uint32_t, 3> mtxIndices; + for (uint32_t i = 0; i < 3; i++) { + mtxIndices[i] = m_module.opCompositeExtract(m_vec4Type, nrmMtx, 1, &i); + mtxIndices[i] = m_module.opVectorShuffle(m_vec3Type, mtxIndices[i], mtxIndices[i], 3, indices.data()); + } + nrmMtx = m_module.opCompositeConstruct(m_mat3Type, mtxIndices.size(), mtxIndices.data()); + + normal = m_module.opMatrixTimesVector(m_vec3Type, nrmMtx, normal); + } + + // Some games rely no normals not being normal. 
+ if (m_vsKey.Data.Contents.NormalizeNormals) { + uint32_t bool_t = m_module.defBoolType(); + uint32_t bool3_t = m_module.defVectorType(bool_t, 3); + + uint32_t isZeroNormal = m_module.opAll(bool_t, m_module.opFOrdEqual(bool3_t, normal, m_module.constvec3f32(0.0f, 0.0f, 0.0f))); + + std::array<uint32_t, 3> members = { isZeroNormal, isZeroNormal, isZeroNormal }; + uint32_t isZeroNormal3 = m_module.opCompositeConstruct(bool3_t, members.size(), members.data()); + + normal = m_module.opNormalize(m_vec3Type, normal); + normal = m_module.opSelect(m_vec3Type, isZeroNormal3, m_module.constvec3f32(0.0f, 0.0f, 0.0f), normal); + } + + gl_Position = m_module.opVectorTimesMatrix(m_vec4Type, vtx, m_vs.constants.proj); + } else { + gl_Position = m_module.opFMul(m_vec4Type, gl_Position, m_vs.constants.invExtent); + gl_Position = m_module.opFAdd(m_vec4Type, gl_Position, m_vs.constants.invOffset); + + // We still need to account for perspective correction here... + + // gl_Position.w = 1.0f / gl_Position.w + // gl_Position.xyz *= gl_Position.w; + + uint32_t bool_t = m_module.defBoolType(); + + uint32_t w = m_module.opCompositeExtract (m_floatType, gl_Position, 1, &wIndex); // w = gl_Position.w + uint32_t is0 = m_module.opFOrdEqual (bool_t, w, m_module.constf32(0)); // is0 = w == 0 + uint32_t rhw = m_module.opFDiv (m_floatType, m_module.constf32(1.0f), w); // rhw = 1.0f / w + rhw = m_module.opSelect (m_floatType, is0, m_module.constf32(1.0), rhw); // rhw = w == 0 ? 
1.0 : rhw + gl_Position = m_module.opVectorTimesScalar(m_vec4Type, gl_Position, rhw); // gl_Position.xyz *= rhw + gl_Position = m_module.opCompositeInsert (m_vec4Type, rhw, gl_Position, 1, &wIndex); // gl_Position.w = rhw + } + + m_module.opStore(m_vs.out.POSITION, gl_Position); + + std::array<uint32_t, 4> outNrmIndices; + for (uint32_t i = 0; i < 3; i++) + outNrmIndices[i] = m_module.opCompositeExtract(m_floatType, normal, 1, &i); + outNrmIndices[3] = m_module.constf32(1.0f); + + uint32_t outNrm = m_module.opCompositeConstruct(m_vec4Type, outNrmIndices.size(), outNrmIndices.data()); + + m_module.opStore(m_vs.out.NORMAL, outNrm); + + for (uint32_t i = 0; i < caps::TextureStageCount; i++) { + uint32_t inputIndex = (m_vsKey.Data.Contents.TexcoordIndices >> (i * 3)) & 0b111; + uint32_t inputFlags = (m_vsKey.Data.Contents.TexcoordFlags >> (i * 3)) & 0b111; + + uint32_t transformed; + + const uint32_t wIndex = 3; + + uint32_t flags = (m_vsKey.Data.Contents.TransformFlags >> (i * 3)) & 0b111; + uint32_t count = flags; + switch (inputFlags) { + default: + case (DXVK_TSS_TCI_PASSTHRU >> TCIOffset): + transformed = m_vs.in.TEXCOORD[inputIndex & 0xFF]; + break; + + case (DXVK_TSS_TCI_CAMERASPACENORMAL >> TCIOffset): + transformed = outNrm; + count = 4; + break; + + case (DXVK_TSS_TCI_CAMERASPACEPOSITION >> TCIOffset): + transformed = m_module.opCompositeInsert(m_vec4Type, m_module.constf32(1.0f), vtx, 1, &wIndex); + count = 4; + break; + + case (DXVK_TSS_TCI_CAMERASPACEREFLECTIONVECTOR >> TCIOffset): { + uint32_t vtx3 = m_module.opVectorShuffle(m_vec3Type, vtx, vtx, 3, indices.data()); + vtx3 = m_module.opNormalize(m_vec3Type, vtx3); + + uint32_t reflection = m_module.opReflect(m_vec3Type, vtx3, normal); + + std::array<uint32_t, 4> transformIndices; + for (uint32_t i = 0; i < 3; i++) + transformIndices[i] = m_module.opCompositeExtract(m_floatType, reflection, 1, &i); + transformIndices[3] = m_module.constf32(1.0f); + + transformed = m_module.opCompositeConstruct(m_vec4Type, 
transformIndices.size(), transformIndices.data()); + count = 4; + break; + } + + case (DXVK_TSS_TCI_SPHEREMAP >> TCIOffset): { + uint32_t vtx3 = m_module.opVectorShuffle(m_vec3Type, vtx, vtx, 3, indices.data()); + vtx3 = m_module.opNormalize(m_vec3Type, vtx3); + + uint32_t reflection = m_module.opReflect(m_vec3Type, vtx3, normal); + uint32_t m = m_module.opFAdd(m_vec3Type, reflection, m_module.constvec3f32(0, 0, 1)); + m = m_module.opLength(m_floatType, m); + m = m_module.opFMul(m_floatType, m, m_module.constf32(2.0f)); + + std::array<uint32_t, 4> transformIndices; + for (uint32_t i = 0; i < 2; i++) { + transformIndices[i] = m_module.opCompositeExtract(m_floatType, reflection, 1, &i); + transformIndices[i] = m_module.opFDiv(m_floatType, transformIndices[i], m); + transformIndices[i] = m_module.opFAdd(m_floatType, transformIndices[i], m_module.constf32(0.5f)); + } + + transformIndices[2] = m_module.constf32(0.0f); + transformIndices[3] = m_module.constf32(1.0f); + + transformed = m_module.opCompositeConstruct(m_vec4Type, transformIndices.size(), transformIndices.data()); + count = 4; + break; + } + } + + uint32_t type = flags; + if (type != D3DTTFF_DISABLE) { + if (!m_vsKey.Data.Contents.HasPositionT) { + for (uint32_t j = count; j < 4; j++) { + // If we're outside the component count of the vertex decl for this texcoord then we pad with zeroes. + // Otherwise, pad with ones. + + // Very weird quirk in order to get texcoord transforms to work like they do in native. + // In future, maybe we could sort this out properly by chopping matrices of different sizes, but thats + // a project for another day. + uint32_t texcoordCount = (m_vsKey.Data.Contents.TexcoordDeclMask >> (3 * inputIndex)) & 0x7; + uint32_t value = j > texcoordCount ? 
m_module.constf32(0) : m_module.constf32(1); + transformed = m_module.opCompositeInsert(m_vec4Type, value, transformed, 1, &j); + } + + transformed = m_module.opVectorTimesMatrix(m_vec4Type, transformed, m_vs.constants.texcoord[i]); + } + + // Pad the unused section of it with the value for projection. + uint32_t lastIdx = count - 1; + uint32_t projValue = m_module.opCompositeExtract(m_floatType, transformed, 1, &lastIdx); + + for (uint32_t j = count; j < 4; j++) + transformed = m_module.opCompositeInsert(m_vec4Type, projValue, transformed, 1, &j); + } + + m_module.opStore(m_vs.out.TEXCOORD[i], transformed); + } + + if (m_vsKey.Data.Contents.UseLighting) { + auto PickSource = [&](uint32_t Source, uint32_t Material) { + if (Source == D3DMCS_MATERIAL) + return Material; + else if (Source == D3DMCS_COLOR1) + return m_vs.in.COLOR[0]; + else + return m_vs.in.COLOR[1]; + }; + + uint32_t diffuseValue = m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f); + uint32_t specularValue = m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f); + uint32_t ambientValue = m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f); + + for (uint32_t i = 0; i < m_vsKey.Data.Contents.LightCount; i++) { + uint32_t light_ptr_t = m_module.defPointerType(m_vs.lightType, spv::StorageClassUniform); + + uint32_t indexVal = m_module.constu32(uint32_t(D3D9FFVSMembers::Light0) + i); + uint32_t lightPtr = m_module.opAccessChain(light_ptr_t, m_vs.constantBuffer, 1, &indexVal); + + auto LoadLightItem = [&](uint32_t type, uint32_t idx) { + uint32_t typePtr = m_module.defPointerType(type, spv::StorageClassUniform); + + idx = m_module.constu32(idx); + + return m_module.opLoad(type, + m_module.opAccessChain(typePtr, lightPtr, 1, &idx)); + }; + + uint32_t diffuse = LoadLightItem(m_vec4Type, 0); + uint32_t specular = LoadLightItem(m_vec4Type, 1); + uint32_t ambient = LoadLightItem(m_vec4Type, 2); + uint32_t position = LoadLightItem(m_vec4Type, 3); + uint32_t direction = LoadLightItem(m_vec4Type, 4); + uint32_t type = 
LoadLightItem(m_uint32Type, 5); + uint32_t range = LoadLightItem(m_floatType, 6); + uint32_t falloff = LoadLightItem(m_floatType, 7); + uint32_t atten0 = LoadLightItem(m_floatType, 8); + uint32_t atten1 = LoadLightItem(m_floatType, 9); + uint32_t atten2 = LoadLightItem(m_floatType, 10); + uint32_t theta = LoadLightItem(m_floatType, 11); + uint32_t phi = LoadLightItem(m_floatType, 12); + + uint32_t bool_t = m_module.defBoolType(); + uint32_t bool3_t = m_module.defVectorType(bool_t, 3); + + uint32_t isSpot = m_module.opIEqual(bool_t, type, m_module.constu32(D3DLIGHT_SPOT)); + uint32_t isDirectional = m_module.opIEqual(bool_t, type, m_module.constu32(D3DLIGHT_DIRECTIONAL)); + + std::array<uint32_t, 3> members = { isDirectional, isDirectional, isDirectional }; + + uint32_t isDirectional3 = m_module.opCompositeConstruct(bool3_t, members.size(), members.data()); + + uint32_t vtx3 = m_module.opVectorShuffle(m_vec3Type, vtx, vtx, 3, indices.data()); + position = m_module.opVectorShuffle(m_vec3Type, position, position, 3, indices.data()); + direction = m_module.opVectorShuffle(m_vec3Type, direction, direction, 3, indices.data()); + + uint32_t delta = m_module.opFSub(m_vec3Type, position, vtx3); + uint32_t d = m_module.opLength(m_floatType, delta); + uint32_t hitDir = m_module.opFNegate(m_vec3Type, direction); + hitDir = m_module.opSelect(m_vec3Type, isDirectional3, hitDir, delta); + hitDir = m_module.opNormalize(m_vec3Type, hitDir); + + uint32_t atten = m_module.opFFma (m_floatType, d, atten2, atten1); + atten = m_module.opFFma (m_floatType, d, atten, atten0); + atten = m_module.opFDiv (m_floatType, m_module.constf32(1.0f), atten); + atten = m_module.opNMin (m_floatType, atten, m_module.constf32(FLT_MAX)); + + atten = m_module.opSelect(m_floatType, m_module.opFOrdGreaterThan(bool_t, d, range), m_module.constf32(0.0f), atten); + atten = m_module.opSelect(m_floatType, isDirectional, m_module.constf32(1.0f), atten); + + // Spot Lighting + { + uint32_t rho = m_module.opDot 
(m_floatType, m_module.opFNegate(m_vec3Type, hitDir), direction); + uint32_t spotAtten = m_module.opFSub(m_floatType, rho, phi); + spotAtten = m_module.opFDiv(m_floatType, spotAtten, m_module.opFSub(m_floatType, theta, phi)); + spotAtten = m_module.opPow (m_floatType, spotAtten, falloff); + + uint32_t insideThetaAndPhi = m_module.opFOrdLessThanEqual(bool_t, rho, theta); + uint32_t insidePhi = m_module.opFOrdGreaterThan(bool_t, rho, phi); + spotAtten = m_module.opSelect(m_floatType, insidePhi, spotAtten, m_module.constf32(0.0f)); + spotAtten = m_module.opSelect(m_floatType, insideThetaAndPhi, spotAtten, m_module.constf32(1.0f)); + spotAtten = m_module.opFClamp(m_floatType, spotAtten, m_module.constf32(0.0f), m_module.constf32(1.0f)); + + spotAtten = m_module.opFMul(m_floatType, atten, spotAtten); + atten = m_module.opSelect(m_floatType, isSpot, spotAtten, atten); + } + + + uint32_t hitDot = m_module.opDot(m_floatType, normal, hitDir); + hitDot = m_module.opFClamp(m_floatType, hitDot, m_module.constf32(0.0f), m_module.constf32(1.0f)); + + uint32_t diffuseness = m_module.opFMul(m_floatType, hitDot, atten); + + uint32_t mid; + if (m_vsKey.Data.Contents.LocalViewer) { + mid = m_module.opNormalize(m_vec3Type, vtx3); + mid = m_module.opFSub(m_vec3Type, hitDir, mid); + } + else + mid = m_module.opFSub(m_vec3Type, hitDir, m_module.constvec3f32(0.0f, 0.0f, 1.0f)); + + mid = m_module.opNormalize(m_vec3Type, mid); + + uint32_t midDot = m_module.opDot(m_floatType, normal, mid); + midDot = m_module.opFClamp(m_floatType, midDot, m_module.constf32(0.0f), m_module.constf32(1.0f)); + uint32_t doSpec = m_module.opFOrdGreaterThan(bool_t, midDot, m_module.constf32(0.0f)); + uint32_t specularness = m_module.opPow(m_floatType, midDot, m_vs.constants.materialPower); + specularness = m_module.opFMul(m_floatType, specularness, atten); + specularness = m_module.opSelect(m_floatType, doSpec, specularness, m_module.constf32(0.0f)); + + uint32_t lightAmbient = 
m_module.opVectorTimesScalar(m_vec4Type, ambient, atten); + uint32_t lightDiffuse = m_module.opVectorTimesScalar(m_vec4Type, diffuse, diffuseness); + uint32_t lightSpecular = m_module.opVectorTimesScalar(m_vec4Type, specular, specularness); + + ambientValue = m_module.opFAdd(m_vec4Type, ambientValue, lightAmbient); + diffuseValue = m_module.opFAdd(m_vec4Type, diffuseValue, lightDiffuse); + specularValue = m_module.opFAdd(m_vec4Type, specularValue, lightSpecular); + } + + uint32_t mat_diffuse = PickSource(m_vsKey.Data.Contents.DiffuseSource, m_vs.constants.materialDiffuse); + uint32_t mat_ambient = PickSource(m_vsKey.Data.Contents.AmbientSource, m_vs.constants.materialAmbient); + uint32_t mat_emissive = PickSource(m_vsKey.Data.Contents.EmissiveSource, m_vs.constants.materialEmissive); + uint32_t mat_specular = PickSource(m_vsKey.Data.Contents.SpecularSource, m_vs.constants.materialSpecular); + + std::array<uint32_t, 4> alphaSwizzle = {0, 1, 2, 7}; + uint32_t finalColor0 = m_module.opFFma(m_vec4Type, mat_ambient, m_vs.constants.globalAmbient, mat_emissive); + finalColor0 = m_module.opFFma(m_vec4Type, mat_ambient, ambientValue, finalColor0); + finalColor0 = m_module.opFFma(m_vec4Type, mat_diffuse, diffuseValue, finalColor0); + finalColor0 = m_module.opVectorShuffle(m_vec4Type, finalColor0, mat_diffuse, alphaSwizzle.size(), alphaSwizzle.data()); + + uint32_t finalColor1 = m_module.opFMul(m_vec4Type, mat_specular, specularValue); + + // Saturate + finalColor0 = m_module.opFClamp(m_vec4Type, finalColor0, + m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f), + m_module.constvec4f32(1.0f, 1.0f, 1.0f, 1.0f)); + + finalColor1 = m_module.opFClamp(m_vec4Type, finalColor1, + m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f), + m_module.constvec4f32(1.0f, 1.0f, 1.0f, 1.0f)); + + m_module.opStore(m_vs.out.COLOR[0], finalColor0); + m_module.opStore(m_vs.out.COLOR[1], finalColor1); + } + else { + m_module.opStore(m_vs.out.COLOR[0], m_vs.in.COLOR[0]); + m_module.opStore(m_vs.out.COLOR[1], 
m_vs.in.COLOR[1]); + } + + D3D9FogContext fogCtx; + fogCtx.IsPixel = false; + fogCtx.RangeFog = m_vsKey.Data.Contents.RangeFog; + fogCtx.RenderState = m_rsBlock; + fogCtx.vPos = vtx; + fogCtx.HasFogInput = m_vsKey.Data.Contents.HasFog; + fogCtx.vFog = m_vs.in.FOG; + fogCtx.oColor = 0; + fogCtx.IsFixedFunction = true; + fogCtx.IsPositionT = m_vsKey.Data.Contents.HasPositionT; + fogCtx.HasSpecular = m_vsKey.Data.Contents.HasColor1; + fogCtx.Specular = m_vs.in.COLOR[1]; + m_module.opStore(m_vs.out.FOG, DoFixedFunctionFog(m_module, fogCtx)); + + auto pointInfo = GetPointSizeInfoVS(m_module, 0, vtx, m_vs.in.POINTSIZE, m_rsBlock, true); + + uint32_t pointSize = m_module.opFClamp(m_floatType, pointInfo.defaultValue, pointInfo.min, pointInfo.max); + m_module.opStore(m_vs.out.POINTSIZE, pointSize); + + if (m_vsKey.Data.Contents.VertexClipping) + emitVsClipping(vtx); + } + + + void D3D9FFShaderCompiler::setupRenderStateInfo() { + uint32_t count; + + if (m_programType == DxsoProgramType::PixelShader) { + m_interfaceSlots.pushConstOffset = 0; + m_interfaceSlots.pushConstSize = offsetof(D3D9RenderStateInfo, pointSize); + count = 5; + } + else { + m_interfaceSlots.pushConstOffset = offsetof(D3D9RenderStateInfo, pointSize); + m_interfaceSlots.pushConstSize = sizeof(float) * 6; + count = 11; + } + + m_rsBlock = SetupRenderStateBlock(m_module, count); + } + + + void D3D9FFShaderCompiler::emitLightTypeDecl() { + std::array<uint32_t, 13> light_members = { + m_vec4Type, // Diffuse + m_vec4Type, // Specular + m_vec4Type, // Ambient + m_vec4Type, // Position + m_vec4Type, // Direction + m_uint32Type, // Type + m_floatType, // Range + m_floatType, // Falloff + m_floatType, // Attenuation0 + m_floatType, // Attenuation1 + m_floatType, // Attenuation2 + m_floatType, // Theta + m_floatType, // Phi + }; + + m_vs.lightType = + m_module.defStructType(light_members.size(), light_members.data()); + + m_module.setDebugName(m_vs.lightType, "light_t"); + + uint32_t offset = 0; + + 
m_module.memberDecorateOffset(m_vs.lightType, 0, offset); offset += 4 * sizeof(float); + m_module.setDebugMemberName (m_vs.lightType, 0, "Diffuse"); + m_module.memberDecorateOffset(m_vs.lightType, 1, offset); offset += 4 * sizeof(float); + m_module.setDebugMemberName (m_vs.lightType, 1, "Specular"); + m_module.memberDecorateOffset(m_vs.lightType, 2, offset); offset += 4 * sizeof(float); + m_module.setDebugMemberName (m_vs.lightType, 2, "Ambient"); + + m_module.memberDecorateOffset(m_vs.lightType, 3, offset); offset += 4 * sizeof(float); + m_module.setDebugMemberName (m_vs.lightType, 3, "Position"); + m_module.memberDecorateOffset(m_vs.lightType, 4, offset); offset += 4 * sizeof(float); + m_module.setDebugMemberName (m_vs.lightType, 4, "Direction"); + + m_module.memberDecorateOffset(m_vs.lightType, 5, offset); offset += 1 * sizeof(uint32_t); + m_module.setDebugMemberName (m_vs.lightType, 5, "Type"); + + m_module.memberDecorateOffset(m_vs.lightType, 6, offset); offset += 1 * sizeof(float); + m_module.setDebugMemberName (m_vs.lightType, 6, "Range"); + m_module.memberDecorateOffset(m_vs.lightType, 7, offset); offset += 1 * sizeof(float); + m_module.setDebugMemberName (m_vs.lightType, 7, "Falloff"); + + m_module.memberDecorateOffset(m_vs.lightType, 8, offset); offset += 1 * sizeof(float); + m_module.setDebugMemberName (m_vs.lightType, 8, "Attenuation0"); + m_module.memberDecorateOffset(m_vs.lightType, 9, offset); offset += 1 * sizeof(float); + m_module.setDebugMemberName (m_vs.lightType, 9, "Attenuation1"); + m_module.memberDecorateOffset(m_vs.lightType, 10, offset); offset += 1 * sizeof(float); + m_module.setDebugMemberName (m_vs.lightType, 10, "Attenuation2"); + + m_module.memberDecorateOffset(m_vs.lightType, 11, offset); offset += 1 * sizeof(float); + m_module.setDebugMemberName (m_vs.lightType, 11, "Theta"); + m_module.memberDecorateOffset(m_vs.lightType, 12, offset); offset += 1 * sizeof(float); + m_module.setDebugMemberName (m_vs.lightType, 12, "Phi"); + } + + + 
void D3D9FFShaderCompiler::emitBaseBufferDecl() { + // Constant Buffer for VS. + std::array<uint32_t, uint32_t(D3D9FFVSMembers::MemberCount)> members = { + m_mat4Type, // World + m_mat4Type, // View + m_mat4Type, // InverseView + m_mat4Type, // Proj + + m_mat4Type, // Texture0 + m_mat4Type, // Texture1 + m_mat4Type, // Texture2 + m_mat4Type, // Texture3 + m_mat4Type, // Texture4 + m_mat4Type, // Texture5 + m_mat4Type, // Texture6 + m_mat4Type, // Texture7 + + m_vec4Type, // Inverse Offset + m_vec4Type, // Inverse Extent + + m_vec4Type, // Global Ambient + + m_vs.lightType, // Light0 + m_vs.lightType, // Light1 + m_vs.lightType, // Light2 + m_vs.lightType, // Light3 + m_vs.lightType, // Light4 + m_vs.lightType, // Light5 + m_vs.lightType, // Light6 + m_vs.lightType, // Light7 + + m_vec4Type, // Material Diffuse + m_vec4Type, // Material Ambient + m_vec4Type, // Material Specular + m_vec4Type, // Material Emissive + m_floatType, // Material Power + + m_floatType, // Tween Factor + }; + + const uint32_t structType = + m_module.defStructType(members.size(), members.data()); + + m_module.decorateBlock(structType); + + uint32_t offset = 0; + + for (uint32_t i = 0; i < uint32_t(D3D9FFVSMembers::InverseOffset); i++) { + m_module.memberDecorateOffset(structType, i, offset); + offset += sizeof(Matrix4); + m_module.memberDecorateMatrixStride(structType, i, 16); + m_module.memberDecorate(structType, i, spv::DecorationRowMajor); + } + + for (uint32_t i = uint32_t(D3D9FFVSMembers::InverseOffset); i < uint32_t(D3D9FFVSMembers::Light0); i++) { + m_module.memberDecorateOffset(structType, i, offset); + offset += sizeof(Vector4); + } + + for (uint32_t i = 0; i < caps::MaxEnabledLights; i++) { + m_module.memberDecorateOffset(structType, uint32_t(D3D9FFVSMembers::Light0) + i, offset); + offset += sizeof(D3D9Light); + } + + for (uint32_t i = uint32_t(D3D9FFVSMembers::MaterialDiffuse); i < uint32_t(D3D9FFVSMembers::MaterialPower); i++) { + m_module.memberDecorateOffset(structType, i, 
offset); + offset += sizeof(Vector4); + } + + m_module.memberDecorateOffset(structType, uint32_t(D3D9FFVSMembers::MaterialPower), offset); + offset += sizeof(float); + + m_module.memberDecorateOffset(structType, uint32_t(D3D9FFVSMembers::TweenFactor), offset); + offset += sizeof(float); + + m_module.setDebugName(structType, "D3D9FixedFunctionVS"); + uint32_t member = 0; + m_module.setDebugMemberName(structType, member++, "WorldView"); + m_module.setDebugMemberName(structType, member++, "Normal"); + m_module.setDebugMemberName(structType, member++, "InverseView"); + m_module.setDebugMemberName(structType, member++, "Projection"); + + m_module.setDebugMemberName(structType, member++, "TexcoordTransform0"); + m_module.setDebugMemberName(structType, member++, "TexcoordTransform1"); + m_module.setDebugMemberName(structType, member++, "TexcoordTransform2"); + m_module.setDebugMemberName(structType, member++, "TexcoordTransform3"); + m_module.setDebugMemberName(structType, member++, "TexcoordTransform4"); + m_module.setDebugMemberName(structType, member++, "TexcoordTransform5"); + m_module.setDebugMemberName(structType, member++, "TexcoordTransform6"); + m_module.setDebugMemberName(structType, member++, "TexcoordTransform7"); + + m_module.setDebugMemberName(structType, member++, "ViewportInfo_InverseOffset"); + m_module.setDebugMemberName(structType, member++, "ViewportInfo_InverseExtent"); + + m_module.setDebugMemberName(structType, member++, "GlobalAmbient"); + + m_module.setDebugMemberName(structType, member++, "Light0"); + m_module.setDebugMemberName(structType, member++, "Light1"); + m_module.setDebugMemberName(structType, member++, "Light2"); + m_module.setDebugMemberName(structType, member++, "Light3"); + m_module.setDebugMemberName(structType, member++, "Light4"); + m_module.setDebugMemberName(structType, member++, "Light5"); + m_module.setDebugMemberName(structType, member++, "Light6"); + m_module.setDebugMemberName(structType, member++, "Light7"); + + 
m_module.setDebugMemberName(structType, member++, "Material_Diffuse"); + m_module.setDebugMemberName(structType, member++, "Material_Ambient"); + m_module.setDebugMemberName(structType, member++, "Material_Specular"); + m_module.setDebugMemberName(structType, member++, "Material_Emissive"); + m_module.setDebugMemberName(structType, member++, "Material_Power"); + + m_module.setDebugMemberName(structType, member++, "TweenFactor"); + + m_vs.constantBuffer = m_module.newVar( + m_module.defPointerType(structType, spv::StorageClassUniform), + spv::StorageClassUniform); + + m_module.setDebugName(m_vs.constantBuffer, "consts"); + + const uint32_t bindingId = computeResourceSlotId( + DxsoProgramType::VertexShader, DxsoBindingType::ConstantBuffer, + DxsoConstantBuffers::VSFixedFunction); + + m_module.decorateDescriptorSet(m_vs.constantBuffer, 0); + m_module.decorateBinding(m_vs.constantBuffer, bindingId); + + DxvkResourceSlot resource; + resource.slot = bindingId; + resource.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + resource.view = VK_IMAGE_VIEW_TYPE_MAX_ENUM; + resource.access = VK_ACCESS_UNIFORM_READ_BIT; + m_resourceSlots.push_back(resource); + } + + + void D3D9FFShaderCompiler::emitVertexBlendDecl() { + const uint32_t arrayType = m_module.defRuntimeArrayTypeUnique(m_mat4Type); + m_module.decorateArrayStride(arrayType, sizeof(Matrix4)); + + const uint32_t structType = m_module.defStructTypeUnique(1, &arrayType); + + m_module.memberDecorateMatrixStride(structType, 0, 16); + m_module.memberDecorate(structType, 0, spv::DecorationRowMajor); + + m_module.decorate(structType, spv::DecorationBufferBlock); + + m_module.memberDecorateOffset(structType, 0, 0); + + m_module.setDebugName(structType, "D3D9FF_VertexBlendData"); + m_module.setDebugMemberName(structType, 0, "WorldViewArray"); + + m_vs.vertexBlendData = m_module.newVar( + m_module.defPointerType(structType, spv::StorageClassUniform), + spv::StorageClassUniform); + + m_module.setDebugName(m_vs.vertexBlendData, 
"VertexBlendData"); + + const uint32_t bindingId = computeResourceSlotId( + DxsoProgramType::VertexShader, DxsoBindingType::ConstantBuffer, + DxsoConstantBuffers::VSVertexBlendData); + + m_module.decorateDescriptorSet(m_vs.vertexBlendData, 0); + m_module.decorateBinding(m_vs.vertexBlendData, bindingId); + + m_module.decorate(m_vs.vertexBlendData, spv::DecorationNonWritable); + + DxvkResourceSlot resource; + resource.slot = bindingId; + resource.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + resource.view = VK_IMAGE_VIEW_TYPE_MAX_ENUM; + resource.access = VK_ACCESS_SHADER_READ_BIT; + m_resourceSlots.push_back(resource); + } + + + void D3D9FFShaderCompiler::setupVS() { + setupRenderStateInfo(); + + // VS Caps + m_module.enableCapability(spv::CapabilityClipDistance); + + emitLightTypeDecl(); + emitBaseBufferDecl(); + + if (m_vsKey.Data.Contents.VertexBlendMode == D3D9FF_VertexBlendMode_Normal) + emitVertexBlendDecl(); + + // Load constants + auto LoadConstant = [&](uint32_t type, uint32_t idx) { + uint32_t offset = m_module.constu32(idx); + uint32_t typePtr = m_module.defPointerType(type, spv::StorageClassUniform); + + return m_module.opLoad(type, + m_module.opAccessChain(typePtr, m_vs.constantBuffer, 1, &offset)); + }; + + m_vs.constants.worldview = LoadConstant(m_mat4Type, uint32_t(D3D9FFVSMembers::WorldViewMatrix)); + m_vs.constants.normal = LoadConstant(m_mat4Type, uint32_t(D3D9FFVSMembers::NormalMatrix)); + m_vs.constants.inverseView = LoadConstant(m_mat4Type, uint32_t(D3D9FFVSMembers::InverseViewMatrix)); + m_vs.constants.proj = LoadConstant(m_mat4Type, uint32_t(D3D9FFVSMembers::ProjMatrix)); + + for (uint32_t i = 0; i < caps::TextureStageCount; i++) + m_vs.constants.texcoord[i] = LoadConstant(m_mat4Type, uint32_t(D3D9FFVSMembers::Texcoord0) + i); + + m_vs.constants.invOffset = LoadConstant(m_vec4Type, uint32_t(D3D9FFVSMembers::InverseOffset)); + m_vs.constants.invExtent = LoadConstant(m_vec4Type, uint32_t(D3D9FFVSMembers::InverseExtent)); + + 
m_vs.constants.globalAmbient = LoadConstant(m_vec4Type, uint32_t(D3D9FFVSMembers::GlobalAmbient)); + + m_vs.constants.materialDiffuse = LoadConstant(m_vec4Type, uint32_t(D3D9FFVSMembers::MaterialDiffuse)); + m_vs.constants.materialAmbient = LoadConstant(m_vec4Type, uint32_t(D3D9FFVSMembers::MaterialAmbient)); + m_vs.constants.materialSpecular = LoadConstant(m_vec4Type, uint32_t(D3D9FFVSMembers::MaterialSpecular)); + m_vs.constants.materialEmissive = LoadConstant(m_vec4Type, uint32_t(D3D9FFVSMembers::MaterialEmissive)); + m_vs.constants.materialPower = LoadConstant(m_floatType, uint32_t(D3D9FFVSMembers::MaterialPower)); + m_vs.constants.tweenFactor = LoadConstant(m_floatType, uint32_t(D3D9FFVSMembers::TweenFactor)); + + // Do IO + m_vs.in.POSITION = declareIO(true, DxsoSemantic{ DxsoUsage::Position, 0 }); + m_vs.in.NORMAL = declareIO(true, DxsoSemantic{ DxsoUsage::Normal, 0 }); + + if (m_vsKey.Data.Contents.VertexBlendMode == D3D9FF_VertexBlendMode_Tween) { + m_vs.in.POSITION1 = declareIO(true, DxsoSemantic{ DxsoUsage::Position, 1 }); + m_vs.in.NORMAL1 = declareIO(true, DxsoSemantic{ DxsoUsage::Normal, 1 }); + } + else { + m_isgn.elemCount++; + m_isgn.elemCount++; + } + + for (uint32_t i = 0; i < caps::TextureStageCount; i++) + m_vs.in.TEXCOORD[i] = declareIO(true, DxsoSemantic{ DxsoUsage::Texcoord, i }); + + if (m_vsKey.Data.Contents.HasColor0) + m_vs.in.COLOR[0] = declareIO(true, DxsoSemantic{ DxsoUsage::Color, 0 }); + else { + m_vs.in.COLOR[0] = m_module.constvec4f32(1.0f, 1.0f, 1.0f, 1.0f); + m_isgn.elemCount++; + } + + if (m_vsKey.Data.Contents.HasColor1) + m_vs.in.COLOR[1] = declareIO(true, DxsoSemantic{ DxsoUsage::Color, 1 }); + else { + m_vs.in.COLOR[1] = m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f); + m_isgn.elemCount++; + } + + if (m_vsKey.Data.Contents.HasFog) + m_vs.in.FOG = declareIO(true, DxsoSemantic{ DxsoUsage::Fog, 0 }); + else + m_isgn.elemCount++; + + if (m_vsKey.Data.Contents.HasPointSize) + m_vs.in.POINTSIZE = declareIO(true, DxsoSemantic{ 
// Emits the body of the fixed-function pixel shader.
//
// Steps, in emission order:
//   1. setupPS() declares inputs, outputs, constants and samplers.
//   2. Each texture stage described by m_fsKey is evaluated until a stage
//      whose ColorOp is D3DTOP_DISABLE is reached.
//   3. If stage 0 has GlobalSpecularEnable set, the specular colour (alpha
//      masked to zero) is added to the result.
//   4. Fog is applied through DoFixedFunctionFog and the colour is stored
//      to m_ps.out.COLOR.
//   5. alphaTestPS() appends the alpha test.
//
// All uint32_t values handled here are SPIR-V result ids produced by
// m_module, not shader data.
void D3D9FFShaderCompiler::compilePS() {
  setupPS();

  uint32_t diffuse = m_ps.in.COLOR[0];
  uint32_t specular = m_ps.in.COLOR[1];

  // Current starts off as equal to diffuse.
  uint32_t current = diffuse;
  // Temp starts off as equal to vec4(0)
  uint32_t temp = m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f);

  // Last sampled texel; shared across stages because a bump-map stage
  // samples here and the following stage reads it in DoBumpmapCoords.
  uint32_t texture = m_module.constvec4f32(0.0f, 0.0f, 0.0f, 1.0f);

  for (uint32_t i = 0; i < caps::TextureStageCount; i++) {
    const auto& stage = m_fsKey.Stages[i].Contents;

    // Set by GetTexture so that the stage's texture is sampled at most once.
    bool processedTexture = false;

    // Perturbs the first two components of baseCoords using the bump
    // environment matrix rows of the previous stage (i - 1) and the
    // x/y components of the current `texture` value.
    // NOTE(review): the local `stage` and the loop variable `i` below shadow
    // the enclosing `stage` reference and stage index `i`.
    auto DoBumpmapCoords = [&](uint32_t typeId, uint32_t baseCoords) {
      uint32_t stage = i - 1;

      uint32_t coords = baseCoords;
      for (uint32_t i = 0; i < 2; i++) {
        std::array<uint32_t, 4> indices = { 0, 1, 2, 3 };

        uint32_t tc_m_n = m_module.opCompositeExtract(m_floatType, coords, 1, &i);

        // Row i of the bump matrix, read as a vec2 from the shared PS state.
        uint32_t offset = m_module.constu32(D3D9SharedPSStages_Count * stage + D3D9SharedPSStages_BumpEnvMat0 + i);
        uint32_t bm = m_module.opAccessChain(m_module.defPointerType(m_vec2Type, spv::StorageClassUniform),
                                             m_ps.sharedState, 1, &offset);
        bm = m_module.opLoad(m_vec2Type, bm);

        // Only the first two shuffle indices are consumed (count is 2).
        uint32_t t = m_module.opVectorShuffle(m_vec2Type, texture, texture, 2, indices.data());

        uint32_t dot = m_module.opDot(m_floatType, bm, t);

        uint32_t result = m_module.opFAdd(m_floatType, tc_m_n, dot);
        coords = m_module.opCompositeInsert(typeId, result, coords, 1, &i);
      }

      return coords;
    };

    // Samples the texture bound to stage i on first use and caches the
    // result in `texture`; later calls within the same stage return the
    // cached id.
    auto GetTexture = [&]() {
      if (!processedTexture) {
        SpirvImageOperands imageOperands;
        uint32_t imageVarId = m_module.opLoad(m_ps.samplers[i].typeId, m_ps.samplers[i].varId);

        uint32_t texcoordCnt = m_ps.samplers[i].texcoordCnt;

        // Add one for the texcoord count
        // if we need to include the divider
        if (m_fsKey.Stages[i].Contents.Projected)
          texcoordCnt++;

        std::array<uint32_t, 4> indices = { 0, 1, 2, 3 };

        // Narrow the vec4 input to the number of components being used.
        uint32_t texcoord = m_ps.in.TEXCOORD[i];
        uint32_t texcoord_t = m_module.defVectorType(m_floatType, texcoordCnt);
        texcoord = m_module.opVectorShuffle(texcoord_t,
          texcoord, texcoord, texcoordCnt, indices.data());

        // Component of the incoming texcoord that holds the divider:
        // ProjectedCount is treated as 1-based, with 0 meaning "use texcoordCnt".
        // NOTE(review): when ProjectedCount is 0 and texcoordCnt is 4 this
        // yields index 4 on a vec4 input - confirm that combination cannot occur.
        uint32_t projIdx = m_fsKey.Stages[i].Contents.ProjectedCount;
        if (projIdx == 0)
          projIdx = texcoordCnt;
        else
          projIdx--;

        uint32_t projValue = 0;

        if (m_fsKey.Stages[i].Contents.Projected) {
          // Move the divider into the last component of the narrowed coordinate.
          projValue = m_module.opCompositeExtract(m_floatType, m_ps.in.TEXCOORD[i], 1, &projIdx);
          uint32_t insertIdx = texcoordCnt - 1;
          texcoord = m_module.opCompositeInsert(texcoord_t, projValue, texcoord, 1, &insertIdx);
        }

        bool shouldProject = m_fsKey.Stages[i].Contents.Projected;

        // The previous stage was a bump-map stage: divide manually (if
        // projected), perturb the coordinates, then sample without projection.
        if (i != 0 && (
          m_fsKey.Stages[i - 1].Contents.ColorOp == D3DTOP_BUMPENVMAP ||
          m_fsKey.Stages[i - 1].Contents.ColorOp == D3DTOP_BUMPENVMAPLUMINANCE)) {
          if (shouldProject) {
            uint32_t projRcp = m_module.opFDiv(m_floatType, m_module.constf32(1.0), projValue);
            texcoord = m_module.opVectorTimesScalar(texcoord_t, texcoord, projRcp);
          }

          texcoord = DoBumpmapCoords(texcoord_t, texcoord);

          shouldProject = false;
        }

        if (shouldProject)
          texture = m_module.opImageSampleProjImplicitLod(m_vec4Type, imageVarId, texcoord, imageOperands);
        else
          texture = m_module.opImageSampleImplicitLod(m_vec4Type, imageVarId, texcoord, imageOperands);

        // Luminance variant: scale the sampled texel by
        // clamp(texel.z * lScale + lOffset, 0, 1), with lScale/lOffset taken
        // from the previous stage's shared state.
        if (i != 0 && m_fsKey.Stages[i - 1].Contents.ColorOp == D3DTOP_BUMPENVMAPLUMINANCE) {
          uint32_t index = m_module.constu32(D3D9SharedPSStages_Count * (i - 1) + D3D9SharedPSStages_BumpEnvLScale);
          uint32_t lScale = m_module.opAccessChain(m_module.defPointerType(m_floatType, spv::StorageClassUniform),
                                                   m_ps.sharedState, 1, &index);
          lScale = m_module.opLoad(m_floatType, lScale);

          index = m_module.constu32(D3D9SharedPSStages_Count * (i - 1) + D3D9SharedPSStages_BumpEnvLOffset);
          uint32_t lOffset = m_module.opAccessChain(m_module.defPointerType(m_floatType, spv::StorageClassUniform),
                                                    m_ps.sharedState, 1, &index);
          lOffset = m_module.opLoad(m_floatType, lOffset);

          uint32_t zIndex = 2;
          uint32_t scale = m_module.opCompositeExtract(m_floatType, texture, 1, &zIndex);
          scale = m_module.opFMul(m_floatType, scale, lScale);
          scale = m_module.opFAdd(m_floatType, scale, lOffset);
          scale = m_module.opFClamp(m_floatType, scale, m_module.constf32(0.0f), m_module.constf32(1.0));

          texture = m_module.opVectorTimesScalar(m_vec4Type, texture, scale);
        }

        // Replace the sample with (0, 0, 0, 1) when the sampler's "bound"
        // spec constant is false.
        uint32_t bool_t = m_module.defBoolType();
        uint32_t bvec4_t = m_module.defVectorType(bool_t, 4);
        std::array<uint32_t, 4> boundIndices = { m_ps.samplers[i].bound, m_ps.samplers[i].bound, m_ps.samplers[i].bound, m_ps.samplers[i].bound };
        uint32_t bound4 = m_module.opCompositeConstruct(bvec4_t, boundIndices.size(), boundIndices.data());
        texture = m_module.opSelect(m_vec4Type, bound4, texture, m_module.constvec4f32(0.0f, 0.0f, 0.0f, 1.0f));
      }

      processedTexture = true;

      return texture;
    };

    // Builds a vec4 whose four components are all `reg` (a scalar id).
    auto ScalarReplicate = [&](uint32_t reg) {
      std::array<uint32_t, 4> replicant = { reg, reg, reg, reg };
      return m_module.opCompositeConstruct(m_vec4Type, replicant.size(), replicant.data());
    };

    // Returns reg.wwww.
    auto AlphaReplicate = [&](uint32_t reg) {
      uint32_t alphaComponentId = 3;
      uint32_t alpha = m_module.opCompositeExtract(m_floatType, reg, 1, &alphaComponentId);

      return ScalarReplicate(alpha);
    };

    // Returns vec4(1) - reg.
    auto Complement = [&](uint32_t reg) {
      return m_module.opFSub(m_vec4Type,
        m_module.constvec4f32(1.0f, 1.0f, 1.0f, 1.0f),
        reg);
    };

    // Clamps every component of reg to [0, 1].
    auto Saturate = [&](uint32_t reg) {
      return m_module.opFClamp(m_vec4Type, reg,
        m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f),
        m_module.constvec4f32(1.0f, 1.0f, 1.0f, 1.0f));
    };

    // Resolves a D3DTA_* argument to a vec4 id, applying the COMPLEMENT and
    // ALPHAREPLICATE modifiers in that order. Unrecognised selectors yield
    // vec4(1).
    auto GetArg = [&] (uint32_t arg) {
      uint32_t reg = m_module.constvec4f32(1.0f, 1.0f, 1.0f, 1.0f);

      switch (arg & D3DTA_SELECTMASK) {
        case D3DTA_CONSTANT: {
          // Per-stage constant stored in the shared PS state block.
          uint32_t offset = m_module.constu32(D3D9SharedPSStages_Count * i + D3D9SharedPSStages_Constant);
          uint32_t ptr = m_module.opAccessChain(m_module.defPointerType(m_vec4Type, spv::StorageClassUniform),
                                                m_ps.sharedState, 1, &offset);

          reg = m_module.opLoad(m_vec4Type, ptr);
          break;
        }
        case D3DTA_CURRENT:
          reg = current;
          break;
        case D3DTA_DIFFUSE:
          reg = diffuse;
          break;
        case D3DTA_SPECULAR:
          reg = specular;
          break;
        case D3DTA_TEMP:
          reg = temp;
          break;
        case D3DTA_TEXTURE:
          reg = GetTexture();
          break;
        case D3DTA_TFACTOR:
          reg = m_ps.constants.textureFactor;
          break;
        default:
          break;
      }

      // reg = 1 - reg
      if (arg & D3DTA_COMPLEMENT)
        reg = Complement(reg);

      // reg = reg.wwww
      if (arg & D3DTA_ALPHAREPLICATE)
        reg = AlphaReplicate(reg);

      return reg;
    };

    // Emits one D3DTOP_* operation. `arg` holds already-resolved ids
    // (arg[0] is Arg0, arg[1] is Arg1, arg[2] is Arg2) and is taken by value
    // because some cases rewrite its elements. Returns the result id, or
    // `dst` unchanged for ops that produce no value here.
    auto DoOp = [&](D3DTEXTUREOP op, uint32_t dst, std::array<uint32_t, TextureArgCount> arg) {
      switch (op) {
        case D3DTOP_SELECTARG1:
          dst = arg[1];
          break;

        case D3DTOP_SELECTARG2:
          dst = arg[2];
          break;

        case D3DTOP_MODULATE4X:
          dst = m_module.opFMul(m_vec4Type, arg[1], arg[2]);
          dst = m_module.opVectorTimesScalar(m_vec4Type, dst, m_module.constf32(4.0f));
          dst = Saturate(dst);
          break;

        case D3DTOP_MODULATE2X:
          dst = m_module.opFMul(m_vec4Type, arg[1], arg[2]);
          dst = m_module.opVectorTimesScalar(m_vec4Type, dst, m_module.constf32(2.0f));
          dst = Saturate(dst);
          break;

        case D3DTOP_MODULATE:
          dst = m_module.opFMul(m_vec4Type, arg[1], arg[2]);
          break;

        case D3DTOP_ADDSIGNED2X:
          arg[2] = m_module.opFSub(m_vec4Type, arg[2],
            m_module.constvec4f32(0.5f, 0.5f, 0.5f, 0.5f));

          dst = m_module.opFAdd(m_vec4Type, arg[1], arg[2]);
          dst = m_module.opVectorTimesScalar(m_vec4Type, dst, m_module.constf32(2.0f));
          dst = Saturate(dst);
          break;

        case D3DTOP_ADDSIGNED:
          arg[2] = m_module.opFSub(m_vec4Type, arg[2],
            m_module.constvec4f32(0.5f, 0.5f, 0.5f, 0.5f));

          dst = m_module.opFAdd(m_vec4Type, arg[1], arg[2]);
          dst = Saturate(dst);
          break;

        case D3DTOP_ADD:
          dst = m_module.opFAdd(m_vec4Type, arg[1], arg[2]);
          dst = Saturate(dst);
          break;

        case D3DTOP_SUBTRACT:
          dst = m_module.opFSub(m_vec4Type, arg[1], arg[2]);
          dst = Saturate(dst);
          break;

        case D3DTOP_ADDSMOOTH:
          // (1 - arg1) * arg2 + arg1
          dst = m_module.opFFma(m_vec4Type, Complement(arg[1]), arg[2], arg[1]);
          dst = Saturate(dst);
          break;

        case D3DTOP_BLENDDIFFUSEALPHA:
          dst = m_module.opFMix(m_vec4Type, arg[2], arg[1], AlphaReplicate(diffuse));
          break;

        case D3DTOP_BLENDTEXTUREALPHA:
          dst = m_module.opFMix(m_vec4Type, arg[2], arg[1], AlphaReplicate(GetTexture()));
          break;

        case D3DTOP_BLENDFACTORALPHA:
          dst = m_module.opFMix(m_vec4Type, arg[2], arg[1], AlphaReplicate(m_ps.constants.textureFactor));
          break;

        case D3DTOP_BLENDTEXTUREALPHAPM:
          // arg2 * (1 - texture.a) + arg1
          dst = m_module.opFFma(m_vec4Type, arg[2], Complement(AlphaReplicate(GetTexture())), arg[1]);
          dst = Saturate(dst);
          break;

        case D3DTOP_BLENDCURRENTALPHA:
          dst = m_module.opFMix(m_vec4Type, arg[2], arg[1], AlphaReplicate(current));
          break;

        case D3DTOP_PREMODULATE:
          // Not implemented: dst is returned unchanged.
          Logger::warn("D3DTOP_PREMODULATE: not implemented");
          break;

        case D3DTOP_MODULATEALPHA_ADDCOLOR:
          dst = m_module.opFFma(m_vec4Type, AlphaReplicate(arg[1]), arg[2], arg[1]);
          dst = Saturate(dst);
          break;

        case D3DTOP_MODULATECOLOR_ADDALPHA:
          dst = m_module.opFFma(m_vec4Type, arg[1], arg[2], AlphaReplicate(arg[1]));
          dst = Saturate(dst);
          break;

        case D3DTOP_MODULATEINVALPHA_ADDCOLOR:
          dst = m_module.opFFma(m_vec4Type, Complement(AlphaReplicate(arg[1])), arg[2], arg[1]);
          dst = Saturate(dst);
          break;

        case D3DTOP_MODULATEINVCOLOR_ADDALPHA:
          dst = m_module.opFFma(m_vec4Type, Complement(arg[1]), arg[2], AlphaReplicate(arg[1]));
          dst = Saturate(dst);
          break;

        case D3DTOP_BUMPENVMAPLUMINANCE:
        case D3DTOP_BUMPENVMAP:
          // Load texture for the next stage...
          // (dst itself is left untouched by these ops.)
          texture = GetTexture();
          break;

        case D3DTOP_DOTPRODUCT3: {
          // Get vec3 of arg1 & 2
          uint32_t vec3Type = m_module.defVectorType(m_floatType, 3);
          std::array<uint32_t, 3> indices = { 0, 1, 2 };
          arg[1] = m_module.opVectorShuffle(vec3Type, arg[1], arg[1], indices.size(), indices.data());
          arg[2] = m_module.opVectorShuffle(vec3Type, arg[2], arg[2], indices.size(), indices.data());

          // Bias according to spec.
          arg[1] = m_module.opFSub(vec3Type, arg[1], m_module.constvec3f32(0.5f, 0.5f, 0.5f));
          arg[2] = m_module.opFSub(vec3Type, arg[2], m_module.constvec3f32(0.5f, 0.5f, 0.5f));

          // Do the dotting!
          dst = m_module.opDot(m_floatType, arg[1], arg[2]);

          // Multiply by 4 and replicate -> vec4
          dst = m_module.opFMul(m_floatType, dst, m_module.constf32(4.0f));
          dst = ScalarReplicate(dst);

          // Saturate
          dst = Saturate(dst);

          break;
        }

        case D3DTOP_MULTIPLYADD:
          // arg1 * arg2 + arg0
          dst = m_module.opFFma(m_vec4Type, arg[1], arg[2], arg[0]);
          dst = Saturate(dst);
          break;

        case D3DTOP_LERP:
          dst = m_module.opFMix(m_vec4Type, arg[2], arg[1], arg[0]);
          break;

        default:
          Logger::warn("Unhandled texture op!");
          break;
      }

      return dst;
    };

    // The stage writes either the temp register or the running result.
    uint32_t& dst = stage.ResultIsTemp ? temp : current;

    D3DTEXTUREOP colorOp = (D3DTEXTUREOP)stage.ColorOp;

    // This cancels all subsequent stages.
    if (colorOp == D3DTOP_DISABLE)
      break;

    // Raw D3DTA_* values at this point; ProcessArgs replaces them in place
    // with SPIR-V ids.
    std::array<uint32_t, TextureArgCount> colorArgs = {
      stage.ColorArg0,
      stage.ColorArg1,
      stage.ColorArg2};

    D3DTEXTUREOP alphaOp = (D3DTEXTUREOP)stage.AlphaOp;
    std::array<uint32_t, TextureArgCount> alphaArgs = {
      stage.AlphaArg0,
      stage.AlphaArg1,
      stage.AlphaArg2};

    // NOTE(review): the `op` parameter is never used; every argument is
    // resolved regardless of which operation consumes it.
    auto ProcessArgs = [&](auto op, auto& args) {
      for (uint32_t& arg : args)
        arg = GetArg(arg);
    };

    // Fast path if alpha/color path is identical.
    // D3DTOP_DOTPRODUCT3 also has special quirky behaviour here.
    const bool fastPath = colorOp == alphaOp && colorArgs == alphaArgs;
    if (fastPath || colorOp == D3DTOP_DOTPRODUCT3) {
      if (colorOp != D3DTOP_DISABLE) {
        ProcessArgs(colorOp, colorArgs);
        dst = DoOp(colorOp, dst, colorArgs);
      }
    }
    else {
      // Shuffle selector: xyz from the first operand, w from the second
      // (components of the second operand are numbered from 4).
      std::array<uint32_t, 4> indices = { 0, 1, 2, 4 + 3 };

      uint32_t colorResult = dst;
      uint32_t alphaResult = dst;
      if (colorOp != D3DTOP_DISABLE) {
        ProcessArgs(colorOp, colorArgs);
        colorResult = DoOp(colorOp, dst, colorArgs);
      }

      if (alphaOp != D3DTOP_DISABLE) {
        ProcessArgs(alphaOp, alphaArgs);
        alphaResult = DoOp(alphaOp, dst, alphaArgs);
      }

      // src0.x, src0.y, src0.z src1.w
      if (colorResult != dst)
        dst = m_module.opVectorShuffle(m_vec4Type, colorResult, dst, indices.size(), indices.data());

      // src0.x, src0.y, src0.z src1.w
      // But we flip src0, src1 to be inverse of color.
      if (alphaResult != dst)
        dst = m_module.opVectorShuffle(m_vec4Type, dst, alphaResult, indices.size(), indices.data());
    }
  }

  if (m_fsKey.Stages[0].Contents.GlobalSpecularEnable) {
    // Zero the alpha channel so only specular RGB is added.
    uint32_t specular = m_module.opFMul(m_vec4Type, m_ps.in.COLOR[1], m_module.constvec4f32(1.0f, 1.0f, 1.0f, 0.0f));

    current = m_module.opFAdd(m_vec4Type, current, specular);
  }

  // NOTE(review): D3D9FogContext also declares a HasFogInput member that is
  // not assigned here, so it is left indeterminate - confirm that
  // DoFixedFunctionFog never reads it when IsPixel is true.
  D3D9FogContext fogCtx;
  fogCtx.IsPixel = true;
  fogCtx.RangeFog = false;
  fogCtx.RenderState = m_rsBlock;
  fogCtx.vPos = m_ps.in.POS;
  fogCtx.vFog = m_ps.in.FOG;
  fogCtx.oColor = current;
  fogCtx.IsFixedFunction = true;
  fogCtx.IsPositionT = false;
  fogCtx.HasSpecular = false;
  fogCtx.Specular = 0;
  current = DoFixedFunctionFog(m_module, fogCtx);

  m_module.opStore(m_ps.out.COLOR, current);

  alphaTestPS();
}
GetPointSizeInfoPS(m_module, m_rsBlock); + + // We need to replace TEXCOORD inputs with gl_PointCoord + // if D3DRS_POINTSPRITEENABLE is set. + for (uint32_t i = 0; i < caps::TextureStageCount; i++) { + m_ps.in.TEXCOORD[i] = declareIO(true, DxsoSemantic{ DxsoUsage::Texcoord, i }); + m_ps.in.TEXCOORD[i] = m_module.opSelect(m_vec4Type, pointInfo.isSprite, pointCoord, m_ps.in.TEXCOORD[i]); + } + + m_ps.in.COLOR[0] = declareIO(true, DxsoSemantic{ DxsoUsage::Color, 0 }); + m_ps.in.COLOR[1] = declareIO(true, DxsoSemantic{ DxsoUsage::Color, 1 }); + + m_ps.in.FOG = declareIO(true, DxsoSemantic{ DxsoUsage::Fog, 0 }); + m_ps.in.POS = declareIO(true, DxsoSemantic{ DxsoUsage::Position, 0 }, spv::BuiltInFragCoord); + + m_ps.out.COLOR = declareIO(false, DxsoSemantic{ DxsoUsage::Color, 0 }); + + // Constant Buffer for PS. + std::array<uint32_t, uint32_t(D3D9FFPSMembers::MemberCount)> members = { + m_vec4Type // Texture Factor + }; + + const uint32_t structType = + m_module.defStructType(members.size(), members.data()); + + m_module.decorateBlock(structType); + uint32_t offset = 0; + + for (uint32_t i = 0; i < uint32_t(D3D9FFPSMembers::MemberCount); i++) { + m_module.memberDecorateOffset(structType, i, offset); + offset += sizeof(Vector4); + } + + m_module.setDebugName(structType, "D3D9FixedFunctionPS"); + m_module.setDebugMemberName(structType, 0, "textureFactor"); + + m_ps.constantBuffer = m_module.newVar( + m_module.defPointerType(structType, spv::StorageClassUniform), + spv::StorageClassUniform); + + m_module.setDebugName(m_ps.constantBuffer, "consts"); + + const uint32_t bindingId = computeResourceSlotId( + DxsoProgramType::PixelShader, DxsoBindingType::ConstantBuffer, + DxsoConstantBuffers::PSFixedFunction); + + m_module.decorateDescriptorSet(m_ps.constantBuffer, 0); + m_module.decorateBinding(m_ps.constantBuffer, bindingId); + + DxvkResourceSlot resource; + resource.slot = bindingId; + resource.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + resource.view = 
VK_IMAGE_VIEW_TYPE_MAX_ENUM; + resource.access = VK_ACCESS_UNIFORM_READ_BIT; + m_resourceSlots.push_back(resource); + + // Load constants + auto LoadConstant = [&](uint32_t type, uint32_t idx) { + uint32_t offset = m_module.constu32(idx); + uint32_t typePtr = m_module.defPointerType(type, spv::StorageClassUniform); + + return m_module.opLoad(type, + m_module.opAccessChain(typePtr, m_ps.constantBuffer, 1, &offset)); + }; + + m_ps.constants.textureFactor = LoadConstant(m_vec4Type, uint32_t(D3D9FFPSMembers::TextureFactor)); + + // Samplers + for (uint32_t i = 0; i < caps::TextureStageCount; i++) { + auto& sampler = m_ps.samplers[i]; + D3DRESOURCETYPE type = D3DRESOURCETYPE(m_fsKey.Stages[i].Contents.Type + D3DRTYPE_TEXTURE); + + spv::Dim dimensionality; + VkImageViewType viewType; + + switch (type) { + default: + case D3DRTYPE_TEXTURE: + dimensionality = spv::Dim2D; + sampler.texcoordCnt = 2; + viewType = VK_IMAGE_VIEW_TYPE_2D; + break; + case D3DRTYPE_CUBETEXTURE: + dimensionality = spv::DimCube; + sampler.texcoordCnt = 3; + viewType = VK_IMAGE_VIEW_TYPE_CUBE; + break; + case D3DRTYPE_VOLUMETEXTURE: + dimensionality = spv::Dim3D; + sampler.texcoordCnt = 3; + viewType = VK_IMAGE_VIEW_TYPE_3D; + break; + } + + sampler.typeId = m_module.defImageType( + m_module.defFloatType(32), + dimensionality, 0, 0, 0, 1, + spv::ImageFormatUnknown); + + sampler.typeId = m_module.defSampledImageType(sampler.typeId); + + sampler.varId = m_module.newVar( + m_module.defPointerType( + sampler.typeId, spv::StorageClassUniformConstant), + spv::StorageClassUniformConstant); + + std::string name = str::format("s", i); + m_module.setDebugName(sampler.varId, name.c_str()); + + const uint32_t bindingId = computeResourceSlotId(DxsoProgramType::PixelShader, + DxsoBindingType::Image, i); + + sampler.bound = m_module.specConstBool(true); + m_module.decorateSpecId(sampler.bound, bindingId); + m_module.setDebugName(sampler.bound, + str::format("s", i, "_bound").c_str()); + + 
m_module.decorateDescriptorSet(sampler.varId, 0); + m_module.decorateBinding(sampler.varId, bindingId); + + // Store descriptor info for the shader interface + DxvkResourceSlot resource; + resource.slot = bindingId; + resource.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + resource.view = viewType; + resource.access = VK_ACCESS_SHADER_READ_BIT; + m_resourceSlots.push_back(resource); + } + + emitPsSharedConstants(); + } + + + void D3D9FFShaderCompiler::emitPsSharedConstants() { + m_ps.sharedState = GetSharedConstants(m_module); + + const uint32_t bindingId = computeResourceSlotId( + m_programType, DxsoBindingType::ConstantBuffer, + PSShared); + + m_module.decorateDescriptorSet(m_ps.sharedState, 0); + m_module.decorateBinding(m_ps.sharedState, bindingId); + + DxvkResourceSlot resource; + resource.slot = bindingId; + resource.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + resource.view = VK_IMAGE_VIEW_TYPE_MAX_ENUM; + resource.access = VK_ACCESS_UNIFORM_READ_BIT; + m_resourceSlots.push_back(resource); + } + + + void D3D9FFShaderCompiler::emitVsClipping(uint32_t vtx) { + uint32_t worldPos = m_module.opMatrixTimesVector(m_vec4Type, m_vs.constants.inverseView, vtx); + + uint32_t clipPlaneCountId = m_module.constu32(caps::MaxClipPlanes); + + uint32_t floatType = m_module.defFloatType(32); + uint32_t vec4Type = m_module.defVectorType(floatType, 4); + + // Declare uniform buffer containing clip planes + uint32_t clipPlaneArray = m_module.defArrayTypeUnique(vec4Type, clipPlaneCountId); + uint32_t clipPlaneStruct = m_module.defStructTypeUnique(1, &clipPlaneArray); + uint32_t clipPlaneBlock = m_module.newVar( + m_module.defPointerType(clipPlaneStruct, spv::StorageClassUniform), + spv::StorageClassUniform); + + m_module.decorateArrayStride (clipPlaneArray, 16); + + m_module.setDebugName (clipPlaneStruct, "clip_info_t"); + m_module.setDebugMemberName (clipPlaneStruct, 0, "clip_planes"); + m_module.decorate (clipPlaneStruct, spv::DecorationBlock); + 
// Appends the alpha test to the pixel shader.
//
// The comparison function comes from a specialization constant
// ("alpha_func", D3D9SpecConstantId::AlphaCompareOp) and the reference
// value from the AlphaRef member of m_rsBlock. The emitted control flow is:
//
//   if (alpha_func != ALWAYS) {
//     switch (alpha_func) { ... one block per compare op ... }
//     if (!result) discard;
//   }
//
// Must run after the final colour has been stored to m_ps.out.COLOR,
// because the alpha value is loaded back from that variable.
void D3D9FFShaderCompiler::alphaTestPS() {
  // Alpha testing
  uint32_t boolType = m_module.defBoolType();
  uint32_t floatPtr = m_module.defPointerType(m_floatType, spv::StorageClassPushConstant);

  // Declare spec constants for render states
  uint32_t alphaFuncId = m_module.specConst32(m_module.defIntType(32, 0), 0);
  m_module.setDebugName(alphaFuncId, "alpha_func");
  m_module.decorateSpecId(alphaFuncId, getSpecId(D3D9SpecConstantId::AlphaCompareOp));

  // Implement alpha test
  auto oC0 = m_ps.out.COLOR;
  // Labels for the alpha test
  // The entries are listed in VkCompareOp numeric order (NEVER = 0 through
  // ALWAYS = 7); the switch default below relies on that by indexing this
  // array with VK_COMPARE_OP_ALWAYS.
  std::array<SpirvSwitchCaseLabel, 8> atestCaseLabels = { {
    { uint32_t(VK_COMPARE_OP_NEVER), m_module.allocateId() },
    { uint32_t(VK_COMPARE_OP_LESS), m_module.allocateId() },
    { uint32_t(VK_COMPARE_OP_EQUAL), m_module.allocateId() },
    { uint32_t(VK_COMPARE_OP_LESS_OR_EQUAL), m_module.allocateId() },
    { uint32_t(VK_COMPARE_OP_GREATER), m_module.allocateId() },
    { uint32_t(VK_COMPARE_OP_NOT_EQUAL), m_module.allocateId() },
    { uint32_t(VK_COMPARE_OP_GREATER_OR_EQUAL), m_module.allocateId() },
    { uint32_t(VK_COMPARE_OP_ALWAYS), m_module.allocateId() },
  } };

  // Block labels: Begin = body of the outer if, Test = merge point of the
  // switch, Discard/Keep = branches of the inner if, Skip = merge point of
  // the outer if.
  uint32_t atestBeginLabel = m_module.allocateId();
  uint32_t atestTestLabel = m_module.allocateId();
  uint32_t atestDiscardLabel = m_module.allocateId();
  uint32_t atestKeepLabel = m_module.allocateId();
  uint32_t atestSkipLabel = m_module.allocateId();

  // if (alpha_test) { ... }
  uint32_t isNotAlways = m_module.opINotEqual(boolType, alphaFuncId, m_module.constu32(VK_COMPARE_OP_ALWAYS));
  m_module.opSelectionMerge(atestSkipLabel, spv::SelectionControlMaskNone);
  m_module.opBranchConditional(isNotAlways, atestBeginLabel, atestSkipLabel);
  m_module.opLabel(atestBeginLabel);

  // Load alpha component
  uint32_t alphaComponentId = 3;
  uint32_t alphaId = m_module.opCompositeExtract(m_floatType,
    m_module.opLoad(m_vec4Type, oC0),
    1, &alphaComponentId);

  // Load alpha reference
  uint32_t alphaRefMember = m_module.constu32(uint32_t(D3D9RenderStateItem::AlphaRef));
  uint32_t alphaRefId = m_module.opLoad(m_floatType,
    m_module.opAccessChain(floatPtr, m_rsBlock, 1, &alphaRefMember));

  // switch (alpha_func) { ... }
  m_module.opSelectionMerge(atestTestLabel, spv::SelectionControlMaskNone);
  m_module.opSwitch(alphaFuncId,
    atestCaseLabels[uint32_t(VK_COMPARE_OP_ALWAYS)].labelId,
    atestCaseLabels.size(),
    atestCaseLabels.data());

  // One (value, predecessor block) pair per case, consumed by the phi below.
  std::array<SpirvPhiLabel, 8> atestVariables;

  for (uint32_t i = 0; i < atestCaseLabels.size(); i++) {
    m_module.opLabel(atestCaseLabels[i].labelId);

    atestVariables[i].labelId = atestCaseLabels[i].labelId;
    // Emit the comparison for this case inside its own block; the result is
    // true when the fragment passes the test.
    atestVariables[i].varId = [&] {
      switch (VkCompareOp(atestCaseLabels[i].literal)) {
        case VK_COMPARE_OP_NEVER: return m_module.constBool(false);
        case VK_COMPARE_OP_LESS: return m_module.opFOrdLessThan(boolType, alphaId, alphaRefId);
        case VK_COMPARE_OP_EQUAL: return m_module.opFOrdEqual(boolType, alphaId, alphaRefId);
        case VK_COMPARE_OP_LESS_OR_EQUAL: return m_module.opFOrdLessThanEqual(boolType, alphaId, alphaRefId);
        case VK_COMPARE_OP_GREATER: return m_module.opFOrdGreaterThan(boolType, alphaId, alphaRefId);
        case VK_COMPARE_OP_NOT_EQUAL: return m_module.opFOrdNotEqual(boolType, alphaId, alphaRefId);
        case VK_COMPARE_OP_GREATER_OR_EQUAL: return m_module.opFOrdGreaterThanEqual(boolType, alphaId, alphaRefId);
        default:
        case VK_COMPARE_OP_ALWAYS: return m_module.constBool(true);
      }
    }();

    m_module.opBranch(atestTestLabel);
  }

  // end switch
  m_module.opLabel(atestTestLabel);

  // Select the comparison result of whichever case block was executed.
  uint32_t atestResult = m_module.opPhi(boolType,
    atestVariables.size(),
    atestVariables.data());
  uint32_t atestDiscard = m_module.opLogicalNot(boolType, atestResult);

  // if (do_discard) { ... }
  m_module.opSelectionMerge(atestKeepLabel, spv::SelectionControlMaskNone);
  m_module.opBranchConditional(atestDiscard, atestDiscardLabel, atestKeepLabel);

  m_module.opLabel(atestDiscardLabel);
  m_module.opKill();

  // end if (do_discard)
  m_module.opLabel(atestKeepLabel);
  m_module.opBranch(atestSkipLabel);

  // end if (alpha_test)
  m_module.opLabel(atestSkipLabel);
}
std::ios_base::trunc); +#endif + + m_shader->dump(dumpStream); + } + } + + + D3D9FFShader D3D9FFShaderModuleSet::GetShaderModule( + D3D9DeviceEx* pDevice, + const D3D9FFShaderKeyVS& ShaderKey) { + // Use the shader's unique key for the lookup + auto entry = m_vsModules.find(ShaderKey); + if (entry != m_vsModules.end()) + return entry->second; + + D3D9FFShader shader( + pDevice, ShaderKey); + + m_vsModules.insert({ShaderKey, shader}); + + return shader; + } + + + D3D9FFShader D3D9FFShaderModuleSet::GetShaderModule( + D3D9DeviceEx* pDevice, + const D3D9FFShaderKeyFS& ShaderKey) { + // Use the shader's unique key for the lookup + auto entry = m_fsModules.find(ShaderKey); + if (entry != m_fsModules.end()) + return entry->second; + + D3D9FFShader shader( + pDevice, ShaderKey); + + m_fsModules.insert({ShaderKey, shader}); + + return shader; + } + + + size_t D3D9FFShaderKeyHash::operator () (const D3D9FFShaderKeyVS& key) const { + DxvkHashState state; + + std::hash<uint32_t> uint32hash; + + for (uint32_t i = 0; i < std::size(key.Data.Primitive); i++) + state.add(uint32hash(key.Data.Primitive[i])); + + return state; + } + + + size_t D3D9FFShaderKeyHash::operator () (const D3D9FFShaderKeyFS& key) const { + DxvkHashState state; + + std::hash<uint32_t> uint32hash; + + for (uint32_t i = 0; i < caps::TextureStageCount; i++) { + for (uint32_t j = 0; j < std::size(key.Stages[i].Primitive); j++) + state.add(uint32hash(key.Stages[i].Primitive[j])); + } + + return state; + } + + + bool operator == (const D3D9FFShaderKeyVS& a, const D3D9FFShaderKeyVS& b) { + return std::memcmp(&a, &b, sizeof(D3D9FFShaderKeyVS)) == 0; + } + + + bool operator == (const D3D9FFShaderKeyFS& a, const D3D9FFShaderKeyFS& b) { + return std::memcmp(&a, &b, sizeof(D3D9FFShaderKeyFS)) == 0; + } + + + bool operator != (const D3D9FFShaderKeyVS& a, const D3D9FFShaderKeyVS& b) { + return !(a == b); + } + + + bool operator != (const D3D9FFShaderKeyFS& a, const D3D9FFShaderKeyFS& b) { + return !(a == b); + } + + + 
bool D3D9FFShaderKeyEq::operator () (const D3D9FFShaderKeyVS& a, const D3D9FFShaderKeyVS& b) const { + return a == b; + } + + + bool D3D9FFShaderKeyEq::operator () (const D3D9FFShaderKeyFS& a, const D3D9FFShaderKeyFS& b) const { + return a == b; + } + + + static inline DxsoIsgn CreateFixedFunctionIsgn() { + DxsoIsgn ffIsgn; + + ffIsgn.elems[ffIsgn.elemCount++].semantic = DxsoSemantic{ DxsoUsage::Position, 0 }; + ffIsgn.elems[ffIsgn.elemCount++].semantic = DxsoSemantic{ DxsoUsage::Normal, 0 }; + ffIsgn.elems[ffIsgn.elemCount++].semantic = DxsoSemantic{ DxsoUsage::Position, 1 }; + ffIsgn.elems[ffIsgn.elemCount++].semantic = DxsoSemantic{ DxsoUsage::Normal, 1 }; + for (uint32_t i = 0; i < 8; i++) + ffIsgn.elems[ffIsgn.elemCount++].semantic = DxsoSemantic{ DxsoUsage::Texcoord, i }; + ffIsgn.elems[ffIsgn.elemCount++].semantic = DxsoSemantic{ DxsoUsage::Color, 0 }; + ffIsgn.elems[ffIsgn.elemCount++].semantic = DxsoSemantic{ DxsoUsage::Color, 1 }; + ffIsgn.elems[ffIsgn.elemCount++].semantic = DxsoSemantic{ DxsoUsage::Fog, 0 }; + ffIsgn.elems[ffIsgn.elemCount++].semantic = DxsoSemantic{ DxsoUsage::PointSize, 0 }; + ffIsgn.elems[ffIsgn.elemCount++].semantic = DxsoSemantic{ DxsoUsage::BlendWeight, 0 }; + ffIsgn.elems[ffIsgn.elemCount++].semantic = DxsoSemantic{ DxsoUsage::BlendIndices, 0 }; + + return ffIsgn; + } + + + DxsoIsgn g_ffIsgn = CreateFixedFunctionIsgn(); + +} diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_fixed_function.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_fixed_function.h new file mode 100644 index 00000000..8e312aa2 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_fixed_function.h @@ -0,0 +1,254 @@ +#pragma once + +#include "d3d9_include.h" + +#include "d3d9_caps.h" + +#include "../dxvk/dxvk_shader.h" + +#include "../dxso/dxso_isgn.h" + +#include <unordered_map> +#include <bitset> + +namespace dxvk { + + class D3D9DeviceEx; + class SpirvModule; + + struct D3D9Options; + + struct D3D9FogContext { + // General inputs... 
+ bool IsPixel; + bool RangeFog; + uint32_t RenderState; + uint32_t vPos; + uint32_t vFog; + + uint32_t oColor; + + bool HasFogInput; + + bool IsFixedFunction; + bool IsPositionT; + bool HasSpecular; + uint32_t Specular; + }; + + struct D3D9FixedFunctionOptions { + D3D9FixedFunctionOptions(const D3D9Options* options); + + bool invariantPosition; + }; + + // Returns new oFog if VS + // Returns new oColor if PS + uint32_t DoFixedFunctionFog(SpirvModule& spvModule, const D3D9FogContext& fogCtx); + + // Returns a render state block + uint32_t SetupRenderStateBlock(SpirvModule& spvModule, uint32_t count); + + struct D3D9PointSizeInfoVS { + uint32_t defaultValue; + uint32_t min; + uint32_t max; + }; + + // Default point size and point scale magic! + D3D9PointSizeInfoVS GetPointSizeInfoVS(SpirvModule& spvModule, uint32_t vPos, uint32_t vtx, uint32_t perVertPointSize, uint32_t rsBlock, bool isFixedFunction); + + struct D3D9PointSizeInfoPS { + uint32_t isSprite; + }; + + D3D9PointSizeInfoPS GetPointSizeInfoPS(SpirvModule& spvModule, uint32_t rsBlock); + + uint32_t GetPointCoord(SpirvModule& spvModule, std::vector<uint32_t>& entryPointInterfaces); + + uint32_t GetSharedConstants(SpirvModule& spvModule); + + constexpr uint32_t TCIOffset = 16; + constexpr uint32_t TCIMask = 0b111 << TCIOffset; + + enum D3D9FF_VertexBlendMode { + D3D9FF_VertexBlendMode_Disabled, + D3D9FF_VertexBlendMode_Normal, + D3D9FF_VertexBlendMode_Tween, + }; + + struct D3D9FFShaderKeyVSData { + union { + struct { + uint32_t TexcoordIndices : 24; + + uint32_t HasPositionT : 1; + + uint32_t HasColor0 : 1; // Diffuse + uint32_t HasColor1 : 1; // Specular + + uint32_t HasPointSize : 1; + + uint32_t UseLighting : 1; + + uint32_t NormalizeNormals : 1; + uint32_t LocalViewer : 1; + uint32_t RangeFog : 1; + + uint32_t TexcoordFlags : 24; + + uint32_t DiffuseSource : 2; + uint32_t AmbientSource : 2; + uint32_t SpecularSource : 2; + uint32_t EmissiveSource : 2; + + uint32_t TransformFlags : 24; + + uint32_t 
LightCount : 4; + + uint32_t TexcoordDeclMask : 24; + uint32_t HasFog : 1; + + uint32_t VertexBlendMode : 2; + uint32_t VertexBlendIndexed : 1; + uint32_t VertexBlendCount : 3; + + uint32_t VertexClipping : 1; + } Contents; + + uint32_t Primitive[4]; + }; + }; + + struct D3D9FFShaderKeyVS { + D3D9FFShaderKeyVS() { + // memcmp safety + std::memset(&Data, 0, sizeof(Data)); + } + + D3D9FFShaderKeyVSData Data; + }; + + constexpr uint32_t TextureArgCount = 3; + + struct D3D9FFShaderStage { + union { + struct { + uint32_t ColorOp : 5; + uint32_t ColorArg0 : 6; + uint32_t ColorArg1 : 6; + uint32_t ColorArg2 : 6; + + uint32_t AlphaOp : 5; + uint32_t AlphaArg0 : 6; + uint32_t AlphaArg1 : 6; + uint32_t AlphaArg2 : 6; + + uint32_t Type : 2; + uint32_t ResultIsTemp : 1; + uint32_t Projected : 1; + + uint32_t ProjectedCount : 3; + + // Included in here, read from Stage 0 for packing reasons + // Affects all stages. + uint32_t GlobalSpecularEnable : 1; + uint32_t GlobalFlatShade : 1; + } Contents; + + uint32_t Primitive[2]; + }; + }; + + struct D3D9FFShaderKeyFS { + D3D9FFShaderKeyFS() { + // memcmp safety + std::memset(Stages, 0, sizeof(Stages)); + + // Normalize this. DISABLE != 0. 
+ for (uint32_t i = 0; i < caps::TextureStageCount; i++) { + Stages[i].Contents.ColorOp = D3DTOP_DISABLE; + Stages[i].Contents.AlphaOp = D3DTOP_DISABLE; + } + } + + D3D9FFShaderStage Stages[caps::TextureStageCount]; + }; + + struct D3D9FFShaderKeyHash { + size_t operator () (const D3D9FFShaderKeyVS& key) const; + size_t operator () (const D3D9FFShaderKeyFS& key) const; + }; + + bool operator == (const D3D9FFShaderKeyVS& a, const D3D9FFShaderKeyVS& b); + bool operator != (const D3D9FFShaderKeyVS& a, const D3D9FFShaderKeyVS& b); + bool operator == (const D3D9FFShaderKeyFS& a, const D3D9FFShaderKeyFS& b); + bool operator != (const D3D9FFShaderKeyFS& a, const D3D9FFShaderKeyFS& b); + + struct D3D9FFShaderKeyEq { + bool operator () (const D3D9FFShaderKeyVS& a, const D3D9FFShaderKeyVS& b) const; + bool operator () (const D3D9FFShaderKeyFS& a, const D3D9FFShaderKeyFS& b) const; + }; + + class D3D9FFShader { + + public: + + D3D9FFShader( + D3D9DeviceEx* pDevice, + const D3D9FFShaderKeyVS& Key); + + D3D9FFShader( + D3D9DeviceEx* pDevice, + const D3D9FFShaderKeyFS& Key); + + template <typename T> + void Dump(const T& Key, const std::string& Name); + + Rc<DxvkShader> GetShader() const { + return m_shader; + } + + private: + + Rc<DxvkShader> m_shader; + + DxsoIsgn m_isgn; + + }; + + + class D3D9FFShaderModuleSet : public RcObject { + + public: + + D3D9FFShader GetShaderModule( + D3D9DeviceEx* pDevice, + const D3D9FFShaderKeyVS& ShaderKey); + + D3D9FFShader GetShaderModule( + D3D9DeviceEx* pDevice, + const D3D9FFShaderKeyFS& ShaderKey); + + private: + + std::unordered_map< + D3D9FFShaderKeyVS, + D3D9FFShader, + D3D9FFShaderKeyHash, D3D9FFShaderKeyEq> m_vsModules; + + std::unordered_map< + D3D9FFShaderKeyFS, + D3D9FFShader, + D3D9FFShaderKeyHash, D3D9FFShaderKeyEq> m_fsModules; + + }; + + + inline const DxsoIsgn& GetFixedFunctionIsgn() { + extern DxsoIsgn g_ffIsgn; + + return g_ffIsgn; + } + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_format.cpp b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_format.cpp new file mode 100644 index 00000000..424d0413 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_format.cpp @@ -0,0 +1,568 @@ +#include "d3d9_format.h" + +namespace dxvk { + + // It is also worth noting that the msb/lsb-ness is flipped between VK and D3D9. + D3D9_VK_FORMAT_MAPPING ConvertFormatUnfixed(D3D9Format Format) { + switch (Format) { + case D3D9Format::Unknown: return {}; + + case D3D9Format::R8G8B8: return {}; // Unsupported + + case D3D9Format::A8R8G8B8: return { + VK_FORMAT_B8G8R8A8_UNORM, + VK_FORMAT_B8G8R8A8_SRGB, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::X8R8G8B8: return { + VK_FORMAT_B8G8R8A8_UNORM, + VK_FORMAT_B8G8R8A8_SRGB, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, + VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_ONE }}; + + case D3D9Format::R5G6B5: return { + VK_FORMAT_R5G6B5_UNORM_PACK16, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT}; + + case D3D9Format::X1R5G5B5: return { + VK_FORMAT_A1R5G5B5_UNORM_PACK16, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, + VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_ONE }}; + + case D3D9Format::A1R5G5B5: return { + VK_FORMAT_A1R5G5B5_UNORM_PACK16, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::A4R4G4B4: return { + VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::R3G3B2: return {}; // Unsupported + + case D3D9Format::A8: return { + VK_FORMAT_R8_UNORM, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO, + VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_R }}; + + case D3D9Format::A8R3G3B2: return {}; // Unsupported + + case D3D9Format::X4R4G4B4: return { + VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT, + 
VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::A2B10G10R10: return { + VK_FORMAT_A2B10G10R10_UNORM_PACK32, // The A2 is out of place here. This should be investigated. + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::A8B8G8R8: return { + VK_FORMAT_R8G8B8A8_UNORM, + VK_FORMAT_R8G8B8A8_SRGB, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::X8B8G8R8: return { + VK_FORMAT_R8G8B8A8_UNORM, + VK_FORMAT_R8G8B8A8_SRGB, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, + VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_ONE }}; + + case D3D9Format::G16R16: return { + VK_FORMAT_R16G16_UNORM, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, + VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE }}; + + case D3D9Format::A2R10G10B10: return { + VK_FORMAT_A2R10G10B10_UNORM_PACK32, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::A16B16G16R16: return { + VK_FORMAT_R16G16B16A16_UNORM, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::A8P8: return {}; // Unsupported + + case D3D9Format::P8: return {}; // Unsupported + + case D3D9Format::L8: return { + VK_FORMAT_R8_UNORM, + VK_FORMAT_R8_SRGB, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, + VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE }}; + + case D3D9Format::A8L8: return { + VK_FORMAT_R8G8_UNORM, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, + VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G }}; + + case D3D9Format::A4L4: return { + VK_FORMAT_R4G4_UNORM_PACK8, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_G, + VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R }}; + + case D3D9Format::V8U8: return { + VK_FORMAT_R8G8_SNORM, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_R, 
VK_COMPONENT_SWIZZLE_G, + VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE }}; + + case D3D9Format::L6V5U5: return { + // Any PACK16 format will do... + VK_FORMAT_B5G6R5_UNORM_PACK16, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, + VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A }, + { D3D9ConversionFormat_L6V5U5, 1u, + // Convert -> float (this is a mixed snorm and unorm type) + VK_FORMAT_R16G16B16A16_SFLOAT } }; + + case D3D9Format::X8L8V8U8: return { + VK_FORMAT_B8G8R8A8_UNORM, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, + VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_ONE }, + { D3D9ConversionFormat_X8L8V8U8, 1u, + // Convert -> float (this is a mixed snorm and unorm type) + VK_FORMAT_R16G16B16A16_SFLOAT } }; + + case D3D9Format::Q8W8V8U8: return { + VK_FORMAT_R8G8B8A8_SNORM, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::V16U16: return { + VK_FORMAT_R16G16_SNORM, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, + VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE }}; + + case D3D9Format::A2W10V10U10: return { + VK_FORMAT_A2B10G10R10_UNORM_PACK32, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, + VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A }, + { D3D9ConversionFormat_A2W10V10U10, 1u, + // Convert -> float (this is a mixed snorm and unorm type) + VK_FORMAT_R16G16B16A16_SFLOAT } }; + + case D3D9Format::UYVY: return { + VK_FORMAT_B8G8R8A8_UNORM, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, + VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY }, + { D3D9ConversionFormat_UYVY, 1u } + }; + + case D3D9Format::R8G8_B8G8: return { + VK_FORMAT_G8B8G8R8_422_UNORM, // This format may have been _SCALED in DX9. 
+ VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::YUY2: return { + VK_FORMAT_B8G8R8A8_UNORM, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, + VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY }, + { D3D9ConversionFormat_YUY2, 1u } + }; + + case D3D9Format::G8R8_G8B8: return { + VK_FORMAT_B8G8R8G8_422_UNORM, // This format may have been _SCALED in DX9. + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::DXT1: return { + VK_FORMAT_BC1_RGBA_UNORM_BLOCK, + VK_FORMAT_BC1_RGBA_SRGB_BLOCK, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::DXT2: return { + VK_FORMAT_BC2_UNORM_BLOCK, + VK_FORMAT_BC2_SRGB_BLOCK, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::DXT3: return { + VK_FORMAT_BC2_UNORM_BLOCK, + VK_FORMAT_BC2_SRGB_BLOCK, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::DXT4: return { + VK_FORMAT_BC3_UNORM_BLOCK, + VK_FORMAT_BC3_SRGB_BLOCK, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::DXT5: return { + VK_FORMAT_BC3_UNORM_BLOCK, + VK_FORMAT_BC3_SRGB_BLOCK, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::D16_LOCKABLE: return { + VK_FORMAT_D16_UNORM, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_DEPTH_BIT }; + + case D3D9Format::D32: return { + VK_FORMAT_D32_SFLOAT, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_DEPTH_BIT }; + + case D3D9Format::D15S1: return {}; // Unsupported (everywhere) + + case D3D9Format::D24S8: return { + VK_FORMAT_D24_UNORM_S8_UINT, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT }; + + case D3D9Format::D24X8: return { + VK_FORMAT_D24_UNORM_S8_UINT, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_DEPTH_BIT }; + + case D3D9Format::D24X4S4: return {}; // Unsupported (everywhere) + + case D3D9Format::D16: return { + VK_FORMAT_D16_UNORM, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_DEPTH_BIT }; + + case D3D9Format::D32F_LOCKABLE: return { + VK_FORMAT_D32_SFLOAT, + VK_FORMAT_UNDEFINED, + 
VK_IMAGE_ASPECT_DEPTH_BIT }; + + case D3D9Format::D24FS8: return { + VK_FORMAT_D24_UNORM_S8_UINT, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT }; + + case D3D9Format::D32_LOCKABLE: return { + VK_FORMAT_D32_SFLOAT, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_DEPTH_BIT }; + + case D3D9Format::S8_LOCKABLE: return { + VK_FORMAT_S8_UINT, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_STENCIL_BIT }; + + case D3D9Format::L16: return { + VK_FORMAT_R16_UNORM, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, + VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE }}; + + case D3D9Format::VERTEXDATA: return { + VK_FORMAT_R8_UINT, + VK_FORMAT_UNDEFINED, + 0 }; + + case D3D9Format::INDEX16: return { + VK_FORMAT_R16_UINT, + VK_FORMAT_UNDEFINED, + 0 }; + + case D3D9Format::INDEX32: return { + VK_FORMAT_R32_UINT, + VK_FORMAT_UNDEFINED, + 0 }; + + case D3D9Format::Q16W16V16U16: return { + VK_FORMAT_R16G16B16A16_SNORM, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::MULTI2_ARGB8: return {}; // Unsupported + + case D3D9Format::R16F: return { + VK_FORMAT_R16_SFLOAT, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE, + VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE }}; + + case D3D9Format::G16R16F: return { + VK_FORMAT_R16G16_SFLOAT, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, + VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE }}; + + case D3D9Format::A16B16G16R16F: return { + VK_FORMAT_R16G16B16A16_SFLOAT, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::R32F: return { + VK_FORMAT_R32_SFLOAT, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE, + VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE }}; + + case D3D9Format::G32R32F: return { + VK_FORMAT_R32G32_SFLOAT, + 
VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, + VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE }}; + + case D3D9Format::A32B32G32R32F: return { + VK_FORMAT_R32G32B32A32_SFLOAT, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::CxV8U8: return {}; // Unsupported + + case D3D9Format::A1: return {}; // Unsupported + + case D3D9Format::A2B10G10R10_XR_BIAS: return { + VK_FORMAT_A2B10G10R10_SNORM_PACK32, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::BINARYBUFFER: return { + VK_FORMAT_R8_UINT, + VK_FORMAT_UNDEFINED, + 0 }; + + case D3D9Format::ATI1: return { + VK_FORMAT_BC4_UNORM_BLOCK, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ZERO, + VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ONE }}; + + case D3D9Format::ATI2: return { + VK_FORMAT_BC5_UNORM_BLOCK, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, + VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE }}; + + case D3D9Format::INST: return {}; // Driver hack, handled elsewhere + + case D3D9Format::DF24: return { + VK_FORMAT_D24_UNORM_S8_UINT, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_DEPTH_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ZERO, + VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ONE }}; + + case D3D9Format::DF16: return { + VK_FORMAT_D16_UNORM, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_DEPTH_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ZERO, + VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ONE }}; + + case D3D9Format::NULL_FORMAT: return {}; // Driver hack, handled elsewhere + + case D3D9Format::GET4: return {}; // Unsupported + + case D3D9Format::GET1: return {}; // Unsupported + + case D3D9Format::NVDB: return {}; // Driver hack, handled elsewhere + + case D3D9Format::A2M1: return {}; // Driver hack, handled elsewhere + + case D3D9Format::A2M0: return {}; // Driver 
hack, handled elsewhere + + case D3D9Format::ATOC: return {}; // Driver hack, handled elsewhere + + case D3D9Format::INTZ: return { + VK_FORMAT_D24_UNORM_S8_UINT, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, + VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }}; + + case D3D9Format::NV12: return { + VK_FORMAT_R8_UNORM, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, + VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY }, + { D3D9ConversionFormat_NV12, 2u, VK_FORMAT_B8G8R8A8_UNORM } + }; + + case D3D9Format::YV12: return { + VK_FORMAT_R8_UNORM, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, + VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY }, + { D3D9ConversionFormat_YV12, 3u, VK_FORMAT_B8G8R8A8_UNORM } + }; + + case D3D9Format::RAWZ: return {}; // Unsupported + + default: + Logger::warn(str::format("ConvertFormat: Unknown format encountered: ", Format)); + return {}; // Unsupported + } + } + + D3D9VkFormatTable::D3D9VkFormatTable( + const Rc<DxvkAdapter>& adapter, + const D3D9Options& options) { + m_dfSupport = options.supportDFFormats; + m_x4r4g4b4Support = options.supportX4R4G4B4; + m_d32supportFinal = options.supportD32; + + // AMD do not support 24-bit depth buffers on Vulkan, + // so we have to fall back to a 32-bit depth format. + m_d24s8Support = CheckImageFormatSupport(adapter, VK_FORMAT_D24_UNORM_S8_UINT, + VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT | + VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT); + + // NVIDIA do not support 16-bit depth buffers with stencil on Vulkan, + // so we have to fall back to a 32-bit depth format. 
+ m_d16s8Support = CheckImageFormatSupport(adapter, VK_FORMAT_D16_UNORM_S8_UINT, + VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT | + VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT); + + // VK_EXT_4444_formats + m_a4r4g4b4Support = CheckImageFormatSupport(adapter, VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT, + VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT); + + if (!m_d24s8Support) + Logger::info("D3D9: VK_FORMAT_D24_UNORM_S8_UINT -> VK_FORMAT_D32_SFLOAT_S8_UINT"); + + if (!m_d16s8Support) { + if (m_d24s8Support) + Logger::info("D3D9: VK_FORMAT_D16_UNORM_S8_UINT -> VK_FORMAT_D24_UNORM_S8_UINT"); + else + Logger::info("D3D9: VK_FORMAT_D16_UNORM_S8_UINT -> VK_FORMAT_D32_SFLOAT_S8_UINT"); + } + + if (!m_a4r4g4b4Support) + Logger::warn("D3D9: VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT -> VK_FORMAT_B4G4R4A4_UNORM_PACK16"); + } + + D3D9_VK_FORMAT_MAPPING D3D9VkFormatTable::GetFormatMapping( + D3D9Format Format) const { + D3D9_VK_FORMAT_MAPPING mapping = ConvertFormatUnfixed(Format); + + if (Format == D3D9Format::X4R4G4B4 && !m_x4r4g4b4Support) + return D3D9_VK_FORMAT_MAPPING(); + + if (Format == D3D9Format::DF16 && !m_dfSupport) + return D3D9_VK_FORMAT_MAPPING(); + + if (Format == D3D9Format::DF24 && !m_dfSupport) + return D3D9_VK_FORMAT_MAPPING(); + + if (Format == D3D9Format::D32 && !m_d32supportFinal) + return D3D9_VK_FORMAT_MAPPING(); + + if (!m_d24s8Support && mapping.FormatColor == VK_FORMAT_D24_UNORM_S8_UINT) + mapping.FormatColor = VK_FORMAT_D32_SFLOAT_S8_UINT; + + if (!m_d16s8Support && mapping.FormatColor == VK_FORMAT_D16_UNORM_S8_UINT) + mapping.FormatColor = m_d24s8Support ? VK_FORMAT_D24_UNORM_S8_UINT : VK_FORMAT_D32_SFLOAT_S8_UINT; + + if (!m_a4r4g4b4Support && mapping.FormatColor == VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT) { + VkComponentSwizzle alphaSwizzle = Format == D3D9Format::A4R4G4B4 + ? 
VK_COMPONENT_SWIZZLE_B + : VK_COMPONENT_SWIZZLE_ONE; + + mapping.FormatColor = VK_FORMAT_B4G4R4A4_UNORM_PACK16; + mapping.Swizzle = { + VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, + VK_COMPONENT_SWIZZLE_A, alphaSwizzle }; + } + + return mapping; + } + + + const DxvkFormatInfo* D3D9VkFormatTable::GetUnsupportedFormatInfo( + D3D9Format Format) const { + static const DxvkFormatInfo r8b8g8 = { 3, VK_IMAGE_ASPECT_COLOR_BIT }; + static const DxvkFormatInfo r3g3b2 = { 1, VK_IMAGE_ASPECT_COLOR_BIT }; + static const DxvkFormatInfo a8r3g3b2 = { 2, VK_IMAGE_ASPECT_COLOR_BIT }; + static const DxvkFormatInfo a8p8 = { 2, VK_IMAGE_ASPECT_COLOR_BIT }; + static const DxvkFormatInfo p8 = { 1, VK_IMAGE_ASPECT_COLOR_BIT }; + static const DxvkFormatInfo l6v5u5 = { 2, VK_IMAGE_ASPECT_COLOR_BIT }; + static const DxvkFormatInfo x8l8v8u8 = { 4, VK_IMAGE_ASPECT_COLOR_BIT }; + static const DxvkFormatInfo a2w10v10u10 = { 4, VK_IMAGE_ASPECT_COLOR_BIT }; + static const DxvkFormatInfo cxv8u8 = { 2, VK_IMAGE_ASPECT_COLOR_BIT }; + static const DxvkFormatInfo unknown = {}; + + switch (Format) { + case D3D9Format::R8G8B8: + return &r8b8g8; + + case D3D9Format::R3G3B2: + return &r3g3b2; + + case D3D9Format::A8R3G3B2: + return &a8r3g3b2; + + case D3D9Format::A8P8: + return &a8p8; + + case D3D9Format::P8: + return &p8; + + case D3D9Format::L6V5U5: + return &l6v5u5; + + case D3D9Format::X8L8V8U8: + return &x8l8v8u8; + + case D3D9Format::A2W10V10U10: + return &a2w10v10u10; + + // MULTI2_ARGB8 -> Don't have a clue what this is. + + case D3D9Format::CxV8U8: + return &cxv8u8; + + // A1 -> Doesn't map nicely here cause it's not byte aligned. + // Gonna just pretend that doesn't exist until something + // depends on that. 
+ + default: + return &unknown; + } + } + + + bool D3D9VkFormatTable::CheckImageFormatSupport( + const Rc<DxvkAdapter>& Adapter, + VkFormat Format, + VkFormatFeatureFlags Features) const { + VkFormatProperties supported = Adapter->formatProperties(Format); + + return (supported.linearTilingFeatures & Features) == Features + || (supported.optimalTilingFeatures & Features) == Features; + } + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_format.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_format.h new file mode 100644 index 00000000..43ec8629 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_format.h @@ -0,0 +1,223 @@ +#pragma once + +#include "d3d9_include.h" +#include "d3d9_options.h" + +#include "../dxvk/dxvk_adapter.h" +#include "../dxvk/dxvk_format.h" + +#include <unordered_map> + +namespace dxvk { + + enum class D3D9Format : uint32_t { + Unknown = 0, + + R8G8B8 = 20, + A8R8G8B8 = 21, + X8R8G8B8 = 22, + R5G6B5 = 23, + X1R5G5B5 = 24, + A1R5G5B5 = 25, + A4R4G4B4 = 26, + R3G3B2 = 27, + A8 = 28, + A8R3G3B2 = 29, + X4R4G4B4 = 30, + A2B10G10R10 = 31, + A8B8G8R8 = 32, + X8B8G8R8 = 33, + G16R16 = 34, + A2R10G10B10 = 35, + A16B16G16R16 = 36, + A8P8 = 40, + P8 = 41, + L8 = 50, + A8L8 = 51, + A4L4 = 52, + V8U8 = 60, + L6V5U5 = 61, + X8L8V8U8 = 62, + Q8W8V8U8 = 63, + V16U16 = 64, + A2W10V10U10 = 67, + UYVY = MAKEFOURCC('U', 'Y', 'V', 'Y'), + R8G8_B8G8 = MAKEFOURCC('R', 'G', 'B', 'G'), + YUY2 = MAKEFOURCC('Y', 'U', 'Y', '2'), + G8R8_G8B8 = MAKEFOURCC('G', 'R', 'G', 'B'), + DXT1 = MAKEFOURCC('D', 'X', 'T', '1'), + DXT2 = MAKEFOURCC('D', 'X', 'T', '2'), + DXT3 = MAKEFOURCC('D', 'X', 'T', '3'), + DXT4 = MAKEFOURCC('D', 'X', 'T', '4'), + DXT5 = MAKEFOURCC('D', 'X', 'T', '5'), + D16_LOCKABLE = 70, + D32 = 71, + D15S1 = 73, + D24S8 = 75, + D24X8 = 77, + D24X4S4 = 79, + D16 = 80, + D32F_LOCKABLE = 82, + D24FS8 = 83, + D32_LOCKABLE = 84, + S8_LOCKABLE = 85, + L16 = 81, + VERTEXDATA = 100, + INDEX16 = 101, + INDEX32 = 102, + Q16W16V16U16 = 110, + MULTI2_ARGB8 = MAKEFOURCC('M', 'E', 'T', '1'), + R16F = 111, + G16R16F = 112, + A16B16G16R16F = 113, + R32F = 114, + G32R32F = 115, + A32B32G32R32F = 116, + CxV8U8 = 117, + A1 = 118, + A2B10G10R10_XR_BIAS = 119, + BINARYBUFFER = 199, + + // Driver Hacks / Unofficial Formats + ATI1 = MAKEFOURCC('A', 'T', 'I', '1'), + ATI2 = MAKEFOURCC('A', 'T', 'I', '2'), + INST = MAKEFOURCC('I', 
'N', 'S', 'T'), + DF24 = MAKEFOURCC('D', 'F', '2', '4'), + DF16 = MAKEFOURCC('D', 'F', '1', '6'), + NULL_FORMAT = MAKEFOURCC('N', 'U', 'L', 'L'), + GET4 = MAKEFOURCC('G', 'E', 'T', '4'), + GET1 = MAKEFOURCC('G', 'E', 'T', '1'), + NVDB = MAKEFOURCC('N', 'V', 'D', 'B'), + A2M1 = MAKEFOURCC('A', '2', 'M', '1'), + A2M0 = MAKEFOURCC('A', '2', 'M', '0'), + ATOC = MAKEFOURCC('A', 'T', 'O', 'C'), + INTZ = MAKEFOURCC('I', 'N', 'T', 'Z'), + RAWZ = MAKEFOURCC('R', 'A', 'W', 'Z'), + RESZ = MAKEFOURCC('R', 'E', 'S', 'Z'), + + NV11 = MAKEFOURCC('N', 'V', '1', '1'), + NV12 = MAKEFOURCC('N', 'V', '1', '2'), + P010 = MAKEFOURCC('P', '0', '1', '0'), // Same as NV12 but 10 bit + P016 = MAKEFOURCC('P', '0', '1', '6'), // Same as NV12 but 16 bit + Y210 = MAKEFOURCC('Y', '2', '1', '0'), + Y216 = MAKEFOURCC('Y', '2', '1', '6'), + Y410 = MAKEFOURCC('Y', '4', '1', '0'), + AYUV = MAKEFOURCC('A', 'Y', 'U', 'V'), + YV12 = MAKEFOURCC('Y', 'V', '1', '2'), + OPAQUE_420 = MAKEFOURCC('4', '2', '0', 'O'), + + // Not supported but exist + AI44 = MAKEFOURCC('A', 'I', '4', '4'), + IA44 = MAKEFOURCC('I', 'A', '4', '4'), + R2VB = MAKEFOURCC('R', '2', 'V', 'B'), + COPM = MAKEFOURCC('C', 'O', 'P', 'M'), + SSAA = MAKEFOURCC('S', 'S', 'A', 'A'), + AL16 = MAKEFOURCC('A', 'L', '1', '6'), + R16 = MAKEFOURCC(' ', 'R', '1', '6'), + + EXT1 = MAKEFOURCC('E', 'X', 'T', '1'), + FXT1 = MAKEFOURCC('F', 'X', 'T', '1'), + GXT1 = MAKEFOURCC('G', 'X', 'T', '1'), + HXT1 = MAKEFOURCC('H', 'X', 'T', '1'), + }; + + inline D3D9Format EnumerateFormat(D3DFORMAT format) { + return static_cast<D3D9Format>(format); + } + + std::ostream& operator << (std::ostream& os, D3D9Format format); + + enum D3D9ConversionFormat : uint32_t { + D3D9ConversionFormat_None = 0, + D3D9ConversionFormat_YUY2 = 1, + D3D9ConversionFormat_UYVY, + D3D9ConversionFormat_L6V5U5, + D3D9ConversionFormat_X8L8V8U8, + D3D9ConversionFormat_A2W10V10U10, + D3D9ConversionFormat_NV12, + D3D9ConversionFormat_YV12, + D3D9ConversionFormat_Count + }; + + struct 
D3D9_CONVERSION_FORMAT_INFO { + D3D9ConversionFormat FormatType = D3D9ConversionFormat_None; + uint32_t PlaneCount = 1; + VkFormat FormatColor = VK_FORMAT_UNDEFINED; + VkFormat FormatSrgb = VK_FORMAT_UNDEFINED; + }; + + /** + * \brief Format mapping + * + * Maps a D3D9 format to a set of Vulkan formats. + */ + struct D3D9_VK_FORMAT_MAPPING { + union { + struct { + VkFormat FormatColor; ///< Corresponding color format + VkFormat FormatSrgb; ///< Corresponding color format + }; + VkFormat Formats[2]; + }; + VkImageAspectFlags Aspect = 0; ///< Defined aspects for the color format + VkComponentMapping Swizzle = { ///< Color component swizzle + VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, + VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY }; + D3D9_CONVERSION_FORMAT_INFO ConversionFormatInfo = { }; + + bool IsValid() const { return FormatColor != VK_FORMAT_UNDEFINED; } + }; + + D3D9_VK_FORMAT_MAPPING ConvertFormatUnfixed(D3D9Format Format); + + /** + * \brief Format table + * + * Initializes a format table for a specific + * device and provides methods to look up + * formats. + */ + class D3D9VkFormatTable { + + public: + + D3D9VkFormatTable( + const Rc<DxvkAdapter>& adapter, + const D3D9Options& options); + + /** + * \brief Retrieves info for a given D3D9 format + * + * \param [in] Format The D3D9 format to look up + * \param [in] Mode the format lookup mode + * \returns Format info + */ + D3D9_VK_FORMAT_MAPPING GetFormatMapping( + D3D9Format Format) const; + + /** + * \brief Retrieves format info for unsupported + * formats. 
+ * + * \param [in] Format The D3D9 format to look up + */ + const DxvkFormatInfo* GetUnsupportedFormatInfo( + D3D9Format Format) const; + + private: + + bool CheckImageFormatSupport( + const Rc<DxvkAdapter>& Adapter, + VkFormat Format, + VkFormatFeatureFlags Features) const; + + bool m_a4r4g4b4Support; + bool m_d24s8Support; + bool m_d16s8Support; + + bool m_dfSupport; + bool m_x4r4g4b4Support; + bool m_d32supportFinal; + }; + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_format_helpers.cpp b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_format_helpers.cpp new file mode 100644 index 00000000..0e346636 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_format_helpers.cpp @@ -0,0 +1,146 @@ +#include "d3d9_format_helpers.h" + +#include <d3d9_convert_yuy2_uyvy.h> +#include <d3d9_convert_l6v5u5.h> +#include <d3d9_convert_x8l8v8u8.h> +#include <d3d9_convert_a2w10v10u10.h> +#include <d3d9_convert_nv12.h> +#include <d3d9_convert_yv12.h> + +namespace dxvk { + + D3D9FormatHelper::D3D9FormatHelper(const Rc<DxvkDevice>& device) + : m_device(device), m_context(m_device->createContext()) { + m_context->beginRecording( + m_device->createCommandList()); + + InitShaders(); + } + + + void D3D9FormatHelper::Flush() { + if (m_transferCommands != 0) + FlushInternal(); + } + + + void D3D9FormatHelper::ConvertFormat( + D3D9_CONVERSION_FORMAT_INFO conversionFormat, + const Rc<DxvkImage>& dstImage, + VkImageSubresourceLayers dstSubresource, + const DxvkBufferSlice& srcSlice) { + switch (conversionFormat.FormatType) { + case D3D9ConversionFormat_YUY2: + case D3D9ConversionFormat_UYVY: { + uint32_t specConstant = conversionFormat.FormatType == D3D9ConversionFormat_UYVY ? 
1 : 0; + ConvertGenericFormat(conversionFormat, dstImage, dstSubresource, srcSlice, VK_FORMAT_R32_UINT, specConstant, { 2u, 1u }); + break; + } + + case D3D9ConversionFormat_NV12: + ConvertGenericFormat(conversionFormat, dstImage, dstSubresource, srcSlice, VK_FORMAT_R16_UINT, 0, { 2u, 1u }); + break; + + case D3D9ConversionFormat_YV12: + ConvertGenericFormat(conversionFormat, dstImage, dstSubresource, srcSlice, VK_FORMAT_R8_UINT, 0, { 1u, 1u }); + break; + + case D3D9ConversionFormat_L6V5U5: + ConvertGenericFormat(conversionFormat, dstImage, dstSubresource, srcSlice, VK_FORMAT_R16_UINT, 0, { 1u, 1u }); + break; + + case D3D9ConversionFormat_X8L8V8U8: + ConvertGenericFormat(conversionFormat, dstImage, dstSubresource, srcSlice, VK_FORMAT_R32_UINT, 0, { 1u, 1u }); + break; + + case D3D9ConversionFormat_A2W10V10U10: + ConvertGenericFormat(conversionFormat, dstImage, dstSubresource, srcSlice, VK_FORMAT_R32_UINT, 0, { 1u, 1u }); + break; + + default: + Logger::warn("Unimplemented format conversion"); + } + } + + + void D3D9FormatHelper::ConvertGenericFormat( + D3D9_CONVERSION_FORMAT_INFO videoFormat, + const Rc<DxvkImage>& dstImage, + VkImageSubresourceLayers dstSubresource, + const DxvkBufferSlice& srcSlice, + VkFormat bufferFormat, + uint32_t specConstantValue, + VkExtent2D macroPixelRun) { + DxvkImageViewCreateInfo imageViewInfo; + imageViewInfo.type = VK_IMAGE_VIEW_TYPE_2D; + imageViewInfo.format = dstImage->info().format; + imageViewInfo.usage = VK_IMAGE_USAGE_STORAGE_BIT; + imageViewInfo.aspect = dstSubresource.aspectMask; + imageViewInfo.minLevel = dstSubresource.mipLevel; + imageViewInfo.numLevels = 1; + imageViewInfo.minLayer = dstSubresource.baseArrayLayer; + imageViewInfo.numLayers = dstSubresource.layerCount; + auto tmpImageView = m_device->createImageView(dstImage, imageViewInfo); + + VkExtent3D imageExtent = dstImage->mipLevelExtent(dstSubresource.mipLevel); + imageExtent = VkExtent3D{ imageExtent.width / macroPixelRun.width, + imageExtent.height / 
macroPixelRun.height, + 1 }; + + DxvkBufferViewCreateInfo bufferViewInfo; + bufferViewInfo.format = bufferFormat; + bufferViewInfo.rangeOffset = srcSlice.offset(); + bufferViewInfo.rangeLength = srcSlice.length(); + auto tmpBufferView = m_device->createBufferView(srcSlice.buffer(), bufferViewInfo); + + if (specConstantValue) + m_context->setSpecConstant(VK_PIPELINE_BIND_POINT_COMPUTE, 0, specConstantValue); + + m_context->bindResourceView(BindingIds::Image, tmpImageView, nullptr); + m_context->bindResourceView(BindingIds::Buffer, nullptr, tmpBufferView); + m_context->bindShader(VK_SHADER_STAGE_COMPUTE_BIT, m_shaders[videoFormat.FormatType]); + m_context->pushConstants(0, sizeof(VkExtent2D), &imageExtent); + m_context->dispatch( + (imageExtent.width / 8) + (imageExtent.width % 8), + (imageExtent.height / 8) + (imageExtent.height % 8), + 1); + + // Reset the spec constants used... + if (specConstantValue) + m_context->setSpecConstant(VK_PIPELINE_BIND_POINT_COMPUTE, 0, 0); + + m_transferCommands += 1; + } + + + void D3D9FormatHelper::InitShaders() { + m_shaders[D3D9ConversionFormat_YUY2] = InitShader(d3d9_convert_yuy2_uyvy); + m_shaders[D3D9ConversionFormat_UYVY] = m_shaders[D3D9ConversionFormat_YUY2]; + m_shaders[D3D9ConversionFormat_L6V5U5] = InitShader(d3d9_convert_l6v5u5); + m_shaders[D3D9ConversionFormat_X8L8V8U8] = InitShader(d3d9_convert_x8l8v8u8); + m_shaders[D3D9ConversionFormat_A2W10V10U10] = InitShader(d3d9_convert_a2w10v10u10); + m_shaders[D3D9ConversionFormat_NV12] = InitShader(d3d9_convert_nv12); + m_shaders[D3D9ConversionFormat_YV12] = InitShader(d3d9_convert_yv12); + } + + + Rc<DxvkShader> D3D9FormatHelper::InitShader(SpirvCodeBuffer code) { + const std::array<DxvkResourceSlot, 2> resourceSlots = { { + { BindingIds::Image, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_IMAGE_VIEW_TYPE_2D }, + { BindingIds::Buffer, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, VK_IMAGE_VIEW_TYPE_1D }, + } }; + + return m_device->createShader( + VK_SHADER_STAGE_COMPUTE_BIT, + 
resourceSlots.size(), resourceSlots.data(), + { 0u, 0u, 0u, sizeof(VkExtent2D) }, code); + } + + + void D3D9FormatHelper::FlushInternal() { + m_context->flushCommandList(); + + m_transferCommands = 0; + } + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_format_helpers.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_format_helpers.h new file mode 100644 index 00000000..16348eda --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_format_helpers.h @@ -0,0 +1,55 @@ +#pragma once + +#include "d3d9_include.h" +#include "d3d9_format.h" +#include "../dxvk/dxvk_device.h" +#include "../dxvk/dxvk_context.h" + +namespace dxvk { + + class D3D9FormatHelper { + + public: + + D3D9FormatHelper(const Rc<DxvkDevice>& device); + + void Flush(); + + void ConvertFormat( + D3D9_CONVERSION_FORMAT_INFO conversionFormat, + const Rc<DxvkImage>& dstImage, + VkImageSubresourceLayers dstSubresource, + const DxvkBufferSlice& srcSlice); + + private: + + void ConvertGenericFormat( + D3D9_CONVERSION_FORMAT_INFO videoFormat, + const Rc<DxvkImage>& dstImage, + VkImageSubresourceLayers dstSubresource, + const DxvkBufferSlice& srcSlice, + VkFormat bufferFormat, + uint32_t specConstantValue, + VkExtent2D macroPixelRun); + + enum BindingIds : uint32_t { + Image = 0, + Buffer = 1, + }; + + void InitShaders(); + + Rc<DxvkShader> InitShader(SpirvCodeBuffer code); + + void FlushInternal(); + + Rc<DxvkDevice> m_device; + Rc<DxvkContext> m_context; + + size_t m_transferCommands = 0; + + std::array<Rc<DxvkShader>, D3D9ConversionFormat_Count> m_shaders; + + }; + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_hud.cpp b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_hud.cpp new file mode 100644 index 00000000..e37a5a0e --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_hud.cpp @@ -0,0 +1,36 @@ +#include "d3d9_hud.h" + +namespace dxvk::hud { + + HudSamplerCount::HudSamplerCount(D3D9DeviceEx* device) + : m_device (device) + , m_samplerCount ("0"){ + + } + + + void HudSamplerCount::update(dxvk::high_resolution_clock::time_point time) { + m_samplerCount = str::format(m_device->GetSamplerCount()); + } + + + HudPos HudSamplerCount::render( + HudRenderer& renderer, + HudPos position) { + position.y += 16.0f; + + renderer.drawText(16.0f, + { position.x, position.y }, + { 0.0f, 1.0f, 0.75f, 1.0f }, + "Samplers:"); + + renderer.drawText(16.0f, + { position.x + 120.0f, position.y }, + { 1.0f, 1.0f, 1.0f, 1.0f }, + m_samplerCount); + + position.y += 8.0f; + return position; + } + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_hud.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_hud.h new file mode 100644 index 00000000..b500e48b --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_hud.h @@ -0,0 +1,31 @@ +#pragma once + +#include "d3d9_device.h" +#include "../dxvk/hud/dxvk_hud_item.h" + +namespace dxvk::hud { + + /** + * \brief HUD item to display DXVK version + */ + class HudSamplerCount : public HudItem { + + public: + + HudSamplerCount(D3D9DeviceEx* device); + + void update(dxvk::high_resolution_clock::time_point time); + + HudPos render( + HudRenderer& renderer, + HudPos position); + + private: + + D3D9DeviceEx* m_device; + + std::string m_samplerCount; + + }; + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_include.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_include.h new file mode 100644 index 00000000..99a233d1 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_include.h @@ -0,0 +1,95 @@ +#pragma once + +#ifndef _MSC_VER +#ifdef _WIN32_WINNT +#undef _WIN32_WINNT +#endif +#define _WIN32_WINNT 0x0A00 +#endif + +#include <stdint.h> +#include <d3d9.h> + +//for some reason we need to specify __declspec(dllexport) for MinGW +#if defined(__WINE__) || (defined(DXVK_NATIVE) && !defined(_WIN32)) +#define DLLEXPORT __attribute__((visibility("default"))) +#else +#define DLLEXPORT +#endif + + +#include "../util/com/com_guid.h" +#include "../util/com/com_object.h" +#include "../util/com/com_pointer.h" + +#include "../util/log/log.h" +#include "../util/log/log_debug.h" + +#include "../util/rc/util_rc.h" +#include "../util/rc/util_rc_ptr.h" + +#include "../util/sync/sync_recursive.h" + +#include "../util/util_env.h" +#include "../util/util_enum.h" +#include "../util/util_error.h" +#include "../util/util_flags.h" +#include "../util/util_likely.h" +#include "../util/util_math.h" +#include "../util/util_monitor.h" +#include "../util/util_string.h" + +// Missed definitions in Wine/MinGW. 
+ +#ifndef D3DPRESENT_BACK_BUFFERS_MAX_EX +#define D3DPRESENT_BACK_BUFFERS_MAX_EX 30 +#endif + +#ifndef D3DSI_OPCODE_MASK +#define D3DSI_OPCODE_MASK 0x0000FFFF +#endif + +#ifndef D3DSP_TEXTURETYPE_MASK +#define D3DSP_TEXTURETYPE_MASK 0x78000000 +#endif + +#ifndef D3DUSAGE_AUTOGENMIPMAP +#define D3DUSAGE_AUTOGENMIPMAP 0x00000400L +#endif + +#ifndef D3DSP_DCL_USAGE_MASK +#define D3DSP_DCL_USAGE_MASK 0x0000000f +#endif + +#ifndef D3DSP_OPCODESPECIFICCONTROL_MASK +#define D3DSP_OPCODESPECIFICCONTROL_MASK 0x00ff0000 +#endif + +#ifndef D3DSP_OPCODESPECIFICCONTROL_SHIFT +#define D3DSP_OPCODESPECIFICCONTROL_SHIFT 16 +#endif + +#ifndef D3DCURSOR_IMMEDIATE_UPDATE +#define D3DCURSOR_IMMEDIATE_UPDATE 0x00000001L +#endif + +#ifndef D3DPRESENT_FORCEIMMEDIATE +#define D3DPRESENT_FORCEIMMEDIATE 0x00000100L +#endif + +// MinGW headers are broken. Who'dve guessed? +#ifndef _MSC_VER +typedef struct _D3DDEVINFO_RESOURCEMANAGER +{ + char dummy; +} D3DDEVINFO_RESOURCEMANAGER, * LPD3DDEVINFO_RESOURCEMANAGER; + +#ifndef __WINE__ +extern "C" WINUSERAPI WINBOOL WINAPI SetProcessDPIAware(VOID); +#endif +#endif + +// This is the managed pool on D3D9Ex, it's just hidden! 
+#define D3DPOOL_MANAGED_EX D3DPOOL(6) + +using D3D9VertexElements = std::vector<D3DVERTEXELEMENT9>; diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_initializer.cpp b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_initializer.cpp new file mode 100644 index 00000000..d3076954 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_initializer.cpp @@ -0,0 +1,183 @@ +#include <cstring> + +#include "d3d9_initializer.h" + +namespace dxvk { + + D3D9Initializer::D3D9Initializer( + const Rc<DxvkDevice>& Device) + : m_device(Device), m_context(m_device->createContext()) { + m_context->beginRecording( + m_device->createCommandList()); + } + + + D3D9Initializer::~D3D9Initializer() { + + } + + + void D3D9Initializer::Flush() { + std::lock_guard<dxvk::mutex> lock(m_mutex); + + if (m_transferCommands != 0) + FlushInternal(); + } + + + void D3D9Initializer::InitBuffer( + D3D9CommonBuffer* pBuffer) { + VkMemoryPropertyFlags memFlags = pBuffer->GetBuffer<D3D9_COMMON_BUFFER_TYPE_REAL>()->memFlags(); + + (memFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + ? InitHostVisibleBuffer(pBuffer->GetBufferSlice<D3D9_COMMON_BUFFER_TYPE_REAL>()) + : InitDeviceLocalBuffer(pBuffer->GetBufferSlice<D3D9_COMMON_BUFFER_TYPE_REAL>()); + + if (pBuffer->GetMapMode() == D3D9_COMMON_BUFFER_MAP_MODE_BUFFER) + InitHostVisibleBuffer(pBuffer->GetBufferSlice<D3D9_COMMON_BUFFER_TYPE_STAGING>()); + } + + + void D3D9Initializer::InitTexture( + D3D9CommonTexture* pTexture, + void* pInitialData) { + if (pTexture->GetMapMode() == D3D9_COMMON_TEXTURE_MAP_MODE_NONE) + return; + + (pTexture->GetMapMode() == D3D9_COMMON_TEXTURE_MAP_MODE_BACKED) + ? 
InitDeviceLocalTexture(pTexture) + : InitHostVisibleTexture(pTexture, pInitialData); + } + + + void D3D9Initializer::InitDeviceLocalBuffer( + DxvkBufferSlice Slice) { + std::lock_guard<dxvk::mutex> lock(m_mutex); + + m_transferCommands += 1; + + m_context->clearBuffer( + Slice.buffer(), + Slice.offset(), + Slice.length(), + 0u); + + FlushImplicit(); + } + + + void D3D9Initializer::InitHostVisibleBuffer( + DxvkBufferSlice Slice) { + // If the buffer is mapped, we can write data directly + // to the mapped memory region instead of doing it on + // the GPU. Same goes for zero-initialization. + std::memset( + Slice.mapPtr(0), 0, + Slice.length()); + } + + + void D3D9Initializer::InitDeviceLocalTexture( + D3D9CommonTexture* pTexture) { + std::lock_guard<dxvk::mutex> lock(m_mutex); + + auto InitImage = [&](Rc<DxvkImage> image) { + if (image == nullptr) + return; + + auto formatInfo = imageFormatInfo(image->info().format); + + m_transferCommands += 1; + + // While the Microsoft docs state that resource contents are + // undefined if no initial data is provided, some applications + // expect a resource to be pre-cleared. We can only do that + // for non-compressed images, but that should be fine. 
+ VkImageSubresourceRange subresources; + subresources.aspectMask = formatInfo->aspectMask; + subresources.baseMipLevel = 0; + subresources.levelCount = image->info().mipLevels; + subresources.baseArrayLayer = 0; + subresources.layerCount = image->info().numLayers; + + if (formatInfo->flags.test(DxvkFormatFlag::BlockCompressed)) { + m_context->clearCompressedColorImage(image, subresources); + } else { + if (subresources.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) { + VkClearColorValue value = { }; + + m_context->clearColorImage( + image, value, subresources); + } else { + VkClearDepthStencilValue value; + value.depth = 0.0f; + value.stencil = 0; + + m_context->clearDepthStencilImage( + image, value, subresources); + } + } + }; + + InitImage(pTexture->GetImage()); + + FlushImplicit(); + } + + + void D3D9Initializer::InitHostVisibleTexture( + D3D9CommonTexture* pTexture, + void* pInitialData) { + // If the buffer is mapped, we can write data directly + // to the mapped memory region instead of doing it on + // the GPU. Same goes for zero-initialization. 
+ const D3D9_COMMON_TEXTURE_DESC* desc = pTexture->Desc(); + for (uint32_t a = 0; a < desc->ArraySize; a++) { + for (uint32_t m = 0; m < desc->MipLevels; m++) { + uint32_t subresource = pTexture->CalcSubresource(a, m); + DxvkBufferSliceHandle mapSlice = pTexture->GetBuffer(subresource)->getSliceHandle(); + + if (pInitialData != nullptr) { + VkExtent3D mipExtent = pTexture->GetExtentMip(m); + const DxvkFormatInfo* formatInfo = imageFormatInfo(pTexture->GetFormatMapping().FormatColor); + VkExtent3D blockCount = util::computeBlockCount(mipExtent, formatInfo->blockSize); + uint32_t pitch = blockCount.width * formatInfo->elementSize; + uint32_t alignedPitch = align(pitch, 4); + + util::packImageData( + mapSlice.mapPtr, + pInitialData, + pitch, + pitch * blockCount.height, + alignedPitch, + alignedPitch * blockCount.height, + D3D9CommonTexture::GetImageTypeFromResourceType(pTexture->GetType()), + mipExtent, + pTexture->Desc()->ArraySize, + formatInfo, + VK_IMAGE_ASPECT_COLOR_BIT); + } else { + std::memset( + mapSlice.mapPtr, 0, + mapSlice.length); + } + } + } + } + + + void D3D9Initializer::FlushImplicit() { + if (m_transferCommands > MaxTransferCommands + || m_transferMemory > MaxTransferMemory) + FlushInternal(); + } + + + void D3D9Initializer::FlushInternal() { + m_context->flushCommandList(); + + m_transferCommands = 0; + m_transferMemory = 0; + } + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_initializer.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_initializer.h new file mode 100644 index 00000000..59802f95 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_initializer.h @@ -0,0 +1,62 @@ +#pragma once + +#include "d3d9_common_buffer.h" +#include "d3d9_common_texture.h" + +namespace dxvk { + + /** + * \brief Resource initialization context + * + * Manages a context which is used for resource + * initialization. This includes + * zero-initialization for buffers and images. + */ + class D3D9Initializer { + constexpr static size_t MaxTransferMemory = 32 * 1024 * 1024; + constexpr static size_t MaxTransferCommands = 512; + public: + + D3D9Initializer( + const Rc<DxvkDevice>& Device); + + ~D3D9Initializer(); + + void Flush(); + + void InitBuffer( + D3D9CommonBuffer* pBuffer); + + void InitTexture( + D3D9CommonTexture* pTexture, + void* pInitialData = nullptr); + + private: + + dxvk::mutex m_mutex; + + Rc<DxvkDevice> m_device; + Rc<DxvkContext> m_context; + + size_t m_transferCommands = 0; + size_t m_transferMemory = 0; + + void InitDeviceLocalBuffer( + DxvkBufferSlice Slice); + + void InitHostVisibleBuffer( + DxvkBufferSlice Slice); + + void InitDeviceLocalTexture( + D3D9CommonTexture* pTexture); + + void InitHostVisibleTexture( + D3D9CommonTexture* pTexture, + void* pInitialData); + + void FlushImplicit(); + void FlushInternal(); + + }; + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_interface.cpp b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_interface.cpp new file mode 100644 index 00000000..dd141333 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_interface.cpp @@ -0,0 +1,374 @@ +#include "d3d9_interface.h" + +#include "d3d9_monitor.h" +#include "d3d9_caps.h" +#include "d3d9_device.h" + +#include <algorithm> + +namespace dxvk { + + D3D9InterfaceEx::D3D9InterfaceEx(bool bExtended) + : m_instance ( new DxvkInstance() ) + , m_extended ( bExtended ) + , m_d3d9Options ( nullptr, m_instance->config() ) { + +#ifndef DXVK_NATIVE + // D3D9 doesn't enumerate adapters like physical adapters... + // only as connected displays. + + // Let's create some "adapters" for the amount of displays we have. + // We'll go through and match up displays -> our adapters in order. + // If we run out of adapters, then we'll just make repeats of the first one. + // We can't match up by names on Linux/Wine as they don't match at all + // like on Windows, so this is our best option. + if (m_d3d9Options.enumerateByDisplays) { + DISPLAY_DEVICEA device = { }; + device.cb = sizeof(device); + + uint32_t adapterOrdinal = 0; + uint32_t i = 0; + while (::EnumDisplayDevicesA(nullptr, i++, &device, 0)) { + // If we aren't attached, skip over. + if (!(device.StateFlags & DISPLAY_DEVICE_ATTACHED_TO_DESKTOP)) + continue; + + // If we are a mirror, skip over this device. + if (device.StateFlags & DISPLAY_DEVICE_MIRRORING_DRIVER) + continue; + + Rc<DxvkAdapter> adapter = adapterOrdinal >= m_instance->adapterCount() + ? 
m_instance->enumAdapters(0) + : m_instance->enumAdapters(adapterOrdinal); + + if (adapter != nullptr) + m_adapters.emplace_back(this, adapter, adapterOrdinal++, i - 1); + } + } + else +#endif + { + const uint32_t adapterCount = m_instance->adapterCount(); + m_adapters.reserve(adapterCount); + + for (uint32_t i = 0; i < adapterCount; i++) + m_adapters.emplace_back(this, m_instance->enumAdapters(i), i, 0); + } + +#ifndef DXVK_NATIVE + if (m_d3d9Options.dpiAware) { + Logger::info("Process set as DPI aware"); + SetProcessDPIAware(); + } +#endif + } + + + HRESULT STDMETHODCALLTYPE D3D9InterfaceEx::QueryInterface(REFIID riid, void** ppvObject) { + if (ppvObject == nullptr) + return E_POINTER; + + *ppvObject = nullptr; + + if (riid == __uuidof(IUnknown) + || riid == __uuidof(IDirect3D9) + || (m_extended && riid == __uuidof(IDirect3D9Ex))) { + *ppvObject = ref(this); + return S_OK; + } + + Logger::warn("D3D9InterfaceEx::QueryInterface: Unknown interface query"); + Logger::warn(str::format(riid)); + return E_NOINTERFACE; + } + + + HRESULT STDMETHODCALLTYPE D3D9InterfaceEx::RegisterSoftwareDevice(void* pInitializeFunction) { + Logger::warn("D3D9InterfaceEx::RegisterSoftwareDevice: Stub"); + return D3D_OK; + } + + + UINT STDMETHODCALLTYPE D3D9InterfaceEx::GetAdapterCount() { + return UINT(m_adapters.size()); + } + + + HRESULT STDMETHODCALLTYPE D3D9InterfaceEx::GetAdapterIdentifier( + UINT Adapter, + DWORD Flags, + D3DADAPTER_IDENTIFIER9* pIdentifier) { + if (auto* adapter = GetAdapter(Adapter)) + return adapter->GetAdapterIdentifier(Flags, pIdentifier); + + return D3DERR_INVALIDCALL; + } + + + UINT STDMETHODCALLTYPE D3D9InterfaceEx::GetAdapterModeCount(UINT Adapter, D3DFORMAT Format) { + D3DDISPLAYMODEFILTER filter; + filter.Size = sizeof(D3DDISPLAYMODEFILTER); + filter.Format = Format; + filter.ScanLineOrdering = D3DSCANLINEORDERING_PROGRESSIVE; + + return this->GetAdapterModeCountEx(Adapter, &filter); + } + + + HRESULT STDMETHODCALLTYPE 
D3D9InterfaceEx::GetAdapterDisplayMode(UINT Adapter, D3DDISPLAYMODE* pMode) { + if (auto* adapter = GetAdapter(Adapter)) { + D3DDISPLAYMODEEX modeEx = { }; + modeEx.Size = sizeof(D3DDISPLAYMODEEX); + HRESULT hr = adapter->GetAdapterDisplayModeEx(&modeEx, nullptr); + + if (FAILED(hr)) + return hr; + + pMode->Width = modeEx.Width; + pMode->Height = modeEx.Height; + pMode->RefreshRate = modeEx.RefreshRate; + pMode->Format = modeEx.Format; + + return D3D_OK; + } + + return D3DERR_INVALIDCALL; + } + + + HRESULT STDMETHODCALLTYPE D3D9InterfaceEx::CheckDeviceType( + UINT Adapter, + D3DDEVTYPE DevType, + D3DFORMAT AdapterFormat, + D3DFORMAT BackBufferFormat, + BOOL bWindowed) { + if (auto* adapter = GetAdapter(Adapter)) + return adapter->CheckDeviceType( + DevType, EnumerateFormat(AdapterFormat), + EnumerateFormat(BackBufferFormat), bWindowed); + + return D3DERR_INVALIDCALL; + } + + + HRESULT STDMETHODCALLTYPE D3D9InterfaceEx::CheckDeviceFormat( + UINT Adapter, + D3DDEVTYPE DeviceType, + D3DFORMAT AdapterFormat, + DWORD Usage, + D3DRESOURCETYPE RType, + D3DFORMAT CheckFormat) { + if (auto* adapter = GetAdapter(Adapter)) + return adapter->CheckDeviceFormat( + DeviceType, EnumerateFormat(AdapterFormat), + Usage, RType, + EnumerateFormat(CheckFormat)); + + return D3DERR_INVALIDCALL; + } + + + HRESULT STDMETHODCALLTYPE D3D9InterfaceEx::CheckDeviceMultiSampleType( + UINT Adapter, + D3DDEVTYPE DeviceType, + D3DFORMAT SurfaceFormat, + BOOL Windowed, + D3DMULTISAMPLE_TYPE MultiSampleType, + DWORD* pQualityLevels) { + if (auto* adapter = GetAdapter(Adapter)) + return adapter->CheckDeviceMultiSampleType( + DeviceType, EnumerateFormat(SurfaceFormat), + Windowed, MultiSampleType, + pQualityLevels); + + return D3DERR_INVALIDCALL; + } + + + HRESULT STDMETHODCALLTYPE D3D9InterfaceEx::CheckDepthStencilMatch( + UINT Adapter, + D3DDEVTYPE DeviceType, + D3DFORMAT AdapterFormat, + D3DFORMAT RenderTargetFormat, + D3DFORMAT DepthStencilFormat) { + if (auto* adapter = GetAdapter(Adapter)) + 
return adapter->CheckDepthStencilMatch( + DeviceType, EnumerateFormat(AdapterFormat), + EnumerateFormat(RenderTargetFormat), + EnumerateFormat(DepthStencilFormat)); + + return D3DERR_INVALIDCALL; + } + + + HRESULT STDMETHODCALLTYPE D3D9InterfaceEx::CheckDeviceFormatConversion( + UINT Adapter, + D3DDEVTYPE DeviceType, + D3DFORMAT SourceFormat, + D3DFORMAT TargetFormat) { + if (auto* adapter = GetAdapter(Adapter)) + return adapter->CheckDeviceFormatConversion( + DeviceType, EnumerateFormat(SourceFormat), + EnumerateFormat(TargetFormat)); + + return D3DERR_INVALIDCALL; + } + + + HRESULT STDMETHODCALLTYPE D3D9InterfaceEx::GetDeviceCaps( + UINT Adapter, + D3DDEVTYPE DeviceType, + D3DCAPS9* pCaps) { + if (auto* adapter = GetAdapter(Adapter)) + return adapter->GetDeviceCaps( + DeviceType, pCaps); + + return D3DERR_INVALIDCALL; + } + + + HMONITOR STDMETHODCALLTYPE D3D9InterfaceEx::GetAdapterMonitor(UINT Adapter) { + if (auto* adapter = GetAdapter(Adapter)) + return adapter->GetMonitor(); + + return nullptr; + } + + + HRESULT STDMETHODCALLTYPE D3D9InterfaceEx::CreateDevice( + UINT Adapter, + D3DDEVTYPE DeviceType, + HWND hFocusWindow, + DWORD BehaviorFlags, + D3DPRESENT_PARAMETERS* pPresentationParameters, + IDirect3DDevice9** ppReturnedDeviceInterface) { + return this->CreateDeviceEx( + Adapter, + DeviceType, + hFocusWindow, + BehaviorFlags, + pPresentationParameters, + nullptr, // <-- pFullscreenDisplayMode + reinterpret_cast<IDirect3DDevice9Ex**>(ppReturnedDeviceInterface)); + } + + + HRESULT STDMETHODCALLTYPE D3D9InterfaceEx::EnumAdapterModes( + UINT Adapter, + D3DFORMAT Format, + UINT Mode, + D3DDISPLAYMODE* pMode) { + if (pMode == nullptr) + return D3DERR_INVALIDCALL; + + D3DDISPLAYMODEFILTER filter; + filter.Format = Format; + filter.ScanLineOrdering = D3DSCANLINEORDERING_PROGRESSIVE; + filter.Size = sizeof(D3DDISPLAYMODEFILTER); + + D3DDISPLAYMODEEX modeEx = { }; + modeEx.Size = sizeof(D3DDISPLAYMODEEX); + HRESULT hr = this->EnumAdapterModesEx(Adapter, &filter, 
Mode, &modeEx); + + if (FAILED(hr)) + return hr; + + pMode->Width = modeEx.Width; + pMode->Height = modeEx.Height; + pMode->RefreshRate = modeEx.RefreshRate; + pMode->Format = modeEx.Format; + + return D3D_OK; + } + + + // Ex Methods + + + UINT STDMETHODCALLTYPE D3D9InterfaceEx::GetAdapterModeCountEx(UINT Adapter, CONST D3DDISPLAYMODEFILTER* pFilter) { + if (auto* adapter = GetAdapter(Adapter)) + return adapter->GetAdapterModeCountEx(pFilter); + + return 0; + } + + + HRESULT STDMETHODCALLTYPE D3D9InterfaceEx::EnumAdapterModesEx( + UINT Adapter, + const D3DDISPLAYMODEFILTER* pFilter, + UINT Mode, + D3DDISPLAYMODEEX* pMode) { + if (auto* adapter = GetAdapter(Adapter)) + return adapter->EnumAdapterModesEx(pFilter, Mode, pMode); + + return D3DERR_INVALIDCALL; + } + + + HRESULT STDMETHODCALLTYPE D3D9InterfaceEx::GetAdapterDisplayModeEx( + UINT Adapter, + D3DDISPLAYMODEEX* pMode, + D3DDISPLAYROTATION* pRotation) { + if (auto* adapter = GetAdapter(Adapter)) + return adapter->GetAdapterDisplayModeEx(pMode, pRotation); + + return D3DERR_INVALIDCALL; + } + + + HRESULT STDMETHODCALLTYPE D3D9InterfaceEx::CreateDeviceEx( + UINT Adapter, + D3DDEVTYPE DeviceType, + HWND hFocusWindow, + DWORD BehaviorFlags, + D3DPRESENT_PARAMETERS* pPresentationParameters, + D3DDISPLAYMODEEX* pFullscreenDisplayMode, + IDirect3DDevice9Ex** ppReturnedDeviceInterface) { + InitReturnPtr(ppReturnedDeviceInterface); + + if (ppReturnedDeviceInterface == nullptr + || pPresentationParameters == nullptr) + return D3DERR_INVALIDCALL; + + auto* adapter = GetAdapter(Adapter); + + if (adapter == nullptr) + return D3DERR_INVALIDCALL; + + auto dxvkAdapter = adapter->GetDXVKAdapter(); + + try { + auto dxvkDevice = dxvkAdapter->createDevice(m_instance, D3D9DeviceEx::GetDeviceFeatures(dxvkAdapter)); + + auto* device = new D3D9DeviceEx( + this, + adapter, + DeviceType, + hFocusWindow, + BehaviorFlags, + dxvkDevice); + + HRESULT hr = device->InitialReset(pPresentationParameters, pFullscreenDisplayMode); + + if 
(FAILED(hr)) + return hr; + + *ppReturnedDeviceInterface = ref(device); + } + catch (const DxvkError& e) { + Logger::err(e.message()); + return D3DERR_NOTAVAILABLE; + } + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9InterfaceEx::GetAdapterLUID(UINT Adapter, LUID* pLUID) { + if (auto* adapter = GetAdapter(Adapter)) + return adapter->GetAdapterLUID(pLUID); + + return D3DERR_INVALIDCALL; + } + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_interface.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_interface.h new file mode 100644 index 00000000..85cc5a03 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_interface.h @@ -0,0 +1,148 @@ +#pragma once + +#include "d3d9_adapter.h" + +#include "../dxvk/dxvk_instance.h" + +namespace dxvk { + + /** + * \brief D3D9 interface implementation + * + * Implements the IDirect3DDevice9Ex interfaces + * which provides the way to get adapters and create other objects such as \ref IDirect3DDevice9Ex. + * similar to \ref DxgiFactory but for D3D9. + */ + class D3D9InterfaceEx final : public ComObjectClamp<IDirect3D9Ex> { + + public: + + D3D9InterfaceEx(bool bExtended); + + HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, void** ppvObject); + + HRESULT STDMETHODCALLTYPE RegisterSoftwareDevice(void* pInitializeFunction); + + UINT STDMETHODCALLTYPE GetAdapterCount(); + + HRESULT STDMETHODCALLTYPE GetAdapterIdentifier( + UINT Adapter, + DWORD Flags, + D3DADAPTER_IDENTIFIER9* pIdentifier); + + UINT STDMETHODCALLTYPE GetAdapterModeCount(UINT Adapter, D3DFORMAT Format); + + HRESULT STDMETHODCALLTYPE GetAdapterDisplayMode(UINT Adapter, D3DDISPLAYMODE* pMode); + + HRESULT STDMETHODCALLTYPE CheckDeviceType( + UINT Adapter, + D3DDEVTYPE DevType, + D3DFORMAT AdapterFormat, + D3DFORMAT BackBufferFormat, + BOOL bWindowed); + + HRESULT STDMETHODCALLTYPE CheckDeviceFormat( + UINT Adapter, + D3DDEVTYPE DeviceType, + D3DFORMAT AdapterFormat, + DWORD Usage, + D3DRESOURCETYPE RType, + D3DFORMAT CheckFormat); + + HRESULT STDMETHODCALLTYPE CheckDeviceMultiSampleType( + UINT Adapter, + D3DDEVTYPE DeviceType, + D3DFORMAT SurfaceFormat, + BOOL Windowed, + D3DMULTISAMPLE_TYPE MultiSampleType, + DWORD* pQualityLevels); + + HRESULT STDMETHODCALLTYPE CheckDepthStencilMatch( + UINT Adapter, + D3DDEVTYPE DeviceType, + D3DFORMAT AdapterFormat, + D3DFORMAT RenderTargetFormat, + D3DFORMAT DepthStencilFormat); 
+ + HRESULT STDMETHODCALLTYPE CheckDeviceFormatConversion( + UINT Adapter, + D3DDEVTYPE DeviceType, + D3DFORMAT SourceFormat, + D3DFORMAT TargetFormat); + + HRESULT STDMETHODCALLTYPE GetDeviceCaps( + UINT Adapter, + D3DDEVTYPE DeviceType, + D3DCAPS9* pCaps); + + HMONITOR STDMETHODCALLTYPE GetAdapterMonitor(UINT Adapter); + + HRESULT STDMETHODCALLTYPE CreateDevice( + UINT Adapter, + D3DDEVTYPE DeviceType, + HWND hFocusWindow, + DWORD BehaviorFlags, + D3DPRESENT_PARAMETERS* pPresentationParameters, + IDirect3DDevice9** ppReturnedDeviceInterface); + + HRESULT STDMETHODCALLTYPE EnumAdapterModes( + UINT Adapter, + D3DFORMAT Format, + UINT Mode, + D3DDISPLAYMODE* pMode); + + // Ex Methods + + UINT STDMETHODCALLTYPE GetAdapterModeCountEx(UINT Adapter, CONST D3DDISPLAYMODEFILTER* pFilter); + + HRESULT STDMETHODCALLTYPE EnumAdapterModesEx( + UINT Adapter, + const D3DDISPLAYMODEFILTER* pFilter, + UINT Mode, + D3DDISPLAYMODEEX* pMode); + + HRESULT STDMETHODCALLTYPE GetAdapterDisplayModeEx( + UINT Adapter, + D3DDISPLAYMODEEX* pMode, + D3DDISPLAYROTATION* pRotation); + + HRESULT STDMETHODCALLTYPE CreateDeviceEx( + UINT Adapter, + D3DDEVTYPE DeviceType, + HWND hFocusWindow, + DWORD BehaviorFlags, + D3DPRESENT_PARAMETERS* pPresentationParameters, + D3DDISPLAYMODEEX* pFullscreenDisplayMode, + IDirect3DDevice9Ex** ppReturnedDeviceInterface); + + HRESULT STDMETHODCALLTYPE GetAdapterLUID(UINT Adapter, LUID* pLUID); + + const D3D9Options& GetOptions() { return m_d3d9Options; } + + D3D9Adapter* GetAdapter(UINT Ordinal) { + return Ordinal < m_adapters.size() + ? &m_adapters[Ordinal] + : nullptr; + } + + bool IsExtended() { return m_extended; } + + Rc<DxvkInstance> GetInstance() { return m_instance; } + + private: + + void CacheModes(D3D9Format Format); + + static const char* GetDriverDllName(DxvkGpuVendor vendor); + + Rc<DxvkInstance> m_instance; + + bool m_extended; + + D3D9Options m_d3d9Options; + + std::vector<D3D9Adapter> m_adapters; + + }; + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_main.cpp b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_main.cpp new file mode 100644 index 00000000..dfca9c1d --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_main.cpp @@ -0,0 +1,86 @@ +#include "../dxvk/dxvk_instance.h" + +#include "d3d9_interface.h" +#include "d3d9_shader_validator.h" + +class D3DFE_PROCESSVERTICES; +using PSGPERRORID = UINT; + +namespace dxvk { + Logger Logger::s_instance("d3d9.log"); + + HRESULT CreateD3D9( + bool Extended, + IDirect3D9Ex** ppDirect3D9Ex) { + if (!ppDirect3D9Ex) + return D3DERR_INVALIDCALL; + + *ppDirect3D9Ex = ref(new D3D9InterfaceEx( Extended )); + return D3D_OK; + } +} + +extern "C" { + + DLLEXPORT IDirect3D9* __stdcall Direct3DCreate9(UINT nSDKVersion) { + IDirect3D9Ex* pDirect3D = nullptr; + dxvk::CreateD3D9(false, &pDirect3D); + + return pDirect3D; + } + + DLLEXPORT HRESULT __stdcall Direct3DCreate9Ex(UINT nSDKVersion, IDirect3D9Ex** ppDirect3D9Ex) { + return dxvk::CreateD3D9(true, ppDirect3D9Ex); + } + + DLLEXPORT int __stdcall D3DPERF_BeginEvent(D3DCOLOR col, LPCWSTR wszName) { + return 0; + } + + DLLEXPORT int __stdcall D3DPERF_EndEvent(void) { + return 0; + } + + DLLEXPORT void __stdcall D3DPERF_SetMarker(D3DCOLOR col, LPCWSTR wszName) { + } + + DLLEXPORT void __stdcall D3DPERF_SetRegion(D3DCOLOR col, LPCWSTR wszName) { + } + + DLLEXPORT BOOL __stdcall D3DPERF_QueryRepeatFrame(void) { + return FALSE; + } + + DLLEXPORT void __stdcall D3DPERF_SetOptions(DWORD dwOptions) { + } + + DLLEXPORT DWORD __stdcall D3DPERF_GetStatus(void) { + return 0; + } + + + DLLEXPORT void __stdcall DebugSetMute(void) { + } + + DLLEXPORT int __stdcall DebugSetLevel(void) { + return 0; + } + + // Processor Specific Geometry Pipeline + // for P3 SIMD/AMD 3DNow. 
+ + DLLEXPORT void __stdcall PSGPError(D3DFE_PROCESSVERTICES* a, PSGPERRORID b, UINT c) { + } + + DLLEXPORT void __stdcall PSGPSampleTexture(D3DFE_PROCESSVERTICES* a, UINT b, float(*const c)[4], UINT d, float(*const e)[4]) { + } + + DLLEXPORT dxvk::D3D9ShaderValidator* __stdcall Direct3DShaderValidatorCreate9(void) { + return ref(new dxvk::D3D9ShaderValidator()); + } + + DLLEXPORT int __stdcall Direct3D9EnableMaximizedWindowedModeShim(UINT a) { + return 0; + } + +} diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_monitor.cpp b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_monitor.cpp new file mode 100644 index 00000000..f6fa0640 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_monitor.cpp @@ -0,0 +1,66 @@ +#include "d3d9_monitor.h" + +#include "d3d9_format.h" + +#include "../wsi/wsi_window.h" +#include "../wsi/wsi_monitor.h" + +namespace dxvk { + + uint32_t GetMonitorFormatBpp(D3D9Format Format) { + switch (Format) { + case D3D9Format::A8R8G8B8: + case D3D9Format::X8R8G8B8: // This is still 32 bit even though the alpha is unspecified. 
+ case D3D9Format::A2R10G10B10: + return 32; + + case D3D9Format::A1R5G5B5: + case D3D9Format::X1R5G5B5: + case D3D9Format::R5G6B5: + return 16; + + default: + Logger::warn(str::format( + "GetMonitorFormatBpp: Unknown format: ", + Format)); + return 32; + } + } + + + bool IsSupportedAdapterFormat( + D3D9Format Format) { + return Format == D3D9Format::A2R10G10B10 + || Format == D3D9Format::X8R8G8B8 + || Format == D3D9Format::X1R5G5B5 + || Format == D3D9Format::R5G6B5; + } + + + bool IsSupportedBackBufferFormat( + D3D9Format AdapterFormat, + D3D9Format BackBufferFormat, + BOOL Windowed) { + if (!Windowed) { + return (AdapterFormat == D3D9Format::A2R10G10B10 && BackBufferFormat == D3D9Format::A2R10G10B10) || + (AdapterFormat == D3D9Format::X8R8G8B8 && BackBufferFormat == D3D9Format::X8R8G8B8) || + (AdapterFormat == D3D9Format::X8R8G8B8 && BackBufferFormat == D3D9Format::A8R8G8B8) || + (AdapterFormat == D3D9Format::X1R5G5B5 && BackBufferFormat == D3D9Format::X1R5G5B5) || + (AdapterFormat == D3D9Format::X1R5G5B5 && BackBufferFormat == D3D9Format::A1R5G5B5) || + (AdapterFormat == D3D9Format::R5G6B5 && BackBufferFormat == D3D9Format::R5G6B5); + } + + return IsSupportedBackBufferFormat(BackBufferFormat); + } + + bool IsSupportedBackBufferFormat( + D3D9Format BackBufferFormat) { + return BackBufferFormat == D3D9Format::A2R10G10B10 + || BackBufferFormat == D3D9Format::A8R8G8B8 + || BackBufferFormat == D3D9Format::X8R8G8B8 + || BackBufferFormat == D3D9Format::A1R5G5B5 + || BackBufferFormat == D3D9Format::X1R5G5B5 + || BackBufferFormat == D3D9Format::R5G6B5; + } + +} diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_monitor.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_monitor.h new file mode 100644 index 00000000..0a9d1a89 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_monitor.h @@ -0,0 +1,35 @@ +#pragma once + +#include "d3d9_include.h" + +#include "d3d9_format.h" + +namespace dxvk { + + /** + * \brief Queries bits per pixel for a format + * + * The 
format must be a valid swap chain format. + * \param [in] Format The D3D9 format to query + * \returns Bits per pixel for this format + */ + uint32_t GetMonitorFormatBpp( + D3D9Format Format); + + /** + * \brief Returns if a format is supported for a backbuffer/swapchain. + * + * \param [in] Format The D3D9 format to query + * \returns If it is supported as a swapchain/backbuffer format. + */ + bool IsSupportedAdapterFormat( + D3D9Format Format); + + bool IsSupportedBackBufferFormat( + D3D9Format AdapterFormat, + D3D9Format BackBufferFormat, + BOOL Windowed); + + bool IsSupportedBackBufferFormat( + D3D9Format BackBufferFormat); +} diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_multithread.cpp b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_multithread.cpp new file mode 100644 index 00000000..603d4b6b --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_multithread.cpp @@ -0,0 +1,9 @@ +#include "d3d9_device.h" + +namespace dxvk { + + D3D9Multithread::D3D9Multithread( + BOOL Protected) + : m_protected( Protected ) { } + +} diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_multithread.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_multithread.h new file mode 100644 index 00000000..f91857b6 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_multithread.h @@ -0,0 +1,77 @@ +#pragma once + +#include "d3d9_include.h" + +namespace dxvk { + + /** + * \brief Device lock + * + * Lightweight RAII wrapper that implements + * a subset of the functionality provided by + * \c std::unique_lock, with the goal of being + * cheaper to construct and destroy. 
+ */ + class D3D9DeviceLock { + + public: + + D3D9DeviceLock() + : m_mutex(nullptr) { } + + D3D9DeviceLock(sync::RecursiveSpinlock& mutex) + : m_mutex(&mutex) { + mutex.lock(); + } + + D3D9DeviceLock(D3D9DeviceLock&& other) + : m_mutex(other.m_mutex) { + other.m_mutex = nullptr; + } + + D3D9DeviceLock& operator = (D3D9DeviceLock&& other) { + if (m_mutex) + m_mutex->unlock(); + + m_mutex = other.m_mutex; + other.m_mutex = nullptr; + return *this; + } + + ~D3D9DeviceLock() { + if (m_mutex != nullptr) + m_mutex->unlock(); + } + + private: + + sync::RecursiveSpinlock* m_mutex; + + }; + + + /** + * \brief D3D9 context lock + */ + class D3D9Multithread { + + public: + + D3D9Multithread( + BOOL Protected); + + D3D9DeviceLock AcquireLock() { + return m_protected + ? D3D9DeviceLock(m_mutex) + : D3D9DeviceLock(); + } + + private: + + BOOL m_protected; + + sync::RecursiveSpinlock m_mutex; + + }; + +}
// ---- File: src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_names.cpp ----
#include "d3d9_format.h"

namespace dxvk {

  // Stream operator for D3D9Format.
  // NOTE(review): ENUM_NAME / ENUM_DEFAULT are macros defined outside this
  // file; presumably each ENUM_NAME expands to a case label that streams the
  // enumerator's name and ENUM_DEFAULT handles unlisted values - confirm.
  std::ostream& operator << (std::ostream& os, D3D9Format e) {
    switch (e) {
      ENUM_NAME(D3D9Format::Unknown);

      ENUM_NAME(D3D9Format::R8G8B8);
      ENUM_NAME(D3D9Format::A8R8G8B8);
      ENUM_NAME(D3D9Format::X8R8G8B8);
      ENUM_NAME(D3D9Format::R5G6B5);
      ENUM_NAME(D3D9Format::X1R5G5B5);
      ENUM_NAME(D3D9Format::A1R5G5B5);
      ENUM_NAME(D3D9Format::A4R4G4B4);
      ENUM_NAME(D3D9Format::R3G3B2);
      ENUM_NAME(D3D9Format::A8);
      ENUM_NAME(D3D9Format::A8R3G3B2);
      ENUM_NAME(D3D9Format::X4R4G4B4);
      ENUM_NAME(D3D9Format::A2B10G10R10);
      ENUM_NAME(D3D9Format::A8B8G8R8);
      ENUM_NAME(D3D9Format::X8B8G8R8);
      ENUM_NAME(D3D9Format::G16R16);
      ENUM_NAME(D3D9Format::A2R10G10B10);
      ENUM_NAME(D3D9Format::A16B16G16R16);
      ENUM_NAME(D3D9Format::A8P8);
      ENUM_NAME(D3D9Format::P8);
      ENUM_NAME(D3D9Format::L8);
      ENUM_NAME(D3D9Format::A8L8);
      ENUM_NAME(D3D9Format::A4L4);
      ENUM_NAME(D3D9Format::V8U8);
      ENUM_NAME(D3D9Format::L6V5U5);
      ENUM_NAME(D3D9Format::X8L8V8U8);
      ENUM_NAME(D3D9Format::Q8W8V8U8);
      ENUM_NAME(D3D9Format::V16U16);
      ENUM_NAME(D3D9Format::A2W10V10U10);
      ENUM_NAME(D3D9Format::UYVY);
      ENUM_NAME(D3D9Format::R8G8_B8G8);
      ENUM_NAME(D3D9Format::YUY2);
      ENUM_NAME(D3D9Format::G8R8_G8B8);
      ENUM_NAME(D3D9Format::DXT1);
      ENUM_NAME(D3D9Format::DXT2);
      ENUM_NAME(D3D9Format::DXT3);
      ENUM_NAME(D3D9Format::DXT4);
      ENUM_NAME(D3D9Format::DXT5);
      ENUM_NAME(D3D9Format::D16_LOCKABLE);
      ENUM_NAME(D3D9Format::D32);
      ENUM_NAME(D3D9Format::D15S1);
      ENUM_NAME(D3D9Format::D24S8);
      ENUM_NAME(D3D9Format::D24X8);
      ENUM_NAME(D3D9Format::D24X4S4);
      ENUM_NAME(D3D9Format::D16);
      ENUM_NAME(D3D9Format::D32F_LOCKABLE);
      ENUM_NAME(D3D9Format::D24FS8);
      ENUM_NAME(D3D9Format::D32_LOCKABLE);
      ENUM_NAME(D3D9Format::S8_LOCKABLE);
      ENUM_NAME(D3D9Format::L16);
      ENUM_NAME(D3D9Format::VERTEXDATA);
      ENUM_NAME(D3D9Format::INDEX16);
      ENUM_NAME(D3D9Format::INDEX32);
      ENUM_NAME(D3D9Format::Q16W16V16U16);
      ENUM_NAME(D3D9Format::MULTI2_ARGB8);
      ENUM_NAME(D3D9Format::R16F);
      ENUM_NAME(D3D9Format::G16R16F);
      ENUM_NAME(D3D9Format::A16B16G16R16F);
      ENUM_NAME(D3D9Format::R32F);
      ENUM_NAME(D3D9Format::G32R32F);
      ENUM_NAME(D3D9Format::A32B32G32R32F);
      ENUM_NAME(D3D9Format::CxV8U8);
      ENUM_NAME(D3D9Format::A1);
      ENUM_NAME(D3D9Format::A2B10G10R10_XR_BIAS);
      ENUM_NAME(D3D9Format::BINARYBUFFER);

      // Driver Hacks / Unofficial Formats
      ENUM_NAME(D3D9Format::ATI1);
      ENUM_NAME(D3D9Format::ATI2);
      ENUM_NAME(D3D9Format::INST);
      ENUM_NAME(D3D9Format::DF24);
      ENUM_NAME(D3D9Format::DF16);
      ENUM_NAME(D3D9Format::NULL_FORMAT);
      ENUM_NAME(D3D9Format::GET4);
      ENUM_NAME(D3D9Format::GET1);
      ENUM_NAME(D3D9Format::NVDB);
      ENUM_NAME(D3D9Format::A2M1);
      ENUM_NAME(D3D9Format::A2M0);
      ENUM_NAME(D3D9Format::ATOC);
      ENUM_NAME(D3D9Format::INTZ);
      ENUM_NAME(D3D9Format::RAWZ);
      ENUM_NAME(D3D9Format::RESZ);

      ENUM_NAME(D3D9Format::NV11);
      ENUM_NAME(D3D9Format::NV12);
      ENUM_NAME(D3D9Format::P010);
      ENUM_NAME(D3D9Format::P016);
      ENUM_NAME(D3D9Format::Y210);
      ENUM_NAME(D3D9Format::Y216);
      ENUM_NAME(D3D9Format::Y410);
      ENUM_NAME(D3D9Format::AYUV);
      ENUM_NAME(D3D9Format::YV12);
      ENUM_NAME(D3D9Format::OPAQUE_420);

      ENUM_NAME(D3D9Format::AI44);
      ENUM_NAME(D3D9Format::IA44);
      ENUM_NAME(D3D9Format::R2VB);
      ENUM_NAME(D3D9Format::COPM);
      ENUM_NAME(D3D9Format::SSAA);
      ENUM_NAME(D3D9Format::AL16);
      ENUM_NAME(D3D9Format::R16);

      ENUM_NAME(D3D9Format::EXT1);
      ENUM_NAME(D3D9Format::FXT1);
      ENUM_NAME(D3D9Format::GXT1);
      ENUM_NAME(D3D9Format::HXT1);

      ENUM_DEFAULT(e);
    }
  }


  // Stream operator for D3DRENDERSTATETYPE; same macro scheme as above.
  std::ostream& operator << (std::ostream& os, D3DRENDERSTATETYPE e) {
    switch (e) {
      ENUM_NAME(D3DRS_ZENABLE);
      ENUM_NAME(D3DRS_FILLMODE);
      ENUM_NAME(D3DRS_SHADEMODE);
      ENUM_NAME(D3DRS_ZWRITEENABLE);
      ENUM_NAME(D3DRS_ALPHATESTENABLE);
      ENUM_NAME(D3DRS_LASTPIXEL);
      ENUM_NAME(D3DRS_SRCBLEND);
      ENUM_NAME(D3DRS_DESTBLEND);
      ENUM_NAME(D3DRS_CULLMODE);
      ENUM_NAME(D3DRS_ZFUNC);
      ENUM_NAME(D3DRS_ALPHAREF);
      ENUM_NAME(D3DRS_ALPHAFUNC);
      ENUM_NAME(D3DRS_DITHERENABLE);
      ENUM_NAME(D3DRS_ALPHABLENDENABLE);
      ENUM_NAME(D3DRS_FOGENABLE);
      ENUM_NAME(D3DRS_SPECULARENABLE);
      ENUM_NAME(D3DRS_FOGCOLOR);
      ENUM_NAME(D3DRS_FOGTABLEMODE);
      ENUM_NAME(D3DRS_FOGSTART);
      ENUM_NAME(D3DRS_FOGEND);
      ENUM_NAME(D3DRS_FOGDENSITY);
      ENUM_NAME(D3DRS_RANGEFOGENABLE);
      ENUM_NAME(D3DRS_STENCILENABLE);
      ENUM_NAME(D3DRS_STENCILFAIL);
      ENUM_NAME(D3DRS_STENCILZFAIL);
      ENUM_NAME(D3DRS_STENCILPASS);
      ENUM_NAME(D3DRS_STENCILFUNC);
      ENUM_NAME(D3DRS_STENCILREF);
      ENUM_NAME(D3DRS_STENCILMASK);
      ENUM_NAME(D3DRS_STENCILWRITEMASK);
      ENUM_NAME(D3DRS_TEXTUREFACTOR);
      ENUM_NAME(D3DRS_WRAP0);
      ENUM_NAME(D3DRS_WRAP1);
      ENUM_NAME(D3DRS_WRAP2);
      ENUM_NAME(D3DRS_WRAP3);
      ENUM_NAME(D3DRS_WRAP4);
      ENUM_NAME(D3DRS_WRAP5);
      ENUM_NAME(D3DRS_WRAP6);
      ENUM_NAME(D3DRS_WRAP7);
      ENUM_NAME(D3DRS_CLIPPING);
      ENUM_NAME(D3DRS_LIGHTING);
      ENUM_NAME(D3DRS_AMBIENT);
      ENUM_NAME(D3DRS_FOGVERTEXMODE);
      ENUM_NAME(D3DRS_COLORVERTEX);
      ENUM_NAME(D3DRS_LOCALVIEWER);
      ENUM_NAME(D3DRS_NORMALIZENORMALS);
      ENUM_NAME(D3DRS_DIFFUSEMATERIALSOURCE);
      ENUM_NAME(D3DRS_SPECULARMATERIALSOURCE);
      ENUM_NAME(D3DRS_AMBIENTMATERIALSOURCE);
      ENUM_NAME(D3DRS_EMISSIVEMATERIALSOURCE);
      ENUM_NAME(D3DRS_VERTEXBLEND);
      ENUM_NAME(D3DRS_CLIPPLANEENABLE);
      ENUM_NAME(D3DRS_POINTSIZE);
      ENUM_NAME(D3DRS_POINTSIZE_MIN);
      ENUM_NAME(D3DRS_POINTSPRITEENABLE);
      ENUM_NAME(D3DRS_POINTSCALEENABLE);
      ENUM_NAME(D3DRS_POINTSCALE_A);
      ENUM_NAME(D3DRS_POINTSCALE_B);
      ENUM_NAME(D3DRS_POINTSCALE_C);
      ENUM_NAME(D3DRS_MULTISAMPLEANTIALIAS);
      ENUM_NAME(D3DRS_MULTISAMPLEMASK);
      ENUM_NAME(D3DRS_PATCHEDGESTYLE);
      ENUM_NAME(D3DRS_DEBUGMONITORTOKEN);
      ENUM_NAME(D3DRS_POINTSIZE_MAX);
      ENUM_NAME(D3DRS_INDEXEDVERTEXBLENDENABLE);
      ENUM_NAME(D3DRS_COLORWRITEENABLE);
      ENUM_NAME(D3DRS_TWEENFACTOR);
      ENUM_NAME(D3DRS_BLENDOP);
      ENUM_NAME(D3DRS_POSITIONDEGREE);
      ENUM_NAME(D3DRS_NORMALDEGREE);
      ENUM_NAME(D3DRS_SCISSORTESTENABLE);
      ENUM_NAME(D3DRS_SLOPESCALEDEPTHBIAS);
      ENUM_NAME(D3DRS_ANTIALIASEDLINEENABLE);
      ENUM_NAME(D3DRS_MINTESSELLATIONLEVEL);
      ENUM_NAME(D3DRS_MAXTESSELLATIONLEVEL);
      ENUM_NAME(D3DRS_ADAPTIVETESS_X);
      ENUM_NAME(D3DRS_ADAPTIVETESS_Y);
      ENUM_NAME(D3DRS_ADAPTIVETESS_Z);
      ENUM_NAME(D3DRS_ADAPTIVETESS_W);
      ENUM_NAME(D3DRS_ENABLEADAPTIVETESSELLATION);
      ENUM_NAME(D3DRS_TWOSIDEDSTENCILMODE);
      ENUM_NAME(D3DRS_CCW_STENCILFAIL);
      ENUM_NAME(D3DRS_CCW_STENCILZFAIL);
      ENUM_NAME(D3DRS_CCW_STENCILPASS);
      ENUM_NAME(D3DRS_CCW_STENCILFUNC);
      ENUM_NAME(D3DRS_COLORWRITEENABLE1);
      ENUM_NAME(D3DRS_COLORWRITEENABLE2);
      ENUM_NAME(D3DRS_COLORWRITEENABLE3);
      ENUM_NAME(D3DRS_BLENDFACTOR);
      ENUM_NAME(D3DRS_SRGBWRITEENABLE);
      ENUM_NAME(D3DRS_DEPTHBIAS);
      ENUM_NAME(D3DRS_WRAP8);
      ENUM_NAME(D3DRS_WRAP9);
      ENUM_NAME(D3DRS_WRAP10);
      ENUM_NAME(D3DRS_WRAP11);
      ENUM_NAME(D3DRS_WRAP12);
      ENUM_NAME(D3DRS_WRAP13);
      ENUM_NAME(D3DRS_WRAP14);
      ENUM_NAME(D3DRS_WRAP15);
      ENUM_NAME(D3DRS_SEPARATEALPHABLENDENABLE);
      ENUM_NAME(D3DRS_SRCBLENDALPHA);
      ENUM_NAME(D3DRS_DESTBLENDALPHA);
      ENUM_NAME(D3DRS_BLENDOPALPHA);

      ENUM_DEFAULT(e);
    }
  }

}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_names.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_names.h new file mode 100644 index 00000000..82d9c479 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_names.h @@ -0,0 +1,7 @@ +#include "d3d9_include.h" + +namespace dxvk { + + std::ostream& operator << (std::ostream& os, D3DRENDERSTATETYPE e); + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_options.cpp b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_options.cpp new file mode 100644 index 00000000..0f99fefd --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_options.cpp @@ -0,0 +1,92 @@ +#include "d3d9_options.h" + +#include "d3d9_caps.h" +#include "d3d9_config.h" + +namespace dxvk { + + static int32_t parsePciId(const std::string& str) { + if (str.size() != 4) + return -1; + + int32_t id = 0; + + for (size_t i = 0; i < str.size(); i++) { + id *= 16; + + if (str[i] >= '0' && str[i] <= '9') + id += str[i] - '0'; + else if (str[i] >= 'A' && str[i] <= 'F') + id += str[i] - 'A' + 10; + else if (str[i] >= 'a' && str[i] <= 'f') + id += str[i] - 'a' + 10; + else + return -1; + } + + return id; + } + + + D3D9Options::D3D9Options(const Rc<DxvkDevice>& device, const Config& config) { + const Rc<DxvkAdapter> adapter = device != nullptr ? device->adapter() : nullptr; + + // Fetch these as a string representing a hexadecimal number and parse it. + this->customVendorId = parsePciId(config.getOption<std::string>("d3d9.customVendorId")); + this->customDeviceId = parsePciId(config.getOption<std::string>("d3d9.customDeviceId")); + this->customDeviceDesc = config.getOption<std::string>("d3d9.customDeviceDesc"); + + const int32_t vendorId = this->customDeviceId != -1 + ? this->customDeviceId + : (adapter != nullptr ? 
adapter->deviceProperties().vendorID : 0); + + this->maxFrameLatency = config.getOption<int32_t> ("d3d9.maxFrameLatency", 0); + this->maxFrameRate = config.getOption<int32_t> ("d3d9.maxFrameRate", 0); + this->presentInterval = config.getOption<int32_t> ("d3d9.presentInterval", -1); + this->shaderModel = config.getOption<int32_t> ("d3d9.shaderModel", 3); + this->evictManagedOnUnlock = config.getOption<bool> ("d3d9.evictManagedOnUnlock", false); + this->dpiAware = config.getOption<bool> ("d3d9.dpiAware", true); + this->strictConstantCopies = config.getOption<bool> ("d3d9.strictConstantCopies", false); + this->strictPow = config.getOption<bool> ("d3d9.strictPow", true); + this->lenientClear = config.getOption<bool> ("d3d9.lenientClear", false); + this->numBackBuffers = config.getOption<int32_t> ("d3d9.numBackBuffers", 0); + this->noExplicitFrontBuffer = config.getOption<bool> ("d3d9.noExplicitFrontBuffer", false); + this->deferSurfaceCreation = config.getOption<bool> ("d3d9.deferSurfaceCreation", false); + this->samplerAnisotropy = config.getOption<int32_t> ("d3d9.samplerAnisotropy", -1); + this->maxAvailableMemory = config.getOption<int32_t> ("d3d9.maxAvailableMemory", 4096); + this->supportDFFormats = config.getOption<bool> ("d3d9.supportDFFormats", true); + this->supportX4R4G4B4 = config.getOption<bool> ("d3d9.supportX4R4G4B4", true); + this->supportD32 = config.getOption<bool> ("d3d9.supportD32", true); + this->swvpFloatCount = config.getOption<int32_t> ("d3d9.swvpFloatCount", caps::MaxFloatConstantsSoftware); + this->swvpIntCount = config.getOption<int32_t> ("d3d9.swvpIntCount", caps::MaxOtherConstantsSoftware); + this->swvpBoolCount = config.getOption<int32_t> ("d3d9.swvpBoolCount", caps::MaxOtherConstantsSoftware); + this->disableA8RT = config.getOption<bool> ("d3d9.disableA8RT", false); + this->invariantPosition = config.getOption<bool> ("d3d9.invariantPosition", false); + this->memoryTrackTest = config.getOption<bool> ("d3d9.memoryTrackTest", false); + 
this->supportVCache = config.getOption<bool> ("d3d9.supportVCache", vendorId == 0x10de); + this->enableDialogMode = config.getOption<bool> ("d3d9.enableDialogMode", false); + this->forceSamplerTypeSpecConstants = config.getOption<bool> ("d3d9.forceSamplerTypeSpecConstants", false); + this->forceSwapchainMSAA = config.getOption<int32_t> ("d3d9.forceSwapchainMSAA", -1); + this->forceAspectRatio = config.getOption<std::string> ("d3d9.forceAspectRatio", ""); + this->allowDoNotWait = config.getOption<bool> ("d3d9.allowDoNotWait", true); + this->allowDiscard = config.getOption<bool> ("d3d9.allowDiscard", true); + this->enumerateByDisplays = config.getOption<bool> ("d3d9.enumerateByDisplays", true); + this->longMad = config.getOption<bool> ("d3d9.longMad", false); + this->tearFree = config.getOption<Tristate> ("d3d9.tearFree", Tristate::Auto); + this->alphaTestWiggleRoom = config.getOption<bool> ("d3d9.alphaTestWiggleRoom", false); + this->apitraceMode = config.getOption<bool> ("d3d9.apitraceMode", false); + this->deviceLocalConstantBuffers = config.getOption<bool> ("d3d9.deviceLocalConstantBuffers", false); + + // If we are not Nvidia, enable general hazards. + this->generalHazards = adapter != nullptr + && !adapter->matchesDriver( + DxvkGpuVendor::Nvidia, + VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR, + 0, 0); + applyTristate(this->generalHazards, config.getOption<Tristate>("d3d9.generalHazards", Tristate::Auto)); + + this->d3d9FloatEmulation = config::FloatEmulationByDefault; // <-- Future Extension? 
+ applyTristate(this->d3d9FloatEmulation, config.getOption<Tristate>("d3d9.floatEmulation", Tristate::Auto)); + } + +} diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_options.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_options.h new file mode 100644 index 00000000..e49bb61e --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_options.h @@ -0,0 +1,160 @@ +#pragma once + +#include "../util/config/config.h" +#include "../dxvk/dxvk_device.h" + +#include "d3d9_include.h" + +namespace dxvk { + + struct D3D9Options { + + D3D9Options(const Rc<DxvkDevice>& device, const Config& config); + + /// Override PCI vendor and device IDs reported to the + /// application. This may make apps think they are running + /// on a different GPU than they do and behave differently. + int32_t customVendorId; + int32_t customDeviceId; + std::string customDeviceDesc; + + /// Present interval. Overrides the value + /// in D3DPRESENT_PARAMS used in swapchain present. + int32_t presentInterval; + + /// Override maximum frame latency if the app specifies + /// a higher value. May help with frame timing issues. + int32_t maxFrameLatency; + + /// Limit frame rate + int32_t maxFrameRate; + + /// Set the max shader model the device can support in the caps. + int32_t shaderModel; + + /// Whether or not managed resources should stay in memory until unlock, or until manually evicted. + bool evictManagedOnUnlock; + + /// Whether or not to set the process as DPI aware in Windows when the API interface is created. + bool dpiAware; + + /// True: Copy our constant set into UBO if we are relative indexing ever. + /// False: Copy our constant set into UBO if we are relative indexing at the start of a defined constant + /// Why?: In theory, FXC should never generate code where this would be an issue. 
+ bool strictConstantCopies; + + /// Whether or not we should care about pow(0, 0) = 1 + bool strictPow; + + /// Whether or not to do a fast path clear if we're close enough to the whole render target. + bool lenientClear; + + /// Back buffer count for the Vulkan swap chain. + /// Overrides buffer count in present parameters. + int32_t numBackBuffers; + + /// Don't create an explicit front buffer in our own swapchain. The Vulkan swapchain is unaffected. + /// Some games don't handle front/backbuffer flipping very well because they don't always redraw + /// each frame completely, and rely on old pixel data from the previous frame to still be there. + /// When this option is set and a game only requests one backbuffer, there will be no flipping in + /// our own swapchain, so the game will always draw to the same buffer and can rely on old pixel + /// data to still be there after a Present call. + /// This means that D3D9SwapChainEx::GetFrontBufferData returns data from the backbuffer of the + /// previous frame, which is the same as the current backbuffer if only 1 backbuffer was requested. + bool noExplicitFrontBuffer; + + /// Defer surface creation + bool deferSurfaceCreation; + + /// Whether to transition to general + /// for rendering hazards + bool generalHazards; + + /// Anisotropic filter override + /// + /// Enforces anisotropic filtering with the + /// given anisotropy value for all samplers. 
+ int32_t samplerAnisotropy; + + /// Max available memory override + /// + /// Changes the max initial value used in + /// tracking and GetAvailableTextureMem + uint32_t maxAvailableMemory; + + /// D3D9 Floating Point Emulation (anything * 0 = 0) + bool d3d9FloatEmulation; + + /// Support the DF16 & DF24 texture format + bool supportDFFormats; + + /// Support X4R4G4B4 + bool supportX4R4G4B4; + + /// Support D32 + bool supportD32; + + /// SWVP Constant Limits + int32_t swvpFloatCount; + int32_t swvpIntCount; + int32_t swvpBoolCount; + + /// Disable D3DFMT_A8 for render targets. + /// Specifically to work around a game + /// bug in The Sims 2 that happens on native too! + bool disableA8RT; + + /// Work around a NV driver quirk + /// Fixes flickering/z-fighting in some games. + bool invariantPosition; + + /// Whether or not to respect memory tracking for + /// failing resource allocation. + bool memoryTrackTest; + + /// Support VCACHE query + bool supportVCache; + + /// Forced aspect ratio, disable other modes + std::string forceAspectRatio; + + /// Enable dialog mode (ie. no exclusive fullscreen) + bool enableDialogMode; + + /// Always use a spec constant to determine sampler type (instead of just in PS 1.x) + /// Works around a game bug in Halo CE where it gives cube textures to 2d/volume samplers + bool forceSamplerTypeSpecConstants; + + /// Forces an MSAA level on the swapchain + int32_t forceSwapchainMSAA; + + /// Allow D3DLOCK_DONOTWAIT + bool allowDoNotWait; + + /// Allow D3DLOCK_DISCARD + bool allowDiscard; + + /// Enumerate adapters by displays + bool enumerateByDisplays; + + /// Should we make our Mads a FFma or do it the long way with an FMul and an FAdd? + /// This solves some rendering bugs in games that have z-pass shaders which + /// don't match entirely to the regular vertex shader in this way. 
+ bool longMad; + + /// Tear-free mode if vsync is disabled + /// Tearing mode if vsync is enabled + Tristate tearFree; + + /// Workaround for games using alpha test == 1.0, etc due to wonky interpolation or + /// misc. imprecision on some vendors + bool alphaTestWiggleRoom; + + /// Apitrace mode: Maps all buffers in cached memory. + bool apitraceMode; + + /// Use device local memory for constant buffers. + bool deviceLocalConstantBuffers; + }; + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_query.cpp b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_query.cpp new file mode 100644 index 00000000..7ce47cf9 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_query.cpp @@ -0,0 +1,338 @@ +#include "d3d9_query.h" + +#include "d3d9_device.h" + +namespace dxvk { + + D3D9Query::D3D9Query( + D3D9DeviceEx* pDevice, + D3DQUERYTYPE QueryType) + : D3D9DeviceChild<IDirect3DQuery9>(pDevice) + , m_queryType (QueryType) + , m_state (D3D9_VK_QUERY_INITIAL) { + Rc<DxvkDevice> dxvkDevice = m_parent->GetDXVKDevice(); + + switch (m_queryType) { + case D3DQUERYTYPE_VCACHE: + break; + + case D3DQUERYTYPE_EVENT: + m_event[0] = dxvkDevice->createGpuEvent(); + break; + + case D3DQUERYTYPE_OCCLUSION: + m_query[0] = dxvkDevice->createGpuQuery( + VK_QUERY_TYPE_OCCLUSION, + VK_QUERY_CONTROL_PRECISE_BIT, 0); + break; + + case D3DQUERYTYPE_TIMESTAMP: + m_query[0] = dxvkDevice->createGpuQuery( + VK_QUERY_TYPE_TIMESTAMP, 0, 0); + break; + + case D3DQUERYTYPE_TIMESTAMPDISJOINT: + for (uint32_t i = 0; i < 2; i++) { + m_query[i] = dxvkDevice->createGpuQuery( + VK_QUERY_TYPE_TIMESTAMP, 0, 0); + } + break; + + case D3DQUERYTYPE_TIMESTAMPFREQ: + break; + + case D3DQUERYTYPE_VERTEXSTATS: + m_query[0] = dxvkDevice->createGpuQuery( + VK_QUERY_TYPE_PIPELINE_STATISTICS, 0, 0); + break; + + default: + throw DxvkError(str::format("D3D9Query: Unsupported query type ", m_queryType)); + } + } + + + HRESULT STDMETHODCALLTYPE D3D9Query::QueryInterface(REFIID riid, void** ppvObject) { + if (ppvObject == nullptr) + return E_POINTER; + + *ppvObject = nullptr; + + if (riid == __uuidof(IUnknown) + || riid == __uuidof(IDirect3DQuery9)) { + *ppvObject = ref(this); + return S_OK; + } + + Logger::warn("D3D9Query::QueryInterface: Unknown interface query"); + Logger::warn(str::format(riid)); + return E_NOINTERFACE; + } + + + D3DQUERYTYPE STDMETHODCALLTYPE D3D9Query::GetType() { + return m_queryType; + } + + + DWORD 
STDMETHODCALLTYPE D3D9Query::GetDataSize() { + switch (m_queryType) { + case D3DQUERYTYPE_VCACHE: return sizeof(D3DDEVINFO_VCACHE); + case D3DQUERYTYPE_RESOURCEMANAGER: return sizeof(D3DDEVINFO_RESOURCEMANAGER); + case D3DQUERYTYPE_VERTEXSTATS: return sizeof(D3DDEVINFO_D3DVERTEXSTATS); + case D3DQUERYTYPE_EVENT: return sizeof(BOOL); + case D3DQUERYTYPE_OCCLUSION: return sizeof(DWORD); + case D3DQUERYTYPE_TIMESTAMP: return sizeof(UINT64); + case D3DQUERYTYPE_TIMESTAMPDISJOINT: return sizeof(BOOL); + case D3DQUERYTYPE_TIMESTAMPFREQ: return sizeof(UINT64); + case D3DQUERYTYPE_PIPELINETIMINGS: return sizeof(D3DDEVINFO_D3D9PIPELINETIMINGS); + case D3DQUERYTYPE_INTERFACETIMINGS: return sizeof(D3DDEVINFO_D3D9INTERFACETIMINGS); + case D3DQUERYTYPE_VERTEXTIMINGS: return sizeof(D3DDEVINFO_D3D9STAGETIMINGS); + case D3DQUERYTYPE_PIXELTIMINGS: return sizeof(D3DDEVINFO_D3D9PIPELINETIMINGS); + case D3DQUERYTYPE_BANDWIDTHTIMINGS: return sizeof(D3DDEVINFO_D3D9BANDWIDTHTIMINGS); + case D3DQUERYTYPE_CACHEUTILIZATION: return sizeof(D3DDEVINFO_D3D9CACHEUTILIZATION); + default: return 0; + } + } + + + HRESULT STDMETHODCALLTYPE D3D9Query::Issue(DWORD dwIssueFlags) { + // Note: No need to submit to CS if we don't do anything! + + if (dwIssueFlags == D3DISSUE_BEGIN) { + if (QueryBeginnable(m_queryType)) { + if (m_state == D3D9_VK_QUERY_BEGUN && QueryEndable(m_queryType)) + m_parent->End(this); + + m_parent->Begin(this); + + m_state = D3D9_VK_QUERY_BEGUN; + } + } + else { + if (QueryEndable(m_queryType)) { + if (m_state != D3D9_VK_QUERY_BEGUN && QueryBeginnable(m_queryType)) + m_parent->Begin(this); + + m_resetCtr.fetch_add(1, std::memory_order_acquire); + + m_parent->End(this); + + } + m_state = D3D9_VK_QUERY_ENDED; + } + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9Query::GetData(void* pData, DWORD dwSize, DWORD dwGetDataFlags) { + if (m_state == D3D9_VK_QUERY_CACHED) { + // Query data was already retrieved once. 
+ // Use cached query data to prevent having to check the VK event + // and having to iterate over the VK queries again + if (likely(pData && dwSize)) { + if (m_queryType != D3DQUERYTYPE_EVENT) { + memcpy(pData, &m_dataCache, dwSize); + } else { + *static_cast<bool*>(pData) = true; + } + } + return D3D_OK; + } + + HRESULT hr = this->GetQueryData(pData, dwSize); + + bool flush = dwGetDataFlags & D3DGETDATA_FLUSH; + + // If we get S_FALSE and it's not from the fact + // they didn't call end, do some flushy stuff... + if (flush && hr == S_FALSE && m_state != D3D9_VK_QUERY_BEGUN) { + this->NotifyStall(); + m_parent->FlushImplicit(FALSE); + } + + return hr; + } + + + HRESULT D3D9Query::GetQueryData(void* pData, DWORD dwSize) { + // Let the game know that calling end might be a good idea... + if (m_state == D3D9_VK_QUERY_BEGUN) + return S_FALSE; + + if (unlikely(!pData && dwSize)) + return D3DERR_INVALIDCALL; + + // The game forgot to even issue the query! + // Let's do it for them... + // This will issue both the begin, and the end. 
+ if (m_state == D3D9_VK_QUERY_INITIAL) + this->Issue(D3DISSUE_END); + + if (m_resetCtr != 0u) + return S_FALSE; + + if (m_queryType == D3DQUERYTYPE_EVENT) { + DxvkGpuEventStatus status = m_event[0]->test(); + + if (status == DxvkGpuEventStatus::Invalid) + return D3DERR_INVALIDCALL; + + bool signaled = status == DxvkGpuEventStatus::Signaled; + + if (pData != nullptr) + *static_cast<BOOL*>(pData) = signaled; + + if (signaled) { + m_state = D3D9_VK_QUERY_CACHED; + return D3D_OK; + } else { + return S_FALSE; + } + } + else { + std::array<DxvkQueryData, MaxGpuQueries> queryData = { }; + + for (uint32_t i = 0; i < MaxGpuQueries && m_query[i] != nullptr; i++) { + DxvkGpuQueryStatus status = m_query[i]->getData(queryData[i]); + + if (status == DxvkGpuQueryStatus::Invalid + || status == DxvkGpuQueryStatus::Failed) + return D3DERR_INVALIDCALL; + + if (status == DxvkGpuQueryStatus::Pending) + return S_FALSE; + } + + if (pData == nullptr) + return D3D_OK; + + + switch (m_queryType) { + case D3DQUERYTYPE_VCACHE: + // Don't know what the hell any of this means. + // Nor do I care. This just makes games work. 
+ m_dataCache.VCache.Pattern = MAKEFOURCC('H', 'C', 'A', 'C'); + m_dataCache.VCache.OptMethod = 1; + m_dataCache.VCache.CacheSize = 24; + m_dataCache.VCache.MagicNumber = 20; + break; + + case D3DQUERYTYPE_OCCLUSION: + m_dataCache.Occlusion = DWORD(queryData[0].occlusion.samplesPassed); + break; + + case D3DQUERYTYPE_TIMESTAMP: + m_dataCache.Timestamp = queryData[0].timestamp.time; + break; + + case D3DQUERYTYPE_TIMESTAMPDISJOINT: + m_dataCache.TimestampDisjoint = queryData[0].timestamp.time < queryData[1].timestamp.time; + break; + + case D3DQUERYTYPE_TIMESTAMPFREQ: + m_dataCache.TimestampFreq = GetTimestampQueryFrequency(); + break; + + case D3DQUERYTYPE_VERTEXSTATS: + m_dataCache.VertexStats.NumRenderedTriangles = queryData[0].statistic.iaPrimitives; + m_dataCache.VertexStats.NumExtraClippingTriangles = queryData[0].statistic.clipPrimitives; + break; + + default: + break; + } + + if (likely(pData && dwSize)) + memcpy(pData, &m_dataCache, dwSize); + + m_state = D3D9_VK_QUERY_CACHED; + return D3D_OK; + } + } + + + UINT64 D3D9Query::GetTimestampQueryFrequency() const { + Rc<DxvkDevice> device = m_parent->GetDXVKDevice(); + Rc<DxvkAdapter> adapter = device->adapter(); + + VkPhysicalDeviceLimits limits = adapter->deviceProperties().limits; + return uint64_t(1'000'000'000.0f / limits.timestampPeriod); + } + + + void D3D9Query::Begin(DxvkContext* ctx) { + switch (m_queryType) { + case D3DQUERYTYPE_OCCLUSION: + case D3DQUERYTYPE_VERTEXSTATS: + ctx->beginQuery(m_query[0]); + break; + + case D3DQUERYTYPE_TIMESTAMPDISJOINT: + ctx->writeTimestamp(m_query[1]); + break; + + default: break; + } + } + + + void D3D9Query::End(DxvkContext* ctx) { + switch (m_queryType) { + case D3DQUERYTYPE_TIMESTAMP: + case D3DQUERYTYPE_TIMESTAMPDISJOINT: + ctx->writeTimestamp(m_query[0]); + break; + + case D3DQUERYTYPE_VERTEXSTATS: + case D3DQUERYTYPE_OCCLUSION: + ctx->endQuery(m_query[0]); + break; + + case D3DQUERYTYPE_EVENT: + ctx->signalGpuEvent(m_event[0]); + break; + + default: break; + } 
+ + m_resetCtr.fetch_sub(1, std::memory_order_release); + } + + + bool D3D9Query::QueryBeginnable(D3DQUERYTYPE QueryType) { + return QueryType == D3DQUERYTYPE_OCCLUSION + || QueryType == D3DQUERYTYPE_VERTEXSTATS + || QueryType == D3DQUERYTYPE_TIMESTAMPDISJOINT; + } + + + bool D3D9Query::QueryEndable(D3DQUERYTYPE QueryType) { + return QueryBeginnable(QueryType) + || QueryType == D3DQUERYTYPE_TIMESTAMP + || QueryType == D3DQUERYTYPE_EVENT; + } + + + HRESULT D3D9Query::QuerySupported(D3D9DeviceEx* pDevice, D3DQUERYTYPE QueryType) { + switch (QueryType) { + case D3DQUERYTYPE_VCACHE: + if (!pDevice->GetOptions()->supportVCache) + return D3DERR_NOTAVAILABLE; + + return D3D_OK; + case D3DQUERYTYPE_EVENT: + case D3DQUERYTYPE_OCCLUSION: + case D3DQUERYTYPE_TIMESTAMP: + case D3DQUERYTYPE_TIMESTAMPDISJOINT: + case D3DQUERYTYPE_TIMESTAMPFREQ: + case D3DQUERYTYPE_VERTEXSTATS: + return D3D_OK; + + default: + return D3DERR_NOTAVAILABLE; + } + } + +} diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_query.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_query.h new file mode 100644 index 00000000..8d742c96 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_query.h @@ -0,0 +1,91 @@ +#pragma once + +#include "d3d9_device_child.h" + +#include "../dxvk/dxvk_context.h" + +namespace dxvk { + + enum D3D9_VK_QUERY_STATE : uint32_t { + D3D9_VK_QUERY_INITIAL, + D3D9_VK_QUERY_BEGUN, + D3D9_VK_QUERY_ENDED, + D3D9_VK_QUERY_CACHED + }; + + union D3D9_QUERY_DATA { + D3DDEVINFO_VCACHE VCache; + DWORD Occlusion; + UINT64 Timestamp; + BOOL TimestampDisjoint; + UINT64 TimestampFreq; + D3DDEVINFO_D3DVERTEXSTATS VertexStats; + }; + + class D3D9Query : public D3D9DeviceChild<IDirect3DQuery9> { + constexpr static uint32_t MaxGpuQueries = 2; + constexpr static uint32_t MaxGpuEvents = 1; + public: + + D3D9Query( + D3D9DeviceEx* pDevice, + D3DQUERYTYPE QueryType); + + HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, void** ppvObject); + + D3DQUERYTYPE STDMETHODCALLTYPE GetType() final; + 
+ DWORD STDMETHODCALLTYPE GetDataSize() final; + + HRESULT STDMETHODCALLTYPE Issue(DWORD dwIssueFlags) final; + + HRESULT STDMETHODCALLTYPE GetData(void* pData, DWORD dwSize, DWORD dwGetDataFlags) final; + + HRESULT GetQueryData(void* pData, DWORD dwSize); + + void Begin(DxvkContext* ctx); + void End(DxvkContext* ctx); + + static bool QueryBeginnable(D3DQUERYTYPE QueryType); + static bool QueryEndable(D3DQUERYTYPE QueryType); + + static HRESULT QuerySupported(D3D9DeviceEx* pDevice, D3DQUERYTYPE QueryType); + + bool IsEvent() const { + return m_queryType == D3DQUERYTYPE_EVENT; + } + + bool IsStalling() const { + return m_stallFlag; + } + + void NotifyEnd() { + m_stallMask <<= 1; + } + + void NotifyStall() { + m_stallMask |= 1; + m_stallFlag |= bit::popcnt(m_stallMask) >= 16; + } + + private: + + D3DQUERYTYPE m_queryType; + + D3D9_VK_QUERY_STATE m_state; + + std::array<Rc<DxvkGpuQuery>, MaxGpuQueries> m_query; + std::array<Rc<DxvkGpuEvent>, MaxGpuEvents> m_event; + + uint32_t m_stallMask = 0; + bool m_stallFlag = false; + + std::atomic<uint32_t> m_resetCtr = { 0u }; + + D3D9_QUERY_DATA m_dataCache; + + UINT64 GetTimestampQueryFrequency() const; + + }; + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_resource.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_resource.h new file mode 100644 index 00000000..5d343e20 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_resource.h @@ -0,0 +1,84 @@ +#pragma once + +#include "d3d9_device_child.h" + +#include "../util/com/com_private_data.h" + +namespace dxvk { + + template <typename... Type> + class D3D9Resource : public D3D9DeviceChild<Type...> { + + public: + + D3D9Resource(D3D9DeviceEx* pDevice) + : D3D9DeviceChild<Type...>(pDevice) + , m_priority ( 0 ) { } + + HRESULT STDMETHODCALLTYPE SetPrivateData( + REFGUID refguid, + const void* pData, + DWORD SizeOfData, + DWORD Flags) final { + HRESULT hr; + if (Flags & D3DSPD_IUNKNOWN) { + IUnknown* unknown = + const_cast<IUnknown*>( + reinterpret_cast<const IUnknown*>(pData)); + hr = m_privateData.setInterface( + refguid, unknown); + } + else + hr = m_privateData.setData( + refguid, SizeOfData, pData); + + if (FAILED(hr)) + return D3DERR_INVALIDCALL; + + return D3D_OK; + } + + HRESULT STDMETHODCALLTYPE GetPrivateData( + REFGUID refguid, + void* pData, + DWORD* pSizeOfData) final { + HRESULT hr = m_privateData.getData( + refguid, reinterpret_cast<UINT*>(pSizeOfData), pData); + + if (FAILED(hr)) + return D3DERR_INVALIDCALL; + + return D3D_OK; + } + + HRESULT STDMETHODCALLTYPE FreePrivateData(REFGUID refguid) final { + HRESULT hr = m_privateData.setData(refguid, 0, nullptr); + + if (FAILED(hr)) + return D3DERR_INVALIDCALL; + + return D3D_OK; + } + + DWORD STDMETHODCALLTYPE SetPriority(DWORD PriorityNew) { + DWORD oldPriority = m_priority; + m_priority = PriorityNew; + return oldPriority; + } + + DWORD STDMETHODCALLTYPE GetPriority() { + return m_priority; + } + + + protected: + + DWORD m_priority; + + private: + + ComPrivateData m_privateData; + + }; + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_sampler.cpp b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_sampler.cpp new file mode 100644 index 00000000..6a68f220 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_sampler.cpp @@ -0,0 +1,44 @@ +#include "d3d9_sampler.h" + +namespace dxvk { + + size_t D3D9SamplerKeyHash::operator () (const D3D9SamplerKey& key) const { + DxvkHashState state; + + std::hash<DWORD> dhash; + std::hash<D3DTEXTUREADDRESS> tahash; + std::hash<D3DTEXTUREFILTERTYPE> tfhash; + std::hash<float> fhash; + std::hash<bool> bhash; + + state.add(tahash(key.AddressU)); + state.add(tahash(key.AddressV)); + state.add(tahash(key.AddressW)); + state.add(tfhash(key.MagFilter)); + state.add(tfhash(key.MinFilter)); + state.add(tfhash(key.MipFilter)); + state.add(dhash (key.MaxAnisotropy)); + state.add(fhash (key.MipmapLodBias)); + state.add(dhash (key.MaxMipLevel)); + state.add(dhash (key.BorderColor)); + state.add(bhash (key.Depth)); + + return state; + } + + + bool D3D9SamplerKeyEq::operator () (const D3D9SamplerKey& a, const D3D9SamplerKey& b) const { + return a.AddressU == b.AddressU + && a.AddressV == b.AddressV + && a.AddressW == b.AddressW + && a.MagFilter == b.MagFilter + && a.MinFilter == b.MinFilter + && a.MipFilter == b.MipFilter + && a.MaxAnisotropy == b.MaxAnisotropy + && a.MipmapLodBias == b.MipmapLodBias + && a.MaxMipLevel == b.MaxMipLevel + && a.BorderColor == b.BorderColor + && a.Depth == b.Depth; + } + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_sampler.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_sampler.h new file mode 100644 index 00000000..44471094 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_sampler.h @@ -0,0 +1,75 @@ +#pragma once + +#include "d3d9_include.h" + +#include "d3d9_util.h" + +#include "../dxvk/dxvk_hash.h" + +#include "../util/util_math.h" + +namespace dxvk { + + struct D3D9SamplerKey { + D3DTEXTUREADDRESS AddressU; + D3DTEXTUREADDRESS AddressV; + D3DTEXTUREADDRESS AddressW; + D3DTEXTUREFILTERTYPE MagFilter; + D3DTEXTUREFILTERTYPE MinFilter; + D3DTEXTUREFILTERTYPE MipFilter; + DWORD MaxAnisotropy; + float MipmapLodBias; + DWORD MaxMipLevel; + D3DCOLOR BorderColor; + bool Depth; + }; + + struct D3D9SamplerKeyHash { + size_t operator () (const D3D9SamplerKey& key) const; + }; + + struct D3D9SamplerKeyEq { + bool operator () (const D3D9SamplerKey& a, const D3D9SamplerKey& b) const; + }; + + inline void NormalizeSamplerKey(D3D9SamplerKey& key) { + key.AddressU = std::clamp(key.AddressU, D3DTADDRESS_WRAP, D3DTADDRESS_MIRRORONCE); + key.AddressV = std::clamp(key.AddressV, D3DTADDRESS_WRAP, D3DTADDRESS_MIRRORONCE); + key.AddressW = std::clamp(key.AddressW, D3DTADDRESS_WRAP, D3DTADDRESS_MIRRORONCE); + + bool hasAnisotropy = IsAnisotropic(key.MagFilter) || IsAnisotropic(key.MinFilter); + + key.MagFilter = std::clamp(key.MagFilter, D3DTEXF_NONE, D3DTEXF_LINEAR); + key.MinFilter = std::clamp(key.MinFilter, D3DTEXF_NONE, D3DTEXF_LINEAR); + key.MipFilter = std::clamp(key.MipFilter, D3DTEXF_NONE, D3DTEXF_LINEAR); + + key.MaxAnisotropy = hasAnisotropy + ? std::clamp<DWORD>(key.MaxAnisotropy, 1, 16) + : 1; + + if (key.MipFilter == D3DTEXF_NONE) { + // May as well try and keep slots down. + key.MipmapLodBias = 0; + } + else { + // Games also pass NAN here, this accounts for that. 
+ if (unlikely(key.MipmapLodBias != key.MipmapLodBias)) + key.MipmapLodBias = 0.0f; + + // Clamp between -15.0f and 15.0f, matching mip limits of d3d9. + key.MipmapLodBias = std::clamp(key.MipmapLodBias, -15.0f, 15.0f); + + // Round to the nearest .5 + // Fixes sampler leaks in UE3 games w/ mip streaming + // eg. Borderlands 2 + key.MipmapLodBias = std::round(key.MipmapLodBias * 2.0f) / 2.0f; + } + + if (key.AddressU != D3DTADDRESS_BORDER + && key.AddressV != D3DTADDRESS_BORDER + && key.AddressW != D3DTADDRESS_BORDER) { + key.BorderColor = 0; + } + } + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_shader.cpp b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_shader.cpp new file mode 100644 index 00000000..8c717db0 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_shader.cpp @@ -0,0 +1,192 @@ +#include "d3d9_shader.h" + +#include "d3d9_device.h" +#include "d3d9_util.h" + +namespace dxvk { + +#ifndef DXVK_NATIVE + typedef HRESULT (STDMETHODCALLTYPE *D3DXDisassembleShader) ( + const void* pShader, + BOOL EnableColorCode, + char* pComments, + ID3DBlob** ppDisassembly); // ppDisassembly is actually a D3DXBUFFER, but it has the exact same vtable as a ID3DBlob at the start. + + D3DXDisassembleShader g_pfnDisassembleShader = nullptr; + + HRESULT DisassembleShader( + const void* pShader, + BOOL EnableColorCode, + char* pComments, + ID3DBlob** ppDisassembly) { + if (g_pfnDisassembleShader == nullptr) { + HMODULE d3d9x = LoadLibraryA("d3dx9.dll"); + + if (d3d9x == nullptr) + d3d9x = LoadLibraryA("d3dx9_43.dll"); + + g_pfnDisassembleShader = + reinterpret_cast<D3DXDisassembleShader>(GetProcAddress(d3d9x, "D3DXDisassembleShader")); + } + + if (g_pfnDisassembleShader == nullptr) + return D3DERR_INVALIDCALL; + + return g_pfnDisassembleShader( + pShader, + EnableColorCode, + pComments, + ppDisassembly); + } +#endif + + + D3D9CommonShader::D3D9CommonShader() {} + + D3D9CommonShader::D3D9CommonShader( + D3D9DeviceEx* pDevice, + VkShaderStageFlagBits ShaderStage, + const DxvkShaderKey& Key, + const DxsoModuleInfo* pDxsoModuleInfo, + const void* pShaderBytecode, + const DxsoAnalysisInfo& AnalysisInfo, + DxsoModule* pModule) { + const uint32_t bytecodeLength = AnalysisInfo.bytecodeByteLength; + m_bytecode.resize(bytecodeLength); + std::memcpy(m_bytecode.data(), pShaderBytecode, bytecodeLength); + + const std::string name = Key.toString(); + Logger::debug(str::format("Compiling shader ", name)); + + const std::string dumpPath = env::getEnvVar("DXVK_SHADER_DUMP_PATH"); +#ifndef DXVK_NATIVE + // 
If requested by the user, dump both the raw DXBC + // shader and the compiled SPIR-V module to a file. + if (dumpPath.size() != 0) { + DxsoReader reader( + reinterpret_cast<const char*>(pShaderBytecode)); + + reader.store(std::ofstream(str::tows(str::format(dumpPath, "/", name, ".dxso").c_str()).c_str(), + std::ios_base::binary | std::ios_base::trunc), bytecodeLength); + + char comment[2048]; + Com<ID3DBlob> blob; + HRESULT hr = DisassembleShader( + pShaderBytecode, + TRUE, + comment, + &blob); + + if (SUCCEEDED(hr)) { + std::ofstream disassembledOut(str::tows(str::format(dumpPath, "/", name, ".dxso.dis").c_str()).c_str(), std::ios_base::binary | std::ios_base::trunc); + disassembledOut.write( + reinterpret_cast<const char*>(blob->GetBufferPointer()), + blob->GetBufferSize()); + } + } +#endif + + // Decide whether we need to create a pass-through + // geometry shader for vertex shader stream output + + const D3D9ConstantLayout& constantLayout = ShaderStage == VK_SHADER_STAGE_VERTEX_BIT + ? pDevice->GetVertexConstantLayout() + : pDevice->GetPixelConstantLayout(); + m_shaders = pModule->compile(*pDxsoModuleInfo, name, AnalysisInfo, constantLayout); + m_isgn = pModule->isgn(); + m_usedSamplers = pModule->usedSamplers(); + + // Shift up these sampler bits so we can just + // do an or per-draw in the device. + // We shift by 17 because 16 ps samplers + 1 dmap (tess) + if (ShaderStage == VK_SHADER_STAGE_VERTEX_BIT) + m_usedSamplers <<= 17; + + m_usedRTs = pModule->usedRTs(); + + m_info = pModule->info(); + m_meta = pModule->meta(); + m_constants = pModule->constants(); + + m_shaders[0]->setShaderKey(Key); + + if (m_shaders[1] != nullptr) { + // Lets lie about the shader key type for the state cache. 
+ m_shaders[1]->setShaderKey({ VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, Key.sha1() }); + } + + if (dumpPath.size() != 0) { +#ifdef _WIN32 + std::ofstream dumpStream( + str::tows(str::format(dumpPath, "/", name, ".spv").c_str()).c_str(), + std::ios_base::binary | std::ios_base::trunc); +#else + std::ofstream dumpStream( + str::format(dumpPath, "/", name, ".spv").c_str(), + std::ios_base::binary | std::ios_base::trunc); +#endif + + m_shaders[0]->dump(dumpStream); + } + + pDevice->GetDXVKDevice()->registerShader(m_shaders[0]); + + if (m_shaders[1] != nullptr) + pDevice->GetDXVKDevice()->registerShader(m_shaders[1]); + } + + + void D3D9ShaderModuleSet::GetShaderModule( + D3D9DeviceEx* pDevice, + D3D9CommonShader* pShaderModule, + VkShaderStageFlagBits ShaderStage, + const DxsoModuleInfo* pDxbcModuleInfo, + const void* pShaderBytecode) { + DxsoReader reader( + reinterpret_cast<const char*>(pShaderBytecode)); + + DxsoModule module(reader); + + if (module.info().majorVersion() > pDxbcModuleInfo->options.shaderModel) + throw DxvkError("GetShaderModule: Out of range of supported shader model"); + + if (module.info().shaderStage() != ShaderStage) + throw DxvkError("GetShaderModule: Bytecode does not match shader stage"); + + DxsoAnalysisInfo info = module.analyze(); + + DxvkShaderKey lookupKey = DxvkShaderKey( + ShaderStage, + Sha1Hash::compute(pShaderBytecode, info.bytecodeByteLength)); + + // Use the shader's unique key for the lookup + { std::unique_lock<dxvk::mutex> lock(m_mutex); + + auto entry = m_modules.find(lookupKey); + if (entry != m_modules.end()) { + *pShaderModule = entry->second; + return; + } + } + + // This shader has not been compiled yet, so we have to create a + // new module. This takes a while, so we won't lock the structure. + *pShaderModule = D3D9CommonShader( + pDevice, ShaderStage, lookupKey, + pDxbcModuleInfo, pShaderBytecode, + info, &module); + + // Insert the new module into the lookup table. 
If another thread + // has compiled the same shader in the meantime, we should return + // that object instead and discard the newly created module. + { std::unique_lock<dxvk::mutex> lock(m_mutex); + + auto status = m_modules.insert({ lookupKey, *pShaderModule }); + if (!status.second) { + *pShaderModule = status.first->second; + return; + } + } + } + +} diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_shader.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_shader.h new file mode 100644 index 00000000..9bfc3100 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_shader.h @@ -0,0 +1,196 @@ +#pragma once + +#include "d3d9_resource.h" +#include "../dxso/dxso_module.h" +#include "d3d9_shader_permutations.h" +#include "d3d9_util.h" + +#include <array> + +namespace dxvk { + + + /** + * \brief Common shader object + * + * Stores the compiled SPIR-V shader and the SHA-1 + * hash of the original DXBC shader, which can be + * used to identify the shader. + */ + class D3D9CommonShader { + + public: + + D3D9CommonShader(); + + D3D9CommonShader( + D3D9DeviceEx* pDevice, + VkShaderStageFlagBits ShaderStage, + const DxvkShaderKey& Key, + const DxsoModuleInfo* pDxbcModuleInfo, + const void* pShaderBytecode, + const DxsoAnalysisInfo& AnalysisInfo, + DxsoModule* pModule); + + + Rc<DxvkShader> GetShader(D3D9ShaderPermutation Permutation) const { + return m_shaders[Permutation]; + } + + std::string GetName() const { + return m_shaders[D3D9ShaderPermutations::None]->debugName(); + } + + const std::vector<uint8_t>& GetBytecode() const { + return m_bytecode; + } + + const DxsoIsgn& GetIsgn() const { + return m_isgn; + } + + const DxsoShaderMetaInfo& GetMeta() const { return m_meta; } + const DxsoDefinedConstants& GetConstants() const { return m_constants; } + + D3D9ShaderMasks GetShaderMask() const { return D3D9ShaderMasks{ m_usedSamplers, m_usedRTs }; } + + const DxsoProgramInfo& GetInfo() const { return m_info; } + + private: + + DxsoIsgn m_isgn; + uint32_t m_usedSamplers; 
+ uint32_t m_usedRTs; + + DxsoProgramInfo m_info; + DxsoShaderMetaInfo m_meta; + DxsoDefinedConstants m_constants; + + DxsoPermutations m_shaders; + + std::vector<uint8_t> m_bytecode; + + }; + + /** + * \brief Common shader interface + * + * Implements methods for all D3D11*Shader + * interfaces and stores the actual shader + * module object. + */ + template <typename Base> + class D3D9Shader : public D3D9DeviceChild<Base> { + + public: + + D3D9Shader( + D3D9DeviceEx* pDevice, + const D3D9CommonShader& CommonShader) + : D3D9DeviceChild<Base>( pDevice ) + , m_shader ( CommonShader ) { } + + HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, void** ppvObject) { + if (ppvObject == nullptr) + return E_POINTER; + + *ppvObject = nullptr; + + if (riid == __uuidof(IUnknown) + || riid == __uuidof(Base)) { + *ppvObject = ref(this); + return S_OK; + } + + Logger::warn("D3D9Shader::QueryInterface: Unknown interface query"); + Logger::warn(str::format(riid)); + return E_NOINTERFACE; + } + + HRESULT STDMETHODCALLTYPE GetFunction(void* pOut, UINT* pSizeOfData) { + if (pSizeOfData == nullptr) + return D3DERR_INVALIDCALL; + + const auto& bytecode = m_shader.GetBytecode(); + + if (pOut == nullptr) { + *pSizeOfData = bytecode.size(); + return D3D_OK; + } + + size_t copyAmount = std::min(size_t(*pSizeOfData), bytecode.size()); + std::memcpy(pOut, bytecode.data(), copyAmount); + + return D3D_OK; + } + + const D3D9CommonShader* GetCommonShader() const { + return &m_shader; + } + + private: + + D3D9CommonShader m_shader; + + }; + + // Needs their own classes and not usings for forward decl. 
+ + class D3D9VertexShader final : public D3D9Shader<IDirect3DVertexShader9> { + + public: + + D3D9VertexShader( + D3D9DeviceEx* pDevice, + const D3D9CommonShader& CommonShader) + : D3D9Shader<IDirect3DVertexShader9>( pDevice, CommonShader ) { } + + }; + + class D3D9PixelShader final : public D3D9Shader<IDirect3DPixelShader9> { + + public: + + D3D9PixelShader( + D3D9DeviceEx* pDevice, + const D3D9CommonShader& CommonShader) + : D3D9Shader<IDirect3DPixelShader9>( pDevice, CommonShader ) { } + + }; + + /** + * \brief Shader module set + * + * Some applications may compile the same shader multiple + * times, so we should cache the resulting shader modules + * and reuse them rather than creating new ones. This + * class is thread-safe. + */ + class D3D9ShaderModuleSet : public RcObject { + + public: + + void GetShaderModule( + D3D9DeviceEx* pDevice, + D3D9CommonShader* pShaderModule, + VkShaderStageFlagBits ShaderStage, + const DxsoModuleInfo* pDxbcModuleInfo, + const void* pShaderBytecode); + + private: + + dxvk::mutex m_mutex; + + std::unordered_map< + DxvkShaderKey, + D3D9CommonShader, + DxvkHash, DxvkEq> m_modules; + + }; + + template<typename T> + const D3D9CommonShader* GetCommonShader(const T& pShader) { + return pShader != nullptr ? pShader->GetCommonShader() : nullptr; + } + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_shader_permutations.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_shader_permutations.h new file mode 100644 index 00000000..cf2301d2 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_shader_permutations.h @@ -0,0 +1,20 @@ +#pragma once + +#include "d3d9_include.h" + +namespace dxvk { + + class DxvkShader; + + namespace D3D9ShaderPermutations { + enum D3D9ShaderPermutation { + None, + FlatShade, + Count + }; + } + using D3D9ShaderPermutation = D3D9ShaderPermutations::D3D9ShaderPermutation; + + using DxsoPermutations = std::array<Rc<DxvkShader>, D3D9ShaderPermutations::Count>; + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_shader_validator.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_shader_validator.h new file mode 100644 index 00000000..8e433027 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_shader_validator.h @@ -0,0 +1,68 @@ +#pragma once + +#include "d3d9_include.h" + +namespace dxvk { + + class IDirect3DShaderValidator9 : public IUnknown { + + public: + + virtual HRESULT STDMETHODCALLTYPE Begin( + void* pCallback, + void* pUserParam, + DWORD Unknown) = 0; + + virtual HRESULT STDMETHODCALLTYPE Instruction( + const char* pUnknown1, + UINT Unknown2, + const DWORD* pInstruction, + DWORD InstructionLength) = 0; + + virtual HRESULT STDMETHODCALLTYPE End() = 0; + + }; + + class D3D9ShaderValidator final : public ComObjectClamp<IDirect3DShaderValidator9> { + + public: + + HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, void** ppvObject) { + if (ppvObject == nullptr) + return E_POINTER; + + *ppvObject = ref(this); + return S_OK; + } + + + HRESULT STDMETHODCALLTYPE Begin( + void* pCallback, + void* pUserParam, + DWORD Unknown) { + Logger::debug("D3D9ShaderValidator::Begin: Stub"); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE Instruction( + const char* pUnknown1, + UINT Unknown2, + const DWORD* pInstruction, + DWORD InstructionLength) { + Logger::debug("D3D9ShaderValidator::Instruction: Stub"); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE End() { + Logger::debug("D3D9ShaderValidator::End: Stub"); + + return D3D_OK; + } + + }; + +}
// File: src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_spec_constants.h
#pragma once

#include <cstdint>

namespace dxvk {

  /**
   * \brief Specialization constant identifiers
   *
   * Unscoped enum with explicitly numbered entries.
   * NOTE(review): presumably these numbers are the spec
   * constant IDs that the generated shaders refer to, in
   * which case existing values must stay stable - confirm
   * against the shader compiler before renumbering.
   */
  enum D3D9SpecConstantId : uint32_t {
    AlphaCompareOp    = 0,
    SamplerType       = 1,
    FogEnabled        = 2,
    VertexFogMode     = 3,
    PixelFogMode      = 4,

    PointMode         = 5,
    ProjectionType    = 6,

    VertexShaderBools = 7,
    PixelShaderBools  = 8,
    Fetch4            = 9,

    SamplerDepthMode  = 10,
  };

}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_state.cpp b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_state.cpp new file mode 100644 index 00000000..c0ee06ec --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_state.cpp @@ -0,0 +1,26 @@ +#include "d3d9_state.h" + +#include "d3d9_texture.h" + +namespace dxvk { + + D3D9CapturableState::D3D9CapturableState() { + for (uint32_t i = 0; i < textures.size(); i++) + textures[i] = nullptr; + + for (uint32_t i = 0; i < clipPlanes.size(); i++) + clipPlanes[i] = D3D9ClipPlane(); + + for (uint32_t i = 0; i < streamFreq.size(); i++) + streamFreq[i] = 1; + + for (uint32_t i = 0; i < enabledLightIndices.size(); i++) + enabledLightIndices[i] = UINT32_MAX; + } + + D3D9CapturableState::~D3D9CapturableState() { + for (uint32_t i = 0; i < textures.size(); i++) + TextureChangePrivate(textures[i], nullptr); + } + +}
// File: src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_state.h
#pragma once

#include "d3d9_caps.h"
#include "d3d9_constant_set.h"
#include "../dxso/dxso_common.h"
#include "../util/util_matrix.h"

#include "d3d9_surface.h"
#include "d3d9_shader.h"
#include "d3d9_vertex_declaration.h"
#include "d3d9_buffer.h"

#include <array>
#include <bitset>
#include <optional>

namespace dxvk {

  // Sizes of the state arrays stored in D3D9CapturableState
  static constexpr uint32_t RenderStateCount  = 256;
  static constexpr uint32_t SamplerStateCount = D3DSAMP_DMAPOFFSET + 1;
  static constexpr uint32_t SamplerCount      = 21;
  static constexpr uint32_t TextureStageStateCount = DXVK_TSS_COUNT;

  // FOURCC values 'A2M0' / 'A2M1'.
  // NOTE(review): judging by the names, these are magic point size
  // values that toggle alpha to coverage - confirm against the device code.
  namespace hacks::PointSize {
    static constexpr DWORD AlphaToCoverageDisabled = MAKEFOURCC('A', '2', 'M', '0');
    static constexpr DWORD AlphaToCoverageEnabled  = MAKEFOURCC('A', '2', 'M', '1');
  }

  // Four plane coefficients
  struct D3D9ClipPlane {
    float coeff[4];
  };

  // Render state values together with their defaults.
  // Member order matches the enumerators of D3D9RenderStateItem below.
  struct D3D9RenderStateInfo {
    std::array<float, 3> fogColor = { };
    float fogScale   = 0.0f;
    float fogEnd     = 1.0f;
    float fogDensity = 1.0f;

    float alphaRef   = 0.0f;

    float pointSize    = 1.0f;
    float pointSizeMin = 1.0f;
    float pointSizeMax = 64.0f;
    float pointScaleA  = 1.0f;
    float pointScaleB  = 0.0f;
    float pointScaleC  = 0.0f;
  };

  // One enumerator per member of D3D9RenderStateInfo,
  // in declaration order, followed by the item count.
  enum class D3D9RenderStateItem {
    FogColor   = 0,
    FogScale   = 1,
    FogEnd,
    FogDensity,
    AlphaRef,

    PointSize,
    PointSizeMin,
    PointSizeMax,
    PointScaleA,
    PointScaleB,
    PointScaleC,

    Count
  };


  // This is needed in fixed function for POSITION_T support.
  // These are constants we need to * and add to move
  // Window Coords -> Real Coords w/ respect to the viewport.
  struct D3D9ViewportInfo {
    Vector4 inverseOffset;
    Vector4 inverseExtent;
  };

  // Light parameters derived from a D3DLIGHT9
  // and transformed by the given view matrix.
  struct D3D9Light {
    D3D9Light(const D3DLIGHT9& light, Matrix4 viewMtx) {
      Diffuse  = Vector4(light.Diffuse.r,  light.Diffuse.g,  light.Diffuse.b,  light.Diffuse.a);
      Specular = Vector4(light.Specular.r, light.Specular.g, light.Specular.b, light.Specular.a);
      Ambient  = Vector4(light.Ambient.r,  light.Ambient.g,  light.Ambient.b,  light.Ambient.a);

      // The position is transformed as a point (w = 1), the direction
      // as a vector (w = 0) that gets normalized after the transform.
      Position  = viewMtx * Vector4(light.Position.x,  light.Position.y,  light.Position.z,  1.0f);
      Direction = Vector4(light.Direction.x, light.Direction.y, light.Direction.z, 0.0f);
      Direction = normalize(viewMtx * Direction);

      Type         = light.Type;
      Range        = light.Range;
      Falloff      = light.Falloff;
      Attenuation0 = light.Attenuation0;
      Attenuation1 = light.Attenuation1;
      Attenuation2 = light.Attenuation2;
      // Theta and Phi are stored as the cosine of half the input angle
      Theta        = cosf(light.Theta / 2.0f);
      Phi          = cosf(light.Phi / 2.0f);
    }

    Vector4 Diffuse;
    Vector4 Specular;
    Vector4 Ambient;

    Vector4 Position;
    Vector4 Direction;

    D3DLIGHTTYPE Type;
    float Range;
    float Falloff;
    float Attenuation0;
    float Attenuation1;
    float Attenuation2;
    float Theta;
    float Phi;
  };


  // Fixed-function vertex stage data.
  // NOTE(review): presumably mirrored by a shader constant
  // buffer, so the member order would matter - confirm.
  struct D3D9FixedFunctionVS {
    Matrix4 WorldView;
    Matrix4 NormalMatrix;
    Matrix4 InverseView;
    Matrix4 Projection;

    std::array<Matrix4, 8> TexcoordMatrices;

    D3D9ViewportInfo ViewportInfo;

    Vector4 GlobalAmbient;
    std::array<D3D9Light, caps::MaxEnabledLights> Lights;
    D3DMATERIAL9 Material;
    float TweenFactor;
  };


  // Vertex blend matrices, 8 entries ("HW" variant)
  struct D3D9FixedFunctionVertexBlendDataHW {
    Matrix4 WorldView[8];
  };


  // Vertex blend matrices, 256 entries ("SW" variant)
  struct D3D9FixedFunctionVertexBlendDataSW {
    Matrix4 WorldView[256];
  };


  // Fixed-function pixel stage data
  struct D3D9FixedFunctionPS {
    Vector4 textureFactor;
  };

  enum D3D9SharedPSStages {
    D3D9SharedPSStages_Constant,
    D3D9SharedPSStages_BumpEnvMat0,
    D3D9SharedPSStages_BumpEnvMat1,
    D3D9SharedPSStages_BumpEnvLScale,
    D3D9SharedPSStages_BumpEnvLOffset,
    D3D9SharedPSStages_Count,
  };

  // Per-stage pixel shader data for 8 stages. Each stage consists
  // of 12 floats, the last two of which are explicit padding.
  struct D3D9SharedPS {
    struct Stage {
      float Constant[4];
      float BumpEnvMat[2][2];
      float BumpEnvLScale;
      float BumpEnvLOffset;
      float Padding[2];
    } Stages[8];
  };

  // Vertex buffer binding: buffer, byte offset and stride
  struct D3D9VBO {
    Com<D3D9VertexBuffer, false> vertexBuffer;

    UINT              offset = 0;
    UINT              stride = 0;
  };

  constexpr D3DLIGHT9 DefaultLight = {
    D3DLIGHT_DIRECTIONAL,     // Type
    {1.0f, 1.0f, 1.0f, 1.0f}, // Diffuse
    {0.0f, 0.0f, 0.0f, 0.0f}, // Specular
    {0.0f, 0.0f, 0.0f, 0.0f}, // Ambient
    {0.0f, 0.0f, 0.0f},       // Position
    {0.0f, 0.0f, 0.0f},       // Direction
    0.0f,                     // Range
    0.0f,                     // Falloff
    0.0f, 0.0f, 0.0f,         // Attenuations [constant, linear, quadratic]
    0.0f,                     // Theta
    0.0f                      // Phi
  };

  // Device state that can be stored in a state block. The constructor
  // and destructor are defined out of line. Several members (sampler
  // states, texture stages, viewport, scissor rect, transforms) have
  // no initializer in this header.
  struct D3D9CapturableState {
    D3D9CapturableState();

    ~D3D9CapturableState();

    Com<D3D9VertexDecl,  false>                      vertexDecl;
    Com<D3D9IndexBuffer, false>                      indices;

    std::array<DWORD, RenderStateCount>              renderStates = { 0 };

    std::array<
      std::array<DWORD, SamplerStateCount>,
      SamplerCount>                                  samplerStates;

    std::array<D3D9VBO, caps::MaxStreams>            vertexBuffers;

    // Raw pointers rather than Com<> smart pointers
    std::array<
      IDirect3DBaseTexture9*,
      SamplerCount>                                  textures;

    Com<D3D9VertexShader, false>                     vertexShader;
    Com<D3D9PixelShader,  false>                     pixelShader;

    D3DVIEWPORT9                                     viewport;
    RECT                                             scissorRect;

    std::array<
      D3D9ClipPlane,
      caps::MaxClipPlanes>                           clipPlanes;

    std::array<
      std::array<DWORD, TextureStageStateCount>,
      caps::TextureStageCount>                       textureStages;

    D3D9ShaderConstantsVSSoftware                    vsConsts;
    D3D9ShaderConstantsPS                            psConsts;

    std::array<UINT, caps::MaxStreams>               streamFreq;

    std::array<Matrix4, caps::MaxTransforms>         transforms;

    D3DMATERIAL9                                     material = D3DMATERIAL9();

    std::vector<std::optional<D3DLIGHT9>>            lights;
    std::array<DWORD, caps::MaxEnabledLights>        enabledLightIndices;

    // Returns true if Index occurs in enabledLightIndices (linear search).
    // Note that the local 'indices' shadows the index buffer member above.
    bool IsLightEnabled(DWORD Index) {
      const auto& indices = enabledLightIndices;
      return std::find(indices.begin(), indices.end(), Index) != indices.end();
    }
  };

  /**
   * \brief Writes shader constants into a captured state
   *
   * \tparam ProgramType Selects \c vsConsts or \c psConsts
   * \tparam ConstantType Float, Int or Bool constant file
   * \param [in] pState State object to update
   * \param [in] StartRegister Index of the first register
   * \param [in] pConstantData Source data
   * \param [in] Count Number of registers to write
   * \param [in] FloatEmu If set, float constants are written
   *    one register at a time through replaceNaN rather
   *    than with a single memcpy
   * \returns Always \c D3D_OK. No bounds checks are performed
   *    here, so the register range has to be validated by
   *    the caller.
   */
  template <
    DxsoProgramType  ProgramType,
    D3D9ConstantType ConstantType,
    typename         T>
  HRESULT UpdateStateConstants(
          D3D9CapturableState* pState,
          UINT                 StartRegister,
    const T*                   pConstantData,
          UINT                 Count,
          bool                 FloatEmu) {
    auto UpdateHelper = [&] (auto& set) {
      if constexpr (ConstantType == D3D9ConstantType::Float) {

        if (!FloatEmu) {
          size_t size = Count * sizeof(Vector4);

          std::memcpy(set.fConsts[StartRegister].data, pConstantData, size);
        }
        else {
          // Each register consumes four floats of the source data
          for (UINT i = 0; i < Count; i++)
            set.fConsts[StartRegister + i] = replaceNaN(pConstantData + (i * 4));
        }
      }
      else if constexpr (ConstantType == D3D9ConstantType::Int) {
        size_t size = Count * sizeof(Vector4i);

        std::memcpy(set.iConsts[StartRegister].data, pConstantData, size);
      }
      else {
        // Bool constants are packed as a bit field,
        // with 32 constants per array element.
        for (uint32_t i = 0; i < Count; i++) {
          const uint32_t constantIdx = StartRegister + i;
          const uint32_t arrayIdx    = constantIdx / 32;
          const uint32_t bitIdx      = constantIdx % 32;

          const uint32_t bit = 1u << bitIdx;

          set.bConsts[arrayIdx] &= ~bit;
          if (pConstantData[i])
            set.bConsts[arrayIdx] |= bit;
        }
      }

      return D3D_OK;
    };

    return ProgramType == DxsoProgramTypes::VertexShader
      ? UpdateHelper(pState->vsConsts)
      : UpdateHelper(pState->psConsts);
  }

  // Capturable state plus the render target and depth
  // stencil bindings, which are not part of the base struct.
  struct Direct3DState9 : public D3D9CapturableState {

    std::array<Com<D3D9Surface, false>, caps::MaxSimultaneousRenderTargets> renderTargets;
    Com<D3D9Surface, false> depthStencil;

  };


  // NOTE(review): streamsInstanced / streamsUsed look like
  // per-stream bit masks - confirm against the users.
  struct D3D9InputAssemblyState {
    D3DPRIMITIVETYPE primitiveType = D3DPRIMITIVETYPE(0);
    uint32_t streamsInstanced = 0;
    uint32_t streamsUsed      = 0;
  };

}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_stateblock.cpp b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_stateblock.cpp new file mode 100644 index 00000000..9cbf92f9 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_stateblock.cpp @@ -0,0 +1,531 @@ +#include "d3d9_stateblock.h" + +#include "d3d9_device.h" +#include "d3d9_vertex_declaration.h" +#include "d3d9_buffer.h" +#include "d3d9_shader.h" +#include "d3d9_texture.h" + +#include "d3d9_util.h" + +namespace dxvk { + + D3D9StateBlock::D3D9StateBlock(D3D9DeviceEx* pDevice, D3D9StateBlockType Type) + : D3D9StateBlockBase(pDevice) + , m_deviceState (pDevice->GetRawState()) { + CaptureType(Type); + } + + + HRESULT STDMETHODCALLTYPE D3D9StateBlock::QueryInterface( + REFIID riid, + void** ppvObject) { + if (ppvObject == nullptr) + return E_POINTER; + + *ppvObject = nullptr; + + if (riid == __uuidof(IUnknown) + || riid == __uuidof(IDirect3DStateBlock9)) { + *ppvObject = ref(this); + return S_OK; + } + + Logger::warn("D3D9StateBlock::QueryInterface: Unknown interface query"); + Logger::warn(str::format(riid)); + return E_NOINTERFACE; + } + + + HRESULT STDMETHODCALLTYPE D3D9StateBlock::Capture() { + if (m_captures.flags.test(D3D9CapturedStateFlag::VertexDecl)) + SetVertexDeclaration(m_deviceState->vertexDecl.ptr()); + + ApplyOrCapture<D3D9StateFunction::Capture>(); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9StateBlock::Apply() { + m_applying = true; + + if (m_captures.flags.test(D3D9CapturedStateFlag::VertexDecl) && m_state.vertexDecl != nullptr) + m_parent->SetVertexDeclaration(m_state.vertexDecl.ptr()); + + ApplyOrCapture<D3D9StateFunction::Apply>(); + m_applying = false; + + return D3D_OK; + } + + + HRESULT D3D9StateBlock::SetVertexDeclaration(D3D9VertexDecl* pDecl) { + m_state.vertexDecl = pDecl; + + m_captures.flags.set(D3D9CapturedStateFlag::VertexDecl); + return D3D_OK; + } + + + HRESULT D3D9StateBlock::SetIndices(D3D9IndexBuffer* pIndexData) { + 
m_state.indices = pIndexData; + + m_captures.flags.set(D3D9CapturedStateFlag::Indices); + return D3D_OK; + } + + + HRESULT D3D9StateBlock::SetRenderState(D3DRENDERSTATETYPE State, DWORD Value) { + m_state.renderStates[State] = Value; + + m_captures.flags.set(D3D9CapturedStateFlag::RenderStates); + m_captures.renderStates.set(State, true); + return D3D_OK; + } + + + HRESULT D3D9StateBlock::SetStateSamplerState( + DWORD StateSampler, + D3DSAMPLERSTATETYPE Type, + DWORD Value) { + m_state.samplerStates[StateSampler][Type] = Value; + + m_captures.flags.set(D3D9CapturedStateFlag::SamplerStates); + m_captures.samplers.set(StateSampler, true); + m_captures.samplerStates[StateSampler].set(Type, true); + return D3D_OK; + } + + + HRESULT D3D9StateBlock::SetStreamSource( + UINT StreamNumber, + D3D9VertexBuffer* pStreamData, + UINT OffsetInBytes, + UINT Stride) { + m_state.vertexBuffers[StreamNumber].vertexBuffer = pStreamData; + + m_state.vertexBuffers[StreamNumber].offset = OffsetInBytes; + m_state.vertexBuffers[StreamNumber].stride = Stride; + + m_captures.flags.set(D3D9CapturedStateFlag::VertexBuffers); + m_captures.vertexBuffers.set(StreamNumber, true); + return D3D_OK; + } + + + HRESULT D3D9StateBlock::SetStreamSourceFreq(UINT StreamNumber, UINT Setting) { + m_state.streamFreq[StreamNumber] = Setting; + + m_captures.flags.set(D3D9CapturedStateFlag::StreamFreq); + m_captures.streamFreq.set(StreamNumber, true); + return D3D_OK; + } + + + HRESULT D3D9StateBlock::SetStateTexture(DWORD StateSampler, IDirect3DBaseTexture9* pTexture) { + TextureChangePrivate(m_state.textures[StateSampler], pTexture); + + m_captures.flags.set(D3D9CapturedStateFlag::Textures); + m_captures.textures.set(StateSampler, true); + return D3D_OK; + } + + + HRESULT D3D9StateBlock::SetVertexShader(D3D9VertexShader* pShader) { + m_state.vertexShader = pShader; + + m_captures.flags.set(D3D9CapturedStateFlag::VertexShader); + return D3D_OK; + } + + + HRESULT D3D9StateBlock::SetPixelShader(D3D9PixelShader* 
pShader) { + m_state.pixelShader = pShader; + + m_captures.flags.set(D3D9CapturedStateFlag::PixelShader); + return D3D_OK; + } + + + HRESULT D3D9StateBlock::SetMaterial(const D3DMATERIAL9* pMaterial) { + m_state.material = *pMaterial; + + m_captures.flags.set(D3D9CapturedStateFlag::Material); + return D3D_OK; + } + + + HRESULT D3D9StateBlock::SetStateTransform(uint32_t idx, const D3DMATRIX* pMatrix) { + m_state.transforms[idx] = ConvertMatrix(pMatrix); + + m_captures.flags.set(D3D9CapturedStateFlag::Transforms); + m_captures.transforms.set(idx, true); + return D3D_OK; + } + + + HRESULT D3D9StateBlock::SetStateTextureStageState( + DWORD Stage, + D3D9TextureStageStateTypes Type, + DWORD Value) { + m_state.textureStages[Stage][Type] = Value; + + m_captures.flags.set(D3D9CapturedStateFlag::TextureStages); + m_captures.textureStages.set(Stage, true); + m_captures.textureStageStates[Stage].set(Type, true); + return D3D_OK; + } + + + HRESULT D3D9StateBlock::MultiplyStateTransform(uint32_t idx, const D3DMATRIX* pMatrix) { + m_state.transforms[idx] = m_state.transforms[idx] * ConvertMatrix(pMatrix); + + m_captures.flags.set(D3D9CapturedStateFlag::Transforms); + m_captures.transforms.set(idx, true); + return D3D_OK; + } + + + HRESULT D3D9StateBlock::SetViewport(const D3DVIEWPORT9* pViewport) { + m_state.viewport = *pViewport; + + m_captures.flags.set(D3D9CapturedStateFlag::Viewport); + return D3D_OK; + } + + + HRESULT D3D9StateBlock::SetScissorRect(const RECT* pRect) { + m_state.scissorRect = *pRect; + + m_captures.flags.set(D3D9CapturedStateFlag::ScissorRect); + return D3D_OK; + } + + + HRESULT D3D9StateBlock::SetClipPlane(DWORD Index, const float* pPlane) { + for (uint32_t i = 0; i < 4; i++) + m_state.clipPlanes[Index].coeff[i] = pPlane[i]; + + m_captures.flags.set(D3D9CapturedStateFlag::ClipPlanes); + m_captures.clipPlanes.set(Index, true); + return D3D_OK; + } + + + HRESULT D3D9StateBlock::SetVertexShaderConstantF( + UINT StartRegister, + const float* pConstantData, + 
UINT Vector4fCount) { + return SetShaderConstants< + DxsoProgramTypes::VertexShader, + D3D9ConstantType::Float>( + StartRegister, + pConstantData, + Vector4fCount); + } + + + HRESULT D3D9StateBlock::SetVertexShaderConstantI( + UINT StartRegister, + const int* pConstantData, + UINT Vector4iCount) { + return SetShaderConstants< + DxsoProgramTypes::VertexShader, + D3D9ConstantType::Int>( + StartRegister, + pConstantData, + Vector4iCount); + } + + + HRESULT D3D9StateBlock::SetVertexShaderConstantB( + UINT StartRegister, + const BOOL* pConstantData, + UINT BoolCount) { + return SetShaderConstants< + DxsoProgramTypes::VertexShader, + D3D9ConstantType::Bool>( + StartRegister, + pConstantData, + BoolCount); + } + + + HRESULT D3D9StateBlock::SetPixelShaderConstantF( + UINT StartRegister, + const float* pConstantData, + UINT Vector4fCount) { + return SetShaderConstants< + DxsoProgramTypes::PixelShader, + D3D9ConstantType::Float>( + StartRegister, + pConstantData, + Vector4fCount); + } + + + HRESULT D3D9StateBlock::SetPixelShaderConstantI( + UINT StartRegister, + const int* pConstantData, + UINT Vector4iCount) { + return SetShaderConstants< + DxsoProgramTypes::PixelShader, + D3D9ConstantType::Int>( + StartRegister, + pConstantData, + Vector4iCount); + } + + + HRESULT D3D9StateBlock::SetPixelShaderConstantB( + UINT StartRegister, + const BOOL* pConstantData, + UINT BoolCount) { + return SetShaderConstants< + DxsoProgramTypes::PixelShader, + D3D9ConstantType::Bool>( + StartRegister, + pConstantData, + BoolCount); + } + + + HRESULT D3D9StateBlock::SetVertexBoolBitfield(uint32_t idx, uint32_t mask, uint32_t bits) { + m_state.vsConsts.bConsts[idx] &= ~mask; + m_state.vsConsts.bConsts[idx] |= bits & mask; + return D3D_OK; + } + + + HRESULT D3D9StateBlock::SetPixelBoolBitfield(uint32_t idx, uint32_t mask, uint32_t bits) { + m_state.psConsts.bConsts[idx] &= ~mask; + m_state.psConsts.bConsts[idx] |= bits & mask; + return D3D_OK; + } + + + void 
D3D9StateBlock::CapturePixelRenderStates() { + m_captures.flags.set(D3D9CapturedStateFlag::RenderStates); + + m_captures.renderStates.set(D3DRS_ZENABLE, true); + m_captures.renderStates.set(D3DRS_FILLMODE, true); + m_captures.renderStates.set(D3DRS_SHADEMODE, true); + m_captures.renderStates.set(D3DRS_ZWRITEENABLE, true); + m_captures.renderStates.set(D3DRS_ALPHATESTENABLE, true); + m_captures.renderStates.set(D3DRS_LASTPIXEL, true); + m_captures.renderStates.set(D3DRS_SRCBLEND, true); + m_captures.renderStates.set(D3DRS_DESTBLEND, true); + m_captures.renderStates.set(D3DRS_ZFUNC, true); + m_captures.renderStates.set(D3DRS_ALPHAREF, true); + m_captures.renderStates.set(D3DRS_ALPHAFUNC, true); + m_captures.renderStates.set(D3DRS_DITHERENABLE, true); + m_captures.renderStates.set(D3DRS_FOGSTART, true); + m_captures.renderStates.set(D3DRS_FOGEND, true); + m_captures.renderStates.set(D3DRS_FOGDENSITY, true); + m_captures.renderStates.set(D3DRS_ALPHABLENDENABLE, true); + m_captures.renderStates.set(D3DRS_DEPTHBIAS, true); + m_captures.renderStates.set(D3DRS_STENCILENABLE, true); + m_captures.renderStates.set(D3DRS_STENCILFAIL, true); + m_captures.renderStates.set(D3DRS_STENCILZFAIL, true); + m_captures.renderStates.set(D3DRS_STENCILPASS, true); + m_captures.renderStates.set(D3DRS_STENCILFUNC, true); + m_captures.renderStates.set(D3DRS_STENCILREF, true); + m_captures.renderStates.set(D3DRS_STENCILMASK, true); + m_captures.renderStates.set(D3DRS_STENCILWRITEMASK, true); + m_captures.renderStates.set(D3DRS_TEXTUREFACTOR, true); + m_captures.renderStates.set(D3DRS_WRAP0, true); + m_captures.renderStates.set(D3DRS_WRAP1, true); + m_captures.renderStates.set(D3DRS_WRAP2, true); + m_captures.renderStates.set(D3DRS_WRAP3, true); + m_captures.renderStates.set(D3DRS_WRAP4, true); + m_captures.renderStates.set(D3DRS_WRAP5, true); + m_captures.renderStates.set(D3DRS_WRAP6, true); + m_captures.renderStates.set(D3DRS_WRAP7, true); + m_captures.renderStates.set(D3DRS_WRAP8, true); + 
m_captures.renderStates.set(D3DRS_WRAP9, true); + m_captures.renderStates.set(D3DRS_WRAP10, true); + m_captures.renderStates.set(D3DRS_WRAP11, true); + m_captures.renderStates.set(D3DRS_WRAP12, true); + m_captures.renderStates.set(D3DRS_WRAP13, true); + m_captures.renderStates.set(D3DRS_WRAP14, true); + m_captures.renderStates.set(D3DRS_WRAP15, true); + m_captures.renderStates.set(D3DRS_COLORWRITEENABLE, true); + m_captures.renderStates.set(D3DRS_BLENDOP, true); + m_captures.renderStates.set(D3DRS_SCISSORTESTENABLE, true); + m_captures.renderStates.set(D3DRS_SLOPESCALEDEPTHBIAS, true); + m_captures.renderStates.set(D3DRS_ANTIALIASEDLINEENABLE, true); + m_captures.renderStates.set(D3DRS_TWOSIDEDSTENCILMODE, true); + m_captures.renderStates.set(D3DRS_CCW_STENCILFAIL, true); + m_captures.renderStates.set(D3DRS_CCW_STENCILZFAIL, true); + m_captures.renderStates.set(D3DRS_CCW_STENCILPASS, true); + m_captures.renderStates.set(D3DRS_CCW_STENCILFUNC, true); + m_captures.renderStates.set(D3DRS_COLORWRITEENABLE1, true); + m_captures.renderStates.set(D3DRS_COLORWRITEENABLE2, true); + m_captures.renderStates.set(D3DRS_COLORWRITEENABLE3, true); + m_captures.renderStates.set(D3DRS_BLENDFACTOR, true); + m_captures.renderStates.set(D3DRS_SRGBWRITEENABLE, true); + m_captures.renderStates.set(D3DRS_SEPARATEALPHABLENDENABLE, true); + m_captures.renderStates.set(D3DRS_SRCBLENDALPHA, true); + m_captures.renderStates.set(D3DRS_DESTBLENDALPHA, true); + m_captures.renderStates.set(D3DRS_BLENDOPALPHA, true); + } + + + void D3D9StateBlock::CapturePixelSamplerStates() { + m_captures.flags.set(D3D9CapturedStateFlag::SamplerStates); + + for (uint32_t i = 0; i < 17; i++) { + m_captures.samplers.set(i, true); + + m_captures.samplerStates[i].set(D3DSAMP_ADDRESSU, true); + m_captures.samplerStates[i].set(D3DSAMP_ADDRESSV, true); + m_captures.samplerStates[i].set(D3DSAMP_ADDRESSW, true); + m_captures.samplerStates[i].set(D3DSAMP_BORDERCOLOR, true); + 
m_captures.samplerStates[i].set(D3DSAMP_MAGFILTER, true); + m_captures.samplerStates[i].set(D3DSAMP_MINFILTER, true); + m_captures.samplerStates[i].set(D3DSAMP_MIPFILTER, true); + m_captures.samplerStates[i].set(D3DSAMP_MIPMAPLODBIAS, true); + m_captures.samplerStates[i].set(D3DSAMP_MAXMIPLEVEL, true); + m_captures.samplerStates[i].set(D3DSAMP_MAXANISOTROPY, true); + m_captures.samplerStates[i].set(D3DSAMP_SRGBTEXTURE, true); + m_captures.samplerStates[i].set(D3DSAMP_ELEMENTINDEX, true); + } + } + + + void D3D9StateBlock::CapturePixelShaderStates() { + m_captures.flags.set(D3D9CapturedStateFlag::PixelShader); + m_captures.flags.set(D3D9CapturedStateFlag::PsConstants); + + m_captures.psConsts.fConsts.setAll(); + m_captures.psConsts.iConsts.setAll(); + m_captures.psConsts.bConsts.setAll(); + } + + + void D3D9StateBlock::CaptureVertexRenderStates() { + m_captures.flags.set(D3D9CapturedStateFlag::RenderStates); + + m_captures.renderStates.set(D3DRS_CULLMODE, true); + m_captures.renderStates.set(D3DRS_FOGENABLE, true); + m_captures.renderStates.set(D3DRS_FOGCOLOR, true); + m_captures.renderStates.set(D3DRS_FOGTABLEMODE, true); + m_captures.renderStates.set(D3DRS_FOGSTART, true); + m_captures.renderStates.set(D3DRS_FOGEND, true); + m_captures.renderStates.set(D3DRS_FOGDENSITY, true); + m_captures.renderStates.set(D3DRS_RANGEFOGENABLE, true); + m_captures.renderStates.set(D3DRS_AMBIENT, true); + m_captures.renderStates.set(D3DRS_COLORVERTEX, true); + m_captures.renderStates.set(D3DRS_FOGVERTEXMODE, true); + m_captures.renderStates.set(D3DRS_CLIPPING, true); + m_captures.renderStates.set(D3DRS_LIGHTING, true); + m_captures.renderStates.set(D3DRS_LOCALVIEWER, true); + m_captures.renderStates.set(D3DRS_EMISSIVEMATERIALSOURCE, true); + m_captures.renderStates.set(D3DRS_AMBIENTMATERIALSOURCE, true); + m_captures.renderStates.set(D3DRS_DIFFUSEMATERIALSOURCE, true); + m_captures.renderStates.set(D3DRS_SPECULARMATERIALSOURCE, true); + 
m_captures.renderStates.set(D3DRS_VERTEXBLEND, true); + m_captures.renderStates.set(D3DRS_CLIPPLANEENABLE, true); + m_captures.renderStates.set(D3DRS_POINTSIZE, true); + m_captures.renderStates.set(D3DRS_POINTSIZE_MIN, true); + m_captures.renderStates.set(D3DRS_POINTSPRITEENABLE, true); + m_captures.renderStates.set(D3DRS_POINTSCALEENABLE, true); + m_captures.renderStates.set(D3DRS_POINTSCALE_A, true); + m_captures.renderStates.set(D3DRS_POINTSCALE_B, true); + m_captures.renderStates.set(D3DRS_POINTSCALE_C, true); + m_captures.renderStates.set(D3DRS_MULTISAMPLEANTIALIAS, true); + m_captures.renderStates.set(D3DRS_MULTISAMPLEMASK, true); + m_captures.renderStates.set(D3DRS_PATCHEDGESTYLE, true); + m_captures.renderStates.set(D3DRS_POINTSIZE_MAX, true); + m_captures.renderStates.set(D3DRS_INDEXEDVERTEXBLENDENABLE, true); + m_captures.renderStates.set(D3DRS_TWEENFACTOR, true); + m_captures.renderStates.set(D3DRS_POSITIONDEGREE, true); + m_captures.renderStates.set(D3DRS_NORMALDEGREE, true); + m_captures.renderStates.set(D3DRS_MINTESSELLATIONLEVEL, true); + m_captures.renderStates.set(D3DRS_MAXTESSELLATIONLEVEL, true); + m_captures.renderStates.set(D3DRS_ADAPTIVETESS_X, true); + m_captures.renderStates.set(D3DRS_ADAPTIVETESS_Y, true); + m_captures.renderStates.set(D3DRS_ADAPTIVETESS_Z, true); + m_captures.renderStates.set(D3DRS_ADAPTIVETESS_W, true); + m_captures.renderStates.set(D3DRS_ENABLEADAPTIVETESSELLATION, true); + m_captures.renderStates.set(D3DRS_NORMALIZENORMALS, true); + m_captures.renderStates.set(D3DRS_SPECULARENABLE, true); + m_captures.renderStates.set(D3DRS_SHADEMODE, true); + } + + + void D3D9StateBlock::CaptureVertexSamplerStates() { + m_captures.flags.set(D3D9CapturedStateFlag::SamplerStates); + + for (uint32_t i = 17; i < SamplerCount; i++) { + m_captures.samplers.set(i, true); + m_captures.samplerStates[i].set(D3DSAMP_DMAPOFFSET, true); + } + } + + + void D3D9StateBlock::CaptureVertexShaderStates() { + 
m_captures.flags.set(D3D9CapturedStateFlag::VertexShader); + m_captures.flags.set(D3D9CapturedStateFlag::VsConstants); + + for (uint32_t i = 0; i < m_parent->GetVertexConstantLayout().floatCount / 32; i++) + m_captures.vsConsts.fConsts.dword(i) = std::numeric_limits<uint32_t>::max(); + + for (uint32_t i = 0; i < m_parent->GetVertexConstantLayout().intCount / 32; i++) + m_captures.vsConsts.iConsts.dword(i) = std::numeric_limits<uint32_t>::max(); + + for (uint32_t i = 0; i < m_parent->GetVertexConstantLayout().bitmaskCount; i++) + m_captures.vsConsts.bConsts.dword(i) = std::numeric_limits<uint32_t>::max(); + } + + + void D3D9StateBlock::CaptureType(D3D9StateBlockType Type) { + if (Type == D3D9StateBlockType::PixelState || Type == D3D9StateBlockType::All) { + CapturePixelRenderStates(); + CapturePixelSamplerStates(); + CapturePixelShaderStates(); + + m_captures.flags.set(D3D9CapturedStateFlag::TextureStages); + m_captures.textureStages.setAll(); + for (auto& stage : m_captures.textureStageStates) + stage.setAll(); + } + + if (Type == D3D9StateBlockType::VertexState || Type == D3D9StateBlockType::All) { + CaptureVertexRenderStates(); + CaptureVertexSamplerStates(); + CaptureVertexShaderStates(); + + m_captures.flags.set(D3D9CapturedStateFlag::VertexDecl); + m_captures.flags.set(D3D9CapturedStateFlag::StreamFreq); + + for (uint32_t i = 0; i < caps::MaxStreams; i++) + m_captures.streamFreq.set(i, true); + } + + if (Type == D3D9StateBlockType::All) { + m_captures.flags.set(D3D9CapturedStateFlag::Textures); + m_captures.textures.setAll(); + + m_captures.flags.set(D3D9CapturedStateFlag::VertexBuffers); + m_captures.vertexBuffers.setAll(); + + m_captures.flags.set(D3D9CapturedStateFlag::Indices); + m_captures.flags.set(D3D9CapturedStateFlag::Viewport); + m_captures.flags.set(D3D9CapturedStateFlag::ScissorRect); + + m_captures.flags.set(D3D9CapturedStateFlag::ClipPlanes); + m_captures.clipPlanes.setAll(); + + m_captures.flags.set(D3D9CapturedStateFlag::Transforms); + 
m_captures.transforms.setAll(); + + m_captures.flags.set(D3D9CapturedStateFlag::Material); + } + + if (Type != D3D9StateBlockType::None) + this->Capture(); + } + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_stateblock.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_stateblock.h new file mode 100644 index 00000000..43a712ad --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_stateblock.h @@ -0,0 +1,380 @@ +#pragma once + +#include "d3d9_device_child.h" +#include "d3d9_device.h" +#include "d3d9_state.h" + +#include "../util/util_bit.h" + +namespace dxvk { + + enum class D3D9CapturedStateFlag : uint32_t { + VertexDecl, + Indices, + RenderStates, + SamplerStates, + VertexBuffers, + Textures, + VertexShader, + PixelShader, + Viewport, + ScissorRect, + ClipPlanes, + VsConstants, + PsConstants, + StreamFreq, + Transforms, + TextureStages, + Material + }; + + using D3D9CapturedStateFlags = Flags<D3D9CapturedStateFlag>; + + struct D3D9StateCaptures { + D3D9CapturedStateFlags flags; + + bit::bitset<RenderStateCount> renderStates; + + bit::bitset<SamplerCount> samplers; + std::array< + bit::bitset<SamplerStateCount>, + SamplerCount> samplerStates; + + bit::bitset<caps::MaxStreams> vertexBuffers; + bit::bitset<SamplerCount> textures; + bit::bitset<caps::MaxClipPlanes> clipPlanes; + bit::bitset<caps::MaxStreams> streamFreq; + bit::bitset<caps::MaxTransforms> transforms; + bit::bitset<caps::TextureStageCount> textureStages; + std::array< + bit::bitset<TextureStageStateCount>, + caps::TextureStageCount> textureStageStates; + + struct { + bit::bitset<caps::MaxFloatConstantsSoftware> fConsts; + bit::bitset<caps::MaxOtherConstantsSoftware> iConsts; + bit::bitset<caps::MaxOtherConstantsSoftware> bConsts; + } vsConsts; + + struct { + bit::bitset<caps::MaxFloatConstantsPS> fConsts; + bit::bitset<caps::MaxOtherConstants> iConsts; + bit::bitset<caps::MaxOtherConstants> bConsts; + } psConsts; + }; + + enum class D3D9StateBlockType :uint32_t { + None, + VertexState, + PixelState, + All + }; + + inline D3D9StateBlockType ConvertStateBlockType(D3DSTATEBLOCKTYPE type) { + switch (type) { + case 
D3DSBT_PIXELSTATE: return D3D9StateBlockType::PixelState; + case D3DSBT_VERTEXSTATE: return D3D9StateBlockType::VertexState; + default: + case D3DSBT_ALL: return D3D9StateBlockType::All; + } + } + + using D3D9StateBlockBase = D3D9DeviceChild<IDirect3DStateBlock9>; + class D3D9StateBlock : public D3D9StateBlockBase { + + public: + + D3D9StateBlock(D3D9DeviceEx* pDevice, D3D9StateBlockType Type); + + HRESULT STDMETHODCALLTYPE QueryInterface( + REFIID riid, + void** ppvObject) final; + + HRESULT STDMETHODCALLTYPE Capture() final; + HRESULT STDMETHODCALLTYPE Apply() final; + + HRESULT SetVertexDeclaration(D3D9VertexDecl* pDecl); + + HRESULT SetIndices(D3D9IndexBuffer* pIndexData); + + HRESULT SetRenderState(D3DRENDERSTATETYPE State, DWORD Value); + + HRESULT SetStateSamplerState( + DWORD StateSampler, + D3DSAMPLERSTATETYPE Type, + DWORD Value); + + HRESULT SetStreamSource( + UINT StreamNumber, + D3D9VertexBuffer* pStreamData, + UINT OffsetInBytes, + UINT Stride); + + HRESULT SetStreamSourceFreq(UINT StreamNumber, UINT Setting); + + HRESULT SetStateTexture(DWORD StateSampler, IDirect3DBaseTexture9* pTexture); + + HRESULT SetVertexShader(D3D9VertexShader* pShader); + + HRESULT SetPixelShader(D3D9PixelShader* pShader); + + HRESULT SetMaterial(const D3DMATERIAL9* pMaterial); + + HRESULT SetStateTransform(uint32_t idx, const D3DMATRIX* pMatrix); + + HRESULT SetStateTextureStageState( + DWORD Stage, + D3D9TextureStageStateTypes Type, + DWORD Value); + + HRESULT MultiplyStateTransform(uint32_t idx, const D3DMATRIX* pMatrix); + + HRESULT SetViewport(const D3DVIEWPORT9* pViewport); + + HRESULT SetScissorRect(const RECT* pRect); + + HRESULT SetClipPlane(DWORD Index, const float* pPlane); + + + HRESULT SetVertexShaderConstantF( + UINT StartRegister, + const float* pConstantData, + UINT Vector4fCount); + + HRESULT SetVertexShaderConstantI( + UINT StartRegister, + const int* pConstantData, + UINT Vector4iCount); + + HRESULT SetVertexShaderConstantB( + UINT StartRegister, + const 
BOOL* pConstantData, + UINT BoolCount); + + + HRESULT SetPixelShaderConstantF( + UINT StartRegister, + const float* pConstantData, + UINT Vector4fCount); + + HRESULT SetPixelShaderConstantI( + UINT StartRegister, + const int* pConstantData, + UINT Vector4iCount); + + HRESULT SetPixelShaderConstantB( + UINT StartRegister, + const BOOL* pConstantData, + UINT BoolCount); + + enum class D3D9StateFunction { + Apply, + Capture + }; + + template <typename Dst, typename Src> + void ApplyOrCapture(Dst* dst, const Src* src) { + if (m_captures.flags.test(D3D9CapturedStateFlag::StreamFreq)) { + for (uint32_t idx : bit::BitMask(m_captures.streamFreq.dword(0))) + dst->SetStreamSourceFreq(idx, src->streamFreq[idx]); + } + + if (m_captures.flags.test(D3D9CapturedStateFlag::Indices)) + dst->SetIndices(src->indices.ptr()); + + if (m_captures.flags.test(D3D9CapturedStateFlag::RenderStates)) { + for (uint32_t i = 0; i < m_captures.renderStates.dwordCount(); i++) { + for (uint32_t rs : bit::BitMask(m_captures.renderStates.dword(i))) { + uint32_t idx = i * 32 + rs; + + dst->SetRenderState(D3DRENDERSTATETYPE(idx), src->renderStates[idx]); + } + } + } + + if (m_captures.flags.test(D3D9CapturedStateFlag::SamplerStates)) { + for (uint32_t samplerIdx : bit::BitMask(m_captures.samplers.dword(0))) { + for (uint32_t stateIdx : bit::BitMask(m_captures.samplerStates[samplerIdx].dword(0))) + dst->SetStateSamplerState(samplerIdx, D3DSAMPLERSTATETYPE(stateIdx), src->samplerStates[samplerIdx][stateIdx]); + } + } + + if (m_captures.flags.test(D3D9CapturedStateFlag::VertexBuffers)) { + for (uint32_t idx : bit::BitMask(m_captures.vertexBuffers.dword(0))) { + const auto& vbo = src->vertexBuffers[idx]; + dst->SetStreamSource( + idx, + vbo.vertexBuffer.ptr(), + vbo.offset, + vbo.stride); + } + } + + if (m_captures.flags.test(D3D9CapturedStateFlag::Material)) + dst->SetMaterial(&src->material); + + if (m_captures.flags.test(D3D9CapturedStateFlag::Textures)) { + for (uint32_t idx : 
bit::BitMask(m_captures.textures.dword(0))) + dst->SetStateTexture(idx, src->textures[idx]); + } + + if (m_captures.flags.test(D3D9CapturedStateFlag::VertexShader)) + dst->SetVertexShader(src->vertexShader.ptr()); + + if (m_captures.flags.test(D3D9CapturedStateFlag::PixelShader)) + dst->SetPixelShader(src->pixelShader.ptr()); + + if (m_captures.flags.test(D3D9CapturedStateFlag::Transforms)) { + for (uint32_t i = 0; i < m_captures.transforms.dwordCount(); i++) { + for (uint32_t trans : bit::BitMask(m_captures.transforms.dword(i))) { + uint32_t idx = i * 32 + trans; + + dst->SetStateTransform(idx, reinterpret_cast<const D3DMATRIX*>(&src->transforms[idx])); + } + } + } + + if (m_captures.flags.test(D3D9CapturedStateFlag::TextureStages)) { + for (uint32_t stageIdx : bit::BitMask(m_captures.textureStages.dword(0))) { + for (uint32_t stateIdx : bit::BitMask(m_captures.textureStageStates[stageIdx].dword(0))) + dst->SetStateTextureStageState(stageIdx, D3D9TextureStageStateTypes(stateIdx), src->textureStages[stageIdx][stateIdx]); + } + } + + if (m_captures.flags.test(D3D9CapturedStateFlag::Viewport)) + dst->SetViewport(&src->viewport); + + if (m_captures.flags.test(D3D9CapturedStateFlag::ScissorRect)) + dst->SetScissorRect(&src->scissorRect); + + if (m_captures.flags.test(D3D9CapturedStateFlag::ClipPlanes)) { + for (uint32_t idx : bit::BitMask(m_captures.clipPlanes.dword(0))) + dst->SetClipPlane(idx, src->clipPlanes[idx].coeff); + } + + if (m_captures.flags.test(D3D9CapturedStateFlag::VsConstants)) { + for (uint32_t i = 0; i < m_captures.vsConsts.fConsts.dwordCount(); i++) { + for (uint32_t consts : bit::BitMask(m_captures.vsConsts.fConsts.dword(i))) { + uint32_t idx = i * 32 + consts; + + dst->SetVertexShaderConstantF(idx, (float*)&src->vsConsts.fConsts[idx], 1); + } + } + + for (uint32_t i = 0; i < m_captures.vsConsts.iConsts.dwordCount(); i++) { + for (uint32_t consts : bit::BitMask(m_captures.vsConsts.iConsts.dword(i))) { + uint32_t idx = i * 32 + consts; + + 
dst->SetVertexShaderConstantI(idx, (int*)&src->vsConsts.iConsts[idx], 1); + } + } + + if (m_captures.vsConsts.bConsts.any()) { + for (uint32_t i = 0; i < m_captures.vsConsts.bConsts.dwordCount(); i++) + dst->SetVertexBoolBitfield(i, m_captures.vsConsts.bConsts.dword(i), src->vsConsts.bConsts[i]); + } + } + + if (m_captures.flags.test(D3D9CapturedStateFlag::PsConstants)) { + for (uint32_t i = 0; i < m_captures.psConsts.fConsts.dwordCount(); i++) { + for (uint32_t consts : bit::BitMask(m_captures.psConsts.fConsts.dword(i))) { + uint32_t idx = i * 32 + consts; + + dst->SetPixelShaderConstantF(idx, (float*)&src->psConsts.fConsts[idx], 1); + } + } + + for (uint32_t i = 0; i < m_captures.psConsts.iConsts.dwordCount(); i++) { + for (uint32_t consts : bit::BitMask(m_captures.psConsts.iConsts.dword(i))) { + uint32_t idx = i * 32 + consts; + + dst->SetPixelShaderConstantI(idx, (int*)&src->psConsts.iConsts[idx], 1); + } + } + + if (m_captures.psConsts.bConsts.any()) { + for (uint32_t i = 0; i < m_captures.psConsts.bConsts.dwordCount(); i++) + dst->SetPixelBoolBitfield(i, m_captures.psConsts.bConsts.dword(i), src->psConsts.bConsts[i]); + } + } + } + + template <D3D9StateFunction Func> + void ApplyOrCapture() { + if constexpr (Func == D3D9StateFunction::Apply) + ApplyOrCapture(m_parent, &m_state); + else if constexpr (Func == D3D9StateFunction::Capture) + ApplyOrCapture(this, m_deviceState); + } + + template < + DxsoProgramType ProgramType, + D3D9ConstantType ConstantType, + typename T> + HRESULT SetShaderConstants( + UINT StartRegister, + const T* pConstantData, + UINT Count) { + auto SetHelper = [&](auto& setCaptures) { + if constexpr (ProgramType == DxsoProgramTypes::VertexShader) + m_captures.flags.set(D3D9CapturedStateFlag::VsConstants); + else + m_captures.flags.set(D3D9CapturedStateFlag::PsConstants); + + for (uint32_t i = 0; i < Count; i++) { + uint32_t reg = StartRegister + i; + if constexpr (ConstantType == D3D9ConstantType::Float) + setCaptures.fConsts.set(reg, 
true); + else if constexpr (ConstantType == D3D9ConstantType::Int) + setCaptures.iConsts.set(reg, true); + else if constexpr (ConstantType == D3D9ConstantType::Bool) + setCaptures.bConsts.set(reg, true); + } + + UpdateStateConstants< + ProgramType, + ConstantType, + T>( + &m_state, + StartRegister, + pConstantData, + Count, + false); + + return D3D_OK; + }; + + return ProgramType == DxsoProgramTypes::VertexShader + ? SetHelper(m_captures.vsConsts) + : SetHelper(m_captures.psConsts); + } + + HRESULT SetVertexBoolBitfield(uint32_t idx, uint32_t mask, uint32_t bits); + HRESULT SetPixelBoolBitfield (uint32_t idx, uint32_t mask, uint32_t bits); + + inline bool IsApplying() { + return m_applying; + } + + private: + + void CapturePixelRenderStates(); + void CapturePixelSamplerStates(); + void CapturePixelShaderStates(); + + void CaptureVertexRenderStates(); + void CaptureVertexSamplerStates(); + void CaptureVertexShaderStates(); + + void CaptureType(D3D9StateBlockType State); + + D3D9CapturableState m_state; + D3D9StateCaptures m_captures; + + D3D9CapturableState* m_deviceState; + + bool m_applying = false; + + }; + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_subresource.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_subresource.h new file mode 100644 index 00000000..8d047501 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_subresource.h @@ -0,0 +1,149 @@ +#pragma once + +#include "d3d9_resource.h" +#include "d3d9_common_texture.h" + +namespace dxvk { + + template <typename... Type> + class D3D9Subresource : public D3D9Resource<Type...> { + + public: + + D3D9Subresource( + D3D9DeviceEx* pDevice, + D3D9CommonTexture* pTexture, + UINT Face, + UINT MipLevel, + IDirect3DBaseTexture9* pBaseTexture, + IUnknown* pContainer) + : D3D9Resource<Type...>(pDevice), + m_container (pContainer), + m_baseTexture (pBaseTexture), + m_texture (pTexture), + m_face (Face), + m_mipLevel (MipLevel), + m_isSrgbCompatible (pTexture->IsSrgbCompatible()), + m_isNull (pTexture->IsNull()) { + + } + + ~D3D9Subresource() { + // We own the texture! + if (m_baseTexture == nullptr) + delete m_texture; + } + + ULONG STDMETHODCALLTYPE AddRef() final { + if (m_baseTexture != nullptr) + return m_baseTexture->AddRef(); + + return D3D9Resource<Type...>::AddRef(); + } + + ULONG STDMETHODCALLTYPE Release() final { + if (m_baseTexture != nullptr) + return m_baseTexture->Release(); + + return D3D9Resource<Type...>::Release(); + } + + HRESULT STDMETHODCALLTYPE GetContainer(REFIID riid, void** ppContainer) final { + if (m_container != nullptr) + return m_container->QueryInterface(riid, ppContainer); + + return this->GetDevice()->QueryInterface(riid, ppContainer); + } + + void STDMETHODCALLTYPE PreLoad() { + m_texture->PreLoadSubresource(GetSubresource()); + } + + inline D3D9CommonTexture* GetCommonTexture() { + return m_texture; + } + + inline UINT GetFace() const { + return m_face; + } + + inline UINT GetMipLevel() const { + return m_mipLevel; + } + + inline UINT GetSubresource() const { + return m_texture->CalcSubresource(m_face, m_mipLevel); + } + + inline const 
Rc<DxvkImageView>& GetImageView(bool Srgb) { + Srgb &= m_isSrgbCompatible; + Rc<DxvkImageView>& view = m_sampleView.Pick(Srgb); + + if (unlikely(view == nullptr && !IsNull())) + view = m_texture->CreateView(m_face, m_mipLevel, VK_IMAGE_USAGE_SAMPLED_BIT, Srgb); + + return view; + } + + inline const Rc<DxvkImageView>& GetRenderTargetView(bool Srgb) { + Srgb &= m_isSrgbCompatible; + Rc<DxvkImageView>& view = m_renderTargetView.Pick(Srgb); + + if (unlikely(view == nullptr && !IsNull())) + view = m_texture->CreateView(m_face, m_mipLevel, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, Srgb); + + return view; + } + + inline VkImageLayout GetRenderTargetLayout() const { + return m_texture->DetermineRenderTargetLayout(); + } + + inline const Rc<DxvkImageView>& GetDepthStencilView() { + Rc<DxvkImageView>& view = m_depthStencilView; + + if (unlikely(view == nullptr)) + view = m_texture->CreateView(m_face, m_mipLevel, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, false); + + return view; + } + + inline VkImageLayout GetDepthStencilLayout(bool write, bool hazardous) const { + return m_texture->DetermineDepthStencilLayout(write, hazardous); + } + + inline bool IsNull() { + return m_isNull; + } + + inline IDirect3DBaseTexture9* GetBaseTexture() { + return m_baseTexture; + } + + inline void Swap(D3D9Subresource* Other) { + // Only used for swap chain back buffers that don't + // have a container and all have identical properties + std::swap(m_texture, Other->m_texture); + std::swap(m_sampleView, Other->m_sampleView); + std::swap(m_renderTargetView, Other->m_renderTargetView); + } + + protected: + + IUnknown* m_container; + IDirect3DBaseTexture9* m_baseTexture; + + D3D9CommonTexture* m_texture; + + UINT m_face : 8; + UINT m_mipLevel : 16; + UINT m_isSrgbCompatible : 1; + UINT m_isNull : 1; + + D3D9ColorView m_sampleView; + D3D9ColorView m_renderTargetView; + Rc<DxvkImageView> m_depthStencilView; + + }; + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_surface.cpp b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_surface.cpp new file mode 100644 index 00000000..6158675e --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_surface.cpp @@ -0,0 +1,204 @@ +#include "d3d9_surface.h" +#include "d3d9_texture.h" +#include "d3d9_swapchain.h" + +#include "d3d9_device.h" + +namespace dxvk { + + D3D9Surface::D3D9Surface( + D3D9DeviceEx* pDevice, + const D3D9_COMMON_TEXTURE_DESC* pDesc, + IUnknown* pContainer) + : D3D9SurfaceBase( + pDevice, + new D3D9CommonTexture( pDevice, pDesc, D3DRTYPE_SURFACE), + 0, 0, + nullptr, + pContainer) { } + + D3D9Surface::D3D9Surface( + D3D9DeviceEx* pDevice, + D3D9CommonTexture* pTexture, + UINT Face, + UINT MipLevel, + IDirect3DBaseTexture9* pBaseTexture) + : D3D9SurfaceBase( + pDevice, + pTexture, + Face, MipLevel, + pBaseTexture, + pBaseTexture) { } + + void D3D9Surface::AddRefPrivate() { + if (m_baseTexture != nullptr) { + D3DRESOURCETYPE type = m_baseTexture->GetType(); + if (type == D3DRTYPE_TEXTURE) + static_cast<D3D9Texture2D*> (m_baseTexture)->AddRefPrivate(); + else //if (type == D3DRTYPE_CUBETEXTURE) + static_cast<D3D9TextureCube*>(m_baseTexture)->AddRefPrivate(); + + return; + } + + D3D9SurfaceBase::AddRefPrivate(); + } + + void D3D9Surface::ReleasePrivate() { + if (m_baseTexture != nullptr) { + D3DRESOURCETYPE type = m_baseTexture->GetType(); + if (type == D3DRTYPE_TEXTURE) + static_cast<D3D9Texture2D*> (m_baseTexture)->ReleasePrivate(); + else //if (type == D3DRTYPE_CUBETEXTURE) + static_cast<D3D9TextureCube*>(m_baseTexture)->ReleasePrivate(); + + return; + } + + D3D9SurfaceBase::ReleasePrivate(); + } + + HRESULT STDMETHODCALLTYPE D3D9Surface::QueryInterface(REFIID riid, void** ppvObject) { + if (ppvObject == nullptr) + return E_POINTER; + + *ppvObject = nullptr; + + if (riid == __uuidof(IUnknown) + || riid == __uuidof(IDirect3DResource9) + || riid == __uuidof(IDirect3DSurface9)) { + *ppvObject 
= ref(this); + return S_OK; + } + + Logger::warn("D3D9Surface::QueryInterface: Unknown interface query"); + Logger::warn(str::format(riid)); + return E_NOINTERFACE; + } + + D3DRESOURCETYPE STDMETHODCALLTYPE D3D9Surface::GetType() { + return D3DRTYPE_SURFACE; + } + + HRESULT STDMETHODCALLTYPE D3D9Surface::GetDesc(D3DSURFACE_DESC *pDesc) { + if (pDesc == nullptr) + return D3DERR_INVALIDCALL; + + auto& desc = *(m_texture->Desc()); + + pDesc->Format = static_cast<D3DFORMAT>(desc.Format); + pDesc->Type = D3DRTYPE_SURFACE; + pDesc->Usage = desc.Usage; + pDesc->Pool = desc.Pool; + + pDesc->MultiSampleType = desc.MultiSample; + pDesc->MultiSampleQuality = desc.MultisampleQuality; + pDesc->Width = std::max(1u, desc.Width >> m_mipLevel); + pDesc->Height = std::max(1u, desc.Height >> m_mipLevel); + + return D3D_OK; + } + + HRESULT STDMETHODCALLTYPE D3D9Surface::LockRect(D3DLOCKED_RECT* pLockedRect, CONST RECT* pRect, DWORD Flags) { + if (unlikely(pLockedRect == nullptr)) + return D3DERR_INVALIDCALL; + + D3DBOX box; + if (pRect != nullptr) { + box.Left = pRect->left; + box.Right = pRect->right; + box.Top = pRect->top; + box.Bottom = pRect->bottom; + box.Front = 0; + box.Back = 1; + } + + D3DLOCKED_BOX lockedBox; + + HRESULT hr = m_parent->LockImage( + m_texture, + m_face, m_mipLevel, + &lockedBox, + pRect != nullptr ? &box : nullptr, + Flags); + + pLockedRect->pBits = lockedBox.pBits; + pLockedRect->Pitch = lockedBox.RowPitch; + + return hr; + } + + HRESULT STDMETHODCALLTYPE D3D9Surface::UnlockRect() { + return m_parent->UnlockImage( + m_texture, + m_face, m_mipLevel); + } + + HRESULT STDMETHODCALLTYPE D3D9Surface::GetDC(HDC *phDC) { +#ifndef DXVK_NATIVE + if (phDC == nullptr) + return D3DERR_INVALIDCALL; + + const D3D9_COMMON_TEXTURE_DESC& desc = *m_texture->Desc(); + + D3DLOCKED_RECT lockedRect; + HRESULT hr = LockRect(&lockedRect, nullptr, 0); + if (FAILED(hr)) + return hr; + + D3DKMT_CREATEDCFROMMEMORY createInfo; + // In... 
+ createInfo.pMemory = lockedRect.pBits; + createInfo.Format = static_cast<D3DFORMAT>(desc.Format); + createInfo.Width = desc.Width; + createInfo.Height = desc.Height; + createInfo.Pitch = lockedRect.Pitch; + createInfo.hDeviceDc = CreateCompatibleDC(NULL); + createInfo.pColorTable = nullptr; + + // Out... + createInfo.hBitmap = nullptr; + createInfo.hDc = nullptr; + + D3DKMTCreateDCFromMemory(&createInfo); + DeleteDC(createInfo.hDeviceDc); + + // These should now be set... + m_dcDesc.hDC = createInfo.hDc; + m_dcDesc.hBitmap = createInfo.hBitmap; + + *phDC = m_dcDesc.hDC; + return D3D_OK; +#else + if (phDC != nullptr) + *phDC = nullptr; + + Logger::warn("D3D9Surface::GetDC: GDI interop not supported on native"); + return D3DERR_INVALIDCALL; +#endif + } + + HRESULT STDMETHODCALLTYPE D3D9Surface::ReleaseDC(HDC hDC) { +#ifndef DXVK_NATIVE + if (m_dcDesc.hDC == nullptr || m_dcDesc.hDC != hDC) + return D3DERR_INVALIDCALL; + + D3DKMTDestroyDCFromMemory(&m_dcDesc); + + HRESULT hr = UnlockRect(); + if (FAILED(hr)) + return hr; + + return D3D_OK; +#else + Logger::warn("D3D9Surface::ReleaseDC: GDI interop not supported on native"); + return D3DERR_INVALIDCALL; +#endif + } + + + void D3D9Surface::ClearContainer() { + m_container = nullptr; + } + +} diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_surface.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_surface.h new file mode 100644 index 00000000..10086cac --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_surface.h @@ -0,0 +1,66 @@ +#pragma once + +#include "d3d9_subresource.h" + +#include "d3d9_common_texture.h" + +#include "../util/util_gdi.h" + +#include <algorithm> + +namespace dxvk { + + using D3D9GDIDesc = D3DKMT_DESTROYDCFROMMEMORY; + + using D3D9SurfaceBase = D3D9Subresource<IDirect3DSurface9>; + class D3D9Surface final : public D3D9SurfaceBase { + + public: + + D3D9Surface( + D3D9DeviceEx* pDevice, + const D3D9_COMMON_TEXTURE_DESC* pDesc, + IUnknown* pContainer); + + D3D9Surface( + D3D9DeviceEx* 
pDevice, + D3D9CommonTexture* pTexture, + UINT Face, + UINT MipLevel, + IDirect3DBaseTexture9* pBaseTexture); + + void AddRefPrivate(); + + void ReleasePrivate(); + + HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, void** ppvObject); + + D3DRESOURCETYPE STDMETHODCALLTYPE GetType() final; + + HRESULT STDMETHODCALLTYPE GetDesc(D3DSURFACE_DESC *pDesc) final; + + HRESULT STDMETHODCALLTYPE LockRect(D3DLOCKED_RECT* pLockedRect, CONST RECT* pRect, DWORD Flags) final; + + HRESULT STDMETHODCALLTYPE UnlockRect() final; + + HRESULT STDMETHODCALLTYPE GetDC(HDC *phDC) final; + + HRESULT STDMETHODCALLTYPE ReleaseDC(HDC hDC) final; + + inline VkExtent2D GetSurfaceExtent() const { + const auto* desc = m_texture->Desc(); + + return VkExtent2D { + std::max(1u, desc->Width >> GetMipLevel()), + std::max(1u, desc->Height >> GetMipLevel()) + }; + } + + void ClearContainer(); + + private: + + D3D9GDIDesc m_dcDesc; + + }; +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_swapchain.cpp b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_swapchain.cpp new file mode 100644 index 00000000..673ac046 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_swapchain.cpp @@ -0,0 +1,1320 @@ +#include "d3d9_swapchain.h" +#include "d3d9_surface.h" +#include "d3d9_monitor.h" + +#include "d3d9_hud.h" + +namespace dxvk { + +#ifndef DXVK_NATIVE + struct D3D9WindowData { + bool unicode; + bool filter; + WNDPROC proc; + D3D9SwapChainEx* swapchain; + }; + + + static dxvk::recursive_mutex g_windowProcMapMutex; + static std::unordered_map<HWND, D3D9WindowData> g_windowProcMap; + + + template <typename T, typename J, typename ... Args> + auto CallCharsetFunction(T unicode, J ascii, bool isUnicode, Args... args) { + return isUnicode + ? unicode(args...) + : ascii (args...); + } + + + class D3D9WindowMessageFilter { + + public: + + D3D9WindowMessageFilter(HWND window, bool filter = true) + : m_window(window) { + std::lock_guard lock(g_windowProcMapMutex); + auto it = g_windowProcMap.find(m_window); + m_filter = std::exchange(it->second.filter, filter); + } + + ~D3D9WindowMessageFilter() { + std::lock_guard lock(g_windowProcMapMutex); + auto it = g_windowProcMap.find(m_window); + it->second.filter = m_filter; + } + + D3D9WindowMessageFilter (const D3D9WindowMessageFilter&) = delete; + D3D9WindowMessageFilter& operator = (const D3D9WindowMessageFilter&) = delete; + + private: + + HWND m_window; + bool m_filter; + + }; + + + LRESULT CALLBACK D3D9WindowProc(HWND window, UINT message, WPARAM wparam, LPARAM lparam); + + + void ResetWindowProc(HWND window) { + std::lock_guard lock(g_windowProcMapMutex); + + auto it = g_windowProcMap.find(window); + if (it == g_windowProcMap.end()) + return; + + auto proc = reinterpret_cast<WNDPROC>( + CallCharsetFunction( + GetWindowLongPtrW, GetWindowLongPtrA, it->second.unicode, + window, GWLP_WNDPROC)); + + + if (proc == D3D9WindowProc) + 
CallCharsetFunction( + SetWindowLongPtrW, SetWindowLongPtrA, it->second.unicode, + window, GWLP_WNDPROC, reinterpret_cast<LONG_PTR>(it->second.proc)); + + g_windowProcMap.erase(window); + } + + + void HookWindowProc(HWND window, D3D9SwapChainEx* swapchain) { + std::lock_guard lock(g_windowProcMapMutex); + + ResetWindowProc(window); + + D3D9WindowData windowData; + windowData.unicode = IsWindowUnicode(window); + windowData.filter = false; + windowData.proc = reinterpret_cast<WNDPROC>( + CallCharsetFunction( + SetWindowLongPtrW, SetWindowLongPtrA, windowData.unicode, + window, GWLP_WNDPROC, reinterpret_cast<LONG_PTR>(D3D9WindowProc))); + windowData.swapchain = swapchain; + + g_windowProcMap[window] = std::move(windowData); + } + + + LRESULT CALLBACK D3D9WindowProc(HWND window, UINT message, WPARAM wparam, LPARAM lparam) { + if (message == WM_NCCALCSIZE && wparam == TRUE) + return 0; + + D3D9WindowData windowData = {}; + + { + std::lock_guard lock(g_windowProcMapMutex); + + auto it = g_windowProcMap.find(window); + if (it != g_windowProcMap.end()) + windowData = it->second; + } + + bool unicode = windowData.proc + ? 
windowData.unicode + : IsWindowUnicode(window); + + if (!windowData.proc || windowData.filter) + return CallCharsetFunction( + DefWindowProcW, DefWindowProcA, unicode, + window, message, wparam, lparam); + + if (message == WM_DESTROY) + ResetWindowProc(window); + else if (message == WM_ACTIVATEAPP) { + D3DDEVICE_CREATION_PARAMETERS create_parms; + windowData.swapchain->GetDevice()->GetCreationParameters(&create_parms); + + if (!(create_parms.BehaviorFlags & D3DCREATE_NOWINDOWCHANGES)) { + if (wparam) { + // Heroes of Might and Magic V needs this to resume drawing after a focus loss + D3DPRESENT_PARAMETERS params; + RECT rect; + + GetMonitorRect(GetDefaultMonitor(), &rect); + windowData.swapchain->GetPresentParameters(¶ms); + SetWindowPos(window, nullptr, rect.left, rect.top, params.BackBufferWidth, params.BackBufferHeight, + SWP_NOACTIVATE | SWP_NOZORDER); + } + else { + if (IsWindowVisible(window)) + ShowWindow(window, SW_MINIMIZE); + } + } + } + + return CallCharsetFunction( + CallWindowProcW, CallWindowProcA, unicode, + windowData.proc, window, message, wparam, lparam); + } +#endif + + + static uint16_t MapGammaControlPoint(float x) { + if (x < 0.0f) x = 0.0f; + if (x > 1.0f) x = 1.0f; + return uint16_t(65535.0f * x); + } + + + struct D3D9PresentInfo { + float scale[2]; + float offset[2]; + }; + + + static inline void ConvertDisplayMode(const D3DDISPLAYMODEEX& mode, wsi::WsiMode* wsiMode) { + wsiMode->width = mode.Width; + wsiMode->height = mode.Height; + wsiMode->refreshRate = wsi::WsiRational{ mode.RefreshRate, 1 }; + wsiMode->bitsPerPixel = GetMonitorFormatBpp(EnumerateFormat(mode.Format)); + wsiMode->interlaced = false; + } + + + static inline void ConvertDisplayMode(const wsi::WsiMode& devMode, D3DDISPLAYMODEEX* pMode) { + pMode->Size = sizeof(D3DDISPLAYMODEEX); + pMode->Width = devMode.width; + pMode->Height = devMode.height; + pMode->RefreshRate = devMode.refreshRate.numerator / devMode.refreshRate.denominator; + pMode->Format = D3DFMT_X8R8G8B8; + 
pMode->ScanLineOrdering = D3DSCANLINEORDERING_PROGRESSIVE; + } + + + D3D9SwapChainEx::D3D9SwapChainEx( + D3D9DeviceEx* pDevice, + D3DPRESENT_PARAMETERS* pPresentParams, + const D3DDISPLAYMODEEX* pFullscreenDisplayMode) + : D3D9SwapChainExBase(pDevice) + , m_device (pDevice->GetDXVKDevice()) + , m_context (m_device->createContext()) + , m_frameLatencyCap (pDevice->GetOptions()->maxFrameLatency) + , m_frameLatencySignal(new sync::Fence(m_frameId)) + , m_dialog (pDevice->GetOptions()->enableDialogMode) { + this->NormalizePresentParameters(pPresentParams); + m_presentParams = *pPresentParams; + m_window = m_presentParams.hDeviceWindow; + + UpdatePresentRegion(nullptr, nullptr); + if (!pDevice->GetOptions()->deferSurfaceCreation) + CreatePresenter(); + + CreateBackBuffers(m_presentParams.BackBufferCount); + CreateBlitter(); + CreateHud(); + + InitRamp(); + + // Apply initial window mode and fullscreen state + if (!m_presentParams.Windowed && FAILED(EnterFullscreenMode(pPresentParams, pFullscreenDisplayMode))) + throw DxvkError("D3D9: Failed to set initial fullscreen state"); + } + + + D3D9SwapChainEx::~D3D9SwapChainEx() { + DestroyBackBuffers(); +#ifndef DXVK_NATIVE + ResetWindowProc(m_window); +#endif + + wsi::restoreDisplayMode(m_monitor); + NotifyDisplayRefreshRate(0.0); + + m_device->waitForSubmission(&m_presentStatus); + m_device->waitForIdle(); + } + + + HRESULT STDMETHODCALLTYPE D3D9SwapChainEx::QueryInterface(REFIID riid, void** ppvObject) { + if (ppvObject == nullptr) + return E_POINTER; + + *ppvObject = nullptr; + + if (riid == __uuidof(IUnknown) + || riid == __uuidof(IDirect3DSwapChain9) + || (GetParent()->IsExtended() && riid == __uuidof(IDirect3DSwapChain9Ex))) { + *ppvObject = ref(this); + return S_OK; + } + + Logger::warn("D3D9SwapChainEx::QueryInterface: Unknown interface query"); + Logger::warn(str::format(riid)); + return E_NOINTERFACE; + } + + + HRESULT STDMETHODCALLTYPE D3D9SwapChainEx::Present( + const RECT* pSourceRect, + const RECT* pDestRect, + 
HWND hDestWindowOverride, + const RGNDATA* pDirtyRegion, + DWORD dwFlags) { + D3D9DeviceLock lock = m_parent->LockDevice(); + + uint32_t presentInterval = m_presentParams.PresentationInterval; + + // This is not true directly in d3d9 to to timing differences that don't matter for us. + // For our purposes... + // D3DPRESENT_INTERVAL_DEFAULT (0) == D3DPRESENT_INTERVAL_ONE (1) which means VSYNC. + presentInterval = std::max(presentInterval, 1u); + + if (presentInterval == D3DPRESENT_INTERVAL_IMMEDIATE || (dwFlags & D3DPRESENT_FORCEIMMEDIATE)) + presentInterval = 0; + + auto options = m_parent->GetOptions(); + + if (options->presentInterval >= 0) + presentInterval = options->presentInterval; + + bool vsync = presentInterval != 0; + + HWND window = m_presentParams.hDeviceWindow; + if (hDestWindowOverride != nullptr) + window = hDestWindowOverride; + + bool recreate = false; + recreate |= m_presenter == nullptr; + recreate |= window != m_window; + recreate |= m_dialog != m_lastDialog; + + m_window = window; + + m_dirty |= vsync != m_vsync; + m_dirty |= UpdatePresentRegion(pSourceRect, pDestRect); + m_dirty |= recreate; + m_dirty |= m_presenter != nullptr && + !m_presenter->hasSwapChain(); + + m_vsync = vsync; + + m_lastDialog = m_dialog; + + try { + if (recreate) + CreatePresenter(); + + if (std::exchange(m_dirty, false)) + RecreateSwapChain(vsync); + + // We aren't going to device loss simply because + // 99% of D3D9 games don't handle this properly and + // just end up crashing (like with alt-tab loss) + if (!m_presenter->hasSwapChain()) + return D3D_OK; + + PresentImage(presentInterval); + return D3D_OK; + } catch (const DxvkError& e) { + Logger::err(e.message()); + return D3DERR_DEVICEREMOVED; + } + } + + + HRESULT STDMETHODCALLTYPE D3D9SwapChainEx::GetFrontBufferData(IDirect3DSurface9* pDestSurface) { + D3D9DeviceLock lock = m_parent->LockDevice(); + + // This function can do absolutely everything! 
+ // Copies the front buffer between formats with an implicit resolve. + // Oh, and the dest is systemmem... + // This is a slow function anyway, it waits for the copy to finish. + // so there's no reason to not just make and throwaway temp images. + + // If extent of dst > src, then we blit to a subrect of the size + // of src onto a temp image of dst's extents, + // then copy buffer back to dst (given dst is subresource) + + D3D9Surface* dst = static_cast<D3D9Surface*>(pDestSurface); + + if (unlikely(dst == nullptr)) + return D3DERR_INVALIDCALL; + + D3D9CommonTexture* dstTexInfo = dst->GetCommonTexture(); + D3D9CommonTexture* srcTexInfo = m_backBuffers.back()->GetCommonTexture(); + + if (unlikely(dstTexInfo->Desc()->Pool != D3DPOOL_SYSTEMMEM)) + return D3DERR_INVALIDCALL; + + Rc<DxvkBuffer> dstBuffer = dstTexInfo->GetBuffer(dst->GetSubresource()); + Rc<DxvkImage> srcImage = srcTexInfo->GetImage(); + + if (srcImage->info().sampleCount != VK_SAMPLE_COUNT_1_BIT) { + DxvkImageCreateInfo resolveInfo; + resolveInfo.type = VK_IMAGE_TYPE_2D; + resolveInfo.format = srcImage->info().format; + resolveInfo.flags = 0; + resolveInfo.sampleCount = VK_SAMPLE_COUNT_1_BIT; + resolveInfo.extent = srcImage->info().extent; + resolveInfo.numLayers = 1; + resolveInfo.mipLevels = 1; + resolveInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT + | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT + | VK_IMAGE_USAGE_TRANSFER_DST_BIT; + resolveInfo.stages = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT + | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT + | VK_PIPELINE_STAGE_TRANSFER_BIT; + resolveInfo.access = VK_ACCESS_SHADER_READ_BIT + | VK_ACCESS_TRANSFER_WRITE_BIT + | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT + | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + resolveInfo.tiling = VK_IMAGE_TILING_OPTIMAL; + resolveInfo.layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + Rc<DxvkImage> resolvedSrc = m_device->createImage( + resolveInfo, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + + m_parent->EmitCs([ + cDstImage = resolvedSrc, + 
cSrcImage = srcImage + ] (DxvkContext* ctx) { + VkImageSubresourceLayers resolveSubresource; + resolveSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + resolveSubresource.mipLevel = 0; + resolveSubresource.baseArrayLayer = 0; + resolveSubresource.layerCount = 1; + + VkImageResolve resolveRegion; + resolveRegion.srcSubresource = resolveSubresource; + resolveRegion.srcOffset = VkOffset3D { 0, 0, 0 }; + resolveRegion.dstSubresource = resolveSubresource; + resolveRegion.dstOffset = VkOffset3D { 0, 0, 0 }; + resolveRegion.extent = cSrcImage->info().extent; + + ctx->resolveImage( + cDstImage, cSrcImage, + resolveRegion, VK_FORMAT_UNDEFINED); + }); + + srcImage = std::move(resolvedSrc); + } + + D3D9Format srcFormat = srcTexInfo->Desc()->Format; + D3D9Format dstFormat = dstTexInfo->Desc()->Format; + + bool similar = AreFormatsSimilar(srcFormat, dstFormat); + + if (!similar || srcImage->info().extent != dstTexInfo->GetExtent()) { + DxvkImageCreateInfo blitCreateInfo; + blitCreateInfo.type = VK_IMAGE_TYPE_2D; + blitCreateInfo.format = dstTexInfo->GetFormatMapping().FormatColor; + blitCreateInfo.flags = 0; + blitCreateInfo.sampleCount = VK_SAMPLE_COUNT_1_BIT; + blitCreateInfo.extent = dstTexInfo->GetExtent(); + blitCreateInfo.numLayers = 1; + blitCreateInfo.mipLevels = 1; + blitCreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT + | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT + | VK_IMAGE_USAGE_TRANSFER_DST_BIT; + blitCreateInfo.stages = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT + | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT + | VK_PIPELINE_STAGE_TRANSFER_BIT; + blitCreateInfo.access = VK_ACCESS_SHADER_READ_BIT + | VK_ACCESS_TRANSFER_WRITE_BIT + | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT + | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + blitCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL; + blitCreateInfo.layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + Rc<DxvkImage> blittedSrc = m_device->createImage( + blitCreateInfo, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + + const DxvkFormatInfo* 
dstFormatInfo = imageFormatInfo(blittedSrc->info().format); + const DxvkFormatInfo* srcFormatInfo = imageFormatInfo(srcImage->info().format); + + const VkImageSubresource dstSubresource = dstTexInfo->GetSubresourceFromIndex(dstFormatInfo->aspectMask, 0); + const VkImageSubresource srcSubresource = srcTexInfo->GetSubresourceFromIndex(srcFormatInfo->aspectMask, 0); + + VkImageSubresourceLayers dstSubresourceLayers = { + dstSubresource.aspectMask, + dstSubresource.mipLevel, + dstSubresource.arrayLayer, 1 }; + + VkImageSubresourceLayers srcSubresourceLayers = { + srcSubresource.aspectMask, + srcSubresource.mipLevel, + srcSubresource.arrayLayer, 1 }; + + VkExtent3D srcExtent = srcImage->mipLevelExtent(srcSubresource.mipLevel); + + // Blit to a subrect of the src extents + VkImageBlit blitInfo; + blitInfo.dstSubresource = dstSubresourceLayers; + blitInfo.srcSubresource = srcSubresourceLayers; + blitInfo.dstOffsets[0] = VkOffset3D{ 0, 0, 0 }; + blitInfo.dstOffsets[1] = VkOffset3D{ int32_t(srcExtent.width), int32_t(srcExtent.height), 1 }; + blitInfo.srcOffsets[0] = VkOffset3D{ 0, 0, 0 }; + blitInfo.srcOffsets[1] = VkOffset3D{ int32_t(srcExtent.width), int32_t(srcExtent.height), 1 }; + + m_parent->EmitCs([ + cDstImage = blittedSrc, + cDstMap = dstTexInfo->GetMapping().Swizzle, + cSrcImage = srcImage, + cSrcMap = srcTexInfo->GetMapping().Swizzle, + cBlitInfo = blitInfo + ] (DxvkContext* ctx) { + ctx->blitImage( + cDstImage, cDstMap, + cSrcImage, cSrcMap, + cBlitInfo, VK_FILTER_NEAREST); + }); + + srcImage = std::move(blittedSrc); + } + + const DxvkFormatInfo* srcFormatInfo = imageFormatInfo(srcImage->info().format); + const VkImageSubresource srcSubresource = srcTexInfo->GetSubresourceFromIndex(srcFormatInfo->aspectMask, 0); + VkImageSubresourceLayers srcSubresourceLayers = { + srcSubresource.aspectMask, + srcSubresource.mipLevel, + srcSubresource.arrayLayer, 1 }; + VkExtent3D srcExtent = srcImage->mipLevelExtent(srcSubresource.mipLevel); + + m_parent->EmitCs([ + cBuffer = 
dstBuffer, + cImage = srcImage, + cSubresources = srcSubresourceLayers, + cLevelExtent = srcExtent + ] (DxvkContext* ctx) { + ctx->copyImageToBuffer(cBuffer, 0, 4, 0, + cImage, cSubresources, VkOffset3D { 0, 0, 0 }, + cLevelExtent); + }); + + dstTexInfo->SetWrittenByGPU(dst->GetSubresource(), true); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9SwapChainEx::GetBackBuffer( + UINT iBackBuffer, + D3DBACKBUFFER_TYPE Type, + IDirect3DSurface9** ppBackBuffer) { + // Could be doing a device reset... + D3D9DeviceLock lock = m_parent->LockDevice(); + + if (unlikely(ppBackBuffer == nullptr)) + return D3DERR_INVALIDCALL; + + if (unlikely(iBackBuffer >= m_presentParams.BackBufferCount)) { + Logger::err(str::format("D3D9: GetBackBuffer: Invalid back buffer index: ", iBackBuffer)); + return D3DERR_INVALIDCALL; + } + + *ppBackBuffer = ref(m_backBuffers[iBackBuffer].ptr()); + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9SwapChainEx::GetRasterStatus(D3DRASTER_STATUS* pRasterStatus) { + // We could use D3DKMTGetScanLine but Wine doesn't implement that. + // So... we lie here and make some stuff up + // enough that it makes games work. + + // Assume there's 20 lines in a vBlank. 
+ constexpr uint32_t vBlankLineCount = 20; + + if (pRasterStatus == nullptr) + return D3DERR_INVALIDCALL; + + D3DDISPLAYMODEEX mode; + mode.Size = sizeof(mode); + if (FAILED(this->GetDisplayModeEx(&mode, nullptr))) + return D3DERR_INVALIDCALL; + + uint32_t scanLineCount = mode.Height + vBlankLineCount; + + auto nowUs = std::chrono::time_point_cast<std::chrono::microseconds>( + dxvk::high_resolution_clock::now()) + .time_since_epoch(); + + auto frametimeUs = std::chrono::microseconds(1000000u / mode.RefreshRate); + auto scanLineUs = frametimeUs / scanLineCount; + + pRasterStatus->ScanLine = (nowUs % frametimeUs) / scanLineUs; + pRasterStatus->InVBlank = pRasterStatus->ScanLine >= mode.Height; + + if (pRasterStatus->InVBlank) + pRasterStatus->ScanLine = 0; + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9SwapChainEx::GetDisplayMode(D3DDISPLAYMODE* pMode) { + if (pMode == nullptr) + return D3DERR_INVALIDCALL; + + *pMode = D3DDISPLAYMODE(); + + D3DDISPLAYMODEEX mode; + mode.Size = sizeof(mode); + HRESULT hr = this->GetDisplayModeEx(&mode, nullptr); + + if (FAILED(hr)) + return hr; + + pMode->Width = mode.Width; + pMode->Height = mode.Height; + pMode->Format = mode.Format; + pMode->RefreshRate = mode.RefreshRate; + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9SwapChainEx::GetPresentParameters(D3DPRESENT_PARAMETERS* pPresentationParameters) { + if (pPresentationParameters == nullptr) + return D3DERR_INVALIDCALL; + + *pPresentationParameters = m_presentParams; + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9SwapChainEx::GetLastPresentCount(UINT* pLastPresentCount) { + Logger::warn("D3D9SwapChainEx::GetLastPresentCount: Stub"); + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9SwapChainEx::GetPresentStats(D3DPRESENTSTATS* pPresentationStatistics) { + Logger::warn("D3D9SwapChainEx::GetPresentStats: Stub"); + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9SwapChainEx::GetDisplayModeEx(D3DDISPLAYMODEEX* pMode, 
D3DDISPLAYROTATION* pRotation) { + if (pMode == nullptr && pRotation == nullptr) + return D3DERR_INVALIDCALL; + + if (pRotation != nullptr) + *pRotation = D3DDISPLAYROTATION_IDENTITY; + + if (pMode != nullptr) { + wsi::WsiMode devMode = { }; + + if (!wsi::getCurrentDisplayMode(GetDefaultMonitor(), &devMode)) { + Logger::err("D3D9SwapChainEx::GetDisplayModeEx: Failed to enum display settings"); + return D3DERR_INVALIDCALL; + } + + ConvertDisplayMode(devMode, pMode); + } + + return D3D_OK; + } + + + HRESULT D3D9SwapChainEx::Reset( + D3DPRESENT_PARAMETERS* pPresentParams, + D3DDISPLAYMODEEX* pFullscreenDisplayMode) { + D3D9DeviceLock lock = m_parent->LockDevice(); + + this->SynchronizePresent(); + this->NormalizePresentParameters(pPresentParams); + + m_dirty |= m_presentParams.BackBufferFormat != pPresentParams->BackBufferFormat + || m_presentParams.BackBufferCount != pPresentParams->BackBufferCount; + + bool changeFullscreen = m_presentParams.Windowed != pPresentParams->Windowed; + + if (pPresentParams->Windowed) { + if (changeFullscreen) + this->LeaveFullscreenMode(); + + wsi::resizeWindow( + m_window, &m_windowState, + pPresentParams->BackBufferWidth, + pPresentParams->BackBufferHeight); + } + else { + if (changeFullscreen) { + if (FAILED(this->EnterFullscreenMode(pPresentParams, pFullscreenDisplayMode))) + return D3DERR_INVALIDCALL; + } + +#ifdef _WIN32 + D3D9WindowMessageFilter filter(m_window); +#endif + + if (!changeFullscreen) { + if (FAILED(ChangeDisplayMode(pPresentParams, pFullscreenDisplayMode, false))) + return D3DERR_INVALIDCALL; + } + } + + m_presentParams = *pPresentParams; + + if (changeFullscreen) + SetGammaRamp(0, &m_ramp); + + CreateBackBuffers(m_presentParams.BackBufferCount); + + return D3D_OK; + } + + + HRESULT D3D9SwapChainEx::WaitForVBlank() { + static bool s_errorShown = false; + + if (!std::exchange(s_errorShown, true)) + Logger::warn("D3D9SwapChainEx::WaitForVBlank: Stub"); + + return D3D_OK; + } + + static bool validateGammaRamp(const WORD 
(&ramp)[256]) { + if (ramp[0] >= ramp[std::size(ramp) - 1]) { + Logger::err("validateGammaRamp: ramp inverted or flat"); + return false; + } + + for (size_t i = 1; i < std::size(ramp); i++) { + if (ramp[i] < ramp[i - 1]) { + Logger::err("validateGammaRamp: ramp not monotonically increasing"); + return false; + } + if (ramp[i] - ramp[i - 1] >= UINT16_MAX / 2) { + Logger::err("validateGammaRamp: huuuge jump"); + return false; + } + } + + return true; + } + + + void D3D9SwapChainEx::SetGammaRamp( + DWORD Flags, + const D3DGAMMARAMP* pRamp) { + D3D9DeviceLock lock = m_parent->LockDevice(); + + if (unlikely(pRamp == nullptr)) + return; + + if (unlikely(!validateGammaRamp(pRamp->red) + && !validateGammaRamp(pRamp->blue) + && !validateGammaRamp(pRamp->green))) + return; + + m_ramp = *pRamp; + + bool isIdentity = true; + + std::array<DxvkGammaCp, NumControlPoints> cp; + + for (uint32_t i = 0; i < NumControlPoints; i++) { + uint16_t identity = MapGammaControlPoint(float(i) / float(NumControlPoints - 1)); + + cp[i].r = pRamp->red[i]; + cp[i].g = pRamp->green[i]; + cp[i].b = pRamp->blue[i]; + cp[i].a = 0; + + isIdentity &= cp[i].r == identity + && cp[i].g == identity + && cp[i].b == identity; + } + + if (!isIdentity && !m_presentParams.Windowed) + m_blitter->setGammaRamp(NumControlPoints, cp.data()); + else + m_blitter->setGammaRamp(0, nullptr); + } + + + void D3D9SwapChainEx::GetGammaRamp(D3DGAMMARAMP* pRamp) { + D3D9DeviceLock lock = m_parent->LockDevice(); + + if (likely(pRamp != nullptr)) + *pRamp = m_ramp; + } + + + void D3D9SwapChainEx::Invalidate(HWND hWindow) { + if (hWindow == nullptr) + hWindow = m_parent->GetWindow(); + + if (m_presentParams.hDeviceWindow == hWindow) { + m_presenter = nullptr; + + m_device->waitForSubmission(&m_presentStatus); + m_device->waitForIdle(); + } + } + + + HRESULT D3D9SwapChainEx::SetDialogBoxMode(bool bEnableDialogs) { + D3D9DeviceLock lock = m_parent->LockDevice(); + + // 
https://docs.microsoft.com/en-us/windows/win32/api/d3d9/nf-d3d9-idirect3ddevice9-setdialogboxmode + // The MSDN documentation says this will error out under many weird conditions. + // However it doesn't appear to error at all in any of my tests of these + // cases described in the documentation. + + m_dialog = bEnableDialogs; + + return D3D_OK; + } + + + D3D9Surface* D3D9SwapChainEx::GetBackBuffer(UINT iBackBuffer) { + if (iBackBuffer >= m_presentParams.BackBufferCount) + return nullptr; + + return m_backBuffers[iBackBuffer].ptr(); + } + + + void D3D9SwapChainEx::NormalizePresentParameters(D3DPRESENT_PARAMETERS* pPresentParams) { + if (pPresentParams->hDeviceWindow == nullptr) + pPresentParams->hDeviceWindow = m_parent->GetWindow(); + + pPresentParams->BackBufferCount = std::max(pPresentParams->BackBufferCount, 1u); + + const int32_t forcedMSAA = m_parent->GetOptions()->forceSwapchainMSAA; + if (forcedMSAA != -1) { + pPresentParams->MultiSampleType = D3DMULTISAMPLE_TYPE(forcedMSAA); + pPresentParams->MultiSampleQuality = 0; + } + + if (pPresentParams->Windowed) { + GetWindowClientSize(pPresentParams->hDeviceWindow, + pPresentParams->BackBufferWidth ? nullptr : &pPresentParams->BackBufferWidth, + pPresentParams->BackBufferHeight ? nullptr : &pPresentParams->BackBufferHeight); + } + else { + GetMonitorClientSize(GetDefaultMonitor(), + pPresentParams->BackBufferWidth ? nullptr : &pPresentParams->BackBufferWidth, + pPresentParams->BackBufferHeight ? 
nullptr : &pPresentParams->BackBufferHeight); + } + + if (pPresentParams->BackBufferFormat == D3DFMT_UNKNOWN) + pPresentParams->BackBufferFormat = D3DFMT_X8R8G8B8; + + if (env::getEnvVar("DXVK_FORCE_WINDOWED") == "1") + pPresentParams->Windowed = TRUE; + } + + + void D3D9SwapChainEx::PresentImage(UINT SyncInterval) { + m_parent->Flush(); + + // Retrieve the image and image view to present + auto swapImage = m_backBuffers[0]->GetCommonTexture()->GetImage(); + auto swapImageView = m_backBuffers[0]->GetImageView(false); + + // Bump our frame id. + ++m_frameId; + + for (uint32_t i = 0; i < SyncInterval || i < 1; i++) { + SynchronizePresent(); + + // Presentation semaphores and WSI swap chain image + vk::PresenterInfo info = m_presenter->info(); + vk::PresenterSync sync; + + uint32_t imageIndex = 0; + + VkResult status = m_presenter->acquireNextImage(sync, imageIndex); + + while (status != VK_SUCCESS && status != VK_SUBOPTIMAL_KHR) { + RecreateSwapChain(m_vsync); + + info = m_presenter->info(); + status = m_presenter->acquireNextImage(sync, imageIndex); + } + + m_context->beginRecording( + m_device->createCommandList()); + + VkRect2D srcRect = { + { int32_t(m_srcRect.left), int32_t(m_srcRect.top) }, + { uint32_t(m_srcRect.right - m_srcRect.left), uint32_t(m_srcRect.bottom - m_srcRect.top) } }; + + VkRect2D dstRect = { + { int32_t(m_dstRect.left), int32_t(m_dstRect.top) }, + { uint32_t(m_dstRect.right - m_dstRect.left), uint32_t(m_dstRect.bottom - m_dstRect.top) } }; + + m_blitter->presentImage(m_context.ptr(), + m_imageViews.at(imageIndex), dstRect, + swapImageView, srcRect); + + if (m_hud != nullptr) + m_hud->render(m_context, info.format, info.imageExtent); + + if (i + 1 >= SyncInterval) + m_context->signal(m_frameLatencySignal, m_frameId); + + SubmitPresent(sync, i); + } + + SyncFrameLatency(); + + // Rotate swap chain buffers so that the back + // buffer at index 0 becomes the front buffer. 
+ for (uint32_t i = 1; i < m_backBuffers.size(); i++) + m_backBuffers[i]->Swap(m_backBuffers[i - 1].ptr()); + + m_parent->m_flags.set(D3D9DeviceFlag::DirtyFramebuffer); + } + + + void D3D9SwapChainEx::SubmitPresent(const vk::PresenterSync& Sync, uint32_t FrameId) { + // Present from CS thread so that we don't + // have to synchronize with it first. + m_presentStatus.result = VK_NOT_READY; + + m_parent->EmitCs([this, + cFrameId = FrameId, + cSync = Sync, + cHud = m_hud, + cCommandList = m_context->endRecording() + ] (DxvkContext* ctx) { + m_device->submitCommandList(cCommandList, + cSync.acquire, cSync.present); + + if (cHud != nullptr && !cFrameId) + cHud->update(); + + m_device->presentImage(m_presenter, &m_presentStatus); + }); + + m_parent->FlushCsChunk(); + } + + + void D3D9SwapChainEx::SynchronizePresent() { + // Recreate swap chain if the previous present call failed + VkResult status = m_device->waitForSubmission(&m_presentStatus); + + if (status != VK_SUCCESS) + RecreateSwapChain(m_vsync); + } + + + void D3D9SwapChainEx::RecreateSwapChain(BOOL Vsync) { + // Ensure that we can safely destroy the swap chain + m_device->waitForSubmission(&m_presentStatus); + m_device->waitForIdle(); + + m_presentStatus.result = VK_SUCCESS; + + vk::PresenterDesc presenterDesc; + presenterDesc.imageExtent = GetPresentExtent(); + presenterDesc.imageCount = PickImageCount(m_presentParams.BackBufferCount + 1); + presenterDesc.numFormats = PickFormats(EnumerateFormat(m_presentParams.BackBufferFormat), presenterDesc.formats); + presenterDesc.numPresentModes = PickPresentModes(Vsync, presenterDesc.presentModes); + presenterDesc.fullScreenExclusive = PickFullscreenMode(); + + if (m_presenter->recreateSwapChain(presenterDesc) != VK_SUCCESS) + throw DxvkError("D3D9SwapChainEx: Failed to recreate swap chain"); + + CreateRenderTargetViews(); + } + + + void D3D9SwapChainEx::CreatePresenter() { + // Ensure that we can safely destroy the swap chain + 
m_device->waitForSubmission(&m_presentStatus); + m_device->waitForIdle(); + + m_presentStatus.result = VK_SUCCESS; + + DxvkDeviceQueue graphicsQueue = m_device->queues().graphics; + + vk::PresenterDevice presenterDevice; + presenterDevice.queueFamily = graphicsQueue.queueFamily; + presenterDevice.queue = graphicsQueue.queueHandle; + presenterDevice.adapter = m_device->adapter()->handle(); + + vk::PresenterDesc presenterDesc; + presenterDesc.imageExtent = GetPresentExtent(); + presenterDesc.imageCount = PickImageCount(m_presentParams.BackBufferCount + 1); + presenterDesc.numFormats = PickFormats(EnumerateFormat(m_presentParams.BackBufferFormat), presenterDesc.formats); + presenterDesc.numPresentModes = PickPresentModes(false, presenterDesc.presentModes); + presenterDesc.fullScreenExclusive = PickFullscreenMode(); + + m_presenter = new vk::Presenter(m_window, + m_device->adapter()->vki(), + m_device->vkd(), + presenterDevice, + presenterDesc); + + m_presenter->setFrameRateLimit(m_parent->GetOptions()->maxFrameRate); + m_presenter->setFrameRateLimiterRefreshRate(m_displayRefreshRate); + + CreateRenderTargetViews(); + } + + + void D3D9SwapChainEx::CreateRenderTargetViews() { + vk::PresenterInfo info = m_presenter->info(); + + m_imageViews.clear(); + m_imageViews.resize(info.imageCount); + + DxvkImageCreateInfo imageInfo; + imageInfo.type = VK_IMAGE_TYPE_2D; + imageInfo.format = info.format.format; + imageInfo.flags = 0; + imageInfo.sampleCount = VK_SAMPLE_COUNT_1_BIT; + imageInfo.extent = { info.imageExtent.width, info.imageExtent.height, 1 }; + imageInfo.numLayers = 1; + imageInfo.mipLevels = 1; + imageInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + imageInfo.stages = 0; + imageInfo.access = 0; + imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL; + imageInfo.layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + imageInfo.shared = VK_TRUE; + + DxvkImageViewCreateInfo viewInfo; + viewInfo.type = VK_IMAGE_VIEW_TYPE_2D; + viewInfo.format = info.format.format; + viewInfo.usage = 
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + viewInfo.aspect = VK_IMAGE_ASPECT_COLOR_BIT; + viewInfo.minLevel = 0; + viewInfo.numLevels = 1; + viewInfo.minLayer = 0; + viewInfo.numLayers = 1; + + for (uint32_t i = 0; i < info.imageCount; i++) { + VkImage imageHandle = m_presenter->getImage(i).image; + + Rc<DxvkImage> image = new DxvkImage( + m_device->vkd(), imageInfo, imageHandle); + + m_imageViews[i] = new DxvkImageView( + m_device->vkd(), image, viewInfo); + } + } + + + void D3D9SwapChainEx::DestroyBackBuffers() { + for (auto& backBuffer : m_backBuffers) + backBuffer->ClearContainer(); + + m_backBuffers.clear(); + } + + + void D3D9SwapChainEx::CreateBackBuffers(uint32_t NumBackBuffers) { + // Explicitly destroy current swap image before + // creating a new one to free up resources + DestroyBackBuffers(); + + int NumFrontBuffer = m_parent->GetOptions()->noExplicitFrontBuffer ? 0 : 1; + m_backBuffers.resize(NumBackBuffers + NumFrontBuffer); + + // Create new back buffer + D3D9_COMMON_TEXTURE_DESC desc; + desc.Width = std::max(m_presentParams.BackBufferWidth, 1u); + desc.Height = std::max(m_presentParams.BackBufferHeight, 1u); + desc.Depth = 1; + desc.MipLevels = 1; + desc.ArraySize = 1; + desc.Format = EnumerateFormat(m_presentParams.BackBufferFormat); + desc.MultiSample = m_presentParams.MultiSampleType; + desc.MultisampleQuality = m_presentParams.MultiSampleQuality; + desc.Pool = D3DPOOL_DEFAULT; + desc.Usage = D3DUSAGE_RENDERTARGET; + desc.Discard = FALSE; + desc.IsBackBuffer = TRUE; + desc.IsAttachmentOnly = FALSE; + + for (uint32_t i = 0; i < m_backBuffers.size(); i++) + m_backBuffers[i] = new D3D9Surface(m_parent, &desc, this); + + auto swapImage = m_backBuffers[0]->GetCommonTexture()->GetImage(); + + // Initialize the image so that we can use it. Clearing + // to black prevents garbled output for the first frame. 
+ VkImageSubresourceRange subresources; + subresources.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + subresources.baseMipLevel = 0; + subresources.levelCount = 1; + subresources.baseArrayLayer = 0; + subresources.layerCount = 1; + + VkClearColorValue clearColor; + clearColor.float32[0] = 0.0f; + clearColor.float32[1] = 0.0f; + clearColor.float32[2] = 0.0f; + clearColor.float32[3] = 0.0f; + + m_context->beginRecording( + m_device->createCommandList()); + + for (uint32_t i = 0; i < m_backBuffers.size(); i++) { + m_context->clearColorImage( + m_backBuffers[i]->GetCommonTexture()->GetImage(), + clearColor, subresources); + } + + m_device->submitCommandList( + m_context->endRecording(), + VK_NULL_HANDLE, + VK_NULL_HANDLE); + } + + + void D3D9SwapChainEx::CreateBlitter() { + m_blitter = new DxvkSwapchainBlitter(m_device); + } + + + void D3D9SwapChainEx::CreateHud() { + m_hud = hud::Hud::createHud(m_device); + + if (m_hud != nullptr) { + m_hud->addItem<hud::HudClientApiItem>("api", 1, GetApiName()); + m_hud->addItem<hud::HudSamplerCount>("samplers", -1, m_parent); + } + } + + + void D3D9SwapChainEx::InitRamp() { + for (uint32_t i = 0; i < NumControlPoints; i++) { + DWORD identity = DWORD(MapGammaControlPoint(float(i) / float(NumControlPoints - 1))); + + m_ramp.red[i] = identity; + m_ramp.green[i] = identity; + m_ramp.blue[i] = identity; + } + } + + + void D3D9SwapChainEx::SyncFrameLatency() { + // Wait for the sync event so that we respect the maximum frame latency + m_frameLatencySignal->wait(m_frameId - GetActualFrameLatency()); + } + + + uint32_t D3D9SwapChainEx::GetActualFrameLatency() { + uint32_t maxFrameLatency = m_parent->GetFrameLatency(); + + if (m_frameLatencyCap) + maxFrameLatency = std::min(maxFrameLatency, m_frameLatencyCap); + + maxFrameLatency = std::min(maxFrameLatency, m_presentParams.BackBufferCount + 1); + return maxFrameLatency; + } + + + uint32_t D3D9SwapChainEx::PickFormats( + D3D9Format Format, + VkSurfaceFormatKHR* pDstFormats) { + uint32_t n = 0; + + 
switch (Format) { + default: + Logger::warn(str::format("D3D9SwapChainEx: Unexpected format: ", Format)); + + case D3D9Format::A8R8G8B8: + case D3D9Format::X8R8G8B8: + case D3D9Format::A8B8G8R8: + case D3D9Format::X8B8G8R8: { + pDstFormats[n++] = { VK_FORMAT_R8G8B8A8_UNORM, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR }; + pDstFormats[n++] = { VK_FORMAT_B8G8R8A8_UNORM, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR }; + } break; + + case D3D9Format::A2R10G10B10: + case D3D9Format::A2B10G10R10: { + pDstFormats[n++] = { VK_FORMAT_A2B10G10R10_UNORM_PACK32, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR }; + pDstFormats[n++] = { VK_FORMAT_A2R10G10B10_UNORM_PACK32, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR }; + } break; + + case D3D9Format::X1R5G5B5: + case D3D9Format::A1R5G5B5: { + pDstFormats[n++] = { VK_FORMAT_B5G5R5A1_UNORM_PACK16, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR }; + pDstFormats[n++] = { VK_FORMAT_R5G5B5A1_UNORM_PACK16, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR }; + pDstFormats[n++] = { VK_FORMAT_A1R5G5B5_UNORM_PACK16, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR }; + } + + case D3D9Format::R5G6B5: { + pDstFormats[n++] = { VK_FORMAT_B5G6R5_UNORM_PACK16, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR }; + pDstFormats[n++] = { VK_FORMAT_R5G6B5_UNORM_PACK16, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR }; + } + } + + return n; + } + + + uint32_t D3D9SwapChainEx::PickPresentModes( + BOOL Vsync, + VkPresentModeKHR* pDstModes) { + uint32_t n = 0; + + if (Vsync) { + if (m_parent->GetOptions()->tearFree == Tristate::False) + pDstModes[n++] = VK_PRESENT_MODE_FIFO_RELAXED_KHR; + pDstModes[n++] = VK_PRESENT_MODE_FIFO_KHR; + } else { + if (m_parent->GetOptions()->tearFree != Tristate::True) + pDstModes[n++] = VK_PRESENT_MODE_IMMEDIATE_KHR; + pDstModes[n++] = VK_PRESENT_MODE_MAILBOX_KHR; + } + + return n; + } + + + uint32_t D3D9SwapChainEx::PickImageCount( + UINT Preferred) { + int32_t option = m_parent->GetOptions()->numBackBuffers; + return option > 0 ? 
uint32_t(option) : uint32_t(Preferred); + } + + + void D3D9SwapChainEx::NotifyDisplayRefreshRate( + double RefreshRate) { + m_displayRefreshRate = RefreshRate; + + if (m_presenter != nullptr) + m_presenter->setFrameRateLimiterRefreshRate(RefreshRate); + } + + + HRESULT D3D9SwapChainEx::EnterFullscreenMode( + D3DPRESENT_PARAMETERS* pPresentParams, + const D3DDISPLAYMODEEX* pFullscreenDisplayMode) { + if (FAILED(ChangeDisplayMode(pPresentParams, pFullscreenDisplayMode, true))) { + Logger::err("D3D9: EnterFullscreenMode: Failed to change display mode"); + return D3DERR_NOTAVAILABLE; + } + +#ifndef DXVK_NATIVE + // Testing shows we shouldn't hook WM_NCCALCSIZE but we shouldn't change + // windows style either. + // + // Some games restore window styles after we have changed it, so hooking is + // also required. Doing it will allow us to create fullscreen windows + // regardless of their style and it also appears to work on Windows. + HookWindowProc(m_window, this); + + D3D9WindowMessageFilter filter(m_window); +#endif + + m_monitor = GetDefaultMonitor(); + + if (!wsi::enterFullscreenMode(m_monitor, m_window, &m_windowState, true)) { + Logger::err("D3D9: EnterFullscreenMode: Failed to enter fullscreen mode"); + return D3DERR_NOTAVAILABLE; + } + + return D3D_OK; + } + + + HRESULT D3D9SwapChainEx::LeaveFullscreenMode() { + if (!wsi::restoreDisplayMode(m_monitor)) + Logger::warn("D3D9: LeaveFullscreenMode: Failed to restore display mode"); + NotifyDisplayRefreshRate(0.0); + + m_monitor = nullptr; + +#ifndef DXVK_NATIVE + ResetWindowProc(m_window); +#endif + + if (!wsi::isWindow(m_window)) + return D3D_OK; + + if (!wsi::leaveFullscreenMode(m_window, &m_windowState)) { + Logger::err("D3D9: LeaveFullscreenMode: Failed to exit fullscreen mode"); + return D3DERR_NOTAVAILABLE; + } + + return D3D_OK; + } + + + HRESULT D3D9SwapChainEx::ChangeDisplayMode( + D3DPRESENT_PARAMETERS* pPresentParams, + const D3DDISPLAYMODEEX* pFullscreenDisplayMode, + bool EnteringFullscreen) { + 
D3DDISPLAYMODEEX mode; + + if (pFullscreenDisplayMode) { + mode = *pFullscreenDisplayMode; + } else { + mode.Width = pPresentParams->BackBufferWidth; + mode.Height = pPresentParams->BackBufferHeight; + mode.Format = pPresentParams->BackBufferFormat; + mode.RefreshRate = pPresentParams->FullScreen_RefreshRateInHz; + mode.ScanLineOrdering = D3DSCANLINEORDERING_PROGRESSIVE; + mode.Size = sizeof(D3DDISPLAYMODEEX); + } + + wsi::WsiMode wsiMode = { }; + ConvertDisplayMode(mode, &wsiMode); + + if (!wsi::setWindowMode(GetDefaultMonitor(), m_window, &wsiMode, EnteringFullscreen)) + return D3DERR_NOTAVAILABLE; + + return D3D_OK; + } + + + bool D3D9SwapChainEx::UpdatePresentRegion(const RECT* pSourceRect, const RECT* pDestRect) { + if (pSourceRect == nullptr) { + m_srcRect.top = 0; + m_srcRect.left = 0; + m_srcRect.right = m_presentParams.BackBufferWidth; + m_srcRect.bottom = m_presentParams.BackBufferHeight; + } + else + m_srcRect = *pSourceRect; + + RECT dstRect; + if (pDestRect == nullptr) { + // TODO: Should we hook WM_SIZE message for this? + UINT width, height; + GetWindowClientSize(m_window, &width, &height); + + dstRect.top = 0; + dstRect.left = 0; + dstRect.right = LONG(width); + dstRect.bottom = LONG(height); + } + else + dstRect = *pDestRect; + + bool recreate = + m_dstRect.left != dstRect.left + || m_dstRect.top != dstRect.top + || m_dstRect.right != dstRect.right + || m_dstRect.bottom != dstRect.bottom; + + m_dstRect = dstRect; + + return recreate; + } + + VkExtent2D D3D9SwapChainEx::GetPresentExtent() { + return VkExtent2D { + std::max<uint32_t>(m_dstRect.right - m_dstRect.left, 1u), + std::max<uint32_t>(m_dstRect.bottom - m_dstRect.top, 1u) }; + } + + + VkFullScreenExclusiveEXT D3D9SwapChainEx::PickFullscreenMode() { + return m_dialog + ? VK_FULL_SCREEN_EXCLUSIVE_DISALLOWED_EXT + : VK_FULL_SCREEN_EXCLUSIVE_DEFAULT_EXT; + } + + + std::string D3D9SwapChainEx::GetApiName() { + return this->GetParent()->IsExtended() ? 
"D3D9Ex" : "D3D9"; + } + +} diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_swapchain.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_swapchain.h new file mode 100644 index 00000000..88953b2a --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_swapchain.h @@ -0,0 +1,192 @@ +#pragma once + +#include "d3d9_device_child.h" +#include "d3d9_device.h" +#include "d3d9_format.h" + +#include "../dxvk/hud/dxvk_hud.h" + +#include "../dxvk/dxvk_swapchain_blitter.h" +#include "../wsi/wsi_mode.h" +#include "../wsi/wsi_window.h" +#include "../wsi/wsi_monitor.h" + +#include "../util/sync/sync_signal.h" + +#include <vector> + +namespace dxvk { + + class D3D9Surface; + + using D3D9SwapChainExBase = D3D9DeviceChild<IDirect3DSwapChain9Ex>; + class D3D9SwapChainEx final : public D3D9SwapChainExBase { + static constexpr uint32_t NumControlPoints = 256; + public: + + D3D9SwapChainEx( + D3D9DeviceEx* pDevice, + D3DPRESENT_PARAMETERS* pPresentParams, + const D3DDISPLAYMODEEX* pFullscreenDisplayMode); + + ~D3D9SwapChainEx(); + + HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, void** ppvObject); + + HRESULT STDMETHODCALLTYPE Present( + const RECT* pSourceRect, + const RECT* pDestRect, + HWND hDestWindowOverride, + const RGNDATA* pDirtyRegion, + DWORD dwFlags); + + HRESULT STDMETHODCALLTYPE GetFrontBufferData(IDirect3DSurface9* pDestSurface); + + HRESULT STDMETHODCALLTYPE GetBackBuffer( + UINT iBackBuffer, + D3DBACKBUFFER_TYPE Type, + IDirect3DSurface9** ppBackBuffer); + + HRESULT STDMETHODCALLTYPE GetRasterStatus(D3DRASTER_STATUS* pRasterStatus); + + HRESULT STDMETHODCALLTYPE GetDisplayMode(D3DDISPLAYMODE* pMode); + + HRESULT STDMETHODCALLTYPE GetPresentParameters(D3DPRESENT_PARAMETERS* pPresentationParameters); + + HRESULT STDMETHODCALLTYPE GetLastPresentCount(UINT* pLastPresentCount); + + HRESULT STDMETHODCALLTYPE GetPresentStats(D3DPRESENTSTATS* pPresentationStatistics); + + HRESULT STDMETHODCALLTYPE GetDisplayModeEx(D3DDISPLAYMODEEX* pMode, D3DDISPLAYROTATION* 
pRotation); + + HRESULT Reset( + D3DPRESENT_PARAMETERS* pPresentParams, + D3DDISPLAYMODEEX* pFullscreenDisplayMode); + + HRESULT WaitForVBlank(); + + void SetGammaRamp( + DWORD Flags, + const D3DGAMMARAMP* pRamp); + + void GetGammaRamp(D3DGAMMARAMP* pRamp); + + void Invalidate(HWND hWindow); + + HRESULT SetDialogBoxMode(bool bEnableDialogs); + + D3D9Surface* GetBackBuffer(UINT iBackBuffer); + + const D3DPRESENT_PARAMETERS* GetPresentParams() const { return &m_presentParams; } + + void SyncFrameLatency(); + + private: + + enum BindingIds : uint32_t { + Image = 0, + Gamma = 1, + }; + + D3DPRESENT_PARAMETERS m_presentParams; + D3DGAMMARAMP m_ramp; + + Rc<DxvkDevice> m_device; + Rc<DxvkContext> m_context; + Rc<DxvkSwapchainBlitter> m_blitter; + + Rc<vk::Presenter> m_presenter; + + Rc<hud::Hud> m_hud; + + std::vector<Com<D3D9Surface, false>> m_backBuffers; + + RECT m_srcRect; + RECT m_dstRect; + + DxvkSubmitStatus m_presentStatus; + + std::vector<Rc<DxvkImageView>> m_imageViews; + + + uint64_t m_frameId = D3D9DeviceEx::MaxFrameLatency; + uint32_t m_frameLatencyCap = 0; + Rc<sync::Fence> m_frameLatencySignal; + + bool m_dirty = true; + bool m_vsync = true; + + bool m_dialog; + bool m_lastDialog = false; + + HWND m_window = nullptr; + HMONITOR m_monitor = nullptr; + + wsi::DxvkWindowState m_windowState; + + double m_displayRefreshRate = 0.0; + + void PresentImage(UINT PresentInterval); + + void SubmitPresent(const vk::PresenterSync& Sync, uint32_t FrameId); + + void SynchronizePresent(); + + void RecreateSwapChain( + BOOL Vsync); + + void CreatePresenter(); + + void CreateRenderTargetViews(); + + void DestroyBackBuffers(); + + void CreateBackBuffers( + uint32_t NumBackBuffers); + + void CreateBlitter(); + + void CreateHud(); + + void InitRamp(); + + uint32_t GetActualFrameLatency(); + + uint32_t PickFormats( + D3D9Format Format, + VkSurfaceFormatKHR* pDstFormats); + + uint32_t PickPresentModes( + BOOL Vsync, + VkPresentModeKHR* pDstModes); + + uint32_t PickImageCount( + 
UINT Preferred); + + void NormalizePresentParameters(D3DPRESENT_PARAMETERS* pPresentParams); + + void NotifyDisplayRefreshRate( + double RefreshRate); + + HRESULT EnterFullscreenMode( + D3DPRESENT_PARAMETERS* pPresentParams, + const D3DDISPLAYMODEEX* pFullscreenDisplayMode); + + HRESULT LeaveFullscreenMode(); + + HRESULT ChangeDisplayMode( + D3DPRESENT_PARAMETERS* pPresentParams, + const D3DDISPLAYMODEEX* pFullscreenDisplayMode, + bool EnteringFullscreen); + + bool UpdatePresentRegion(const RECT* pSourceRect, const RECT* pDestRect); + + VkExtent2D GetPresentExtent(); + + VkFullScreenExclusiveEXT PickFullscreenMode(); + + std::string GetApiName(); + + }; + +} diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_swvp_emu.cpp b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_swvp_emu.cpp new file mode 100644 index 00000000..9b3b0270 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_swvp_emu.cpp @@ -0,0 +1,359 @@ +#include "d3d9_swvp_emu.h" + +#include "d3d9_device.h" +#include "d3d9_vertex_declaration.h" + +#include "../spirv/spirv_module.h" + +namespace dxvk { + + // Doesn't compare everything, only what we use in SWVP. 
+ + size_t D3D9VertexDeclHash::operator () (const D3D9VertexElements& key) const { + DxvkHashState hash; + + std::hash<BYTE> bytehash; + std::hash<WORD> wordhash; + + for (auto& element : key) { + hash.add(wordhash(element.Stream)); + hash.add(wordhash(element.Offset)); + hash.add(bytehash(element.Type)); + hash.add(bytehash(element.Method)); + hash.add(bytehash(element.Usage)); + hash.add(bytehash(element.UsageIndex)); + } + + return hash; + } + + bool D3D9VertexDeclEq::operator () (const D3D9VertexElements& a, const D3D9VertexElements& b) const { + if (a.size() != b.size()) + return false; + + bool equal = true; + + for (uint32_t i = 0; i < a.size(); i++) + equal &= std::memcmp(&a[i], &b[i], sizeof(a[0])) == 0; + + return equal; + } + + enum class DecltypeClass { + Float, Byte, Short, Dec, Half + }; + + enum DecltypeFlags { + Signed = 1, + Normalize = 2, + ReverseRGB = 4 + }; + + struct Decltype { + DecltypeClass Class; + uint32_t VectorCount; + uint32_t Flags; + }; + + Decltype ClassifyDecltype(D3DDECLTYPE Type) { + switch (Type) { + case D3DDECLTYPE_FLOAT1: return { DecltypeClass::Float, 1, DecltypeFlags::Signed }; + case D3DDECLTYPE_FLOAT2: return { DecltypeClass::Float, 2, DecltypeFlags::Signed }; + case D3DDECLTYPE_FLOAT3: return { DecltypeClass::Float, 3, DecltypeFlags::Signed }; + case D3DDECLTYPE_FLOAT4: return { DecltypeClass::Float, 4, DecltypeFlags::Signed }; + case D3DDECLTYPE_D3DCOLOR: return { DecltypeClass::Byte, 4, DecltypeFlags::Normalize | DecltypeFlags::ReverseRGB }; + case D3DDECLTYPE_UBYTE4: return { DecltypeClass::Byte, 4, 0 }; + case D3DDECLTYPE_SHORT2: return { DecltypeClass::Short, 2, DecltypeFlags::Signed }; + case D3DDECLTYPE_SHORT4: return { DecltypeClass::Short, 4, DecltypeFlags::Signed }; + case D3DDECLTYPE_UBYTE4N: return { DecltypeClass::Byte, 4, DecltypeFlags::Normalize }; + case D3DDECLTYPE_SHORT2N: return { DecltypeClass::Short, 2, DecltypeFlags::Signed | DecltypeFlags::Normalize }; + case D3DDECLTYPE_SHORT4N: return { 
DecltypeClass::Short, 4, DecltypeFlags::Signed | DecltypeFlags::Normalize }; + case D3DDECLTYPE_USHORT2N: return { DecltypeClass::Short, 2, DecltypeFlags::Normalize }; + case D3DDECLTYPE_USHORT4N: return { DecltypeClass::Short, 4, DecltypeFlags::Normalize }; + case D3DDECLTYPE_UDEC3: return { DecltypeClass::Dec, 3, 0 }; + case D3DDECLTYPE_DEC3N: return { DecltypeClass::Dec, 3, DecltypeFlags::Signed | DecltypeFlags::Normalize }; + case D3DDECLTYPE_FLOAT16_2: return { DecltypeClass::Half, 2, DecltypeFlags::Signed }; + case D3DDECLTYPE_FLOAT16_4: return { DecltypeClass::Half, 4, DecltypeFlags::Signed }; + default: return { DecltypeClass::Float, 4, DecltypeFlags::Signed }; + } + } + + class D3D9SWVPEmulatorGenerator { + + public: + + D3D9SWVPEmulatorGenerator(const std::string& name) + : m_module(spvVersion(1, 3)) { + m_entryPointId = m_module.allocateId(); + + m_module.setDebugSource( + spv::SourceLanguageUnknown, 0, + m_module.addDebugString(name.c_str()), + nullptr); + + m_module.setMemoryModel( + spv::AddressingModelLogical, + spv::MemoryModelGLSL450); + + m_module.enableCapability(spv::CapabilityGeometry); + + m_module.setExecutionMode(m_entryPointId, spv::ExecutionModeInputPoints); + m_module.setExecutionMode(m_entryPointId, spv::ExecutionModeOutputPoints); + // This has to be > 0 for some reason even though + // we will never emit a vertex + m_module.setOutputVertices(m_entryPointId, 1); + m_module.setInvocations(m_entryPointId, 1); + + m_module.functionBegin(m_module.defVoidType(), m_entryPointId, m_module.defFunctionType( + m_module.defVoidType(), 0, nullptr), spv::FunctionControlMaskNone); + m_module.opLabel(m_module.allocateId()); + } + + void compile(const D3D9VertexDecl* pDecl) { + uint32_t uint_t = m_module.defIntType(32, false); + uint32_t float_t = m_module.defFloatType(32); + uint32_t vec4_t = m_module.defVectorType(float_t, 4); + + uint32_t vec4_singular_array_t = m_module.defArrayType(vec4_t, m_module.constu32(1)); + + // Setup the buffer + uint32_t 
bufferSlot = getSWVPBufferSlot(); + + uint32_t arrayType = m_module.defRuntimeArrayTypeUnique(uint_t); + m_module.decorateArrayStride(arrayType, sizeof(uint32_t)); + + uint32_t buffer_t = m_module.defStructTypeUnique(1, &arrayType); + m_module.memberDecorateOffset(buffer_t, 0, 0); + m_module.decorate(buffer_t, spv::DecorationBufferBlock); + + uint32_t buffer = m_module.newVar(m_module.defPointerType(buffer_t, spv::StorageClassUniform), spv::StorageClassUniform); + m_module.decorateDescriptorSet(buffer, 0); + m_module.decorateBinding(buffer, bufferSlot); + + DxvkResourceSlot bufferRes; + bufferRes.slot = bufferSlot; + bufferRes.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + bufferRes.view = VK_IMAGE_VIEW_TYPE_MAX_ENUM; + bufferRes.access = VK_ACCESS_SHADER_WRITE_BIT; + m_resourceSlots.push_back(bufferRes); + + // Load our builtins + uint32_t primitiveIdPtr = m_module.newVar(m_module.defPointerType(uint_t, spv::StorageClassInput), spv::StorageClassInput); + m_module.decorateBuiltIn(primitiveIdPtr, spv::BuiltInPrimitiveId); + m_entryPointInterfaces.push_back(primitiveIdPtr); + + uint32_t primitiveId = m_module.opLoad(uint_t, primitiveIdPtr); + + // The size of any given vertex + uint32_t vertexSize = m_module.constu32(pDecl->GetSize() / sizeof(uint32_t)); + + //The offset of this vertex from the beginning of the buffer + uint32_t thisVertexOffset = m_module.opIMul(uint_t, vertexSize, primitiveId); + + + for (auto& element : pDecl->GetElements()) { + // Load the slot associated with this element + DxsoSemantic semantic = { DxsoUsage(element.Usage), element.UsageIndex }; + + uint32_t elementPtr; + uint32_t elementVar; + + elementPtr = m_module.newVar(m_module.defPointerType(vec4_singular_array_t, spv::StorageClassInput), spv::StorageClassInput); + if ((semantic.usage == DxsoUsage::Position || semantic.usage == DxsoUsage::PositionT) && element.UsageIndex == 0) { + // Load from builtin + m_module.decorateBuiltIn(elementPtr, spv::BuiltInPosition); + } + else { + // Load from 
slot + uint32_t slotIdx = RegisterLinkerSlot(semantic); + + m_module.decorateLocation(elementPtr, slotIdx); + m_interfaceSlots.inputSlots |= 1u << slotIdx; + } + + uint32_t zero = m_module.constu32(0); + elementVar = m_module.opAccessChain(m_module.defPointerType(vec4_t, spv::StorageClassInput), elementPtr, 1, &zero); + elementVar = m_module.opLoad(vec4_t, elementVar); + + m_entryPointInterfaces.push_back(elementPtr); + + // The offset of this element from the beginning of any given vertex + uint32_t perVertexElementOffset = m_module.constu32(element.Offset / sizeof(uint32_t)); + + // The offset of this element from the beginning of the buffer for **THIS** vertex + uint32_t elementOffset = m_module.opIAdd(uint_t, thisVertexOffset, perVertexElementOffset); + + // Write to the buffer at the element offset for each part of the vector. + Decltype elementInfo = ClassifyDecltype(D3DDECLTYPE(element.Type)); + + if (elementInfo.Class == DecltypeClass::Dec) { + // TODO! + Logger::warn("Encountered DEC3/UDEC3N class, ignoring..."); + continue; + } + + uint32_t vecn_t = m_module.defVectorType(float_t, elementInfo.VectorCount); + uint32_t componentSet; + + // Modifiers... + if (elementInfo.Flags & DecltypeFlags::ReverseRGB) { + std::array<uint32_t, 4> indices = { 2, 1, 0, 3 }; + componentSet = m_module.opVectorShuffle(vecn_t, elementVar, elementVar, elementInfo.VectorCount, indices.data()); + } + else { + std::array<uint32_t, 4> indices = { 0, 1, 2, 3 }; + componentSet = m_module.opVectorShuffle(vecn_t, elementVar, elementVar, elementInfo.VectorCount, indices.data()); + } + + if (elementInfo.Flags & DecltypeFlags::Normalize) + componentSet = m_module.opVectorTimesScalar(vecn_t, componentSet, m_module.constf32(255.0f)); + + + bool isSigned = elementInfo.Flags & DecltypeFlags::Signed; + + // Convert the component to the correct type/value. + switch (elementInfo.Class) { + case DecltypeClass::Float: break; // Do nothing! 
+ case DecltypeClass::Byte: { + m_module.enableCapability(spv::CapabilityInt8); + + uint32_t type = m_module.defIntType(8, isSigned); + type = m_module.defVectorType(type, elementInfo.VectorCount); + + componentSet = isSigned + ? m_module.opConvertFtoS(type, componentSet) + : m_module.opConvertFtoU(type, componentSet); + + break; + } + case DecltypeClass::Short: { + m_module.enableCapability(spv::CapabilityInt16); + + uint32_t type = m_module.defIntType(16, isSigned); + type = m_module.defVectorType(type, elementInfo.VectorCount); + + componentSet = isSigned + ? m_module.opConvertFtoS(type, componentSet) + : m_module.opConvertFtoU(type, componentSet); + + break; + } + case DecltypeClass::Half: { + m_module.enableCapability(spv::CapabilityFloat16); + + uint32_t type = m_module.defFloatType(16); + type = m_module.defVectorType(type, elementInfo.VectorCount); + componentSet = m_module.opFConvert(type, componentSet); + + break; + } + case DecltypeClass::Dec: { + // TODO! + break; + } + } + + // Bitcast to dwords before we write. + uint32_t dwordCount = GetDecltypeSize(D3DDECLTYPE(element.Type)) / sizeof(uint32_t); + uint32_t dwordVector = m_module.opBitcast( + m_module.defVectorType(uint_t, dwordCount), + componentSet); + + // Finally write each dword to the buffer! 
+ for (uint32_t i = 0; i < dwordCount; i++) { + std::array<uint32_t, 2> bufferIndices = { m_module.constu32(0), elementOffset }; + + uint32_t writeDest = m_module.opAccessChain(m_module.defPointerType(uint_t, spv::StorageClassUniform), buffer, bufferIndices.size(), bufferIndices.data()); + uint32_t currentDword = m_module.opCompositeExtract(uint_t, dwordVector, 1, &i); + + m_module.opStore(writeDest, currentDword); + + elementOffset = m_module.opIAdd(uint_t, elementOffset, m_module.constu32(1)); + } + } + } + + Rc<DxvkShader> finalize() { + m_module.opReturn(); + m_module.functionEnd(); + + m_module.addEntryPoint(m_entryPointId, + spv::ExecutionModelGeometry, "main", + m_entryPointInterfaces.size(), + m_entryPointInterfaces.data()); + m_module.setDebugName(m_entryPointId, "main"); + + DxvkShaderConstData constData = { }; + + return new DxvkShader( + VK_SHADER_STAGE_GEOMETRY_BIT, + m_resourceSlots.size(), + m_resourceSlots.data(), + m_interfaceSlots, + m_module.compile(), + DxvkShaderOptions(), + std::move(constData)); + } + + private: + + SpirvModule m_module; + + std::vector<uint32_t> m_entryPointInterfaces; + uint32_t m_entryPointId = 0; + + std::vector<DxvkResourceSlot> m_resourceSlots; + DxvkInterfaceSlots m_interfaceSlots; + + }; + + Rc<DxvkShader> D3D9SWVPEmulator::GetShaderModule(D3D9DeviceEx* pDevice, const D3D9VertexDecl* pDecl) { + auto& elements = pDecl->GetElements(); + + // Use the shader's unique key for the lookup + { std::unique_lock<dxvk::mutex> lock(m_mutex); + + auto entry = m_modules.find(elements); + if (entry != m_modules.end()) + return entry->second; + } + + Sha1Hash hash = Sha1Hash::compute( + elements.data(), elements.size() * sizeof(elements[0])); + + DxvkShaderKey key = { VK_SHADER_STAGE_GEOMETRY_BIT , hash }; + std::string name = str::format("SWVP_", key.toString()); + + // This shader has not been compiled yet, so we have to create a + // new module. This takes a while, so we won't lock the structure. 
+ D3D9SWVPEmulatorGenerator generator(name); + generator.compile(pDecl); + Rc<DxvkShader> shader = generator.finalize(); + + shader->setShaderKey(key); + pDevice->GetDXVKDevice()->registerShader(shader); + + const std::string dumpPath = env::getEnvVar("DXVK_SHADER_DUMP_PATH"); + + if (dumpPath.size() != 0) { + std::ofstream dumpStream( + str::format(dumpPath, "/", name, ".spv"), + std::ios_base::binary | std::ios_base::trunc); + + shader->dump(dumpStream); + } + + // Insert the new module into the lookup table. If another thread + // has compiled the same shader in the meantime, we should return + // that object instead and discard the newly created module. + { std::unique_lock<dxvk::mutex> lock(m_mutex); + + auto status = m_modules.insert({ elements, shader }); + if (!status.second) + return status.first->second; + } + + return shader; + } + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_swvp_emu.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_swvp_emu.h new file mode 100644 index 00000000..91aae4c2 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_swvp_emu.h @@ -0,0 +1,38 @@ +#pragma once + +#include <unordered_map> + +#include "d3d9_include.h" + +#include "../dxvk/dxvk_shader.h" + +namespace dxvk { + + class D3D9VertexDecl; + class D3D9DeviceEx; + + struct D3D9VertexDeclHash { + size_t operator () (const D3D9VertexElements& key) const; + }; + + struct D3D9VertexDeclEq { + bool operator () (const D3D9VertexElements& a, const D3D9VertexElements& b) const; + }; + + class D3D9SWVPEmulator { + + public: + + Rc<DxvkShader> GetShaderModule(D3D9DeviceEx* pDevice, const D3D9VertexDecl* pDecl); + + private: + + dxvk::mutex m_mutex; + + std::unordered_map< + D3D9VertexElements, Rc<DxvkShader>, + D3D9VertexDeclHash, D3D9VertexDeclEq> m_modules; + + }; + +} diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_texture.cpp b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_texture.cpp new file mode 100644 index 00000000..2ef86be3 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_texture.cpp @@ -0,0 +1,248 @@ +#include "d3d9_texture.h" + +#include "d3d9_util.h" + +namespace dxvk { + + // Direct3DTexture9 + + D3D9Texture2D::D3D9Texture2D( + D3D9DeviceEx* pDevice, + const D3D9_COMMON_TEXTURE_DESC* pDesc) + : D3D9Texture2DBase( pDevice, pDesc, D3DRTYPE_TEXTURE ) { } + + + HRESULT STDMETHODCALLTYPE D3D9Texture2D::QueryInterface(REFIID riid, void** ppvObject) { + if (ppvObject == nullptr) + return E_POINTER; + + *ppvObject = nullptr; + + if (riid == __uuidof(IUnknown) + || riid == __uuidof(IDirect3DResource9) + || riid == __uuidof(IDirect3DBaseTexture9) + || riid == __uuidof(IDirect3DTexture9)) { + *ppvObject = ref(this); + return S_OK; + } + + Logger::warn("D3D9Texture2D::QueryInterface: Unknown interface query"); + Logger::warn(str::format(riid)); + return 
E_NOINTERFACE; + } + + + D3DRESOURCETYPE STDMETHODCALLTYPE D3D9Texture2D::GetType() { + return D3DRTYPE_TEXTURE; + } + + + HRESULT STDMETHODCALLTYPE D3D9Texture2D::GetLevelDesc(UINT Level, D3DSURFACE_DESC *pDesc) { + if (unlikely(Level >= m_texture.ExposedMipLevels())) + return D3DERR_INVALIDCALL; + + return GetSubresource(Level)->GetDesc(pDesc); + } + + + HRESULT STDMETHODCALLTYPE D3D9Texture2D::GetSurfaceLevel(UINT Level, IDirect3DSurface9** ppSurfaceLevel) { + InitReturnPtr(ppSurfaceLevel); + + if (unlikely(Level >= m_texture.ExposedMipLevels())) + return D3DERR_INVALIDCALL; + + if (unlikely(ppSurfaceLevel == nullptr)) + return D3DERR_INVALIDCALL; + + *ppSurfaceLevel = ref(GetSubresource(Level)); + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9Texture2D::LockRect(UINT Level, D3DLOCKED_RECT* pLockedRect, CONST RECT* pRect, DWORD Flags) { + if (unlikely(Level >= m_texture.ExposedMipLevels())) + return D3DERR_INVALIDCALL; + + return GetSubresource(Level)->LockRect(pLockedRect, pRect, Flags); + } + + + HRESULT STDMETHODCALLTYPE D3D9Texture2D::UnlockRect(UINT Level) { + if (unlikely(Level >= m_texture.ExposedMipLevels())) + return D3DERR_INVALIDCALL; + + return GetSubresource(Level)->UnlockRect(); + } + + + HRESULT STDMETHODCALLTYPE D3D9Texture2D::AddDirtyRect(CONST RECT* pDirtyRect) { + if (pDirtyRect) { + D3DBOX box = { UINT(pDirtyRect->left), UINT(pDirtyRect->top), UINT(pDirtyRect->right), UINT(pDirtyRect->bottom), 0, 1 }; + m_texture.AddDirtyBox(&box, 0); + } else { + m_texture.AddDirtyBox(nullptr, 0); + } + return D3D_OK; + } + + + // Direct3DVolumeTexture9 + + + D3D9Texture3D::D3D9Texture3D( + D3D9DeviceEx* pDevice, + const D3D9_COMMON_TEXTURE_DESC* pDesc) + : D3D9Texture3DBase( pDevice, pDesc, D3DRTYPE_VOLUMETEXTURE ) { } + + + HRESULT STDMETHODCALLTYPE D3D9Texture3D::QueryInterface(REFIID riid, void** ppvObject) { + if (ppvObject == nullptr) + return E_POINTER; + + *ppvObject = nullptr; + + if (riid == __uuidof(IUnknown) + || riid == 
__uuidof(IDirect3DResource9) + || riid == __uuidof(IDirect3DBaseTexture9) + || riid == __uuidof(IDirect3DVolumeTexture9)) { + *ppvObject = ref(this); + return S_OK; + } + + Logger::warn("D3D9Texture3D::QueryInterface: Unknown interface query"); + Logger::warn(str::format(riid)); + return E_NOINTERFACE; + } + + + D3DRESOURCETYPE STDMETHODCALLTYPE D3D9Texture3D::GetType() { + return D3DRTYPE_VOLUMETEXTURE; + } + + + HRESULT STDMETHODCALLTYPE D3D9Texture3D::GetLevelDesc(UINT Level, D3DVOLUME_DESC *pDesc) { + if (unlikely(Level >= m_texture.ExposedMipLevels())) + return D3DERR_INVALIDCALL; + + return GetSubresource(Level)->GetDesc(pDesc); + } + + + HRESULT STDMETHODCALLTYPE D3D9Texture3D::GetVolumeLevel(UINT Level, IDirect3DVolume9** ppVolumeLevel) { + InitReturnPtr(ppVolumeLevel); + + if (unlikely(Level >= m_texture.ExposedMipLevels())) + return D3DERR_INVALIDCALL; + + if (unlikely(ppVolumeLevel == nullptr)) + return D3DERR_INVALIDCALL; + + *ppVolumeLevel = ref(GetSubresource(Level)); + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9Texture3D::LockBox(UINT Level, D3DLOCKED_BOX* pLockedBox, CONST D3DBOX* pBox, DWORD Flags) { + if (unlikely(Level >= m_texture.ExposedMipLevels())) + return D3DERR_INVALIDCALL; + + return GetSubresource(Level)->LockBox(pLockedBox, pBox, Flags); + } + + + HRESULT STDMETHODCALLTYPE D3D9Texture3D::UnlockBox(UINT Level) { + if (unlikely(Level >= m_texture.ExposedMipLevels())) + return D3DERR_INVALIDCALL; + + return GetSubresource(Level)->UnlockBox(); + } + + HRESULT STDMETHODCALLTYPE D3D9Texture3D::AddDirtyBox(CONST D3DBOX* pDirtyBox) { + m_texture.AddDirtyBox(pDirtyBox, 0); + return D3D_OK; + } + + + // Direct3DCubeTexture9 + + + D3D9TextureCube::D3D9TextureCube( + D3D9DeviceEx* pDevice, + const D3D9_COMMON_TEXTURE_DESC* pDesc) + : D3D9TextureCubeBase( pDevice, pDesc, D3DRTYPE_CUBETEXTURE ) { } + + + HRESULT STDMETHODCALLTYPE D3D9TextureCube::QueryInterface(REFIID riid, void** ppvObject) { + if (ppvObject == nullptr) + return 
E_POINTER; + + *ppvObject = nullptr; + + if (riid == __uuidof(IUnknown) + || riid == __uuidof(IDirect3DResource9) + || riid == __uuidof(IDirect3DBaseTexture9) + || riid == __uuidof(IDirect3DCubeTexture9)) { + *ppvObject = ref(this); + return S_OK; + } + + Logger::warn("D3D9TextureCube::QueryInterface: Unknown interface query"); + Logger::warn(str::format(riid)); + return E_NOINTERFACE; + } + + + D3DRESOURCETYPE STDMETHODCALLTYPE D3D9TextureCube::GetType() { + return D3DRTYPE_CUBETEXTURE; + } + + + HRESULT STDMETHODCALLTYPE D3D9TextureCube::GetLevelDesc(UINT Level, D3DSURFACE_DESC *pDesc) { + if (unlikely(Level >= m_texture.ExposedMipLevels())) + return D3DERR_INVALIDCALL; + + return GetSubresource(Level)->GetDesc(pDesc); + } + + + HRESULT STDMETHODCALLTYPE D3D9TextureCube::GetCubeMapSurface(D3DCUBEMAP_FACES Face, UINT Level, IDirect3DSurface9** ppSurfaceLevel) { + InitReturnPtr(ppSurfaceLevel); + + if (unlikely(Level >= m_texture.ExposedMipLevels() || Face >= D3DCUBEMAP_FACES(6))) + return D3DERR_INVALIDCALL; + + if (unlikely(ppSurfaceLevel == nullptr)) + return D3DERR_INVALIDCALL; + + *ppSurfaceLevel = ref(GetSubresource(m_texture.CalcSubresource(UINT(Face), Level))); + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9TextureCube::LockRect(D3DCUBEMAP_FACES Face, UINT Level, D3DLOCKED_RECT* pLockedRect, CONST RECT* pRect, DWORD Flags) { + if (unlikely(Face > D3DCUBEMAP_FACE_NEGATIVE_Z || Level >= m_texture.ExposedMipLevels())) + return D3DERR_INVALIDCALL; + + return GetSubresource(m_texture.CalcSubresource(UINT(Face), Level))->LockRect(pLockedRect, pRect, Flags); + } + + + HRESULT STDMETHODCALLTYPE D3D9TextureCube::UnlockRect(D3DCUBEMAP_FACES Face, UINT Level) { + if (unlikely(Face > D3DCUBEMAP_FACE_NEGATIVE_Z || Level >= m_texture.ExposedMipLevels())) + return D3DERR_INVALIDCALL; + + return GetSubresource(m_texture.CalcSubresource(UINT(Face), Level))->UnlockRect(); + } + + + HRESULT STDMETHODCALLTYPE D3D9TextureCube::AddDirtyRect(D3DCUBEMAP_FACES Face, CONST 
RECT* pDirtyRect) { + if (pDirtyRect) { + D3DBOX box = { UINT(pDirtyRect->left), UINT(pDirtyRect->top), UINT(pDirtyRect->right), UINT(pDirtyRect->bottom), 0, 1 }; + m_texture.AddDirtyBox(&box, Face); + } else { + m_texture.AddDirtyBox(nullptr, Face); + } + return D3D_OK; + } + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_texture.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_texture.h new file mode 100644 index 00000000..cfe6ecc2 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_texture.h @@ -0,0 +1,241 @@ +#pragma once + +#include "d3d9_device.h" +#include "d3d9_surface.h" +#include "d3d9_volume.h" +#include "d3d9_util.h" + +#include <vector> +#include <list> +#include <mutex> +#include <new> +#include <type_traits> + +namespace dxvk { + + template <typename SubresourceType, typename... Base> + class D3D9BaseTexture : public D3D9Resource<Base...> { + + public: + + using SubresourceData = std::aligned_storage_t<sizeof(SubresourceType), alignof(SubresourceType)>; + + D3D9BaseTexture( + D3D9DeviceEx* pDevice, + const D3D9_COMMON_TEXTURE_DESC* pDesc, + D3DRESOURCETYPE ResourceType) + : D3D9Resource<Base...> ( pDevice ) + , m_texture ( pDevice, pDesc, ResourceType ) + , m_lod ( 0 ) { + const uint32_t arraySlices = m_texture.Desc()->ArraySize; + const uint32_t mipLevels = m_texture.Desc()->MipLevels; + + m_subresources.resize(arraySlices * mipLevels); + + for (uint32_t i = 0; i < arraySlices; i++) { + for (uint32_t j = 0; j < mipLevels; j++) { + const uint32_t subresource = m_texture.CalcSubresource(i, j); + + SubresourceType* subObj = this->GetSubresource(subresource); + + new (subObj) SubresourceType( + pDevice, + &m_texture, + i, j, + this); + } + } + } + + ~D3D9BaseTexture() { + for (uint32_t i = 0; i < m_subresources.size(); i++) { + SubresourceType* subObj = this->GetSubresource(i); + subObj->~SubresourceType(); + } + } + + DWORD STDMETHODCALLTYPE SetLOD(DWORD LODNew) final { + DWORD oldLod = m_lod; + m_lod = LODNew; + + m_texture.CreateSampleView(LODNew); + if (this->GetPrivateRefCount() > 0) + this->m_parent->MarkTextureBindingDirty(this); + + return oldLod; + } + + DWORD STDMETHODCALLTYPE GetLOD() final { + return m_lod; + } + + DWORD STDMETHODCALLTYPE GetLevelCount() final { + return 
m_texture.ExposedMipLevels(); + } + + HRESULT STDMETHODCALLTYPE SetAutoGenFilterType(D3DTEXTUREFILTERTYPE FilterType) final { + if (unlikely(FilterType == D3DTEXF_NONE)) + return D3DERR_INVALIDCALL; + + auto lock = this->m_parent->LockDevice(); + + m_texture.SetMipFilter(FilterType); + if (m_texture.IsAutomaticMip()) + this->m_parent->MarkTextureMipsDirty(&m_texture); + return D3D_OK; + } + + D3DTEXTUREFILTERTYPE STDMETHODCALLTYPE GetAutoGenFilterType() final { + return m_texture.GetMipFilter(); + } + + void STDMETHODCALLTYPE GenerateMipSubLevels() final { + if (!m_texture.NeedsMipGen()) + return; + + auto lock = this->m_parent->LockDevice(); + + this->m_parent->MarkTextureMipsUnDirty(&m_texture); + this->m_parent->EmitGenerateMips(&m_texture); + } + + void STDMETHODCALLTYPE PreLoad() final { + m_texture.PreLoadAll(); + } + + D3D9CommonTexture* GetCommonTexture() { + return &m_texture; + } + + SubresourceType* GetSubresource(UINT Subresource) { + return reinterpret_cast<SubresourceType*>(&m_subresources[Subresource]); + } + + protected: + + D3D9CommonTexture m_texture; + + std::vector<SubresourceData> m_subresources; + + DWORD m_lod; + + }; + + using D3D9Texture2DBase = D3D9BaseTexture<D3D9Surface, IDirect3DTexture9>; + class D3D9Texture2D final : public D3D9Texture2DBase { + + public: + + D3D9Texture2D( + D3D9DeviceEx* pDevice, + const D3D9_COMMON_TEXTURE_DESC* pDesc); + + HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, void** ppvObject); + + D3DRESOURCETYPE STDMETHODCALLTYPE GetType(); + + HRESULT STDMETHODCALLTYPE GetLevelDesc(UINT Level, D3DSURFACE_DESC *pDesc); + + HRESULT STDMETHODCALLTYPE GetSurfaceLevel(UINT Level, IDirect3DSurface9** ppSurfaceLevel); + + HRESULT STDMETHODCALLTYPE LockRect(UINT Level, D3DLOCKED_RECT* pLockedRect, CONST RECT* pRect, DWORD Flags); + + HRESULT STDMETHODCALLTYPE UnlockRect(UINT Level); + + HRESULT STDMETHODCALLTYPE AddDirtyRect(CONST RECT* pDirtyRect); + + }; + + using D3D9Texture3DBase = D3D9BaseTexture<D3D9Volume, 
IDirect3DVolumeTexture9>; + class D3D9Texture3D final : public D3D9Texture3DBase { + + public: + + D3D9Texture3D( + D3D9DeviceEx* pDevice, + const D3D9_COMMON_TEXTURE_DESC* pDesc); + + HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, void** ppvObject); + + D3DRESOURCETYPE STDMETHODCALLTYPE GetType(); + + HRESULT STDMETHODCALLTYPE GetLevelDesc(UINT Level, D3DVOLUME_DESC *pDesc); + + HRESULT STDMETHODCALLTYPE GetVolumeLevel(UINT Level, IDirect3DVolume9** ppSurfaceLevel); + + HRESULT STDMETHODCALLTYPE LockBox(UINT Level, D3DLOCKED_BOX* pLockedBox, CONST D3DBOX* pBox, DWORD Flags); + + HRESULT STDMETHODCALLTYPE UnlockBox(UINT Level); + + HRESULT STDMETHODCALLTYPE AddDirtyBox(CONST D3DBOX* pDirtyBox); + + }; + + using D3D9TextureCubeBase = D3D9BaseTexture<D3D9Surface, IDirect3DCubeTexture9>; + class D3D9TextureCube final : public D3D9TextureCubeBase { + + public: + + D3D9TextureCube( + D3D9DeviceEx* pDevice, + const D3D9_COMMON_TEXTURE_DESC* pDesc); + + HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, void** ppvObject); + + D3DRESOURCETYPE STDMETHODCALLTYPE GetType(); + + HRESULT STDMETHODCALLTYPE GetLevelDesc(UINT Level, D3DSURFACE_DESC *pDesc); + + HRESULT STDMETHODCALLTYPE GetCubeMapSurface(D3DCUBEMAP_FACES Face, UINT Level, IDirect3DSurface9** ppSurfaceLevel); + + HRESULT STDMETHODCALLTYPE LockRect(D3DCUBEMAP_FACES Face, UINT Level, D3DLOCKED_RECT* pLockedRect, CONST RECT* pRect, DWORD Flags); + + HRESULT STDMETHODCALLTYPE UnlockRect(D3DCUBEMAP_FACES Face, UINT Level); + + HRESULT STDMETHODCALLTYPE AddDirtyRect(D3DCUBEMAP_FACES Face, CONST RECT* pDirtyRect); + + }; + + static_assert(sizeof(D3D9Texture2D) == sizeof(D3D9Texture3D) && + sizeof(D3D9Texture2D) == sizeof(D3D9TextureCube)); + + inline D3D9CommonTexture* GetCommonTexture(IDirect3DBaseTexture9* ptr) { + if (ptr == nullptr) + return nullptr; + + // We can avoid needing to get the type as m_texture has the same offset + // no matter the texture type. 
+ // The compiler is not smart enough to eliminate the call to GetType as it is + // not marked const. + return static_cast<D3D9Texture2D*>(ptr)->GetCommonTexture(); + } + + inline D3D9CommonTexture* GetCommonTexture(D3D9Surface* ptr) { + if (ptr == nullptr) + return nullptr; + + return ptr->GetCommonTexture(); + } + + inline D3D9CommonTexture* GetCommonTexture(IDirect3DSurface9* ptr) { + return GetCommonTexture(static_cast<D3D9Surface*>(ptr)); + } + + inline void TextureRefPrivate(IDirect3DBaseTexture9* tex, bool AddRef) { + if (tex == nullptr) + return; + + // We can avoid needing to get the type as m_refCount has the same offset + // no matter the texture type. + // The compiler is not smart enough to eliminate the call to GetType as it is + // not marked const. + return CastRefPrivate<D3D9Texture2D>(tex, AddRef); + } + + inline void TextureChangePrivate(IDirect3DBaseTexture9*& dst, IDirect3DBaseTexture9* src) { + TextureRefPrivate(dst, false); + TextureRefPrivate(src, true); + dst = src; + } + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_util.cpp b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_util.cpp new file mode 100644 index 00000000..91cf2637 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_util.cpp @@ -0,0 +1,380 @@ +#include "d3d9_util.h" + +namespace dxvk { + + HRESULT DecodeMultiSampleType( + D3DMULTISAMPLE_TYPE MultiSample, + DWORD MultisampleQuality, + VkSampleCountFlagBits* pCount) { + uint32_t sampleCount = std::max<uint32_t>(MultiSample, 1u); + + // Check if this is a power of two... + if (sampleCount & (sampleCount - 1)) + return D3DERR_INVALIDCALL; + + if (MultiSample == D3DMULTISAMPLE_NONMASKABLE) + sampleCount = 1u << MultisampleQuality; + + if (pCount != nullptr) + *pCount = VkSampleCountFlagBits(sampleCount); + + return D3D_OK; + } + + + VkFormat GetPackedDepthStencilFormat(D3D9Format Format) { + switch (Format) { + case D3D9Format::D15S1: + return VK_FORMAT_D16_UNORM_S8_UINT; // This should never happen! + + case D3D9Format::D16: + case D3D9Format::D16_LOCKABLE: + case D3D9Format::DF16: + return VK_FORMAT_D16_UNORM; + + case D3D9Format::D24X8: + case D3D9Format::DF24: + return VK_FORMAT_X8_D24_UNORM_PACK32; + + case D3D9Format::D24X4S4: + case D3D9Format::D24FS8: + case D3D9Format::D24S8: + case D3D9Format::INTZ: + return VK_FORMAT_D24_UNORM_S8_UINT; + + case D3D9Format::D32: + case D3D9Format::D32_LOCKABLE: + case D3D9Format::D32F_LOCKABLE: + return VK_FORMAT_D32_SFLOAT; + + case D3D9Format::S8_LOCKABLE: + return VK_FORMAT_S8_UINT; + + default: + return VK_FORMAT_UNDEFINED; + } + } + + + VkFormatFeatureFlags GetImageFormatFeatures(DWORD Usage) { + VkFormatFeatureFlags features = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; + + if (Usage & D3DUSAGE_DEPTHSTENCIL) + features |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT; + + if (Usage & D3DUSAGE_RENDERTARGET) + features |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; + + return features; + } + + + VkImageUsageFlags GetImageUsageFlags(DWORD Usage) { 
+ VkImageUsageFlags usage = VK_IMAGE_USAGE_SAMPLED_BIT; + + if (Usage & D3DUSAGE_DEPTHSTENCIL) + usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + + if (Usage & D3DUSAGE_RENDERTARGET) + usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + + return usage; + } + + + uint32_t GetVertexCount(D3DPRIMITIVETYPE type, UINT count) { + switch (type) { + default: + case D3DPT_TRIANGLELIST: return count * 3; + case D3DPT_POINTLIST: return count; + case D3DPT_LINELIST: return count * 2; + case D3DPT_LINESTRIP: return count + 1; + case D3DPT_TRIANGLESTRIP: return count + 2; + case D3DPT_TRIANGLEFAN: return count + 2; + } + } + + + DxvkInputAssemblyState DecodeInputAssemblyState(D3DPRIMITIVETYPE type) { + switch (type) { + default: + case D3DPT_TRIANGLELIST: + return { VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, VK_FALSE, 0 }; + + case D3DPT_POINTLIST: + return { VK_PRIMITIVE_TOPOLOGY_POINT_LIST, VK_FALSE, 0 }; + + case D3DPT_LINELIST: + return { VK_PRIMITIVE_TOPOLOGY_LINE_LIST, VK_FALSE, 0 }; + + case D3DPT_LINESTRIP: + return { VK_PRIMITIVE_TOPOLOGY_LINE_STRIP, VK_FALSE, 0 }; + + case D3DPT_TRIANGLESTRIP: + return { VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, VK_FALSE, 0 }; + + case D3DPT_TRIANGLEFAN: + return { VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN, VK_FALSE, 0 }; + } + } + + + VkBlendFactor DecodeBlendFactor(D3DBLEND BlendFactor, bool IsAlpha) { + switch (BlendFactor) { + default: + case D3DBLEND_ZERO: return VK_BLEND_FACTOR_ZERO; + case D3DBLEND_ONE: return VK_BLEND_FACTOR_ONE; + case D3DBLEND_SRCCOLOR: return VK_BLEND_FACTOR_SRC_COLOR; + case D3DBLEND_INVSRCCOLOR: return VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR; + case D3DBLEND_SRCALPHA: return VK_BLEND_FACTOR_SRC_ALPHA; + case D3DBLEND_INVSRCALPHA: return VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + case D3DBLEND_DESTALPHA: return VK_BLEND_FACTOR_DST_ALPHA; + case D3DBLEND_INVDESTALPHA: return VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA; + case D3DBLEND_DESTCOLOR: return VK_BLEND_FACTOR_DST_COLOR; + case D3DBLEND_INVDESTCOLOR: return 
VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR; + case D3DBLEND_SRCALPHASAT: return VK_BLEND_FACTOR_SRC_ALPHA_SATURATE; + case D3DBLEND_BOTHSRCALPHA: return VK_BLEND_FACTOR_SRC_ALPHA; + case D3DBLEND_BOTHINVSRCALPHA: return VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + case D3DBLEND_BLENDFACTOR: return IsAlpha ? VK_BLEND_FACTOR_CONSTANT_ALPHA : VK_BLEND_FACTOR_CONSTANT_COLOR; + case D3DBLEND_INVBLENDFACTOR: return IsAlpha ? VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA : VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR; + case D3DBLEND_SRCCOLOR2: return VK_BLEND_FACTOR_SRC1_COLOR; + case D3DBLEND_INVSRCCOLOR2: return VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR; + } + } + + + VkBlendOp DecodeBlendOp(D3DBLENDOP BlendOp) { + switch (BlendOp) { + default: + case D3DBLENDOP_ADD: return VK_BLEND_OP_ADD; + case D3DBLENDOP_SUBTRACT: return VK_BLEND_OP_SUBTRACT; + case D3DBLENDOP_REVSUBTRACT: return VK_BLEND_OP_REVERSE_SUBTRACT; + case D3DBLENDOP_MIN: return VK_BLEND_OP_MIN; + case D3DBLENDOP_MAX: return VK_BLEND_OP_MAX; + } + } + + + VkFilter DecodeFilter(D3DTEXTUREFILTERTYPE Filter) { + switch (Filter) { + case D3DTEXF_NONE: + case D3DTEXF_POINT: + return VK_FILTER_NEAREST; + default: + return VK_FILTER_LINEAR; + } + } + + + D3D9MipFilter DecodeMipFilter(D3DTEXTUREFILTERTYPE Filter) { + D3D9MipFilter filter; + filter.MipsEnabled = Filter != D3DTEXF_NONE; + + switch (Filter) { + case D3DTEXF_POINT: + case D3DTEXF_NONE: + filter.MipFilter = VK_SAMPLER_MIPMAP_MODE_NEAREST; break; + default: + filter.MipFilter = VK_SAMPLER_MIPMAP_MODE_LINEAR; break; + } + + return filter; + } + + + bool IsAnisotropic(D3DTEXTUREFILTERTYPE Filter) { + return Filter == D3DTEXF_ANISOTROPIC; + } + + + VkSamplerAddressMode DecodeAddressMode(D3DTEXTUREADDRESS Mode) { + switch (Mode) { + default: + case D3DTADDRESS_WRAP: + return VK_SAMPLER_ADDRESS_MODE_REPEAT; + case D3DTADDRESS_MIRROR: + return VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; + case D3DTADDRESS_CLAMP: + return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + case D3DTADDRESS_BORDER: + 
return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; + case D3DTADDRESS_MIRRORONCE: + return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; + } + } + + + VkCompareOp DecodeCompareOp(D3DCMPFUNC Func) { + switch (Func) { + default: + case D3DCMP_NEVER: return VK_COMPARE_OP_NEVER; + case D3DCMP_LESS: return VK_COMPARE_OP_LESS; + case D3DCMP_EQUAL: return VK_COMPARE_OP_EQUAL; + case D3DCMP_LESSEQUAL: return VK_COMPARE_OP_LESS_OR_EQUAL; + case D3DCMP_GREATER: return VK_COMPARE_OP_GREATER; + case D3DCMP_NOTEQUAL: return VK_COMPARE_OP_NOT_EQUAL; + case D3DCMP_GREATEREQUAL: return VK_COMPARE_OP_GREATER_OR_EQUAL; + case D3DCMP_ALWAYS: return VK_COMPARE_OP_ALWAYS; + } + } + + + VkStencilOp DecodeStencilOp(D3DSTENCILOP Op) { + switch (Op) { + default: + case D3DSTENCILOP_KEEP: return VK_STENCIL_OP_KEEP; + case D3DSTENCILOP_ZERO: return VK_STENCIL_OP_ZERO; + case D3DSTENCILOP_REPLACE: return VK_STENCIL_OP_REPLACE; + case D3DSTENCILOP_INCRSAT: return VK_STENCIL_OP_INCREMENT_AND_CLAMP; + case D3DSTENCILOP_DECRSAT: return VK_STENCIL_OP_DECREMENT_AND_CLAMP; + case D3DSTENCILOP_INVERT: return VK_STENCIL_OP_INVERT; + case D3DSTENCILOP_INCR: return VK_STENCIL_OP_INCREMENT_AND_WRAP; + case D3DSTENCILOP_DECR: return VK_STENCIL_OP_DECREMENT_AND_WRAP; + } + } + + + VkCullModeFlags DecodeCullMode(D3DCULL Mode) { + switch (Mode) { + case D3DCULL_NONE: return VK_CULL_MODE_NONE; + case D3DCULL_CW: return VK_CULL_MODE_FRONT_BIT; + default: + case D3DCULL_CCW: return VK_CULL_MODE_BACK_BIT; + } + } + + + VkPolygonMode DecodeFillMode(D3DFILLMODE Mode) { + switch (Mode) { + case D3DFILL_POINT: return VK_POLYGON_MODE_POINT; + case D3DFILL_WIREFRAME: return VK_POLYGON_MODE_LINE; + default: + case D3DFILL_SOLID: return VK_POLYGON_MODE_FILL; + } + } + + + VkIndexType DecodeIndexType(D3D9Format Format) { + return Format == D3D9Format::INDEX16 + ? 
VK_INDEX_TYPE_UINT16 + : VK_INDEX_TYPE_UINT32; + } + + + VkFormat DecodeDecltype(D3DDECLTYPE Type) { + switch (Type) { + case D3DDECLTYPE_FLOAT1: return VK_FORMAT_R32_SFLOAT; + case D3DDECLTYPE_FLOAT2: return VK_FORMAT_R32G32_SFLOAT; + case D3DDECLTYPE_FLOAT3: return VK_FORMAT_R32G32B32_SFLOAT; + case D3DDECLTYPE_FLOAT4: return VK_FORMAT_R32G32B32A32_SFLOAT; + case D3DDECLTYPE_D3DCOLOR: return VK_FORMAT_B8G8R8A8_UNORM; + case D3DDECLTYPE_UBYTE4: return VK_FORMAT_R8G8B8A8_USCALED; + case D3DDECLTYPE_SHORT2: return VK_FORMAT_R16G16_SSCALED; + case D3DDECLTYPE_SHORT4: return VK_FORMAT_R16G16B16A16_SSCALED; + case D3DDECLTYPE_UBYTE4N: return VK_FORMAT_R8G8B8A8_UNORM; + case D3DDECLTYPE_SHORT2N: return VK_FORMAT_R16G16_SNORM; + case D3DDECLTYPE_SHORT4N: return VK_FORMAT_R16G16B16A16_SNORM; + case D3DDECLTYPE_USHORT2N: return VK_FORMAT_R16G16_UNORM; + case D3DDECLTYPE_USHORT4N: return VK_FORMAT_R16G16B16A16_UNORM; + case D3DDECLTYPE_UDEC3: return VK_FORMAT_A2B10G10R10_USCALED_PACK32; + case D3DDECLTYPE_FLOAT16_2: return VK_FORMAT_R16G16_SFLOAT; + case D3DDECLTYPE_FLOAT16_4: return VK_FORMAT_R16G16B16A16_SFLOAT; + case D3DDECLTYPE_DEC3N: return VK_FORMAT_A2B10G10R10_SNORM_PACK32; + case D3DDECLTYPE_UNUSED: + default: return VK_FORMAT_UNDEFINED; + } + } + + void ConvertBox(D3DBOX box, VkOffset3D& offset, VkExtent3D& extent) { + offset.x = box.Left; + offset.y = box.Top; + offset.z = box.Front; + + extent.width = box.Right - box.Left; + extent.height = box.Bottom - box.Top; + extent.depth = box.Back - box.Front; + } + + void ConvertRect(RECT rect, VkOffset3D& offset, VkExtent3D& extent) { + offset.x = rect.left; + offset.y = rect.top; + offset.z = 0; + + extent.width = rect.right - rect.left; + extent.height = rect.bottom - rect.top; + extent.depth = 1; + } + + void ConvertRect(RECT rect, VkOffset2D& offset, VkExtent2D& extent) { + offset.x = rect.left; + offset.y = rect.top; + + extent.width = rect.right - rect.left; + extent.height = rect.bottom - rect.top; + } + + 
uint32_t GetDecltypeSize(D3DDECLTYPE Type) { + switch (Type) { + case D3DDECLTYPE_FLOAT1: return 1 * sizeof(float); + case D3DDECLTYPE_FLOAT2: return 2 * sizeof(float); + case D3DDECLTYPE_FLOAT3: return 3 * sizeof(float); + case D3DDECLTYPE_FLOAT4: return 4 * sizeof(float); + case D3DDECLTYPE_D3DCOLOR: return 1 * sizeof(DWORD); + case D3DDECLTYPE_UBYTE4: return 4 * sizeof(BYTE); + case D3DDECLTYPE_SHORT2: return 2 * sizeof(short); + case D3DDECLTYPE_SHORT4: return 4 * sizeof(short); + case D3DDECLTYPE_UBYTE4N: return 4 * sizeof(BYTE); + case D3DDECLTYPE_SHORT2N: return 2 * sizeof(short); + case D3DDECLTYPE_SHORT4N: return 4 * sizeof(short); + case D3DDECLTYPE_USHORT2N: return 2 * sizeof(short); + case D3DDECLTYPE_USHORT4N: return 4 * sizeof(short); + case D3DDECLTYPE_UDEC3: return 4; + case D3DDECLTYPE_DEC3N: return 4; + case D3DDECLTYPE_FLOAT16_2: return 2 * 2; + case D3DDECLTYPE_FLOAT16_4: return 4 * 2; + default: return 0; + } + } + + + uint32_t GetDecltypeCount(D3DDECLTYPE Type) { + switch (Type) { + case D3DDECLTYPE_FLOAT1: return 1; + case D3DDECLTYPE_FLOAT2: return 2; + case D3DDECLTYPE_FLOAT3: return 3; + case D3DDECLTYPE_FLOAT4: return 4; + case D3DDECLTYPE_D3DCOLOR: return 4; + case D3DDECLTYPE_UBYTE4: return 4; + case D3DDECLTYPE_SHORT2: return 2; + case D3DDECLTYPE_SHORT4: return 4; + case D3DDECLTYPE_UBYTE4N: return 4; + case D3DDECLTYPE_SHORT2N: return 2; + case D3DDECLTYPE_SHORT4N: return 4; + case D3DDECLTYPE_USHORT2N: return 2; + case D3DDECLTYPE_USHORT4N: return 4; + case D3DDECLTYPE_UDEC3: return 3; + case D3DDECLTYPE_DEC3N: return 3; + case D3DDECLTYPE_FLOAT16_2: return 2; + case D3DDECLTYPE_FLOAT16_4: return 4; + default: return 0; + } + } + + + bool IsDepthFormat(D3D9Format Format) { + return Format == D3D9Format::D16_LOCKABLE + || Format == D3D9Format::D32 + || Format == D3D9Format::D15S1 + || Format == D3D9Format::D24S8 + || Format == D3D9Format::D24X8 + || Format == D3D9Format::D24X4S4 + || Format == D3D9Format::D16 + || Format == 
D3D9Format::D32F_LOCKABLE + || Format == D3D9Format::D24FS8 + || Format == D3D9Format::D32_LOCKABLE + || Format == D3D9Format::DF16 + || Format == D3D9Format::DF24 + || Format == D3D9Format::INTZ; + } + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_util.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_util.h new file mode 100644 index 00000000..ced4f872 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_util.h @@ -0,0 +1,295 @@ +#pragma once + +#include "d3d9_include.h" + +#include "d3d9_format.h" + +#include "../dxso/dxso_common.h" +#include "../dxvk/dxvk_device.h" + +#include "../util/util_matrix.h" + +#include <d3dcommon.h> + +namespace dxvk { + + struct D3D9ShaderMasks { + uint32_t samplerMask; + uint32_t rtMask; + }; + + static constexpr D3D9ShaderMasks FixedFunctionMask = + { 0b1111111, 0b1 }; + + struct D3D9MipFilter { + bool MipsEnabled; + VkSamplerMipmapMode MipFilter; + }; + + struct D3D9BlendState { + D3DBLEND Src; + D3DBLEND Dst; + D3DBLENDOP Op; + }; + + inline void FixupBlendState(D3D9BlendState& State) { + // Old DirectX 6 HW feature that still exists... + // Yuck! + if (unlikely(State.Src == D3DBLEND_BOTHSRCALPHA)) { + State.Src = D3DBLEND_SRCALPHA; + State.Dst = D3DBLEND_INVSRCALPHA; + } + else if (unlikely(State.Src == D3DBLEND_BOTHINVSRCALPHA)) { + State.Src = D3DBLEND_INVSRCALPHA; + State.Dst = D3DBLEND_SRCALPHA; + } + } + + inline bool InvalidSampler(DWORD Sampler) { + if (Sampler > 15 && Sampler < D3DDMAPSAMPLER) + return true; + + if (Sampler > D3DVERTEXTEXTURESAMPLER3) + return true; + + return false; + } + + inline DWORD RemapSamplerState(DWORD Sampler) { + if (Sampler >= D3DDMAPSAMPLER) + Sampler = 16 + (Sampler - D3DDMAPSAMPLER); + + return Sampler; + } + + inline std::pair<DxsoProgramType, DWORD> RemapStateSamplerShader(DWORD Sampler) { + if (Sampler >= 17) + return std::make_pair(DxsoProgramTypes::VertexShader, Sampler - 17); + + return std::make_pair(DxsoProgramTypes::PixelShader, Sampler); + } + + inline std::pair<DxsoProgramType, DWORD> RemapSamplerShader(DWORD Sampler) { + Sampler = RemapSamplerState(Sampler); + + return RemapStateSamplerShader(Sampler); + } + + template <typename T, 
typename J> + void CastRefPrivate(J* ptr, bool AddRef) { + if (ptr == nullptr) + return; + + T* castedPtr = reinterpret_cast<T*>(ptr); + AddRef ? castedPtr->AddRefPrivate() : castedPtr->ReleasePrivate(); + } + + HRESULT DecodeMultiSampleType( + D3DMULTISAMPLE_TYPE MultiSample, + DWORD MultisampleQuality, + VkSampleCountFlagBits* pCount); + + VkFormat GetPackedDepthStencilFormat(D3D9Format Format); + + VkFormatFeatureFlags GetImageFormatFeatures(DWORD Usage); + + VkImageUsageFlags GetImageUsageFlags(DWORD Usage); + + inline void DecodeD3DCOLOR(D3DCOLOR color, float* rgba) { + // Encoded in D3DCOLOR as argb + rgba[3] = (float)((color & 0xff000000) >> 24) / 255.0f; + rgba[0] = (float)((color & 0x00ff0000) >> 16) / 255.0f; + rgba[1] = (float)((color & 0x0000ff00) >> 8) / 255.0f; + rgba[2] = (float)((color & 0x000000ff)) / 255.0f; + } + + inline VkFormat PickSRGB(VkFormat format, VkFormat srgbFormat, bool srgb) { + return srgb ? srgbFormat : format; + } + + inline VkShaderStageFlagBits GetShaderStage(DxsoProgramType ProgramType) { + switch (ProgramType) { + case DxsoProgramTypes::VertexShader: return VK_SHADER_STAGE_VERTEX_BIT; + case DxsoProgramTypes::PixelShader: return VK_SHADER_STAGE_FRAGMENT_BIT; + default: return VkShaderStageFlagBits(0); + } + } + + inline uint32_t GetTransformIndex(D3DTRANSFORMSTATETYPE Type) { + if (Type == D3DTS_VIEW) + return 0; + + if (Type == D3DTS_PROJECTION) + return 1; + + if (Type >= D3DTS_TEXTURE0 && Type <= D3DTS_TEXTURE7) + return 2 + (Type - D3DTS_TEXTURE0); + + return 10 + (Type - D3DTS_WORLD); + } + + inline Matrix4 ConvertMatrix(const D3DMATRIX* Matrix) { + if (Matrix == nullptr) // Identity. 
+ return Matrix4(); + + return Matrix4(Matrix->m); + } + + uint32_t GetVertexCount(D3DPRIMITIVETYPE type, UINT count); + + DxvkInputAssemblyState DecodeInputAssemblyState(D3DPRIMITIVETYPE type); + + VkBlendFactor DecodeBlendFactor(D3DBLEND BlendFactor, bool IsAlpha); + + VkBlendOp DecodeBlendOp(D3DBLENDOP BlendOp); + + VkFilter DecodeFilter(D3DTEXTUREFILTERTYPE Filter); + + D3D9MipFilter DecodeMipFilter(D3DTEXTUREFILTERTYPE Filter); + + bool IsAnisotropic(D3DTEXTUREFILTERTYPE Filter); + + VkSamplerAddressMode DecodeAddressMode(D3DTEXTUREADDRESS Mode); + + VkCompareOp DecodeCompareOp(D3DCMPFUNC Func); + + VkStencilOp DecodeStencilOp(D3DSTENCILOP Op); + + VkCullModeFlags DecodeCullMode(D3DCULL Mode); + + VkPolygonMode DecodeFillMode(D3DFILLMODE Mode); + + VkIndexType DecodeIndexType(D3D9Format Format); + + VkFormat DecodeDecltype(D3DDECLTYPE Type); + + uint32_t GetDecltypeSize(D3DDECLTYPE Type); + + uint32_t GetDecltypeCount(D3DDECLTYPE Type); + + void ConvertBox(D3DBOX box, VkOffset3D& offset, VkExtent3D& extent); + + void ConvertRect(RECT rect, VkOffset3D& offset, VkExtent3D& extent); + + void ConvertRect(RECT rect, VkOffset2D& offset, VkExtent2D& extent); + + inline float GetDepthBufferRValue(VkFormat Format) { + switch (Format) { + case VK_FORMAT_D16_UNORM_S8_UINT: + case VK_FORMAT_D16_UNORM: + return float(1 << 16); + + case VK_FORMAT_D24_UNORM_S8_UINT: + return float(1 << 24); + + default: + case VK_FORMAT_D32_SFLOAT_S8_UINT: + case VK_FORMAT_D32_SFLOAT: + return float(1 << 23); + } + } + + template<typename T> + UINT CompactSparseList(T* pData, UINT Mask) { + uint32_t count = 0; + + for (uint32_t id : bit::BitMask(Mask)) + pData[count++] = pData[id]; + + return count; + } + + bool IsDepthFormat(D3D9Format Format); + + inline bool operator == (const D3DVIEWPORT9& a, const D3DVIEWPORT9& b) { + return a.X == b.X && + a.Y == b.Y && + a.Width == b.Width && + a.Height == b.Height && + a.MinZ == b.MinZ && + a.MaxZ == b.MaxZ; + } + + inline bool operator != (const 
D3DVIEWPORT9& a, const D3DVIEWPORT9& b) { + return !(a == b); + } + + inline bool operator == (const RECT& a, const RECT& b) { + return a.left == b.left && + a.right == b.right && + a.top == b.top && + a.bottom == b.bottom; + } + + inline bool operator != (const RECT& a, const RECT& b) { + return !(a == b); + } + + inline bool operator == (const POINT& a, const POINT& b) { + return a.x == b.x && a.y == b.y; + } + + inline bool operator != (const POINT& a, const POINT& b) { + return !(a == b); + } + + inline bool IsPoolManaged(D3DPOOL Pool) { + return Pool == D3DPOOL_MANAGED || Pool == D3DPOOL_MANAGED_EX; + } + + inline D3DRENDERSTATETYPE ColorWriteIndex(uint32_t i) { + return D3DRENDERSTATETYPE(i ? D3DRS_COLORWRITEENABLE1 + i - 1 : D3DRS_COLORWRITEENABLE); + } + + inline bool AreFormatsSimilar(D3D9Format srcFormat, D3D9Format dstFormat) { + return (srcFormat == dstFormat) + || (srcFormat == D3D9Format::A8B8G8R8 && dstFormat == D3D9Format::X8B8G8R8) + || (srcFormat == D3D9Format::A8R8G8B8 && dstFormat == D3D9Format::X8R8G8B8) + || (srcFormat == D3D9Format::A1R5G5B5 && dstFormat == D3D9Format::X1R5G5B5) + || (srcFormat == D3D9Format::A4R4G4B4 && dstFormat == D3D9Format::X4R4G4B4); + } + + inline bool IsBlitRegionInvalid(VkOffset3D offsets[2], VkExtent3D extent) { + // Only bother checking x, y as we don't have 3D blits. 
+ return offsets[1].x < offsets[0].x || + offsets[1].y < offsets[0].y || + offsets[0].x < 0 || + offsets[0].y < 0 || + uint32_t(offsets[1].x) > extent.width || + uint32_t(offsets[1].y) > extent.height; + } + + enum D3D9TextureStageStateTypes : uint32_t + { + DXVK_TSS_COLOROP = 0, + DXVK_TSS_COLORARG1 = 1, + DXVK_TSS_COLORARG2 = 2, + DXVK_TSS_ALPHAOP = 3, + DXVK_TSS_ALPHAARG1 = 4, + DXVK_TSS_ALPHAARG2 = 5, + DXVK_TSS_BUMPENVMAT00 = 6, + DXVK_TSS_BUMPENVMAT01 = 7, + DXVK_TSS_BUMPENVMAT10 = 8, + DXVK_TSS_BUMPENVMAT11 = 9, + DXVK_TSS_TEXCOORDINDEX = 10, + DXVK_TSS_BUMPENVLSCALE = 21, + DXVK_TSS_BUMPENVLOFFSET = 22, + DXVK_TSS_TEXTURETRANSFORMFLAGS = 23, + DXVK_TSS_COLORARG0 = 25, + DXVK_TSS_ALPHAARG0 = 26, + DXVK_TSS_RESULTARG = 27, + DXVK_TSS_CONSTANT = 31, + DXVK_TSS_COUNT = 32 + }; + + constexpr uint32_t DXVK_TSS_TCI_PASSTHRU = 0x00000000; + constexpr uint32_t DXVK_TSS_TCI_CAMERASPACENORMAL = 0x00010000; + constexpr uint32_t DXVK_TSS_TCI_CAMERASPACEPOSITION = 0x00020000; + constexpr uint32_t DXVK_TSS_TCI_CAMERASPACEREFLECTIONVECTOR = 0x00030000; + constexpr uint32_t DXVK_TSS_TCI_SPHEREMAP = 0x00040000; + + inline D3D9TextureStageStateTypes RemapTextureStageStateType(D3DTEXTURESTAGESTATETYPE Type) { + return D3D9TextureStageStateTypes(Type - 1); + } + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_vertex_declaration.cpp b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_vertex_declaration.cpp new file mode 100644 index 00000000..9eb65d79 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_vertex_declaration.cpp @@ -0,0 +1,234 @@ +#include "d3d9_vertex_declaration.h" +#include "d3d9_util.h" + +#include <algorithm> +#include <cstring> + +namespace dxvk { + + D3D9VertexDecl::D3D9VertexDecl( + D3D9DeviceEx* pDevice, + DWORD FVF) + : D3D9VertexDeclBase(pDevice) { + this->SetFVF(FVF); + this->Classify(); + } + + + D3D9VertexDecl::D3D9VertexDecl( + D3D9DeviceEx* pDevice, + const D3DVERTEXELEMENT9* pVertexElements, + uint32_t DeclCount) + : D3D9VertexDeclBase( pDevice ) + , m_elements ( DeclCount ) + , m_fvf ( 0 ) { + std::copy(pVertexElements, pVertexElements + DeclCount, m_elements.begin()); + this->Classify(); + } + + + HRESULT STDMETHODCALLTYPE D3D9VertexDecl::QueryInterface( + REFIID riid, + void** ppvObject) { + if (ppvObject == nullptr) + return E_POINTER; + + *ppvObject = nullptr; + + if (riid == __uuidof(IUnknown) + || riid == __uuidof(IDirect3DVertexDeclaration9)) { + *ppvObject = ref(this); + return S_OK; + } + + Logger::warn("D3D9VertexDecl::QueryInterface: Unknown interface query"); + Logger::warn(str::format(riid)); + return E_NOINTERFACE; + } + + + HRESULT STDMETHODCALLTYPE D3D9VertexDecl::GetDeclaration( + D3DVERTEXELEMENT9* pElement, + UINT* pNumElements) { + if (pNumElements == nullptr) + return D3DERR_INVALIDCALL; + + *pNumElements = UINT(m_elements.size()) + 1u; // Account for D3DDECL_END + + if (pElement == nullptr) + return D3D_OK; + + // The native runtime ignores pNumElements here... 
+ std::copy(m_elements.begin(), m_elements.end(), pElement); + pElement[m_elements.size()] = D3DDECL_END(); + + return D3D_OK; + } + + + void D3D9VertexDecl::SetFVF(DWORD FVF) { + m_fvf = FVF; + + std::array<D3DVERTEXELEMENT9, 16> elements; + uint32_t elemCount = 0; + uint32_t texCount = 0; + + uint32_t betas = 0; + uint8_t betaIdx = 0xFF; + + switch (FVF & D3DFVF_POSITION_MASK) { + case D3DFVF_XYZ: + case D3DFVF_XYZB1: + case D3DFVF_XYZB2: + case D3DFVF_XYZB3: + case D3DFVF_XYZB4: + case D3DFVF_XYZB5: + elements[elemCount].Type = D3DDECLTYPE_FLOAT3; + elements[elemCount].Usage = D3DDECLUSAGE_POSITION; + elements[elemCount].UsageIndex = 0; + elemCount++; + + if ((FVF & D3DFVF_POSITION_MASK) == D3DFVF_XYZ) + break; + + betas = (((FVF & D3DFVF_XYZB5) - D3DFVF_XYZB1) >> 1) + 1; + if (FVF & D3DFVF_LASTBETA_D3DCOLOR) + betaIdx = D3DDECLTYPE_D3DCOLOR; + else if (FVF & D3DFVF_LASTBETA_UBYTE4) + betaIdx = D3DDECLTYPE_UBYTE4; + else if ((FVF & D3DFVF_XYZB5) == D3DFVF_XYZB5) + betaIdx = D3DDECLTYPE_FLOAT1; + + if (betaIdx != 0xFF) + betas--; + + if (betas > 0) { + switch (betas) { + case 1: elements[elemCount].Type = D3DDECLTYPE_FLOAT1; break; + case 2: elements[elemCount].Type = D3DDECLTYPE_FLOAT2; break; + case 3: elements[elemCount].Type = D3DDECLTYPE_FLOAT3; break; + case 4: elements[elemCount].Type = D3DDECLTYPE_FLOAT4; break; + default: break; + } + elements[elemCount].Usage = D3DDECLUSAGE_BLENDWEIGHT; + elements[elemCount].UsageIndex = 0; + elemCount++; + } + + if (betaIdx != 0xFF) { + elements[elemCount].Type = betaIdx; + elements[elemCount].Usage = D3DDECLUSAGE_BLENDINDICES; + elements[elemCount].UsageIndex = 0; + elemCount++; + } + break; + + case D3DFVF_XYZW: + case D3DFVF_XYZRHW: + elements[elemCount].Type = D3DDECLTYPE_FLOAT4; + elements[elemCount].Usage = + ((FVF & D3DFVF_POSITION_MASK) == D3DFVF_XYZW) + ? 
D3DDECLUSAGE_POSITION + : D3DDECLUSAGE_POSITIONT; + elements[elemCount].UsageIndex = 0; + elemCount++; + break; + + default: + break; + } + + if (FVF & D3DFVF_NORMAL) { + elements[elemCount].Type = D3DDECLTYPE_FLOAT3; + elements[elemCount].Usage = D3DDECLUSAGE_NORMAL; + elements[elemCount].UsageIndex = 0; + elemCount++; + } + if (FVF & D3DFVF_PSIZE) { + elements[elemCount].Type = D3DDECLTYPE_FLOAT1; + elements[elemCount].Usage = D3DDECLUSAGE_PSIZE; + elements[elemCount].UsageIndex = 0; + elemCount++; + } + if (FVF & D3DFVF_DIFFUSE) { + elements[elemCount].Type = D3DDECLTYPE_D3DCOLOR; + elements[elemCount].Usage = D3DDECLUSAGE_COLOR; + elements[elemCount].UsageIndex = 0; + elemCount++; + } + if (FVF & D3DFVF_SPECULAR) { + elements[elemCount].Type = D3DDECLTYPE_D3DCOLOR; + elements[elemCount].Usage = D3DDECLUSAGE_COLOR; + elements[elemCount].UsageIndex = 1; + elemCount++; + } + + texCount = (FVF & D3DFVF_TEXCOUNT_MASK) >> D3DFVF_TEXCOUNT_SHIFT; + texCount = std::min(texCount, 8u); + + for (uint32_t i = 0; i < texCount; i++) { + switch ((FVF >> (16 + i * 2)) & 0x3) { + case D3DFVF_TEXTUREFORMAT1: + elements[elemCount].Type = D3DDECLTYPE_FLOAT1; + break; + + case D3DFVF_TEXTUREFORMAT2: + elements[elemCount].Type = D3DDECLTYPE_FLOAT2; + break; + + case D3DFVF_TEXTUREFORMAT3: + elements[elemCount].Type = D3DDECLTYPE_FLOAT3; + break; + + case D3DFVF_TEXTUREFORMAT4: + elements[elemCount].Type = D3DDECLTYPE_FLOAT4; + break; + + default: + break; + } + elements[elemCount].Usage = D3DDECLUSAGE_TEXCOORD; + elements[elemCount].UsageIndex = i; + elemCount++; + } + + for (uint32_t i = 0; i < elemCount; i++) { + elements[i].Stream = 0; + elements[i].Offset = (i == 0) + ? 
0 + : (elements[i - 1].Offset + GetDecltypeSize(D3DDECLTYPE(elements[i - 1].Type))); + + elements[i].Method = D3DDECLMETHOD_DEFAULT; + } + + m_elements.resize(elemCount); + std::copy(elements.begin(), elements.begin() + elemCount, m_elements.data()); + } + + + void D3D9VertexDecl::Classify() { + for (const auto& element : m_elements) { + if (element.Stream == 0 && element.Type != D3DDECLTYPE_UNUSED) + m_size = std::max(m_size, element.Offset + GetDecltypeSize(D3DDECLTYPE(element.Type))); + + if (element.Usage == D3DDECLUSAGE_COLOR && element.UsageIndex == 0) + m_flags.set(D3D9VertexDeclFlag::HasColor0); + else if (element.Usage == D3DDECLUSAGE_COLOR && element.UsageIndex == 1) + m_flags.set(D3D9VertexDeclFlag::HasColor1); + else if (element.Usage == D3DDECLUSAGE_POSITIONT) + m_flags.set(D3D9VertexDeclFlag::HasPositionT); + else if (element.Usage == D3DDECLUSAGE_PSIZE) + m_flags.set(D3D9VertexDeclFlag::HasPointSize); + else if (element.Usage == D3DDECLUSAGE_FOG) + m_flags.set(D3D9VertexDeclFlag::HasFog); + else if (element.Usage == D3DDECLUSAGE_BLENDWEIGHT) + m_flags.set(D3D9VertexDeclFlag::HasBlendWeight); + else if (element.Usage == D3DDECLUSAGE_BLENDINDICES) + m_flags.set(D3D9VertexDeclFlag::HasBlendIndices); + + if (element.Usage == D3DDECLUSAGE_TEXCOORD) + m_texcoordMask |= GetDecltypeCount(D3DDECLTYPE(element.Type)) << (element.UsageIndex * 3); + } + } + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_vertex_declaration.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_vertex_declaration.h new file mode 100644 index 00000000..fe519f2d --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_vertex_declaration.h @@ -0,0 +1,82 @@ +#pragma once + +#include "d3d9_device_child.h" +#include "d3d9_util.h" + +#include <vector> + +namespace dxvk { + + enum D3D9VertexDeclFlag { + HasColor0, + HasColor1, + HasPositionT, + HasPointSize, + HasFog, + HasBlendWeight, + HasBlendIndices + }; + using D3D9VertexDeclFlags = Flags<D3D9VertexDeclFlag>; + + using D3D9VertexDeclBase = D3D9DeviceChild<IDirect3DVertexDeclaration9>; + class D3D9VertexDecl final : public D3D9VertexDeclBase { + + public: + + D3D9VertexDecl( + D3D9DeviceEx* pDevice, + DWORD FVF); + + D3D9VertexDecl( + D3D9DeviceEx* pDevice, + const D3DVERTEXELEMENT9* pVertexElements, + uint32_t DeclCount); + + HRESULT STDMETHODCALLTYPE QueryInterface( + REFIID riid, + void** ppvObject); + + HRESULT STDMETHODCALLTYPE GetDeclaration( + D3DVERTEXELEMENT9* pElement, + UINT* pNumElements); + + inline DWORD GetFVF() { + return m_fvf; + } + + void SetFVF(DWORD FVF); + + const D3D9VertexElements& GetElements() const { + return m_elements; + } + + UINT GetSize() const { + return m_size; + } + + bool TestFlag(D3D9VertexDeclFlag flag) const { + return m_flags.test(flag); + } + + uint32_t GetTexcoordMask() const { + return m_texcoordMask; + } + + private: + + void Classify(); + + D3D9VertexDeclFlags m_flags; + + D3D9VertexElements m_elements; + + DWORD m_fvf; + + uint32_t m_texcoordMask = 0; + + // The size of Stream 0. That's all we care about. + uint32_t m_size = 0; + + }; + +}
\ No newline at end of file diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_volume.cpp b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_volume.cpp new file mode 100644 index 00000000..399566f3 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_volume.cpp @@ -0,0 +1,112 @@ +#include "d3d9_volume.h" + +#include "d3d9_device.h" +#include "d3d9_texture.h" + +namespace dxvk { + + D3D9Volume::D3D9Volume( + D3D9DeviceEx* pDevice, + const D3D9_COMMON_TEXTURE_DESC* pDesc) + : D3D9VolumeBase( + pDevice, + new D3D9CommonTexture( pDevice, pDesc, D3DRTYPE_VOLUMETEXTURE ), + 0, 0, + nullptr, + nullptr) { } + + + D3D9Volume::D3D9Volume( + D3D9DeviceEx* pDevice, + D3D9CommonTexture* pTexture, + UINT Face, + UINT MipLevel, + IDirect3DBaseTexture9* pContainer) + : D3D9VolumeBase( + pDevice, + pTexture, + Face, MipLevel, + pContainer, + pContainer) { } + + + void D3D9Volume::AddRefPrivate() { + // Can't have a swapchain container for a volume. + if (m_baseTexture != nullptr) { + static_cast<D3D9Texture3D*>(m_baseTexture)->AddRefPrivate(); + return; + } + + D3D9VolumeBase::AddRefPrivate(); + } + + + void D3D9Volume::ReleasePrivate() { + // Can't have a swapchain container for a volume. 
+ if (m_baseTexture != nullptr) { + static_cast<D3D9Texture3D*>(m_baseTexture)->ReleasePrivate(); + return; + } + + D3D9VolumeBase::ReleasePrivate(); + } + + + HRESULT STDMETHODCALLTYPE D3D9Volume::QueryInterface(REFIID riid, void** ppvObject) { + if (ppvObject == nullptr) + return E_POINTER; + + *ppvObject = nullptr; + + if (riid == __uuidof(IUnknown) + || riid == __uuidof(IDirect3DResource9) + || riid == __uuidof(IDirect3DVolume9)) { + *ppvObject = ref(this); + return S_OK; + } + + Logger::warn("D3D9Volume::QueryInterface: Unknown interface query"); + Logger::warn(str::format(riid)); + return E_NOINTERFACE; + } + + + HRESULT STDMETHODCALLTYPE D3D9Volume::GetDesc(D3DVOLUME_DESC *pDesc) { + if (pDesc == nullptr) + return D3DERR_INVALIDCALL; + + auto& desc = *(m_texture->Desc()); + + pDesc->Format = static_cast<D3DFORMAT>(desc.Format); + pDesc->Type = D3DRTYPE_VOLUME; + pDesc->Usage = desc.Usage; + pDesc->Pool = desc.Pool; + + pDesc->Width = std::max(1u, desc.Width >> m_mipLevel); + pDesc->Height = std::max(1u, desc.Height >> m_mipLevel); + pDesc->Depth = std::max(1u, desc.Depth >> m_mipLevel); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9Volume::LockBox(D3DLOCKED_BOX* pLockedBox, CONST D3DBOX* pBox, DWORD Flags) { + if (unlikely(pLockedBox == nullptr)) + return D3DERR_INVALIDCALL; + + return m_parent->LockImage( + m_texture, + m_face, m_mipLevel, + pLockedBox, + pBox, + Flags); + } + + + HRESULT STDMETHODCALLTYPE D3D9Volume::UnlockBox() { + return m_parent->UnlockImage( + m_texture, + m_face, m_mipLevel); + } + +}
\ No newline at end of file
diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_volume.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_volume.h
new file mode 100644
index 00000000..9c33a788
--- /dev/null
+++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/d3d9_volume.h
@@ -0,0 +1,38 @@
+#pragma once
+
+#include "d3d9_subresource.h"
+
+#include "d3d9_common_texture.h"
+
+namespace dxvk {
+  // Base class alias and the IDirect3DVolume9 implementation built on it.
+  using D3D9VolumeBase = D3D9Subresource<IDirect3DVolume9>;
+  class D3D9Volume final : public D3D9VolumeBase {
+
+  public:
+    // Creates a volume together with its own D3D9CommonTexture described by pDesc.
+    D3D9Volume(
+            D3D9DeviceEx*             pDevice,
+      const D3D9_COMMON_TEXTURE_DESC* pDesc);
+    // Creates a volume for one face / mip level of an existing texture.
+    D3D9Volume(
+            D3D9DeviceEx*             pDevice,
+            D3D9CommonTexture*        pTexture,
+            UINT                      Face,
+            UINT                      MipLevel,
+            IDirect3DBaseTexture9*    pContainer);
+    // Private reference counting; forwarded to the base texture when the volume has one.
+    void AddRefPrivate();
+
+    void ReleasePrivate();
+    // Supports IUnknown, IDirect3DResource9 and IDirect3DVolume9.
+    HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, void** ppvObject);
+    // Reports format, usage, pool and the dimensions of this volume's mip level.
+    HRESULT STDMETHODCALLTYPE GetDesc(D3DVOLUME_DESC *pDesc) final;
+    // Forwards to the parent's LockImage(); fails with D3DERR_INVALIDCALL if pLockedBox is null.
+    HRESULT STDMETHODCALLTYPE LockBox(D3DLOCKED_BOX* pLockedBox, CONST D3DBOX* pBox, DWORD Flags) final;
+    // Forwards to the parent's UnlockImage().
+    HRESULT STDMETHODCALLTYPE UnlockBox() final;
+
+  };
+}
\ No newline at end of file
diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/meson.build b/src/libs/dxvk-native-1.9.2a/src/d3d9/meson.build
new file mode 100644
index 00000000..1086b857
--- /dev/null
+++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/meson.build
@@ -0,0 +1,54 @@
+d3d9_res = wrc_generator.process('version.rc')
+
+d3d9_shaders = files([
+  'shaders/d3d9_convert_yuy2_uyvy.comp',
+  'shaders/d3d9_convert_l6v5u5.comp',
+  'shaders/d3d9_convert_x8l8v8u8.comp',
+  'shaders/d3d9_convert_a2w10v10u10.comp',
+  'shaders/d3d9_convert_nv12.comp',
+  'shaders/d3d9_convert_yv12.comp'
+])
+
+d3d9_src = [
+  'd3d9_main.cpp',
+  'd3d9_interface.cpp',
+  'd3d9_adapter.cpp',
+  'd3d9_monitor.cpp',
+  'd3d9_device.cpp',
+  'd3d9_state.cpp',
+  'd3d9_cursor.cpp',
+  'd3d9_swapchain.cpp',
+  'd3d9_format.cpp',
+  'd3d9_common_texture.cpp',
+  'd3d9_texture.cpp',
+  'd3d9_surface.cpp',
+  'd3d9_volume.cpp',
+  'd3d9_common_buffer.cpp',
+  'd3d9_buffer.cpp',
+  'd3d9_shader.cpp',
+  'd3d9_vertex_declaration.cpp',
+  'd3d9_query.cpp',
+  'd3d9_multithread.cpp',
+  'd3d9_options.cpp',
+  'd3d9_stateblock.cpp',
+  'd3d9_sampler.cpp',
+  'd3d9_util.cpp',
+  'd3d9_initializer.cpp',
+  'd3d9_fixed_function.cpp',
+  'd3d9_names.cpp',
+  'd3d9_swvp_emu.cpp',
+  'd3d9_format_helpers.cpp',
+  'd3d9_hud.cpp'
+]
+
+d3d9_dll = shared_library(so_prefix+'d3d9'+dll_ext, d3d9_src, glsl_generator.process(d3d9_shaders), d3d9_res,
+  name_prefix         : '',
+  dependencies        : [ dxso_dep, dxvk_dep, wsi_dep ],
+  include_directories : dxvk_include_path,
+  install             : true,
+  vs_module_defs      : 'd3d9'+def_spec_ext,
+  override_options    : ['cpp_std='+dxvk_cpp_std])
+
+d3d9_dep = declare_dependency(
+  link_with           : [ d3d9_dll ],
+  include_directories : [ dxvk_include_path ])
diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/shaders/d3d9_convert_a2w10v10u10.comp b/src/libs/dxvk-native-1.9.2a/src/d3d9/shaders/d3d9_convert_a2w10v10u10.comp
new file mode 100644
index 00000000..2d489e3d
--- /dev/null
+++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/shaders/d3d9_convert_a2w10v10u10.comp
@@ -0,0 +1,48 @@
+#version 450
+#extension GL_GOOGLE_include_directive : enable
+
+#include "d3d9_convert_common.h"
+
+layout(
+  local_size_x = 8,
+  local_size_y = 8,
+  local_size_z = 1) in;
+
+layout(binding = 0)
+writeonly uniform image2D dst;
+
+layout(binding = 1) uniform usamplerBuffer src;
+
+layout(push_constant)
+uniform u_info_t {
+  uvec2 extent;
+} u_info;
+
+// Converts one A2W10V10U10 texel per invocation: reads the packed
+// value from src and stores the normalized result to dst.
+void main() {
+  ivec3 thread_id = ivec3(gl_GlobalInvocationID);
+
+  // Invocations outside the extent have nothing to convert.
+  if (!all(lessThan(thread_id.xy, u_info.extent)))
+    return;
+
+  // src is indexed row by row, one texel per pixel.
+  uint texel_index = thread_id.x
+                   + thread_id.y * u_info.extent.x;
+
+  uint raw = texelFetch(src, int(texel_index)).r;
+
+  // Extracting from a signed int sign-extends the 10-bit fields;
+  // the 2-bit A field is extracted unsigned.
+  int  u = bitfieldExtract(int (raw), 0,  10);
+  int  v = bitfieldExtract(int (raw), 10, 10);
+  int  w = bitfieldExtract(int (raw), 20, 10);
+  uint a = bitfieldExtract(uint(raw), 30, 2);
+
+  imageStore(dst, thread_id.xy, vec4(
+    snormalize(u, 10),
+    snormalize(v, 10),
+    snormalize(w, 10),
+    unormalize(a, 2)));
+}
\ No newline at end of file
diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/shaders/d3d9_convert_common.h b/src/libs/dxvk-native-1.9.2a/src/d3d9/shaders/d3d9_convert_common.h
new file mode 100644
index 00000000..daca40e1
--- /dev/null
+++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/shaders/d3d9_convert_common.h
@@ -0,0 +1,55 @@
+// Normalizes an unsigned 'bits'-wide integer by dividing by 2^bits - 1.
+float unormalize(uint value, int bits) {
+  const int range = (1 << bits) - 1;
+
+  return float(value) / float(range);
+}
+
+// Normalizes a signed 'bits'-wide integer to [-1.0, 1.0].
+float snormalize(int value, int bits) {
+  const int range = (1 << (bits - 1)) - 1;
+
+  // max() clamps the result because the two most negative inputs
+  // both map to -1.0: for 6 bits, -32 and -31 are divided by 31.
+  return max(float(value) / float(range), -1.0);
+}
+
+// Normalizes an 8-bit unsigned value by dividing by 255.
+float unpackUnorm(uint p) {
+  return float(p) / 255.0;
+}
+
+// Unpacks two 8-bit unorm values: x from bits 0-7, y from p >> 8.
+// y is not masked, so p is expected to have no bits set above bit 15.
+vec2 unpackUnorm2x8(uint p) {
+  uvec2 value = uvec2(p & 0xFF, p >> 8);
+  return vec2(unpackUnorm(value.x), unpackUnorm(value.y));
+}
+
+// Fixed-point YUV -> RGB coefficients (n / 256), one column per output
+// channel; the fourth row is the 0.5 rounding term, which convertYUV()
+// scales by 1 / 255. The quotients must be floating point: written as
+// integers, 298 / 256 is evaluated with integer division and yields 1.
+mat3x4 g_yuv_to_rgb = {
+  { 298.0 / 256.0,  0.0,            409.0 / 256.0, 0.5 },
+  { 298.0 / 256.0, -100.0 / 256.0, -208.0 / 256.0, 0.5 },
+  { 298.0 / 256.0,  516.0 / 256.0,  0.0,           0.5 }
+};
+
+// Converts the given (Y, U, V) triple to opaque RGB clamped to [0, 1].
+vec4 convertYUV(vec3 yuv) {
+  vec3 value = vec4(yuv, 1 / 255.0) * g_yuv_to_rgb;
+
+  return vec4(clamp(value, 0, 1), 1);
+}
+
+mat3x3 g_bt709_to_rgb = {
+  { 1.164,  0,      1.793 },
+  { 1.164, -0.213, -0.533 },
+  { 1.164,  2.112,  0     }
+};
+
+// Converts using g_bt709_to_rgb; returns opaque RGB clamped to [0, 1].
+vec4 convertBT_709(vec3 cde) {
+  return vec4(clamp(cde * g_bt709_to_rgb, 0, 1), 1);
+}
diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/shaders/d3d9_convert_l6v5u5.comp b/src/libs/dxvk-native-1.9.2a/src/d3d9/shaders/d3d9_convert_l6v5u5.comp
new file mode 100644
index 00000000..e7ee25a9
--- /dev/null
+++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/shaders/d3d9_convert_l6v5u5.comp
@@ -0,0 +1,43 @@
+#version 450
+#extension GL_GOOGLE_include_directive : enable
+
+#include "d3d9_convert_common.h"
+
+layout(
+  local_size_x = 8,
+  local_size_y = 8,
+  local_size_z = 1) in;
+
+layout(binding = 0)
+writeonly uniform image2D dst;
+
+layout(binding = 1) uniform usamplerBuffer src;
+
+layout(push_constant)
+uniform u_info_t {
+  uvec2 extent;
+} u_info;
+
+void main() {
+  ivec3 thread_id = ivec3(gl_GlobalInvocationID);
+
+  if (all(lessThan(thread_id.xy, u_info.extent))) {
+    uint offset = thread_id.x
+                + thread_id.y * u_info.extent.x;
+
+    uint value = texelFetch(src, int(offset)).r;
+
+    // Sign-extend magic!
+    int  u5 = bitfieldExtract(int (value), 0, 5);
+    int  v5 = bitfieldExtract(int (value), 5, 5);
+    uint l6 = bitfieldExtract(uint(value), 10, 6);
+
+    vec4 color = vec4(
+      snormalize(u5, 5),
+      snormalize(v5, 5),
+      unormalize(l6, 6),
+      1.0f);
+
+    imageStore(dst, thread_id.xy, color);
+  }
+}
\ No newline at end of file
diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/shaders/d3d9_convert_nv12.comp b/src/libs/dxvk-native-1.9.2a/src/d3d9/shaders/d3d9_convert_nv12.comp
new file mode 100644
index 00000000..70f7998d
--- /dev/null
+++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/shaders/d3d9_convert_nv12.comp
@@ -0,0 +1,68 @@
+#version 450
+#extension GL_GOOGLE_include_directive : enable
+
+#include "d3d9_convert_common.h"
+
+layout(
+  local_size_x = 8,
+  local_size_y = 8,
+  local_size_z = 1) in;
+
+layout(binding = 0)
+writeonly uniform image2D dst;
+
+layout(binding = 1) uniform usamplerBuffer src;
+
+layout(push_constant)
+uniform u_info_t {
+  uvec2 extent;
+} u_info;
+
+// Fetches one texel from the given buffer and unpacks it into two
+// 8-bit unorm values.
+vec2 fetchUnorm2x8(usamplerBuffer source, uint offset) {
+  // Read through the parameter rather than the global 'src' so the
+  // helper honours whichever buffer the caller passes in.
+  return unpackUnorm2x8(texelFetch(source, int(offset)).r);
+}
+
+// Format is:
+// YYYYYYYYYYYYYYY...
+// YYYYYYYYYYYYYYY...
+// UVUVUVUVUVUVUVU...
+
+// Converts two horizontally adjacent NV12 pixels per invocation.
+void main() {
+  ivec3 thread_id = ivec3(gl_GlobalInvocationID);
+
+  if (all(lessThan(thread_id.xy, u_info.extent))) {
+    uvec2 pitch = uvec2(u_info.extent.x, u_info.extent.y);
+
+    uint offset = thread_id.x
+                + thread_id.y * pitch.x;
+
+    // Fetch 2 luminance samples.
+    vec2 y = fetchUnorm2x8(src, offset) - (16 / 255.0);
+
+    // Go into the second plane to get the chroma data.
+    // UV data is subsampled as [2, 2]
+    // So we need to divide thread_id.y by 2.
+    // thread_id.x is already accounted for as we read uint16
+    offset = thread_id.x
+           + thread_id.y / 2 * pitch.x
+           + pitch.x * pitch.y;
+
+    vec2 uv = fetchUnorm2x8(src, offset) - (128 / 255.0);
+
+    // The NV12 format seems to use the BT.709 color space.
+    vec4 color0 = convertBT_709(vec3(y.x, uv.x, uv.y));
+    vec4 color1 = convertBT_709(vec3(y.y, uv.x, uv.y));
+
+    // We write as a macropixel of [2, 1]
+    // So write out 2 pixels in this run.
+    ivec2 writePos = thread_id.xy * ivec2(2, 1);
+
+    imageStore(dst, ivec2(writePos.x,     writePos.y), color0);
+    imageStore(dst, ivec2(writePos.x + 1, writePos.y), color1);
+  }
+}
\ No newline at end of file
diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/shaders/d3d9_convert_x8l8v8u8.comp b/src/libs/dxvk-native-1.9.2a/src/d3d9/shaders/d3d9_convert_x8l8v8u8.comp
new file mode 100644
index 00000000..a9257cb5
--- /dev/null
+++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/shaders/d3d9_convert_x8l8v8u8.comp
@@ -0,0 +1,47 @@
+#version 450
+#extension GL_GOOGLE_include_directive : enable
+
+#include "d3d9_convert_common.h"
+
+layout(
+  local_size_x = 8,
+  local_size_y = 8,
+  local_size_z = 1) in;
+
+layout(binding = 0)
+writeonly uniform image2D dst;
+
+layout(binding = 1) uniform usamplerBuffer src;
+
+layout(push_constant)
+uniform u_info_t {
+  uvec2 extent;
+} u_info;
+
+// Converts one X8L8V8U8 texel per invocation: reads the packed
+// value from src and stores the normalized result to dst.
+void main() {
+  ivec3 thread_id = ivec3(gl_GlobalInvocationID);
+
+  // Invocations outside the extent have nothing to convert.
+  if (!all(lessThan(thread_id.xy, u_info.extent)))
+    return;
+
+  // src is indexed row by row, one texel per pixel.
+  uint texel_index = thread_id.x
+                   + thread_id.y * u_info.extent.x;
+
+  uint raw = texelFetch(src, int(texel_index)).r;
+
+  // Extracting from a signed int sign-extends the 8-bit U and V
+  // fields; the L field is extracted unsigned.
+  int  u = bitfieldExtract(int (raw), 0,  8);
+  int  v = bitfieldExtract(int (raw), 8,  8);
+  uint l = bitfieldExtract(uint(raw), 16, 8);
+
+  imageStore(dst, thread_id.xy, vec4(
+    snormalize(u, 8),
+    snormalize(v, 8),
+    unormalize(l, 8),
+    1.0f));
+}
\ No newline at end of file
diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/shaders/d3d9_convert_yuy2_uyvy.comp b/src/libs/dxvk-native-1.9.2a/src/d3d9/shaders/d3d9_convert_yuy2_uyvy.comp
new file mode 100644
index 00000000..8f38c9ba
--- /dev/null
+++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/shaders/d3d9_convert_yuy2_uyvy.comp
@@ -0,0 +1,54 @@
+#version 450
+#extension GL_GOOGLE_include_directive : enable
+
+#include "d3d9_convert_common.h"
+
+layout(constant_id = 1225) const bool s_is_uyvy = false;
+
+layout(
+  local_size_x = 8,
+  local_size_y = 8,
+  local_size_z = 1) in;
+
+layout(binding = 0)
+writeonly uniform image2D dst;
+
+layout(binding = 1) uniform usamplerBuffer src;
+
+layout(push_constant)
+uniform u_info_t {
+  uvec2 extent;
+} u_info;
+
+// Converts one YUY2 / UYVY macropixel (two horizontally adjacent
+// pixels sharing one U/V pair) per invocation.
+void main() {
+  ivec3 thread_id = ivec3(gl_GlobalInvocationID);
+
+  // Invocations outside the extent have nothing to convert.
+  if (!all(lessThan(thread_id.xy, u_info.extent)))
+    return;
+
+  // src is indexed row by row, one macropixel per texel.
+  uint texel_index = thread_id.x
+                   + thread_id.y * u_info.extent.x;
+
+  vec4 data = unpackUnorm4x8(texelFetch(src, int(texel_index)).r);
+
+  // UYVY stores the same components with each byte pair swapped.
+  if (s_is_uyvy)
+    data = data.yxwz;
+
+  // Remove the luma (16) and chroma (128) offsets: Y0, U, Y1, V.
+  vec4 yuyv = data - vec4(16, 128, 16, 128) / 255.0;
+
+  vec4 color0 = convertYUV(yuyv.xyw);
+  vec4 color1 = convertYUV(yuyv.zyw);
+
+  // YUY2 has a macropixel of [2, 1], so this invocation
+  // writes two neighbouring pixels.
+  ivec2 first = ivec2(thread_id.x * 2, thread_id.y);
+
+  imageStore(dst, first,               color0);
+  imageStore(dst, first + ivec2(1, 0), color1);
+}
\ No newline at end of file
diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/shaders/d3d9_convert_yv12.comp b/src/libs/dxvk-native-1.9.2a/src/d3d9/shaders/d3d9_convert_yv12.comp
new file mode 100644
index 00000000..ad3822ab
--- /dev/null
+++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/shaders/d3d9_convert_yv12.comp
@@ -0,0 +1,63 @@
+#version 450
+#extension GL_GOOGLE_include_directive : enable
+
+#include "d3d9_convert_common.h"
+
+layout(
+  local_size_x = 8,
+  local_size_y = 8,
+  local_size_z = 1) in;
+
+layout(binding = 0)
+writeonly uniform image2D dst;
+
+layout(binding = 1) uniform usamplerBuffer src;
+
+layout(push_constant)
+uniform u_info_t {
+  uvec2 extent;
+} u_info;
+
+// Format is:
+// YYYYYYYY...
+// VVVV...
+// UUUU...
+
+// Fetches one texel from the given buffer and converts it from an
+// 8-bit unorm value to float.
+float fetchUnorm(usamplerBuffer source, uint offset) {
+  // Read through the parameter rather than the global 'src' so the
+  // helper honours whichever buffer the caller passes in.
+  return unpackUnorm(texelFetch(source, int(offset)).r);
+}
+
+// Converts one YV12 pixel per invocation.
+void main() {
+  ivec3 thread_id = ivec3(gl_GlobalInvocationID);
+
+  if (all(lessThan(thread_id.xy, u_info.extent))) {
+    uvec2 pitch = uvec2(u_info.extent.x, u_info.extent.y);
+
+    uint offset = thread_id.x
+                + thread_id.y * pitch.x;
+
+    // Fetch a Y, luminance sample.
+    float y = fetchUnorm(src, offset) - (16 / 255.0);
+
+    // Go into the second plane to get a V, chroma sample
+    offset = (thread_id.x / 2)
+           + (thread_id.y / 2) * (pitch.x / 2)
+           + pitch.x * pitch.y;
+
+    float v = fetchUnorm(src, offset) - (128 / 255.0);
+
+    // Go into the third plane to get a U, chroma sample
+    offset += (pitch.x / 2) * (pitch.y / 2);
+    float u = fetchUnorm(src, offset) - (128 / 255.0);
+
+    // TODO: Is this the right color space?
+    vec4 color = convertBT_709(vec3(y, u, v));
+
+    imageStore(dst, thread_id.xy, color);
+  }
+}
diff --git a/src/libs/dxvk-native-1.9.2a/src/d3d9/version.rc b/src/libs/dxvk-native-1.9.2a/src/d3d9/version.rc
new file mode 100644
index 00000000..b4027eee
--- /dev/null
+++ b/src/libs/dxvk-native-1.9.2a/src/d3d9/version.rc
@@ -0,0 +1,31 @@
+#include <windows.h>
+
+// DLL version information.
+VS_VERSION_INFO    VERSIONINFO
+FILEVERSION        10,0,17763,1
+PRODUCTVERSION     10,0,17763,1
+FILEFLAGSMASK      VS_FFI_FILEFLAGSMASK
+FILEFLAGS          0
+FILEOS             VOS_NT_WINDOWS32
+FILETYPE           VFT_DLL
+FILESUBTYPE        VFT2_UNKNOWN
+BEGIN
+  BLOCK "StringFileInfo"
+  BEGIN
+    BLOCK "080904b0"
+    BEGIN
+      VALUE "CompanyName", "DXVK"
+      VALUE "FileDescription", "Direct3D 9 Runtime"
+      VALUE "FileVersion", "10.0.17763.1 (WinBuild.160101.0800)"
+      VALUE "InternalName", "D3D9.dll"
+      VALUE "LegalCopyright", "zlib/libpng license"
+      VALUE "OriginalFilename", "D3D9.dll"
+      VALUE "ProductName", "DXVK"
+      VALUE "ProductVersion", "10.0.17763.1"
+    END
+  END
+  BLOCK "VarFileInfo"
+  BEGIN
+    VALUE "Translation", 0x0809, 1200
+  END
+END
|