author    | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000
commit    | 2aa4a82499d4becd2284cdb482213d541b8804dd (patch)
tree      | b80bf8bf13c3766139fbacc530efd0dd9d54394c /third_party/rust/gfx-backend-dx11
parent    | Initial commit. (diff)
Adding upstream version 86.0.1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/gfx-backend-dx11')
-rw-r--r-- | third_party/rust/gfx-backend-dx11/.cargo-checksum.json |    1
-rw-r--r-- | third_party/rust/gfx-backend-dx11/Cargo.toml           |   40
-rw-r--r-- | third_party/rust/gfx-backend-dx11/README.md            |   13
-rw-r--r-- | third_party/rust/gfx-backend-dx11/shaders/blit.hlsl    |   63
-rw-r--r-- | third_party/rust/gfx-backend-dx11/shaders/clear.hlsl   |   22
-rw-r--r-- | third_party/rust/gfx-backend-dx11/shaders/copy.hlsl    |  615
-rw-r--r-- | third_party/rust/gfx-backend-dx11/src/conv.rs          |  855
-rw-r--r-- | third_party/rust/gfx-backend-dx11/src/debug.rs         |  114
-rw-r--r-- | third_party/rust/gfx-backend-dx11/src/device.rs        | 2485
-rw-r--r-- | third_party/rust/gfx-backend-dx11/src/dxgi.rs          |  213
-rw-r--r-- | third_party/rust/gfx-backend-dx11/src/internal.rs      | 1329
-rw-r--r-- | third_party/rust/gfx-backend-dx11/src/lib.rs           | 4285
-rw-r--r-- | third_party/rust/gfx-backend-dx11/src/shader.rs        |  405
13 files changed, 10440 insertions, 0 deletions
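The first new file below, .cargo-checksum.json, is Cargo's manifest for vendored sources: it maps each file path to the SHA-256 digest of that file's bytes (plus an optional whole-package checksum). A minimal verification sketch, assuming the sha2 and hex crates, which are not dependencies of this commit:

```rust
// Hypothetical sketch: check one vendored file against .cargo-checksum.json.
// Requires the `sha2` and `hex` crates; the expected hash is the Cargo.toml
// entry from the manifest below.
use sha2::{Digest, Sha256};

fn checksum(bytes: &[u8]) -> String {
    // Cargo stores lowercase hex SHA-256 digests of the raw file bytes.
    hex::encode(Sha256::digest(bytes))
}

fn main() {
    let expected = "ba6db93df75f979728d5bcb9fff9ee328fa807ba6d640121b3effe355fa98345";
    let actual = checksum(&std::fs::read("Cargo.toml").expect("read Cargo.toml"));
    assert_eq!(actual, expected, "vendored file was modified");
}
```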
diff --git a/third_party/rust/gfx-backend-dx11/.cargo-checksum.json b/third_party/rust/gfx-backend-dx11/.cargo-checksum.json
new file mode 100644
index 0000000000..a82dc6ce92
--- /dev/null
+++ b/third_party/rust/gfx-backend-dx11/.cargo-checksum.json
@@ -0,0 +1 @@
+{"files":{"Cargo.toml":"ba6db93df75f979728d5bcb9fff9ee328fa807ba6d640121b3effe355fa98345","README.md":"5c66af7bc110525a57c757859b9b93468ae54222e6ce9ce5ffd55b2a6ca596b9","shaders/blit.hlsl":"92a8b404ee956ceff2728ec8dd68969fba4c32a79f4d879f069a294f245a867c","shaders/clear.hlsl":"b715a0d8ccebd858531de845fdb3f1b31f25d3f62266238cd1d417006a07957c","shaders/copy.hlsl":"c71b2df8691068d450d3216b54a5a0315f1e17bcd75aec3b4f46b5e3521945c3","src/conv.rs":"c191269ba4e38c119afe989fb6d0343ed7497477e1b4debb11761c125c3feedf","src/debug.rs":"60a2a7cac9cbced6385f560d7b171c4a8eb9a644d8ac219671e1e0fcc8a2439f","src/device.rs":"a54e7b23fd268d839726bb6d5a0843e77e50156eda16ce51799931043b7a199c","src/dxgi.rs":"3af9731ebdfc6d834f661cbd4a2378d063d9728d34b9dc725bcb3195b134d478","src/internal.rs":"ce3874fbe7fbbc50b18f90223ca7e98d0a69d30c7cd671e317de3ec1db7ca668","src/lib.rs":"f18b3f11f846a40bfecc06a0af1148175fdadee5662c746a9e124ff75a438070","src/shader.rs":"05cb22fdeaaf17b05e21a68911e1fa8c8feb03eeda256e18a72c74ea31ce3f15"},"package":null}
\ No newline at end of file
diff --git a/third_party/rust/gfx-backend-dx11/Cargo.toml b/third_party/rust/gfx-backend-dx11/Cargo.toml
new file mode 100644
index 0000000000..163fc6a285
--- /dev/null
+++ b/third_party/rust/gfx-backend-dx11/Cargo.toml
@@ -0,0 +1,40 @@
+[package]
+name = "gfx-backend-dx11"
+version = "0.6.0"
+description = "DirectX-11 API backend for gfx-rs"
+homepage = "https://github.com/gfx-rs/gfx"
+repository = "https://github.com/gfx-rs/gfx"
+keywords = ["graphics", "gamedev"]
+license = "MIT OR Apache-2.0"
+authors = ["The Gfx-rs Developers"]
+readme = "README.md"
+documentation = "https://docs.rs/gfx-backend-dx11"
+workspace = "../../.."
+edition = "2018"
+
+[features]
+default = []
+
+[lib]
+name = "gfx_backend_dx11"
+
+[dependencies]
+arrayvec = "0.5"
+auxil = { path = "../../auxil/auxil", version = "0.5", package = "gfx-auxil", features = ["spirv_cross"] }
+hal = { path = "../../hal", version = "0.6", package = "gfx-hal" }
+range-alloc = { path = "../../auxil/range-alloc", version = "0.1" }
+bitflags = "1"
+libloading = "0.6"
+log = { version = "0.4" }
+smallvec = "1.0"
+spirv_cross = { version = "0.22", features = ["hlsl"] }
+thunderdome = "0.3"
+parking_lot = "0.11"
+winapi = { version = "0.3", features = ["basetsd","d3d11", "d3d11_1", "d3d11sdklayers", "d3dcommon","d3dcompiler","dxgi1_2","dxgi1_3","dxgi1_4", "dxgi1_5", "dxgiformat","dxgitype","handleapi","minwindef","synchapi","unknwnbase","winbase","windef","winerror","winnt","winuser"] }
+wio = "0.2"
+raw-window-handle = "0.3"
+
+# This forces docs.rs to build the crate on windows, otherwise the build fails
+# and we get no docs at all.
+[package.metadata.docs.rs]
+default-target = "x86_64-pc-windows-msvc"
diff --git a/third_party/rust/gfx-backend-dx11/README.md b/third_party/rust/gfx-backend-dx11/README.md
new file mode 100644
index 0000000000..921c43c22c
--- /dev/null
+++ b/third_party/rust/gfx-backend-dx11/README.md
@@ -0,0 +1,13 @@
+# gfx_device_dx11
+
+[DX11](https://msdn.microsoft.com/en-us/library/windows/desktop/ff476080(v=vs.85).aspx) backend for gfx.
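The blit.hlsl and clear.hlsl shaders further down in this diff both synthesize a screen-filling triangle from SV_VertexID alone, with no vertex buffer bound. A standalone sketch of the vertex math from vs_blit_2d, in plain Rust for illustration only:

```rust
// Sketch of the SV_VertexID fullscreen-triangle trick used by vs_blit_2d.
// Three vertex IDs expand to one oversized triangle covering all of clip space.
fn vertex(id: u32) -> (f32, f32) {
    // Matches HLSL: coord = float2((id << 1) & 2, id & 2)
    let coord = (((id << 1) & 2) as f32, (id & 2) as f32);
    // Matches HLSL: pos.xy = float2(-1, 1) + coord * float2(2, -2)
    (-1.0 + coord.0 * 2.0, 1.0 + coord.1 * -2.0)
}

fn main() {
    let tri: Vec<_> = (0..3).map(vertex).collect();
    // The triangle hangs off the right and bottom edges, so after clipping
    // it exactly fills the [-1, 1] x [-1, 1] viewport.
    assert_eq!(tri, [(-1.0, 1.0), (3.0, 1.0), (-1.0, -3.0)]);
}
```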
+
+## Normalized Coordinates
+
+Render | Depth | Texture
+-------|-------|--------
+![render_coordinates](../../../info/gl_render_coordinates.png) | ![depth_coordinates](../../../info/dx_depth_coordinates.png) | ![texture_coordinates](../../../info/dx_texture_coordinates.png)
+
+## Mirroring
+
+TODO
diff --git a/third_party/rust/gfx-backend-dx11/shaders/blit.hlsl b/third_party/rust/gfx-backend-dx11/shaders/blit.hlsl
new file mode 100644
index 0000000000..f627e5c9b9
--- /dev/null
+++ b/third_party/rust/gfx-backend-dx11/shaders/blit.hlsl
@@ -0,0 +1,63 @@
+cbuffer Region : register(b0) {
+    float2 offset;
+    float2 extent;
+    float z;
+    float level;
+};
+
+struct VsOutput {
+    float4 pos: SV_POSITION;
+    float4 uv: TEXCOORD0;
+};
+
+// Create a screen filling triangle
+VsOutput vs_blit_2d(uint id: SV_VertexID) {
+    float2 coord = float2((id << 1) & 2, id & 2);
+    VsOutput output = {
+        float4(float2(-1.0, 1.0) + coord * float2(2.0, -2.0), 0.0, 1.0),
+        float4(offset + coord * extent, z, level)
+    };
+    return output;
+}
+
+SamplerState BlitSampler : register(s0);
+
+Texture2DArray<uint4> BlitSrc_Uint : register(t0);
+Texture2DArray<int4> BlitSrc_Sint : register(t0);
+Texture2DArray<float4> BlitSrc_Float : register(t0);
+
+// TODO: get rid of GetDimensions call
+uint4 Nearest_Uint(float4 uv)
+{
+    float4 size;
+    BlitSrc_Uint.GetDimensions(0, size.x, size.y, size.z, size.w);
+
+    float2 pix = uv.xy * size.xy;
+
+    return BlitSrc_Uint.Load(int4(int2(pix), uv.zw));
+}
+
+int4 Nearest_Sint(float4 uv)
+{
+    float4 size;
+    BlitSrc_Sint.GetDimensions(0, size.x, size.y, size.z, size.w);
+
+    float2 pix = uv.xy * size.xy;
+
+    return BlitSrc_Sint.Load(int4(int2(pix), uv.zw));
+}
+
+uint4 ps_blit_2d_uint(VsOutput input) : SV_Target
+{
+    return Nearest_Uint(input.uv);
+}
+
+int4 ps_blit_2d_int(VsOutput input) : SV_Target
+{
+    return Nearest_Sint(input.uv);
+}
+
+float4 ps_blit_2d_float(VsOutput input) : SV_Target
+{
+    return BlitSrc_Float.SampleLevel(BlitSampler, input.uv.xyz, input.uv.w);
+}
diff --git a/third_party/rust/gfx-backend-dx11/shaders/clear.hlsl b/third_party/rust/gfx-backend-dx11/shaders/clear.hlsl
new file mode 100644
index 0000000000..5c91fe6b49
--- /dev/null
+++ b/third_party/rust/gfx-backend-dx11/shaders/clear.hlsl
@@ -0,0 +1,22 @@
+cbuffer ClearColorF32 : register(b0) { float4 ClearF32; };
+cbuffer ClearColorU32 : register(b0) { uint4 ClearU32; };
+cbuffer ClearColorI32 : register(b0) { int4 ClearI32; };
+cbuffer ClearColorDepth : register(b0) { float ClearDepth; };
+
+// fullscreen triangle
+float4 vs_partial_clear(uint id : SV_VertexID) : SV_Position
+{
+    return float4(
+        float(id / 2) * 4.0 - 1.0,
+        float(id % 2) * 4.0 - 1.0,
+        0.0,
+        1.0
+    );
+}
+
+// TODO: send constants through VS as flat attributes
+float4 ps_partial_clear_float() : SV_Target0 { return ClearF32; }
+uint4 ps_partial_clear_uint() : SV_Target0 { return ClearU32; }
+int4 ps_partial_clear_int() : SV_Target0 { return ClearI32; }
+float ps_partial_clear_depth() : SV_Depth { return ClearDepth; }
+void ps_partial_clear_stencil() { }
diff --git a/third_party/rust/gfx-backend-dx11/shaders/copy.hlsl b/third_party/rust/gfx-backend-dx11/shaders/copy.hlsl
new file mode 100644
index 0000000000..f8b5d8523b
--- /dev/null
+++ b/third_party/rust/gfx-backend-dx11/shaders/copy.hlsl
@@ -0,0 +1,615 @@
+struct BufferCopy {
+    uint4 SrcDst;
+};
+
+struct ImageCopy {
+    uint4 Src;
+    uint4 Dst;
+};
+
+struct BufferImageCopy {
+    // x=offset, yz=size
+    uint4 BufferVars;
+    uint4 ImageOffset;
+    uint4 ImageExtent;
+    uint4 ImageSize;
+};
+
+cbuffer CopyConstants :
register(b0) { + BufferCopy BufferCopies; + ImageCopy ImageCopies; + BufferImageCopy BufferImageCopies; +}; + + +uint3 GetDestBounds() +{ + return min( + BufferImageCopies.ImageOffset + BufferImageCopies.ImageExtent, + BufferImageCopies.ImageSize + ); +} + +uint3 GetImageCopyDst(uint3 dispatch_thread_id) +{ + return uint3(ImageCopies.Dst.xy + dispatch_thread_id.xy, ImageCopies.Dst.z); +} + +uint3 GetImageCopySrc(uint3 dispatch_thread_id) +{ + return uint3(ImageCopies.Src.xy + dispatch_thread_id.xy, ImageCopies.Src.z); +} + +uint3 GetImageDst(uint3 dispatch_thread_id) +{ + return uint3(BufferImageCopies.ImageOffset.xy + dispatch_thread_id.xy, BufferImageCopies.ImageOffset.z); +} + +uint3 GetImageSrc(uint3 dispatch_thread_id) +{ + return uint3(BufferImageCopies.ImageOffset.xy + dispatch_thread_id.xy, BufferImageCopies.ImageOffset.z); +} + +uint GetBufferDst128(uint3 dispatch_thread_id) +{ + return BufferImageCopies.BufferVars.x + dispatch_thread_id.x * 16 + dispatch_thread_id.y * 16 * max(BufferImageCopies.BufferVars.y, BufferImageCopies.ImageExtent.x); +} +uint GetBufferSrc128(uint3 dispatch_thread_id) +{ + return BufferImageCopies.BufferVars.x + dispatch_thread_id.x * 16 + dispatch_thread_id.y * 16 * max(BufferImageCopies.BufferVars.y, BufferImageCopies.ImageExtent.x); +} + +uint GetBufferDst64(uint3 dispatch_thread_id) +{ + return BufferImageCopies.BufferVars.x + dispatch_thread_id.x * 8 + dispatch_thread_id.y * 8 * max(BufferImageCopies.BufferVars.y, BufferImageCopies.ImageExtent.x); +} +uint GetBufferSrc64(uint3 dispatch_thread_id) +{ + return BufferImageCopies.BufferVars.x + dispatch_thread_id.x * 8 + dispatch_thread_id.y * 8 * max(BufferImageCopies.BufferVars.y, BufferImageCopies.ImageExtent.x); +} + +uint GetBufferDst32(uint3 dispatch_thread_id) +{ + return BufferImageCopies.BufferVars.x + dispatch_thread_id.x * 4 + dispatch_thread_id.y * 4 * max(BufferImageCopies.BufferVars.y, BufferImageCopies.ImageExtent.x); +} +uint GetBufferSrc32(uint3 dispatch_thread_id) +{ + return BufferImageCopies.BufferVars.x + dispatch_thread_id.x * 4 + dispatch_thread_id.y * 4 * max(BufferImageCopies.BufferVars.y, BufferImageCopies.ImageExtent.x); +} + +uint GetBufferDst16(uint3 dispatch_thread_id) +{ + return BufferImageCopies.BufferVars.x + dispatch_thread_id.x * 4 + dispatch_thread_id.y * 2 * max(BufferImageCopies.BufferVars.y, BufferImageCopies.ImageExtent.x); +} +uint GetBufferSrc16(uint3 dispatch_thread_id) +{ + return BufferImageCopies.BufferVars.x + dispatch_thread_id.x * 4 + dispatch_thread_id.y * 2 * max(BufferImageCopies.BufferVars.y, BufferImageCopies.ImageExtent.x); +} + +uint GetBufferDst8(uint3 dispatch_thread_id) +{ + return BufferImageCopies.BufferVars.x + dispatch_thread_id.x * 4 + dispatch_thread_id.y * max(BufferImageCopies.BufferVars.y, BufferImageCopies.ImageExtent.x); +} +uint GetBufferSrc8(uint3 dispatch_thread_id) +{ + return BufferImageCopies.BufferVars.x + dispatch_thread_id.x * 4 + dispatch_thread_id.y * max(BufferImageCopies.BufferVars.y, BufferImageCopies.ImageExtent.x); +} + + +uint4 Uint32ToUint8x4(uint data) +{ + return (data >> uint4(0, 8, 16, 24)) & 0xFF; +} + +uint2 Uint32ToUint16x2(uint data) +{ + return (data >> uint2(0, 16)) & 0xFFFF; +} + +uint Uint8x4ToUint32(uint4 data) +{ + return dot(min(data, 0xFF), 1 << uint4(0, 8, 16, 24)); +} + +uint Uint16x2ToUint32(uint2 data) +{ + return dot(min(data, 0xFFFF), 1 << uint2(0, 16)); +} + +uint2 Uint16ToUint8x2(uint data) +{ + return (data >> uint2(0, 8)) & 0xFF; +} + +uint Uint8x2ToUint16(uint2 data) +{ + return 
dot(min(data, 0xFF), 1 << uint2(0, 8)); +} + +uint4 Float4ToUint8x4(float4 data) +{ + return uint4(data * 255 + .5f); +} + +// Buffers are always R32-aligned +ByteAddressBuffer BufferCopySrc : register(t0); +RWByteAddressBuffer BufferCopyDst : register(u0); + +RWTexture1DArray<uint> Image1CopyDstR : register(u0); +RWTexture1DArray<uint2> Image1CopyDstRg : register(u0); +RWTexture1DArray<uint4> Image1CopyDstRgba : register(u0); + +Texture2DArray<uint4> Image2CopySrc : register(t0); +RWTexture2DArray<uint> Image2CopyDstR : register(u0); +RWTexture2DArray<uint2> Image2CopyDstRg : register(u0); +RWTexture2DArray<uint4> Image2CopyDstRgba : register(u0); + +Texture2DArray<float4> ImageCopy2SrcBgra : register(t0); + +// Image<->Image copies +[numthreads(1, 1, 1)] +void cs_copy_image2d_r8g8_image2d_r16(uint3 dispatch_thread_id : SV_DispatchThreadID) +{ + uint3 dst_idx = GetImageCopyDst(dispatch_thread_id); + uint3 src_idx = GetImageCopySrc(dispatch_thread_id); + + Image2CopyDstR[dst_idx] = Uint8x2ToUint16(Image2CopySrc[src_idx]); +} + +[numthreads(1, 1, 1)] +void cs_copy_image2d_r16_image2d_r8g8(uint3 dispatch_thread_id : SV_DispatchThreadID) +{ + uint3 dst_idx = GetImageCopyDst(dispatch_thread_id); + uint3 src_idx = GetImageCopySrc(dispatch_thread_id); + + Image2CopyDstRg[dst_idx] = Uint16ToUint8x2(Image2CopySrc[src_idx]); +} + +[numthreads(1, 1, 1)] +void cs_copy_image2d_r8g8b8a8_image2d_r32(uint3 dispatch_thread_id : SV_DispatchThreadID) +{ + uint3 dst_idx = GetImageCopyDst(dispatch_thread_id); + uint3 src_idx = GetImageCopySrc(dispatch_thread_id); + + Image2CopyDstR[dst_idx] = Uint8x4ToUint32(Image2CopySrc[src_idx]); +} + +[numthreads(1, 1, 1)] +void cs_copy_image2d_r8g8b8a8_image2d_r16g16(uint3 dispatch_thread_id : SV_DispatchThreadID) +{ + uint3 dst_idx = GetImageCopyDst(dispatch_thread_id); + uint3 src_idx = GetImageCopySrc(dispatch_thread_id); + + Image2CopyDstRg[dst_idx] = Uint32ToUint16x2(Uint8x4ToUint32(Image2CopySrc[src_idx])); +} + +[numthreads(1, 1, 1)] +void cs_copy_image2d_r16g16_image2d_r32(uint3 dispatch_thread_id : SV_DispatchThreadID) +{ + uint3 dst_idx = GetImageCopyDst(dispatch_thread_id); + uint3 src_idx = GetImageCopySrc(dispatch_thread_id); + + Image2CopyDstR[dst_idx] = Uint16x2ToUint32(Image2CopySrc[src_idx]); +} + +[numthreads(1, 1, 1)] +void cs_copy_image2d_r16g16_image2d_r8g8b8a8(uint3 dispatch_thread_id : SV_DispatchThreadID) +{ + uint3 dst_idx = GetImageCopyDst(dispatch_thread_id); + uint3 src_idx = GetImageCopySrc(dispatch_thread_id); + + Image2CopyDstRgba[dst_idx] = Uint32ToUint8x4(Uint16x2ToUint32(Image2CopySrc[src_idx])); +} + +[numthreads(1, 1, 1)] +void cs_copy_image2d_r32_image2d_r16g16(uint3 dispatch_thread_id : SV_DispatchThreadID) +{ + uint3 dst_idx = GetImageCopyDst(dispatch_thread_id); + uint3 src_idx = GetImageCopySrc(dispatch_thread_id); + + Image2CopyDstRg[dst_idx] = Uint32ToUint16x2(Image2CopySrc[src_idx]); +} + +[numthreads(1, 1, 1)] +void cs_copy_image2d_r32_image2d_r8g8b8a8(uint3 dispatch_thread_id : SV_DispatchThreadID) +{ + uint3 dst_idx = GetImageCopyDst(dispatch_thread_id); + uint3 src_idx = GetImageCopySrc(dispatch_thread_id); + + Image2CopyDstRgba[dst_idx] = Uint32ToUint8x4(Image2CopySrc[src_idx]); +} + +//#define COPY_1D_NUM_THREAD 64 //TODO +#define COPY_2D_NUM_THREAD_X 8 +#define COPY_2D_NUM_THREAD_Y 8 + +// Buffer<->Image copies + +// R32G32B32A32 +[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)] +void cs_copy_buffer_image2d_r32g32b32a32(uint3 dispatch_thread_id : SV_DispatchThreadID) { + uint3 dst_idx = 
GetImageDst(dispatch_thread_id); + uint3 bounds = GetDestBounds(); + if (dst_idx.x >= bounds.x || dst_idx.y >= bounds.y) { + return; + } + + uint src_idx = GetBufferSrc128(dispatch_thread_id); + + Image2CopyDstRgba[dst_idx] = uint4( + BufferCopySrc.Load(src_idx), + BufferCopySrc.Load(src_idx + 1 * 4), + BufferCopySrc.Load(src_idx + 2 * 4), + BufferCopySrc.Load(src_idx + 3 * 4) + ); +} + +[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)] +void cs_copy_image2d_r32g32b32a32_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) { + uint3 src_idx = GetImageSrc(dispatch_thread_id); + uint3 bounds = GetDestBounds(); + if (src_idx.x >= bounds.x || src_idx.y >= bounds.y) { + return; + } + + uint4 data = Image2CopySrc[src_idx]; + uint dst_idx = GetBufferDst128(dispatch_thread_id); + + BufferCopyDst.Store(dst_idx, data.x); + BufferCopyDst.Store(dst_idx + 1 * 4, data.y); + BufferCopyDst.Store(dst_idx + 2 * 4, data.z); + BufferCopyDst.Store(dst_idx + 3 * 4, data.w); +} + +// R32G32 +[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)] +void cs_copy_buffer_image2d_r32g32(uint3 dispatch_thread_id : SV_DispatchThreadID) { + uint3 dst_idx = GetImageDst(dispatch_thread_id); + uint3 bounds = GetDestBounds(); + if (dst_idx.x >= bounds.x || dst_idx.y >= bounds.y) { + return; + } + + uint src_idx = GetBufferSrc64(dispatch_thread_id); + + Image2CopyDstRg[dst_idx] = uint2( + BufferCopySrc.Load(src_idx), + BufferCopySrc.Load(src_idx + 1 * 4) + ); +} + +[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)] +void cs_copy_image2d_r32g32_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) { + uint3 src_idx = GetImageSrc(dispatch_thread_id); + uint3 bounds = GetDestBounds(); + if (src_idx.x >= bounds.x || src_idx.y >= bounds.y) { + return; + } + + uint2 data = Image2CopySrc[src_idx].rg; + uint dst_idx = GetBufferDst64(dispatch_thread_id); + + BufferCopyDst.Store(dst_idx , data.x); + BufferCopyDst.Store(dst_idx + 1 * 4, data.y); +} + +// R16G16B16A16 +[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)] +void cs_copy_buffer_image2d_r16g16b16a16(uint3 dispatch_thread_id : SV_DispatchThreadID) { + uint3 dst_idx = GetImageDst(dispatch_thread_id); + uint3 bounds = GetDestBounds(); + if (dst_idx.x >= bounds.x || dst_idx.y >= bounds.y) { + return; + } + + uint src_idx = GetBufferSrc64(dispatch_thread_id); + + Image2CopyDstRgba[dst_idx] = uint4( + Uint32ToUint16x2(BufferCopySrc.Load(src_idx)), + Uint32ToUint16x2(BufferCopySrc.Load(src_idx + 1 * 4)) + ); +} + +[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)] +void cs_copy_image2d_r16g16b16a16_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) { + uint3 src_idx = GetImageSrc(dispatch_thread_id); + uint3 bounds = GetDestBounds(); + if (src_idx.x >= bounds.x || src_idx.y >= bounds.y) { + return; + } + + uint4 data = Image2CopySrc[src_idx]; + uint dst_idx = GetBufferDst64(dispatch_thread_id); + + BufferCopyDst.Store(dst_idx, Uint16x2ToUint32(data.xy)); + BufferCopyDst.Store(dst_idx + 1 * 4, Uint16x2ToUint32(data.zw)); +} + +// R32 +[numthreads(COPY_2D_NUM_THREAD_X, 1, 1)] +void cs_copy_buffer_image1d_r32(uint3 dispatch_thread_id : SV_DispatchThreadID) { + uint3 dst_idx = GetImageDst(dispatch_thread_id); + uint3 bounds = GetDestBounds(); + if (dst_idx.x >= bounds.x) { + return; + } + + uint src_idx = GetBufferSrc32(dispatch_thread_id); + + Image1CopyDstR[dst_idx.xz] = BufferCopySrc.Load(src_idx); +} +[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)] +void cs_copy_buffer_image2d_r32(uint3 dispatch_thread_id : 
SV_DispatchThreadID) { + uint3 dst_idx = GetImageDst(dispatch_thread_id); + uint3 bounds = GetDestBounds(); + if (dst_idx.x >= bounds.x || dst_idx.y >= bounds.y) { + return; + } + + uint src_idx = GetBufferSrc32(dispatch_thread_id); + + Image2CopyDstR[dst_idx] = BufferCopySrc.Load(src_idx); +} + +[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)] +void cs_copy_image2d_r32_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) { + uint3 src_idx = GetImageSrc(dispatch_thread_id); + uint3 bounds = GetDestBounds(); + if (src_idx.x >= bounds.x || src_idx.y >= bounds.y) { + return; + } + + uint dst_idx = GetBufferDst32(dispatch_thread_id); + + BufferCopyDst.Store(dst_idx, Image2CopySrc[src_idx].r); +} + +// R16G16 +[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)] +void cs_copy_buffer_image2d_r16g16(uint3 dispatch_thread_id : SV_DispatchThreadID) { + uint3 dst_idx = GetImageDst(dispatch_thread_id); + uint3 bounds = GetDestBounds(); + if (dst_idx.x >= bounds.x || dst_idx.y >= bounds.y) { + return; + } + + uint src_idx = GetBufferSrc32(dispatch_thread_id); + + Image2CopyDstRg[dst_idx] = Uint32ToUint16x2(BufferCopySrc.Load(src_idx)); +} + +[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)] +void cs_copy_image2d_r16g16_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) { + uint3 src_idx = GetImageSrc(dispatch_thread_id); + uint3 bounds = GetDestBounds(); + if (src_idx.x >= bounds.x || src_idx.y >= bounds.y) { + return; + } + + uint dst_idx = GetBufferDst32(dispatch_thread_id); + + BufferCopyDst.Store(dst_idx, Uint16x2ToUint32(Image2CopySrc[src_idx].xy)); +} + +// R8G8B8A8 +[numthreads(COPY_2D_NUM_THREAD_X, 1, 1)] +void cs_copy_buffer_image1d_r8g8b8a8(uint3 dispatch_thread_id : SV_DispatchThreadID) { + uint3 dst_idx = GetImageDst(dispatch_thread_id); + uint3 bounds = GetDestBounds(); + if (dst_idx.x >= bounds.x) { + return; + } + + uint src_idx = GetBufferSrc32(dispatch_thread_id); + + Image1CopyDstRgba[dst_idx.xz] = Uint32ToUint8x4(BufferCopySrc.Load(src_idx)); +} +[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)] +void cs_copy_buffer_image2d_r8g8b8a8(uint3 dispatch_thread_id : SV_DispatchThreadID) { + uint3 dst_idx = GetImageDst(dispatch_thread_id); + uint3 bounds = GetDestBounds(); + if (dst_idx.x >= bounds.x || dst_idx.y >= bounds.y) { + return; + } + + uint src_idx = GetBufferSrc32(dispatch_thread_id); + + Image2CopyDstRgba[dst_idx] = Uint32ToUint8x4(BufferCopySrc.Load(src_idx)); +} + +[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)] +void cs_copy_image2d_r8g8b8a8_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) { + uint3 src_idx = GetImageSrc(dispatch_thread_id); + uint3 bounds = GetDestBounds(); + if (src_idx.x >= bounds.x || src_idx.y >= bounds.y) { + return; + } + + uint dst_idx = GetBufferDst32(dispatch_thread_id); + + BufferCopyDst.Store(dst_idx, Uint8x4ToUint32(Image2CopySrc[src_idx])); +} + +// B8G8R8A8 +[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)] +void cs_copy_image2d_b8g8r8a8_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) { + uint3 src_idx = GetImageSrc(dispatch_thread_id); + uint3 bounds = GetDestBounds(); + if (src_idx.x >= bounds.x || src_idx.y >= bounds.y) { + return; + } + + uint dst_idx = GetBufferDst32(dispatch_thread_id); + + BufferCopyDst.Store(dst_idx, Uint8x4ToUint32(Float4ToUint8x4(ImageCopy2SrcBgra[src_idx].bgra))); +} + +// R16 +[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)] +void cs_copy_buffer_image2d_r16(uint3 dispatch_thread_id : SV_DispatchThreadID) { + uint3 dst_idx = 
GetImageDst(uint3(2, 1, 0) * dispatch_thread_id); + uint3 bounds = GetDestBounds(); + if (dst_idx.x >= bounds.x || dst_idx.y >= bounds.y) { + return; + } + + uint src_idx = GetBufferSrc16(dispatch_thread_id); + uint2 data = Uint32ToUint16x2(BufferCopySrc.Load(src_idx)); + + uint remaining_x = bounds.x - dst_idx.x; + + if (remaining_x >= 2) { + Image2CopyDstR[dst_idx + uint3(1, 0, 0)] = data.y; + } + if (remaining_x >= 1) { + Image2CopyDstR[dst_idx + uint3(0, 0, 0)] = data.x; + } +} + +[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)] +void cs_copy_image2d_r16_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) { + uint3 src_idx = GetImageSrc(uint3(2, 1, 0) * dispatch_thread_id); + uint3 bounds = GetDestBounds(); + if (src_idx.x >= bounds.x || src_idx.y >= bounds.y) { + return; + } + + uint dst_idx = GetBufferDst16(dispatch_thread_id); + + uint upper = Image2CopySrc[src_idx].r; + uint lower = Image2CopySrc[src_idx + uint3(1, 0, 0)].r; + + BufferCopyDst.Store(dst_idx, Uint16x2ToUint32(uint2(upper, lower))); +} + +// R8G8 +[numthreads(COPY_2D_NUM_THREAD_X, 1, 1)] +void cs_copy_buffer_image1d_r8g8(uint3 dispatch_thread_id : SV_DispatchThreadID) { + uint3 dst_idx = GetImageDst(uint3(2, 1, 0) * dispatch_thread_id); + uint3 bounds = GetDestBounds(); + if (dst_idx.x >= bounds.x) { + return; + } + + uint src_idx = GetBufferSrc16(dispatch_thread_id); + + uint4 data = Uint32ToUint8x4(BufferCopySrc.Load(src_idx)); + + uint remaining_x = bounds.x - dst_idx.x; + + if (remaining_x >= 2) { + Image1CopyDstRg[dst_idx.xz + uint2(1, 0)] = data.zw; + } + if (remaining_x >= 1) { + Image1CopyDstRg[dst_idx.xz + uint2(0, 0)] = data.xy; + } +} + +[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)] +void cs_copy_buffer_image2d_r8g8(uint3 dispatch_thread_id : SV_DispatchThreadID) { + uint3 dst_idx = GetImageDst(uint3(2, 1, 0) * dispatch_thread_id); + uint3 bounds = GetDestBounds(); + if (dst_idx.x >= bounds.x || dst_idx.y >= bounds.y) { + return; + } + + uint src_idx = GetBufferSrc16(dispatch_thread_id); + + uint4 data = Uint32ToUint8x4(BufferCopySrc.Load(src_idx)); + + uint remaining_x = bounds.x - dst_idx.x; + + if (remaining_x >= 2) { + Image2CopyDstRg[dst_idx + uint3(1, 0, 0)] = data.zw; + } + if (remaining_x >= 1) { + Image2CopyDstRg[dst_idx + uint3(0, 0, 0)] = data.xy; + } +} + +[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)] +void cs_copy_image2d_r8g8_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) { + uint3 src_idx = GetImageSrc(uint3(2, 1, 0) * dispatch_thread_id); + uint3 bounds = GetDestBounds(); + if (src_idx.x >= bounds.x || src_idx.y >= bounds.y) { + return; + } + + uint dst_idx = GetBufferDst16(dispatch_thread_id); + + uint2 lower = Image2CopySrc[src_idx].xy; + uint2 upper = Image2CopySrc[src_idx + uint3(1, 0, 0)].xy; + + BufferCopyDst.Store(dst_idx, Uint8x4ToUint32(uint4(lower.x, lower.y, upper.x, upper.y))); +} + +// R8 +[numthreads(COPY_2D_NUM_THREAD_X, 1, 1)] +void cs_copy_buffer_image1d_r8(uint3 dispatch_thread_id : SV_DispatchThreadID) { + uint3 dst_idx = GetImageDst(uint3(4, 1, 0) * dispatch_thread_id); + uint3 bounds = GetDestBounds(); + if (dst_idx.x >= bounds.x) { + return; + } + + uint src_idx = GetBufferSrc8(dispatch_thread_id); + uint4 data = Uint32ToUint8x4(BufferCopySrc.Load(src_idx)); + + uint remaining_x = bounds.x - dst_idx.x; + + if (remaining_x >= 4) { + Image1CopyDstR[dst_idx.xz + uint2(3, 0)] = data.w; + } + if (remaining_x >= 3) { + Image1CopyDstR[dst_idx.xz + uint2(2, 0)] = data.z; + } + if (remaining_x >= 2) { + Image1CopyDstR[dst_idx.xz 
+ uint2(1, 0)] = data.y; + } + if (remaining_x >= 1) { + Image1CopyDstR[dst_idx.xz + uint2(0, 0)] = data.x; + } +} +[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)] +void cs_copy_buffer_image2d_r8(uint3 dispatch_thread_id : SV_DispatchThreadID) { + uint3 dst_idx = GetImageDst(uint3(4, 1, 0) * dispatch_thread_id); + uint3 bounds = GetDestBounds(); + if (dst_idx.x >= bounds.x || dst_idx.y >= bounds.y) { + return; + } + + uint src_idx = GetBufferSrc8(dispatch_thread_id); + uint4 data = Uint32ToUint8x4(BufferCopySrc.Load(src_idx)); + + uint remaining_x = bounds.x - dst_idx.x; + + if (remaining_x >= 4) { + Image2CopyDstR[dst_idx + uint3(3, 0, 0)] = data.w; + } + if (remaining_x >= 3) { + Image2CopyDstR[dst_idx + uint3(2, 0, 0)] = data.z; + } + if (remaining_x >= 2) { + Image2CopyDstR[dst_idx + uint3(1, 0, 0)] = data.y; + } + if (remaining_x >= 1) { + Image2CopyDstR[dst_idx + uint3(0, 0, 0)] = data.x; + } +} +[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)] +void cs_copy_image2d_r8_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) { + uint3 src_idx = GetImageSrc(uint3(4, 1, 0) * dispatch_thread_id); + uint3 bounds = GetDestBounds(); + if (src_idx.x >= bounds.x || src_idx.y >= bounds.y) { + return; + } + + uint dst_idx = GetBufferDst8(dispatch_thread_id); + + BufferCopyDst.Store(dst_idx, Uint8x4ToUint32(uint4( + Image2CopySrc[src_idx].r, + Image2CopySrc[src_idx + uint3(1, 0, 0)].r, + Image2CopySrc[src_idx + uint3(2, 0, 0)].r, + Image2CopySrc[src_idx + uint3(3, 0, 0)].r + ))); +} diff --git a/third_party/rust/gfx-backend-dx11/src/conv.rs b/third_party/rust/gfx-backend-dx11/src/conv.rs new file mode 100644 index 0000000000..4cd75ce788 --- /dev/null +++ b/third_party/rust/gfx-backend-dx11/src/conv.rs @@ -0,0 +1,855 @@ +use auxil::ShaderStage; +use hal::{ + format::{Aspects, Format, FormatDesc}, + image, + pso::{ + BlendDesc, BlendOp, ColorBlendDesc, Comparison, DepthBias, DepthStencilDesc, Face, Factor, + FrontFace, InputAssemblerDesc, Multisampling, PolygonMode, Rasterizer, Rect, Sided, State, + StencilFace, StencilOp, StencilValue, Viewport, + }, + IndexType, +}; + +use spirv_cross::spirv; + +use winapi::{ + shared::{ + dxgiformat::*, + minwindef::{FALSE, INT, TRUE}, + }, + um::{d3d11::*, d3dcommon::*}, +}; + +use std::mem; + +pub fn map_index_type(ty: IndexType) -> DXGI_FORMAT { + match ty { + IndexType::U16 => DXGI_FORMAT_R16_UINT, + IndexType::U32 => DXGI_FORMAT_R32_UINT, + } +} + +// TODO: add aspect parameter +pub fn viewable_format(format: DXGI_FORMAT) -> DXGI_FORMAT { + match format { + DXGI_FORMAT_D32_FLOAT_S8X24_UINT => DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS, + DXGI_FORMAT_D32_FLOAT => DXGI_FORMAT_R32_FLOAT, + DXGI_FORMAT_D16_UNORM => DXGI_FORMAT_R16_UNORM, + _ => format, + } +} + +// TODO: stolen from d3d12 backend, maybe share function somehow? 
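The copy shaders above shuttle narrow texel formats through 32-bit buffer words using the Uint8x4ToUint32/Uint32ToUint8x4 helpers, where dot(min(data, 0xFF), 1 << uint4(0, 8, 16, 24)) is little-endian byte packing. A standalone sketch of the same lane math, assuming nothing beyond the Rust standard library:

```rust
// Host-side sketch of copy.hlsl's Uint8x4ToUint32 / Uint32ToUint8x4 round trip.
fn uint8x4_to_u32(d: [u32; 4]) -> u32 {
    // HLSL: dot(min(data, 0xFF), 1 << uint4(0, 8, 16, 24))
    d.iter().enumerate().map(|(i, &v)| v.min(0xFF) << (8 * i)).sum()
}

fn u32_to_uint8x4(v: u32) -> [u32; 4] {
    // HLSL: (data >> uint4(0, 8, 16, 24)) & 0xFF
    [0, 8, 16, 24].map(|s| (v >> s) & 0xFF)
}

fn main() {
    let px = [0x12, 0x34, 0x56, 0x78];
    let packed = uint8x4_to_u32(px);
    assert_eq!(packed, 0x78563412); // lane 0 lands in the low byte
    assert_eq!(u32_to_uint8x4(packed), px);
}
```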
+pub fn map_format(format: Format) -> Option<DXGI_FORMAT> { + use hal::format::Format::*; + + let format = match format { + R5g6b5Unorm => DXGI_FORMAT_B5G6R5_UNORM, + R5g5b5a1Unorm => DXGI_FORMAT_B5G5R5A1_UNORM, + R8Unorm => DXGI_FORMAT_R8_UNORM, + R8Snorm => DXGI_FORMAT_R8_SNORM, + R8Uint => DXGI_FORMAT_R8_UINT, + R8Sint => DXGI_FORMAT_R8_SINT, + Rg8Unorm => DXGI_FORMAT_R8G8_UNORM, + Rg8Snorm => DXGI_FORMAT_R8G8_SNORM, + Rg8Uint => DXGI_FORMAT_R8G8_UINT, + Rg8Sint => DXGI_FORMAT_R8G8_SINT, + Rgba8Unorm => DXGI_FORMAT_R8G8B8A8_UNORM, + Rgba8Snorm => DXGI_FORMAT_R8G8B8A8_SNORM, + Rgba8Uint => DXGI_FORMAT_R8G8B8A8_UINT, + Rgba8Sint => DXGI_FORMAT_R8G8B8A8_SINT, + Rgba8Srgb => DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, + Bgra8Unorm => DXGI_FORMAT_B8G8R8A8_UNORM, + Bgra8Srgb => DXGI_FORMAT_B8G8R8A8_UNORM_SRGB, + A2b10g10r10Unorm => DXGI_FORMAT_R10G10B10A2_UNORM, + A2b10g10r10Uint => DXGI_FORMAT_R10G10B10A2_UINT, + R16Unorm => DXGI_FORMAT_R16_UNORM, + R16Snorm => DXGI_FORMAT_R16_SNORM, + R16Uint => DXGI_FORMAT_R16_UINT, + R16Sint => DXGI_FORMAT_R16_SINT, + R16Sfloat => DXGI_FORMAT_R16_FLOAT, + Rg16Unorm => DXGI_FORMAT_R16G16_UNORM, + Rg16Snorm => DXGI_FORMAT_R16G16_SNORM, + Rg16Uint => DXGI_FORMAT_R16G16_UINT, + Rg16Sint => DXGI_FORMAT_R16G16_SINT, + Rg16Sfloat => DXGI_FORMAT_R16G16_FLOAT, + Rgba16Unorm => DXGI_FORMAT_R16G16B16A16_UNORM, + Rgba16Snorm => DXGI_FORMAT_R16G16B16A16_SNORM, + Rgba16Uint => DXGI_FORMAT_R16G16B16A16_UINT, + Rgba16Sint => DXGI_FORMAT_R16G16B16A16_SINT, + Rgba16Sfloat => DXGI_FORMAT_R16G16B16A16_FLOAT, + R32Uint => DXGI_FORMAT_R32_UINT, + R32Sint => DXGI_FORMAT_R32_SINT, + R32Sfloat => DXGI_FORMAT_R32_FLOAT, + Rg32Uint => DXGI_FORMAT_R32G32_UINT, + Rg32Sint => DXGI_FORMAT_R32G32_SINT, + Rg32Sfloat => DXGI_FORMAT_R32G32_FLOAT, + Rgb32Uint => DXGI_FORMAT_R32G32B32_UINT, + Rgb32Sint => DXGI_FORMAT_R32G32B32_SINT, + Rgb32Sfloat => DXGI_FORMAT_R32G32B32_FLOAT, + Rgba32Uint => DXGI_FORMAT_R32G32B32A32_UINT, + Rgba32Sint => DXGI_FORMAT_R32G32B32A32_SINT, + Rgba32Sfloat => DXGI_FORMAT_R32G32B32A32_FLOAT, + B10g11r11Ufloat => DXGI_FORMAT_R11G11B10_FLOAT, + E5b9g9r9Ufloat => DXGI_FORMAT_R9G9B9E5_SHAREDEXP, + D16Unorm => DXGI_FORMAT_D16_UNORM, + D32Sfloat => DXGI_FORMAT_D32_FLOAT, + D32SfloatS8Uint => DXGI_FORMAT_D32_FLOAT_S8X24_UINT, + Bc1RgbUnorm | Bc1RgbaUnorm => DXGI_FORMAT_BC1_UNORM, + Bc1RgbSrgb | Bc1RgbaSrgb => DXGI_FORMAT_BC1_UNORM_SRGB, + Bc2Unorm => DXGI_FORMAT_BC2_UNORM, + Bc2Srgb => DXGI_FORMAT_BC2_UNORM_SRGB, + Bc3Unorm => DXGI_FORMAT_BC3_UNORM, + Bc3Srgb => DXGI_FORMAT_BC3_UNORM_SRGB, + Bc4Unorm => DXGI_FORMAT_BC4_UNORM, + Bc4Snorm => DXGI_FORMAT_BC4_SNORM, + Bc5Unorm => DXGI_FORMAT_BC5_UNORM, + Bc5Snorm => DXGI_FORMAT_BC5_SNORM, + Bc6hUfloat => DXGI_FORMAT_BC6H_UF16, + Bc6hSfloat => DXGI_FORMAT_BC6H_SF16, + Bc7Unorm => DXGI_FORMAT_BC7_UNORM, + Bc7Srgb => DXGI_FORMAT_BC7_UNORM_SRGB, + + _ => return None, + }; + + Some(format) +} + +pub fn map_format_nosrgb(format: Format) -> Option<DXGI_FORMAT> { + // NOTE: DXGI doesn't allow sRGB format on the swapchain, but + // creating RTV of swapchain buffers with sRGB works + match format { + Format::Bgra8Srgb => Some(DXGI_FORMAT_B8G8R8A8_UNORM), + Format::Rgba8Srgb => Some(DXGI_FORMAT_R8G8B8A8_UNORM), + _ => map_format(format), + } +} + +#[derive(Debug, Clone)] +pub struct DecomposedDxgiFormat { + pub typeless: DXGI_FORMAT, + pub srv: Option<DXGI_FORMAT>, + pub rtv: Option<DXGI_FORMAT>, + pub uav: Option<DXGI_FORMAT>, + pub dsv: Option<DXGI_FORMAT>, + // the format we use internally for operating on textures (eg. 
Rgba8 uses R32 internally for + // copies) + pub copy_uav: Option<DXGI_FORMAT>, + pub copy_srv: Option<DXGI_FORMAT>, +} + +impl DecomposedDxgiFormat { + pub const UNKNOWN: DecomposedDxgiFormat = DecomposedDxgiFormat { + typeless: DXGI_FORMAT_UNKNOWN, + srv: None, + rtv: None, + uav: None, + dsv: None, + copy_uav: None, + copy_srv: None, + }; + + // TODO: we probably want to pass in usage flags or similar to allow for our `typeless_format` + // field to only contain the input format (eg. depth only rather than typeless likely + // improves perf since the driver doesn't need to expose internals) + // + // TODO: we also want aspect for determining depth/stencil + pub fn from_dxgi_format(format: DXGI_FORMAT) -> DecomposedDxgiFormat { + match format { + DXGI_FORMAT_R8G8B8A8_UNORM + | DXGI_FORMAT_R8G8B8A8_SNORM + | DXGI_FORMAT_R8G8B8A8_UINT + | DXGI_FORMAT_R8G8B8A8_SINT + | DXGI_FORMAT_R8G8B8A8_UNORM_SRGB => DecomposedDxgiFormat { + typeless: DXGI_FORMAT_R8G8B8A8_TYPELESS, + srv: Some(format), + rtv: Some(format), + uav: Some(format), + dsv: None, + copy_uav: Some(DXGI_FORMAT_R32_UINT), + copy_srv: Some(DXGI_FORMAT_R8G8B8A8_UINT), + }, + + DXGI_FORMAT_B8G8R8A8_UNORM | DXGI_FORMAT_B8G8R8A8_UNORM_SRGB => DecomposedDxgiFormat { + typeless: DXGI_FORMAT_B8G8R8A8_TYPELESS, + srv: Some(format), + rtv: Some(format), + uav: Some(DXGI_FORMAT_B8G8R8A8_UNORM), + dsv: None, + copy_uav: Some(DXGI_FORMAT_R32_UINT), + copy_srv: Some(DXGI_FORMAT_B8G8R8A8_UNORM), + }, + + DXGI_FORMAT_A8_UNORM => DecomposedDxgiFormat { + typeless: format, + srv: Some(format), + rtv: Some(format), + uav: Some(format), + dsv: None, + copy_uav: Some(format), + copy_srv: Some(format), + }, + + DXGI_FORMAT_R8_UNORM | DXGI_FORMAT_R8_SNORM | DXGI_FORMAT_R8_UINT + | DXGI_FORMAT_R8_SINT => DecomposedDxgiFormat { + typeless: DXGI_FORMAT_R8_TYPELESS, + srv: Some(format), + rtv: Some(format), + uav: Some(format), + dsv: None, + copy_uav: Some(DXGI_FORMAT_R8_UINT), + copy_srv: Some(DXGI_FORMAT_R8_UINT), + }, + + DXGI_FORMAT_R8G8_UNORM + | DXGI_FORMAT_R8G8_SNORM + | DXGI_FORMAT_R8G8_UINT + | DXGI_FORMAT_R8G8_SINT => DecomposedDxgiFormat { + typeless: DXGI_FORMAT_R8G8_TYPELESS, + srv: Some(format), + rtv: Some(format), + uav: Some(format), + dsv: None, + copy_uav: Some(DXGI_FORMAT_R8G8_UINT), + copy_srv: Some(DXGI_FORMAT_R8G8_UINT), + }, + + DXGI_FORMAT_D16_UNORM => DecomposedDxgiFormat { + typeless: DXGI_FORMAT_R16_TYPELESS, + srv: Some(DXGI_FORMAT_R16_FLOAT), + rtv: Some(DXGI_FORMAT_R16_FLOAT), + uav: Some(DXGI_FORMAT_R16_FLOAT), + dsv: Some(format), + copy_uav: Some(DXGI_FORMAT_R16_UINT), + copy_srv: Some(DXGI_FORMAT_R16_UINT), + }, + + DXGI_FORMAT_R16_UNORM + | DXGI_FORMAT_R16_SNORM + | DXGI_FORMAT_R16_UINT + | DXGI_FORMAT_R16_SINT + | DXGI_FORMAT_R16_FLOAT => DecomposedDxgiFormat { + typeless: DXGI_FORMAT_R16_TYPELESS, + srv: Some(format), + rtv: Some(format), + uav: Some(format), + dsv: Some(DXGI_FORMAT_D16_UNORM), + copy_uav: Some(DXGI_FORMAT_R16_UINT), + copy_srv: Some(DXGI_FORMAT_R16_UINT), + }, + + DXGI_FORMAT_R16G16_UNORM + | DXGI_FORMAT_R16G16_SNORM + | DXGI_FORMAT_R16G16_UINT + | DXGI_FORMAT_R16G16_SINT + | DXGI_FORMAT_R16G16_FLOAT => DecomposedDxgiFormat { + typeless: DXGI_FORMAT_R16G16_TYPELESS, + srv: Some(format), + rtv: Some(format), + uav: Some(format), + dsv: None, + copy_uav: Some(DXGI_FORMAT_R32_UINT), + copy_srv: Some(DXGI_FORMAT_R16G16_UINT), + }, + + DXGI_FORMAT_R16G16B16A16_UNORM + | DXGI_FORMAT_R16G16B16A16_SNORM + | DXGI_FORMAT_R16G16B16A16_UINT + | DXGI_FORMAT_R16G16B16A16_SINT + | DXGI_FORMAT_R16G16B16A16_FLOAT => 
DecomposedDxgiFormat { + typeless: DXGI_FORMAT_R16G16B16A16_TYPELESS, + srv: Some(format), + rtv: Some(format), + uav: Some(format), + dsv: None, + copy_uav: Some(DXGI_FORMAT_R16G16B16A16_UINT), + copy_srv: Some(DXGI_FORMAT_R16G16B16A16_UINT), + }, + + DXGI_FORMAT_D32_FLOAT_S8X24_UINT => DecomposedDxgiFormat { + typeless: DXGI_FORMAT_R32G8X24_TYPELESS, + // TODO: depth or stencil? + srv: Some(DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS), + rtv: None, + uav: None, + dsv: Some(format), + copy_uav: None, + copy_srv: Some(DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS), + }, + + DXGI_FORMAT_D32_FLOAT => DecomposedDxgiFormat { + typeless: DXGI_FORMAT_R32_TYPELESS, + srv: Some(DXGI_FORMAT_R32_FLOAT), + rtv: None, + uav: None, + dsv: Some(format), + copy_uav: Some(DXGI_FORMAT_R32_UINT), + copy_srv: Some(DXGI_FORMAT_R32_UINT), + }, + + DXGI_FORMAT_R32_UINT | DXGI_FORMAT_R32_SINT | DXGI_FORMAT_R32_FLOAT => { + DecomposedDxgiFormat { + typeless: DXGI_FORMAT_R32_TYPELESS, + srv: Some(format), + rtv: Some(format), + uav: Some(format), + dsv: Some(DXGI_FORMAT_D32_FLOAT), + copy_uav: Some(DXGI_FORMAT_R32_UINT), + copy_srv: Some(DXGI_FORMAT_R32_UINT), + } + } + + DXGI_FORMAT_R32G32_UINT | DXGI_FORMAT_R32G32_SINT | DXGI_FORMAT_R32G32_FLOAT => { + DecomposedDxgiFormat { + typeless: DXGI_FORMAT_R32G32_TYPELESS, + srv: Some(format), + rtv: Some(format), + uav: Some(format), + dsv: None, + copy_uav: Some(DXGI_FORMAT_R32G32_UINT), + copy_srv: Some(DXGI_FORMAT_R32G32_UINT), + } + } + + // TODO: should we just convert to Rgba32 internally? + DXGI_FORMAT_R32G32B32_UINT + | DXGI_FORMAT_R32G32B32_SINT + | DXGI_FORMAT_R32G32B32_FLOAT => DecomposedDxgiFormat { + typeless: DXGI_FORMAT_R32G32_TYPELESS, + srv: Some(format), + rtv: None, + uav: None, + dsv: None, + copy_uav: Some(DXGI_FORMAT_R32G32B32_UINT), + copy_srv: Some(DXGI_FORMAT_R32G32B32_UINT), + }, + + DXGI_FORMAT_R32G32B32A32_UINT + | DXGI_FORMAT_R32G32B32A32_SINT + | DXGI_FORMAT_R32G32B32A32_FLOAT => DecomposedDxgiFormat { + typeless: DXGI_FORMAT_R32G32B32A32_TYPELESS, + srv: Some(format), + rtv: Some(format), + uav: Some(format), + dsv: None, + copy_uav: Some(DXGI_FORMAT_R32G32B32A32_UINT), + copy_srv: Some(DXGI_FORMAT_R32G32B32A32_UINT), + }, + + DXGI_FORMAT_R10G10B10A2_UNORM | DXGI_FORMAT_R10G10B10A2_UINT => DecomposedDxgiFormat { + typeless: DXGI_FORMAT_R10G10B10A2_TYPELESS, + srv: Some(format), + rtv: Some(format), + uav: Some(format), + dsv: None, + copy_uav: Some(DXGI_FORMAT_R32_UINT), + copy_srv: Some(DXGI_FORMAT_R10G10B10A2_UINT), + }, + + DXGI_FORMAT_R11G11B10_FLOAT => DecomposedDxgiFormat { + typeless: format, + srv: Some(format), + rtv: Some(format), + uav: Some(format), + dsv: None, + copy_uav: Some(format), + copy_srv: Some(format), + }, + + DXGI_FORMAT_R9G9B9E5_SHAREDEXP => DecomposedDxgiFormat { + typeless: format, + srv: Some(format), + rtv: None, + uav: None, + dsv: None, + // NOTE: read only + copy_uav: None, + copy_srv: Some(format), + }, + + DXGI_FORMAT_BC1_UNORM | DXGI_FORMAT_BC1_UNORM_SRGB => DecomposedDxgiFormat { + typeless: DXGI_FORMAT_BC1_TYPELESS, + srv: Some(format), + rtv: None, + uav: None, + dsv: None, + // NOTE: read only + copy_uav: None, + copy_srv: Some(format), + }, + + DXGI_FORMAT_BC2_UNORM | DXGI_FORMAT_BC2_UNORM_SRGB => DecomposedDxgiFormat { + typeless: DXGI_FORMAT_BC2_TYPELESS, + srv: Some(format), + rtv: None, + uav: None, + dsv: None, + // NOTE: read only + copy_uav: None, + copy_srv: Some(format), + }, + + DXGI_FORMAT_BC3_UNORM | DXGI_FORMAT_BC3_UNORM_SRGB => DecomposedDxgiFormat { + typeless: DXGI_FORMAT_BC3_TYPELESS, + srv: 
Some(format), + rtv: None, + uav: None, + dsv: None, + // NOTE: read only + copy_uav: None, + copy_srv: Some(format), + }, + + DXGI_FORMAT_BC4_UNORM | DXGI_FORMAT_BC4_SNORM => DecomposedDxgiFormat { + typeless: DXGI_FORMAT_BC4_TYPELESS, + srv: Some(format), + rtv: None, + uav: None, + dsv: None, + // NOTE: read only + copy_uav: None, + copy_srv: Some(format), + }, + + DXGI_FORMAT_BC5_UNORM | DXGI_FORMAT_BC5_SNORM => DecomposedDxgiFormat { + typeless: format, + srv: Some(format), + rtv: None, + uav: None, + dsv: None, + // NOTE: read only + copy_uav: None, + copy_srv: Some(format), + }, + + DXGI_FORMAT_BC6H_UF16 | DXGI_FORMAT_BC6H_SF16 => DecomposedDxgiFormat { + typeless: DXGI_FORMAT_BC6H_TYPELESS, + srv: Some(format), + rtv: None, + uav: None, + dsv: None, + // NOTE: read only + copy_uav: None, + copy_srv: Some(format), + }, + + // TODO: srgb craziness + DXGI_FORMAT_BC7_UNORM | DXGI_FORMAT_BC7_UNORM_SRGB => DecomposedDxgiFormat { + typeless: DXGI_FORMAT_BC7_TYPELESS, + srv: Some(format), + rtv: None, + uav: None, + dsv: None, + // NOTE: read only + copy_uav: None, + copy_srv: Some(format), + }, + + _ => unimplemented!(), + } + } +} + +pub fn map_viewport(viewport: &Viewport) -> D3D11_VIEWPORT { + D3D11_VIEWPORT { + TopLeftX: viewport.rect.x as _, + TopLeftY: viewport.rect.y as _, + Width: viewport.rect.w as _, + Height: viewport.rect.h as _, + MinDepth: viewport.depth.start, + MaxDepth: viewport.depth.end, + } +} + +pub fn map_rect(rect: &Rect) -> D3D11_RECT { + D3D11_RECT { + left: rect.x as _, + top: rect.y as _, + right: (rect.x + rect.w) as _, + bottom: (rect.y + rect.h) as _, + } +} + +pub fn map_topology(ia: &InputAssemblerDesc) -> D3D11_PRIMITIVE_TOPOLOGY { + use hal::pso::Primitive::*; + match (ia.primitive, ia.with_adjacency) { + (PointList, false) => D3D_PRIMITIVE_TOPOLOGY_POINTLIST, + (PointList, true) => panic!("Points can't have adjacency info"), + (LineList, false) => D3D_PRIMITIVE_TOPOLOGY_LINELIST, + (LineList, true) => D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, + (LineStrip, false) => D3D_PRIMITIVE_TOPOLOGY_LINESTRIP, + (LineStrip, true) => D3D_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ, + (TriangleList, false) => D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST, + (TriangleList, true) => D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, + (TriangleStrip, false) => D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, + (TriangleStrip, true) => D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ, + (PatchList(num), false) => { + assert!(num != 0); + D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + (num as u32) - 1 + } + (_, true) => panic!("Patches can't have adjacency info"), + } +} + +fn map_fill_mode(mode: PolygonMode) -> D3D11_FILL_MODE { + match mode { + PolygonMode::Fill => D3D11_FILL_SOLID, + PolygonMode::Line => D3D11_FILL_WIREFRAME, + // TODO: return error + _ => unimplemented!(), + } +} + +fn map_cull_mode(mode: Face) -> D3D11_CULL_MODE { + match mode { + Face::NONE => D3D11_CULL_NONE, + Face::FRONT => D3D11_CULL_FRONT, + Face::BACK => D3D11_CULL_BACK, + _ => panic!("Culling both front and back faces is not supported"), + } +} + +pub(crate) fn map_rasterizer_desc( + desc: &Rasterizer, + multisampling_desc: &Option<Multisampling>, +) -> D3D11_RASTERIZER_DESC { + let bias = match desc.depth_bias { + //TODO: support dynamic depth bias + Some(State::Static(db)) => db, + Some(_) | None => DepthBias::default(), + }; + if let State::Static(w) = desc.line_width { + super::validate_line_width(w); + } + let multisampled = multisampling_desc.is_some(); + D3D11_RASTERIZER_DESC { + FillMode: map_fill_mode(desc.polygon_mode), + CullMode: 
map_cull_mode(desc.cull_face), + FrontCounterClockwise: match desc.front_face { + FrontFace::Clockwise => FALSE, + FrontFace::CounterClockwise => TRUE, + }, + DepthBias: bias.const_factor as INT, + DepthBiasClamp: bias.clamp, + SlopeScaledDepthBias: bias.slope_factor, + DepthClipEnable: !desc.depth_clamping as _, + // TODO: + ScissorEnable: TRUE, + MultisampleEnable: multisampled as _, + AntialiasedLineEnable: multisampled as _, + // TODO: conservative raster in >=11.x + } +} + +fn map_blend_factor(factor: Factor) -> D3D11_BLEND { + match factor { + Factor::Zero => D3D11_BLEND_ZERO, + Factor::One => D3D11_BLEND_ONE, + Factor::SrcColor => D3D11_BLEND_SRC_COLOR, + Factor::OneMinusSrcColor => D3D11_BLEND_INV_SRC_COLOR, + Factor::DstColor => D3D11_BLEND_DEST_COLOR, + Factor::OneMinusDstColor => D3D11_BLEND_INV_DEST_COLOR, + Factor::SrcAlpha => D3D11_BLEND_SRC_ALPHA, + Factor::OneMinusSrcAlpha => D3D11_BLEND_INV_SRC_ALPHA, + Factor::DstAlpha => D3D11_BLEND_DEST_ALPHA, + Factor::OneMinusDstAlpha => D3D11_BLEND_INV_DEST_ALPHA, + Factor::ConstColor | Factor::ConstAlpha => D3D11_BLEND_BLEND_FACTOR, + Factor::OneMinusConstColor | Factor::OneMinusConstAlpha => D3D11_BLEND_INV_BLEND_FACTOR, + Factor::SrcAlphaSaturate => D3D11_BLEND_SRC_ALPHA_SAT, + Factor::Src1Color => D3D11_BLEND_SRC1_COLOR, + Factor::OneMinusSrc1Color => D3D11_BLEND_INV_SRC1_COLOR, + Factor::Src1Alpha => D3D11_BLEND_SRC1_ALPHA, + Factor::OneMinusSrc1Alpha => D3D11_BLEND_INV_SRC1_ALPHA, + } +} + +fn map_alpha_blend_factor(factor: Factor) -> D3D11_BLEND { + match factor { + Factor::Zero => D3D11_BLEND_ZERO, + Factor::One => D3D11_BLEND_ONE, + Factor::SrcColor | Factor::SrcAlpha => D3D11_BLEND_SRC_ALPHA, + Factor::DstColor | Factor::DstAlpha => D3D11_BLEND_DEST_ALPHA, + Factor::OneMinusSrcColor | Factor::OneMinusSrcAlpha => D3D11_BLEND_INV_SRC_ALPHA, + Factor::OneMinusDstColor | Factor::OneMinusDstAlpha => D3D11_BLEND_INV_DEST_ALPHA, + Factor::ConstColor | Factor::ConstAlpha => D3D11_BLEND_BLEND_FACTOR, + Factor::OneMinusConstColor | Factor::OneMinusConstAlpha => D3D11_BLEND_INV_BLEND_FACTOR, + Factor::SrcAlphaSaturate => D3D11_BLEND_SRC_ALPHA_SAT, + Factor::Src1Color | Factor::Src1Alpha => D3D11_BLEND_SRC1_ALPHA, + Factor::OneMinusSrc1Color | Factor::OneMinusSrc1Alpha => D3D11_BLEND_INV_SRC1_ALPHA, + } +} + +fn map_blend_op(operation: BlendOp) -> (D3D11_BLEND_OP, D3D11_BLEND, D3D11_BLEND) { + match operation { + BlendOp::Add { src, dst } => ( + D3D11_BLEND_OP_ADD, + map_blend_factor(src), + map_blend_factor(dst), + ), + BlendOp::Sub { src, dst } => ( + D3D11_BLEND_OP_SUBTRACT, + map_blend_factor(src), + map_blend_factor(dst), + ), + BlendOp::RevSub { src, dst } => ( + D3D11_BLEND_OP_REV_SUBTRACT, + map_blend_factor(src), + map_blend_factor(dst), + ), + BlendOp::Min => (D3D11_BLEND_OP_MIN, D3D11_BLEND_ZERO, D3D11_BLEND_ZERO), + BlendOp::Max => (D3D11_BLEND_OP_MAX, D3D11_BLEND_ZERO, D3D11_BLEND_ZERO), + } +} + +fn map_alpha_blend_op(operation: BlendOp) -> (D3D11_BLEND_OP, D3D11_BLEND, D3D11_BLEND) { + match operation { + BlendOp::Add { src, dst } => ( + D3D11_BLEND_OP_ADD, + map_alpha_blend_factor(src), + map_alpha_blend_factor(dst), + ), + BlendOp::Sub { src, dst } => ( + D3D11_BLEND_OP_SUBTRACT, + map_alpha_blend_factor(src), + map_alpha_blend_factor(dst), + ), + BlendOp::RevSub { src, dst } => ( + D3D11_BLEND_OP_REV_SUBTRACT, + map_alpha_blend_factor(src), + map_alpha_blend_factor(dst), + ), + BlendOp::Min => (D3D11_BLEND_OP_MIN, D3D11_BLEND_ZERO, D3D11_BLEND_ZERO), + BlendOp::Max => (D3D11_BLEND_OP_MAX, D3D11_BLEND_ZERO, 
D3D11_BLEND_ZERO), + } +} + +fn map_blend_targets( + render_target_blends: &[ColorBlendDesc], +) -> [D3D11_RENDER_TARGET_BLEND_DESC; 8] { + let mut targets: [D3D11_RENDER_TARGET_BLEND_DESC; 8] = [unsafe { mem::zeroed() }; 8]; + + for (mut target, color_desc) in targets.iter_mut().zip(render_target_blends.iter()) { + target.RenderTargetWriteMask = color_desc.mask.bits() as _; + if let Some(ref blend) = color_desc.blend { + let (color_op, color_src, color_dst) = map_blend_op(blend.color); + let (alpha_op, alpha_src, alpha_dst) = map_alpha_blend_op(blend.alpha); + target.BlendEnable = TRUE; + target.BlendOp = color_op; + target.SrcBlend = color_src; + target.DestBlend = color_dst; + target.BlendOpAlpha = alpha_op; + target.SrcBlendAlpha = alpha_src; + target.DestBlendAlpha = alpha_dst; + } + } + + targets +} + +pub(crate) fn map_blend_desc( + desc: &BlendDesc, + multisampling: &Option<Multisampling>, +) -> D3D11_BLEND_DESC { + D3D11_BLEND_DESC { + AlphaToCoverageEnable: multisampling.as_ref().map_or(false, |m| m.alpha_coverage) as _, + IndependentBlendEnable: TRUE, + RenderTarget: map_blend_targets(&desc.targets), + } +} + +pub fn map_comparison(func: Comparison) -> D3D11_COMPARISON_FUNC { + match func { + Comparison::Never => D3D11_COMPARISON_NEVER, + Comparison::Less => D3D11_COMPARISON_LESS, + Comparison::LessEqual => D3D11_COMPARISON_LESS_EQUAL, + Comparison::Equal => D3D11_COMPARISON_EQUAL, + Comparison::GreaterEqual => D3D11_COMPARISON_GREATER_EQUAL, + Comparison::Greater => D3D11_COMPARISON_GREATER, + Comparison::NotEqual => D3D11_COMPARISON_NOT_EQUAL, + Comparison::Always => D3D11_COMPARISON_ALWAYS, + } +} + +fn map_stencil_op(op: StencilOp) -> D3D11_STENCIL_OP { + match op { + StencilOp::Keep => D3D11_STENCIL_OP_KEEP, + StencilOp::Zero => D3D11_STENCIL_OP_ZERO, + StencilOp::Replace => D3D11_STENCIL_OP_REPLACE, + StencilOp::IncrementClamp => D3D11_STENCIL_OP_INCR_SAT, + StencilOp::IncrementWrap => D3D11_STENCIL_OP_INCR, + StencilOp::DecrementClamp => D3D11_STENCIL_OP_DECR_SAT, + StencilOp::DecrementWrap => D3D11_STENCIL_OP_DECR, + StencilOp::Invert => D3D11_STENCIL_OP_INVERT, + } +} + +fn map_stencil_side(side: &StencilFace) -> D3D11_DEPTH_STENCILOP_DESC { + D3D11_DEPTH_STENCILOP_DESC { + StencilFailOp: map_stencil_op(side.op_fail), + StencilDepthFailOp: map_stencil_op(side.op_depth_fail), + StencilPassOp: map_stencil_op(side.op_pass), + StencilFunc: map_comparison(side.fun), + } +} + +pub(crate) fn map_depth_stencil_desc( + desc: &DepthStencilDesc, +) -> (D3D11_DEPTH_STENCIL_DESC, State<StencilValue>, bool) { + let (depth_on, depth_write, depth_func) = match desc.depth { + Some(ref depth) => (TRUE, depth.write, map_comparison(depth.fun)), + None => unsafe { mem::zeroed() }, + }; + + let (stencil_on, front, back, read_mask, write_mask, stencil_ref) = match desc.stencil { + Some(ref stencil) => { + let read_masks = stencil.read_masks.static_or(Sided::new(!0)); + let write_masks = stencil.read_masks.static_or(Sided::new(!0)); + let reference_value = match stencil.reference_values { + State::Static(ref values) => { + if values.front != values.back { + error!("Different reference values for front ({}) and back ({}) of the stencil", + values.front, values.back); + } + State::Static(values.front) + } + State::Dynamic => State::Dynamic, + }; + // TODO: cascade to create_pipeline + if read_masks.front != read_masks.back || write_masks.front != write_masks.back { + error!( + "Different sides are specified for read ({:?} and write ({:?}) stencil masks", + read_masks, write_masks + ); + } + ( + 
TRUE, + map_stencil_side(&stencil.faces.front), + map_stencil_side(&stencil.faces.back), + read_masks.front, + write_masks.front, + reference_value, + ) + } + None => unsafe { mem::zeroed() }, + }; + + let stencil_read_only = write_mask == 0 + || (front.StencilDepthFailOp == D3D11_STENCIL_OP_KEEP + && front.StencilFailOp == D3D11_STENCIL_OP_KEEP + && front.StencilPassOp == D3D11_STENCIL_OP_KEEP + && back.StencilDepthFailOp == D3D11_STENCIL_OP_KEEP + && back.StencilFailOp == D3D11_STENCIL_OP_KEEP + && back.StencilPassOp == D3D11_STENCIL_OP_KEEP); + let read_only = !depth_write && stencil_read_only; + + ( + D3D11_DEPTH_STENCIL_DESC { + DepthEnable: depth_on, + DepthWriteMask: if depth_write { + D3D11_DEPTH_WRITE_MASK_ALL + } else { + D3D11_DEPTH_WRITE_MASK_ZERO + }, + DepthFunc: depth_func, + StencilEnable: stencil_on, + StencilReadMask: read_mask as _, + StencilWriteMask: write_mask as _, + FrontFace: front, + BackFace: back, + }, + stencil_ref, + read_only, + ) +} + +pub fn map_execution_model(model: spirv::ExecutionModel) -> ShaderStage { + match model { + spirv::ExecutionModel::Vertex => ShaderStage::Vertex, + spirv::ExecutionModel::Fragment => ShaderStage::Fragment, + spirv::ExecutionModel::Geometry => ShaderStage::Geometry, + spirv::ExecutionModel::GlCompute => ShaderStage::Compute, + spirv::ExecutionModel::TessellationControl => ShaderStage::Hull, + spirv::ExecutionModel::TessellationEvaluation => ShaderStage::Domain, + spirv::ExecutionModel::Kernel => panic!("Kernel is not a valid execution model."), + } +} + +pub fn map_stage(stage: ShaderStage) -> spirv::ExecutionModel { + match stage { + ShaderStage::Vertex => spirv::ExecutionModel::Vertex, + ShaderStage::Fragment => spirv::ExecutionModel::Fragment, + ShaderStage::Geometry => spirv::ExecutionModel::Geometry, + ShaderStage::Compute => spirv::ExecutionModel::GlCompute, + ShaderStage::Hull => spirv::ExecutionModel::TessellationControl, + ShaderStage::Domain => spirv::ExecutionModel::TessellationEvaluation, + ShaderStage::Task | ShaderStage::Mesh => { + panic!("{:?} shader is not supported in DirectX 11", stage) + } + } +} + +pub fn map_wrapping(wrap: image::WrapMode) -> D3D11_TEXTURE_ADDRESS_MODE { + use hal::image::WrapMode as Wm; + match wrap { + Wm::Tile => D3D11_TEXTURE_ADDRESS_WRAP, + Wm::Mirror => D3D11_TEXTURE_ADDRESS_MIRROR, + Wm::Clamp => D3D11_TEXTURE_ADDRESS_CLAMP, + Wm::Border => D3D11_TEXTURE_ADDRESS_BORDER, + Wm::MirrorClamp => D3D11_TEXTURE_ADDRESS_MIRROR_ONCE, + } +} + +fn map_filter_type(filter: image::Filter) -> D3D11_FILTER_TYPE { + match filter { + image::Filter::Nearest => D3D11_FILTER_TYPE_POINT, + image::Filter::Linear => D3D11_FILTER_TYPE_LINEAR, + } +} + +// Hopefully works just as well in d3d11 :) +pub fn map_filter( + mag_filter: image::Filter, + min_filter: image::Filter, + mip_filter: image::Filter, + reduction: D3D11_FILTER_REDUCTION_TYPE, + anisotropy_clamp: Option<u8>, +) -> D3D11_FILTER { + let mag = map_filter_type(mag_filter); + let min = map_filter_type(min_filter); + let mip = map_filter_type(mip_filter); + + (min & D3D11_FILTER_TYPE_MASK) << D3D11_MIN_FILTER_SHIFT + | (mag & D3D11_FILTER_TYPE_MASK) << D3D11_MAG_FILTER_SHIFT + | (mip & D3D11_FILTER_TYPE_MASK) << D3D11_MIP_FILTER_SHIFT + | (reduction & D3D11_FILTER_REDUCTION_TYPE_MASK) << D3D11_FILTER_REDUCTION_TYPE_SHIFT + | anisotropy_clamp + .map(|_| D3D11_FILTER_ANISOTROPIC) + .unwrap_or(0) +} + +pub fn map_image_usage(usage: image::Usage, format_desc: FormatDesc) -> D3D11_BIND_FLAG { + let mut bind = 0; + + if 
usage.intersects(image::Usage::TRANSFER_SRC | image::Usage::SAMPLED | image::Usage::STORAGE) + { + bind |= D3D11_BIND_SHADER_RESOURCE; + } + + // we cant get RTVs or UAVs on compressed & depth formats + if !format_desc.is_compressed() && !format_desc.aspects.contains(Aspects::DEPTH) { + if usage.intersects(image::Usage::COLOR_ATTACHMENT | image::Usage::TRANSFER_DST) { + bind |= D3D11_BIND_RENDER_TARGET; + } + + if usage.intersects(image::Usage::TRANSFER_DST | image::Usage::STORAGE) { + bind |= D3D11_BIND_UNORDERED_ACCESS; + } + } + + if usage.contains(image::Usage::DEPTH_STENCIL_ATTACHMENT) { + bind |= D3D11_BIND_DEPTH_STENCIL; + } + + bind +} diff --git a/third_party/rust/gfx-backend-dx11/src/debug.rs b/third_party/rust/gfx-backend-dx11/src/debug.rs new file mode 100644 index 0000000000..0d061de3a9 --- /dev/null +++ b/third_party/rust/gfx-backend-dx11/src/debug.rs @@ -0,0 +1,114 @@ +use winapi::um::{d3d11, d3d11_1, d3dcommon}; + +use wio::{com::ComPtr, wide::ToWide}; + +use std::{env, ffi::OsStr, fmt}; + +#[must_use] +pub struct DebugScope { + annotation: ComPtr<d3d11_1::ID3DUserDefinedAnnotation>, +} + +impl DebugScope { + // TODO: Not used currently in release, will be used in the future + #[allow(dead_code)] + pub fn with_name( + context: &ComPtr<d3d11::ID3D11DeviceContext>, + args: fmt::Arguments, + ) -> Option<Self> { + let name = format!("{}", args); + + // debugging with visual studio and its ilk *really* doesn't like calling this on a + // deferred context when replaying a capture, compared to renderdoc + if unsafe { context.GetType() } == d3d11::D3D11_DEVICE_CONTEXT_DEFERRED { + // TODO: find a better way to detect either if RD or VS is active debugger + if env::var("GFX_NO_RENDERDOC").is_ok() { + return None; + } + } + + let annotation = context + .cast::<d3d11_1::ID3DUserDefinedAnnotation>() + .unwrap(); + let msg: &OsStr = name.as_ref(); + let msg: Vec<u16> = msg.to_wide_null(); + + unsafe { + annotation.BeginEvent(msg.as_ptr() as _); + } + + Some(DebugScope { annotation }) + } +} + +impl Drop for DebugScope { + fn drop(&mut self) { + unsafe { + self.annotation.EndEvent(); + } + } +} + +pub fn debug_marker(context: &ComPtr<d3d11::ID3D11DeviceContext>, name: &str) { + // same here + if unsafe { context.GetType() } == d3d11::D3D11_DEVICE_CONTEXT_DEFERRED { + if env::var("GFX_NO_RENDERDOC").is_ok() { + return; + } + } + + let annotation = context + .cast::<d3d11_1::ID3DUserDefinedAnnotation>() + .unwrap(); + let msg: &OsStr = name.as_ref(); + let msg: Vec<u16> = msg.to_wide_null(); + + unsafe { + annotation.SetMarker(msg.as_ptr() as _); + } +} + +pub fn verify_debug_ascii(name: &str) -> bool { + let res = name.is_ascii(); + if !res { + error!("DX11 buffer names must be ASCII"); + } + res +} + +/// Set the debug name of a resource. +/// +/// Must be ASCII. +/// +/// SetPrivateData will copy the data internally so the data doesn't need to live. +pub fn set_debug_name(resource: &d3d11::ID3D11DeviceChild, name: &str) { + unsafe { + resource.SetPrivateData( + (&d3dcommon::WKPDID_D3DDebugObjectName) as *const _, + name.len() as _, + name.as_ptr() as *const _, + ); + } +} + +/// Set the debug name of a resource with a given suffix. +/// +/// Must be ASCII. +/// +/// The given string will be mutated to add the suffix, then restored to it's original state. +/// This saves an allocation. SetPrivateData will copy the data internally so the data doesn't need to live. 
+pub fn set_debug_name_with_suffix( + resource: &d3d11::ID3D11DeviceChild, + name: &mut String, + suffix: &str, +) { + name.push_str(suffix); + unsafe { + resource.SetPrivateData( + (&d3dcommon::WKPDID_D3DDebugObjectName) as *const _, + name.len() as _, + name.as_ptr() as *const _, + ); + } + name.drain((name.len() - suffix.len())..); +} diff --git a/third_party/rust/gfx-backend-dx11/src/device.rs b/third_party/rust/gfx-backend-dx11/src/device.rs new file mode 100644 index 0000000000..a8a5d5e179 --- /dev/null +++ b/third_party/rust/gfx-backend-dx11/src/device.rs @@ -0,0 +1,2485 @@ +use auxil::ShaderStage; +use hal::{ + adapter::MemoryProperties, buffer, device, format, image, memory, pass, pool, pso, + pso::VertexInputRate, query, queue::QueueFamilyId, window, +}; + +use winapi::{ + shared::{dxgi, dxgiformat, dxgitype, minwindef::TRUE, windef::HWND, winerror}, + um::{d3d11, d3d11_1, d3d11sdklayers, d3dcommon}, +}; + +use wio::com::ComPtr; + +use std::{ + borrow::Borrow, + fmt, mem, + ops::Range, + ptr, + sync::{Arc, Weak}, +}; + +use parking_lot::{Condvar, Mutex, RwLock}; + +use crate::{ + conv, + debug::{set_debug_name, set_debug_name_with_suffix, verify_debug_ascii}, + internal, shader, Backend, Buffer, BufferView, CommandBuffer, CommandPool, ComputePipeline, + DescriptorContent, DescriptorIndex, DescriptorPool, DescriptorSet, DescriptorSetInfo, + DescriptorSetLayout, Fence, Framebuffer, GraphicsPipeline, Image, ImageView, InternalBuffer, + InternalImage, Memory, MultiStageData, PipelineLayout, QueryPool, RawFence, + RegisterAccumulator, RegisterData, RenderPass, ResourceIndex, Sampler, Semaphore, ShaderModule, + SubpassDesc, ViewInfo, +}; + +//TODO: expose coherent type 0x2 when it's properly supported +const BUFFER_TYPE_MASK: u32 = 0x1 | 0x4; + +struct InputLayout { + raw: ComPtr<d3d11::ID3D11InputLayout>, + required_bindings: u32, + max_vertex_bindings: u32, + topology: d3d11::D3D11_PRIMITIVE_TOPOLOGY, + vertex_strides: Vec<u32>, +} + +#[derive(Clone)] +pub struct DepthStencilState { + pub raw: ComPtr<d3d11::ID3D11DepthStencilState>, + pub stencil_ref: pso::State<pso::StencilValue>, + pub read_only: bool, +} + +pub struct Device { + raw: ComPtr<d3d11::ID3D11Device>, + raw1: Option<ComPtr<d3d11_1::ID3D11Device1>>, + pub(crate) context: ComPtr<d3d11::ID3D11DeviceContext>, + features: hal::Features, + memory_properties: MemoryProperties, + internal: Arc<internal::Internal>, +} + +impl fmt::Debug for Device { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str("Device") + } +} + +impl Drop for Device { + fn drop(&mut self) { + if let Ok(debug) = self.raw.cast::<d3d11sdklayers::ID3D11Debug>() { + unsafe { + debug.ReportLiveDeviceObjects(d3d11sdklayers::D3D11_RLDO_DETAIL); + } + } + } +} + +unsafe impl Send for Device {} +unsafe impl Sync for Device {} + +impl Device { + pub fn new( + device: ComPtr<d3d11::ID3D11Device>, + device1: Option<ComPtr<d3d11_1::ID3D11Device1>>, + context: ComPtr<d3d11::ID3D11DeviceContext>, + features: hal::Features, + memory_properties: MemoryProperties, + ) -> Self { + Device { + internal: Arc::new(internal::Internal::new(&device)), + raw: device, + raw1: device1, + context, + features, + memory_properties, + } + } + + pub fn as_raw(&self) -> *mut d3d11::ID3D11Device { + self.raw.as_raw() + } + + fn create_rasterizer_state( + &self, + rasterizer_desc: &pso::Rasterizer, + multisampling_desc: &Option<pso::Multisampling>, + ) -> Result<ComPtr<d3d11::ID3D11RasterizerState>, pso::CreationError> { + let mut rasterizer = ptr::null_mut(); + let 
desc = conv::map_rasterizer_desc(rasterizer_desc, multisampling_desc); + + let hr = unsafe { + self.raw + .CreateRasterizerState(&desc, &mut rasterizer as *mut *mut _ as *mut *mut _) + }; + + if winerror::SUCCEEDED(hr) { + Ok(unsafe { ComPtr::from_raw(rasterizer) }) + } else { + Err(pso::CreationError::Other) + } + } + + fn create_blend_state( + &self, + blend_desc: &pso::BlendDesc, + multisampling: &Option<pso::Multisampling>, + ) -> Result<ComPtr<d3d11::ID3D11BlendState>, pso::CreationError> { + let mut blend = ptr::null_mut(); + let desc = conv::map_blend_desc(blend_desc, multisampling); + + let hr = unsafe { + self.raw + .CreateBlendState(&desc, &mut blend as *mut *mut _ as *mut *mut _) + }; + + if winerror::SUCCEEDED(hr) { + Ok(unsafe { ComPtr::from_raw(blend) }) + } else { + Err(pso::CreationError::Other) + } + } + + fn create_depth_stencil_state( + &self, + depth_desc: &pso::DepthStencilDesc, + ) -> Result<DepthStencilState, pso::CreationError> { + let mut depth = ptr::null_mut(); + let (desc, stencil_ref, read_only) = conv::map_depth_stencil_desc(depth_desc); + + let hr = unsafe { + self.raw + .CreateDepthStencilState(&desc, &mut depth as *mut *mut _ as *mut *mut _) + }; + + if winerror::SUCCEEDED(hr) { + Ok(DepthStencilState { + raw: unsafe { ComPtr::from_raw(depth) }, + stencil_ref, + read_only, + }) + } else { + Err(pso::CreationError::Other) + } + } + + fn create_input_layout( + &self, + vs: ComPtr<d3dcommon::ID3DBlob>, + vertex_buffers: &[pso::VertexBufferDesc], + attributes: &[pso::AttributeDesc], + input_assembler: &pso::InputAssemblerDesc, + vertex_semantic_remapping: auxil::FastHashMap<u32, Option<(u32, u32)>>, + ) -> Result<InputLayout, pso::CreationError> { + let mut layout = ptr::null_mut(); + + let mut vertex_strides = Vec::new(); + let mut required_bindings = 0u32; + let mut max_vertex_bindings = 0u32; + for buffer in vertex_buffers { + required_bindings |= 1 << buffer.binding as u32; + max_vertex_bindings = max_vertex_bindings.max(1u32 + buffer.binding as u32); + + while vertex_strides.len() <= buffer.binding as usize { + vertex_strides.push(0); + } + + vertex_strides[buffer.binding as usize] = buffer.stride; + } + + // See [`shader::introspect_spirv_vertex_semantic_remapping`] for details of why this is needed. 
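+        // Illustrative example (editorial note, not in the original source): a remap
+        // entry of Some((3, 1)) for an attribute turns it into semantic name
+        // "TEXCOORD3_" with SemanticIndex 1, while an attribute with no remap entry
+        // at location 5 keeps the name "TEXCOORD" with SemanticIndex 5.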
+ let semantics: Vec<_> = attributes + .iter() + .map( + |attrib| match vertex_semantic_remapping.get(&attrib.location) { + Some(Some((major, minor))) => { + let name = std::borrow::Cow::Owned(format!("TEXCOORD{}_\0", major)); + let location = *minor; + (name, location) + } + _ => { + let name = std::borrow::Cow::Borrowed("TEXCOORD\0"); + let location = attrib.location; + (name, location) + } + }, + ) + .collect(); + + let input_elements = attributes + .iter() + .zip(semantics.iter()) + .filter_map(|(attrib, (semantic_name, semantic_index))| { + let buffer_desc = match vertex_buffers + .iter() + .find(|buffer_desc| buffer_desc.binding == attrib.binding) + { + Some(buffer_desc) => buffer_desc, + None => { + // TODO: + // error!("Couldn't find associated vertex buffer description {:?}", attrib.binding); + return Some(Err(pso::CreationError::Other)); + } + }; + + let (slot_class, step_rate) = match buffer_desc.rate { + VertexInputRate::Vertex => (d3d11::D3D11_INPUT_PER_VERTEX_DATA, 0), + VertexInputRate::Instance(divisor) => { + (d3d11::D3D11_INPUT_PER_INSTANCE_DATA, divisor) + } + }; + let format = attrib.element.format; + + Some(Ok(d3d11::D3D11_INPUT_ELEMENT_DESC { + SemanticName: semantic_name.as_ptr() as *const _, // Semantic name used by SPIRV-Cross + SemanticIndex: *semantic_index, + Format: match conv::map_format(format) { + Some(fm) => fm, + None => { + // TODO: + // error!("Unable to find DXGI format for {:?}", format); + return Some(Err(pso::CreationError::Other)); + } + }, + InputSlot: attrib.binding as _, + AlignedByteOffset: attrib.element.offset, + InputSlotClass: slot_class, + InstanceDataStepRate: step_rate as _, + })) + }) + .collect::<Result<Vec<_>, _>>()?; + + let hr = unsafe { + self.raw.CreateInputLayout( + input_elements.as_ptr(), + input_elements.len() as _, + vs.GetBufferPointer(), + vs.GetBufferSize(), + &mut layout as *mut *mut _ as *mut *mut _, + ) + }; + + if winerror::SUCCEEDED(hr) { + let topology = conv::map_topology(input_assembler); + + Ok(InputLayout { + raw: unsafe { ComPtr::from_raw(layout) }, + required_bindings, + max_vertex_bindings, + topology, + vertex_strides, + }) + } else { + error!("CreateInputLayout error 0x{:X}", hr); + Err(pso::CreationError::Other) + } + } + + fn create_vertex_shader( + &self, + blob: ComPtr<d3dcommon::ID3DBlob>, + ) -> Result<ComPtr<d3d11::ID3D11VertexShader>, pso::CreationError> { + let mut vs = ptr::null_mut(); + + let hr = unsafe { + self.raw.CreateVertexShader( + blob.GetBufferPointer(), + blob.GetBufferSize(), + ptr::null_mut(), + &mut vs as *mut *mut _ as *mut *mut _, + ) + }; + + if winerror::SUCCEEDED(hr) { + Ok(unsafe { ComPtr::from_raw(vs) }) + } else { + Err(pso::CreationError::Other) + } + } + + fn create_pixel_shader( + &self, + blob: ComPtr<d3dcommon::ID3DBlob>, + ) -> Result<ComPtr<d3d11::ID3D11PixelShader>, pso::CreationError> { + let mut ps = ptr::null_mut(); + + let hr = unsafe { + self.raw.CreatePixelShader( + blob.GetBufferPointer(), + blob.GetBufferSize(), + ptr::null_mut(), + &mut ps as *mut *mut _ as *mut *mut _, + ) + }; + + if winerror::SUCCEEDED(hr) { + Ok(unsafe { ComPtr::from_raw(ps) }) + } else { + Err(pso::CreationError::Other) + } + } + + fn create_geometry_shader( + &self, + blob: ComPtr<d3dcommon::ID3DBlob>, + ) -> Result<ComPtr<d3d11::ID3D11GeometryShader>, pso::CreationError> { + let mut gs = ptr::null_mut(); + + let hr = unsafe { + self.raw.CreateGeometryShader( + blob.GetBufferPointer(), + blob.GetBufferSize(), + ptr::null_mut(), + &mut gs as *mut *mut _ as *mut *mut _, + ) + }; + + if 
winerror::SUCCEEDED(hr) { + Ok(unsafe { ComPtr::from_raw(gs) }) + } else { + Err(pso::CreationError::Other) + } + } + + fn create_hull_shader( + &self, + blob: ComPtr<d3dcommon::ID3DBlob>, + ) -> Result<ComPtr<d3d11::ID3D11HullShader>, pso::CreationError> { + let mut hs = ptr::null_mut(); + + let hr = unsafe { + self.raw.CreateHullShader( + blob.GetBufferPointer(), + blob.GetBufferSize(), + ptr::null_mut(), + &mut hs as *mut *mut _ as *mut *mut _, + ) + }; + + if winerror::SUCCEEDED(hr) { + Ok(unsafe { ComPtr::from_raw(hs) }) + } else { + Err(pso::CreationError::Other) + } + } + + fn create_domain_shader( + &self, + blob: ComPtr<d3dcommon::ID3DBlob>, + ) -> Result<ComPtr<d3d11::ID3D11DomainShader>, pso::CreationError> { + let mut ds = ptr::null_mut(); + + let hr = unsafe { + self.raw.CreateDomainShader( + blob.GetBufferPointer(), + blob.GetBufferSize(), + ptr::null_mut(), + &mut ds as *mut *mut _ as *mut *mut _, + ) + }; + + if winerror::SUCCEEDED(hr) { + Ok(unsafe { ComPtr::from_raw(ds) }) + } else { + Err(pso::CreationError::Other) + } + } + + fn create_compute_shader( + &self, + blob: ComPtr<d3dcommon::ID3DBlob>, + ) -> Result<ComPtr<d3d11::ID3D11ComputeShader>, pso::CreationError> { + let mut cs = ptr::null_mut(); + + let hr = unsafe { + self.raw.CreateComputeShader( + blob.GetBufferPointer(), + blob.GetBufferSize(), + ptr::null_mut(), + &mut cs as *mut *mut _ as *mut *mut _, + ) + }; + + if winerror::SUCCEEDED(hr) { + Ok(unsafe { ComPtr::from_raw(cs) }) + } else { + Err(pso::CreationError::Other) + } + } + + // TODO: fix return type.. + fn extract_entry_point( + stage: ShaderStage, + source: &pso::EntryPoint<Backend>, + layout: &PipelineLayout, + features: &hal::Features, + ) -> Result<Option<ComPtr<d3dcommon::ID3DBlob>>, device::ShaderError> { + // TODO: entrypoint stuff + match *source.module { + ShaderModule::Dxbc(ref _shader) => { + unimplemented!() + + // Ok(Some(shader)) + } + ShaderModule::Spirv(ref raw_data) => Ok(shader::compile_spirv_entrypoint( + raw_data, stage, source, layout, features, + )?), + } + } + + fn view_image_as_shader_resource( + &self, + info: &ViewInfo, + ) -> Result<ComPtr<d3d11::ID3D11ShaderResourceView>, image::ViewCreationError> { + let mut desc: d3d11::D3D11_SHADER_RESOURCE_VIEW_DESC = unsafe { mem::zeroed() }; + desc.Format = info.format; + if desc.Format == dxgiformat::DXGI_FORMAT_D32_FLOAT_S8X24_UINT { + desc.Format = dxgiformat::DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS; + } + + #[allow(non_snake_case)] + let MostDetailedMip = info.levels.start as _; + #[allow(non_snake_case)] + let MipLevels = (info.levels.end - info.levels.start) as _; + #[allow(non_snake_case)] + let FirstArraySlice = info.layers.start as _; + #[allow(non_snake_case)] + let ArraySize = (info.layers.end - info.layers.start) as _; + + match info.view_kind { + image::ViewKind::D1 => { + desc.ViewDimension = d3dcommon::D3D11_SRV_DIMENSION_TEXTURE1D; + *unsafe { desc.u.Texture1D_mut() } = d3d11::D3D11_TEX1D_SRV { + MostDetailedMip, + MipLevels, + } + } + image::ViewKind::D1Array => { + desc.ViewDimension = d3dcommon::D3D11_SRV_DIMENSION_TEXTURE1DARRAY; + *unsafe { desc.u.Texture1DArray_mut() } = d3d11::D3D11_TEX1D_ARRAY_SRV { + MostDetailedMip, + MipLevels, + FirstArraySlice, + ArraySize, + } + } + image::ViewKind::D2 if info.kind.num_samples() > 1 => { + desc.ViewDimension = d3dcommon::D3D11_SRV_DIMENSION_TEXTURE2DMS; + *unsafe { desc.u.Texture2DMS_mut() } = d3d11::D3D11_TEX2DMS_SRV { + UnusedField_NothingToDefine: 0, + } + } + image::ViewKind::D2 => { + desc.ViewDimension = 
d3dcommon::D3D11_SRV_DIMENSION_TEXTURE2D; + *unsafe { desc.u.Texture2D_mut() } = d3d11::D3D11_TEX2D_SRV { + MostDetailedMip, + MipLevels, + } + } + image::ViewKind::D2Array if info.kind.num_samples() > 1 => { + desc.ViewDimension = d3dcommon::D3D11_SRV_DIMENSION_TEXTURE2DMSARRAY; + *unsafe { desc.u.Texture2DMSArray_mut() } = d3d11::D3D11_TEX2DMS_ARRAY_SRV { + FirstArraySlice, + ArraySize, + } + } + image::ViewKind::D2Array => { + desc.ViewDimension = d3dcommon::D3D11_SRV_DIMENSION_TEXTURE2DARRAY; + *unsafe { desc.u.Texture2DArray_mut() } = d3d11::D3D11_TEX2D_ARRAY_SRV { + MostDetailedMip, + MipLevels, + FirstArraySlice, + ArraySize, + } + } + image::ViewKind::D3 => { + desc.ViewDimension = d3dcommon::D3D11_SRV_DIMENSION_TEXTURE3D; + *unsafe { desc.u.Texture3D_mut() } = d3d11::D3D11_TEX3D_SRV { + MostDetailedMip, + MipLevels, + } + } + image::ViewKind::Cube => { + desc.ViewDimension = d3dcommon::D3D11_SRV_DIMENSION_TEXTURECUBE; + *unsafe { desc.u.TextureCube_mut() } = d3d11::D3D11_TEXCUBE_SRV { + MostDetailedMip, + MipLevels, + } + } + image::ViewKind::CubeArray => { + desc.ViewDimension = d3dcommon::D3D11_SRV_DIMENSION_TEXTURECUBEARRAY; + *unsafe { desc.u.TextureCubeArray_mut() } = d3d11::D3D11_TEXCUBE_ARRAY_SRV { + MostDetailedMip, + MipLevels, + First2DArrayFace: FirstArraySlice, + NumCubes: ArraySize / 6, + } + } + } + + let mut srv = ptr::null_mut(); + let hr = unsafe { + self.raw.CreateShaderResourceView( + info.resource, + &desc, + &mut srv as *mut *mut _ as *mut *mut _, + ) + }; + + if winerror::SUCCEEDED(hr) { + Ok(unsafe { ComPtr::from_raw(srv) }) + } else { + Err(image::ViewCreationError::Unsupported) + } + } + + fn view_image_as_unordered_access( + &self, + info: &ViewInfo, + ) -> Result<ComPtr<d3d11::ID3D11UnorderedAccessView>, image::ViewCreationError> { + let mut desc: d3d11::D3D11_UNORDERED_ACCESS_VIEW_DESC = unsafe { mem::zeroed() }; + desc.Format = info.format; + + #[allow(non_snake_case)] + let MipSlice = info.levels.start as _; + #[allow(non_snake_case)] + let FirstArraySlice = info.layers.start as _; + #[allow(non_snake_case)] + let ArraySize = (info.layers.end - info.layers.start) as _; + + match info.view_kind { + image::ViewKind::D1 => { + desc.ViewDimension = d3d11::D3D11_UAV_DIMENSION_TEXTURE1D; + *unsafe { desc.u.Texture1D_mut() } = d3d11::D3D11_TEX1D_UAV { + MipSlice: info.levels.start as _, + } + } + image::ViewKind::D1Array => { + desc.ViewDimension = d3d11::D3D11_UAV_DIMENSION_TEXTURE1DARRAY; + *unsafe { desc.u.Texture1DArray_mut() } = d3d11::D3D11_TEX1D_ARRAY_UAV { + MipSlice, + FirstArraySlice, + ArraySize, + } + } + image::ViewKind::D2 => { + desc.ViewDimension = d3d11::D3D11_UAV_DIMENSION_TEXTURE2D; + *unsafe { desc.u.Texture2D_mut() } = d3d11::D3D11_TEX2D_UAV { + MipSlice: info.levels.start as _, + } + } + image::ViewKind::D2Array => { + desc.ViewDimension = d3d11::D3D11_UAV_DIMENSION_TEXTURE2DARRAY; + *unsafe { desc.u.Texture2DArray_mut() } = d3d11::D3D11_TEX2D_ARRAY_UAV { + MipSlice, + FirstArraySlice, + ArraySize, + } + } + image::ViewKind::D3 => { + desc.ViewDimension = d3d11::D3D11_UAV_DIMENSION_TEXTURE3D; + *unsafe { desc.u.Texture3D_mut() } = d3d11::D3D11_TEX3D_UAV { + MipSlice, + FirstWSlice: FirstArraySlice, + WSize: ArraySize, + } + } + _ => unimplemented!(), + } + + let mut uav = ptr::null_mut(); + let hr = unsafe { + self.raw.CreateUnorderedAccessView( + info.resource, + &desc, + &mut uav as *mut *mut _ as *mut *mut _, + ) + }; + + if winerror::SUCCEEDED(hr) { + Ok(unsafe { ComPtr::from_raw(uav) }) + } else { + 
error!("CreateUnorderedAccessView failed: 0x{:x}", hr); + + Err(image::ViewCreationError::Unsupported) + } + } + + pub(crate) fn view_image_as_render_target( + &self, + info: &ViewInfo, + ) -> Result<ComPtr<d3d11::ID3D11RenderTargetView>, image::ViewCreationError> { + let mut desc: d3d11::D3D11_RENDER_TARGET_VIEW_DESC = unsafe { mem::zeroed() }; + desc.Format = info.format; + + #[allow(non_snake_case)] + let MipSlice = info.levels.start as _; + #[allow(non_snake_case)] + let FirstArraySlice = info.layers.start as _; + #[allow(non_snake_case)] + let ArraySize = (info.layers.end - info.layers.start) as _; + + match info.view_kind { + image::ViewKind::D1 => { + desc.ViewDimension = d3d11::D3D11_RTV_DIMENSION_TEXTURE1D; + *unsafe { desc.u.Texture1D_mut() } = d3d11::D3D11_TEX1D_RTV { MipSlice } + } + image::ViewKind::D1Array => { + desc.ViewDimension = d3d11::D3D11_RTV_DIMENSION_TEXTURE1DARRAY; + *unsafe { desc.u.Texture1DArray_mut() } = d3d11::D3D11_TEX1D_ARRAY_RTV { + MipSlice, + FirstArraySlice, + ArraySize, + } + } + image::ViewKind::D2 => { + if info.kind.num_samples() > 1 { + desc.ViewDimension = d3d11::D3D11_RTV_DIMENSION_TEXTURE2DMS; + *unsafe { desc.u.Texture2DMS_mut() } = d3d11::D3D11_TEX2DMS_RTV { + UnusedField_NothingToDefine: 0, + } + } else { + desc.ViewDimension = d3d11::D3D11_RTV_DIMENSION_TEXTURE2D; + *unsafe { desc.u.Texture2D_mut() } = d3d11::D3D11_TEX2D_RTV { MipSlice } + } + } + image::ViewKind::D2Array => { + if info.kind.num_samples() > 1 { + desc.ViewDimension = d3d11::D3D11_RTV_DIMENSION_TEXTURE2DMSARRAY; + *unsafe { desc.u.Texture2DMSArray_mut() } = d3d11::D3D11_TEX2DMS_ARRAY_RTV { + FirstArraySlice, + ArraySize, + } + } else { + desc.ViewDimension = d3d11::D3D11_RTV_DIMENSION_TEXTURE2DARRAY; + *unsafe { desc.u.Texture2DArray_mut() } = d3d11::D3D11_TEX2D_ARRAY_RTV { + MipSlice, + FirstArraySlice, + ArraySize, + } + } + } + image::ViewKind::D3 => { + desc.ViewDimension = d3d11::D3D11_RTV_DIMENSION_TEXTURE3D; + *unsafe { desc.u.Texture3D_mut() } = d3d11::D3D11_TEX3D_RTV { + MipSlice, + FirstWSlice: FirstArraySlice, + WSize: ArraySize, + } + } + _ => unimplemented!(), + } + + let mut rtv = ptr::null_mut(); + let hr = unsafe { + self.raw.CreateRenderTargetView( + info.resource, + &desc, + &mut rtv as *mut *mut _ as *mut *mut _, + ) + }; + + if winerror::SUCCEEDED(hr) { + Ok(unsafe { ComPtr::from_raw(rtv) }) + } else { + error!("CreateRenderTargetView failed: 0x{:x}", hr); + + Err(image::ViewCreationError::Unsupported) + } + } + + fn view_image_as_depth_stencil( + &self, + info: &ViewInfo, + read_only_stencil: Option<bool>, + ) -> Result<ComPtr<d3d11::ID3D11DepthStencilView>, image::ViewCreationError> { + #![allow(non_snake_case)] + + let MipSlice = info.levels.start as _; + let FirstArraySlice = info.layers.start as _; + let ArraySize = (info.layers.end - info.layers.start) as _; + assert_eq!(info.levels.start + 1, info.levels.end); + assert!(info.layers.end <= info.kind.num_layers()); + + let mut desc: d3d11::D3D11_DEPTH_STENCIL_VIEW_DESC = unsafe { mem::zeroed() }; + desc.Format = info.format; + + if let Some(stencil) = read_only_stencil { + desc.Flags = match stencil { + true => d3d11::D3D11_DSV_READ_ONLY_DEPTH | d3d11::D3D11_DSV_READ_ONLY_STENCIL, + false => d3d11::D3D11_DSV_READ_ONLY_DEPTH, + } + } + + match info.view_kind { + image::ViewKind::D2 => { + if info.kind.num_samples() > 1 { + desc.ViewDimension = d3d11::D3D11_DSV_DIMENSION_TEXTURE2DMS; + *unsafe { desc.u.Texture2DMS_mut() } = d3d11::D3D11_TEX2DMS_DSV { + UnusedField_NothingToDefine: 0, + } + } else { + 
desc.ViewDimension = d3d11::D3D11_DSV_DIMENSION_TEXTURE2D; + *unsafe { desc.u.Texture2D_mut() } = d3d11::D3D11_TEX2D_DSV { MipSlice } + } + } + image::ViewKind::D2Array => { + if info.kind.num_samples() > 1 { + desc.ViewDimension = d3d11::D3D11_DSV_DIMENSION_TEXTURE2DMSARRAY; + *unsafe { desc.u.Texture2DMSArray_mut() } = d3d11::D3D11_TEX2DMS_ARRAY_DSV { + FirstArraySlice, + ArraySize, + } + } else { + desc.ViewDimension = d3d11::D3D11_DSV_DIMENSION_TEXTURE2DARRAY; + *unsafe { desc.u.Texture2DArray_mut() } = d3d11::D3D11_TEX2D_ARRAY_DSV { + MipSlice, + FirstArraySlice, + ArraySize, + } + } + } + _ => unimplemented!(), + } + + let mut dsv = ptr::null_mut(); + let hr = unsafe { + self.raw.CreateDepthStencilView( + info.resource, + &desc, + &mut dsv as *mut *mut _ as *mut *mut _, + ) + }; + + if winerror::SUCCEEDED(hr) { + Ok(unsafe { ComPtr::from_raw(dsv) }) + } else { + error!("CreateDepthStencilView failed: 0x{:x}", hr); + + Err(image::ViewCreationError::Unsupported) + } + } + + pub(crate) fn create_swapchain_impl( + &self, + config: &window::SwapchainConfig, + window_handle: HWND, + factory: ComPtr<dxgi::IDXGIFactory>, + ) -> Result<(ComPtr<dxgi::IDXGISwapChain>, dxgiformat::DXGI_FORMAT), window::CreationError> + { + // TODO: use IDXGIFactory2 for >=11.1 + // TODO: this function should be able to fail (Result)? + + debug!("{:#?}", config); + let non_srgb_format = conv::map_format_nosrgb(config.format).unwrap(); + + let mut desc = dxgi::DXGI_SWAP_CHAIN_DESC { + BufferDesc: dxgitype::DXGI_MODE_DESC { + Width: config.extent.width, + Height: config.extent.height, + // TODO: should this grab max value of all monitor hz? vsync + // will clamp to current monitor anyways? + RefreshRate: dxgitype::DXGI_RATIONAL { + Numerator: 1, + Denominator: 60, + }, + Format: non_srgb_format, + ScanlineOrdering: dxgitype::DXGI_MODE_SCANLINE_ORDER_UNSPECIFIED, + Scaling: dxgitype::DXGI_MODE_SCALING_UNSPECIFIED, + }, + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: 1, + Quality: 0, + }, + BufferUsage: dxgitype::DXGI_USAGE_RENDER_TARGET_OUTPUT, + BufferCount: config.image_count, + OutputWindow: window_handle, + // TODO: + Windowed: TRUE, + // TODO: + SwapEffect: dxgi::DXGI_SWAP_EFFECT_DISCARD, + Flags: 0, + }; + + let dxgi_swapchain = { + let mut swapchain: *mut dxgi::IDXGISwapChain = ptr::null_mut(); + let hr = unsafe { + factory.CreateSwapChain( + self.raw.as_raw() as *mut _, + &mut desc as *mut _, + &mut swapchain as *mut *mut _ as *mut *mut _, + ) + }; + assert_eq!(hr, winerror::S_OK); + + unsafe { ComPtr::from_raw(swapchain) } + }; + Ok((dxgi_swapchain, non_srgb_format)) + } +} + +impl device::Device<Backend> for Device { + unsafe fn allocate_memory( + &self, + mem_type: hal::MemoryTypeId, + size: u64, + ) -> Result<Memory, device::AllocationError> { + let properties = self.memory_properties.memory_types[mem_type.0].properties; + let host_ptr = if properties.contains(hal::memory::Properties::CPU_VISIBLE) { + let mut data = vec![0u8; size as usize]; + let ptr = data.as_mut_ptr(); + mem::forget(data); + ptr + } else { + ptr::null_mut() + }; + Ok(Memory { + properties, + size, + host_ptr, + local_buffers: Arc::new(RwLock::new(thunderdome::Arena::new())), + }) + } + + unsafe fn create_command_pool( + &self, + _family: QueueFamilyId, + _create_flags: pool::CommandPoolCreateFlags, + ) -> Result<CommandPool, device::OutOfMemory> { + // TODO: + Ok(CommandPool { + device: self.raw.clone(), + device1: self.raw1.clone(), + internal: Arc::clone(&self.internal), + }) + } + + unsafe fn destroy_command_pool(&self, _pool: 
CommandPool) { + // automatic + } + + unsafe fn create_render_pass<'a, IA, IS, ID>( + &self, + attachments: IA, + subpasses: IS, + _dependencies: ID, + ) -> Result<RenderPass, device::OutOfMemory> + where + IA: IntoIterator, + IA::Item: Borrow<pass::Attachment>, + IS: IntoIterator, + IS::Item: Borrow<pass::SubpassDesc<'a>>, + ID: IntoIterator, + ID::Item: Borrow<pass::SubpassDependency>, + { + Ok(RenderPass { + attachments: attachments + .into_iter() + .map(|attachment| attachment.borrow().clone()) + .collect(), + subpasses: subpasses + .into_iter() + .map(|desc| { + let desc = desc.borrow(); + SubpassDesc { + color_attachments: desc + .colors + .iter() + .map(|color| color.borrow().clone()) + .collect(), + depth_stencil_attachment: desc.depth_stencil.map(|d| *d), + input_attachments: desc + .inputs + .iter() + .map(|input| input.borrow().clone()) + .collect(), + resolve_attachments: desc + .resolves + .iter() + .map(|resolve| resolve.borrow().clone()) + .collect(), + } + }) + .collect(), + }) + } + + unsafe fn create_pipeline_layout<IS, IR>( + &self, + set_layouts: IS, + _push_constant_ranges: IR, + ) -> Result<PipelineLayout, device::OutOfMemory> + where + IS: IntoIterator, + IS::Item: Borrow<DescriptorSetLayout>, + IR: IntoIterator, + IR::Item: Borrow<(pso::ShaderStageFlags, Range<u32>)>, + { + let mut res_offsets = MultiStageData::<RegisterData<RegisterAccumulator>>::default(); + let mut sets = Vec::new(); + for set_layout in set_layouts { + let layout = set_layout.borrow(); + sets.push(DescriptorSetInfo { + bindings: Arc::clone(&layout.bindings), + registers: res_offsets.advance(&layout.pool_mapping), + }); + } + + res_offsets.map_other(|data| { + // These use <= because this tells us the _next_ register, so maximum usage will be equal to the limit. 
+ // + // Leave one slot for push constants + assert!( + data.c.res_index as u32 + <= d3d11::D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT - 1, + "{} bound constant buffers exceeds limit of {}", + data.c.res_index as u32, + d3d11::D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT - 1, + ); + assert!( + data.s.res_index as u32 <= d3d11::D3D11_COMMONSHADER_SAMPLER_REGISTER_COUNT, + "{} bound samplers exceeds limit of {}", + data.s.res_index as u32, + d3d11::D3D11_COMMONSHADER_SAMPLER_REGISTER_COUNT, + ); + assert!( + data.t.res_index as u32 <= d3d11::D3D11_COMMONSHADER_INPUT_RESOURCE_REGISTER_COUNT, + "{} bound sampled textures and read-only buffers exceeds limit of {}", + data.t.res_index as u32, + d3d11::D3D11_COMMONSHADER_INPUT_RESOURCE_REGISTER_COUNT, + ); + assert!( + data.u.res_index as u32 <= d3d11::D3D11_PS_CS_UAV_REGISTER_COUNT, + "{} bound storage textures and read-write buffers exceeds limit of {}", + data.u.res_index as u32, + d3d11::D3D11_PS_CS_UAV_REGISTER_COUNT, + ); + }); + + Ok(PipelineLayout { sets }) + } + + unsafe fn create_pipeline_cache( + &self, + _data: Option<&[u8]>, + ) -> Result<(), device::OutOfMemory> { + Ok(()) + } + + unsafe fn get_pipeline_cache_data(&self, _cache: &()) -> Result<Vec<u8>, device::OutOfMemory> { + //empty + Ok(Vec::new()) + } + + unsafe fn destroy_pipeline_cache(&self, _: ()) { + //empty + } + + unsafe fn merge_pipeline_caches<I>(&self, _: &(), _: I) -> Result<(), device::OutOfMemory> + where + I: IntoIterator, + I::Item: Borrow<()>, + { + //empty + Ok(()) + } + + unsafe fn create_graphics_pipeline<'a>( + &self, + desc: &pso::GraphicsPipelineDesc<'a, Backend>, + _cache: Option<&()>, + ) -> Result<GraphicsPipeline, pso::CreationError> { + let features = &self.features; + let build_shader = |stage: ShaderStage, source: Option<&pso::EntryPoint<'a, Backend>>| { + let source = match source { + Some(src) => src, + None => return Ok(None), + }; + + Self::extract_entry_point(stage, source, desc.layout, features) + .map_err(|err| pso::CreationError::Shader(err)) + }; + + let (layout, vs, gs, hs, ds) = match desc.primitive_assembler { + pso::PrimitiveAssemblerDesc::Vertex { + buffers, + attributes, + ref input_assembler, + ref vertex, + ref tessellation, + ref geometry, + } => { + let vertex_semantic_remapping = match vertex.module { + ShaderModule::Spirv(spirv) => { + shader::introspect_spirv_vertex_semantic_remapping(spirv) + .map_err(|err| pso::CreationError::Shader(err))? + } + _ => unimplemented!(), + }; + + let vs = build_shader(ShaderStage::Vertex, Some(&vertex))?.unwrap(); + let gs = build_shader(ShaderStage::Geometry, geometry.as_ref())?; + + let layout = self.create_input_layout( + vs.clone(), + buffers, + attributes, + input_assembler, + vertex_semantic_remapping, + )?; + + let vs = self.create_vertex_shader(vs)?; + let gs = if let Some(blob) = gs { + Some(self.create_geometry_shader(blob)?) + } else { + None + }; + let (hs, ds) = if let Some(ts) = tessellation { + let hs = build_shader(ShaderStage::Hull, Some(&ts.0))?.unwrap(); + let ds = build_shader(ShaderStage::Domain, Some(&ts.1))?.unwrap(); + + ( + Some(self.create_hull_shader(hs)?), + Some(self.create_domain_shader(ds)?), + ) + } else { + (None, None) + }; + + (layout, vs, gs, hs, ds) + } + pso::PrimitiveAssemblerDesc::Mesh { .. } => { + return Err(pso::CreationError::UnsupportedPipeline) + } + }; + + let ps = build_shader(ShaderStage::Fragment, desc.fragment.as_ref())?; + let ps = if let Some(blob) = ps { + Some(self.create_pixel_shader(blob)?) 
+ } else { + None + }; + + let rasterizer_state = + self.create_rasterizer_state(&desc.rasterizer, &desc.multisampling)?; + let blend_state = self.create_blend_state(&desc.blender, &desc.multisampling)?; + let depth_stencil_state = Some(self.create_depth_stencil_state(&desc.depth_stencil)?); + + Ok(GraphicsPipeline { + vs, + gs, + ds, + hs, + ps, + topology: layout.topology, + input_layout: layout.raw, + rasterizer_state, + blend_state, + depth_stencil_state, + baked_states: desc.baked_states.clone(), + required_bindings: layout.required_bindings, + max_vertex_bindings: layout.max_vertex_bindings, + strides: layout.vertex_strides, + }) + } + + unsafe fn create_compute_pipeline<'a>( + &self, + desc: &pso::ComputePipelineDesc<'a, Backend>, + _cache: Option<&()>, + ) -> Result<ComputePipeline, pso::CreationError> { + let features = &self.features; + let build_shader = |stage: ShaderStage, source: Option<&pso::EntryPoint<'a, Backend>>| { + let source = match source { + Some(src) => src, + None => return Ok(None), + }; + + Self::extract_entry_point(stage, source, desc.layout, features) + .map_err(|err| pso::CreationError::Shader(err)) + }; + + let cs = build_shader(ShaderStage::Compute, Some(&desc.shader))?.unwrap(); + let cs = self.create_compute_shader(cs)?; + + Ok(ComputePipeline { cs }) + } + + unsafe fn create_framebuffer<I>( + &self, + _renderpass: &RenderPass, + attachments: I, + extent: image::Extent, + ) -> Result<Framebuffer, device::OutOfMemory> + where + I: IntoIterator, + I::Item: Borrow<ImageView>, + { + Ok(Framebuffer { + attachments: attachments + .into_iter() + .map(|att| att.borrow().clone()) + .collect(), + layers: extent.depth as _, + }) + } + + unsafe fn create_shader_module( + &self, + raw_data: &[u32], + ) -> Result<ShaderModule, device::ShaderError> { + Ok(ShaderModule::Spirv(raw_data.into())) + } + + unsafe fn create_buffer( + &self, + size: u64, + usage: buffer::Usage, + ) -> Result<Buffer, buffer::CreationError> { + use buffer::Usage; + + let mut bind = 0; + + if usage.contains(Usage::UNIFORM) { + bind |= d3d11::D3D11_BIND_CONSTANT_BUFFER; + } + if usage.contains(Usage::VERTEX) { + bind |= d3d11::D3D11_BIND_VERTEX_BUFFER; + } + if usage.contains(Usage::INDEX) { + bind |= d3d11::D3D11_BIND_INDEX_BUFFER; + } + + // TODO: >=11.1 + if usage.intersects( + Usage::UNIFORM_TEXEL | Usage::STORAGE_TEXEL | Usage::TRANSFER_SRC | Usage::STORAGE, + ) { + bind |= d3d11::D3D11_BIND_SHADER_RESOURCE; + } + + if usage.intersects(Usage::TRANSFER_DST | Usage::STORAGE) { + bind |= d3d11::D3D11_BIND_UNORDERED_ACCESS; + } + + // if `D3D11_BIND_CONSTANT_BUFFER` intersects with any other bind flag, we need to handle + // it by creating two buffers. 
one with `D3D11_BIND_CONSTANT_BUFFER` and one with the rest
+        let needs_disjoint_cb = bind & d3d11::D3D11_BIND_CONSTANT_BUFFER != 0
+            && bind != d3d11::D3D11_BIND_CONSTANT_BUFFER;
+
+        if needs_disjoint_cb {
+            bind ^= d3d11::D3D11_BIND_CONSTANT_BUFFER;
+        }
+
+        fn up_align(x: u64, alignment: u64) -> u64 {
+            (x + alignment - 1) & !(alignment - 1)
+        }
+
+        // constant buffer sizes need to be divisible by 16
+        let size = if usage.contains(Usage::UNIFORM) {
+            up_align(size, 16)
+        } else {
+            up_align(size, 4)
+        };
+
+        Ok(Buffer {
+            internal: InternalBuffer {
+                raw: ptr::null_mut(),
+                disjoint_cb: if needs_disjoint_cb {
+                    Some(ptr::null_mut())
+                } else {
+                    None
+                },
+                srv: None,
+                uav: None,
+                usage,
+                debug_name: None,
+            },
+            bound_range: 0..0,
+            local_memory_arena: Weak::new(),
+            memory_index: None,
+            is_coherent: false,
+            memory_ptr: ptr::null_mut(),
+            bind,
+            requirements: memory::Requirements {
+                size,
+                alignment: 4,
+                type_mask: BUFFER_TYPE_MASK,
+            },
+        })
+    }
+
+    unsafe fn get_buffer_requirements(&self, buffer: &Buffer) -> memory::Requirements {
+        buffer.requirements
+    }
+
+    unsafe fn bind_buffer_memory(
+        &self,
+        memory: &Memory,
+        offset: u64,
+        buffer: &mut Buffer,
+    ) -> Result<(), device::BindError> {
+        debug!(
+            "usage={:?}, props={:b}",
+            buffer.internal.usage, memory.properties
+        );
+
+        #[allow(non_snake_case)]
+        let mut MiscFlags = if buffer.bind
+            & (d3d11::D3D11_BIND_SHADER_RESOURCE | d3d11::D3D11_BIND_UNORDERED_ACCESS)
+            != 0
+        {
+            d3d11::D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS
+        } else {
+            0
+        };
+
+        if buffer.internal.usage.contains(buffer::Usage::INDIRECT) {
+            MiscFlags |= d3d11::D3D11_RESOURCE_MISC_DRAWINDIRECT_ARGS;
+        }
+
+        let initial_data = if memory.host_ptr.is_null() {
+            None
+        } else {
+            Some(d3d11::D3D11_SUBRESOURCE_DATA {
+                pSysMem: memory.host_ptr.offset(offset as isize) as *const _,
+                SysMemPitch: 0,
+                SysMemSlicePitch: 0,
+            })
+        };
+
+        //TODO: check `memory.properties.contains(memory::Properties::DEVICE_LOCAL)` ?
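+        // Worked example (illustrative, not part of the original source): a buffer
+        // created with Usage::STORAGE | Usage::INDIRECT received bind =
+        // D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS in create_buffer,
+        // so MiscFlags here ends up as D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS |
+        // D3D11_RESOURCE_MISC_DRAWINDIRECT_ARGS.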
+ let raw = { + // device local memory + let desc = d3d11::D3D11_BUFFER_DESC { + ByteWidth: buffer.requirements.size as _, + Usage: d3d11::D3D11_USAGE_DEFAULT, + BindFlags: buffer.bind, + CPUAccessFlags: 0, + MiscFlags, + StructureByteStride: if buffer.internal.usage.contains(buffer::Usage::TRANSFER_SRC) + { + 4 + } else { + 0 + }, + }; + + let mut raw: *mut d3d11::ID3D11Buffer = ptr::null_mut(); + let hr = self.raw.CreateBuffer( + &desc, + initial_data.as_ref().map_or(ptr::null_mut(), |id| id), + &mut raw as *mut *mut _ as *mut *mut _, + ); + + if !winerror::SUCCEEDED(hr) { + return Err(device::BindError::WrongMemory); + } + + if let Some(ref mut name) = buffer.internal.debug_name { + set_debug_name(&*raw, name); + } + + ComPtr::from_raw(raw) + }; + + let disjoint_cb = if buffer.internal.disjoint_cb.is_some() { + let desc = d3d11::D3D11_BUFFER_DESC { + ByteWidth: buffer.requirements.size as _, + Usage: d3d11::D3D11_USAGE_DEFAULT, + BindFlags: d3d11::D3D11_BIND_CONSTANT_BUFFER, + CPUAccessFlags: 0, + MiscFlags: 0, + StructureByteStride: 0, + }; + + let mut disjoint_raw: *mut d3d11::ID3D11Buffer = ptr::null_mut(); + let hr = self.raw.CreateBuffer( + &desc, + initial_data.as_ref().map_or(ptr::null_mut(), |id| id), + &mut disjoint_raw as *mut *mut _ as *mut *mut _, + ); + + if !winerror::SUCCEEDED(hr) { + return Err(device::BindError::WrongMemory); + } + + if let Some(ref mut name) = buffer.internal.debug_name { + set_debug_name_with_suffix(&*disjoint_raw, name, " -- Constant Buffer"); + } + + Some(disjoint_raw) + } else { + None + }; + + let srv = if buffer.bind & d3d11::D3D11_BIND_SHADER_RESOURCE != 0 { + let mut desc = mem::zeroed::<d3d11::D3D11_SHADER_RESOURCE_VIEW_DESC>(); + desc.Format = dxgiformat::DXGI_FORMAT_R32_TYPELESS; + desc.ViewDimension = d3dcommon::D3D11_SRV_DIMENSION_BUFFEREX; + *desc.u.BufferEx_mut() = d3d11::D3D11_BUFFEREX_SRV { + FirstElement: 0, + NumElements: buffer.requirements.size as u32 / 4, + Flags: d3d11::D3D11_BUFFEREX_SRV_FLAG_RAW, + }; + + let mut srv: *mut d3d11::ID3D11ShaderResourceView = ptr::null_mut(); + let hr = self.raw.CreateShaderResourceView( + raw.as_raw() as *mut _, + &desc, + &mut srv as *mut *mut _ as *mut *mut _, + ); + + if !winerror::SUCCEEDED(hr) { + error!("CreateShaderResourceView failed: 0x{:x}", hr); + + return Err(device::BindError::WrongMemory); + } + + if let Some(ref mut name) = buffer.internal.debug_name { + set_debug_name_with_suffix(&*srv, name, " -- SRV"); + } + + Some(srv) + } else { + None + }; + + let uav = if buffer.bind & d3d11::D3D11_BIND_UNORDERED_ACCESS != 0 { + let mut desc = mem::zeroed::<d3d11::D3D11_UNORDERED_ACCESS_VIEW_DESC>(); + desc.Format = dxgiformat::DXGI_FORMAT_R32_TYPELESS; + desc.ViewDimension = d3d11::D3D11_UAV_DIMENSION_BUFFER; + *desc.u.Buffer_mut() = d3d11::D3D11_BUFFER_UAV { + FirstElement: 0, + NumElements: buffer.requirements.size as u32 / 4, + Flags: d3d11::D3D11_BUFFER_UAV_FLAG_RAW, + }; + + let mut uav: *mut d3d11::ID3D11UnorderedAccessView = ptr::null_mut(); + let hr = self.raw.CreateUnorderedAccessView( + raw.as_raw() as *mut _, + &desc, + &mut uav as *mut *mut _ as *mut *mut _, + ); + + if !winerror::SUCCEEDED(hr) { + error!("CreateUnorderedAccessView failed: 0x{:x}", hr); + + return Err(device::BindError::WrongMemory); + } + + if let Some(ref mut name) = buffer.internal.debug_name { + set_debug_name_with_suffix(&*uav, name, " -- UAV"); + } + + Some(uav) + } else { + None + }; + + let internal = InternalBuffer { + raw: raw.into_raw(), + disjoint_cb, + srv, + uav, + usage: buffer.internal.usage, + 
debug_name: buffer.internal.debug_name.take(), + }; + let range = offset..offset + buffer.requirements.size; + + let memory_index = memory.bind_buffer(range.clone(), internal.clone()); + + buffer.internal = internal; + buffer.is_coherent = memory + .properties + .contains(hal::memory::Properties::COHERENT); + buffer.memory_ptr = memory.host_ptr; + buffer.bound_range = range; + buffer.local_memory_arena = Arc::downgrade(&memory.local_buffers); + buffer.memory_index = Some(memory_index); + + Ok(()) + } + + unsafe fn create_buffer_view( + &self, + _buffer: &Buffer, + _format: Option<format::Format>, + _range: buffer::SubRange, + ) -> Result<BufferView, buffer::ViewCreationError> { + unimplemented!() + } + + unsafe fn create_image( + &self, + kind: image::Kind, + mip_levels: image::Level, + format: format::Format, + _tiling: image::Tiling, + usage: image::Usage, + view_caps: image::ViewCapabilities, + ) -> Result<Image, image::CreationError> { + let surface_desc = format.base_format().0.desc(); + let bytes_per_texel = surface_desc.bits / 8; + let ext = kind.extent(); + let size = (ext.width * ext.height * ext.depth) as u64 * bytes_per_texel as u64; + + let bind = conv::map_image_usage(usage, surface_desc); + debug!("{:b}", bind); + + Ok(Image { + internal: InternalImage { + raw: ptr::null_mut(), + copy_srv: None, + srv: None, + unordered_access_views: Vec::new(), + depth_stencil_views: Vec::new(), + render_target_views: Vec::new(), + debug_name: None, + }, + decomposed_format: conv::DecomposedDxgiFormat::UNKNOWN, + kind, + mip_levels, + format, + usage, + view_caps, + bind, + requirements: memory::Requirements { + size: size, + alignment: 4, + type_mask: 0x1, // device-local only + }, + }) + } + + unsafe fn get_image_requirements(&self, image: &Image) -> memory::Requirements { + image.requirements + } + + unsafe fn get_image_subresource_footprint( + &self, + _image: &Image, + _sub: image::Subresource, + ) -> image::SubresourceFootprint { + unimplemented!() + } + + unsafe fn bind_image_memory( + &self, + memory: &Memory, + _offset: u64, + image: &mut Image, + ) -> Result<(), device::BindError> { + use image::Usage; + use memory::Properties; + + let base_format = image.format.base_format(); + let format_desc = base_format.0.desc(); + + let compressed = format_desc.is_compressed(); + let depth = image.format.is_depth(); + let stencil = image.format.is_stencil(); + + let (bind, usage, cpu) = if memory.properties == Properties::DEVICE_LOCAL { + (image.bind, d3d11::D3D11_USAGE_DEFAULT, 0) + } else if memory.properties + == (Properties::DEVICE_LOCAL | Properties::CPU_VISIBLE | Properties::CPU_CACHED) + { + ( + image.bind, + d3d11::D3D11_USAGE_DYNAMIC, + d3d11::D3D11_CPU_ACCESS_WRITE, + ) + } else if memory.properties == (Properties::CPU_VISIBLE | Properties::CPU_CACHED) { + ( + 0, + d3d11::D3D11_USAGE_STAGING, + d3d11::D3D11_CPU_ACCESS_READ | d3d11::D3D11_CPU_ACCESS_WRITE, + ) + } else { + unimplemented!() + }; + + let dxgi_format = conv::map_format(image.format).unwrap(); + let decomposed = conv::DecomposedDxgiFormat::from_dxgi_format(dxgi_format); + assert!( + memory.host_ptr.is_null(), + "Images can only be allocated from device-local memory" + ); + let initial_data_ptr = ptr::null_mut(); + + let mut resource = ptr::null_mut(); + let view_kind = match image.kind { + image::Kind::D1(width, layers) => { + let desc = d3d11::D3D11_TEXTURE1D_DESC { + Width: width, + MipLevels: image.mip_levels as _, + ArraySize: layers as _, + Format: decomposed.typeless, + Usage: usage, + BindFlags: bind, + 
CPUAccessFlags: cpu, + MiscFlags: 0, + }; + + let hr = self.raw.CreateTexture1D( + &desc, + initial_data_ptr, + &mut resource as *mut *mut _ as *mut *mut _, + ); + + if !winerror::SUCCEEDED(hr) { + error!("CreateTexture1D failed: 0x{:x}", hr); + + return Err(device::BindError::WrongMemory); + } + + image::ViewKind::D1Array + } + image::Kind::D2(width, height, layers, samples) => { + let desc = d3d11::D3D11_TEXTURE2D_DESC { + Width: width, + Height: height, + MipLevels: image.mip_levels as _, + ArraySize: layers as _, + Format: decomposed.typeless, + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: samples as _, + Quality: 0, + }, + Usage: usage, + BindFlags: bind, + CPUAccessFlags: cpu, + MiscFlags: if image.view_caps.contains(image::ViewCapabilities::KIND_CUBE) { + d3d11::D3D11_RESOURCE_MISC_TEXTURECUBE + } else { + 0 + }, + }; + + let hr = self.raw.CreateTexture2D( + &desc, + initial_data_ptr, + &mut resource as *mut *mut _ as *mut *mut _, + ); + + if !winerror::SUCCEEDED(hr) { + error!("CreateTexture2D failed: 0x{:x}", hr); + + return Err(device::BindError::WrongMemory); + } + + image::ViewKind::D2Array + } + image::Kind::D3(width, height, depth) => { + let desc = d3d11::D3D11_TEXTURE3D_DESC { + Width: width, + Height: height, + Depth: depth, + MipLevels: image.mip_levels as _, + Format: decomposed.typeless, + Usage: usage, + BindFlags: bind, + CPUAccessFlags: cpu, + MiscFlags: 0, + }; + + let hr = self.raw.CreateTexture3D( + &desc, + initial_data_ptr, + &mut resource as *mut *mut _ as *mut *mut _, + ); + + if !winerror::SUCCEEDED(hr) { + error!("CreateTexture3D failed: 0x{:x}", hr); + + return Err(device::BindError::WrongMemory); + } + + image::ViewKind::D3 + } + }; + + let mut unordered_access_views = Vec::new(); + + if image.usage.contains(Usage::TRANSFER_DST) && !compressed && !depth { + for mip in 0..image.mip_levels { + let view = ViewInfo { + resource: resource, + kind: image.kind, + caps: image::ViewCapabilities::empty(), + view_kind, + // TODO: we should be using `uav_format` rather than `copy_uav_format`, and share + // the UAVs when the formats are identical + format: decomposed.copy_uav.unwrap(), + levels: mip..(mip + 1), + layers: 0..image.kind.num_layers(), + }; + + let uav = self + .view_image_as_unordered_access(&view) + .map_err(|_| device::BindError::WrongMemory)?; + + if let Some(ref name) = image.internal.debug_name { + set_debug_name(&uav, &format!("{} -- UAV Mip {}", name, mip)); + } + + unordered_access_views.push(uav); + } + } + + let (copy_srv, srv) = if image.usage.contains(image::Usage::TRANSFER_SRC) { + let mut view = ViewInfo { + resource: resource, + kind: image.kind, + caps: image::ViewCapabilities::empty(), + view_kind, + format: decomposed.copy_srv.unwrap(), + levels: 0..image.mip_levels, + layers: 0..image.kind.num_layers(), + }; + + let copy_srv = if !compressed { + Some( + self.view_image_as_shader_resource(&view) + .map_err(|_| device::BindError::WrongMemory)?, + ) + } else { + None + }; + + view.format = decomposed.srv.unwrap(); + + let srv = if !depth && !stencil { + Some( + self.view_image_as_shader_resource(&view) + .map_err(|_| device::BindError::WrongMemory)?, + ) + } else { + None + }; + + (copy_srv, srv) + } else { + (None, None) + }; + + let mut render_target_views = Vec::new(); + + if (image.usage.contains(image::Usage::COLOR_ATTACHMENT) + || image.usage.contains(image::Usage::TRANSFER_DST)) + && !compressed + && !depth + { + for layer in 0..image.kind.num_layers() { + for mip in 0..image.mip_levels { + let view = ViewInfo { + resource, + 
kind: image.kind, + caps: image::ViewCapabilities::empty(), + view_kind, + format: decomposed.rtv.unwrap(), + levels: mip..(mip + 1), + layers: layer..(layer + 1), + }; + + let rtv = self + .view_image_as_render_target(&view) + .map_err(|_| device::BindError::WrongMemory)?; + + if let Some(ref name) = image.internal.debug_name { + set_debug_name( + &rtv, + &format!("{} -- RTV Mip {} Layer {}", name, mip, layer), + ); + } + + render_target_views.push(rtv); + } + } + }; + + let mut depth_stencil_views = Vec::new(); + + if depth { + for layer in 0..image.kind.num_layers() { + for mip in 0..image.mip_levels { + let view = ViewInfo { + resource, + kind: image.kind, + caps: image::ViewCapabilities::empty(), + view_kind, + format: decomposed.dsv.unwrap(), + levels: mip..(mip + 1), + layers: layer..(layer + 1), + }; + + let dsv = self + .view_image_as_depth_stencil(&view, None) + .map_err(|_| device::BindError::WrongMemory)?; + + if let Some(ref name) = image.internal.debug_name { + set_debug_name( + &dsv, + &format!("{} -- DSV Mip {} Layer {}", name, mip, layer), + ); + } + + depth_stencil_views.push(dsv); + } + } + } + + if let Some(ref mut name) = image.internal.debug_name { + set_debug_name(&*resource, name); + if let Some(ref copy_srv) = copy_srv { + set_debug_name_with_suffix(copy_srv, name, " -- Copy SRV"); + } + if let Some(ref srv) = srv { + set_debug_name_with_suffix(srv, name, " -- SRV"); + } + } + + let internal = InternalImage { + raw: resource, + copy_srv, + srv, + unordered_access_views, + depth_stencil_views, + render_target_views, + debug_name: image.internal.debug_name.take(), + }; + + image.decomposed_format = decomposed; + image.internal = internal; + + Ok(()) + } + + unsafe fn create_image_view( + &self, + image: &Image, + view_kind: image::ViewKind, + format: format::Format, + _swizzle: format::Swizzle, + range: image::SubresourceRange, + ) -> Result<ImageView, image::ViewCreationError> { + let is_array = image.kind.num_layers() > 1; + let num_levels = range.resolve_level_count(image.mip_levels); + let num_layers = range.resolve_layer_count(image.kind.num_layers()); + + let info = ViewInfo { + resource: image.internal.raw, + kind: image.kind, + caps: image.view_caps, + // D3D11 doesn't allow looking at a single slice of an array as a non-array + view_kind: if is_array && view_kind == image::ViewKind::D2 { + image::ViewKind::D2Array + } else if is_array && view_kind == image::ViewKind::D1 { + image::ViewKind::D1Array + } else { + view_kind + }, + format: conv::map_format(format).ok_or(image::ViewCreationError::BadFormat(format))?, + levels: range.level_start..range.level_start + num_levels, + layers: range.layer_start..range.layer_start + num_layers, + }; + + let srv_info = ViewInfo { + format: conv::viewable_format(info.format), + ..info.clone() + }; + + let mut debug_name = image.internal.debug_name.clone(); + + Ok(ImageView { + subresource: d3d11::D3D11CalcSubresource( + 0, + range.layer_start as _, + range.level_start as _, + ), + format, + srv_handle: if image.usage.intersects(image::Usage::SAMPLED) { + let srv = self.view_image_as_shader_resource(&srv_info)?; + + if let Some(ref mut name) = debug_name { + set_debug_name_with_suffix(&srv, name, " -- SRV"); + } + + Some(srv.into_raw()) + } else { + None + }, + rtv_handle: if image.usage.contains(image::Usage::COLOR_ATTACHMENT) { + let rtv = self.view_image_as_render_target(&info)?; + + if let Some(ref mut name) = debug_name { + set_debug_name_with_suffix(&rtv, name, " -- RTV"); + } + + Some(rtv.into_raw()) + } else { + 
None + }, + uav_handle: if image.usage.contains(image::Usage::STORAGE) { + let uav = self.view_image_as_unordered_access(&info)?; + + if let Some(ref mut name) = debug_name { + set_debug_name_with_suffix(&uav, name, " -- UAV"); + } + + Some(uav.into_raw()) + } else { + None + }, + dsv_handle: if image.usage.contains(image::Usage::DEPTH_STENCIL_ATTACHMENT) { + let dsv = self.view_image_as_depth_stencil(&info, None)?; + + if let Some(ref mut name) = debug_name { + set_debug_name_with_suffix(&dsv, name, " -- DSV"); + } + + Some(dsv.into_raw()) + } else { + None + }, + rodsv_handle: if image.usage.contains(image::Usage::DEPTH_STENCIL_ATTACHMENT) { + let rodsv = + self.view_image_as_depth_stencil(&info, Some(image.format.is_stencil()))?; + + if let Some(ref mut name) = debug_name { + set_debug_name_with_suffix(&rodsv, name, " -- DSV"); + } + + Some(rodsv.into_raw()) + } else { + None + }, + owned: true, + }) + } + + unsafe fn create_sampler( + &self, + info: &image::SamplerDesc, + ) -> Result<Sampler, device::AllocationError> { + assert!(info.normalized); + + let op = match info.comparison { + Some(_) => d3d11::D3D11_FILTER_REDUCTION_TYPE_COMPARISON, + None => d3d11::D3D11_FILTER_REDUCTION_TYPE_STANDARD, + }; + + let desc = d3d11::D3D11_SAMPLER_DESC { + Filter: conv::map_filter( + info.min_filter, + info.mag_filter, + info.mip_filter, + op, + info.anisotropy_clamp, + ), + AddressU: conv::map_wrapping(info.wrap_mode.0), + AddressV: conv::map_wrapping(info.wrap_mode.1), + AddressW: conv::map_wrapping(info.wrap_mode.2), + MipLODBias: info.lod_bias.0, + MaxAnisotropy: info.anisotropy_clamp.map_or(0, |aniso| aniso as u32), + ComparisonFunc: info.comparison.map_or(0, |comp| conv::map_comparison(comp)), + BorderColor: info.border.into(), + MinLOD: info.lod_range.start.0, + MaxLOD: info.lod_range.end.0, + }; + + let mut sampler = ptr::null_mut(); + let hr = self + .raw + .CreateSamplerState(&desc, &mut sampler as *mut *mut _ as *mut *mut _); + + assert_eq!(true, winerror::SUCCEEDED(hr)); + + Ok(Sampler { + sampler_handle: ComPtr::from_raw(sampler), + }) + } + + unsafe fn create_descriptor_pool<I>( + &self, + _max_sets: usize, + ranges: I, + _flags: pso::DescriptorPoolCreateFlags, + ) -> Result<DescriptorPool, device::OutOfMemory> + where + I: IntoIterator, + I::Item: Borrow<pso::DescriptorRangeDesc>, + { + let mut total = RegisterData::default(); + for range in ranges { + let r = range.borrow(); + let content = DescriptorContent::from(r.ty); + total.add_content_many(content, r.count as DescriptorIndex); + } + + let max_stages = 6; + let count = total.sum() * max_stages; + Ok(DescriptorPool::with_capacity(count)) + } + + unsafe fn create_descriptor_set_layout<I, J>( + &self, + layout_bindings: I, + _immutable_samplers: J, + ) -> Result<DescriptorSetLayout, device::OutOfMemory> + where + I: IntoIterator, + I::Item: Borrow<pso::DescriptorSetLayoutBinding>, + J: IntoIterator, + J::Item: Borrow<Sampler>, + { + let mut total = MultiStageData::<RegisterData<_>>::default(); + let mut bindings = layout_bindings + .into_iter() + .map(|b| b.borrow().clone()) + .collect::<Vec<_>>(); + + for binding in bindings.iter() { + let content = DescriptorContent::from(binding.ty); + // If this binding is used by the graphics pipeline and is a UAV, it belongs to the "Output Merger" + // stage, so we only put them in the fragment stage to save redundant descriptor allocations. 
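+            // For example (illustrative, editorial note): a UAV binding declared for
+            // VERTEX | FRAGMENT collapses to just FRAGMENT here, while one declared
+            // for FRAGMENT | COMPUTE keeps both FRAGMENT and COMPUTE.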
+ let stage_flags = if content.contains(DescriptorContent::UAV) + && binding + .stage_flags + .intersects(pso::ShaderStageFlags::ALL - pso::ShaderStageFlags::COMPUTE) + { + let mut stage_flags = pso::ShaderStageFlags::FRAGMENT; + stage_flags.set( + pso::ShaderStageFlags::COMPUTE, + binding.stage_flags.contains(pso::ShaderStageFlags::COMPUTE), + ); + stage_flags + } else { + binding.stage_flags + }; + total.add_content_many(content, stage_flags, binding.count as _); + } + + bindings.sort_by_key(|a| a.binding); + + let accum = total.map_register(|count| RegisterAccumulator { + res_index: *count as ResourceIndex, + }); + + Ok(DescriptorSetLayout { + bindings: Arc::new(bindings), + pool_mapping: accum.to_mapping(), + }) + } + + unsafe fn write_descriptor_sets<'a, I, J>(&self, write_iter: I) + where + I: IntoIterator<Item = pso::DescriptorSetWrite<'a, Backend, J>>, + J: IntoIterator, + J::Item: Borrow<pso::Descriptor<'a, Backend>>, + { + for write in write_iter { + // Get baseline mapping + let mut mapping = write + .set + .layout + .pool_mapping + .map_register(|mapping| mapping.offset); + + // Iterate over layout bindings until the first binding is found. + let binding_start = write + .set + .layout + .bindings + .iter() + .position(|binding| binding.binding == write.binding) + .unwrap(); + + // If we've skipped layout bindings, we need to add them to get the correct binding offset + for binding in &write.set.layout.bindings[..binding_start] { + let content = DescriptorContent::from(binding.ty); + mapping.add_content_many(content, binding.stage_flags, binding.count as _); + } + + // We start at the given binding index and array index + let mut binding_index = binding_start; + let mut array_index = write.array_offset; + + // If we're skipping array indices in the current binding, we need to add them to get the correct binding offset + if array_index > 0 { + let binding: &pso::DescriptorSetLayoutBinding = + &write.set.layout.bindings[binding_index]; + let content = DescriptorContent::from(binding.ty); + mapping.add_content_many(content, binding.stage_flags, array_index as _); + } + + // Iterate over the descriptors, figuring out the corresponding binding, and adding + // it to the set of bindings. + // + // When we hit the end of an array of descriptors and there are still descriptors left + // over, we will spill into writing the next binding. 
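+            // Illustrative walkthrough (editorial note, not in the original source):
+            // with array_offset 1 into a binding of count 2, the first descriptor
+            // fills that binding's second slot; the loop below then advances
+            // binding_index, so further descriptors spill into the next binding's
+            // slots starting at 0.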
+ for descriptor in write.descriptors { + let binding: &pso::DescriptorSetLayoutBinding = + &write.set.layout.bindings[binding_index]; + + let handles = match *descriptor.borrow() { + pso::Descriptor::Buffer(buffer, ref _sub) => RegisterData { + c: match buffer.internal.disjoint_cb { + Some(dj_buf) => dj_buf as *mut _, + None => buffer.internal.raw as *mut _, + }, + t: buffer.internal.srv.map_or(ptr::null_mut(), |p| p as *mut _), + u: buffer.internal.uav.map_or(ptr::null_mut(), |p| p as *mut _), + s: ptr::null_mut(), + }, + pso::Descriptor::Image(image, _layout) => RegisterData { + c: ptr::null_mut(), + t: image.srv_handle.map_or(ptr::null_mut(), |h| h as *mut _), + u: image.uav_handle.map_or(ptr::null_mut(), |h| h as *mut _), + s: ptr::null_mut(), + }, + pso::Descriptor::Sampler(sampler) => RegisterData { + c: ptr::null_mut(), + t: ptr::null_mut(), + u: ptr::null_mut(), + s: sampler.sampler_handle.as_raw() as *mut _, + }, + pso::Descriptor::CombinedImageSampler(image, _layout, sampler) => { + RegisterData { + c: ptr::null_mut(), + t: image.srv_handle.map_or(ptr::null_mut(), |h| h as *mut _), + u: image.uav_handle.map_or(ptr::null_mut(), |h| h as *mut _), + s: sampler.sampler_handle.as_raw() as *mut _, + } + } + pso::Descriptor::TexelBuffer(_buffer_view) => unimplemented!(), + }; + + let content = DescriptorContent::from(binding.ty); + if content.contains(DescriptorContent::CBV) { + let offsets = mapping.map_other(|map| map.c); + write + .set + .assign_stages(&offsets, binding.stage_flags, handles.c); + }; + if content.contains(DescriptorContent::SRV) { + let offsets = mapping.map_other(|map| map.t); + write + .set + .assign_stages(&offsets, binding.stage_flags, handles.t); + }; + if content.contains(DescriptorContent::UAV) { + // If this binding is used by the graphics pipeline and is a UAV, it belongs to the "Output Merger" + // stage, so we only put them in the fragment stage to save redundant descriptor allocations. + let stage_flags = if binding + .stage_flags + .intersects(pso::ShaderStageFlags::ALL - pso::ShaderStageFlags::COMPUTE) + { + let mut stage_flags = pso::ShaderStageFlags::FRAGMENT; + stage_flags.set( + pso::ShaderStageFlags::COMPUTE, + binding.stage_flags.contains(pso::ShaderStageFlags::COMPUTE), + ); + stage_flags + } else { + binding.stage_flags + }; + + let offsets = mapping.map_other(|map| map.u); + write.set.assign_stages(&offsets, stage_flags, handles.u); + }; + if content.contains(DescriptorContent::SAMPLER) { + let offsets = mapping.map_other(|map| map.s); + write + .set + .assign_stages(&offsets, binding.stage_flags, handles.s); + }; + + mapping.add_content_many(content, binding.stage_flags, 1); + + array_index += 1; + if array_index >= binding.count { + // We've run out of array to write to, we should overflow to the next binding. + array_index = 0; + binding_index += 1; + } + } + } + } + + unsafe fn copy_descriptor_sets<'a, I>(&self, copy_iter: I) + where + I: IntoIterator, + I::Item: Borrow<pso::DescriptorSetCopy<'a, Backend>>, + { + for copy in copy_iter { + let _copy = copy.borrow(); + //TODO + /* + for offset in 0 .. 
copy.count { + let (dst_ty, dst_handle_offset, dst_second_handle_offset) = copy + .dst_set + .get_handle_offset(copy.dst_binding + offset as u32); + let (src_ty, src_handle_offset, src_second_handle_offset) = copy + .src_set + .get_handle_offset(copy.src_binding + offset as u32); + assert_eq!(dst_ty, src_ty); + + let dst_handle = copy.dst_set.handles.offset(dst_handle_offset as isize); + let src_handle = copy.dst_set.handles.offset(src_handle_offset as isize); + + match dst_ty { + pso::DescriptorType::Image { + ty: pso::ImageDescriptorType::Sampled { with_sampler: true } + } => { + let dst_second_handle = copy + .dst_set + .handles + .offset(dst_second_handle_offset as isize); + let src_second_handle = copy + .dst_set + .handles + .offset(src_second_handle_offset as isize); + + *dst_handle = *src_handle; + *dst_second_handle = *src_second_handle; + } + _ => *dst_handle = *src_handle, + } + }*/ + } + } + + unsafe fn map_memory( + &self, + memory: &Memory, + segment: memory::Segment, + ) -> Result<*mut u8, device::MapError> { + Ok(memory.host_ptr.offset(segment.offset as isize)) + } + + unsafe fn unmap_memory(&self, _memory: &Memory) { + // persistent mapping FTW + } + + unsafe fn flush_mapped_memory_ranges<'a, I>(&self, ranges: I) -> Result<(), device::OutOfMemory> + where + I: IntoIterator, + I::Item: Borrow<(&'a Memory, memory::Segment)>, + { + let _scope = debug_scope!(&self.context, "FlushMappedRanges"); + + // go through every range we wrote to + for range in ranges.into_iter() { + let &(memory, ref segment) = range.borrow(); + let range = memory.resolve(segment); + + let _scope = debug_scope!(&self.context, "Range({:?})", range); + memory.flush(&self.context, range); + } + + Ok(()) + } + + unsafe fn invalidate_mapped_memory_ranges<'a, I>( + &self, + ranges: I, + ) -> Result<(), device::OutOfMemory> + where + I: IntoIterator, + I::Item: Borrow<(&'a Memory, memory::Segment)>, + { + let _scope = debug_scope!(&self.context, "InvalidateMappedRanges"); + + // go through every range we want to read from + for range in ranges.into_iter() { + let &(memory, ref segment) = range.borrow(); + let range = memory.resolve(segment); + + let _scope = debug_scope!(&self.context, "Range({:?})", range); + memory.invalidate( + &self.context, + range, + self.internal.working_buffer.clone(), + self.internal.working_buffer_size, + ); + } + + Ok(()) + } + + fn create_semaphore(&self) -> Result<Semaphore, device::OutOfMemory> { + // TODO: + Ok(Semaphore) + } + + fn create_fence(&self, signalled: bool) -> Result<Fence, device::OutOfMemory> { + Ok(Arc::new(RawFence { + mutex: Mutex::new(signalled), + condvar: Condvar::new(), + })) + } + + unsafe fn reset_fence(&self, fence: &Fence) -> Result<(), device::OutOfMemory> { + *fence.mutex.lock() = false; + Ok(()) + } + + unsafe fn wait_for_fence( + &self, + fence: &Fence, + timeout_ns: u64, + ) -> Result<bool, device::OomOrDeviceLost> { + use std::time::{Duration, Instant}; + + debug!("wait_for_fence {:?} for {} ns", fence, timeout_ns); + let mut guard = fence.mutex.lock(); + match timeout_ns { + 0 => Ok(*guard), + 0xFFFFFFFFFFFFFFFF => { + while !*guard { + fence.condvar.wait(&mut guard); + } + Ok(true) + } + _ => { + let total = Duration::from_nanos(timeout_ns as u64); + let now = Instant::now(); + while !*guard { + let duration = match total.checked_sub(now.elapsed()) { + Some(dur) => dur, + None => return Ok(false), + }; + let result = fence.condvar.wait_for(&mut guard, duration); + if result.timed_out() { + return Ok(false); + } + } + Ok(true) + } + } + } + + 
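+    // Editorial note (not in the original source): wait_for_fence above treats a
+    // timeout of 0 as a non-blocking poll and 0xFFFFFFFFFFFFFFFF (u64::MAX) as
+    // "block forever"; any other value waits on the condvar against the remaining
+    // deadline and returns Ok(false) once it elapses.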
unsafe fn get_fence_status(&self, fence: &Fence) -> Result<bool, device::DeviceLost> { + Ok(*fence.mutex.lock()) + } + + fn create_event(&self) -> Result<(), device::OutOfMemory> { + unimplemented!() + } + + unsafe fn get_event_status(&self, _event: &()) -> Result<bool, device::OomOrDeviceLost> { + unimplemented!() + } + + unsafe fn set_event(&self, _event: &()) -> Result<(), device::OutOfMemory> { + unimplemented!() + } + + unsafe fn reset_event(&self, _event: &()) -> Result<(), device::OutOfMemory> { + unimplemented!() + } + + unsafe fn free_memory(&self, mut memory: Memory) { + if !memory.host_ptr.is_null() { + let _vec = + Vec::from_raw_parts(memory.host_ptr, memory.size as usize, memory.size as usize); + // let it drop + memory.host_ptr = ptr::null_mut(); + } + for (_, (_range, mut internal)) in memory.local_buffers.write().drain() { + internal.release_resources() + } + } + + unsafe fn create_query_pool( + &self, + _query_ty: query::Type, + _count: query::Id, + ) -> Result<QueryPool, query::CreationError> { + unimplemented!() + } + + unsafe fn destroy_query_pool(&self, _pool: QueryPool) { + unimplemented!() + } + + unsafe fn get_query_pool_results( + &self, + _pool: &QueryPool, + _queries: Range<query::Id>, + _data: &mut [u8], + _stride: buffer::Offset, + _flags: query::ResultFlags, + ) -> Result<bool, device::OomOrDeviceLost> { + unimplemented!() + } + + unsafe fn destroy_shader_module(&self, _shader_lib: ShaderModule) {} + + unsafe fn destroy_render_pass(&self, _rp: RenderPass) { + //unimplemented!() + } + + unsafe fn destroy_pipeline_layout(&self, _layout: PipelineLayout) { + //unimplemented!() + } + + unsafe fn destroy_graphics_pipeline(&self, _pipeline: GraphicsPipeline) {} + + unsafe fn destroy_compute_pipeline(&self, _pipeline: ComputePipeline) {} + + unsafe fn destroy_framebuffer(&self, _fb: Framebuffer) {} + + unsafe fn destroy_buffer(&self, buffer: Buffer) { + let mut internal = buffer.internal; + + if internal.raw.is_null() { + return; + } + + let arena_arc = match buffer.local_memory_arena.upgrade() { + Some(arena) => arena, + // Memory is destroyed before the buffer, we've already been destroyed. + None => return, + }; + let mut arena = arena_arc.write(); + + let memory_index = buffer.memory_index.expect("Buffer's memory index unset"); + // Drop the internal stored by the arena on the floor, it owns nothing. 
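+        // (The arena entry removed below is only bookkeeping; the COM objects
+        // themselves are freed by the `release_resources` call that follows.)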
+ let _ = arena.remove(memory_index); + + // Release all memory owned by this buffer + internal.release_resources(); + } + + unsafe fn destroy_buffer_view(&self, _view: BufferView) { + //unimplemented!() + } + + unsafe fn destroy_image(&self, mut image: Image) { + image.internal.release_resources(); + } + + unsafe fn destroy_image_view(&self, _view: ImageView) { + //unimplemented!() + } + + unsafe fn destroy_sampler(&self, _sampler: Sampler) {} + + unsafe fn destroy_descriptor_pool(&self, _pool: DescriptorPool) { + //unimplemented!() + } + + unsafe fn destroy_descriptor_set_layout(&self, _layout: DescriptorSetLayout) { + //unimplemented!() + } + + unsafe fn destroy_fence(&self, _fence: Fence) { + // unimplemented!() + } + + unsafe fn destroy_semaphore(&self, _semaphore: Semaphore) { + //unimplemented!() + } + + unsafe fn destroy_event(&self, _event: ()) { + //unimplemented!() + } + + fn wait_idle(&self) -> Result<(), device::OutOfMemory> { + Ok(()) + // unimplemented!() + } + + unsafe fn set_image_name(&self, image: &mut Image, name: &str) { + if !verify_debug_ascii(name) { + return; + } + + image.internal.debug_name = Some(name.to_string()); + } + + unsafe fn set_buffer_name(&self, buffer: &mut Buffer, name: &str) { + if !verify_debug_ascii(name) { + return; + } + + buffer.internal.debug_name = Some(name.to_string()); + } + + unsafe fn set_command_buffer_name(&self, command_buffer: &mut CommandBuffer, name: &str) { + if !verify_debug_ascii(name) { + return; + } + + command_buffer.debug_name = Some(name.to_string()); + } + + unsafe fn set_semaphore_name(&self, _semaphore: &mut Semaphore, _name: &str) { + // TODO + } + + unsafe fn set_fence_name(&self, _fence: &mut Fence, _name: &str) { + // TODO + } + + unsafe fn set_framebuffer_name(&self, _framebuffer: &mut Framebuffer, _name: &str) { + // TODO + } + + unsafe fn set_render_pass_name(&self, _render_pass: &mut RenderPass, _name: &str) { + // TODO + } + + unsafe fn set_descriptor_set_name(&self, _descriptor_set: &mut DescriptorSet, _name: &str) { + // TODO + } + + unsafe fn set_descriptor_set_layout_name( + &self, + _descriptor_set_layout: &mut DescriptorSetLayout, + _name: &str, + ) { + // TODO + } + + unsafe fn set_pipeline_layout_name(&self, _pipeline_layout: &mut PipelineLayout, _name: &str) { + // TODO + } + + unsafe fn set_compute_pipeline_name( + &self, + _compute_pipeline: &mut ComputePipeline, + _name: &str, + ) { + // TODO + } + + unsafe fn set_graphics_pipeline_name( + &self, + graphics_pipeline: &mut GraphicsPipeline, + name: &str, + ) { + if !verify_debug_ascii(name) { + return; + } + + let mut name = name.to_string(); + + set_debug_name_with_suffix(&graphics_pipeline.blend_state, &mut name, " -- Blend State"); + set_debug_name_with_suffix( + &graphics_pipeline.rasterizer_state, + &mut name, + " -- Rasterizer State", + ); + set_debug_name_with_suffix( + &graphics_pipeline.input_layout, + &mut name, + " -- Input Layout", + ); + if let Some(ref dss) = graphics_pipeline.depth_stencil_state { + set_debug_name_with_suffix(&dss.raw, &mut name, " -- Depth Stencil State"); + } + } +} diff --git a/third_party/rust/gfx-backend-dx11/src/dxgi.rs b/third_party/rust/gfx-backend-dx11/src/dxgi.rs new file mode 100644 index 0000000000..bd722d7554 --- /dev/null +++ b/third_party/rust/gfx-backend-dx11/src/dxgi.rs @@ -0,0 +1,213 @@ +use hal::adapter::{AdapterInfo, DeviceType}; + +use winapi::{ + shared::{ + dxgi, dxgi1_2, dxgi1_3, dxgi1_4, dxgi1_5, + guiddef::{GUID, REFIID}, + winerror, + }, + um::unknwnbase::IUnknown, + Interface, +}; + +use 
wio::com::ComPtr;
+
+use std::{ffi::OsString, mem, os::windows::ffi::OsStringExt, ptr};
+
+#[derive(Debug, Copy, Clone)]
+pub(crate) enum DxgiVersion {
+    /// Capable of the following interfaces:
+    /// * IDXGIObject
+    /// * IDXGIDeviceSubObject
+    /// * IDXGIResource
+    /// * IDXGIKeyedMutex
+    /// * IDXGISurface
+    /// * IDXGISurface1
+    /// * IDXGIOutput
+    /// * IDXGISwapChain
+    /// * IDXGIFactory
+    /// * IDXGIDevice
+    /// * IDXGIFactory1
+    /// * IDXGIAdapter1
+    /// * IDXGIDevice1
+    Dxgi1_0,
+
+    /// Capable of the following interfaces:
+    /// * IDXGIDisplayControl
+    /// * IDXGIOutputDuplication
+    /// * IDXGISurface2
+    /// * IDXGIResource1
+    /// * IDXGIDevice2
+    /// * IDXGISwapChain1
+    /// * IDXGIFactory2
+    /// * IDXGIAdapter2
+    /// * IDXGIOutput1
+    Dxgi1_2,
+
+    /// Capable of the following interfaces:
+    /// * IDXGIDevice3
+    /// * IDXGISwapChain2
+    /// * IDXGIOutput2
+    /// * IDXGIDecodeSwapChain
+    /// * IDXGIFactoryMedia
+    /// * IDXGISwapChainMedia
+    /// * IDXGIOutput3
+    Dxgi1_3,
+
+    /// Capable of the following interfaces:
+    /// * IDXGISwapChain3
+    /// * IDXGIOutput4
+    /// * IDXGIFactory4
+    /// * IDXGIAdapter3
+    Dxgi1_4,
+
+    /// Capable of the following interfaces:
+    /// * IDXGIOutput5
+    /// * IDXGISwapChain4
+    /// * IDXGIDevice4
+    /// * IDXGIFactory5
+    Dxgi1_5,
+}
+
+type DxgiFun =
+    unsafe extern "system" fn(REFIID, *mut *mut winapi::ctypes::c_void) -> winerror::HRESULT;
+
+fn create_dxgi_factory1(
+    func: &DxgiFun,
+    guid: &GUID,
+) -> Result<ComPtr<dxgi::IDXGIFactory>, winerror::HRESULT> {
+    let mut factory: *mut IUnknown = ptr::null_mut();
+
+    let hr = unsafe { func(guid, &mut factory as *mut *mut _ as *mut *mut _) };
+
+    if winerror::SUCCEEDED(hr) {
+        Ok(unsafe { ComPtr::from_raw(factory as *mut _) })
+    } else {
+        Err(hr)
+    }
+}
+
+pub(crate) fn get_dxgi_factory(
+) -> Result<(libloading::Library, ComPtr<dxgi::IDXGIFactory>, DxgiVersion), winerror::HRESULT> {
+    // The returned COM pointer is only safe to use for the lifetime of the Library.
+    let library = libloading::Library::new("dxgi.dll").map_err(|_| -1)?;
+    let func: libloading::Symbol<DxgiFun> =
+        unsafe { library.get(b"CreateDXGIFactory1") }.map_err(|_| -1)?;
+
+    // TODO: do we even need `create_dxgi_factory2`?
+    if let Ok(factory) = create_dxgi_factory1(&func, &dxgi1_5::IDXGIFactory5::uuidof()) {
+        return Ok((library, factory, DxgiVersion::Dxgi1_5));
+    }
+
+    if let Ok(factory) = create_dxgi_factory1(&func, &dxgi1_4::IDXGIFactory4::uuidof()) {
+        return Ok((library, factory, DxgiVersion::Dxgi1_4));
+    }
+
+    if let Ok(factory) = create_dxgi_factory1(&func, &dxgi1_3::IDXGIFactory3::uuidof()) {
+        return Ok((library, factory, DxgiVersion::Dxgi1_3));
+    }
+
+    if let Ok(factory) = create_dxgi_factory1(&func, &dxgi1_2::IDXGIFactory2::uuidof()) {
+        return Ok((library, factory, DxgiVersion::Dxgi1_2));
+    }
+
+    if let Ok(factory) = create_dxgi_factory1(&func, &dxgi::IDXGIFactory1::uuidof()) {
+        return Ok((library, factory, DxgiVersion::Dxgi1_0));
+    }
+
+    // TODO: any reason why the above would fail and this wouldn't?
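+    // Each probe above passes the IID of a progressively older factory
+    // interface to CreateDXGIFactory1; the first one the OS can satisfy wins.
+    // The pointer is stored as a plain IDXGIFactory, while `DxgiVersion`
+    // records which of the newer interfaces it is actually safe to cast to.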
+ match create_dxgi_factory1(&func, &dxgi::IDXGIFactory::uuidof()) { + Ok(factory) => Ok((library, factory, DxgiVersion::Dxgi1_0)), + Err(hr) => Err(hr), + } +} + +fn enum_adapters1( + idx: u32, + factory: *mut dxgi::IDXGIFactory, +) -> Result<ComPtr<dxgi::IDXGIAdapter>, winerror::HRESULT> { + let mut adapter: *mut dxgi::IDXGIAdapter = ptr::null_mut(); + + let hr = unsafe { + (*(factory as *mut dxgi::IDXGIFactory1)) + .EnumAdapters1(idx, &mut adapter as *mut *mut _ as *mut *mut _) + }; + + if winerror::SUCCEEDED(hr) { + Ok(unsafe { ComPtr::from_raw(adapter) }) + } else { + Err(hr) + } +} + +fn get_adapter_desc(adapter: *mut dxgi::IDXGIAdapter, version: DxgiVersion) -> AdapterInfo { + match version { + DxgiVersion::Dxgi1_0 => { + let mut desc: dxgi::DXGI_ADAPTER_DESC1 = unsafe { mem::zeroed() }; + unsafe { + (*(adapter as *mut dxgi::IDXGIAdapter1)).GetDesc1(&mut desc); + } + + let device_name = { + let len = desc.Description.iter().take_while(|&&c| c != 0).count(); + let name = <OsString as OsStringExt>::from_wide(&desc.Description[..len]); + name.to_string_lossy().into_owned() + }; + + AdapterInfo { + name: device_name, + vendor: desc.VendorId as usize, + device: desc.DeviceId as usize, + device_type: if (desc.Flags & dxgi::DXGI_ADAPTER_FLAG_SOFTWARE) != 0 { + DeviceType::VirtualGpu + } else { + DeviceType::DiscreteGpu + }, + } + } + DxgiVersion::Dxgi1_2 + | DxgiVersion::Dxgi1_3 + | DxgiVersion::Dxgi1_4 + | DxgiVersion::Dxgi1_5 => { + let mut desc: dxgi1_2::DXGI_ADAPTER_DESC2 = unsafe { mem::zeroed() }; + unsafe { + (*(adapter as *mut dxgi1_2::IDXGIAdapter2)).GetDesc2(&mut desc); + } + + let device_name = { + let len = desc.Description.iter().take_while(|&&c| c != 0).count(); + let name = <OsString as OsStringExt>::from_wide(&desc.Description[..len]); + name.to_string_lossy().into_owned() + }; + + AdapterInfo { + name: device_name, + vendor: desc.VendorId as usize, + device: desc.DeviceId as usize, + device_type: if (desc.Flags & dxgi::DXGI_ADAPTER_FLAG_SOFTWARE) != 0 { + DeviceType::VirtualGpu + } else { + DeviceType::DiscreteGpu + }, + } + } + } +} + +pub(crate) fn get_adapter( + idx: u32, + factory: *mut dxgi::IDXGIFactory, + version: DxgiVersion, +) -> Result<(ComPtr<dxgi::IDXGIAdapter>, AdapterInfo), winerror::HRESULT> { + let adapter = match version { + DxgiVersion::Dxgi1_0 + | DxgiVersion::Dxgi1_2 + | DxgiVersion::Dxgi1_3 + | DxgiVersion::Dxgi1_4 + | DxgiVersion::Dxgi1_5 => enum_adapters1(idx, factory)?, + }; + + let desc = get_adapter_desc(adapter.as_raw(), version); + + Ok((adapter, desc)) +} diff --git a/third_party/rust/gfx-backend-dx11/src/internal.rs b/third_party/rust/gfx-backend-dx11/src/internal.rs new file mode 100644 index 0000000000..21b0c3e93f --- /dev/null +++ b/third_party/rust/gfx-backend-dx11/src/internal.rs @@ -0,0 +1,1329 @@ +use auxil::{FastHashMap, ShaderStage}; +use hal::{command, image, pso}; + +use winapi::{ + shared::minwindef::UINT, + shared::{ + dxgiformat, + minwindef::{FALSE, TRUE}, + winerror, + }, + um::{d3d11, d3dcommon}, +}; + +use wio::com::ComPtr; + +use std::{borrow::Borrow, mem, ptr}; + +use parking_lot::Mutex; +use smallvec::SmallVec; +use spirv_cross; + +use crate::{conv, shader, Buffer, Image, RenderPassCache}; + +#[repr(C)] +struct BufferCopy { + src: u32, + dst: u32, + _padding: [u32; 2], +} + +#[repr(C)] +struct ImageCopy { + src: [u32; 4], + dst: [u32; 4], +} + +#[repr(C)] +struct BufferImageCopy { + buffer_offset: u32, + buffer_size: [u32; 2], + _padding: u32, + image_offset: [u32; 4], + image_extent: [u32; 4], + // actual size of the 
target image
+    image_size: [u32; 4],
+}
+
+#[repr(C)]
+struct BufferImageCopyInfo {
+    buffer: BufferCopy,
+    image: ImageCopy,
+    buffer_image: BufferImageCopy,
+}
+
+#[repr(C)]
+struct BlitInfo {
+    offset: [f32; 2],
+    extent: [f32; 2],
+    z: f32,
+    level: f32,
+}
+
+#[repr(C)]
+struct PartialClearInfo {
+    // transmuted between the types; easier than juggling all the different kinds of fields
+    data: [u32; 4],
+}
+
+// the thread-group count we use in our copy shaders
+const COPY_THREAD_GROUP_X: u32 = 8;
+const COPY_THREAD_GROUP_Y: u32 = 8;
+
+#[derive(Clone, Debug)]
+struct ComputeCopyBuffer {
+    d1_from_buffer: Option<ComPtr<d3d11::ID3D11ComputeShader>>,
+    // Buffer -> Image2D
+    d2_from_buffer: Option<ComPtr<d3d11::ID3D11ComputeShader>>,
+    // Image2D -> Buffer
+    d2_into_buffer: ComPtr<d3d11::ID3D11ComputeShader>,
+    scale: (u32, u32),
+}
+
+#[derive(Debug)]
+struct ConstantBuffer {
+    buffer: ComPtr<d3d11::ID3D11Buffer>,
+}
+
+impl ConstantBuffer {
+    unsafe fn update<T>(&mut self, context: &ComPtr<d3d11::ID3D11DeviceContext>, data: T) {
+        let mut mapped = mem::zeroed::<d3d11::D3D11_MAPPED_SUBRESOURCE>();
+        let hr = context.Map(
+            self.buffer.as_raw() as _,
+            0,
+            d3d11::D3D11_MAP_WRITE_DISCARD,
+            0,
+            &mut mapped,
+        );
+        assert_eq!(winerror::S_OK, hr);
+
+        ptr::copy(&data, mapped.pData as _, 1);
+
+        context.Unmap(self.buffer.as_raw() as _, 0);
+    }
+}
+
+// Holds everything we need for fallback implementations of features that are not in DX11.
+//
+// TODO: make struct fields more modular and group them up in structs depending on whether they
+// are a fallback version or not (eg. Option<PartialClear>); that should make the struct
+// definition and the `new` function smaller
+#[derive(Debug)]
+pub struct Internal {
+    // partial clearing
+    vs_partial_clear: ComPtr<d3d11::ID3D11VertexShader>,
+    ps_partial_clear_float: ComPtr<d3d11::ID3D11PixelShader>,
+    ps_partial_clear_uint: ComPtr<d3d11::ID3D11PixelShader>,
+    ps_partial_clear_int: ComPtr<d3d11::ID3D11PixelShader>,
+    ps_partial_clear_depth: ComPtr<d3d11::ID3D11PixelShader>,
+    ps_partial_clear_stencil: ComPtr<d3d11::ID3D11PixelShader>,
+    partial_clear_depth_stencil_state: ComPtr<d3d11::ID3D11DepthStencilState>,
+    partial_clear_depth_state: ComPtr<d3d11::ID3D11DepthStencilState>,
+    partial_clear_stencil_state: ComPtr<d3d11::ID3D11DepthStencilState>,
+
+    // blitting
+    vs_blit_2d: ComPtr<d3d11::ID3D11VertexShader>,
+
+    sampler_nearest: ComPtr<d3d11::ID3D11SamplerState>,
+    sampler_linear: ComPtr<d3d11::ID3D11SamplerState>,
+
+    ps_blit_2d_uint: ComPtr<d3d11::ID3D11PixelShader>,
+    ps_blit_2d_int: ComPtr<d3d11::ID3D11PixelShader>,
+    ps_blit_2d_float: ComPtr<d3d11::ID3D11PixelShader>,
+
+    // Image<->Image copies not covered by `CopySubresourceRegion`
+    cs_copy_image_shaders: FastHashMap<
+        (dxgiformat::DXGI_FORMAT, dxgiformat::DXGI_FORMAT),
+        ComPtr<d3d11::ID3D11ComputeShader>,
+    >,
+    // Image -> Buffer and Buffer -> Image shaders
+    cs_copy_buffer_shaders: FastHashMap<dxgiformat::DXGI_FORMAT, ComputeCopyBuffer>,
+
+    // internal constant buffer that is used by internal shaders
+    internal_buffer: Mutex<ConstantBuffer>,
+
+    // public buffer that is used as intermediate storage for some operations (memory invalidation)
+    pub working_buffer: ComPtr<d3d11::ID3D11Buffer>,
+    pub working_buffer_size: u64,
+
+    pub constant_buffer_count_buffer:
+        [UINT; d3d11::D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT as _],
+
+    /// Command lists are not supported by the graphics card and are being emulated.
+    /// Requires various workarounds to make things work correctly.
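+    /// (Detected at device creation by querying `D3D11_FEATURE_THREADING` and
+    /// checking `DriverCommandLists`; see `Internal::new` below.)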
+ pub command_list_emulation: bool, +} + +fn compile_blob(src: &[u8], entrypoint: &str, stage: ShaderStage) -> ComPtr<d3dcommon::ID3DBlob> { + unsafe { + ComPtr::from_raw( + shader::compile_hlsl_shader( + stage, + spirv_cross::hlsl::ShaderModel::V5_0, + entrypoint, + src, + ) + .unwrap(), + ) + } +} + +fn compile_vs( + device: &ComPtr<d3d11::ID3D11Device>, + src: &[u8], + entrypoint: &str, +) -> ComPtr<d3d11::ID3D11VertexShader> { + let bytecode = compile_blob(src, entrypoint, ShaderStage::Vertex); + let mut shader = ptr::null_mut(); + let hr = unsafe { + device.CreateVertexShader( + bytecode.GetBufferPointer(), + bytecode.GetBufferSize(), + ptr::null_mut(), + &mut shader as *mut *mut _ as *mut *mut _, + ) + }; + assert_eq!(true, winerror::SUCCEEDED(hr)); + + unsafe { ComPtr::from_raw(shader) } +} + +fn compile_ps( + device: &ComPtr<d3d11::ID3D11Device>, + src: &[u8], + entrypoint: &str, +) -> ComPtr<d3d11::ID3D11PixelShader> { + let bytecode = compile_blob(src, entrypoint, ShaderStage::Fragment); + let mut shader = ptr::null_mut(); + let hr = unsafe { + device.CreatePixelShader( + bytecode.GetBufferPointer(), + bytecode.GetBufferSize(), + ptr::null_mut(), + &mut shader as *mut *mut _ as *mut *mut _, + ) + }; + assert_eq!(true, winerror::SUCCEEDED(hr)); + + unsafe { ComPtr::from_raw(shader) } +} + +fn compile_cs( + device: &ComPtr<d3d11::ID3D11Device>, + src: &[u8], + entrypoint: &str, +) -> ComPtr<d3d11::ID3D11ComputeShader> { + let bytecode = compile_blob(src, entrypoint, ShaderStage::Compute); + let mut shader = ptr::null_mut(); + let hr = unsafe { + device.CreateComputeShader( + bytecode.GetBufferPointer(), + bytecode.GetBufferSize(), + ptr::null_mut(), + &mut shader as *mut *mut _ as *mut *mut _, + ) + }; + assert_eq!(true, winerror::SUCCEEDED(hr)); + + unsafe { ComPtr::from_raw(shader) } +} + +impl Internal { + pub fn new(device: &ComPtr<d3d11::ID3D11Device>) -> Self { + let internal_buffer = { + let desc = d3d11::D3D11_BUFFER_DESC { + ByteWidth: mem::size_of::<BufferImageCopyInfo>() as _, + Usage: d3d11::D3D11_USAGE_DYNAMIC, + BindFlags: d3d11::D3D11_BIND_CONSTANT_BUFFER, + CPUAccessFlags: d3d11::D3D11_CPU_ACCESS_WRITE, + MiscFlags: 0, + StructureByteStride: 0, + }; + + let mut buffer = ptr::null_mut(); + let hr = unsafe { + device.CreateBuffer( + &desc, + ptr::null_mut(), + &mut buffer as *mut *mut _ as *mut *mut _, + ) + }; + assert_eq!(true, winerror::SUCCEEDED(hr)); + + unsafe { ComPtr::from_raw(buffer) } + }; + + let (depth_stencil_state, depth_state, stencil_state) = { + let mut depth_state = ptr::null_mut(); + let mut stencil_state = ptr::null_mut(); + let mut depth_stencil_state = ptr::null_mut(); + + let mut desc = d3d11::D3D11_DEPTH_STENCIL_DESC { + DepthEnable: TRUE, + DepthWriteMask: d3d11::D3D11_DEPTH_WRITE_MASK_ALL, + DepthFunc: d3d11::D3D11_COMPARISON_ALWAYS, + StencilEnable: TRUE, + StencilReadMask: 0, + StencilWriteMask: !0, + FrontFace: d3d11::D3D11_DEPTH_STENCILOP_DESC { + StencilFailOp: d3d11::D3D11_STENCIL_OP_REPLACE, + StencilDepthFailOp: d3d11::D3D11_STENCIL_OP_REPLACE, + StencilPassOp: d3d11::D3D11_STENCIL_OP_REPLACE, + StencilFunc: d3d11::D3D11_COMPARISON_ALWAYS, + }, + BackFace: d3d11::D3D11_DEPTH_STENCILOP_DESC { + StencilFailOp: d3d11::D3D11_STENCIL_OP_REPLACE, + StencilDepthFailOp: d3d11::D3D11_STENCIL_OP_REPLACE, + StencilPassOp: d3d11::D3D11_STENCIL_OP_REPLACE, + StencilFunc: d3d11::D3D11_COMPARISON_ALWAYS, + }, + }; + + let hr = unsafe { + device.CreateDepthStencilState( + &desc, + &mut depth_stencil_state as *mut *mut _ as *mut *mut _, + ) + }; + 
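+            // (The desc above uses ALWAYS comparison functions and REPLACE
+            // stencil ops, so the fullscreen clear triangle overwrites both
+            // aspects unconditionally; the two states created next toggle
+            // DepthEnable/StencilEnable off in turn for the depth-only and
+            // stencil-only variants.)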
assert_eq!(winerror::S_OK, hr); + + desc.DepthEnable = TRUE; + desc.StencilEnable = FALSE; + + let hr = unsafe { + device + .CreateDepthStencilState(&desc, &mut depth_state as *mut *mut _ as *mut *mut _) + }; + assert_eq!(winerror::S_OK, hr); + + desc.DepthEnable = FALSE; + desc.StencilEnable = TRUE; + + let hr = unsafe { + device.CreateDepthStencilState( + &desc, + &mut stencil_state as *mut *mut _ as *mut *mut _, + ) + }; + assert_eq!(winerror::S_OK, hr); + + unsafe { + ( + ComPtr::from_raw(depth_stencil_state), + ComPtr::from_raw(depth_state), + ComPtr::from_raw(stencil_state), + ) + } + }; + + let (sampler_nearest, sampler_linear) = { + let mut desc = d3d11::D3D11_SAMPLER_DESC { + Filter: d3d11::D3D11_FILTER_MIN_MAG_MIP_POINT, + AddressU: d3d11::D3D11_TEXTURE_ADDRESS_CLAMP, + AddressV: d3d11::D3D11_TEXTURE_ADDRESS_CLAMP, + AddressW: d3d11::D3D11_TEXTURE_ADDRESS_CLAMP, + MipLODBias: 0f32, + MaxAnisotropy: 0, + ComparisonFunc: 0, + BorderColor: [0f32; 4], + MinLOD: 0f32, + MaxLOD: d3d11::D3D11_FLOAT32_MAX, + }; + + let mut nearest = ptr::null_mut(); + let mut linear = ptr::null_mut(); + + assert_eq!(winerror::S_OK, unsafe { + device.CreateSamplerState(&desc, &mut nearest as *mut *mut _ as *mut *mut _) + }); + + desc.Filter = d3d11::D3D11_FILTER_MIN_MAG_MIP_LINEAR; + + assert_eq!(winerror::S_OK, unsafe { + device.CreateSamplerState(&desc, &mut linear as *mut *mut _ as *mut *mut _) + }); + + unsafe { (ComPtr::from_raw(nearest), ComPtr::from_raw(linear)) } + }; + + let (working_buffer, working_buffer_size) = { + let working_buffer_size = 1 << 16; + + let desc = d3d11::D3D11_BUFFER_DESC { + ByteWidth: working_buffer_size, + Usage: d3d11::D3D11_USAGE_STAGING, + BindFlags: 0, + CPUAccessFlags: d3d11::D3D11_CPU_ACCESS_READ | d3d11::D3D11_CPU_ACCESS_WRITE, + MiscFlags: 0, + StructureByteStride: 0, + }; + let mut working_buffer = ptr::null_mut(); + + assert_eq!(winerror::S_OK, unsafe { + device.CreateBuffer( + &desc, + ptr::null_mut(), + &mut working_buffer as *mut *mut _ as *mut *mut _, + ) + }); + + ( + unsafe { ComPtr::from_raw(working_buffer) }, + working_buffer_size, + ) + }; + + let clear_shaders = include_bytes!("../shaders/clear.hlsl"); + let copy_shaders = include_bytes!("../shaders/copy.hlsl"); + let blit_shaders = include_bytes!("../shaders/blit.hlsl"); + + let mut cs_copy_image_shaders = FastHashMap::default(); + cs_copy_image_shaders.insert( + ( + dxgiformat::DXGI_FORMAT_R8G8_UINT, + dxgiformat::DXGI_FORMAT_R16_UINT, + ), + compile_cs(device, copy_shaders, "cs_copy_image2d_r8g8_image2d_r16"), + ); + cs_copy_image_shaders.insert( + ( + dxgiformat::DXGI_FORMAT_R16_UINT, + dxgiformat::DXGI_FORMAT_R8G8_UINT, + ), + compile_cs(device, copy_shaders, "cs_copy_image2d_r16_image2d_r8g8"), + ); + cs_copy_image_shaders.insert( + ( + dxgiformat::DXGI_FORMAT_R8G8B8A8_UINT, + dxgiformat::DXGI_FORMAT_R32_UINT, + ), + compile_cs(device, copy_shaders, "cs_copy_image2d_r8g8b8a8_image2d_r32"), + ); + cs_copy_image_shaders.insert( + ( + dxgiformat::DXGI_FORMAT_R8G8B8A8_UINT, + dxgiformat::DXGI_FORMAT_R16G16_UINT, + ), + compile_cs( + device, + copy_shaders, + "cs_copy_image2d_r8g8b8a8_image2d_r16g16", + ), + ); + cs_copy_image_shaders.insert( + ( + dxgiformat::DXGI_FORMAT_R16G16_UINT, + dxgiformat::DXGI_FORMAT_R32_UINT, + ), + compile_cs(device, copy_shaders, "cs_copy_image2d_r16g16_image2d_r32"), + ); + cs_copy_image_shaders.insert( + ( + dxgiformat::DXGI_FORMAT_R16G16_UINT, + dxgiformat::DXGI_FORMAT_R8G8B8A8_UINT, + ), + compile_cs( + device, + copy_shaders, + 
"cs_copy_image2d_r16g16_image2d_r8g8b8a8", + ), + ); + cs_copy_image_shaders.insert( + ( + dxgiformat::DXGI_FORMAT_R32_UINT, + dxgiformat::DXGI_FORMAT_R16G16_UINT, + ), + compile_cs(device, copy_shaders, "cs_copy_image2d_r32_image2d_r16g16"), + ); + cs_copy_image_shaders.insert( + ( + dxgiformat::DXGI_FORMAT_R32_UINT, + dxgiformat::DXGI_FORMAT_R8G8B8A8_UINT, + ), + compile_cs(device, copy_shaders, "cs_copy_image2d_r32_image2d_r8g8b8a8"), + ); + + let mut cs_copy_buffer_shaders = FastHashMap::default(); + cs_copy_buffer_shaders.insert( + dxgiformat::DXGI_FORMAT_R32G32B32A32_UINT, + ComputeCopyBuffer { + d1_from_buffer: None, + d2_from_buffer: Some(compile_cs( + device, + copy_shaders, + "cs_copy_buffer_image2d_r32g32b32a32", + )), + d2_into_buffer: compile_cs( + device, + copy_shaders, + "cs_copy_image2d_r32g32b32a32_buffer", + ), + scale: (1, 1), + }, + ); + cs_copy_buffer_shaders.insert( + dxgiformat::DXGI_FORMAT_R32G32_UINT, + ComputeCopyBuffer { + d1_from_buffer: None, + d2_from_buffer: Some(compile_cs( + device, + copy_shaders, + "cs_copy_buffer_image2d_r32g32", + )), + d2_into_buffer: compile_cs(device, copy_shaders, "cs_copy_image2d_r32g32_buffer"), + scale: (1, 1), + }, + ); + cs_copy_buffer_shaders.insert( + dxgiformat::DXGI_FORMAT_R32_UINT, + ComputeCopyBuffer { + d1_from_buffer: Some(compile_cs( + device, + copy_shaders, + "cs_copy_buffer_image1d_r32", + )), + d2_from_buffer: Some(compile_cs( + device, + copy_shaders, + "cs_copy_buffer_image2d_r32", + )), + d2_into_buffer: compile_cs(device, copy_shaders, "cs_copy_image2d_r32_buffer"), + scale: (1, 1), + }, + ); + cs_copy_buffer_shaders.insert( + dxgiformat::DXGI_FORMAT_R16G16B16A16_UINT, + ComputeCopyBuffer { + d1_from_buffer: None, + d2_from_buffer: Some(compile_cs( + device, + copy_shaders, + "cs_copy_buffer_image2d_r16g16b16a16", + )), + d2_into_buffer: compile_cs( + device, + copy_shaders, + "cs_copy_image2d_r16g16b16a16_buffer", + ), + scale: (1, 1), + }, + ); + cs_copy_buffer_shaders.insert( + dxgiformat::DXGI_FORMAT_R16G16_UINT, + ComputeCopyBuffer { + d1_from_buffer: None, + d2_from_buffer: Some(compile_cs( + device, + copy_shaders, + "cs_copy_buffer_image2d_r16g16", + )), + d2_into_buffer: compile_cs(device, copy_shaders, "cs_copy_image2d_r16g16_buffer"), + scale: (1, 1), + }, + ); + cs_copy_buffer_shaders.insert( + dxgiformat::DXGI_FORMAT_R16_UINT, + ComputeCopyBuffer { + d1_from_buffer: None, + d2_from_buffer: Some(compile_cs( + device, + copy_shaders, + "cs_copy_buffer_image2d_r16", + )), + d2_into_buffer: compile_cs(device, copy_shaders, "cs_copy_image2d_r16_buffer"), + scale: (2, 1), + }, + ); + cs_copy_buffer_shaders.insert( + dxgiformat::DXGI_FORMAT_B8G8R8A8_UNORM, + ComputeCopyBuffer { + d1_from_buffer: None, + d2_from_buffer: None, + d2_into_buffer: compile_cs(device, copy_shaders, "cs_copy_image2d_b8g8r8a8_buffer"), + scale: (1, 1), + }, + ); + cs_copy_buffer_shaders.insert( + dxgiformat::DXGI_FORMAT_R8G8B8A8_UINT, + ComputeCopyBuffer { + d1_from_buffer: Some(compile_cs( + device, + copy_shaders, + "cs_copy_buffer_image1d_r8g8b8a8", + )), + d2_from_buffer: Some(compile_cs( + device, + copy_shaders, + "cs_copy_buffer_image2d_r8g8b8a8", + )), + d2_into_buffer: compile_cs(device, copy_shaders, "cs_copy_image2d_r8g8b8a8_buffer"), + scale: (1, 1), + }, + ); + cs_copy_buffer_shaders.insert( + dxgiformat::DXGI_FORMAT_R8G8_UINT, + ComputeCopyBuffer { + d1_from_buffer: Some(compile_cs( + device, + copy_shaders, + "cs_copy_buffer_image1d_r8g8", + )), + d2_from_buffer: Some(compile_cs( + device, + copy_shaders, + 
"cs_copy_buffer_image2d_r8g8", + )), + d2_into_buffer: compile_cs(device, copy_shaders, "cs_copy_image2d_r8g8_buffer"), + scale: (2, 1), + }, + ); + cs_copy_buffer_shaders.insert( + dxgiformat::DXGI_FORMAT_R8_UINT, + ComputeCopyBuffer { + d1_from_buffer: Some(compile_cs( + device, + copy_shaders, + "cs_copy_buffer_image1d_r8", + )), + d2_from_buffer: Some(compile_cs( + device, + copy_shaders, + "cs_copy_buffer_image2d_r8", + )), + d2_into_buffer: compile_cs(device, copy_shaders, "cs_copy_image2d_r8_buffer"), + scale: (4, 1), + }, + ); + + let mut threading_capability: d3d11::D3D11_FEATURE_DATA_THREADING = + unsafe { mem::zeroed() }; + let hr = unsafe { + device.CheckFeatureSupport( + d3d11::D3D11_FEATURE_THREADING, + &mut threading_capability as *mut _ as *mut _, + mem::size_of::<d3d11::D3D11_FEATURE_DATA_THREADING>() as _, + ) + }; + assert_eq!(hr, winerror::S_OK); + + let command_list_emulation = !(threading_capability.DriverCommandLists >= 1); + if command_list_emulation { + info!("D3D11 command list emulation is active"); + } + + Internal { + vs_partial_clear: compile_vs(device, clear_shaders, "vs_partial_clear"), + ps_partial_clear_float: compile_ps(device, clear_shaders, "ps_partial_clear_float"), + ps_partial_clear_uint: compile_ps(device, clear_shaders, "ps_partial_clear_uint"), + ps_partial_clear_int: compile_ps(device, clear_shaders, "ps_partial_clear_int"), + ps_partial_clear_depth: compile_ps(device, clear_shaders, "ps_partial_clear_depth"), + ps_partial_clear_stencil: compile_ps(device, clear_shaders, "ps_partial_clear_stencil"), + partial_clear_depth_stencil_state: depth_stencil_state, + partial_clear_depth_state: depth_state, + partial_clear_stencil_state: stencil_state, + + vs_blit_2d: compile_vs(device, blit_shaders, "vs_blit_2d"), + + sampler_nearest, + sampler_linear, + + ps_blit_2d_uint: compile_ps(device, blit_shaders, "ps_blit_2d_uint"), + ps_blit_2d_int: compile_ps(device, blit_shaders, "ps_blit_2d_int"), + ps_blit_2d_float: compile_ps(device, blit_shaders, "ps_blit_2d_float"), + + cs_copy_image_shaders, + cs_copy_buffer_shaders, + + internal_buffer: Mutex::new(ConstantBuffer { + buffer: internal_buffer, + }), + working_buffer, + working_buffer_size: working_buffer_size as _, + + constant_buffer_count_buffer: [4096_u32; + d3d11::D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT as _], + + command_list_emulation, + } + } + + pub fn copy_image_2d<T>( + &self, + context: &ComPtr<d3d11::ID3D11DeviceContext>, + src: &Image, + dst: &Image, + regions: T, + ) where + T: IntoIterator, + T::Item: Borrow<command::ImageCopy>, + { + let key = ( + src.decomposed_format.copy_srv.unwrap(), + dst.decomposed_format.copy_srv.unwrap(), + ); + if let Some(shader) = self.cs_copy_image_shaders.get(&key) { + // Some formats cant go through default path, since they cant + // be cast between formats of different component types (eg. 
+ // Rg16 <-> Rgba8) + + // TODO: subresources + let srv = src.internal.copy_srv.clone().unwrap().as_raw(); + let mut const_buf = self.internal_buffer.lock(); + + unsafe { + context.CSSetShader(shader.as_raw(), ptr::null_mut(), 0); + context.CSSetConstantBuffers(0, 1, &const_buf.buffer.as_raw()); + context.CSSetShaderResources(0, 1, [srv].as_ptr()); + + for region in regions.into_iter() { + let info = region.borrow(); + let image = ImageCopy { + src: [ + info.src_offset.x as _, + info.src_offset.y as _, + info.src_offset.z as _, + 0, + ], + dst: [ + info.dst_offset.x as _, + info.dst_offset.y as _, + info.dst_offset.z as _, + 0, + ], + }; + const_buf.update( + context, + BufferImageCopyInfo { + image, + ..mem::zeroed() + }, + ); + + let uav = dst.get_uav(info.dst_subresource.level, 0).unwrap().as_raw(); + context.CSSetUnorderedAccessViews(0, 1, [uav].as_ptr(), ptr::null_mut()); + + context.Dispatch(info.extent.width as u32, info.extent.height as u32, 1); + } + + // unbind external resources + context.CSSetShaderResources(0, 1, [ptr::null_mut(); 1].as_ptr()); + context.CSSetUnorderedAccessViews( + 0, + 1, + [ptr::null_mut(); 1].as_ptr(), + ptr::null_mut(), + ); + } + } else { + // Default copy path + for region in regions.into_iter() { + let info: &command::ImageCopy = region.borrow(); + + assert_eq!( + src.decomposed_format.typeless, dst.decomposed_format.typeless, + "DX11 backend cannot copy between underlying image formats: {} to {}.", + src.decomposed_format.typeless, dst.decomposed_format.typeless, + ); + + // Formats are the same per above assert, only need to do it for one of the formats + let full_copy_only = + src.format.is_depth() || src.format.is_stencil() || src.kind.num_samples() > 1; + + let copy_box = if full_copy_only { + let offset_zero = info.src_offset.x == 0 + && info.src_offset.y == 0 + && info.src_offset.z == 0 + && info.dst_offset.x == 0 + && info.dst_offset.y == 0 + && info.dst_offset.z == 0; + + let full_extent = info.extent == src.kind.extent(); + + if !offset_zero || !full_extent { + warn!("image to image copies of depth-stencil or multisampled textures must copy the whole resource. 
Ignoring non-zero offset or non-full extent."); + } + + None + } else { + Some(d3d11::D3D11_BOX { + left: info.src_offset.x as _, + top: info.src_offset.y as _, + front: info.src_offset.z as _, + right: info.src_offset.x as u32 + info.extent.width as u32, + bottom: info.src_offset.y as u32 + info.extent.height as u32, + back: info.src_offset.z as u32 + info.extent.depth as u32, + }) + }; + + // TODO: layer subresources + unsafe { + context.CopySubresourceRegion( + dst.internal.raw, + src.calc_subresource(info.src_subresource.level as _, 0), + info.dst_offset.x as _, + info.dst_offset.y as _, + info.dst_offset.z as _, + src.internal.raw, + dst.calc_subresource(info.dst_subresource.level as _, 0), + copy_box.map_or_else(ptr::null, |b| &b), + ); + } + } + } + } + + pub fn copy_image_to_buffer<T>( + &self, + context: &ComPtr<d3d11::ID3D11DeviceContext>, + src: &Image, + dst: &Buffer, + regions: T, + ) where + T: IntoIterator, + T::Item: Borrow<command::BufferImageCopy>, + { + let _scope = debug_scope!( + context, + "Image (format={:?},kind={:?}) => Buffer", + src.format, + src.kind + ); + let shader = self + .cs_copy_buffer_shaders + .get(&src.decomposed_format.copy_srv.unwrap()) + .unwrap() + .clone(); + + let srv = src.internal.copy_srv.clone().unwrap().as_raw(); + let uav = dst.internal.uav.unwrap(); + let format_desc = src.format.base_format().0.desc(); + let bytes_per_texel = format_desc.bits as u32 / 8; + let mut const_buf = self.internal_buffer.lock(); + + unsafe { + context.CSSetShader(shader.d2_into_buffer.as_raw(), ptr::null_mut(), 0); + context.CSSetConstantBuffers(0, 1, &const_buf.buffer.as_raw()); + + context.CSSetShaderResources(0, 1, [srv].as_ptr()); + context.CSSetUnorderedAccessViews(0, 1, [uav].as_ptr(), ptr::null_mut()); + + for copy in regions { + let info = copy.borrow(); + let size = src.kind.extent(); + let buffer_image = BufferImageCopy { + buffer_offset: info.buffer_offset as _, + buffer_size: [info.buffer_width, info.buffer_height], + _padding: 0, + image_offset: [ + info.image_offset.x as _, + info.image_offset.y as _, + (info.image_offset.z + info.image_layers.layers.start as i32) as _, + 0, + ], + image_extent: [ + info.image_extent.width, + info.image_extent.height, + info.image_extent.depth, + 0, + ], + image_size: [size.width, size.height, size.depth, 0], + }; + + const_buf.update( + context, + BufferImageCopyInfo { + buffer_image, + ..mem::zeroed() + }, + ); + + debug_marker!(context, "{:?}", info); + + context.Dispatch( + ((info.image_extent.width + (COPY_THREAD_GROUP_X - 1)) + / COPY_THREAD_GROUP_X + / shader.scale.0) + .max(1), + ((info.image_extent.height + (COPY_THREAD_GROUP_X - 1)) + / COPY_THREAD_GROUP_Y + / shader.scale.1) + .max(1), + 1, + ); + + if let Some(disjoint_cb) = dst.internal.disjoint_cb { + let total_size = info.image_extent.depth + * (info.buffer_height * info.buffer_width * bytes_per_texel); + let copy_box = d3d11::D3D11_BOX { + left: info.buffer_offset as u32, + top: 0, + front: 0, + right: info.buffer_offset as u32 + total_size, + bottom: 1, + back: 1, + }; + + context.CopySubresourceRegion( + disjoint_cb as _, + 0, + info.buffer_offset as _, + 0, + 0, + dst.internal.raw as _, + 0, + ©_box, + ); + } + } + + // unbind external resources + context.CSSetShaderResources(0, 1, [ptr::null_mut(); 1].as_ptr()); + context.CSSetUnorderedAccessViews(0, 1, [ptr::null_mut(); 1].as_ptr(), ptr::null_mut()); + } + } + + pub fn copy_buffer_to_image<T>( + &self, + context: &ComPtr<d3d11::ID3D11DeviceContext>, + src: &Buffer, + dst: &Image, + regions: T, + 
) where + T: IntoIterator, + T::Item: Borrow<command::BufferImageCopy>, + { + let _scope = debug_scope!( + context, + "Buffer => Image (format={:?},kind={:?})", + dst.format, + dst.kind + ); + // NOTE: we have two separate paths for Buffer -> Image transfers. we need to special case + // uploads to compressed formats through `UpdateSubresource` since we cannot get a + // UAV of any compressed format. + + let format_desc = dst.format.base_format().0.desc(); + if format_desc.is_compressed() { + // we dont really care about non-4x4 block formats.. + assert_eq!(format_desc.dim, (4, 4)); + assert!( + !src.memory_ptr.is_null(), + "Only CPU to GPU upload of compressed texture is supported atm" + ); + + for copy in regions { + let info: &command::BufferImageCopy = copy.borrow(); + + let bytes_per_texel = format_desc.bits as u32 / 8; + + let bounds = d3d11::D3D11_BOX { + left: info.image_offset.x as _, + top: info.image_offset.y as _, + front: info.image_offset.z as _, + right: info.image_offset.x as u32 + info.image_extent.width, + bottom: info.image_offset.y as u32 + info.image_extent.height, + back: info.image_offset.z as u32 + info.image_extent.depth, + }; + + let row_pitch = bytes_per_texel * info.image_extent.width / 4; + let depth_pitch = row_pitch * info.image_extent.height / 4; + + for layer in info.image_layers.layers.clone() { + let layer_offset = layer - info.image_layers.layers.start; + + unsafe { + context.UpdateSubresource( + dst.internal.raw, + dst.calc_subresource(info.image_layers.level as _, layer as _), + &bounds, + src.memory_ptr.offset( + src.bound_range.start as isize + + info.buffer_offset as isize + + depth_pitch as isize * layer_offset as isize, + ) as _, + row_pitch, + depth_pitch, + ); + } + } + } + } else { + let shader = self + .cs_copy_buffer_shaders + .get(&dst.decomposed_format.copy_uav.unwrap()) + .unwrap() + .clone(); + + let srv = src.internal.srv.unwrap(); + let mut const_buf = self.internal_buffer.lock(); + let shader_raw = match dst.kind { + image::Kind::D1(..) => shader.d1_from_buffer.unwrap().as_raw(), + image::Kind::D2(..) => shader.d2_from_buffer.unwrap().as_raw(), + image::Kind::D3(..) => panic!("Copies into 3D images are not supported"), + }; + + unsafe { + context.CSSetShader(shader_raw, ptr::null_mut(), 0); + context.CSSetConstantBuffers(0, 1, &const_buf.buffer.as_raw()); + context.CSSetShaderResources(0, 1, [srv].as_ptr()); + + for copy in regions { + let info = copy.borrow(); + let size = dst.kind.extent(); + let buffer_image = BufferImageCopy { + buffer_offset: info.buffer_offset as _, + buffer_size: [info.buffer_width, info.buffer_height], + _padding: 0, + image_offset: [ + info.image_offset.x as _, + info.image_offset.y as _, + (info.image_offset.z + info.image_layers.layers.start as i32) as _, + 0, + ], + image_extent: [ + info.image_extent.width, + info.image_extent.height, + info.image_extent.depth, + 0, + ], + image_size: [size.width, size.height, size.depth, 0], + }; + + const_buf.update( + context, + BufferImageCopyInfo { + buffer_image, + ..mem::zeroed() + }, + ); + + debug_marker!(context, "{:?}", info); + + // TODO: multiple layers? do we introduce a stride and do multiple dispatch + // calls or handle this in the shader? 
(use z component in dispatch call + // + // NOTE: right now our copy UAV is a 2D array, and we set the layer in the + // `update_buffer_image` call above + let uav = dst + .get_uav( + info.image_layers.level, + 0, /*info.image_layers.layers.start*/ + ) + .unwrap() + .as_raw(); + context.CSSetUnorderedAccessViews(0, 1, [uav].as_ptr(), ptr::null_mut()); + + context.Dispatch( + ((info.image_extent.width + (COPY_THREAD_GROUP_X - 1)) + / COPY_THREAD_GROUP_X + / shader.scale.0) + .max(1), + ((info.image_extent.height + (COPY_THREAD_GROUP_X - 1)) + / COPY_THREAD_GROUP_Y + / shader.scale.1) + .max(1), + 1, + ); + } + + // unbind external resources + context.CSSetShaderResources(0, 1, [ptr::null_mut(); 1].as_ptr()); + context.CSSetUnorderedAccessViews( + 0, + 1, + [ptr::null_mut(); 1].as_ptr(), + ptr::null_mut(), + ); + } + } + } + + fn find_blit_shader(&self, src: &Image) -> Option<*mut d3d11::ID3D11PixelShader> { + use crate::format::ChannelType as Ct; + + match src.format.base_format().1 { + Ct::Uint => Some(self.ps_blit_2d_uint.as_raw()), + Ct::Sint => Some(self.ps_blit_2d_int.as_raw()), + Ct::Unorm | Ct::Snorm | Ct::Sfloat | Ct::Srgb => Some(self.ps_blit_2d_float.as_raw()), + Ct::Ufloat | Ct::Uscaled | Ct::Sscaled => None, + } + } + + pub fn blit_2d_image<T>( + &self, + context: &ComPtr<d3d11::ID3D11DeviceContext>, + src: &Image, + dst: &Image, + filter: image::Filter, + regions: T, + ) where + T: IntoIterator, + T::Item: Borrow<command::ImageBlit>, + { + use std::cmp; + + let _scope = debug_scope!( + context, + "Blit: Image (format={:?},kind={:?}) => Image (format={:?},kind={:?})", + src.format, + src.kind, + dst.format, + dst.kind + ); + + let shader = self.find_blit_shader(src).unwrap(); + + let srv = src.internal.srv.clone().unwrap().as_raw(); + let mut const_buf = self.internal_buffer.lock(); + + unsafe { + context.IASetPrimitiveTopology(d3dcommon::D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + context.VSSetShader(self.vs_blit_2d.as_raw(), ptr::null_mut(), 0); + context.VSSetConstantBuffers(0, 1, [const_buf.buffer.as_raw()].as_ptr()); + context.PSSetShader(shader, ptr::null_mut(), 0); + context.PSSetShaderResources(0, 1, [srv].as_ptr()); + context.PSSetSamplers( + 0, + 1, + match filter { + image::Filter::Nearest => [self.sampler_nearest.as_raw()], + image::Filter::Linear => [self.sampler_linear.as_raw()], + } + .as_ptr(), + ); + + for region in regions { + let info = region.borrow(); + let blit_info = { + let (sx, dx) = if info.dst_bounds.start.x > info.dst_bounds.end.x { + ( + info.src_bounds.end.x, + info.src_bounds.start.x - info.src_bounds.end.x, + ) + } else { + ( + info.src_bounds.start.x, + info.src_bounds.end.x - info.src_bounds.start.x, + ) + }; + let (sy, dy) = if info.dst_bounds.start.y > info.dst_bounds.end.y { + ( + info.src_bounds.end.y, + info.src_bounds.start.y - info.src_bounds.end.y, + ) + } else { + ( + info.src_bounds.start.y, + info.src_bounds.end.y - info.src_bounds.start.y, + ) + }; + let image::Extent { width, height, .. 
} = + src.kind.level_extent(info.src_subresource.level); + BlitInfo { + offset: [sx as f32 / width as f32, sy as f32 / height as f32], + extent: [dx as f32 / width as f32, dy as f32 / height as f32], + z: 0f32, // TODO + level: info.src_subresource.level as _, + } + }; + + const_buf.update(context, blit_info); + + // TODO: more layers + let rtv = dst + .get_rtv( + info.dst_subresource.level, + info.dst_subresource.layers.start, + ) + .unwrap() + .as_raw(); + + context.RSSetViewports( + 1, + [d3d11::D3D11_VIEWPORT { + TopLeftX: cmp::min(info.dst_bounds.start.x, info.dst_bounds.end.x) as _, + TopLeftY: cmp::min(info.dst_bounds.start.y, info.dst_bounds.end.y) as _, + Width: (info.dst_bounds.end.x - info.dst_bounds.start.x).abs() as _, + Height: (info.dst_bounds.end.y - info.dst_bounds.start.y).abs() as _, + MinDepth: 0.0f32, + MaxDepth: 1.0f32, + }] + .as_ptr(), + ); + context.OMSetRenderTargets(1, [rtv].as_ptr(), ptr::null_mut()); + context.Draw(3, 0); + } + + context.PSSetShaderResources(0, 1, [ptr::null_mut()].as_ptr()); + context.OMSetRenderTargets(1, [ptr::null_mut()].as_ptr(), ptr::null_mut()); + } + } + + pub fn clear_attachments<T, U>( + &self, + context: &ComPtr<d3d11::ID3D11DeviceContext>, + clears: T, + rects: U, + cache: &RenderPassCache, + ) where + T: IntoIterator, + T::Item: Borrow<command::AttachmentClear>, + U: IntoIterator, + U::Item: Borrow<pso::ClearRect>, + { + use hal::format::ChannelType as Ct; + let _scope = debug_scope!(context, "ClearAttachments"); + + let clear_rects: SmallVec<[pso::ClearRect; 8]> = rects + .into_iter() + .map(|rect| rect.borrow().clone()) + .collect(); + let mut const_buf = self.internal_buffer.lock(); + + unsafe { + context.IASetPrimitiveTopology(d3dcommon::D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + context.IASetInputLayout(ptr::null_mut()); + context.VSSetShader(self.vs_partial_clear.as_raw(), ptr::null_mut(), 0); + context.PSSetConstantBuffers(0, 1, [const_buf.buffer.as_raw()].as_ptr()); + } + + let subpass = &cache.render_pass.subpasses[cache.current_subpass as usize]; + + for clear in clears { + let clear = clear.borrow(); + + let _scope = debug_scope!(context, "{:?}", clear); + + match *clear { + command::AttachmentClear::Color { index, value } => { + unsafe { + const_buf.update( + context, + PartialClearInfo { + data: mem::transmute(value), + }, + ) + }; + + let attachment = { + let rtv_id = subpass.color_attachments[index]; + &cache.framebuffer.attachments[rtv_id.0] + }; + + unsafe { + context.OMSetRenderTargets( + 1, + [attachment.rtv_handle.unwrap()].as_ptr(), + ptr::null_mut(), + ); + } + + let shader = match attachment.format.base_format().1 { + Ct::Uint => self.ps_partial_clear_uint.as_raw(), + Ct::Sint => self.ps_partial_clear_int.as_raw(), + _ => self.ps_partial_clear_float.as_raw(), + }; + unsafe { context.PSSetShader(shader, ptr::null_mut(), 0) }; + + for clear_rect in &clear_rects { + let viewport = conv::map_viewport(&pso::Viewport { + rect: clear_rect.rect, + depth: 0f32..1f32, + }); + + debug_marker!(context, "{:?}", clear_rect.rect); + + unsafe { + context.RSSetViewports(1, [viewport].as_ptr()); + context.Draw(3, 0); + } + } + } + command::AttachmentClear::DepthStencil { depth, stencil } => { + unsafe { + const_buf.update( + context, + PartialClearInfo { + data: [ + mem::transmute(depth.unwrap_or(0f32)), + stencil.unwrap_or(0), + 0, + 0, + ], + }, + ) + }; + + let attachment = { + let dsv_id = subpass.depth_stencil_attachment.unwrap(); + &cache.framebuffer.attachments[dsv_id.0] + }; + + unsafe { + match (depth, stencil) { + 
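+                        // Select the precreated depth-stencil state matching the
+                        // aspects being cleared: both, depth-only, or
+                        // stencil-only (see `Internal::new`).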
(Some(_), Some(stencil)) => {
+                            context.OMSetDepthStencilState(
+                                self.partial_clear_depth_stencil_state.as_raw(),
+                                stencil,
+                            );
+                            context.PSSetShader(
+                                self.ps_partial_clear_depth.as_raw(),
+                                ptr::null_mut(),
+                                0,
+                            );
+                        }
+
+                        (Some(_), None) => {
+                            context.OMSetDepthStencilState(
+                                self.partial_clear_depth_state.as_raw(),
+                                0,
+                            );
+                            context.PSSetShader(
+                                self.ps_partial_clear_depth.as_raw(),
+                                ptr::null_mut(),
+                                0,
+                            );
+                        }
+
+                        (None, Some(stencil)) => {
+                            context.OMSetDepthStencilState(
+                                self.partial_clear_stencil_state.as_raw(),
+                                stencil,
+                            );
+                            context.PSSetShader(
+                                self.ps_partial_clear_stencil.as_raw(),
+                                ptr::null_mut(),
+                                0,
+                            );
+                        }
+                        (None, None) => {}
+                    }
+
+                    context.OMSetRenderTargets(
+                        0,
+                        ptr::null_mut(),
+                        attachment.dsv_handle.unwrap(),
+                    );
+                    context.PSSetShader(
+                        self.ps_partial_clear_depth.as_raw(),
+                        ptr::null_mut(),
+                        0,
+                    );
+                }
+
+                for clear_rect in &clear_rects {
+                    let viewport = conv::map_viewport(&pso::Viewport {
+                        rect: clear_rect.rect,
+                        depth: 0f32..1f32,
+                    });
+
+                    unsafe {
+                        context.RSSetViewports(1, [viewport].as_ptr());
+                        context.Draw(3, 0);
+                    }
+                }
+            }
+        }
+    }
+}
+}
diff --git a/third_party/rust/gfx-backend-dx11/src/lib.rs b/third_party/rust/gfx-backend-dx11/src/lib.rs
new file mode 100644
index 0000000000..831effedae
--- /dev/null
+++ b/third_party/rust/gfx-backend-dx11/src/lib.rs
@@ -0,0 +1,4285 @@
+/*!
+# DX11 backend internals.
+
+## Pipeline Layout
+
+In D3D11 there are tables of CBVs, SRVs, UAVs, and samplers.
+
+Each descriptor type can take one or two of those entry points.
+
+The descriptor pool is just an array of handles, belonging to descriptor set 1, descriptor set 2, etc.
+Each range of descriptors in a descriptor set's area of the pool is split into shader stages,
+which in turn are split into CBV/SRV/UAV/sampler parts. That allows binding a descriptor set as a list
+of contiguous descriptor ranges (per type, per shader stage).
+
+!*/
+
+//#[deny(missing_docs)]
+
+#[macro_use]
+extern crate bitflags;
+#[macro_use]
+extern crate log;
+
+use crate::{debug::set_debug_name, device::DepthStencilState};
+use auxil::ShaderStage;
+use hal::{
+    adapter, buffer, command, format, image, memory, pass, pso, query, queue, window, DrawCount,
+    IndexCount, IndexType, InstanceCount, Limits, TaskCount, VertexCount, VertexOffset,
+    WorkGroupCount,
+};
+use range_alloc::RangeAllocator;
+
+use winapi::{
+    shared::{
+        dxgi::{IDXGIAdapter, IDXGIFactory, IDXGISwapChain},
+        dxgiformat,
+        minwindef::{FALSE, HMODULE, UINT},
+        windef::{HWND, RECT},
+        winerror,
+    },
+    um::{d3d11, d3d11_1, d3dcommon, winuser::GetClientRect},
+    Interface as _,
+};
+
+use wio::com::ComPtr;
+
+use arrayvec::ArrayVec;
+use parking_lot::{Condvar, Mutex, RwLock};
+
+use std::{
+    borrow::Borrow,
+    cell::RefCell,
+    fmt, mem,
+    ops::Range,
+    os::raw::c_void,
+    ptr,
+    sync::{Arc, Weak},
+};
+
+macro_rules! debug_scope {
+    ($context:expr, $($arg:tt)+) => ({
+        #[cfg(debug_assertions)]
+        {
+            $crate::debug::DebugScope::with_name(
+                $context,
+                format_args!($($arg)+),
+            )
+        }
+        #[cfg(not(debug_assertions))]
+        {
+            ()
+        }
+    });
+}
+
+macro_rules!
debug_marker { + ($context:expr, $($arg:tt)+) => ({ + #[cfg(debug_assertions)] + { + $crate::debug::debug_marker( + $context, + &format!($($arg)+), + ); + } + }); +} + +mod conv; +mod debug; +mod device; +mod dxgi; +mod internal; +mod shader; + +type CreateFun = unsafe extern "system" fn( + *mut IDXGIAdapter, + UINT, + HMODULE, + UINT, + *const UINT, + UINT, + UINT, + *mut *mut d3d11::ID3D11Device, + *mut UINT, + *mut *mut d3d11::ID3D11DeviceContext, +) -> winerror::HRESULT; + +#[derive(Clone)] +pub(crate) struct ViewInfo { + resource: *mut d3d11::ID3D11Resource, + kind: image::Kind, + caps: image::ViewCapabilities, + view_kind: image::ViewKind, + format: dxgiformat::DXGI_FORMAT, + levels: Range<image::Level>, + layers: Range<image::Layer>, +} + +impl fmt::Debug for ViewInfo { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str("ViewInfo") + } +} + +#[derive(Debug)] +pub struct Instance { + pub(crate) factory: ComPtr<IDXGIFactory>, + pub(crate) dxgi_version: dxgi::DxgiVersion, + library_d3d11: Arc<libloading::Library>, + library_dxgi: libloading::Library, +} + +unsafe impl Send for Instance {} +unsafe impl Sync for Instance {} + +impl Instance { + pub fn create_surface_from_hwnd(&self, hwnd: *mut c_void) -> Surface { + Surface { + factory: self.factory.clone(), + wnd_handle: hwnd as *mut _, + presentation: None, + } + } +} + +fn get_features( + _device: ComPtr<d3d11::ID3D11Device>, + feature_level: d3dcommon::D3D_FEATURE_LEVEL, +) -> hal::Features { + let mut features = hal::Features::empty() + | hal::Features::ROBUST_BUFFER_ACCESS // TODO: verify + | hal::Features::INSTANCE_RATE + | hal::Features::INDEPENDENT_BLENDING // TODO: verify + | hal::Features::SAMPLER_BORDER_COLOR + | hal::Features::SAMPLER_MIP_LOD_BIAS + | hal::Features::SAMPLER_MIRROR_CLAMP_EDGE + | hal::Features::SAMPLER_ANISOTROPY + | hal::Features::DEPTH_CLAMP + | hal::Features::NDC_Y_UP; + + features.set( + hal::Features::TEXTURE_DESCRIPTOR_ARRAY + | hal::Features::FULL_DRAW_INDEX_U32 + | hal::Features::GEOMETRY_SHADER, + feature_level >= d3dcommon::D3D_FEATURE_LEVEL_10_0, + ); + + features.set( + hal::Features::IMAGE_CUBE_ARRAY, + feature_level >= d3dcommon::D3D_FEATURE_LEVEL_10_1, + ); + + features.set( + hal::Features::VERTEX_STORES_AND_ATOMICS + | hal::Features::FRAGMENT_STORES_AND_ATOMICS + | hal::Features::FORMAT_BC + | hal::Features::TESSELLATION_SHADER + | hal::Features::DRAW_INDIRECT_FIRST_INSTANCE, + feature_level >= d3dcommon::D3D_FEATURE_LEVEL_11_0, + ); + + features.set( + hal::Features::LOGIC_OP, // TODO: Optional at 10_0 -> 11_0 + feature_level >= d3dcommon::D3D_FEATURE_LEVEL_11_1, + ); + + features +} + +const MAX_PUSH_CONSTANT_SIZE: usize = 256; + +fn get_limits(feature_level: d3dcommon::D3D_FEATURE_LEVEL) -> hal::Limits { + let max_texture_uv_dimension = match feature_level { + d3dcommon::D3D_FEATURE_LEVEL_9_1 | d3dcommon::D3D_FEATURE_LEVEL_9_2 => 2048, + d3dcommon::D3D_FEATURE_LEVEL_9_3 => 4096, + d3dcommon::D3D_FEATURE_LEVEL_10_0 | d3dcommon::D3D_FEATURE_LEVEL_10_1 => 8192, + d3dcommon::D3D_FEATURE_LEVEL_11_0 | d3dcommon::D3D_FEATURE_LEVEL_11_1 | _ => 16384, + }; + + let max_texture_w_dimension = match feature_level { + d3dcommon::D3D_FEATURE_LEVEL_9_1 + | d3dcommon::D3D_FEATURE_LEVEL_9_2 + | d3dcommon::D3D_FEATURE_LEVEL_9_3 => 256, + d3dcommon::D3D_FEATURE_LEVEL_10_0 + | d3dcommon::D3D_FEATURE_LEVEL_10_1 + | d3dcommon::D3D_FEATURE_LEVEL_11_0 + | d3dcommon::D3D_FEATURE_LEVEL_11_1 + | _ => 2048, + }; + + let max_texture_cube_dimension = match feature_level { + 
d3dcommon::D3D_FEATURE_LEVEL_9_1 | d3dcommon::D3D_FEATURE_LEVEL_9_2 => 512, + _ => max_texture_uv_dimension, + }; + + let max_image_uav = 2; + let max_buffer_uav = d3d11::D3D11_PS_CS_UAV_REGISTER_COUNT as usize - max_image_uav; + + let max_input_slots = match feature_level { + d3dcommon::D3D_FEATURE_LEVEL_9_1 + | d3dcommon::D3D_FEATURE_LEVEL_9_2 + | d3dcommon::D3D_FEATURE_LEVEL_9_3 + | d3dcommon::D3D_FEATURE_LEVEL_10_0 => 16, + d3dcommon::D3D_FEATURE_LEVEL_10_1 + | d3dcommon::D3D_FEATURE_LEVEL_11_0 + | d3dcommon::D3D_FEATURE_LEVEL_11_1 + | _ => 32, + }; + + let max_color_attachments = match feature_level { + d3dcommon::D3D_FEATURE_LEVEL_9_1 + | d3dcommon::D3D_FEATURE_LEVEL_9_2 + | d3dcommon::D3D_FEATURE_LEVEL_9_3 + | d3dcommon::D3D_FEATURE_LEVEL_10_0 => 4, + d3dcommon::D3D_FEATURE_LEVEL_10_1 + | d3dcommon::D3D_FEATURE_LEVEL_11_0 + | d3dcommon::D3D_FEATURE_LEVEL_11_1 + | _ => 8, + }; + + // https://docs.microsoft.com/en-us/windows/win32/api/d3d11/nf-d3d11-id3d11device-checkmultisamplequalitylevels#remarks + // for more information. + let max_samples = match feature_level { + d3dcommon::D3D_FEATURE_LEVEL_9_1 + | d3dcommon::D3D_FEATURE_LEVEL_9_2 + | d3dcommon::D3D_FEATURE_LEVEL_9_3 + | d3dcommon::D3D_FEATURE_LEVEL_10_0 => 0b0001, // Conservative, MSAA isn't required. + d3dcommon::D3D_FEATURE_LEVEL_10_1 => 0b0101, // Optimistic, 4xMSAA is required on all formats _but_ RGBA32. + d3dcommon::D3D_FEATURE_LEVEL_11_0 | d3dcommon::D3D_FEATURE_LEVEL_11_1 | _ => 0b1101, // Optimistic, 8xMSAA and 4xMSAA is required on all formats _but_ RGBA32 which requires 4x. + }; + + let max_constant_buffers = d3d11::D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT - 1; + + hal::Limits { + max_image_1d_size: max_texture_uv_dimension, + max_image_2d_size: max_texture_uv_dimension, + max_image_3d_size: max_texture_w_dimension, + max_image_cube_size: max_texture_cube_dimension, + max_image_array_layers: max_texture_cube_dimension as _, + max_per_stage_descriptor_samplers: d3d11::D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT as _, + // Leave top buffer for push constants + max_per_stage_descriptor_uniform_buffers: max_constant_buffers as _, + max_per_stage_descriptor_storage_buffers: max_buffer_uav, + max_per_stage_descriptor_storage_images: max_image_uav, + max_per_stage_descriptor_sampled_images: + d3d11::D3D11_COMMONSHADER_INPUT_RESOURCE_REGISTER_COUNT as _, + max_descriptor_set_uniform_buffers_dynamic: max_constant_buffers as _, + max_descriptor_set_storage_buffers_dynamic: 0, // TODO: Implement dynamic offsets for storage buffers + max_texel_elements: max_texture_uv_dimension as _, //TODO + max_patch_size: d3d11::D3D11_IA_PATCH_MAX_CONTROL_POINT_COUNT as _, + max_viewports: d3d11::D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE as _, + max_viewport_dimensions: [d3d11::D3D11_VIEWPORT_BOUNDS_MAX; 2], + max_framebuffer_extent: hal::image::Extent { + //TODO + width: 4096, + height: 4096, + depth: 1, + }, + max_compute_work_group_count: [ + d3d11::D3D11_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION, + d3d11::D3D11_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION, + d3d11::D3D11_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION, + ], + max_compute_work_group_invocations: d3d11::D3D11_CS_THREAD_GROUP_MAX_THREADS_PER_GROUP as _, + max_compute_work_group_size: [ + d3d11::D3D11_CS_THREAD_GROUP_MAX_X, + d3d11::D3D11_CS_THREAD_GROUP_MAX_Y, + d3d11::D3D11_CS_THREAD_GROUP_MAX_Z, + ], // TODO + max_vertex_input_attribute_offset: 255, // TODO + max_vertex_input_attributes: max_input_slots, + max_vertex_input_binding_stride: 
d3d11::D3D11_REQ_MULTI_ELEMENT_STRUCTURE_SIZE_IN_BYTES + as _, + max_vertex_input_bindings: d3d11::D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT as _, // TODO: verify same as attributes + max_vertex_output_components: d3d11::D3D11_VS_OUTPUT_REGISTER_COUNT as _, // TODO + min_texel_buffer_offset_alignment: 1, // TODO + min_uniform_buffer_offset_alignment: 16, + min_storage_buffer_offset_alignment: 16, // TODO + framebuffer_color_sample_counts: max_samples, + framebuffer_depth_sample_counts: max_samples, + framebuffer_stencil_sample_counts: max_samples, + max_color_attachments, + buffer_image_granularity: 1, + non_coherent_atom_size: 1, // TODO + max_sampler_anisotropy: 16., + optimal_buffer_copy_offset_alignment: 1, // TODO + // buffer -> image and image -> buffer paths use compute shaders that, at maximum, read 4 pixels from the buffer + // at a time, so need an alignment of at least 4. + optimal_buffer_copy_pitch_alignment: 4, + min_vertex_input_binding_stride_alignment: 1, + max_push_constants_size: MAX_PUSH_CONSTANT_SIZE, + max_uniform_buffer_range: 1 << 16, + ..hal::Limits::default() //TODO + } +} + +fn get_format_properties( + device: ComPtr<d3d11::ID3D11Device>, +) -> [format::Properties; format::NUM_FORMATS] { + let mut format_properties = [format::Properties::default(); format::NUM_FORMATS]; + for (i, props) in &mut format_properties.iter_mut().enumerate().skip(1) { + let format: format::Format = unsafe { mem::transmute(i as u32) }; + + let dxgi_format = match conv::map_format(format) { + Some(format) => format, + None => continue, + }; + + let mut support = d3d11::D3D11_FEATURE_DATA_FORMAT_SUPPORT { + InFormat: dxgi_format, + OutFormatSupport: 0, + }; + let mut support_2 = d3d11::D3D11_FEATURE_DATA_FORMAT_SUPPORT2 { + InFormat: dxgi_format, + OutFormatSupport2: 0, + }; + + let hr = unsafe { + device.CheckFeatureSupport( + d3d11::D3D11_FEATURE_FORMAT_SUPPORT, + &mut support as *mut _ as *mut _, + mem::size_of::<d3d11::D3D11_FEATURE_DATA_FORMAT_SUPPORT>() as UINT, + ) + }; + + if hr == winerror::S_OK { + let can_buffer = 0 != support.OutFormatSupport & d3d11::D3D11_FORMAT_SUPPORT_BUFFER; + let can_image = 0 + != support.OutFormatSupport + & (d3d11::D3D11_FORMAT_SUPPORT_TEXTURE1D + | d3d11::D3D11_FORMAT_SUPPORT_TEXTURE2D + | d3d11::D3D11_FORMAT_SUPPORT_TEXTURE3D + | d3d11::D3D11_FORMAT_SUPPORT_TEXTURECUBE); + let can_linear = can_image && !format.surface_desc().is_compressed(); + if can_image { + props.optimal_tiling |= + format::ImageFeature::SAMPLED | format::ImageFeature::BLIT_SRC; + } + if can_linear { + props.linear_tiling |= + format::ImageFeature::SAMPLED | format::ImageFeature::BLIT_SRC; + } + if support.OutFormatSupport & d3d11::D3D11_FORMAT_SUPPORT_IA_VERTEX_BUFFER != 0 { + props.buffer_features |= format::BufferFeature::VERTEX; + } + if support.OutFormatSupport & d3d11::D3D11_FORMAT_SUPPORT_SHADER_SAMPLE != 0 { + props.optimal_tiling |= format::ImageFeature::SAMPLED_LINEAR; + } + if support.OutFormatSupport & d3d11::D3D11_FORMAT_SUPPORT_RENDER_TARGET != 0 { + props.optimal_tiling |= + format::ImageFeature::COLOR_ATTACHMENT | format::ImageFeature::BLIT_DST; + if can_linear { + props.linear_tiling |= + format::ImageFeature::COLOR_ATTACHMENT | format::ImageFeature::BLIT_DST; + } + } + if support.OutFormatSupport & d3d11::D3D11_FORMAT_SUPPORT_BLENDABLE != 0 { + props.optimal_tiling |= format::ImageFeature::COLOR_ATTACHMENT_BLEND; + } + if support.OutFormatSupport & d3d11::D3D11_FORMAT_SUPPORT_DEPTH_STENCIL != 0 { + props.optimal_tiling |= 
format::ImageFeature::DEPTH_STENCIL_ATTACHMENT; + } + if support.OutFormatSupport & d3d11::D3D11_FORMAT_SUPPORT_SHADER_LOAD != 0 { + //TODO: check d3d12::D3D12_FORMAT_SUPPORT2_UAV_TYPED_LOAD ? + if can_buffer { + props.buffer_features |= format::BufferFeature::UNIFORM_TEXEL; + } + } + + let hr = unsafe { + device.CheckFeatureSupport( + d3d11::D3D11_FEATURE_FORMAT_SUPPORT2, + &mut support_2 as *mut _ as *mut _, + mem::size_of::<d3d11::D3D11_FEATURE_DATA_FORMAT_SUPPORT2>() as UINT, + ) + }; + if hr == winerror::S_OK { + if support_2.OutFormatSupport2 & d3d11::D3D11_FORMAT_SUPPORT2_UAV_ATOMIC_ADD != 0 { + //TODO: other atomic flags? + if can_buffer { + props.buffer_features |= format::BufferFeature::STORAGE_TEXEL_ATOMIC; + } + if can_image { + props.optimal_tiling |= format::ImageFeature::STORAGE_ATOMIC; + } + } + if support_2.OutFormatSupport2 & d3d11::D3D11_FORMAT_SUPPORT2_UAV_TYPED_STORE != 0 { + if can_buffer { + props.buffer_features |= format::BufferFeature::STORAGE_TEXEL; + } + if can_image { + props.optimal_tiling |= format::ImageFeature::STORAGE; + } + } + } + } + + //TODO: blits, linear tiling + } + + format_properties +} + +impl hal::Instance<Backend> for Instance { + fn create(_: &str, _: u32) -> Result<Self, hal::UnsupportedBackend> { + // TODO: get the latest factory we can find + + match dxgi::get_dxgi_factory() { + Ok((library_dxgi, factory, dxgi_version)) => { + info!("DXGI version: {:?}", dxgi_version); + let library_d3d11 = Arc::new( + libloading::Library::new("d3d11.dll").map_err(|_| hal::UnsupportedBackend)?, + ); + Ok(Instance { + factory, + dxgi_version, + library_d3d11, + library_dxgi, + }) + } + Err(hr) => { + info!("Failed on factory creation: {:?}", hr); + Err(hal::UnsupportedBackend) + } + } + } + + fn enumerate_adapters(&self) -> Vec<adapter::Adapter<Backend>> { + let mut adapters = Vec::new(); + let mut idx = 0; + + let func: libloading::Symbol<CreateFun> = + match unsafe { self.library_d3d11.get(b"D3D11CreateDevice") } { + Ok(func) => func, + Err(e) => { + error!("Unable to get device creation function: {:?}", e); + return Vec::new(); + } + }; + + while let Ok((adapter, info)) = + dxgi::get_adapter(idx, self.factory.as_raw(), self.dxgi_version) + { + idx += 1; + + use hal::memory::Properties; + + // TODO: move into function? + let (device, feature_level) = { + let feature_level = get_feature_level(&func, adapter.as_raw()); + + let mut device = ptr::null_mut(); + let hr = unsafe { + func( + adapter.as_raw() as *mut _, + d3dcommon::D3D_DRIVER_TYPE_UNKNOWN, + ptr::null_mut(), + 0, + [feature_level].as_ptr(), + 1, + d3d11::D3D11_SDK_VERSION, + &mut device as *mut *mut _ as *mut *mut _, + ptr::null_mut(), + ptr::null_mut(), + ) + }; + + if !winerror::SUCCEEDED(hr) { + continue; + } + + ( + unsafe { ComPtr::<d3d11::ID3D11Device>::from_raw(device) }, + feature_level, + ) + }; + + let memory_properties = adapter::MemoryProperties { + memory_types: vec![ + adapter::MemoryType { + properties: Properties::DEVICE_LOCAL, + heap_index: 0, + }, + adapter::MemoryType { + properties: Properties::CPU_VISIBLE + | Properties::COHERENT + | Properties::CPU_CACHED, + heap_index: 1, + }, + adapter::MemoryType { + properties: Properties::CPU_VISIBLE | Properties::CPU_CACHED, + heap_index: 1, + }, + ], + // TODO: would using *VideoMemory and *SystemMemory from + // DXGI_ADAPTER_DESC be too optimistic? 
:) + memory_heaps: vec![ + adapter::MemoryHeap { + size: !0, + flags: memory::HeapFlags::DEVICE_LOCAL, + }, + adapter::MemoryHeap { + size: !0, + flags: memory::HeapFlags::empty(), + }, + ], + }; + + let limits = get_limits(feature_level); + let features = get_features(device.clone(), feature_level); + let format_properties = get_format_properties(device.clone()); + let hints = hal::Hints::BASE_VERTEX_INSTANCE_DRAWING; + + let physical_device = PhysicalDevice { + adapter, + library_d3d11: Arc::clone(&self.library_d3d11), + features, + hints, + limits, + memory_properties, + format_properties, + }; + + info!("{:#?}", info); + + adapters.push(adapter::Adapter { + info, + physical_device, + queue_families: vec![QueueFamily], + }); + } + + adapters + } + + unsafe fn create_surface( + &self, + has_handle: &impl raw_window_handle::HasRawWindowHandle, + ) -> Result<Surface, hal::window::InitError> { + match has_handle.raw_window_handle() { + raw_window_handle::RawWindowHandle::Windows(handle) => { + Ok(self.create_surface_from_hwnd(handle.hwnd)) + } + _ => Err(hal::window::InitError::UnsupportedWindowHandle), + } + } + + unsafe fn destroy_surface(&self, _surface: Surface) { + // TODO: Implement Surface cleanup + } +} + +pub struct PhysicalDevice { + adapter: ComPtr<IDXGIAdapter>, + library_d3d11: Arc<libloading::Library>, + features: hal::Features, + hints: hal::Hints, + limits: hal::Limits, + memory_properties: adapter::MemoryProperties, + format_properties: [format::Properties; format::NUM_FORMATS], +} + +impl fmt::Debug for PhysicalDevice { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str("PhysicalDevice") + } +} + +unsafe impl Send for PhysicalDevice {} +unsafe impl Sync for PhysicalDevice {} + +// TODO: does the adapter we get earlier matter for feature level? +fn get_feature_level(func: &CreateFun, adapter: *mut IDXGIAdapter) -> d3dcommon::D3D_FEATURE_LEVEL { + let requested_feature_levels = [ + d3dcommon::D3D_FEATURE_LEVEL_11_1, + d3dcommon::D3D_FEATURE_LEVEL_11_0, + d3dcommon::D3D_FEATURE_LEVEL_10_1, + d3dcommon::D3D_FEATURE_LEVEL_10_0, + d3dcommon::D3D_FEATURE_LEVEL_9_3, + d3dcommon::D3D_FEATURE_LEVEL_9_2, + d3dcommon::D3D_FEATURE_LEVEL_9_1, + ]; + + let mut feature_level = d3dcommon::D3D_FEATURE_LEVEL_9_1; + let hr = unsafe { + func( + adapter, + d3dcommon::D3D_DRIVER_TYPE_UNKNOWN, + ptr::null_mut(), + 0, + requested_feature_levels[..].as_ptr(), + requested_feature_levels.len() as _, + d3d11::D3D11_SDK_VERSION, + ptr::null_mut(), + &mut feature_level as *mut _, + ptr::null_mut(), + ) + }; + + if !winerror::SUCCEEDED(hr) { + // if there is no 11.1 runtime installed, requesting + // `D3D_FEATURE_LEVEL_11_1` will return E_INVALIDARG so we just retry + // without that + if hr == winerror::E_INVALIDARG { + let hr = unsafe { + func( + adapter, + d3dcommon::D3D_DRIVER_TYPE_UNKNOWN, + ptr::null_mut(), + 0, + requested_feature_levels[1..].as_ptr(), + (requested_feature_levels.len() - 1) as _, + d3d11::D3D11_SDK_VERSION, + ptr::null_mut(), + &mut feature_level as *mut _, + ptr::null_mut(), + ) + }; + + if !winerror::SUCCEEDED(hr) { + // TODO: device might not support any feature levels? 
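+                    // A gentler fallback (not implemented here) would be to retry with a
+                    // null feature-level array, which D3D11CreateDevice documents as
+                    // selecting the highest level the device supports up to FL11_0.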
+ unimplemented!(); + } + } + } + + feature_level +} + +// TODO: PhysicalDevice +impl adapter::PhysicalDevice<Backend> for PhysicalDevice { + unsafe fn open( + &self, + families: &[(&QueueFamily, &[queue::QueuePriority])], + requested_features: hal::Features, + ) -> Result<adapter::Gpu<Backend>, hal::device::CreationError> { + let func: libloading::Symbol<CreateFun> = + self.library_d3d11.get(b"D3D11CreateDevice").unwrap(); + + let (device, cxt) = { + if !self.features().contains(requested_features) { + return Err(hal::device::CreationError::MissingFeature); + } + + let feature_level = get_feature_level(&func, self.adapter.as_raw()); + let mut returned_level = d3dcommon::D3D_FEATURE_LEVEL_9_1; + + #[cfg(debug_assertions)] + let create_flags = d3d11::D3D11_CREATE_DEVICE_DEBUG; + #[cfg(not(debug_assertions))] + let create_flags = 0; + + // TODO: request debug device only on debug config? + let mut device: *mut d3d11::ID3D11Device = ptr::null_mut(); + let mut cxt = ptr::null_mut(); + let hr = func( + self.adapter.as_raw() as *mut _, + d3dcommon::D3D_DRIVER_TYPE_UNKNOWN, + ptr::null_mut(), + create_flags, + [feature_level].as_ptr(), + 1, + d3d11::D3D11_SDK_VERSION, + &mut device as *mut *mut _ as *mut *mut _, + &mut returned_level as *mut _, + &mut cxt as *mut *mut _ as *mut *mut _, + ); + + // NOTE: returns error if adapter argument is non-null and driver + // type is not unknown; or if debug device is requested but not + // present + if !winerror::SUCCEEDED(hr) { + return Err(hal::device::CreationError::InitializationFailed); + } + + info!( + "feature level={:x}=FL{}_{}", + feature_level, + feature_level >> 12, + feature_level >> 8 & 0xF + ); + + (ComPtr::from_raw(device), ComPtr::from_raw(cxt)) + }; + + let device1 = device.cast::<d3d11_1::ID3D11Device1>().ok(); + + let device = device::Device::new( + device, + device1, + cxt, + requested_features, + self.memory_properties.clone(), + ); + + // TODO: deferred context => 1 cxt/queue? + let queue_groups = families + .into_iter() + .map(|&(_family, prio)| { + assert_eq!(prio.len(), 1); + let mut group = queue::QueueGroup::new(queue::QueueFamilyId(0)); + + // TODO: multiple queues? 
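+                // D3D11 exposes a single immediate context, so every queue in this
+                // group wraps the same context; command buffers are recorded on
+                // deferred contexts and replayed against it via ExecuteCommandList
+                // at submit time.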
+ let queue = CommandQueue { + context: device.context.clone(), + }; + group.add_queue(queue); + group + }) + .collect(); + + Ok(adapter::Gpu { + device, + queue_groups, + }) + } + + fn format_properties(&self, fmt: Option<format::Format>) -> format::Properties { + let idx = fmt.map(|fmt| fmt as usize).unwrap_or(0); + self.format_properties[idx] + } + + fn image_format_properties( + &self, + format: format::Format, + dimensions: u8, + tiling: image::Tiling, + usage: image::Usage, + view_caps: image::ViewCapabilities, + ) -> Option<image::FormatProperties> { + conv::map_format(format)?; //filter out unknown formats + + let supported_usage = { + use hal::image::Usage as U; + let format_props = &self.format_properties[format as usize]; + let props = match tiling { + image::Tiling::Optimal => format_props.optimal_tiling, + image::Tiling::Linear => format_props.linear_tiling, + }; + let mut flags = U::empty(); + // Note: these checks would have been nicer if we had explicit BLIT usage + if props.contains(format::ImageFeature::BLIT_SRC) { + flags |= U::TRANSFER_SRC; + } + if props.contains(format::ImageFeature::BLIT_DST) { + flags |= U::TRANSFER_DST; + } + if props.contains(format::ImageFeature::SAMPLED) { + flags |= U::SAMPLED; + } + if props.contains(format::ImageFeature::STORAGE) { + flags |= U::STORAGE; + } + if props.contains(format::ImageFeature::COLOR_ATTACHMENT) { + flags |= U::COLOR_ATTACHMENT; + } + if props.contains(format::ImageFeature::DEPTH_STENCIL_ATTACHMENT) { + flags |= U::DEPTH_STENCIL_ATTACHMENT; + } + flags + }; + if !supported_usage.contains(usage) { + return None; + } + + let max_resource_size = + (d3d11::D3D11_REQ_RESOURCE_SIZE_IN_MEGABYTES_EXPRESSION_A_TERM as usize) << 20; + Some(match tiling { + image::Tiling::Optimal => image::FormatProperties { + max_extent: match dimensions { + 1 => image::Extent { + width: d3d11::D3D11_REQ_TEXTURE1D_U_DIMENSION, + height: 1, + depth: 1, + }, + 2 => image::Extent { + width: d3d11::D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION, + height: d3d11::D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION, + depth: 1, + }, + 3 => image::Extent { + width: d3d11::D3D11_REQ_TEXTURE3D_U_V_OR_W_DIMENSION, + height: d3d11::D3D11_REQ_TEXTURE3D_U_V_OR_W_DIMENSION, + depth: d3d11::D3D11_REQ_TEXTURE3D_U_V_OR_W_DIMENSION, + }, + _ => return None, + }, + max_levels: d3d11::D3D11_REQ_MIP_LEVELS as _, + max_layers: match dimensions { + 1 => d3d11::D3D11_REQ_TEXTURE1D_ARRAY_AXIS_DIMENSION as _, + 2 => d3d11::D3D11_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION as _, + _ => return None, + }, + sample_count_mask: if dimensions == 2 + && !view_caps.contains(image::ViewCapabilities::KIND_CUBE) + && (usage.contains(image::Usage::COLOR_ATTACHMENT) + | usage.contains(image::Usage::DEPTH_STENCIL_ATTACHMENT)) + { + 0x3F //TODO: use D3D12_FEATURE_DATA_FORMAT_SUPPORT + } else { + 0x1 + }, + max_resource_size, + }, + image::Tiling::Linear => image::FormatProperties { + max_extent: match dimensions { + 2 => image::Extent { + width: d3d11::D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION, + height: d3d11::D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION, + depth: 1, + }, + _ => return None, + }, + max_levels: 1, + max_layers: 1, + sample_count_mask: 0x1, + max_resource_size, + }, + }) + } + + fn memory_properties(&self) -> adapter::MemoryProperties { + self.memory_properties.clone() + } + + fn features(&self) -> hal::Features { + self.features + } + + fn hints(&self) -> hal::Hints { + self.hints + } + + fn limits(&self) -> Limits { + self.limits + } +} + +struct Presentation { + swapchain: ComPtr<IDXGISwapChain>, + view: 
ComPtr<d3d11::ID3D11RenderTargetView>, + format: format::Format, + size: window::Extent2D, + mode: window::PresentMode, + image: Arc<Image>, + is_init: bool, +} + +pub struct Surface { + pub(crate) factory: ComPtr<IDXGIFactory>, + wnd_handle: HWND, + presentation: Option<Presentation>, +} + +impl fmt::Debug for Surface { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str("Surface") + } +} + +unsafe impl Send for Surface {} +unsafe impl Sync for Surface {} + +impl window::Surface<Backend> for Surface { + fn supports_queue_family(&self, _queue_family: &QueueFamily) -> bool { + true + } + + fn capabilities(&self, _physical_device: &PhysicalDevice) -> window::SurfaceCapabilities { + let current_extent = unsafe { + let mut rect: RECT = mem::zeroed(); + assert_ne!( + 0, + GetClientRect(self.wnd_handle as *mut _, &mut rect as *mut RECT) + ); + Some(window::Extent2D { + width: (rect.right - rect.left) as u32, + height: (rect.bottom - rect.top) as u32, + }) + }; + + // TODO: flip swap effects require dx11.1/windows8 + // NOTE: some swap effects affect msaa capabilities.. + // TODO: _DISCARD swap effects can only have one image? + window::SurfaceCapabilities { + present_modes: window::PresentMode::IMMEDIATE | window::PresentMode::FIFO, + composite_alpha_modes: window::CompositeAlphaMode::OPAQUE, //TODO + image_count: 1..=16, // TODO: + current_extent, + extents: window::Extent2D { + width: 16, + height: 16, + }..=window::Extent2D { + width: 4096, + height: 4096, + }, + max_image_layers: 1, + usage: image::Usage::COLOR_ATTACHMENT, + } + } + + fn supported_formats(&self, _physical_device: &PhysicalDevice) -> Option<Vec<format::Format>> { + Some(vec![ + format::Format::Bgra8Srgb, + format::Format::Bgra8Unorm, + format::Format::Rgba8Srgb, + format::Format::Rgba8Unorm, + format::Format::A2b10g10r10Unorm, + format::Format::Rgba16Sfloat, + ]) + } +} + +#[derive(Debug)] +pub struct SwapchainImage { + image: Arc<Image>, + view: ImageView, +} +impl Borrow<Image> for SwapchainImage { + fn borrow(&self) -> &Image { + &*self.image + } +} +impl Borrow<ImageView> for SwapchainImage { + fn borrow(&self) -> &ImageView { + &self.view + } +} + +impl window::PresentationSurface<Backend> for Surface { + type SwapchainImage = SwapchainImage; + + unsafe fn configure_swapchain( + &mut self, + device: &device::Device, + config: window::SwapchainConfig, + ) -> Result<(), window::CreationError> { + assert!(image::Usage::COLOR_ATTACHMENT.contains(config.image_usage)); + + let swapchain = match self.presentation.take() { + Some(present) => { + if present.format == config.format && present.size == config.extent { + self.presentation = Some(present); + return Ok(()); + } + let non_srgb_format = conv::map_format_nosrgb(config.format).unwrap(); + drop(present.view); + let result = present.swapchain.ResizeBuffers( + config.image_count, + config.extent.width, + config.extent.height, + non_srgb_format, + 0, + ); + if result != winerror::S_OK { + error!("ResizeBuffers failed with 0x{:x}", result as u32); + return Err(window::CreationError::WindowInUse(hal::device::WindowInUse)); + } + present.swapchain + } + None => { + let (swapchain, _) = + device.create_swapchain_impl(&config, self.wnd_handle, self.factory.clone())?; + swapchain + } + }; + + // Disable automatic Alt+Enter handling by DXGI. 
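+        // These values mirror the DXGI_MWA_* flags from dxgi.h
+        // (1 << 0 and 1 << 1 respectively), declared locally here.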
+ const DXGI_MWA_NO_WINDOW_CHANGES: u32 = 1; + const DXGI_MWA_NO_ALT_ENTER: u32 = 2; + self.factory.MakeWindowAssociation( + self.wnd_handle, + DXGI_MWA_NO_WINDOW_CHANGES | DXGI_MWA_NO_ALT_ENTER, + ); + + let mut resource: *mut d3d11::ID3D11Resource = ptr::null_mut(); + assert_eq!( + winerror::S_OK, + swapchain.GetBuffer( + 0 as _, + &d3d11::ID3D11Resource::uuidof(), + &mut resource as *mut *mut _ as *mut *mut _, + ) + ); + set_debug_name(&*resource, "Swapchain Image"); + + let kind = image::Kind::D2(config.extent.width, config.extent.height, 1, 1); + let format = conv::map_format(config.format).unwrap(); + let decomposed_format = conv::DecomposedDxgiFormat::from_dxgi_format(format); + + let view_info = ViewInfo { + resource, + kind, + caps: image::ViewCapabilities::empty(), + view_kind: image::ViewKind::D2, + format: decomposed_format.rtv.unwrap(), + levels: 0..1, + layers: 0..1, + }; + let view = device.view_image_as_render_target(&view_info).unwrap(); + set_debug_name(&view, "Swapchain Image View"); + + self.presentation = Some(Presentation { + swapchain, + view, + format: config.format, + size: config.extent, + mode: config.present_mode, + image: Arc::new(Image { + kind, + usage: config.image_usage, + format: config.format, + view_caps: image::ViewCapabilities::empty(), + decomposed_format, + mip_levels: 1, + internal: InternalImage { + raw: resource, + copy_srv: None, //TODO + srv: None, //TODO + unordered_access_views: Vec::new(), + depth_stencil_views: Vec::new(), + render_target_views: Vec::new(), + debug_name: None, + }, + bind: conv::map_image_usage(config.image_usage, config.format.surface_desc()), + requirements: memory::Requirements { + size: 0, + alignment: 1, + type_mask: 0, + }, + }), + is_init: true, + }); + Ok(()) + } + + unsafe fn unconfigure_swapchain(&mut self, _device: &device::Device) { + self.presentation = None; + } + + unsafe fn acquire_image( + &mut self, + _timeout_ns: u64, //TODO: use the timeout + ) -> Result<(SwapchainImage, Option<window::Suboptimal>), window::AcquireError> { + let present = self.presentation.as_ref().unwrap(); + let swapchain_image = SwapchainImage { + image: Arc::clone(&present.image), + view: ImageView { + subresource: d3d11::D3D11CalcSubresource(0, 0, 1), + format: present.format, + rtv_handle: Some(present.view.as_raw()), + dsv_handle: None, + srv_handle: None, + uav_handle: None, + rodsv_handle: None, + owned: false, + }, + }; + Ok((swapchain_image, None)) + } +} + +#[derive(Debug, Clone, Copy)] +pub struct QueueFamily; + +impl queue::QueueFamily for QueueFamily { + fn queue_type(&self) -> queue::QueueType { + queue::QueueType::General + } + fn max_queues(&self) -> usize { + 1 + } + fn id(&self) -> queue::QueueFamilyId { + queue::QueueFamilyId(0) + } +} + +#[derive(Clone)] +pub struct CommandQueue { + context: ComPtr<d3d11::ID3D11DeviceContext>, +} + +impl fmt::Debug for CommandQueue { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str("CommandQueue") + } +} + +unsafe impl Send for CommandQueue {} +unsafe impl Sync for CommandQueue {} + +impl queue::CommandQueue<Backend> for CommandQueue { + unsafe fn submit<'a, T, Ic, S, Iw, Is>( + &mut self, + submission: queue::Submission<Ic, Iw, Is>, + fence: Option<&Fence>, + ) where + T: 'a + Borrow<CommandBuffer>, + Ic: IntoIterator<Item = &'a T>, + S: 'a + Borrow<Semaphore>, + Iw: IntoIterator<Item = (&'a S, pso::PipelineStage)>, + Is: IntoIterator<Item = &'a S>, + { + let _scope = debug_scope!(&self.context, "Submit(fence={:?})", fence); + for cmd_buf in 
submission.command_buffers { + let cmd_buf = cmd_buf.borrow(); + + let _scope = debug_scope!( + &self.context, + "CommandBuffer ({}/{})", + cmd_buf.flush_coherent_memory.len(), + cmd_buf.invalidate_coherent_memory.len() + ); + + { + let _scope = debug_scope!(&self.context, "Pre-Exec: Flush"); + for sync in &cmd_buf.flush_coherent_memory { + sync.do_flush(&self.context); + } + } + self.context + .ExecuteCommandList(cmd_buf.as_raw_list().as_raw(), FALSE); + { + let _scope = debug_scope!(&self.context, "Post-Exec: Invalidate"); + for sync in &cmd_buf.invalidate_coherent_memory { + sync.do_invalidate(&self.context); + } + } + } + + if let Some(fence) = fence { + *fence.mutex.lock() = true; + fence.condvar.notify_all(); + } + } + + unsafe fn present( + &mut self, + surface: &mut Surface, + _image: SwapchainImage, + _wait_semaphore: Option<&Semaphore>, + ) -> Result<Option<window::Suboptimal>, window::PresentError> { + let mut presentation = surface.presentation.as_mut().unwrap(); + let (interval, flags) = match presentation.mode { + window::PresentMode::IMMEDIATE => (0, 0), + //Note: this ends up not presenting anything for some reason + //window::PresentMode::MAILBOX if !presentation.is_init => (1, DXGI_PRESENT_DO_NOT_SEQUENCE), + window::PresentMode::FIFO => (1, 0), + _ => (0, 0), + }; + presentation.is_init = false; + presentation.swapchain.Present(interval, flags); + Ok(None) + } + + fn wait_idle(&self) -> Result<(), hal::device::OutOfMemory> { + // unimplemented!() + Ok(()) + } +} + +#[derive(Debug)] +pub struct AttachmentClear { + subpass_id: Option<pass::SubpassId>, + attachment_id: usize, + raw: command::AttachmentClear, +} + +#[derive(Debug)] +pub struct RenderPassCache { + pub render_pass: RenderPass, + pub framebuffer: Framebuffer, + pub attachment_clear_values: Vec<AttachmentClear>, + pub target_rect: pso::Rect, + pub current_subpass: pass::SubpassId, +} + +impl RenderPassCache { + pub fn start_subpass( + &mut self, + internal: &internal::Internal, + context: &ComPtr<d3d11::ID3D11DeviceContext>, + cache: &mut CommandBufferState, + ) { + let attachments = self + .attachment_clear_values + .iter() + .filter(|clear| clear.subpass_id == Some(self.current_subpass)) + .map(|clear| clear.raw); + + cache.dirty_flag.insert( + DirtyStateFlag::GRAPHICS_PIPELINE + | DirtyStateFlag::DEPTH_STENCIL_STATE + | DirtyStateFlag::PIPELINE_PS + | DirtyStateFlag::VIEWPORTS + | DirtyStateFlag::RENDER_TARGETS_AND_UAVS, + ); + internal.clear_attachments( + context, + attachments, + &[pso::ClearRect { + rect: self.target_rect, + layers: 0..1, + }], + &self, + ); + + let subpass = &self.render_pass.subpasses[self.current_subpass as usize]; + let color_views = subpass + .color_attachments + .iter() + .map(|&(id, _)| self.framebuffer.attachments[id].rtv_handle.unwrap()) + .collect::<Vec<_>>(); + let (ds_view, rods_view) = match subpass.depth_stencil_attachment { + Some((id, _)) => { + let attachment = &self.framebuffer.attachments[id]; + let ds_view = attachment.dsv_handle.unwrap(); + + let rods_view = attachment.rodsv_handle.unwrap(); + + (Some(ds_view), Some(rods_view)) + } + None => (None, None), + }; + + cache.set_render_targets(&color_views, ds_view, rods_view); + cache.bind(context); + } + + fn resolve_msaa(&mut self, context: &ComPtr<d3d11::ID3D11DeviceContext>) { + let subpass: &SubpassDesc = &self.render_pass.subpasses[self.current_subpass as usize]; + + for (&(color_id, _), &(resolve_id, _)) in subpass + .color_attachments + .iter() + .zip(subpass.resolve_attachments.iter()) + { + if color_id == 
pass::ATTACHMENT_UNUSED || resolve_id == pass::ATTACHMENT_UNUSED { + continue; + } + + let color_framebuffer = &self.framebuffer.attachments[color_id]; + let resolve_framebuffer = &self.framebuffer.attachments[resolve_id]; + + let mut color_resource: *mut d3d11::ID3D11Resource = ptr::null_mut(); + let mut resolve_resource: *mut d3d11::ID3D11Resource = ptr::null_mut(); + + unsafe { + (&*color_framebuffer + .rtv_handle + .expect("Framebuffer must have COLOR_ATTACHMENT usage")) + .GetResource(&mut color_resource as *mut *mut _); + (&*resolve_framebuffer + .rtv_handle + .expect("Resolve texture must have COLOR_ATTACHMENT usage")) + .GetResource(&mut resolve_resource as *mut *mut _); + + context.ResolveSubresource( + resolve_resource, + resolve_framebuffer.subresource, + color_resource, + color_framebuffer.subresource, + conv::map_format(color_framebuffer.format).unwrap(), + ); + + (&*color_resource).Release(); + (&*resolve_resource).Release(); + } + } + } + + pub fn next_subpass(&mut self, context: &ComPtr<d3d11::ID3D11DeviceContext>) { + self.resolve_msaa(context); + self.current_subpass += 1; + } +} + +bitflags! { + struct DirtyStateFlag : u32 { + const RENDER_TARGETS_AND_UAVS = (1 << 1); + const VERTEX_BUFFERS = (1 << 2); + const GRAPHICS_PIPELINE = (1 << 3); + const PIPELINE_GS = (1 << 4); + const PIPELINE_HS = (1 << 5); + const PIPELINE_DS = (1 << 6); + const PIPELINE_PS = (1 << 7); + const VIEWPORTS = (1 << 8); + const BLEND_STATE = (1 << 9); + const DEPTH_STENCIL_STATE = (1 << 10); + } +} + +pub struct CommandBufferState { + dirty_flag: DirtyStateFlag, + + render_target_len: u32, + render_targets: [*mut d3d11::ID3D11RenderTargetView; 8], + uav_len: u32, + uavs: [*mut d3d11::ID3D11UnorderedAccessView; d3d11::D3D11_PS_CS_UAV_REGISTER_COUNT as _], + depth_target: Option<*mut d3d11::ID3D11DepthStencilView>, + readonly_depth_target: Option<*mut d3d11::ID3D11DepthStencilView>, + depth_target_read_only: bool, + graphics_pipeline: Option<GraphicsPipeline>, + + // a bitmask that keeps track of what vertex buffer bindings have been "bound" into + // our vec + bound_bindings: u32, + // a bitmask that hold the required binding slots to be bound for the currently + // bound pipeline + required_bindings: Option<u32>, + // the highest binding number in currently bound pipeline + max_bindings: Option<u32>, + viewports: Vec<d3d11::D3D11_VIEWPORT>, + vertex_buffers: Vec<*mut d3d11::ID3D11Buffer>, + vertex_offsets: Vec<u32>, + vertex_strides: Vec<u32>, + blend_factor: Option<[f32; 4]>, + // we can only support one face (rather, both faces must have the same value) + stencil_ref: Option<pso::StencilValue>, + stencil_read_mask: Option<pso::StencilValue>, + stencil_write_mask: Option<pso::StencilValue>, + current_blend: Option<*mut d3d11::ID3D11BlendState>, +} + +impl fmt::Debug for CommandBufferState { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str("CommandBufferState") + } +} + +impl CommandBufferState { + fn new() -> Self { + CommandBufferState { + dirty_flag: DirtyStateFlag::empty(), + render_target_len: 0, + render_targets: [ptr::null_mut(); 8], + uav_len: 0, + uavs: [ptr::null_mut(); 8], + depth_target: None, + readonly_depth_target: None, + depth_target_read_only: false, + graphics_pipeline: None, + bound_bindings: 0, + required_bindings: None, + max_bindings: None, + viewports: Vec::new(), + vertex_buffers: Vec::new(), + vertex_offsets: Vec::new(), + vertex_strides: Vec::new(), + blend_factor: None, + stencil_ref: None, + stencil_read_mask: None, + stencil_write_mask: 
None, + current_blend: None, + } + } + + fn clear(&mut self) { + self.render_target_len = 0; + self.uav_len = 0; + self.depth_target = None; + self.readonly_depth_target = None; + self.depth_target_read_only = false; + self.graphics_pipeline = None; + self.bound_bindings = 0; + self.required_bindings = None; + self.max_bindings = None; + self.viewports.clear(); + self.vertex_buffers.clear(); + self.vertex_offsets.clear(); + self.vertex_strides.clear(); + self.blend_factor = None; + self.stencil_ref = None; + self.stencil_read_mask = None; + self.stencil_write_mask = None; + self.current_blend = None; + } + + pub fn set_vertex_buffer( + &mut self, + index: usize, + offset: u32, + buffer: *mut d3d11::ID3D11Buffer, + ) { + self.bound_bindings |= 1 << index as u32; + + if index >= self.vertex_buffers.len() { + self.vertex_buffers.push(buffer); + self.vertex_offsets.push(offset); + } else { + self.vertex_buffers[index] = buffer; + self.vertex_offsets[index] = offset; + } + + self.dirty_flag.insert(DirtyStateFlag::VERTEX_BUFFERS); + } + + pub fn bind_vertex_buffers(&mut self, context: &ComPtr<d3d11::ID3D11DeviceContext>) { + if !self.dirty_flag.contains(DirtyStateFlag::VERTEX_BUFFERS) { + return; + } + + if let Some(binding_count) = self.max_bindings { + if self.vertex_buffers.len() >= binding_count as usize + && self.vertex_strides.len() >= binding_count as usize + { + unsafe { + context.IASetVertexBuffers( + 0, + binding_count, + self.vertex_buffers.as_ptr(), + self.vertex_strides.as_ptr(), + self.vertex_offsets.as_ptr(), + ); + } + + self.dirty_flag.remove(DirtyStateFlag::VERTEX_BUFFERS); + } + } + } + + pub fn set_viewports(&mut self, viewports: &[d3d11::D3D11_VIEWPORT]) { + self.viewports.clear(); + self.viewports.extend(viewports); + + self.dirty_flag.insert(DirtyStateFlag::VIEWPORTS); + } + + pub fn bind_viewports(&mut self, context: &ComPtr<d3d11::ID3D11DeviceContext>) { + if !self.dirty_flag.contains(DirtyStateFlag::VIEWPORTS) { + return; + } + + if let Some(ref pipeline) = self.graphics_pipeline { + if let Some(ref viewport) = pipeline.baked_states.viewport { + unsafe { + context.RSSetViewports(1, [conv::map_viewport(&viewport)].as_ptr()); + } + } else { + unsafe { + context.RSSetViewports(self.viewports.len() as u32, self.viewports.as_ptr()); + } + } + } else { + unsafe { + context.RSSetViewports(self.viewports.len() as u32, self.viewports.as_ptr()); + } + } + + self.dirty_flag.remove(DirtyStateFlag::VIEWPORTS); + } + + pub fn set_render_targets( + &mut self, + render_targets: &[*mut d3d11::ID3D11RenderTargetView], + depth_target: Option<*mut d3d11::ID3D11DepthStencilView>, + readonly_depth_target: Option<*mut d3d11::ID3D11DepthStencilView>, + ) { + for (idx, &rt) in render_targets.iter().enumerate() { + self.render_targets[idx] = rt; + } + + self.render_target_len = render_targets.len() as u32; + self.depth_target = depth_target; + self.readonly_depth_target = readonly_depth_target; + + self.dirty_flag + .insert(DirtyStateFlag::RENDER_TARGETS_AND_UAVS); + } + + pub fn bind_render_targets(&mut self, context: &ComPtr<d3d11::ID3D11DeviceContext>) { + if !self + .dirty_flag + .contains(DirtyStateFlag::RENDER_TARGETS_AND_UAVS) + { + return; + } + + let depth_target = if self.depth_target_read_only { + self.readonly_depth_target + } else { + self.depth_target + } + .unwrap_or(ptr::null_mut()); + + let uav_start_index = d3d11::D3D11_PS_CS_UAV_REGISTER_COUNT - self.uav_len; + + unsafe { + if self.uav_len > 0 { + context.OMSetRenderTargetsAndUnorderedAccessViews( + self.render_target_len, + 
self.render_targets.as_ptr(), + depth_target, + uav_start_index, + self.uav_len, + &self.uavs[uav_start_index as usize] as *const *mut _, + ptr::null(), + ) + } else { + context.OMSetRenderTargets( + self.render_target_len, + self.render_targets.as_ptr(), + depth_target, + ) + }; + } + + self.dirty_flag + .remove(DirtyStateFlag::RENDER_TARGETS_AND_UAVS); + } + + pub fn set_blend_factor(&mut self, factor: [f32; 4]) { + self.blend_factor = Some(factor); + + self.dirty_flag.insert(DirtyStateFlag::BLEND_STATE); + } + + pub fn bind_blend_state(&mut self, context: &ComPtr<d3d11::ID3D11DeviceContext>) { + if let Some(blend) = self.current_blend { + let blend_color = if let Some(ref pipeline) = self.graphics_pipeline { + pipeline + .baked_states + .blend_color + .or(self.blend_factor) + .unwrap_or([0f32; 4]) + } else { + self.blend_factor.unwrap_or([0f32; 4]) + }; + + // TODO: MSAA + unsafe { + context.OMSetBlendState(blend, &blend_color, !0); + } + + self.dirty_flag.remove(DirtyStateFlag::BLEND_STATE); + } + } + + pub fn set_stencil_ref(&mut self, value: pso::StencilValue) { + self.stencil_ref = Some(value); + self.dirty_flag.insert(DirtyStateFlag::DEPTH_STENCIL_STATE); + } + + pub fn bind_depth_stencil_state(&mut self, context: &ComPtr<d3d11::ID3D11DeviceContext>) { + if !self + .dirty_flag + .contains(DirtyStateFlag::DEPTH_STENCIL_STATE) + { + return; + } + + let pipeline = match self.graphics_pipeline { + Some(ref pipeline) => pipeline, + None => return, + }; + + if let Some(ref state) = pipeline.depth_stencil_state { + let stencil_ref = state.stencil_ref.static_or(self.stencil_ref.unwrap_or(0)); + + unsafe { + context.OMSetDepthStencilState(state.raw.as_raw(), stencil_ref); + } + } + + self.dirty_flag.remove(DirtyStateFlag::DEPTH_STENCIL_STATE) + } + + pub fn set_graphics_pipeline(&mut self, pipeline: GraphicsPipeline) { + let prev = self.graphics_pipeline.take(); + + let mut prev_has_ps = false; + let mut prev_has_gs = false; + let mut prev_has_ds = false; + let mut prev_has_hs = false; + if let Some(p) = prev { + prev_has_ps = p.ps.is_some(); + prev_has_gs = p.gs.is_some(); + prev_has_ds = p.ds.is_some(); + prev_has_hs = p.hs.is_some(); + } + + if prev_has_ps || pipeline.ps.is_some() { + self.dirty_flag.insert(DirtyStateFlag::PIPELINE_PS); + } + if prev_has_gs || pipeline.gs.is_some() { + self.dirty_flag.insert(DirtyStateFlag::PIPELINE_GS); + } + if prev_has_ds || pipeline.ds.is_some() { + self.dirty_flag.insert(DirtyStateFlag::PIPELINE_DS); + } + if prev_has_hs || pipeline.hs.is_some() { + self.dirty_flag.insert(DirtyStateFlag::PIPELINE_HS); + } + + // If we don't have depth stencil state, we use the old value, so we don't bother changing anything. 
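+        // When the read-only bit changes, the render targets must be rebound so
+        // that `bind_render_targets` selects the read-only DSV, which is what
+        // allows a depth buffer to be sampled while still bound for depth testing.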
+ let depth_target_read_only = pipeline + .depth_stencil_state + .as_ref() + .map_or(self.depth_target_read_only, |ds| ds.read_only); + + if self.depth_target_read_only != depth_target_read_only { + self.depth_target_read_only = depth_target_read_only; + self.dirty_flag + .insert(DirtyStateFlag::RENDER_TARGETS_AND_UAVS); + } + + self.dirty_flag + .insert(DirtyStateFlag::GRAPHICS_PIPELINE | DirtyStateFlag::DEPTH_STENCIL_STATE); + + self.graphics_pipeline = Some(pipeline); + } + + pub fn bind_graphics_pipeline(&mut self, context: &ComPtr<d3d11::ID3D11DeviceContext>) { + if !self.dirty_flag.contains(DirtyStateFlag::GRAPHICS_PIPELINE) { + return; + } + + if let Some(ref pipeline) = self.graphics_pipeline { + self.vertex_strides.clear(); + self.vertex_strides.extend(&pipeline.strides); + + self.required_bindings = Some(pipeline.required_bindings); + self.max_bindings = Some(pipeline.max_vertex_bindings); + }; + + self.bind_vertex_buffers(context); + + if let Some(ref pipeline) = self.graphics_pipeline { + unsafe { + context.IASetPrimitiveTopology(pipeline.topology); + context.IASetInputLayout(pipeline.input_layout.as_raw()); + + context.VSSetShader(pipeline.vs.as_raw(), ptr::null_mut(), 0); + + if self.dirty_flag.contains(DirtyStateFlag::PIPELINE_PS) { + let ps = pipeline + .ps + .as_ref() + .map_or(ptr::null_mut(), |ps| ps.as_raw()); + context.PSSetShader(ps, ptr::null_mut(), 0); + + self.dirty_flag.remove(DirtyStateFlag::PIPELINE_PS) + } + + if self.dirty_flag.contains(DirtyStateFlag::PIPELINE_GS) { + let gs = pipeline + .gs + .as_ref() + .map_or(ptr::null_mut(), |gs| gs.as_raw()); + context.GSSetShader(gs, ptr::null_mut(), 0); + + self.dirty_flag.remove(DirtyStateFlag::PIPELINE_GS) + } + + if self.dirty_flag.contains(DirtyStateFlag::PIPELINE_HS) { + let hs = pipeline + .hs + .as_ref() + .map_or(ptr::null_mut(), |hs| hs.as_raw()); + context.HSSetShader(hs, ptr::null_mut(), 0); + + self.dirty_flag.remove(DirtyStateFlag::PIPELINE_HS) + } + + if self.dirty_flag.contains(DirtyStateFlag::PIPELINE_DS) { + let ds = pipeline + .ds + .as_ref() + .map_or(ptr::null_mut(), |ds| ds.as_raw()); + context.DSSetShader(ds, ptr::null_mut(), 0); + + self.dirty_flag.remove(DirtyStateFlag::PIPELINE_DS) + } + + context.RSSetState(pipeline.rasterizer_state.as_raw()); + if let Some(ref viewport) = pipeline.baked_states.viewport { + context.RSSetViewports(1, [conv::map_viewport(&viewport)].as_ptr()); + } + if let Some(ref scissor) = pipeline.baked_states.scissor { + context.RSSetScissorRects(1, [conv::map_rect(&scissor)].as_ptr()); + } + + self.current_blend = Some(pipeline.blend_state.as_raw()); + } + }; + + self.bind_blend_state(context); + self.bind_depth_stencil_state(context); + + self.dirty_flag.remove(DirtyStateFlag::GRAPHICS_PIPELINE); + } + + pub fn bind(&mut self, context: &ComPtr<d3d11::ID3D11DeviceContext>) { + self.bind_render_targets(context); + self.bind_graphics_pipeline(context); + self.bind_vertex_buffers(context); + self.bind_viewports(context); + } +} + +type PerConstantBufferVec<T> = + ArrayVec<[T; d3d11::D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT as _]>; + +fn generate_graphics_dynamic_constant_buffer_offsets<'a>( + bindings: impl IntoIterator<Item = &'a pso::DescriptorSetLayoutBinding>, + offset_iter: &mut impl Iterator<Item = u32>, + context1_some: bool, +) -> (PerConstantBufferVec<UINT>, PerConstantBufferVec<UINT>) { + let mut vs_offsets = ArrayVec::new(); + let mut fs_offsets = ArrayVec::new(); + + let mut exists_dynamic_constant_buffer = false; + + for binding in bindings.into_iter() 
{ + match binding.ty { + pso::DescriptorType::Buffer { + format: + pso::BufferDescriptorFormat::Structured { + dynamic_offset: true, + }, + ty: pso::BufferDescriptorType::Uniform, + } => { + let offset = offset_iter.next().unwrap(); + + if binding.stage_flags.contains(pso::ShaderStageFlags::VERTEX) { + vs_offsets.push(offset / 16) + }; + + if binding + .stage_flags + .contains(pso::ShaderStageFlags::FRAGMENT) + { + fs_offsets.push(offset / 16) + }; + exists_dynamic_constant_buffer = true; + } + pso::DescriptorType::Buffer { + format: + pso::BufferDescriptorFormat::Structured { + dynamic_offset: false, + }, + ty: pso::BufferDescriptorType::Uniform, + } => { + if binding.stage_flags.contains(pso::ShaderStageFlags::VERTEX) { + vs_offsets.push(0) + }; + + if binding + .stage_flags + .contains(pso::ShaderStageFlags::FRAGMENT) + { + fs_offsets.push(0) + }; + } + pso::DescriptorType::Buffer { + ty: pso::BufferDescriptorType::Storage { .. }, + format: + pso::BufferDescriptorFormat::Structured { + dynamic_offset: true, + }, + } => { + // TODO: Storage buffer offsets require new buffer views with correct sizes. + // Might also require D3D11_BUFFEREX_SRV to act like RBA is happening. + let _ = offset_iter.next().unwrap(); + warn!("Dynamic offsets into storage buffers are currently unsupported on DX11."); + } + _ => {} + } + } + + if exists_dynamic_constant_buffer && !context1_some { + warn!("D3D11.1 runtime required for dynamic offsets into constant buffers. Offsets will be ignored."); + } + + (vs_offsets, fs_offsets) +} + +fn generate_compute_dynamic_constant_buffer_offsets<'a>( + bindings: impl IntoIterator<Item = &'a pso::DescriptorSetLayoutBinding>, + offset_iter: &mut impl Iterator<Item = u32>, + context1_some: bool, +) -> PerConstantBufferVec<UINT> { + let mut cs_offsets = ArrayVec::new(); + + let mut exists_dynamic_constant_buffer = false; + + for binding in bindings.into_iter() { + match binding.ty { + pso::DescriptorType::Buffer { + format: + pso::BufferDescriptorFormat::Structured { + dynamic_offset: true, + }, + ty: pso::BufferDescriptorType::Uniform, + } => { + let offset = offset_iter.next().unwrap(); + + if binding.stage_flags.contains(pso::ShaderStageFlags::COMPUTE) { + cs_offsets.push(offset / 16) + }; + + exists_dynamic_constant_buffer = true; + } + pso::DescriptorType::Buffer { + format: + pso::BufferDescriptorFormat::Structured { + dynamic_offset: false, + }, + ty: pso::BufferDescriptorType::Uniform, + } => { + if binding.stage_flags.contains(pso::ShaderStageFlags::COMPUTE) { + cs_offsets.push(0) + }; + } + pso::DescriptorType::Buffer { + ty: pso::BufferDescriptorType::Storage { .. }, + format: + pso::BufferDescriptorFormat::Structured { + dynamic_offset: true, + }, + } => { + // TODO: Storage buffer offsets require new buffer views with correct sizes. + // Might also require D3D11_BUFFEREX_SRV to act like RBA is happening. + let _ = offset_iter.next().unwrap(); + warn!("Dynamic offsets into storage buffers are currently unsupported on DX11."); + } + _ => {} + } + } + + if exists_dynamic_constant_buffer && !context1_some { + warn!("D3D11.1 runtime required for dynamic offsets into constant buffers. 
Offsets will be ignored."); + } + + cs_offsets +} + +pub struct CommandBuffer { + internal: Arc<internal::Internal>, + context: ComPtr<d3d11::ID3D11DeviceContext>, + context1: Option<ComPtr<d3d11_1::ID3D11DeviceContext1>>, + list: RefCell<Option<ComPtr<d3d11::ID3D11CommandList>>>, + + // since coherent memory needs to be synchronized at submission, we need to gather up all + // coherent resources that are used in the command buffer and flush/invalidate them accordingly + // before executing. + flush_coherent_memory: Vec<MemoryFlush>, + invalidate_coherent_memory: Vec<MemoryInvalidate>, + + // holds information about the active render pass + render_pass_cache: Option<RenderPassCache>, + + // Have to update entire push constant buffer at once, keep whole buffer data local. + push_constant_data: [u32; MAX_PUSH_CONSTANT_SIZE / 4], + push_constant_buffer: ComPtr<d3d11::ID3D11Buffer>, + + cache: CommandBufferState, + + one_time_submit: bool, + + debug_name: Option<String>, + debug_scopes: Vec<Option<debug::DebugScope>>, +} + +impl fmt::Debug for CommandBuffer { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str("CommandBuffer") + } +} + +unsafe impl Send for CommandBuffer {} +unsafe impl Sync for CommandBuffer {} + +impl CommandBuffer { + fn create_deferred( + device: &d3d11::ID3D11Device, + device1: Option<&d3d11_1::ID3D11Device1>, + internal: Arc<internal::Internal>, + ) -> Self { + let (context, context1) = if let Some(device1) = device1 { + let mut context1: *mut d3d11_1::ID3D11DeviceContext1 = ptr::null_mut(); + let hr = unsafe { device1.CreateDeferredContext1(0, &mut context1 as *mut *mut _) }; + assert_eq!(hr, winerror::S_OK); + + let context1 = unsafe { ComPtr::from_raw(context1) }; + let context = context1.cast::<d3d11::ID3D11DeviceContext>().unwrap(); + + (context, Some(context1)) + } else { + let mut context: *mut d3d11::ID3D11DeviceContext = ptr::null_mut(); + let hr = unsafe { device.CreateDeferredContext(0, &mut context as *mut *mut _) }; + assert_eq!(hr, winerror::S_OK); + + let context = unsafe { ComPtr::from_raw(context) }; + + (context, None) + }; + + let push_constant_buffer = { + let desc = d3d11::D3D11_BUFFER_DESC { + ByteWidth: MAX_PUSH_CONSTANT_SIZE as _, + Usage: d3d11::D3D11_USAGE_DEFAULT, + BindFlags: d3d11::D3D11_BIND_CONSTANT_BUFFER, + CPUAccessFlags: 0, + MiscFlags: 0, + StructureByteStride: 0, + }; + + let mut buffer: *mut d3d11::ID3D11Buffer = ptr::null_mut(); + let hr = unsafe { + device.CreateBuffer(&desc as *const _, ptr::null_mut(), &mut buffer as *mut _) + }; + + assert_eq!(hr, winerror::S_OK); + + unsafe { ComPtr::from_raw(buffer) } + }; + + let push_constant_data = [0_u32; 64]; + + CommandBuffer { + internal, + context, + context1, + list: RefCell::new(None), + flush_coherent_memory: Vec::new(), + invalidate_coherent_memory: Vec::new(), + render_pass_cache: None, + push_constant_data, + push_constant_buffer, + cache: CommandBufferState::new(), + one_time_submit: false, + debug_name: None, + debug_scopes: Vec::new(), + } + } + + fn as_raw_list(&self) -> ComPtr<d3d11::ID3D11CommandList> { + if self.one_time_submit { + self.list.replace(None).unwrap() + } else { + self.list.borrow().clone().unwrap() + } + } + + fn defer_coherent_flush(&mut self, buffer: &Buffer) { + if !self + .flush_coherent_memory + .iter() + .any(|m| m.buffer == buffer.internal.raw) + { + self.flush_coherent_memory.push(MemoryFlush { + host_memory: buffer.memory_ptr, + sync_range: SyncRange::Whole, + buffer: buffer.internal.raw, + }); + } + } + + fn 
defer_coherent_invalidate(&mut self, buffer: &Buffer) { + if !self + .invalidate_coherent_memory + .iter() + .any(|m| m.buffer == buffer.internal.raw) + { + self.invalidate_coherent_memory.push(MemoryInvalidate { + working_buffer: Some(self.internal.working_buffer.clone()), + working_buffer_size: self.internal.working_buffer_size, + host_memory: buffer.memory_ptr, + host_sync_range: buffer.bound_range.clone(), + buffer_sync_range: buffer.bound_range.clone(), + buffer: buffer.internal.raw, + }); + } + } + + fn reset(&mut self) { + self.flush_coherent_memory.clear(); + self.invalidate_coherent_memory.clear(); + self.render_pass_cache = None; + self.cache.clear(); + self.debug_scopes.clear(); + } +} + +impl command::CommandBuffer<Backend> for CommandBuffer { + unsafe fn begin( + &mut self, + flags: command::CommandBufferFlags, + _info: command::CommandBufferInheritanceInfo<Backend>, + ) { + self.one_time_submit = flags.contains(command::CommandBufferFlags::ONE_TIME_SUBMIT); + self.reset(); + + // Push constants are at the top register to allow them to be bound only once. + let raw_push_constant_buffer = self.push_constant_buffer.as_raw(); + self.context.VSSetConstantBuffers( + d3d11::D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT - 1, + 1, + &raw_push_constant_buffer as *const _, + ); + self.context.PSSetConstantBuffers( + d3d11::D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT - 1, + 1, + &raw_push_constant_buffer as *const _, + ); + self.context.CSSetConstantBuffers( + d3d11::D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT - 1, + 1, + &raw_push_constant_buffer as *const _, + ); + } + + unsafe fn finish(&mut self) { + let mut list: *mut d3d11::ID3D11CommandList = ptr::null_mut(); + let hr = self + .context + .FinishCommandList(FALSE, &mut list as *mut *mut _); + assert_eq!(hr, winerror::S_OK); + + if let Some(ref name) = self.debug_name { + set_debug_name(&*list, name); + } + + self.list.replace(Some(ComPtr::from_raw(list))); + } + + unsafe fn reset(&mut self, _release_resources: bool) { + self.reset(); + } + + unsafe fn begin_render_pass<T>( + &mut self, + render_pass: &RenderPass, + framebuffer: &Framebuffer, + target_rect: pso::Rect, + clear_values: T, + _first_subpass: command::SubpassContents, + ) where + T: IntoIterator, + T::Item: Borrow<command::ClearValue>, + { + use pass::AttachmentLoadOp as Alo; + + let mut clear_iter = clear_values.into_iter(); + let mut attachment_clears = Vec::new(); + + for (idx, attachment) in render_pass.attachments.iter().enumerate() { + //let attachment = render_pass.attachments[attachment_ref]; + let format = attachment.format.unwrap(); + + let subpass_id = render_pass + .subpasses + .iter() + .position(|sp| sp.is_using(idx)) + .map(|i| i as pass::SubpassId); + + if attachment.has_clears() { + let value = *clear_iter.next().unwrap().borrow(); + + match (attachment.ops.load, attachment.stencil_ops.load) { + (Alo::Clear, Alo::Clear) if format.is_depth() => { + attachment_clears.push(AttachmentClear { + subpass_id, + attachment_id: idx, + raw: command::AttachmentClear::DepthStencil { + depth: Some(value.depth_stencil.depth), + stencil: Some(value.depth_stencil.stencil), + }, + }); + } + (Alo::Clear, Alo::Clear) => { + attachment_clears.push(AttachmentClear { + subpass_id, + attachment_id: idx, + raw: command::AttachmentClear::Color { + index: idx, + value: value.color, + }, + }); + + attachment_clears.push(AttachmentClear { + subpass_id, + attachment_id: idx, + raw: command::AttachmentClear::DepthStencil { + depth: None, + stencil: 
Some(value.depth_stencil.stencil), + }, + }); + } + (Alo::Clear, _) if format.is_depth() => { + attachment_clears.push(AttachmentClear { + subpass_id, + attachment_id: idx, + raw: command::AttachmentClear::DepthStencil { + depth: Some(value.depth_stencil.depth), + stencil: None, + }, + }); + } + (Alo::Clear, _) => { + attachment_clears.push(AttachmentClear { + subpass_id, + attachment_id: idx, + raw: command::AttachmentClear::Color { + index: idx, + value: value.color, + }, + }); + } + (_, Alo::Clear) => { + attachment_clears.push(AttachmentClear { + subpass_id, + attachment_id: idx, + raw: command::AttachmentClear::DepthStencil { + depth: None, + stencil: Some(value.depth_stencil.stencil), + }, + }); + } + _ => {} + } + } + } + + self.render_pass_cache = Some(RenderPassCache { + render_pass: render_pass.clone(), + framebuffer: framebuffer.clone(), + attachment_clear_values: attachment_clears, + target_rect, + current_subpass: 0, + }); + + if let Some(ref mut current_render_pass) = self.render_pass_cache { + current_render_pass.start_subpass(&self.internal, &self.context, &mut self.cache); + } + } + + unsafe fn next_subpass(&mut self, _contents: command::SubpassContents) { + if let Some(ref mut current_render_pass) = self.render_pass_cache { + current_render_pass.next_subpass(&self.context); + current_render_pass.start_subpass(&self.internal, &self.context, &mut self.cache); + } + } + + unsafe fn end_render_pass(&mut self) { + if let Some(ref mut current_render_pass) = self.render_pass_cache { + current_render_pass.resolve_msaa(&self.context); + } + + self.context + .OMSetRenderTargets(8, [ptr::null_mut(); 8].as_ptr(), ptr::null_mut()); + + self.render_pass_cache = None; + } + + unsafe fn pipeline_barrier<'a, T>( + &mut self, + _stages: Range<pso::PipelineStage>, + _dependencies: memory::Dependencies, + _barriers: T, + ) where + T: IntoIterator, + T::Item: Borrow<memory::Barrier<'a, Backend>>, + { + // TODO: should we track and assert on resource states? 
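+        // D3D11 tracks hazards in the runtime/driver, so a barrier can safely be
+        // a no-op here; explicit state tracking would only help validation.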
+ // unimplemented!() + } + + unsafe fn clear_image<T>( + &mut self, + image: &Image, + _: image::Layout, + value: command::ClearValue, + subresource_ranges: T, + ) where + T: IntoIterator, + T::Item: Borrow<image::SubresourceRange>, + { + for range in subresource_ranges { + let range = range.borrow(); + let num_levels = range.resolve_level_count(image.mip_levels); + let num_layers = range.resolve_layer_count(image.kind.num_layers()); + + let mut depth_stencil_flags = 0; + if range.aspects.contains(format::Aspects::DEPTH) { + depth_stencil_flags |= d3d11::D3D11_CLEAR_DEPTH; + } + if range.aspects.contains(format::Aspects::STENCIL) { + depth_stencil_flags |= d3d11::D3D11_CLEAR_STENCIL; + } + + // TODO: clear Int/Uint depending on format + for rel_layer in 0..num_layers { + for rel_level in 0..num_levels { + let level = range.level_start + rel_level; + let layer = range.layer_start + rel_layer; + if range.aspects.contains(format::Aspects::COLOR) { + self.context.ClearRenderTargetView( + image.get_rtv(level, layer).unwrap().as_raw(), + &value.color.float32, + ); + } else { + self.context.ClearDepthStencilView( + image.get_dsv(level, layer).unwrap().as_raw(), + depth_stencil_flags, + value.depth_stencil.depth, + value.depth_stencil.stencil as _, + ); + } + } + } + } + } + + unsafe fn clear_attachments<T, U>(&mut self, clears: T, rects: U) + where + T: IntoIterator, + T::Item: Borrow<command::AttachmentClear>, + U: IntoIterator, + U::Item: Borrow<pso::ClearRect>, + { + if let Some(ref pass) = self.render_pass_cache { + self.cache.dirty_flag.insert( + DirtyStateFlag::GRAPHICS_PIPELINE + | DirtyStateFlag::DEPTH_STENCIL_STATE + | DirtyStateFlag::PIPELINE_PS + | DirtyStateFlag::VIEWPORTS + | DirtyStateFlag::RENDER_TARGETS_AND_UAVS, + ); + self.internal + .clear_attachments(&self.context, clears, rects, pass); + self.cache.bind(&self.context); + } else { + panic!("`clear_attachments` can only be called inside a renderpass") + } + } + + unsafe fn resolve_image<T>( + &mut self, + _src: &Image, + _src_layout: image::Layout, + _dst: &Image, + _dst_layout: image::Layout, + _regions: T, + ) where + T: IntoIterator, + T::Item: Borrow<command::ImageResolve>, + { + unimplemented!() + } + + unsafe fn blit_image<T>( + &mut self, + src: &Image, + _src_layout: image::Layout, + dst: &Image, + _dst_layout: image::Layout, + filter: image::Filter, + regions: T, + ) where + T: IntoIterator, + T::Item: Borrow<command::ImageBlit>, + { + self.cache + .dirty_flag + .insert(DirtyStateFlag::GRAPHICS_PIPELINE | DirtyStateFlag::PIPELINE_PS); + + self.internal + .blit_2d_image(&self.context, src, dst, filter, regions); + + self.cache.bind(&self.context); + } + + unsafe fn bind_index_buffer(&mut self, buffer: &Buffer, sub: buffer::SubRange, ty: IndexType) { + self.context.IASetIndexBuffer( + buffer.internal.raw, + conv::map_index_type(ty), + sub.offset as u32, + ); + } + + unsafe fn bind_vertex_buffers<I, T>(&mut self, first_binding: pso::BufferIndex, buffers: I) + where + I: IntoIterator<Item = (T, buffer::SubRange)>, + T: Borrow<Buffer>, + { + for (i, (buf, sub)) in buffers.into_iter().enumerate() { + let idx = i + first_binding as usize; + let buf = buf.borrow(); + + if buf.is_coherent { + self.defer_coherent_flush(buf); + } + + self.cache + .set_vertex_buffer(idx, sub.offset as u32, buf.internal.raw); + } + + self.cache.bind_vertex_buffers(&self.context); + } + + unsafe fn set_viewports<T>(&mut self, _first_viewport: u32, viewports: T) + where + T: IntoIterator, + T::Item: Borrow<pso::Viewport>, + { + let viewports = 
viewports + .into_iter() + .map(|v| { + let v = v.borrow(); + conv::map_viewport(v) + }) + .collect::<Vec<_>>(); + + // TODO: DX only lets us set all VPs at once, so cache in slice? + self.cache.set_viewports(&viewports); + self.cache.bind_viewports(&self.context); + } + + unsafe fn set_scissors<T>(&mut self, _first_scissor: u32, scissors: T) + where + T: IntoIterator, + T::Item: Borrow<pso::Rect>, + { + let scissors = scissors + .into_iter() + .map(|s| { + let s = s.borrow(); + conv::map_rect(s) + }) + .collect::<Vec<_>>(); + + // TODO: same as for viewports + self.context + .RSSetScissorRects(scissors.len() as _, scissors.as_ptr()); + } + + unsafe fn set_blend_constants(&mut self, color: pso::ColorValue) { + self.cache.set_blend_factor(color); + self.cache.bind_blend_state(&self.context); + } + + unsafe fn set_stencil_reference(&mut self, _faces: pso::Face, value: pso::StencilValue) { + self.cache.set_stencil_ref(value); + self.cache.bind_depth_stencil_state(&self.context); + } + + unsafe fn set_stencil_read_mask(&mut self, _faces: pso::Face, value: pso::StencilValue) { + self.cache.stencil_read_mask = Some(value); + } + + unsafe fn set_stencil_write_mask(&mut self, _faces: pso::Face, value: pso::StencilValue) { + self.cache.stencil_write_mask = Some(value); + } + + unsafe fn set_depth_bounds(&mut self, _bounds: Range<f32>) { + unimplemented!() + } + + unsafe fn set_line_width(&mut self, width: f32) { + validate_line_width(width); + } + + unsafe fn set_depth_bias(&mut self, _depth_bias: pso::DepthBias) { + // TODO: + // unimplemented!() + } + + unsafe fn bind_graphics_pipeline(&mut self, pipeline: &GraphicsPipeline) { + self.cache.set_graphics_pipeline(pipeline.clone()); + self.cache.bind(&self.context); + } + + unsafe fn bind_graphics_descriptor_sets<'a, I, J>( + &mut self, + layout: &PipelineLayout, + first_set: usize, + sets: I, + offsets: J, + ) where + I: IntoIterator, + I::Item: Borrow<DescriptorSet>, + J: IntoIterator, + J::Item: Borrow<command::DescriptorSetOffset>, + { + let _scope = debug_scope!(&self.context, "BindGraphicsDescriptorSets"); + + // TODO: find a better solution to invalidating old bindings.. 
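+        // Unbinding every compute UAV slot up front keeps stale UAV bindings from
+        // aliasing resources about to be bound as SRVs/RTVs below, which the
+        // D3D11 runtime would otherwise force-unbind with a debug-layer warning.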
+ let nulls = [ptr::null_mut(); d3d11::D3D11_PS_CS_UAV_REGISTER_COUNT as usize]; + self.context.CSSetUnorderedAccessViews( + 0, + d3d11::D3D11_PS_CS_UAV_REGISTER_COUNT, + nulls.as_ptr(), + ptr::null_mut(), + ); + + let mut offset_iter = offsets.into_iter().map(|o: J::Item| *o.borrow()); + + for (set, info) in sets.into_iter().zip(&layout.sets[first_set..]) { + let set: &DescriptorSet = set.borrow(); + + { + let coherent_buffers = set.coherent_buffers.lock(); + for sync in coherent_buffers.flush_coherent_buffers.borrow().iter() { + // TODO: merge sync range if a flush already exists + if !self + .flush_coherent_memory + .iter() + .any(|m| m.buffer == sync.device_buffer) + { + self.flush_coherent_memory.push(MemoryFlush { + host_memory: sync.host_ptr, + sync_range: sync.range.clone(), + buffer: sync.device_buffer, + }); + } + } + + for sync in coherent_buffers.invalidate_coherent_buffers.borrow().iter() { + if !self + .invalidate_coherent_memory + .iter() + .any(|m| m.buffer == sync.device_buffer) + { + self.invalidate_coherent_memory.push(MemoryInvalidate { + working_buffer: Some(self.internal.working_buffer.clone()), + working_buffer_size: self.internal.working_buffer_size, + host_memory: sync.host_ptr, + host_sync_range: sync.range.clone(), + buffer_sync_range: sync.range.clone(), + buffer: sync.device_buffer, + }); + } + } + } + + let (vs_offsets, fs_offsets) = generate_graphics_dynamic_constant_buffer_offsets( + &*set.layout.bindings, + &mut offset_iter, + self.context1.is_some(), + ); + + if let Some(rd) = info.registers.vs.c.as_some() { + let start_slot = rd.res_index as u32; + let num_buffers = rd.count as u32; + let constant_buffers = set.handles.offset(rd.pool_offset as isize); + if let Some(ref context1) = self.context1 { + // This call with offsets won't work right with command list emulation + // unless we reset the first and last constant buffers to null. + if self.internal.command_list_emulation { + let null_cbuf = [ptr::null_mut::<d3d11::ID3D11Buffer>()]; + context1.VSSetConstantBuffers(start_slot, 1, &null_cbuf as *const _); + if num_buffers > 1 { + context1.VSSetConstantBuffers( + start_slot + num_buffers - 1, + 1, + &null_cbuf as *const _, + ); + } + } + + // TODO: This should be the actual buffer length for RBA purposes, + // but that information isn't easily accessible here. + context1.VSSetConstantBuffers1( + start_slot, + num_buffers, + constant_buffers as *const *mut _, + vs_offsets.as_ptr(), + self.internal.constant_buffer_count_buffer.as_ptr(), + ); + } else { + self.context.VSSetConstantBuffers( + start_slot, + num_buffers, + constant_buffers as *const *mut _, + ); + } + } + if let Some(rd) = info.registers.vs.t.as_some() { + self.context.VSSetShaderResources( + rd.res_index as u32, + rd.count as u32, + set.handles.offset(rd.pool_offset as isize) as *const *mut _, + ); + } + if let Some(rd) = info.registers.vs.s.as_some() { + self.context.VSSetSamplers( + rd.res_index as u32, + rd.count as u32, + set.handles.offset(rd.pool_offset as isize) as *const *mut _, + ); + } + + if let Some(rd) = info.registers.ps.c.as_some() { + let start_slot = rd.res_index as u32; + let num_buffers = rd.count as u32; + let constant_buffers = set.handles.offset(rd.pool_offset as isize); + if let Some(ref context1) = self.context1 { + // This call with offsets won't work right with command list emulation + // unless we reset the first and last constant buffers to null. 
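+                        // The runtime may elide *SetConstantBuffers1 when the bound
+                        // buffer pointer is unchanged, dropping the new offsets;
+                        // nulling the first and last slots guarantees the binding
+                        // actually changes under command-list emulation.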
+ if self.internal.command_list_emulation { + let null_cbuf = [ptr::null_mut::<d3d11::ID3D11Buffer>()]; + context1.PSSetConstantBuffers(start_slot, 1, &null_cbuf as *const _); + if num_buffers > 1 { + context1.PSSetConstantBuffers( + start_slot + num_buffers - 1, + 1, + &null_cbuf as *const _, + ); + } + } + + context1.PSSetConstantBuffers1( + start_slot, + num_buffers, + constant_buffers as *const *mut _, + fs_offsets.as_ptr(), + self.internal.constant_buffer_count_buffer.as_ptr(), + ); + } else { + self.context.PSSetConstantBuffers( + start_slot, + num_buffers, + constant_buffers as *const *mut _, + ); + } + } + if let Some(rd) = info.registers.ps.t.as_some() { + self.context.PSSetShaderResources( + rd.res_index as u32, + rd.count as u32, + set.handles.offset(rd.pool_offset as isize) as *const *mut _, + ); + } + if let Some(rd) = info.registers.ps.s.as_some() { + self.context.PSSetSamplers( + rd.res_index as u32, + rd.count as u32, + set.handles.offset(rd.pool_offset as isize) as *const *mut _, + ); + } + + // UAVs going to the graphics pipeline are always treated as pixel shader bindings. + if let Some(rd) = info.registers.ps.u.as_some() { + // We bind UAVs in inverse order from the top to prevent invalidation + // when the render target count changes. + for idx in (0..(rd.count)).rev() { + let ptr = (*set.handles.offset(rd.pool_offset as isize + idx as isize)).0; + let uav_register = d3d11::D3D11_PS_CS_UAV_REGISTER_COUNT + - 1 + - rd.res_index as u32 + - idx as u32; + self.cache.uavs[uav_register as usize] = ptr as *mut _; + } + self.cache.uav_len = (rd.res_index + rd.count) as u32; + self.cache + .dirty_flag + .insert(DirtyStateFlag::RENDER_TARGETS_AND_UAVS); + } + } + + self.cache.bind_render_targets(&self.context); + } + + unsafe fn bind_compute_pipeline(&mut self, pipeline: &ComputePipeline) { + self.context + .CSSetShader(pipeline.cs.as_raw(), ptr::null_mut(), 0); + } + + unsafe fn bind_compute_descriptor_sets<I, J>( + &mut self, + layout: &PipelineLayout, + first_set: usize, + sets: I, + offsets: J, + ) where + I: IntoIterator, + I::Item: Borrow<DescriptorSet>, + J: IntoIterator, + J::Item: Borrow<command::DescriptorSetOffset>, + { + let _scope = debug_scope!(&self.context, "BindComputeDescriptorSets"); + + let nulls = [ptr::null_mut(); d3d11::D3D11_PS_CS_UAV_REGISTER_COUNT as usize]; + self.context.CSSetUnorderedAccessViews( + 0, + d3d11::D3D11_PS_CS_UAV_REGISTER_COUNT, + nulls.as_ptr(), + ptr::null_mut(), + ); + + let mut offset_iter = offsets.into_iter().map(|o: J::Item| *o.borrow()); + + for (set, info) in sets.into_iter().zip(&layout.sets[first_set..]) { + let set: &DescriptorSet = set.borrow(); + + { + let coherent_buffers = set.coherent_buffers.lock(); + for sync in coherent_buffers.flush_coherent_buffers.borrow().iter() { + if !self + .flush_coherent_memory + .iter() + .any(|m| m.buffer == sync.device_buffer) + { + self.flush_coherent_memory.push(MemoryFlush { + host_memory: sync.host_ptr, + sync_range: sync.range.clone(), + buffer: sync.device_buffer, + }); + } + } + + for sync in coherent_buffers.invalidate_coherent_buffers.borrow().iter() { + if !self + .invalidate_coherent_memory + .iter() + .any(|m| m.buffer == sync.device_buffer) + { + self.invalidate_coherent_memory.push(MemoryInvalidate { + working_buffer: Some(self.internal.working_buffer.clone()), + working_buffer_size: self.internal.working_buffer_size, + host_memory: sync.host_ptr, + host_sync_range: sync.range.clone(), + buffer_sync_range: sync.range.clone(), + buffer: sync.device_buffer, + }); + } + } + } + 
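+            // Dynamic buffer offsets become first-constant offsets for
+            // CSSetConstantBuffers1 below; D3D11.1 expresses these in 16-byte
+            // shader constants, which is why the helper needs to know whether a
+            // D3D11.1 context (`context1`) is actually available.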
+ let cs_offsets = generate_compute_dynamic_constant_buffer_offsets( + &*set.layout.bindings, + &mut offset_iter, + self.context1.is_some(), + ); + + if let Some(rd) = info.registers.cs.c.as_some() { + let start_slot = rd.res_index as u32; + let num_buffers = rd.count as u32; + let constant_buffers = set.handles.offset(rd.pool_offset as isize); + if let Some(ref context1) = self.context1 { + // This call with offsets won't work right with command list emulation + // unless we reset the first and last constant buffers to null. + if self.internal.command_list_emulation { + let null_cbuf = [ptr::null_mut::<d3d11::ID3D11Buffer>()]; + context1.CSSetConstantBuffers(start_slot, 1, &null_cbuf as *const _); + if num_buffers > 1 { + context1.CSSetConstantBuffers( + start_slot + num_buffers - 1, + 1, + &null_cbuf as *const _, + ); + } + } + + // TODO: This should be the actual buffer length for RBA purposes, + // but that information isn't easily accessible here. + context1.CSSetConstantBuffers1( + start_slot, + num_buffers, + constant_buffers as *const *mut _, + cs_offsets.as_ptr(), + self.internal.constant_buffer_count_buffer.as_ptr(), + ); + } else { + self.context.CSSetConstantBuffers( + start_slot, + num_buffers, + constant_buffers as *const *mut _, + ); + } + } + if let Some(rd) = info.registers.cs.t.as_some() { + self.context.CSSetShaderResources( + rd.res_index as u32, + rd.count as u32, + set.handles.offset(rd.pool_offset as isize) as *const *mut _, + ); + } + if let Some(rd) = info.registers.cs.u.as_some() { + self.context.CSSetUnorderedAccessViews( + rd.res_index as u32, + rd.count as u32, + set.handles.offset(rd.pool_offset as isize) as *const *mut _, + ptr::null_mut(), + ); + } + if let Some(rd) = info.registers.cs.s.as_some() { + self.context.CSSetSamplers( + rd.res_index as u32, + rd.count as u32, + set.handles.offset(rd.pool_offset as isize) as *const *mut _, + ); + } + } + } + + unsafe fn dispatch(&mut self, count: WorkGroupCount) { + self.context.Dispatch(count[0], count[1], count[2]); + } + + unsafe fn dispatch_indirect(&mut self, _buffer: &Buffer, _offset: buffer::Offset) { + unimplemented!() + } + + unsafe fn fill_buffer(&mut self, _buffer: &Buffer, _sub: buffer::SubRange, _data: u32) { + unimplemented!() + } + + unsafe fn update_buffer(&mut self, _buffer: &Buffer, _offset: buffer::Offset, _data: &[u8]) { + unimplemented!() + } + + unsafe fn copy_buffer<T>(&mut self, src: &Buffer, dst: &Buffer, regions: T) + where + T: IntoIterator, + T::Item: Borrow<command::BufferCopy>, + { + if src.is_coherent { + self.defer_coherent_flush(src); + } + + for region in regions.into_iter() { + let info = region.borrow(); + let dst_box = d3d11::D3D11_BOX { + left: info.src as _, + top: 0, + front: 0, + right: (info.src + info.size) as _, + bottom: 1, + back: 1, + }; + + self.context.CopySubresourceRegion( + dst.internal.raw as _, + 0, + info.dst as _, + 0, + 0, + src.internal.raw as _, + 0, + &dst_box, + ); + + if let Some(disjoint_cb) = dst.internal.disjoint_cb { + self.context.CopySubresourceRegion( + disjoint_cb as _, + 0, + info.dst as _, + 0, + 0, + src.internal.raw as _, + 0, + &dst_box, + ); + } + } + } + + unsafe fn copy_image<T>( + &mut self, + src: &Image, + _: image::Layout, + dst: &Image, + _: image::Layout, + regions: T, + ) where + T: IntoIterator, + T::Item: Borrow<command::ImageCopy>, + { + self.internal + .copy_image_2d(&self.context, src, dst, regions); + } + + unsafe fn copy_buffer_to_image<T>( + &mut self, + buffer: &Buffer, + image: &Image, + _: image::Layout, + regions: T, + 
) where + T: IntoIterator, + T::Item: Borrow<command::BufferImageCopy>, + { + if buffer.is_coherent { + self.defer_coherent_flush(buffer); + } + + self.internal + .copy_buffer_to_image(&self.context, buffer, image, regions); + } + + unsafe fn copy_image_to_buffer<T>( + &mut self, + image: &Image, + _: image::Layout, + buffer: &Buffer, + regions: T, + ) where + T: IntoIterator, + T::Item: Borrow<command::BufferImageCopy>, + { + if buffer.is_coherent { + self.defer_coherent_invalidate(buffer); + } + + self.internal + .copy_image_to_buffer(&self.context, image, buffer, regions); + } + + unsafe fn draw(&mut self, vertices: Range<VertexCount>, instances: Range<InstanceCount>) { + self.context.DrawInstanced( + vertices.end - vertices.start, + instances.end - instances.start, + vertices.start, + instances.start, + ); + } + + unsafe fn draw_indexed( + &mut self, + indices: Range<IndexCount>, + base_vertex: VertexOffset, + instances: Range<InstanceCount>, + ) { + self.context.DrawIndexedInstanced( + indices.end - indices.start, + instances.end - instances.start, + indices.start, + base_vertex, + instances.start, + ); + } + + unsafe fn draw_indirect( + &mut self, + buffer: &Buffer, + offset: buffer::Offset, + draw_count: DrawCount, + _stride: u32, + ) { + assert_eq!(draw_count, 1, "DX11 doesn't support MULTI_DRAW_INDIRECT"); + self.context + .DrawInstancedIndirect(buffer.internal.raw, offset as _); + } + + unsafe fn draw_indexed_indirect( + &mut self, + buffer: &Buffer, + offset: buffer::Offset, + draw_count: DrawCount, + _stride: u32, + ) { + assert_eq!(draw_count, 1, "DX11 doesn't support MULTI_DRAW_INDIRECT"); + self.context + .DrawIndexedInstancedIndirect(buffer.internal.raw, offset as _); + } + + unsafe fn draw_indirect_count( + &mut self, + _buffer: &Buffer, + _offset: buffer::Offset, + _count_buffer: &Buffer, + _count_buffer_offset: buffer::Offset, + _max_draw_count: u32, + _stride: u32, + ) { + panic!("DX11 doesn't support DRAW_INDIRECT_COUNT") + } + + unsafe fn draw_indexed_indirect_count( + &mut self, + _buffer: &Buffer, + _offset: buffer::Offset, + _count_buffer: &Buffer, + _count_buffer_offset: buffer::Offset, + _max_draw_count: u32, + _stride: u32, + ) { + panic!("DX11 doesn't support DRAW_INDIRECT_COUNT") + } + + unsafe fn draw_mesh_tasks(&mut self, _: TaskCount, _: TaskCount) { + panic!("DX11 doesn't support MESH_SHADERS") + } + + unsafe fn draw_mesh_tasks_indirect( + &mut self, + _: &Buffer, + _: buffer::Offset, + _: hal::DrawCount, + _: u32, + ) { + panic!("DX11 doesn't support MESH_SHADERS") + } + + unsafe fn draw_mesh_tasks_indirect_count( + &mut self, + _: &Buffer, + _: buffer::Offset, + _: &Buffer, + _: buffer::Offset, + _: hal::DrawCount, + _: u32, + ) { + panic!("DX11 doesn't support MESH_SHADERS") + } + + unsafe fn set_event(&mut self, _: &(), _: pso::PipelineStage) { + unimplemented!() + } + + unsafe fn reset_event(&mut self, _: &(), _: pso::PipelineStage) { + unimplemented!() + } + + unsafe fn wait_events<'a, I, J>(&mut self, _: I, _: Range<pso::PipelineStage>, _: J) + where + I: IntoIterator, + I::Item: Borrow<()>, + J: IntoIterator, + J::Item: Borrow<memory::Barrier<'a, Backend>>, + { + unimplemented!() + } + + unsafe fn begin_query(&mut self, _query: query::Query<Backend>, _flags: query::ControlFlags) { + unimplemented!() + } + + unsafe fn end_query(&mut self, _query: query::Query<Backend>) { + unimplemented!() + } + + unsafe fn reset_query_pool(&mut self, _pool: &QueryPool, _queries: Range<query::Id>) { + unimplemented!() + } + + unsafe fn copy_query_pool_results( + 
+        &mut self,
+        _pool: &QueryPool,
+        _queries: Range<query::Id>,
+        _buffer: &Buffer,
+        _offset: buffer::Offset,
+        _stride: buffer::Offset,
+        _flags: query::ResultFlags,
+    ) {
+        unimplemented!()
+    }
+
+    unsafe fn write_timestamp(&mut self, _: pso::PipelineStage, _query: query::Query<Backend>) {
+        unimplemented!()
+    }
+
+    unsafe fn push_graphics_constants(
+        &mut self,
+        _layout: &PipelineLayout,
+        _stages: pso::ShaderStageFlags,
+        offset: u32,
+        constants: &[u32],
+    ) {
+        let start = (offset / 4) as usize;
+        let end = start + constants.len();
+
+        self.push_constant_data[start..end].copy_from_slice(constants);
+
+        self.context.UpdateSubresource(
+            self.push_constant_buffer.as_raw() as *mut _,
+            0,
+            ptr::null(),
+            self.push_constant_data.as_ptr() as *const _,
+            MAX_PUSH_CONSTANT_SIZE as _,
+            1,
+        );
+    }
+
+    unsafe fn push_compute_constants(
+        &mut self,
+        _layout: &PipelineLayout,
+        offset: u32,
+        constants: &[u32],
+    ) {
+        let start = (offset / 4) as usize;
+        let end = start + constants.len();
+
+        self.push_constant_data[start..end].copy_from_slice(constants);
+
+        self.context.UpdateSubresource(
+            self.push_constant_buffer.as_raw() as *mut _,
+            0,
+            ptr::null(),
+            self.push_constant_data.as_ptr() as *const _,
+            MAX_PUSH_CONSTANT_SIZE as _,
+            1,
+        );
+    }
+
+    unsafe fn execute_commands<'a, T, I>(&mut self, _buffers: I)
+    where
+        T: 'a + Borrow<CommandBuffer>,
+        I: IntoIterator<Item = &'a T>,
+    {
+        unimplemented!()
+    }
+
+    unsafe fn insert_debug_marker(&mut self, name: &str, _color: u32) {
+        debug::debug_marker(&self.context, &format!("{}", name))
+    }
+    unsafe fn begin_debug_marker(&mut self, _name: &str, _color: u32) {
+        // TODO: This causes everything after this to be part of this scope, why?
+        // self.debug_scopes.push(debug::DebugScope::with_name(&self.context, format_args!("{}", name)))
+    }
+    unsafe fn end_debug_marker(&mut self) {
+        // self.debug_scopes.pop();
+    }
+}
+
+#[derive(Clone, Debug)]
+enum SyncRange {
+    Whole,
+    Partial(Range<u64>),
+}
+
+#[derive(Debug)]
+pub struct MemoryFlush {
+    host_memory: *const u8,
+    sync_range: SyncRange,
+    buffer: *mut d3d11::ID3D11Buffer,
+}
+
+pub struct MemoryInvalidate {
+    working_buffer: Option<ComPtr<d3d11::ID3D11Buffer>>,
+    working_buffer_size: u64,
+    host_memory: *mut u8,
+    host_sync_range: Range<u64>,
+    buffer_sync_range: Range<u64>,
+    buffer: *mut d3d11::ID3D11Buffer,
+}
+
+impl fmt::Debug for MemoryInvalidate {
+    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+        fmt.write_str("MemoryInvalidate")
+    }
+}
+
+fn intersection(a: &Range<u64>, b: &Range<u64>) -> Option<Range<u64>> {
+    let r = a.start.max(b.start)..a.end.min(b.end);
+    if r.start < r.end {
+        Some(r)
+    } else {
+        None
+    }
+}
+
+impl MemoryFlush {
+    fn do_flush(&self, context: &ComPtr<d3d11::ID3D11DeviceContext>) {
+        let src = self.host_memory;
+
+        debug_marker!(context, "Flush({:?})", self.sync_range);
+        let region = match self.sync_range {
+            SyncRange::Partial(ref range) if range.start < range.end => Some(d3d11::D3D11_BOX {
+                left: range.start as u32,
+                top: 0,
+                front: 0,
+                right: range.end as u32,
+                bottom: 1,
+                back: 1,
+            }),
+            _ => None,
+        };
+
+        unsafe {
+            context.UpdateSubresource(
+                self.buffer as _,
+                0,
+                region.as_ref().map_or(ptr::null(), |r| r),
+                src as _,
+                0,
+                0,
+            );
+        }
+    }
+}
+
+impl MemoryInvalidate {
+    fn download(
+        &self,
+        context: &ComPtr<d3d11::ID3D11DeviceContext>,
+        buffer: *mut d3d11::ID3D11Buffer,
+        host_range: Range<u64>,
+        buffer_range: Range<u64>,
+    ) {
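+        // A DEFAULT-usage buffer can't be mapped for CPU reads in D3D11, so we
+        // copy the region into the STAGING `working_buffer` first and then
+        // Map/memcpy it back into host memory below.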
+        // Range<u64> doesn't impl `len` for some bizarre reason relating to underflow
+        debug_assert_eq!(
+            host_range.end - host_range.start,
+            buffer_range.end - buffer_range.start
+        );
+
+        unsafe {
+            context.CopySubresourceRegion(
+                self.working_buffer.clone().unwrap().as_raw() as _,
+                0,
+                0,
+                0,
+                0,
+                buffer as _,
+                0,
+                &d3d11::D3D11_BOX {
+                    left: buffer_range.start as _,
+                    top: 0,
+                    front: 0,
+                    right: buffer_range.end as _,
+                    bottom: 1,
+                    back: 1,
+                },
+            );
+
+            // copy over to our vec
+            let dst = self.host_memory.offset(host_range.start as isize);
+            let src = self.map(&context);
+            ptr::copy(src, dst, (host_range.end - host_range.start) as usize);
+            self.unmap(&context);
+        }
+    }
+
+    fn do_invalidate(&self, context: &ComPtr<d3d11::ID3D11DeviceContext>) {
+        let stride = self.working_buffer_size;
+        let len = self.host_sync_range.end - self.host_sync_range.start;
+        let chunks = len / stride;
+        let remainder = len % stride;
+
+        // we split up the copies into chunks the size of our working buffer
+        for i in 0..chunks {
+            let host_offset = self.host_sync_range.start + i * stride;
+            let host_range = host_offset..(host_offset + stride);
+            let buffer_offset = self.buffer_sync_range.start + i * stride;
+            let buffer_range = buffer_offset..(buffer_offset + stride);
+
+            self.download(context, self.buffer, host_range, buffer_range);
+        }
+
+        if remainder != 0 {
+            let host_offset = self.host_sync_range.start + chunks * stride;
+            let host_range = host_offset..self.host_sync_range.end;
+            let buffer_offset = self.buffer_sync_range.start + chunks * stride;
+            let buffer_range = buffer_offset..self.buffer_sync_range.end;
+
+            debug_assert!(host_range.end - host_range.start <= stride);
+            debug_assert!(buffer_range.end - buffer_range.start <= stride);
+
+            self.download(context, self.buffer, host_range, buffer_range);
+        }
+    }
+
+    fn map(&self, context: &ComPtr<d3d11::ID3D11DeviceContext>) -> *mut u8 {
+        assert!(self.working_buffer.is_some());
+
+        unsafe {
+            let mut map = mem::zeroed();
+            let hr = context.Map(
+                self.working_buffer.clone().unwrap().as_raw() as _,
+                0,
+                d3d11::D3D11_MAP_READ,
+                0,
+                &mut map,
+            );
+
+            assert_eq!(hr, winerror::S_OK);
+
+            map.pData as _
+        }
+    }
+
+    fn unmap(&self, context: &ComPtr<d3d11::ID3D11DeviceContext>) {
+        unsafe {
+            context.Unmap(self.working_buffer.clone().unwrap().as_raw() as _, 0);
+        }
+    }
+}
+
+type LocalResourceArena<T> = thunderdome::Arena<(Range<u64>, T)>;
+
+// Since we don't have any heaps to work with directly, every time we bind a
+// buffer/image to memory we allocate a dx11 resource and assign it a range.
+//
+// `HOST_VISIBLE` memory gets a `Vec<u8>` which covers the entire memory
+// range. This forces us to only expose non-coherent memory, as this
+// abstraction acts as a "cache" since the "staging buffer" vec is disjoint
+// from all the dx11 resources we store in the struct.
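+//
+// A rough sketch of the resulting non-coherent mapping flow, in terms of the
+// gfx-hal API this backend implements (approximate signatures, illustrative only):
+//
+//     let ptr = device.map_memory(&memory, memory::Segment::ALL)?;
+//     ptr::copy_nonoverlapping(data.as_ptr(), ptr, data.len()); // writes hit the Vec<u8>
+//     device.flush_mapped_memory_ranges(iter::once((&memory, memory::Segment::ALL)))?;
+//     // ^ `Memory::flush` below then walks every ID3D11Buffer intersecting the
+//     //   dirty range and replays the bytes with UpdateSubresource.
+//     device.unmap_memory(&memory);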
+pub struct Memory { + properties: memory::Properties, + size: u64, + + // pointer to staging memory, if it's HOST_VISIBLE + host_ptr: *mut u8, + + // list of all buffers bound to this memory + local_buffers: Arc<RwLock<LocalResourceArena<InternalBuffer>>>, +} + +impl fmt::Debug for Memory { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str("Memory") + } +} + +unsafe impl Send for Memory {} +unsafe impl Sync for Memory {} + +impl Memory { + pub fn resolve(&self, segment: &memory::Segment) -> Range<u64> { + segment.offset..segment.size.map_or(self.size, |s| segment.offset + s) + } + + pub fn bind_buffer(&self, range: Range<u64>, buffer: InternalBuffer) -> thunderdome::Index { + let mut local_buffers = self.local_buffers.write(); + local_buffers.insert((range, buffer)) + } + + pub fn flush(&self, context: &ComPtr<d3d11::ID3D11DeviceContext>, range: Range<u64>) { + use buffer::Usage; + + for (_, &(ref buffer_range, ref buffer)) in self.local_buffers.read().iter() { + let range = match intersection(&range, &buffer_range) { + Some(r) => r, + None => continue, + }; + // we need to handle 3 cases for updating buffers: + // + // 1. if our buffer was created as a `UNIFORM` buffer *and* other usage flags, we + // also have a disjoint buffer which only has `D3D11_BIND_CONSTANT_BUFFER` due + // to DX11 limitation. we then need to update both the original buffer and the + // disjoint one with the *whole* range + // + // 2. if our buffer was created with *only* `UNIFORM` usage we need to upload + // the whole range + // + // 3. the general case, without any `UNIFORM` usage has no restrictions on + // partial updates, so we upload the specified range + // + if let Some(disjoint) = buffer.disjoint_cb { + MemoryFlush { + host_memory: unsafe { self.host_ptr.offset(buffer_range.start as _) }, + sync_range: SyncRange::Whole, + buffer: disjoint, + } + .do_flush(&context); + } + + let mem_flush = if buffer.usage == Usage::UNIFORM { + MemoryFlush { + host_memory: unsafe { self.host_ptr.offset(buffer_range.start as _) }, + sync_range: SyncRange::Whole, + buffer: buffer.raw, + } + } else { + let local_start = range.start - buffer_range.start; + let local_end = range.end - buffer_range.start; + + MemoryFlush { + host_memory: unsafe { self.host_ptr.offset(range.start as _) }, + sync_range: SyncRange::Partial(local_start..local_end), + buffer: buffer.raw, + } + }; + + mem_flush.do_flush(&context) + } + } + + pub fn invalidate( + &self, + context: &ComPtr<d3d11::ID3D11DeviceContext>, + range: Range<u64>, + working_buffer: ComPtr<d3d11::ID3D11Buffer>, + working_buffer_size: u64, + ) { + for (_, &(ref buffer_range, ref buffer)) in self.local_buffers.read().iter() { + if let Some(range) = intersection(&range, &buffer_range) { + let buffer_start_offset = range.start - buffer_range.start; + let buffer_end_offset = range.end - buffer_range.start; + + let buffer_sync_range = buffer_start_offset..buffer_end_offset; + + MemoryInvalidate { + working_buffer: Some(working_buffer.clone()), + working_buffer_size, + host_memory: self.host_ptr, + host_sync_range: range.clone(), + buffer_sync_range: buffer_sync_range, + buffer: buffer.raw, + } + .do_invalidate(&context); + } + } + } +} + +#[derive(Debug)] +pub struct CommandPool { + device: ComPtr<d3d11::ID3D11Device>, + device1: Option<ComPtr<d3d11_1::ID3D11Device1>>, + internal: Arc<internal::Internal>, +} + +unsafe impl Send for CommandPool {} +unsafe impl Sync for CommandPool {} + +impl hal::pool::CommandPool<Backend> for CommandPool { + unsafe fn 
+    reset(&mut self, _release_resources: bool) {
+        //unimplemented!()
+    }
+
+    unsafe fn allocate_one(&mut self, _level: command::Level) -> CommandBuffer {
+        CommandBuffer::create_deferred(
+            &self.device,
+            self.device1.as_deref(),
+            Arc::clone(&self.internal),
+        )
+    }
+
+    unsafe fn free<I>(&mut self, _cbufs: I)
+    where
+        I: IntoIterator<Item = CommandBuffer>,
+    {
+        // TODO:
+        // unimplemented!()
+    }
+}
+
+/// Similarly to the DX12 backend, we can handle either precompiled DXBC or SPIR-V.
+pub enum ShaderModule {
+    Dxbc(Vec<u8>),
+    Spirv(Vec<u32>),
+}
+
+// TODO: temporary
+impl fmt::Debug for ShaderModule {
+    fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
+        write!(f, "ShaderModule {{ ... }}")
+    }
+}
+
+unsafe impl Send for ShaderModule {}
+unsafe impl Sync for ShaderModule {}
+
+#[derive(Clone, Debug)]
+pub struct SubpassDesc {
+    pub color_attachments: Vec<pass::AttachmentRef>,
+    pub depth_stencil_attachment: Option<pass::AttachmentRef>,
+    pub input_attachments: Vec<pass::AttachmentRef>,
+    pub resolve_attachments: Vec<pass::AttachmentRef>,
+}
+
+impl SubpassDesc {
+    pub(crate) fn is_using(&self, at_id: pass::AttachmentId) -> bool {
+        self.color_attachments
+            .iter()
+            .chain(self.depth_stencil_attachment.iter())
+            .chain(self.input_attachments.iter())
+            .chain(self.resolve_attachments.iter())
+            .any(|&(id, _)| id == at_id)
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct RenderPass {
+    pub attachments: Vec<pass::Attachment>,
+    pub subpasses: Vec<SubpassDesc>,
+}
+
+#[derive(Clone, Debug)]
+pub struct Framebuffer {
+    attachments: Vec<ImageView>,
+    layers: image::Layer,
+}
+
+#[derive(Clone, Debug)]
+pub struct InternalBuffer {
+    raw: *mut d3d11::ID3D11Buffer,
+    // TODO: need to sync between `raw` and `disjoint_cb`, same way as we do with
+    // `MemoryFlush/Invalidate`
+    disjoint_cb: Option<*mut d3d11::ID3D11Buffer>, // if unbound this buffer might be null.
+    srv: Option<*mut d3d11::ID3D11ShaderResourceView>,
+    uav: Option<*mut d3d11::ID3D11UnorderedAccessView>,
+    usage: buffer::Usage,
+    debug_name: Option<String>,
+}
+
+impl InternalBuffer {
+    unsafe fn release_resources(&mut self) {
+        (&*self.raw).Release();
+        self.raw = ptr::null_mut();
+        self.disjoint_cb.take().map(|cb| (&*cb).Release());
+        self.uav.take().map(|uav| (&*uav).Release());
+        self.srv.take().map(|srv| (&*srv).Release());
+        self.usage = buffer::Usage::empty();
+        self.debug_name = None;
+    }
+}
+
+pub struct Buffer {
+    internal: InternalBuffer,
+    is_coherent: bool,
+    memory_ptr: *mut u8,     // null if unbound or non-cpu-visible
+    bound_range: Range<u64>, // 0 if unbound
+    /// Handle to the Memory arena storing this buffer.
+    local_memory_arena: Weak<RwLock<LocalResourceArena<InternalBuffer>>>,
+    /// Index into the above memory arena.
+    ///
+    /// Once memory is bound to a buffer, this should never be None.
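+    /// A `None` here therefore indicates a freshly created buffer that has not
+    /// had memory bound to it yet.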
+ memory_index: Option<thunderdome::Index>, + requirements: memory::Requirements, + bind: d3d11::D3D11_BIND_FLAG, +} + +impl fmt::Debug for Buffer { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str("Buffer") + } +} + +unsafe impl Send for Buffer {} +unsafe impl Sync for Buffer {} + +#[derive(Debug)] +pub struct BufferView; + +pub struct Image { + kind: image::Kind, + usage: image::Usage, + format: format::Format, + view_caps: image::ViewCapabilities, + decomposed_format: conv::DecomposedDxgiFormat, + mip_levels: image::Level, + internal: InternalImage, + bind: d3d11::D3D11_BIND_FLAG, + requirements: memory::Requirements, +} + +impl fmt::Debug for Image { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str("Image") + } +} + +pub struct InternalImage { + raw: *mut d3d11::ID3D11Resource, + copy_srv: Option<ComPtr<d3d11::ID3D11ShaderResourceView>>, + srv: Option<ComPtr<d3d11::ID3D11ShaderResourceView>>, + + /// Contains UAVs for all subresources + unordered_access_views: Vec<ComPtr<d3d11::ID3D11UnorderedAccessView>>, + + /// Contains DSVs for all subresources + depth_stencil_views: Vec<ComPtr<d3d11::ID3D11DepthStencilView>>, + + /// Contains RTVs for all subresources + render_target_views: Vec<ComPtr<d3d11::ID3D11RenderTargetView>>, + + debug_name: Option<String>, +} + +impl InternalImage { + unsafe fn release_resources(&mut self) { + (&*self.raw).Release(); + self.copy_srv = None; + self.srv = None; + self.unordered_access_views.clear(); + self.depth_stencil_views.clear(); + self.render_target_views.clear(); + } +} + +impl fmt::Debug for InternalImage { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str("InternalImage") + } +} + +unsafe impl Send for Image {} +unsafe impl Sync for Image {} + +impl Image { + fn calc_subresource(&self, mip_level: UINT, layer: UINT) -> UINT { + mip_level + (layer * self.mip_levels as UINT) + } + + fn get_uav( + &self, + mip_level: image::Level, + _layer: image::Layer, + ) -> Option<&ComPtr<d3d11::ID3D11UnorderedAccessView>> { + self.internal + .unordered_access_views + .get(self.calc_subresource(mip_level as _, 0) as usize) + } + + fn get_dsv( + &self, + mip_level: image::Level, + layer: image::Layer, + ) -> Option<&ComPtr<d3d11::ID3D11DepthStencilView>> { + self.internal + .depth_stencil_views + .get(self.calc_subresource(mip_level as _, layer as _) as usize) + } + + fn get_rtv( + &self, + mip_level: image::Level, + layer: image::Layer, + ) -> Option<&ComPtr<d3d11::ID3D11RenderTargetView>> { + self.internal + .render_target_views + .get(self.calc_subresource(mip_level as _, layer as _) as usize) + } +} + +impl Drop for Image { + fn drop(&mut self) { + unsafe { + (*self.internal.raw).Release(); + } + } +} + +pub struct ImageView { + subresource: UINT, + format: format::Format, + rtv_handle: Option<*mut d3d11::ID3D11RenderTargetView>, + srv_handle: Option<*mut d3d11::ID3D11ShaderResourceView>, + dsv_handle: Option<*mut d3d11::ID3D11DepthStencilView>, + rodsv_handle: Option<*mut d3d11::ID3D11DepthStencilView>, + uav_handle: Option<*mut d3d11::ID3D11UnorderedAccessView>, + owned: bool, +} + +impl Clone for ImageView { + fn clone(&self) -> Self { + Self { + subresource: self.subresource, + format: self.format, + rtv_handle: self.rtv_handle.clone(), + srv_handle: self.srv_handle.clone(), + dsv_handle: self.dsv_handle.clone(), + rodsv_handle: self.rodsv_handle.clone(), + uav_handle: self.uav_handle.clone(), + owned: false, + } + } +} + +impl Drop for ImageView { + fn drop(&mut self) { + if 
self.owned { + if let Some(rtv) = self.rtv_handle.take() { + unsafe { (&*rtv).Release() }; + } + if let Some(srv) = self.srv_handle.take() { + unsafe { (&*srv).Release() }; + } + if let Some(dsv) = self.dsv_handle.take() { + unsafe { (&*dsv).Release() }; + } + if let Some(rodsv) = self.rodsv_handle.take() { + unsafe { (&*rodsv).Release() }; + } + if let Some(uav) = self.uav_handle.take() { + unsafe { (&*uav).Release() }; + } + } + } +} + +impl fmt::Debug for ImageView { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str("ImageView") + } +} + +unsafe impl Send for ImageView {} +unsafe impl Sync for ImageView {} + +pub struct Sampler { + sampler_handle: ComPtr<d3d11::ID3D11SamplerState>, +} + +impl fmt::Debug for Sampler { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str("Sampler") + } +} + +unsafe impl Send for Sampler {} +unsafe impl Sync for Sampler {} + +pub struct ComputePipeline { + cs: ComPtr<d3d11::ID3D11ComputeShader>, +} + +impl fmt::Debug for ComputePipeline { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str("ComputePipeline") + } +} + +unsafe impl Send for ComputePipeline {} +unsafe impl Sync for ComputePipeline {} + +/// NOTE: some objects are hashed internally and reused when created with the +/// same params[0], need to investigate which interfaces this applies +/// to. +/// +/// [0]: https://msdn.microsoft.com/en-us/library/windows/desktop/ff476500(v=vs.85).aspx +#[derive(Clone)] +pub struct GraphicsPipeline { + vs: ComPtr<d3d11::ID3D11VertexShader>, + gs: Option<ComPtr<d3d11::ID3D11GeometryShader>>, + hs: Option<ComPtr<d3d11::ID3D11HullShader>>, + ds: Option<ComPtr<d3d11::ID3D11DomainShader>>, + ps: Option<ComPtr<d3d11::ID3D11PixelShader>>, + topology: d3d11::D3D11_PRIMITIVE_TOPOLOGY, + input_layout: ComPtr<d3d11::ID3D11InputLayout>, + rasterizer_state: ComPtr<d3d11::ID3D11RasterizerState>, + blend_state: ComPtr<d3d11::ID3D11BlendState>, + depth_stencil_state: Option<DepthStencilState>, + baked_states: pso::BakedStates, + required_bindings: u32, + max_vertex_bindings: u32, + strides: Vec<u32>, +} + +impl fmt::Debug for GraphicsPipeline { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str("GraphicsPipeline") + } +} + +unsafe impl Send for GraphicsPipeline {} +unsafe impl Sync for GraphicsPipeline {} + +type ResourceIndex = u8; +type DescriptorIndex = u16; + +#[derive(Clone, Debug, Default)] +struct RegisterData<T> { + // CBV + c: T, + // SRV + t: T, + // UAV + u: T, + // Sampler + s: T, +} + +impl<T> RegisterData<T> { + fn map<U, F: Fn(&T) -> U>(&self, fun: F) -> RegisterData<U> { + RegisterData { + c: fun(&self.c), + t: fun(&self.t), + u: fun(&self.u), + s: fun(&self.s), + } + } +} + +impl RegisterData<DescriptorIndex> { + fn add_content_many(&mut self, content: DescriptorContent, many: DescriptorIndex) { + if content.contains(DescriptorContent::CBV) { + self.c += many; + } + if content.contains(DescriptorContent::SRV) { + self.t += many; + } + if content.contains(DescriptorContent::UAV) { + self.u += many; + } + if content.contains(DescriptorContent::SAMPLER) { + self.s += many; + } + } + + fn sum(&self) -> DescriptorIndex { + self.c + self.t + self.u + self.s + } +} + +#[derive(Clone, Debug, Default)] +struct MultiStageData<T> { + vs: T, + ps: T, + cs: T, +} + +impl<T> MultiStageData<T> { + fn select(self, stage: ShaderStage) -> T { + match stage { + ShaderStage::Vertex => self.vs, + ShaderStage::Fragment => self.ps, + ShaderStage::Compute => self.cs, + _ => 
panic!("Unsupported stage {:?}", stage), + } + } +} + +impl<T> MultiStageData<RegisterData<T>> { + fn map_register<U, F: Fn(&T) -> U>(&self, fun: F) -> MultiStageData<RegisterData<U>> { + MultiStageData { + vs: self.vs.map(&fun), + ps: self.ps.map(&fun), + cs: self.cs.map(&fun), + } + } + + fn map_other<U, F: Fn(&RegisterData<T>) -> U>(&self, fun: F) -> MultiStageData<U> { + MultiStageData { + vs: fun(&self.vs), + ps: fun(&self.ps), + cs: fun(&self.cs), + } + } +} + +impl MultiStageData<RegisterData<DescriptorIndex>> { + fn add_content_many( + &mut self, + content: DescriptorContent, + stages: pso::ShaderStageFlags, + count: DescriptorIndex, + ) { + if stages.contains(pso::ShaderStageFlags::VERTEX) { + self.vs.add_content_many(content, count); + } + if stages.contains(pso::ShaderStageFlags::FRAGMENT) { + self.ps.add_content_many(content, count); + } + if stages.contains(pso::ShaderStageFlags::COMPUTE) { + self.cs.add_content_many(content, count); + } + } + + fn sum(&self) -> DescriptorIndex { + self.vs.sum() + self.ps.sum() + self.cs.sum() + } +} + +#[derive(Clone, Debug, Default)] +struct RegisterPoolMapping { + offset: DescriptorIndex, + count: ResourceIndex, +} + +#[derive(Clone, Debug, Default)] +struct RegisterInfo { + res_index: ResourceIndex, + pool_offset: DescriptorIndex, + count: ResourceIndex, +} + +impl RegisterInfo { + fn as_some(&self) -> Option<&Self> { + if self.count == 0 { + None + } else { + Some(self) + } + } +} + +#[derive(Clone, Debug, Default)] +struct RegisterAccumulator { + res_index: ResourceIndex, +} + +impl RegisterAccumulator { + fn to_mapping(&self, cur_offset: &mut DescriptorIndex) -> RegisterPoolMapping { + let offset = *cur_offset; + *cur_offset += self.res_index as DescriptorIndex; + + RegisterPoolMapping { + offset, + count: self.res_index, + } + } + + fn advance(&mut self, mapping: &RegisterPoolMapping) -> RegisterInfo { + let res_index = self.res_index; + self.res_index += mapping.count; + RegisterInfo { + res_index, + pool_offset: mapping.offset, + count: mapping.count, + } + } +} + +impl RegisterData<RegisterAccumulator> { + fn to_mapping(&self, pool_offset: &mut DescriptorIndex) -> RegisterData<RegisterPoolMapping> { + RegisterData { + c: self.c.to_mapping(pool_offset), + t: self.t.to_mapping(pool_offset), + u: self.u.to_mapping(pool_offset), + s: self.s.to_mapping(pool_offset), + } + } + + fn advance( + &mut self, + mapping: &RegisterData<RegisterPoolMapping>, + ) -> RegisterData<RegisterInfo> { + RegisterData { + c: self.c.advance(&mapping.c), + t: self.t.advance(&mapping.t), + u: self.u.advance(&mapping.u), + s: self.s.advance(&mapping.s), + } + } +} + +impl MultiStageData<RegisterData<RegisterAccumulator>> { + fn to_mapping(&self) -> MultiStageData<RegisterData<RegisterPoolMapping>> { + let mut pool_offset = 0; + MultiStageData { + vs: self.vs.to_mapping(&mut pool_offset), + ps: self.ps.to_mapping(&mut pool_offset), + cs: self.cs.to_mapping(&mut pool_offset), + } + } + + fn advance( + &mut self, + mapping: &MultiStageData<RegisterData<RegisterPoolMapping>>, + ) -> MultiStageData<RegisterData<RegisterInfo>> { + MultiStageData { + vs: self.vs.advance(&mapping.vs), + ps: self.ps.advance(&mapping.ps), + cs: self.cs.advance(&mapping.cs), + } + } +} + +#[derive(Clone, Debug)] +struct DescriptorSetInfo { + bindings: Arc<Vec<pso::DescriptorSetLayoutBinding>>, + registers: MultiStageData<RegisterData<RegisterInfo>>, +} + +impl DescriptorSetInfo { + fn find_register( + &self, + stage: ShaderStage, + binding_index: pso::DescriptorBinding, + ) -> 
(DescriptorContent, RegisterData<ResourceIndex>) { + let mut res_offsets = self + .registers + .map_register(|info| info.res_index as DescriptorIndex) + .select(stage); + for binding in self.bindings.iter() { + if !binding.stage_flags.contains(stage.to_flag()) { + continue; + } + let content = DescriptorContent::from(binding.ty); + if binding.binding == binding_index { + return (content, res_offsets.map(|offset| *offset as ResourceIndex)); + } + res_offsets.add_content_many(content, 1); + } + panic!("Unable to find binding {:?}", binding_index); + } + + fn find_uav_register( + &self, + stage: ShaderStage, + binding_index: pso::DescriptorBinding, + ) -> (DescriptorContent, RegisterData<ResourceIndex>) { + // Look only where uavs are stored for that stage. + let register_stage = if stage == ShaderStage::Compute { + stage + } else { + ShaderStage::Fragment + }; + + let mut res_offsets = self + .registers + .map_register(|info| info.res_index as DescriptorIndex) + .select(register_stage); + for binding in self.bindings.iter() { + // We don't care what stage they're in, only if they are UAVs or not. + let content = DescriptorContent::from(binding.ty); + if !content.contains(DescriptorContent::UAV) { + continue; + } + if binding.binding == binding_index { + return (content, res_offsets.map(|offset| *offset as ResourceIndex)); + } + res_offsets.add_content_many(content, 1); + } + panic!("Unable to find binding {:?}", binding_index); + } +} + +/// The pipeline layout holds optimized (less api calls) ranges of objects for all descriptor sets +/// belonging to the pipeline object. +#[derive(Debug)] +pub struct PipelineLayout { + sets: Vec<DescriptorSetInfo>, +} + +/// The descriptor set layout contains mappings from a given binding to the offset in our +/// descriptor pool storage and what type of descriptor it is (combined image sampler takes up two +/// handles). +#[derive(Debug)] +pub struct DescriptorSetLayout { + bindings: Arc<Vec<pso::DescriptorSetLayoutBinding>>, + pool_mapping: MultiStageData<RegisterData<RegisterPoolMapping>>, +} + +#[derive(Debug)] +struct CoherentBufferFlushRange { + device_buffer: *mut d3d11::ID3D11Buffer, + host_ptr: *mut u8, + range: SyncRange, +} + +#[derive(Debug)] +struct CoherentBufferInvalidateRange { + device_buffer: *mut d3d11::ID3D11Buffer, + host_ptr: *mut u8, + range: Range<u64>, +} + +#[derive(Debug)] +struct CoherentBuffers { + // descriptor set writes containing coherent resources go into these vecs and are added to the + // command buffers own Vec on binding the set. 
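+    // (`bind_graphics_descriptor_sets` and `bind_compute_descriptor_sets` drain
+    // these into the command buffer's `flush_coherent_memory` and
+    // `invalidate_coherent_memory` lists at bind time.)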
+ flush_coherent_buffers: RefCell<Vec<CoherentBufferFlushRange>>, + invalidate_coherent_buffers: RefCell<Vec<CoherentBufferInvalidateRange>>, +} + +impl CoherentBuffers { + fn _add_flush(&self, old: *mut d3d11::ID3D11Buffer, buffer: &Buffer) { + let new = buffer.internal.raw; + + if old != new { + let mut buffers = self.flush_coherent_buffers.borrow_mut(); + + let pos = buffers.iter().position(|sync| old == sync.device_buffer); + + let sync_range = CoherentBufferFlushRange { + device_buffer: new, + host_ptr: buffer.memory_ptr, + range: SyncRange::Whole, + }; + + if let Some(pos) = pos { + buffers[pos] = sync_range; + } else { + buffers.push(sync_range); + } + + if let Some(disjoint) = buffer.internal.disjoint_cb { + let pos = buffers + .iter() + .position(|sync| disjoint == sync.device_buffer); + + let sync_range = CoherentBufferFlushRange { + device_buffer: disjoint, + host_ptr: buffer.memory_ptr, + range: SyncRange::Whole, + }; + + if let Some(pos) = pos { + buffers[pos] = sync_range; + } else { + buffers.push(sync_range); + } + } + } + } + + fn _add_invalidate(&self, old: *mut d3d11::ID3D11Buffer, buffer: &Buffer) { + let new = buffer.internal.raw; + + if old != new { + let mut buffers = self.invalidate_coherent_buffers.borrow_mut(); + + let pos = buffers.iter().position(|sync| old == sync.device_buffer); + + let sync_range = CoherentBufferInvalidateRange { + device_buffer: new, + host_ptr: buffer.memory_ptr, + range: buffer.bound_range.clone(), + }; + + if let Some(pos) = pos { + buffers[pos] = sync_range; + } else { + buffers.push(sync_range); + } + } + } +} + +/// Newtype around a common interface that all bindable resources inherit from. +#[derive(Debug, Copy, Clone)] +#[repr(transparent)] +struct Descriptor(*mut d3d11::ID3D11DeviceChild); + +bitflags! { + /// A set of D3D11 descriptor types that need to be associated + /// with a single gfx-hal `DescriptorType`. + #[derive(Default)] + pub struct DescriptorContent: u8 { + const CBV = 0x1; + const SRV = 0x2; + const UAV = 0x4; + const SAMPLER = 0x8; + /// Indicates if the descriptor is a dynamic uniform/storage buffer. + /// Important as dynamic buffers are implemented as root descriptors. + const DYNAMIC = 0x10; + } +} + +impl From<pso::DescriptorType> for DescriptorContent { + fn from(ty: pso::DescriptorType) -> Self { + use hal::pso::{ + BufferDescriptorFormat as Bdf, BufferDescriptorType as Bdt, DescriptorType as Dt, + ImageDescriptorType as Idt, + }; + match ty { + Dt::Sampler => DescriptorContent::SAMPLER, + Dt::Image { + ty: Idt::Sampled { with_sampler: true }, + } => DescriptorContent::SRV | DescriptorContent::SAMPLER, + Dt::Image { + ty: Idt::Sampled { + with_sampler: false, + }, + } + | Dt::InputAttachment => DescriptorContent::SRV, + Dt::Image { + ty: Idt::Storage { .. }, + } => DescriptorContent::UAV, + Dt::Buffer { + ty: Bdt::Uniform, + format: + Bdf::Structured { + dynamic_offset: true, + }, + } => DescriptorContent::CBV | DescriptorContent::DYNAMIC, + Dt::Buffer { + ty: Bdt::Uniform, .. + } => DescriptorContent::CBV, + Dt::Buffer { + ty: Bdt::Storage { read_only: true }, + format: + Bdf::Structured { + dynamic_offset: true, + }, + } => DescriptorContent::SRV | DescriptorContent::DYNAMIC, + Dt::Buffer { + ty: Bdt::Storage { read_only: false }, + format: + Bdf::Structured { + dynamic_offset: true, + }, + } => DescriptorContent::UAV | DescriptorContent::DYNAMIC, + Dt::Buffer { + ty: Bdt::Storage { read_only: true }, + .. + } => DescriptorContent::SRV, + Dt::Buffer { + ty: Bdt::Storage { read_only: false }, + .. 
+ } => DescriptorContent::UAV, + } + } +} + +pub struct DescriptorSet { + offset: DescriptorIndex, + len: DescriptorIndex, + handles: *mut Descriptor, + coherent_buffers: Mutex<CoherentBuffers>, + layout: DescriptorSetLayout, +} + +impl fmt::Debug for DescriptorSet { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str("DescriptorSet") + } +} + +unsafe impl Send for DescriptorSet {} +unsafe impl Sync for DescriptorSet {} + +impl DescriptorSet { + fn _add_flush(&self, old: *mut d3d11::ID3D11Buffer, buffer: &Buffer) { + let new = buffer.internal.raw; + + if old != new { + self.coherent_buffers.lock()._add_flush(old, buffer); + } + } + + fn _add_invalidate(&self, old: *mut d3d11::ID3D11Buffer, buffer: &Buffer) { + let new = buffer.internal.raw; + + if old != new { + self.coherent_buffers.lock()._add_invalidate(old, buffer); + } + } + + unsafe fn assign(&self, offset: DescriptorIndex, value: *mut d3d11::ID3D11DeviceChild) { + *self.handles.offset(offset as isize) = Descriptor(value); + } + + unsafe fn assign_stages( + &self, + offsets: &MultiStageData<DescriptorIndex>, + stages: pso::ShaderStageFlags, + value: *mut d3d11::ID3D11DeviceChild, + ) { + if stages.contains(pso::ShaderStageFlags::VERTEX) { + self.assign(offsets.vs, value); + } + if stages.contains(pso::ShaderStageFlags::FRAGMENT) { + self.assign(offsets.ps, value); + } + if stages.contains(pso::ShaderStageFlags::COMPUTE) { + self.assign(offsets.cs, value); + } + } +} + +#[derive(Debug)] +pub struct DescriptorPool { + handles: Vec<Descriptor>, + allocator: RangeAllocator<DescriptorIndex>, +} + +unsafe impl Send for DescriptorPool {} +unsafe impl Sync for DescriptorPool {} + +impl DescriptorPool { + fn with_capacity(size: DescriptorIndex) -> Self { + DescriptorPool { + handles: vec![Descriptor(ptr::null_mut()); size as usize], + allocator: RangeAllocator::new(0..size), + } + } +} + +impl pso::DescriptorPool<Backend> for DescriptorPool { + unsafe fn allocate_set( + &mut self, + layout: &DescriptorSetLayout, + ) -> Result<DescriptorSet, pso::AllocationError> { + let len = layout + .pool_mapping + .map_register(|mapping| mapping.count as DescriptorIndex) + .sum() + .max(1); + + self.allocator + .allocate_range(len) + .map(|range| { + for handle in &mut self.handles[range.start as usize..range.end as usize] { + *handle = Descriptor(ptr::null_mut()); + } + + DescriptorSet { + offset: range.start, + len, + handles: self.handles.as_mut_ptr().offset(range.start as _), + coherent_buffers: Mutex::new(CoherentBuffers { + flush_coherent_buffers: RefCell::new(Vec::new()), + invalidate_coherent_buffers: RefCell::new(Vec::new()), + }), + layout: DescriptorSetLayout { + bindings: Arc::clone(&layout.bindings), + pool_mapping: layout.pool_mapping.clone(), + }, + } + }) + .map_err(|_| pso::AllocationError::OutOfPoolMemory) + } + + unsafe fn free<I>(&mut self, descriptor_sets: I) + where + I: IntoIterator<Item = DescriptorSet>, + { + for set in descriptor_sets { + self.allocator + .free_range(set.offset..(set.offset + set.len)) + } + } + + unsafe fn reset(&mut self) { + self.allocator.reset(); + } +} + +#[derive(Debug)] +pub struct RawFence { + mutex: Mutex<bool>, + condvar: Condvar, +} + +pub type Fence = Arc<RawFence>; + +#[derive(Debug)] +pub struct Semaphore; +#[derive(Debug)] +pub struct QueryPool; + +#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)] +pub enum Backend {} +impl hal::Backend for Backend { + type Instance = Instance; + type PhysicalDevice = PhysicalDevice; + type Device = device::Device; + type Surface = Surface; 
+
+    type QueueFamily = QueueFamily;
+    type CommandQueue = CommandQueue;
+    type CommandBuffer = CommandBuffer;
+
+    type Memory = Memory;
+    type CommandPool = CommandPool;
+
+    type ShaderModule = ShaderModule;
+    type RenderPass = RenderPass;
+    type Framebuffer = Framebuffer;
+
+    type Buffer = Buffer;
+    type BufferView = BufferView;
+    type Image = Image;
+
+    type ImageView = ImageView;
+    type Sampler = Sampler;
+
+    type ComputePipeline = ComputePipeline;
+    type GraphicsPipeline = GraphicsPipeline;
+    type PipelineLayout = PipelineLayout;
+    type PipelineCache = ();
+    type DescriptorSetLayout = DescriptorSetLayout;
+    type DescriptorPool = DescriptorPool;
+    type DescriptorSet = DescriptorSet;
+
+    type Fence = Fence;
+    type Semaphore = Semaphore;
+    type Event = ();
+    type QueryPool = QueryPool;
+}
+
+fn validate_line_width(width: f32) {
+    // Note from the Vulkan spec:
+    // > If the wide lines feature is not enabled, lineWidth must be 1.0
+    // Simply assert and no-op because DX11 never exposes `Features::LINE_WIDTH`
+    assert_eq!(width, 1.0);
+}
diff --git a/third_party/rust/gfx-backend-dx11/src/shader.rs b/third_party/rust/gfx-backend-dx11/src/shader.rs
new file mode 100644
index 0000000000..0184f21d3e
--- /dev/null
+++ b/third_party/rust/gfx-backend-dx11/src/shader.rs
@@ -0,0 +1,405 @@
+use std::{ffi, ptr, slice};
+
+use spirv_cross::{hlsl, spirv, ErrorCode as SpirvErrorCode};
+
+use winapi::{
+    shared::winerror,
+    um::{d3d11, d3dcommon, d3dcompiler},
+};
+use wio::com::ComPtr;
+
+use auxil::{spirv_cross_specialize_ast, ShaderStage};
+use hal::{device, pso};
+
+use crate::{conv, Backend, PipelineLayout};
+
+/// Emit an error during shader module creation. Used when we don't expect an
+/// error but might panic due to an exception in SPIRV-Cross.
+fn gen_unexpected_error(err: SpirvErrorCode) -> device::ShaderError {
+    let msg = match err {
+        SpirvErrorCode::CompilationError(msg) => msg,
+        SpirvErrorCode::Unhandled => "Unexpected error".into(),
+    };
+    device::ShaderError::CompilationFailed(msg)
+}
+
+/// Emit an error during shader module creation. Used when we execute a query command.
+fn gen_query_error(err: SpirvErrorCode) -> device::ShaderError {
+    let msg = match err {
+        SpirvErrorCode::CompilationError(msg) => msg,
+        SpirvErrorCode::Unhandled => "Unknown query error".into(),
+    };
+    device::ShaderError::CompilationFailed(msg)
+}
+
+/// Introspects the input attributes of a given SPIR-V shader and returns an optional vertex semantic remapping.
+///
+/// The returned hashmap has the attribute location as a key and an optional remapping to a two-part semantic.
+///
+/// eg.
+/// `2 -> None` means use the default semantic `TEXCOORD2`
+/// `2 -> Some((0, 2))` means use the two-part semantic `TEXCOORD0_2`. This is how matrices are represented by spirv-cross.
+///
+/// This is a temporary workaround for https://github.com/KhronosGroup/SPIRV-Cross/issues/1512.
+///
+/// This workaround also exists under the same name in the DX12 backend.
+pub(crate) fn introspect_spirv_vertex_semantic_remapping(
+    raw_data: &[u32],
+) -> Result<auxil::FastHashMap<u32, Option<(u32, u32)>>, device::ShaderError> {
+    // This is inefficient as we already parse it once before. This is a temporary workaround only called
+    // on vertex shaders. If this becomes permanent or shows up in profiles, deduplicate these as the first course of action.
+    let ast = parse_spirv(raw_data)?;
+
+    let mut map = auxil::FastHashMap::default();
+
+    let inputs = ast
+        .get_shader_resources()
+        .map_err(gen_query_error)?
+ .stage_inputs; + for input in inputs { + let idx = ast + .get_decoration(input.id, spirv::Decoration::Location) + .map_err(gen_query_error)?; + + let ty = ast.get_type(input.type_id).map_err(gen_query_error)?; + + match ty { + spirv::Type::Boolean { columns, .. } + | spirv::Type::Int { columns, .. } + | spirv::Type::UInt { columns, .. } + | spirv::Type::Half { columns, .. } + | spirv::Type::Float { columns, .. } + | spirv::Type::Double { columns, .. } + if columns > 1 => + { + for col in 0..columns { + if let Some(_) = map.insert(idx + col, Some((idx, col))) { + return Err(device::ShaderError::CompilationFailed(format!( + "Shader has overlapping input attachments at location {}", + idx + ))); + } + } + } + _ => { + if let Some(_) = map.insert(idx, None) { + return Err(device::ShaderError::CompilationFailed(format!( + "Shader has overlapping input attachments at location {}", + idx + ))); + } + } + } + } + + Ok(map) +} + +pub(crate) fn compile_spirv_entrypoint( + raw_data: &[u32], + stage: ShaderStage, + source: &pso::EntryPoint<Backend>, + layout: &PipelineLayout, + features: &hal::Features, +) -> Result<Option<ComPtr<d3dcommon::ID3DBlob>>, device::ShaderError> { + let mut ast = parse_spirv(raw_data)?; + spirv_cross_specialize_ast(&mut ast, &source.specialization)?; + + patch_spirv_resources(&mut ast, stage, layout)?; + let shader_model = hlsl::ShaderModel::V5_0; + let shader_code = translate_spirv( + &mut ast, + shader_model, + layout, + stage, + features, + source.entry, + )?; + log::debug!("Generated {:?} shader:\n{}", stage, shader_code,); + + let real_name = ast + .get_cleansed_entry_point_name(source.entry, conv::map_stage(stage)) + .map_err(gen_query_error)?; + + // TODO: opt: don't query *all* entry points. + let entry_points = ast.get_entry_points().map_err(gen_query_error)?; + entry_points + .iter() + .find(|entry_point| entry_point.name == real_name) + .ok_or(device::ShaderError::MissingEntryPoint(source.entry.into())) + .and_then(|entry_point| { + let stage = conv::map_execution_model(entry_point.execution_model); + let shader = compile_hlsl_shader( + stage, + shader_model, + &entry_point.name, + shader_code.as_bytes(), + )?; + Ok(Some(unsafe { ComPtr::from_raw(shader) })) + }) +} + +pub(crate) fn compile_hlsl_shader( + stage: ShaderStage, + shader_model: hlsl::ShaderModel, + entry: &str, + code: &[u8], +) -> Result<*mut d3dcommon::ID3DBlob, device::ShaderError> { + let stage_str = { + let stage = match stage { + ShaderStage::Vertex => "vs", + ShaderStage::Fragment => "ps", + ShaderStage::Compute => "cs", + _ => unimplemented!(), + }; + + let model = match shader_model { + hlsl::ShaderModel::V5_0 => "5_0", + // TODO: >= 11.3 + hlsl::ShaderModel::V5_1 => "5_1", + // TODO: >= 12?, no mention of 11 on msdn + hlsl::ShaderModel::V6_0 => "6_0", + _ => unimplemented!(), + }; + + format!("{}_{}\0", stage, model) + }; + + let mut blob = ptr::null_mut(); + let mut error = ptr::null_mut(); + let entry = ffi::CString::new(entry).unwrap(); + let hr = unsafe { + d3dcompiler::D3DCompile( + code.as_ptr() as *const _, + code.len(), + ptr::null(), + ptr::null(), + ptr::null_mut(), + entry.as_ptr() as *const _, + stage_str.as_ptr() as *const i8, + 1, + 0, + &mut blob as *mut *mut _, + &mut error as *mut *mut _, + ) + }; + + if !winerror::SUCCEEDED(hr) { + let error = unsafe { ComPtr::<d3dcommon::ID3DBlob>::from_raw(error) }; + let message = unsafe { + let pointer = error.GetBufferPointer(); + let size = error.GetBufferSize(); + let slice = slice::from_raw_parts(pointer as *const u8, size as 
usize); + String::from_utf8_lossy(slice).into_owned() + }; + + Err(device::ShaderError::CompilationFailed(message)) + } else { + Ok(blob) + } +} + +fn parse_spirv(raw_data: &[u32]) -> Result<spirv::Ast<hlsl::Target>, device::ShaderError> { + let module = spirv::Module::from_words(raw_data); + + spirv::Ast::parse(&module).map_err(|err| { + let msg = match err { + SpirvErrorCode::CompilationError(msg) => msg, + SpirvErrorCode::Unhandled => "Unknown parsing error".into(), + }; + device::ShaderError::CompilationFailed(msg) + }) +} + +fn patch_spirv_resources( + ast: &mut spirv::Ast<hlsl::Target>, + stage: ShaderStage, + layout: &PipelineLayout, +) -> Result<(), device::ShaderError> { + // we remap all `layout(binding = n, set = n)` to a flat space which we get from our + // `PipelineLayout` which knows of all descriptor set layouts + + let shader_resources = ast.get_shader_resources().map_err(gen_query_error)?; + for image in &shader_resources.separate_images { + let set = ast + .get_decoration(image.id, spirv::Decoration::DescriptorSet) + .map_err(gen_query_error)? as usize; + let binding = ast + .get_decoration(image.id, spirv::Decoration::Binding) + .map_err(gen_query_error)?; + let (_content, res_index) = layout.sets[set].find_register(stage, binding); + + ast.set_decoration(image.id, spirv::Decoration::Binding, res_index.t as u32) + .map_err(gen_unexpected_error)?; + } + + for uniform_buffer in &shader_resources.uniform_buffers { + let set = ast + .get_decoration(uniform_buffer.id, spirv::Decoration::DescriptorSet) + .map_err(gen_query_error)? as usize; + let binding = ast + .get_decoration(uniform_buffer.id, spirv::Decoration::Binding) + .map_err(gen_query_error)?; + let (_content, res_index) = layout.sets[set].find_register(stage, binding); + + ast.set_decoration( + uniform_buffer.id, + spirv::Decoration::Binding, + res_index.c as u32, + ) + .map_err(gen_unexpected_error)?; + } + + for storage_buffer in &shader_resources.storage_buffers { + let set = ast + .get_decoration(storage_buffer.id, spirv::Decoration::DescriptorSet) + .map_err(gen_query_error)? as usize; + let binding = ast + .get_decoration(storage_buffer.id, spirv::Decoration::Binding) + .map_err(gen_query_error)?; + + let read_only = match layout.sets[set].bindings[binding as usize].ty { + pso::DescriptorType::Buffer { + ty: pso::BufferDescriptorType::Storage { read_only }, + .. + } => read_only, + _ => unreachable!(), + }; + + let (_content, res_index) = if read_only { + layout.sets[set].find_register(stage, binding) + } else { + layout.sets[set].find_uav_register(stage, binding) + }; + + // If the binding is read/write, we need to generate a UAV here. + if !read_only { + ast.set_member_decoration(storage_buffer.type_id, 0, spirv::Decoration::NonWritable, 0) + .map_err(gen_unexpected_error)?; + } + + let index = if read_only { + res_index.t as u32 + } else if stage == ShaderStage::Compute { + res_index.u as u32 + } else { + d3d11::D3D11_PS_CS_UAV_REGISTER_COUNT - 1 - res_index.u as u32 + }; + + ast.set_decoration(storage_buffer.id, spirv::Decoration::Binding, index) + .map_err(gen_unexpected_error)?; + } + + for image in &shader_resources.storage_images { + let set = ast + .get_decoration(image.id, spirv::Decoration::DescriptorSet) + .map_err(gen_query_error)? as usize; + let binding = ast + .get_decoration(image.id, spirv::Decoration::Binding) + .map_err(gen_query_error)?; + let (_content, res_index) = layout.sets[set].find_uav_register(stage, binding); + + // Read only storage images are generated as UAVs by spirv-cross. 
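+        // (even when marked `readonly` in SPIR-V they still consume a `u` register).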
+        //
+        // Compute uses a bottom-up stack, all other stages use top-down.
+        let index = if stage == ShaderStage::Compute {
+            res_index.u as u32
+        } else {
+            d3d11::D3D11_PS_CS_UAV_REGISTER_COUNT - 1 - res_index.u as u32
+        };
+
+        ast.set_decoration(image.id, spirv::Decoration::Binding, index)
+            .map_err(gen_unexpected_error)?;
+    }
+
+    for sampler in &shader_resources.separate_samplers {
+        let set = ast
+            .get_decoration(sampler.id, spirv::Decoration::DescriptorSet)
+            .map_err(gen_query_error)? as usize;
+        let binding = ast
+            .get_decoration(sampler.id, spirv::Decoration::Binding)
+            .map_err(gen_query_error)?;
+        let (_content, res_index) = layout.sets[set].find_register(stage, binding);
+
+        ast.set_decoration(sampler.id, spirv::Decoration::Binding, res_index.s as u32)
+            .map_err(gen_unexpected_error)?;
+    }
+
+    for image in &shader_resources.sampled_images {
+        let set = ast
+            .get_decoration(image.id, spirv::Decoration::DescriptorSet)
+            .map_err(gen_query_error)? as usize;
+        let binding = ast
+            .get_decoration(image.id, spirv::Decoration::Binding)
+            .map_err(gen_query_error)?;
+        let (_content, res_index) = layout.sets[set].find_register(stage, binding);
+
+        ast.set_decoration(image.id, spirv::Decoration::Binding, res_index.t as u32)
+            .map_err(gen_unexpected_error)?;
+    }
+
+    assert!(
+        shader_resources.push_constant_buffers.len() <= 1,
+        "Only 1 push constant buffer is supported"
+    );
+    for push_constant_buffer in &shader_resources.push_constant_buffers {
+        ast.set_decoration(
+            push_constant_buffer.id,
+            spirv::Decoration::DescriptorSet,
+            0, // value doesn't matter, just needs a value
+        )
+        .map_err(gen_unexpected_error)?;
+        ast.set_decoration(
+            push_constant_buffer.id,
+            spirv::Decoration::Binding,
+            d3d11::D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT - 1,
+        )
+        .map_err(gen_unexpected_error)?;
+    }
+
+    Ok(())
+}
+
+fn translate_spirv(
+    ast: &mut spirv::Ast<hlsl::Target>,
+    shader_model: hlsl::ShaderModel,
+    _layout: &PipelineLayout,
+    stage: ShaderStage,
+    features: &hal::Features,
+    entry_point: &str,
+) -> Result<String, device::ShaderError> {
+    let mut compile_options = hlsl::CompilerOptions::default();
+    compile_options.shader_model = shader_model;
+    compile_options.vertex.invert_y = !features.contains(hal::Features::NDC_Y_UP);
+    compile_options.force_zero_initialized_variables = true;
+    compile_options.entry_point = Some((entry_point.to_string(), conv::map_stage(stage)));
+
+    //let stage_flag = stage.into();
+
+    // TODO:
+    /*let root_constant_layout = layout
+        .root_constants
+        .iter()
+        .filter_map(|constant| if constant.stages.contains(stage_flag) {
+            Some(hlsl::RootConstant {
+                start: constant.range.start * 4,
+                end: constant.range.end * 4,
+                binding: constant.range.start,
+                space: 0,
+            })
+        } else {
+            None
+        })
+        .collect();*/
+    ast.set_compiler_options(&compile_options)
+        .map_err(gen_unexpected_error)?;
+    //ast.set_root_constant_layout(root_constant_layout)
+    //    .map_err(gen_unexpected_error)?;
+    ast.compile().map_err(|err| {
+        let msg = match err {
+            SpirvErrorCode::CompilationError(msg) => msg,
+            SpirvErrorCode::Unhandled => "Unknown compile error".into(),
+        };
+        device::ShaderError::CompilationFailed(msg)
+    })
+}
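+
+// A rough usage sketch of the helpers above (illustrative only; the caller
+// owns the returned raw ID3DBlob pointer and must release it):
+//
+//     let blob = compile_hlsl_shader(
+//         ShaderStage::Fragment,
+//         hlsl::ShaderModel::V5_0,
+//         "main",
+//         b"float4 main() : SV_Target { return float4(1.0, 0.0, 0.0, 1.0); }",
+//     )?;
+//     let bytecode = unsafe {
+//         slice::from_raw_parts(
+//             (*blob).GetBufferPointer() as *const u8,
+//             (*blob).GetBufferSize(),
+//         )
+//     };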