summaryrefslogtreecommitdiffstats
path: root/third_party/rust/gfx-backend-dx11/shaders
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--third_party/rust/gfx-backend-dx11/shaders/blit.hlsl63
-rw-r--r--third_party/rust/gfx-backend-dx11/shaders/clear.hlsl22
-rw-r--r--third_party/rust/gfx-backend-dx11/shaders/copy.hlsl615
3 files changed, 700 insertions, 0 deletions
diff --git a/third_party/rust/gfx-backend-dx11/shaders/blit.hlsl b/third_party/rust/gfx-backend-dx11/shaders/blit.hlsl
new file mode 100644
index 0000000000..f627e5c9b9
--- /dev/null
+++ b/third_party/rust/gfx-backend-dx11/shaders/blit.hlsl
@@ -0,0 +1,63 @@
+cbuffer Region : register(b0) {
+ float2 offset;
+ float2 extent;
+ float z;
+ float level;
+};
+
+struct VsOutput {
+ float4 pos: SV_POSITION;
+ float4 uv: TEXCOORD0;
+};
+
+// Create a screen filling triangle
+VsOutput vs_blit_2d(uint id: SV_VertexID) {
+ float2 coord = float2((id << 1) & 2, id & 2);
+ VsOutput output = {
+ float4(float2(-1.0, 1.0) + coord * float2(2.0, -2.0), 0.0, 1.0),
+ float4(offset + coord * extent, z, level)
+ };
+ return output;
+}
+
+SamplerState BlitSampler : register(s0);
+
+Texture2DArray<uint4> BlitSrc_Uint : register(t0);
+Texture2DArray<int4> BlitSrc_Sint : register(t0);
+Texture2DArray<float4> BlitSrc_Float : register(t0);
+
+// TODO: get rid of GetDimensions call
+uint4 Nearest_Uint(float4 uv)
+{
+ float4 size;
+ BlitSrc_Uint.GetDimensions(0, size.x, size.y, size.z, size.w);
+
+ float2 pix = uv.xy * size.xy;
+
+ return BlitSrc_Uint.Load(int4(int2(pix), uv.zw));
+}
+
+int4 Nearest_Sint(float4 uv)
+{
+ float4 size;
+ BlitSrc_Sint.GetDimensions(0, size.x, size.y, size.z, size.w);
+
+ float2 pix = uv.xy * size.xy;
+
+ return BlitSrc_Sint.Load(int4(int2(pix), uv.zw));
+}
+
+uint4 ps_blit_2d_uint(VsOutput input) : SV_Target
+{
+ return Nearest_Uint(input.uv);
+}
+
+int4 ps_blit_2d_int(VsOutput input) : SV_Target
+{
+ return Nearest_Sint(input.uv);
+}
+
+float4 ps_blit_2d_float(VsOutput input) : SV_Target
+{
+ return BlitSrc_Float.SampleLevel(BlitSampler, input.uv.xyz, input.uv.w);
+}
diff --git a/third_party/rust/gfx-backend-dx11/shaders/clear.hlsl b/third_party/rust/gfx-backend-dx11/shaders/clear.hlsl
new file mode 100644
index 0000000000..5c91fe6b49
--- /dev/null
+++ b/third_party/rust/gfx-backend-dx11/shaders/clear.hlsl
@@ -0,0 +1,22 @@
+cbuffer ClearColorF32 : register(b0) { float4 ClearF32; };
+cbuffer ClearColorU32 : register(b0) { uint4 ClearU32; };
+cbuffer ClearColorI32 : register(b0) { int4 ClearI32; };
+cbuffer ClearColorDepth : register(b0) { float ClearDepth; };
+
+// fullscreen triangle
+float4 vs_partial_clear(uint id : SV_VertexID) : SV_Position
+{
+ return float4(
+ float(id / 2) * 4.0 - 1.0,
+ float(id % 2) * 4.0 - 1.0,
+ 0.0,
+ 1.0
+ );
+}
+
+// TODO: send constants through VS as flat attributes
+float4 ps_partial_clear_float() : SV_Target0 { return ClearF32; }
+uint4 ps_partial_clear_uint() : SV_Target0 { return ClearU32; }
+int4 ps_partial_clear_int() : SV_Target0 { return ClearI32; }
+float ps_partial_clear_depth() : SV_Depth { return ClearDepth; }
+void ps_partial_clear_stencil() { }
diff --git a/third_party/rust/gfx-backend-dx11/shaders/copy.hlsl b/third_party/rust/gfx-backend-dx11/shaders/copy.hlsl
new file mode 100644
index 0000000000..f8b5d8523b
--- /dev/null
+++ b/third_party/rust/gfx-backend-dx11/shaders/copy.hlsl
@@ -0,0 +1,615 @@
+struct BufferCopy {
+ uint4 SrcDst;
+};
+
+struct ImageCopy {
+ uint4 Src;
+ uint4 Dst;
+};
+
+struct BufferImageCopy {
+ // x=offset, yz=size
+ uint4 BufferVars;
+ uint4 ImageOffset;
+ uint4 ImageExtent;
+ uint4 ImageSize;
+};
+
+cbuffer CopyConstants : register(b0) {
+ BufferCopy BufferCopies;
+ ImageCopy ImageCopies;
+ BufferImageCopy BufferImageCopies;
+};
+
+
+uint3 GetDestBounds()
+{
+ return min(
+ BufferImageCopies.ImageOffset + BufferImageCopies.ImageExtent,
+ BufferImageCopies.ImageSize
+ );
+}
+
+uint3 GetImageCopyDst(uint3 dispatch_thread_id)
+{
+ return uint3(ImageCopies.Dst.xy + dispatch_thread_id.xy, ImageCopies.Dst.z);
+}
+
+uint3 GetImageCopySrc(uint3 dispatch_thread_id)
+{
+ return uint3(ImageCopies.Src.xy + dispatch_thread_id.xy, ImageCopies.Src.z);
+}
+
+uint3 GetImageDst(uint3 dispatch_thread_id)
+{
+ return uint3(BufferImageCopies.ImageOffset.xy + dispatch_thread_id.xy, BufferImageCopies.ImageOffset.z);
+}
+
+uint3 GetImageSrc(uint3 dispatch_thread_id)
+{
+ return uint3(BufferImageCopies.ImageOffset.xy + dispatch_thread_id.xy, BufferImageCopies.ImageOffset.z);
+}
+
+uint GetBufferDst128(uint3 dispatch_thread_id)
+{
+ return BufferImageCopies.BufferVars.x + dispatch_thread_id.x * 16 + dispatch_thread_id.y * 16 * max(BufferImageCopies.BufferVars.y, BufferImageCopies.ImageExtent.x);
+}
+uint GetBufferSrc128(uint3 dispatch_thread_id)
+{
+ return BufferImageCopies.BufferVars.x + dispatch_thread_id.x * 16 + dispatch_thread_id.y * 16 * max(BufferImageCopies.BufferVars.y, BufferImageCopies.ImageExtent.x);
+}
+
+uint GetBufferDst64(uint3 dispatch_thread_id)
+{
+ return BufferImageCopies.BufferVars.x + dispatch_thread_id.x * 8 + dispatch_thread_id.y * 8 * max(BufferImageCopies.BufferVars.y, BufferImageCopies.ImageExtent.x);
+}
+uint GetBufferSrc64(uint3 dispatch_thread_id)
+{
+ return BufferImageCopies.BufferVars.x + dispatch_thread_id.x * 8 + dispatch_thread_id.y * 8 * max(BufferImageCopies.BufferVars.y, BufferImageCopies.ImageExtent.x);
+}
+
+uint GetBufferDst32(uint3 dispatch_thread_id)
+{
+ return BufferImageCopies.BufferVars.x + dispatch_thread_id.x * 4 + dispatch_thread_id.y * 4 * max(BufferImageCopies.BufferVars.y, BufferImageCopies.ImageExtent.x);
+}
+uint GetBufferSrc32(uint3 dispatch_thread_id)
+{
+ return BufferImageCopies.BufferVars.x + dispatch_thread_id.x * 4 + dispatch_thread_id.y * 4 * max(BufferImageCopies.BufferVars.y, BufferImageCopies.ImageExtent.x);
+}
+
+uint GetBufferDst16(uint3 dispatch_thread_id)
+{
+ return BufferImageCopies.BufferVars.x + dispatch_thread_id.x * 4 + dispatch_thread_id.y * 2 * max(BufferImageCopies.BufferVars.y, BufferImageCopies.ImageExtent.x);
+}
+uint GetBufferSrc16(uint3 dispatch_thread_id)
+{
+ return BufferImageCopies.BufferVars.x + dispatch_thread_id.x * 4 + dispatch_thread_id.y * 2 * max(BufferImageCopies.BufferVars.y, BufferImageCopies.ImageExtent.x);
+}
+
+uint GetBufferDst8(uint3 dispatch_thread_id)
+{
+ return BufferImageCopies.BufferVars.x + dispatch_thread_id.x * 4 + dispatch_thread_id.y * max(BufferImageCopies.BufferVars.y, BufferImageCopies.ImageExtent.x);
+}
+uint GetBufferSrc8(uint3 dispatch_thread_id)
+{
+ return BufferImageCopies.BufferVars.x + dispatch_thread_id.x * 4 + dispatch_thread_id.y * max(BufferImageCopies.BufferVars.y, BufferImageCopies.ImageExtent.x);
+}
+
+
+uint4 Uint32ToUint8x4(uint data)
+{
+ return (data >> uint4(0, 8, 16, 24)) & 0xFF;
+}
+
+uint2 Uint32ToUint16x2(uint data)
+{
+ return (data >> uint2(0, 16)) & 0xFFFF;
+}
+
+uint Uint8x4ToUint32(uint4 data)
+{
+ return dot(min(data, 0xFF), 1 << uint4(0, 8, 16, 24));
+}
+
+uint Uint16x2ToUint32(uint2 data)
+{
+ return dot(min(data, 0xFFFF), 1 << uint2(0, 16));
+}
+
+uint2 Uint16ToUint8x2(uint data)
+{
+ return (data >> uint2(0, 8)) & 0xFF;
+}
+
+uint Uint8x2ToUint16(uint2 data)
+{
+ return dot(min(data, 0xFF), 1 << uint2(0, 8));
+}
+
+uint4 Float4ToUint8x4(float4 data)
+{
+ return uint4(data * 255 + .5f);
+}
+
+// Buffers are always R32-aligned
+ByteAddressBuffer BufferCopySrc : register(t0);
+RWByteAddressBuffer BufferCopyDst : register(u0);
+
+RWTexture1DArray<uint> Image1CopyDstR : register(u0);
+RWTexture1DArray<uint2> Image1CopyDstRg : register(u0);
+RWTexture1DArray<uint4> Image1CopyDstRgba : register(u0);
+
+Texture2DArray<uint4> Image2CopySrc : register(t0);
+RWTexture2DArray<uint> Image2CopyDstR : register(u0);
+RWTexture2DArray<uint2> Image2CopyDstRg : register(u0);
+RWTexture2DArray<uint4> Image2CopyDstRgba : register(u0);
+
+Texture2DArray<float4> ImageCopy2SrcBgra : register(t0);
+
+// Image<->Image copies
+[numthreads(1, 1, 1)]
+void cs_copy_image2d_r8g8_image2d_r16(uint3 dispatch_thread_id : SV_DispatchThreadID)
+{
+ uint3 dst_idx = GetImageCopyDst(dispatch_thread_id);
+ uint3 src_idx = GetImageCopySrc(dispatch_thread_id);
+
+ Image2CopyDstR[dst_idx] = Uint8x2ToUint16(Image2CopySrc[src_idx]);
+}
+
+[numthreads(1, 1, 1)]
+void cs_copy_image2d_r16_image2d_r8g8(uint3 dispatch_thread_id : SV_DispatchThreadID)
+{
+ uint3 dst_idx = GetImageCopyDst(dispatch_thread_id);
+ uint3 src_idx = GetImageCopySrc(dispatch_thread_id);
+
+ Image2CopyDstRg[dst_idx] = Uint16ToUint8x2(Image2CopySrc[src_idx]);
+}
+
+[numthreads(1, 1, 1)]
+void cs_copy_image2d_r8g8b8a8_image2d_r32(uint3 dispatch_thread_id : SV_DispatchThreadID)
+{
+ uint3 dst_idx = GetImageCopyDst(dispatch_thread_id);
+ uint3 src_idx = GetImageCopySrc(dispatch_thread_id);
+
+ Image2CopyDstR[dst_idx] = Uint8x4ToUint32(Image2CopySrc[src_idx]);
+}
+
+[numthreads(1, 1, 1)]
+void cs_copy_image2d_r8g8b8a8_image2d_r16g16(uint3 dispatch_thread_id : SV_DispatchThreadID)
+{
+ uint3 dst_idx = GetImageCopyDst(dispatch_thread_id);
+ uint3 src_idx = GetImageCopySrc(dispatch_thread_id);
+
+ Image2CopyDstRg[dst_idx] = Uint32ToUint16x2(Uint8x4ToUint32(Image2CopySrc[src_idx]));
+}
+
+[numthreads(1, 1, 1)]
+void cs_copy_image2d_r16g16_image2d_r32(uint3 dispatch_thread_id : SV_DispatchThreadID)
+{
+ uint3 dst_idx = GetImageCopyDst(dispatch_thread_id);
+ uint3 src_idx = GetImageCopySrc(dispatch_thread_id);
+
+ Image2CopyDstR[dst_idx] = Uint16x2ToUint32(Image2CopySrc[src_idx]);
+}
+
+[numthreads(1, 1, 1)]
+void cs_copy_image2d_r16g16_image2d_r8g8b8a8(uint3 dispatch_thread_id : SV_DispatchThreadID)
+{
+ uint3 dst_idx = GetImageCopyDst(dispatch_thread_id);
+ uint3 src_idx = GetImageCopySrc(dispatch_thread_id);
+
+ Image2CopyDstRgba[dst_idx] = Uint32ToUint8x4(Uint16x2ToUint32(Image2CopySrc[src_idx]));
+}
+
+[numthreads(1, 1, 1)]
+void cs_copy_image2d_r32_image2d_r16g16(uint3 dispatch_thread_id : SV_DispatchThreadID)
+{
+ uint3 dst_idx = GetImageCopyDst(dispatch_thread_id);
+ uint3 src_idx = GetImageCopySrc(dispatch_thread_id);
+
+ Image2CopyDstRg[dst_idx] = Uint32ToUint16x2(Image2CopySrc[src_idx]);
+}
+
+[numthreads(1, 1, 1)]
+void cs_copy_image2d_r32_image2d_r8g8b8a8(uint3 dispatch_thread_id : SV_DispatchThreadID)
+{
+ uint3 dst_idx = GetImageCopyDst(dispatch_thread_id);
+ uint3 src_idx = GetImageCopySrc(dispatch_thread_id);
+
+ Image2CopyDstRgba[dst_idx] = Uint32ToUint8x4(Image2CopySrc[src_idx]);
+}
+
+//#define COPY_1D_NUM_THREAD 64 //TODO
+#define COPY_2D_NUM_THREAD_X 8
+#define COPY_2D_NUM_THREAD_Y 8
+
+// Buffer<->Image copies
+
+// R32G32B32A32
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_buffer_image2d_r32g32b32a32(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+ uint3 dst_idx = GetImageDst(dispatch_thread_id);
+ uint3 bounds = GetDestBounds();
+ if (dst_idx.x >= bounds.x || dst_idx.y >= bounds.y) {
+ return;
+ }
+
+ uint src_idx = GetBufferSrc128(dispatch_thread_id);
+
+ Image2CopyDstRgba[dst_idx] = uint4(
+ BufferCopySrc.Load(src_idx),
+ BufferCopySrc.Load(src_idx + 1 * 4),
+ BufferCopySrc.Load(src_idx + 2 * 4),
+ BufferCopySrc.Load(src_idx + 3 * 4)
+ );
+}
+
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_image2d_r32g32b32a32_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+ uint3 src_idx = GetImageSrc(dispatch_thread_id);
+ uint3 bounds = GetDestBounds();
+ if (src_idx.x >= bounds.x || src_idx.y >= bounds.y) {
+ return;
+ }
+
+ uint4 data = Image2CopySrc[src_idx];
+ uint dst_idx = GetBufferDst128(dispatch_thread_id);
+
+ BufferCopyDst.Store(dst_idx, data.x);
+ BufferCopyDst.Store(dst_idx + 1 * 4, data.y);
+ BufferCopyDst.Store(dst_idx + 2 * 4, data.z);
+ BufferCopyDst.Store(dst_idx + 3 * 4, data.w);
+}
+
+// R32G32
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_buffer_image2d_r32g32(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+ uint3 dst_idx = GetImageDst(dispatch_thread_id);
+ uint3 bounds = GetDestBounds();
+ if (dst_idx.x >= bounds.x || dst_idx.y >= bounds.y) {
+ return;
+ }
+
+ uint src_idx = GetBufferSrc64(dispatch_thread_id);
+
+ Image2CopyDstRg[dst_idx] = uint2(
+ BufferCopySrc.Load(src_idx),
+ BufferCopySrc.Load(src_idx + 1 * 4)
+ );
+}
+
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_image2d_r32g32_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+ uint3 src_idx = GetImageSrc(dispatch_thread_id);
+ uint3 bounds = GetDestBounds();
+ if (src_idx.x >= bounds.x || src_idx.y >= bounds.y) {
+ return;
+ }
+
+ uint2 data = Image2CopySrc[src_idx].rg;
+ uint dst_idx = GetBufferDst64(dispatch_thread_id);
+
+ BufferCopyDst.Store(dst_idx , data.x);
+ BufferCopyDst.Store(dst_idx + 1 * 4, data.y);
+}
+
+// R16G16B16A16
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_buffer_image2d_r16g16b16a16(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+ uint3 dst_idx = GetImageDst(dispatch_thread_id);
+ uint3 bounds = GetDestBounds();
+ if (dst_idx.x >= bounds.x || dst_idx.y >= bounds.y) {
+ return;
+ }
+
+ uint src_idx = GetBufferSrc64(dispatch_thread_id);
+
+ Image2CopyDstRgba[dst_idx] = uint4(
+ Uint32ToUint16x2(BufferCopySrc.Load(src_idx)),
+ Uint32ToUint16x2(BufferCopySrc.Load(src_idx + 1 * 4))
+ );
+}
+
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_image2d_r16g16b16a16_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+ uint3 src_idx = GetImageSrc(dispatch_thread_id);
+ uint3 bounds = GetDestBounds();
+ if (src_idx.x >= bounds.x || src_idx.y >= bounds.y) {
+ return;
+ }
+
+ uint4 data = Image2CopySrc[src_idx];
+ uint dst_idx = GetBufferDst64(dispatch_thread_id);
+
+ BufferCopyDst.Store(dst_idx, Uint16x2ToUint32(data.xy));
+ BufferCopyDst.Store(dst_idx + 1 * 4, Uint16x2ToUint32(data.zw));
+}
+
+// R32
+[numthreads(COPY_2D_NUM_THREAD_X, 1, 1)]
+void cs_copy_buffer_image1d_r32(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+ uint3 dst_idx = GetImageDst(dispatch_thread_id);
+ uint3 bounds = GetDestBounds();
+ if (dst_idx.x >= bounds.x) {
+ return;
+ }
+
+ uint src_idx = GetBufferSrc32(dispatch_thread_id);
+
+ Image1CopyDstR[dst_idx.xz] = BufferCopySrc.Load(src_idx);
+}
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_buffer_image2d_r32(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+ uint3 dst_idx = GetImageDst(dispatch_thread_id);
+ uint3 bounds = GetDestBounds();
+ if (dst_idx.x >= bounds.x || dst_idx.y >= bounds.y) {
+ return;
+ }
+
+ uint src_idx = GetBufferSrc32(dispatch_thread_id);
+
+ Image2CopyDstR[dst_idx] = BufferCopySrc.Load(src_idx);
+}
+
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_image2d_r32_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+ uint3 src_idx = GetImageSrc(dispatch_thread_id);
+ uint3 bounds = GetDestBounds();
+ if (src_idx.x >= bounds.x || src_idx.y >= bounds.y) {
+ return;
+ }
+
+ uint dst_idx = GetBufferDst32(dispatch_thread_id);
+
+ BufferCopyDst.Store(dst_idx, Image2CopySrc[src_idx].r);
+}
+
+// R16G16
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_buffer_image2d_r16g16(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+ uint3 dst_idx = GetImageDst(dispatch_thread_id);
+ uint3 bounds = GetDestBounds();
+ if (dst_idx.x >= bounds.x || dst_idx.y >= bounds.y) {
+ return;
+ }
+
+ uint src_idx = GetBufferSrc32(dispatch_thread_id);
+
+ Image2CopyDstRg[dst_idx] = Uint32ToUint16x2(BufferCopySrc.Load(src_idx));
+}
+
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_image2d_r16g16_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+ uint3 src_idx = GetImageSrc(dispatch_thread_id);
+ uint3 bounds = GetDestBounds();
+ if (src_idx.x >= bounds.x || src_idx.y >= bounds.y) {
+ return;
+ }
+
+ uint dst_idx = GetBufferDst32(dispatch_thread_id);
+
+ BufferCopyDst.Store(dst_idx, Uint16x2ToUint32(Image2CopySrc[src_idx].xy));
+}
+
+// R8G8B8A8
+[numthreads(COPY_2D_NUM_THREAD_X, 1, 1)]
+void cs_copy_buffer_image1d_r8g8b8a8(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+ uint3 dst_idx = GetImageDst(dispatch_thread_id);
+ uint3 bounds = GetDestBounds();
+ if (dst_idx.x >= bounds.x) {
+ return;
+ }
+
+ uint src_idx = GetBufferSrc32(dispatch_thread_id);
+
+ Image1CopyDstRgba[dst_idx.xz] = Uint32ToUint8x4(BufferCopySrc.Load(src_idx));
+}
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_buffer_image2d_r8g8b8a8(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+ uint3 dst_idx = GetImageDst(dispatch_thread_id);
+ uint3 bounds = GetDestBounds();
+ if (dst_idx.x >= bounds.x || dst_idx.y >= bounds.y) {
+ return;
+ }
+
+ uint src_idx = GetBufferSrc32(dispatch_thread_id);
+
+ Image2CopyDstRgba[dst_idx] = Uint32ToUint8x4(BufferCopySrc.Load(src_idx));
+}
+
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_image2d_r8g8b8a8_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+ uint3 src_idx = GetImageSrc(dispatch_thread_id);
+ uint3 bounds = GetDestBounds();
+ if (src_idx.x >= bounds.x || src_idx.y >= bounds.y) {
+ return;
+ }
+
+ uint dst_idx = GetBufferDst32(dispatch_thread_id);
+
+ BufferCopyDst.Store(dst_idx, Uint8x4ToUint32(Image2CopySrc[src_idx]));
+}
+
+// B8G8R8A8
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_image2d_b8g8r8a8_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+ uint3 src_idx = GetImageSrc(dispatch_thread_id);
+ uint3 bounds = GetDestBounds();
+ if (src_idx.x >= bounds.x || src_idx.y >= bounds.y) {
+ return;
+ }
+
+ uint dst_idx = GetBufferDst32(dispatch_thread_id);
+
+ BufferCopyDst.Store(dst_idx, Uint8x4ToUint32(Float4ToUint8x4(ImageCopy2SrcBgra[src_idx].bgra)));
+}
+
+// R16
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_buffer_image2d_r16(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+ uint3 dst_idx = GetImageDst(uint3(2, 1, 0) * dispatch_thread_id);
+ uint3 bounds = GetDestBounds();
+ if (dst_idx.x >= bounds.x || dst_idx.y >= bounds.y) {
+ return;
+ }
+
+ uint src_idx = GetBufferSrc16(dispatch_thread_id);
+ uint2 data = Uint32ToUint16x2(BufferCopySrc.Load(src_idx));
+
+ uint remaining_x = bounds.x - dst_idx.x;
+
+ if (remaining_x >= 2) {
+ Image2CopyDstR[dst_idx + uint3(1, 0, 0)] = data.y;
+ }
+ if (remaining_x >= 1) {
+ Image2CopyDstR[dst_idx + uint3(0, 0, 0)] = data.x;
+ }
+}
+
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_image2d_r16_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+ uint3 src_idx = GetImageSrc(uint3(2, 1, 0) * dispatch_thread_id);
+ uint3 bounds = GetDestBounds();
+ if (src_idx.x >= bounds.x || src_idx.y >= bounds.y) {
+ return;
+ }
+
+ uint dst_idx = GetBufferDst16(dispatch_thread_id);
+
+ uint upper = Image2CopySrc[src_idx].r;
+ uint lower = Image2CopySrc[src_idx + uint3(1, 0, 0)].r;
+
+ BufferCopyDst.Store(dst_idx, Uint16x2ToUint32(uint2(upper, lower)));
+}
+
+// R8G8
+[numthreads(COPY_2D_NUM_THREAD_X, 1, 1)]
+void cs_copy_buffer_image1d_r8g8(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+ uint3 dst_idx = GetImageDst(uint3(2, 1, 0) * dispatch_thread_id);
+ uint3 bounds = GetDestBounds();
+ if (dst_idx.x >= bounds.x) {
+ return;
+ }
+
+ uint src_idx = GetBufferSrc16(dispatch_thread_id);
+
+ uint4 data = Uint32ToUint8x4(BufferCopySrc.Load(src_idx));
+
+ uint remaining_x = bounds.x - dst_idx.x;
+
+ if (remaining_x >= 2) {
+ Image1CopyDstRg[dst_idx.xz + uint2(1, 0)] = data.zw;
+ }
+ if (remaining_x >= 1) {
+ Image1CopyDstRg[dst_idx.xz + uint2(0, 0)] = data.xy;
+ }
+}
+
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_buffer_image2d_r8g8(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+ uint3 dst_idx = GetImageDst(uint3(2, 1, 0) * dispatch_thread_id);
+ uint3 bounds = GetDestBounds();
+ if (dst_idx.x >= bounds.x || dst_idx.y >= bounds.y) {
+ return;
+ }
+
+ uint src_idx = GetBufferSrc16(dispatch_thread_id);
+
+ uint4 data = Uint32ToUint8x4(BufferCopySrc.Load(src_idx));
+
+ uint remaining_x = bounds.x - dst_idx.x;
+
+ if (remaining_x >= 2) {
+ Image2CopyDstRg[dst_idx + uint3(1, 0, 0)] = data.zw;
+ }
+ if (remaining_x >= 1) {
+ Image2CopyDstRg[dst_idx + uint3(0, 0, 0)] = data.xy;
+ }
+}
+
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_image2d_r8g8_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+ uint3 src_idx = GetImageSrc(uint3(2, 1, 0) * dispatch_thread_id);
+ uint3 bounds = GetDestBounds();
+ if (src_idx.x >= bounds.x || src_idx.y >= bounds.y) {
+ return;
+ }
+
+ uint dst_idx = GetBufferDst16(dispatch_thread_id);
+
+ uint2 lower = Image2CopySrc[src_idx].xy;
+ uint2 upper = Image2CopySrc[src_idx + uint3(1, 0, 0)].xy;
+
+ BufferCopyDst.Store(dst_idx, Uint8x4ToUint32(uint4(lower.x, lower.y, upper.x, upper.y)));
+}
+
+// R8
+[numthreads(COPY_2D_NUM_THREAD_X, 1, 1)]
+void cs_copy_buffer_image1d_r8(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+ uint3 dst_idx = GetImageDst(uint3(4, 1, 0) * dispatch_thread_id);
+ uint3 bounds = GetDestBounds();
+ if (dst_idx.x >= bounds.x) {
+ return;
+ }
+
+ uint src_idx = GetBufferSrc8(dispatch_thread_id);
+ uint4 data = Uint32ToUint8x4(BufferCopySrc.Load(src_idx));
+
+ uint remaining_x = bounds.x - dst_idx.x;
+
+ if (remaining_x >= 4) {
+ Image1CopyDstR[dst_idx.xz + uint2(3, 0)] = data.w;
+ }
+ if (remaining_x >= 3) {
+ Image1CopyDstR[dst_idx.xz + uint2(2, 0)] = data.z;
+ }
+ if (remaining_x >= 2) {
+ Image1CopyDstR[dst_idx.xz + uint2(1, 0)] = data.y;
+ }
+ if (remaining_x >= 1) {
+ Image1CopyDstR[dst_idx.xz + uint2(0, 0)] = data.x;
+ }
+}
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_buffer_image2d_r8(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+ uint3 dst_idx = GetImageDst(uint3(4, 1, 0) * dispatch_thread_id);
+ uint3 bounds = GetDestBounds();
+ if (dst_idx.x >= bounds.x || dst_idx.y >= bounds.y) {
+ return;
+ }
+
+ uint src_idx = GetBufferSrc8(dispatch_thread_id);
+ uint4 data = Uint32ToUint8x4(BufferCopySrc.Load(src_idx));
+
+ uint remaining_x = bounds.x - dst_idx.x;
+
+ if (remaining_x >= 4) {
+ Image2CopyDstR[dst_idx + uint3(3, 0, 0)] = data.w;
+ }
+ if (remaining_x >= 3) {
+ Image2CopyDstR[dst_idx + uint3(2, 0, 0)] = data.z;
+ }
+ if (remaining_x >= 2) {
+ Image2CopyDstR[dst_idx + uint3(1, 0, 0)] = data.y;
+ }
+ if (remaining_x >= 1) {
+ Image2CopyDstR[dst_idx + uint3(0, 0, 0)] = data.x;
+ }
+}
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_image2d_r8_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+ uint3 src_idx = GetImageSrc(uint3(4, 1, 0) * dispatch_thread_id);
+ uint3 bounds = GetDestBounds();
+ if (src_idx.x >= bounds.x || src_idx.y >= bounds.y) {
+ return;
+ }
+
+ uint dst_idx = GetBufferDst8(dispatch_thread_id);
+
+ BufferCopyDst.Store(dst_idx, Uint8x4ToUint32(uint4(
+ Image2CopySrc[src_idx].r,
+ Image2CopySrc[src_idx + uint3(1, 0, 0)].r,
+ Image2CopySrc[src_idx + uint3(2, 0, 0)].r,
+ Image2CopySrc[src_idx + uint3(3, 0, 0)].r
+ )));
+}