summaryrefslogtreecommitdiffstats
path: root/third_party/rust/gfx-backend-dx11/shaders/copy.hlsl
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/gfx-backend-dx11/shaders/copy.hlsl')
-rw-r--r--third_party/rust/gfx-backend-dx11/shaders/copy.hlsl615
1 files changed, 615 insertions, 0 deletions
diff --git a/third_party/rust/gfx-backend-dx11/shaders/copy.hlsl b/third_party/rust/gfx-backend-dx11/shaders/copy.hlsl
new file mode 100644
index 0000000000..f8b5d8523b
--- /dev/null
+++ b/third_party/rust/gfx-backend-dx11/shaders/copy.hlsl
@@ -0,0 +1,615 @@
+// Constants for buffer-to-buffer copies.
+// NOTE(review): no shader in this file reads BufferCopies, so the meaning of
+// the four components cannot be confirmed here (presumably source/destination
+// offsets -- verify against the host code that fills the constant buffer).
+struct BufferCopy {
+ uint4 SrcDst;
+};
+
+// Constants for image-to-image copies.
+// GetImageCopySrc/GetImageCopyDst add the dispatch thread id to .xy and pass
+// .z through unchanged as the third coordinate of the 2D-array texture index.
+// .w is not read anywhere in this file.
+struct ImageCopy {
+ // Origin of the region to read in the source image.
+ uint4 Src;
+ // Origin of the region to write in the destination image.
+ uint4 Dst;
+};
+
+// Constants for buffer<->image copies (used in both directions).
+struct BufferImageCopy {
+ // x = byte offset of the copy region inside the buffer.
+ // y = buffer row length; the GetBuffer* helpers take max(y, ImageExtent.x)
+ //     and multiply it by the texel size in bytes to obtain the row pitch
+ //     (so presumably y is expressed in texels -- TODO confirm).
+ // z/w are not read in this file (z presumably the buffer image height).
+ uint4 BufferVars;
+ // xy = texel offset of the region inside the image; z is passed through as
+ // the third index coordinate (second coordinate for the 1D-array kernels).
+ uint4 ImageOffset;
+ // Size of the copied region; ImageOffset + ImageExtent is its exclusive end.
+ uint4 ImageExtent;
+ // Upper limit the region end is clamped to in GetDestBounds().
+ uint4 ImageSize;
+};
+
+// Single constant buffer (slot b0) shared by every copy kernel in this file.
+// Each kernel reads only the member matching its copy kind: the image<->image
+// kernels read ImageCopies, the buffer<->image kernels read BufferImageCopies.
+// The member order defines the buffer layout and must match the host side.
+cbuffer CopyConstants : register(b0) {
+ BufferCopy BufferCopies;
+ ImageCopy ImageCopies;
+ BufferImageCopy BufferImageCopies;
+};
+
+
+// Returns the exclusive end (in texels) of the buffer<->image copy region:
+// ImageOffset + ImageExtent, clamped per component to ImageSize.
+// Despite the name it is also used to bound the source of image->buffer copies.
+uint3 GetDestBounds()
+{
+    const uint4 region_end = BufferImageCopies.ImageOffset + BufferImageCopies.ImageExtent;
+    const uint4 clamped_end = min(region_end, BufferImageCopies.ImageSize);
+
+    // Only the first three components are meaningful to callers.
+    return clamped_end.xyz;
+}
+
+// Destination index for an image->image copy: the thread's xy position
+// shifted by ImageCopies.Dst.xy, with Dst.z as the third coordinate.
+uint3 GetImageCopyDst(uint3 dispatch_thread_id)
+{
+    uint3 texel;
+    texel.xy = ImageCopies.Dst.xy + dispatch_thread_id.xy;
+    texel.z = ImageCopies.Dst.z;
+    return texel;
+}
+
+// Source index for an image->image copy: the thread's xy position
+// shifted by ImageCopies.Src.xy, with Src.z as the third coordinate.
+uint3 GetImageCopySrc(uint3 dispatch_thread_id)
+{
+    uint3 texel;
+    texel.xy = ImageCopies.Src.xy + dispatch_thread_id.xy;
+    texel.z = ImageCopies.Src.z;
+    return texel;
+}
+
+// Image index written by a buffer->image copy: the given xy position shifted
+// by the region offset, with ImageOffset.z carried through as the third
+// coordinate. The z component of the argument is ignored.
+uint3 GetImageDst(uint3 dispatch_thread_id)
+{
+    const uint2 position = BufferImageCopies.ImageOffset.xy + dispatch_thread_id.xy;
+    const uint third_coord = BufferImageCopies.ImageOffset.z;
+    return uint3(position, third_coord);
+}
+
+// Image index read by an image->buffer copy. Same computation as
+// GetImageDst: region offset plus the given xy position, with
+// ImageOffset.z as the third coordinate.
+uint3 GetImageSrc(uint3 dispatch_thread_id)
+{
+    const uint2 position = BufferImageCopies.ImageOffset.xy + dispatch_thread_id.xy;
+    const uint third_coord = BufferImageCopies.ImageOffset.z;
+    return uint3(position, third_coord);
+}
+
+// Byte address in the destination buffer for the 16-byte element handled by
+// this thread: buffer offset + x * 16 + y * row pitch.
+uint GetBufferDst128(uint3 dispatch_thread_id)
+{
+    // Row length is the larger of the buffer row length and the region width.
+    const uint row_length = max(BufferImageCopies.BufferVars.y, BufferImageCopies.ImageExtent.x);
+    const uint row_pitch = 16 * row_length;
+    const uint base = BufferImageCopies.BufferVars.x;
+
+    return base + dispatch_thread_id.x * 16 + dispatch_thread_id.y * row_pitch;
+}
+// Byte address in the source buffer for the 16-byte element handled by this
+// thread: buffer offset + x * 16 + y * row pitch.
+uint GetBufferSrc128(uint3 dispatch_thread_id)
+{
+    // Row length is the larger of the buffer row length and the region width.
+    const uint row_length = max(BufferImageCopies.BufferVars.y, BufferImageCopies.ImageExtent.x);
+    const uint row_pitch = 16 * row_length;
+    const uint base = BufferImageCopies.BufferVars.x;
+
+    return base + dispatch_thread_id.x * 16 + dispatch_thread_id.y * row_pitch;
+}
+
+// Byte address in the destination buffer for the 8-byte element handled by
+// this thread: buffer offset + x * 8 + y * row pitch.
+uint GetBufferDst64(uint3 dispatch_thread_id)
+{
+    // Row length is the larger of the buffer row length and the region width.
+    const uint row_length = max(BufferImageCopies.BufferVars.y, BufferImageCopies.ImageExtent.x);
+    const uint row_pitch = 8 * row_length;
+    const uint base = BufferImageCopies.BufferVars.x;
+
+    return base + dispatch_thread_id.x * 8 + dispatch_thread_id.y * row_pitch;
+}
+// Byte address in the source buffer for the 8-byte element handled by this
+// thread: buffer offset + x * 8 + y * row pitch.
+uint GetBufferSrc64(uint3 dispatch_thread_id)
+{
+    // Row length is the larger of the buffer row length and the region width.
+    const uint row_length = max(BufferImageCopies.BufferVars.y, BufferImageCopies.ImageExtent.x);
+    const uint row_pitch = 8 * row_length;
+    const uint base = BufferImageCopies.BufferVars.x;
+
+    return base + dispatch_thread_id.x * 8 + dispatch_thread_id.y * row_pitch;
+}
+
+// Byte address in the destination buffer for the 4-byte element handled by
+// this thread: buffer offset + x * 4 + y * row pitch.
+uint GetBufferDst32(uint3 dispatch_thread_id)
+{
+    // Row length is the larger of the buffer row length and the region width.
+    const uint row_length = max(BufferImageCopies.BufferVars.y, BufferImageCopies.ImageExtent.x);
+    const uint row_pitch = 4 * row_length;
+    const uint base = BufferImageCopies.BufferVars.x;
+
+    return base + dispatch_thread_id.x * 4 + dispatch_thread_id.y * row_pitch;
+}
+// Byte address in the source buffer for the 4-byte element handled by this
+// thread: buffer offset + x * 4 + y * row pitch.
+uint GetBufferSrc32(uint3 dispatch_thread_id)
+{
+    // Row length is the larger of the buffer row length and the region width.
+    const uint row_length = max(BufferImageCopies.BufferVars.y, BufferImageCopies.ImageExtent.x);
+    const uint row_pitch = 4 * row_length;
+    const uint base = BufferImageCopies.BufferVars.x;
+
+    return base + dispatch_thread_id.x * 4 + dispatch_thread_id.y * row_pitch;
+}
+
+// Byte address in the destination buffer of the dword handled by this thread
+// for 2-byte texels. Each thread still advances 4 bytes in x (the callers
+// pack two texels per dword), while the row pitch is 2 bytes per row element.
+uint GetBufferDst16(uint3 dispatch_thread_id)
+{
+    // Row length is the larger of the buffer row length and the region width.
+    const uint row_length = max(BufferImageCopies.BufferVars.y, BufferImageCopies.ImageExtent.x);
+    const uint row_pitch = 2 * row_length;
+    const uint base = BufferImageCopies.BufferVars.x;
+
+    return base + dispatch_thread_id.x * 4 + dispatch_thread_id.y * row_pitch;
+}
+// Byte address in the source buffer of the dword handled by this thread for
+// 2-byte texels. Each thread still advances 4 bytes in x (the callers unpack
+// two texels per dword), while the row pitch is 2 bytes per row element.
+uint GetBufferSrc16(uint3 dispatch_thread_id)
+{
+    // Row length is the larger of the buffer row length and the region width.
+    const uint row_length = max(BufferImageCopies.BufferVars.y, BufferImageCopies.ImageExtent.x);
+    const uint row_pitch = 2 * row_length;
+    const uint base = BufferImageCopies.BufferVars.x;
+
+    return base + dispatch_thread_id.x * 4 + dispatch_thread_id.y * row_pitch;
+}
+
+// Byte address in the destination buffer of the dword handled by this thread
+// for 1-byte texels. Each thread advances 4 bytes in x (the callers pack four
+// texels per dword); the row pitch equals the row length itself.
+uint GetBufferDst8(uint3 dispatch_thread_id)
+{
+    // Row length is the larger of the buffer row length and the region width.
+    const uint row_pitch = max(BufferImageCopies.BufferVars.y, BufferImageCopies.ImageExtent.x);
+    const uint base = BufferImageCopies.BufferVars.x;
+
+    return base + dispatch_thread_id.x * 4 + dispatch_thread_id.y * row_pitch;
+}
+// Byte address in the source buffer of the dword handled by this thread for
+// 1-byte texels. Each thread advances 4 bytes in x (the callers unpack four
+// texels per dword); the row pitch equals the row length itself.
+uint GetBufferSrc8(uint3 dispatch_thread_id)
+{
+    // Row length is the larger of the buffer row length and the region width.
+    const uint row_pitch = max(BufferImageCopies.BufferVars.y, BufferImageCopies.ImageExtent.x);
+    const uint base = BufferImageCopies.BufferVars.x;
+
+    return base + dispatch_thread_id.x * 4 + dispatch_thread_id.y * row_pitch;
+}
+
+
+// Splits a dword into its four bytes; x receives the lowest byte, w the highest.
+uint4 Uint32ToUint8x4(uint data)
+{
+    const uint byte0 = data & 0xFF;
+    const uint byte1 = (data >> 8) & 0xFF;
+    const uint byte2 = (data >> 16) & 0xFF;
+    const uint byte3 = (data >> 24) & 0xFF;
+    return uint4(byte0, byte1, byte2, byte3);
+}
+
+// Splits a dword into two 16-bit halves; x receives the low half, y the high half.
+uint2 Uint32ToUint16x2(uint data)
+{
+    const uint low_half = data & 0xFFFF;
+    const uint high_half = (data >> 16) & 0xFFFF;
+    return uint2(low_half, high_half);
+}
+
+// Packs four values into one dword, x in the lowest byte and w in the highest.
+// Components above 0xFF are clamped to 0xFF (not masked) before packing.
+uint Uint8x4ToUint32(uint4 data)
+{
+    const uint4 clamped = min(data, 0xFF);
+    // Per-byte place values: 1, 1 << 8, 1 << 16, 1 << 24.
+    const uint4 place_values = 1 << uint4(0, 8, 16, 24);
+    return dot(clamped, place_values);
+}
+
+// Packs two values into one dword, x in the low 16 bits and y in the high 16.
+// Components above 0xFFFF are clamped to 0xFFFF (not masked) before packing.
+uint Uint16x2ToUint32(uint2 data)
+{
+    const uint2 clamped = min(data, 0xFFFF);
+    // Per-half place values: 1 and 1 << 16.
+    const uint2 place_values = 1 << uint2(0, 16);
+    return dot(clamped, place_values);
+}
+
+// Splits the low 16 bits of a value into two bytes; x is the low byte,
+// y the next byte. Bits above bit 15 are discarded.
+uint2 Uint16ToUint8x2(uint data)
+{
+    const uint low_byte = data & 0xFF;
+    const uint high_byte = (data >> 8) & 0xFF;
+    return uint2(low_byte, high_byte);
+}
+
+// Packs two values into a 16-bit quantity, x in the low byte and y in the
+// next byte. Components above 0xFF are clamped to 0xFF (not masked).
+uint Uint8x2ToUint16(uint2 data)
+{
+    const uint2 clamped = min(data, 0xFF);
+    // Per-byte place values: 1 and 1 << 8.
+    const uint2 place_values = 1 << uint2(0, 8);
+    return dot(clamped, place_values);
+}
+
+// Converts four floats to integers by scaling with 255 and adding 0.5 before
+// the float->uint conversion (round to nearest for inputs in [0, 1]).
+// No clamping is applied here.
+uint4 Float4ToUint8x4(float4 data)
+{
+    const float4 scaled = data * 255 + .5f;
+    return uint4(scaled);
+}
+
+// Resource bindings for the copy kernels.
+// Every read-only resource is declared on slot t0 and every writable resource
+// on slot u0, so the declarations alias each other; each entry point below
+// references at most one resource per slot.
+//
+// Buffers are always R32-aligned
+ByteAddressBuffer BufferCopySrc : register(t0);
+RWByteAddressBuffer BufferCopyDst : register(u0);
+
+// Writable 1D-array image views with one, two and four uint channels.
+RWTexture1DArray<uint> Image1CopyDstR : register(u0);
+RWTexture1DArray<uint2> Image1CopyDstRg : register(u0);
+RWTexture1DArray<uint4> Image1CopyDstRgba : register(u0);
+
+// Readable 2D-array image (always sampled as uint4) and the writable
+// 2D-array views with one, two and four uint channels.
+Texture2DArray<uint4> Image2CopySrc : register(t0);
+RWTexture2DArray<uint> Image2CopyDstR : register(u0);
+RWTexture2DArray<uint2> Image2CopyDstRg : register(u0);
+RWTexture2DArray<uint4> Image2CopyDstRgba : register(u0);
+
+// Float view of the source image, used only by cs_copy_image2d_b8g8r8a8_buffer.
+Texture2DArray<float4> ImageCopy2SrcBgra : register(t0);
+
+// Image<->Image copies
+// Image->image copy, one texel per thread: packs the source's first two
+// channels (clamped to a byte each) into a single 16-bit destination value.
+[numthreads(1, 1, 1)]
+void cs_copy_image2d_r8g8_image2d_r16(uint3 dispatch_thread_id : SV_DispatchThreadID)
+{
+    const uint3 read_at = GetImageCopySrc(dispatch_thread_id);
+    const uint3 write_at = GetImageCopyDst(dispatch_thread_id);
+
+    const uint2 channels = Image2CopySrc[read_at].xy;
+    Image2CopyDstR[write_at] = Uint8x2ToUint16(channels);
+}
+
+// Image->image copy, one texel per thread: splits the source's first channel
+// into its low and high byte and writes them as a two-channel texel.
+[numthreads(1, 1, 1)]
+void cs_copy_image2d_r16_image2d_r8g8(uint3 dispatch_thread_id : SV_DispatchThreadID)
+{
+    const uint3 read_at = GetImageCopySrc(dispatch_thread_id);
+    const uint3 write_at = GetImageCopyDst(dispatch_thread_id);
+
+    const uint packed = Image2CopySrc[read_at].x;
+    Image2CopyDstRg[write_at] = Uint16ToUint8x2(packed);
+}
+
+// Image->image copy, one texel per thread: packs the four source channels
+// (clamped to a byte each) into a single 32-bit destination value.
+[numthreads(1, 1, 1)]
+void cs_copy_image2d_r8g8b8a8_image2d_r32(uint3 dispatch_thread_id : SV_DispatchThreadID)
+{
+    const uint3 read_at = GetImageCopySrc(dispatch_thread_id);
+    const uint3 write_at = GetImageCopyDst(dispatch_thread_id);
+
+    const uint4 channels = Image2CopySrc[read_at];
+    Image2CopyDstR[write_at] = Uint8x4ToUint32(channels);
+}
+
+// Image->image copy, one texel per thread: packs the four source bytes into a
+// dword, then splits that dword into two 16-bit destination channels.
+[numthreads(1, 1, 1)]
+void cs_copy_image2d_r8g8b8a8_image2d_r16g16(uint3 dispatch_thread_id : SV_DispatchThreadID)
+{
+    const uint3 read_at = GetImageCopySrc(dispatch_thread_id);
+    const uint3 write_at = GetImageCopyDst(dispatch_thread_id);
+
+    const uint packed = Uint8x4ToUint32(Image2CopySrc[read_at]);
+    Image2CopyDstRg[write_at] = Uint32ToUint16x2(packed);
+}
+
+// Image->image copy, one texel per thread: packs the source's first two
+// channels (clamped to 16 bits each) into a single 32-bit destination value.
+[numthreads(1, 1, 1)]
+void cs_copy_image2d_r16g16_image2d_r32(uint3 dispatch_thread_id : SV_DispatchThreadID)
+{
+    const uint3 read_at = GetImageCopySrc(dispatch_thread_id);
+    const uint3 write_at = GetImageCopyDst(dispatch_thread_id);
+
+    const uint2 channels = Image2CopySrc[read_at].xy;
+    Image2CopyDstR[write_at] = Uint16x2ToUint32(channels);
+}
+
+// Image->image copy, one texel per thread: packs the source's first two
+// 16-bit channels into a dword, then splits it into four destination bytes.
+[numthreads(1, 1, 1)]
+void cs_copy_image2d_r16g16_image2d_r8g8b8a8(uint3 dispatch_thread_id : SV_DispatchThreadID)
+{
+    const uint3 read_at = GetImageCopySrc(dispatch_thread_id);
+    const uint3 write_at = GetImageCopyDst(dispatch_thread_id);
+
+    const uint2 channels = Image2CopySrc[read_at].xy;
+    const uint packed = Uint16x2ToUint32(channels);
+    Image2CopyDstRgba[write_at] = Uint32ToUint8x4(packed);
+}
+
+// Image->image copy, one texel per thread: splits the source's first channel
+// into its low and high 16-bit halves and writes them as a two-channel texel.
+[numthreads(1, 1, 1)]
+void cs_copy_image2d_r32_image2d_r16g16(uint3 dispatch_thread_id : SV_DispatchThreadID)
+{
+    const uint3 read_at = GetImageCopySrc(dispatch_thread_id);
+    const uint3 write_at = GetImageCopyDst(dispatch_thread_id);
+
+    const uint packed = Image2CopySrc[read_at].x;
+    Image2CopyDstRg[write_at] = Uint32ToUint16x2(packed);
+}
+
+// Image->image copy, one texel per thread: splits the source's first channel
+// into four bytes and writes them as a four-channel texel.
+[numthreads(1, 1, 1)]
+void cs_copy_image2d_r32_image2d_r8g8b8a8(uint3 dispatch_thread_id : SV_DispatchThreadID)
+{
+    const uint3 read_at = GetImageCopySrc(dispatch_thread_id);
+    const uint3 write_at = GetImageCopyDst(dispatch_thread_id);
+
+    const uint packed = Image2CopySrc[read_at].x;
+    Image2CopyDstRgba[write_at] = Uint32ToUint8x4(packed);
+}
+
+// Thread-group dimensions for the buffer<->image kernels below. The 2D
+// kernels use X x Y x 1 groups; the 1D kernels reuse COPY_2D_NUM_THREAD_X
+// with a Y size of 1 because the dedicated 1D constant is still disabled.
+// The host presumably has to size its dispatch to match -- TODO confirm.
+//#define COPY_1D_NUM_THREAD 64 //TODO
+#define COPY_2D_NUM_THREAD_X 8
+#define COPY_2D_NUM_THREAD_Y 8
+
+// Buffer<->Image copies
+
+// R32G32B32A32
+// Buffer->image copy of 16-byte texels: each thread loads four consecutive
+// dwords from the buffer and writes them as one four-channel texel.
+// Threads whose texel falls outside the clamped copy region do nothing.
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_buffer_image2d_r32g32b32a32(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+    const uint3 texel = GetImageDst(dispatch_thread_id);
+    const uint3 limit = GetDestBounds();
+
+    if (texel.x < limit.x && texel.y < limit.y) {
+        const uint address = GetBufferSrc128(dispatch_thread_id);
+
+        uint4 value;
+        value.x = BufferCopySrc.Load(address);
+        value.y = BufferCopySrc.Load(address + 4);
+        value.z = BufferCopySrc.Load(address + 8);
+        value.w = BufferCopySrc.Load(address + 12);
+
+        Image2CopyDstRgba[texel] = value;
+    }
+}
+
+// Image->buffer copy of 16-byte texels: each thread reads one four-channel
+// texel and stores its channels as four consecutive dwords in the buffer.
+// Threads whose texel falls outside the clamped copy region do nothing.
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_image2d_r32g32b32a32_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+    const uint3 texel = GetImageSrc(dispatch_thread_id);
+    const uint3 limit = GetDestBounds();
+
+    if (texel.x < limit.x && texel.y < limit.y) {
+        const uint4 value = Image2CopySrc[texel];
+        const uint address = GetBufferDst128(dispatch_thread_id);
+
+        BufferCopyDst.Store(address, value.x);
+        BufferCopyDst.Store(address + 4, value.y);
+        BufferCopyDst.Store(address + 8, value.z);
+        BufferCopyDst.Store(address + 12, value.w);
+    }
+}
+
+// R32G32
+// Buffer->image copy of 8-byte texels: each thread loads two consecutive
+// dwords from the buffer and writes them as one two-channel texel.
+// Threads whose texel falls outside the clamped copy region do nothing.
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_buffer_image2d_r32g32(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+    const uint3 texel = GetImageDst(dispatch_thread_id);
+    const uint3 limit = GetDestBounds();
+
+    if (texel.x < limit.x && texel.y < limit.y) {
+        const uint address = GetBufferSrc64(dispatch_thread_id);
+
+        uint2 value;
+        value.x = BufferCopySrc.Load(address);
+        value.y = BufferCopySrc.Load(address + 4);
+
+        Image2CopyDstRg[texel] = value;
+    }
+}
+
+// Image->buffer copy of 8-byte texels: each thread reads the first two
+// channels of one texel and stores them as two consecutive dwords.
+// Threads whose texel falls outside the clamped copy region do nothing.
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_image2d_r32g32_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+    const uint3 texel = GetImageSrc(dispatch_thread_id);
+    const uint3 limit = GetDestBounds();
+
+    if (texel.x < limit.x && texel.y < limit.y) {
+        const uint2 value = Image2CopySrc[texel].rg;
+        const uint address = GetBufferDst64(dispatch_thread_id);
+
+        BufferCopyDst.Store(address, value.x);
+        BufferCopyDst.Store(address + 4, value.y);
+    }
+}
+
+// R16G16B16A16
+// Buffer->image copy of 8-byte texels with four 16-bit channels: each thread
+// loads two dwords and splits each into two 16-bit halves (first dword -> xy,
+// second dword -> zw).
+// Threads whose texel falls outside the clamped copy region do nothing.
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_buffer_image2d_r16g16b16a16(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+    const uint3 texel = GetImageDst(dispatch_thread_id);
+    const uint3 limit = GetDestBounds();
+
+    if (texel.x < limit.x && texel.y < limit.y) {
+        const uint address = GetBufferSrc64(dispatch_thread_id);
+
+        uint4 value;
+        value.xy = Uint32ToUint16x2(BufferCopySrc.Load(address));
+        value.zw = Uint32ToUint16x2(BufferCopySrc.Load(address + 4));
+
+        Image2CopyDstRgba[texel] = value;
+    }
+}
+
+// Image->buffer copy of 8-byte texels with four 16-bit channels: each thread
+// packs xy into one dword and zw into the next and stores both.
+// Threads whose texel falls outside the clamped copy region do nothing.
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_image2d_r16g16b16a16_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+    const uint3 texel = GetImageSrc(dispatch_thread_id);
+    const uint3 limit = GetDestBounds();
+
+    if (texel.x < limit.x && texel.y < limit.y) {
+        const uint4 value = Image2CopySrc[texel];
+        const uint address = GetBufferDst64(dispatch_thread_id);
+
+        const uint first_dword = Uint16x2ToUint32(value.xy);
+        const uint second_dword = Uint16x2ToUint32(value.zw);
+
+        BufferCopyDst.Store(address, first_dword);
+        BufferCopyDst.Store(address + 4, second_dword);
+    }
+}
+
+// R32
+// Buffer->1D-image copy of 4-byte texels: each thread loads one dword and
+// writes it to the 1D-array image at (x, ImageOffset.z).
+// Only the x coordinate is checked against the clamped copy region.
+[numthreads(COPY_2D_NUM_THREAD_X, 1, 1)]
+void cs_copy_buffer_image1d_r32(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+    const uint3 texel = GetImageDst(dispatch_thread_id);
+    const uint3 limit = GetDestBounds();
+
+    if (texel.x < limit.x) {
+        const uint address = GetBufferSrc32(dispatch_thread_id);
+        const uint value = BufferCopySrc.Load(address);
+
+        Image1CopyDstR[texel.xz] = value;
+    }
+}
+// Buffer->image copy of 4-byte texels: each thread loads one dword and writes
+// it as a single-channel texel.
+// Threads whose texel falls outside the clamped copy region do nothing.
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_buffer_image2d_r32(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+    const uint3 texel = GetImageDst(dispatch_thread_id);
+    const uint3 limit = GetDestBounds();
+
+    if (texel.x < limit.x && texel.y < limit.y) {
+        const uint address = GetBufferSrc32(dispatch_thread_id);
+        const uint value = BufferCopySrc.Load(address);
+
+        Image2CopyDstR[texel] = value;
+    }
+}
+
+// Image->buffer copy of 4-byte texels: each thread stores the first channel
+// of one texel as a dword.
+// Threads whose texel falls outside the clamped copy region do nothing.
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_image2d_r32_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+    const uint3 texel = GetImageSrc(dispatch_thread_id);
+    const uint3 limit = GetDestBounds();
+
+    if (texel.x < limit.x && texel.y < limit.y) {
+        const uint address = GetBufferDst32(dispatch_thread_id);
+        const uint value = Image2CopySrc[texel].r;
+
+        BufferCopyDst.Store(address, value);
+    }
+}
+
+// R16G16
+// Buffer->image copy of 4-byte texels with two 16-bit channels: each thread
+// loads one dword and writes its low/high halves as a two-channel texel.
+// Threads whose texel falls outside the clamped copy region do nothing.
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_buffer_image2d_r16g16(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+    const uint3 texel = GetImageDst(dispatch_thread_id);
+    const uint3 limit = GetDestBounds();
+
+    if (texel.x < limit.x && texel.y < limit.y) {
+        const uint address = GetBufferSrc32(dispatch_thread_id);
+        const uint packed = BufferCopySrc.Load(address);
+
+        Image2CopyDstRg[texel] = Uint32ToUint16x2(packed);
+    }
+}
+
+// Image->buffer copy of 4-byte texels with two 16-bit channels: each thread
+// packs the first two channels of one texel into a dword and stores it.
+// Threads whose texel falls outside the clamped copy region do nothing.
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_image2d_r16g16_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+    const uint3 texel = GetImageSrc(dispatch_thread_id);
+    const uint3 limit = GetDestBounds();
+
+    if (texel.x < limit.x && texel.y < limit.y) {
+        const uint address = GetBufferDst32(dispatch_thread_id);
+        const uint2 channels = Image2CopySrc[texel].xy;
+
+        BufferCopyDst.Store(address, Uint16x2ToUint32(channels));
+    }
+}
+
+// R8G8B8A8
+// Buffer->1D-image copy of 4-byte texels with four 8-bit channels: each thread
+// loads one dword, splits it into bytes and writes them to the 1D-array image
+// at (x, ImageOffset.z).
+// Only the x coordinate is checked against the clamped copy region.
+[numthreads(COPY_2D_NUM_THREAD_X, 1, 1)]
+void cs_copy_buffer_image1d_r8g8b8a8(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+    const uint3 texel = GetImageDst(dispatch_thread_id);
+    const uint3 limit = GetDestBounds();
+
+    if (texel.x < limit.x) {
+        const uint address = GetBufferSrc32(dispatch_thread_id);
+        const uint packed = BufferCopySrc.Load(address);
+
+        Image1CopyDstRgba[texel.xz] = Uint32ToUint8x4(packed);
+    }
+}
+// Buffer->image copy of 4-byte texels with four 8-bit channels: each thread
+// loads one dword and writes its four bytes as a four-channel texel.
+// Threads whose texel falls outside the clamped copy region do nothing.
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_buffer_image2d_r8g8b8a8(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+    const uint3 texel = GetImageDst(dispatch_thread_id);
+    const uint3 limit = GetDestBounds();
+
+    if (texel.x < limit.x && texel.y < limit.y) {
+        const uint address = GetBufferSrc32(dispatch_thread_id);
+        const uint packed = BufferCopySrc.Load(address);
+
+        Image2CopyDstRgba[texel] = Uint32ToUint8x4(packed);
+    }
+}
+
+// Image->buffer copy of 4-byte texels with four 8-bit channels: each thread
+// packs the four channels of one texel into a dword and stores it.
+// Threads whose texel falls outside the clamped copy region do nothing.
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_image2d_r8g8b8a8_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+    const uint3 texel = GetImageSrc(dispatch_thread_id);
+    const uint3 limit = GetDestBounds();
+
+    if (texel.x < limit.x && texel.y < limit.y) {
+        const uint address = GetBufferDst32(dispatch_thread_id);
+        const uint4 channels = Image2CopySrc[texel];
+
+        BufferCopyDst.Store(address, Uint8x4ToUint32(channels));
+    }
+}
+
+// B8G8R8A8
+// Image->buffer copy for the BGRA source view: each thread reads one texel as
+// floats, reorders the channels to b, g, r, a, converts them to bytes and
+// stores the packed dword (b in the lowest byte).
+// Threads whose texel falls outside the clamped copy region do nothing.
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_image2d_b8g8r8a8_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+    const uint3 texel = GetImageSrc(dispatch_thread_id);
+    const uint3 limit = GetDestBounds();
+
+    if (texel.x < limit.x && texel.y < limit.y) {
+        const uint address = GetBufferDst32(dispatch_thread_id);
+
+        const float4 reordered = ImageCopy2SrcBgra[texel].bgra;
+        const uint4 bytes = Float4ToUint8x4(reordered);
+
+        BufferCopyDst.Store(address, Uint8x4ToUint32(bytes));
+    }
+}
+
+// R16
+// Buffer->image copy of 2-byte texels: each thread loads one dword and writes
+// up to two horizontally adjacent texels (low half at x, high half at x + 1),
+// hence the thread id is scaled by 2 in x. The second texel is skipped when it
+// would fall past the right edge of the clamped copy region.
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_buffer_image2d_r16(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+    const uint3 first_texel = GetImageDst(uint3(2, 1, 0) * dispatch_thread_id);
+    const uint3 limit = GetDestBounds();
+
+    if (first_texel.x < limit.x && first_texel.y < limit.y) {
+        const uint address = GetBufferSrc16(dispatch_thread_id);
+        const uint2 halves = Uint32ToUint16x2(BufferCopySrc.Load(address));
+
+        // Number of texels left in this row, starting at first_texel.
+        const uint texels_left = limit.x - first_texel.x;
+
+        if (texels_left >= 1) {
+            Image2CopyDstR[first_texel] = halves.x;
+        }
+        if (texels_left >= 2) {
+            Image2CopyDstR[first_texel + uint3(1, 0, 0)] = halves.y;
+        }
+    }
+}
+
+// Image->buffer copy of 2-byte texels: each thread reads two horizontally
+// adjacent texels and stores them as one dword (texel x in the low half,
+// texel x + 1 in the high half), hence the thread id is scaled by 2 in x.
+// NOTE(review): the texel at x + 1 is read without a bounds check, so for an
+// odd region width the high half comes from outside the copy region --
+// confirm the host tolerates or pads for this.
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_image2d_r16_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+    const uint3 first_texel = GetImageSrc(uint3(2, 1, 0) * dispatch_thread_id);
+    const uint3 limit = GetDestBounds();
+
+    if (first_texel.x < limit.x && first_texel.y < limit.y) {
+        const uint address = GetBufferDst16(dispatch_thread_id);
+
+        const uint low_half = Image2CopySrc[first_texel].r;
+        const uint high_half = Image2CopySrc[first_texel + uint3(1, 0, 0)].r;
+
+        BufferCopyDst.Store(address, Uint16x2ToUint32(uint2(low_half, high_half)));
+    }
+}
+
+// R8G8
+// Buffer->1D-image copy of 2-byte texels with two 8-bit channels: each thread
+// loads one dword and writes up to two adjacent texels (bytes 0-1 at x,
+// bytes 2-3 at x + 1) to the 1D-array image at array coordinate ImageOffset.z.
+// Only x is bounds-checked; the second texel is skipped past the region end.
+[numthreads(COPY_2D_NUM_THREAD_X, 1, 1)]
+void cs_copy_buffer_image1d_r8g8(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+    const uint3 first_texel = GetImageDst(uint3(2, 1, 0) * dispatch_thread_id);
+    const uint3 limit = GetDestBounds();
+
+    if (first_texel.x < limit.x) {
+        const uint address = GetBufferSrc16(dispatch_thread_id);
+        const uint4 bytes = Uint32ToUint8x4(BufferCopySrc.Load(address));
+
+        // Number of texels left in the row, starting at first_texel.
+        const uint texels_left = limit.x - first_texel.x;
+        const uint2 target = first_texel.xz;
+
+        if (texels_left >= 1) {
+            Image1CopyDstRg[target] = bytes.xy;
+        }
+        if (texels_left >= 2) {
+            Image1CopyDstRg[target + uint2(1, 0)] = bytes.zw;
+        }
+    }
+}
+
+// Buffer->image copy of 2-byte texels with two 8-bit channels: each thread
+// loads one dword and writes up to two horizontally adjacent texels
+// (bytes 0-1 at x, bytes 2-3 at x + 1), hence the thread id is scaled by 2 in
+// x. The second texel is skipped past the right edge of the clamped region.
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_buffer_image2d_r8g8(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+    const uint3 first_texel = GetImageDst(uint3(2, 1, 0) * dispatch_thread_id);
+    const uint3 limit = GetDestBounds();
+
+    if (first_texel.x < limit.x && first_texel.y < limit.y) {
+        const uint address = GetBufferSrc16(dispatch_thread_id);
+        const uint4 bytes = Uint32ToUint8x4(BufferCopySrc.Load(address));
+
+        // Number of texels left in this row, starting at first_texel.
+        const uint texels_left = limit.x - first_texel.x;
+
+        if (texels_left >= 1) {
+            Image2CopyDstRg[first_texel] = bytes.xy;
+        }
+        if (texels_left >= 2) {
+            Image2CopyDstRg[first_texel + uint3(1, 0, 0)] = bytes.zw;
+        }
+    }
+}
+
+// Image->buffer copy of 2-byte texels with two 8-bit channels: each thread
+// reads two horizontally adjacent texels and stores them as one dword
+// (texel x in bytes 0-1, texel x + 1 in bytes 2-3).
+// NOTE(review): the texel at x + 1 is read without a bounds check, so for an
+// odd region width bytes 2-3 come from outside the copy region -- confirm the
+// host tolerates or pads for this.
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_image2d_r8g8_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+    const uint3 first_texel = GetImageSrc(uint3(2, 1, 0) * dispatch_thread_id);
+    const uint3 limit = GetDestBounds();
+
+    if (first_texel.x < limit.x && first_texel.y < limit.y) {
+        const uint address = GetBufferDst16(dispatch_thread_id);
+
+        uint4 bytes;
+        bytes.xy = Image2CopySrc[first_texel].xy;
+        bytes.zw = Image2CopySrc[first_texel + uint3(1, 0, 0)].xy;
+
+        BufferCopyDst.Store(address, Uint8x4ToUint32(bytes));
+    }
+}
+
+// R8
+// Buffer->1D-image copy of 1-byte texels: each thread loads one dword and
+// writes up to four adjacent texels (byte i at x + i) to the 1D-array image at
+// array coordinate ImageOffset.z, hence the thread id is scaled by 4 in x.
+// Only x is bounds-checked; texels past the clamped region end are skipped.
+[numthreads(COPY_2D_NUM_THREAD_X, 1, 1)]
+void cs_copy_buffer_image1d_r8(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+    const uint3 first_texel = GetImageDst(uint3(4, 1, 0) * dispatch_thread_id);
+    const uint3 limit = GetDestBounds();
+
+    if (first_texel.x < limit.x) {
+        const uint address = GetBufferSrc8(dispatch_thread_id);
+        const uint4 bytes = Uint32ToUint8x4(BufferCopySrc.Load(address));
+
+        // Number of texels left in the row, starting at first_texel.
+        const uint texels_left = limit.x - first_texel.x;
+        const uint2 target = first_texel.xz;
+
+        [unroll]
+        for (uint i = 0; i < 4; ++i) {
+            if (i < texels_left) {
+                Image1CopyDstR[target + uint2(i, 0)] = bytes[i];
+            }
+        }
+    }
+}
+// Buffer->image copy of 1-byte texels: each thread loads one dword and writes
+// up to four horizontally adjacent texels (byte i at x + i), hence the thread
+// id is scaled by 4 in x. Texels past the right edge of the clamped copy
+// region are skipped.
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_buffer_image2d_r8(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+    const uint3 first_texel = GetImageDst(uint3(4, 1, 0) * dispatch_thread_id);
+    const uint3 limit = GetDestBounds();
+
+    if (first_texel.x < limit.x && first_texel.y < limit.y) {
+        const uint address = GetBufferSrc8(dispatch_thread_id);
+        const uint4 bytes = Uint32ToUint8x4(BufferCopySrc.Load(address));
+
+        // Number of texels left in this row, starting at first_texel.
+        const uint texels_left = limit.x - first_texel.x;
+
+        [unroll]
+        for (uint i = 0; i < 4; ++i) {
+            if (i < texels_left) {
+                Image2CopyDstR[first_texel + uint3(i, 0, 0)] = bytes[i];
+            }
+        }
+    }
+}
+// Image->buffer copy of 1-byte texels: each thread reads four horizontally
+// adjacent texels and stores them as one dword (texel x + i in byte i), hence
+// the thread id is scaled by 4 in x.
+// NOTE(review): texels x + 1 .. x + 3 are read without a bounds check, so when
+// the region width is not a multiple of 4 the trailing bytes come from outside
+// the copy region -- confirm the host tolerates or pads for this.
+[numthreads(COPY_2D_NUM_THREAD_X, COPY_2D_NUM_THREAD_Y, 1)]
+void cs_copy_image2d_r8_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
+    const uint3 first_texel = GetImageSrc(uint3(4, 1, 0) * dispatch_thread_id);
+    const uint3 limit = GetDestBounds();
+
+    if (first_texel.x < limit.x && first_texel.y < limit.y) {
+        const uint address = GetBufferDst8(dispatch_thread_id);
+
+        const uint byte0 = Image2CopySrc[first_texel].r;
+        const uint byte1 = Image2CopySrc[first_texel + uint3(1, 0, 0)].r;
+        const uint byte2 = Image2CopySrc[first_texel + uint3(2, 0, 0)].r;
+        const uint byte3 = Image2CopySrc[first_texel + uint3(3, 0, 0)].r;
+
+        BufferCopyDst.Store(address, Uint8x4ToUint32(uint4(byte0, byte1, byte2, byte3)));
+    }
+}