summaryrefslogtreecommitdiffstats
path: root/src/libs/dxvk-native-1.9.2a/src/d3d9/shaders/d3d9_convert_nv12.comp
blob: 70f7998d6a15ccd160b0262456c97cf402366a33 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#version 450
#extension GL_GOOGLE_include_directive : enable

#include "d3d9_convert_common.h"

// Each workgroup covers an 8x8x1 tile of invocations.
layout(
  local_size_x = 8,
  local_size_y = 8,
  local_size_z = 1) in;

// Destination image; this shader only ever stores to it.
layout(binding = 0)
writeonly uniform image2D dst;

// Source data, read as unsigned integer texels through texelFetch.
layout(binding = 1) uniform usamplerBuffer src;

// Push constants supplied by the caller.
// extent bounds the invocation grid in main() and is also used there
// as the row pitch (in source texels) and row count of the first plane.
// Each in-bounds invocation writes two horizontally adjacent pixels,
// so extent.x counts pixel pairs rather than single pixels.
layout(push_constant)
uniform u_info_t {
  uvec2 extent;
} u_info;

// Fetches the texel at index `offset` from `source` and expands its
// red channel into two floats using unpackUnorm2x8.
//
// unpackUnorm2x8 is not declared in this file; it is presumably
// provided by d3d9_convert_common.h and expected to map the two
// packed bytes to normalized values -- confirm against that header.
//
// source: texel buffer to read from.
// offset: texel index (not a byte offset) into the buffer.
vec2 fetchUnorm2x8(usamplerBuffer source, uint offset) {
  // Read through the parameter rather than the global binding so the
  // helper honours whichever buffer the caller passes in.
  return unpackUnorm2x8(texelFetch(source, int(offset)).r);
}

// Format is:
// YYYYYYYYYYYYYYY...
// YYYYYYYYYYYYYYY...
// UVUVUVUVUVUVUVU...

// Converts one [2, 1] macropixel per invocation: two luminance samples
// sharing a single chroma pair are turned into two colour values and
// stored side by side in the destination image.
void main() {
  ivec3 invocation = ivec3(gl_GlobalInvocationID);

  // Invocations outside of the requested extent have nothing to do.
  if (!all(lessThan(invocation.xy, u_info.extent)))
    return;

  // The extent doubles as the row pitch / row count of the first plane.
  uvec2 stride = uvec2(u_info.extent.x, u_info.extent.y);

  // First plane: one fetch yields 2 luminance samples.
  uint lumaIndex = invocation.x
                 + invocation.y * stride.x;

  vec2 luma = fetchUnorm2x8(src, lumaIndex) - (16 / 255.0);

  // Second plane: chroma is subsampled as [2, 2], so two consecutive
  // rows share one chroma row (hence the division of y by 2). The x
  // coordinate needs no adjustment because each fetch reads a uint16.
  // The plane itself starts right after the stride.x * stride.y
  // texels of the first plane.
  uint chromaIndex = invocation.x
                   + invocation.y / 2 * stride.x
                   + stride.x * stride.y;

  vec2 chroma = fetchUnorm2x8(src, chromaIndex) - (128 / 255.0);

  // Destination coordinates of the two pixels of this macropixel.
  ivec2 leftPos  = ivec2(invocation.x * 2, invocation.y);
  ivec2 rightPos = ivec2(leftPos.x + 1,    leftPos.y);

  // The NV12 format seems to use the BT.709 color space.
  imageStore(dst, leftPos,  convertBT_709(vec3(luma.x, chroma.x, chroma.y)));
  imageStore(dst, rightPos, convertBT_709(vec3(luma.y, chroma.x, chroma.y)));
}