summaryrefslogtreecommitdiffstats
path: root/libfreerdp/primitives/primitives.cl
diff options
context:
space:
mode:
Diffstat (limited to 'libfreerdp/primitives/primitives.cl')
-rw-r--r--libfreerdp/primitives/primitives.cl463
1 files changed, 463 insertions, 0 deletions
diff --git a/libfreerdp/primitives/primitives.cl b/libfreerdp/primitives/primitives.cl
new file mode 100644
index 0000000..5e094df
--- /dev/null
+++ b/libfreerdp/primitives/primitives.cl
@@ -0,0 +1,463 @@
+/**
+ * FreeRDP: A Remote Desktop Protocol Implementation
+ * Optimized operations using openCL
+ * vi:ts=4 sw=4
+ *
+ * Copyright 2019 David Fort <contact@hardening-consulting.com>
+ * Copyright 2019 Rangee Gmbh
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License. You may obtain
+ * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+#define STRINGIFY(x) #x
+
+STRINGIFY(
+uchar clamp_uc(int v, short l, short h)
+{
+ if (v > h)
+ v = h;
+ if (v < l)
+ v = l;
+ return (uchar)v;
+}
+
+__kernel void yuv420_to_rgba_1b(
+ __global const uchar *bufY, unsigned strideY,
+ __global const uchar *bufU, unsigned strideU,
+ __global const uchar *bufV, unsigned strideV,
+ __global uchar *dest, unsigned strideDest)
+{
+ unsigned int x = get_global_id(0);
+ unsigned int y = get_global_id(1);
+
+ short Y = bufY[y * strideY + x];
+ short Udim = bufU[(y / 2) * strideU + (x / 2)] - 128;
+ short Vdim = bufV[(y / 2) * strideV + (x / 2)] - 128;
+
+ __global uchar *destPtr = dest + (strideDest * y) + (x * 4);
+
+ /**
+ * | R | ( | 256 0 403 | | Y | )
+ * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
+ * | B | ( | 256 475 0 | | V - 128 | )
+ */
+ int y256 = 256 * Y;
+ destPtr[0] = clamp_uc((y256 + (403 * Vdim)) >> 8, 0, 255); /* R */
+ destPtr[1] = clamp_uc((y256 - (48 * Udim) - (120 * Vdim)) >> 8 , 0, 255); /* G */
+ destPtr[2] = clamp_uc((y256 + (475 * Udim)) >> 8, 0, 255); /* B */
+ /* A */
+}
+
+__kernel void yuv420_to_abgr_1b(
+ __global const uchar *bufY, unsigned strideY,
+ __global const uchar *bufU, unsigned strideU,
+ __global const uchar *bufV, unsigned strideV,
+ __global uchar *dest, unsigned strideDest)
+{
+ unsigned int x = get_global_id(0);
+ unsigned int y = get_global_id(1);
+
+ short Y = bufY[y * strideY + x];
+ short U = bufU[(y / 2) * strideU + (x / 2)] - 128;
+ short V = bufV[(y / 2) * strideV + (x / 2)] - 128;
+
+ __global uchar *destPtr = dest + (strideDest * y) + (x * 4);
+
+ /**
+ * | R | ( | 256 0 403 | | Y | )
+ * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
+ * | B | ( | 256 475 0 | | V - 128 | )
+ */
+ int y256 = 256 * Y;
+ /* A */
+ destPtr[1] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
+ destPtr[2] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
+ destPtr[3] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
+}
+
+__kernel void yuv444_to_abgr_1b(
+ __global const uchar *bufY, unsigned strideY,
+ __global const uchar *bufU, unsigned strideU,
+ __global const uchar *bufV, unsigned strideV,
+ __global uchar *dest, unsigned strideDest)
+{
+ unsigned int x = get_global_id(0);
+ unsigned int y = get_global_id(1);
+
+ short Y = bufY[y * strideY + x];
+ short U = bufU[y * strideU + x] - 128;
+ short V = bufV[y * strideV + x] - 128;
+
+ __global uchar *destPtr = dest + (strideDest * y) + (x * 4);
+
+ /**
+ * | R | ( | 256 0 403 | | Y | )
+ * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
+ * | B | ( | 256 475 0 | | V - 128 | )
+ */
+ int y256 = 256 * Y;
+ /* A */
+ destPtr[1] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
+ destPtr[2] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
+ destPtr[3] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
+}
+
+__kernel void yuv444_to_rgba_1b(
+ __global const uchar *bufY, unsigned strideY,
+ __global const uchar *bufU, unsigned strideU,
+ __global const uchar *bufV, unsigned strideV,
+ __global uchar *dest, unsigned strideDest)
+{
+ unsigned int x = get_global_id(0);
+ unsigned int y = get_global_id(1);
+
+ short Y = bufY[y * strideY + x];
+ short U = bufU[y * strideU + x] - 128;
+ short V = bufV[y * strideV + x] - 128;
+
+ __global uchar *destPtr = dest + (strideDest * y) + (x * 4);
+
+ /**
+ * | R | ( | 256 0 403 | | Y | )
+ * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
+ * | B | ( | 256 475 0 | | V - 128 | )
+ */
+ int y256 = 256 * Y;
+ destPtr[0] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
+ destPtr[1] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
+ destPtr[2] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
+ /* A */
+}
+
+__kernel void yuv420_to_rgbx_1b(
+ __global const uchar *bufY, unsigned strideY,
+ __global const uchar *bufU, unsigned strideU,
+ __global const uchar *bufV, unsigned strideV,
+ __global uchar *dest, unsigned strideDest)
+{
+ unsigned int x = get_global_id(0);
+ unsigned int y = get_global_id(1);
+
+ short Y = bufY[y * strideY + x];
+ short Udim = bufU[(y / 2) * strideU + (x / 2)] - 128;
+ short Vdim = bufV[(y / 2) * strideV + (x / 2)] - 128;
+
+ __global uchar *destPtr = dest + (strideDest * y) + (x * 4);
+
+ /**
+ * | R | ( | 256 0 403 | | Y | )
+ * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
+ * | B | ( | 256 475 0 | | V - 128 | )
+ */
+ int y256 = 256 * Y;
+ destPtr[0] = clamp_uc((y256 + (403 * Vdim)) >> 8, 0, 255); /* R */
+ destPtr[1] = clamp_uc((y256 - (48 * Udim) - (120 * Vdim)) >> 8 , 0, 255); /* G */
+ destPtr[2] = clamp_uc((y256 + (475 * Udim)) >> 8, 0, 255); /* B */
+ destPtr[3] = 0xff; /* A */
+}
+
+__kernel void yuv420_to_xbgr_1b(
+ __global const uchar *bufY, unsigned strideY,
+ __global const uchar *bufU, unsigned strideU,
+ __global const uchar *bufV, unsigned strideV,
+ __global uchar *dest, unsigned strideDest)
+{
+ unsigned int x = get_global_id(0);
+ unsigned int y = get_global_id(1);
+
+ short Y = bufY[y * strideY + x];
+ short U = bufU[(y / 2) * strideU + (x / 2)] - 128;
+ short V = bufV[(y / 2) * strideV + (x / 2)] - 128;
+
+ __global uchar *destPtr = dest + (strideDest * y) + (x * 4);
+
+ /**
+ * | R | ( | 256 0 403 | | Y | )
+ * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
+ * | B | ( | 256 475 0 | | V - 128 | )
+ */
+ int y256 = 256 * Y;
+ destPtr[0] = 0xff; /* A */
+ destPtr[1] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
+ destPtr[2] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
+ destPtr[3] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
+}
+
+__kernel void yuv444_to_xbgr_1b(
+ __global const uchar *bufY, unsigned strideY,
+ __global const uchar *bufU, unsigned strideU,
+ __global const uchar *bufV, unsigned strideV,
+ __global uchar *dest, unsigned strideDest)
+{
+ unsigned int x = get_global_id(0);
+ unsigned int y = get_global_id(1);
+
+ short Y = bufY[y * strideY + x];
+ short U = bufU[y * strideU + x] - 128;
+ short V = bufV[y * strideV + x] - 128;
+
+ __global uchar *destPtr = dest + (strideDest * y) + (x * 4);
+
+ /**
+ * | R | ( | 256 0 403 | | Y | )
+ * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
+ * | B | ( | 256 475 0 | | V - 128 | )
+ */
+ int y256 = 256 * Y;
+ destPtr[0] = 0xff; /* A */
+ destPtr[1] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
+ destPtr[2] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
+ destPtr[3] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
+}
+
+__kernel void yuv444_to_rgbx_1b(
+ __global const uchar *bufY, unsigned strideY,
+ __global const uchar *bufU, unsigned strideU,
+ __global const uchar *bufV, unsigned strideV,
+ __global uchar *dest, unsigned strideDest)
+{
+ unsigned int x = get_global_id(0);
+ unsigned int y = get_global_id(1);
+
+ short Y = bufY[y * strideY + x];
+ short U = bufU[y * strideU + x] - 128;
+ short V = bufV[y * strideV + x] - 128;
+
+ __global uchar *destPtr = dest + (strideDest * y) + (x * 4);
+
+ /**
+ * | R | ( | 256 0 403 | | Y | )
+ * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
+ * | B | ( | 256 475 0 | | V - 128 | )
+ */
+ int y256 = 256 * Y;
+ destPtr[0] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
+ destPtr[1] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
+ destPtr[2] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
+ destPtr[3] = 0xff; /* A */
+}
+
+
+__kernel void yuv420_to_argb_1b(
+ __global const uchar *bufY, unsigned strideY,
+ __global const uchar *bufU, unsigned strideU,
+ __global const uchar *bufV, unsigned strideV,
+ __global uchar *dest, unsigned strideDest)
+{
+ unsigned int x = get_global_id(0);
+ unsigned int y = get_global_id(1);
+
+ short Y = bufY[y * strideY + x];
+ short Udim = bufU[(y / 2) * strideU + (x / 2)] - 128;
+ short Vdim = bufV[(y / 2) * strideV + (x / 2)] - 128;
+
+ __global uchar *destPtr = dest + (strideDest * y) + (x * 4);
+
+ /**
+ * | R | ( | 256 0 403 | | Y | )
+ * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
+ * | B | ( | 256 475 0 | | V - 128 | )
+ */
+ int y256 = 256 * Y;
+ /* A */
+ destPtr[1] = clamp_uc((y256 + (403 * Vdim)) >> 8, 0, 255); /* R */
+ destPtr[2] = clamp_uc((y256 - (48 * Udim) - (120 * Vdim)) >> 8 , 0, 255); /* G */
+ destPtr[3] = clamp_uc((y256 + (475 * Udim)) >> 8, 0, 255); /* B */
+}
+
+__kernel void yuv420_to_bgra_1b(
+ __global const uchar *bufY, unsigned strideY,
+ __global const uchar *bufU, unsigned strideU,
+ __global const uchar *bufV, unsigned strideV,
+ __global uchar *dest, unsigned strideDest)
+{
+ unsigned int x = get_global_id(0);
+ unsigned int y = get_global_id(1);
+
+ short Y = bufY[y * strideY + x];
+ short U = bufU[(y / 2) * strideU + (x / 2)] - 128;
+ short V = bufV[(y / 2) * strideV + (x / 2)] - 128;
+
+ __global uchar *destPtr = dest + (strideDest * y) + (x * 4);
+
+ /**
+ * | R | ( | 256 0 403 | | Y | )
+ * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
+ * | B | ( | 256 475 0 | | V - 128 | )
+ */
+ int y256 = 256 * Y;
+ destPtr[0] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
+ destPtr[1] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
+ destPtr[2] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
+ /* A */
+}
+
+__kernel void yuv444_to_bgra_1b(
+ __global const uchar *bufY, unsigned strideY,
+ __global const uchar *bufU, unsigned strideU,
+ __global const uchar *bufV, unsigned strideV,
+ __global uchar *dest, unsigned strideDest)
+{
+ unsigned int x = get_global_id(0);
+ unsigned int y = get_global_id(1);
+
+ short Y = bufY[y * strideY + x];
+ short U = bufU[y * strideU + x] - 128;
+ short V = bufV[y * strideV + x] - 128;
+
+ __global uchar *destPtr = dest + (strideDest * y) + (x * 4);
+
+ /**
+ * | R | ( | 256 0 403 | | Y | )
+ * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
+ * | B | ( | 256 475 0 | | V - 128 | )
+ */
+ int y256 = 256 * Y;
+ destPtr[0] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
+ destPtr[1] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
+ destPtr[2] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
+ /* A */
+}
+
+__kernel void yuv444_to_argb_1b(
+ __global const uchar *bufY, unsigned strideY,
+ __global const uchar *bufU, unsigned strideU,
+ __global const uchar *bufV, unsigned strideV,
+ __global uchar *dest, unsigned strideDest)
+{
+ unsigned int x = get_global_id(0);
+ unsigned int y = get_global_id(1);
+
+ short Y = bufY[y * strideY + x];
+ short U = bufU[y * strideU + x] - 128;
+ short V = bufV[y * strideV + x] - 128;
+
+ __global uchar *destPtr = dest + (strideDest * y) + (x * 4);
+
+ /**
+ * | R | ( | 256 0 403 | | Y | )
+ * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
+ * | B | ( | 256 475 0 | | V - 128 | )
+ */
+ int y256 = 256 * Y;
+ destPtr[3] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
+ destPtr[2] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
+ destPtr[1] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
+ /* A */
+}
+
+__kernel void yuv420_to_xrgb_1b(
+ __global const uchar *bufY, unsigned strideY,
+ __global const uchar *bufU, unsigned strideU,
+ __global const uchar *bufV, unsigned strideV,
+ __global uchar *dest, unsigned strideDest)
+{
+ unsigned int x = get_global_id(0);
+ unsigned int y = get_global_id(1);
+
+ short Y = bufY[y * strideY + x];
+ short Udim = bufU[(y / 2) * strideU + (x / 2)] - 128;
+ short Vdim = bufV[(y / 2) * strideV + (x / 2)] - 128;
+
+ __global uchar *destPtr = dest + (strideDest * y) + (x * 4);
+
+ /**
+ * | R | ( | 256 0 403 | | Y | )
+ * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
+ * | B | ( | 256 475 0 | | V - 128 | )
+ */
+ int y256 = 256 * Y;
+ destPtr[0] = 0xff; /* A */
+ destPtr[1] = clamp_uc((y256 + (403 * Vdim)) >> 8, 0, 255); /* R */
+ destPtr[2] = clamp_uc((y256 - (48 * Udim) - (120 * Vdim)) >> 8 , 0, 255); /* G */
+ destPtr[3] = clamp_uc((y256 + (475 * Udim)) >> 8, 0, 255); /* B */
+}
+
+__kernel void yuv420_to_bgrx_1b(
+ __global const uchar *bufY, unsigned strideY,
+ __global const uchar *bufU, unsigned strideU,
+ __global const uchar *bufV, unsigned strideV,
+ __global uchar *dest, unsigned strideDest)
+{
+ unsigned int x = get_global_id(0);
+ unsigned int y = get_global_id(1);
+
+ short Y = bufY[y * strideY + x];
+ short U = bufU[(y / 2) * strideU + (x / 2)] - 128;
+ short V = bufV[(y / 2) * strideV + (x / 2)] - 128;
+
+ __global uchar *destPtr = dest + (strideDest * y) + (x * 4);
+
+ /**
+ * | R | ( | 256 0 403 | | Y | )
+ * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
+ * | B | ( | 256 475 0 | | V - 128 | )
+ */
+ int y256 = 256 * Y;
+ destPtr[0] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
+ destPtr[1] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
+ destPtr[2] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
+ destPtr[3] = 0xff; /* A */
+}
+
+__kernel void yuv444_to_bgrx_1b(
+ __global const uchar *bufY, unsigned strideY,
+ __global const uchar *bufU, unsigned strideU,
+ __global const uchar *bufV, unsigned strideV,
+ __global uchar *dest, unsigned strideDest)
+{
+ unsigned int x = get_global_id(0);
+ unsigned int y = get_global_id(1);
+
+ short Y = bufY[y * strideY + x];
+ short U = bufU[y * strideU + x] - 128;
+ short V = bufV[y * strideV + x] - 128;
+
+ __global uchar *destPtr = dest + (strideDest * y) + (x * 4);
+
+ /**
+ * | R | ( | 256 0 403 | | Y | )
+ * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
+ * | B | ( | 256 475 0 | | V - 128 | )
+ */
+ int y256 = 256 * Y;
+ destPtr[0] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
+ destPtr[1] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
+ destPtr[2] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
+ destPtr[3] = 0xff; /* A */
+}
+
+__kernel void yuv444_to_xrgb_1b(
+ __global const uchar *bufY, unsigned strideY,
+ __global const uchar *bufU, unsigned strideU,
+ __global const uchar *bufV, unsigned strideV,
+ __global uchar *dest, unsigned strideDest)
+{
+ unsigned int x = get_global_id(0);
+ unsigned int y = get_global_id(1);
+
+ short Y = bufY[y * strideY + x];
+ short U = bufU[y * strideU + x] - 128;
+ short V = bufV[y * strideV + x] - 128;
+
+ __global uchar *destPtr = dest + (strideDest * y) + (x * 4);
+
+ /**
+ * | R | ( | 256 0 403 | | Y | )
+ * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
+ * | B | ( | 256 475 0 | | V - 128 | )
+ */
+ int y256 = 256 * Y;
+ destPtr[3] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
+ destPtr[2] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
+ destPtr[1] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
+ destPtr[0] = 0xff; /* A */
+}
+)