summaryrefslogtreecommitdiffstats
path: root/libfreerdp/primitives/prim_YUV_opencl.c
diff options
context:
space:
mode:
Diffstat (limited to 'libfreerdp/primitives/prim_YUV_opencl.c')
-rw-r--r--libfreerdp/primitives/prim_YUV_opencl.c500
1 files changed, 500 insertions, 0 deletions
diff --git a/libfreerdp/primitives/prim_YUV_opencl.c b/libfreerdp/primitives/prim_YUV_opencl.c
new file mode 100644
index 0000000..2ca1b31
--- /dev/null
+++ b/libfreerdp/primitives/prim_YUV_opencl.c
@@ -0,0 +1,500 @@
+/**
+ * FreeRDP: A Remote Desktop Protocol Implementation
+ * Optimized YUV/RGB conversion operations using openCL
+ *
+ * Copyright 2019 David Fort <contact@hardening-consulting.com>
+ * Copyright 2019 Rangee Gmbh
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <freerdp/config.h>
+
+#include <freerdp/types.h>
+#include <freerdp/primitives.h>
+#include "prim_internal.h"
+
+#if defined(WITH_OPENCL)
+#ifdef __APPLE__
+#include "OpenCL/opencl.h"
+#else
+#include <CL/cl.h>
+#endif
+#endif
+
+#include <freerdp/log.h>
+#define TAG FREERDP_TAG("primitives")
+
+typedef struct
+{
+ BOOL support;
+ cl_platform_id platformId;
+ cl_device_id deviceId;
+ cl_context context;
+ cl_command_queue commandQueue;
+ cl_program program;
+} primitives_opencl_context;
+
+typedef struct
+{
+ primitives_opencl_context* cl;
+ cl_kernel kernel;
+ cl_mem srcObjs[3];
+ cl_mem dstObj;
+ prim_size_t roi;
+ size_t dstStep;
+} primitives_cl_kernel;
+
+static primitives_opencl_context* primitives_get_opencl_context(void);
+
+static void cl_kernel_free(primitives_cl_kernel* kernel)
+{
+ if (!kernel)
+ return;
+
+ if (kernel->dstObj)
+ clReleaseMemObject(kernel->dstObj);
+
+ for (size_t i = 0; i < ARRAYSIZE(kernel->srcObjs); i++)
+ {
+ cl_mem obj = kernel->srcObjs[i];
+ kernel->srcObjs[i] = NULL;
+ if (obj)
+ clReleaseMemObject(obj);
+ }
+
+ if (kernel->kernel)
+ clReleaseKernel(kernel->kernel);
+
+ free(kernel);
+}
+
+static primitives_cl_kernel* cl_kernel_new(const char* kernelName, const prim_size_t* roi)
+{
+ WINPR_ASSERT(kernelName);
+ WINPR_ASSERT(roi);
+
+ primitives_cl_kernel* kernel = calloc(1, sizeof(primitives_cl_kernel));
+ if (!kernel)
+ goto fail;
+
+ kernel->roi = *roi;
+ kernel->cl = primitives_get_opencl_context();
+ if (!kernel->cl)
+ goto fail;
+
+ cl_int ret = CL_INVALID_VALUE;
+ kernel->kernel = clCreateKernel(kernel->cl->program, kernelName, &ret);
+ if (ret != CL_SUCCESS)
+ {
+ WLog_ERR(TAG, "openCL: unable to create kernel %s", kernelName);
+ goto fail;
+ }
+
+ return kernel;
+fail:
+ cl_kernel_free(kernel);
+ return NULL;
+}
+
+static BOOL cl_kernel_set_sources(primitives_cl_kernel* ctx,
+ const BYTE* const WINPR_RESTRICT pSrc[3], const UINT32 srcStep[3])
+{
+ const char* sourceNames[] = { "Y", "U", "V" };
+
+ WINPR_ASSERT(ctx);
+ WINPR_ASSERT(pSrc);
+ WINPR_ASSERT(srcStep);
+
+ for (cl_uint i = 0; i < ARRAYSIZE(ctx->srcObjs); i++)
+ {
+ cl_int ret = CL_INVALID_VALUE;
+ ctx->srcObjs[i] = clCreateBuffer(ctx->cl->context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
+ 1ull * srcStep[i] * ctx->roi.height, pSrc[i], &ret);
+ if (ret != CL_SUCCESS)
+ {
+ WLog_ERR(TAG, "unable to create %sobj", sourceNames[i]);
+ return FALSE;
+ }
+
+ ret = clSetKernelArg(ctx->kernel, i * 2, sizeof(cl_mem), &ctx->srcObjs[i]);
+ if (ret != CL_SUCCESS)
+ {
+ WLog_ERR(TAG, "unable to set arg for %sobj", sourceNames[i]);
+ return FALSE;
+ }
+
+ ret = clSetKernelArg(ctx->kernel, i * 2 + 1, sizeof(cl_uint), &srcStep[i]);
+ if (ret != CL_SUCCESS)
+ {
+ WLog_ERR(TAG, "unable to set arg stride for %sobj", sourceNames[i]);
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+static BOOL cl_kernel_set_destination(primitives_cl_kernel* ctx, UINT32 dstStep)
+{
+
+ WINPR_ASSERT(ctx);
+
+ ctx->dstStep = dstStep;
+ cl_int ret = CL_INVALID_VALUE;
+ ctx->dstObj = clCreateBuffer(ctx->cl->context, CL_MEM_WRITE_ONLY,
+ 1ull * dstStep * ctx->roi.height, NULL, &ret);
+ if (ret != CL_SUCCESS)
+ {
+ WLog_ERR(TAG, "unable to create dest obj");
+ return FALSE;
+ }
+
+ ret = clSetKernelArg(ctx->kernel, 6, sizeof(cl_mem), &ctx->dstObj);
+ if (ret != CL_SUCCESS)
+ {
+ WLog_ERR(TAG, "unable to set arg destObj");
+ return FALSE;
+ }
+
+ ret = clSetKernelArg(ctx->kernel, 7, sizeof(cl_uint), &dstStep);
+ if (ret != CL_SUCCESS)
+ {
+ WLog_ERR(TAG, "unable to set arg dstStep");
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static BOOL cl_kernel_process(primitives_cl_kernel* ctx, BYTE* pDst)
+{
+ WINPR_ASSERT(ctx);
+ WINPR_ASSERT(pDst);
+
+ size_t indexes[2] = { 0 };
+ indexes[0] = ctx->roi.width;
+ indexes[1] = ctx->roi.height;
+
+ cl_int ret = clEnqueueNDRangeKernel(ctx->cl->commandQueue, ctx->kernel, 2, NULL, indexes, NULL,
+ 0, NULL, NULL);
+ if (ret != CL_SUCCESS)
+ {
+ WLog_ERR(TAG, "unable to enqueue call kernel");
+ return FALSE;
+ }
+
+ /* Transfer result to host */
+ ret = clEnqueueReadBuffer(ctx->cl->commandQueue, ctx->dstObj, CL_TRUE, 0,
+ ctx->roi.height * ctx->dstStep, pDst, 0, NULL, NULL);
+ if (ret != CL_SUCCESS)
+ {
+ WLog_ERR(TAG, "unable to read back buffer");
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static pstatus_t opencl_YUVToRGB(const char* kernelName, const BYTE* const WINPR_RESTRICT pSrc[3],
+ const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst, UINT32 dstStep,
+ const prim_size_t* WINPR_RESTRICT roi)
+{
+ pstatus_t res = -1;
+
+ primitives_cl_kernel* ctx = cl_kernel_new(kernelName, roi);
+ if (!ctx)
+ goto fail;
+
+ if (!cl_kernel_set_sources(ctx, pSrc, srcStep))
+ goto fail;
+
+ if (!cl_kernel_set_destination(ctx, dstStep))
+ goto fail;
+
+ if (!cl_kernel_process(ctx, pDst))
+ goto fail;
+
+ res = PRIMITIVES_SUCCESS;
+
+fail:
+ cl_kernel_free(ctx);
+ return res;
+}
+
+static primitives_opencl_context openclContext = { 0 };
+
+static primitives_opencl_context* primitives_get_opencl_context(void)
+{
+ return &openclContext;
+}
+
+static void cl_context_free(primitives_opencl_context* ctx)
+{
+ if (!ctx)
+ return;
+ clReleaseProgram(ctx->program);
+ clReleaseCommandQueue(ctx->commandQueue);
+ clReleaseContext(ctx->context);
+ clReleaseDevice(ctx->deviceId);
+ ctx->support = FALSE;
+}
+
+static pstatus_t primitives_uninit_opencl(void)
+{
+ if (!openclContext.support)
+ return PRIMITIVES_SUCCESS;
+
+ cl_context_free(&openclContext);
+ return PRIMITIVES_SUCCESS;
+}
+
+static const char openclProgram[] =
+#include "primitives.cl"
+ ;
+
+static BOOL primitives_init_opencl_context(primitives_opencl_context* cl)
+{
+ cl_platform_id* platform_ids = NULL;
+ cl_uint ndevices = 0;
+ cl_uint nplatforms = 0;
+ cl_kernel kernel = NULL;
+ cl_int ret = 0;
+
+ BOOL gotGPU = FALSE;
+ size_t programLen = 0;
+
+ ret = clGetPlatformIDs(0, NULL, &nplatforms);
+ if (ret != CL_SUCCESS || nplatforms < 1)
+ return FALSE;
+
+ platform_ids = calloc(nplatforms, sizeof(*platform_ids));
+ if (!platform_ids)
+ return FALSE;
+
+ ret = clGetPlatformIDs(nplatforms, platform_ids, &nplatforms);
+ if (ret != CL_SUCCESS)
+ {
+ free(platform_ids);
+ return FALSE;
+ }
+
+ for (cl_uint i = 0; (i < nplatforms) && !gotGPU; i++)
+ {
+ cl_device_id device_id = NULL;
+ cl_context context = NULL;
+ char platformName[1000] = { 0 };
+ char deviceName[1000] = { 0 };
+
+ ret = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_NAME, sizeof(platformName),
+ platformName, NULL);
+ if (ret != CL_SUCCESS)
+ continue;
+
+ ret = clGetDeviceIDs(platform_ids[i], CL_DEVICE_TYPE_GPU, 1, &device_id, &ndevices);
+ if (ret != CL_SUCCESS)
+ continue;
+
+ ret = clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL);
+ if (ret != CL_SUCCESS)
+ {
+ WLog_ERR(TAG, "openCL: unable get device name for platform %s", platformName);
+ clReleaseDevice(device_id);
+ continue;
+ }
+
+ context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
+ if (ret != CL_SUCCESS)
+ {
+ WLog_ERR(TAG, "openCL: unable to create context for platform %s, device %s",
+ platformName, deviceName);
+ clReleaseDevice(device_id);
+ continue;
+ }
+
+ cl->commandQueue = clCreateCommandQueue(context, device_id, 0, &ret);
+ if (ret != CL_SUCCESS)
+ {
+ WLog_ERR(TAG, "openCL: unable to create command queue");
+ clReleaseContext(context);
+ clReleaseDevice(device_id);
+ continue;
+ }
+
+ WLog_INFO(TAG, "openCL: using platform=%s device=%s", platformName, deviceName);
+
+ cl->platformId = platform_ids[i];
+ cl->deviceId = device_id;
+ cl->context = context;
+ gotGPU = TRUE;
+ }
+
+ free(platform_ids);
+
+ if (!gotGPU)
+ {
+ WLog_ERR(TAG, "openCL: no GPU found");
+ return FALSE;
+ }
+
+ programLen = strnlen(openclProgram, sizeof(openclProgram));
+ const char* ptr = openclProgram;
+ cl->program = clCreateProgramWithSource(cl->context, 1, &ptr, &programLen, &ret);
+ if (ret != CL_SUCCESS)
+ {
+ WLog_ERR(TAG, "openCL: unable to create program");
+ goto fail;
+ }
+
+ ret = clBuildProgram(cl->program, 1, &cl->deviceId, NULL, NULL, NULL);
+ if (ret != CL_SUCCESS)
+ {
+ size_t length = 0;
+ char buffer[2048];
+ ret = clGetProgramBuildInfo(cl->program, cl->deviceId, CL_PROGRAM_BUILD_LOG, sizeof(buffer),
+ buffer, &length);
+ if (ret != CL_SUCCESS)
+ {
+ WLog_ERR(TAG,
+ "openCL: building program failed but unable to retrieve buildLog, error=%d",
+ ret);
+ }
+ else
+ {
+ WLog_ERR(TAG, "openCL: unable to build program, errorLog=%s", buffer);
+ }
+ goto fail;
+ }
+
+ kernel = clCreateKernel(cl->program, "yuv420_to_bgra_1b", &ret);
+ if (ret != CL_SUCCESS)
+ {
+ WLog_ERR(TAG, "openCL: unable to create yuv420_to_bgra_1b kernel");
+ goto fail;
+ }
+ clReleaseKernel(kernel);
+
+ cl->support = TRUE;
+ return TRUE;
+
+fail:
+ cl_context_free(cl);
+ return FALSE;
+}
+
+static pstatus_t opencl_YUV420ToRGB_8u_P3AC4R(const BYTE* const WINPR_RESTRICT pSrc[3],
+ const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst,
+ UINT32 dstStep, UINT32 DstFormat,
+ const prim_size_t* WINPR_RESTRICT roi)
+{
+ const char* kernel_name = NULL;
+
+ switch (DstFormat)
+ {
+ case PIXEL_FORMAT_ABGR32:
+ kernel_name = "yuv420_to_abgr_1b";
+ break;
+ case PIXEL_FORMAT_XBGR32:
+ kernel_name = "yuv420_to_xbgr_1b";
+ break;
+ case PIXEL_FORMAT_RGBX32:
+ kernel_name = "yuv420_to_rgba_1b";
+ break;
+ case PIXEL_FORMAT_RGBA32:
+ kernel_name = "yuv420_to_rgbx_1b";
+ break;
+ case PIXEL_FORMAT_BGRA32:
+ kernel_name = "yuv420_to_bgra_1b";
+ break;
+ case PIXEL_FORMAT_BGRX32:
+ kernel_name = "yuv420_to_bgrx_1b";
+ break;
+ case PIXEL_FORMAT_XRGB32:
+ kernel_name = "yuv420_to_xrgb_1b";
+ break;
+ case PIXEL_FORMAT_ARGB32:
+ kernel_name = "yuv420_to_argb_1b";
+ break;
+ default:
+ {
+ primitives_t* p = primitives_get_by_type(PRIMITIVES_ONLY_CPU);
+ if (!p)
+ return -1;
+ return p->YUV420ToRGB_8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
+ }
+ }
+
+ return opencl_YUVToRGB(kernel_name, pSrc, srcStep, pDst, dstStep, roi);
+}
+
+static pstatus_t opencl_YUV444ToRGB_8u_P3AC4R(const BYTE* const WINPR_RESTRICT pSrc[3],
+ const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst,
+ UINT32 dstStep, UINT32 DstFormat,
+ const prim_size_t* WINPR_RESTRICT roi)
+{
+ const char* kernel_name = NULL;
+
+ switch (DstFormat)
+ {
+ case PIXEL_FORMAT_ABGR32:
+ kernel_name = "yuv444_to_abgr_1b";
+ break;
+ case PIXEL_FORMAT_XBGR32:
+ kernel_name = "yuv444_to_xbgr_1b";
+ break;
+ case PIXEL_FORMAT_RGBX32:
+ kernel_name = "yuv444_to_rgba_1b";
+ break;
+ case PIXEL_FORMAT_RGBA32:
+ kernel_name = "yuv444_to_rgbx_1b";
+ break;
+ case PIXEL_FORMAT_BGRA32:
+ kernel_name = "yuv444_to_bgra_1b";
+ break;
+ case PIXEL_FORMAT_BGRX32:
+ kernel_name = "yuv444_to_bgrx_1b";
+ break;
+ case PIXEL_FORMAT_XRGB32:
+ kernel_name = "yuv444_to_xrgb_1b";
+ break;
+ case PIXEL_FORMAT_ARGB32:
+ kernel_name = "yuv444_to_argb_1b";
+ break;
+ default:
+ {
+ primitives_t* p = primitives_get_by_type(PRIMITIVES_ONLY_CPU);
+ if (!p)
+ return -1;
+ return p->YUV444ToRGB_8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
+ }
+ }
+
+ return opencl_YUVToRGB(kernel_name, pSrc, srcStep, pDst, dstStep, roi);
+}
+
+BOOL primitives_init_opencl(primitives_t* prims)
+{
+ primitives_t* p = primitives_get_by_type(PRIMITIVES_ONLY_CPU);
+ if (!prims || !p)
+ return FALSE;
+ *prims = *p;
+
+ if (!primitives_init_opencl_context(&openclContext))
+ return FALSE;
+
+ prims->YUV420ToRGB_8u_P3AC4R = opencl_YUV420ToRGB_8u_P3AC4R;
+ prims->YUV444ToRGB_8u_P3AC4R = opencl_YUV444ToRGB_8u_P3AC4R;
+ prims->flags |= PRIM_FLAGS_HAVE_EXTGPU;
+ prims->uninit = primitives_uninit_opencl;
+ return TRUE;
+}