diff options
Diffstat (limited to 'libfreerdp/primitives/prim_YUV_opencl.c')
-rw-r--r-- | libfreerdp/primitives/prim_YUV_opencl.c | 500 |
1 files changed, 500 insertions, 0 deletions
diff --git a/libfreerdp/primitives/prim_YUV_opencl.c b/libfreerdp/primitives/prim_YUV_opencl.c new file mode 100644 index 0000000..2ca1b31 --- /dev/null +++ b/libfreerdp/primitives/prim_YUV_opencl.c @@ -0,0 +1,500 @@ +/** + * FreeRDP: A Remote Desktop Protocol Implementation + * Optimized YUV/RGB conversion operations using openCL + * + * Copyright 2019 David Fort <contact@hardening-consulting.com> + * Copyright 2019 Rangee Gmbh + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <freerdp/config.h> + +#include <freerdp/types.h> +#include <freerdp/primitives.h> +#include "prim_internal.h" + +#if defined(WITH_OPENCL) +#ifdef __APPLE__ +#include "OpenCL/opencl.h" +#else +#include <CL/cl.h> +#endif +#endif + +#include <freerdp/log.h> +#define TAG FREERDP_TAG("primitives") + +typedef struct +{ + BOOL support; + cl_platform_id platformId; + cl_device_id deviceId; + cl_context context; + cl_command_queue commandQueue; + cl_program program; +} primitives_opencl_context; + +typedef struct +{ + primitives_opencl_context* cl; + cl_kernel kernel; + cl_mem srcObjs[3]; + cl_mem dstObj; + prim_size_t roi; + size_t dstStep; +} primitives_cl_kernel; + +static primitives_opencl_context* primitives_get_opencl_context(void); + +static void cl_kernel_free(primitives_cl_kernel* kernel) +{ + if (!kernel) + return; + + if (kernel->dstObj) + clReleaseMemObject(kernel->dstObj); + + for (size_t i = 0; i < ARRAYSIZE(kernel->srcObjs); i++) + { + cl_mem obj = kernel->srcObjs[i]; + kernel->srcObjs[i] = NULL; + if (obj) + clReleaseMemObject(obj); + } + + if (kernel->kernel) + clReleaseKernel(kernel->kernel); + + free(kernel); +} + +static primitives_cl_kernel* cl_kernel_new(const char* kernelName, const prim_size_t* roi) +{ + WINPR_ASSERT(kernelName); + WINPR_ASSERT(roi); + + primitives_cl_kernel* kernel = calloc(1, sizeof(primitives_cl_kernel)); + if (!kernel) + goto fail; + + kernel->roi = *roi; + kernel->cl = primitives_get_opencl_context(); + if (!kernel->cl) + goto fail; + + cl_int ret = CL_INVALID_VALUE; + kernel->kernel = clCreateKernel(kernel->cl->program, kernelName, &ret); + if (ret != CL_SUCCESS) + { + WLog_ERR(TAG, "openCL: unable to create kernel %s", kernelName); + goto fail; + } + + return kernel; +fail: + cl_kernel_free(kernel); + return NULL; +} + +static BOOL cl_kernel_set_sources(primitives_cl_kernel* ctx, + const BYTE* const WINPR_RESTRICT pSrc[3], const UINT32 srcStep[3]) +{ + const char* sourceNames[] = { "Y", "U", "V" }; + + WINPR_ASSERT(ctx); + WINPR_ASSERT(pSrc); + WINPR_ASSERT(srcStep); + + for (cl_uint i = 0; i < ARRAYSIZE(ctx->srcObjs); i++) + { + cl_int ret = CL_INVALID_VALUE; + ctx->srcObjs[i] = clCreateBuffer(ctx->cl->context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + 1ull * srcStep[i] * ctx->roi.height, pSrc[i], &ret); + if (ret != CL_SUCCESS) + { + WLog_ERR(TAG, "unable to create %sobj", sourceNames[i]); + return FALSE; + } + + ret = clSetKernelArg(ctx->kernel, i * 2, sizeof(cl_mem), &ctx->srcObjs[i]); + if (ret != CL_SUCCESS) + { + WLog_ERR(TAG, "unable to set arg for %sobj", sourceNames[i]); + return FALSE; + } + + ret = clSetKernelArg(ctx->kernel, i * 2 + 1, sizeof(cl_uint), &srcStep[i]); + if (ret != CL_SUCCESS) + { + WLog_ERR(TAG, "unable to set arg stride for %sobj", sourceNames[i]); + return FALSE; + } + } + + return TRUE; +} + +static BOOL cl_kernel_set_destination(primitives_cl_kernel* ctx, UINT32 dstStep) +{ + + WINPR_ASSERT(ctx); + + ctx->dstStep = dstStep; + cl_int ret = CL_INVALID_VALUE; + ctx->dstObj = clCreateBuffer(ctx->cl->context, CL_MEM_WRITE_ONLY, + 1ull * dstStep * ctx->roi.height, NULL, &ret); + if (ret != CL_SUCCESS) + { + WLog_ERR(TAG, "unable to create dest obj"); + return FALSE; + } + + ret = clSetKernelArg(ctx->kernel, 6, sizeof(cl_mem), &ctx->dstObj); + if (ret != CL_SUCCESS) + { + WLog_ERR(TAG, "unable to set arg destObj"); + return FALSE; + } + + ret = clSetKernelArg(ctx->kernel, 7, sizeof(cl_uint), &dstStep); + if (ret != CL_SUCCESS) + { + WLog_ERR(TAG, "unable to set arg dstStep"); + return FALSE; + } + + return TRUE; +} + +static BOOL cl_kernel_process(primitives_cl_kernel* ctx, BYTE* pDst) +{ + WINPR_ASSERT(ctx); + WINPR_ASSERT(pDst); + + size_t indexes[2] = { 0 }; + indexes[0] = ctx->roi.width; + indexes[1] = ctx->roi.height; + + cl_int ret = clEnqueueNDRangeKernel(ctx->cl->commandQueue, ctx->kernel, 2, NULL, indexes, NULL, + 0, NULL, NULL); + if (ret != CL_SUCCESS) + { + WLog_ERR(TAG, "unable to enqueue call kernel"); + return FALSE; + } + + /* Transfer result to host */ + ret = clEnqueueReadBuffer(ctx->cl->commandQueue, ctx->dstObj, CL_TRUE, 0, + ctx->roi.height * ctx->dstStep, pDst, 0, NULL, NULL); + if (ret != CL_SUCCESS) + { + WLog_ERR(TAG, "unable to read back buffer"); + return FALSE; + } + + return TRUE; +} + +static pstatus_t opencl_YUVToRGB(const char* kernelName, const BYTE* const WINPR_RESTRICT pSrc[3], + const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst, UINT32 dstStep, + const prim_size_t* WINPR_RESTRICT roi) +{ + pstatus_t res = -1; + + primitives_cl_kernel* ctx = cl_kernel_new(kernelName, roi); + if (!ctx) + goto fail; + + if (!cl_kernel_set_sources(ctx, pSrc, srcStep)) + goto fail; + + if (!cl_kernel_set_destination(ctx, dstStep)) + goto fail; + + if (!cl_kernel_process(ctx, pDst)) + goto fail; + + res = PRIMITIVES_SUCCESS; + +fail: + cl_kernel_free(ctx); + return res; +} + +static primitives_opencl_context openclContext = { 0 }; + +static primitives_opencl_context* primitives_get_opencl_context(void) +{ + return &openclContext; +} + +static void cl_context_free(primitives_opencl_context* ctx) +{ + if (!ctx) + return; + clReleaseProgram(ctx->program); + clReleaseCommandQueue(ctx->commandQueue); + clReleaseContext(ctx->context); + clReleaseDevice(ctx->deviceId); + ctx->support = FALSE; +} + +static pstatus_t primitives_uninit_opencl(void) +{ + if (!openclContext.support) + return PRIMITIVES_SUCCESS; + + cl_context_free(&openclContext); + return PRIMITIVES_SUCCESS; +} + +static const char openclProgram[] = +#include "primitives.cl" + ; + +static BOOL primitives_init_opencl_context(primitives_opencl_context* cl) +{ + cl_platform_id* platform_ids = NULL; + cl_uint ndevices = 0; + cl_uint nplatforms = 0; + cl_kernel kernel = NULL; + cl_int ret = 0; + + BOOL gotGPU = FALSE; + size_t programLen = 0; + + ret = clGetPlatformIDs(0, NULL, &nplatforms); + if (ret != CL_SUCCESS || nplatforms < 1) + return FALSE; + + platform_ids = calloc(nplatforms, sizeof(*platform_ids)); + if (!platform_ids) + return FALSE; + + ret = clGetPlatformIDs(nplatforms, platform_ids, &nplatforms); + if (ret != CL_SUCCESS) + { + free(platform_ids); + return FALSE; + } + + for (cl_uint i = 0; (i < nplatforms) && !gotGPU; i++) + { + cl_device_id device_id = NULL; + cl_context context = NULL; + char platformName[1000] = { 0 }; + char deviceName[1000] = { 0 }; + + ret = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_NAME, sizeof(platformName), + platformName, NULL); + if (ret != CL_SUCCESS) + continue; + + ret = clGetDeviceIDs(platform_ids[i], CL_DEVICE_TYPE_GPU, 1, &device_id, &ndevices); + if (ret != CL_SUCCESS) + continue; + + ret = clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL); + if (ret != CL_SUCCESS) + { + WLog_ERR(TAG, "openCL: unable get device name for platform %s", platformName); + clReleaseDevice(device_id); + continue; + } + + context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret); + if (ret != CL_SUCCESS) + { + WLog_ERR(TAG, "openCL: unable to create context for platform %s, device %s", + platformName, deviceName); + clReleaseDevice(device_id); + continue; + } + + cl->commandQueue = clCreateCommandQueue(context, device_id, 0, &ret); + if (ret != CL_SUCCESS) + { + WLog_ERR(TAG, "openCL: unable to create command queue"); + clReleaseContext(context); + clReleaseDevice(device_id); + continue; + } + + WLog_INFO(TAG, "openCL: using platform=%s device=%s", platformName, deviceName); + + cl->platformId = platform_ids[i]; + cl->deviceId = device_id; + cl->context = context; + gotGPU = TRUE; + } + + free(platform_ids); + + if (!gotGPU) + { + WLog_ERR(TAG, "openCL: no GPU found"); + return FALSE; + } + + programLen = strnlen(openclProgram, sizeof(openclProgram)); + const char* ptr = openclProgram; + cl->program = clCreateProgramWithSource(cl->context, 1, &ptr, &programLen, &ret); + if (ret != CL_SUCCESS) + { + WLog_ERR(TAG, "openCL: unable to create program"); + goto fail; + } + + ret = clBuildProgram(cl->program, 1, &cl->deviceId, NULL, NULL, NULL); + if (ret != CL_SUCCESS) + { + size_t length = 0; + char buffer[2048]; + ret = clGetProgramBuildInfo(cl->program, cl->deviceId, CL_PROGRAM_BUILD_LOG, sizeof(buffer), + buffer, &length); + if (ret != CL_SUCCESS) + { + WLog_ERR(TAG, + "openCL: building program failed but unable to retrieve buildLog, error=%d", + ret); + } + else + { + WLog_ERR(TAG, "openCL: unable to build program, errorLog=%s", buffer); + } + goto fail; + } + + kernel = clCreateKernel(cl->program, "yuv420_to_bgra_1b", &ret); + if (ret != CL_SUCCESS) + { + WLog_ERR(TAG, "openCL: unable to create yuv420_to_bgra_1b kernel"); + goto fail; + } + clReleaseKernel(kernel); + + cl->support = TRUE; + return TRUE; + +fail: + cl_context_free(cl); + return FALSE; +} + +static pstatus_t opencl_YUV420ToRGB_8u_P3AC4R(const BYTE* const WINPR_RESTRICT pSrc[3], + const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst, + UINT32 dstStep, UINT32 DstFormat, + const prim_size_t* WINPR_RESTRICT roi) +{ + const char* kernel_name = NULL; + + switch (DstFormat) + { + case PIXEL_FORMAT_ABGR32: + kernel_name = "yuv420_to_abgr_1b"; + break; + case PIXEL_FORMAT_XBGR32: + kernel_name = "yuv420_to_xbgr_1b"; + break; + case PIXEL_FORMAT_RGBX32: + kernel_name = "yuv420_to_rgba_1b"; + break; + case PIXEL_FORMAT_RGBA32: + kernel_name = "yuv420_to_rgbx_1b"; + break; + case PIXEL_FORMAT_BGRA32: + kernel_name = "yuv420_to_bgra_1b"; + break; + case PIXEL_FORMAT_BGRX32: + kernel_name = "yuv420_to_bgrx_1b"; + break; + case PIXEL_FORMAT_XRGB32: + kernel_name = "yuv420_to_xrgb_1b"; + break; + case PIXEL_FORMAT_ARGB32: + kernel_name = "yuv420_to_argb_1b"; + break; + default: + { + primitives_t* p = primitives_get_by_type(PRIMITIVES_ONLY_CPU); + if (!p) + return -1; + return p->YUV420ToRGB_8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi); + } + } + + return opencl_YUVToRGB(kernel_name, pSrc, srcStep, pDst, dstStep, roi); +} + +static pstatus_t opencl_YUV444ToRGB_8u_P3AC4R(const BYTE* const WINPR_RESTRICT pSrc[3], + const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst, + UINT32 dstStep, UINT32 DstFormat, + const prim_size_t* WINPR_RESTRICT roi) +{ + const char* kernel_name = NULL; + + switch (DstFormat) + { + case PIXEL_FORMAT_ABGR32: + kernel_name = "yuv444_to_abgr_1b"; + break; + case PIXEL_FORMAT_XBGR32: + kernel_name = "yuv444_to_xbgr_1b"; + break; + case PIXEL_FORMAT_RGBX32: + kernel_name = "yuv444_to_rgba_1b"; + break; + case PIXEL_FORMAT_RGBA32: + kernel_name = "yuv444_to_rgbx_1b"; + break; + case PIXEL_FORMAT_BGRA32: + kernel_name = "yuv444_to_bgra_1b"; + break; + case PIXEL_FORMAT_BGRX32: + kernel_name = "yuv444_to_bgrx_1b"; + break; + case PIXEL_FORMAT_XRGB32: + kernel_name = "yuv444_to_xrgb_1b"; + break; + case PIXEL_FORMAT_ARGB32: + kernel_name = "yuv444_to_argb_1b"; + break; + default: + { + primitives_t* p = primitives_get_by_type(PRIMITIVES_ONLY_CPU); + if (!p) + return -1; + return p->YUV444ToRGB_8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi); + } + } + + return opencl_YUVToRGB(kernel_name, pSrc, srcStep, pDst, dstStep, roi); +} + +BOOL primitives_init_opencl(primitives_t* prims) +{ + primitives_t* p = primitives_get_by_type(PRIMITIVES_ONLY_CPU); + if (!prims || !p) + return FALSE; + *prims = *p; + + if (!primitives_init_opencl_context(&openclContext)) + return FALSE; + + prims->YUV420ToRGB_8u_P3AC4R = opencl_YUV420ToRGB_8u_P3AC4R; + prims->YUV444ToRGB_8u_P3AC4R = opencl_YUV444ToRGB_8u_P3AC4R; + prims->flags |= PRIM_FLAGS_HAVE_EXTGPU; + prims->uninit = primitives_uninit_opencl; + return TRUE; +} |