diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:06:44 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:06:44 +0000 |
commit | ed5640d8b587fbcfed7dd7967f3de04b37a76f26 (patch) | |
tree | 7a5f7c6c9d02226d7471cb3cc8fbbf631b415303 /opencl/source | |
parent | Initial commit. (diff) | |
download | libreoffice-ed5640d8b587fbcfed7dd7967f3de04b37a76f26.tar.xz libreoffice-ed5640d8b587fbcfed7dd7967f3de04b37a76f26.zip |
Adding upstream version 4:7.4.7.upstream/4%7.4.7upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'opencl/source')
-rw-r--r-- | opencl/source/OpenCLZone.cxx | 47 | ||||
-rw-r--r-- | opencl/source/opencl_device.cxx | 611 | ||||
-rw-r--r-- | opencl/source/openclconfig.cxx | 255 | ||||
-rw-r--r-- | opencl/source/openclwrapper.cxx | 977 | ||||
-rw-r--r-- | opencl/source/platforminfo.cxx | 46 |
5 files changed, 1936 insertions, 0 deletions
diff --git a/opencl/source/OpenCLZone.cxx b/opencl/source/OpenCLZone.cxx new file mode 100644 index 000000000..1eaf3f43f --- /dev/null +++ b/opencl/source/OpenCLZone.cxx @@ -0,0 +1,47 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <opencl/openclwrapper.hxx> +#include <opencl/OpenCLZone.hxx> +#include <opencl_device.hxx> + +#include <memory> + +#include <officecfg/Office/Common.hxx> +#include <com/sun/star/util/XFlushable.hpp> +#include <com/sun/star/configuration/theDefaultProvider.hpp> + +/** + * Called from a signal handler if we get + * a crash or hang in some CL code. + */ +void OpenCLZone::hardDisable() +{ + // protect ourselves from double calling etc. + static bool bDisabled = false; + if (bDisabled) + return; + + bDisabled = true; + + std::shared_ptr<comphelper::ConfigurationChanges> xChanges( + comphelper::ConfigurationChanges::create()); + officecfg::Office::Common::Misc::UseOpenCL::set(false, xChanges); + xChanges->commit(); + + // Force synchronous config write + auto xConfProvider + = css::configuration::theDefaultProvider::get(comphelper::getProcessComponentContext()); + css::uno::Reference<css::util::XFlushable> xFlushable(xConfProvider, css::uno::UNO_QUERY_THROW); + xFlushable->flush(); + + releaseOpenCLEnv(&openclwrapper::gpuEnv); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/opencl/source/opencl_device.cxx b/opencl/source/opencl_device.cxx new file mode 100644 index 000000000..ca9f4f43b --- /dev/null +++ b/opencl/source/opencl_device.cxx @@ -0,0 +1,611 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <float.h> +#include <iostream> +#include <memory> +#include <string_view> +#include <vector> +#include <algorithm> + +#include <comphelper/random.hxx> +#include <o3tl/safeint.hxx> +#include <opencl/openclconfig.hxx> +#include <opencl/platforminfo.hxx> +#include <sal/log.hxx> +#include <rtl/math.hxx> +#include <tools/time.hxx> + +#include <opencl/OpenCLZone.hxx> + +#include <opencl_device.hxx> +#include <opencl_device_selection.h> + +#define INPUTSIZE 15360 +#define OUTPUTSIZE 15360 + +#define STRINGIFY(...) #__VA_ARGS__"\n" + +namespace { + +void DS_CHECK_STATUS(cl_int status, char const * name) { + if (CL_SUCCESS != status) + { + SAL_INFO("opencl.device", "Error code is " << status << " at " << name); + } +} + +bool bIsDeviceSelected = false; +ds_device selectedDevice; + +struct LibreOfficeDeviceEvaluationIO +{ + std::vector<double> input0; + std::vector<double> input1; + std::vector<double> input2; + std::vector<double> input3; + std::vector<double> output; + tools::ULong inputSize; + tools::ULong outputSize; +}; + +const char* source = STRINGIFY( +\n#if defined(KHR_DP_EXTENSION) +\n#pragma OPENCL EXTENSION cl_khr_fp64 : enable +\n#elif defined(AMD_DP_EXTENSION) +\n#pragma OPENCL EXTENSION cl_amd_fp64 : enable +\n#endif + \n + int isNan(fp_t a) { return a != a; } + fp_t fsum(fp_t a, fp_t b) { return a + b; } + + fp_t fAverage(__global fp_t* input) +{ + fp_t sum = 0; + int count = 0; + for (int i = 0; i < INPUTSIZE; i++) + { + if (!isNan(input[i])) + { + sum = fsum(input[i], sum); + count += 1; + } + } + return sum / (fp_t)count; +} + fp_t fMin(__global fp_t* input) +{ + fp_t min = MAXFLOAT; + for (int i = 0; i < INPUTSIZE; i++) + { + if (!isNan(input[i])) + { + min = fmin(input[i], min); + } + } + return min; +} + fp_t fSoP(__global fp_t* input0, __global fp_t* input1) +{ + fp_t sop = 0.0; + for (int i = 0; i < INPUTSIZE; i++) + { + sop += (isNan(input0[i]) ? 0 : input0[i]) * (isNan(input1[i]) ? 0 : input1[i]); + } + return sop; +} + __kernel void DynamicKernel( + __global fp_t* result, __global fp_t* input0, __global fp_t* input1, __global fp_t* input2, __global fp_t* input3) +{ + int gid0 = get_global_id(0); + fp_t tmp0 = fAverage(input0); + fp_t tmp1 = fMin(input1) * fSoP(input2, input3); + result[gid0] = fsum(tmp0, tmp1); +} + ); + +size_t sourceSize[] = { strlen(source) }; + +/* Random number generator */ +double random(double min, double max) +{ + if (rtl::math::approxEqual(min, max)) + return min; + return comphelper::rng::uniform_real_distribution(min, max); +} + +/* Populate input */ +void populateInput(std::unique_ptr<LibreOfficeDeviceEvaluationIO> const & testData) +{ + double* input0 = testData->input0.data(); + double* input1 = testData->input1.data(); + double* input2 = testData->input2.data(); + double* input3 = testData->input3.data(); + for (tools::ULong i = 0; i < testData->inputSize; i++) + { + input0[i] = random(0, i); + input1[i] = random(0, i); + input2[i] = random(0, i); + input3[i] = random(0, i); + } +} + +/* Evaluate devices */ +ds_status evaluateScoreForDevice(ds_device& rDevice, std::unique_ptr<LibreOfficeDeviceEvaluationIO> const & testData) +{ + if (rDevice.eType == DeviceType::OpenCLDevice) + { + /* Evaluating an OpenCL device */ + SAL_INFO("opencl.device", "Device: \"" << rDevice.sDeviceName << "\" (OpenCL) evaluation..."); + cl_int clStatus; + + /* Check for 64-bit float extensions */ + std::unique_ptr<char[]> aExtInfo; + { + size_t aDevExtInfoSize = 0; + + OpenCLZone zone; + clStatus = clGetDeviceInfo(rDevice.aDeviceID, CL_DEVICE_EXTENSIONS, 0, nullptr, &aDevExtInfoSize); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clGetDeviceInfo"); + + aExtInfo.reset(new char[aDevExtInfoSize]); + clStatus = clGetDeviceInfo(rDevice.aDeviceID, CL_DEVICE_EXTENSIONS, sizeof(char) * aDevExtInfoSize, aExtInfo.get(), nullptr); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clGetDeviceInfo"); + } + + bool bKhrFp64Flag = false; + bool bAmdFp64Flag = false; + const char* buildOption = nullptr; + std::string tmpStr("-Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16 -DINPUTSIZE="); + std::ostringstream tmpOStrStr; + tmpOStrStr << std::dec << INPUTSIZE; + tmpStr.append(tmpOStrStr.str()); + + if ((std::string(aExtInfo.get())).find("cl_khr_fp64") != std::string::npos) + { + bKhrFp64Flag = true; + //buildOption = "-D KHR_DP_EXTENSION -Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16"; + tmpStr.append(" -DKHR_DP_EXTENSION"); + buildOption = tmpStr.c_str(); + SAL_INFO("opencl.device", "... has cl_khr_fp64"); + } + else if ((std::string(aExtInfo.get())).find("cl_amd_fp64") != std::string::npos) + { + bAmdFp64Flag = true; + //buildOption = "-D AMD_DP_EXTENSION -Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16"; + tmpStr.append(" -DAMD_DP_EXTENSION"); + buildOption = tmpStr.c_str(); + SAL_INFO("opencl.device", "... has cl_amd_fp64"); + } + + if (!bKhrFp64Flag && !bAmdFp64Flag) + { + /* No 64-bit float support */ + rDevice.fTime = DBL_MAX; + rDevice.bErrors = false; + SAL_INFO("opencl.device", "... no fp64 support"); + } + else + { + /* 64-bit float support present */ + + OpenCLZone zone; + + /* Create context and command queue */ + cl_context clContext = clCreateContext(nullptr, 1, &rDevice.aDeviceID, nullptr, nullptr, &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateContext"); + cl_command_queue clQueue = clCreateCommandQueue(clContext, rDevice.aDeviceID, 0, &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateCommandQueue"); + + /* Build program */ + cl_program clProgram = clCreateProgramWithSource(clContext, 1, &source, sourceSize, &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateProgramWithSource"); + clStatus = clBuildProgram(clProgram, 1, &rDevice.aDeviceID, buildOption, nullptr, nullptr); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clBuildProgram"); + if (CL_SUCCESS != clStatus) + { + /* Build program failed */ + size_t length; + char* buildLog; + clStatus = clGetProgramBuildInfo(clProgram, rDevice.aDeviceID, CL_PROGRAM_BUILD_LOG, 0, nullptr, &length); + buildLog = static_cast<char*>(malloc(length)); + clGetProgramBuildInfo(clProgram, rDevice.aDeviceID, CL_PROGRAM_BUILD_LOG, length, buildLog, &length); + SAL_INFO("opencl.device", "Build Errors:\n" << buildLog); + free(buildLog); + + rDevice.fTime = DBL_MAX; + rDevice.bErrors = true; + } + else + { + /* Build program succeeded */ + sal_uInt64 kernelTime = tools::Time::GetMonotonicTicks(); + + /* Run kernel */ + cl_kernel clKernel = clCreateKernel(clProgram, "DynamicKernel", &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateKernel"); + cl_mem clResult = clCreateBuffer(clContext, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->outputSize, testData->output.data(), &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clResult"); + cl_mem clInput0 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, testData->input0.data(), &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput0"); + cl_mem clInput1 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, testData->input1.data(), &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput1"); + cl_mem clInput2 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, testData->input2.data(), &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput2"); + cl_mem clInput3 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, testData->input3.data(), &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput3"); + clStatus = clSetKernelArg(clKernel, 0, sizeof(cl_mem), static_cast<void*>(&clResult)); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clResult"); + clStatus = clSetKernelArg(clKernel, 1, sizeof(cl_mem), static_cast<void*>(&clInput0)); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput0"); + clStatus = clSetKernelArg(clKernel, 2, sizeof(cl_mem), static_cast<void*>(&clInput1)); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput1"); + clStatus = clSetKernelArg(clKernel, 3, sizeof(cl_mem), static_cast<void*>(&clInput2)); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput2"); + clStatus = clSetKernelArg(clKernel, 4, sizeof(cl_mem), static_cast<void*>(&clInput3)); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput3"); + size_t globalWS[1] = { testData->outputSize }; + size_t const localSize[1] = { 64 }; + clStatus = clEnqueueNDRangeKernel(clQueue, clKernel, 1, nullptr, globalWS, localSize, 0, nullptr, nullptr); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clEnqueueNDRangeKernel"); + clFinish(clQueue); + clReleaseMemObject(clInput3); + clReleaseMemObject(clInput2); + clReleaseMemObject(clInput1); + clReleaseMemObject(clInput0); + clReleaseMemObject(clResult); + clReleaseKernel(clKernel); + + rDevice.fTime = tools::Time::GetMonotonicTicks() - kernelTime; + rDevice.bErrors = false; + } + + clReleaseProgram(clProgram); + clReleaseCommandQueue(clQueue); + clReleaseContext(clContext); + } + } + else + { + /* Evaluating a Native CPU device */ + SAL_INFO("opencl.device", "Device: \"CPU\" (Native) evaluation..."); + sal_uInt64 kernelTime = tools::Time::GetMonotonicTicks(); + + tools::ULong j; + for (j = 0; j < testData->outputSize; j++) + { + double fAverage = 0.0f; + double fMin = DBL_MAX; + double fSoP = 0.0f; + for (tools::ULong i = 0; i < testData->inputSize; i++) + { + fAverage += testData->input0[i]; + fMin = std::min(fMin, testData->input1[i]); + fSoP += testData->input2[i] * testData->input3[i]; + } + fAverage /= testData->inputSize; + testData->output[j] = fAverage + (fMin * fSoP); + // Don't run for much longer than one second + if (j > 0 && j % 100 == 0) + { + rDevice.fTime = tools::Time::GetMonotonicTicks() - kernelTime; + if (rDevice.fTime >= 1) + break; + } + } + + rDevice.fTime = tools::Time::GetMonotonicTicks() - kernelTime; + + // Scale time to how long it would have taken to go all the way to outputSize + rDevice.fTime /= (static_cast<double>(j) / testData->outputSize); + + // InterpretTail - the S/W fallback is nothing like as efficient + // as any good openCL implementation: no SIMD, tons of branching + // in the inner loops etc. Generously characterise it as only 10x + // slower than the above. + rDevice.fTime *= 10.0; + rDevice.bErrors = false; + } + return DS_SUCCESS; +} + +ds_status profileDevices(std::unique_ptr<ds_profile> const & pProfile, std::unique_ptr<LibreOfficeDeviceEvaluationIO> const & pTestData) +{ + ds_status status = DS_SUCCESS; + + if (!pProfile) + return DS_INVALID_PROFILE; + + for (ds_device& rDevice : pProfile->devices) + { + ds_status evaluatorStatus = evaluateScoreForDevice(rDevice, pTestData); + if (evaluatorStatus != DS_SUCCESS) + { + status = evaluatorStatus; + return status; + } + } + return status; +} + +/* Pick best device */ +int pickBestDevice(std::unique_ptr<ds_profile> const & profile) +{ + double bestScore = DBL_MAX; + + int nBestDeviceIndex = -1; + + for (std::vector<ds_device>::size_type d = 0; d < profile->devices.size(); + d++) + { + ds_device& device = profile->devices[d]; + + // Check denylist and allowlist for actual devices + if (device.eType == DeviceType::OpenCLDevice) + { + // There is a silly impedance mismatch here. Why do we + // need two different ways to describe an OpenCL platform + // and an OpenCL device driver? + + OpenCLPlatformInfo aPlatform; + OpenCLDeviceInfo aDevice; + + // We know that only the below fields are used by checkForKnownBadCompilers() + aPlatform.maVendor = OStringToOUString(device.sPlatformVendor, RTL_TEXTENCODING_UTF8); + aDevice.maName = OStringToOUString(device.sDeviceName, RTL_TEXTENCODING_UTF8); + aDevice.maDriver = OStringToOUString(device.sDriverVersion, RTL_TEXTENCODING_UTF8); + + // If denylisted or not allowlisted, ignore it + if (OpenCLConfig::get().checkImplementation(aPlatform, aDevice)) + { + SAL_INFO("opencl.device", "Device[" << d << "] " << device.sDeviceName << " is denylisted or not allowlisted"); + device.fTime = DBL_MAX; + device.bErrors = false; + } + } + + double fScore = DBL_MAX; + if (device.fTime >= 0.0 + || rtl::math::approxEqual(device.fTime, DBL_MAX)) + { + fScore = device.fTime; + } + else + { + SAL_INFO("opencl.device", "Unusual null score"); + } + + if (device.eType == DeviceType::OpenCLDevice) + { + SAL_INFO("opencl.device", "Device[" << d << "] " << device.sDeviceName << " (OpenCL) score is " << fScore); + } + else + { + SAL_INFO("opencl.device", "Device[" << d << "] CPU (Native) score is " << fScore); + } + if (fScore < bestScore) + { + bestScore = fScore; + nBestDeviceIndex = d; + } + } + if (nBestDeviceIndex != -1 && profile->devices[nBestDeviceIndex].eType == DeviceType::OpenCLDevice) + { + SAL_INFO("opencl.device", "Selected Device[" << nBestDeviceIndex << "]: " << profile->devices[nBestDeviceIndex].sDeviceName << "(OpenCL)."); + } + else + { + SAL_INFO("opencl.device", "Selected Device[" << nBestDeviceIndex << "]: CPU (Native)."); + } + return nBestDeviceIndex; +} + +/* Return device ID for matching device name */ +int matchDevice(std::unique_ptr<ds_profile> const & profile, const char* deviceName) +{ + int deviceMatch = -1; + for (size_t d = 0; d < profile->devices.size() - 1; d++) + { + if (profile->devices[d].sDeviceName.indexOf(deviceName) != -1) + deviceMatch = d; + } + if (std::string("NATIVE_CPU").find(deviceName) != std::string::npos) + deviceMatch = profile->devices.size() - 1; + return deviceMatch; +} + +class LogWriter +{ +private: + SvFileStream maStream; +public: + explicit LogWriter(OUString const & aFileName) + : maStream(aFileName, StreamMode::WRITE) + {} + + void text(std::string_view rText) + { + maStream.WriteOString(rText); + maStream.WriteChar('\n'); + } + + void log(std::string_view rKey, std::string_view rValue) + { + maStream.WriteOString(rKey); + maStream.WriteCharPtr(": "); + maStream.WriteOString(rValue); + maStream.WriteChar('\n'); + } + + void log(std::string_view rKey, int rValue) + { + log(rKey, OString::number(rValue)); + } + + void log(std::string_view rKey, bool rValue) + { + log(rKey, OString::boolean(rValue)); + } +}; + + +void writeDevicesLog(std::unique_ptr<ds_profile> const & rProfile, std::u16string_view sProfilePath, int nSelectedIndex) +{ + OUString aCacheFile(OUString::Concat(sProfilePath) + "opencl_devices.log"); + LogWriter aWriter(aCacheFile); + + int nIndex = 0; + + for (const ds_device& rDevice : rProfile->devices) + { + if (rDevice.eType == DeviceType::OpenCLDevice) + { + aWriter.log("Device Index", nIndex); + aWriter.log(" Selected", nIndex == nSelectedIndex); + aWriter.log(" Device Name", rDevice.sDeviceName); + aWriter.log(" Device Vendor", rDevice.sDeviceVendor); + aWriter.log(" Device Version", rDevice.sDeviceVersion); + aWriter.log(" Driver Version", rDevice.sDriverVersion); + aWriter.log(" Device Type", rDevice.sDeviceType); + aWriter.log(" Device Extensions", rDevice.sDeviceExtensions); + aWriter.log(" Device OpenCL C Version", rDevice.sDeviceOpenCLVersion); + + aWriter.log(" Device Available", rDevice.bDeviceAvailable); + aWriter.log(" Device Compiler Available", rDevice.bDeviceCompilerAvailable); + aWriter.log(" Device Linker Available", rDevice.bDeviceLinkerAvailable); + + aWriter.log(" Platform Name", rDevice.sPlatformName); + aWriter.log(" Platform Vendor", rDevice.sPlatformVendor); + aWriter.log(" Platform Version", rDevice.sPlatformVersion); + aWriter.log(" Platform Profile", rDevice.sPlatformProfile); + aWriter.log(" Platform Extensions", rDevice.sPlatformExtensions); + aWriter.text(""); + } + nIndex++; + } +} + +} // end anonymous namespace + +ds_device const & getDeviceSelection( + std::u16string_view sProfilePath, bool bForceSelection) +{ + /* Run only if device is not yet selected */ + if (!bIsDeviceSelected || bForceSelection) + { + /* Setup */ + std::unique_ptr<ds_profile> aProfile; + ds_status status; + status = initDSProfile(aProfile, "LibreOffice v1"); + + if (status != DS_SUCCESS) + { + // failed to initialize profile. + selectedDevice.eType = DeviceType::NativeCPU; + return selectedDevice; + } + + /* Try reading scores from file */ + OUString sFilePath = OUString::Concat(sProfilePath) + "opencl_profile.xml"; + + if (!bForceSelection) + { + status = readProfile(sFilePath, aProfile); + } + else + { + status = DS_INVALID_PROFILE; + SAL_INFO("opencl.device", "Performing forced profiling."); + } + if (DS_SUCCESS != status) + { + if (!bForceSelection) + { + SAL_INFO("opencl.device", "Profile file not available (" << sFilePath << "); performing profiling."); + } + + /* Populate input data for micro-benchmark */ + std::unique_ptr<LibreOfficeDeviceEvaluationIO> testData(new LibreOfficeDeviceEvaluationIO); + testData->inputSize = INPUTSIZE; + testData->outputSize = OUTPUTSIZE; + testData->input0.resize(testData->inputSize); + testData->input1.resize(testData->inputSize); + testData->input2.resize(testData->inputSize); + testData->input3.resize(testData->inputSize); + testData->output.resize(testData->outputSize); + populateInput(testData); + + /* Perform evaluations */ + status = profileDevices(aProfile, testData); + + if (DS_SUCCESS == status) + { + /* Write scores to file */ + status = writeProfile(sFilePath, aProfile); + if (DS_SUCCESS == status) + { + SAL_INFO("opencl.device", "Scores written to file (" << sFilePath << ")."); + } + else + { + SAL_INFO("opencl.device", "Error saving scores to file (" << sFilePath << "); scores not written to file."); + } + } + else + { + SAL_INFO("opencl.device", "Unable to evaluate performance; scores not written to file."); + } + } + else + { + SAL_INFO("opencl.device", "Profile read from file (" << sFilePath << ")."); + } + + /* Pick best device */ + int bestDeviceIdx = pickBestDevice(aProfile); + + /* Override if necessary */ + char* overrideDeviceStr = getenv("SC_OPENCL_DEVICE_OVERRIDE"); + if (nullptr != overrideDeviceStr) + { + int overrideDeviceIdx = matchDevice(aProfile, overrideDeviceStr); + if (-1 != overrideDeviceIdx) + { + SAL_INFO("opencl.device", "Overriding Device Selection (SC_OPENCL_DEVICE_OVERRIDE=" << overrideDeviceStr << ")."); + bestDeviceIdx = overrideDeviceIdx; + if (aProfile->devices[bestDeviceIdx].eType == DeviceType::OpenCLDevice) + { + SAL_INFO("opencl.device", "Selected Device[" << bestDeviceIdx << "]: " << aProfile->devices[bestDeviceIdx].sDeviceName << " (OpenCL)."); + } + else + { + SAL_INFO("opencl.device", "Selected Device[" << bestDeviceIdx << "]: CPU (Native)."); + } + } + else + { + SAL_INFO("opencl.device", "Ignoring invalid SC_OPENCL_DEVICE_OVERRIDE=" << overrideDeviceStr << ")."); + } + } + + /* Final device selection */ + if (bestDeviceIdx >=0 && o3tl::make_unsigned( bestDeviceIdx ) < aProfile->devices.size() ) + { + selectedDevice = aProfile->devices[bestDeviceIdx]; + bIsDeviceSelected = true; + + writeDevicesLog(aProfile, sProfilePath, bestDeviceIdx); + } else { + selectedDevice.eType = DeviceType::NativeCPU; + } + } + return selectedDevice; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/opencl/source/openclconfig.cxx b/opencl/source/openclconfig.cxx new file mode 100644 index 000000000..a2b69c175 --- /dev/null +++ b/opencl/source/openclconfig.cxx @@ -0,0 +1,255 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <sal/config.h> + +#include <unicode/regex.h> + +#include <comphelper/configuration.hxx> +#include <officecfg/Office/Common.hxx> +#include <opencl/openclconfig.hxx> +#include <opencl/platforminfo.hxx> +#include <rtl/ustring.hxx> +#include <rtl/ustrbuf.hxx> +#include <sal/log.hxx> +#include <sal/types.h> +#include <o3tl/string_view.hxx> + +OpenCLConfig::OpenCLConfig() : + mbUseOpenCL(true) +{ + // This entry we have had for some time (when denylisting was + // done elsewhere in the code), so presumably there is a known + // good reason for it. + maDenyList.insert(ImplMatcher("Windows", "", "Intel\\(R\\) Corporation", "", "9\\.17\\.10\\.2884")); + // This was reported to produce bogus values in unit tests + maDenyList.insert(ImplMatcher("Windows", "", "Intel\\(R\\) Corporation", "", "27\\.20\\.100\\.8681")); + + // For now, assume that AMD, Intel and NVIDIA drivers are good + maAllowList.insert(ImplMatcher("", "", "Advanced Micro Devices, Inc\\.", "", "")); + maAllowList.insert(ImplMatcher("", "", "Intel\\(R\\) Corporation", "", "")); + maAllowList.insert(ImplMatcher("", "", "NVIDIA Corporation", "", "")); +} + +bool OpenCLConfig::operator== (const OpenCLConfig& r) const +{ + return (mbUseOpenCL == r.mbUseOpenCL && + maDenyList == r.maDenyList && + maAllowList == r.maAllowList); +} + +bool OpenCLConfig::operator!= (const OpenCLConfig& r) const +{ + return !operator== (r); +} + +namespace { + +css::uno::Sequence<OUString> SetOfImplMatcherToStringSequence(const OpenCLConfig::ImplMatcherSet& rSet) +{ + css::uno::Sequence<OUString> result(rSet.size()); + auto resultRange = asNonConstRange(result); + size_t n(0); + for (const auto& rItem : rSet) + { + resultRange[n++] = + rItem.maOS.replaceAll("%", "%25").replaceAll("/", "%2F").replaceAll(";", "%3B") + "/" + + rItem.maOSVersion.replaceAll("%", "%25").replaceAll("/", "%2F").replaceAll(";", "%3B") + "/" + + rItem.maPlatformVendor.replaceAll("%", "%25").replaceAll("/", "%2F").replaceAll(";", "%3B") + "/" + + rItem.maDevice.replaceAll("%", "%25").replaceAll("/", "%2F").replaceAll(";", "%3B") + "/" + + rItem.maDriverVersion.replaceAll("%", "%25").replaceAll("/", "%2F").replaceAll(";", "%3B"); + } + + return result; +} + +OUString getToken(std::u16string_view string, sal_Int32& index) +{ + std::u16string_view token(o3tl::getToken(string, 0, '/', index)); + OUStringBuffer result; + sal_Int32 i(0); + size_t p; + while ((p = token.find('%', i)) != std::u16string_view::npos) + { + if (static_cast<sal_Int32>(p) > i) + result.append(token.substr(i, p - i)); + if (p < token.size() - 2) + { + result.append(sal_Unicode(o3tl::toInt32(token.substr(p+1, 2), 16))); + i = p + 3; + } + else + { + i = token.size(); + } + } + result.append(token.substr(i)); + + return result.makeStringAndClear(); +} + +OpenCLConfig::ImplMatcherSet StringSequenceToSetOfImplMatcher(const css::uno::Sequence<OUString>& rSequence) +{ + OpenCLConfig::ImplMatcherSet result; + + for (const auto& rItem : rSequence) + { + OpenCLConfig::ImplMatcher m; + sal_Int32 index(0); + m.maOS = getToken(rItem, index); + m.maOSVersion = getToken(rItem, index); + m.maPlatformVendor = getToken(rItem, index); + m.maDevice = getToken(rItem, index); + m.maDriverVersion = getToken(rItem, index); + + result.insert(m); + } + + return result; +} + +bool match(const OUString& rPattern, const OUString& rInput) +{ + if (rPattern.isEmpty()) + return true; + + UErrorCode nIcuError(U_ZERO_ERROR); + icu::UnicodeString sIcuPattern(reinterpret_cast<const UChar*>(rPattern.getStr()), rPattern.getLength()); + icu::UnicodeString sIcuInput(reinterpret_cast<const UChar*>(rInput.getStr()), rInput.getLength()); + icu::RegexMatcher aMatcher(sIcuPattern, sIcuInput, 0, nIcuError); + + return U_SUCCESS(nIcuError) && aMatcher.matches(nIcuError) && U_SUCCESS(nIcuError); +} + +bool match(const OpenCLConfig::ImplMatcher& rListEntry, const OpenCLPlatformInfo& rPlatform, const OpenCLDeviceInfo& rDevice) +{ +#if defined(_WIN32) + if (!rListEntry.maOS.isEmpty() && rListEntry.maOS != "Windows") + return false; +#elif defined LINUX + if (!rListEntry.maOS.isEmpty() && rListEntry.maOS != "Linux") + return false; +#elif defined MACOSX + if (!rListEntry.maOS.isEmpty() && rListEntry.maOS != "OS X") + return false; +#endif + + // OS version check not yet implemented + + if (!match(rListEntry.maPlatformVendor, rPlatform.maVendor)) + return false; + + if (!match(rListEntry.maDevice, rDevice.maName)) + return false; + + if (!match(rListEntry.maDriverVersion, rDevice.maDriver)) + return false; + + return true; +} + +bool match(const OpenCLConfig::ImplMatcherSet& rList, const OpenCLPlatformInfo& rPlatform, const OpenCLDeviceInfo& rDevice, const char* sKindOfList) +{ + for (const auto& rListEntry : rList) + { + SAL_INFO("opencl", "Looking for match for platform=" << rPlatform << ", device=" << rDevice << + " in " << sKindOfList << " entry=" << rListEntry); + + if (match(rListEntry, rPlatform, rDevice)) + { + SAL_INFO("opencl", "Match!"); + return true; + } + } + return false; +} + +} // anonymous namespace + +OpenCLConfig OpenCLConfig::get() +{ + OpenCLConfig result; + + result.mbUseOpenCL = officecfg::Office::Common::Misc::UseOpenCL::get(); + + result.maDenyList = StringSequenceToSetOfImplMatcher(officecfg::Office::Common::Misc::OpenCLDenyList::get()); + result.maAllowList = StringSequenceToSetOfImplMatcher(officecfg::Office::Common::Misc::OpenCLAllowList::get()); + + return result; +} + +void OpenCLConfig::set() +{ + std::shared_ptr<comphelper::ConfigurationChanges> batch(comphelper::ConfigurationChanges::create()); + + officecfg::Office::Common::Misc::UseOpenCL::set(mbUseOpenCL, batch); + officecfg::Office::Common::Misc::OpenCLDenyList::set(SetOfImplMatcherToStringSequence(maDenyList), batch); + officecfg::Office::Common::Misc::OpenCLAllowList::set(SetOfImplMatcherToStringSequence(maAllowList), batch); + + batch->commit(); +} + +bool OpenCLConfig::checkImplementation(const OpenCLPlatformInfo& rPlatform, const OpenCLDeviceInfo& rDevice) const +{ + // Check denylist of known bad OpenCL implementations + if (match(maDenyList, rPlatform, rDevice, "denylist")) + { + SAL_INFO("opencl", "Rejecting"); + return true; + } + + // Check for allowlist of known good OpenCL implementations + if (match(maAllowList, rPlatform, rDevice, "allowlist")) + { + SAL_INFO("opencl", "Approving"); + return false; + } + + // Fallback: reject + SAL_INFO("opencl", "Fallback: rejecting platform=" << rPlatform << ", device=" << rDevice); + return true; +} + +std::ostream& operator<<(std::ostream& rStream, const OpenCLConfig& rConfig) +{ + rStream << "{" + "UseOpenCL=" << (rConfig.mbUseOpenCL ? "YES" : "NO") << "," + "DenyList=" << rConfig.maDenyList << "," + "AllowList=" << rConfig.maAllowList << + "}"; + return rStream; +} + +std::ostream& operator<<(std::ostream& rStream, const OpenCLConfig::ImplMatcher& rImpl) +{ + rStream << "{" + "OS=" << rImpl.maOS << "," + "OSVersion=" << rImpl.maOSVersion << "," + "PlatformVendor=" << rImpl.maPlatformVendor << "," + "Device=" << rImpl.maDevice << "," + "DriverVersion=" << rImpl.maDriverVersion << + "}"; + + return rStream; +} + +std::ostream& operator<<(std::ostream& rStream, const OpenCLConfig::ImplMatcherSet& rSet) +{ + rStream << "{"; + for (auto i = rSet.cbegin(); i != rSet.cend(); ++i) + { + if (i != rSet.cbegin()) + rStream << ","; + rStream << *i; + } + rStream << "}"; + return rStream; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/opencl/source/openclwrapper.cxx b/opencl/source/openclwrapper.cxx new file mode 100644 index 000000000..f6d1242ba --- /dev/null +++ b/opencl/source/openclwrapper.cxx @@ -0,0 +1,977 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <config_folders.h> + +#include <opencl_device.hxx> +#include <opencl_device_selection.h> + +#include <opencl/openclconfig.hxx> +#include <opencl/openclwrapper.hxx> +#include <opencl/platforminfo.hxx> +#include <osl/file.hxx> +#include <rtl/bootstrap.hxx> +#include <rtl/digest.h> +#include <rtl/strbuf.hxx> +#include <rtl/ustring.hxx> +#include <sal/config.h> +#include <sal/log.hxx> +#include <opencl/OpenCLZone.hxx> + +#include <memory> +#include <string_view> + +#include <stdlib.h> + +#include <officecfg/Office/Common.hxx> + +#ifdef _WIN32 +#include <prewin.h> +#include <postwin.h> +#define OPENCL_DLL_NAME "OpenCL.dll" +#elif defined(MACOSX) +#define OPENCL_DLL_NAME nullptr +#else +#define OPENCL_DLL_NAME "libOpenCL.so.1" +#endif + +#ifdef _WIN32_WINNT_WINBLUE +#include <VersionHelpers.h> +#endif + +#define DEVICE_NAME_LENGTH 1024 +#define DRIVER_VERSION_LENGTH 1024 +#define PLATFORM_VERSION_LENGTH 1024 + +#define CHECK_OPENCL(status,name) \ +if( status != CL_SUCCESS ) \ +{ \ + SAL_WARN( "opencl", "OpenCL error code " << status << " at " SAL_DETAIL_WHERE "from " name ); \ + return false; \ +} + +namespace { + +bool bIsInited = false; + +} + +namespace openclwrapper { + +GPUEnv gpuEnv; +sal_uInt64 kernelFailures = 0; + +namespace +{ + +OString generateMD5(const void* pData, size_t length) +{ + sal_uInt8 pBuffer[RTL_DIGEST_LENGTH_MD5]; + rtlDigestError aError = rtl_digest_MD5(pData, length, + pBuffer, RTL_DIGEST_LENGTH_MD5); + SAL_WARN_IF(aError != rtl_Digest_E_None, "opencl", "md5 generation failed"); + + OStringBuffer aBuffer; + const char* const pString = "0123456789ABCDEF"; + for(sal_uInt8 val : pBuffer) + { + aBuffer.append(pString[val/16]); + aBuffer.append(pString[val%16]); + } + return aBuffer.makeStringAndClear(); +} + +OString const & getCacheFolder() +{ + static OString const aCacheFolder = []() + { + OUString url("${$BRAND_BASE_DIR/" LIBO_ETC_FOLDER "/" SAL_CONFIGFILE("bootstrap") ":UserInstallation}/cache/"); + rtl::Bootstrap::expandMacros(url); + + osl::Directory::create(url); + + return OUStringToOString(url, RTL_TEXTENCODING_UTF8); + }(); + return aCacheFolder; +} + +} + +static bool initializeCommandQueue(GPUEnv& aGpuEnv) +{ + OpenCLZone zone; + + cl_int nState; + cl_command_queue command_queue[OPENCL_CMDQUEUE_SIZE]; + + for (int i = 0; i < OPENCL_CMDQUEUE_SIZE; ++i) + { + command_queue[i] = clCreateCommandQueue(aGpuEnv.mpContext, aGpuEnv.mpDevID, 0, &nState); + if (nState != CL_SUCCESS) + SAL_WARN("opencl", "clCreateCommandQueue failed: " << errorString(nState)); + + if (command_queue[i] == nullptr || nState != CL_SUCCESS) + { + // Release all command queues created so far. + for (int j = 0; j <= i; ++j) + { + if (command_queue[j]) + { + clReleaseCommandQueue(command_queue[j]); + command_queue[j] = nullptr; + } + } + + clReleaseContext(aGpuEnv.mpContext); + SAL_WARN("opencl", "failed to set/switch opencl device"); + return false; + } + + SAL_INFO("opencl", "Created command queue " << command_queue[i] << " for context " << aGpuEnv.mpContext); + } + + for (int i = 0; i < OPENCL_CMDQUEUE_SIZE; ++i) + { + aGpuEnv.mpCmdQueue[i] = command_queue[i]; + } + aGpuEnv.mbCommandQueueInitialized = true; + return true; +} + +void setKernelEnv( KernelEnv *envInfo ) +{ + if (!gpuEnv.mbCommandQueueInitialized) + { + initializeCommandQueue(gpuEnv); + } + + envInfo->mpkContext = gpuEnv.mpContext; + envInfo->mpkProgram = gpuEnv.mpArryPrograms[0]; + + assert(gpuEnv.mnCmdQueuePos < OPENCL_CMDQUEUE_SIZE); + envInfo->mpkCmdQueue = gpuEnv.mpCmdQueue[gpuEnv.mnCmdQueuePos]; +} + +namespace { + +OString createFileName(cl_device_id deviceId, const char* clFileName) +{ + OString fileName(clFileName); + sal_Int32 nIndex = fileName.lastIndexOf(".cl"); + if(nIndex > 0) + fileName = fileName.copy(0, nIndex); + + char deviceName[DEVICE_NAME_LENGTH] = {0}; + clGetDeviceInfo(deviceId, CL_DEVICE_NAME, + sizeof(deviceName), deviceName, nullptr); + + char driverVersion[DRIVER_VERSION_LENGTH] = {0}; + clGetDeviceInfo(deviceId, CL_DRIVER_VERSION, + sizeof(driverVersion), driverVersion, nullptr); + + cl_platform_id platformId; + clGetDeviceInfo(deviceId, CL_DEVICE_PLATFORM, + sizeof(platformId), &platformId, nullptr); + + char platformVersion[PLATFORM_VERSION_LENGTH] = {0}; + clGetPlatformInfo(platformId, CL_PLATFORM_VERSION, sizeof(platformVersion), + platformVersion, nullptr); + + // create hash for deviceName + driver version + platform version + OString aString = OString::Concat(deviceName) + driverVersion + platformVersion; + OString aHash = generateMD5(aString.getStr(), aString.getLength()); + + return getCacheFolder() + fileName + "-" + aHash + ".bin"; +} + +std::vector<std::shared_ptr<osl::File> > binaryGenerated( const char * clFileName, cl_context context ) +{ + size_t numDevices=0; + + std::vector<std::shared_ptr<osl::File> > aGeneratedFiles; + cl_int clStatus = clGetContextInfo( context, CL_CONTEXT_DEVICES, + 0, nullptr, &numDevices ); + numDevices /= sizeof(numDevices); + + if(clStatus != CL_SUCCESS) + return aGeneratedFiles; + + assert(numDevices == 1); + + // grab the handle to the device in the context. + cl_device_id pDevID; + clStatus = clGetContextInfo( context, CL_CONTEXT_DEVICES, + sizeof( cl_device_id ), &pDevID, nullptr ); + + if(clStatus != CL_SUCCESS) + return aGeneratedFiles; + + assert(pDevID == gpuEnv.mpDevID); + + OString fileName = createFileName(gpuEnv.mpDevID, clFileName); + auto pNewFile = std::make_shared<osl::File>(OStringToOUString(fileName, RTL_TEXTENCODING_UTF8)); + if(pNewFile->open(osl_File_OpenFlag_Read) == osl::FileBase::E_None) + { + aGeneratedFiles.push_back(pNewFile); + SAL_INFO("opencl.file", "Opening binary file '" << fileName << "' for reading: success"); + } + else + { + SAL_INFO("opencl.file", "Opening binary file '" << fileName << "' for reading: FAIL"); + } + + return aGeneratedFiles; +} + +bool writeBinaryToFile( std::string_view rFileName, const char* binary, size_t numBytes ) +{ + osl::File file(OStringToOUString(rFileName, RTL_TEXTENCODING_UTF8)); + osl::FileBase::RC status = file.open( + osl_File_OpenFlag_Write | osl_File_OpenFlag_Create ); + + if(status != osl::FileBase::E_None) + return false; + + sal_uInt64 nBytesWritten = 0; + file.write( binary, numBytes, nBytesWritten ); + + assert(numBytes == nBytesWritten); + + return true; +} + +} + +bool generatBinFromKernelSource( cl_program program, const char * clFileName ) +{ + cl_uint numDevices; + + cl_int clStatus = clGetProgramInfo( program, CL_PROGRAM_NUM_DEVICES, + sizeof(numDevices), &numDevices, nullptr ); + CHECK_OPENCL( clStatus, "clGetProgramInfo" ); + + assert(numDevices == 1); + + cl_device_id pDevID; + /* grab the handle to the device in the program. */ + clStatus = clGetProgramInfo( program, CL_PROGRAM_DEVICES, + sizeof(cl_device_id), &pDevID, nullptr ); + CHECK_OPENCL( clStatus, "clGetProgramInfo" ); + + /* figure out the size of the binary. */ + size_t binarySize; + + clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, + sizeof(size_t), &binarySize, nullptr ); + CHECK_OPENCL( clStatus, "clGetProgramInfo" ); + + /* copy over the generated binary. */ + if ( binarySize != 0 ) + { + std::unique_ptr<char[]> binary(new char[binarySize]); + clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARIES, + sizeof(char *), &binary, nullptr ); + CHECK_OPENCL(clStatus,"clGetProgramInfo"); + + OString fileName = createFileName(pDevID, clFileName); + if ( !writeBinaryToFile( fileName, + binary.get(), binarySize ) ) + SAL_INFO("opencl.file", "Writing binary file '" << fileName << "': FAIL"); + else + SAL_INFO("opencl.file", "Writing binary file '" << fileName << "': success"); + } + return true; +} + +namespace { + +struct OpenCLEnv +{ + cl_platform_id mpOclPlatformID; + cl_context mpOclContext; + cl_device_id mpOclDevsID; +}; + +bool initOpenCLAttr( OpenCLEnv * env ) +{ + if ( gpuEnv.mnIsUserCreated ) + return true; + + gpuEnv.mpContext = env->mpOclContext; + gpuEnv.mpPlatformID = env->mpOclPlatformID; + gpuEnv.mpDevID = env->mpOclDevsID; + + gpuEnv.mnIsUserCreated = 1; + + gpuEnv.mbCommandQueueInitialized = false; + + gpuEnv.mnCmdQueuePos = 0; // default to 0. + + return false; +} + +bool buildProgram(const char* buildOption, GPUEnv* gpuInfo, int idx) +{ + cl_int clStatus; + //char options[512]; + // create a cl program executable for all the devices specified + clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &gpuInfo->mpDevID, + buildOption, nullptr, nullptr); + + if ( clStatus != CL_SUCCESS ) + { + size_t length; + clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID, + CL_PROGRAM_BUILD_LOG, 0, nullptr, &length); + if ( clStatus != CL_SUCCESS ) + { + return false; + } + + std::unique_ptr<char[]> buildLog(new char[length]); + clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID, + CL_PROGRAM_BUILD_LOG, length, buildLog.get(), &length ); + if ( clStatus != CL_SUCCESS ) + { + return false; + } + + OString aBuildLogFileURL = getCacheFolder() + "kernel-build.log"; + osl::File aBuildLogFile(OStringToOUString(aBuildLogFileURL, RTL_TEXTENCODING_UTF8)); + osl::FileBase::RC status = aBuildLogFile.open( + osl_File_OpenFlag_Write | osl_File_OpenFlag_Create ); + + if(status != osl::FileBase::E_None) + return false; + + sal_uInt64 nBytesWritten = 0; + aBuildLogFile.write( buildLog.get(), length, nBytesWritten ); + + return false; + } + + return true; +} + +} + +bool buildProgramFromBinary(const char* buildOption, GPUEnv* gpuInfo, const char* filename, int idx) +{ + size_t numDevices; + cl_int clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES, + 0, nullptr, &numDevices ); + numDevices /= sizeof(numDevices); + CHECK_OPENCL( clStatus, "clGetContextInfo" ); + + std::vector<std::shared_ptr<osl::File> > aGeneratedFiles = binaryGenerated( + filename, gpuInfo->mpContext ); + + if (aGeneratedFiles.size() == numDevices) + { + std::unique_ptr<size_t[]> length(new size_t[numDevices]); + std::unique_ptr<unsigned char*[]> pBinary(new unsigned char*[numDevices]); + for(size_t i = 0; i < numDevices; ++i) + { + sal_uInt64 nSize; + aGeneratedFiles[i]->getSize(nSize); + unsigned char* binary = new unsigned char[nSize]; + sal_uInt64 nBytesRead; + aGeneratedFiles[i]->read(binary, nSize, nBytesRead); + if(nSize != nBytesRead) + assert(false); + + length[i] = nBytesRead; + + pBinary[i] = binary; + } + + // grab the handles to all of the devices in the context. + std::unique_ptr<cl_device_id[]> pArryDevsID(new cl_device_id[numDevices]); + clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES, + sizeof( cl_device_id ) * numDevices, pArryDevsID.get(), nullptr ); + + if(clStatus != CL_SUCCESS) + { + for(size_t i = 0; i < numDevices; ++i) + { + delete[] pBinary[i]; + } + return false; + } + + cl_int binary_status; + + gpuInfo->mpArryPrograms[idx] = clCreateProgramWithBinary( gpuInfo->mpContext,numDevices, + pArryDevsID.get(), length.get(), const_cast<const unsigned char**>(pBinary.get()), + &binary_status, &clStatus ); + if(clStatus != CL_SUCCESS) + { + // something went wrong, fall back to compiling from source + return false; + } + SAL_INFO("opencl", "Created program " << gpuInfo->mpArryPrograms[idx] << " from binary"); + for(size_t i = 0; i < numDevices; ++i) + { + delete[] pBinary[i]; + } + } + + if ( !gpuInfo->mpArryPrograms[idx] ) + { + return false; + } + return buildProgram(buildOption, gpuInfo, idx); +} + +namespace { + +void checkDeviceForDoubleSupport(cl_device_id deviceId, bool& bKhrFp64, bool& bAmdFp64) +{ + OpenCLZone zone; + + bKhrFp64 = false; + bAmdFp64 = false; + + // Check device extensions for double type + size_t aDevExtInfoSize = 0; + + cl_uint clStatus = clGetDeviceInfo( deviceId, CL_DEVICE_EXTENSIONS, 0, nullptr, &aDevExtInfoSize ); + if( clStatus != CL_SUCCESS ) + return; + + std::unique_ptr<char[]> pExtInfo(new char[aDevExtInfoSize]); + + clStatus = clGetDeviceInfo( deviceId, CL_DEVICE_EXTENSIONS, + sizeof(char) * aDevExtInfoSize, pExtInfo.get(), nullptr); + + if( clStatus != CL_SUCCESS ) + return; + + if ( strstr( pExtInfo.get(), "cl_khr_fp64" ) ) + { + bKhrFp64 = true; + } + else + { + // Check if cl_amd_fp64 extension is supported + if ( strstr( pExtInfo.get(), "cl_amd_fp64" ) ) + bAmdFp64 = true; + } +} + +bool initOpenCLRunEnv( GPUEnv *gpuInfo ) +{ + OpenCLZone zone; + cl_uint nPreferredVectorWidthFloat; + char pName[64]; + + bool bKhrFp64 = false; + bool bAmdFp64 = false; + + checkDeviceForDoubleSupport(gpuInfo->mpDevID, bKhrFp64, bAmdFp64); + + gpuInfo->mnKhrFp64Flag = bKhrFp64; + gpuInfo->mnAmdFp64Flag = bAmdFp64; + + gpuInfo->mbNeedsTDRAvoidance = false; + + clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint), + &nPreferredVectorWidthFloat, nullptr); + SAL_INFO("opencl", "CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT=" << nPreferredVectorWidthFloat); + + clGetPlatformInfo(gpuInfo->mpPlatformID, CL_PLATFORM_NAME, 64, + pName, nullptr); + +#if defined (_WIN32) +// the Win32 SDK 8.1 deprecates GetVersionEx() +# ifdef _WIN32_WINNT_WINBLUE + const bool bIsNotWinOrIsWin8OrGreater = IsWindows8OrGreater(); +# else + bool bIsNotWinOrIsWin8OrGreater = true; + OSVERSIONINFOW aVersionInfo = {}; + aVersionInfo.dwOSVersionInfoSize = sizeof( aVersionInfo ); + if (GetVersionExW( &aVersionInfo )) + { + // Windows 7 or lower? + if (aVersionInfo.dwMajorVersion < 6 || + (aVersionInfo.dwMajorVersion == 6 && aVersionInfo.dwMinorVersion < 2)) + bIsNotWinOrIsWin8OrGreater = false; + } +# endif +#else + const bool bIsNotWinOrIsWin8OrGreater = true; +#endif + + // Heuristic: Certain old low-end OpenCL implementations don't + // work for us with too large group lengths. Looking at the preferred + // float vector width seems to be a way to detect these devices, except + // the non-working NVIDIA cards on Windows older than version 8. + gpuInfo->mbNeedsTDRAvoidance = ( nPreferredVectorWidthFloat == 4 ) || + ( !bIsNotWinOrIsWin8OrGreater && + OUString::createFromAscii(pName).indexOf("NVIDIA") > -1 ); + + size_t nMaxParameterSize; + clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof(size_t), + &nMaxParameterSize, nullptr); + SAL_INFO("opencl", "CL_DEVICE_MAX_PARAMETER_SIZE=" << nMaxParameterSize); + + return false; +} + +bool initOpenCLRunEnv( int argc ) +{ + if ( ( argc > MAX_CLFILE_NUM ) || ( argc < 0 ) ) + return true; + + if ( !bIsInited ) + { + if ( !gpuEnv.mnIsUserCreated ) + memset( &gpuEnv, 0, sizeof(gpuEnv) ); + + //initialize devices, context, command_queue + bool status = initOpenCLRunEnv( &gpuEnv ); + if ( status ) + { + return true; + } + //initialize program, kernelName, kernelCount + if( getenv( "SC_FLOAT" ) ) + { + gpuEnv.mnKhrFp64Flag = false; + gpuEnv.mnAmdFp64Flag = false; + } + if( gpuEnv.mnKhrFp64Flag ) + { + SAL_INFO("opencl", "Use Khr double"); + } + else if( gpuEnv.mnAmdFp64Flag ) + { + SAL_INFO("opencl", "Use AMD double type"); + } + else + { + SAL_INFO("opencl", "USE float type"); + } + bIsInited = true; + } + return false; +} + +// based on crashes and hanging during kernel compilation +void createDeviceInfo(cl_device_id aDeviceId, OpenCLPlatformInfo& rPlatformInfo) +{ + OpenCLDeviceInfo aDeviceInfo; + aDeviceInfo.device = aDeviceId; + + char pName[DEVICE_NAME_LENGTH]; + cl_int nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_NAME, DEVICE_NAME_LENGTH, pName, nullptr); + if(nState != CL_SUCCESS) + return; + + aDeviceInfo.maName = OUString::createFromAscii(pName); + + char pVendor[DEVICE_NAME_LENGTH]; + nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_VENDOR, DEVICE_NAME_LENGTH, pVendor, nullptr); + if(nState != CL_SUCCESS) + return; + + aDeviceInfo.maVendor = OUString::createFromAscii(pVendor); + + cl_ulong nMemSize; + nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(nMemSize), &nMemSize, nullptr); + if(nState != CL_SUCCESS) + return; + + aDeviceInfo.mnMemory = nMemSize; + + cl_uint nClockFrequency; + nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(nClockFrequency), &nClockFrequency, nullptr); + if(nState != CL_SUCCESS) + return; + + aDeviceInfo.mnFrequency = nClockFrequency; + + cl_uint nComputeUnits; + nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(nComputeUnits), &nComputeUnits, nullptr); + if(nState != CL_SUCCESS) + return; + + char pDriver[DEVICE_NAME_LENGTH]; + nState = clGetDeviceInfo(aDeviceId, CL_DRIVER_VERSION, DEVICE_NAME_LENGTH, pDriver, nullptr); + + if(nState != CL_SUCCESS) + return; + + aDeviceInfo.maDriver = OUString::createFromAscii(pDriver); + + bool bKhrFp64 = false; + bool bAmdFp64 = false; + checkDeviceForDoubleSupport(aDeviceId, bKhrFp64, bAmdFp64); + + // only list devices that support double + if(!bKhrFp64 && !bAmdFp64) + return; + + aDeviceInfo.mnComputeUnits = nComputeUnits; + + if(!OpenCLConfig::get().checkImplementation(rPlatformInfo, aDeviceInfo)) + rPlatformInfo.maDevices.push_back(aDeviceInfo); +} + +bool createPlatformInfo(cl_platform_id nPlatformId, OpenCLPlatformInfo& rPlatformInfo) +{ + rPlatformInfo.platform = nPlatformId; + char pName[64]; + cl_int nState = clGetPlatformInfo(nPlatformId, CL_PLATFORM_NAME, 64, + pName, nullptr); + if(nState != CL_SUCCESS) + return false; + rPlatformInfo.maName = OUString::createFromAscii(pName); + + char pVendor[64]; + nState = clGetPlatformInfo(nPlatformId, CL_PLATFORM_VENDOR, 64, + pVendor, nullptr); + if(nState != CL_SUCCESS) + return false; + + rPlatformInfo.maVendor = OUString::createFromAscii(pVendor); + + cl_uint nDevices; + nState = clGetDeviceIDs(nPlatformId, CL_DEVICE_TYPE_ALL, 0, nullptr, &nDevices); + if(nState != CL_SUCCESS) + return false; + + // memory leak that does not matter + // memory is stored in static variable that lives through the whole program + cl_device_id* pDevices = new cl_device_id[nDevices]; + nState = clGetDeviceIDs(nPlatformId, CL_DEVICE_TYPE_ALL, nDevices, pDevices, nullptr); + if(nState != CL_SUCCESS) + return false; + + for(size_t i = 0; i < nDevices; ++i) + { + createDeviceInfo(pDevices[i], rPlatformInfo); + } + + return true; +} + +} + +const std::vector<OpenCLPlatformInfo>& fillOpenCLInfo() +{ + static std::vector<OpenCLPlatformInfo> aPlatforms; + + // return early if we already initialized or can't use OpenCL + if (!aPlatforms.empty() || !canUseOpenCL()) + return aPlatforms; + + int status = clewInit(OPENCL_DLL_NAME); + if (status < 0) + return aPlatforms; + + cl_uint nPlatforms; + cl_int nState = clGetPlatformIDs(0, nullptr, &nPlatforms); + + if(nState != CL_SUCCESS) + return aPlatforms; + + // memory leak that does not matter, + // memory is stored in static instance aPlatforms + cl_platform_id* pPlatforms = new cl_platform_id[nPlatforms]; + nState = clGetPlatformIDs(nPlatforms, pPlatforms, nullptr); + + if(nState != CL_SUCCESS) + return aPlatforms; + + for(size_t i = 0; i < nPlatforms; ++i) + { + OpenCLPlatformInfo aPlatformInfo; + if(createPlatformInfo(pPlatforms[i], aPlatformInfo)) + aPlatforms.push_back(aPlatformInfo); + } + + return aPlatforms; +} + +namespace { + +cl_device_id findDeviceIdByDeviceString(std::u16string_view rString, const std::vector<OpenCLPlatformInfo>& rPlatforms) +{ + for (const OpenCLPlatformInfo& rPlatform : rPlatforms) + { + for (const OpenCLDeviceInfo& rDeviceInfo : rPlatform.maDevices) + { + OUString aDeviceId = rDeviceInfo.maVendor + " " + rDeviceInfo.maName; + if (rString == aDeviceId) + { + return rDeviceInfo.device; + } + } + } + + return nullptr; +} + +void findDeviceInfoFromDeviceId(cl_device_id aDeviceId, size_t& rDeviceId, size_t& rPlatformId) +{ + cl_platform_id platformId; + cl_int nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_PLATFORM, + sizeof(platformId), &platformId, nullptr); + + if(nState != CL_SUCCESS) + return; + + const std::vector<OpenCLPlatformInfo>& rPlatforms = fillOpenCLInfo(); + for(size_t i = 0; i < rPlatforms.size(); ++i) + { + cl_platform_id platId = rPlatforms[i].platform; + if(platId != platformId) + continue; + + for(size_t j = 0; j < rPlatforms[i].maDevices.size(); ++j) + { + cl_device_id id = rPlatforms[i].maDevices[j].device; + if(id == aDeviceId) + { + rDeviceId = j; + rPlatformId = i; + return; + } + } + } +} + +} + +bool canUseOpenCL() +{ + if( const char* env = getenv( "SC_FORCE_CALCULATION" )) + { + if( strcmp( env, "opencl" ) == 0 ) + return true; + } + return !getenv("SAL_DISABLE_OPENCL") && officecfg::Office::Common::Misc::UseOpenCL::get(); +} + +bool switchOpenCLDevice(const OUString* pDevice, bool bAutoSelect, bool bForceEvaluation, OUString& rOutSelectedDeviceVersionIDString) +{ + if (!canUseOpenCL() || fillOpenCLInfo().empty()) + return false; + + cl_device_id pDeviceId = nullptr; + if(pDevice) + pDeviceId = findDeviceIdByDeviceString(*pDevice, fillOpenCLInfo()); + + if(!pDeviceId || bAutoSelect) + { + int status = clewInit(OPENCL_DLL_NAME); + if (status < 0) + return false; + + OUString url(OStringToOUString(getCacheFolder(), RTL_TEXTENCODING_UTF8)); + OUString path; + osl::FileBase::getSystemPathFromFileURL(url,path); + ds_device aSelectedDevice = getDeviceSelection(path, bForceEvaluation); + if ( aSelectedDevice.eType != DeviceType::OpenCLDevice) + return false; + pDeviceId = aSelectedDevice.aDeviceID; + } + + if(gpuEnv.mpDevID == pDeviceId) + { + // we don't need to change anything + // still the same device + return pDeviceId != nullptr; + } + + cl_context context; + cl_platform_id platformId; + + { + OpenCLZone zone; + cl_int nState = clGetDeviceInfo(pDeviceId, CL_DEVICE_PLATFORM, + sizeof(platformId), &platformId, nullptr); + + cl_context_properties cps[3]; + cps[0] = CL_CONTEXT_PLATFORM; + cps[1] = reinterpret_cast<cl_context_properties>(platformId); + cps[2] = 0; + context = clCreateContext( cps, 1, &pDeviceId, nullptr, nullptr, &nState ); + if (nState != CL_SUCCESS) + SAL_WARN("opencl", "clCreateContext failed: " << errorString(nState)); + + if(nState != CL_SUCCESS || context == nullptr) + { + if(context != nullptr) + clReleaseContext(context); + + SAL_WARN("opencl", "failed to set/switch opencl device"); + return false; + } + SAL_INFO("opencl", "Created context " << context << " for platform " << platformId << ", device " << pDeviceId); + + OString sDeviceID = getDeviceInfoString(pDeviceId, CL_DEVICE_VENDOR) + " " + getDeviceInfoString(pDeviceId, CL_DRIVER_VERSION); + rOutSelectedDeviceVersionIDString = OStringToOUString(sDeviceID, RTL_TEXTENCODING_UTF8); + } + + setOpenCLCmdQueuePosition(0); // Call this just to avoid the method being deleted from unused function deleter. + + releaseOpenCLEnv(&gpuEnv); + + OpenCLEnv env; + env.mpOclPlatformID = platformId; + env.mpOclContext = context; + env.mpOclDevsID = pDeviceId; + + initOpenCLAttr(&env); + + return !initOpenCLRunEnv(0); +} + +void getOpenCLDeviceInfo(size_t& rDeviceId, size_t& rPlatformId) +{ + if (!canUseOpenCL()) + return; + + int status = clewInit(OPENCL_DLL_NAME); + if (status < 0) + return; + + cl_device_id id = gpuEnv.mpDevID; + findDeviceInfoFromDeviceId(id, rDeviceId, rPlatformId); +} + +void getOpenCLDeviceName(OUString& rDeviceName, OUString& rPlatformName) +{ + if (!canUseOpenCL()) + return; + + int status = clewInit(OPENCL_DLL_NAME); + if (status < 0) + return; + + cl_device_id deviceId = gpuEnv.mpDevID; + cl_platform_id platformId; + if( clGetDeviceInfo(deviceId, CL_DEVICE_PLATFORM, sizeof(platformId), &platformId, nullptr) != CL_SUCCESS ) + return; + + char deviceName[DEVICE_NAME_LENGTH] = {0}; + if( clGetDeviceInfo(deviceId, CL_DEVICE_NAME, sizeof(deviceName), deviceName, nullptr) != CL_SUCCESS ) + return; + char platformName[64]; + if( clGetPlatformInfo(platformId, CL_PLATFORM_NAME, 64, platformName, nullptr) != CL_SUCCESS ) + return; + rDeviceName = OUString::createFromAscii(deviceName); + rPlatformName = OUString::createFromAscii(platformName); +} + +void setOpenCLCmdQueuePosition( int nPos ) +{ + if (nPos < 0 || nPos >= OPENCL_CMDQUEUE_SIZE) + // Out of range. Ignore this. + return; + + gpuEnv.mnCmdQueuePos = nPos; +} + +const char* errorString(cl_int nError) +{ +#define CASE(val) case CL_##val: return #val + switch (nError) + { + CASE(SUCCESS); + CASE(DEVICE_NOT_FOUND); + CASE(DEVICE_NOT_AVAILABLE); + CASE(COMPILER_NOT_AVAILABLE); + CASE(MEM_OBJECT_ALLOCATION_FAILURE); + CASE(OUT_OF_RESOURCES); + CASE(OUT_OF_HOST_MEMORY); + CASE(PROFILING_INFO_NOT_AVAILABLE); + CASE(MEM_COPY_OVERLAP); + CASE(IMAGE_FORMAT_MISMATCH); + CASE(IMAGE_FORMAT_NOT_SUPPORTED); + CASE(BUILD_PROGRAM_FAILURE); + CASE(MAP_FAILURE); + CASE(INVALID_VALUE); + CASE(INVALID_DEVICE_TYPE); + CASE(INVALID_PLATFORM); + CASE(INVALID_DEVICE); + CASE(INVALID_CONTEXT); + CASE(INVALID_QUEUE_PROPERTIES); + CASE(INVALID_COMMAND_QUEUE); + CASE(INVALID_HOST_PTR); + CASE(INVALID_MEM_OBJECT); + CASE(INVALID_IMAGE_FORMAT_DESCRIPTOR); + CASE(INVALID_IMAGE_SIZE); + CASE(INVALID_SAMPLER); + CASE(INVALID_BINARY); + CASE(INVALID_BUILD_OPTIONS); + CASE(INVALID_PROGRAM); + CASE(INVALID_PROGRAM_EXECUTABLE); + CASE(INVALID_KERNEL_NAME); + CASE(INVALID_KERNEL_DEFINITION); + CASE(INVALID_KERNEL); + CASE(INVALID_ARG_INDEX); + CASE(INVALID_ARG_VALUE); + CASE(INVALID_ARG_SIZE); + CASE(INVALID_KERNEL_ARGS); + CASE(INVALID_WORK_DIMENSION); + CASE(INVALID_WORK_GROUP_SIZE); + CASE(INVALID_WORK_ITEM_SIZE); + CASE(INVALID_GLOBAL_OFFSET); + CASE(INVALID_EVENT_WAIT_LIST); + CASE(INVALID_EVENT); + CASE(INVALID_OPERATION); + CASE(INVALID_GL_OBJECT); + CASE(INVALID_BUFFER_SIZE); + CASE(INVALID_MIP_LEVEL); + CASE(INVALID_GLOBAL_WORK_SIZE); + default: + return "Unknown OpenCL error code"; + } +#undef CASE +} + +bool GPUEnv::isOpenCLEnabled() +{ + return gpuEnv.mpDevID && gpuEnv.mpContext; +} + +} + +void releaseOpenCLEnv( openclwrapper::GPUEnv *gpuInfo ) +{ + OpenCLZone zone; + + if ( !bIsInited ) + { + return; + } + + for (_cl_command_queue* & i : openclwrapper::gpuEnv.mpCmdQueue) + { + if (i) + { + clReleaseCommandQueue(i); + i = nullptr; + } + } + openclwrapper::gpuEnv.mnCmdQueuePos = 0; + + if ( openclwrapper::gpuEnv.mpContext ) + { + clReleaseContext( openclwrapper::gpuEnv.mpContext ); + openclwrapper::gpuEnv.mpContext = nullptr; + } + bIsInited = false; + gpuInfo->mnIsUserCreated = 0; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/opencl/source/platforminfo.cxx b/opencl/source/platforminfo.cxx new file mode 100644 index 000000000..a23aa87a5 --- /dev/null +++ b/opencl/source/platforminfo.cxx @@ -0,0 +1,46 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <ostream> + +#include <opencl/platforminfo.hxx> + +OpenCLDeviceInfo::OpenCLDeviceInfo() + : device(nullptr) + , mnMemory(0) + , mnComputeUnits(0) + , mnFrequency(0) +{ +} + +OpenCLPlatformInfo::OpenCLPlatformInfo() + : platform(nullptr) +{ +} + +std::ostream& operator<<(std::ostream& rStream, const OpenCLPlatformInfo& rPlatform) +{ + rStream << "{" + "Vendor=" << rPlatform.maVendor << "," + "Name=" << rPlatform.maName << + "}"; + return rStream; +} + +std::ostream& operator<<(std::ostream& rStream, const OpenCLDeviceInfo& rDevice) +{ + rStream << "{" + "Name=" << rDevice.maName << "," + "Vendor=" << rDevice.maVendor << "," + "Driver=" << rDevice.maDriver << + "}"; + return rStream; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |