summaryrefslogtreecommitdiffstats
path: root/opencl
diff options
context:
space:
mode:
Diffstat (limited to 'opencl')
-rw-r--r--opencl/Executable_opencltest.mk32
-rw-r--r--opencl/IwyuFilter_opencl.yaml6
-rw-r--r--opencl/Library_opencl.mk58
-rw-r--r--opencl/Makefile14
-rw-r--r--opencl/Module_opencl.mk17
-rw-r--r--opencl/README7
-rw-r--r--opencl/inc/clew_setup.hxx25
-rw-r--r--opencl/inc/opencl_device.hxx24
-rw-r--r--opencl/inc/opencl_device_selection.h405
-rw-r--r--opencl/opencltest/main.cxx184
-rw-r--r--opencl/source/OpenCLZone.cxx45
-rw-r--r--opencl/source/opencl_device.cxx610
-rw-r--r--opencl/source/openclconfig.cxx257
-rw-r--r--opencl/source/openclwrapper.cxx978
-rw-r--r--opencl/source/platforminfo.cxx46
15 files changed, 2708 insertions, 0 deletions
diff --git a/opencl/Executable_opencltest.mk b/opencl/Executable_opencltest.mk
new file mode 100644
index 000000000..2767333b6
--- /dev/null
+++ b/opencl/Executable_opencltest.mk
@@ -0,0 +1,32 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+$(eval $(call gb_Executable_Executable,opencltest))
+
+$(eval $(call gb_Executable_set_include,opencltest,\
+ -I$(SRCDIR)/opencl/inc \
+ $$(INCLUDE) \
+))
+
+
+$(eval $(call gb_Executable_add_exception_objects,opencltest,\
+ opencl/opencltest/main \
+))
+
+$(eval $(call gb_Executable_use_externals,opencltest,\
+ clew \
+))
+
+$(eval $(call gb_Executable_use_libraries,opencltest,\
+ sal \
+))
+
+$(eval $(call gb_Executable_add_default_nativeres,opencltest))
+
+# vim: set noet sw=4 ts=4:
diff --git a/opencl/IwyuFilter_opencl.yaml b/opencl/IwyuFilter_opencl.yaml
new file mode 100644
index 000000000..10b5be193
--- /dev/null
+++ b/opencl/IwyuFilter_opencl.yaml
@@ -0,0 +1,6 @@
+---
+assumeFilename: opencl/source/opencl_device.cxx
+blacklist:
+ opencl/source/opencl_device.cxx:
+ # Avoid loplugin:unreffun error
+ - opencl_device.hxx
diff --git a/opencl/Library_opencl.mk b/opencl/Library_opencl.mk
new file mode 100644
index 000000000..5b3de52d6
--- /dev/null
+++ b/opencl/Library_opencl.mk
@@ -0,0 +1,58 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+$(eval $(call gb_Library_Library,opencl))
+
+$(eval $(call gb_Library_set_include,opencl,\
+ -I$(SRCDIR)/opencl/inc \
+ $$(INCLUDE) \
+))
+
+$(eval $(call gb_Library_add_defs,opencl,\
+ -DOPENCL_DLLIMPLEMENTATION \
+))
+
+$(eval $(call gb_Library_use_externals,opencl,\
+ boost_headers \
+ clew \
+ icu_headers \
+ icui18n \
+ icuuc \
+ libxml2 \
+))
+
+$(eval $(call gb_Library_use_custom_headers,opencl,\
+ officecfg/registry \
+))
+
+$(eval $(call gb_Library_use_sdk_api,opencl))
+
+$(eval $(call gb_Library_use_libraries,opencl,\
+ comphelper \
+ cppu \
+ sal \
+ salhelper \
+ tl \
+))
+
+$(eval $(call gb_Library_add_exception_objects,opencl,\
+ opencl/source/openclconfig \
+ opencl/source/openclwrapper \
+ opencl/source/opencl_device \
+ opencl/source/platforminfo \
+ opencl/source/OpenCLZone \
+))
+
+ifeq ($(OS),LINUX)
+$(eval $(call gb_Library_add_libs,opencl,\
+ -lrt \
+))
+endif
+
+# vim: set noet sw=4 ts=4:
diff --git a/opencl/Makefile b/opencl/Makefile
new file mode 100644
index 000000000..0997e6284
--- /dev/null
+++ b/opencl/Makefile
@@ -0,0 +1,14 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+module_directory:=$(dir $(realpath $(firstword $(MAKEFILE_LIST))))
+
+include $(module_directory)/../solenv/gbuild/partial_build.mk
+
+# vim: set noet sw=4 ts=4:
diff --git a/opencl/Module_opencl.mk b/opencl/Module_opencl.mk
new file mode 100644
index 000000000..5db04e530
--- /dev/null
+++ b/opencl/Module_opencl.mk
@@ -0,0 +1,17 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+$(eval $(call gb_Module_Module,opencl))
+
+$(eval $(call gb_Module_add_targets,opencl,\
+ Executable_opencltest \
+ Library_opencl \
+))
+
+# vim: set noet sw=4 ts=4:
diff --git a/opencl/README b/opencl/README
new file mode 100644
index 000000000..c5699e797
--- /dev/null
+++ b/opencl/README
@@ -0,0 +1,7 @@
+OpenCL-related code that is not specific to any particular
+functionality OpenCL is used for. (Like formula group calculation in
+Calc.)
+
+Not compiled on platforms where OpenCL is not available (iOS and
+Android). On other platforms OpenCL is optional at run-time, but not
+at build time.
diff --git a/opencl/inc/clew_setup.hxx b/opencl/inc/clew_setup.hxx
new file mode 100644
index 000000000..58571faad
--- /dev/null
+++ b/opencl/inc/clew_setup.hxx
@@ -0,0 +1,25 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_OPENCL_INC_CLEW_SETUP_HXX
+#define INCLUDED_OPENCL_INC_CLEW_SETUP_HXX
+
+#ifdef _WIN32
+#include <prewin.h>
+#include <postwin.h>
+#define OPENCL_DLL_NAME "OpenCL.dll"
+#elif defined(MACOSX)
+#define OPENCL_DLL_NAME nullptr
+#else
+#define OPENCL_DLL_NAME "libOpenCL.so.1"
+#endif
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/opencl/inc/opencl_device.hxx b/opencl/inc/opencl_device.hxx
new file mode 100644
index 000000000..93cc5f7a1
--- /dev/null
+++ b/opencl/inc/opencl_device.hxx
@@ -0,0 +1,24 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_OPENCL_INC_OPENCL_DEVICE_HXX
+#define INCLUDED_OPENCL_INC_OPENCL_DEVICE_HXX
+
+#include <rtl/ustring.hxx>
+
+namespace openclwrapper { struct GPUEnv; }
+struct ds_device;
+
+ds_device const & getDeviceSelection(OUString const & pFileName, bool bForceSelection);
+
+void releaseOpenCLEnv( openclwrapper::GPUEnv *gpuInfo );
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/opencl/inc/opencl_device_selection.h b/opencl/inc/opencl_device_selection.h
new file mode 100644
index 000000000..af0ecd37b
--- /dev/null
+++ b/opencl/inc/opencl_device_selection.h
@@ -0,0 +1,405 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_OPENCL_INC_OPENCL_DEVICE_SELECTION_H
+#define INCLUDED_OPENCL_INC_OPENCL_DEVICE_SELECTION_H
+
+#ifdef _MSC_VER
+//#define _CRT_SECURE_NO_WARNINGS
+#endif
+
+#include <memory>
+
+#include <float.h>
+
+#include <clew/clew.h>
+#include <tools/stream.hxx>
+#include <tools/XmlWriter.hxx>
+#include <tools/XmlWalker.hxx>
+#include <rtl/math.hxx>
+
+#include <opencl/OpenCLZone.hxx>
+
+#include <vector>
+
+enum ds_status
+{
+ DS_SUCCESS = 0
+ ,DS_INVALID_PROFILE = 1000
+ ,DS_MEMORY_ERROR
+ , DS_INVALID_PERF_EVALUATOR_TYPE
+ , DS_INVALID_PERF_EVALUATOR
+ , DS_PERF_EVALUATOR_ERROR
+ , DS_FILE_ERROR
+ , DS_UNKNOWN_DEVICE_TYPE
+ , DS_PROFILE_FILE_ERROR
+ , DS_SCORE_SERIALIZER_ERROR
+ , DS_SCORE_DESERIALIZER_ERROR
+};
+
+// device type
+enum class DeviceType
+{
+ None,
+ // NativeCPU means the traditional Calc interpreter code path. (That also includes the so-called
+ // "software interpreter", but note that it definitely does not mean *exclusively* that.)
+ NativeCPU,
+ // OpenCLDevice means an OpenCL device as supplied by an OpenCL platform, which might well be
+ // implemented using code that runs on the CPU (and not a GPU). On Windows, OpenCL platforms
+ // typically provide two devices, one for the GPU and one for the CPU.
+ OpenCLDevice
+};
+
+struct ds_device
+{
+ DeviceType eType;
+ cl_device_id aDeviceID;
+
+ OString sPlatformName;
+ OString sPlatformVendor;
+ OString sPlatformVersion;
+ OString sPlatformProfile;
+ OString sPlatformExtensions;
+
+ OString sDeviceName;
+ OString sDeviceVendor;
+ OString sDeviceVersion;
+ OString sDriverVersion;
+ OString sDeviceType;
+ OString sDeviceExtensions;
+ OString sDeviceOpenCLVersion;
+
+ bool bDeviceAvailable;
+ bool bDeviceCompilerAvailable;
+ bool bDeviceLinkerAvailable;
+
+ double fTime; // small time means faster device
+ bool bErrors; // were there any opencl errors
+};
+
+struct ds_profile
+{
+ std::vector<ds_device> devices;
+ OString version;
+
+ ds_profile(OString const & inVersion)
+ : version(inVersion)
+ {}
+};
+
+inline OString getPlatformInfoString(cl_platform_id aPlatformId, cl_platform_info aPlatformInfo)
+{
+ std::vector<char> temporary(2048, 0);
+ clGetPlatformInfo(aPlatformId, aPlatformInfo, temporary.size(), temporary.data(), nullptr);
+ return temporary.data();
+}
+
+inline OString getDeviceInfoString(cl_device_id aDeviceId, cl_device_info aDeviceInfo)
+{
+ std::vector<char> temporary(2048, 0);
+ clGetDeviceInfo(aDeviceId, aDeviceInfo, temporary.size(), temporary.data(), nullptr);
+ return temporary.data();
+}
+
+inline OString getDeviceType(cl_device_id aDeviceId)
+{
+ OString sType = "";
+ cl_device_type aDeviceType;
+ clGetDeviceInfo(aDeviceId, CL_DEVICE_TYPE, sizeof(aDeviceType), &aDeviceType, nullptr);
+ if (aDeviceType & CL_DEVICE_TYPE_CPU)
+ sType += "cpu ";
+ if (aDeviceType & CL_DEVICE_TYPE_GPU)
+ sType += "gpu ";
+ if (aDeviceType & CL_DEVICE_TYPE_ACCELERATOR)
+ sType += "accelerator ";
+ if (aDeviceType & CL_DEVICE_TYPE_CUSTOM)
+ sType += "custom ";
+ if (aDeviceType & CL_DEVICE_TYPE_DEFAULT)
+ sType += "default ";
+ return sType;
+}
+
+inline bool getDeviceInfoBool(cl_device_id aDeviceId, cl_device_info aDeviceInfo)
+{
+ cl_bool bCLBool = 0;
+ // init to false in case clGetDeviceInfo returns CL_INVALID_VALUE when
+ // requesting unsupported (in version 1.0) CL_DEVICE_LINKER_AVAILABLE
+ clGetDeviceInfo(aDeviceId, aDeviceInfo, sizeof(bCLBool), &bCLBool, nullptr);
+ return bCLBool == CL_TRUE;
+}
+
+inline ds_status initDSProfile(std::unique_ptr<ds_profile>& rProfile, OString const & rVersion)
+{
+ OpenCLZone zone;
+
+ int numDevices;
+ cl_uint numPlatforms;
+ std::vector<cl_platform_id> platforms;
+ std::vector<cl_device_id> devices;
+
+ unsigned int next;
+ unsigned int i;
+
+ rProfile.reset(new ds_profile(rVersion));
+
+ clGetPlatformIDs(0, nullptr, &numPlatforms);
+ if (numPlatforms != 0)
+ {
+ platforms.resize(numPlatforms);
+ clGetPlatformIDs(numPlatforms, platforms.data(), nullptr);
+ }
+
+ numDevices = 0;
+ for (i = 0; i < static_cast<unsigned int>(numPlatforms); i++)
+ {
+ cl_uint num = 0;
+ cl_int err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 0, nullptr, &num);
+ if (err != CL_SUCCESS)
+ {
+ /* we want to catch at least the case when the call returns
+ * CL_DEVICE_NOT_FOUND (i.e. no devices), because some platforms
+ * don't set num to 0 in this case; but in fact this is a good
+ * thing to do for _any_ error returned by the call
+ */
+ num = 0;
+ }
+ numDevices += num;
+ }
+
+ if (numDevices != 0)
+ {
+ devices.resize(numDevices);
+ }
+
+ rProfile->devices.resize(numDevices + 1); // +1 to numDevices to include the native CPU
+
+ next = 0;
+ for (i = 0; i < static_cast<unsigned int>(numPlatforms); i++)
+ {
+ cl_uint num = 0;
+ unsigned j;
+
+ OString sPlatformProfile = getPlatformInfoString(platforms[i], CL_PLATFORM_PROFILE);
+ OString sPlatformVersion = getPlatformInfoString(platforms[i], CL_PLATFORM_VERSION);
+ OString sPlatformName = getPlatformInfoString(platforms[i], CL_PLATFORM_NAME);
+ OString sPlatformVendor = getPlatformInfoString(platforms[i], CL_PLATFORM_VENDOR);
+ OString sPlatformExts = getPlatformInfoString(platforms[i], CL_PLATFORM_EXTENSIONS);
+
+ cl_int err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, numDevices, devices.data(), &num);
+ if (err != CL_SUCCESS)
+ {
+ /* we want to catch at least the case when the call returns
+ * CL_DEVICE_NOT_FOUND (i.e. no devices), because some platforms
+ * don't set num to 0 in this case; but in fact this is a good
+ * thing to do for _any_ error returned by the call
+ */
+ num = 0;
+ }
+ for (j = 0; j < num; j++, next++)
+ {
+ cl_device_id aDeviceID = devices[j];
+
+ ds_device& rDevice = rProfile->devices[next];
+ rDevice.eType = DeviceType::OpenCLDevice;
+ rDevice.aDeviceID = aDeviceID;
+
+ rDevice.sPlatformName = sPlatformName;
+ rDevice.sPlatformVendor = sPlatformVendor;
+ rDevice.sPlatformVersion = sPlatformVersion;
+ rDevice.sPlatformProfile = sPlatformProfile;
+ rDevice.sPlatformExtensions = sPlatformExts;
+
+ rDevice.sDeviceName = getDeviceInfoString(aDeviceID, CL_DEVICE_NAME);
+ rDevice.sDeviceVendor = getDeviceInfoString(aDeviceID, CL_DEVICE_VENDOR);
+ rDevice.sDeviceVersion = getDeviceInfoString(aDeviceID, CL_DEVICE_VERSION);
+ rDevice.sDriverVersion = getDeviceInfoString(aDeviceID, CL_DRIVER_VERSION);
+ rDevice.sDeviceType = getDeviceType(aDeviceID);
+ rDevice.sDeviceExtensions = getDeviceInfoString(aDeviceID, CL_DEVICE_EXTENSIONS);
+ rDevice.sDeviceOpenCLVersion = getDeviceInfoString(aDeviceID, CL_DEVICE_OPENCL_C_VERSION);
+
+ rDevice.bDeviceAvailable = getDeviceInfoBool(aDeviceID, CL_DEVICE_AVAILABLE);
+ rDevice.bDeviceCompilerAvailable = getDeviceInfoBool(aDeviceID, CL_DEVICE_COMPILER_AVAILABLE);
+ rDevice.bDeviceLinkerAvailable = getDeviceInfoBool(aDeviceID, CL_DEVICE_LINKER_AVAILABLE);
+ }
+ }
+ rProfile->devices[next].eType = DeviceType::NativeCPU;
+
+ return DS_SUCCESS;
+}
+
+inline ds_status writeProfile(const OUString& rStreamName, std::unique_ptr<ds_profile> const & pProfile)
+{
+ if (pProfile == nullptr)
+ return DS_INVALID_PROFILE;
+ if (rStreamName.isEmpty())
+ return DS_INVALID_PROFILE;
+
+ std::unique_ptr<SvStream> pStream;
+ pStream.reset(new SvFileStream(rStreamName, StreamMode::STD_READWRITE | StreamMode::TRUNC));
+
+ tools::XmlWriter aXmlWriter(pStream.get());
+
+ if (!aXmlWriter.startDocument())
+ return DS_FILE_ERROR;
+
+ aXmlWriter.startElement("profile");
+
+ aXmlWriter.startElement("version");
+ aXmlWriter.content(pProfile->version);
+ aXmlWriter.endElement();
+
+ for (const ds_device& rDevice : pProfile->devices)
+ {
+ aXmlWriter.startElement("device");
+
+ switch(rDevice.eType)
+ {
+ case DeviceType::NativeCPU:
+ aXmlWriter.startElement("type");
+ aXmlWriter.content(OString("native"));
+ aXmlWriter.endElement();
+ break;
+ case DeviceType::OpenCLDevice:
+ aXmlWriter.startElement("type");
+ aXmlWriter.content(OString("opencl"));
+ aXmlWriter.endElement();
+
+ aXmlWriter.startElement("name");
+ aXmlWriter.content(rDevice.sDeviceName);
+ aXmlWriter.endElement();
+
+ aXmlWriter.startElement("driver");
+ aXmlWriter.content(rDevice.sDriverVersion);
+ aXmlWriter.endElement();
+ break;
+ default:
+ break;
+ }
+
+ aXmlWriter.startElement("time");
+ if (rtl::math::approxEqual(rDevice.fTime, DBL_MAX))
+ aXmlWriter.content(OString("max"));
+ else
+ aXmlWriter.content(OString::number(rDevice.fTime));
+ aXmlWriter.endElement();
+
+ aXmlWriter.startElement("errors");
+ aXmlWriter.content(rDevice.bErrors ? OString("true") : OString("false"));
+ aXmlWriter.endElement();
+
+ aXmlWriter.endElement();
+ }
+
+ aXmlWriter.endElement();
+ aXmlWriter.endDocument();
+
+ return DS_SUCCESS;
+}
+
+inline ds_status readProfile(const OUString& rStreamName, std::unique_ptr<ds_profile> const & pProfile)
+{
+ ds_status eStatus = DS_SUCCESS;
+
+ if (rStreamName.isEmpty())
+ return DS_INVALID_PROFILE;
+
+ std::unique_ptr<SvStream> pStream;
+ pStream.reset(new SvFileStream(rStreamName, StreamMode::READ));
+ tools::XmlWalker aWalker;
+
+ if (!aWalker.open(pStream.get()))
+ return DS_FILE_ERROR;
+
+ if (aWalker.name() == "profile")
+ {
+ aWalker.children();
+ while (aWalker.isValid())
+ {
+ if (aWalker.name() == "version")
+ {
+ if (aWalker.content() != pProfile->version)
+ return DS_PROFILE_FILE_ERROR;
+ }
+ else if (aWalker.name() == "device")
+ {
+ aWalker.children();
+
+ DeviceType eDeviceType = DeviceType::None;
+ OString sName;
+ OString sVersion;
+ double fTime = -1.0;
+ bool bErrors = true;
+
+ while (aWalker.isValid())
+ {
+ if (aWalker.name() == "type")
+ {
+ OString sContent = aWalker.content();
+ if (sContent == "native")
+ eDeviceType = DeviceType::NativeCPU;
+ else if (sContent == "opencl")
+ eDeviceType = DeviceType::OpenCLDevice;
+ else
+ return DS_PROFILE_FILE_ERROR;
+ }
+ else if (aWalker.name() == "name")
+ {
+ sName = aWalker.content();
+ }
+ else if (aWalker.name() == "driver")
+ {
+ sVersion = aWalker.content();
+ }
+ else if (aWalker.name() == "time")
+ {
+ if (aWalker.content() == "max")
+ fTime = DBL_MAX;
+ else
+ fTime = aWalker.content().toDouble();
+ }
+ else if (aWalker.name() == "errors")
+ {
+ bErrors = (aWalker.content() == "true");
+ }
+
+ aWalker.next();
+ }
+
+ if (fTime < 0.0)
+ return DS_PROFILE_FILE_ERROR;
+
+ for (ds_device& rDevice : pProfile->devices)
+ {
+ // type matches? either both are DS_DEVICE_OPENCL_DEVICE or DS_DEVICE_NATIVE_CPU
+ if (rDevice.eType == eDeviceType)
+ {
+ // is DS_DEVICE_NATIVE_CPU or name + version matches?
+ if (eDeviceType == DeviceType::NativeCPU ||
+ (sName == rDevice.sDeviceName &&
+ sVersion == rDevice.sDriverVersion))
+ {
+ rDevice.fTime = fTime;
+ rDevice.bErrors = bErrors;
+ }
+ }
+ }
+
+ aWalker.parent();
+ }
+ aWalker.next();
+ }
+ aWalker.parent();
+ }
+
+ return eStatus;
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/opencl/opencltest/main.cxx b/opencl/opencltest/main.cxx
new file mode 100644
index 000000000..a499032f8
--- /dev/null
+++ b/opencl/opencltest/main.cxx
@@ -0,0 +1,184 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <clew/clew.h>
+
+#include <vector>
+#include <cassert>
+#include <cstring>
+#include <iostream>
+
+#include <sal/log.hxx>
+
+#include <clew_setup.hxx>
+
+using namespace std;
+
+// The purpose of this separate executable is to check whether OpenCL works
+// without crashing (asserting, etc.). Other checks can be done by LO core itself.
+
+#define check(value, expected) \
+ do \
+ { \
+ auto val = (value); \
+ if (val != (expected)) \
+ { \
+ SAL_WARN("opencl", \
+ "OpenCL driver check failed: " << val << "(line " << __LINE__ << ")"); \
+ return; \
+ } \
+ } while (false);
+#define openclcheck(value) check(value, CL_SUCCESS)
+
+static void runTest(const char* deviceName, const char* devicePlatform)
+{
+ int status = clewInit(OPENCL_DLL_NAME);
+ check(status, CLEW_SUCCESS);
+
+ // Find the given OpenCL device (in order to use the same one as LO core).
+ cl_uint numPlatforms;
+ openclcheck(clGetPlatformIDs(0, nullptr, &numPlatforms));
+ vector<cl_platform_id> platforms(numPlatforms);
+ openclcheck(clGetPlatformIDs(numPlatforms, platforms.data(), nullptr));
+ cl_platform_id platformId = nullptr;
+ for (cl_uint i = 0; i < numPlatforms; ++i)
+ {
+ char platformName[64];
+ if (clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, 64, platformName, nullptr)
+ == CL_SUCCESS
+ && strcmp(devicePlatform, platformName) == 0)
+ {
+ platformId = platforms[i];
+ break;
+ }
+ }
+ if (platformId == nullptr)
+ {
+ SAL_WARN("opencl", "Device platform not found: " << devicePlatform);
+ assert(false);
+ return;
+ }
+
+ cl_uint numDevices;
+ openclcheck(clGetDeviceIDs(platformId, CL_DEVICE_TYPE_ALL, 0, nullptr, &numDevices));
+ vector<cl_device_id> devices(numDevices);
+ openclcheck(
+ clGetDeviceIDs(platformId, CL_DEVICE_TYPE_ALL, numDevices, devices.data(), nullptr));
+ cl_device_id deviceId = nullptr;
+ for (cl_uint i = 0; i < numDevices; ++i)
+ {
+ char name[1024];
+ if (clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 64, name, nullptr) == CL_SUCCESS
+ && strcmp(deviceName, name) == 0)
+ {
+ deviceId = devices[i];
+ break;
+ }
+ }
+ if (deviceId == nullptr)
+ {
+ SAL_WARN("opencl", "Device not found: " << deviceName);
+ assert(false);
+ return;
+ }
+
+ cl_context context;
+ cl_int state;
+ cl_context_properties cps[3];
+ cps[0] = CL_CONTEXT_PLATFORM;
+ cps[1] = reinterpret_cast<cl_context_properties>(platformId);
+ cps[2] = 0;
+ context = clCreateContext(cps, 1, &deviceId, nullptr, nullptr, &state);
+ openclcheck(state);
+ cl_command_queue queue = clCreateCommandQueue(context, deviceId, 0, &state);
+ openclcheck(state);
+
+ // Just a simple OpenCL program, the functionality or results do not really matter.
+ const char* source[] = { "__kernel void testFunction( __global float* input1, __global float* "
+ "input2, __global float* output )"
+ "{"
+ " int gid0 = get_global_id( 0 );"
+ " output[ gid0 ] = input1[ gid0 ] * input2[ gid0 ];"
+ "}" };
+ size_t sourceSize[] = { strlen(source[0]) };
+ cl_program program = clCreateProgramWithSource(context, 1, source, sourceSize, &state);
+ openclcheck(state);
+ state = clBuildProgram(program, 1, &deviceId, nullptr, nullptr, nullptr);
+#ifdef DBG_UTIL
+ if (state != CL_SUCCESS)
+ {
+ size_t length;
+ status
+ = clGetProgramBuildInfo(program, deviceId, CL_PROGRAM_BUILD_LOG, 0, nullptr, &length);
+ vector<char> error(length + 1);
+ status = clGetProgramBuildInfo(program, deviceId, CL_PROGRAM_BUILD_LOG, length,
+ error.data(), nullptr);
+ error[length] = '\0';
+ cerr << "OpenCL driver check build error:" << error.data() << endl;
+ abort();
+ }
+#endif
+ openclcheck(state);
+ cl_kernel kernel = clCreateKernel(program, "testFunction", &state);
+ openclcheck(state);
+
+ // Some random data for the program.
+ constexpr int dataSize = 1000;
+ cl_float inputData1[dataSize];
+ cl_float inputData2[dataSize];
+ cl_float outputData[dataSize];
+ for (int i = 0; i < dataSize; ++i)
+ {
+ inputData1[i] = i * 2;
+ inputData2[i] = i % 100;
+ }
+ cl_mem input1 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
+ sizeof(cl_float) * dataSize, inputData1, &state);
+ openclcheck(state);
+ cl_mem input2 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
+ sizeof(cl_float) * dataSize, inputData2, &state);
+ openclcheck(state);
+ cl_mem output = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
+ sizeof(cl_float) * dataSize, outputData, &state);
+ openclcheck(state);
+ state = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input1);
+ openclcheck(state);
+ state = clSetKernelArg(kernel, 1, sizeof(cl_mem), &input2);
+ openclcheck(state);
+ state = clSetKernelArg(kernel, 2, sizeof(cl_mem), &output);
+ openclcheck(state);
+
+ const size_t globalWorkSize[] = { dataSize };
+ const size_t localSize[1] = { 64 };
+ state = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, globalWorkSize, localSize, 0, nullptr,
+ nullptr);
+ openclcheck(state);
+ openclcheck(clFinish(queue));
+ openclcheck(clEnqueueReadBuffer(queue, output, CL_TRUE, 0, sizeof(cl_float) * dataSize,
+ outputData, 0, nullptr, nullptr));
+ clReleaseMemObject(input1);
+ clReleaseMemObject(input2);
+ clReleaseMemObject(output);
+ clReleaseKernel(kernel);
+ clReleaseProgram(program);
+ clReleaseCommandQueue(queue);
+ clReleaseContext(context);
+}
+
+int main(int argc, char* argv[])
+{
+ if (argc != 3)
+ return 1;
+ runTest(argv[1], argv[2]);
+ // Always return exit code 0, LO itself can do error checking better, we just care
+ // if this helper crashes or not.
+ return 0;
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/opencl/source/OpenCLZone.cxx b/opencl/source/OpenCLZone.cxx
new file mode 100644
index 000000000..01b50ed14
--- /dev/null
+++ b/opencl/source/OpenCLZone.cxx
@@ -0,0 +1,45 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <opencl/openclwrapper.hxx>
+#include <opencl/OpenCLZone.hxx>
+#include <opencl_device.hxx>
+
+#include <memory>
+
+#include <officecfg/Office/Common.hxx>
+#include <com/sun/star/util/XFlushable.hpp>
+#include <com/sun/star/configuration/theDefaultProvider.hpp>
+
+/**
+ * Called from a signal handler if we get
+ * a crash or hang in some CL code.
+ */
+void OpenCLZone::hardDisable()
+{
+ // protect ourselves from double calling etc.
+ static bool bDisabled = false;
+ if (bDisabled)
+ return;
+
+ bDisabled = true;
+
+ std::shared_ptr<comphelper::ConfigurationChanges> xChanges(comphelper::ConfigurationChanges::create());
+ officecfg::Office::Common::Misc::UseOpenCL::set(false, xChanges);
+ xChanges->commit();
+
+ // Force synchronous config write
+ auto xConfProvider = css::configuration::theDefaultProvider::get(comphelper::getProcessComponentContext());
+ css::uno::Reference<css::util::XFlushable> xFlushable(xConfProvider, css::uno::UNO_QUERY_THROW);
+ xFlushable->flush();
+
+ releaseOpenCLEnv(&openclwrapper::gpuEnv);
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/opencl/source/opencl_device.cxx b/opencl/source/opencl_device.cxx
new file mode 100644
index 000000000..1b6800e0e
--- /dev/null
+++ b/opencl/source/opencl_device.cxx
@@ -0,0 +1,610 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <float.h>
+#include <iostream>
+#include <memory>
+#include <vector>
+#include <algorithm>
+
+#include <comphelper/random.hxx>
+#include <o3tl/safeint.hxx>
+#include <opencl/openclconfig.hxx>
+#include <opencl/platforminfo.hxx>
+#include <sal/log.hxx>
+#include <rtl/math.hxx>
+#include <tools/time.hxx>
+
+#include <opencl/OpenCLZone.hxx>
+
+#include <opencl_device.hxx>
+#include <opencl_device_selection.h>
+
+#define INPUTSIZE 15360
+#define OUTPUTSIZE 15360
+
+#define STRINGIFY(...) #__VA_ARGS__"\n"
+
+namespace {
+
+void DS_CHECK_STATUS(cl_int status, char const * name) {
+ if (CL_SUCCESS != status)
+ {
+ SAL_INFO("opencl.device", "Error code is " << status << " at " << name);
+ }
+}
+
+bool bIsDeviceSelected = false;
+ds_device selectedDevice;
+
+struct LibreOfficeDeviceEvaluationIO
+{
+ std::vector<double> input0;
+ std::vector<double> input1;
+ std::vector<double> input2;
+ std::vector<double> input3;
+ std::vector<double> output;
+ unsigned long inputSize;
+ unsigned long outputSize;
+};
+
+const char* source = STRINGIFY(
+\n#if defined(KHR_DP_EXTENSION)
+\n#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+\n#elif defined(AMD_DP_EXTENSION)
+\n#pragma OPENCL EXTENSION cl_amd_fp64 : enable
+\n#endif
+ \n
+ int isNan(fp_t a) { return a != a; }
+ fp_t fsum(fp_t a, fp_t b) { return a + b; }
+
+ fp_t fAverage(__global fp_t* input)
+{
+ fp_t sum = 0;
+ int count = 0;
+ for (int i = 0; i < INPUTSIZE; i++)
+ {
+ if (!isNan(input[i]))
+ {
+ sum = fsum(input[i], sum);
+ count += 1;
+ }
+ }
+ return sum / (fp_t)count;
+}
+ fp_t fMin(__global fp_t* input)
+{
+ fp_t min = MAXFLOAT;
+ for (int i = 0; i < INPUTSIZE; i++)
+ {
+ if (!isNan(input[i]))
+ {
+ min = fmin(input[i], min);
+ }
+ }
+ return min;
+}
+ fp_t fSoP(__global fp_t* input0, __global fp_t* input1)
+{
+ fp_t sop = 0.0;
+ for (int i = 0; i < INPUTSIZE; i++)
+ {
+ sop += (isNan(input0[i]) ? 0 : input0[i]) * (isNan(input1[i]) ? 0 : input1[i]);
+ }
+ return sop;
+}
+ __kernel void DynamicKernel(
+ __global fp_t* result, __global fp_t* input0, __global fp_t* input1, __global fp_t* input2, __global fp_t* input3)
+{
+ int gid0 = get_global_id(0);
+ fp_t tmp0 = fAverage(input0);
+ fp_t tmp1 = fMin(input1) * fSoP(input2, input3);
+ result[gid0] = fsum(tmp0, tmp1);
+}
+ );
+
+size_t sourceSize[] = { strlen(source) };
+
+/* Random number generator */
+double random(double min, double max)
+{
+ if (rtl::math::approxEqual(min, max))
+ return min;
+ return comphelper::rng::uniform_real_distribution(min, max);
+}
+
+/* Populate input */
+void populateInput(std::unique_ptr<LibreOfficeDeviceEvaluationIO> const & testData)
+{
+ double* input0 = testData->input0.data();
+ double* input1 = testData->input1.data();
+ double* input2 = testData->input2.data();
+ double* input3 = testData->input3.data();
+ for (unsigned long i = 0; i < testData->inputSize; i++)
+ {
+ input0[i] = random(0, i);
+ input1[i] = random(0, i);
+ input2[i] = random(0, i);
+ input3[i] = random(0, i);
+ }
+}
+
+/* Evaluate devices */
+ds_status evaluateScoreForDevice(ds_device& rDevice, std::unique_ptr<LibreOfficeDeviceEvaluationIO> const & testData)
+{
+ if (rDevice.eType == DeviceType::OpenCLDevice)
+ {
+ /* Evaluating an OpenCL device */
+ SAL_INFO("opencl.device", "Device: \"" << rDevice.sDeviceName << "\" (OpenCL) evaluation...");
+ cl_int clStatus;
+
+ /* Check for 64-bit float extensions */
+ std::unique_ptr<char[]> aExtInfo;
+ {
+ size_t aDevExtInfoSize = 0;
+
+ OpenCLZone zone;
+ clStatus = clGetDeviceInfo(rDevice.aDeviceID, CL_DEVICE_EXTENSIONS, 0, nullptr, &aDevExtInfoSize);
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clGetDeviceInfo");
+
+ aExtInfo.reset(new char[aDevExtInfoSize]);
+ clStatus = clGetDeviceInfo(rDevice.aDeviceID, CL_DEVICE_EXTENSIONS, sizeof(char) * aDevExtInfoSize, aExtInfo.get(), nullptr);
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clGetDeviceInfo");
+ }
+
+ bool bKhrFp64Flag = false;
+ bool bAmdFp64Flag = false;
+ const char* buildOption = nullptr;
+ std::string tmpStr("-Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16 -DINPUTSIZE=");
+ std::ostringstream tmpOStrStr;
+ tmpOStrStr << std::dec << INPUTSIZE;
+ tmpStr.append(tmpOStrStr.str());
+
+ if ((std::string(aExtInfo.get())).find("cl_khr_fp64") != std::string::npos)
+ {
+ bKhrFp64Flag = true;
+ //buildOption = "-D KHR_DP_EXTENSION -Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16";
+ tmpStr.append(" -DKHR_DP_EXTENSION");
+ buildOption = tmpStr.c_str();
+ SAL_INFO("opencl.device", "... has cl_khr_fp64");
+ }
+ else if ((std::string(aExtInfo.get())).find("cl_amd_fp64") != std::string::npos)
+ {
+ bAmdFp64Flag = true;
+ //buildOption = "-D AMD_DP_EXTENSION -Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16";
+ tmpStr.append(" -DAMD_DP_EXTENSION");
+ buildOption = tmpStr.c_str();
+ SAL_INFO("opencl.device", "... has cl_amd_fp64");
+ }
+
+ if (!bKhrFp64Flag && !bAmdFp64Flag)
+ {
+ /* No 64-bit float support */
+ rDevice.fTime = DBL_MAX;
+ rDevice.bErrors = false;
+ SAL_INFO("opencl.device", "... no fp64 support");
+ }
+ else
+ {
+ /* 64-bit float support present */
+
+ OpenCLZone zone;
+
+ /* Create context and command queue */
+ cl_context clContext = clCreateContext(nullptr, 1, &rDevice.aDeviceID, nullptr, nullptr, &clStatus);
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateContext");
+ cl_command_queue clQueue = clCreateCommandQueue(clContext, rDevice.aDeviceID, 0, &clStatus);
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateCommandQueue");
+
+ /* Build program */
+ cl_program clProgram = clCreateProgramWithSource(clContext, 1, &source, sourceSize, &clStatus);
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateProgramWithSource");
+ clStatus = clBuildProgram(clProgram, 1, &rDevice.aDeviceID, buildOption, nullptr, nullptr);
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clBuildProgram");
+ if (CL_SUCCESS != clStatus)
+ {
+ /* Build program failed */
+ size_t length;
+ char* buildLog;
+ clStatus = clGetProgramBuildInfo(clProgram, rDevice.aDeviceID, CL_PROGRAM_BUILD_LOG, 0, nullptr, &length);
+ buildLog = static_cast<char*>(malloc(length));
+ clGetProgramBuildInfo(clProgram, rDevice.aDeviceID, CL_PROGRAM_BUILD_LOG, length, buildLog, &length);
+ SAL_INFO("opencl.device", "Build Errors:\n" << buildLog);
+ free(buildLog);
+
+ rDevice.fTime = DBL_MAX;
+ rDevice.bErrors = true;
+ }
+ else
+ {
+ /* Build program succeeded */
+ sal_uInt64 kernelTime = tools::Time::GetMonotonicTicks();
+
+ /* Run kernel */
+ cl_kernel clKernel = clCreateKernel(clProgram, "DynamicKernel", &clStatus);
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateKernel");
+ cl_mem clResult = clCreateBuffer(clContext, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->outputSize, testData->output.data(), &clStatus);
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clResult");
+ cl_mem clInput0 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, testData->input0.data(), &clStatus);
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput0");
+ cl_mem clInput1 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, testData->input1.data(), &clStatus);
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput1");
+ cl_mem clInput2 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, testData->input2.data(), &clStatus);
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput2");
+ cl_mem clInput3 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, testData->input3.data(), &clStatus);
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput3");
+ clStatus = clSetKernelArg(clKernel, 0, sizeof(cl_mem), static_cast<void*>(&clResult));
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clResult");
+ clStatus = clSetKernelArg(clKernel, 1, sizeof(cl_mem), static_cast<void*>(&clInput0));
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput0");
+ clStatus = clSetKernelArg(clKernel, 2, sizeof(cl_mem), static_cast<void*>(&clInput1));
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput1");
+ clStatus = clSetKernelArg(clKernel, 3, sizeof(cl_mem), static_cast<void*>(&clInput2));
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput2");
+ clStatus = clSetKernelArg(clKernel, 4, sizeof(cl_mem), static_cast<void*>(&clInput3));
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput3");
+ size_t globalWS[1] = { testData->outputSize };
+ size_t const localSize[1] = { 64 };
+ clStatus = clEnqueueNDRangeKernel(clQueue, clKernel, 1, nullptr, globalWS, localSize, 0, nullptr, nullptr);
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clEnqueueNDRangeKernel");
+ clFinish(clQueue);
+ clReleaseMemObject(clInput3);
+ clReleaseMemObject(clInput2);
+ clReleaseMemObject(clInput1);
+ clReleaseMemObject(clInput0);
+ clReleaseMemObject(clResult);
+ clReleaseKernel(clKernel);
+
+ rDevice.fTime = tools::Time::GetMonotonicTicks() - kernelTime;
+ rDevice.bErrors = false;
+ }
+
+ clReleaseProgram(clProgram);
+ clReleaseCommandQueue(clQueue);
+ clReleaseContext(clContext);
+ }
+ }
+ else
+ {
+ /* Evaluating a Native CPU device */
+ SAL_INFO("opencl.device", "Device: \"CPU\" (Native) evaluation...");
+ sal_uInt64 kernelTime = tools::Time::GetMonotonicTicks();
+
+ unsigned long j;
+ for (j = 0; j < testData->outputSize; j++)
+ {
+ double fAverage = 0.0f;
+ double fMin = DBL_MAX;
+ double fSoP = 0.0f;
+ for (unsigned long i = 0; i < testData->inputSize; i++)
+ {
+ fAverage += testData->input0[i];
+ fMin = std::min(fMin, testData->input1[i]);
+ fSoP += testData->input2[i] * testData->input3[i];
+ }
+ fAverage /= testData->inputSize;
+ testData->output[j] = fAverage + (fMin * fSoP);
+ // Don't run for much longer than one second
+ if (j > 0 && j % 100 == 0)
+ {
+ rDevice.fTime = tools::Time::GetMonotonicTicks() - kernelTime;
+ if (rDevice.fTime >= 1)
+ break;
+ }
+ }
+
+ rDevice.fTime = tools::Time::GetMonotonicTicks() - kernelTime;
+
+ // Scale time to how long it would have taken to go all the way to outputSize
+ rDevice.fTime /= (static_cast<double>(j) / testData->outputSize);
+
+ // InterpretTail - the S/W fallback is nothing like as efficient
+ // as any good openCL implementation: no SIMD, tons of branching
+ // in the inner loops etc. Generously characterise it as only 10x
+ // slower than the above.
+ rDevice.fTime *= 10.0;
+ rDevice.bErrors = false;
+ }
+ return DS_SUCCESS;
+}
+
+ds_status profileDevices(std::unique_ptr<ds_profile> const & pProfile, std::unique_ptr<LibreOfficeDeviceEvaluationIO> const & pTestData)
+{
+ ds_status status = DS_SUCCESS;
+
+ if (!pProfile)
+ return DS_INVALID_PROFILE;
+
+ for (ds_device& rDevice : pProfile->devices)
+ {
+ ds_status evaluatorStatus = evaluateScoreForDevice(rDevice, pTestData);
+ if (evaluatorStatus != DS_SUCCESS)
+ {
+ status = evaluatorStatus;
+ return status;
+ }
+ }
+ return status;
+}
+
+/* Pick best device */
+int pickBestDevice(std::unique_ptr<ds_profile> const & profile)
+{
+ double bestScore = DBL_MAX;
+
+ int nBestDeviceIndex = -1;
+
+ for (std::vector<ds_device>::size_type d = 0; d < profile->devices.size();
+ d++)
+ {
+ ds_device& device = profile->devices[d];
+
+ // Check blacklist and whitelist for actual devices
+ if (device.eType == DeviceType::OpenCLDevice)
+ {
+ // There is a silly impedance mismatch here. Why do we
+ // need two different ways to describe an OpenCL platform
+ // and an OpenCL device driver?
+
+ OpenCLPlatformInfo aPlatform;
+ OpenCLDeviceInfo aDevice;
+
+ // We know that only the below fields are used by checkForKnownBadCompilers()
+ aPlatform.maVendor = OStringToOUString(device.sPlatformVendor, RTL_TEXTENCODING_UTF8);
+ aDevice.maName = OStringToOUString(device.sDeviceName, RTL_TEXTENCODING_UTF8);
+ aDevice.maDriver = OStringToOUString(device.sDriverVersion, RTL_TEXTENCODING_UTF8);
+
+ // If blacklisted or not whitelisted, ignore it
+ if (OpenCLConfig::get().checkImplementation(aPlatform, aDevice))
+ {
+ SAL_INFO("opencl.device", "Device[" << d << "] " << device.sDeviceName << " is blacklisted or not whitelisted");
+ device.fTime = DBL_MAX;
+ device.bErrors = false;
+ }
+ }
+
+ double fScore = DBL_MAX;
+ if (device.fTime >= 0.0
+ || rtl::math::approxEqual(device.fTime, DBL_MAX))
+ {
+ fScore = device.fTime;
+ }
+ else
+ {
+ SAL_INFO("opencl.device", "Unusual null score");
+ }
+
+ if (device.eType == DeviceType::OpenCLDevice)
+ {
+ SAL_INFO("opencl.device", "Device[" << d << "] " << device.sDeviceName << " (OpenCL) score is " << fScore);
+ }
+ else
+ {
+ SAL_INFO("opencl.device", "Device[" << d << "] CPU (Native) score is " << fScore);
+ }
+ if (fScore < bestScore)
+ {
+ bestScore = fScore;
+ nBestDeviceIndex = d;
+ }
+ }
+ if (nBestDeviceIndex != -1 && profile->devices[nBestDeviceIndex].eType == DeviceType::OpenCLDevice)
+ {
+ SAL_INFO("opencl.device", "Selected Device[" << nBestDeviceIndex << "]: " << profile->devices[nBestDeviceIndex].sDeviceName << "(OpenCL).");
+ }
+ else
+ {
+ SAL_INFO("opencl.device", "Selected Device[" << nBestDeviceIndex << "]: CPU (Native).");
+ }
+ return nBestDeviceIndex;
+}
+
+/* Return device ID for matching device name */
+int matchDevice(std::unique_ptr<ds_profile> const & profile, char* deviceName)
+{
+ int deviceMatch = -1;
+ for (size_t d = 0; d < profile->devices.size() - 1; d++)
+ {
+ if (profile->devices[d].sDeviceName.indexOf(deviceName) != -1)
+ deviceMatch = d;
+ }
+ if (std::string("NATIVE_CPU").find(deviceName) != std::string::npos)
+ deviceMatch = profile->devices.size() - 1;
+ return deviceMatch;
+}
+
+class LogWriter
+{
+private:
+ SvFileStream maStream;
+public:
+ explicit LogWriter(OUString const & aFileName)
+ : maStream(aFileName, StreamMode::WRITE)
+ {}
+
+ void text(const OString& rText)
+ {
+ maStream.WriteOString(rText);
+ maStream.WriteChar('\n');
+ }
+
+ void log(const OString& rKey, const OString& rValue)
+ {
+ maStream.WriteOString(rKey);
+ maStream.WriteCharPtr(": ");
+ maStream.WriteOString(rValue);
+ maStream.WriteChar('\n');
+ }
+
+ void log(const OString& rKey, int rValue)
+ {
+ log(rKey, OString::number(rValue));
+ }
+
+ void log(const OString& rKey, bool rValue)
+ {
+ log(rKey, OString::boolean(rValue));
+ }
+};
+
+
+void writeDevicesLog(std::unique_ptr<ds_profile> const & rProfile, OUString const & sProfilePath, int nSelectedIndex)
+{
+ OUString aCacheFile(sProfilePath + "opencl_devices.log");
+ LogWriter aWriter(aCacheFile);
+
+ int nIndex = 0;
+
+ for (const ds_device& rDevice : rProfile->devices)
+ {
+ if (rDevice.eType == DeviceType::OpenCLDevice)
+ {
+ aWriter.log("Device Index", nIndex);
+ aWriter.log(" Selected", nIndex == nSelectedIndex);
+ aWriter.log(" Device Name", rDevice.sDeviceName);
+ aWriter.log(" Device Vendor", rDevice.sDeviceVendor);
+ aWriter.log(" Device Version", rDevice.sDeviceVersion);
+ aWriter.log(" Driver Version", rDevice.sDriverVersion);
+ aWriter.log(" Device Type", rDevice.sDeviceType);
+ aWriter.log(" Device Extensions", rDevice.sDeviceExtensions);
+ aWriter.log(" Device OpenCL C Version", rDevice.sDeviceOpenCLVersion);
+
+ aWriter.log(" Device Available", rDevice.bDeviceAvailable);
+ aWriter.log(" Device Compiler Available", rDevice.bDeviceCompilerAvailable);
+ aWriter.log(" Device Linker Available", rDevice.bDeviceLinkerAvailable);
+
+ aWriter.log(" Platform Name", rDevice.sPlatformName);
+ aWriter.log(" Platform Vendor", rDevice.sPlatformVendor);
+ aWriter.log(" Platform Version", rDevice.sPlatformVersion);
+ aWriter.log(" Platform Profile", rDevice.sPlatformProfile);
+ aWriter.log(" Platform Extensions", rDevice.sPlatformExtensions);
+ aWriter.text("");
+ }
+ nIndex++;
+ }
+}
+
+} // end anonymous namespace
+
+ds_device const & getDeviceSelection(
+ OUString const & sProfilePath, bool bForceSelection)
+{
+ /* Run only if device is not yet selected */
+ if (!bIsDeviceSelected || bForceSelection)
+ {
+ /* Setup */
+ std::unique_ptr<ds_profile> aProfile;
+ ds_status status;
+ status = initDSProfile(aProfile, "LibreOffice v1");
+
+ if (status != DS_SUCCESS)
+ {
+ // failed to initialize profile.
+ selectedDevice.eType = DeviceType::NativeCPU;
+ return selectedDevice;
+ }
+
+ /* Try reading scores from file */
+ OUString sFilePath = sProfilePath + "opencl_profile.xml";
+
+ if (!bForceSelection)
+ {
+ status = readProfile(sFilePath, aProfile);
+ }
+ else
+ {
+ status = DS_INVALID_PROFILE;
+ SAL_INFO("opencl.device", "Performing forced profiling.");
+ }
+ if (DS_SUCCESS != status)
+ {
+ if (!bForceSelection)
+ {
+ SAL_INFO("opencl.device", "Profile file not available (" << sFilePath << "); performing profiling.");
+ }
+
+ /* Populate input data for micro-benchmark */
+ std::unique_ptr<LibreOfficeDeviceEvaluationIO> testData(new LibreOfficeDeviceEvaluationIO);
+ testData->inputSize = INPUTSIZE;
+ testData->outputSize = OUTPUTSIZE;
+ testData->input0.resize(testData->inputSize);
+ testData->input1.resize(testData->inputSize);
+ testData->input2.resize(testData->inputSize);
+ testData->input3.resize(testData->inputSize);
+ testData->output.resize(testData->outputSize);
+ populateInput(testData);
+
+ /* Perform evaluations */
+ status = profileDevices(aProfile, testData);
+
+ if (DS_SUCCESS == status)
+ {
+ /* Write scores to file */
+ status = writeProfile(sFilePath, aProfile);
+ if (DS_SUCCESS == status)
+ {
+ SAL_INFO("opencl.device", "Scores written to file (" << sFilePath << ").");
+ }
+ else
+ {
+ SAL_INFO("opencl.device", "Error saving scores to file (" << sFilePath << "); scores not written to file.");
+ }
+ }
+ else
+ {
+ SAL_INFO("opencl.device", "Unable to evaluate performance; scores not written to file.");
+ }
+ }
+ else
+ {
+ SAL_INFO("opencl.device", "Profile read from file (" << sFilePath << ").");
+ }
+
+ /* Pick best device */
+ int bestDeviceIdx = pickBestDevice(aProfile);
+
+ /* Override if necessary */
+ char* overrideDeviceStr = getenv("SC_OPENCL_DEVICE_OVERRIDE");
+ if (nullptr != overrideDeviceStr)
+ {
+ int overrideDeviceIdx = matchDevice(aProfile, overrideDeviceStr);
+ if (-1 != overrideDeviceIdx)
+ {
+ SAL_INFO("opencl.device", "Overriding Device Selection (SC_OPENCL_DEVICE_OVERRIDE=" << overrideDeviceStr << ").");
+ bestDeviceIdx = overrideDeviceIdx;
+ if (aProfile->devices[bestDeviceIdx].eType == DeviceType::OpenCLDevice)
+ {
+ SAL_INFO("opencl.device", "Selected Device[" << bestDeviceIdx << "]: " << aProfile->devices[bestDeviceIdx].sDeviceName << " (OpenCL).");
+ }
+ else
+ {
+ SAL_INFO("opencl.device", "Selected Device[" << bestDeviceIdx << "]: CPU (Native).");
+ }
+ }
+ else
+ {
+ SAL_INFO("opencl.device", "Ignoring invalid SC_OPENCL_DEVICE_OVERRIDE=" << overrideDeviceStr << ").");
+ }
+ }
+
+ /* Final device selection */
+ if (bestDeviceIdx >=0 && o3tl::make_unsigned( bestDeviceIdx ) < aProfile->devices.size() )
+ {
+ selectedDevice = aProfile->devices[bestDeviceIdx];
+ bIsDeviceSelected = true;
+
+ writeDevicesLog(aProfile, sProfilePath, bestDeviceIdx);
+ } else {
+ selectedDevice.eType = DeviceType::NativeCPU;
+ }
+ }
+ return selectedDevice;
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/opencl/source/openclconfig.cxx b/opencl/source/openclconfig.cxx
new file mode 100644
index 000000000..643d9fc2a
--- /dev/null
+++ b/opencl/source/openclconfig.cxx
@@ -0,0 +1,257 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <sal/config.h>
+
+#include <string_view>
+
+#include <unicode/regex.h>
+
+#include <comphelper/configuration.hxx>
+#include <officecfg/Office/Common.hxx>
+#include <opencl/openclconfig.hxx>
+#include <opencl/platforminfo.hxx>
+#include <rtl/ustring.hxx>
+#include <rtl/ustrbuf.hxx>
+#include <sal/log.hxx>
+#include <sal/types.h>
+
+OpenCLConfig::OpenCLConfig() :
+ mbUseOpenCL(true)
+{
+ // This entry we have had for some time (when blacklisting was
+ // done elsewhere in the code), so presumably there is a known
+ // good reason for it.
+ maBlackList.insert(ImplMatcher("Windows", "", "Intel\\(R\\) Corporation", "", "9\\.17\\.10\\.2884"));
+
+ // This is what I have tested on Linux and it works for our unit tests.
+ maWhiteList.insert(ImplMatcher("Linux", "", "Advanced Micro Devices, Inc\\.", "", "1445\\.5 \\(sse2,avx\\)"));
+
+ // For now, assume that AMD, Intel and NVIDIA drivers are good
+ maWhiteList.insert(ImplMatcher("", "", "Advanced Micro Devices, Inc\\.", "", ""));
+ maWhiteList.insert(ImplMatcher("", "", "Intel\\(R\\) Corporation", "", ""));
+ maWhiteList.insert(ImplMatcher("", "", "NVIDIA Corporation", "", ""));
+}
+
+bool OpenCLConfig::operator== (const OpenCLConfig& r) const
+{
+ return (mbUseOpenCL == r.mbUseOpenCL &&
+ maBlackList == r.maBlackList &&
+ maWhiteList == r.maWhiteList);
+}
+
+bool OpenCLConfig::operator!= (const OpenCLConfig& r) const
+{
+ return !operator== (r);
+}
+
+namespace {
+
+css::uno::Sequence<OUString> SetOfImplMatcherToStringSequence(const OpenCLConfig::ImplMatcherSet& rSet)
+{
+ css::uno::Sequence<OUString> result(rSet.size());
+
+ size_t n(0);
+ for (const auto& rItem : rSet)
+ {
+ result[n++] =
+ rItem.maOS.replaceAll("%", "%25").replaceAll("/", "%2F").replaceAll(";", "%3B") + "/" +
+ rItem.maOSVersion.replaceAll("%", "%25").replaceAll("/", "%2F").replaceAll(";", "%3B") + "/" +
+ rItem.maPlatformVendor.replaceAll("%", "%25").replaceAll("/", "%2F").replaceAll(";", "%3B") + "/" +
+ rItem.maDevice.replaceAll("%", "%25").replaceAll("/", "%2F").replaceAll(";", "%3B") + "/" +
+ rItem.maDriverVersion.replaceAll("%", "%25").replaceAll("/", "%2F").replaceAll(";", "%3B");
+ }
+
+ return result;
+}
+
+OUString getToken(const OUString& string, sal_Int32& index)
+{
+ OUString token(string.getToken(0, '/', index));
+ OUStringBuffer result;
+ sal_Int32 i(0);
+ sal_Int32 p;
+ while ((p = token.indexOf('%', i)) >= 0)
+ {
+ if (p > i)
+ result.append(std::u16string_view(token).substr(i, p - i));
+ if (p < token.getLength() - 2)
+ {
+ result.append(sal_Unicode(token.copy(p+1, 2).toInt32(16)));
+ i = p + 3;
+ }
+ else
+ {
+ i = token.getLength();
+ }
+ }
+ result.append(std::u16string_view(token).substr(i));
+
+ return result.makeStringAndClear();
+}
+
+OpenCLConfig::ImplMatcherSet StringSequenceToSetOfImplMatcher(const css::uno::Sequence<OUString>& rSequence)
+{
+ OpenCLConfig::ImplMatcherSet result;
+
+ for (const auto& rItem : rSequence)
+ {
+ OpenCLConfig::ImplMatcher m;
+ sal_Int32 index(0);
+ m.maOS = getToken(rItem, index);
+ m.maOSVersion = getToken(rItem, index);
+ m.maPlatformVendor = getToken(rItem, index);
+ m.maDevice = getToken(rItem, index);
+ m.maDriverVersion = getToken(rItem, index);
+
+ result.insert(m);
+ }
+
+ return result;
+}
+
+bool match(const OUString& rPattern, const OUString& rInput)
+{
+ if (rPattern.isEmpty())
+ return true;
+
+ UErrorCode nIcuError(U_ZERO_ERROR);
+ icu::UnicodeString sIcuPattern(reinterpret_cast<const UChar*>(rPattern.getStr()), rPattern.getLength());
+ icu::UnicodeString sIcuInput(reinterpret_cast<const UChar*>(rInput.getStr()), rInput.getLength());
+ icu::RegexMatcher aMatcher(sIcuPattern, sIcuInput, 0, nIcuError);
+
+ return U_SUCCESS(nIcuError) && aMatcher.matches(nIcuError) && U_SUCCESS(nIcuError);
+}
+
+bool match(const OpenCLConfig::ImplMatcher& rListEntry, const OpenCLPlatformInfo& rPlatform, const OpenCLDeviceInfo& rDevice)
+{
+#if defined(_WIN32)
+ if (!rListEntry.maOS.isEmpty() && rListEntry.maOS != "Windows")
+ return false;
+#elif defined LINUX
+ if (!rListEntry.maOS.isEmpty() && rListEntry.maOS != "Linux")
+ return false;
+#elif defined MACOSX
+ if (!rListEntry.maOS.isEmpty() && rListEntry.maOS != "OS X")
+ return false;
+#endif
+
+ // OS version check not yet implemented
+
+ if (!match(rListEntry.maPlatformVendor, rPlatform.maVendor))
+ return false;
+
+ if (!match(rListEntry.maDevice, rDevice.maName))
+ return false;
+
+ if (!match(rListEntry.maDriverVersion, rDevice.maDriver))
+ return false;
+
+ return true;
+}
+
+bool match(const OpenCLConfig::ImplMatcherSet& rList, const OpenCLPlatformInfo& rPlatform, const OpenCLDeviceInfo& rDevice, const char* sKindOfList)
+{
+ for (const auto& rListEntry : rList)
+ {
+ SAL_INFO("opencl", "Looking for match for platform=" << rPlatform << ", device=" << rDevice <<
+ " in " << sKindOfList << " entry=" << rListEntry);
+
+ if (match(rListEntry, rPlatform, rDevice))
+ {
+ SAL_INFO("opencl", "Match!");
+ return true;
+ }
+ }
+ return false;
+}
+
+} // anonymous namespace
+
+OpenCLConfig OpenCLConfig::get()
+{
+ OpenCLConfig result;
+
+ result.mbUseOpenCL = officecfg::Office::Common::Misc::UseOpenCL::get();
+
+ result.maBlackList = StringSequenceToSetOfImplMatcher(officecfg::Office::Common::Misc::OpenCLBlackList::get());
+ result.maWhiteList = StringSequenceToSetOfImplMatcher(officecfg::Office::Common::Misc::OpenCLWhiteList::get());
+
+ return result;
+}
+
+void OpenCLConfig::set()
+{
+ std::shared_ptr<comphelper::ConfigurationChanges> batch(comphelper::ConfigurationChanges::create());
+
+ officecfg::Office::Common::Misc::UseOpenCL::set(mbUseOpenCL, batch);
+ officecfg::Office::Common::Misc::OpenCLBlackList::set(SetOfImplMatcherToStringSequence(maBlackList), batch);
+ officecfg::Office::Common::Misc::OpenCLWhiteList::set(SetOfImplMatcherToStringSequence(maWhiteList), batch);
+
+ batch->commit();
+}
+
+bool OpenCLConfig::checkImplementation(const OpenCLPlatformInfo& rPlatform, const OpenCLDeviceInfo& rDevice) const
+{
+ // Check blacklist of known bad OpenCL implementations
+ if (match(maBlackList, rPlatform, rDevice, "blacklist"))
+ {
+ SAL_INFO("opencl", "Rejecting");
+ return true;
+ }
+
+ // Check for whitelist of known good OpenCL implementations
+ if (match(maWhiteList, rPlatform, rDevice, "whitelist"))
+ {
+ SAL_INFO("opencl", "Approving");
+ return false;
+ }
+
+ // Fallback: reject
+ SAL_INFO("opencl", "Fallback: rejecting platform=" << rPlatform << ", device=" << rDevice);
+ return true;
+}
+
+std::ostream& operator<<(std::ostream& rStream, const OpenCLConfig& rConfig)
+{
+ rStream << "{"
+ "UseOpenCL=" << (rConfig.mbUseOpenCL ? "YES" : "NO") << ","
+ "BlackList=" << rConfig.maBlackList << ","
+ "WhiteList=" << rConfig.maWhiteList <<
+ "}";
+ return rStream;
+}
+
+std::ostream& operator<<(std::ostream& rStream, const OpenCLConfig::ImplMatcher& rImpl)
+{
+ rStream << "{"
+ "OS=" << rImpl.maOS << ","
+ "OSVersion=" << rImpl.maOSVersion << ","
+ "PlatformVendor=" << rImpl.maPlatformVendor << ","
+ "Device=" << rImpl.maDevice << ","
+ "DriverVersion=" << rImpl.maDriverVersion <<
+ "}";
+
+ return rStream;
+}
+
+std::ostream& operator<<(std::ostream& rStream, const OpenCLConfig::ImplMatcherSet& rSet)
+{
+ rStream << "{";
+ for (auto i = rSet.cbegin(); i != rSet.cend(); ++i)
+ {
+ if (i != rSet.cbegin())
+ rStream << ",";
+ rStream << *i;
+ }
+ rStream << "}";
+ return rStream;
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/opencl/source/openclwrapper.cxx b/opencl/source/openclwrapper.cxx
new file mode 100644
index 000000000..aea040c73
--- /dev/null
+++ b/opencl/source/openclwrapper.cxx
@@ -0,0 +1,978 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <config_folders.h>
+
+#include <opencl_device.hxx>
+#include <opencl_device_selection.h>
+
+#include <opencl/openclconfig.hxx>
+#include <opencl/openclwrapper.hxx>
+#include <opencl/platforminfo.hxx>
+#include <osl/file.hxx>
+#include <rtl/bootstrap.hxx>
+#include <rtl/digest.h>
+#include <rtl/strbuf.hxx>
+#include <rtl/ustring.hxx>
+#include <sal/config.h>
+#include <sal/log.hxx>
+#include <opencl/OpenCLZone.hxx>
+
+#include <memory>
+
+#include <stdlib.h>
+
+#include <officecfg/Office/Common.hxx>
+
+#ifdef _WIN32
+#include <prewin.h>
+#include <postwin.h>
+#define OPENCL_DLL_NAME "OpenCL.dll"
+#elif defined(MACOSX)
+#define OPENCL_DLL_NAME nullptr
+#else
+#define OPENCL_DLL_NAME "libOpenCL.so.1"
+#endif
+
+#ifdef _WIN32_WINNT_WINBLUE
+#include <VersionHelpers.h>
+#endif
+
+#define DEVICE_NAME_LENGTH 1024
+#define DRIVER_VERSION_LENGTH 1024
+#define PLATFORM_VERSION_LENGTH 1024
+
+#define CHECK_OPENCL(status,name) \
+if( status != CL_SUCCESS ) \
+{ \
+ SAL_WARN( "opencl", "OpenCL error code " << status << " at " SAL_DETAIL_WHERE "from " name ); \
+ return false; \
+}
+
+using namespace std;
+
+namespace {
+
+bool bIsInited = false;
+
+}
+
+namespace openclwrapper {
+
+GPUEnv gpuEnv;
+sal_uInt64 kernelFailures = 0;
+
+namespace
+{
+
+OString generateMD5(const void* pData, size_t length)
+{
+ sal_uInt8 pBuffer[RTL_DIGEST_LENGTH_MD5];
+ rtlDigestError aError = rtl_digest_MD5(pData, length,
+ pBuffer, RTL_DIGEST_LENGTH_MD5);
+ SAL_WARN_IF(aError != rtl_Digest_E_None, "opencl", "md5 generation failed");
+
+ OStringBuffer aBuffer;
+ const char* const pString = "0123456789ABCDEF";
+ for(sal_uInt8 val : pBuffer)
+ {
+ aBuffer.append(pString[val/16]);
+ aBuffer.append(pString[val%16]);
+ }
+ return aBuffer.makeStringAndClear();
+}
+
+OString const & getCacheFolder()
+{
+ static OString const aCacheFolder = [&]()
+ {
+ OUString url("${$BRAND_BASE_DIR/" LIBO_ETC_FOLDER "/" SAL_CONFIGFILE("bootstrap") ":UserInstallation}/cache/");
+ rtl::Bootstrap::expandMacros(url);
+
+ osl::Directory::create(url);
+
+ return OUStringToOString(url, RTL_TEXTENCODING_UTF8);
+ }();
+ return aCacheFolder;
+}
+
+}
+
+static bool initializeCommandQueue(GPUEnv& aGpuEnv)
+{
+ OpenCLZone zone;
+
+ cl_int nState;
+ cl_command_queue command_queue[OPENCL_CMDQUEUE_SIZE];
+
+ for (int i = 0; i < OPENCL_CMDQUEUE_SIZE; ++i)
+ {
+ command_queue[i] = clCreateCommandQueue(aGpuEnv.mpContext, aGpuEnv.mpDevID, 0, &nState);
+ if (nState != CL_SUCCESS)
+ SAL_WARN("opencl", "clCreateCommandQueue failed: " << errorString(nState));
+
+ if (command_queue[i] == nullptr || nState != CL_SUCCESS)
+ {
+ // Release all command queues created so far.
+ for (int j = 0; j <= i; ++j)
+ {
+ if (command_queue[j])
+ {
+ clReleaseCommandQueue(command_queue[j]);
+ command_queue[j] = nullptr;
+ }
+ }
+
+ clReleaseContext(aGpuEnv.mpContext);
+ SAL_WARN("opencl", "failed to set/switch opencl device");
+ return false;
+ }
+
+ SAL_INFO("opencl", "Created command queue " << command_queue[i] << " for context " << aGpuEnv.mpContext);
+ }
+
+ for (int i = 0; i < OPENCL_CMDQUEUE_SIZE; ++i)
+ {
+ aGpuEnv.mpCmdQueue[i] = command_queue[i];
+ }
+ aGpuEnv.mbCommandQueueInitialized = true;
+ return true;
+}
+
+void setKernelEnv( KernelEnv *envInfo )
+{
+ if (!gpuEnv.mbCommandQueueInitialized)
+ {
+ initializeCommandQueue(gpuEnv);
+ }
+
+ envInfo->mpkContext = gpuEnv.mpContext;
+ envInfo->mpkProgram = gpuEnv.mpArryPrograms[0];
+
+ assert(gpuEnv.mnCmdQueuePos < OPENCL_CMDQUEUE_SIZE);
+ envInfo->mpkCmdQueue = gpuEnv.mpCmdQueue[gpuEnv.mnCmdQueuePos];
+}
+
+namespace {
+
+OString createFileName(cl_device_id deviceId, const char* clFileName)
+{
+ OString fileName(clFileName);
+ sal_Int32 nIndex = fileName.lastIndexOf(".cl");
+ if(nIndex > 0)
+ fileName = fileName.copy(0, nIndex);
+
+ char deviceName[DEVICE_NAME_LENGTH] = {0};
+ clGetDeviceInfo(deviceId, CL_DEVICE_NAME,
+ sizeof(deviceName), deviceName, nullptr);
+
+ char driverVersion[DRIVER_VERSION_LENGTH] = {0};
+ clGetDeviceInfo(deviceId, CL_DRIVER_VERSION,
+ sizeof(driverVersion), driverVersion, nullptr);
+
+ cl_platform_id platformId;
+ clGetDeviceInfo(deviceId, CL_DEVICE_PLATFORM,
+ sizeof(platformId), &platformId, nullptr);
+
+ char platformVersion[PLATFORM_VERSION_LENGTH] = {0};
+ clGetPlatformInfo(platformId, CL_PLATFORM_VERSION, sizeof(platformVersion),
+ platformVersion, nullptr);
+
+ // create hash for deviceName + driver version + platform version
+ OString aString = rtl::OStringView(deviceName) + driverVersion + platformVersion;
+ OString aHash = generateMD5(aString.getStr(), aString.getLength());
+
+ return getCacheFolder() + fileName + "-" + aHash + ".bin";
+}
+
+std::vector<std::shared_ptr<osl::File> > binaryGenerated( const char * clFileName, cl_context context )
+{
+ size_t numDevices=0;
+
+ std::vector<std::shared_ptr<osl::File> > aGeneratedFiles;
+ cl_int clStatus = clGetContextInfo( context, CL_CONTEXT_DEVICES,
+ 0, nullptr, &numDevices );
+ numDevices /= sizeof(numDevices);
+
+ if(clStatus != CL_SUCCESS)
+ return aGeneratedFiles;
+
+ assert(numDevices == 1);
+
+ // grab the handle to the device in the context.
+ cl_device_id pDevID;
+ clStatus = clGetContextInfo( context, CL_CONTEXT_DEVICES,
+ sizeof( cl_device_id ), &pDevID, nullptr );
+
+ if(clStatus != CL_SUCCESS)
+ return aGeneratedFiles;
+
+ assert(pDevID == gpuEnv.mpDevID);
+
+ OString fileName = createFileName(gpuEnv.mpDevID, clFileName);
+ auto pNewFile = std::make_shared<osl::File>(OStringToOUString(fileName, RTL_TEXTENCODING_UTF8));
+ if(pNewFile->open(osl_File_OpenFlag_Read) == osl::FileBase::E_None)
+ {
+ aGeneratedFiles.push_back(pNewFile);
+ SAL_INFO("opencl.file", "Opening binary file '" << fileName << "' for reading: success");
+ }
+ else
+ {
+ SAL_INFO("opencl.file", "Opening binary file '" << fileName << "' for reading: FAIL");
+ }
+
+ return aGeneratedFiles;
+}
+
+bool writeBinaryToFile( const OString& rFileName, const char* binary, size_t numBytes )
+{
+ osl::File file(OStringToOUString(rFileName, RTL_TEXTENCODING_UTF8));
+ osl::FileBase::RC status = file.open(
+ osl_File_OpenFlag_Write | osl_File_OpenFlag_Create );
+
+ if(status != osl::FileBase::E_None)
+ return false;
+
+ sal_uInt64 nBytesWritten = 0;
+ file.write( binary, numBytes, nBytesWritten );
+
+ assert(numBytes == nBytesWritten);
+
+ return true;
+}
+
+}
+
+bool generatBinFromKernelSource( cl_program program, const char * clFileName )
+{
+ cl_uint numDevices;
+
+ cl_int clStatus = clGetProgramInfo( program, CL_PROGRAM_NUM_DEVICES,
+ sizeof(numDevices), &numDevices, nullptr );
+ CHECK_OPENCL( clStatus, "clGetProgramInfo" );
+
+ assert(numDevices == 1);
+
+ cl_device_id pDevID;
+ /* grab the handle to the device in the program. */
+ clStatus = clGetProgramInfo( program, CL_PROGRAM_DEVICES,
+ sizeof(cl_device_id), &pDevID, nullptr );
+ CHECK_OPENCL( clStatus, "clGetProgramInfo" );
+
+ /* figure out the size of the binary. */
+ size_t binarySize;
+
+ clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES,
+ sizeof(size_t), &binarySize, nullptr );
+ CHECK_OPENCL( clStatus, "clGetProgramInfo" );
+
+ /* copy over the generated binary. */
+ if ( binarySize != 0 )
+ {
+ std::unique_ptr<char[]> binary(new char[binarySize]);
+ clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARIES,
+ sizeof(char *), &binary, nullptr );
+ CHECK_OPENCL(clStatus,"clGetProgramInfo");
+
+ OString fileName = createFileName(pDevID, clFileName);
+ if ( !writeBinaryToFile( fileName,
+ binary.get(), binarySize ) )
+ SAL_INFO("opencl.file", "Writing binary file '" << fileName << "': FAIL");
+ else
+ SAL_INFO("opencl.file", "Writing binary file '" << fileName << "': success");
+ }
+ return true;
+}
+
+namespace {
+
+struct OpenCLEnv
+{
+ cl_platform_id mpOclPlatformID;
+ cl_context mpOclContext;
+ cl_device_id mpOclDevsID;
+};
+
+bool initOpenCLAttr( OpenCLEnv * env )
+{
+ if ( gpuEnv.mnIsUserCreated )
+ return true;
+
+ gpuEnv.mpContext = env->mpOclContext;
+ gpuEnv.mpPlatformID = env->mpOclPlatformID;
+ gpuEnv.mpDevID = env->mpOclDevsID;
+
+ gpuEnv.mnIsUserCreated = 1;
+
+ gpuEnv.mbCommandQueueInitialized = false;
+
+ gpuEnv.mnCmdQueuePos = 0; // default to 0.
+
+ return false;
+}
+
+bool buildProgram(const char* buildOption, GPUEnv* gpuInfo, int idx)
+{
+ cl_int clStatus;
+ //char options[512];
+ // create a cl program executable for all the devices specified
+ clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &gpuInfo->mpDevID,
+ buildOption, nullptr, nullptr);
+
+ if ( clStatus != CL_SUCCESS )
+ {
+ size_t length;
+ clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
+ CL_PROGRAM_BUILD_LOG, 0, nullptr, &length);
+ if ( clStatus != CL_SUCCESS )
+ {
+ return false;
+ }
+
+ std::unique_ptr<char[]> buildLog(new char[length]);
+ clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
+ CL_PROGRAM_BUILD_LOG, length, buildLog.get(), &length );
+ if ( clStatus != CL_SUCCESS )
+ {
+ return false;
+ }
+
+ OString aBuildLogFileURL = getCacheFolder() + "kernel-build.log";
+ osl::File aBuildLogFile(OStringToOUString(aBuildLogFileURL, RTL_TEXTENCODING_UTF8));
+ osl::FileBase::RC status = aBuildLogFile.open(
+ osl_File_OpenFlag_Write | osl_File_OpenFlag_Create );
+
+ if(status != osl::FileBase::E_None)
+ return false;
+
+ sal_uInt64 nBytesWritten = 0;
+ aBuildLogFile.write( buildLog.get(), length, nBytesWritten );
+
+ return false;
+ }
+
+ return true;
+}
+
+}
+
+bool buildProgramFromBinary(const char* buildOption, GPUEnv* gpuInfo, const char* filename, int idx)
+{
+ size_t numDevices;
+ cl_int clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES,
+ 0, nullptr, &numDevices );
+ numDevices /= sizeof(numDevices);
+ CHECK_OPENCL( clStatus, "clGetContextInfo" );
+
+ std::vector<std::shared_ptr<osl::File> > aGeneratedFiles = binaryGenerated(
+ filename, gpuInfo->mpContext );
+
+ if (aGeneratedFiles.size() == numDevices)
+ {
+ std::unique_ptr<size_t[]> length(new size_t[numDevices]);
+ std::unique_ptr<unsigned char*[]> pBinary(new unsigned char*[numDevices]);
+ for(size_t i = 0; i < numDevices; ++i)
+ {
+ sal_uInt64 nSize;
+ aGeneratedFiles[i]->getSize(nSize);
+ unsigned char* binary = new unsigned char[nSize];
+ sal_uInt64 nBytesRead;
+ aGeneratedFiles[i]->read(binary, nSize, nBytesRead);
+ if(nSize != nBytesRead)
+ assert(false);
+
+ length[i] = nBytesRead;
+
+ pBinary[i] = binary;
+ }
+
+ // grab the handles to all of the devices in the context.
+ std::unique_ptr<cl_device_id[]> pArryDevsID(new cl_device_id[numDevices]);
+ clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES,
+ sizeof( cl_device_id ) * numDevices, pArryDevsID.get(), nullptr );
+
+ if(clStatus != CL_SUCCESS)
+ {
+ for(size_t i = 0; i < numDevices; ++i)
+ {
+ delete[] pBinary[i];
+ }
+ return false;
+ }
+
+ cl_int binary_status;
+
+ gpuInfo->mpArryPrograms[idx] = clCreateProgramWithBinary( gpuInfo->mpContext,numDevices,
+ pArryDevsID.get(), length.get(), const_cast<const unsigned char**>(pBinary.get()),
+ &binary_status, &clStatus );
+ if(clStatus != CL_SUCCESS)
+ {
+ // something went wrong, fall back to compiling from source
+ return false;
+ }
+ SAL_INFO("opencl", "Created program " << gpuInfo->mpArryPrograms[idx] << " from binary");
+ for(size_t i = 0; i < numDevices; ++i)
+ {
+ delete[] pBinary[i];
+ }
+ }
+
+ if ( !gpuInfo->mpArryPrograms[idx] )
+ {
+ return false;
+ }
+ return buildProgram(buildOption, gpuInfo, idx);
+}
+
+namespace {
+
+void checkDeviceForDoubleSupport(cl_device_id deviceId, bool& bKhrFp64, bool& bAmdFp64)
+{
+ OpenCLZone zone;
+
+ bKhrFp64 = false;
+ bAmdFp64 = false;
+
+ // Check device extensions for double type
+ size_t aDevExtInfoSize = 0;
+
+ cl_uint clStatus = clGetDeviceInfo( deviceId, CL_DEVICE_EXTENSIONS, 0, nullptr, &aDevExtInfoSize );
+ if( clStatus != CL_SUCCESS )
+ return;
+
+ std::unique_ptr<char[]> pExtInfo(new char[aDevExtInfoSize]);
+
+ clStatus = clGetDeviceInfo( deviceId, CL_DEVICE_EXTENSIONS,
+ sizeof(char) * aDevExtInfoSize, pExtInfo.get(), nullptr);
+
+ if( clStatus != CL_SUCCESS )
+ return;
+
+ if ( strstr( pExtInfo.get(), "cl_khr_fp64" ) )
+ {
+ bKhrFp64 = true;
+ }
+ else
+ {
+ // Check if cl_amd_fp64 extension is supported
+ if ( strstr( pExtInfo.get(), "cl_amd_fp64" ) )
+ bAmdFp64 = true;
+ }
+}
+
+bool initOpenCLRunEnv( GPUEnv *gpuInfo )
+{
+ OpenCLZone zone;
+ cl_uint nPreferredVectorWidthFloat;
+ char pName[64];
+
+ bool bKhrFp64 = false;
+ bool bAmdFp64 = false;
+
+ checkDeviceForDoubleSupport(gpuInfo->mpDevID, bKhrFp64, bAmdFp64);
+
+ gpuInfo->mnKhrFp64Flag = bKhrFp64;
+ gpuInfo->mnAmdFp64Flag = bAmdFp64;
+
+ gpuInfo->mbNeedsTDRAvoidance = false;
+
+ clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint),
+ &nPreferredVectorWidthFloat, nullptr);
+ SAL_INFO("opencl", "CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT=" << nPreferredVectorWidthFloat);
+
+ clGetPlatformInfo(gpuInfo->mpPlatformID, CL_PLATFORM_NAME, 64,
+ pName, nullptr);
+
+#if defined (_WIN32)
+// the Win32 SDK 8.1 deprecates GetVersionEx()
+# ifdef _WIN32_WINNT_WINBLUE
+ const bool bIsNotWinOrIsWin8OrGreater = IsWindows8OrGreater();
+# else
+ bool bIsNotWinOrIsWin8OrGreater = true;
+ OSVERSIONINFOW aVersionInfo = {};
+ aVersionInfo.dwOSVersionInfoSize = sizeof( aVersionInfo );
+ if (GetVersionExW( &aVersionInfo ))
+ {
+ // Windows 7 or lower?
+ if (aVersionInfo.dwMajorVersion < 6 ||
+ (aVersionInfo.dwMajorVersion == 6 && aVersionInfo.dwMinorVersion < 2))
+ bIsNotWinOrIsWin8OrGreater = false;
+ }
+# endif
+#else
+ const bool bIsNotWinOrIsWin8OrGreater = true;
+#endif
+
+ // Heuristic: Certain old low-end OpenCL implementations don't
+ // work for us with too large group lengths. Looking at the preferred
+ // float vector width seems to be a way to detect these devices, except
+ // the non-working NVIDIA cards on Windows older than version 8.
+ gpuInfo->mbNeedsTDRAvoidance = ( nPreferredVectorWidthFloat == 4 ) ||
+ ( !bIsNotWinOrIsWin8OrGreater &&
+ OUString::createFromAscii(pName).indexOf("NVIDIA") > -1 );
+
+ size_t nMaxParameterSize;
+ clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof(size_t),
+ &nMaxParameterSize, nullptr);
+ SAL_INFO("opencl", "CL_DEVICE_MAX_PARAMETER_SIZE=" << nMaxParameterSize);
+
+ return false;
+}
+
+bool initOpenCLRunEnv( int argc )
+{
+ if ( ( argc > MAX_CLFILE_NUM ) || ( argc < 0 ) )
+ return true;
+
+ if ( !bIsInited )
+ {
+ if ( !gpuEnv.mnIsUserCreated )
+ memset( &gpuEnv, 0, sizeof(gpuEnv) );
+
+ //initialize devices, context, command_queue
+ bool status = initOpenCLRunEnv( &gpuEnv );
+ if ( status )
+ {
+ return true;
+ }
+ //initialize program, kernelName, kernelCount
+ if( getenv( "SC_FLOAT" ) )
+ {
+ gpuEnv.mnKhrFp64Flag = false;
+ gpuEnv.mnAmdFp64Flag = false;
+ }
+ if( gpuEnv.mnKhrFp64Flag )
+ {
+ SAL_INFO("opencl", "Use Khr double");
+ }
+ else if( gpuEnv.mnAmdFp64Flag )
+ {
+ SAL_INFO("opencl", "Use AMD double type");
+ }
+ else
+ {
+ SAL_INFO("opencl", "USE float type");
+ }
+ bIsInited = true;
+ }
+ return false;
+}
+
+// based on crashes and hanging during kernel compilation
+void createDeviceInfo(cl_device_id aDeviceId, OpenCLPlatformInfo& rPlatformInfo)
+{
+ OpenCLDeviceInfo aDeviceInfo;
+ aDeviceInfo.device = aDeviceId;
+
+ char pName[DEVICE_NAME_LENGTH];
+ cl_int nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_NAME, DEVICE_NAME_LENGTH, pName, nullptr);
+ if(nState != CL_SUCCESS)
+ return;
+
+ aDeviceInfo.maName = OUString::createFromAscii(pName);
+
+ char pVendor[DEVICE_NAME_LENGTH];
+ nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_VENDOR, DEVICE_NAME_LENGTH, pVendor, nullptr);
+ if(nState != CL_SUCCESS)
+ return;
+
+ aDeviceInfo.maVendor = OUString::createFromAscii(pVendor);
+
+ cl_ulong nMemSize;
+ nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(nMemSize), &nMemSize, nullptr);
+ if(nState != CL_SUCCESS)
+ return;
+
+ aDeviceInfo.mnMemory = nMemSize;
+
+ cl_uint nClockFrequency;
+ nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(nClockFrequency), &nClockFrequency, nullptr);
+ if(nState != CL_SUCCESS)
+ return;
+
+ aDeviceInfo.mnFrequency = nClockFrequency;
+
+ cl_uint nComputeUnits;
+ nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(nComputeUnits), &nComputeUnits, nullptr);
+ if(nState != CL_SUCCESS)
+ return;
+
+ char pDriver[DEVICE_NAME_LENGTH];
+ nState = clGetDeviceInfo(aDeviceId, CL_DRIVER_VERSION, DEVICE_NAME_LENGTH, pDriver, nullptr);
+
+ if(nState != CL_SUCCESS)
+ return;
+
+ aDeviceInfo.maDriver = OUString::createFromAscii(pDriver);
+
+ bool bKhrFp64 = false;
+ bool bAmdFp64 = false;
+ checkDeviceForDoubleSupport(aDeviceId, bKhrFp64, bAmdFp64);
+
+ // only list devices that support double
+ if(!bKhrFp64 && !bAmdFp64)
+ return;
+
+ aDeviceInfo.mnComputeUnits = nComputeUnits;
+
+ if(!OpenCLConfig::get().checkImplementation(rPlatformInfo, aDeviceInfo))
+ rPlatformInfo.maDevices.push_back(aDeviceInfo);
+}
+
+bool createPlatformInfo(cl_platform_id nPlatformId, OpenCLPlatformInfo& rPlatformInfo)
+{
+ rPlatformInfo.platform = nPlatformId;
+ char pName[64];
+ cl_int nState = clGetPlatformInfo(nPlatformId, CL_PLATFORM_NAME, 64,
+ pName, nullptr);
+ if(nState != CL_SUCCESS)
+ return false;
+ rPlatformInfo.maName = OUString::createFromAscii(pName);
+
+ char pVendor[64];
+ nState = clGetPlatformInfo(nPlatformId, CL_PLATFORM_VENDOR, 64,
+ pVendor, nullptr);
+ if(nState != CL_SUCCESS)
+ return false;
+
+ rPlatformInfo.maVendor = OUString::createFromAscii(pVendor);
+
+ cl_uint nDevices;
+ nState = clGetDeviceIDs(nPlatformId, CL_DEVICE_TYPE_ALL, 0, nullptr, &nDevices);
+ if(nState != CL_SUCCESS)
+ return false;
+
+ // memory leak that does not matter
+ // memory is stored in static variable that lives through the whole program
+ cl_device_id* pDevices = new cl_device_id[nDevices];
+ nState = clGetDeviceIDs(nPlatformId, CL_DEVICE_TYPE_ALL, nDevices, pDevices, nullptr);
+ if(nState != CL_SUCCESS)
+ return false;
+
+ for(size_t i = 0; i < nDevices; ++i)
+ {
+ createDeviceInfo(pDevices[i], rPlatformInfo);
+ }
+
+ return true;
+}
+
+}
+
+const std::vector<OpenCLPlatformInfo>& fillOpenCLInfo()
+{
+ static std::vector<OpenCLPlatformInfo> aPlatforms;
+
+ // return early if we already initialized or can't use OpenCL
+ if (!aPlatforms.empty() || !canUseOpenCL())
+ return aPlatforms;
+
+ int status = clewInit(OPENCL_DLL_NAME);
+ if (status < 0)
+ return aPlatforms;
+
+ cl_uint nPlatforms;
+ cl_int nState = clGetPlatformIDs(0, nullptr, &nPlatforms);
+
+ if(nState != CL_SUCCESS)
+ return aPlatforms;
+
+ // memory leak that does not matter,
+ // memory is stored in static instance aPlatforms
+ cl_platform_id* pPlatforms = new cl_platform_id[nPlatforms];
+ nState = clGetPlatformIDs(nPlatforms, pPlatforms, nullptr);
+
+ if(nState != CL_SUCCESS)
+ return aPlatforms;
+
+ for(size_t i = 0; i < nPlatforms; ++i)
+ {
+ OpenCLPlatformInfo aPlatformInfo;
+ if(createPlatformInfo(pPlatforms[i], aPlatformInfo))
+ aPlatforms.push_back(aPlatformInfo);
+ }
+
+ return aPlatforms;
+}
+
+namespace {
+
+cl_device_id findDeviceIdByDeviceString(const OUString& rString, const std::vector<OpenCLPlatformInfo>& rPlatforms)
+{
+ for (const OpenCLPlatformInfo& rPlatform : rPlatforms)
+ {
+ for (const OpenCLDeviceInfo& rDeviceInfo : rPlatform.maDevices)
+ {
+ OUString aDeviceId = rDeviceInfo.maVendor + " " + rDeviceInfo.maName;
+ if (rString == aDeviceId)
+ {
+ return rDeviceInfo.device;
+ }
+ }
+ }
+
+ return nullptr;
+}
+
+void findDeviceInfoFromDeviceId(cl_device_id aDeviceId, size_t& rDeviceId, size_t& rPlatformId)
+{
+ cl_platform_id platformId;
+ cl_int nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_PLATFORM,
+ sizeof(platformId), &platformId, nullptr);
+
+ if(nState != CL_SUCCESS)
+ return;
+
+ const std::vector<OpenCLPlatformInfo>& rPlatforms = fillOpenCLInfo();
+ for(size_t i = 0; i < rPlatforms.size(); ++i)
+ {
+ cl_platform_id platId = rPlatforms[i].platform;
+ if(platId != platformId)
+ continue;
+
+ for(size_t j = 0; j < rPlatforms[i].maDevices.size(); ++j)
+ {
+ cl_device_id id = rPlatforms[i].maDevices[j].device;
+ if(id == aDeviceId)
+ {
+ rDeviceId = j;
+ rPlatformId = i;
+ return;
+ }
+ }
+ }
+}
+
+}
+
+bool canUseOpenCL()
+{
+ if( const char* env = getenv( "SC_FORCE_CALCULATION" ))
+ {
+ if( strcmp( env, "opencl" ) == 0 )
+ return true;
+ }
+ return !getenv("SAL_DISABLE_OPENCL") && officecfg::Office::Common::Misc::UseOpenCL::get();
+}
+
+bool switchOpenCLDevice(const OUString* pDevice, bool bAutoSelect, bool bForceEvaluation, OUString& rOutSelectedDeviceVersionIDString)
+{
+ if (!canUseOpenCL() || fillOpenCLInfo().empty())
+ return false;
+
+ cl_device_id pDeviceId = nullptr;
+ if(pDevice)
+ pDeviceId = findDeviceIdByDeviceString(*pDevice, fillOpenCLInfo());
+
+ if(!pDeviceId || bAutoSelect)
+ {
+ int status = clewInit(OPENCL_DLL_NAME);
+ if (status < 0)
+ return false;
+
+ OUString url(OStringToOUString(getCacheFolder(), RTL_TEXTENCODING_UTF8));
+ OUString path;
+ osl::FileBase::getSystemPathFromFileURL(url,path);
+ ds_device aSelectedDevice = getDeviceSelection(path, bForceEvaluation);
+ if ( aSelectedDevice.eType != DeviceType::OpenCLDevice)
+ return false;
+ pDeviceId = aSelectedDevice.aDeviceID;
+ }
+
+ if(gpuEnv.mpDevID == pDeviceId)
+ {
+ // we don't need to change anything
+ // still the same device
+ return pDeviceId != nullptr;
+ }
+
+ cl_context context;
+ cl_platform_id platformId;
+
+ {
+ OpenCLZone zone;
+ cl_int nState = clGetDeviceInfo(pDeviceId, CL_DEVICE_PLATFORM,
+ sizeof(platformId), &platformId, nullptr);
+
+ cl_context_properties cps[3];
+ cps[0] = CL_CONTEXT_PLATFORM;
+ cps[1] = reinterpret_cast<cl_context_properties>(platformId);
+ cps[2] = 0;
+ context = clCreateContext( cps, 1, &pDeviceId, nullptr, nullptr, &nState );
+ if (nState != CL_SUCCESS)
+ SAL_WARN("opencl", "clCreateContext failed: " << errorString(nState));
+
+ if(nState != CL_SUCCESS || context == nullptr)
+ {
+ if(context != nullptr)
+ clReleaseContext(context);
+
+ SAL_WARN("opencl", "failed to set/switch opencl device");
+ return false;
+ }
+ SAL_INFO("opencl", "Created context " << context << " for platform " << platformId << ", device " << pDeviceId);
+
+ OString sDeviceID = getDeviceInfoString(pDeviceId, CL_DEVICE_VENDOR) + " " + getDeviceInfoString(pDeviceId, CL_DRIVER_VERSION);
+ rOutSelectedDeviceVersionIDString = OStringToOUString(sDeviceID, RTL_TEXTENCODING_UTF8);
+ }
+
+ setOpenCLCmdQueuePosition(0); // Call this just to avoid the method being deleted from unused function deleter.
+
+ releaseOpenCLEnv(&gpuEnv);
+
+ OpenCLEnv env;
+ env.mpOclPlatformID = platformId;
+ env.mpOclContext = context;
+ env.mpOclDevsID = pDeviceId;
+
+ initOpenCLAttr(&env);
+
+ return !initOpenCLRunEnv(0);
+}
+
+void getOpenCLDeviceInfo(size_t& rDeviceId, size_t& rPlatformId)
+{
+ if (!canUseOpenCL())
+ return;
+
+ int status = clewInit(OPENCL_DLL_NAME);
+ if (status < 0)
+ return;
+
+ cl_device_id id = gpuEnv.mpDevID;
+ findDeviceInfoFromDeviceId(id, rDeviceId, rPlatformId);
+}
+
+void getOpenCLDeviceName(OUString& rDeviceName, OUString& rPlatformName)
+{
+ if (!canUseOpenCL())
+ return;
+
+ int status = clewInit(OPENCL_DLL_NAME);
+ if (status < 0)
+ return;
+
+ cl_device_id deviceId = gpuEnv.mpDevID;
+ cl_platform_id platformId;
+ if( clGetDeviceInfo(deviceId, CL_DEVICE_PLATFORM, sizeof(platformId), &platformId, nullptr) != CL_SUCCESS )
+ return;
+
+ char deviceName[DEVICE_NAME_LENGTH] = {0};
+ if( clGetDeviceInfo(deviceId, CL_DEVICE_NAME, sizeof(deviceName), deviceName, nullptr) != CL_SUCCESS )
+ return;
+ char platformName[64];
+ if( clGetPlatformInfo(platformId, CL_PLATFORM_NAME, 64, platformName, nullptr) != CL_SUCCESS )
+ return;
+ rDeviceName = OUString::createFromAscii(deviceName);
+ rPlatformName = OUString::createFromAscii(platformName);
+}
+
+void setOpenCLCmdQueuePosition( int nPos )
+{
+ if (nPos < 0 || nPos >= OPENCL_CMDQUEUE_SIZE)
+ // Out of range. Ignore this.
+ return;
+
+ gpuEnv.mnCmdQueuePos = nPos;
+}
+
+const char* errorString(cl_int nError)
+{
+#define CASE(val) case CL_##val: return #val
+ switch (nError)
+ {
+ CASE(SUCCESS);
+ CASE(DEVICE_NOT_FOUND);
+ CASE(DEVICE_NOT_AVAILABLE);
+ CASE(COMPILER_NOT_AVAILABLE);
+ CASE(MEM_OBJECT_ALLOCATION_FAILURE);
+ CASE(OUT_OF_RESOURCES);
+ CASE(OUT_OF_HOST_MEMORY);
+ CASE(PROFILING_INFO_NOT_AVAILABLE);
+ CASE(MEM_COPY_OVERLAP);
+ CASE(IMAGE_FORMAT_MISMATCH);
+ CASE(IMAGE_FORMAT_NOT_SUPPORTED);
+ CASE(BUILD_PROGRAM_FAILURE);
+ CASE(MAP_FAILURE);
+ CASE(INVALID_VALUE);
+ CASE(INVALID_DEVICE_TYPE);
+ CASE(INVALID_PLATFORM);
+ CASE(INVALID_DEVICE);
+ CASE(INVALID_CONTEXT);
+ CASE(INVALID_QUEUE_PROPERTIES);
+ CASE(INVALID_COMMAND_QUEUE);
+ CASE(INVALID_HOST_PTR);
+ CASE(INVALID_MEM_OBJECT);
+ CASE(INVALID_IMAGE_FORMAT_DESCRIPTOR);
+ CASE(INVALID_IMAGE_SIZE);
+ CASE(INVALID_SAMPLER);
+ CASE(INVALID_BINARY);
+ CASE(INVALID_BUILD_OPTIONS);
+ CASE(INVALID_PROGRAM);
+ CASE(INVALID_PROGRAM_EXECUTABLE);
+ CASE(INVALID_KERNEL_NAME);
+ CASE(INVALID_KERNEL_DEFINITION);
+ CASE(INVALID_KERNEL);
+ CASE(INVALID_ARG_INDEX);
+ CASE(INVALID_ARG_VALUE);
+ CASE(INVALID_ARG_SIZE);
+ CASE(INVALID_KERNEL_ARGS);
+ CASE(INVALID_WORK_DIMENSION);
+ CASE(INVALID_WORK_GROUP_SIZE);
+ CASE(INVALID_WORK_ITEM_SIZE);
+ CASE(INVALID_GLOBAL_OFFSET);
+ CASE(INVALID_EVENT_WAIT_LIST);
+ CASE(INVALID_EVENT);
+ CASE(INVALID_OPERATION);
+ CASE(INVALID_GL_OBJECT);
+ CASE(INVALID_BUFFER_SIZE);
+ CASE(INVALID_MIP_LEVEL);
+ CASE(INVALID_GLOBAL_WORK_SIZE);
+ default:
+ return "Unknown OpenCL error code";
+ }
+#undef CASE
+}
+
+bool GPUEnv::isOpenCLEnabled()
+{
+ return gpuEnv.mpDevID && gpuEnv.mpContext;
+}
+
+}
+
+void releaseOpenCLEnv( openclwrapper::GPUEnv *gpuInfo )
+{
+ OpenCLZone zone;
+
+ if ( !bIsInited )
+ {
+ return;
+ }
+
+ for (_cl_command_queue* & i : openclwrapper::gpuEnv.mpCmdQueue)
+ {
+ if (i)
+ {
+ clReleaseCommandQueue(i);
+ i = nullptr;
+ }
+ }
+ openclwrapper::gpuEnv.mnCmdQueuePos = 0;
+
+ if ( openclwrapper::gpuEnv.mpContext )
+ {
+ clReleaseContext( openclwrapper::gpuEnv.mpContext );
+ openclwrapper::gpuEnv.mpContext = nullptr;
+ }
+ bIsInited = false;
+ gpuInfo->mnIsUserCreated = 0;
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/opencl/source/platforminfo.cxx b/opencl/source/platforminfo.cxx
new file mode 100644
index 000000000..a23aa87a5
--- /dev/null
+++ b/opencl/source/platforminfo.cxx
@@ -0,0 +1,46 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <ostream>
+
+#include <opencl/platforminfo.hxx>
+
+OpenCLDeviceInfo::OpenCLDeviceInfo()
+ : device(nullptr)
+ , mnMemory(0)
+ , mnComputeUnits(0)
+ , mnFrequency(0)
+{
+}
+
+OpenCLPlatformInfo::OpenCLPlatformInfo()
+ : platform(nullptr)
+{
+}
+
+std::ostream& operator<<(std::ostream& rStream, const OpenCLPlatformInfo& rPlatform)
+{
+ rStream << "{"
+ "Vendor=" << rPlatform.maVendor << ","
+ "Name=" << rPlatform.maName <<
+ "}";
+ return rStream;
+}
+
+std::ostream& operator<<(std::ostream& rStream, const OpenCLDeviceInfo& rDevice)
+{
+ rStream << "{"
+ "Name=" << rDevice.maName << ","
+ "Vendor=" << rDevice.maVendor << ","
+ "Driver=" << rDevice.maDriver <<
+ "}";
+ return rStream;
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */