summaryrefslogtreecommitdiffstats
path: root/sc/source/core/opencl/formulagroupcl.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'sc/source/core/opencl/formulagroupcl.cxx')
-rw-r--r--sc/source/core/opencl/formulagroupcl.cxx4438
1 files changed, 4438 insertions, 0 deletions
diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx
new file mode 100644
index 000000000..f9ce31297
--- /dev/null
+++ b/sc/source/core/opencl/formulagroupcl.cxx
@@ -0,0 +1,4438 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <formulagroup.hxx>
+#include <formulagroupcl.hxx>
+#include <document.hxx>
+#include <formulacell.hxx>
+#include <tokenarray.hxx>
+#include <compiler.hxx>
+#include <comphelper/random.hxx>
+#include <formula/vectortoken.hxx>
+#include <scmatrix.hxx>
+#include <sal/log.hxx>
+
+#include <opencl/openclwrapper.hxx>
+#include <opencl/OpenCLZone.hxx>
+
+#include "op_financial.hxx"
+#include "op_database.hxx"
+#include "op_math.hxx"
+#include "op_logical.hxx"
+#include "op_statistical.hxx"
+#include "op_array.hxx"
+#include "op_spreadsheet.hxx"
+#include "op_addin.hxx"
+
+#include <limits>
+
+#include <com/sun/star/sheet/FormulaLanguage.hpp>
+
+// FIXME: The idea that somebody would bother to (now and then? once a year? once a month?) manually
+// edit a source file and change the value of some #defined constant and run some ill-defined
+// "correctness test" is of course ludicrous. Either things are checked in normal unit tests, in
+// every 'make check', or not at all. The below comments are ridiculous.
+
+#define REDUCE_THRESHOLD 201 // set to 4 for correctness testing. priority 1
+#define UNROLLING_FACTOR 16 // set to 4 for correctness testing (if no reduce)
+
+const char* const publicFunc =
+ "\n"
+ "#define IllegalArgument 502\n"
+ "#define IllegalFPOperation 503 // #NUM!\n"
+ "#define NoValue 519 // #VALUE!\n"
+ "#define NoConvergence 523\n"
+ "#define DivisionByZero 532 // #DIV/0!\n"
+ "#define NOTAVAILABLE 0x7fff // #N/A\n"
+ "\n"
+ "double CreateDoubleError(ulong nErr)\n"
+ "{\n"
+ // At least nVidia on Linux and Intel on Windows seem to ignore the argument to nan(),
+ // so using that would not propagate the type of error, work that around
+ // by directly constructing the proper IEEE double NaN value
+ // TODO: maybe use a better way to detect such systems?
+ " return as_double(0x7FF8000000000000+nErr);\n"
+// " return nan(nErr);\n"
+ "}\n"
+ "\n"
+ "uint GetDoubleErrorValue(double fVal)\n"
+ "{\n"
+ " if (isfinite(fVal))\n"
+ " return 0;\n"
+ " if (isinf(fVal))\n"
+ " return IllegalFPOperation; // normal INF\n"
+ " if (as_ulong(fVal) & 0XFFFF0000u)\n"
+ " return NoValue; // just a normal NAN\n"
+ " return (as_ulong(fVal) & 0XFFFF); // any other error\n"
+ "}\n"
+ "\n"
+ "double fsum_count(double a, double b, __private int *p) {\n"
+ " bool t = isnan(a);\n"
+ " (*p) += t?0:1;\n"
+ " return t?b:a+b;\n"
+ "}\n"
+ "double fmin_count(double a, double b, __private int *p) {\n"
+ " double result = fmin(a, b);\n"
+ " bool t = isnan(result);\n"
+ " (*p) += t?0:1;\n"
+ " return result;\n"
+ "}\n"
+ "double fmax_count(double a, double b, __private int *p) {\n"
+ " double result = fmax(a, b);\n"
+ " bool t = isnan(result);\n"
+ " (*p) += t?0:1;\n"
+ " return result;\n"
+ "}\n"
+ "double fsum(double a, double b) { return isnan(a)?b:a+b; }\n"
+ "double legalize(double a, double b) { return isnan(a)?b:a;}\n"
+ "double fsub(double a, double b) { return a-b; }\n"
+ "double fdiv(double a, double b) { return a/b; }\n"
+ "double strequal(unsigned a, unsigned b) { return (a==b)?1.0:0; }\n"
+ "int is_representable_integer(double a) {\n"
+ " long kMaxInt = (1L << 53) - 1;\n"
+ " if (a <= as_double(kMaxInt))\n"
+ " {\n"
+ " long nInt = as_long(a);\n"
+ " double fInt;\n"
+ " return (nInt <= kMaxInt &&\n"
+ " (!((fInt = as_double(nInt)) < a) && !(fInt > a)));\n"
+ " }\n"
+ " return 0;\n"
+ "}\n"
+ "int approx_equal(double a, double b) {\n"
+ " double e48 = 1.0 / (16777216.0 * 16777216.0);\n"
+ " double e44 = e48 * 16.0;\n"
+ " if (a == b)\n"
+ " return 1;\n"
+ " if (a == 0.0 || b == 0.0)\n"
+ " return 0;\n"
+ " double d = fabs(a - b);\n"
+ " if (!isfinite(d))\n"
+ " return 0; // Nan or Inf involved\n"
+ " if (d > ((a = fabs(a)) * e44) || d > ((b = fabs(b)) * e44))\n"
+ " return 0;\n"
+ " if (is_representable_integer(d) && is_representable_integer(a) && is_representable_integer(b))\n"
+ " return 0; // special case for representable integers.\n"
+ " return (d < a * e48 && d < b * e48);\n"
+ "}\n"
+ "double fsum_approx(double a, double b) {\n"
+ " if ( ((a < 0.0 && b > 0.0) || (b < 0.0 && a > 0.0))\n"
+ " && approx_equal( a, -b ) )\n"
+ " return 0.0;\n"
+ " return a + b;\n"
+ "}\n"
+ "double fsub_approx(double a, double b) {\n"
+ " if ( ((a < 0.0 && b < 0.0) || (a > 0.0 && b > 0.0)) && approx_equal( a, b ) )\n"
+ " return 0.0;\n"
+ " return a - b;\n"
+ "}\n"
+ ;
+
+#include <vector>
+#include <map>
+#include <iostream>
+#include <algorithm>
+
+#include <rtl/digest.h>
+
+#include <memory>
+
+using namespace formula;
+
+namespace sc::opencl {
+
+namespace {
+
+std::string linenumberify(const std::string& s)
+{
+ std::stringstream ss;
+ int linenumber = 1;
+ size_t start = 0;
+ size_t newline;
+ while ((newline = s.find('\n', start)) != std::string::npos)
+ {
+ ss << "/*" << std::setw(4) << linenumber++ << "*/ " << s.substr(start, newline-start+1);
+ start = newline + 1;
+ }
+ if (start < s.size())
+ ss << "/*" << std::setw(4) << linenumber++ << "*/ " << s.substr(start, std::string::npos);
+ return ss.str();
+}
+
+bool AllStringsAreNull(const rtl_uString* const* pStringArray, size_t nLength)
+{
+ if (pStringArray == nullptr)
+ return true;
+
+ for (size_t i = 0; i < nLength; i++)
+ if (pStringArray[i] != nullptr)
+ return false;
+
+ return true;
+}
+
+OUString LimitedString( const OUString& str )
+{
+ if( str.getLength() < 20 )
+ return "\"" + str + "\"";
+ else
+ return OUString::Concat("\"") + str.subView( 0, 20 ) + "\"...";
+}
+
+// Returns formatted contents of the data (possibly shortened), to be used in debug output.
+OUString DebugPeekData(const FormulaToken* ref, int doubleRefIndex = 0)
+{
+ if (ref->GetType() == formula::svSingleVectorRef)
+ {
+ const formula::SingleVectorRefToken* pSVR =
+ static_cast<const formula::SingleVectorRefToken*>(ref);
+ OUStringBuffer buf = "SingleRef {";
+ for( size_t i = 0; i < std::min< size_t >( 4, pSVR->GetArrayLength()); ++i )
+ {
+ if( i != 0 )
+ buf.append( "," );
+ if( pSVR->GetArray().mpNumericArray != nullptr )
+ buf.append( pSVR->GetArray().mpNumericArray[ i ] );
+ else if( pSVR->GetArray().mpStringArray != nullptr )
+ buf.append( LimitedString( OUString( pSVR->GetArray().mpStringArray[ i ] )));
+ }
+ if( pSVR->GetArrayLength() > 4 )
+ buf.append( ",..." );
+ buf.append( "}" );
+ return buf.makeStringAndClear();
+ }
+ else if (ref->GetType() == formula::svDoubleVectorRef)
+ {
+ const formula::DoubleVectorRefToken* pDVR =
+ static_cast<const formula::DoubleVectorRefToken*>(ref);
+ OUStringBuffer buf = "DoubleRef {";
+ for( size_t i = 0; i < std::min< size_t >( 4, pDVR->GetArrayLength()); ++i )
+ {
+ if( i != 0 )
+ buf.append( "," );
+ if( pDVR->GetArrays()[doubleRefIndex].mpNumericArray != nullptr )
+ buf.append( pDVR->GetArrays()[doubleRefIndex].mpNumericArray[ i ] );
+ else if( pDVR->GetArrays()[doubleRefIndex].mpStringArray != nullptr )
+ buf.append( LimitedString( OUString( pDVR->GetArrays()[doubleRefIndex].mpStringArray[ i ] )));
+ }
+ if( pDVR->GetArrayLength() > 4 )
+ buf.append( ",..." );
+ buf.append( "}" );
+ return buf.makeStringAndClear();
+ }
+ else if (ref->GetType() == formula::svString)
+ {
+ return "String " + LimitedString( ref->GetString().getString());
+ }
+ else if (ref->GetType() == formula::svDouble)
+ {
+ return OUString::number(ref->GetDouble());
+ }
+ else
+ {
+ return "?";
+ }
+}
+
+// Returns formatted contents of a doubles buffer, to be used in debug output.
+OUString DebugPeekDoubles(const double* data, int size)
+{
+ OUStringBuffer buf = "{";
+ for( int i = 0; i < std::min( 4, size ); ++i )
+ {
+ if( i != 0 )
+ buf.append( "," );
+ buf.append( data[ i ] );
+ }
+ if( size > 4 )
+ buf.append( ",..." );
+ buf.append( "}" );
+ return buf.makeStringAndClear();
+}
+
+} // anonymous namespace
+
+/// Map the buffer used by an argument and do necessary argument setting
+size_t VectorRef::Marshal( cl_kernel k, int argno, int, cl_program )
+{
+ OpenCLZone zone;
+ FormulaToken* ref = mFormulaTree->GetFormulaToken();
+ double* pHostBuffer = nullptr;
+ size_t szHostBuffer = 0;
+ if (ref->GetType() == formula::svSingleVectorRef)
+ {
+ const formula::SingleVectorRefToken* pSVR =
+ static_cast<const formula::SingleVectorRefToken*>(ref);
+
+ SAL_INFO("sc.opencl", "SingleVectorRef len=" << pSVR->GetArrayLength() << " mpNumericArray=" << pSVR->GetArray().mpNumericArray << " (mpStringArray=" << pSVR->GetArray().mpStringArray << ")");
+
+ pHostBuffer = const_cast<double*>(pSVR->GetArray().mpNumericArray);
+ szHostBuffer = pSVR->GetArrayLength() * sizeof(double);
+ }
+ else if (ref->GetType() == formula::svDoubleVectorRef)
+ {
+ const formula::DoubleVectorRefToken* pDVR =
+ static_cast<const formula::DoubleVectorRefToken*>(ref);
+
+ SAL_INFO("sc.opencl", "DoubleVectorRef index=" << mnIndex << " len=" << pDVR->GetArrayLength() << " mpNumericArray=" << pDVR->GetArrays()[mnIndex].mpNumericArray << " (mpStringArray=" << pDVR->GetArrays()[mnIndex].mpStringArray << ")");
+
+ pHostBuffer = const_cast<double*>(
+ pDVR->GetArrays()[mnIndex].mpNumericArray);
+ szHostBuffer = pDVR->GetArrayLength() * sizeof(double);
+ }
+ else
+ {
+ throw Unhandled(__FILE__, __LINE__);
+ }
+
+ openclwrapper::KernelEnv kEnv;
+ openclwrapper::setKernelEnv(&kEnv);
+ cl_int err;
+ if (pHostBuffer)
+ {
+ mpClmem = clCreateBuffer(kEnv.mpkContext,
+ cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_USE_HOST_PTR,
+ szHostBuffer,
+ pHostBuffer, &err);
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
+ SAL_INFO("sc.opencl", "Created buffer " << mpClmem << " size " << szHostBuffer << " using host buffer " << pHostBuffer);
+ }
+ else
+ {
+ if (szHostBuffer == 0)
+ szHostBuffer = sizeof(double); // a dummy small value
+ // Marshal as a buffer of NANs
+ mpClmem = clCreateBuffer(kEnv.mpkContext,
+ cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_ALLOC_HOST_PTR,
+ szHostBuffer, nullptr, &err);
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
+ SAL_INFO("sc.opencl", "Created buffer " << mpClmem << " size " << szHostBuffer);
+
+ double* pNanBuffer = static_cast<double*>(clEnqueueMapBuffer(
+ kEnv.mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
+ szHostBuffer, 0, nullptr, nullptr, &err));
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
+
+ for (size_t i = 0; i < szHostBuffer / sizeof(double); i++)
+ pNanBuffer[i] = std::numeric_limits<double>::quiet_NaN();
+ err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem,
+ pNanBuffer, 0, nullptr, nullptr);
+ // FIXME: Is it intentional to not throw an OpenCLError even if the clEnqueueUnmapMemObject() fails?
+ if (CL_SUCCESS != err)
+ SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << openclwrapper::errorString(err));
+ }
+
+ SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem << " (" << DebugPeekData(ref, mnIndex) << ")");
+ err = clSetKernelArg(k, argno, sizeof(cl_mem), static_cast<void*>(&mpClmem));
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+ return 1;
+}
+
+/// Arguments that are actually compile-time constant string
+/// Currently, only the hash is passed.
+/// TBD(IJSUNG): pass also length and the actual string if there is a
+/// hash function collision
+
+/// FIXME: This idea of passing of hashes of uppercased strings into OpenCL code is fairly potent
+/// crack. It is hopefully not used at all any more, but noticing that there are string arguments
+/// automatically disables use of OpenCL for a formula group. If at some point there are resources
+/// to drain the OpenCL swamp, this should go away.
+
+namespace {
+
+class ConstStringArgument : public DynamicKernelArgument
+{
+public:
+ ConstStringArgument( const ScCalcConfig& config, const std::string& s,
+ const FormulaTreeNodeRef& ft ) :
+ DynamicKernelArgument(config, s, ft) { }
+ /// Generate declaration
+ virtual void GenDecl( std::stringstream& ss ) const override
+ {
+ ss << "unsigned " << mSymName;
+ }
+ virtual void GenDeclRef( std::stringstream& ss ) const override
+ {
+ ss << GenSlidingWindowDeclRef();
+ }
+ virtual void GenSlidingWindowDecl( std::stringstream& ss ) const override
+ {
+ GenDecl(ss);
+ }
+ virtual std::string GenSlidingWindowDeclRef( bool = false ) const override
+ {
+ std::stringstream ss;
+ if (GetFormulaToken()->GetType() != formula::svString)
+ throw Unhandled(__FILE__, __LINE__);
+ FormulaToken* Tok = GetFormulaToken();
+ ss << Tok->GetString().getString().toAsciiUpperCase().hashCode() << "U";
+ return ss.str();
+ }
+ virtual size_t GetWindowSize() const override
+ {
+ return 1;
+ }
+ /// Pass the 32-bit hash of the string to the kernel
+ virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) override
+ {
+ OpenCLZone zone;
+ FormulaToken* ref = mFormulaTree->GetFormulaToken();
+ cl_uint hashCode = 0;
+ if (ref->GetType() != formula::svString)
+ {
+ throw Unhandled(__FILE__, __LINE__);
+ }
+
+ const OUString s = ref->GetString().getString().toAsciiUpperCase();
+ hashCode = s.hashCode();
+
+ // Pass the scalar result back to the rest of the formula kernel
+ SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_uint: " << hashCode << "(" << DebugPeekData(ref) << ")" );
+ cl_int err = clSetKernelArg(k, argno, sizeof(cl_uint), static_cast<void*>(&hashCode));
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+ return 1;
+ }
+};
+
+/// Arguments that are actually compile-time constants
+class DynamicKernelConstantArgument : public DynamicKernelArgument
+{
+public:
+ DynamicKernelConstantArgument( const ScCalcConfig& config, const std::string& s,
+ const FormulaTreeNodeRef& ft ) :
+ DynamicKernelArgument(config, s, ft) { }
+ /// Generate declaration
+ virtual void GenDecl( std::stringstream& ss ) const override
+ {
+ ss << "double " << mSymName;
+ }
+ virtual void GenDeclRef( std::stringstream& ss ) const override
+ {
+ ss << mSymName;
+ }
+ virtual void GenSlidingWindowDecl( std::stringstream& ss ) const override
+ {
+ GenDecl(ss);
+ }
+ virtual std::string GenSlidingWindowDeclRef( bool = false ) const override
+ {
+ if (GetFormulaToken()->GetType() != formula::svDouble)
+ throw Unhandled(__FILE__, __LINE__);
+ return mSymName;
+ }
+ virtual size_t GetWindowSize() const override
+ {
+ return 1;
+ }
+ double GetDouble() const
+ {
+ FormulaToken* Tok = GetFormulaToken();
+ if (Tok->GetType() != formula::svDouble)
+ throw Unhandled(__FILE__, __LINE__);
+ return Tok->GetDouble();
+ }
+ /// Create buffer and pass the buffer to a given kernel
+ virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) override
+ {
+ OpenCLZone zone;
+ double tmp = GetDouble();
+ // Pass the scalar result back to the rest of the formula kernel
+ SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": double: " << tmp);
+ cl_int err = clSetKernelArg(k, argno, sizeof(double), static_cast<void*>(&tmp));
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+ return 1;
+ }
+};
+
+class DynamicKernelPiArgument : public DynamicKernelArgument
+{
+public:
+ DynamicKernelPiArgument( const ScCalcConfig& config, const std::string& s,
+ const FormulaTreeNodeRef& ft ) :
+ DynamicKernelArgument(config, s, ft) { }
+ /// Generate declaration
+ virtual void GenDecl( std::stringstream& ss ) const override
+ {
+ ss << "double " << mSymName;
+ }
+ virtual void GenDeclRef( std::stringstream& ss ) const override
+ {
+ ss << "3.14159265358979";
+ }
+ virtual void GenSlidingWindowDecl( std::stringstream& ss ) const override
+ {
+ GenDecl(ss);
+ }
+ virtual std::string GenSlidingWindowDeclRef( bool = false ) const override
+ {
+ return mSymName;
+ }
+ virtual size_t GetWindowSize() const override
+ {
+ return 1;
+ }
+ /// Create buffer and pass the buffer to a given kernel
+ virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) override
+ {
+ OpenCLZone zone;
+ double tmp = 0.0;
+ // Pass the scalar result back to the rest of the formula kernel
+ SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": double: " << tmp << " (PI)");
+ cl_int err = clSetKernelArg(k, argno, sizeof(double), static_cast<void*>(&tmp));
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+ return 1;
+ }
+};
+
+class DynamicKernelRandomArgument : public DynamicKernelArgument
+{
+public:
+ DynamicKernelRandomArgument( const ScCalcConfig& config, const std::string& s,
+ const FormulaTreeNodeRef& ft ) :
+ DynamicKernelArgument(config, s, ft) { }
+ /// Generate declaration
+ virtual void GenDecl( std::stringstream& ss ) const override
+ {
+ ss << "double " << mSymName;
+ }
+ virtual void GenDeclRef( std::stringstream& ss ) const override
+ {
+ ss << mSymName;
+ }
+ virtual void GenSlidingWindowDecl( std::stringstream& ss ) const override
+ {
+ ss << "int " << mSymName;
+ }
+ virtual std::string GenSlidingWindowDeclRef( bool = false ) const override
+ {
+ return mSymName + "_Random(" + mSymName + ")";
+ }
+ virtual void GenSlidingWindowFunction( std::stringstream& ss ) override
+ {
+ // This string is from the pi_opencl_kernel.i file as
+ // generated when building the Random123 examples. Unused
+ // stuff has been removed, and the actual kernel is not the
+ // same as in the totally different use case of that example,
+ // of course. Only the code that calculates the counter-based
+ // random number and what it needs is left.
+ ss << "\
+\n\
+#ifndef DEFINED_RANDOM123_STUFF\n\
+#define DEFINED_RANDOM123_STUFF\n\
+\n\
+/*\n\
+Copyright 2010-2011, D. E. Shaw Research.\n\
+All rights reserved.\n\
+\n\
+Redistribution and use in source and binary forms, with or without\n\
+modification, are permitted provided that the following conditions are\n\
+met:\n\
+\n\
+* Redistributions of source code must retain the above copyright\n\
+ notice, this list of conditions, and the following disclaimer.\n\
+\n\
+* Redistributions in binary form must reproduce the above copyright\n\
+ notice, this list of conditions, and the following disclaimer in the\n\
+ documentation and/or other materials provided with the distribution.\n\
+\n\
+* Neither the name of D. E. Shaw Research nor the names of its\n\
+ contributors may be used to endorse or promote products derived from\n\
+ this software without specific prior written permission.\n\
+\n\
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n\
+\"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n\
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n\
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n\
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n\
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n\
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n\
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n\
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n\
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n\
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\
+*/\n\
+\n\
+typedef uint uint32_t;\n\
+struct r123array2x32\n\
+{\n\
+ uint32_t v[2];\n\
+};\n\
+enum r123_enum_threefry32x2\n\
+{\n\
+ R_32x2_0_0 = 13,\n\
+ R_32x2_1_0 = 15,\n\
+ R_32x2_2_0 = 26,\n\
+ R_32x2_3_0 = 6,\n\
+ R_32x2_4_0 = 17,\n\
+ R_32x2_5_0 = 29,\n\
+ R_32x2_6_0 = 16,\n\
+ R_32x2_7_0 = 24\n\
+};\n\
+inline uint32_t RotL_32 (uint32_t x, unsigned int N)\n\
+ __attribute__ ((always_inline));\n\
+inline uint32_t\n\
+RotL_32 (uint32_t x, unsigned int N)\n\
+{\n\
+ return (x << (N & 31)) | (x >> ((32 - N) & 31));\n\
+}\n\
+\n\
+typedef struct r123array2x32 threefry2x32_ctr_t;\n\
+typedef struct r123array2x32 threefry2x32_key_t;\n\
+typedef struct r123array2x32 threefry2x32_ukey_t;\n\
+inline threefry2x32_key_t\n\
+threefry2x32keyinit (threefry2x32_ukey_t uk)\n\
+{\n\
+ return uk;\n\
+}\n\
+\n\
+inline threefry2x32_ctr_t threefry2x32_R (unsigned int Nrounds,\n\
+ threefry2x32_ctr_t in,\n\
+ threefry2x32_key_t k)\n\
+ __attribute__ ((always_inline));\n\
+inline threefry2x32_ctr_t\n\
+threefry2x32_R (unsigned int Nrounds, threefry2x32_ctr_t in,\n\
+ threefry2x32_key_t k)\n\
+{\n\
+ threefry2x32_ctr_t X;\n\
+ uint32_t ks[2 + 1];\n\
+ int i;\n\
+ ks[2] = 0x1BD11BDA;\n\
+ for (i = 0; i < 2; i++) {\n\
+ ks[i] = k.v[i];\n\
+ X.v[i] = in.v[i];\n\
+ ks[2] ^= k.v[i];\n\
+ }\n\
+ X.v[0] += ks[0];\n\
+ X.v[1] += ks[1];\n\
+ if (Nrounds > 0) {\n\
+ X.v[0] += X.v[1];\n\
+ X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
+ X.v[1] ^= X.v[0];\n\
+ }\n\
+ if (Nrounds > 1) {\n\
+ X.v[0] += X.v[1];\n\
+ X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
+ X.v[1] ^= X.v[0];\n\
+ }\n\
+ if (Nrounds > 2) {\n\
+ X.v[0] += X.v[1];\n\
+ X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
+ X.v[1] ^= X.v[0];\n\
+ }\n\
+ if (Nrounds > 3) {\n\
+ X.v[0] += X.v[1];\n\
+ X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
+ X.v[1] ^= X.v[0];\n\
+ }\n\
+ if (Nrounds > 3) {\n\
+ X.v[0] += ks[1];\n\
+ X.v[1] += ks[2];\n\
+ X.v[1] += 1;\n\
+ }\n\
+ if (Nrounds > 4) {\n\
+ X.v[0] += X.v[1];\n\
+ X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
+ X.v[1] ^= X.v[0];\n\
+ }\n\
+ if (Nrounds > 5) {\n\
+ X.v[0] += X.v[1];\n\
+ X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
+ X.v[1] ^= X.v[0];\n\
+ }\n\
+ if (Nrounds > 6) {\n\
+ X.v[0] += X.v[1];\n\
+ X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
+ X.v[1] ^= X.v[0];\n\
+ }\n\
+ if (Nrounds > 7) {\n\
+ X.v[0] += X.v[1];\n\
+ X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
+ X.v[1] ^= X.v[0];\n\
+ }\n\
+ if (Nrounds > 7) {\n\
+ X.v[0] += ks[2];\n\
+ X.v[1] += ks[0];\n\
+ X.v[1] += 2;\n\
+ }\n\
+ if (Nrounds > 8) {\n\
+ X.v[0] += X.v[1];\n\
+ X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
+ X.v[1] ^= X.v[0];\n\
+ }\n\
+ if (Nrounds > 9) {\n\
+ X.v[0] += X.v[1];\n\
+ X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
+ X.v[1] ^= X.v[0];\n\
+ }\n\
+ if (Nrounds > 10) {\n\
+ X.v[0] += X.v[1];\n\
+ X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
+ X.v[1] ^= X.v[0];\n\
+ }\n\
+ if (Nrounds > 11) {\n\
+ X.v[0] += X.v[1];\n\
+ X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
+ X.v[1] ^= X.v[0];\n\
+ }\n\
+ if (Nrounds > 11) {\n\
+ X.v[0] += ks[0];\n\
+ X.v[1] += ks[1];\n\
+ X.v[1] += 3;\n\
+ }\n\
+ if (Nrounds > 12) {\n\
+ X.v[0] += X.v[1];\n\
+ X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
+ X.v[1] ^= X.v[0];\n\
+ }\n\
+ if (Nrounds > 13) {\n\
+ X.v[0] += X.v[1];\n\
+ X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
+ X.v[1] ^= X.v[0];\n\
+ }\n\
+ if (Nrounds > 14) {\n\
+ X.v[0] += X.v[1];\n\
+ X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
+ X.v[1] ^= X.v[0];\n\
+ }\n\
+ if (Nrounds > 15) {\n\
+ X.v[0] += X.v[1];\n\
+ X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
+ X.v[1] ^= X.v[0];\n\
+ }\n\
+ if (Nrounds > 15) {\n\
+ X.v[0] += ks[1];\n\
+ X.v[1] += ks[2];\n\
+ X.v[1] += 4;\n\
+ }\n\
+ if (Nrounds > 16) {\n\
+ X.v[0] += X.v[1];\n\
+ X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
+ X.v[1] ^= X.v[0];\n\
+ }\n\
+ if (Nrounds > 17) {\n\
+ X.v[0] += X.v[1];\n\
+ X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
+ X.v[1] ^= X.v[0];\n\
+ }\n\
+ if (Nrounds > 18) {\n\
+ X.v[0] += X.v[1];\n\
+ X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
+ X.v[1] ^= X.v[0];\n\
+ }\n\
+ if (Nrounds > 19) {\n\
+ X.v[0] += X.v[1];\n\
+ X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
+ X.v[1] ^= X.v[0];\n\
+ }\n\
+ if (Nrounds > 19) {\n\
+ X.v[0] += ks[2];\n\
+ X.v[1] += ks[0];\n\
+ X.v[1] += 5;\n\
+ }\n\
+ if (Nrounds > 20) {\n\
+ X.v[0] += X.v[1];\n\
+ X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
+ X.v[1] ^= X.v[0];\n\
+ }\n\
+ if (Nrounds > 21) {\n\
+ X.v[0] += X.v[1];\n\
+ X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
+ X.v[1] ^= X.v[0];\n\
+ }\n\
+ if (Nrounds > 22) {\n\
+ X.v[0] += X.v[1];\n\
+ X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
+ X.v[1] ^= X.v[0];\n\
+ }\n\
+ if (Nrounds > 23) {\n\
+ X.v[0] += X.v[1];\n\
+ X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
+ X.v[1] ^= X.v[0];\n\
+ }\n\
+ if (Nrounds > 23) {\n\
+ X.v[0] += ks[0];\n\
+ X.v[1] += ks[1];\n\
+ X.v[1] += 6;\n\
+ }\n\
+ if (Nrounds > 24) {\n\
+ X.v[0] += X.v[1];\n\
+ X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
+ X.v[1] ^= X.v[0];\n\
+ }\n\
+ if (Nrounds > 25) {\n\
+ X.v[0] += X.v[1];\n\
+ X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
+ X.v[1] ^= X.v[0];\n\
+ }\n\
+ if (Nrounds > 26) {\n\
+ X.v[0] += X.v[1];\n\
+ X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
+ X.v[1] ^= X.v[0];\n\
+ }\n\
+ if (Nrounds > 27) {\n\
+ X.v[0] += X.v[1];\n\
+ X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
+ X.v[1] ^= X.v[0];\n\
+ }\n\
+ if (Nrounds > 27) {\n\
+ X.v[0] += ks[1];\n\
+ X.v[1] += ks[2];\n\
+ X.v[1] += 7;\n\
+ }\n\
+ if (Nrounds > 28) {\n\
+ X.v[0] += X.v[1];\n\
+ X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
+ X.v[1] ^= X.v[0];\n\
+ }\n\
+ if (Nrounds > 29) {\n\
+ X.v[0] += X.v[1];\n\
+ X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
+ X.v[1] ^= X.v[0];\n\
+ }\n\
+ if (Nrounds > 30) {\n\
+ X.v[0] += X.v[1];\n\
+ X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
+ X.v[1] ^= X.v[0];\n\
+ }\n\
+ if (Nrounds > 31) {\n\
+ X.v[0] += X.v[1];\n\
+ X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
+ X.v[1] ^= X.v[0];\n\
+ }\n\
+ if (Nrounds > 31) {\n\
+ X.v[0] += ks[2];\n\
+ X.v[1] += ks[0];\n\
+ X.v[1] += 8;\n\
+ }\n\
+ return X;\n\
+}\n\
+\n\
+enum r123_enum_threefry2x32\n\
+{ threefry2x32_rounds = 20 };\n\
+inline threefry2x32_ctr_t threefry2x32 (threefry2x32_ctr_t in,\n\
+ threefry2x32_key_t k)\n\
+ __attribute__ ((always_inline));\n\
+inline threefry2x32_ctr_t\n\
+threefry2x32 (threefry2x32_ctr_t in, threefry2x32_key_t k)\n\
+{\n\
+ return threefry2x32_R (threefry2x32_rounds, in, k);\n\
+}\n\
+#endif\n\
+\n\
+";
+ ss << "double " << mSymName << "_Random (int seed)\n\
+{\n\
+ unsigned tid = get_global_id(0);\n\
+ threefry2x32_key_t k = { {tid, 0xdecafbad} };\n\
+ threefry2x32_ctr_t c = { {seed, 0xf00dcafe} };\n\
+ c = threefry2x32_R(threefry2x32_rounds, c, k);\n\
+ const double factor = 1./(" << SAL_MAX_UINT32 << ".0 + 1.0);\n\
+ const double halffactor = 0.5*factor;\n\
+ return c.v[0] * factor + halffactor;\n\
+}\n\
+";
+ }
+ virtual size_t GetWindowSize() const override
+ {
+ return 1;
+ }
+ /// Create buffer and pass the buffer to a given kernel
+ virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) override
+ {
+ OpenCLZone zone;
+ cl_int seed = comphelper::rng::uniform_int_distribution(0, SAL_MAX_INT32);
+ // Pass the scalar result back to the rest of the formula kernel
+ SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_int: " << seed << "(RANDOM)");
+ cl_int err = clSetKernelArg(k, argno, sizeof(cl_int), static_cast<void*>(&seed));
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+ return 1;
+ }
+};
+
+/// A vector of strings
+class DynamicKernelStringArgument : public VectorRef
+{
+public:
+ DynamicKernelStringArgument( const ScCalcConfig& config, const std::string& s,
+ const FormulaTreeNodeRef& ft, int index = 0 ) :
+ VectorRef(config, s, ft, index) { }
+
+ virtual void GenSlidingWindowFunction( std::stringstream& ) override { }
+ /// Generate declaration
+ virtual void GenDecl( std::stringstream& ss ) const override
+ {
+ ss << "__global unsigned int *" << mSymName;
+ }
+ virtual void GenSlidingWindowDecl( std::stringstream& ss ) const override
+ {
+ DynamicKernelStringArgument::GenDecl(ss);
+ }
+ virtual size_t Marshal( cl_kernel, int, int, cl_program ) override;
+};
+
+}
+
+/// Marshal a string vector reference
+size_t DynamicKernelStringArgument::Marshal( cl_kernel k, int argno, int, cl_program )
+{
+ OpenCLZone zone;
+ FormulaToken* ref = mFormulaTree->GetFormulaToken();
+
+ openclwrapper::KernelEnv kEnv;
+ openclwrapper::setKernelEnv(&kEnv);
+ cl_int err;
+ formula::VectorRefArray vRef;
+ size_t nStrings = 0;
+ if (ref->GetType() == formula::svSingleVectorRef)
+ {
+ const formula::SingleVectorRefToken* pSVR =
+ static_cast<const formula::SingleVectorRefToken*>(ref);
+ nStrings = pSVR->GetArrayLength();
+ vRef = pSVR->GetArray();
+ }
+ else if (ref->GetType() == formula::svDoubleVectorRef)
+ {
+ const formula::DoubleVectorRefToken* pDVR =
+ static_cast<const formula::DoubleVectorRefToken*>(ref);
+ nStrings = pDVR->GetArrayLength();
+ vRef = pDVR->GetArrays()[mnIndex];
+ }
+ size_t szHostBuffer = nStrings * sizeof(cl_int);
+ cl_uint* pHashBuffer = nullptr;
+
+ if (vRef.mpStringArray != nullptr)
+ {
+ // Marshal strings. Right now we pass hashes of these string
+ mpClmem = clCreateBuffer(kEnv.mpkContext,
+ cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_ALLOC_HOST_PTR,
+ szHostBuffer, nullptr, &err);
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
+ SAL_INFO("sc.opencl", "Created buffer " << mpClmem << " size " << szHostBuffer);
+
+ pHashBuffer = static_cast<cl_uint*>(clEnqueueMapBuffer(
+ kEnv.mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
+ szHostBuffer, 0, nullptr, nullptr, &err));
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
+
+ for (size_t i = 0; i < nStrings; i++)
+ {
+ if (vRef.mpStringArray[i])
+ {
+ const OUString tmp(vRef.mpStringArray[i]);
+ pHashBuffer[i] = tmp.hashCode();
+ }
+ else
+ {
+ pHashBuffer[i] = 0;
+ }
+ }
+ }
+ else
+ {
+ if (nStrings == 0)
+ szHostBuffer = sizeof(cl_int); // a dummy small value
+ // Marshal as a buffer of NANs
+ mpClmem = clCreateBuffer(kEnv.mpkContext,
+ cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_ALLOC_HOST_PTR,
+ szHostBuffer, nullptr, &err);
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
+ SAL_INFO("sc.opencl", "Created buffer " << mpClmem << " size " << szHostBuffer);
+
+ pHashBuffer = static_cast<cl_uint*>(clEnqueueMapBuffer(
+ kEnv.mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
+ szHostBuffer, 0, nullptr, nullptr, &err));
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
+
+ for (size_t i = 0; i < szHostBuffer / sizeof(cl_int); i++)
+ pHashBuffer[i] = 0;
+ }
+ err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem,
+ pHashBuffer, 0, nullptr, nullptr);
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clEnqueueUnmapMemObject", err, __FILE__, __LINE__);
+
+ SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem << " (" << DebugPeekData(ref,mnIndex) << ")");
+ err = clSetKernelArg(k, argno, sizeof(cl_mem), static_cast<void*>(&mpClmem));
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+ return 1;
+}
+
+namespace {
+
+/// A mixed string/numeric vector
+class DynamicKernelMixedArgument : public VectorRef
+{
+public:
+ DynamicKernelMixedArgument( const ScCalcConfig& config, const std::string& s,
+ const FormulaTreeNodeRef& ft ) :
+ VectorRef(config, s, ft), mStringArgument(config, s + "s", ft) { }
+ virtual void GenSlidingWindowDecl( std::stringstream& ss ) const override
+ {
+ VectorRef::GenSlidingWindowDecl(ss);
+ ss << ", ";
+ mStringArgument.GenSlidingWindowDecl(ss);
+ }
+ virtual void GenSlidingWindowFunction( std::stringstream& ) override { }
+ /// Generate declaration
+ virtual void GenDecl( std::stringstream& ss ) const override
+ {
+ VectorRef::GenDecl(ss);
+ ss << ", ";
+ mStringArgument.GenDecl(ss);
+ }
+ virtual void GenDeclRef( std::stringstream& ss ) const override
+ {
+ VectorRef::GenDeclRef(ss);
+ ss << ",";
+ mStringArgument.GenDeclRef(ss);
+ }
+ virtual std::string GenSlidingWindowDeclRef( bool nested ) const override
+ {
+ std::stringstream ss;
+ ss << "(!isnan(" << VectorRef::GenSlidingWindowDeclRef();
+ ss << ")?" << VectorRef::GenSlidingWindowDeclRef();
+ ss << ":" << mStringArgument.GenSlidingWindowDeclRef(nested);
+ ss << ")";
+ return ss.str();
+ }
+ virtual std::string GenDoubleSlidingWindowDeclRef( bool = false ) const override
+ {
+ std::stringstream ss;
+ ss << VectorRef::GenSlidingWindowDeclRef();
+ return ss.str();
+ }
+ virtual std::string GenStringSlidingWindowDeclRef( bool = false ) const override
+ {
+ std::stringstream ss;
+ ss << mStringArgument.GenSlidingWindowDeclRef();
+ return ss.str();
+ }
+ virtual size_t Marshal( cl_kernel k, int argno, int vw, cl_program p ) override
+ {
+ int i = VectorRef::Marshal(k, argno, vw, p);
+ i += mStringArgument.Marshal(k, argno + i, vw, p);
+ return i;
+ }
+
+protected:
+ DynamicKernelStringArgument mStringArgument;
+};
+
+/// Handling a Double Vector that is used as a sliding window input
+/// to either a sliding window average or sum-of-products
+/// Generate a sequential loop for reductions
+template<class Base>
+class DynamicKernelSlidingArgument : public Base
+{
+public:
+ DynamicKernelSlidingArgument(const ScCalcConfig& config, const std::string& s,
+ const FormulaTreeNodeRef& ft,
+ const std::shared_ptr<SlidingFunctionBase>& CodeGen, int index)
+ : Base(config, s, ft, index)
+ , mpCodeGen(CodeGen)
+ {
+ FormulaToken* t = ft->GetFormulaToken();
+ if (t->GetType() != formula::svDoubleVectorRef)
+ throw Unhandled(__FILE__, __LINE__);
+ mpDVR = static_cast<const formula::DoubleVectorRefToken*>(t);
+ bIsStartFixed = mpDVR->IsStartFixed();
+ bIsEndFixed = mpDVR->IsEndFixed();
+ }
+
+ // Should only be called by SumIfs. Yikes!
+ virtual bool NeedParallelReduction() const
+ {
+ assert(dynamic_cast<OpSumIfs*>(mpCodeGen.get()));
+ return GetWindowSize() > 100 &&
+ ((GetStartFixed() && GetEndFixed()) ||
+ (!GetStartFixed() && !GetEndFixed()));
+ }
+
+ virtual void GenSlidingWindowFunction( std::stringstream& ) { }
+
+ std::string GenSlidingWindowDeclRef( bool nested = false ) const
+ {
+ size_t nArrayLength = mpDVR->GetArrayLength();
+ std::stringstream ss;
+ if (!bIsStartFixed && !bIsEndFixed)
+ {
+ if (nested)
+ ss << "((i+gid0) <" << nArrayLength << "?";
+ ss << Base::GetName() << "[i + gid0]";
+ if (nested)
+ ss << ":NAN)";
+ }
+ else
+ {
+ if (nested)
+ ss << "(i <" << nArrayLength << "?";
+ ss << Base::GetName() << "[i]";
+ if (nested)
+ ss << ":NAN)";
+ }
+ return ss.str();
+ }
+ /// Controls how the elements in the DoubleVectorRef are traversed
+ size_t GenReductionLoopHeader(
+ std::stringstream& ss, bool& needBody )
+ {
+ assert(mpDVR);
+ size_t nCurWindowSize = mpDVR->GetRefRowSize();
+
+ {
+ if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+ {
+ ss << "for (int i = ";
+ ss << "gid0; i < " << mpDVR->GetArrayLength();
+ ss << " && i < " << nCurWindowSize << "; i++){\n\t\t";
+ needBody = true;
+ return nCurWindowSize;
+ }
+ else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+ {
+ ss << "for (int i = ";
+ ss << "0; i < " << mpDVR->GetArrayLength();
+ ss << " && i < gid0+" << nCurWindowSize << "; i++){\n\t\t";
+ needBody = true;
+ return nCurWindowSize;
+ }
+ else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+ {
+ ss << "tmpBottom = " << mpCodeGen->GetBottom() << ";\n\t";
+ ss << "{int i;\n\t";
+ std::stringstream temp1, temp2;
+ int outLoopSize = UNROLLING_FACTOR;
+ if (nCurWindowSize / outLoopSize != 0)
+ {
+ ss << "for(int outLoop=0; outLoop<" << nCurWindowSize / outLoopSize << "; outLoop++){\n\t";
+ for (int count = 0; count < outLoopSize; count++)
+ {
+ ss << "i = outLoop*" << outLoopSize << "+" << count << ";\n\t";
+ if (count == 0)
+ {
+ temp1 << "if(i + gid0 < " << mpDVR->GetArrayLength();
+ temp1 << "){\n\t\t";
+ temp1 << "tmp = legalize(";
+ temp1 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
+ temp1 << ", tmp);\n\t\t\t";
+ temp1 << "}\n\t";
+ }
+ ss << temp1.str();
+ }
+ ss << "}\n\t";
+ }
+ // The residual of mod outLoopSize
+ for (size_t count = nCurWindowSize / outLoopSize * outLoopSize; count < nCurWindowSize; count++)
+ {
+ ss << "i = " << count << ";\n\t";
+ if (count == nCurWindowSize / outLoopSize * outLoopSize)
+ {
+ temp2 << "if(i + gid0 < " << mpDVR->GetArrayLength();
+ temp2 << "){\n\t\t";
+ temp2 << "tmp = legalize(";
+ temp2 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
+ temp2 << ", tmp);\n\t\t\t";
+ temp2 << "}\n\t";
+ }
+ ss << temp2.str();
+ }
+ ss << "}\n";
+ needBody = false;
+ return nCurWindowSize;
+ }
+ // (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+ else
+ {
+ ss << "\n\t";
+ ss << "tmpBottom = " << mpCodeGen->GetBottom() << ";\n\t";
+ ss << "{int i;\n\t";
+ std::stringstream temp1, temp2;
+ int outLoopSize = UNROLLING_FACTOR;
+ if (nCurWindowSize / outLoopSize != 0)
+ {
+ ss << "for(int outLoop=0; outLoop<" << nCurWindowSize / outLoopSize << "; outLoop++){\n\t";
+ for (int count = 0; count < outLoopSize; count++)
+ {
+ ss << "i = outLoop*" << outLoopSize << "+" << count << ";\n\t";
+ if (count == 0)
+ {
+ temp1 << "if(i < " << mpDVR->GetArrayLength();
+ temp1 << "){\n\t\t";
+ temp1 << "tmp = legalize(";
+ temp1 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
+ temp1 << ", tmp);\n\t\t\t";
+ temp1 << "}\n\t";
+ }
+ ss << temp1.str();
+ }
+ ss << "}\n\t";
+ }
+ // The residual of mod outLoopSize
+ for (size_t count = nCurWindowSize / outLoopSize * outLoopSize; count < nCurWindowSize; count++)
+ {
+ ss << "i = " << count << ";\n\t";
+ if (count == nCurWindowSize / outLoopSize * outLoopSize)
+ {
+ temp2 << "if(i < " << mpDVR->GetArrayLength();
+ temp2 << "){\n\t\t";
+ temp2 << "tmp = legalize(";
+ temp2 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
+ temp2 << ", tmp);\n\t\t\t";
+ temp2 << "}\n\t";
+ }
+ ss << temp2.str();
+ }
+ ss << "}\n";
+ needBody = false;
+ return nCurWindowSize;
+ }
+ }
+ }
+
+ size_t GetArrayLength() const { return mpDVR->GetArrayLength(); }
+
+ size_t GetWindowSize() const { return mpDVR->GetRefRowSize(); }
+
+ bool GetStartFixed() const { return bIsStartFixed; }
+
+ bool GetEndFixed() const { return bIsEndFixed; }
+
+protected:
+ bool bIsStartFixed, bIsEndFixed;
+ const formula::DoubleVectorRefToken* mpDVR;
+ // from parent nodes
+ std::shared_ptr<SlidingFunctionBase> mpCodeGen;
+};
+
+/// A mixed string/numeric vector
+class DynamicKernelMixedSlidingArgument : public VectorRef
+{
+public:
+ DynamicKernelMixedSlidingArgument( const ScCalcConfig& config, const std::string& s,
+ const FormulaTreeNodeRef& ft, const std::shared_ptr<SlidingFunctionBase>& CodeGen,
+ int index ) :
+ VectorRef(config, s, ft),
+ mDoubleArgument(mCalcConfig, s, ft, CodeGen, index),
+ mStringArgument(mCalcConfig, s + "s", ft, CodeGen, index) { }
+ virtual void GenSlidingWindowDecl( std::stringstream& ss ) const override
+ {
+ mDoubleArgument.GenSlidingWindowDecl(ss);
+ ss << ", ";
+ mStringArgument.GenSlidingWindowDecl(ss);
+ }
+ virtual void GenSlidingWindowFunction( std::stringstream& ) override { }
+ /// Generate declaration
+ virtual void GenDecl( std::stringstream& ss ) const override
+ {
+ mDoubleArgument.GenDecl(ss);
+ ss << ", ";
+ mStringArgument.GenDecl(ss);
+ }
+ virtual void GenDeclRef( std::stringstream& ss ) const override
+ {
+ mDoubleArgument.GenDeclRef(ss);
+ ss << ",";
+ mStringArgument.GenDeclRef(ss);
+ }
+ virtual std::string GenSlidingWindowDeclRef( bool nested ) const override
+ {
+ std::stringstream ss;
+ ss << "(!isnan(" << mDoubleArgument.GenSlidingWindowDeclRef();
+ ss << ")?" << mDoubleArgument.GenSlidingWindowDeclRef();
+ ss << ":" << mStringArgument.GenSlidingWindowDeclRef(nested);
+ ss << ")";
+ return ss.str();
+ }
+ virtual std::string GenDoubleSlidingWindowDeclRef( bool = false ) const override
+ {
+ std::stringstream ss;
+ ss << mDoubleArgument.GenSlidingWindowDeclRef();
+ return ss.str();
+ }
+ virtual std::string GenStringSlidingWindowDeclRef( bool = false ) const override
+ {
+ std::stringstream ss;
+ ss << mStringArgument.GenSlidingWindowDeclRef();
+ return ss.str();
+ }
+ virtual size_t Marshal( cl_kernel k, int argno, int vw, cl_program p ) override
+ {
+ int i = mDoubleArgument.Marshal(k, argno, vw, p);
+ i += mStringArgument.Marshal(k, argno + i, vw, p);
+ return i;
+ }
+
+protected:
+ DynamicKernelSlidingArgument<VectorRef> mDoubleArgument;
+ DynamicKernelSlidingArgument<DynamicKernelStringArgument> mStringArgument;
+};
+
+/// Holds the symbol table for a given dynamic kernel
+class SymbolTable
+{
+public:
+ typedef std::map<const formula::FormulaToken*, DynamicKernelArgumentRef> ArgumentMap;
+ // This avoids instability caused by using pointer as the key type
+ SymbolTable() : mCurId(0) { }
+ template <class T>
+ const DynamicKernelArgument* DeclRefArg(const ScCalcConfig& config, const FormulaTreeNodeRef&,
+ std::shared_ptr<SlidingFunctionBase> pCodeGen, int nResultSize);
+ /// Used to generate sliding window helpers
+ void DumpSlidingWindowFunctions( std::stringstream& ss )
+ {
+ for (auto const& argument : mParams)
+ {
+ argument->GenSlidingWindowFunction(ss);
+ ss << "\n";
+ }
+ }
+ /// Memory mapping from host to device and pass buffers to the given kernel as
+ /// arguments
+ void Marshal( cl_kernel, int, cl_program );
+
+private:
+ unsigned int mCurId;
+ ArgumentMap mSymbols;
+ std::vector<DynamicKernelArgumentRef> mParams;
+};
+
+}
+
+void SymbolTable::Marshal( cl_kernel k, int nVectorWidth, cl_program pProgram )
+{
+ int i = 1; //The first argument is reserved for results
+ for (auto const& argument : mParams)
+ {
+ i += argument->Marshal(k, i, nVectorWidth, pProgram);
+ }
+}
+
+namespace {
+
+/// Handling a Double Vector that is used as a sliding window input
+/// Performs parallel reduction based on given operator
+template<class Base>
+class ParallelReductionVectorRef : public Base
+{
+public:
+ ParallelReductionVectorRef(const ScCalcConfig& config, const std::string& s,
+ const FormulaTreeNodeRef& ft,
+ const std::shared_ptr<SlidingFunctionBase>& CodeGen, int index)
+ : Base(config, s, ft, index)
+ , mpCodeGen(CodeGen)
+ , mpClmem2(nullptr)
+ {
+ FormulaToken* t = ft->GetFormulaToken();
+ if (t->GetType() != formula::svDoubleVectorRef)
+ throw Unhandled(__FILE__, __LINE__);
+ mpDVR = static_cast<const formula::DoubleVectorRefToken*>(t);
+ bIsStartFixed = mpDVR->IsStartFixed();
+ bIsEndFixed = mpDVR->IsEndFixed();
+ }
+
+ /// Emit the definition for the auxiliary reduction kernel
+ virtual void GenSlidingWindowFunction( std::stringstream& ss );
+
+ virtual std::string GenSlidingWindowDeclRef( bool ) const
+ {
+ std::stringstream ss;
+ if (!bIsStartFixed && !bIsEndFixed)
+ ss << Base::GetName() << "[i + gid0]";
+ else
+ ss << Base::GetName() << "[i]";
+ return ss.str();
+ }
+
+ /// Controls how the elements in the DoubleVectorRef are traversed
+ size_t GenReductionLoopHeader(
+ std::stringstream& ss, int nResultSize, bool& needBody );
+
+ virtual size_t Marshal( cl_kernel k, int argno, int w, cl_program mpProgram );
+
+ ~ParallelReductionVectorRef()
+ {
+ if (mpClmem2)
+ {
+ cl_int err;
+ err = clReleaseMemObject(mpClmem2);
+ SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err));
+ mpClmem2 = nullptr;
+ }
+ }
+
+ size_t GetArrayLength() const { return mpDVR->GetArrayLength(); }
+
+ size_t GetWindowSize() const { return mpDVR->GetRefRowSize(); }
+
+ bool GetStartFixed() const { return bIsStartFixed; }
+
+ bool GetEndFixed() const { return bIsEndFixed; }
+
+protected:
+ bool bIsStartFixed, bIsEndFixed;
+ const formula::DoubleVectorRefToken* mpDVR;
+ // from parent nodes
+ std::shared_ptr<SlidingFunctionBase> mpCodeGen;
+ // controls whether to invoke the reduction kernel during marshaling or not
+ cl_mem mpClmem2;
+};
+
+class Reduction : public SlidingFunctionBase
+{
+ int mnResultSize;
+public:
+ explicit Reduction(int nResultSize) : mnResultSize(nResultSize) {}
+
+ typedef DynamicKernelSlidingArgument<VectorRef> NumericRange;
+ typedef DynamicKernelSlidingArgument<DynamicKernelStringArgument> StringRange;
+ typedef ParallelReductionVectorRef<VectorRef> ParallelNumericRange;
+
+ virtual bool HandleNaNArgument( std::stringstream&, unsigned, SubArguments& ) const
+ {
+ return false;
+ }
+
+ virtual void GenSlidingWindowFunction( std::stringstream& ss,
+ const std::string& sSymName, SubArguments& vSubArguments ) override
+ {
+ ss << "\ndouble " << sSymName;
+ ss << "_" << BinFuncName() << "(";
+ for (size_t i = 0; i < vSubArguments.size(); i++)
+ {
+ if (i)
+ ss << ", ";
+ vSubArguments[i]->GenSlidingWindowDecl(ss);
+ }
+ ss << ") {\n";
+ ss << "double tmp = " << GetBottom() << ";\n";
+ ss << "int gid0 = get_global_id(0);\n";
+ if (isAverage() || isMinOrMax())
+ ss << "int nCount = 0;\n";
+ ss << "double tmpBottom;\n";
+ unsigned i = vSubArguments.size();
+ while (i--)
+ {
+ if (NumericRange* NR =
+ dynamic_cast<NumericRange*>(vSubArguments[i].get()))
+ {
+ bool needBody;
+ NR->GenReductionLoopHeader(ss, needBody);
+ if (!needBody)
+ continue;
+ }
+ else if (ParallelNumericRange* PNR =
+ dynamic_cast<ParallelNumericRange*>(vSubArguments[i].get()))
+ {
+ //did not handle yet
+ bool bNeedBody = false;
+ PNR->GenReductionLoopHeader(ss, mnResultSize, bNeedBody);
+ if (!bNeedBody)
+ continue;
+ }
+ else if (StringRange* SR =
+ dynamic_cast<StringRange*>(vSubArguments[i].get()))
+ {
+ //did not handle yet
+ bool needBody;
+ SR->GenReductionLoopHeader(ss, needBody);
+ if (!needBody)
+ continue;
+ }
+ else
+ {
+ FormulaToken* pCur = vSubArguments[i]->GetFormulaToken();
+ assert(pCur);
+ assert(pCur->GetType() != formula::svDoubleVectorRef);
+
+ if (pCur->GetType() == formula::svSingleVectorRef ||
+ pCur->GetType() == formula::svDouble)
+ {
+ ss << "{\n";
+ }
+ }
+ if (ocPush == vSubArguments[i]->GetFormulaToken()->GetOpCode())
+ {
+ bool bNanHandled = HandleNaNArgument(ss, i, vSubArguments);
+
+ ss << "tmpBottom = " << GetBottom() << ";\n";
+
+ if (!bNanHandled)
+ {
+ ss << "if (isnan(";
+ ss << vSubArguments[i]->GenSlidingWindowDeclRef();
+ ss << "))\n";
+ if (ZeroReturnZero())
+ ss << " return 0;\n";
+ else
+ {
+ ss << " tmp = ";
+ ss << Gen2("tmpBottom", "tmp") << ";\n";
+ }
+ ss << "else\n";
+ }
+ ss << "{";
+ ss << " tmp = ";
+ ss << Gen2(vSubArguments[i]->GenSlidingWindowDeclRef(), "tmp");
+ ss << ";\n";
+ ss << " }\n";
+ ss << "}\n";
+ }
+ else
+ {
+ ss << "tmp = ";
+ ss << Gen2(vSubArguments[i]->GenSlidingWindowDeclRef(), "tmp");
+ ss << ";\n";
+ }
+ }
+ if (isAverage())
+ ss <<
+ "if (nCount==0)\n"
+ " return CreateDoubleError(DivisionByZero);\n";
+ else if (isMinOrMax())
+ ss <<
+ "if (nCount==0)\n"
+ " return 0;\n";
+ ss << "return tmp";
+ if (isAverage())
+ ss << "*pow((double)nCount,-1.0)";
+ ss << ";\n}";
+ }
+ virtual bool isAverage() const { return false; }
+ virtual bool isMinOrMax() const { return false; }
+ virtual bool takeString() const override { return false; }
+ virtual bool takeNumeric() const override { return true; }
+};
+
+// Strictly binary operators
+class Binary : public SlidingFunctionBase
+{
+public:
+ virtual void GenSlidingWindowFunction( std::stringstream& ss,
+ const std::string& sSymName, SubArguments& vSubArguments ) override
+ {
+ ss << "\ndouble " << sSymName;
+ ss << "_" << BinFuncName() << "(";
+ assert(vSubArguments.size() == 2);
+ for (size_t i = 0; i < vSubArguments.size(); i++)
+ {
+ if (i)
+ ss << ", ";
+ vSubArguments[i]->GenSlidingWindowDecl(ss);
+ }
+ ss << ") {\n\t";
+ ss << "int gid0 = get_global_id(0), i = 0;\n\t";
+ ss << "double tmp = ";
+ ss << Gen2(vSubArguments[0]->GenSlidingWindowDeclRef(),
+ vSubArguments[1]->GenSlidingWindowDeclRef()) << ";\n\t";
+ ss << "return tmp;\n}";
+ }
+ virtual bool takeString() const override { return true; }
+ virtual bool takeNumeric() const override { return true; }
+};
+
+class SumOfProduct : public SlidingFunctionBase
+{
+public:
+ virtual void GenSlidingWindowFunction( std::stringstream& ss,
+ const std::string& sSymName, SubArguments& vSubArguments ) override
+ {
+ size_t nCurWindowSize = 0;
+ FormulaToken* tmpCur = nullptr;
+ const formula::DoubleVectorRefToken* pCurDVR = nullptr;
+ ss << "\ndouble " << sSymName;
+ ss << "_" << BinFuncName() << "(";
+ for (size_t i = 0; i < vSubArguments.size(); i++)
+ {
+ if (i)
+ ss << ",";
+ vSubArguments[i]->GenSlidingWindowDecl(ss);
+ size_t nCurChildWindowSize = vSubArguments[i]->GetWindowSize();
+ nCurWindowSize = (nCurWindowSize < nCurChildWindowSize) ?
+ nCurChildWindowSize : nCurWindowSize;
+ tmpCur = vSubArguments[i]->GetFormulaToken();
+ if (ocPush == tmpCur->GetOpCode())
+ {
+
+ pCurDVR = static_cast<const formula::DoubleVectorRefToken*>(tmpCur);
+ if (pCurDVR->IsStartFixed() != pCurDVR->IsEndFixed())
+ throw Unhandled(__FILE__, __LINE__);
+ }
+ }
+ ss << ") {\n";
+ ss << " double tmp = 0.0;\n";
+ ss << " int gid0 = get_global_id(0);\n";
+
+ ss << "\tint i;\n\t";
+ ss << "int currentCount0;\n";
+ for (size_t i = 0; i < vSubArguments.size() - 1; i++)
+ ss << "int currentCount" << i + 1 << ";\n";
+ std::stringstream temp3, temp4;
+ int outLoopSize = UNROLLING_FACTOR;
+ if (nCurWindowSize / outLoopSize != 0)
+ {
+ ss << "for(int outLoop=0; outLoop<" <<
+ nCurWindowSize / outLoopSize << "; outLoop++){\n\t";
+ for (int count = 0; count < outLoopSize; count++)
+ {
+ ss << "i = outLoop*" << outLoopSize << "+" << count << ";\n";
+ if (count == 0)
+ {
+ for (size_t i = 0; i < vSubArguments.size(); i++)
+ {
+ tmpCur = vSubArguments[i]->GetFormulaToken();
+ if (ocPush == tmpCur->GetOpCode())
+ {
+ pCurDVR = static_cast<const formula::DoubleVectorRefToken*>(tmpCur);
+ if (!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed())
+ {
+ temp3 << " currentCount";
+ temp3 << i;
+ temp3 << " =i+gid0+1;\n";
+ }
+ else
+ {
+ temp3 << " currentCount";
+ temp3 << i;
+ temp3 << " =i+1;\n";
+ }
+ }
+ }
+
+ temp3 << "tmp = fsum(";
+ for (size_t i = 0; i < vSubArguments.size(); i++)
+ {
+ if (i)
+ temp3 << "*";
+ if (ocPush == vSubArguments[i]->GetFormulaToken()->GetOpCode())
+ {
+ temp3 << "(";
+ temp3 << "(currentCount";
+ temp3 << i;
+ temp3 << ">";
+ if (vSubArguments[i]->GetFormulaToken()->GetType() ==
+ formula::svSingleVectorRef)
+ {
+ const formula::SingleVectorRefToken* pSVR =
+ static_cast<const formula::SingleVectorRefToken*>
+ (vSubArguments[i]->GetFormulaToken());
+ temp3 << pSVR->GetArrayLength();
+ temp3 << ")||isnan(" << vSubArguments[i]
+ ->GenSlidingWindowDeclRef();
+ temp3 << ")?0:";
+ temp3 << vSubArguments[i]->GenSlidingWindowDeclRef();
+ temp3 << ")";
+ }
+ else if (vSubArguments[i]->GetFormulaToken()->GetType() ==
+ formula::svDoubleVectorRef)
+ {
+ const formula::DoubleVectorRefToken* pSVR =
+ static_cast<const formula::DoubleVectorRefToken*>
+ (vSubArguments[i]->GetFormulaToken());
+ temp3 << pSVR->GetArrayLength();
+ temp3 << ")||isnan(" << vSubArguments[i]
+ ->GenSlidingWindowDeclRef(true);
+ temp3 << ")?0:";
+ temp3 << vSubArguments[i]->GenSlidingWindowDeclRef(true);
+ temp3 << ")";
+ }
+
+ }
+ else
+ temp3 << vSubArguments[i]->GenSlidingWindowDeclRef(true);
+ }
+ temp3 << ", tmp);\n\t";
+ }
+ ss << temp3.str();
+ }
+ ss << "}\n\t";
+ }
+ //The residual of mod outLoopSize
+ for (size_t count = nCurWindowSize / outLoopSize * outLoopSize;
+ count < nCurWindowSize; count++)
+ {
+ ss << "i =" << count << ";\n";
+ if (count == nCurWindowSize / outLoopSize * outLoopSize)
+ {
+ for (size_t i = 0; i < vSubArguments.size(); i++)
+ {
+ tmpCur = vSubArguments[i]->GetFormulaToken();
+ if (ocPush == tmpCur->GetOpCode())
+ {
+ pCurDVR = static_cast<const formula::DoubleVectorRefToken*>(tmpCur);
+ if (!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed())
+ {
+ temp4 << " currentCount";
+ temp4 << i;
+ temp4 << " =i+gid0+1;\n";
+ }
+ else
+ {
+ temp4 << " currentCount";
+ temp4 << i;
+ temp4 << " =i+1;\n";
+ }
+ }
+ }
+
+ temp4 << "tmp = fsum(";
+ for (size_t i = 0; i < vSubArguments.size(); i++)
+ {
+ if (i)
+ temp4 << "*";
+ if (ocPush == vSubArguments[i]->GetFormulaToken()->GetOpCode())
+ {
+ temp4 << "(";
+ temp4 << "(currentCount";
+ temp4 << i;
+ temp4 << ">";
+ if (vSubArguments[i]->GetFormulaToken()->GetType() ==
+ formula::svSingleVectorRef)
+ {
+ const formula::SingleVectorRefToken* pSVR =
+ static_cast<const formula::SingleVectorRefToken*>
+ (vSubArguments[i]->GetFormulaToken());
+ temp4 << pSVR->GetArrayLength();
+ temp4 << ")||isnan(" << vSubArguments[i]
+ ->GenSlidingWindowDeclRef();
+ temp4 << ")?0:";
+ temp4 << vSubArguments[i]->GenSlidingWindowDeclRef();
+ temp4 << ")";
+ }
+ else if (vSubArguments[i]->GetFormulaToken()->GetType() ==
+ formula::svDoubleVectorRef)
+ {
+ const formula::DoubleVectorRefToken* pSVR =
+ static_cast<const formula::DoubleVectorRefToken*>
+ (vSubArguments[i]->GetFormulaToken());
+ temp4 << pSVR->GetArrayLength();
+ temp4 << ")||isnan(" << vSubArguments[i]
+ ->GenSlidingWindowDeclRef(true);
+ temp4 << ")?0:";
+ temp4 << vSubArguments[i]->GenSlidingWindowDeclRef(true);
+ temp4 << ")";
+ }
+
+ }
+ else
+ {
+ temp4 << vSubArguments[i]
+ ->GenSlidingWindowDeclRef(true);
+ }
+ }
+ temp4 << ", tmp);\n\t";
+ }
+ ss << temp4.str();
+ }
+ ss << "return tmp;\n";
+ ss << "}";
+ }
+ virtual bool takeString() const override { return false; }
+ virtual bool takeNumeric() const override { return true; }
+};
+
+/// operator traits
+class OpNop : public Reduction
+{
+public:
+ explicit OpNop(int nResultSize) : Reduction(nResultSize) {}
+
+ virtual std::string GetBottom() override { return "0"; }
+ virtual std::string Gen2( const std::string& lhs, const std::string& ) const override
+ {
+ return lhs;
+ }
+ virtual std::string BinFuncName() const override { return "nop"; }
+};
+
+class OpCount : public Reduction
+{
+public:
+ explicit OpCount(int nResultSize) : Reduction(nResultSize) {}
+
+ virtual std::string GetBottom() override { return "0"; }
+ virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
+ {
+ std::stringstream ss;
+ ss << "(isnan(" << lhs << ")?" << rhs << ":" << rhs << "+1.0)";
+ return ss.str();
+ }
+ virtual std::string BinFuncName() const override { return "fcount"; }
+ virtual bool canHandleMultiVector() const override { return true; }
+};
+
+class OpEqual : public Binary
+{
+public:
+ virtual std::string GetBottom() override { return "0"; }
+ virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
+ {
+ std::stringstream ss;
+ ss << "strequal(" << lhs << "," << rhs << ")";
+ return ss.str();
+ }
+ virtual std::string BinFuncName() const override { return "eq"; }
+};
+
+class OpLessEqual : public Binary
+{
+public:
+ virtual std::string GetBottom() override { return "0"; }
+ virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
+ {
+ std::stringstream ss;
+ ss << "(" << lhs << "<=" << rhs << ")";
+ return ss.str();
+ }
+ virtual std::string BinFuncName() const override { return "leq"; }
+};
+
+class OpLess : public Binary
+{
+public:
+ virtual std::string GetBottom() override { return "0"; }
+ virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
+ {
+ std::stringstream ss;
+ ss << "(" << lhs << "<" << rhs << ")";
+ return ss.str();
+ }
+ virtual std::string BinFuncName() const override { return "less"; }
+};
+
+class OpGreater : public Binary
+{
+public:
+ virtual std::string GetBottom() override { return "0"; }
+ virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
+ {
+ std::stringstream ss;
+ ss << "(" << lhs << ">" << rhs << ")";
+ return ss.str();
+ }
+ virtual std::string BinFuncName() const override { return "gt"; }
+};
+
+class OpSum : public Reduction
+{
+public:
+ explicit OpSum(int nResultSize) : Reduction(nResultSize) {}
+
+ virtual std::string GetBottom() override { return "0"; }
+ virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
+ {
+ std::stringstream ss;
+ ss << "fsum_approx((" << lhs << "),(" << rhs << "))";
+ return ss.str();
+ }
+ virtual std::string BinFuncName() const override { return "fsum"; }
+ // All arguments are simply summed, so it doesn't matter if SvDoubleVector is split.
+ virtual bool canHandleMultiVector() const override { return true; }
+};
+
+class OpAverage : public Reduction
+{
+public:
+ explicit OpAverage(int nResultSize) : Reduction(nResultSize) {}
+
+ virtual std::string GetBottom() override { return "0"; }
+ virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
+ {
+ std::stringstream ss;
+ ss << "fsum_count(" << lhs << "," << rhs << ", &nCount)";
+ return ss.str();
+ }
+ virtual std::string BinFuncName() const override { return "average"; }
+ virtual bool isAverage() const override { return true; }
+ virtual bool canHandleMultiVector() const override { return true; }
+};
+
+class OpSub : public Reduction
+{
+public:
+ explicit OpSub(int nResultSize) : Reduction(nResultSize) {}
+
+ virtual std::string GetBottom() override { return "0"; }
+ virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
+ {
+ return "fsub_approx(" + lhs + "," + rhs + ")";
+ }
+ virtual std::string BinFuncName() const override { return "fsub"; }
+};
+
+class OpMul : public Reduction
+{
+public:
+ explicit OpMul(int nResultSize) : Reduction(nResultSize) {}
+
+ virtual std::string GetBottom() override { return "1"; }
+ virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
+ {
+ return lhs + "*" + rhs;
+ }
+ virtual std::string BinFuncName() const override { return "fmul"; }
+ virtual bool ZeroReturnZero() override { return true; }
+};
+
+/// Technically not a reduction, but fits the framework.
+class OpDiv : public Reduction
+{
+public:
+ explicit OpDiv(int nResultSize) : Reduction(nResultSize) {}
+
+ virtual std::string GetBottom() override { return "1.0"; }
+ virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
+ {
+ return "(" + rhs + "==0 ? CreateDoubleError(DivisionByZero) : (" + lhs + "/" + rhs + ") )";
+ }
+ virtual std::string BinFuncName() const override { return "fdiv"; }
+
+ virtual bool HandleNaNArgument( std::stringstream& ss, unsigned argno, SubArguments& vSubArguments ) const override
+ {
+ if (argno == 1)
+ {
+ ss <<
+ "if (isnan(" << vSubArguments[argno]->GenSlidingWindowDeclRef() << ")) {\n"
+ " return CreateDoubleError(DivisionByZero);\n"
+ "}\n";
+ return true;
+ }
+ else if (argno == 0)
+ {
+ ss <<
+ "if (isnan(" << vSubArguments[argno]->GenSlidingWindowDeclRef() << ") &&\n"
+ " !(isnan(" << vSubArguments[1]->GenSlidingWindowDeclRef() << ") || " << vSubArguments[1]->GenSlidingWindowDeclRef() << " == 0)) {\n"
+ " return 0;\n"
+ "}\n";
+ }
+ return false;
+ }
+
+};
+
+class OpMin : public Reduction
+{
+public:
+ explicit OpMin(int nResultSize) : Reduction(nResultSize) {}
+
+ virtual std::string GetBottom() override { return "NAN"; }
+ virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
+ {
+ return "fmin_count(" + lhs + "," + rhs + ", &nCount)";
+ }
+ virtual std::string BinFuncName() const override { return "min"; }
+ virtual bool isMinOrMax() const override { return true; }
+ virtual bool canHandleMultiVector() const override { return true; }
+};
+
+class OpMax : public Reduction
+{
+public:
+ explicit OpMax(int nResultSize) : Reduction(nResultSize) {}
+
+ virtual std::string GetBottom() override { return "NAN"; }
+ virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
+ {
+ return "fmax_count(" + lhs + "," + rhs + ", &nCount)";
+ }
+ virtual std::string BinFuncName() const override { return "max"; }
+ virtual bool isMinOrMax() const override { return true; }
+ virtual bool canHandleMultiVector() const override { return true; }
+};
+
+class OpSumProduct : public SumOfProduct
+{
+public:
+ virtual std::string GetBottom() override { return "0"; }
+ virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
+ {
+ return lhs + "*" + rhs;
+ }
+ virtual std::string BinFuncName() const override { return "fsop"; }
+};
+
+template<class Base>
+void ParallelReductionVectorRef<Base>::GenSlidingWindowFunction( std::stringstream& ss )
+{
+ if (!dynamic_cast<OpAverage*>(mpCodeGen.get()))
+ {
+ std::string name = Base::GetName();
+ ss << "__kernel void " << name;
+ ss << "_reduction(__global double* A, "
+ "__global double *result,int arrayLength,int windowSize){\n";
+ ss << " double tmp, current_result =" <<
+ mpCodeGen->GetBottom();
+ ss << ";\n";
+ ss << " int writePos = get_group_id(1);\n";
+ ss << " int lidx = get_local_id(0);\n";
+ ss << " __local double shm_buf[256];\n";
+ if (mpDVR->IsStartFixed())
+ ss << " int offset = 0;\n";
+ else // if (!mpDVR->IsStartFixed())
+ ss << " int offset = get_group_id(1);\n";
+ if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+ ss << " int end = windowSize;\n";
+ else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+ ss << " int end = offset + windowSize;\n";
+ else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+ ss << " int end = windowSize + get_group_id(1);\n";
+ else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+ ss << " int end = windowSize;\n";
+ ss << " end = min(end, arrayLength);\n";
+
+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
+ ss << " int loop = arrayLength/512 + 1;\n";
+ ss << " for (int l=0; l<loop; l++){\n";
+ ss << " tmp = " << mpCodeGen->GetBottom() << ";\n";
+ ss << " int loopOffset = l*512;\n";
+ ss << " if((loopOffset + lidx + offset + 256) < end) {\n";
+ ss << " tmp = legalize(" << mpCodeGen->Gen2(
+ "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
+ ss << " tmp = legalize(" << mpCodeGen->Gen2(
+ "A[loopOffset + lidx + offset + 256]", "tmp") << ", tmp);\n";
+ ss << " } else if ((loopOffset + lidx + offset) < end)\n";
+ ss << " tmp = legalize(" << mpCodeGen->Gen2(
+ "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
+ ss << " shm_buf[lidx] = tmp;\n";
+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
+ ss << " for (int i = 128; i >0; i/=2) {\n";
+ ss << " if (lidx < i)\n";
+ ss << " shm_buf[lidx] = ";
+ // Special case count
+ if (dynamic_cast<OpCount*>(mpCodeGen.get()))
+ ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
+ else
+ ss << mpCodeGen->Gen2("shm_buf[lidx]", "shm_buf[lidx + i]") << ";\n";
+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
+ ss << " }\n";
+ ss << " if (lidx == 0)\n";
+ ss << " current_result =";
+ if (dynamic_cast<OpCount*>(mpCodeGen.get()))
+ ss << "current_result + shm_buf[0]";
+ else
+ ss << mpCodeGen->Gen2("current_result", "shm_buf[0]");
+ ss << ";\n";
+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
+ ss << " }\n";
+ ss << " if (lidx == 0)\n";
+ ss << " result[writePos] = current_result;\n";
+ ss << "}\n";
+ }
+ else
+ {
+ std::string name = Base::GetName();
+ /*sum reduction*/
+ ss << "__kernel void " << name << "_sum";
+ ss << "_reduction(__global double* A, "
+ "__global double *result,int arrayLength,int windowSize){\n";
+ ss << " double tmp, current_result =" <<
+ mpCodeGen->GetBottom();
+ ss << ";\n";
+ ss << " int writePos = get_group_id(1);\n";
+ ss << " int lidx = get_local_id(0);\n";
+ ss << " __local double shm_buf[256];\n";
+ if (mpDVR->IsStartFixed())
+ ss << " int offset = 0;\n";
+ else // if (!mpDVR->IsStartFixed())
+ ss << " int offset = get_group_id(1);\n";
+ if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+ ss << " int end = windowSize;\n";
+ else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+ ss << " int end = offset + windowSize;\n";
+ else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+ ss << " int end = windowSize + get_group_id(1);\n";
+ else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+ ss << " int end = windowSize;\n";
+ ss << " end = min(end, arrayLength);\n";
+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
+ ss << " int loop = arrayLength/512 + 1;\n";
+ ss << " for (int l=0; l<loop; l++){\n";
+ ss << " tmp = " << mpCodeGen->GetBottom() << ";\n";
+ ss << " int loopOffset = l*512;\n";
+ ss << " if((loopOffset + lidx + offset + 256) < end) {\n";
+ ss << " tmp = legalize(";
+ ss << "(A[loopOffset + lidx + offset]+ tmp)";
+ ss << ", tmp);\n";
+ ss << " tmp = legalize((A[loopOffset + lidx + offset + 256]+ tmp)";
+ ss << ", tmp);\n";
+ ss << " } else if ((loopOffset + lidx + offset) < end)\n";
+ ss << " tmp = legalize((A[loopOffset + lidx + offset] + tmp)";
+ ss << ", tmp);\n";
+ ss << " shm_buf[lidx] = tmp;\n";
+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
+ ss << " for (int i = 128; i >0; i/=2) {\n";
+ ss << " if (lidx < i)\n";
+ ss << " shm_buf[lidx] = ";
+ ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
+ ss << " }\n";
+ ss << " if (lidx == 0)\n";
+ ss << " current_result =";
+ ss << "current_result + shm_buf[0]";
+ ss << ";\n";
+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
+ ss << " }\n";
+ ss << " if (lidx == 0)\n";
+ ss << " result[writePos] = current_result;\n";
+ ss << "}\n";
+ /*count reduction*/
+ ss << "__kernel void " << name << "_count";
+ ss << "_reduction(__global double* A, "
+ "__global double *result,int arrayLength,int windowSize){\n";
+ ss << " double tmp, current_result =" <<
+ mpCodeGen->GetBottom();
+ ss << ";\n";
+ ss << " int writePos = get_group_id(1);\n";
+ ss << " int lidx = get_local_id(0);\n";
+ ss << " __local double shm_buf[256];\n";
+ if (mpDVR->IsStartFixed())
+ ss << " int offset = 0;\n";
+ else // if (!mpDVR->IsStartFixed())
+ ss << " int offset = get_group_id(1);\n";
+ if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+ ss << " int end = windowSize;\n";
+ else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+ ss << " int end = offset + windowSize;\n";
+ else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+ ss << " int end = windowSize + get_group_id(1);\n";
+ else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+ ss << " int end = windowSize;\n";
+ ss << " end = min(end, arrayLength);\n";
+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
+ ss << " int loop = arrayLength/512 + 1;\n";
+ ss << " for (int l=0; l<loop; l++){\n";
+ ss << " tmp = " << mpCodeGen->GetBottom() << ";\n";
+ ss << " int loopOffset = l*512;\n";
+ ss << " if((loopOffset + lidx + offset + 256) < end) {\n";
+ ss << " tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
+ ss << ", tmp);\n";
+ ss << " tmp = legalize((isnan(A[loopOffset + lidx + offset+256])?tmp:tmp+1.0)";
+ ss << ", tmp);\n";
+ ss << " } else if ((loopOffset + lidx + offset) < end)\n";
+ ss << " tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
+ ss << ", tmp);\n";
+ ss << " shm_buf[lidx] = tmp;\n";
+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
+ ss << " for (int i = 128; i >0; i/=2) {\n";
+ ss << " if (lidx < i)\n";
+ ss << " shm_buf[lidx] = ";
+ ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
+ ss << " }\n";
+ ss << " if (lidx == 0)\n";
+ ss << " current_result =";
+ ss << "current_result + shm_buf[0];";
+ ss << ";\n";
+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
+ ss << " }\n";
+ ss << " if (lidx == 0)\n";
+ ss << " result[writePos] = current_result;\n";
+ ss << "}\n";
+ }
+
+}
+
+template<class Base>
+size_t ParallelReductionVectorRef<Base>::GenReductionLoopHeader(
+ std::stringstream& ss, int nResultSize, bool& needBody )
+{
+ assert(mpDVR);
+ size_t nCurWindowSize = mpDVR->GetRefRowSize();
+ std::string temp = Base::GetName() + "[gid0]";
+ ss << "tmp = ";
+ // Special case count
+ if (dynamic_cast<OpAverage*>(mpCodeGen.get()))
+ {
+ ss << mpCodeGen->Gen2(temp, "tmp") << ";\n";
+ ss << "nCount = nCount-1;\n";
+ ss << "nCount = nCount +"; /*re-assign nCount from count reduction*/
+ ss << Base::GetName() << "[gid0+" << nResultSize << "]" << ";\n";
+ }
+ else if (dynamic_cast<OpCount*>(mpCodeGen.get()))
+ ss << temp << "+ tmp";
+ else
+ ss << mpCodeGen->Gen2(temp, "tmp");
+ ss << ";\n\t";
+ needBody = false;
+ return nCurWindowSize;
+}
+
+template<class Base>
+size_t ParallelReductionVectorRef<Base>::Marshal( cl_kernel k, int argno, int w, cl_program mpProgram )
+{
+ assert(Base::mpClmem == nullptr);
+
+ OpenCLZone zone;
+ openclwrapper::KernelEnv kEnv;
+ openclwrapper::setKernelEnv(&kEnv);
+ cl_int err;
+ size_t nInput = mpDVR->GetArrayLength();
+ size_t nCurWindowSize = mpDVR->GetRefRowSize();
+ // create clmem buffer
+ if (mpDVR->GetArrays()[Base::mnIndex].mpNumericArray == nullptr)
+ throw Unhandled(__FILE__, __LINE__);
+ double* pHostBuffer = const_cast<double*>(
+ mpDVR->GetArrays()[Base::mnIndex].mpNumericArray);
+ size_t szHostBuffer = nInput * sizeof(double);
+ Base::mpClmem = clCreateBuffer(kEnv.mpkContext,
+ cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_USE_HOST_PTR,
+ szHostBuffer,
+ pHostBuffer, &err);
+ SAL_INFO("sc.opencl", "Created buffer " << Base::mpClmem << " size " << nInput << "*" << sizeof(double) << "=" << szHostBuffer << " using host buffer " << pHostBuffer);
+
+ mpClmem2 = clCreateBuffer(kEnv.mpkContext,
+ CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
+ sizeof(double) * w, nullptr, nullptr);
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
+ SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << sizeof(double) << "*" << w << "=" << (sizeof(double)*w));
+
+ // reproduce the reduction function name
+ std::string kernelName;
+ if (!dynamic_cast<OpAverage*>(mpCodeGen.get()))
+ kernelName = Base::GetName() + "_reduction";
+ else
+ kernelName = Base::GetName() + "_sum_reduction";
+ cl_kernel redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
+ if (err != CL_SUCCESS)
+ throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
+ SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram);
+
+ // set kernel arg of reduction kernel
+ // TODO(Wei Wei): use unique name for kernel
+ cl_mem buf = Base::GetCLBuffer();
+ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf);
+ err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
+ static_cast<void*>(&buf));
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+
+ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2);
+ err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2);
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+
+ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput);
+ err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput));
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+
+ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize);
+ err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+
+ // set work group size and execute
+ size_t global_work_size[] = { 256, static_cast<size_t>(w) };
+ size_t const local_work_size[] = { 256, 1 };
+ SAL_INFO("sc.opencl", "Enqueuing kernel " << redKernel);
+ err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr,
+ global_work_size, local_work_size, 0, nullptr, nullptr);
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
+ err = clFinish(kEnv.mpkCmdQueue);
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clFinish", err, __FILE__, __LINE__);
+ if (dynamic_cast<OpAverage*>(mpCodeGen.get()))
+ {
+ /*average need more reduction kernel for count computing*/
+ std::unique_ptr<double[]> pAllBuffer(new double[2 * w]);
+ double* resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
+ mpClmem2,
+ CL_TRUE, CL_MAP_READ, 0,
+ sizeof(double) * w, 0, nullptr, nullptr,
+ &err));
+ if (err != CL_SUCCESS)
+ throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
+
+ for (int i = 0; i < w; i++)
+ pAllBuffer[i] = resbuf[i];
+ err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, nullptr, nullptr);
+ if (err != CL_SUCCESS)
+ throw OpenCLError("clEnqueueUnmapMemObject", err, __FILE__, __LINE__);
+
+ kernelName = Base::GetName() + "_count_reduction";
+ redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
+ if (err != CL_SUCCESS)
+ throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
+ SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram);
+
+ // set kernel arg of reduction kernel
+ buf = Base::GetCLBuffer();
+ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf);
+ err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
+ static_cast<void*>(&buf));
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+
+ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2);
+ err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2);
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+
+ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput);
+ err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput));
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+
+ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize);
+ err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+
+ // set work group size and execute
+ size_t global_work_size1[] = { 256, static_cast<size_t>(w) };
+ size_t const local_work_size1[] = { 256, 1 };
+ SAL_INFO("sc.opencl", "Enqueuing kernel " << redKernel);
+ err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr,
+ global_work_size1, local_work_size1, 0, nullptr, nullptr);
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
+ err = clFinish(kEnv.mpkCmdQueue);
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clFinish", err, __FILE__, __LINE__);
+ resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
+ mpClmem2,
+ CL_TRUE, CL_MAP_READ, 0,
+ sizeof(double) * w, 0, nullptr, nullptr,
+ &err));
+ if (err != CL_SUCCESS)
+ throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
+ for (int i = 0; i < w; i++)
+ pAllBuffer[i + w] = resbuf[i];
+ err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, nullptr, nullptr);
+ // FIXME: Is it intentional to not throw an OpenCLError even if the clEnqueueUnmapMemObject() fails?
+ if (CL_SUCCESS != err)
+ SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << openclwrapper::errorString(err));
+ if (mpClmem2)
+ {
+ err = clReleaseMemObject(mpClmem2);
+ SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err));
+ mpClmem2 = nullptr;
+ }
+ mpClmem2 = clCreateBuffer(kEnv.mpkContext,
+ cl_mem_flags(CL_MEM_READ_WRITE) | CL_MEM_COPY_HOST_PTR,
+ w * sizeof(double) * 2, pAllBuffer.get(), &err);
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
+ SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << w << "*" << sizeof(double) << "=" << (w*sizeof(double)) << " copying host buffer " << pAllBuffer.get());
+ }
+ // set kernel arg
+ SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem2);
+ err = clSetKernelArg(k, argno, sizeof(cl_mem), &mpClmem2);
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+ return 1;
+}
+
+struct SumIfsArgs
+{
+ explicit SumIfsArgs(cl_mem x) : mCLMem(x), mConst(0.0) { }
+ explicit SumIfsArgs(double x) : mCLMem(nullptr), mConst(x) { }
+ cl_mem mCLMem;
+ double mConst;
+};
+
+/// Helper functions that have multiple buffers
+class DynamicKernelSoPArguments : public DynamicKernelArgument
+{
+public:
+ typedef std::vector<DynamicKernelArgumentRef> SubArgumentsType;
+
+ DynamicKernelSoPArguments( const ScCalcConfig& config,
+ const std::string& s, const FormulaTreeNodeRef& ft,
+ std::shared_ptr<SlidingFunctionBase> pCodeGen, int nResultSize );
+
+ /// Create buffer and pass the buffer to a given kernel
+ virtual size_t Marshal( cl_kernel k, int argno, int nVectorWidth, cl_program pProgram ) override
+ {
+ OpenCLZone zone;
+ unsigned i = 0;
+ for (const auto& rxSubArgument : mvSubArguments)
+ {
+ i += rxSubArgument->Marshal(k, argno + i, nVectorWidth, pProgram);
+ }
+ if (dynamic_cast<OpGeoMean*>(mpCodeGen.get()))
+ {
+ openclwrapper::KernelEnv kEnv;
+ openclwrapper::setKernelEnv(&kEnv);
+ cl_int err;
+ cl_mem pClmem2;
+
+ std::vector<cl_mem> vclmem;
+ for (const auto& rxSubArgument : mvSubArguments)
+ {
+ if (VectorRef* VR = dynamic_cast<VectorRef*>(rxSubArgument.get()))
+ vclmem.push_back(VR->GetCLBuffer());
+ else
+ vclmem.push_back(nullptr);
+ }
+ pClmem2 = clCreateBuffer(kEnv.mpkContext, CL_MEM_READ_WRITE,
+ sizeof(double) * nVectorWidth, nullptr, &err);
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
+ SAL_INFO("sc.opencl", "Created buffer " << pClmem2 << " size " << sizeof(double) << "*" << nVectorWidth << "=" << (sizeof(double)*nVectorWidth));
+
+ std::string kernelName = "GeoMean_reduction";
+ cl_kernel redKernel = clCreateKernel(pProgram, kernelName.c_str(), &err);
+ if (err != CL_SUCCESS)
+ throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
+ SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << pProgram);
+
+ // set kernel arg of reduction kernel
+ for (size_t j = 0; j < vclmem.size(); j++)
+ {
+ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << j << ": " << (vclmem[j] ? "cl_mem" : "double") << ": " << vclmem[j]);
+ err = clSetKernelArg(redKernel, j,
+ vclmem[j] ? sizeof(cl_mem) : sizeof(double),
+ static_cast<void*>(&vclmem[j]));
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+ }
+ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << vclmem.size() << ": cl_mem: " << pClmem2);
+ err = clSetKernelArg(redKernel, vclmem.size(), sizeof(cl_mem), static_cast<void*>(&pClmem2));
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+
+ // set work group size and execute
+ size_t global_work_size[] = { 256, static_cast<size_t>(nVectorWidth) };
+ size_t const local_work_size[] = { 256, 1 };
+ SAL_INFO("sc.opencl", "Enqueuing kernel " << redKernel);
+ err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr,
+ global_work_size, local_work_size, 0, nullptr, nullptr);
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
+ err = clFinish(kEnv.mpkCmdQueue);
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clFinish", err, __FILE__, __LINE__);
+
+ // Pass pClmem2 to the "real" kernel
+ SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << pClmem2);
+ err = clSetKernelArg(k, argno, sizeof(cl_mem), static_cast<void*>(&pClmem2));
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+ }
+ if (OpSumIfs* OpSumCodeGen = dynamic_cast<OpSumIfs*>(mpCodeGen.get()))
+ {
+ openclwrapper::KernelEnv kEnv;
+ openclwrapper::setKernelEnv(&kEnv);
+ cl_int err;
+ DynamicKernelArgument* Arg = mvSubArguments[0].get();
+ DynamicKernelSlidingArgument<VectorRef>* slidingArgPtr =
+ static_cast<DynamicKernelSlidingArgument<VectorRef>*>(Arg);
+ mpClmem2 = nullptr;
+
+ if (OpSumCodeGen->NeedReductionKernel())
+ {
+ size_t nInput = slidingArgPtr->GetArrayLength();
+ size_t nCurWindowSize = slidingArgPtr->GetWindowSize();
+ std::vector<SumIfsArgs> vclmem;
+
+ for (const auto& rxSubArgument : mvSubArguments)
+ {
+ if (VectorRef* VR = dynamic_cast<VectorRef*>(rxSubArgument.get()))
+ vclmem.emplace_back(VR->GetCLBuffer());
+ else if (DynamicKernelConstantArgument* CA = dynamic_cast<DynamicKernelConstantArgument*>(rxSubArgument.get()))
+ vclmem.emplace_back(CA->GetDouble());
+ else
+ vclmem.emplace_back(nullptr);
+ }
+ mpClmem2 = clCreateBuffer(kEnv.mpkContext, CL_MEM_READ_WRITE,
+ sizeof(double) * nVectorWidth, nullptr, &err);
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
+ SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << sizeof(double) << "*" << nVectorWidth << "=" << (sizeof(double)*nVectorWidth));
+
+ std::string kernelName = mvSubArguments[0]->GetName() + "_SumIfs_reduction";
+ cl_kernel redKernel = clCreateKernel(pProgram, kernelName.c_str(), &err);
+ if (err != CL_SUCCESS)
+ throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
+ SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << pProgram);
+
+ // set kernel arg of reduction kernel
+ for (size_t j = 0; j < vclmem.size(); j++)
+ {
+ if (vclmem[j].mCLMem)
+ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << j << ": cl_mem: " << vclmem[j].mCLMem);
+ else
+ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << j << ": double: " << vclmem[j].mConst);
+ err = clSetKernelArg(redKernel, j,
+ vclmem[j].mCLMem ? sizeof(cl_mem) : sizeof(double),
+ vclmem[j].mCLMem ? static_cast<void*>(&vclmem[j].mCLMem) :
+ static_cast<void*>(&vclmem[j].mConst));
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+ }
+ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << vclmem.size() << ": cl_mem: " << mpClmem2);
+ err = clSetKernelArg(redKernel, vclmem.size(), sizeof(cl_mem), static_cast<void*>(&mpClmem2));
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+
+ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << (vclmem.size() + 1) << ": cl_int: " << nInput);
+ err = clSetKernelArg(redKernel, vclmem.size() + 1, sizeof(cl_int), static_cast<void*>(&nInput));
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+
+ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << (vclmem.size() + 2) << ": cl_int: " << nCurWindowSize);
+ err = clSetKernelArg(redKernel, vclmem.size() + 2, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+ // set work group size and execute
+ size_t global_work_size[] = { 256, static_cast<size_t>(nVectorWidth) };
+ size_t const local_work_size[] = { 256, 1 };
+ SAL_INFO("sc.opencl", "Enqueuing kernel " << redKernel);
+ err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr,
+ global_work_size, local_work_size, 0, nullptr, nullptr);
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
+
+ err = clFinish(kEnv.mpkCmdQueue);
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clFinish", err, __FILE__, __LINE__);
+
+ SAL_INFO("sc.opencl", "Releasing kernel " << redKernel);
+ err = clReleaseKernel(redKernel);
+ SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseKernel failed: " << openclwrapper::errorString(err));
+
+ // Pass mpClmem2 to the "real" kernel
+ SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem2);
+ err = clSetKernelArg(k, argno, sizeof(cl_mem), static_cast<void*>(&mpClmem2));
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+ }
+ }
+ return i;
+ }
+
+ virtual void GenSlidingWindowFunction( std::stringstream& ss ) override
+ {
+ for (DynamicKernelArgumentRef & rArg : mvSubArguments)
+ rArg->GenSlidingWindowFunction(ss);
+ mpCodeGen->GenSlidingWindowFunction(ss, mSymName, mvSubArguments);
+ }
+ virtual void GenDeclRef( std::stringstream& ss ) const override
+ {
+ for (size_t i = 0; i < mvSubArguments.size(); i++)
+ {
+ if (i)
+ ss << ",";
+ mvSubArguments[i]->GenDeclRef(ss);
+ }
+ }
+ virtual void GenDecl( std::stringstream& ss ) const override
+ {
+ for (SubArgumentsType::const_iterator it = mvSubArguments.begin(), e = mvSubArguments.end(); it != e;
+ ++it)
+ {
+ if (it != mvSubArguments.begin())
+ ss << ", ";
+ (*it)->GenDecl(ss);
+ }
+ }
+
+ virtual size_t GetWindowSize() const override
+ {
+ size_t nCurWindowSize = 0;
+ for (const auto & rSubArgument : mvSubArguments)
+ {
+ size_t nCurChildWindowSize = rSubArgument->GetWindowSize();
+ nCurWindowSize = (nCurWindowSize < nCurChildWindowSize) ?
+ nCurChildWindowSize : nCurWindowSize;
+ }
+ return nCurWindowSize;
+ }
+
+ /// When declared as input to a sliding window function
+ virtual void GenSlidingWindowDecl( std::stringstream& ss ) const override
+ {
+ for (SubArgumentsType::const_iterator it = mvSubArguments.begin(), e = mvSubArguments.end(); it != e;
+ ++it)
+ {
+ if (it != mvSubArguments.begin())
+ ss << ", ";
+ (*it)->GenSlidingWindowDecl(ss);
+ }
+ }
+ /// Generate either a function call to each children
+ /// or directly inline it if we are already inside a loop
+ virtual std::string GenSlidingWindowDeclRef( bool nested = false ) const override
+ {
+ std::stringstream ss;
+ if (!nested)
+ {
+ ss << mSymName << "_" << mpCodeGen->BinFuncName() << "(";
+ for (size_t i = 0; i < mvSubArguments.size(); i++)
+ {
+ if (i)
+ ss << ", ";
+ mvSubArguments[i]->GenDeclRef(ss);
+ }
+ ss << ")";
+ }
+ else
+ {
+ if (mvSubArguments.size() != 2)
+ throw Unhandled(__FILE__, __LINE__);
+ bool bArgument1_NeedNested =
+ mvSubArguments[0]->GetFormulaToken()->GetType()
+ != formula::svSingleVectorRef;
+ bool bArgument2_NeedNested =
+ mvSubArguments[1]->GetFormulaToken()->GetType()
+ != formula::svSingleVectorRef;
+ ss << "(";
+ ss << mpCodeGen->
+ Gen2(mvSubArguments[0]
+ ->GenSlidingWindowDeclRef(bArgument1_NeedNested),
+ mvSubArguments[1]
+ ->GenSlidingWindowDeclRef(bArgument2_NeedNested));
+ ss << ")";
+ }
+ return ss.str();
+ }
+ virtual std::string DumpOpName() const override
+ {
+ std::string t = "_" + mpCodeGen->BinFuncName();
+ for (const auto & rSubArgument : mvSubArguments)
+ t += rSubArgument->DumpOpName();
+ return t;
+ }
+ virtual void DumpInlineFun( std::set<std::string>& decls,
+ std::set<std::string>& funs ) const override
+ {
+ mpCodeGen->BinInlineFun(decls, funs);
+ for (const auto & rSubArgument : mvSubArguments)
+ rSubArgument->DumpInlineFun(decls, funs);
+ }
+ virtual bool IsEmpty() const override
+ {
+ for (const auto & rSubArgument : mvSubArguments)
+ if( !rSubArgument->IsEmpty())
+ return false;
+ return true;
+ }
+ virtual ~DynamicKernelSoPArguments() override
+ {
+ if (mpClmem2)
+ {
+ cl_int err;
+ err = clReleaseMemObject(mpClmem2);
+ SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err));
+ mpClmem2 = nullptr;
+ }
+ }
+
+private:
+ SubArgumentsType mvSubArguments;
+ std::shared_ptr<SlidingFunctionBase> mpCodeGen;
+ cl_mem mpClmem2;
+};
+
+}
+
+static DynamicKernelArgumentRef SoPHelper( const ScCalcConfig& config,
+ const std::string& ts, const FormulaTreeNodeRef& ft, std::shared_ptr<SlidingFunctionBase> pCodeGen,
+ int nResultSize )
+{
+ return std::make_shared<DynamicKernelSoPArguments>(config, ts, ft, std::move(pCodeGen), nResultSize);
+}
+
+template<class Base>
+static std::shared_ptr<DynamicKernelArgument> VectorRefFactory( const ScCalcConfig& config, const std::string& s,
+ const FormulaTreeNodeRef& ft,
+ std::shared_ptr<SlidingFunctionBase>& pCodeGen,
+ int index )
+{
+ //Black lists ineligible classes here ..
+ // SUMIFS does not perform parallel reduction at DoubleVectorRef level
+ if (dynamic_cast<OpSumIfs*>(pCodeGen.get()))
+ {
+ // coverity[identical_branches] - only identical if Base happens to be VectorRef
+ if (index == 0) // the first argument of OpSumIfs cannot be strings anyway
+ return std::make_shared<DynamicKernelSlidingArgument<VectorRef>>(config, s, ft, pCodeGen, index);
+ return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
+ }
+ // AVERAGE is not supported yet
+ //Average has been supported by reduction kernel
+ /*else if (dynamic_cast<OpAverage*>(pCodeGen.get()))
+ {
+ return new DynamicKernelSlidingArgument<Base>(config, s, ft, pCodeGen, index);
+ }*/
+ // MUL is not supported yet
+ else if (dynamic_cast<OpMul*>(pCodeGen.get()))
+ {
+ return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
+ }
+ // Sub is not a reduction per se
+ else if (dynamic_cast<OpSub*>(pCodeGen.get()))
+ {
+ return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
+ }
+ // Only child class of Reduction is supported
+ else if (!dynamic_cast<Reduction*>(pCodeGen.get()))
+ {
+ return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
+ }
+
+ const formula::DoubleVectorRefToken* pDVR =
+ static_cast<const formula::DoubleVectorRefToken*>(
+ ft->GetFormulaToken());
+ // Window being too small to justify a parallel reduction
+ if (pDVR->GetRefRowSize() < REDUCE_THRESHOLD)
+ return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
+ if (pDVR->IsStartFixed() == pDVR->IsEndFixed())
+ return std::make_shared<ParallelReductionVectorRef<Base>>(config, s, ft, pCodeGen, index);
+ else // Other cases are not supported as well
+ return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
+}
+
+DynamicKernelSoPArguments::DynamicKernelSoPArguments(const ScCalcConfig& config,
+ const std::string& s, const FormulaTreeNodeRef& ft, std::shared_ptr<SlidingFunctionBase> pCodeGen, int nResultSize ) :
+ DynamicKernelArgument(config, s, ft), mpCodeGen(pCodeGen), mpClmem2(nullptr)
+{
+ size_t nChildren = ft->Children.size();
+
+ for (size_t i = 0; i < nChildren; i++)
+ {
+ FormulaTreeNodeRef rChild = ft->Children[i];
+ if (!rChild)
+ throw Unhandled(__FILE__, __LINE__);
+ FormulaToken* pChild = rChild->GetFormulaToken();
+ if (!pChild)
+ throw Unhandled(__FILE__, __LINE__);
+ OpCode opc = pChild->GetOpCode();
+ std::stringstream tmpname;
+ tmpname << s << "_" << i;
+ std::string ts = tmpname.str();
+ switch (opc)
+ {
+ case ocPush:
+ if (pChild->GetType() == formula::svDoubleVectorRef)
+ {
+ const formula::DoubleVectorRefToken* pDVR =
+ static_cast<const formula::DoubleVectorRefToken*>(pChild);
+
+ // The code below will split one svDoubleVectorRef into one subargument
+ // for each column of data, and then all these subarguments will be later
+ // passed to the code generating the function. Most of the code then
+ // simply treats each subargument as one argument to the function, and thus
+ // could break in this case.
+ // As a simple solution, simply prevent this case, unless the code in question
+ // explicitly claims it will handle this situation properly.
+ if( pDVR->GetArrays().size() > 1 )
+ {
+ if( !pCodeGen->canHandleMultiVector())
+ throw UnhandledToken(("Function '" + pCodeGen->BinFuncName()
+ + "' cannot handle multi-column DoubleRef").c_str(), __FILE__, __LINE__);
+
+ SAL_INFO("sc.opencl", "multi-column DoubleRef");
+
+ }
+
+ // FIXME: The Right Thing to do would be to compare the accumulated kernel
+ // parameter size against the CL_DEVICE_MAX_PARAMETER_SIZE of the device, but
+ // let's just do this sanity check for now. The kernel compilation will
+ // hopefully fail anyway if the size of parameters exceeds the limit and this
+ // sanity check is just to make us bail out a bit earlier.
+
+ // The number 50 comes from the fact that the minimum size of
+ // CL_DEVICE_MAX_PARAMETER_SIZE is 256, which for 32-bit code probably means 64
+ // of them. Round down a bit.
+
+ if (pDVR->GetArrays().size() > 50)
+ throw UnhandledToken(("Kernel would have ridiculously many parameters (" + std::to_string(2 + pDVR->GetArrays().size()) + ")").c_str(), __FILE__, __LINE__);
+
+ for (size_t j = 0; j < pDVR->GetArrays().size(); ++j)
+ {
+ SAL_INFO("sc.opencl", "i=" << i << " j=" << j <<
+ " mpNumericArray=" << pDVR->GetArrays()[j].mpNumericArray <<
+ " mpStringArray=" << pDVR->GetArrays()[j].mpStringArray <<
+ " allStringsAreNull=" << (AllStringsAreNull(pDVR->GetArrays()[j].mpStringArray, pDVR->GetArrayLength())?"YES":"NO") <<
+ " takeNumeric=" << (pCodeGen->takeNumeric()?"YES":"NO") <<
+ " takeString=" << (pCodeGen->takeString()?"YES":"NO"));
+
+ if (pDVR->GetArrays()[j].mpNumericArray &&
+ pCodeGen->takeNumeric() &&
+ pDVR->GetArrays()[j].mpStringArray &&
+ pCodeGen->takeString())
+ {
+ // Function takes numbers or strings, there are both
+ SAL_INFO("sc.opencl", "Numbers and strings");
+ mvSubArguments.push_back(
+ std::make_shared<DynamicKernelMixedSlidingArgument>(mCalcConfig,
+ ts, ft->Children[i], mpCodeGen, j));
+ }
+ else if (pDVR->GetArrays()[j].mpNumericArray &&
+ pCodeGen->takeNumeric() &&
+ (AllStringsAreNull(pDVR->GetArrays()[j].mpStringArray, pDVR->GetArrayLength()) || mCalcConfig.meStringConversion == ScCalcConfig::StringConversion::ZERO))
+ {
+ // Function takes numbers, and either there
+ // are no strings, or there are strings but
+ // they are to be treated as zero
+ SAL_INFO("sc.opencl", "Numbers (no strings or strings treated as zero)");
+ mvSubArguments.push_back(
+ VectorRefFactory<VectorRef>(mCalcConfig,
+ ts, ft->Children[i], mpCodeGen, j));
+ }
+ else if (pDVR->GetArrays()[j].mpNumericArray == nullptr &&
+ pCodeGen->takeNumeric() &&
+ pDVR->GetArrays()[j].mpStringArray &&
+ mCalcConfig.meStringConversion == ScCalcConfig::StringConversion::ZERO)
+ {
+ // Function takes numbers, and there are only
+ // strings, but they are to be treated as zero
+ SAL_INFO("sc.opencl", "Only strings even if want numbers but should be treated as zero");
+ mvSubArguments.push_back(
+ VectorRefFactory<VectorRef>(mCalcConfig,
+ ts, ft->Children[i], mpCodeGen, j));
+ }
+ else if (pDVR->GetArrays()[j].mpStringArray &&
+ pCodeGen->takeString())
+ {
+ // There are strings, and the function takes strings.
+ SAL_INFO("sc.opencl", "Strings only");
+ mvSubArguments.push_back(
+ VectorRefFactory
+ <DynamicKernelStringArgument>(mCalcConfig,
+ ts, ft->Children[i], mpCodeGen, j));
+ }
+ else if (AllStringsAreNull(pDVR->GetArrays()[j].mpStringArray, pDVR->GetArrayLength()) &&
+ pDVR->GetArrays()[j].mpNumericArray == nullptr)
+ {
+ // There are only empty cells. Push as an
+ // array of NANs
+ SAL_INFO("sc.opencl", "Only empty cells");
+ mvSubArguments.push_back(
+ VectorRefFactory<VectorRef>(mCalcConfig,
+ ts, ft->Children[i], mpCodeGen, j));
+ }
+ else
+ {
+ SAL_INFO("sc.opencl", "Unhandled case, rejecting for OpenCL");
+ throw UnhandledToken(("Unhandled numbers/strings combination for '"
+ + pCodeGen->BinFuncName() + "'").c_str(), __FILE__, __LINE__);
+ }
+ }
+ }
+ else if (pChild->GetType() == formula::svSingleVectorRef)
+ {
+ const formula::SingleVectorRefToken* pSVR =
+ static_cast<const formula::SingleVectorRefToken*>(pChild);
+
+ SAL_INFO("sc.opencl", "i=" << i <<
+ " mpNumericArray=" << pSVR->GetArray().mpNumericArray <<
+ " mpStringArray=" << pSVR->GetArray().mpStringArray <<
+ " allStringsAreNull=" << (AllStringsAreNull(pSVR->GetArray().mpStringArray, pSVR->GetArrayLength())?"YES":"NO") <<
+ " takeNumeric=" << (pCodeGen->takeNumeric()?"YES":"NO") <<
+ " takeString=" << (pCodeGen->takeString()?"YES":"NO"));
+
+ if (pSVR->GetArray().mpNumericArray &&
+ pCodeGen->takeNumeric() &&
+ pSVR->GetArray().mpStringArray &&
+ pCodeGen->takeString())
+ {
+ // Function takes numbers or strings, there are both
+ SAL_INFO("sc.opencl", "Numbers and strings");
+ mvSubArguments.push_back(
+ std::make_shared<DynamicKernelMixedArgument>(mCalcConfig,
+ ts, ft->Children[i]));
+ }
+ else if (pSVR->GetArray().mpNumericArray &&
+ pCodeGen->takeNumeric() &&
+ (AllStringsAreNull(pSVR->GetArray().mpStringArray, pSVR->GetArrayLength()) || mCalcConfig.meStringConversion == ScCalcConfig::StringConversion::ZERO))
+ {
+ // Function takes numbers, and either there
+ // are no strings, or there are strings but
+ // they are to be treated as zero
+ SAL_INFO("sc.opencl", "Numbers (no strings or strings treated as zero)");
+ mvSubArguments.push_back(
+ std::make_shared<VectorRef>(mCalcConfig, ts,
+ ft->Children[i]));
+ }
+ else if (pSVR->GetArray().mpNumericArray == nullptr &&
+ pCodeGen->takeNumeric() &&
+ pSVR->GetArray().mpStringArray &&
+ mCalcConfig.meStringConversion == ScCalcConfig::StringConversion::ZERO)
+ {
+ // Function takes numbers, and there are only
+ // strings, but they are to be treated as zero
+ SAL_INFO("sc.opencl", "Only strings even if want numbers but should be treated as zero");
+ mvSubArguments.push_back(
+ std::make_shared<VectorRef>(mCalcConfig, ts,
+ ft->Children[i]));
+ }
+ else if (pSVR->GetArray().mpStringArray &&
+ pCodeGen->takeString())
+ {
+ // There are strings, and the function takes strings.
+ SAL_INFO("sc.opencl", "Strings only");
+ mvSubArguments.push_back(
+ std::make_shared<DynamicKernelStringArgument>(mCalcConfig,
+ ts, ft->Children[i]));
+ }
+ else if (AllStringsAreNull(pSVR->GetArray().mpStringArray, pSVR->GetArrayLength()) &&
+ pSVR->GetArray().mpNumericArray == nullptr)
+ {
+ // There are only empty cells. Push as an
+ // array of NANs
+ SAL_INFO("sc.opencl", "Only empty cells");
+ mvSubArguments.push_back(
+ std::make_shared<VectorRef>(mCalcConfig, ts,
+ ft->Children[i]));
+ }
+ else
+ {
+ SAL_INFO("sc.opencl", "Unhandled case, rejecting for OpenCL");
+ throw UnhandledToken(("Unhandled numbers/strings combination for '"
+ + pCodeGen->BinFuncName() + "'").c_str(), __FILE__, __LINE__);
+ }
+ }
+ else if (pChild->GetType() == formula::svDouble)
+ {
+ SAL_INFO("sc.opencl", "Constant number case");
+ mvSubArguments.push_back(
+ std::make_shared<DynamicKernelConstantArgument>(mCalcConfig, ts,
+ ft->Children[i]));
+ }
+ else if (pChild->GetType() == formula::svString
+ && pCodeGen->takeString())
+ {
+ SAL_INFO("sc.opencl", "Constant string case");
+ mvSubArguments.push_back(
+ std::make_shared<ConstStringArgument>(mCalcConfig, ts,
+ ft->Children[i]));
+ }
+ else
+ {
+ SAL_INFO("sc.opencl", "Unhandled operand, rejecting for OpenCL");
+ throw UnhandledToken(("unhandled operand " + StackVarEnumToString(pChild->GetType()) + " for ocPush").c_str(), __FILE__, __LINE__);
+ }
+ break;
+ case ocDiv:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpDiv>(nResultSize), nResultSize));
+ break;
+ case ocMul:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpMul>(nResultSize), nResultSize));
+ break;
+ case ocSub:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpSub>(nResultSize), nResultSize));
+ break;
+ case ocAdd:
+ case ocSum:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpSum>(nResultSize), nResultSize));
+ break;
+ case ocAverage:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpAverage>(nResultSize), nResultSize));
+ break;
+ case ocMin:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpMin>(nResultSize), nResultSize));
+ break;
+ case ocMax:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpMax>(nResultSize), nResultSize));
+ break;
+ case ocCount:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCount>(nResultSize), nResultSize));
+ break;
+ case ocSumProduct:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpSumProduct>(), nResultSize));
+ break;
+ case ocIRR:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpIRR>(), nResultSize));
+ break;
+ case ocMIRR:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpMIRR>(), nResultSize));
+ break;
+ case ocPMT:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpPMT>(), nResultSize));
+ break;
+ case ocRate:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpIntrate>(), nResultSize));
+ break;
+ case ocRRI:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpRRI>(), nResultSize));
+ break;
+ case ocPpmt:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpPPMT>(), nResultSize));
+ break;
+ case ocFisher:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpFisher>(), nResultSize));
+ break;
+ case ocFisherInv:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpFisherInv>(), nResultSize));
+ break;
+ case ocGamma:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpGamma>(), nResultSize));
+ break;
+ case ocSLN:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpSLN>(), nResultSize));
+ break;
+ case ocGammaLn:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpGammaLn>(), nResultSize));
+ break;
+ case ocGauss:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpGauss>(), nResultSize));
+ break;
+ /*case ocGeoMean:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpGeoMean));
+ break;*/
+ case ocHarMean:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpHarMean>(), nResultSize));
+ break;
+ case ocLessEqual:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpLessEqual>(), nResultSize));
+ break;
+ case ocLess:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpLess>(), nResultSize));
+ break;
+ case ocEqual:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpEqual>(), nResultSize));
+ break;
+ case ocGreater:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpGreater>(), nResultSize));
+ break;
+ case ocSYD:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpSYD>(), nResultSize));
+ break;
+ case ocCorrel:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCorrel>(), nResultSize));
+ break;
+ case ocCos:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCos>(), nResultSize));
+ break;
+ case ocNegBinomVert :
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpNegbinomdist>(), nResultSize));
+ break;
+ case ocPearson:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpPearson>(), nResultSize));
+ break;
+ case ocRSQ:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpRsq>(), nResultSize));
+ break;
+ case ocCosecant:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCsc>(), nResultSize));
+ break;
+ case ocISPMT:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpISPMT>(), nResultSize));
+ break;
+ case ocPDuration:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpPDuration>(), nResultSize));
+ break;
+ case ocSinHyp:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpSinh>(), nResultSize));
+ break;
+ case ocAbs:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpAbs>(), nResultSize));
+ break;
+ case ocPV:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpPV>(), nResultSize));
+ break;
+ case ocSin:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpSin>(), nResultSize));
+ break;
+ case ocTan:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpTan>(), nResultSize));
+ break;
+ case ocTanHyp:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpTanH>(), nResultSize));
+ break;
+ case ocStandard:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpStandard>(), nResultSize));
+ break;
+ case ocWeibull:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpWeibull>(), nResultSize));
+ break;
+ /*case ocMedian:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i],std::make_shared<OpMedian));
+ break;*/
+ case ocDDB:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpDDB>(), nResultSize));
+ break;
+ case ocFV:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpFV>(), nResultSize));
+ break;
+ case ocSumIfs:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpSumIfs>(), nResultSize));
+ break;
+ /*case ocVBD:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i],std::make_shared<OpVDB));
+ break;*/
+ case ocKurt:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpKurt>(), nResultSize));
+ break;
+ /*case ocNper:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpNper));
+ break;*/
+ case ocNormDist:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpNormdist>(), nResultSize));
+ break;
+ case ocArcCos:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpArcCos>(), nResultSize));
+ break;
+ case ocSqrt:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpSqrt>(), nResultSize));
+ break;
+ case ocArcCosHyp:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpArcCosHyp>(), nResultSize));
+ break;
+ case ocNPV:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpNPV>(), nResultSize));
+ break;
+ case ocStdNormDist:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpNormsdist>(), nResultSize));
+ break;
+ case ocNormInv:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpNorminv>(), nResultSize));
+ break;
+ case ocSNormInv:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpNormsinv>(), nResultSize));
+ break;
+ case ocPermut:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpPermut>(), nResultSize));
+ break;
+ case ocPermutationA:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpPermutationA>(), nResultSize));
+ break;
+ case ocPhi:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpPhi>(), nResultSize));
+ break;
+ case ocIpmt:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpIPMT>(), nResultSize));
+ break;
+ case ocConfidence:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpConfidence>(), nResultSize));
+ break;
+ case ocIntercept:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpIntercept>(), nResultSize));
+ break;
+ case ocDB:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
+ std::make_shared<OpDB>(), nResultSize));
+ break;
+ case ocLogInv:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpLogInv>(), nResultSize));
+ break;
+ case ocArcCot:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpArcCot>(), nResultSize));
+ break;
+ case ocCosHyp:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpCosh>(), nResultSize));
+ break;
+ case ocCritBinom:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpCritBinom>(), nResultSize));
+ break;
+ case ocArcCotHyp:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpArcCotHyp>(), nResultSize));
+ break;
+ case ocArcSin:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpArcSin>(), nResultSize));
+ break;
+ case ocArcSinHyp:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpArcSinHyp>(), nResultSize));
+ break;
+ case ocArcTan:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpArcTan>(), nResultSize));
+ break;
+ case ocArcTanHyp:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpArcTanH>(), nResultSize));
+ break;
+ case ocBitAnd:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpBitAnd>(), nResultSize));
+ break;
+ case ocForecast:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpForecast>(), nResultSize));
+ break;
+ case ocLogNormDist:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpLogNormDist>(), nResultSize));
+ break;
+ /*case ocGammaDist:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpGammaDist));
+ break;*/
+ case ocLn:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpLn>(), nResultSize));
+ break;
+ case ocRound:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpRound>(), nResultSize));
+ break;
+ case ocCot:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpCot>(), nResultSize));
+ break;
+ case ocCotHyp:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpCoth>(), nResultSize));
+ break;
+ case ocFDist:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpFdist>(), nResultSize));
+ break;
+ case ocVar:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpVar>(), nResultSize));
+ break;
+ /*case ocChiDist:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i],std::make_shared<OpChiDist));
+ break;*/
+ case ocPow:
+ case ocPower:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpPower>(), nResultSize));
+ break;
+ case ocOdd:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpOdd>(), nResultSize));
+ break;
+ /*case ocChiSqDist:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i],std::make_shared<OpChiSqDist));
+ break;
+ case ocChiSqInv:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i],std::make_shared<OpChiSqInv));
+ break;
+ case ocGammaInv:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpGammaInv));
+ break;*/
+ case ocFloor:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpFloor>(), nResultSize));
+ break;
+ /*case ocFInv:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpFInv));
+ break;*/
+ case ocFTest:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpFTest>(), nResultSize));
+ break;
+ case ocB:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpB>(), nResultSize));
+ break;
+ case ocBetaDist:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpBetaDist>(), nResultSize));
+ break;
+ case ocCosecantHyp:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpCscH>(), nResultSize));
+ break;
+ case ocExp:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpExp>(), nResultSize));
+ break;
+ case ocLog10:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpLog10>(), nResultSize));
+ break;
+ case ocExpDist:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpExponDist>(), nResultSize));
+ break;
+ case ocAverageIfs:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpAverageIfs>(), nResultSize));
+ break;
+ case ocCountIfs:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpCountIfs>(), nResultSize));
+ break;
+ case ocCombinA:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpCombinA>(), nResultSize));
+ break;
+ case ocEven:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpEven>(), nResultSize));
+ break;
+ case ocLog:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpLog>(), nResultSize));
+ break;
+ case ocMod:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpMod>(), nResultSize));
+ break;
+ case ocTrunc:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpTrunc>(), nResultSize));
+ break;
+ case ocSkew:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpSkew>(), nResultSize));
+ break;
+ case ocArcTan2:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpArcTan2>(), nResultSize));
+ break;
+ case ocBitOr:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpBitOr>(), nResultSize));
+ break;
+ case ocBitLshift:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpBitLshift>(), nResultSize));
+ break;
+ case ocBitRshift:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpBitRshift>(), nResultSize));
+ break;
+ case ocBitXor:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpBitXor>(), nResultSize));
+ break;
+ /*case ocChiInv:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i],std::make_shared<OpChiInv));
+ break;*/
+ case ocPoissonDist:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpPoisson>(), nResultSize));
+ break;
+ case ocSumSQ:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpSumSQ>(), nResultSize));
+ break;
+ case ocSkewp:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpSkewp>(), nResultSize));
+ break;
+ case ocBinomDist:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpBinomdist>(), nResultSize));
+ break;
+ case ocVarP:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpVarP>(), nResultSize));
+ break;
+ case ocCeil:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpCeil>(), nResultSize));
+ break;
+ case ocCombin:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpCombin>(), nResultSize));
+ break;
+ case ocDevSq:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpDevSq>(), nResultSize));
+ break;
+ case ocStDev:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpStDev>(), nResultSize));
+ break;
+ case ocSlope:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpSlope>(), nResultSize));
+ break;
+ case ocSTEYX:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpSTEYX>(), nResultSize));
+ break;
+ case ocZTest:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpZTest>(), nResultSize));
+ break;
+ case ocPi:
+ mvSubArguments.push_back(
+ std::make_shared<DynamicKernelPiArgument>(mCalcConfig, ts,
+ ft->Children[i]));
+ break;
+ case ocRandom:
+ mvSubArguments.push_back(
+ std::make_shared<DynamicKernelRandomArgument>(mCalcConfig, ts,
+ ft->Children[i]));
+ break;
+ case ocProduct:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpProduct>(), nResultSize));
+ break;
+ /*case ocHypGeomDist:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i],std::make_shared<OpHypGeomDist));
+ break;*/
+ case ocSumX2MY2:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpSumX2MY2>(), nResultSize));
+ break;
+ case ocSumX2DY2:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpSumX2PY2>(), nResultSize));
+ break;
+ /*case ocBetaInv:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i],std::make_shared<OpBetainv));
+ break;*/
+ case ocTTest:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpTTest>(), nResultSize));
+ break;
+ case ocTDist:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpTDist>(), nResultSize));
+ break;
+ /*case ocTInv:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpTInv));
+ break;*/
+ case ocSumXMY2:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpSumXMY2>(), nResultSize));
+ break;
+ case ocStDevP:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpStDevP>(), nResultSize));
+ break;
+ case ocCovar:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpCovar>(), nResultSize));
+ break;
+ case ocAnd:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpAnd>(), nResultSize));
+ break;
+ case ocVLookup:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpVLookup>(), nResultSize));
+ break;
+ case ocOr:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpOr>(), nResultSize));
+ break;
+ case ocNot:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpNot>(), nResultSize));
+ break;
+ case ocXor:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpXor>(), nResultSize));
+ break;
+ case ocDBMax:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpDmax>(), nResultSize));
+ break;
+ case ocDBMin:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpDmin>(), nResultSize));
+ break;
+ case ocDBProduct:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpDproduct>(), nResultSize));
+ break;
+ case ocDBAverage:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpDaverage>(), nResultSize));
+ break;
+ case ocDBStdDev:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpDstdev>(), nResultSize));
+ break;
+ case ocDBStdDevP:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpDstdevp>(), nResultSize));
+ break;
+ case ocDBSum:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpDsum>(), nResultSize));
+ break;
+ case ocDBVar:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpDvar>(), nResultSize));
+ break;
+ case ocDBVarP:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpDvarp>(), nResultSize));
+ break;
+ case ocAverageIf:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpAverageIf>(), nResultSize));
+ break;
+ case ocDBCount:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpDcount>(), nResultSize));
+ break;
+ case ocDBCount2:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpDcount2>(), nResultSize));
+ break;
+ case ocDeg:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpDeg>(), nResultSize));
+ break;
+ case ocRoundUp:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpRoundUp>(), nResultSize));
+ break;
+ case ocRoundDown:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpRoundDown>(), nResultSize));
+ break;
+ case ocInt:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpInt>(), nResultSize));
+ break;
+ case ocRad:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpRadians>(), nResultSize));
+ break;
+ case ocCountIf:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpCountIf>(), nResultSize));
+ break;
+ case ocIsEven:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpIsEven>(), nResultSize));
+ break;
+ case ocIsOdd:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpIsOdd>(), nResultSize));
+ break;
+ case ocFact:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpFact>(), nResultSize));
+ break;
+ case ocMinA:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpMinA>(), nResultSize));
+ break;
+ case ocCount2:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpCountA>(), nResultSize));
+ break;
+ case ocMaxA:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpMaxA>(), nResultSize));
+ break;
+ case ocAverageA:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpAverageA>(), nResultSize));
+ break;
+ case ocVarA:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpVarA>(), nResultSize));
+ break;
+ case ocVarPA:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpVarPA>(), nResultSize));
+ break;
+ case ocStDevA:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpStDevA>(), nResultSize));
+ break;
+ case ocStDevPA:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpStDevPA>(), nResultSize));
+ break;
+ case ocSecant:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpSec>(), nResultSize));
+ break;
+ case ocSecantHyp:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpSecH>(), nResultSize));
+ break;
+ case ocSumIf:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpSumIf>(), nResultSize));
+ break;
+ case ocNegSub:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpNegSub>(), nResultSize));
+ break;
+ case ocAveDev:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpAveDev>(), nResultSize));
+ break;
+ case ocIf:
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpIf>(), nResultSize));
+ break;
+ case ocExternal:
+ if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getEffect")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpEffective>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getCumipmt")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCumipmt>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getNominal")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpNominal>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getCumprinc")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCumprinc>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getXnpv")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpXNPV>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getPricemat")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpPriceMat>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getReceived")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpReceived>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getTbilleq")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpTbilleq>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getTbillprice")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpTbillprice>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getTbillyield")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpTbillyield>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getFvschedule")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpFvschedule>(), nResultSize));
+ }
+ /*else if ( pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getYield")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpYield));
+ }*/
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getYielddisc")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpYielddisc>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getYieldmat")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpYieldmat>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getAccrintm")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpAccrintm>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getCoupdaybs")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCoupdaybs>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getDollarde")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpDollarde>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getDollarfr")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpDollarfr>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getCoupdays")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCoupdays>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getCoupdaysnc")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCoupdaysnc>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getDisc")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpDISC>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getIntrate")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpINTRATE>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getPrice")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpPrice>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getCoupnum")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
+ std::make_shared<OpCoupnum>(), nResultSize));
+ }
+ /*else if pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getDuration"))
+ {
+ mvSubArguments.push_back(
+ SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpDuration_ADD));
+ }*/
+ /*else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getAmordegrc")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
+ std::make_shared<OpAmordegrc, nResultSize));
+ }*/
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getAmorlinc")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
+ std::make_shared<OpAmorlinc>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getMduration")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
+ std::make_shared<OpMDuration>(), nResultSize));
+ }
+ /*else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getXirr")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
+ std::make_shared<OpXirr, nResultSize));
+ }*/
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getOddlprice")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpOddlprice>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getOddlyield")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
+ std::make_shared<OpOddlyield>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getPricedisc")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
+ ft->Children[i], std::make_shared<OpPriceDisc>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getCouppcd")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
+ std::make_shared<OpCouppcd>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getCoupncd")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
+ std::make_shared<OpCoupncd>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getAccrint")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
+ std::make_shared<OpAccrint>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getSqrtpi")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
+ std::make_shared<OpSqrtPi>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getConvert")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
+ std::make_shared<OpConvert>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getIseven")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
+ std::make_shared<OpIsEven>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getIsodd")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
+ std::make_shared<OpIsOdd>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getMround")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
+ std::make_shared<OpMROUND>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getQuotient")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
+ std::make_shared<OpQuotient>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getSeriessum")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
+ std::make_shared<OpSeriesSum>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getBesselj")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
+ std::make_shared<OpBesselj>(), nResultSize));
+ }
+ else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getGestep")
+ {
+ mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
+ std::make_shared<OpGestep>(), nResultSize));
+ }
+ else
+ throw UnhandledToken(OUString("unhandled external " + pChild->GetExternal()).toUtf8().getStr(), __FILE__, __LINE__);
+ break;
+
+ default:
+ throw UnhandledToken(OUString("unhandled opcode "
+ + formula::FormulaCompiler().GetOpCodeMap(com::sun::star::sheet::FormulaLanguage::ENGLISH)->getSymbol(opc)
+ + "(" + OUString::number(opc) + ")").toUtf8().getStr(), __FILE__, __LINE__);
+ }
+ }
+}
+
+namespace {
+
+class DynamicKernel : public CompiledFormula
+{
+public:
+ DynamicKernel( const ScCalcConfig& config, const FormulaTreeNodeRef& r, int nResultSize );
+ virtual ~DynamicKernel() override;
+
+ static std::shared_ptr<DynamicKernel> create( const ScCalcConfig& config, const ScTokenArray& rCode, int nResultSize );
+
+ /// OpenCL code generation
+ void CodeGen();
+
+ /// Produce kernel hash
+ std::string const & GetMD5();
+
+ /// Create program, build, and create kernel
+ /// TODO cache results based on kernel body hash
+ /// TODO: abstract OpenCL part out into OpenCL wrapper.
+ void CreateKernel();
+
+ /// Prepare buffers, marshal them to GPU, and launch the kernel
+ /// TODO: abstract OpenCL part out into OpenCL wrapper.
+ void Launch( size_t nr );
+
+ cl_mem GetResultBuffer() const { return mpResClmem; }
+
+private:
+ ScCalcConfig mCalcConfig;
+ FormulaTreeNodeRef mpRoot;
+ SymbolTable mSyms;
+ std::string mKernelSignature, mKernelHash;
+ std::string mFullProgramSrc;
+ cl_program mpProgram;
+ cl_kernel mpKernel;
+ cl_mem mpResClmem; // Results
+ std::set<std::string> inlineDecl;
+ std::set<std::string> inlineFun;
+
+ int mnResultSize;
+};
+
+}
+
+DynamicKernel::DynamicKernel( const ScCalcConfig& config, const FormulaTreeNodeRef& r, int nResultSize ) :
+ mCalcConfig(config),
+ mpRoot(r),
+ mpProgram(nullptr),
+ mpKernel(nullptr),
+ mpResClmem(nullptr),
+ mnResultSize(nResultSize) {}
+
+DynamicKernel::~DynamicKernel()
+{
+ cl_int err;
+ if (mpResClmem)
+ {
+ err = clReleaseMemObject(mpResClmem);
+ SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err));
+ }
+ if (mpKernel)
+ {
+ SAL_INFO("sc.opencl", "Releasing kernel " << mpKernel);
+ err = clReleaseKernel(mpKernel);
+ SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseKernel failed: " << openclwrapper::errorString(err));
+ }
+ // mpProgram is not going to be released here -- it's cached.
+}
+
+void DynamicKernel::CodeGen()
+{
+ // Traverse the tree of expression and declare symbols used
+ const DynamicKernelArgument* DK = mSyms.DeclRefArg<DynamicKernelSoPArguments>(mCalcConfig, mpRoot, std::make_shared<OpNop>(mnResultSize), mnResultSize);
+
+ std::stringstream decl;
+ if (openclwrapper::gpuEnv.mnKhrFp64Flag)
+ {
+ decl << "#if __OPENCL_VERSION__ < 120\n";
+ decl << "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n";
+ decl << "#endif\n";
+ }
+ else if (openclwrapper::gpuEnv.mnAmdFp64Flag)
+ {
+ decl << "#pragma OPENCL EXTENSION cl_amd_fp64: enable\n";
+ }
+ // preambles
+ decl << publicFunc;
+ DK->DumpInlineFun(inlineDecl, inlineFun);
+ for (const auto& rItem : inlineDecl)
+ {
+ decl << rItem;
+ }
+
+ for (const auto& rItem : inlineFun)
+ {
+ decl << rItem;
+ }
+ mSyms.DumpSlidingWindowFunctions(decl);
+ mKernelSignature = DK->DumpOpName();
+ decl << "__kernel void DynamicKernel" << mKernelSignature;
+ decl << "(__global double *result";
+ if( !DK->IsEmpty())
+ {
+ decl << ", ";
+ DK->GenSlidingWindowDecl(decl);
+ }
+ decl << ") {\n\tint gid0 = get_global_id(0);\n\tresult[gid0] = " <<
+ DK->GenSlidingWindowDeclRef() << ";\n}\n";
+ mFullProgramSrc = decl.str();
+ SAL_INFO(
+ "sc.opencl.source",
+ (mKernelSignature[0] == '_'
+ ? mKernelSignature.substr(1, std::string::npos) : mKernelSignature)
+ << " program to be compiled:\n" << linenumberify(mFullProgramSrc));
+}
+
+std::string const & DynamicKernel::GetMD5()
+{
+ if (mKernelHash.empty())
+ {
+ std::stringstream md5s;
+ // Compute MD5SUM of kernel body to obtain the name
+ sal_uInt8 result[RTL_DIGEST_LENGTH_MD5];
+ rtl_digest_MD5(
+ mFullProgramSrc.c_str(),
+ mFullProgramSrc.length(), result,
+ RTL_DIGEST_LENGTH_MD5);
+ for (sal_uInt8 i : result)
+ {
+ md5s << std::hex << static_cast<int>(i);
+ }
+ mKernelHash = md5s.str();
+ }
+ return mKernelHash;
+}
+
+/// Build code
+void DynamicKernel::CreateKernel()
+{
+ if (mpKernel)
+ // already created.
+ return;
+
+ cl_int err;
+ std::string kname = "DynamicKernel" + mKernelSignature;
+ // Compile kernel here!!!
+
+ OpenCLZone zone;
+ openclwrapper::KernelEnv kEnv;
+ openclwrapper::setKernelEnv(&kEnv);
+ const char* src = mFullProgramSrc.c_str();
+ static std::string lastOneKernelHash;
+ static std::string lastSecondKernelHash;
+ static cl_program lastOneProgram = nullptr;
+ static cl_program lastSecondProgram = nullptr;
+ std::string KernelHash = mKernelSignature + GetMD5();
+ if (lastOneKernelHash == KernelHash && lastOneProgram)
+ {
+ mpProgram = lastOneProgram;
+ }
+ else if (lastSecondKernelHash == KernelHash && lastSecondProgram)
+ {
+ mpProgram = lastSecondProgram;
+ }
+ else
+ { // doesn't match the last compiled formula.
+
+ if (lastSecondProgram)
+ {
+ SAL_INFO("sc.opencl", "Releasing program " << lastSecondProgram);
+ err = clReleaseProgram(lastSecondProgram);
+ SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseProgram failed: " << openclwrapper::errorString(err));
+ lastSecondProgram = nullptr;
+ }
+ if (openclwrapper::buildProgramFromBinary("",
+ &openclwrapper::gpuEnv, KernelHash.c_str(), 0))
+ {
+ mpProgram = openclwrapper::gpuEnv.mpArryPrograms[0];
+ openclwrapper::gpuEnv.mpArryPrograms[0] = nullptr;
+ }
+ else
+ {
+ mpProgram = clCreateProgramWithSource(kEnv.mpkContext, 1,
+ &src, nullptr, &err);
+ if (err != CL_SUCCESS)
+ throw OpenCLError("clCreateProgramWithSource", err, __FILE__, __LINE__);
+ SAL_INFO("sc.opencl", "Created program " << mpProgram);
+
+ err = clBuildProgram(mpProgram, 1,
+ &openclwrapper::gpuEnv.mpDevID, "", nullptr, nullptr);
+ if (err != CL_SUCCESS)
+ {
+#if OSL_DEBUG_LEVEL > 0
+ if (err == CL_BUILD_PROGRAM_FAILURE)
+ {
+ cl_build_status stat;
+ cl_int e = clGetProgramBuildInfo(
+ mpProgram, openclwrapper::gpuEnv.mpDevID,
+ CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status),
+ &stat, nullptr);
+ SAL_WARN_IF(
+ e != CL_SUCCESS, "sc.opencl",
+ "after CL_BUILD_PROGRAM_FAILURE,"
+ " clGetProgramBuildInfo(CL_PROGRAM_BUILD_STATUS)"
+ " fails with " << openclwrapper::errorString(e));
+ if (e == CL_SUCCESS)
+ {
+ size_t n;
+ e = clGetProgramBuildInfo(
+ mpProgram, openclwrapper::gpuEnv.mpDevID,
+ CL_PROGRAM_BUILD_LOG, 0, nullptr, &n);
+ SAL_WARN_IF(
+ e != CL_SUCCESS || n == 0, "sc.opencl",
+ "after CL_BUILD_PROGRAM_FAILURE,"
+ " clGetProgramBuildInfo(CL_PROGRAM_BUILD_LOG)"
+ " fails with " << openclwrapper::errorString(e) << ", n=" << n);
+ if (e == CL_SUCCESS && n != 0)
+ {
+ std::vector<char> log(n);
+ e = clGetProgramBuildInfo(
+ mpProgram, openclwrapper::gpuEnv.mpDevID,
+ CL_PROGRAM_BUILD_LOG, n, log.data(), nullptr);
+ SAL_WARN_IF(
+ e != CL_SUCCESS || n == 0, "sc.opencl",
+ "after CL_BUILD_PROGRAM_FAILURE,"
+ " clGetProgramBuildInfo("
+ "CL_PROGRAM_BUILD_LOG) fails with " << openclwrapper::errorString(e));
+ if (e == CL_SUCCESS)
+ SAL_WARN(
+ "sc.opencl",
+ "CL_BUILD_PROGRAM_FAILURE, status " << stat
+ << ", log \"" << log.data() << "\"");
+ }
+ }
+ }
+#endif
+#ifdef DBG_UTIL
+ SAL_WARN("sc.opencl", "Program failed to build, aborting.");
+ abort(); // make sure errors such as typos don't accidentally go unnoticed
+#else
+ throw OpenCLError("clBuildProgram", err, __FILE__, __LINE__);
+#endif
+ }
+ SAL_INFO("sc.opencl", "Built program " << mpProgram);
+
+ // Generate binary out of compiled kernel.
+ openclwrapper::generatBinFromKernelSource(mpProgram,
+ (mKernelSignature + GetMD5()).c_str());
+ }
+ lastSecondKernelHash = lastOneKernelHash;
+ lastSecondProgram = lastOneProgram;
+ lastOneKernelHash = KernelHash;
+ lastOneProgram = mpProgram;
+ }
+ mpKernel = clCreateKernel(mpProgram, kname.c_str(), &err);
+ if (err != CL_SUCCESS)
+ throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
+ SAL_INFO("sc.opencl", "Created kernel " << mpKernel << " with name " << kname << " in program " << mpProgram);
+}
+
+void DynamicKernel::Launch( size_t nr )
+{
+ OpenCLZone zone;
+ openclwrapper::KernelEnv kEnv;
+ openclwrapper::setKernelEnv(&kEnv);
+ cl_int err;
+ // The results
+ mpResClmem = clCreateBuffer(kEnv.mpkContext,
+ cl_mem_flags(CL_MEM_READ_WRITE) | CL_MEM_ALLOC_HOST_PTR,
+ nr * sizeof(double), nullptr, &err);
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
+ SAL_INFO("sc.opencl", "Created buffer " << mpResClmem << " size " << nr << "*" << sizeof(double) << "=" << (nr*sizeof(double)));
+
+ SAL_INFO("sc.opencl", "Kernel " << mpKernel << " arg " << 0 << ": cl_mem: " << mpResClmem << " (result)");
+ err = clSetKernelArg(mpKernel, 0, sizeof(cl_mem), static_cast<void*>(&mpResClmem));
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+ // The rest of buffers
+ mSyms.Marshal(mpKernel, nr, mpProgram);
+ size_t global_work_size[] = { nr };
+ SAL_INFO("sc.opencl", "Enqueuing kernel " << mpKernel);
+ err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, mpKernel, 1, nullptr,
+ global_work_size, nullptr, 0, nullptr, nullptr);
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
+ err = clFlush(kEnv.mpkCmdQueue);
+ if (CL_SUCCESS != err)
+ throw OpenCLError("clFlush", err, __FILE__, __LINE__);
+}
+
+// Symbol lookup. If there is no such symbol created, allocate one
+// kernel with argument with unique name and return so.
+// The template argument T must be a subclass of DynamicKernelArgument
+template <typename T>
+const DynamicKernelArgument* SymbolTable::DeclRefArg(const ScCalcConfig& config,
+ const FormulaTreeNodeRef& t,
+ std::shared_ptr<SlidingFunctionBase> pCodeGen, int nResultSize)
+{
+ FormulaToken* ref = t->GetFormulaToken();
+ ArgumentMap::iterator it = mSymbols.find(ref);
+ if (it == mSymbols.end())
+ {
+ // Allocate new symbols
+ std::stringstream ss;
+ ss << "tmp" << mCurId++;
+ DynamicKernelArgumentRef new_arg = std::make_shared<T>(config, ss.str(), t, std::move(pCodeGen), nResultSize);
+ mSymbols[ref] = new_arg;
+ mParams.push_back(new_arg);
+ return new_arg.get();
+ }
+ else
+ {
+ return it->second.get();
+ }
+}
+
+FormulaGroupInterpreterOpenCL::FormulaGroupInterpreterOpenCL() {}
+
+FormulaGroupInterpreterOpenCL::~FormulaGroupInterpreterOpenCL() {}
+
+ScMatrixRef FormulaGroupInterpreterOpenCL::inverseMatrix( const ScMatrix& )
+{
+ return nullptr;
+}
+
+std::shared_ptr<DynamicKernel> DynamicKernel::create( const ScCalcConfig& rConfig, const ScTokenArray& rCode, int nResultSize )
+{
+ // Constructing "AST"
+ FormulaTokenIterator aCode(rCode);
+ std::vector<FormulaToken*> aTokenVector;
+ std::map<FormulaToken*, FormulaTreeNodeRef> aHashMap;
+ FormulaToken* pCur;
+ while ((pCur = const_cast<FormulaToken*>(aCode.Next())) != nullptr)
+ {
+ OpCode eOp = pCur->GetOpCode();
+ if (eOp != ocPush)
+ {
+ FormulaTreeNodeRef pCurNode = std::make_shared<FormulaTreeNode>(pCur);
+ sal_uInt8 nParamCount = pCur->GetParamCount();
+ for (sal_uInt8 i = 0; i < nParamCount; i++)
+ {
+ if( aTokenVector.empty())
+ return nullptr;
+ FormulaToken* pTempFormula = aTokenVector.back();
+ aTokenVector.pop_back();
+ if (pTempFormula->GetOpCode() != ocPush)
+ {
+ if (aHashMap.find(pTempFormula) == aHashMap.end())
+ return nullptr;
+ pCurNode->Children.push_back(aHashMap[pTempFormula]);
+ }
+ else
+ {
+ FormulaTreeNodeRef pChildTreeNode =
+ std::make_shared<FormulaTreeNode>(pTempFormula);
+ pCurNode->Children.push_back(pChildTreeNode);
+ }
+ }
+ std::reverse(pCurNode->Children.begin(), pCurNode->Children.end());
+ aHashMap[pCur] = pCurNode;
+ }
+ aTokenVector.push_back(pCur);
+ }
+
+ FormulaTreeNodeRef Root = std::make_shared<FormulaTreeNode>(nullptr);
+ Root->Children.push_back(aHashMap[aTokenVector.back()]);
+
+ auto pDynamicKernel = std::make_shared<DynamicKernel>(rConfig, Root, nResultSize);
+
+ // OpenCL source code generation and kernel compilation
+ try
+ {
+ pDynamicKernel->CodeGen();
+ pDynamicKernel->CreateKernel();
+ }
+ catch (const UnhandledToken& ut)
+ {
+ SAL_INFO("sc.opencl", "Dynamic formula compiler: UnhandledToken: " << ut.mMessage << " at " << ut.mFile << ":" << ut.mLineNumber);
+ return nullptr;
+ }
+ catch (const InvalidParameterCount& ipc)
+ {
+ SAL_INFO("sc.opencl", "Dynamic formula compiler: InvalidParameterCount " << ipc.mParameterCount
+ << " at " << ipc.mFile << ":" << ipc.mLineNumber);
+ return nullptr;
+ }
+ catch (const OpenCLError& oce)
+ {
+ // I think OpenCLError exceptions are actually exceptional (unexpected), so do use SAL_WARN
+ // here.
+ SAL_WARN("sc.opencl", "Dynamic formula compiler: OpenCLError from " << oce.mFunction << ": " << openclwrapper::errorString(oce.mError) << " at " << oce.mFile << ":" << oce.mLineNumber);
+
+ // OpenCLError used to go to the catch-all below, and not delete pDynamicKernel. Was that
+ // intentional, should we not do it here then either?
+ openclwrapper::kernelFailures++;
+ return nullptr;
+ }
+ catch (const Unhandled& uh)
+ {
+ SAL_INFO("sc.opencl", "Dynamic formula compiler: Unhandled at " << uh.mFile << ":" << uh.mLineNumber);
+
+ // Unhandled used to go to the catch-all below, and not delete pDynamicKernel. Was that
+ // intentional, should we not do it here then either?
+ openclwrapper::kernelFailures++;
+ return nullptr;
+ }
+ catch (...)
+ {
+ // FIXME: Do we really want to catch random exceptions here?
+ SAL_WARN("sc.opencl", "Dynamic formula compiler: unexpected exception");
+ openclwrapper::kernelFailures++;
+ return nullptr;
+ }
+ return pDynamicKernel;
+}
+
+namespace {
+
+class CLInterpreterResult
+{
+ DynamicKernel* mpKernel;
+
+ SCROW mnGroupLength;
+
+ cl_mem mpCLResBuf;
+ double* mpResBuf;
+
+public:
+ CLInterpreterResult() : mpKernel(nullptr), mnGroupLength(0), mpCLResBuf(nullptr), mpResBuf(nullptr) {}
+ CLInterpreterResult( DynamicKernel* pKernel, SCROW nGroupLength ) :
+ mpKernel(pKernel), mnGroupLength(nGroupLength), mpCLResBuf(nullptr), mpResBuf(nullptr) {}
+
+ bool isValid() const { return mpKernel != nullptr; }
+
+ void fetchResultFromKernel()
+ {
+ if (!isValid())
+ return;
+
+ OpenCLZone zone;
+
+ // Map results back
+ mpCLResBuf = mpKernel->GetResultBuffer();
+
+ openclwrapper::KernelEnv kEnv;
+ openclwrapper::setKernelEnv(&kEnv);
+
+ cl_int err;
+ mpResBuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
+ mpCLResBuf,
+ CL_TRUE, CL_MAP_READ, 0,
+ mnGroupLength * sizeof(double), 0, nullptr, nullptr,
+ &err));
+
+ if (err != CL_SUCCESS)
+ {
+ SAL_WARN("sc.opencl", "clEnqueueMapBuffer failed:: " << openclwrapper::errorString(err));
+ mpResBuf = nullptr;
+ return;
+ }
+ SAL_INFO("sc.opencl", "Kernel results: cl_mem: " << mpResBuf << " (" << DebugPeekDoubles(mpResBuf, mnGroupLength) << ")");
+ }
+
+ bool pushResultToDocument( ScDocument& rDoc, const ScAddress& rTopPos )
+ {
+ if (!mpResBuf)
+ return false;
+
+ OpenCLZone zone;
+
+ rDoc.SetFormulaResults(rTopPos, mpResBuf, mnGroupLength);
+
+ openclwrapper::KernelEnv kEnv;
+ openclwrapper::setKernelEnv(&kEnv);
+
+ cl_int err;
+ err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpCLResBuf, mpResBuf, 0, nullptr, nullptr);
+
+ if (err != CL_SUCCESS)
+ {
+ SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << openclwrapper::errorString(err));
+ return false;
+ }
+
+ return true;
+ }
+};
+
+class CLInterpreterContext
+{
+ std::shared_ptr<DynamicKernel> mpKernelStore; /// for managed kernel instance.
+ DynamicKernel* mpKernel;
+
+ SCROW mnGroupLength;
+
+public:
+ explicit CLInterpreterContext(SCROW nGroupLength)
+ : mpKernel(nullptr)
+ , mnGroupLength(nGroupLength) {}
+
+ bool isValid() const
+ {
+ return mpKernel != nullptr;
+ }
+
+ void setManagedKernel( std::shared_ptr<DynamicKernel> pKernel )
+ {
+ mpKernelStore = std::move(pKernel);
+ mpKernel = mpKernelStore.get();
+ }
+
+ CLInterpreterResult launchKernel()
+ {
+ if (!isValid())
+ return CLInterpreterResult();
+
+ try
+ {
+ // Run the kernel.
+ mpKernel->Launch(mnGroupLength);
+ }
+ catch (const UnhandledToken& ut)
+ {
+ SAL_INFO("sc.opencl", "Dynamic formula compiler: UnhandledToken: " << ut.mMessage << " at " << ut.mFile << ":" << ut.mLineNumber);
+ openclwrapper::kernelFailures++;
+ return CLInterpreterResult();
+ }
+ catch (const OpenCLError& oce)
+ {
+ SAL_WARN("sc.opencl", "Dynamic formula compiler: OpenCLError from " << oce.mFunction << ": " << openclwrapper::errorString(oce.mError) << " at " << oce.mFile << ":" << oce.mLineNumber);
+ openclwrapper::kernelFailures++;
+ return CLInterpreterResult();
+ }
+ catch (const Unhandled& uh)
+ {
+ SAL_INFO("sc.opencl", "Dynamic formula compiler: Unhandled at " << uh.mFile << ":" << uh.mLineNumber);
+ openclwrapper::kernelFailures++;
+ return CLInterpreterResult();
+ }
+ catch (...)
+ {
+ SAL_WARN("sc.opencl", "Dynamic formula compiler: unexpected exception");
+ openclwrapper::kernelFailures++;
+ return CLInterpreterResult();
+ }
+
+ return CLInterpreterResult(mpKernel, mnGroupLength);
+ }
+};
+
+
+CLInterpreterContext createCLInterpreterContext( const ScCalcConfig& rConfig,
+ const ScFormulaCellGroupRef& xGroup, const ScTokenArray& rCode )
+{
+ CLInterpreterContext aCxt(xGroup->mnLength);
+
+ aCxt.setManagedKernel(DynamicKernel::create(rConfig, rCode, xGroup->mnLength));
+
+ return aCxt;
+}
+
+void genRPNTokens( ScDocument& rDoc, const ScAddress& rTopPos, ScTokenArray& rCode )
+{
+ ScCompiler aComp(rDoc, rTopPos, rCode, rDoc.GetGrammar());
+ // Disable special ordering for jump commands for the OpenCL interpreter.
+ aComp.EnableJumpCommandReorder(false);
+ aComp.CompileTokenArray(); // Regenerate RPN tokens.
+}
+
+bool waitForResults()
+{
+ OpenCLZone zone;
+ openclwrapper::KernelEnv kEnv;
+ openclwrapper::setKernelEnv(&kEnv);
+
+ cl_int err = clFinish(kEnv.mpkCmdQueue);
+ if (err != CL_SUCCESS)
+ SAL_WARN("sc.opencl", "clFinish failed: " << openclwrapper::errorString(err));
+
+ return err == CL_SUCCESS;
+}
+
+}
+
+bool FormulaGroupInterpreterOpenCL::interpret( ScDocument& rDoc,
+ const ScAddress& rTopPos, ScFormulaCellGroupRef& xGroup,
+ ScTokenArray& rCode )
+{
+ MergeCalcConfig(rDoc);
+
+ genRPNTokens(rDoc, rTopPos, rCode);
+
+ if( rCode.GetCodeLen() == 0 )
+ return false;
+
+ CLInterpreterContext aCxt = createCLInterpreterContext(maCalcConfig, xGroup, rCode);
+ if (!aCxt.isValid())
+ return false;
+
+ CLInterpreterResult aRes = aCxt.launchKernel();
+ if (!aRes.isValid())
+ return false;
+
+ if (!waitForResults())
+ return false;
+
+ aRes.fetchResultFromKernel();
+
+ return aRes.pushResultToDocument(rDoc, rTopPos);
+}
+
+} // namespace sc::opencl
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */