summaryrefslogtreecommitdiffstats
path: root/sc/inc/arraysumfunctor.hxx
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 09:06:44 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 09:06:44 +0000
commited5640d8b587fbcfed7dd7967f3de04b37a76f26 (patch)
tree7a5f7c6c9d02226d7471cb3cc8fbbf631b415303 /sc/inc/arraysumfunctor.hxx
parentInitial commit. (diff)
downloadlibreoffice-upstream.tar.xz
libreoffice-upstream.zip
Adding upstream version 4:7.4.7.upstream/4%7.4.7upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--sc/inc/arraysumfunctor.hxx122
1 files changed, 122 insertions, 0 deletions
diff --git a/sc/inc/arraysumfunctor.hxx b/sc/inc/arraysumfunctor.hxx
new file mode 100644
index 000000000..c261c120a
--- /dev/null
+++ b/sc/inc/arraysumfunctor.hxx
@@ -0,0 +1,122 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#pragma once
+
+#include <cmath>
+#include "kahan.hxx"
+#include "arraysumfunctor.hxx"
+#include <formula/errorcodes.hxx>
+
+namespace sc::op
+{
// Select the available optimization options.
// Supporting CPU-specific code that is not guaranteed to be available on the
// target platform turned out to be problematic (see git history). SSE2 is
// guaranteed on x86_64 and it is our baseline requirement for x86 on Windows,
// so SSE2 use is hardcoded on those platforms.
// Whenever the baseline is raised to e.g. AVX, this may get replaced with AVX
// code (get it from git history).
// Handle other platforms similarly.
#if defined(X86_64) || (defined(X86) && defined(_WIN32))
#define SC_USE_SSE2 1
#else
#define SC_USE_SSE2 0
#endif

#if SC_USE_SSE2
// SSE2 summation kernel; only declared here, no definition is visible in this header.
KahanSum executeSSE2(size_t& i, size_t nSize, const double* pCurrent);
#endif
+
+/**
+ * If no boosts available, Unrolled KahanSum.
+ * Most likely to use on android.
+ */
+static inline KahanSum executeUnrolled(size_t& i, size_t nSize, const double* pCurrent)
+{
+ size_t nRealSize = nSize - i;
+ size_t nUnrolledSize = nRealSize - (nRealSize % 4);
+
+ if (nUnrolledSize > 0)
+ {
+ KahanSum sum0 = 0.0;
+ KahanSum sum1 = 0.0;
+ KahanSum sum2 = 0.0;
+ KahanSum sum3 = 0.0;
+
+ for (; i + 3 < nUnrolledSize; i += 4)
+ {
+ sum0 += *pCurrent++;
+ sum1 += *pCurrent++;
+ sum2 += *pCurrent++;
+ sum3 += *pCurrent++;
+ }
+ // We are using pairwise summation alongside Kahan
+ return (sum0 + sum1) + (sum2 + sum3);
+ }
+ return 0.0;
+}
+
+/**
+ * This function task is to choose the fastest method available to perform the sum.
+ * @param i
+ * @param nSize
+ * @param pCurrent
+ */
+static inline KahanSum executeFast(size_t& i, size_t nSize, const double* pCurrent)
+{
+#if SC_USE_SSE2
+ return executeSSE2(i, nSize, pCurrent);
+#else
+ return executeUnrolled(i, nSize, pCurrent);
+#endif
+}
+
+/**
+ * Performs the sum of an array.
+ * Note that align 16 will speed up the process.
+ * @param pArray
+ * @param nSize
+ */
+inline KahanSum sumArray(const double* pArray, size_t nSize)
+{
+ size_t i = 0;
+ const double* pCurrent = pArray;
+ KahanSum fSum = executeFast(i, nSize, pCurrent);
+
+ // sum rest of the array
+ for (; i < nSize; ++i)
+ fSum += pArray[i];
+
+ // If the sum is a NaN, some of the terms were empty cells, probably.
+ // Re-calculate, carefully
+ double fVal = fSum.get();
+ if (!std::isfinite(fVal))
+ {
+ FormulaError nErr = GetDoubleErrorValue(fVal);
+ if (nErr == FormulaError::NoValue)
+ {
+ fSum = 0;
+ for (i = 0; i < nSize; i++)
+ {
+ if (!std::isfinite(pArray[i]))
+ {
+ nErr = GetDoubleErrorValue(pArray[i]);
+ if (nErr != FormulaError::NoValue)
+ fSum += pArray[i]; // Let errors encoded as NaNs propagate ???
+ }
+ else
+ fSum += pArray[i];
+ }
+ }
+ }
+ return fSum;
+}
+
+} // end namespace sc::op
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */