1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
|
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
*/
#pragma once
#include <cmath>
#include "kahan.hxx"
#include "arraysumfunctor.hxx"
#include <formula/errorcodes.hxx>
namespace sc::op
{
// Checkout available optimization options.
// Note that it turned out to be problematic to support CPU-specific code
// that's not guaranteed to be available on that specific platform (see
// git history). SSE2 is guaranteed on x86_64 and it is our baseline requirement
// for x86 on Windows, so SSE2 use is hardcoded on those platforms.
// Whenever we raise baseline to e.g. AVX, this may get
// replaced with AVX code (get it from git history).
// Do it similarly with other platforms.
#if defined(X86_64) || (defined(X86) && defined(_WIN32))
#define SC_USE_SSE2 1
KahanSum executeSSE2(size_t& i, size_t nSize, const double* pCurrent);
#else
#define SC_USE_SSE2 0
#endif
/**
* If no boosts available, Unrolled KahanSum.
* Most likely to use on android.
*/
static inline KahanSum executeUnrolled(size_t& i, size_t nSize, const double* pCurrent)
{
size_t nRealSize = nSize - i;
size_t nUnrolledSize = nRealSize - (nRealSize % 4);
if (nUnrolledSize > 0)
{
KahanSum sum0 = 0.0;
KahanSum sum1 = 0.0;
KahanSum sum2 = 0.0;
KahanSum sum3 = 0.0;
for (; i + 3 < nUnrolledSize; i += 4)
{
sum0 += *pCurrent++;
sum1 += *pCurrent++;
sum2 += *pCurrent++;
sum3 += *pCurrent++;
}
// We are using pairwise summation alongside Kahan
return (sum0 + sum1) + (sum2 + sum3);
}
return 0.0;
}
/**
* This function task is to choose the fastest method available to perform the sum.
* @param i
* @param nSize
* @param pCurrent
*/
static inline KahanSum executeFast(size_t& i, size_t nSize, const double* pCurrent)
{
#if SC_USE_SSE2
return executeSSE2(i, nSize, pCurrent);
#else
return executeUnrolled(i, nSize, pCurrent);
#endif
}
/**
* Performs the sum of an array.
* Note that align 16 will speed up the process.
* @param pArray
* @param nSize
*/
inline KahanSum sumArray(const double* pArray, size_t nSize)
{
size_t i = 0;
const double* pCurrent = pArray;
KahanSum fSum = executeFast(i, nSize, pCurrent);
// sum rest of the array
for (; i < nSize; ++i)
fSum += pArray[i];
// If the sum is a NaN, some of the terms were empty cells, probably.
// Re-calculate, carefully
double fVal = fSum.get();
if (!std::isfinite(fVal))
{
FormulaError nErr = GetDoubleErrorValue(fVal);
if (nErr == FormulaError::NoValue)
{
fSum = 0;
for (i = 0; i < nSize; i++)
{
if (!std::isfinite(pArray[i]))
{
nErr = GetDoubleErrorValue(pArray[i]);
if (nErr != FormulaError::NoValue)
fSum += pArray[i]; // Let errors encoded as NaNs propagate ???
}
else
fSum += pArray[i];
}
}
}
return fSum;
}
} // end namespace sc::op
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|