diff options
Diffstat (limited to 'sc/source/core/data/dpcache.cxx')
-rw-r--r-- | sc/source/core/data/dpcache.cxx | 1522 |
1 files changed, 1522 insertions, 0 deletions
diff --git a/sc/source/core/data/dpcache.cxx b/sc/source/core/data/dpcache.cxx new file mode 100644 index 000000000..52109c673 --- /dev/null +++ b/sc/source/core/data/dpcache.cxx @@ -0,0 +1,1522 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <dpcache.hxx> + +#include <document.hxx> +#include <queryentry.hxx> +#include <queryparam.hxx> +#include <dpobject.hxx> +#include <globstr.hrc> +#include <scresid.hxx> +#include <docoptio.hxx> +#include <dpitemdata.hxx> +#include <dputil.hxx> +#include <dpnumgroupinfo.hxx> +#include <columniterator.hxx> +#include <cellvalue.hxx> + +#include <comphelper/parallelsort.hxx> +#include <rtl/math.hxx> +#include <unotools/charclass.hxx> +#include <unotools/textsearch.hxx> +#include <unotools/localedatawrapper.hxx> +#include <unotools/collatorwrapper.hxx> +#include <svl/numformat.hxx> +#include <svl/zforlist.hxx> +#include <o3tl/safeint.hxx> +#include <osl/diagnose.h> + +#if DUMP_PIVOT_TABLE +#include <com/sun/star/sheet/DataPilotFieldGroupBy.hpp> +#endif + +// TODO : Threaded pivot cache operation is disabled until we can figure out +// ways to make the edit engine and number formatter codes thread-safe in a +// proper fashion. +#define ENABLE_THREADED_PIVOT_CACHE 0 + +#if ENABLE_THREADED_PIVOT_CACHE +#include <thread> +#include <future> +#include <queue> +#endif + +using namespace ::com::sun::star; + +using ::com::sun::star::uno::Exception; + +ScDPCache::GroupItems::GroupItems() : mnGroupType(0) {} + +ScDPCache::GroupItems::GroupItems(const ScDPNumGroupInfo& rInfo, sal_Int32 nGroupType) : + maInfo(rInfo), mnGroupType(nGroupType) {} + +ScDPCache::Field::Field() : mnNumFormat(0) {} + +ScDPCache::ScDPCache(ScDocument& rDoc) : + mrDoc( rDoc ), + mnColumnCount ( 0 ), + maEmptyRows(0, rDoc.GetMaxRowCount(), true), + mnDataSize(-1), + mnRowCount(0), + mbDisposing(false) +{ +} + +namespace { + +struct ClearObjectSource +{ + void operator() (ScDPObject* p) const + { + p->ClearTableData(); + } +}; + +} + +ScDPCache::~ScDPCache() +{ + // Make sure no live ScDPObject instances hold reference to this cache any + // more. + mbDisposing = true; + std::for_each(maRefObjects.begin(), maRefObjects.end(), ClearObjectSource()); +} + +namespace { + +/** + * While the macro interpret level is incremented, the formula cells are + * (semi-)guaranteed to be interpreted. + */ +class MacroInterpretIncrementer +{ +public: + explicit MacroInterpretIncrementer(ScDocument& rDoc) : + mrDoc(rDoc) + { + mrDoc.IncMacroInterpretLevel(); + } + ~MacroInterpretIncrementer() + { + mrDoc.DecMacroInterpretLevel(); + } +private: + ScDocument& mrDoc; +}; + +rtl_uString* internString( ScDPCache::StringSetType& rPool, const OUString& rStr ) +{ + return rPool.insert(rStr).first->pData; +} + +OUString createLabelString( const ScDocument& rDoc, SCCOL nCol, const ScRefCellValue& rCell ) +{ + OUString aDocStr = rCell.getRawString(rDoc); + + if (aDocStr.isEmpty()) + { + // Replace an empty label string with column name. + + ScAddress aColAddr(nCol, 0, 0); + aDocStr = ScResId(STR_COLUMN) + " " + aColAddr.Format(ScRefFlags::COL_VALID); + } + return aDocStr; +} + +void initFromCell( + ScDPCache::StringSetType& rStrPool, const ScDocument& rDoc, const ScAddress& rPos, + const ScRefCellValue& rCell, ScDPItemData& rData, sal_uInt32& rNumFormat) +{ + OUString aDocStr = rCell.getRawString(rDoc); + rNumFormat = 0; + + if (rCell.hasError()) + { + rData.SetErrorStringInterned(internString(rStrPool, rDoc.GetString(rPos.Col(), rPos.Row(), rPos.Tab()))); + } + else if (rCell.hasNumeric()) + { + double fVal = rCell.getRawValue(); + rNumFormat = rDoc.GetNumberFormat(rPos); + rData.SetValue(fVal); + } + else if (!rCell.isEmpty()) + { + rData.SetStringInterned(internString(rStrPool, aDocStr)); + } + else + rData.SetEmpty(); +} + +struct Bucket +{ + ScDPItemData maValue; + SCROW mnOrderIndex; + SCROW mnDataIndex; + Bucket() : + mnOrderIndex(0), mnDataIndex(0) {} + Bucket(const ScDPItemData& rValue, SCROW nData) : + maValue(rValue), mnOrderIndex(0), mnDataIndex(nData) {} +}; + +#if DEBUG_PIVOT_TABLE +#include <iostream> +using std::cout; +using std::endl; + +struct PrintBucket +{ + void operator() (const Bucket& v) const + { + cout << "value: " << v.maValue.GetValue() << " order index: " << v.mnOrderIndex << " data index: " << v.mnDataIndex << endl; + } +}; + +#endif + +struct LessByValue +{ + bool operator() (const Bucket& left, const Bucket& right) const + { + return left.maValue < right.maValue; + } +}; + +struct LessByOrderIndex +{ + bool operator() (const Bucket& left, const Bucket& right) const + { + return left.mnOrderIndex < right.mnOrderIndex; + } +}; + +struct LessByDataIndex +{ + bool operator() (const Bucket& left, const Bucket& right) const + { + return left.mnDataIndex < right.mnDataIndex; + } +}; + +struct EqualByOrderIndex +{ + bool operator() (const Bucket& left, const Bucket& right) const + { + return left.mnOrderIndex == right.mnOrderIndex; + } +}; + +class PushBackValue +{ + ScDPCache::ScDPItemDataVec& mrItems; +public: + explicit PushBackValue(ScDPCache::ScDPItemDataVec& _items) : mrItems(_items) {} + void operator() (const Bucket& v) + { + mrItems.push_back(v.maValue); + } +}; + +class PushBackOrderIndex +{ + ScDPCache::IndexArrayType& mrData; +public: + explicit PushBackOrderIndex(ScDPCache::IndexArrayType& _items) : mrData(_items) {} + void operator() (const Bucket& v) + { + mrData.push_back(v.mnOrderIndex); + } +}; + +void processBuckets(std::vector<Bucket>& aBuckets, ScDPCache::Field& rField) +{ + if (aBuckets.empty()) + return; + + // Sort by the value. + comphelper::parallelSort(aBuckets.begin(), aBuckets.end(), LessByValue()); + + { + // Set order index such that unique values have identical index value. + SCROW nCurIndex = 0; + std::vector<Bucket>::iterator it = aBuckets.begin(), itEnd = aBuckets.end(); + ScDPItemData aPrev = it->maValue; + it->mnOrderIndex = nCurIndex; + for (++it; it != itEnd; ++it) + { + if (!aPrev.IsCaseInsEqual(it->maValue)) + ++nCurIndex; + + it->mnOrderIndex = nCurIndex; + aPrev = it->maValue; + } + } + + // Re-sort the bucket this time by the data index. + comphelper::parallelSort(aBuckets.begin(), aBuckets.end(), LessByDataIndex()); + + // Copy the order index series into the field object. + rField.maData.reserve(aBuckets.size()); + std::for_each(aBuckets.begin(), aBuckets.end(), PushBackOrderIndex(rField.maData)); + + // Sort by the value again. + comphelper::parallelSort(aBuckets.begin(), aBuckets.end(), LessByOrderIndex()); + + // Unique by value. + std::vector<Bucket>::iterator itUniqueEnd = + std::unique(aBuckets.begin(), aBuckets.end(), EqualByOrderIndex()); + + // Copy the unique values into items. + std::vector<Bucket>::iterator itBeg = aBuckets.begin(); + size_t nLen = distance(itBeg, itUniqueEnd); + rField.maItems.reserve(nLen); + std::for_each(itBeg, itUniqueEnd, PushBackValue(rField.maItems)); +} + +struct InitColumnData +{ + ScDPCache::EmptyRowsType maEmptyRows; + OUString maLabel; + + ScDPCache::StringSetType* mpStrPool; + ScDPCache::Field* mpField; + + SCCOL mnCol; + + InitColumnData(ScSheetLimits const & rSheetLimits) : + maEmptyRows(0, rSheetLimits.GetMaxRowCount(), true), + mpStrPool(nullptr), + mpField(nullptr), + mnCol(-1) {} + + void init( SCCOL nCol, ScDPCache::StringSetType* pStrPool, ScDPCache::Field* pField ) + { + mpStrPool = pStrPool; + mpField = pField; + mnCol = nCol; + } +}; + +struct InitDocData +{ + ScDocument& mrDoc; + SCTAB mnDocTab; + SCROW mnStartRow; + SCROW mnEndRow; + bool mbTailEmptyRows; + + InitDocData(ScDocument& rDoc) : + mrDoc(rDoc), + mnDocTab(-1), + mnStartRow(-1), + mnEndRow(-1), + mbTailEmptyRows(false) {} +}; + +typedef std::unordered_set<OUString> LabelSet; + +void normalizeAddLabel(const OUString& rLabel, std::vector<OUString>& rLabels, LabelSet& rExistingNames) +{ + const OUString aLabelLower = ScGlobal::getCharClass().lowercase(rLabel); + sal_Int32 nSuffix = 1; + OUString aNewLabel = rLabel; + OUString aNewLabelLower = aLabelLower; + while (true) + { + if (!rExistingNames.count(aNewLabelLower)) + { + // this is a unique label. + rLabels.push_back(aNewLabel); + rExistingNames.insert(aNewLabelLower); + break; + } + + // This name already exists. + aNewLabel = rLabel + OUString::number(++nSuffix); + aNewLabelLower = aLabelLower + OUString::number(nSuffix); + } +} + +std::vector<OUString> normalizeLabels(const std::vector<InitColumnData>& rColData) +{ + std::vector<OUString> aLabels; + aLabels.reserve(rColData.size() + 1); + + LabelSet aExistingNames; + normalizeAddLabel(ScResId(STR_PIVOT_DATA), aLabels, aExistingNames); + + for (const InitColumnData& rCol : rColData) + normalizeAddLabel(rCol.maLabel, aLabels, aExistingNames); + + return aLabels; +} + +std::vector<OUString> normalizeLabels(const ScDPCache::DBConnector& rDB, const sal_Int32 nLabelCount) +{ + std::vector<OUString> aLabels; + aLabels.reserve(nLabelCount + 1); + + LabelSet aExistingNames; + normalizeAddLabel(ScResId(STR_PIVOT_DATA), aLabels, aExistingNames); + + for (sal_Int32 nCol = 0; nCol < nLabelCount; ++nCol) + { + OUString aColTitle = rDB.getColumnLabel(nCol); + normalizeAddLabel(aColTitle, aLabels, aExistingNames); + } + + return aLabels; +} + +void initColumnFromDoc( InitDocData& rDocData, InitColumnData &rColData ) +{ + ScDPCache::Field& rField = *rColData.mpField; + ScDocument& rDoc = rDocData.mrDoc; + SCTAB nDocTab = rDocData.mnDocTab; + SCCOL nCol = rColData.mnCol; + SCROW nStartRow = rDocData.mnStartRow; + SCROW nEndRow = rDocData.mnEndRow; + bool bTailEmptyRows = rDocData.mbTailEmptyRows; + + std::optional<sc::ColumnIterator> pIter = + rDoc.GetColumnIterator(nDocTab, nCol, nStartRow, nEndRow); + assert(pIter); + assert(pIter->hasCell()); + + ScDPItemData aData; + + rColData.maLabel = createLabelString(rDoc, nCol, pIter->getCell()); + pIter->next(); + + std::vector<Bucket> aBuckets; + aBuckets.reserve(nEndRow-nStartRow); // skip the topmost label cell. + + // Push back all original values. + for (SCROW i = 0, n = nEndRow-nStartRow; i < n; ++i, pIter->next()) + { + assert(pIter->hasCell()); + + sal_uInt32 nNumFormat = 0; + ScAddress aPos(nCol, pIter->getRow(), nDocTab); + initFromCell(*rColData.mpStrPool, rDoc, aPos, pIter->getCell(), aData, nNumFormat); + + aBuckets.emplace_back(aData, i); + + if (!aData.IsEmpty()) + { + rColData.maEmptyRows.insert_back(i, i+1, false); + if (nNumFormat) + // Only take non-default number format. + rField.mnNumFormat = nNumFormat; + } + } + + processBuckets(aBuckets, rField); + + if (bTailEmptyRows) + { + // If the last item is not empty, append one. Note that the items + // are sorted, and empty item should come last when sorted. + if (rField.maItems.empty() || !rField.maItems.back().IsEmpty()) + { + aData.SetEmpty(); + rField.maItems.push_back(aData); + } + } +} + +#if ENABLE_THREADED_PIVOT_CACHE + +class ThreadQueue +{ + using FutureType = std::future<void>; + std::queue<FutureType> maQueue; + std::mutex maMutex; + std::condition_variable maCond; + + size_t mnMaxQueue; + +public: + ThreadQueue( size_t nMaxQueue ) : mnMaxQueue(nMaxQueue) {} + + void push( std::function<void()> aFunc ) + { + std::unique_lock<std::mutex> lock(maMutex); + + while (maQueue.size() >= mnMaxQueue) + maCond.wait(lock); + + FutureType f = std::async(std::launch::async, aFunc); + maQueue.push(std::move(f)); + lock.unlock(); + + maCond.notify_one(); + } + + void waitForOne() + { + std::unique_lock<std::mutex> lock(maMutex); + + while (maQueue.empty()) + maCond.wait(lock); + + FutureType ret = std::move(maQueue.front()); + maQueue.pop(); + lock.unlock(); + + ret.get(); // This may throw if an exception was thrown on the async thread. + + maCond.notify_one(); + } +}; + +class ThreadScopedGuard +{ + std::thread maThread; +public: + ThreadScopedGuard(std::thread thread) : maThread(std::move(thread)) {} + ThreadScopedGuard(ThreadScopedGuard&& other) : maThread(std::move(other.maThread)) {} + + ThreadScopedGuard(const ThreadScopedGuard&) = delete; + ThreadScopedGuard& operator= (const ThreadScopedGuard&) = delete; + + ~ThreadScopedGuard() + { + maThread.join(); + } +}; + +#endif + +} + +void ScDPCache::InitFromDoc(ScDocument& rDoc, const ScRange& rRange) +{ + Clear(); + + InitDocData aDocData(rDoc); + + // Make sure the formula cells within the data range are interpreted + // during this call, for this method may be called from the interpretation + // of GETPIVOTDATA, which disables nested formula interpretation without + // increasing the macro level. + MacroInterpretIncrementer aMacroInc(rDoc); + + aDocData.mnStartRow = rRange.aStart.Row(); // start of data + aDocData.mnEndRow = rRange.aEnd.Row(); + + // Sanity check + if (!GetDoc().ValidRow(aDocData.mnStartRow) || !GetDoc().ValidRow(aDocData.mnEndRow) || aDocData.mnEndRow <= aDocData.mnStartRow) + return; + + SCCOL nStartCol = rRange.aStart.Col(); + SCCOL nEndCol = rRange.aEnd.Col(); + aDocData.mnDocTab = rRange.aStart.Tab(); + + mnColumnCount = nEndCol - nStartCol + 1; + + // this row count must include the trailing empty rows. + mnRowCount = aDocData.mnEndRow - aDocData.mnStartRow; // skip the topmost label row. + + // Skip trailing empty rows if exists. + SCCOL nCol1 = nStartCol, nCol2 = nEndCol; + SCROW nRow1 = aDocData.mnStartRow, nRow2 = aDocData.mnEndRow; + rDoc.ShrinkToDataArea(aDocData.mnDocTab, nCol1, nRow1, nCol2, nRow2); + aDocData.mbTailEmptyRows = aDocData.mnEndRow > nRow2; // Trailing empty rows exist. + aDocData.mnEndRow = nRow2; + + if (aDocData.mnEndRow <= aDocData.mnStartRow) + { + // Check this again since the end row position has changed. It's + // possible that the new end row becomes lower than the start row + // after the shrinkage. + Clear(); + return; + } + + maStringPools.resize(mnColumnCount); + std::vector<InitColumnData> aColData(mnColumnCount, InitColumnData(rDoc.GetSheetLimits())); + maFields.reserve(mnColumnCount); + for (SCCOL i = 0; i < mnColumnCount; ++i) + maFields.push_back(std::make_unique<Field>()); + + maLabelNames.reserve(mnColumnCount+1); + + // Ensure that none of the formula cells in the data range are dirty. + rDoc.EnsureFormulaCellResults(rRange); + +#if ENABLE_THREADED_PIVOT_CACHE + ThreadQueue aQueue(std::thread::hardware_concurrency()); + + auto aFuncLaunchFieldThreads = [&]() + { + for (sal_uInt16 nCol = nStartCol; nCol <= nEndCol; ++nCol) + { + size_t nDim = nCol - nStartCol; + InitColumnData& rColData = aColData[nDim]; + rColData.init(nCol, &maStringPools[nDim], maFields[nDim].get()); + + auto func = [&aDocData,&rColData]() + { + initColumnFromDoc(aDocData, rColData); + }; + + aQueue.push(std::move(func)); + } + }; + + { + // Launch a separate thread that in turn spawns async threads to populate the fields. + std::thread t(aFuncLaunchFieldThreads); + ThreadScopedGuard sg(std::move(t)); + + // Wait for all the async threads to complete on the main thread. + for (SCCOL i = 0; i < mnColumnCount; ++i) + aQueue.waitForOne(); + } + +#else + for (sal_uInt16 nCol = nStartCol; nCol <= nEndCol; ++nCol) + { + size_t nDim = nCol - nStartCol; + InitColumnData& rColData = aColData[nDim]; + rColData.init(nCol, &maStringPools[nDim], maFields[nDim].get()); + + initColumnFromDoc(aDocData, rColData); + } +#endif + + maLabelNames = normalizeLabels(aColData); + + // Merge all non-empty rows data. + for (const InitColumnData& rCol : aColData) + { + EmptyRowsType::const_segment_iterator it = rCol.maEmptyRows.begin_segment(); + EmptyRowsType::const_segment_iterator ite = rCol.maEmptyRows.end_segment(); + EmptyRowsType::const_iterator pos = maEmptyRows.begin(); + + for (; it != ite; ++it) + { + if (!it->value) + // Non-empty segment found. Record it. + pos = maEmptyRows.insert(pos, it->start, it->end, false).first; + } + } + + PostInit(); +} + +bool ScDPCache::InitFromDataBase(DBConnector& rDB) +{ + Clear(); + + try + { + mnColumnCount = rDB.getColumnCount(); + maStringPools.resize(mnColumnCount); + maFields.clear(); + maFields.reserve(mnColumnCount); + for (SCCOL i = 0; i < mnColumnCount; ++i) + maFields.push_back(std::make_unique<Field>()); + + // Get column titles and types. + maLabelNames = normalizeLabels(rDB, mnColumnCount); + + std::vector<Bucket> aBuckets; + ScDPItemData aData; + for (sal_Int32 nCol = 0; nCol < mnColumnCount; ++nCol) + { + if (!rDB.first()) + continue; + + aBuckets.clear(); + Field& rField = *maFields[nCol]; + SCROW nRow = 0; + do + { + SvNumFormatType nFormatType = SvNumFormatType::UNDEFINED; + aData.SetEmpty(); + rDB.getValue(nCol, aData, nFormatType); + aBuckets.emplace_back(aData, nRow); + if (!aData.IsEmpty()) + { + maEmptyRows.insert_back(nRow, nRow+1, false); + SvNumberFormatter* pFormatter = mrDoc.GetFormatTable(); + rField.mnNumFormat = pFormatter ? pFormatter->GetStandardFormat(nFormatType) : 0; + } + + ++nRow; + } + while (rDB.next()); + + processBuckets(aBuckets, rField); + } + + rDB.finish(); + + if (!maFields.empty()) + mnRowCount = maFields[0]->maData.size(); + + PostInit(); + return true; + } + catch (const Exception&) + { + return false; + } +} + +bool ScDPCache::ValidQuery( SCROW nRow, const ScQueryParam &rParam) const +{ + if (!rParam.GetEntryCount()) + return true; + + if (!rParam.GetEntry(0).bDoQuery) + return true; + + bool bMatchWholeCell = mrDoc.GetDocOptions().IsMatchWholeCell(); + + SCSIZE nEntryCount = rParam.GetEntryCount(); + std::vector<bool> aPassed(nEntryCount, false); + + tools::Long nPos = -1; + CollatorWrapper& rCollator = ScGlobal::GetCollator(rParam.bCaseSens); + ::utl::TransliterationWrapper& rTransliteration = ScGlobal::GetTransliteration(rParam.bCaseSens); + + for (size_t i = 0; i < nEntryCount && rParam.GetEntry(i).bDoQuery; ++i) + { + const ScQueryEntry& rEntry = rParam.GetEntry(i); + const ScQueryEntry::Item& rItem = rEntry.GetQueryItem(); + // we can only handle one single direct query + // #i115431# nField in QueryParam is the sheet column, not the field within the source range + SCCOL nQueryCol = static_cast<SCCOL>(rEntry.nField); + if ( nQueryCol < rParam.nCol1 ) + nQueryCol = rParam.nCol1; + if ( nQueryCol > rParam.nCol2 ) + nQueryCol = rParam.nCol2; + SCCOL nSourceField = nQueryCol - rParam.nCol1; + SCROW nId = GetItemDataId( nSourceField, nRow, false ); + const ScDPItemData* pCellData = GetItemDataById( nSourceField, nId ); + + bool bOk = false; + + if (rEntry.GetQueryItem().meType == ScQueryEntry::ByEmpty) + { + if (rEntry.IsQueryByEmpty()) + bOk = pCellData->IsEmpty(); + else + { + assert(rEntry.IsQueryByNonEmpty()); + bOk = !pCellData->IsEmpty(); + } + } + else if (rEntry.GetQueryItem().meType != ScQueryEntry::ByString && pCellData->IsValue()) + { // by Value + double nCellVal = pCellData->GetValue(); + + switch (rEntry.eOp) + { + case SC_EQUAL : + bOk = ::rtl::math::approxEqual(nCellVal, rItem.mfVal); + break; + case SC_LESS : + bOk = (nCellVal < rItem.mfVal) && !::rtl::math::approxEqual(nCellVal, rItem.mfVal); + break; + case SC_GREATER : + bOk = (nCellVal > rItem.mfVal) && !::rtl::math::approxEqual(nCellVal, rItem.mfVal); + break; + case SC_LESS_EQUAL : + bOk = (nCellVal < rItem.mfVal) || ::rtl::math::approxEqual(nCellVal, rItem.mfVal); + break; + case SC_GREATER_EQUAL : + bOk = (nCellVal > rItem.mfVal) || ::rtl::math::approxEqual(nCellVal, rItem.mfVal); + break; + case SC_NOT_EQUAL : + bOk = !::rtl::math::approxEqual(nCellVal, rItem.mfVal); + break; + default: + bOk= false; + break; + } + } + else if ((rEntry.eOp == SC_EQUAL || rEntry.eOp == SC_NOT_EQUAL) + || (rEntry.GetQueryItem().meType == ScQueryEntry::ByString + && pCellData->HasStringData() ) + ) + { // by String + OUString aCellStr = pCellData->GetString(); + + bool bRealWildOrRegExp = (rParam.eSearchType != utl::SearchParam::SearchType::Normal && + ((rEntry.eOp == SC_EQUAL) || (rEntry.eOp == SC_NOT_EQUAL))); + if (bRealWildOrRegExp) + { + sal_Int32 nStart = 0; + sal_Int32 nEnd = aCellStr.getLength(); + + bool bMatch = rEntry.GetSearchTextPtr( rParam.eSearchType, rParam.bCaseSens, bMatchWholeCell ) + ->SearchForward( aCellStr, &nStart, &nEnd ); + // from 614 on, nEnd is behind the found text + if (bMatch && bMatchWholeCell + && (nStart != 0 || nEnd != aCellStr.getLength())) + bMatch = false; // RegExp must match entire cell string + + bOk = ((rEntry.eOp == SC_NOT_EQUAL) ? !bMatch : bMatch); + } + else + { + if (rEntry.eOp == SC_EQUAL || rEntry.eOp == SC_NOT_EQUAL) + { + if (bMatchWholeCell) + { + // TODO: Use shared string for fast equality check. + OUString aStr = rEntry.GetQueryItem().maString.getString(); + bOk = rTransliteration.isEqual(aCellStr, aStr); + bool bHasStar = false; + sal_Int32 nIndex; + if (( nIndex = aStr.indexOf('*') ) != -1) + bHasStar = true; + if (bHasStar && (nIndex>0)) + { + for (sal_Int32 j=0;(j<nIndex) && (j< aCellStr.getLength()) ; j++) + { + if (aCellStr[j] == aStr[j]) + { + bOk=true; + } + else + { + bOk=false; + break; + } + } + } + } + else + { + OUString aQueryStr = rEntry.GetQueryItem().maString.getString(); + css::uno::Sequence< sal_Int32 > xOff; + const LanguageType nLang = ScGlobal::oSysLocale->GetLanguageTag().getLanguageType(); + OUString aCell = rTransliteration.transliterate( + aCellStr, nLang, 0, aCellStr.getLength(), &xOff); + OUString aQuer = rTransliteration.transliterate( + aQueryStr, nLang, 0, aQueryStr.getLength(), &xOff); + bOk = (aCell.indexOf( aQuer ) != -1); + } + if (rEntry.eOp == SC_NOT_EQUAL) + bOk = !bOk; + } + else + { // use collator here because data was probably sorted + sal_Int32 nCompare = rCollator.compareString( + aCellStr, rEntry.GetQueryItem().maString.getString()); + switch (rEntry.eOp) + { + case SC_LESS : + bOk = (nCompare < 0); + break; + case SC_GREATER : + bOk = (nCompare > 0); + break; + case SC_LESS_EQUAL : + bOk = (nCompare <= 0); + break; + case SC_GREATER_EQUAL : + bOk = (nCompare >= 0); + break; + case SC_NOT_EQUAL: + OSL_FAIL("SC_NOT_EQUAL"); + break; + case SC_TOPVAL: + case SC_BOTVAL: + case SC_TOPPERC: + case SC_BOTPERC: + default: + break; + } + } + } + } + + if (nPos == -1) + { + nPos++; + aPassed[nPos] = bOk; + } + else + { + if (rEntry.eConnect == SC_AND) + { + aPassed[nPos] = aPassed[nPos] && bOk; + } + else + { + nPos++; + aPassed[nPos] = bOk; + } + } + } + + for (tools::Long j=1; j <= nPos; j++) + aPassed[0] = aPassed[0] || aPassed[j]; + + bool bRet = aPassed[0]; + return bRet; +} + +ScDocument& ScDPCache::GetDoc() const +{ + return mrDoc; +} + +tools::Long ScDPCache::GetColumnCount() const +{ + return mnColumnCount; +} + +bool ScDPCache::IsRowEmpty(SCROW nRow) const +{ + bool bEmpty = true; + maEmptyRows.search_tree(nRow, bEmpty); + return bEmpty; +} + +const ScDPCache::GroupItems* ScDPCache::GetGroupItems(tools::Long nDim) const +{ + if (nDim < 0) + return nullptr; + + tools::Long nSourceCount = static_cast<tools::Long>(maFields.size()); + if (nDim < nSourceCount) + return maFields[nDim]->mpGroup.get(); + + nDim -= nSourceCount; + if (nDim < static_cast<tools::Long>(maGroupFields.size())) + return maGroupFields[nDim].get(); + + return nullptr; +} + +OUString ScDPCache::GetDimensionName(std::vector<OUString>::size_type nDim) const +{ + OSL_ENSURE(nDim < maLabelNames.size()-1 , "ScDPTableDataCache::GetDimensionName"); + OSL_ENSURE(maLabelNames.size() == static_cast <sal_uInt16> (mnColumnCount+1), "ScDPTableDataCache::GetDimensionName"); + + if ( nDim+1 < maLabelNames.size() ) + { + return maLabelNames[nDim+1]; + } + else + return OUString(); +} + +void ScDPCache::PostInit() +{ + OSL_ENSURE(!maFields.empty(), "Cache not initialized!"); + + maEmptyRows.build_tree(); + auto it = maEmptyRows.rbegin(); + OSL_ENSURE(it != maEmptyRows.rend(), "corrupt flat_segment_tree instance!"); + mnDataSize = maFields[0]->maData.size(); + ++it; // Skip the first position. + OSL_ENSURE(it != maEmptyRows.rend(), "buggy version of flat_segment_tree is used."); + if (it->second) + { + SCROW nLastNonEmpty = it->first - 1; + if (nLastNonEmpty+1 < mnDataSize) + mnDataSize = nLastNonEmpty+1; + } +} + +void ScDPCache::Clear() +{ + mnColumnCount = 0; + mnRowCount = 0; + maFields.clear(); + maLabelNames.clear(); + maGroupFields.clear(); + maEmptyRows.clear(); + maStringPools.clear(); +} + +SCROW ScDPCache::GetItemDataId(sal_uInt16 nDim, SCROW nRow, bool bRepeatIfEmpty) const +{ + OSL_ENSURE(nDim < mnColumnCount, "ScDPTableDataCache::GetItemDataId "); + + const Field& rField = *maFields[nDim]; + if (o3tl::make_unsigned(nRow) >= rField.maData.size()) + { + // nRow is in the trailing empty rows area. + if (bRepeatIfEmpty) + nRow = rField.maData.size()-1; // Move to the last non-empty row. + else + // Return the last item, which should always be empty if the + // initialization has skipped trailing empty rows. + return rField.maItems.size()-1; + + } + else if (bRepeatIfEmpty) + { + while (nRow > 0 && rField.maItems[rField.maData[nRow]].IsEmpty()) + --nRow; + } + + return rField.maData[nRow]; +} + +const ScDPItemData* ScDPCache::GetItemDataById(tools::Long nDim, SCROW nId) const +{ + if (nDim < 0 || nId < 0) + return nullptr; + + size_t nSourceCount = maFields.size(); + size_t nDimPos = static_cast<size_t>(nDim); + size_t nItemId = static_cast<size_t>(nId); + if (nDimPos < nSourceCount) + { + // source field. + const Field& rField = *maFields[nDimPos]; + if (nItemId < rField.maItems.size()) + return &rField.maItems[nItemId]; + + if (!rField.mpGroup) + return nullptr; + + nItemId -= rField.maItems.size(); + const ScDPItemDataVec& rGI = rField.mpGroup->maItems; + if (nItemId >= rGI.size()) + return nullptr; + + return &rGI[nItemId]; + } + + // Try group fields. + nDimPos -= nSourceCount; + if (nDimPos >= maGroupFields.size()) + return nullptr; + + const ScDPItemDataVec& rGI = maGroupFields[nDimPos]->maItems; + if (nItemId >= rGI.size()) + return nullptr; + + return &rGI[nItemId]; +} + +size_t ScDPCache::GetFieldCount() const +{ + return maFields.size(); +} + +size_t ScDPCache::GetGroupFieldCount() const +{ + return maGroupFields.size(); +} + +SCROW ScDPCache::GetRowCount() const +{ + return mnRowCount; +} + +SCROW ScDPCache::GetDataSize() const +{ + OSL_ENSURE(mnDataSize <= GetRowCount(), "Data size should never be larger than the row count."); + return mnDataSize >= 0 ? mnDataSize : 0; +} + +const ScDPCache::IndexArrayType* ScDPCache::GetFieldIndexArray( size_t nDim ) const +{ + if (nDim >= maFields.size()) + return nullptr; + + return &maFields[nDim]->maData; +} + +const ScDPCache::ScDPItemDataVec& ScDPCache::GetDimMemberValues(SCCOL nDim) const +{ + OSL_ENSURE( nDim>=0 && nDim < mnColumnCount ," nDim < mnColumnCount "); + return maFields.at(nDim)->maItems; +} + +sal_uInt32 ScDPCache::GetNumberFormat( tools::Long nDim ) const +{ + if ( nDim >= mnColumnCount ) + return 0; + + // TODO: Find a way to determine the dominant number format in presence of + // multiple number formats in the same field. + return maFields[nDim]->mnNumFormat; +} + +bool ScDPCache::IsDateDimension( tools::Long nDim ) const +{ + if (nDim >= mnColumnCount) + return false; + + SvNumberFormatter* pFormatter = mrDoc.GetFormatTable(); + if (!pFormatter) + return false; + + SvNumFormatType eType = pFormatter->GetType(maFields[nDim]->mnNumFormat); + return (eType == SvNumFormatType::DATE) || (eType == SvNumFormatType::DATETIME); +} + +tools::Long ScDPCache::GetDimMemberCount(tools::Long nDim) const +{ + OSL_ENSURE( nDim>=0 && nDim < mnColumnCount ," ScDPTableDataCache::GetDimMemberCount : out of bound "); + return maFields[nDim]->maItems.size(); +} + +SCCOL ScDPCache::GetDimensionIndex(std::u16string_view sName) const +{ + for (size_t i = 1; i < maLabelNames.size(); ++i) + { + if (maLabelNames[i] == sName) + return static_cast<SCCOL>(i-1); + } + return -1; +} + +rtl_uString* ScDPCache::InternString( size_t nDim, const OUString& rStr ) +{ + assert(nDim < maStringPools.size()); + return internString(maStringPools[nDim], rStr); +} + +void ScDPCache::AddReference(ScDPObject* pObj) const +{ + maRefObjects.insert(pObj); +} + +void ScDPCache::RemoveReference(ScDPObject* pObj) const +{ + if (mbDisposing) + // Object being deleted. + return; + + maRefObjects.erase(pObj); + if (maRefObjects.empty()) + mrDoc.GetDPCollection()->RemoveCache(this); +} + +const ScDPCache::ScDPObjectSet& ScDPCache::GetAllReferences() const +{ + return maRefObjects; +} + +SCROW ScDPCache::GetIdByItemData(tools::Long nDim, const ScDPItemData& rItem) const +{ + if (nDim < 0) + return -1; + + if (nDim < mnColumnCount) + { + // source field. + const ScDPItemDataVec& rItems = maFields[nDim]->maItems; + for (size_t i = 0, n = rItems.size(); i < n; ++i) + { + if (rItems[i] == rItem) + return i; + } + + if (!maFields[nDim]->mpGroup) + return -1; + + // grouped source field. + const ScDPItemDataVec& rGI = maFields[nDim]->mpGroup->maItems; + for (size_t i = 0, n = rGI.size(); i < n; ++i) + { + if (rGI[i] == rItem) + return rItems.size() + i; + } + return -1; + } + + // group field. + nDim -= mnColumnCount; + if (o3tl::make_unsigned(nDim) < maGroupFields.size()) + { + const ScDPItemDataVec& rGI = maGroupFields[nDim]->maItems; + for (size_t i = 0, n = rGI.size(); i < n; ++i) + { + if (rGI[i] == rItem) + return i; + } + } + + return -1; +} + +// static +sal_uInt32 ScDPCache::GetLocaleIndependentFormat( SvNumberFormatter& rFormatter, sal_uInt32 nNumFormat ) +{ + // For a date or date+time format use ISO format so it works across locales + // and can be matched against string based item queries. For time use 24h + // format. All others use General format, no currency, percent, ... + // Use en-US locale for all. + switch (rFormatter.GetType( nNumFormat)) + { + case SvNumFormatType::DATE: + return rFormatter.GetFormatIndex( NF_DATE_ISO_YYYYMMDD, LANGUAGE_ENGLISH_US); + case SvNumFormatType::TIME: + return rFormatter.GetFormatIndex( NF_TIME_HHMMSS, LANGUAGE_ENGLISH_US); + case SvNumFormatType::DATETIME: + return rFormatter.GetFormatIndex( NF_DATETIME_ISO_YYYYMMDD_HHMMSS, LANGUAGE_ENGLISH_US); + default: + return rFormatter.GetFormatIndex( NF_NUMBER_STANDARD, LANGUAGE_ENGLISH_US); + } +} + +// static +OUString ScDPCache::GetLocaleIndependentFormattedNumberString( double fValue ) +{ + return rtl::math::doubleToUString( fValue, rtl_math_StringFormat_Automatic, rtl_math_DecimalPlaces_Max, '.', true); +} + +// static +OUString ScDPCache::GetLocaleIndependentFormattedString( double fValue, + SvNumberFormatter& rFormatter, sal_uInt32 nNumFormat ) +{ + nNumFormat = GetLocaleIndependentFormat( rFormatter, nNumFormat); + if ((nNumFormat % SV_COUNTRY_LANGUAGE_OFFSET) == 0) + return GetLocaleIndependentFormattedNumberString( fValue); + + OUString aStr; + const Color* pColor = nullptr; + rFormatter.GetOutputString( fValue, nNumFormat, aStr, &pColor); + return aStr; +} + +OUString ScDPCache::GetFormattedString(tools::Long nDim, const ScDPItemData& rItem, bool bLocaleIndependent) const +{ + if (nDim < 0) + return rItem.GetString(); + + ScDPItemData::Type eType = rItem.GetType(); + if (eType == ScDPItemData::Value) + { + // Format value using the stored number format. + SvNumberFormatter* pFormatter = mrDoc.GetFormatTable(); + if (pFormatter) + { + sal_uInt32 nNumFormat = GetNumberFormat(nDim); + if (bLocaleIndependent) + return GetLocaleIndependentFormattedString( rItem.GetValue(), *pFormatter, nNumFormat); + + OUString aStr; + const Color* pColor = nullptr; + pFormatter->GetOutputString(rItem.GetValue(), nNumFormat, aStr, &pColor); + return aStr; + } + + // Last resort... + return GetLocaleIndependentFormattedNumberString( rItem.GetValue()); + } + + if (eType == ScDPItemData::GroupValue) + { + ScDPItemData::GroupValueAttr aAttr = rItem.GetGroupValue(); + double fStart = 0.0, fEnd = 0.0; + const GroupItems* p = GetGroupItems(nDim); + if (p) + { + fStart = p->maInfo.mfStart; + fEnd = p->maInfo.mfEnd; + } + return ScDPUtil::getDateGroupName( + aAttr.mnGroupType, aAttr.mnValue, mrDoc.GetFormatTable(), fStart, fEnd); + } + + if (eType == ScDPItemData::RangeStart) + { + double fVal = rItem.GetValue(); + const GroupItems* p = GetGroupItems(nDim); + if (!p) + return rItem.GetString(); + + sal_Unicode cDecSep = ScGlobal::getLocaleData().getNumDecimalSep()[0]; + return ScDPUtil::getNumGroupName(fVal, p->maInfo, cDecSep, mrDoc.GetFormatTable()); + } + + return rItem.GetString(); +} + +SvNumberFormatter* ScDPCache::GetNumberFormatter() const +{ + return mrDoc.GetFormatTable(); +} + +tools::Long ScDPCache::AppendGroupField() +{ + maGroupFields.push_back(std::make_unique<GroupItems>()); + return static_cast<tools::Long>(maFields.size() + maGroupFields.size() - 1); +} + +void ScDPCache::ResetGroupItems(tools::Long nDim, const ScDPNumGroupInfo& rNumInfo, sal_Int32 nGroupType) +{ + if (nDim < 0) + return; + + tools::Long nSourceCount = static_cast<tools::Long>(maFields.size()); + if (nDim < nSourceCount) + { + maFields.at(nDim)->mpGroup.reset(new GroupItems(rNumInfo, nGroupType)); + return; + } + + nDim -= nSourceCount; + if (nDim < static_cast<tools::Long>(maGroupFields.size())) + { + GroupItems& rGI = *maGroupFields[nDim]; + rGI.maItems.clear(); + rGI.maInfo = rNumInfo; + rGI.mnGroupType = nGroupType; + } +} + +SCROW ScDPCache::SetGroupItem(tools::Long nDim, const ScDPItemData& rData) +{ + if (nDim < 0) + return -1; + + tools::Long nSourceCount = static_cast<tools::Long>(maFields.size()); + if (nDim < nSourceCount) + { + GroupItems& rGI = *maFields.at(nDim)->mpGroup; + rGI.maItems.push_back(rData); + SCROW nId = maFields[nDim]->maItems.size() + rGI.maItems.size() - 1; + return nId; + } + + nDim -= nSourceCount; + if (nDim < static_cast<tools::Long>(maGroupFields.size())) + { + ScDPItemDataVec& rItems = maGroupFields.at(nDim)->maItems; + rItems.push_back(rData); + return rItems.size()-1; + } + + return -1; +} + +void ScDPCache::GetGroupDimMemberIds(tools::Long nDim, std::vector<SCROW>& rIds) const +{ + if (nDim < 0) + return; + + tools::Long nSourceCount = static_cast<tools::Long>(maFields.size()); + if (nDim < nSourceCount) + { + if (!maFields.at(nDim)->mpGroup) + return; + + size_t nOffset = maFields[nDim]->maItems.size(); + const ScDPItemDataVec& rGI = maFields[nDim]->mpGroup->maItems; + for (size_t i = 0, n = rGI.size(); i < n; ++i) + rIds.push_back(static_cast<SCROW>(i + nOffset)); + + return; + } + + nDim -= nSourceCount; + if (nDim < static_cast<tools::Long>(maGroupFields.size())) + { + const ScDPItemDataVec& rGI = maGroupFields.at(nDim)->maItems; + for (size_t i = 0, n = rGI.size(); i < n; ++i) + rIds.push_back(static_cast<SCROW>(i)); + } +} + +namespace { + +struct ClearGroupItems +{ + void operator() (const std::unique_ptr<ScDPCache::Field>& r) const + { + r->mpGroup.reset(); + } +}; + +} + +void ScDPCache::ClearGroupFields() +{ + maGroupFields.clear(); +} + +void ScDPCache::ClearAllFields() +{ + ClearGroupFields(); + std::for_each(maFields.begin(), maFields.end(), ClearGroupItems()); +} + +const ScDPNumGroupInfo* ScDPCache::GetNumGroupInfo(tools::Long nDim) const +{ + if (nDim < 0) + return nullptr; + + tools::Long nSourceCount = static_cast<tools::Long>(maFields.size()); + if (nDim < nSourceCount) + { + if (!maFields.at(nDim)->mpGroup) + return nullptr; + + return &maFields[nDim]->mpGroup->maInfo; + } + + nDim -= nSourceCount; + if (nDim < static_cast<tools::Long>(maGroupFields.size())) + return &maGroupFields.at(nDim)->maInfo; + + return nullptr; +} + +sal_Int32 ScDPCache::GetGroupType(tools::Long nDim) const +{ + if (nDim < 0) + return 0; + + tools::Long nSourceCount = static_cast<tools::Long>(maFields.size()); + if (nDim < nSourceCount) + { + if (!maFields.at(nDim)->mpGroup) + return 0; + + return maFields[nDim]->mpGroup->mnGroupType; + } + + nDim -= nSourceCount; + if (nDim < static_cast<tools::Long>(maGroupFields.size())) + return maGroupFields.at(nDim)->mnGroupType; + + return 0; +} + +#if DUMP_PIVOT_TABLE + +namespace { + +void dumpItems(const ScDPCache& rCache, tools::Long nDim, const ScDPCache::ScDPItemDataVec& rItems, size_t nOffset) +{ + for (size_t i = 0; i < rItems.size(); ++i) + cout << " " << (i+nOffset) << ": " << rCache.GetFormattedString(nDim, rItems[i], false) << endl; +} + +void dumpSourceData(const ScDPCache& rCache, tools::Long nDim, const ScDPCache::ScDPItemDataVec& rItems, const ScDPCache::IndexArrayType& rArray) +{ + for (const auto& rIndex : rArray) + cout << " '" << rCache.GetFormattedString(nDim, rItems[rIndex], false) << "'" << endl; +} + +const char* getGroupTypeName(sal_Int32 nType) +{ + static const char* pNames[] = { + "", "years", "quarters", "months", "days", "hours", "minutes", "seconds" + }; + + switch (nType) + { + case sheet::DataPilotFieldGroupBy::YEARS: return pNames[1]; + case sheet::DataPilotFieldGroupBy::QUARTERS: return pNames[2]; + case sheet::DataPilotFieldGroupBy::MONTHS: return pNames[3]; + case sheet::DataPilotFieldGroupBy::DAYS: return pNames[4]; + case sheet::DataPilotFieldGroupBy::HOURS: return pNames[5]; + case sheet::DataPilotFieldGroupBy::MINUTES: return pNames[6]; + case sheet::DataPilotFieldGroupBy::SECONDS: return pNames[7]; + default: + ; + } + + return pNames[0]; +} + +} + +void ScDPCache::Dump() const +{ + // Change these flags to fit your debugging needs. + bool bDumpItems = false; + bool bDumpSourceData = false; + + cout << "--- pivot cache dump" << endl; + { + size_t i = 0; + for (const auto& rxField : maFields) + { + const Field& fld = *rxField; + cout << "* source dimension: " << GetDimensionName(i) << " (ID = " << i << ")" << endl; + cout << " item count: " << fld.maItems.size() << endl; + if (bDumpItems) + dumpItems(*this, i, fld.maItems, 0); + if (fld.mpGroup) + { + cout << " group item count: " << fld.mpGroup->maItems.size() << endl; + cout << " group type: " << getGroupTypeName(fld.mpGroup->mnGroupType) << endl; + if (bDumpItems) + dumpItems(*this, i, fld.mpGroup->maItems, fld.maItems.size()); + } + + if (bDumpSourceData) + { + cout << " source data (re-constructed):" << endl; + dumpSourceData(*this, i, fld.maItems, fld.maData); + } + + ++i; + } + } + + { + size_t i = maFields.size(); + for (const auto& rxGroupField : maGroupFields) + { + const GroupItems& gi = *rxGroupField; + cout << "* group dimension: (unnamed) (ID = " << i << ")" << endl; + cout << " item count: " << gi.maItems.size() << endl; + cout << " group type: " << getGroupTypeName(gi.mnGroupType) << endl; + if (bDumpItems) + dumpItems(*this, i, gi.maItems, 0); + ++i; + } + } + + { + struct { SCROW start; SCROW end; bool empty; } aRange; + cout << "* empty rows: " << endl; + mdds::flat_segment_tree<SCROW, bool>::const_iterator it = maEmptyRows.begin(), itEnd = maEmptyRows.end(); + if (it != itEnd) + { + aRange.start = it->first; + aRange.empty = it->second; + + for (++it; it != itEnd; ++it) + { + aRange.end = it->first-1; + cout << " rows " << aRange.start << "-" << aRange.end << ": " << (aRange.empty ? "empty" : "not-empty") << endl; + aRange.start = it->first; + aRange.empty = it->second; + } + } + } + + cout << "---" << endl; +} + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |