summaryrefslogtreecommitdiffstats
path: root/linguistic/source/gciterator.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'linguistic/source/gciterator.cxx')
-rw-r--r--linguistic/source/gciterator.cxx1199
1 files changed, 1199 insertions, 0 deletions
diff --git a/linguistic/source/gciterator.cxx b/linguistic/source/gciterator.cxx
new file mode 100644
index 0000000000..2ef50fbeab
--- /dev/null
+++ b/linguistic/source/gciterator.cxx
@@ -0,0 +1,1199 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <sal/macros.h>
+#include <com/sun/star/beans/XPropertySet.hpp>
+#include <com/sun/star/container/ElementExistException.hpp>
+#include <com/sun/star/container/XNameAccess.hpp>
+#include <com/sun/star/configuration/theDefaultProvider.hpp>
+#include <com/sun/star/i18n/BreakIterator.hpp>
+#include <com/sun/star/lang/IndexOutOfBoundsException.hpp>
+#include <com/sun/star/lang/XComponent.hpp>
+#include <com/sun/star/lang/XServiceInfo.hpp>
+#include <com/sun/star/lang/XMultiServiceFactory.hpp>
+#include <com/sun/star/linguistic2/XSupportedLocales.hpp>
+#include <com/sun/star/linguistic2/XProofreader.hpp>
+#include <com/sun/star/linguistic2/XProofreadingIterator.hpp>
+#include <com/sun/star/linguistic2/SingleProofreadingError.hpp>
+#include <com/sun/star/linguistic2/ProofreadingResult.hpp>
+#include <com/sun/star/linguistic2/LinguServiceEvent.hpp>
+#include <com/sun/star/linguistic2/LinguServiceEventFlags.hpp>
+#include <com/sun/star/text/TextMarkupType.hpp>
+#include <com/sun/star/text/TextMarkupDescriptor.hpp>
+#include <com/sun/star/text/XMultiTextMarkup.hpp>
+#include <com/sun/star/text/XFlatParagraph.hpp>
+#include <com/sun/star/text/XFlatParagraphIterator.hpp>
+#include <com/sun/star/uno/XComponentContext.hpp>
+
+#include <sal/config.h>
+#include <sal/log.hxx>
+#include <o3tl/safeint.hxx>
+#include <osl/conditn.hxx>
+#include <cppuhelper/supportsservice.hxx>
+#include <cppuhelper/weak.hxx>
+#include <i18nlangtag/languagetag.hxx>
+#include <comphelper/processfactory.hxx>
+#include <comphelper/propertysequence.hxx>
+#include <tools/debug.hxx>
+#include <comphelper/diagnose_ex.hxx>
+
+#include <map>
+
+#include <linguistic/misc.hxx>
+
+#include "gciterator.hxx"
+
+using namespace linguistic;
+using namespace ::com::sun::star;
+
+// Table of the characters that lcl_IsWhiteSpace() treats as white space.
+// As the per-entry comments show, it lists not only real spaces but also
+// invisible fillers, joiners, directional marks and annotation controls.
+// white space list: obtained from the fonts.config.txt of a Linux system.
+const sal_Unicode aWhiteSpaces[] =
+{
+ 0x0020, /* SPACE */
+ 0x00a0, /* NO-BREAK SPACE */
+ 0x00ad, /* SOFT HYPHEN */
+ 0x115f, /* HANGUL CHOSEONG FILLER */
+ 0x1160, /* HANGUL JUNGSEONG FILLER */
+ 0x1680, /* OGHAM SPACE MARK */
+ 0x2000, /* EN QUAD */
+ 0x2001, /* EM QUAD */
+ 0x2002, /* EN SPACE */
+ 0x2003, /* EM SPACE */
+ 0x2004, /* THREE-PER-EM SPACE */
+ 0x2005, /* FOUR-PER-EM SPACE */
+ 0x2006, /* SIX-PER-EM SPACE */
+ 0x2007, /* FIGURE SPACE */
+ 0x2008, /* PUNCTUATION SPACE */
+ 0x2009, /* THIN SPACE */
+ 0x200a, /* HAIR SPACE */
+ 0x200b, /* ZERO WIDTH SPACE */
+ 0x200c, /* ZERO WIDTH NON-JOINER */
+ 0x200d, /* ZERO WIDTH JOINER */
+ 0x200e, /* LEFT-TO-RIGHT MARK */
+ 0x200f, /* RIGHT-TO-LEFT MARK */
+ 0x2028, /* LINE SEPARATOR */
+ 0x2029, /* PARAGRAPH SEPARATOR */
+ 0x202a, /* LEFT-TO-RIGHT EMBEDDING */
+ 0x202b, /* RIGHT-TO-LEFT EMBEDDING */
+ 0x202c, /* POP DIRECTIONAL FORMATTING */
+ 0x202d, /* LEFT-TO-RIGHT OVERRIDE */
+ 0x202e, /* RIGHT-TO-LEFT OVERRIDE */
+ 0x202f, /* NARROW NO-BREAK SPACE */
+ 0x205f, /* MEDIUM MATHEMATICAL SPACE */
+ 0x2060, /* WORD JOINER */
+ 0x2061, /* FUNCTION APPLICATION */
+ 0x2062, /* INVISIBLE TIMES */
+ 0x2063, /* INVISIBLE SEPARATOR */
+ 0x206A, /* INHIBIT SYMMETRIC SWAPPING */
+ 0x206B, /* ACTIVATE SYMMETRIC SWAPPING */
+ 0x206C, /* INHIBIT ARABIC FORM SHAPING */
+ 0x206D, /* ACTIVATE ARABIC FORM SHAPING */
+ 0x206E, /* NATIONAL DIGIT SHAPES */
+ 0x206F, /* NOMINAL DIGIT SHAPES */
+ 0x3000, /* IDEOGRAPHIC SPACE */
+ 0x3164, /* HANGUL FILLER */
+ 0xfeff, /* ZERO WIDTH NO-BREAK SPACE */
+ 0xffa0, /* HALFWIDTH HANGUL FILLER */
+ 0xfff9, /* INTERLINEAR ANNOTATION ANCHOR */
+ 0xfffa, /* INTERLINEAR ANNOTATION SEPARATOR */
+ 0xfffb /* INTERLINEAR ANNOTATION TERMINATOR */
+};
+
+// Information about reason for proofreading (ProofInfo)
+// One of these values is handed to lcl_makeProperties(), which passes it on
+// to the proofreader as the "ProofInfo" property.
+ // passed by checkSentenceAtPosition()
+ const sal_Int32 PROOFINFO_GET_PROOFRESULT = 1;
+ // passed by DequeueAndCheck()
+ const sal_Int32 PROOFINFO_MARK_PARAGRAPH = 2;
+
+// number of entries in aWhiteSpaces
+const int nWhiteSpaces = SAL_N_ELEMENTS( aWhiteSpaces );
+
+// Returns true if cChar is one of the code points listed in aWhiteSpaces.
+static bool lcl_IsWhiteSpace( sal_Unicode cChar )
+{
+    // linear scan over the table; leave as soon as a match is seen
+    for (int nIdx = 0; nIdx != nWhiteSpaces; ++nIdx)
+    {
+        if (aWhiteSpaces[nIdx] == cChar)
+            return true;
+    }
+    return false;
+}
+
+// Returns the index of the first character at or after nStartPos that is not
+// white space (per lcl_IsWhiteSpace), or the text length if only white space
+// follows. An nStartPos outside [0, length] is clamped into that range and a
+// warning is logged.
+static sal_Int32 lcl_SkipWhiteSpaces( const OUString &rText, sal_Int32 nStartPos )
+{
+    // note having nStartPos point right behind the string is OK since that one
+    // is a correct end-of-sentence position to be returned from a grammar checker...
+
+    const sal_Int32 nLen = rText.getLength();
+    const bool bBelowRange = nStartPos < 0;
+    const bool bAboveRange = nStartPos > nLen;
+    if (bBelowRange)
+        nStartPos = 0;
+    else if (bAboveRange)
+        nStartPos = nLen;
+    if (bBelowRange || bAboveRange)
+    {
+        SAL_WARN( "linguistic", "lcl_SkipWhiteSpaces: illegal arguments" );
+    }
+
+    // walk forward by index until a non-white-space character or the end is hit
+    const sal_Unicode* const pChars = rText.getStr();
+    sal_Int32 nRes = nStartPos;
+    while (nRes < nLen && lcl_IsWhiteSpace( pChars[nRes] ))
+        ++nRes;
+
+    DBG_ASSERT( 0 <= nRes && nRes <= nLen, "lcl_SkipWhiteSpaces return value out of range" );
+    return nRes;
+}
+
+// Returns nStartPos moved backwards over any white space (per
+// lcl_IsWhiteSpace) that directly precedes it, i.e. the position right behind
+// the last non-white-space character in front of nStartPos. Returns 0 if only
+// white space precedes nStartPos. An nStartPos outside [0, length] is clamped
+// into that range and a warning is logged.
+static sal_Int32 lcl_BacktraceWhiteSpaces( const OUString &rText, sal_Int32 nStartPos )
+{
+    // note: having nStartPos point right behind the string is OK since that one
+    // is a correct end-of-sentence position to be returned from a grammar checker...
+
+    const sal_Int32 nLen = rText.getLength();
+    bool bIllegalArgument = false;
+    if (nStartPos < 0)
+    {
+        bIllegalArgument = true;
+        nStartPos = 0;
+    }
+    if (nStartPos > nLen)
+    {
+        bIllegalArgument = true;
+        nStartPos = nLen;
+    }
+    if (bIllegalArgument)
+    {
+        SAL_WARN( "linguistic", "lcl_BacktraceWhiteSpaces: illegal arguments" );
+    }
+
+    // Step back while the character in front of the current position is white
+    // space. Testing index nRes - 1 also handles text that is white space all
+    // the way from its start up to nStartPos: the result is then 0, whereas a
+    // pointer loop that stops at the first character without testing it would
+    // report 1.
+    const sal_Unicode *pStart = rText.getStr();
+    sal_Int32 nRes = nStartPos;
+    while (nRes > 0 && lcl_IsWhiteSpace( pStart[ nRes - 1 ] ))
+        --nRes;
+
+    DBG_ASSERT( 0 <= nRes && nRes <= nLen, "lcl_BacktraceWhiteSpaces return value out of range" );
+    return nRes;
+}
+
+
+extern "C" {
+
+// Thread entry point handed to osl_createThread(); gci is the
+// GrammarCheckingIterator whose queue this thread processes.
+static void lcl_workerfunc (void * gci)
+{
+    osl_setThreadName("GrammarCheckingIterator");
+
+    GrammarCheckingIterator * const pIterator = static_cast<GrammarCheckingIterator*>(gci);
+    pIterator->DequeueAndCheck();
+}
+
+}
+
+// Returns the language the flat paragraph reports for the single character
+// at nStartIndex; callers use it as the language of the whole sentence.
+static lang::Locale lcl_GetPrimaryLanguageOfSentence(
+    const uno::Reference< text::XFlatParagraph >& xFlatPara,
+    sal_Int32 nStartIndex )
+{
+    // only the start of the sentence is looked at
+    const sal_Int32 nCharsToInspect = 1;
+    return xFlatPara->getLanguageOfText( nStartIndex, nCharsToInspect );
+}
+
+
+// Creates an empty key/value map.
+LngXStringKeyMap::LngXStringKeyMap() = default;
+
+// Stores aValue under aKey.
+// Throws css::container::ElementExistException if aKey is already present;
+// the existing entry is left unchanged in that case.
+void SAL_CALL LngXStringKeyMap::insertValue(const OUString& aKey, const css::uno::Any& aValue)
+{
+    // emplace does not overwrite an existing entry and reports that via .second
+    const bool bInserted = maMap.emplace(aKey, aValue).second;
+    if (!bInserted)
+        throw css::container::ElementExistException();
+}
+
+// Returns the value stored under aKey.
+// Throws css::container::NoSuchElementException if there is no such key.
+css::uno::Any SAL_CALL LngXStringKeyMap::getValue(const OUString& aKey)
+{
+    const auto aFound = maMap.find(aKey);
+    if (aFound != maMap.end())
+        return aFound->second;
+
+    throw css::container::NoSuchElementException();
+}
+
+// Returns whether an entry for aKey exists.
+sal_Bool SAL_CALL LngXStringKeyMap::hasValue(const OUString& aKey)
+{
+    return maMap.count(aKey) != 0;
+}
+
+// Returns the number of entries in the map.
+::sal_Int32 SAL_CALL LngXStringKeyMap::getCount()
+{
+    return static_cast< ::sal_Int32 >(maMap.size());
+}
+
+// Returns the key of the nIndex-th entry, counting in the map's iteration
+// (sorted key) order.
+// Throws css::lang::IndexOutOfBoundsException if nIndex is negative or not
+// smaller than the number of entries.
+OUString SAL_CALL LngXStringKeyMap::getKeyByIndex(::sal_Int32 nIndex)
+{
+    if (nIndex < 0 || o3tl::make_unsigned(nIndex) >= maMap.size())
+        throw css::lang::IndexOutOfBoundsException();
+
+    // std::map has no random access, so step to the requested entry;
+    // the range check above guarantees the iterator stays valid
+    auto aIter = maMap.begin();
+    for (::sal_Int32 i = 0; i < nIndex; ++i)
+        ++aIter;
+    return aIter->first;
+}
+
+// Returns the value of the nIndex-th entry, counting in the map's iteration
+// (sorted key) order.
+// Throws css::lang::IndexOutOfBoundsException if nIndex is negative or not
+// smaller than the number of entries.
+css::uno::Any SAL_CALL LngXStringKeyMap::getValueByIndex(::sal_Int32 nIndex)
+{
+    if (nIndex < 0 || o3tl::make_unsigned(nIndex) >= maMap.size())
+        throw css::lang::IndexOutOfBoundsException();
+
+    // std::map has no random access, so step to the requested entry;
+    // the range check above guarantees the iterator stays valid
+    auto aIter = maMap.begin();
+    for (::sal_Int32 i = 0; i < nIndex; ++i)
+        ++aIter;
+    return aIter->second;
+}
+
+
+// Returns the one mutex used by all guards in this file; it is a
+// function-local static, so it is created on first use.
+osl::Mutex& GrammarCheckingIterator::MyMutex()
+{
+    static osl::Mutex aTheMutex;
+    return aTheMutex;
+}
+
+// Sets up an idle iterator: no worker thread exists yet (AddEntry() creates
+// it on demand), the configured grammar checker services have not been read,
+// and both listener containers are constructed with MyMutex().
+GrammarCheckingIterator::GrammarCheckingIterator() :
+ m_bEnd( false ),
+ m_bGCServicesChecked( false ),
+ m_nDocIdCounter( 0 ),
+ m_thread(nullptr),
+ m_aEventListeners( MyMutex() ),
+ m_aNotifyListeners( MyMutex() )
+{
+}
+
+
+// Asks the worker thread to end and joins it (if one was started) before the
+// object goes away.
+GrammarCheckingIterator::~GrammarCheckingIterator()
+{
+ TerminateThread();
+}
+
+// Signals the worker thread to end (m_bEnd plus a wake-up) and, if a thread
+// had been created, joins and destroys it. The join happens after the mutex
+// is released.
+void GrammarCheckingIterator::TerminateThread()
+{
+    oslThread hWorker;
+    {
+        ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
+        hWorker = m_thread;
+        m_thread = nullptr;
+        m_bEnd = true;
+        m_aWakeUpThread.set();
+    }
+
+    // nothing to wait for if the worker was never started
+    if (hWorker == nullptr)
+        return;
+
+    osl_joinWithThread(hWorker);
+    osl_destroyThread(hWorker);
+}
+
+// Increments the document id counter under the mutex and returns the new value.
+sal_Int32 GrammarCheckingIterator::NextDocId()
+{
+    ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
+    return ++m_nDocIdCounter;
+}
+
+
+// Returns the id string registered for xComponent. If the component is not
+// known yet, a new id is generated, stored in m_aDocIdMap, and this object is
+// added to the component as event listener. Returns an empty string for an
+// empty reference.
+OUString GrammarCheckingIterator::GetOrCreateDocId(
+    const uno::Reference< lang::XComponent > &xComponent )
+{
+    // internal method; will always be called with locked mutex
+
+    if (!xComponent.is())
+        return OUString();
+
+    // already registered? then hand out the stored id
+    const auto aKnown = m_aDocIdMap.find( xComponent.get() );
+    if (aKnown != m_aDocIdMap.end())
+        return aKnown->second;
+
+    // first time we see this component: register it
+    const OUString aNewId = OUString::number( NextDocId() );
+    m_aDocIdMap[ xComponent.get() ] = aNewId;
+    xComponent->addEventListener( this );
+    return aNewId;
+}
+
+
+// Appends a work item (paragraph, start index within it, document id,
+// automatic flag) to the queue, creating the worker thread if none exists,
+// and wakes the worker. Does nothing if xFlatPara is empty.
+void GrammarCheckingIterator::AddEntry(
+    const uno::Reference< text::XFlatParagraphIterator >& xFlatParaIterator,
+    const uno::Reference< text::XFlatParagraph >& xFlatPara,
+    const OUString & rDocId,
+    sal_Int32 nStartIndex,
+    bool bAutomatic )
+{
+    // we may not need/have a xFlatParaIterator (e.g. if checkGrammarAtPos was called)
+    // but we always need a xFlatPara...
+    if (xFlatPara.is())
+    {
+        FPEntry aEntry;
+        aEntry.m_xParaIterator = xFlatParaIterator;
+        aEntry.m_xPara = xFlatPara;
+        aEntry.m_aDocId = rDocId;
+        aEntry.m_nStartIndex = nStartIndex;
+        aEntry.m_bAutomatic = bAutomatic;
+
+        // queue access and thread creation happen under the mutex
+        ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
+        if (!m_thread)
+            m_thread = osl_createThread( lcl_workerfunc, this );
+        m_aFPEntriesQueue.push_back( aEntry );
+
+        // let the worker know there is something to check
+        m_aWakeUpThread.set();
+    }
+}
+
+
+// Applies one proofreading result to its flat paragraph and schedules the
+// follow-up work.
+// If the paragraph is missing or reports itself as modified, nothing is marked
+// and the next paragraph from rxFlatParagraphIterator (if any) is queued.
+// Otherwise the errors plus one sentence markup are committed through
+// XMultiTextMarkup (when the paragraph supports it), and then either the next
+// sentence of the same paragraph is queued or, at the end of the text, the
+// paragraph is flagged as checked and the next paragraph is queued.
+void GrammarCheckingIterator::ProcessResult(
+ const linguistic2::ProofreadingResult &rRes,
+ const uno::Reference< text::XFlatParagraphIterator > &rxFlatParagraphIterator,
+ bool bIsAutomaticChecking )
+{
+ DBG_ASSERT( rRes.xFlatParagraph.is(), "xFlatParagraph is missing" );
+ //no guard necessary as no members are used
+ bool bContinueWithNextPara = false;
+ if (!rRes.xFlatParagraph.is() || rRes.xFlatParagraph->isModified())
+ {
+ // if paragraph was modified/deleted meanwhile continue with the next one...
+ bContinueWithNextPara = true;
+ }
+ else // paragraph is still unchanged...
+ {
+ // mark found errors...
+
+ // sanity check of the sentence positions; the result is only used by the
+ // assertion below, processing continues either way
+ sal_Int32 nTextLen = rRes.aText.getLength();
+ bool bBoundariesOk = 0 <= rRes.nStartOfSentencePosition && rRes.nStartOfSentencePosition <= nTextLen &&
+ 0 <= rRes.nBehindEndOfSentencePosition && rRes.nBehindEndOfSentencePosition <= nTextLen &&
+ 0 <= rRes.nStartOfNextSentencePosition && rRes.nStartOfNextSentencePosition <= nTextLen &&
+ rRes.nStartOfSentencePosition <= rRes.nBehindEndOfSentencePosition &&
+ rRes.nBehindEndOfSentencePosition <= rRes.nStartOfNextSentencePosition;
+ DBG_ASSERT( bBoundariesOk, "inconsistent sentence boundaries" );
+
+ uno::Reference< text::XMultiTextMarkup > xMulti( rRes.xFlatParagraph, uno::UNO_QUERY );
+ if (xMulti.is()) // use new API for markups
+ {
+ try
+ {
+ // length = number of found errors + 1 sentence markup
+ sal_Int32 nErrors = rRes.aErrors.getLength();
+ uno::Sequence< text::TextMarkupDescriptor > aDescriptors( nErrors + 1 );
+ text::TextMarkupDescriptor * pDescriptors = aDescriptors.getArray();
+
+ // at pos 0 .. nErrors-1 -> all grammar errors
+ for (const linguistic2::SingleProofreadingError &rError : rRes.aErrors)
+ {
+ // fill the next free descriptor and advance the write position
+ text::TextMarkupDescriptor &rDesc = *pDescriptors++;
+
+ rDesc.nType = rError.nErrorType;
+ rDesc.nOffset = rError.nErrorStart;
+ rDesc.nLength = rError.nErrorLength;
+
+ // the proofreader may return SPELLING but right now our core
+ // does only handle PROOFREADING if the result is from the proofreader...
+ // (later on we may wish to color spelling errors found by the proofreader
+ // differently for example. But no special handling right now.
+ if (rDesc.nType == text::TextMarkupType::SPELLCHECK)
+ rDesc.nType = text::TextMarkupType::PROOFREADING;
+
+ // only the "LineColor" and "LineType" properties are forwarded; the
+ // key map is attached to the descriptor only if at least one is present
+ uno::Reference< container::XStringKeyMap > xKeyMap(
+ new LngXStringKeyMap());
+ for( const beans::PropertyValue& rProperty : rError.aProperties )
+ {
+ if ( rProperty.Name == "LineColor" )
+ {
+ xKeyMap->insertValue(rProperty.Name,
+ rProperty.Value);
+ rDesc.xMarkupInfoContainer = xKeyMap;
+ }
+ else if ( rProperty.Name == "LineType" )
+ {
+ xKeyMap->insertValue(rProperty.Name,
+ rProperty.Value);
+ rDesc.xMarkupInfoContainer = xKeyMap;
+ }
+ }
+ }
+
+ // at pos nErrors -> sentence markup
+ // nSentenceLength: includes the white-spaces following the sentence end...
+ const sal_Int32 nSentenceLength = rRes.nStartOfNextSentencePosition - rRes.nStartOfSentencePosition;
+ pDescriptors->nType = text::TextMarkupType::SENTENCE;
+ pDescriptors->nOffset = rRes.nStartOfSentencePosition;
+ pDescriptors->nLength = nSentenceLength;
+
+ xMulti->commitMultiTextMarkup( aDescriptors ) ;
+ }
+ catch (lang::IllegalArgumentException &)
+ {
+ // only logged; the follow-up scheduling below still runs
+ TOOLS_WARN_EXCEPTION( "linguistic", "commitMultiTextMarkup" );
+ }
+ }
+
+ // other sentences left to be checked in this paragraph?
+ if (rRes.nStartOfNextSentencePosition < rRes.aText.getLength())
+ {
+ AddEntry( rxFlatParagraphIterator, rRes.xFlatParagraph, rRes.aDocumentIdentifier, rRes.nStartOfNextSentencePosition, bIsAutomaticChecking );
+ }
+ else // current paragraph finished
+ {
+ // set "already checked" flag for the current flat paragraph
+ if (rRes.xFlatParagraph.is())
+ rRes.xFlatParagraph->setChecked( text::TextMarkupType::PROOFREADING, true );
+
+ bContinueWithNextPara = true;
+ }
+ }
+
+ if (bContinueWithNextPara)
+ {
+ // we need to continue with the next paragraph
+ // (AddEntry ignores the request if getNextPara() yields an empty reference)
+ if (rxFlatParagraphIterator.is())
+ AddEntry(rxFlatParagraphIterator, rxFlatParagraphIterator->getNextPara(),
+ rRes.aDocumentIdentifier, 0, bIsAutomaticChecking);
+ }
+}
+
+
+// Looks up the grammar checker implementation name configured for rLocale.
+// Returns { implementation name, empty optional } for an exact BCP 47 match,
+// { implementation name, fallback tag } if only one of the locale's fallback
+// tags is configured, and a default-constructed pair (empty name) if nothing
+// matches or the locale has no language.
+std::pair<OUString, std::optional<OUString>>
+GrammarCheckingIterator::getServiceForLocale(const lang::Locale& rLocale) const
+{
+    if (rLocale.Language.isEmpty())
+        return {};
+
+    // first try the locale exactly as given
+    const auto aExact
+        = m_aGCImplNamesByLang.find(LanguageTag::convertToBcp47(rLocale, false));
+    if (aExact != m_aGCImplNamesByLang.end())
+        return { aExact->second, {} };
+
+    // then walk through the fallback tags in the order they are provided
+    for (const auto& sFallbackBcp47 : LanguageTag(rLocale).getFallbackStrings(false))
+    {
+        const auto aViaFallback = m_aGCImplNamesByLang.find(sFallbackBcp47);
+        if (aViaFallback != m_aGCImplNamesByLang.end())
+            return { aViaFallback->second, sFallbackBcp47 };
+    }
+
+    return {};
+}
+
+
+// Returns the proofreader configured for rLocale, instantiating and caching
+// it on first use.
+// rLocale is an in/out parameter: if only a fallback tag of the requested
+// locale is configured, rLocale is replaced by the locale of that fallback.
+// Returns an empty reference if no service is configured for the locale, if
+// instantiating the service fails, or if the new instance does not report
+// support for the (possibly replaced) locale.
+uno::Reference< linguistic2::XProofreader > GrammarCheckingIterator::GetGrammarChecker(
+ lang::Locale &rLocale )
+{
+ uno::Reference< linguistic2::XProofreader > xRes;
+
+ // ---- THREAD SAFE START ----
+ ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
+
+ // check supported locales for each grammarchecker if not already done
+ if (!m_bGCServicesChecked)
+ {
+ GetConfiguredGCSvcs_Impl();
+ m_bGCServicesChecked = true;
+ }
+
+ if (const auto& [aSvcImplName, oFallbackBcp47] = getServiceForLocale(rLocale);
+ !aSvcImplName.isEmpty()) // matching configured language found?
+ {
+ // the match was found via a fallback tag: continue with that locale
+ if (oFallbackBcp47)
+ rLocale = LanguageTag::convertToLocale(*oFallbackBcp47, false);
+ GCReferences_t::const_iterator aImplNameIt( m_aGCReferencesByService.find( aSvcImplName ) );
+ if (aImplNameIt != m_aGCReferencesByService.end()) // matching impl name found?
+ {
+ xRes = aImplNameIt->second;
+ }
+ else // the service is to be instantiated here for the first time...
+ {
+ try
+ {
+ uno::Reference< uno::XComponentContext > xContext( comphelper::getProcessComponentContext() );
+ uno::Reference< linguistic2::XProofreader > xGC(
+ xContext->getServiceManager()->createInstanceWithContext(aSvcImplName, xContext),
+ uno::UNO_QUERY_THROW );
+ uno::Reference< linguistic2::XSupportedLocales > xSuppLoc( xGC, uno::UNO_QUERY_THROW );
+
+ // the instance is cached (and returned) only if it supports the locale
+ if (xSuppLoc->hasLocale( rLocale ))
+ {
+ m_aGCReferencesByService[ aSvcImplName ] = xGC;
+ xRes = xGC;
+
+ // register for the checker's events, see processLinguServiceEvent()
+ uno::Reference< linguistic2::XLinguServiceEventBroadcaster > xBC( xGC, uno::UNO_QUERY );
+ if (xBC.is())
+ xBC->addLinguServiceEventListener( this );
+ }
+ else
+ {
+ SAL_WARN( "linguistic", "grammar checker does not support required locale" );
+ }
+ }
+ catch (uno::Exception &)
+ {
+ // covers failed instantiation as well as the UNO_QUERY_THROW queries above
+ SAL_WARN( "linguistic", "instantiating grammar checker failed" );
+ }
+ }
+ }
+ else // not found - quite normal
+ {
+ SAL_INFO("linguistic", "No grammar checker found for \""
+ << LanguageTag::convertToBcp47(rLocale, false) << "\"");
+ }
+ // ---- THREAD SAFE END ----
+
+ return xRes;
+}
+
+// Builds the property sequence handed to XProofreader::doProofreading():
+// four values read from the flat paragraph's property set plus nProofInfo as
+// "ProofInfo". Throws if xFlatPara does not provide beans::XPropertySet
+// (UNO_QUERY_THROW).
+static uno::Sequence<beans::PropertyValue>
+lcl_makeProperties(uno::Reference<text::XFlatParagraph> const& xFlatPara, sal_Int32 nProofInfo)
+{
+    uno::Reference<beans::XPropertySet> const xParaProps(
+        xFlatPara, uno::UNO_QUERY_THROW);
+    css::uno::Any aProofInfo (nProofInfo);
+    return comphelper::InitPropertySequence({
+        { "FieldPositions", xParaProps->getPropertyValue("FieldPositions") },
+        { "FootnotePositions", xParaProps->getPropertyValue("FootnotePositions") },
+        { "SortedTextId", xParaProps->getPropertyValue("SortedTextId") },
+        { "DocumentElementsCount", xParaProps->getPropertyValue("DocumentElementsCount") },
+        { "ProofInfo", aProofInfo }
+    });
+}
+
+// Body of the worker thread (entered via lcl_workerfunc). Loops until m_bEnd
+// is set: takes one entry at a time from m_aFPEntriesQueue, proofreads one
+// sentence of its paragraph and hands the result to ProcessResult(), which
+// queues the follow-up entry. While the queue is empty the thread waits on
+// m_aWakeUpThread.
+void GrammarCheckingIterator::DequeueAndCheck()
+{
+ for (;;)
+ {
+ // ---- THREAD SAFE START ----
+ bool bQueueEmpty = false;
+ {
+ ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
+ if (m_bEnd)
+ {
+ break;
+ }
+ bQueueEmpty = m_aFPEntriesQueue.empty();
+ }
+ // ---- THREAD SAFE END ----
+
+ if (!bQueueEmpty)
+ {
+ uno::Reference< text::XFlatParagraphIterator > xFPIterator;
+ uno::Reference< text::XFlatParagraph > xFlatPara;
+ FPEntry aFPEntryItem;
+ OUString aCurDocId;
+ // ---- THREAD SAFE START ----
+ {
+ // take the oldest entry and remember its document as "currently checked"
+ ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
+ aFPEntryItem = m_aFPEntriesQueue.front();
+ xFPIterator = aFPEntryItem.m_xParaIterator;
+ xFlatPara = aFPEntryItem.m_xPara;
+ m_aCurCheckedDocId = aFPEntryItem.m_aDocId;
+ aCurDocId = m_aCurCheckedDocId;
+
+ m_aFPEntriesQueue.pop_front();
+ }
+ // ---- THREAD SAFE END ----
+
+ // entries lacking either reference are dropped without being checked
+ if (xFlatPara.is() && xFPIterator.is())
+ {
+ try
+ {
+ OUString aCurTxt( xFlatPara->getText() );
+ lang::Locale aCurLocale = lcl_GetPrimaryLanguageOfSentence( xFlatPara, aFPEntryItem.m_nStartIndex );
+
+ const bool bModified = xFlatPara->isModified();
+ if (!bModified)
+ {
+ linguistic2::ProofreadingResult aRes;
+
+ // ---- THREAD SAFE START ----
+ {
+ osl::ClearableMutexGuard aGuard(MyMutex());
+
+ sal_Int32 nStartPos = aFPEntryItem.m_nStartIndex;
+ sal_Int32 nSuggestedEnd
+ = GetSuggestedEndOfSentence(aCurTxt, nStartPos, aCurLocale);
+ DBG_ASSERT((nSuggestedEnd == 0 && aCurTxt.isEmpty())
+ || nSuggestedEnd > nStartPos,
+ "nSuggestedEndOfSentencePos calculation failed?");
+
+ // note: GetGrammarChecker may replace aCurLocale by a fallback locale
+ uno::Reference<linguistic2::XProofreader> xGC =
+ GetGrammarChecker(aCurLocale);
+ if (xGC.is())
+ {
+ // release the mutex before calling out to the proofreader
+ aGuard.clear();
+ uno::Sequence<beans::PropertyValue> const aProps(
+ lcl_makeProperties(xFlatPara, PROOFINFO_MARK_PARAGRAPH));
+ aRes = xGC->doProofreading(aCurDocId, aCurTxt, aCurLocale,
+ nStartPos, nSuggestedEnd, aProps);
+
+ //!! work-around to prevent looping if the grammar checker
+ //!! failed to properly identify the sentence end
+ if (aRes.nBehindEndOfSentencePosition <= nStartPos
+ && aRes.nBehindEndOfSentencePosition != nSuggestedEnd)
+ {
+ SAL_WARN(
+ "linguistic",
+ "!! Grammarchecker failed to provide end of sentence !!");
+ aRes.nBehindEndOfSentencePosition = nSuggestedEnd;
+ }
+
+ aRes.xFlatParagraph = xFlatPara;
+ aRes.nStartOfSentencePosition = nStartPos;
+ }
+ else
+ {
+ // no grammar checker -> no error
+ // but we need to provide the data below in order to continue with the next sentence
+ aRes.aDocumentIdentifier = aCurDocId;
+ aRes.xFlatParagraph = xFlatPara;
+ aRes.aText = aCurTxt;
+ aRes.aLocale = aCurLocale;
+ aRes.nStartOfSentencePosition = nStartPos;
+ aRes.nBehindEndOfSentencePosition = nSuggestedEnd;
+ }
+ // normalise the boundaries: the next sentence starts behind the white
+ // space following this one, and this sentence ends before that white space
+ aRes.nStartOfNextSentencePosition
+ = lcl_SkipWhiteSpaces(aCurTxt, aRes.nBehindEndOfSentencePosition);
+ aRes.nBehindEndOfSentencePosition = lcl_BacktraceWhiteSpaces(
+ aCurTxt, aRes.nStartOfNextSentencePosition);
+
+ //guard has to be cleared as ProcessResult calls out of this class
+ }
+ // ---- THREAD SAFE END ----
+ ProcessResult( aRes, xFPIterator, aFPEntryItem.m_bAutomatic );
+ }
+ else
+ {
+ // the paragraph changed meanwhile... (and maybe is still edited)
+ // thus we simply continue to ask for the next to be checked.
+ uno::Reference< text::XFlatParagraph > xFlatParaNext( xFPIterator->getNextPara() );
+ AddEntry( xFPIterator, xFlatParaNext, aCurDocId, 0, aFPEntryItem.m_bAutomatic );
+ }
+ }
+ catch (css::uno::Exception &)
+ {
+ // the entry is given up; no follow-up entry is queued from here
+ TOOLS_WARN_EXCEPTION("linguistic", "GrammarCheckingIterator::DequeueAndCheck ignoring");
+ }
+ }
+
+ // ---- THREAD SAFE START ----
+ {
+ // nothing is being checked any more (see isProofreading())
+ ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
+ m_aCurCheckedDocId.clear();
+ }
+ // ---- THREAD SAFE END ----
+ }
+ else
+ {
+ // ---- THREAD SAFE START ----
+ {
+ ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
+ if (m_bEnd)
+ {
+ break;
+ }
+ // Check queue state again
+ // (the condition is only reset while the queue is still empty, so a
+ // set() from AddEntry() in the meantime is not lost)
+ if (m_aFPEntriesQueue.empty())
+ m_aWakeUpThread.reset();
+ }
+ // ---- THREAD SAFE END ----
+
+ //if the queue is empty
+ // IMPORTANT: Don't call condition.wait() with locked
+ // mutex. Otherwise you would keep out other threads
+ // to add entries to the queue! A condition is thread-
+ // safe implemented.
+ m_aWakeUpThread.wait();
+ }
+ }
+}
+
+
+// Starts automatic background proofreading of a document: fetches the first
+// paragraph from the provider's flat paragraph iterator and queues it for
+// the worker thread.
+// The call is silently ignored if xIteratorProvider is empty, if no first
+// paragraph is available, or if xDoc does not implement lang::XComponent.
+void SAL_CALL GrammarCheckingIterator::startProofreading(
+    const uno::Reference< ::uno::XInterface > & xDoc,
+    const uno::Reference< text::XFlatParagraphIteratorProvider > & xIteratorProvider )
+{
+    // without a provider there is nothing to iterate over; bail out instead
+    // of dereferencing an empty reference
+    if (!xIteratorProvider.is())
+        return;
+
+    // get paragraph to start checking with
+    const bool bAutomatic = true;
+    uno::Reference<text::XFlatParagraphIterator> xFPIterator = xIteratorProvider->getFlatParagraphIterator(
+        text::TextMarkupType::PROOFREADING, bAutomatic );
+    uno::Reference< text::XFlatParagraph > xPara( xFPIterator.is()? xFPIterator->getFirstPara() : nullptr );
+    uno::Reference< lang::XComponent > xComponent( xDoc, uno::UNO_QUERY );
+
+    // ---- THREAD SAFE START ----
+    ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
+    if (xPara.is() && xComponent.is())
+    {
+        OUString aDocId = GetOrCreateDocId( xComponent );
+
+        // create new entry and add it to queue
+        AddEntry( xFPIterator, xPara, aDocId, 0, bAutomatic );
+    }
+    // ---- THREAD SAFE END ----
+}
+
+
+// Proofreads rText sentence by sentence, starting at nStartOfSentencePos
+// (negative values are treated as 0), until the sentence containing
+// nErrorPosInPara has been checked, and returns the result for that sentence.
+// If nErrorPosInPara is negative, the first sentence for which a proofreader
+// is available is returned.
+// A default-constructed result is returned if xFlatPara is empty, xDoc is not
+// a lang::XComponent, nErrorPosInPara is not smaller than the text length, no
+// matching sentence is found, or the paragraph reports itself as modified.
+// The locale argument is unnamed and not used; the incoming value of
+// nSuggestedEndOfSentencePos is overwritten before it is read.
+linguistic2::ProofreadingResult SAL_CALL GrammarCheckingIterator::checkSentenceAtPosition(
+ const uno::Reference< uno::XInterface >& xDoc,
+ const uno::Reference< text::XFlatParagraph >& xFlatPara,
+ const OUString& rText,
+ const lang::Locale&,
+ sal_Int32 nStartOfSentencePos,
+ sal_Int32 nSuggestedEndOfSentencePos,
+ sal_Int32 nErrorPosInPara )
+{
+ // for the context menu...
+
+ linguistic2::ProofreadingResult aRes;
+
+ uno::Reference< lang::XComponent > xComponent( xDoc, uno::UNO_QUERY );
+ if (xFlatPara.is() && xComponent.is() &&
+ ( nErrorPosInPara < 0 || nErrorPosInPara < rText.getLength()))
+ {
+ // iterate through paragraph until we find the sentence we are interested in
+ linguistic2::ProofreadingResult aTmpRes;
+ sal_Int32 nStartPos = nStartOfSentencePos >= 0 ? nStartOfSentencePos : 0;
+
+ bool bFound = false;
+ do
+ {
+ lang::Locale aCurLocale = lcl_GetPrimaryLanguageOfSentence( xFlatPara, nStartPos );
+ sal_Int32 nOldStartOfSentencePos = nStartPos;
+ uno::Reference< linguistic2::XProofreader > xGC;
+ OUString aDocId;
+
+ // ---- THREAD SAFE START ----
+ {
+ ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
+ aDocId = GetOrCreateDocId( xComponent );
+ nSuggestedEndOfSentencePos = GetSuggestedEndOfSentence( rText, nStartPos, aCurLocale );
+ DBG_ASSERT( nSuggestedEndOfSentencePos > nStartPos, "nSuggestedEndOfSentencePos calculation failed?" );
+
+ xGC = GetGrammarChecker( aCurLocale );
+ }
+ // ---- THREAD SAFE END ----
+ // the proofreader is called without the mutex being held
+ sal_Int32 nEndPos = -1;
+ if (xGC.is())
+ {
+ uno::Sequence<beans::PropertyValue> const aProps(
+ lcl_makeProperties(xFlatPara, PROOFINFO_GET_PROOFRESULT));
+ aTmpRes = xGC->doProofreading( aDocId, rText,
+ aCurLocale, nStartPos, nSuggestedEndOfSentencePos, aProps );
+
+ //!! work-around to prevent looping if the grammar checker
+ //!! failed to properly identify the sentence end
+ if (aTmpRes.nBehindEndOfSentencePosition <= nStartPos)
+ {
+ SAL_WARN( "linguistic", "!! Grammarchecker failed to provide end of sentence !!" );
+ aTmpRes.nBehindEndOfSentencePosition = nSuggestedEndOfSentencePos;
+ }
+
+ aTmpRes.xFlatParagraph = xFlatPara;
+ aTmpRes.nStartOfSentencePosition = nStartPos;
+ nEndPos = aTmpRes.nBehindEndOfSentencePosition;
+
+ // is the requested position inside the sentence just checked?
+ if ((nErrorPosInPara< 0 || nStartPos <= nErrorPosInPara) && nErrorPosInPara < nEndPos)
+ bFound = true;
+ }
+ if (nEndPos == -1) // no result from grammar checker
+ nEndPos = nSuggestedEndOfSentencePos;
+ nStartPos = lcl_SkipWhiteSpaces( rText, nEndPos );
+ // note: this value is replaced two lines below by the back-traced position
+ aTmpRes.nBehindEndOfSentencePosition = nEndPos;
+ aTmpRes.nStartOfNextSentencePosition = nStartPos;
+ aTmpRes.nBehindEndOfSentencePosition = lcl_BacktraceWhiteSpaces( rText, aTmpRes.nStartOfNextSentencePosition );
+
+ // prevent endless loop by forcefully advancing if needs be...
+ if (nStartPos <= nOldStartOfSentencePos)
+ {
+ SAL_WARN( "linguistic", "end-of-sentence detection failed?" );
+ nStartPos = nOldStartOfSentencePos + 1;
+ }
+ }
+ while (!bFound && nStartPos < rText.getLength());
+
+ // results for a paragraph that changed in the meantime are discarded
+ if (bFound && !xFlatPara->isModified())
+ aRes = aTmpRes;
+ }
+
+ return aRes;
+}
+
+
+// Uses the i18n break iterator (created on first use) to suggest where the
+// sentence starting at nSentenceStartPos ends.
+// The break iterator is queried from successive start positions until it
+// reports an end behind nSentenceStartPos or the end of the text is reached.
+// The returned position never exceeds the text length.
+sal_Int32 GrammarCheckingIterator::GetSuggestedEndOfSentence(
+ const OUString &rText,
+ sal_Int32 nSentenceStartPos,
+ const lang::Locale &rLocale )
+{
+ // internal method; will always be called with locked mutex
+
+ if (!m_xBreakIterator.is())
+ {
+ uno::Reference< uno::XComponentContext > xContext = ::comphelper::getProcessComponentContext();
+ m_xBreakIterator = i18n::BreakIterator::create(xContext);
+ }
+ sal_Int32 nTextLen = rText.getLength();
+ sal_Int32 nEndPosition(0);
+ sal_Int32 nTmpStartPos = nSentenceStartPos;
+ do
+ {
+ // remember the previous answer to detect a break iterator that makes no progress
+ sal_Int32 const nPrevEndPosition(nEndPosition);
+ // default when the start position is already at or behind the end of the text
+ nEndPosition = nTextLen;
+ if (nTmpStartPos < nTextLen)
+ {
+ nEndPosition = m_xBreakIterator->endOfSentence( rText, nTmpStartPos, rLocale );
+ if (nEndPosition <= nPrevEndPosition)
+ {
+ // fdo#68750 if there's no progress at all then presumably
+ // there's no end of sentence in this paragraph so just
+ // set the end position to end of paragraph
+ nEndPosition = nTextLen;
+ }
+ }
+ // a negative answer is treated as "end of text"
+ if (nEndPosition < 0)
+ nEndPosition = nTextLen;
+
+ // next attempt (if needed) starts one character further
+ ++nTmpStartPos;
+ }
+ while (nEndPosition <= nSentenceStartPos && nEndPosition < nTextLen);
+ if (nEndPosition > nTextLen)
+ nEndPosition = nTextLen;
+ return nEndPosition;
+}
+
+
+// Forwards resetIgnoreRules() to every proofreader instantiated so far.
+void SAL_CALL GrammarCheckingIterator::resetIgnoreRules( )
+{
+    // m_aGCReferencesByService is modified under MyMutex() by
+    // GetGrammarChecker() (also from the worker thread) and by dispose(), so
+    // take a snapshot under the mutex and call the proofreaders afterwards
+    // without holding it.
+    GCReferences_t aCheckers;
+    {
+        ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
+        aCheckers = m_aGCReferencesByService;
+    }
+
+    for (auto const& elem : aCheckers)
+    {
+        uno::Reference< linguistic2::XProofreader > xGC(elem.second);
+        if (xGC.is())
+            xGC->resetIgnoreRules();
+    }
+}
+
+
+// Returns true if the given document is being checked right now or still has
+// an entry waiting in the queue; false otherwise, including when xDoc is not
+// a lang::XComponent or has no id registered in m_aDocIdMap.
+sal_Bool SAL_CALL GrammarCheckingIterator::isProofreading(
+    const uno::Reference< uno::XInterface >& xDoc )
+{
+    // ---- THREAD SAFE START ----
+    ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
+
+    uno::Reference< lang::XComponent > xComponent( xDoc, uno::UNO_QUERY );
+    if (!xComponent.is())
+        return false;
+
+    // A component that was used in one of the calls that check text has an
+    // entry in m_aDocIdMap unless the document was disposed already. No
+    // entry means it is not being checked (nor requested to be checked).
+    const DocMap_t::const_iterator aIt( m_aDocIdMap.find( xComponent.get() ) );
+    if (aIt == m_aDocIdMap.end())
+        return false;
+
+    const OUString aDocId = aIt->second;
+
+    // an entry for that document was dequeued and is currently being checked?
+    if (!m_aCurCheckedDocId.isEmpty() && m_aCurCheckedDocId == aDocId)
+        return true;
+
+    // otherwise look for a queued entry of that document, i.e. the document
+    // is going to be checked sooner or later
+    for (const auto& rQueued : m_aFPEntriesQueue)
+    {
+        if (aDocId == rQueued.m_aDocId)
+            return true;
+    }
+    // ---- THREAD SAFE END ----
+
+    return false;
+}
+
+
+// Relays a PROOFREAD_AGAIN event to all registered linguistic service event
+// listeners, with this object as the event source. Any other event is
+// ignored. RuntimeExceptions from listeners propagate; other UNO exceptions
+// are logged and swallowed.
+void SAL_CALL GrammarCheckingIterator::processLinguServiceEvent(
+    const linguistic2::LinguServiceEvent& rLngSvcEvent )
+{
+    const bool bProofreadAgain
+        = rLngSvcEvent.nEvent == linguistic2::LinguServiceEventFlags::PROOFREAD_AGAIN;
+    if (bProofreadAgain)
+    {
+        try
+        {
+            uno::Reference< uno::XInterface > xSelf( getXWeak() );
+            linguistic2::LinguServiceEvent aRelayed( xSelf, linguistic2::LinguServiceEventFlags::PROOFREAD_AGAIN );
+            m_aNotifyListeners.notifyEach(
+                &linguistic2::XLinguServiceEventListener::processLinguServiceEvent,
+                aRelayed);
+        }
+        catch (uno::RuntimeException &)
+        {
+            throw;
+        }
+        catch (const ::uno::Exception &)
+        {
+            // ignore
+            TOOLS_WARN_EXCEPTION("linguistic", "processLinguServiceEvent");
+        }
+    }
+}
+
+
+// Registers xListener for relayed linguistic service events. Empty
+// references are ignored. Always returns true.
+sal_Bool SAL_CALL GrammarCheckingIterator::addLinguServiceEventListener(
+    const uno::Reference< linguistic2::XLinguServiceEventListener >& xListener )
+{
+    const bool bUsable = xListener.is();
+    if (bUsable)
+        m_aNotifyListeners.addInterface( xListener );
+
+    return true;
+}
+
+
+// Unregisters xListener from relayed linguistic service events. Empty
+// references are ignored. Always returns true.
+sal_Bool SAL_CALL GrammarCheckingIterator::removeLinguServiceEventListener(
+    const uno::Reference< linguistic2::XLinguServiceEventListener >& xListener )
+{
+    const bool bUsable = xListener.is();
+    if (bUsable)
+        m_aNotifyListeners.removeInterface( xListener );
+
+    return true;
+}
+
+
+// Notifies and clears the event listeners, stops the worker thread and then,
+// under the mutex, drops every UNO reference this object holds (break
+// iterator, proofreaders, document map, queued entries).
+void SAL_CALL GrammarCheckingIterator::dispose()
+{
+    lang::EventObject aDisposeEvent( static_cast<linguistic2::XProofreadingIterator *>(this) );
+    m_aEventListeners.disposeAndClear( aDisposeEvent );
+
+    TerminateThread();
+
+    // ---- THREAD SAFE START ----
+    {
+        ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
+
+        // release all UNO references
+
+        m_xBreakIterator.clear();
+
+        // Swap the members with empty containers: the old contents (and the
+        // references they hold) are released when the locals go out of scope
+        // at the end of this block.
+        GCReferences_t aOldCheckers;
+        DocMap_t aOldDocIds;
+        FPQueue_t aOldQueue;
+        m_aGCReferencesByService.swap( aOldCheckers );
+        m_aDocIdMap.swap( aOldDocIds );
+        m_aFPEntriesQueue.swap( aOldQueue );
+    }
+    // ---- THREAD SAFE END ----
+}
+
+
+// Registers xListener to be notified when this object is disposed. Empty
+// references are ignored.
+void SAL_CALL GrammarCheckingIterator::addEventListener(
+    const uno::Reference< lang::XEventListener >& xListener )
+{
+    if (!xListener.is())
+        return;
+
+    m_aEventListeners.addInterface( xListener );
+}
+
+
+// Removes xListener from m_aEventListeners. Empty references are ignored.
+void SAL_CALL GrammarCheckingIterator::removeEventListener(
+    const uno::Reference< lang::XEventListener >& xListener )
+{
+    if (!xListener.is())
+        return;
+
+    m_aEventListeners.removeInterface( xListener );
+}
+
+
+// Called when an object this iterator listens to is being disposed. If the
+// event source is an XComponent (a document), its entry is removed from
+// m_aDocIdMap under the mutex.
+void SAL_CALL GrammarCheckingIterator::disposing( const lang::EventObject &rSource )
+{
+    //!! Queue entries that belong to the disposing document are deliberately
+    //!! left alone: the respective xFlatParagraphs should become invalid
+    //!! (isModified() == true) and the call to
+    //!! xFlatParagraphIterator->getNextPara() will result in an empty reference.
+    //!! If an entry is currently being checked by a grammar checker, the
+    //!! results should be ignored upon return.
+    //!! Also GetOrCreateDocId will not use that very same Id again...
+    //!! All of the above means that only the implementation pointer has to be
+    //!! dropped here.
+    uno::Reference< lang::XComponent > xComponent( rSource.Source, uno::UNO_QUERY );
+    if (!xComponent.is())
+        return;
+
+    // ---- THREAD SAFE START ----
+    ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
+    m_aDocIdMap.erase( xComponent.get() );
+    // ---- THREAD SAFE END ----
+}
+
+
+// Returns the configuration update access for the node
+// "org.openoffice.Office.Linguistic/ServiceManager". The access object is
+// created through the default configuration provider on first use and cached
+// in m_xUpdateAccess.
+//
+// Creation is best effort: a UNO exception raised while creating the access is
+// logged and not propagated. In that case the returned reference is still
+// empty, so callers have to check it (or query it with UNO_QUERY_THROW), and
+// the next call tries again.
+uno::Reference< util::XChangesBatch > const & GrammarCheckingIterator::GetUpdateAccess() const
+{
+    if (!m_xUpdateAccess.is())
+    {
+        try
+        {
+            // get configuration provider
+            uno::Reference< uno::XComponentContext > xContext = comphelper::getProcessComponentContext();
+            uno::Reference< lang::XMultiServiceFactory > xConfigurationProvider =
+                configuration::theDefaultProvider::get( xContext );
+
+            // get configuration update access
+            beans::PropertyValue aValue;
+            aValue.Name = "nodepath";
+            aValue.Value <<= OUString("org.openoffice.Office.Linguistic/ServiceManager");
+            uno::Sequence< uno::Any > aProps{ uno::Any(aValue) };
+            m_xUpdateAccess.set(
+                xConfigurationProvider->createInstanceWithArguments(
+                    "com.sun.star.configuration.ConfigurationUpdateAccess", aProps ),
+                uno::UNO_QUERY_THROW );
+        }
+        catch (const uno::Exception &)
+        {
+            // Do not fail hard, but do not hide the root cause either: without
+            // this message callers only see an empty reference.
+            TOOLS_WARN_EXCEPTION( "linguistic", "failed to get configuration update access" );
+        }
+    }
+
+    return m_xUpdateAccess;
+}
+
+
+// Rebuilds m_aGCImplNamesByLang from the "GrammarCheckerList" configuration
+// node. Every child of that node is expected to hold a sequence of
+// implementation names; the child's name is used as the map key and the first
+// sequence element as the value. Entries of the wrong type are reported and
+// skipped, empty sequences are skipped silently. If reading the configuration
+// throws, the exception is logged and whatever was collected up to that point
+// is still swapped into the member (under the mutex).
+void GrammarCheckingIterator::GetConfiguredGCSvcs_Impl()
+{
+    GCImplNames_t aCollected;
+
+    try
+    {
+        // get node names (locale iso strings) for configured grammar checkers
+        uno::Reference< container::XNameAccess > xNA( GetUpdateAccess(), uno::UNO_QUERY_THROW );
+        xNA.set( xNA->getByName( "GrammarCheckerList" ), uno::UNO_QUERY_THROW );
+        const uno::Sequence< OUString > aLocaleNodes( xNA->getElementNames() );
+
+        for (const OUString& rLocaleNode : aLocaleNodes)
+        {
+            uno::Sequence< OUString > aCheckers;
+            uno::Any aNodeValue( xNA->getByName( rLocaleNode ) );
+            if (!(aNodeValue >>= aCheckers))
+            {
+                SAL_WARN( "linguistic", "failed to get aImplNames. Wrong type?" );
+                continue;
+            }
+
+            if (!aCheckers.hasElements())
+                continue;
+
+            // there should be only one grammar checker per language, so only
+            // the first entry is taken
+            const OUString aFirstChecker( aCheckers[0] );
+            aCollected[rLocaleNode] = aFirstChecker;
+        }
+    }
+    catch (uno::Exception const &)
+    {
+        TOOLS_WARN_EXCEPTION( "linguistic", "exception caught. Failed to get configured services" );
+    }
+
+    // ---- THREAD SAFE START ----
+    {
+        ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
+        m_aGCImplNamesByLang.swap(aCollected);
+    }
+    // ---- THREAD SAFE END ----
+}
+
+
+// Reports whether rServiceName is supported; the decision is delegated to
+// cppu::supportsService.
+sal_Bool SAL_CALL GrammarCheckingIterator::supportsService( const OUString & rServiceName )
+{
+    return cppu::supportsService( this, rServiceName );
+}
+
+
+// Returns the fixed implementation name of this component.
+OUString SAL_CALL GrammarCheckingIterator::getImplementationName()
+{
+    return "com.sun.star.lingu2.ProofreadingIterator";
+}
+
+
+// Returns a one-element sequence holding the single service name this
+// component supports.
+uno::Sequence< OUString > SAL_CALL GrammarCheckingIterator::getSupportedServiceNames()
+{
+    return { "com.sun.star.linguistic2.ProofreadingIterator" };
+}
+
+
+// Sets or removes the grammar checker registered for rLocale in
+// m_aGCImplNamesByLang (keyed by the locale's BCP 47 string).
+// Only the first element of rSvcImplNames is used. A non-empty name is stored
+// for the locale; an empty name (or an empty sequence) removes the locale's
+// entry. Nothing happens for an unspecified locale or an empty BCP 47 string.
+void GrammarCheckingIterator::SetServiceList(
+    const lang::Locale &rLocale,
+    const uno::Sequence< OUString > &rSvcImplNames )
+{
+    ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
+
+    const OUString aLangKey = LanguageTag::convertToBcp47(rLocale, false);
+
+    // there is only one grammar checker per language
+    const OUString aChecker = rSvcImplNames.hasElements() ? rSvcImplNames[0] : OUString();
+
+    if (LinguIsUnspecified(aLangKey) || aLangKey.isEmpty())
+        return;
+
+    if (aChecker.isEmpty())
+        m_aGCImplNamesByLang.erase(aLangKey);
+    else
+        m_aGCImplNamesByLang[aLangKey] = aChecker;
+}
+
+
+// Returns the grammar checker for rLocale as reported by getServiceForLocale:
+// a one-element sequence with its implementation name, or an empty sequence
+// when that name is empty. The lookup runs with the mutex held.
+uno::Sequence< OUString > GrammarCheckingIterator::GetServiceList(
+    const lang::Locale &rLocale ) const
+{
+    ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
+
+    // there is only one grammar checker per language
+    const OUString aChecker = getServiceForLocale(rLocale).first;
+
+    if (aChecker.isEmpty())
+        return {};
+    return { aChecker };
+}
+
+
+// Exported C entry point that creates a new GrammarCheckingIterator and
+// returns it acquired. Neither the component context nor the argument
+// sequence is used.
+extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
+linguistic_GrammarCheckingIterator_get_implementation(
+    css::uno::XComponentContext* /*pContext*/,
+    css::uno::Sequence<css::uno::Any> const& /*rArguments*/)
+{
+    return cppu::acquire( new GrammarCheckingIterator() );
+}
+
+
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */