diff options
Diffstat (limited to 'linguistic/source/gciterator.cxx')
-rw-r--r-- | linguistic/source/gciterator.cxx | 1179 |
1 files changed, 1179 insertions, 0 deletions
diff --git a/linguistic/source/gciterator.cxx b/linguistic/source/gciterator.cxx new file mode 100644 index 000000000..28336c518 --- /dev/null +++ b/linguistic/source/gciterator.cxx @@ -0,0 +1,1179 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <sal/macros.h> +#include <com/sun/star/beans/XPropertySet.hpp> +#include <com/sun/star/container/ElementExistException.hpp> +#include <com/sun/star/container/XNameAccess.hpp> +#include <com/sun/star/configuration/theDefaultProvider.hpp> +#include <com/sun/star/i18n/BreakIterator.hpp> +#include <com/sun/star/lang/IndexOutOfBoundsException.hpp> +#include <com/sun/star/lang/XComponent.hpp> +#include <com/sun/star/lang/XServiceInfo.hpp> +#include <com/sun/star/lang/XMultiServiceFactory.hpp> +#include <com/sun/star/linguistic2/XSupportedLocales.hpp> +#include <com/sun/star/linguistic2/XProofreader.hpp> +#include <com/sun/star/linguistic2/XProofreadingIterator.hpp> +#include <com/sun/star/linguistic2/SingleProofreadingError.hpp> +#include <com/sun/star/linguistic2/ProofreadingResult.hpp> +#include <com/sun/star/linguistic2/LinguServiceEvent.hpp> +#include <com/sun/star/linguistic2/LinguServiceEventFlags.hpp> +#include <com/sun/star/text/TextMarkupType.hpp> +#include <com/sun/star/text/TextMarkupDescriptor.hpp> +#include <com/sun/star/text/XMultiTextMarkup.hpp> +#include <com/sun/star/text/XFlatParagraph.hpp> +#include <com/sun/star/text/XFlatParagraphIterator.hpp> +#include <com/sun/star/uno/XComponentContext.hpp> + +#include <sal/config.h> +#include <sal/log.hxx> +#include <o3tl/safeint.hxx> +#include <osl/conditn.hxx> +#include <cppuhelper/supportsservice.hxx> +#include <cppuhelper/weak.hxx> +#include <i18nlangtag/languagetag.hxx> +#include <comphelper/processfactory.hxx> +#include <comphelper/propertysequence.hxx> +#include <tools/debug.hxx> +#include <tools/diagnose_ex.h> + +#include <map> + +#include <linguistic/misc.hxx> + +#include "gciterator.hxx" + +using namespace linguistic; +using namespace ::com::sun::star; + +// white space list: obtained from the fonts.config.txt of a Linux system. +const sal_Unicode aWhiteSpaces[] = +{ + 0x0020, /* SPACE */ + 0x00a0, /* NO-BREAK SPACE */ + 0x00ad, /* SOFT HYPHEN */ + 0x115f, /* HANGUL CHOSEONG FILLER */ + 0x1160, /* HANGUL JUNGSEONG FILLER */ + 0x1680, /* OGHAM SPACE MARK */ + 0x2000, /* EN QUAD */ + 0x2001, /* EM QUAD */ + 0x2002, /* EN SPACE */ + 0x2003, /* EM SPACE */ + 0x2004, /* THREE-PER-EM SPACE */ + 0x2005, /* FOUR-PER-EM SPACE */ + 0x2006, /* SIX-PER-EM SPACE */ + 0x2007, /* FIGURE SPACE */ + 0x2008, /* PUNCTUATION SPACE */ + 0x2009, /* THIN SPACE */ + 0x200a, /* HAIR SPACE */ + 0x200b, /* ZERO WIDTH SPACE */ + 0x200c, /* ZERO WIDTH NON-JOINER */ + 0x200d, /* ZERO WIDTH JOINER */ + 0x200e, /* LEFT-TO-RIGHT MARK */ + 0x200f, /* RIGHT-TO-LEFT MARK */ + 0x2028, /* LINE SEPARATOR */ + 0x2029, /* PARAGRAPH SEPARATOR */ + 0x202a, /* LEFT-TO-RIGHT EMBEDDING */ + 0x202b, /* RIGHT-TO-LEFT EMBEDDING */ + 0x202c, /* POP DIRECTIONAL FORMATTING */ + 0x202d, /* LEFT-TO-RIGHT OVERRIDE */ + 0x202e, /* RIGHT-TO-LEFT OVERRIDE */ + 0x202f, /* NARROW NO-BREAK SPACE */ + 0x205f, /* MEDIUM MATHEMATICAL SPACE */ + 0x2060, /* WORD JOINER */ + 0x2061, /* FUNCTION APPLICATION */ + 0x2062, /* INVISIBLE TIMES */ + 0x2063, /* INVISIBLE SEPARATOR */ + 0x206A, /* INHIBIT SYMMETRIC SWAPPING */ + 0x206B, /* ACTIVATE SYMMETRIC SWAPPING */ + 0x206C, /* INHIBIT ARABIC FORM SHAPING */ + 0x206D, /* ACTIVATE ARABIC FORM SHAPING */ + 0x206E, /* NATIONAL DIGIT SHAPES */ + 0x206F, /* NOMINAL DIGIT SHAPES */ + 0x3000, /* IDEOGRAPHIC SPACE */ + 0x3164, /* HANGUL FILLER */ + 0xfeff, /* ZERO WIDTH NO-BREAK SPACE */ + 0xffa0, /* HALFWIDTH HANGUL FILLER */ + 0xfff9, /* INTERLINEAR ANNOTATION ANCHOR */ + 0xfffa, /* INTERLINEAR ANNOTATION SEPARATOR */ + 0xfffb /* INTERLINEAR ANNOTATION TERMINATOR */ +}; + +// Information about reason for proofreading (ProofInfo) + const sal_Int32 PROOFINFO_GET_PROOFRESULT = 1; + const sal_Int32 PROOFINFO_MARK_PARAGRAPH = 2; + +const int nWhiteSpaces = SAL_N_ELEMENTS( aWhiteSpaces ); + +static bool lcl_IsWhiteSpace( sal_Unicode cChar ) +{ + bool bFound = false; + for (int i = 0; i < nWhiteSpaces && !bFound; ++i) + { + if (cChar == aWhiteSpaces[i]) + bFound = true; + } + return bFound; +} + +static sal_Int32 lcl_SkipWhiteSpaces( const OUString &rText, sal_Int32 nStartPos ) +{ + // note having nStartPos point right behind the string is OK since that one + // is a correct end-of-sentence position to be returned from a grammar checker... + + const sal_Int32 nLen = rText.getLength(); + bool bIllegalArgument = false; + if (nStartPos < 0) + { + bIllegalArgument = true; + nStartPos = 0; + } + if (nStartPos > nLen) + { + bIllegalArgument = true; + nStartPos = nLen; + } + if (bIllegalArgument) + { + SAL_WARN( "linguistic", "lcl_SkipWhiteSpaces: illegal arguments" ); + } + + sal_Int32 nRes = nStartPos; + if (0 <= nStartPos && nStartPos < nLen) + { + const sal_Unicode* const pEnd = rText.getStr() + nLen; + const sal_Unicode *pText = rText.getStr() + nStartPos; + while (pText != pEnd && lcl_IsWhiteSpace(*pText)) + ++pText; + nRes = pText - rText.getStr(); + } + + DBG_ASSERT( 0 <= nRes && nRes <= nLen, "lcl_SkipWhiteSpaces return value out of range" ); + return nRes; +} + +static sal_Int32 lcl_BacktraceWhiteSpaces( const OUString &rText, sal_Int32 nStartPos ) +{ + // note: having nStartPos point right behind the string is OK since that one + // is a correct end-of-sentence position to be returned from a grammar checker... + + const sal_Int32 nLen = rText.getLength(); + bool bIllegalArgument = false; + if (nStartPos < 0) + { + bIllegalArgument = true; + nStartPos = 0; + } + if (nStartPos > nLen) + { + bIllegalArgument = true; + nStartPos = nLen; + } + if (bIllegalArgument) + { + SAL_WARN( "linguistic", "lcl_BacktraceWhiteSpaces: illegal arguments" ); + } + + sal_Int32 nRes = nStartPos; + sal_Int32 nPosBefore = nStartPos - 1; + const sal_Unicode *pStart = rText.getStr(); + if (0 <= nPosBefore && nPosBefore < nLen && lcl_IsWhiteSpace( pStart[ nPosBefore ] )) + { + nStartPos = nPosBefore; + const sal_Unicode *pText = rText.getStr() + nStartPos; + while (pText > pStart && lcl_IsWhiteSpace( *pText )) + --pText; + // now add 1 since we want to point to the first char after the last char in the sentence... + nRes = pText - pStart + 1; + } + + DBG_ASSERT( 0 <= nRes && nRes <= nLen, "lcl_BacktraceWhiteSpaces return value out of range" ); + return nRes; +} + + +extern "C" { + +static void lcl_workerfunc (void * gci) +{ + osl_setThreadName("GrammarCheckingIterator"); + + static_cast<GrammarCheckingIterator*>(gci)->DequeueAndCheck(); +} + +} + +static lang::Locale lcl_GetPrimaryLanguageOfSentence( + const uno::Reference< text::XFlatParagraph >& xFlatPara, + sal_Int32 nStartIndex ) +{ + //get the language of the first word + return xFlatPara->getLanguageOfText( nStartIndex, 1 ); +} + + +LngXStringKeyMap::LngXStringKeyMap() {} + +void SAL_CALL LngXStringKeyMap::insertValue(const OUString& aKey, const css::uno::Any& aValue) +{ + std::map<OUString, css::uno::Any>::const_iterator aIter = maMap.find(aKey); + if (aIter != maMap.end()) + throw css::container::ElementExistException(); + + maMap[aKey] = aValue; +} + +css::uno::Any SAL_CALL LngXStringKeyMap::getValue(const OUString& aKey) +{ + std::map<OUString, css::uno::Any>::const_iterator aIter = maMap.find(aKey); + if (aIter == maMap.end()) + throw css::container::NoSuchElementException(); + + return (*aIter).second; +} + +sal_Bool SAL_CALL LngXStringKeyMap::hasValue(const OUString& aKey) +{ + return maMap.find(aKey) != maMap.end(); +} + +::sal_Int32 SAL_CALL LngXStringKeyMap::getCount() { return maMap.size(); } + +OUString SAL_CALL LngXStringKeyMap::getKeyByIndex(::sal_Int32 nIndex) +{ + if (nIndex < 0 || o3tl::make_unsigned(nIndex) >= maMap.size()) + throw css::lang::IndexOutOfBoundsException(); + + return OUString(); +} + +css::uno::Any SAL_CALL LngXStringKeyMap::getValueByIndex(::sal_Int32 nIndex) +{ + if (nIndex < 0 || o3tl::make_unsigned(nIndex) >= maMap.size()) + throw css::lang::IndexOutOfBoundsException(); + + return css::uno::Any(); +} + + +osl::Mutex& GrammarCheckingIterator::MyMutex() +{ + static osl::Mutex SINGLETON; + return SINGLETON; +} + +GrammarCheckingIterator::GrammarCheckingIterator() : + m_bEnd( false ), + m_bGCServicesChecked( false ), + m_nDocIdCounter( 0 ), + m_thread(nullptr), + m_aEventListeners( MyMutex() ), + m_aNotifyListeners( MyMutex() ) +{ +} + + +GrammarCheckingIterator::~GrammarCheckingIterator() +{ + TerminateThread(); +} + +void GrammarCheckingIterator::TerminateThread() +{ + oslThread t; + { + ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() ); + t = m_thread; + m_thread = nullptr; + m_bEnd = true; + m_aWakeUpThread.set(); + } + if (t != nullptr) + { + osl_joinWithThread(t); + osl_destroyThread(t); + } +} + +sal_Int32 GrammarCheckingIterator::NextDocId() +{ + ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() ); + m_nDocIdCounter += 1; + return m_nDocIdCounter; +} + + +OUString GrammarCheckingIterator::GetOrCreateDocId( + const uno::Reference< lang::XComponent > &xComponent ) +{ + // internal method; will always be called with locked mutex + + OUString aRes; + if (xComponent.is()) + { + if (m_aDocIdMap.find( xComponent.get() ) != m_aDocIdMap.end()) + { + // return already existing entry + aRes = m_aDocIdMap[ xComponent.get() ]; + } + else // add new entry + { + sal_Int32 nRes = NextDocId(); + aRes = OUString::number( nRes ); + m_aDocIdMap[ xComponent.get() ] = aRes; + xComponent->addEventListener( this ); + } + } + return aRes; +} + + +void GrammarCheckingIterator::AddEntry( + const uno::WeakReference< text::XFlatParagraphIterator >& xFlatParaIterator, + const uno::WeakReference< text::XFlatParagraph >& xFlatPara, + const OUString & rDocId, + sal_Int32 nStartIndex, + bool bAutomatic ) +{ + // we may not need/have a xFlatParaIterator (e.g. if checkGrammarAtPos was called) + // but we always need a xFlatPara... + uno::Reference< text::XFlatParagraph > xPara( xFlatPara ); + if (!xPara.is()) + return; + + FPEntry aNewFPEntry; + aNewFPEntry.m_xParaIterator = xFlatParaIterator; + aNewFPEntry.m_xPara = xFlatPara; + aNewFPEntry.m_aDocId = rDocId; + aNewFPEntry.m_nStartIndex = nStartIndex; + aNewFPEntry.m_bAutomatic = bAutomatic; + + // add new entry to the end of this queue + ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() ); + if (!m_thread) + m_thread = osl_createThread( lcl_workerfunc, this ); + m_aFPEntriesQueue.push_back( aNewFPEntry ); + + // wake up the thread in order to do grammar checking + m_aWakeUpThread.set(); +} + + +void GrammarCheckingIterator::ProcessResult( + const linguistic2::ProofreadingResult &rRes, + const uno::Reference< text::XFlatParagraphIterator > &rxFlatParagraphIterator, + bool bIsAutomaticChecking ) +{ + DBG_ASSERT( rRes.xFlatParagraph.is(), "xFlatParagraph is missing" ); + //no guard necessary as no members are used + bool bContinueWithNextPara = false; + if (!rRes.xFlatParagraph.is() || rRes.xFlatParagraph->isModified()) + { + // if paragraph was modified/deleted meanwhile continue with the next one... + bContinueWithNextPara = true; + } + else // paragraph is still unchanged... + { + // mark found errors... + + sal_Int32 nTextLen = rRes.aText.getLength(); + bool bBoundariesOk = 0 <= rRes.nStartOfSentencePosition && rRes.nStartOfSentencePosition <= nTextLen && + 0 <= rRes.nBehindEndOfSentencePosition && rRes.nBehindEndOfSentencePosition <= nTextLen && + 0 <= rRes.nStartOfNextSentencePosition && rRes.nStartOfNextSentencePosition <= nTextLen && + rRes.nStartOfSentencePosition <= rRes.nBehindEndOfSentencePosition && + rRes.nBehindEndOfSentencePosition <= rRes.nStartOfNextSentencePosition; + DBG_ASSERT( bBoundariesOk, "inconsistent sentence boundaries" ); + + uno::Reference< text::XMultiTextMarkup > xMulti( rRes.xFlatParagraph, uno::UNO_QUERY ); + if (xMulti.is()) // use new API for markups + { + try + { + // length = number of found errors + 1 sentence markup + sal_Int32 nErrors = rRes.aErrors.getLength(); + uno::Sequence< text::TextMarkupDescriptor > aDescriptors( nErrors + 1 ); + text::TextMarkupDescriptor * pDescriptors = aDescriptors.getArray(); + + // at pos 0 .. nErrors-1 -> all grammar errors + for (const linguistic2::SingleProofreadingError &rError : rRes.aErrors) + { + text::TextMarkupDescriptor &rDesc = *pDescriptors++; + + rDesc.nType = rError.nErrorType; + rDesc.nOffset = rError.nErrorStart; + rDesc.nLength = rError.nErrorLength; + + // the proofreader may return SPELLING but right now our core + // does only handle PROOFREADING if the result is from the proofreader... + // (later on we may wish to color spelling errors found by the proofreader + // differently for example. But no special handling right now. + if (rDesc.nType == text::TextMarkupType::SPELLCHECK) + rDesc.nType = text::TextMarkupType::PROOFREADING; + + uno::Reference< container::XStringKeyMap > xKeyMap( + new LngXStringKeyMap()); + for( const beans::PropertyValue& rProperty : rError.aProperties ) + { + if ( rProperty.Name == "LineColor" ) + { + xKeyMap->insertValue(rProperty.Name, + rProperty.Value); + rDesc.xMarkupInfoContainer = xKeyMap; + } + else if ( rProperty.Name == "LineType" ) + { + xKeyMap->insertValue(rProperty.Name, + rProperty.Value); + rDesc.xMarkupInfoContainer = xKeyMap; + } + } + } + + // at pos nErrors -> sentence markup + // nSentenceLength: includes the white-spaces following the sentence end... + const sal_Int32 nSentenceLength = rRes.nStartOfNextSentencePosition - rRes.nStartOfSentencePosition; + pDescriptors->nType = text::TextMarkupType::SENTENCE; + pDescriptors->nOffset = rRes.nStartOfSentencePosition; + pDescriptors->nLength = nSentenceLength; + + xMulti->commitMultiTextMarkup( aDescriptors ) ; + } + catch (lang::IllegalArgumentException &) + { + TOOLS_WARN_EXCEPTION( "linguistic", "commitMultiTextMarkup" ); + } + } + + // other sentences left to be checked in this paragraph? + if (rRes.nStartOfNextSentencePosition < rRes.aText.getLength()) + { + AddEntry( rxFlatParagraphIterator, rRes.xFlatParagraph, rRes.aDocumentIdentifier, rRes.nStartOfNextSentencePosition, bIsAutomaticChecking ); + } + else // current paragraph finished + { + // set "already checked" flag for the current flat paragraph + if (rRes.xFlatParagraph.is()) + rRes.xFlatParagraph->setChecked( text::TextMarkupType::PROOFREADING, true ); + + bContinueWithNextPara = true; + } + } + + if (bContinueWithNextPara) + { + // we need to continue with the next paragraph + uno::Reference< text::XFlatParagraph > xFlatParaNext; + if (rxFlatParagraphIterator.is()) + xFlatParaNext = rxFlatParagraphIterator->getNextPara(); + { + AddEntry( rxFlatParagraphIterator, xFlatParaNext, rRes.aDocumentIdentifier, 0, bIsAutomaticChecking ); + } + } +} + + +uno::Reference< linguistic2::XProofreader > GrammarCheckingIterator::GetGrammarChecker( + const lang::Locale &rLocale ) +{ + uno::Reference< linguistic2::XProofreader > xRes; + + // ---- THREAD SAFE START ---- + ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() ); + + // check supported locales for each grammarchecker if not already done + if (!m_bGCServicesChecked) + { + GetConfiguredGCSvcs_Impl(); + m_bGCServicesChecked = true; + } + + const LanguageType nLang = LanguageTag::convertToLanguageType( rLocale, false); + GCImplNames_t::const_iterator aLangIt( m_aGCImplNamesByLang.find( nLang ) ); + if (aLangIt != m_aGCImplNamesByLang.end()) // matching configured language found? + { + OUString aSvcImplName( aLangIt->second ); + GCReferences_t::const_iterator aImplNameIt( m_aGCReferencesByService.find( aSvcImplName ) ); + if (aImplNameIt != m_aGCReferencesByService.end()) // matching impl name found? + { + xRes = aImplNameIt->second; + } + else // the service is to be instantiated here for the first time... + { + try + { + uno::Reference< uno::XComponentContext > xContext( comphelper::getProcessComponentContext() ); + uno::Reference< linguistic2::XProofreader > xGC( + xContext->getServiceManager()->createInstanceWithContext(aSvcImplName, xContext), + uno::UNO_QUERY_THROW ); + uno::Reference< linguistic2::XSupportedLocales > xSuppLoc( xGC, uno::UNO_QUERY_THROW ); + + if (xSuppLoc->hasLocale( rLocale )) + { + m_aGCReferencesByService[ aSvcImplName ] = xGC; + xRes = xGC; + + uno::Reference< linguistic2::XLinguServiceEventBroadcaster > xBC( xGC, uno::UNO_QUERY ); + if (xBC.is()) + xBC->addLinguServiceEventListener( this ); + } + else + { + SAL_WARN( "linguistic", "grammar checker does not support required locale" ); + } + } + catch (uno::Exception &) + { + SAL_WARN( "linguistic", "instantiating grammar checker failed" ); + } + } + } + // ---- THREAD SAFE END ---- + + return xRes; +} + +static uno::Sequence<beans::PropertyValue> +lcl_makeProperties(uno::Reference<text::XFlatParagraph> const& xFlatPara, sal_Int32 nProofInfo) +{ + uno::Reference<beans::XPropertySet> const xProps( + xFlatPara, uno::UNO_QUERY_THROW); + css::uno::Any a (nProofInfo); + return comphelper::InitPropertySequence({ + { "FieldPositions", xProps->getPropertyValue("FieldPositions") }, + { "FootnotePositions", xProps->getPropertyValue("FootnotePositions") }, + { "ProofInfo", a } + }); +} + +void GrammarCheckingIterator::DequeueAndCheck() +{ + for (;;) + { + // ---- THREAD SAFE START ---- + bool bQueueEmpty = false; + { + ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() ); + if (m_bEnd) + { + break; + } + bQueueEmpty = m_aFPEntriesQueue.empty(); + } + // ---- THREAD SAFE END ---- + + if (!bQueueEmpty) + { + uno::Reference< text::XFlatParagraphIterator > xFPIterator; + uno::Reference< text::XFlatParagraph > xFlatPara; + FPEntry aFPEntryItem; + OUString aCurDocId; + // ---- THREAD SAFE START ---- + { + ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() ); + aFPEntryItem = m_aFPEntriesQueue.front(); + xFPIterator = aFPEntryItem.m_xParaIterator; + xFlatPara = aFPEntryItem.m_xPara; + m_aCurCheckedDocId = aFPEntryItem.m_aDocId; + aCurDocId = m_aCurCheckedDocId; + + m_aFPEntriesQueue.pop_front(); + } + // ---- THREAD SAFE END ---- + + if (xFlatPara.is() && xFPIterator.is()) + { + try + { + OUString aCurTxt( xFlatPara->getText() ); + lang::Locale aCurLocale = lcl_GetPrimaryLanguageOfSentence( xFlatPara, aFPEntryItem.m_nStartIndex ); + + const bool bModified = xFlatPara->isModified(); + if (!bModified) + { + linguistic2::ProofreadingResult aRes; + + // ---- THREAD SAFE START ---- + { + osl::ClearableMutexGuard aGuard(MyMutex()); + + sal_Int32 nStartPos = aFPEntryItem.m_nStartIndex; + sal_Int32 nSuggestedEnd + = GetSuggestedEndOfSentence(aCurTxt, nStartPos, aCurLocale); + DBG_ASSERT((nSuggestedEnd == 0 && aCurTxt.isEmpty()) + || nSuggestedEnd > nStartPos, + "nSuggestedEndOfSentencePos calculation failed?"); + + uno::Reference<linguistic2::XProofreader> xGC = + GetGrammarChecker(aCurLocale); + if (xGC.is()) + { + aGuard.clear(); + uno::Sequence<beans::PropertyValue> const aProps( + lcl_makeProperties(xFlatPara, PROOFINFO_MARK_PARAGRAPH)); + aRes = xGC->doProofreading(aCurDocId, aCurTxt, aCurLocale, + nStartPos, nSuggestedEnd, aProps); + + //!! work-around to prevent looping if the grammar checker + //!! failed to properly identify the sentence end + if (aRes.nBehindEndOfSentencePosition <= nStartPos + && aRes.nBehindEndOfSentencePosition != nSuggestedEnd) + { + SAL_WARN( + "linguistic", + "!! Grammarchecker failed to provide end of sentence !!"); + aRes.nBehindEndOfSentencePosition = nSuggestedEnd; + } + + aRes.xFlatParagraph = xFlatPara; + aRes.nStartOfSentencePosition = nStartPos; + } + else + { + // no grammar checker -> no error + // but we need to provide the data below in order to continue with the next sentence + aRes.aDocumentIdentifier = aCurDocId; + aRes.xFlatParagraph = xFlatPara; + aRes.aText = aCurTxt; + aRes.aLocale = aCurLocale; + aRes.nStartOfSentencePosition = nStartPos; + aRes.nBehindEndOfSentencePosition = nSuggestedEnd; + } + aRes.nStartOfNextSentencePosition + = lcl_SkipWhiteSpaces(aCurTxt, aRes.nBehindEndOfSentencePosition); + aRes.nBehindEndOfSentencePosition = lcl_BacktraceWhiteSpaces( + aCurTxt, aRes.nStartOfNextSentencePosition); + + //guard has to be cleared as ProcessResult calls out of this class + } + // ---- THREAD SAFE END ---- + ProcessResult( aRes, xFPIterator, aFPEntryItem.m_bAutomatic ); + } + else + { + // the paragraph changed meanwhile... (and maybe is still edited) + // thus we simply continue to ask for the next to be checked. + uno::Reference< text::XFlatParagraph > xFlatParaNext( xFPIterator->getNextPara() ); + AddEntry( xFPIterator, xFlatParaNext, aCurDocId, 0, aFPEntryItem.m_bAutomatic ); + } + } + catch (css::uno::Exception &) + { + TOOLS_WARN_EXCEPTION("linguistic", "GrammarCheckingIterator::DequeueAndCheck ignoring"); + } + } + + // ---- THREAD SAFE START ---- + { + ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() ); + m_aCurCheckedDocId.clear(); + } + // ---- THREAD SAFE END ---- + } + else + { + // ---- THREAD SAFE START ---- + { + ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() ); + if (m_bEnd) + { + break; + } + // Check queue state again + if (m_aFPEntriesQueue.empty()) + m_aWakeUpThread.reset(); + } + // ---- THREAD SAFE END ---- + + //if the queue is empty + // IMPORTANT: Don't call condition.wait() with locked + // mutex. Otherwise you would keep out other threads + // to add entries to the queue! A condition is thread- + // safe implemented. + m_aWakeUpThread.wait(); + } + } +} + + +void SAL_CALL GrammarCheckingIterator::startProofreading( + const uno::Reference< ::uno::XInterface > & xDoc, + const uno::Reference< text::XFlatParagraphIteratorProvider > & xIteratorProvider ) +{ + // get paragraph to start checking with + const bool bAutomatic = true; + uno::Reference<text::XFlatParagraphIterator> xFPIterator = xIteratorProvider->getFlatParagraphIterator( + text::TextMarkupType::PROOFREADING, bAutomatic ); + uno::Reference< text::XFlatParagraph > xPara( xFPIterator.is()? xFPIterator->getFirstPara() : nullptr ); + uno::Reference< lang::XComponent > xComponent( xDoc, uno::UNO_QUERY ); + + // ---- THREAD SAFE START ---- + ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() ); + if (xPara.is() && xComponent.is()) + { + OUString aDocId = GetOrCreateDocId( xComponent ); + + // create new entry and add it to queue + AddEntry( xFPIterator, xPara, aDocId, 0, bAutomatic ); + } + // ---- THREAD SAFE END ---- +} + + +linguistic2::ProofreadingResult SAL_CALL GrammarCheckingIterator::checkSentenceAtPosition( + const uno::Reference< uno::XInterface >& xDoc, + const uno::Reference< text::XFlatParagraph >& xFlatPara, + const OUString& rText, + const lang::Locale&, + sal_Int32 nStartOfSentencePos, + sal_Int32 nSuggestedEndOfSentencePos, + sal_Int32 nErrorPosInPara ) +{ + // for the context menu... + + linguistic2::ProofreadingResult aRes; + + uno::Reference< lang::XComponent > xComponent( xDoc, uno::UNO_QUERY ); + if (xFlatPara.is() && xComponent.is() && + ( nErrorPosInPara < 0 || nErrorPosInPara < rText.getLength())) + { + // iterate through paragraph until we find the sentence we are interested in + linguistic2::ProofreadingResult aTmpRes; + sal_Int32 nStartPos = nStartOfSentencePos >= 0 ? nStartOfSentencePos : 0; + + bool bFound = false; + do + { + lang::Locale aCurLocale = lcl_GetPrimaryLanguageOfSentence( xFlatPara, nStartPos ); + sal_Int32 nOldStartOfSentencePos = nStartPos; + uno::Reference< linguistic2::XProofreader > xGC; + OUString aDocId; + + // ---- THREAD SAFE START ---- + { + ::osl::ClearableGuard< ::osl::Mutex > aGuard( MyMutex() ); + aDocId = GetOrCreateDocId( xComponent ); + nSuggestedEndOfSentencePos = GetSuggestedEndOfSentence( rText, nStartPos, aCurLocale ); + DBG_ASSERT( nSuggestedEndOfSentencePos > nStartPos, "nSuggestedEndOfSentencePos calculation failed?" ); + + xGC = GetGrammarChecker( aCurLocale ); + } + // ---- THREAD SAFE START ---- + sal_Int32 nEndPos = -1; + if (xGC.is()) + { + uno::Sequence<beans::PropertyValue> const aProps( + lcl_makeProperties(xFlatPara, PROOFINFO_GET_PROOFRESULT)); + aTmpRes = xGC->doProofreading( aDocId, rText, + aCurLocale, nStartPos, nSuggestedEndOfSentencePos, aProps ); + + //!! work-around to prevent looping if the grammar checker + //!! failed to properly identify the sentence end + if (aTmpRes.nBehindEndOfSentencePosition <= nStartPos) + { + SAL_WARN( "linguistic", "!! Grammarchecker failed to provide end of sentence !!" ); + aTmpRes.nBehindEndOfSentencePosition = nSuggestedEndOfSentencePos; + } + + aTmpRes.xFlatParagraph = xFlatPara; + aTmpRes.nStartOfSentencePosition = nStartPos; + nEndPos = aTmpRes.nBehindEndOfSentencePosition; + + if ((nErrorPosInPara< 0 || nStartPos <= nErrorPosInPara) && nErrorPosInPara < nEndPos) + bFound = true; + } + if (nEndPos == -1) // no result from grammar checker + nEndPos = nSuggestedEndOfSentencePos; + nStartPos = lcl_SkipWhiteSpaces( rText, nEndPos ); + aTmpRes.nBehindEndOfSentencePosition = nEndPos; + aTmpRes.nStartOfNextSentencePosition = nStartPos; + aTmpRes.nBehindEndOfSentencePosition = lcl_BacktraceWhiteSpaces( rText, aTmpRes.nStartOfNextSentencePosition ); + + // prevent endless loop by forcefully advancing if needs be... + if (nStartPos <= nOldStartOfSentencePos) + { + SAL_WARN( "linguistic", "end-of-sentence detection failed?" ); + nStartPos = nOldStartOfSentencePos + 1; + } + } + while (!bFound && nStartPos < rText.getLength()); + + if (bFound && !xFlatPara->isModified()) + aRes = aTmpRes; + } + + return aRes; +} + + +sal_Int32 GrammarCheckingIterator::GetSuggestedEndOfSentence( + const OUString &rText, + sal_Int32 nSentenceStartPos, + const lang::Locale &rLocale ) +{ + // internal method; will always be called with locked mutex + + if (!m_xBreakIterator.is()) + { + uno::Reference< uno::XComponentContext > xContext = ::comphelper::getProcessComponentContext(); + m_xBreakIterator = i18n::BreakIterator::create(xContext); + } + sal_Int32 nTextLen = rText.getLength(); + sal_Int32 nEndPosition(0); + sal_Int32 nTmpStartPos = nSentenceStartPos; + do + { + sal_Int32 const nPrevEndPosition(nEndPosition); + nEndPosition = nTextLen; + if (nTmpStartPos < nTextLen) + { + nEndPosition = m_xBreakIterator->endOfSentence( rText, nTmpStartPos, rLocale ); + if (nEndPosition <= nPrevEndPosition) + { + // fdo#68750 if there's no progress at all then presumably + // there's no end of sentence in this paragraph so just + // set the end position to end of paragraph + nEndPosition = nTextLen; + } + } + if (nEndPosition < 0) + nEndPosition = nTextLen; + + ++nTmpStartPos; + } + while (nEndPosition <= nSentenceStartPos && nEndPosition < nTextLen); + if (nEndPosition > nTextLen) + nEndPosition = nTextLen; + return nEndPosition; +} + + +void SAL_CALL GrammarCheckingIterator::resetIgnoreRules( ) +{ + for (auto const& elem : m_aGCReferencesByService) + { + uno::Reference< linguistic2::XProofreader > xGC(elem.second); + if (xGC.is()) + xGC->resetIgnoreRules(); + } +} + + +sal_Bool SAL_CALL GrammarCheckingIterator::isProofreading( + const uno::Reference< uno::XInterface >& xDoc ) +{ + // ---- THREAD SAFE START ---- + ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() ); + + bool bRes = false; + + uno::Reference< lang::XComponent > xComponent( xDoc, uno::UNO_QUERY ); + if (xComponent.is()) + { + // if the component was already used in one of the two calls to check text + // i.e. in startGrammarChecking or checkGrammarAtPos it will be found in the + // m_aDocIdMap unless the document already disposed. + // If it is not found then it is not yet being checked (or requested to being checked) + const DocMap_t::const_iterator aIt( m_aDocIdMap.find( xComponent.get() ) ); + if (aIt != m_aDocIdMap.end()) + { + // check in document is checked automatically in the background... + OUString aDocId = aIt->second; + if (!m_aCurCheckedDocId.isEmpty() && m_aCurCheckedDocId == aDocId) + { + // an entry for that document was dequeued and is currently being checked. + bRes = true; + } + else + { + // we need to check if there is an entry for that document in the queue... + // That is the document is going to be checked sooner or later. + + sal_Int32 nSize = m_aFPEntriesQueue.size(); + for (sal_Int32 i = 0; i < nSize && !bRes; ++i) + { + if (aDocId == m_aFPEntriesQueue[i].m_aDocId) + bRes = true; + } + } + } + } + // ---- THREAD SAFE END ---- + + return bRes; +} + + +void SAL_CALL GrammarCheckingIterator::processLinguServiceEvent( + const linguistic2::LinguServiceEvent& rLngSvcEvent ) +{ + if (rLngSvcEvent.nEvent != linguistic2::LinguServiceEventFlags::PROOFREAD_AGAIN) + return; + + try + { + uno::Reference< uno::XInterface > xThis( static_cast< OWeakObject * >(this) ); + linguistic2::LinguServiceEvent aEvent( xThis, linguistic2::LinguServiceEventFlags::PROOFREAD_AGAIN ); + m_aNotifyListeners.notifyEach( + &linguistic2::XLinguServiceEventListener::processLinguServiceEvent, + aEvent); + } + catch (uno::RuntimeException &) + { + throw; + } + catch (const ::uno::Exception &) + { + // ignore + TOOLS_WARN_EXCEPTION("linguistic", "processLinguServiceEvent"); + } +} + + +sal_Bool SAL_CALL GrammarCheckingIterator::addLinguServiceEventListener( + const uno::Reference< linguistic2::XLinguServiceEventListener >& xListener ) +{ + if (xListener.is()) + { + m_aNotifyListeners.addInterface( xListener ); + } + return true; +} + + +sal_Bool SAL_CALL GrammarCheckingIterator::removeLinguServiceEventListener( + const uno::Reference< linguistic2::XLinguServiceEventListener >& xListener ) +{ + if (xListener.is()) + { + m_aNotifyListeners.removeInterface( xListener ); + } + return true; +} + + +void SAL_CALL GrammarCheckingIterator::dispose() +{ + lang::EventObject aEvt( static_cast<linguistic2::XProofreadingIterator *>(this) ); + m_aEventListeners.disposeAndClear( aEvt ); + + TerminateThread(); + + // ---- THREAD SAFE START ---- + { + ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() ); + + // release all UNO references + + m_xBreakIterator.clear(); + + // clear containers with UNO references AND have those references released + GCReferences_t aTmpEmpty1; + DocMap_t aTmpEmpty2; + FPQueue_t aTmpEmpty3; + m_aGCReferencesByService.swap( aTmpEmpty1 ); + m_aDocIdMap.swap( aTmpEmpty2 ); + m_aFPEntriesQueue.swap( aTmpEmpty3 ); + } + // ---- THREAD SAFE END ---- +} + + +void SAL_CALL GrammarCheckingIterator::addEventListener( + const uno::Reference< lang::XEventListener >& xListener ) +{ + if (xListener.is()) + { + m_aEventListeners.addInterface( xListener ); + } +} + + +void SAL_CALL GrammarCheckingIterator::removeEventListener( + const uno::Reference< lang::XEventListener >& xListener ) +{ + if (xListener.is()) + { + m_aEventListeners.removeInterface( xListener ); + } +} + + +void SAL_CALL GrammarCheckingIterator::disposing( const lang::EventObject &rSource ) +{ + // if the component (document) is disposing release all references + //!! There is no need to remove entries from the queue that are from this document + //!! since the respectives xFlatParagraphs should become invalid (isModified() == true) + //!! and the call to xFlatParagraphIterator->getNextPara() will result in an empty reference. + //!! And if an entry is currently checked by a grammar checker upon return the results + //!! should be ignored. + //!! Also GetOrCreateDocId will not use that very same Id again... + //!! All of the above resulting in that we only have to get rid of the implementation pointer here. + uno::Reference< lang::XComponent > xDoc( rSource.Source, uno::UNO_QUERY ); + if (xDoc.is()) + { + // ---- THREAD SAFE START ---- + ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() ); + m_aDocIdMap.erase( xDoc.get() ); + // ---- THREAD SAFE END ---- + } +} + + +uno::Reference< util::XChangesBatch > const & GrammarCheckingIterator::GetUpdateAccess() const +{ + if (!m_xUpdateAccess.is()) + { + try + { + // get configuration provider + uno::Reference< uno::XComponentContext > xContext = comphelper::getProcessComponentContext(); + uno::Reference< lang::XMultiServiceFactory > xConfigurationProvider = + configuration::theDefaultProvider::get( xContext ); + + // get configuration update access + beans::PropertyValue aValue; + aValue.Name = "nodepath"; + aValue.Value <<= OUString("org.openoffice.Office.Linguistic/ServiceManager"); + uno::Sequence< uno::Any > aProps{ uno::Any(aValue) }; + m_xUpdateAccess.set( + xConfigurationProvider->createInstanceWithArguments( + "com.sun.star.configuration.ConfigurationUpdateAccess", aProps ), + uno::UNO_QUERY_THROW ); + } + catch (uno::Exception &) + { + } + } + + return m_xUpdateAccess; +} + + +void GrammarCheckingIterator::GetConfiguredGCSvcs_Impl() +{ + GCImplNames_t aTmpGCImplNamesByLang; + + try + { + // get node names (locale iso strings) for configured grammar checkers + uno::Reference< container::XNameAccess > xNA( GetUpdateAccess(), uno::UNO_QUERY_THROW ); + xNA.set( xNA->getByName( "GrammarCheckerList" ), uno::UNO_QUERY_THROW ); + const uno::Sequence< OUString > aElementNames( xNA->getElementNames() ); + + for (const OUString& rElementName : aElementNames) + { + uno::Sequence< OUString > aImplNames; + uno::Any aTmp( xNA->getByName( rElementName ) ); + if (aTmp >>= aImplNames) + { + if (aImplNames.hasElements()) + { + // only the first entry is used, there should be only one grammar checker per language + const OUString aImplName( aImplNames[0] ); + const LanguageType nLang = LanguageTag::convertToLanguageType( rElementName ); + aTmpGCImplNamesByLang[ nLang ] = aImplName; + } + } + else + { + SAL_WARN( "linguistic", "failed to get aImplNames. Wrong type?" ); + } + } + } + catch (uno::Exception const &) + { + TOOLS_WARN_EXCEPTION( "linguistic", "exception caught. Failed to get configured services" ); + } + + { + // ---- THREAD SAFE START ---- + ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() ); + m_aGCImplNamesByLang = aTmpGCImplNamesByLang; + // ---- THREAD SAFE END ---- + } +} + + +sal_Bool SAL_CALL GrammarCheckingIterator::supportsService( + const OUString & rServiceName ) +{ + return cppu::supportsService(this, rServiceName); +} + + +OUString SAL_CALL GrammarCheckingIterator::getImplementationName( ) +{ + return "com.sun.star.lingu2.ProofreadingIterator"; +} + + +uno::Sequence< OUString > SAL_CALL GrammarCheckingIterator::getSupportedServiceNames( ) +{ + return { "com.sun.star.linguistic2.ProofreadingIterator" }; +} + + +void GrammarCheckingIterator::SetServiceList( + const lang::Locale &rLocale, + const uno::Sequence< OUString > &rSvcImplNames ) +{ + ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() ); + + LanguageType nLanguage = LinguLocaleToLanguage( rLocale ); + OUString aImplName; + if (rSvcImplNames.hasElements()) + aImplName = rSvcImplNames[0]; // there is only one grammar checker per language + + if (!LinguIsUnspecified(nLanguage) && nLanguage != LANGUAGE_DONTKNOW) + { + if (!aImplName.isEmpty()) + m_aGCImplNamesByLang[ nLanguage ] = aImplName; + else + m_aGCImplNamesByLang.erase( nLanguage ); + } +} + + +uno::Sequence< OUString > GrammarCheckingIterator::GetServiceList( + const lang::Locale &rLocale ) const +{ + ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() ); + + OUString aImplName; // there is only one grammar checker per language + LanguageType nLang = LinguLocaleToLanguage( rLocale ); + GCImplNames_t::const_iterator aIt( m_aGCImplNamesByLang.find( nLang ) ); + if (aIt != m_aGCImplNamesByLang.end()) + aImplName = aIt->second; + + if (!aImplName.isEmpty()) + return { aImplName }; + return {}; +} + + +extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface* +linguistic_GrammarCheckingIterator_get_implementation( + css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&) +{ + return cppu::acquire(new GrammarCheckingIterator()); +} + + + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |