summary refs log tree commit diff stats
path: root/editeng/source/misc/svxacorr.cxx
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 09:06:44 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 09:06:44 +0000
commit ed5640d8b587fbcfed7dd7967f3de04b37a76f26 (patch)
tree 7a5f7c6c9d02226d7471cb3cc8fbbf631b415303 /editeng/source/misc/svxacorr.cxx
parent Initial commit. (diff)
download libreoffice-ed5640d8b587fbcfed7dd7967f3de04b37a76f26.tar.xz
libreoffice-ed5640d8b587fbcfed7dd7967f3de04b37a76f26.zip
Adding upstream version 4:7.4.7.upstream/4%7.4.7upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'editeng/source/misc/svxacorr.cxx')
-rw-r--r-- editeng/source/misc/svxacorr.cxx 3069
1 files changed, 3069 insertions, 0 deletions
diff --git a/editeng/source/misc/svxacorr.cxx b/editeng/source/misc/svxacorr.cxx
new file mode 100644
index 000000000..ff93ecee8
--- /dev/null
+++ b/editeng/source/misc/svxacorr.cxx
@@ -0,0 +1,3069 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <memory>
+#include <string_view>
+#include <sal/config.h>
+
+#include <com/sun/star/linguistic2/XSpellChecker1.hpp>
+#include <com/sun/star/embed/XStorage.hpp>
+#include <com/sun/star/io/IOException.hpp>
+#include <com/sun/star/io/XStream.hpp>
+#include <tools/urlobj.hxx>
+#include <i18nlangtag/mslangid.hxx>
+#include <i18nutil/transliteration.hxx>
+#include <sal/log.hxx>
+#include <osl/diagnose.h>
+#include <utility>
+#include <vcl/svapp.hxx>
+#include <vcl/settings.hxx>
+#include <svl/fstathelper.hxx>
+#include <svl/urihelper.hxx>
+#include <unotools/charclass.hxx>
+#include <com/sun/star/i18n/UnicodeType.hpp>
+#include <unotools/collatorwrapper.hxx>
+#include <com/sun/star/i18n/UnicodeScript.hpp>
+#include <com/sun/star/i18n/OrdinalSuffix.hpp>
+#include <unotools/localedatawrapper.hxx>
+#include <unotools/transliterationwrapper.hxx>
+#include <comphelper/processfactory.hxx>
+#include <comphelper/storagehelper.hxx>
+#include <o3tl/string_view.hxx>
+#include <editeng/editids.hrc>
+#include <sot/storage.hxx>
+#include <editeng/udlnitem.hxx>
+#include <editeng/wghtitem.hxx>
+#include <editeng/postitem.hxx>
+#include <editeng/crossedoutitem.hxx>
+#include <editeng/escapementitem.hxx>
+#include <editeng/svxacorr.hxx>
+#include <editeng/unolingu.hxx>
+#include <vcl/window.hxx>
+#include <com/sun/star/xml/sax/InputSource.hpp>
+#include <com/sun/star/xml/sax/FastParser.hpp>
+#include <com/sun/star/xml/sax/Writer.hpp>
+#include <com/sun/star/xml/sax/SAXParseException.hpp>
+#include <unotools/streamwrap.hxx>
+#include "SvXMLAutoCorrectImport.hxx"
+#include "SvXMLAutoCorrectExport.hxx"
+#include "SvXMLAutoCorrectTokenHandler.hxx"
+#include <ucbhelper/content.hxx>
+#include <com/sun/star/ucb/ContentCreationException.hpp>
+#include <com/sun/star/ucb/XCommandEnvironment.hpp>
+#include <com/sun/star/ucb/TransferInfo.hpp>
+#include <com/sun/star/ucb/NameClash.hpp>
+#include <tools/diagnose_ex.h>
+#include <xmloff/xmltoken.hxx>
+#include <unordered_map>
+#include <rtl/character.hxx>
+
+using namespace ::com::sun::star::ucb;
+using namespace ::com::sun::star::uno;
+using namespace ::com::sun::star::xml::sax;
+using namespace ::com::sun::star;
+using namespace ::xmloff::token;
+using namespace ::utl;
+
+namespace {
+
+/// Bit set recording which kinds of sentence-ending punctuation have been
+/// seen; each enumerator occupies its own bit so they can be OR-ed together.
+enum class Flags {
+    NONE            = 0,
+    FullStop        = 1 << 0,
+    ExclamationMark = 1 << 1,
+    QuestionMark    = 1 << 2,
+};
+
+}
+
+namespace o3tl {
+ template<> struct typed_flags<Flags> : is_typed_flags<Flags, 0x07> {}; // mask 0x07 == 0x01|0x02|0x04, i.e. exactly the three bits the Flags enum declares
+}
+const sal_Unicode cNonBreakingSpace = 0xA0; // U+00A0 NO-BREAK SPACE
+
+constexpr OUStringLiteral pXMLImplWordStart_ExcptLstStr = u"WordExceptList.xml"; // NOTE(review): presumably the stream name of the word-start exception list - confirm at the use site
+constexpr OUStringLiteral pXMLImplCplStt_ExcptLstStr = u"SentenceExceptList.xml"; // NOTE(review): presumably the stream name of the sentence-start exception list - confirm at the use site
+constexpr OUStringLiteral pXMLImplAutocorr_ListStr = u"DocumentList.xml"; // NOTE(review): presumably the stream name of the replacement list - confirm at the use site
+
+const char
+ /* Characters skipped at the beginning of a word: quotes and opening brackets.
+  * lcl_IsInAsciiArr additionally accepts U+2018..U+201F for this array.
+  * NOTE(review): the \x83..\x94 bytes look like Windows-1252 quote code points;
+  * where plain char is signed they are negative and cannot equal a sal_Unicode
+  * value - confirm whether they still serve a purpose. */
+ sImplSttSkipChars[] = "\"\'([{\x83\x84\x89\x91\x92\x93\x94",
+ /* Characters skipped at the end of a word: quotes and closing brackets
+  * (same remarks as for the start list). */
+ sImplEndSkipChars[] = "\"\')]}\x83\x84\x89\x91\x92\x93\x94";
+
+static OUString EncryptBlockName_Imp(const OUString& rName); // forward declaration; the definition is not in this part of the file
+
+/// Tells whether @p c is one of the plain word separators: space, tab,
+/// line feed, no-break space (U+00A0) or non-breaking hyphen (U+2011).
+static bool NonFieldWordDelim( const sal_Unicode c )
+{
+    switch( c )
+    {
+        case ' ':
+        case '\t':
+        case 0x0a:
+        case cNonBreakingSpace:
+        case 0x2011:
+            return true;
+        default:
+            return false;
+    }
+}
+
+/// Same as NonFieldWordDelim(), but the code unit 0x1 counts as a delimiter
+/// as well (NOTE(review): presumably the placeholder used for fields - confirm).
+static bool IsWordDelim( const sal_Unicode c )
+{
+    if( 0x1 == c )
+        return true;
+    return NonFieldWordDelim( c );
+}
+
+
+/// True when the character-type bits in @p nCharType describe a letter and
+/// have the KCharacterType::LOWER bit set.
+static bool IsLowerLetter( sal_Int32 nCharType )
+{
+    if( !CharClass::isLetterType( nCharType ) )
+        return false;
+    return ( nCharType & css::i18n::KCharacterType::LOWER ) != 0;
+}
+
+/// True when the character-type bits in @p nCharType describe a letter and
+/// have the KCharacterType::UPPER bit set.
+static bool IsUpperLetter( sal_Int32 nCharType )
+{
+    if( !CharClass::isLetterType( nCharType ) )
+        return false;
+    return ( nCharType & css::i18n::KCharacterType::UPPER ) != 0;
+}
+
+/// Returns true as soon as a position in rTxt[nStt, nEnd) belongs to one of
+/// the CJK / Hangul / kana / half- and full-width script blocks listed below;
+/// returns false if no position does.
+static bool lcl_IsUnsupportedUnicodeChar( CharClass const & rCC, const OUString& rTxt,
+                                          sal_Int32 nStt, sal_Int32 nEnd )
+{
+    sal_Int32 nPos = nStt;
+    while( nPos < nEnd )
+    {
+        switch( rCC.getScript( rTxt, nPos ) )
+        {
+            case css::i18n::UnicodeScript_kCJKRadicalsSupplement:
+            case css::i18n::UnicodeScript_kHangulJamo:
+            case css::i18n::UnicodeScript_kCJKSymbolPunctuation:
+            case css::i18n::UnicodeScript_kHiragana:
+            case css::i18n::UnicodeScript_kKatakana:
+            case css::i18n::UnicodeScript_kHangulCompatibilityJamo:
+            case css::i18n::UnicodeScript_kEnclosedCJKLetterMonth:
+            case css::i18n::UnicodeScript_kCJKCompatibility:
+            case css::i18n::UnicodeScript_kCJKUnifiedIdeographsExtensionA:
+            case css::i18n::UnicodeScript_kCJKUnifiedIdeograph:
+            case css::i18n::UnicodeScript_kHangulSyllable:
+            case css::i18n::UnicodeScript_kCJKCompatibilityIdeograph:
+            case css::i18n::UnicodeScript_kHalfwidthFullwidthForm:
+                return true;
+            default:
+                break; // any other script: keep scanning
+        }
+        ++nPos;
+    }
+    return false;
+}
+
+/// Returns true if any position in rTxt[nStt, nEnd) has the Unicode type
+/// PRIVATE_USE.
+static bool lcl_IsSymbolChar( CharClass const & rCC, const OUString& rTxt,
+                              sal_Int32 nStt, sal_Int32 nEnd )
+{
+    for( sal_Int32 nPos = nStt; nPos < nEnd; ++nPos )
+    {
+        const bool bPrivateUse =
+            rCC.getType( rTxt, nPos ) == css::i18n::UnicodeType::PRIVATE_USE;
+        if( bPrivateUse )
+            return true;
+    }
+    return false;
+}
+
+/// Tells whether @p c occurs in the NUL-terminated char array @p pArr.
+/// When @p pArr is one of the two skip-character lists, the typographical
+/// quotation marks U+2018..U+201F are accepted too (tdf#54409), because the
+/// lists themselves only contain the ASCII quotes.
+static bool lcl_IsInAsciiArr( const char* pArr, const sal_Unicode c )
+{
+    const bool bSkipList = pArr == sImplSttSkipChars || pArr == sImplEndSkipChars;
+    if( bSkipList && 0x2018 <= c && c <= 0x201F )
+        return true;
+
+    for( const char* pCur = pArr; *pCur; ++pCur )
+    {
+        if( *pCur == c )
+            return true;
+    }
+    return false;
+}
+
+// Nothing to release; defined in this file rather than inline in the header.
+SvxAutoCorrDoc::~SvxAutoCorrDoc() = default;
+
+// Default (do-nothing) implementation of the hook that is called by
+// - FnCapitalStartWord
+// - FnCapitalStartSentence
+// after they have exchanged a character, so that the word can, if necessary, be inserted
+// into the exception list. All four arguments are ignored here.
+void SvxAutoCorrDoc::SaveCpltSttWord( ACFlags, sal_Int32, const OUString&,
+ sal_Unicode )
+{
+}
+
+LanguageType SvxAutoCorrDoc::GetLanguage( sal_Int32 ) const // default implementation: the position argument is ignored
+{
+ return LANGUAGE_SYSTEM; // callers such as GetDocLanguage() resolve this to the application language
+}
+
+/// Language tag taken from the application-wide settings.
+static const LanguageTag& GetAppLang()
+{
+    auto const & rAppSettings = Application::GetSettings();
+    return rAppSettings.GetLanguageTag();
+}
+
+/// Language of @p rDoc at @p nPos. LANGUAGE_SYSTEM is never handed out
+/// unresolved: it is replaced by the application language.
+static LanguageType GetDocLanguage( const SvxAutoCorrDoc& rDoc, sal_Int32 nPos )
+{
+    const LanguageType eDocLang = rDoc.GetLanguage( nPos );
+    if( eDocLang != LANGUAGE_SYSTEM )
+        return eDocLang;
+    return GetAppLang().getLanguageType(); // the current work locale
+}
+
+/// Returns a LocaleDataWrapper for @p nLang. A single instance is cached in a
+/// function-local static and replaced whenever a different language is
+/// requested, so a reference obtained earlier does not survive a call with
+/// another language.
+static LocaleDataWrapper& GetLocaleDataWrapper( LanguageType nLang )
+{
+    static std::unique_ptr<LocaleDataWrapper> xCached;
+
+    LanguageTag aWanted( nLang );
+    const bool bStale = !xCached || xCached->getLoadedLanguageTag() != aWanted;
+    if( bStale )
+        xCached = std::make_unique<LocaleDataWrapper>( std::move(aWanted) );
+    return *xCached;
+}
+/// Shared TransliterationWrapper configured to ignore kana and width
+/// differences. The module for the application language is loaded on the
+/// first call only.
+static TransliterationWrapper& GetIgnoreTranslWrapper()
+{
+    static TransliterationWrapper aWrp( ::comphelper::getProcessComponentContext(),
+                TransliterationFlags::IGNORE_KANA |
+                TransliterationFlags::IGNORE_WIDTH );
+    static bool bModuleLoaded = false;
+
+    if( bModuleLoaded )
+        return aWrp;
+
+    aWrp.loadModuleIfNeeded( GetAppLang().getLanguageType() );
+    bModuleLoaded = true;
+    return aWrp;
+}
+/// Shared CollatorWrapper; it is created on first use and loads the default
+/// collator for the application locale at that moment.
+static CollatorWrapper& GetCollatorWrapper()
+{
+    auto const makeCollator = []()
+        {
+            CollatorWrapper aFresh( ::comphelper::getProcessComponentContext() );
+            aFresh.loadDefaultCollator( GetAppLang().getLocale(), 0 );
+            return aFresh;
+        };
+    static CollatorWrapper aCollWrp = makeCollator();
+    return aCollWrp;
+}
+
+/// Tells whether @p cChar is one of the characters in the fixed list below
+/// (NUL, whitespace, quotes and a set of ASCII punctuation marks).
+bool SvxAutoCorrect::IsAutoCorrectChar( sal_Unicode cChar )
+{
+    switch( cChar )
+    {
+        case '\0': case '\t': case 0x0a:
+        case ' ':  case '\'': case '\"':
+        case '*':  case '_':  case '%':
+        case '.':  case ',':  case ';':
+        case ':':  case '?':  case '!':
+        case '<':  case '>':
+        case '/':  case '-':
+            return true;
+        default:
+            return false;
+    }
+}
+
+namespace
+{
+    /// A character separates the parts of a compound word when it is a
+    /// hyphen-minus or any character accepted by IsAutoCorrectChar().
+    bool IsCompoundWordDelimChar(sal_Unicode cChar)
+    {
+        if (cChar == '-')
+            return true;
+        return SvxAutoCorrect::IsAutoCorrectChar(cChar);
+    }
+}
+
+/// Tells whether @p cChar is one of the characters % ; : ? ! or '/'.
+bool SvxAutoCorrect::NeedsHardspaceAutocorr( sal_Unicode cChar )
+{
+    switch( cChar )
+    {
+        case '%':
+        case ';':
+        case ':':
+        case '?':
+        case '!':
+        case '/': // case for the urls exception
+            return true;
+        default:
+            return false;
+    }
+}
+
+/// Builds the default set of autocorrection flags. When the application
+/// language is one of the English variants listed below, ChgQuotes and
+/// ChgSglQuotes are removed from the result.
+ACFlags SvxAutoCorrect::GetDefaultFlags()
+{
+    const ACFlags nDefaults = ACFlags::Autocorrect
+                    | ACFlags::CapitalStartSentence
+                    | ACFlags::CapitalStartWord
+                    | ACFlags::ChgOrdinalNumber
+                    | ACFlags::ChgToEnEmDash
+                    | ACFlags::AddNonBrkSpace
+                    | ACFlags::TransliterateRTL
+                    | ACFlags::ChgAngleQuotes
+                    | ACFlags::ChgWeightUnderl
+                    | ACFlags::SetINetAttr
+                    | ACFlags::ChgQuotes
+                    | ACFlags::SaveWordCplSttLst
+                    | ACFlags::SaveWordWordStartLst
+                    | ACFlags::CorrectCapsLock;
+
+    const LanguageType eAppLang = GetAppLang().getLanguageType();
+    const bool bEnglish = eAppLang.anyOf(
+        LANGUAGE_ENGLISH,
+        LANGUAGE_ENGLISH_US,
+        LANGUAGE_ENGLISH_UK,
+        LANGUAGE_ENGLISH_AUS,
+        LANGUAGE_ENGLISH_CAN,
+        LANGUAGE_ENGLISH_NZ,
+        LANGUAGE_ENGLISH_EIRE,
+        LANGUAGE_ENGLISH_SAFRICA,
+        LANGUAGE_ENGLISH_JAMAICA,
+        LANGUAGE_ENGLISH_CARIBBEAN);
+    if( !bEnglish )
+        return nDefaults;
+
+    // English: no automatic replacement of double or single quotes
+    return nDefaults & ~ACFlags(ACFlags::ChgQuotes|ACFlags::ChgSglQuotes);
+}
+
+constexpr sal_Unicode cEmDash = 0x2014; // U+2014 EM DASH
+constexpr sal_Unicode cEnDash = 0x2013; // U+2013 EN DASH
+constexpr sal_Unicode cApostrophe = 0x2019; // U+2019 RIGHT SINGLE QUOTATION MARK, used as typographic apostrophe
+constexpr sal_Unicode cLeftDoubleAngleQuote = 0xAB; // U+00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+constexpr sal_Unicode cRightDoubleAngleQuote = 0xBB; // U+00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+constexpr sal_Unicode cLeftSingleAngleQuote = 0x2039; // U+2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+constexpr sal_Unicode cRightSingleAngleQuote = 0x203A; // U+203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+// Stop characters for searching preceding quotes; every table is terminated by 0
+// (the first character is also the opening quote we are looking for)
+const sal_Unicode aStopDoubleAngleQuoteStart[] = { 0x201E, 0x201D, 0x201C, 0 }; // preceding ,,
+const sal_Unicode aStopDoubleAngleQuoteEnd[] = { cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0x201D, 0x201E, 0 }; // preceding >>
+// preceding << for Romanian, handle also alternative primary closing quotation mark U+201C
+const sal_Unicode aStopDoubleAngleQuoteEndRo[] = { cLeftDoubleAngleQuote, cRightDoubleAngleQuote, 0x201D, 0x201E, 0x201C, 0 };
+const sal_Unicode aStopSingleQuoteEnd[] = { 0x201A, 0x2018, 0x201C, 0x201E, 0 }; // looks for U+201A; U+2018, U+201C and U+201E stop the search
+const sal_Unicode aStopSingleQuoteEndRuUa[] = { 0x201E, 0x201C, cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0 }; // NOTE(review): name suggests Russian/Ukrainian - confirm at the use site
+
+SvxAutoCorrect::SvxAutoCorrect( OUString aShareAutocorrFile, // both file names are taken by value and moved into the members
+ OUString aUserAutocorrFile )
+ : sShareAutoCorrFile(std::move( aShareAutocorrFile ))
+ , sUserAutoCorrFile(std::move( aUserAutocorrFile ))
+ , eCharClassLang( LANGUAGE_DONTKNOW ) // pCharClass is not set here; GetCharClass_() later sets both together
+ , nFlags(SvxAutoCorrect::GetDefaultFlags()) // start from the language-dependent defaults
+ , cStartDQuote( 0 ) // all four quote characters start out as 0
+ , cEndDQuote( 0 )
+ , cStartSQuote( 0 )
+ , cEndSQuote( 0 )
+{
+}
+
+SvxAutoCorrect::SvxAutoCorrect( const SvxAutoCorrect& rCpy ) // copies only the members listed below; pCharClass, for one, is not copied
+ : sShareAutoCorrFile( rCpy.sShareAutoCorrFile )
+ , sUserAutoCorrFile( rCpy.sUserAutoCorrFile )
+ , aSwFlags( rCpy.aSwFlags )
+ , eCharClassLang(rCpy.eCharClassLang)
+ , nFlags( rCpy.nFlags & ~ACFlags(ACFlags::ChgWordLstLoad|ACFlags::CplSttLstLoad|ACFlags::WordStartLstLoad)) // the three "...Load" flags are cleared in the copy (NOTE(review): presumably because the copy has not loaded any list yet - confirm)
+ , cStartDQuote( rCpy.cStartDQuote )
+ , cEndDQuote( rCpy.cEndDQuote )
+ , cStartSQuote( rCpy.cStartSQuote )
+ , cEndSQuote( rCpy.cEndSQuote )
+{
+}
+
+
+// No explicit clean-up needed; defined in this file rather than in the header.
+SvxAutoCorrect::~SvxAutoCorrect() = default;
+
+/// Replaces the CharClass held in pCharClass by a new one for @p eLang and
+/// records that language in eCharClassLang.
+void SvxAutoCorrect::GetCharClass_( LanguageType eLang )
+{
+    LanguageTag aTag( eLang );
+    pCharClass.reset( new CharClass( std::move(aTag) ) );
+    eCharClassLang = eLang;
+}
+
+/// Switches the bits in @p nFlag on or off in nFlags. When switching off
+/// actually cleared CapitalStartSentence, CapitalStartWord or Autocorrect,
+/// the matching "list loaded" flag (CplSttLstLoad, WordStartLstLoad,
+/// ChgWordLstLoad) is cleared as well.
+void SvxAutoCorrect::SetAutoCorrFlag( ACFlags nFlag, bool bOn )
+{
+    if( bOn )
+    {
+        nFlags = nFlags | nFlag;
+        return;
+    }
+
+    const ACFlags nBefore = nFlags;
+    nFlags = nFlags & ~nFlag;
+
+    const bool bSentenceChanged =
+        (nBefore & ACFlags::CapitalStartSentence) != (nFlags & ACFlags::CapitalStartSentence);
+    const bool bWordChanged =
+        (nBefore & ACFlags::CapitalStartWord) != (nFlags & ACFlags::CapitalStartWord);
+    const bool bAutocorrChanged =
+        (nBefore & ACFlags::Autocorrect) != (nFlags & ACFlags::Autocorrect);
+
+    if( bSentenceChanged )
+        nFlags &= ~ACFlags::CplSttLstLoad;
+    if( bWordChanged )
+        nFlags &= ~ACFlags::WordStartLstLoad;
+    if( bAutocorrChanged )
+        nFlags &= ~ACFlags::ChgWordLstLoad;
+}
+
+
+// Correct TWo INitial CApitals
+//
+// Examines the word in rTxt[nSttPos, nEndPos) and each of its parts that
+// follows a compound-word delimiter. Where a part starts with two upper case
+// letters followed by a lower case letter, the second letter is replaced in
+// rDoc by its lower case form - unless the part is in the word-start
+// exception list or the spell checker accepts it as written. If
+// ACFlags::SaveWordWordStartLst is set, rDoc.SaveCpltSttWord() is told about
+// each replacement.
+void SvxAutoCorrect::FnCapitalStartWord( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
+                                    sal_Int32 nSttPos, sal_Int32 nEndPos,
+                                    LanguageType eLang )
+{
+    CharClass& rCC = GetCharClass( eLang );
+
+    // Delete all non alphanumeric. Test the characters at the beginning/end of
+    // the word ( recognizes: "(min.", "/min.", and so on.)
+    for( ; nSttPos < nEndPos; ++nSttPos )
+        if( rCC.isLetterNumeric( rTxt, nSttPos ))
+            break;
+    for( ; nSttPos < nEndPos; --nEndPos )
+        if( rCC.isLetterNumeric( rTxt, nEndPos - 1 ))
+            break;
+
+    // Nothing alphanumeric is left, so there is nothing to correct. Leaving
+    // here also keeps rTxt[nEndPos-1] below from being read with an index of
+    // -1 when nEndPos is 0.
+    if( nSttPos >= nEndPos )
+        return;
+
+    // Is the word a compounded word separated by delimiters?
+    // If so, keep track of all delimiters so each constituent
+    // word can be checked for two initial capital letters.
+    std::deque<sal_Int32> aDelimiters;
+
+    // Always check for two capitals at the beginning
+    // of the entire word, so start at nSttPos.
+    aDelimiters.push_back(nSttPos);
+
+    // Find all compound word delimiters
+    for (sal_Int32 n = nSttPos; n < nEndPos; ++n)
+    {
+        if (IsCompoundWordDelimChar(rTxt[ n ]))
+        {
+            aDelimiters.push_back( n + 1 ); // Get position of char after delimiter
+        }
+    }
+
+    // Decide where to put the terminating delimiter.
+    // If the last AutoCorrect char was a newline, then the AutoCorrect
+    // char will not be included in rTxt.
+    // If the last AutoCorrect char was not a newline, then the AutoCorrect
+    // character will be the last character in rTxt.
+    if (!IsCompoundWordDelimChar(rTxt[nEndPos-1]))
+        aDelimiters.push_back(nEndPos);
+
+    // Iterate through the word and all words that compose it.
+    // Two capital letters at the beginning of word?
+    for (size_t nI = 0; nI < aDelimiters.size() - 1; ++nI)
+    {
+        const sal_Int32 nFirst = aDelimiters[nI];
+        const sal_Int32 nPartEnd = aDelimiters[nI + 1];
+        const sal_Int32 nSecond = nFirst + 1;
+
+        // At least three characters are needed: UPPER UPPER lower
+        if( nFirst + 2 >= nPartEnd )
+            continue;
+        if( !IsUpperLetter( rCC.getCharacterType( rTxt, nFirst )) ||
+            !IsUpperLetter( rCC.getCharacterType( rTxt, nSecond )) )
+            continue;
+        // Is the third character a lower case
+        if( !IsLowerLetter( rCC.getCharacterType( rTxt, nSecond + 1 )) )
+            continue;
+        // Do not replace special attributes
+        if( 0x1 == rTxt[ nSecond ] || 0x2 == rTxt[ nSecond ] )
+            continue;
+
+        // test if the word is in an exception list
+        OUString sWord( rTxt.copy( nFirst, nPartEnd - nFirst ));
+        if( FindInWordStartExceptList(eLang, sWord) )
+            continue;
+
+        // Check that word isn't correctly spelt before correcting.
+        // GetSpellChecker() may hand back an empty reference, so test it
+        // before dereferencing.
+        css::uno::Reference< css::linguistic2::XSpellChecker1 > xSpeller =
+            LinguMgr::GetSpellChecker();
+        if( xSpeller.is() && xSpeller->hasLanguage(static_cast<sal_uInt16>(eLang)) )
+        {
+            Sequence< css::beans::PropertyValue > aEmptySeq;
+            if (xSpeller->isValid(sWord, static_cast<sal_uInt16>(eLang), aEmptySeq))
+            {
+                return;
+            }
+        }
+
+        sal_Unicode cSave = rTxt[ nSecond ];
+        OUString sChar = rCC.lowercase( OUString(cSave) );
+        if( sChar[0] != cSave && rDoc.ReplaceRange( nSecond, 1, sChar ))
+        {
+            if( ACFlags::SaveWordWordStartLst & nFlags )
+                rDoc.SaveCpltSttWord( ACFlags::CapitalStartWord, nSecond, sWord, cSave );
+        }
+    }
+}
+
+// Format ordinal numbers suffixes (1st -> 1^st)
+//
+// Looks for "<number><suffix>" in rTxt[nSttPos, nEndPos) - after skipping the
+// start/end skip characters - and, if the suffix is one of the ordinal
+// suffixes the locale reports for that number and consists of letters, sets it
+// superscript in rDoc. Returns true if an attribute was set.
+bool SvxAutoCorrect::FnChgOrdinalNumber(
+    SvxAutoCorrDoc& rDoc, const OUString& rTxt,
+    sal_Int32 nSttPos, sal_Int32 nEndPos,
+    LanguageType eLang)
+{
+    // 1st, 2nd, 3rd, 4 - 0th
+    // 201th or 201st
+    // 12th or 12nd
+
+    // In some languages ordinal suffixes should never be
+    // changed to superscript.
+    if (eLang.anyOf(
+         LANGUAGE_SWEDISH,
+         LANGUAGE_SWEDISH_FINLAND))
+        return false;
+
+    CharClass& rCC = GetCharClass(eLang);
+
+    while (nSttPos < nEndPos && lcl_IsInAsciiArr(sImplSttSkipChars, rTxt[nSttPos]))
+        ++nSttPos;
+    while (nSttPos < nEndPos && lcl_IsInAsciiArr(sImplEndSkipChars, rTxt[nEndPos - 1]))
+        --nEndPos;
+
+    // Walk backwards: the first digit met is the end of the number;
+    // everything in front of it must be a digit or at least not a letter.
+    sal_Int32 nLastDigit = nEndPos;
+    bool bHaveDigit = false;
+    bool bNumberOk = true;
+    for (sal_Int32 nPos = nEndPos - 1; nPos >= nSttPos; --nPos)
+    {
+        const bool bDigit = rCC.isDigit(rTxt, nPos);
+        if (bHaveDigit)
+        {
+            if (!bDigit && rCC.isLetter(rTxt, nPos))
+                bNumberOk = false;
+        }
+        else if (bDigit)
+        {
+            bHaveDigit = true;
+            nLastDigit = nPos;
+        }
+    }
+
+    if (!bHaveDigit || !bNumberOk)
+        return false;
+
+    const sal_Int32 nNum = o3tl::toInt32(rTxt.subView(nSttPos, nLastDigit - nSttPos + 1));
+
+    // Check if the characters after that number correspond to the ordinal suffix
+    uno::Reference< i18n::XOrdinalSuffix > xOrdSuffix
+            = i18n::OrdinalSuffix::create(comphelper::getProcessComponentContext());
+    const uno::Sequence< OUString > aSuffixes
+            = xOrdSuffix->getOrdinalSuffix(nNum, rCC.getLanguageTag().getLocale());
+
+    // The text behind the number is the same for every candidate suffix
+    const std::u16string_view sTail = rTxt.subView(nLastDigit + 1, nEndPos - nLastDigit - 1);
+
+    bool bChanged = false;
+    for (OUString const & rSuffix : aSuffixes)
+    {
+        if (!(rSuffix == sTail))
+            continue;
+        // Only a suffix made of letters is set as super script
+        if (!rCC.isLetter(rSuffix))
+            continue;
+
+        SvxEscapementItem aSvxEscapementItem(DFLT_ESC_AUTO_SUPER,
+                        DFLT_ESC_PROP, SID_ATTR_CHAR_ESCAPEMENT);
+        rDoc.SetAttr(nLastDigit + 1, nEndPos,
+                        SID_ATTR_CHAR_ESCAPEMENT,
+                        aSvxEscapementItem);
+        bChanged = true;
+    }
+    return bChanged;
+}
+
+// Replace dashes
+//
+// Works on the word rTxt[nSttPos, nEndPos) and its left neighbourhood:
+//  - " --word" and "word - word" / "word -- word" get their hyphen(s) replaced
+//    by an en dash (an em dash for Russian and Ukrainian);
+//  - "--" inside the word, between alphanumerics or skip characters, becomes
+//    an em dash, or an en dash between two digits and for Hungarian and
+//    Finnish.
+// Returns true if rDoc was changed.
+bool SvxAutoCorrect::FnChgToEnEmDash(
+                                SvxAutoCorrDoc& rDoc, std::u16string_view rTxt,
+                                sal_Int32 nSttPos, sal_Int32 nEndPos,
+                                LanguageType eLang )
+{
+    bool bRet = false;
+    CharClass& rCC = GetCharClass( eLang );
+    if (eLang == LANGUAGE_SYSTEM)
+        eLang = GetAppLang().getLanguageType();
+    bool bAlwaysUseEmDash = (eLang == LANGUAGE_RUSSIAN || eLang == LANGUAGE_UKRAINIAN);
+
+    // replace " - " or " --" with "enDash"
+    if( 1 < nSttPos && 1 <= nEndPos - nSttPos )
+    {
+        sal_Unicode cCh = rTxt[ nSttPos ];
+        if( '-' == cCh )
+        {
+            // the word itself starts with "--" and is preceded by a space
+            if( 1 < nEndPos - nSttPos &&
+                ' ' == rTxt[ nSttPos-1 ] &&
+                '-' == rTxt[ nSttPos+1 ])
+            {
+                sal_Int32 n;
+                // skip opening quotes/brackets behind the dashes; cCh ends up
+                // as the first character that is not skipped
+                for( n = nSttPos+2; n < nEndPos && lcl_IsInAsciiArr(
+                            sImplSttSkipChars,(cCh = rTxt[ n ]));
+                        ++n )
+                    ;
+
+                // found: " --[<AnySttChars>][A-z0-9]
+                if( rCC.isLetterNumeric( OUString(cCh) ) )
+                {
+                    // walk left over closing quotes/brackets in front of the space
+                    for( n = nSttPos-1; n && lcl_IsInAsciiArr(
+                            sImplEndSkipChars,(cCh = rTxt[ --n ])); )
+                        ;
+
+                    // found: "[A-z0-9][<AnyEndChars>] --[<AnySttChars>][A-z0-9]
+                    if( rCC.isLetterNumeric( OUString(cCh) ))
+                    {
+                        rDoc.Delete( nSttPos, nSttPos + 2 );
+                        rDoc.Insert( nSttPos, bAlwaysUseEmDash ? OUString(cEmDash) : OUString(cEnDash) );
+                        bRet = true;
+                    }
+                }
+            }
+        }
+        else if( 3 < nSttPos &&
+                 ' ' == rTxt[ nSttPos-1 ] &&
+                 '-' == rTxt[ nSttPos-2 ])
+        {
+            // the word is preceded by "- "; nTmpPos/nLen describe the run of
+            // one or two hyphens to be replaced
+            sal_Int32 n, nLen = 1, nTmpPos = nSttPos - 2;
+            if( '-' == ( cCh = rTxt[ nTmpPos-1 ]) )
+            {
+                --nTmpPos;
+                ++nLen;
+                cCh = rTxt[ nTmpPos-1 ];
+            }
+            if( ' ' == cCh )
+            {
+                for( n = nSttPos; n < nEndPos && lcl_IsInAsciiArr(
+                            sImplSttSkipChars,(cCh = rTxt[ n ]));
+                        ++n )
+                    ;
+
+                // found: " - [<AnySttChars>][A-z0-9]
+                if( rCC.isLetterNumeric( OUString(cCh) ) )
+                {
+                    cCh = ' ';
+                    for( n = nTmpPos-1; n && lcl_IsInAsciiArr(
+                            sImplEndSkipChars,(cCh = rTxt[ --n ])); )
+                        ;
+                    // found: "[A-z0-9][<AnyEndChars>] - [<AnySttChars>][A-z0-9]
+                    if( rCC.isLetterNumeric( OUString(cCh) ))
+                    {
+                        rDoc.Delete( nTmpPos, nTmpPos + nLen );
+                        rDoc.Insert( nTmpPos, bAlwaysUseEmDash ? OUString(cEmDash) : OUString(cEnDash) );
+                        bRet = true;
+                    }
+                }
+            }
+        }
+    }
+
+    // Replace [A-z0-9]--[A-z0-9] double dash with "emDash" or "enDash"
+    // [0-9]--[0-9] double dash always replaced with "enDash"
+    // Finnish and Hungarian use enDash instead of emDash.
+    bool bEnDash = (eLang == LANGUAGE_HUNGARIAN || eLang == LANGUAGE_FINNISH);
+    if( 4 <= nEndPos - nSttPos )
+    {
+        OUString sTmp( rTxt.substr( nSttPos, nEndPos - nSttPos ) );
+        sal_Int32 nFndPos = sTmp.indexOf("--");
+        // nFndPos is an offset into sTmp (which starts at nSttPos), so the
+        // neighbouring characters have to be read from sTmp as well, not from
+        // rTxt at the same offset.
+        if( nFndPos != -1 && nFndPos &&
+            nFndPos + 2 < sTmp.getLength() &&
+            ( rCC.isLetterNumeric( sTmp, nFndPos - 1 ) ||
+              lcl_IsInAsciiArr( sImplEndSkipChars, sTmp[ nFndPos - 1 ] )) &&
+            ( rCC.isLetterNumeric( sTmp, nFndPos + 2 ) ||
+              lcl_IsInAsciiArr( sImplSttSkipChars, sTmp[ nFndPos + 2 ] )))
+        {
+            nSttPos = nSttPos + nFndPos;
+            rDoc.Delete( nSttPos, nSttPos + 2 );
+            rDoc.Insert( nSttPos, (bEnDash || (rCC.isDigit( sTmp, nFndPos - 1 ) &&
+                rCC.isDigit( sTmp, nFndPos + 2 )) ? OUString(cEnDash) : OUString(cEmDash)) );
+            bRet = true;
+        }
+    }
+    return bRet;
+}
+
+/* Add non-breaking space before specific punctuation marks in French text.
+ *
+ * Acts only when the CharClass language for eLang is "fr". rTxt[nEndPos] is
+ * the character being examined (NOTE(review): presumably the one just typed -
+ * confirm with the caller).
+ *  - For one of ":;?!%": ordinary and non-breaking spaces directly in front
+ *    of it are deleted and, if the character is one that takes a space in
+ *    this variant (only ":" for fr-CA), a single non-breaking space is
+ *    inserted. Nothing is done when the word looks like a URL protocol or
+ *    "://" is found from the word start onwards.
+ *  - For "/": a non-breaking space sitting in front of a preceding ":" is
+ *    removed again, so that URLs are not broken up.
+ * io_bNbspRunNext is set to true in the cases marked below and never reset
+ * here. Returns true if rDoc was changed.
+ */
+bool SvxAutoCorrect::FnAddNonBrkSpace(
+ SvxAutoCorrDoc& rDoc, const OUString& rTxt,
+ sal_Int32 nEndPos,
+ LanguageType eLang, bool& io_bNbspRunNext )
+{
+ bool bRet = false;
+
+ CharClass& rCC = GetCharClass( eLang );
+
+ if ( rCC.getLanguageTag().getLanguage() == "fr" )
+ {
+ bool bFrCA = (rCC.getLanguageTag().getCountry() == "CA");
+ OUString allChars = ":;?!%"; // every character this function reacts to
+ OUString chars( allChars ); // the subset that actually gets a non-breaking space
+ if ( bFrCA )
+ chars = ":";
+
+ sal_Unicode cChar = rTxt[ nEndPos ];
+ bool bHasSpace = chars.indexOf( cChar ) != -1;
+ bool bIsSpecial = allChars.indexOf( cChar ) != -1;
+ if ( bIsSpecial )
+ {
+ // Get the last word delimiter position (0 if none is found)
+ sal_Int32 nSttWdPos = nEndPos;
+ bool bWasWordDelim = false;
+ while( nSttWdPos )
+ {
+ bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ]);
+ if (bWasWordDelim)
+ break;
+ }
+
+ //See if the text is the start of a protocol string, e.g. have text of
+ //"http" see if it is the start of "http:" and if so leave it alone
+ sal_Int32 nIndex = nSttWdPos + (bWasWordDelim ? 1 : 0); // first character of the word
+ sal_Int32 nProtocolLen = nEndPos - nSttWdPos + 1;
+ if (nIndex + nProtocolLen <= rTxt.getLength())
+ {
+ if (INetURLObject::CompareProtocolScheme(rTxt.subView(nIndex, nProtocolLen)) != INetProtocol::NotValid)
+ return false;
+ }
+
+ // Check the presence of "://" from the word onwards (the search is not limited to nEndPos)
+ sal_Int32 nStrPos = rTxt.indexOf( "://", nSttWdPos + 1 );
+ if ( nStrPos == -1 && nEndPos > 0 )
+ {
+ // Check the previous char
+ sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ];
+ if ( ( chars.indexOf( cPrevChar ) == -1 ) && cPrevChar != '\t' )
+ {
+ // Remove any previous normal or non-breaking space
+ sal_Int32 nPos = nEndPos - 1;
+ while ( cPrevChar == ' ' || cPrevChar == cNonBreakingSpace )
+ {
+ if ( nPos == 0 ) break;
+ nPos--;
+ cPrevChar = rTxt[ nPos ];
+ }
+
+ nPos++; // step back onto the first position after the last kept character
+ if ( nEndPos - nPos > 0 )
+ rDoc.Delete( nPos, nEndPos );
+
+ // Add the non-breaking space where the removed spaces began
+ if ( bHasSpace )
+ rDoc.Insert( nPos, OUString(cNonBreakingSpace) );
+ io_bNbspRunNext = true;
+ bRet = true;
+ }
+ else if ( chars.indexOf( cPrevChar ) != -1 ) // directly after another such punctuation mark: no change to rDoc
+ io_bNbspRunNext = true;
+ }
+ }
+ else if ( cChar == '/' && nEndPos > 1 && rTxt.getLength() > (nEndPos - 1) )
+ {
+ // Remove the hardspace right before to avoid formatting URLs
+ sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ];
+ sal_Unicode cMaybeSpaceChar = rTxt[ nEndPos - 2 ];
+ if ( cPrevChar == ':' && cMaybeSpaceChar == cNonBreakingSpace )
+ {
+ rDoc.Delete( nEndPos - 2, nEndPos - 1 );
+ bRet = true;
+ }
+ }
+ }
+
+ return bRet;
+}
+
+// URL recognition
+//
+// Asks URIHelper::FindFirstURLInText() for a URL in rTxt between nSttPos and
+// nEndPos and, if one is found, sets the internet attribute for it in rDoc.
+// NOTE(review): nSttPos/nEndPos are handed to the helper and afterwards to
+// SetINetAttr(); presumably the helper narrows them to the URL - confirm.
+// Returns whether a URL was found.
+bool SvxAutoCorrect::FnSetINetAttr( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
+                                    sal_Int32 nSttPos, sal_Int32 nEndPos,
+                                    LanguageType eLang )
+{
+    const OUString sURL( URIHelper::FindFirstURLInText( rTxt, nSttPos, nEndPos,
+                                                        GetCharClass( eLang ) ));
+    if( sURL.isEmpty() )
+        return false;
+
+    rDoc.SetINetAttr( nSttPos, nEndPos, sURL );
+    return true;
+}
+
+// Automatic *bold*, /italic/, -strikeout- and _underline_
+//
+// rTxt[nEndPos] is the closing marker. It has to be followed by a word
+// delimiter or by the end of the text. The nearest identical marker to the
+// left is accepted as the opening one if it stands at the start of the text
+// or behind a word delimiter, is directly followed by a non-delimiter, and an
+// alphanumeric character lies between the two. Both markers are then deleted
+// and the attribute is set on the text in between.
+// Returns true if that happened.
+bool SvxAutoCorrect::FnChgWeightUnderl( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
+                                        sal_Int32 nEndPos )
+{
+    const sal_Unicode cInsChar = rTxt[ nEndPos ];  // underline, bold, italic or strikeout
+
+    const sal_Int32 nAfter = nEndPos + 1;
+    if( nAfter != rTxt.getLength() && !IsWordDelim( rTxt[ nAfter ] ) )
+        return false;
+
+    CharClass& rCC = GetCharClass( LANGUAGE_SYSTEM );
+
+    bool bSeenAlphaNum = false;
+    sal_Int32 nFndPos = -1;
+    for( sal_Int32 nIdx = nEndPos; nIdx > 0; )
+    {
+        --nIdx;
+        const sal_Unicode cCur = rTxt[ nIdx ];
+        const bool bMarker = cCur == '_' || cCur == '-' || cCur == '/' || cCur == '*';
+        if( !bMarker )
+        {
+            if( !bSeenAlphaNum )
+                bSeenAlphaNum = rCC.isLetterNumeric( rTxt, nIdx );
+            continue;
+        }
+        if( cCur != cInsChar )
+            continue;   // a different marker: neither content nor a match
+
+        // The first identical marker decides; the search stops here either way.
+        const bool bOpens = bSeenAlphaNum && nIdx + 1 < nEndPos &&
+                            ( !nIdx || IsWordDelim( rTxt[ nIdx - 1 ] ) ) &&
+                            !IsWordDelim( rTxt[ nIdx + 1 ] );
+        if( bOpens )
+            nFndPos = nIdx;
+        break;
+    }
+
+    if( -1 == nFndPos )
+        return false;
+
+    // first delete the Character at the end - this allows insertion
+    // of an empty hint in SetAttr which would be removed by Delete
+    // (fdo#62536, AUTOFMT in Writer)
+    rDoc.Delete( nEndPos, nEndPos + 1 );
+    rDoc.Delete( nFndPos, nFndPos + 1 );
+
+    // Span the attribute over the text that stood between the markers
+    const sal_Int32 nAttrEnd = nEndPos - 1;
+    switch( cInsChar )
+    {
+        case '*': // Bold
+        {
+            SvxWeightItem aSvxWeightItem( WEIGHT_BOLD, SID_ATTR_CHAR_WEIGHT );
+            rDoc.SetAttr( nFndPos, nAttrEnd,
+                            SID_ATTR_CHAR_WEIGHT,
+                            aSvxWeightItem);
+        }
+        break;
+        case '/': // Italic
+        {
+            SvxPostureItem aSvxPostureItem( ITALIC_NORMAL, SID_ATTR_CHAR_POSTURE );
+            rDoc.SetAttr( nFndPos, nAttrEnd,
+                            SID_ATTR_CHAR_POSTURE,
+                            aSvxPostureItem);
+        }
+        break;
+        case '-': // Strikeout
+        {
+            SvxCrossedOutItem aSvxCrossedOutItem( STRIKEOUT_SINGLE, SID_ATTR_CHAR_STRIKEOUT );
+            rDoc.SetAttr( nFndPos, nAttrEnd,
+                            SID_ATTR_CHAR_STRIKEOUT,
+                            aSvxCrossedOutItem);
+        }
+        break;
+        default: // Underline
+        {
+            SvxUnderlineItem aSvxUnderlineItem( LINESTYLE_SINGLE, SID_ATTR_CHAR_UNDERLINE );
+            rDoc.SetAttr( nFndPos, nAttrEnd,
+                            SID_ATTR_CHAR_UNDERLINE,
+                            aSvxUnderlineItem);
+        }
+        break;
+    }
+
+    return true;
+}
+
+// Capitalize first letter of every sentence
+void SvxAutoCorrect::FnCapitalStartSentence( SvxAutoCorrDoc& rDoc,
+ const OUString& rTxt, bool bNormalPos,
+ sal_Int32 nSttPos, sal_Int32 nEndPos,
+ LanguageType eLang )
+{
+
+ if( rTxt.isEmpty() || nEndPos <= nSttPos )
+ return;
+
+ CharClass& rCC = GetCharClass( eLang );
+ OUString aText( rTxt );
+ const sal_Unicode *pStart = aText.getStr(),
+ *pStr = pStart + nEndPos,
+ *pWordStt = nullptr,
+ *pDelim = nullptr;
+
+ bool bAtStart = false;
+ do {
+ --pStr;
+ if (rCC.isLetter(aText, pStr - pStart))
+ {
+ if( !pWordStt )
+ pDelim = pStr+1;
+ pWordStt = pStr;
+ }
+ else if (pWordStt && !rCC.isDigit(aText, pStr - pStart))
+ {
+ if( (lcl_IsInAsciiArr( "-'", *pStr ) || *pStr == cApostrophe) && // These characters are allowed in words
+ pWordStt - 1 == pStr &&
+ // Installation at beginning of paragraph. Replaced < by <= (#i38971#)
+ (pStart + 1) <= pStr &&
+ rCC.isLetter(aText, pStr-1 - pStart))
+ pWordStt = --pStr;
+ else
+ break;
+ }
+ bAtStart = (pStart == pStr);
+ } while( !bAtStart );
+
+ if (!pWordStt)
+ return; // no character to be replaced
+
+
+ if (rCC.isDigit(aText, pStr - pStart))
+ return; // already ok
+
+ if (IsUpperLetter(rCC.getCharacterType(aText, pWordStt - pStart)))
+ return; // already ok
+
+ //See if the text is the start of a protocol string, e.g. have text of
+ //"http" see if it is the start of "http:" and if so leave it alone
+ sal_Int32 nIndex = pWordStt - pStart;
+ sal_Int32 nProtocolLen = pDelim - pWordStt + 1;
+ if (nIndex + nProtocolLen <= rTxt.getLength())
+ {
+ if (INetURLObject::CompareProtocolScheme(rTxt.subView(nIndex, nProtocolLen)) != INetProtocol::NotValid)
+ return; // already ok
+ }
+
+ if (0x1 == *pWordStt || 0x2 == *pWordStt)
+ return; // already ok
+
+ // Only capitalize, if string before specified characters is long enough
+ if( *pDelim && 2 >= pDelim - pWordStt &&
+ lcl_IsInAsciiArr( ".-)>", *pDelim ) )
+ return;
+
+ // tdf#59666 don't capitalize single Greek letters (except in Greek texts)
+ if ( 1 == pDelim - pWordStt && 0x03B1 <= *pWordStt && *pWordStt <= 0x03C9 && eLang != LANGUAGE_GREEK )
+ return;
+
+ if( !bAtStart ) // Still no beginning of a paragraph?
+ {
+ if (NonFieldWordDelim(*pStr))
+ {
+ for (;;)
+ {
+ bAtStart = (pStart == pStr--);
+ if (bAtStart || !NonFieldWordDelim(*pStr))
+ break;
+ }
+ }
+ // Asian full stop, full width full stop, full width exclamation mark
+ // and full width question marks are treated as word delimiters
+ else if ( 0x3002 != *pStr && 0xFF0E != *pStr && 0xFF01 != *pStr &&
+ 0xFF1F != *pStr )
+ return; // no valid separator -> no replacement
+ }
+
+ // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list
+ if (FindInWordStartExceptList(eLang, OUString(pWordStt, pDelim - pWordStt)))
+ return;
+
+ if( bAtStart ) // at the beginning of a paragraph?
+ {
+ // Check out the previous paragraph, if it exists.
+ // If so, then check to paragraph separator at the end.
+ OUString const*const pPrevPara = rDoc.GetPrevPara(bNormalPos);
+ if (!pPrevPara)
+ {
+ // valid separator -> replace
+ OUString sChar( *pWordStt );
+ sChar = rCC.titlecase(sChar); //see fdo#56740
+ if (sChar != OUStringChar(*pWordStt))
+ rDoc.ReplaceRange( pWordStt - pStart, 1, sChar );
+ return;
+ }
+
+ aText = *pPrevPara;
+ bAtStart = false;
+ pStart = aText.getStr();
+ pStr = pStart + aText.getLength();
+
+ do { // overwrite all blanks
+ --pStr;
+ if (!NonFieldWordDelim(*pStr))
+ break;
+ bAtStart = (pStart == pStr);
+ } while( !bAtStart );
+
+ if( bAtStart )
+ return; // no valid separator -> no replacement
+ }
+
+ // Found [ \t]+[A-Z0-9]+ until here. Test now on the paragraph separator.
+ // all three can happen, but not more than once!
+ const sal_Unicode* pExceptStt = nullptr;
+ bool bContinue = true;
+ Flags nFlag = Flags::NONE;
+ do
+ {
+ switch (*pStr)
+ {
+ // Western and Asian full stop
+ case '.':
+ case 0x3002:
+ case 0xFF0E:
+ {
+ if (pStr >= pStart + 2 && *(pStr - 2) == '.')
+ {
+ //e.g. text "f.o.o. word": Now currently considering
+ //capitalizing word but second last character of
+ //previous word is a . So probably last word is an
+ //anagram that ends in . and not truly the end of a
+ //previous sentence, so don't autocapitalize this word
+ return;
+ }
+ if (nFlag & Flags::FullStop)
+ return; // no valid separator -> no replacement
+ nFlag |= Flags::FullStop;
+ pExceptStt = pStr;
+ }
+ break;
+ case '!':
+ case 0xFF01:
+ {
+ if (nFlag & Flags::ExclamationMark)
+ return; // no valid separator -> no replacement
+ nFlag |= Flags::ExclamationMark;
+ }
+ break;
+ case '?':
+ case 0xFF1F:
+ {
+ if (nFlag & Flags::QuestionMark)
+ return; // no valid separator -> no replacement
+ nFlag |= Flags::QuestionMark;
+ }
+ break;
+ default:
+ if (nFlag == Flags::NONE)
+ return; // no valid separator -> no replacement
+ else
+ bContinue = false;
+ break;
+ }
+
+ if (bContinue && pStr-- == pStart)
+ {
+ return; // no valid separator -> no replacement
+ }
+ } while (bContinue);
+ if (Flags::FullStop != nFlag)
+ pExceptStt = nullptr;
+
+ // Only capitalize, if string is long enough
+ if( 2 > ( pStr - pStart ) )
+ return;
+
+ if (!rCC.isLetterNumeric(aText, pStr-- - pStart))
+ {
+ bool bValid = false, bAlphaFnd = false;
+ const sal_Unicode* pTmpStr = pStr;
+ while( !bValid )
+ {
+ if( rCC.isDigit( aText, pTmpStr - pStart ) )
+ {
+ bValid = true;
+ pStr = pTmpStr - 1;
+ }
+ else if( rCC.isLetter( aText, pTmpStr - pStart ) )
+ {
+ if( bAlphaFnd )
+ {
+ bValid = true;
+ pStr = pTmpStr;
+ }
+ else
+ bAlphaFnd = true;
+ }
+ else if (bAlphaFnd || NonFieldWordDelim(*pTmpStr))
+ break;
+
+ if( pTmpStr == pStart )
+ break;
+
+ --pTmpStr;
+ }
+
+ if( !bValid )
+ return; // no valid separator -> no replacement
+ }
+
+ bool bNumericOnly = '0' <= *(pStr+1) && *(pStr+1) <= '9';
+
+ // Search for the beginning of the word
+ while (!NonFieldWordDelim(*pStr))
+ {
+ if( bNumericOnly && rCC.isLetter( aText, pStr - pStart ) )
+ bNumericOnly = false;
+
+ if( pStart == pStr )
+ break;
+
+ --pStr;
+ }
+
+ if( bNumericOnly ) // consists of only numbers, then not
+ return;
+
+ if (NonFieldWordDelim(*pStr))
+ ++pStr;
+
+ OUString sWord;
+
+ // check on the basis of the exception list
+ if( pExceptStt )
+ {
+ sWord = OUString(pStr, pExceptStt - pStr + 1);
+ if( FindInCplSttExceptList(eLang, sWord) )
+ return;
+
+ // Delete all non alphanumeric. Test the characters at the
+ // beginning/end of the word ( recognizes: "(min.", "/min.", and so on.)
+ OUString sTmp( sWord );
+ while( !sTmp.isEmpty() &&
+ !rCC.isLetterNumeric( sTmp, 0 ) )
+ sTmp = sTmp.copy(1);
+
+ // Remove all non alphanumeric characters towards the end up until
+ // the last one.
+ sal_Int32 nLen = sTmp.getLength();
+ while( nLen && !rCC.isLetterNumeric( sTmp, nLen-1 ) )
+ --nLen;
+ if( nLen + 1 < sTmp.getLength() )
+ sTmp = sTmp.copy( 0, nLen + 1 );
+
+ if( !sTmp.isEmpty() && sTmp.getLength() != sWord.getLength() &&
+ FindInCplSttExceptList(eLang, sTmp))
+ return;
+
+ if(FindInCplSttExceptList(eLang, sWord, true))
+ return;
+ }
+
+ // Ok, then replace
+ sal_Unicode cSave = *pWordStt;
+ nSttPos = pWordStt - rTxt.getStr();
+ OUString sChar = rCC.titlecase(OUString(cSave)); //see fdo#56740
+ bool bRet = sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar );
+
+ // Perhaps someone wants to have the word
+ if( bRet && ACFlags::SaveWordCplSttLst & nFlags )
+ rDoc.SaveCpltSttWord( ACFlags::CapitalStartSentence, nSttPos, sWord, cSave );
+}
+
+// Correct accidental use of the cAPS LOCK key.
+//
+// The word occupies [nSttPos, nEndPos) in rTxt. It is rewritten only when
+//  - it has at least two characters,
+//  - the first one is a lowercase and the second one an uppercase letter,
+//  - it is not found by FindInWordStartExceptList(), and
+//  - none of the remaining characters is a lowercase letter.
+// In that case the first character is uppercased, every further uppercase
+// letter is lowercased and all other characters are kept.
+// Returns true if the word was replaced in rDoc, false otherwise.
+bool SvxAutoCorrect::FnCorrectCapsLock( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
+                                        sal_Int32 nSttPos, sal_Int32 nEndPos,
+                                        LanguageType eLang )
+{
+    const sal_Int32 nWordLen = nEndPos - nSttPos;
+    if (nWordLen < 2)
+        return false; // too short to show the "xY..." pattern
+
+    CharClass& rCC = GetCharClass( eLang );
+
+    // The word has to start with "lowercase, uppercase".
+    const bool bInvertedStart =
+        IsLowerLetter(rCC.getCharacterType(rTxt, nSttPos)) &&
+        IsUpperLetter(rCC.getCharacterType(rTxt, nSttPos+1));
+    if (!bInvertedStart)
+        return false;
+
+    OUStringBuffer aFixed;
+    aFixed.append( rCC.uppercase(OUString(rTxt[nSttPos])) );
+    aFixed.append( rCC.lowercase(OUString(rTxt[nSttPos+1])) );
+
+    // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list
+    if (FindInWordStartExceptList(eLang, rTxt.copy(nSttPos, nWordLen)))
+        return false;
+
+    for (sal_Int32 nIdx = nSttPos + 2; nIdx < nEndPos; ++nIdx)
+    {
+        const auto nCharType = rCC.getCharacterType(rTxt, nIdx);
+
+        // A single lowercase letter in the rest disqualifies the whole word.
+        if (IsLowerLetter(nCharType))
+            return false;
+
+        if (IsUpperLetter(nCharType))
+            aFixed.append( rCC.lowercase(OUString(rTxt[nIdx])) ); // flip the case
+        else
+            aFixed.append( rTxt[nIdx] ); // not a letter: keep unchanged
+    }
+
+    // Swap the old word for the corrected one.
+    rDoc.Delete(nSttPos, nEndPos);
+    rDoc.Insert(nSttPos, aFixed.makeStringAndClear());
+
+    return true;
+}
+
+
+// Returns the quotation mark that should replace the typed character cInsChar.
+//
+// cInsChar '"' selects the double quote, anything else the single quote;
+// bSttQuote selects the opening (true) or closing (false) variant.
+// If the corresponding Get{Start,End}{Double,Single}Quote() value is 0, the
+// quote is taken from the locale data of eLang; for LANGUAGE_NONE, or when
+// the locale data string is empty, cInsChar itself is returned.
+sal_Unicode SvxAutoCorrect::GetQuote( sal_Unicode cInsChar, bool bSttQuote,
+                                      LanguageType eLang ) const
+{
+    const bool bDouble = '\"' == cInsChar;
+
+    sal_Unicode cConfigured;
+    if( bSttQuote )
+        cConfigured = bDouble ? GetStartDoubleQuote() : GetStartSingleQuote();
+    else
+        cConfigured = bDouble ? GetEndDoubleQuote() : GetEndSingleQuote();
+
+    if( cConfigured )
+        return cConfigured;
+
+    // Nothing configured: find the right character through the language.
+    if( LANGUAGE_NONE == eLang )
+        return cInsChar;
+
+    LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
+    OUString sLocaleQuote;
+    if( bSttQuote )
+        sLocaleQuote = bDouble ? rLcl.getDoubleQuotationMarkStart()
+                               : rLcl.getQuotationMarkStart();
+    else
+        sLocaleQuote = bDouble ? rLcl.getDoubleQuotationMarkEnd()
+                               : rLcl.getQuotationMarkEnd();
+
+    if( sLocaleQuote.isEmpty() )
+        return cInsChar;
+    return sLocaleQuote[0];
+}
+
+// Puts the typed quote character cInsChar into rDoc at nInsPos (inserted if
+// bIns, otherwise replacing the character there) and then replaces it with
+// the typographic character selected by eType:
+//  - DoubleAngleQuote: a left/right angle quote (the single variant for
+//    LANGUAGE_FRENCH_SWISS); when cInsChar is not '"' the character before
+//    nInsPos is deleted first,
+//  - UseApostrophe: cApostrophe,
+//  - otherwise: the result of GetQuote( cInsChar, bSttQuote, eLang ).
+// NonBreakingSpace additionally inserts cNonBreakingSpace next to the quote,
+// CapitalizeIAm finally replaces the character before the quote with "I".
+void SvxAutoCorrect::InsertQuote( SvxAutoCorrDoc& rDoc, sal_Int32 nInsPos,
+                                  sal_Unicode cInsChar, bool bSttQuote,
+                                  bool bIns, LanguageType eLang, ACQuotes eType ) const
+{
+    sal_Unicode cReplacement;
+    switch( eType )
+    {
+        case ACQuotes::DoubleAngleQuote:
+        {
+            const bool bSwiss = eLang == LANGUAGE_FRENCH_SWISS;
+            // pressing " inside a quotation -> use second level angle quotes:
+            // start position and Romanian OR not start position and Hungarian
+            const bool bLeftQuote = '\"' == cInsChar &&
+                                    bSttQuote == (eLang != LANGUAGE_HUNGARIAN);
+            if( '<' == cInsChar || bLeftQuote )
+                cReplacement = bSwiss ? cLeftSingleAngleQuote : cLeftDoubleAngleQuote;
+            else
+                cReplacement = bSwiss ? cRightSingleAngleQuote : cRightDoubleAngleQuote;
+            break;
+        }
+        case ACQuotes::UseApostrophe:
+            cReplacement = cApostrophe;
+            break;
+        default:
+            cReplacement = GetQuote( cInsChar, bSttQuote, eLang );
+            break;
+    }
+
+    // First the character as typed ...
+    const OUString sTyped( cInsChar );
+    if( bIns )
+        rDoc.Insert( nInsPos, sTyped );
+    else
+        rDoc.Replace( nInsPos, sTyped );
+
+    if( eType == ACQuotes::NonBreakingSpace )
+    {
+        // The space goes after an opening and before a closing quote; in the
+        // latter case the quote itself moves one position to the right.
+        const sal_Int32 nSpacePos = bSttQuote ? nInsPos+1 : nInsPos;
+        if( rDoc.Insert( nSpacePos, OUStringChar(cNonBreakingSpace) ) && !bSttQuote )
+            ++nInsPos;
+    }
+    else if( eType == ACQuotes::DoubleAngleQuote && cInsChar != '\"' )
+    {
+        // Typed "<<" or ">>": drop the first of the two characters.
+        rDoc.Delete( nInsPos-1, nInsPos);
+        --nInsPos;
+    }
+
+    // ... then its replacement.
+    rDoc.Replace( nInsPos, OUString(cReplacement) );
+
+    // i' -> I' in English (last step for the Undo)
+    if( eType == ACQuotes::CapitalizeIAm )
+        rDoc.Replace( nInsPos-1, "I" );
+}
+
+// Returns the quote for cInsChar as a string, using the document language at
+// nInsPos. For a double quote in French (except LANGUAGE_FRENCH_SWISS) a blank
+// is added: after an opening quote, before a closing quote.
+OUString SvxAutoCorrect::GetQuote( SvxAutoCorrDoc const & rDoc, sal_Int32 nInsPos,
+                                   sal_Unicode cInsChar, bool bSttQuote )
+{
+    const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
+    OUString sQuote( GetQuote( cInsChar, bSttQuote, eLang ) );
+
+    const bool bFrenchSpacing = '\"' == cInsChar &&
+                                primary(eLang) == primary(LANGUAGE_FRENCH) &&
+                                eLang != LANGUAGE_FRENCH_SWISS;
+    if( !bFrenchSpacing )
+        return sQuote;
+
+    if( bSttQuote )
+        sQuote += " ";
+    else
+        sQuote = " " + sQuote;
+    return sQuote;
+}
+
+// Search a preceding character (e.g. an opening quote) in the paragraph
+// before the insert position.
+//
+// Walks backwards through rTxt, starting with the character in front of nPos.
+// Returns true as soon as sPrecedingChar is found, false as soon as one of
+// the characters of the 0-terminated array aStopChars is found, and false if
+// the start of the text is reached without finding either.
+// For nPos <= 0 there is no preceding character and false is returned; the
+// loop tests the position before indexing so that rTxt[-1] is never read.
+static bool lcl_HasPrecedingChar( std::u16string_view rTxt, sal_Int32 nPos,
+                const sal_Unicode sPrecedingChar, const sal_Unicode* aStopChars )
+{
+    while ( nPos > 0 )
+    {
+        const sal_Unicode cCurrent = rTxt[ --nPos ];
+        if ( cCurrent == sPrecedingChar )
+            return true;
+
+        for ( const sal_Unicode* pStop = aStopChars; *pStop; ++pStop )
+            if ( cCurrent == *pStop )
+                return false;
+    }
+
+    return false;
+}
+
+/* Entry point for auto-correction while typing.
+ *
+ * If cChar is not 0 it is put into rDoc at nInsPos (rDoc.Insert() if bInsert,
+ * rDoc.Replace() otherwise), possibly replaced by a typographic quote.
+ * Afterwards the corrections enabled via IsAutoCorrFlag() are run on the word
+ * in front of nInsPos. rTxt is the text that all positions refer to.
+ *
+ * io_bNbspRunNext is read on entry, reset to false and handed on to
+ * FnAddNonBrkSpace().
+ * pFrameWin is only used for the caps lock handling and is tested for null
+ * before it is used.
+ *
+ * WARNING: rTxt may become invalid, see comment below */
+void SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
+ sal_Int32 nInsPos, sal_Unicode cChar,
+ bool bInsert, bool& io_bNbspRunNext, vcl::Window const * pFrameWin )
+{
+ bool bIsNextRun = io_bNbspRunNext;
+ io_bNbspRunNext = false; // if it was set, then it has to be turned off
+
+ do{ // executed exactly once (see the closing while( false )); "break" skips all remaining checks
+ if( cChar )
+ {
+ // Prevent double space: a blank typed directly after a blank is not put into the document
+ if( nInsPos && ' ' == cChar &&
+ IsAutoCorrFlag( ACFlags::IgnoreDoubleSpace ) &&
+ ' ' == rTxt[ nInsPos - 1 ])
+ {
+ break;
+ }
+
+ bool bSingle = '\'' == cChar;
+ bool bIsReplaceQuote =
+ (IsAutoCorrFlag( ACFlags::ChgQuotes ) && ('\"' == cChar )) ||
+ (IsAutoCorrFlag( ACFlags::ChgSglQuotes ) && bSingle );
+ if( bIsReplaceQuote )
+ {
+ bool bSttQuote = !nInsPos; // at position 0 it is always treated as an opening quote
+ ACQuotes eType = ACQuotes::NONE;
+ const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
+ if (!bSttQuote)
+ {
+ sal_Unicode cPrev = rTxt[ nInsPos-1 ];
+ bSttQuote = NonFieldWordDelim(cPrev) ||
+ lcl_IsInAsciiArr( "([{", cPrev ) ||
+ ( cEmDash == cPrev ) ||
+ ( cEnDash == cPrev );
+ // tdf#38394 use opening quotation mark << in French l'<<word>>
+ if ( !bSingle && !bSttQuote && cPrev == cApostrophe &&
+ primary(eLang) == primary(LANGUAGE_FRENCH) &&
+ ( ( ( nInsPos == 2 || ( nInsPos > 2 && IsWordDelim( rTxt[ nInsPos-3 ] ) ) ) &&
+ // abbreviated form of ce, de, je, la, le, ne, me, te, se or si
+ OUString("cdjlnmtsCDJLNMTS").indexOf( rTxt[ nInsPos-2 ] ) > -1 ) ||
+ ( ( nInsPos == 3 || (nInsPos > 3 && IsWordDelim( rTxt[ nInsPos-4 ] ) ) ) &&
+ // abbreviated form of que
+ ( rTxt[ nInsPos-2 ] == 'u' || rTxt[ nInsPos-2 ] == 'U' ) &&
+ ( rTxt[ nInsPos-3 ] == 'q' || rTxt[ nInsPos-3 ] == 'Q' ) ) ) )
+ {
+ bSttQuote = true;
+ }
+ // tdf#108423 for capitalization of English i'm
+ else if ( bSingle && ( cPrev == 'i' ) &&
+ primary(eLang) == primary(LANGUAGE_ENGLISH) &&
+ ( nInsPos == 1 || IsWordDelim( rTxt[ nInsPos-2 ] ) ) )
+ {
+ eType = ACQuotes::CapitalizeIAm;
+ }
+ // tdf#133524 support >>Hungarian<< and <<Romanian>> secondary level quotations
+ else if ( !bSingle && nInsPos &&
+ ( ( eLang == LANGUAGE_HUNGARIAN &&
+ lcl_HasPrecedingChar( rTxt, nInsPos,
+ bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEnd[0],
+ bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEnd + 1 ) ) ||
+ ( eLang.anyOf(
+ LANGUAGE_ROMANIAN,
+ LANGUAGE_ROMANIAN_MOLDOVA ) &&
+ lcl_HasPrecedingChar( rTxt, nInsPos,
+ bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEndRo[0],
+ bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEndRo + 1 ) ) ) )
+ {
+ LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
+ // only if the opening double quotation mark is the default one
+ if ( rLcl.getDoubleQuotationMarkStart() == OUStringChar(aStopDoubleAngleQuoteStart[0]) )
+ eType = ACQuotes::DoubleAngleQuote;
+ }
+ else if ( bSingle && nInsPos && !bSttQuote &&
+ // tdf#128860 use apostrophe outside of second level quotation in Czech, German, Icelandic,
+ // Slovak and Slovenian instead of the – in this case, bad – closing quotation mark U+2018.
+ // tdf#123786 the same for Russian and Ukrainian
+ ( ( eLang.anyOf (
+ LANGUAGE_CZECH,
+ LANGUAGE_GERMAN,
+ LANGUAGE_GERMAN_SWISS,
+ LANGUAGE_GERMAN_AUSTRIAN,
+ LANGUAGE_GERMAN_LUXEMBOURG,
+ LANGUAGE_GERMAN_LIECHTENSTEIN,
+ LANGUAGE_ICELANDIC,
+ LANGUAGE_SLOVAK,
+ LANGUAGE_SLOVENIAN ) &&
+ !lcl_HasPrecedingChar( rTxt, nInsPos, aStopSingleQuoteEnd[0], aStopSingleQuoteEnd + 1 ) ) ||
+ ( eLang.anyOf (
+ LANGUAGE_RUSSIAN,
+ LANGUAGE_UKRAINIAN ) &&
+ !lcl_HasPrecedingChar( rTxt, nInsPos, aStopSingleQuoteEndRuUa[0], aStopSingleQuoteEndRuUa + 1 ) ) ) )
+ {
+ LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
+ CharClass& rCC = GetCharClass( eLang );
+ if ( ( rLcl.getQuotationMarkStart() == OUStringChar(aStopSingleQuoteEnd[0]) ||
+ rLcl.getQuotationMarkStart() == OUStringChar(aStopSingleQuoteEndRuUa[0]) ) &&
+ // use apostrophe only after letters, not after digits or punctuation
+ rCC.isLetter(rTxt, nInsPos-1) )
+ {
+ eType = ACQuotes::UseApostrophe;
+ }
+ }
+ }
+
+ if ( eType == ACQuotes::NONE && !bSingle &&
+ ( primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS ) )
+ eType = ACQuotes::NonBreakingSpace;
+
+ InsertQuote( rDoc, nInsPos, cChar, bSttQuote, bInsert, eLang, eType );
+ break;
+ }
+ // tdf#133524 change "<<" and ">>" to double angle quotation marks
+ else if ( IsAutoCorrFlag( ACFlags::ChgQuotes ) &&
+ IsAutoCorrFlag( ACFlags::ChgAngleQuotes ) &&
+ ('<' == cChar || '>' == cChar) &&
+ nInsPos > 0 && cChar == rTxt[ nInsPos-1 ] )
+ {
+ const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
+ if ( eLang.anyOf(
+ LANGUAGE_CATALAN, // primary level
+ LANGUAGE_CATALAN_VALENCIAN, // primary level
+ LANGUAGE_FINNISH, // alternative primary level
+ LANGUAGE_FRENCH_SWISS, // second level
+ LANGUAGE_GALICIAN, // primary level
+ LANGUAGE_HUNGARIAN, // second level
+ LANGUAGE_POLISH, // second level
+ LANGUAGE_PORTUGUESE, // primary level
+ LANGUAGE_PORTUGUESE_BRAZILIAN, // primary level
+ LANGUAGE_ROMANIAN, // second level
+ LANGUAGE_ROMANIAN_MOLDOVA, // second level
+ LANGUAGE_SWEDISH, // alternative primary level
+ LANGUAGE_SWEDISH_FINLAND, // alternative primary level
+ LANGUAGE_UKRAINIAN, // primary level
+ LANGUAGE_USER_ARAGONESE, // primary level
+ LANGUAGE_USER_ASTURIAN ) || // primary level
+ primary(eLang) == primary(LANGUAGE_GERMAN) || // alternative primary level
+ primary(eLang) == primary(LANGUAGE_SPANISH) ) // primary level
+ {
+ InsertQuote( rDoc, nInsPos, cChar, false, bInsert, eLang, ACQuotes::DoubleAngleQuote );
+ break;
+ }
+ }
+
+ if( bInsert ) // no quote replacement took place: put cChar itself into the document
+ rDoc.Insert( nInsPos, OUString(cChar) );
+ else
+ rDoc.Replace( nInsPos, OUString(cChar) );
+
+ // Hardspaces autocorrection
+ if ( IsAutoCorrFlag( ACFlags::AddNonBrkSpace ) )
+ {
+ if ( NeedsHardspaceAutocorr( cChar ) &&
+ FnAddNonBrkSpace( rDoc, rTxt, nInsPos, GetDocLanguage( rDoc, nInsPos ), io_bNbspRunNext ) )
+ {
+ ; // FnAddNonBrkSpace() returned true: nothing more to do in this branch
+ }
+ else if ( bIsNextRun && !IsAutoCorrectChar( cChar ) )
+ {
+ // Remove the NBSP if it wasn't an autocorrection
+ if ( nInsPos != 0 && NeedsHardspaceAutocorr( rTxt[ nInsPos - 1 ] ) &&
+ cChar != ' ' && cChar != '\t' && cChar != cNonBreakingSpace )
+ {
+ // Look for the last HARD_SPACE, walking backwards from the character before nInsPos
+ sal_Int32 nPos = nInsPos - 1;
+ bool bContinue = true;
+ while ( bContinue )
+ {
+ const sal_Unicode cTmpChar = rTxt[ nPos ];
+ if ( cTmpChar == cNonBreakingSpace )
+ {
+ rDoc.Delete( nPos, nPos + 1 );
+ bContinue = false;
+ }
+ else if ( !NeedsHardspaceAutocorr( cTmpChar ) || nPos == 0 )
+ bContinue = false;
+ nPos--;
+ }
+ }
+ }
+ }
+ }
+
+ if( !nInsPos ) // there is no text in front of the insert position
+ break;
+
+ sal_Int32 nPos = nInsPos - 1;
+
+ if( IsWordDelim( rTxt[ nPos ])) // the preceding character is a word delimiter: no word to correct
+ break;
+
+ // Set bold or underline automatically?
+ if (('*' == cChar || '_' == cChar || '/' == cChar || '-' == cChar) && (nPos+1 < rTxt.getLength()))
+ {
+ if( IsAutoCorrFlag( ACFlags::ChgWeightUnderl ) )
+ {
+ FnChgWeightUnderl( rDoc, rTxt, nPos+1 );
+ }
+ break;
+ }
+
+ while( nPos && !IsWordDelim( rTxt[ --nPos ]))
+ ; // empty body: the condition itself steps nPos back to the previous delimiter or to 0
+
+ // Found a Paragraph-start or a Blank, search for the word shortcut in
+ // auto.
+ sal_Int32 nCapLttrPos = nPos+1; // on the 1st Character
+ if( !nPos && !IsWordDelim( rTxt[ 0 ]))
+ --nCapLttrPos; // begin of paragraph and no blank
+
+ const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos );
+ CharClass& rCC = GetCharClass( eLang );
+
+ // no symbol characters
+ if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nInsPos ))
+ break;
+
+ if( IsAutoCorrFlag( ACFlags::Autocorrect ) &&
+ // tdf#134940 fix regression of arrow "-->" resulted by premature
+ // replacement of "--" since '>' was added to IsAutoCorrectChar()
+ '>' != cChar )
+ {
+ // WARNING ATTENTION: rTxt is an alias of the text node's OUString
+ // and becomes INVALID if ChgAutoCorrWord returns true!
+ // => use aPara/pPara to create a valid copy of the string!
+ OUString aPara;
+ OUString* pPara = IsAutoCorrFlag(ACFlags::CapitalStartSentence) ? &aPara : nullptr;
+
+ bool bChgWord = rDoc.ChgAutoCorrWord( nCapLttrPos, nInsPos,
+ *this, pPara );
+ if( !bChgWord )
+ {
+ sal_Int32 nCapLttrPos1 = nCapLttrPos, nInsPos1 = nInsPos; // second attempt on a narrowed range
+ while( nCapLttrPos1 < nInsPos &&
+ lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nCapLttrPos1 ] )
+ )
+ ++nCapLttrPos1;
+ while( nCapLttrPos1 < nInsPos1 && nInsPos1 &&
+ lcl_IsInAsciiArr( sImplEndSkipChars, rTxt[ nInsPos1-1 ] )
+ )
+ --nInsPos1;
+
+ if( (nCapLttrPos1 != nCapLttrPos || nInsPos1 != nInsPos ) &&
+ nCapLttrPos1 < nInsPos1 &&
+ rDoc.ChgAutoCorrWord( nCapLttrPos1, nInsPos1, *this, pPara ))
+ {
+ bChgWord = true;
+ nCapLttrPos = nCapLttrPos1;
+ }
+ }
+
+ if( bChgWord )
+ {
+ if( !aPara.isEmpty() ) // from here on only aPara is used, never rTxt
+ {
+ sal_Int32 nEnd = nCapLttrPos;
+ while( nEnd < aPara.getLength() &&
+ !IsWordDelim( aPara[ nEnd ]))
+ ++nEnd;
+
+ // Capital letter at beginning of paragraph?
+ if( IsAutoCorrFlag( ACFlags::CapitalStartSentence ) )
+ {
+ FnCapitalStartSentence( rDoc, aPara, false,
+ nCapLttrPos, nEnd, eLang );
+ }
+
+ if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) )
+ {
+ FnChgToEnEmDash( rDoc, aPara, nCapLttrPos, nEnd, eLang );
+ }
+ }
+ break;
+ }
+ }
+
+ if( IsAutoCorrFlag( ACFlags::TransliterateRTL ) && GetDocLanguage( rDoc, nInsPos ) == LANGUAGE_HUNGARIAN )
+ {
+ // WARNING ATTENTION: rTxt is an alias of the text node's OUString
+ // and becomes INVALID if TransliterateRTLWord returns true!
+ if ( rDoc.TransliterateRTLWord( nCapLttrPos, nInsPos ) )
+ break;
+ }
+
+ if( ( IsAutoCorrFlag( ACFlags::ChgOrdinalNumber ) &&
+ (nInsPos >= 2 ) && // fdo#69762 avoid autocorrect for 2e-3
+ ( '-' != cChar || 'E' != rtl::toAsciiUpperCase(rTxt[nInsPos-1]) || '0' > rTxt[nInsPos-2] || '9' < rTxt[nInsPos-2] ) &&
+ FnChgOrdinalNumber( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ||
+ ( IsAutoCorrFlag( ACFlags::SetINetAttr ) &&
+ ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) &&
+ FnSetINetAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) )
+ ; // one of the two calls above returned true: the corrections in the else branch are skipped
+ else
+ {
+ bool bLockKeyOn = pFrameWin && (pFrameWin->GetIndicatorState() & KeyIndicatorState::CAPSLOCK);
+ bool bUnsupported = lcl_IsUnsupportedUnicodeChar( rCC, rTxt, nCapLttrPos, nInsPos );
+
+ if ( bLockKeyOn && IsAutoCorrFlag( ACFlags::CorrectCapsLock ) &&
+ FnCorrectCapsLock( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) )
+ {
+ // Correct accidental use of cAPS LOCK key (do this only when
+ // the caps or shift lock key is pressed). Turn off the caps
+ // lock afterwards.
+ pFrameWin->SimulateKeyPress( KEY_CAPSLOCK );
+ }
+
+ // Capital letter at beginning of paragraph ?
+ if( !bUnsupported &&
+ IsAutoCorrFlag( ACFlags::CapitalStartSentence ) )
+ {
+ FnCapitalStartSentence( rDoc, rTxt, true, nCapLttrPos, nInsPos, eLang );
+ }
+
+ // Two capital letters at beginning of word ??
+ if( !bUnsupported &&
+ IsAutoCorrFlag( ACFlags::CapitalStartWord ) )
+ {
+ FnCapitalStartWord( rDoc, rTxt, nCapLttrPos, nInsPos, eLang );
+ }
+
+ if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) )
+ {
+ FnChgToEnEmDash( rDoc, rTxt, nCapLttrPos, nInsPos, eLang );
+ }
+ }
+
+ } while( false );
+}
+
+// Returns the language lists stored in m_aLangTable for eLang. If there is
+// no entry yet, CreateLanguageFile() is called first; the entry is expected
+// to exist afterwards.
+SvxAutoCorrectLanguageLists& SvxAutoCorrect::GetLanguageList_(
+                                                        LanguageType eLang )
+{
+    LanguageTag aTag( eLang );
+    auto aIter = m_aLangTable.find( aTag );
+    if( aIter == m_aLangTable.end() )
+    {
+        (void)CreateLanguageFile( aTag );
+        aIter = m_aLangTable.find( aTag );
+    }
+    return *(aIter->second);
+}
+
+// Calls SaveCplSttExceptList() on the lists registered for eLang. If there
+// is no (non-null) entry for that language, only a warning is emitted.
+void SvxAutoCorrect::SaveCplSttExceptList( LanguageType eLang )
+{
+    auto const aIter = m_aLangTable.find(LanguageTag(eLang));
+    const bool bHaveLists = aIter != m_aLangTable.end() && aIter->second;
+    if (!bHaveLists)
+    {
+        SAL_WARN("editeng", "Save an empty list? ");
+        return;
+    }
+    aIter->second->SaveCplSttExceptList();
+}
+
+// Calls SaveWordStartExceptList() on the lists registered for eLang. If
+// there is no (non-null) entry for that language, only a warning is emitted.
+void SvxAutoCorrect::SaveWordStartExceptList(LanguageType eLang)
+{
+    auto const aIter = m_aLangTable.find(LanguageTag(eLang));
+    const bool bHaveLists = aIter != m_aLangTable.end() && aIter->second;
+    if (!bHaveLists)
+    {
+        SAL_WARN("editeng", "Save an empty list? ");
+        return;
+    }
+    aIter->second->SaveWordStartExceptList();
+}
+
+// Adds a single word. The list will immediately be written to the file!
+//
+// The word goes into the lists of eLang if they are already in m_aLangTable,
+// otherwise into the general (LANGUAGE_UNDETERMINED) lists, which are created
+// on demand. Returns false if no lists are available or if
+// AddToCplSttExceptList() returns false.
+bool SvxAutoCorrect::AddCplSttException( const OUString& rNew,
+                                         LanguageType eLang )
+{
+    auto aIter = m_aLangTable.find(LanguageTag(eLang));
+    if (aIter == m_aLangTable.end())
+    {
+        // language not loaded: fall back to the general list
+        LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED);
+        aIter = m_aLangTable.find(aLangTagUndetermined);
+        if (aIter == m_aLangTable.end() && CreateLanguageFile(aLangTagUndetermined))
+            aIter = m_aLangTable.find(aLangTagUndetermined);
+    }
+
+    SvxAutoCorrectLanguageLists* const pLists =
+        aIter != m_aLangTable.end() ? aIter->second.get() : nullptr;
+    OSL_ENSURE(pLists, "No auto correction data");
+    if (!pLists)
+        return false;
+    return pLists->AddToCplSttExceptList(rNew);
+}
+
+// Adds a single word. The list will immediately be written to the file!
+//
+// The word goes into the lists of eLang if they are already in m_aLangTable,
+// otherwise into the general (LANGUAGE_UNDETERMINED) lists, which are created
+// on demand. Returns false if no lists are available or if
+// AddToWordStartExceptList() returns false.
+bool SvxAutoCorrect::AddWordStartException( const OUString& rNew,
+                                            LanguageType eLang )
+{
+    auto aIter = m_aLangTable.find(LanguageTag(eLang));
+    if (aIter == m_aLangTable.end())
+    {
+        // language not loaded: fall back to the general list
+        LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED);
+        aIter = m_aLangTable.find(aLangTagUndetermined);
+        if (aIter == m_aLangTable.end() && CreateLanguageFile(aLangTagUndetermined))
+            aIter = m_aLangTable.find(aLangTagUndetermined);
+    }
+
+    SvxAutoCorrectLanguageLists* const pLists =
+        aIter != m_aLangTable.end() ? aIter->second.get() : nullptr;
+    OSL_ENSURE(pLists, "No auto correction file!");
+    if (!pLists)
+        return false;
+    return pLists->AddToWordStartExceptList(rNew);
+}
+
+// Returns the word that ends directly in front of nPos in rTxt, or an empty
+// string if there is no suitable word:
+//  - nPos must not be 0,
+//  - the character at nPos (if any) must be a word delimiter and the one in
+//    front of it must not be,
+//  - after skipping leading characters from sImplSttSkipChars at least three
+//    characters must remain,
+//  - lcl_IsSymbolChar() must not report the range.
+OUString SvxAutoCorrect::GetPrevAutoCorrWord(SvxAutoCorrDoc const& rDoc, const OUString& rTxt,
+                                             sal_Int32 nPos)
+{
+    if( nPos == 0 )
+        return OUString();
+
+    const sal_Int32 nEnd = nPos;
+
+    // it must be followed by a blank or tab!
+    if( nPos < rTxt.getLength() && !IsWordDelim( rTxt[ nPos ] ) )
+        return OUString();
+
+    // ... and it must not end in a delimiter itself
+    --nPos;
+    if( IsWordDelim( rTxt[ nPos ] ) )
+        return OUString();
+
+    // Walk back to the previous delimiter or to the start of the text.
+    while( nPos > 0 )
+    {
+        --nPos;
+        if( IsWordDelim( rTxt[ nPos ] ) )
+            break;
+    }
+
+    // Found a Paragraph-start or a Blank: the word starts behind it, or at 0
+    // if the text starts with the word itself.
+    sal_Int32 nWordStart = nPos + 1;
+    if( nPos == 0 && !IsWordDelim( rTxt[ 0 ] ) )
+        nWordStart = 0;
+
+    while( lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nWordStart ] ) )
+    {
+        ++nWordStart;
+        if( nWordStart >= nEnd )
+            return OUString();
+    }
+
+    if( nEnd - nWordStart < 3 )
+        return OUString();
+
+    const LanguageType eLang = GetDocLanguage( rDoc, nWordStart );
+    CharClass& rCC = GetCharClass(eLang);
+
+    if( lcl_IsSymbolChar( rCC, rTxt, nWordStart, nEnd ) )
+        return OUString();
+
+    return rTxt.copy( nWordStart, nEnd - nWordStart );
+}
+
+// static
+// Collects candidate strings that end at nPos in rTxt.
+//
+// A window of at most nMaxLen characters in front of nPos is looked at. If
+// the window starts in the middle of a word, its start is moved forward to
+// the next word delimiter. The window itself is the first result; further
+// results are its suffixes that start at a word delimiter or directly behind
+// one. No candidate is shorter than nMinLen characters.
+std::vector<OUString> SvxAutoCorrect::GetChunkForAutoText(std::u16string_view rTxt,
+                                                          const sal_Int32 nPos)
+{
+    constexpr sal_Int32 nMinLen = 3;
+    constexpr sal_Int32 nMaxLen = 9;
+
+    std::vector<OUString> aChunks;
+    if (nPos < nMinLen)
+        return aChunks;
+
+    sal_Int32 nBegin = std::max<sal_Int32>(nPos - nMaxLen, 0);
+    // TODO: better detect word boundaries (not only whitespaces, but also e.g. punctuation)
+    const bool bStartsInsideWord = nBegin > 0 && !IsWordDelim(rTxt[nBegin-1]);
+    if (bStartsInsideWord)
+    {
+        while (nBegin + nMinLen <= nPos && !IsWordDelim(rTxt[nBegin]))
+            ++nBegin;
+    }
+    if (nBegin + nMinLen > nPos)
+        return aChunks;
+
+    const OUString sWindow( rTxt.substr(nBegin, nPos - nBegin) );
+    aChunks.push_back(sWindow);
+
+    bool bPrevIsDelim = IsWordDelim(sWindow[0]);
+    const sal_Int32 nLastStart = sWindow.getLength() - nMinLen;
+    for (sal_Int32 nStart = 1; nStart <= nLastStart; ++nStart)
+    {
+        const bool bCurIsDelim = IsWordDelim(sWindow[nStart]);
+        if (bPrevIsDelim || bCurIsDelim)
+            aChunks.push_back(sWindow.copy(nStart));
+        bPrevIsDelim = bCurIsDelim;
+    }
+    return aChunks;
+}
+
+// Creates the language lists for rLanguageTag and registers them in
+// m_aLangTable. Returns true if lists were created.
+//
+// The files are probed in this order: user directory, share directory, share
+// directory with the unlocalized name. If none of them is a document, lists
+// are only created when bNewFile is set (then on the user file name).
+// An unsuccessful probe is remembered in aLastFileTable; while that entry is
+// less than 2 minutes old the files are not probed again.
+bool SvxAutoCorrect::CreateLanguageFile( const LanguageTag& rLanguageTag, bool bNewFile )
+{
+    OSL_ENSURE(m_aLangTable.find(rLanguageTag) == m_aLangTable.end(), "Language already exists ");
+
+    OUString sUserDirFile( GetAutoCorrFileName( rLanguageTag, true ));
+    OUString sShareDirFile( sUserDirFile );
+
+    tools::Time nMinTime( 0, 2 ), nAktTime( tools::Time::SYSTEM ), nLastCheckTime( tools::Time::EMPTY );
+
+    auto nFndPos = aLastFileTable.find(rLanguageTag);
+    const bool bCheckedRecently =
+        nFndPos != aLastFileTable.end() &&
+        (nLastCheckTime.SetTime(nFndPos->second), nLastCheckTime < nAktTime) &&
+        nAktTime - nLastCheckTime < nMinTime;
+
+    bool bCreate;
+    if( bCheckedRecently )
+    {
+        // no need to test the files, because the last check is not older
+        // than 2 minutes; sShareDirFile still equals sUserDirFile here.
+        bCreate = bNewFile;
+    }
+    else
+    {
+        bCreate = FStatHelper::IsDocument( sUserDirFile );
+        if( !bCreate )
+        {
+            sShareDirFile = GetAutoCorrFileName( rLanguageTag );
+            bCreate = FStatHelper::IsDocument( sShareDirFile );
+        }
+        if( !bCreate )
+        {
+            sShareDirFile = GetAutoCorrFileName( rLanguageTag, false, false, true);
+            bCreate = FStatHelper::IsDocument( sShareDirFile );
+        }
+        if( !bCreate )
+        {
+            // nothing on disk: a new file would be based on the user file
+            sShareDirFile = sUserDirFile;
+            bCreate = bNewFile;
+        }
+    }
+
+    if( !bCreate )
+    {
+        // remember the time of the unsuccessful probe
+        if( !bCheckedRecently && !bNewFile )
+            aLastFileTable[rLanguageTag] = nAktTime.GetTime();
+        return false;
+    }
+
+    std::unique_ptr<SvxAutoCorrectLanguageLists> pLists(
+        new SvxAutoCorrectLanguageLists( *this, sShareDirFile, sUserDirFile ));
+    LanguageTag aTmp(rLanguageTag); // this insert() needs a non-const reference
+    m_aLangTable.insert(std::make_pair(aTmp, std::move(pLists)));
+    if (nFndPos != aLastFileTable.end())
+        aLastFileTable.erase(nFndPos);
+    return true;
+}
+
+// Forwards rShort/rLong to PutText() of the lists for eLang and returns its
+// result. The lists are created via CreateLanguageFile() if they are not in
+// m_aLangTable yet; false is returned if that fails.
+bool SvxAutoCorrect::PutText( const OUString& rShort, const OUString& rLong,
+                                LanguageType eLang )
+{
+    LanguageTag aTag( eLang );
+    auto aIter = m_aLangTable.find(aTag);
+    if (aIter == m_aLangTable.end())
+    {
+        if (!CreateLanguageFile(aTag))
+            return false;
+        aIter = m_aLangTable.find(aTag);
+    }
+    return aIter->second->PutText(rShort, rLong);
+}
+
+// Forwards the new and the deleted entries to MakeCombinedChanges() of the
+// lists for eLang. The lists are created via CreateLanguageFile() if they are
+// not in m_aLangTable yet; if that fails, nothing happens.
+void SvxAutoCorrect::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries,
+                                              std::vector<SvxAutocorrWord>& aDeleteEntries,
+                                              LanguageType eLang )
+{
+    LanguageTag aTag( eLang );
+    auto aIter = m_aLangTable.find(aTag);
+    if (aIter == m_aLangTable.end())
+    {
+        if (!CreateLanguageFile( aTag ))
+            return;
+        aIter = m_aLangTable.find( aTag );
+    }
+    aIter->second->MakeCombinedChanges( aNewEntries, aDeleteEntries );
+}
+
+// Return the replacement text (only for the SWG format; for all other formats
+// it can be taken from the word list). This implementation ignores both arguments and always returns false.
+bool SvxAutoCorrect::GetLongText( const OUString&, OUString& )
+{
+ return false;
+}
+
+void SvxAutoCorrect::refreshBlockList( const uno::Reference< embed::XStorage >& )
+{ // intentionally empty: this implementation ignores the storage argument
+}
+
+// Text with attributes (only for the SWG format). This implementation ignores all arguments and always returns false.
+bool SvxAutoCorrect::PutText( const css::uno::Reference < css::embed::XStorage >&,
+ const OUString&, const OUString&, SfxObjectShell&, OUString& )
+{
+ return false;
+}
+
+// Builds the encoded block name for rName: a '#' is put in front and every
+// character of rName that is one of "!/:.\" is masked with 0x0f.
+//
+// The buffer holds '#' at index 0 followed by rName, so the characters of
+// rName are at indices [1, aName.getLength()). The loop bound therefore has
+// to be the buffer length; using rName.getLength() would leave the last
+// character of the name unchecked.
+// NOTE(review): names ending in one of the masked characters now get a
+// different result than before - confirm that no stored names depend on the
+// old result.
+OUString EncryptBlockName_Imp(const OUString& rName)
+{
+    OUStringBuffer aName;
+    aName.append('#').append(rName);
+    for (sal_Int32 nLen = aName.getLength(), nPos = 1; nPos < nLen; ++nPos)
+    {
+        if (lcl_IsInAsciiArr( "!/:.\\", aName[nPos]))
+            aName[nPos] &= 0x0f;
+    }
+    return aName.makeStringAndClear();
+}
+
+/* This code is copied from SwXMLTextBlocks::GeneratePackageName
+ *
+ * Converts rShort to UTF-7, reads the result back as ASCII and replaces each
+ * of the characters ! / : . \ with an underscore. The result is stored in
+ * rPackageName. */
+static void GeneratePackageName ( std::u16string_view rShort, OUString& rPackageName )
+{
+    const OString sUtf7(OUStringToOString(rShort, RTL_TEXTENCODING_UTF7));
+    OUStringBuffer aName(OStringToOUString(sUtf7, RTL_TEXTENCODING_ASCII_US));
+
+    const sal_Int32 nLen = aName.getLength();
+    for (sal_Int32 nIdx = 0; nIdx < nLen; ++nIdx)
+    {
+        const sal_Unicode cCur = aName[nIdx];
+        const bool bReplace = cCur == '!' || cCur == '/' || cCur == ':' ||
+                              cCur == '.' || cCur == '\\';
+        if (bReplace)
+            aName[nIdx] = '_';
+    }
+
+    rPackageName = aName.makeStringAndClear();
+}
+
+// Forwards the search to SearchWordsInList() of the autocorrect word list
+// that pList returns from GetAutocorrWordList().
+static const SvxAutocorrWord* lcl_SearchWordsInList(
+                SvxAutoCorrectLanguageLists* pList, const OUString& rTxt,
+                sal_Int32& rStt, sal_Int32 nEndPos)
+{
+    return pList->GetAutocorrWordList()->SearchWordsInList( rTxt, rStt, nEndPos );
+}
+
+// the search for the words in the substitution table
+//
+// Lookup order:
+//  1. the language of rLang (the configured system language if rLang is the
+//     system locale); if lists for it are available, the result of this
+//     search is final, even when nothing was found,
+//  2. the primary language (for example EN), if it differs,
+//  3. LANGUAGE_UNDETERMINED.
+// On success rLang is set to the language the word was found in.
+const SvxAutocorrWord* SvxAutoCorrect::SearchWordsInList(
+                const OUString& rTxt, sal_Int32& rStt, sal_Int32 nEndPos,
+                SvxAutoCorrDoc&, LanguageTag& rLang )
+{
+    LanguageTag aLanguageTag( rLang);
+    if( aLanguageTag.isSystemLocale() )
+        aLanguageTag.reset( MsLangId::getConfiguredSystemLanguage());
+
+    /* TODO-BCP47: this is so ugly, should all maybe be a proper fallback
+     * list instead? */
+
+    // true if the lists for the current aLanguageTag are loaded or could be
+    // loaded now
+    auto haveLists = [this, &aLanguageTag]() -> bool
+    {
+        return m_aLangTable.find(aLanguageTag) != m_aLangTable.end()
+               || CreateLanguageFile(aLanguageTag, false);
+    };
+
+    // searches the (existing) lists of the current aLanguageTag and reports
+    // the language back through rLang on success
+    auto searchLists = [&]() -> const SvxAutocorrWord*
+    {
+        std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second;
+        const SvxAutocorrWord* pFound = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos );
+        if( pFound )
+            rLang = aLanguageTag;
+        return pFound;
+    };
+
+    // 1. the language itself
+    if (haveLists())
+        return searchLists();
+
+    // 2. the primary language for example EN
+    const LanguageType eLang = aLanguageTag.getLanguageType();
+    aLanguageTag.reset(aLanguageTag.getLanguage());
+    const LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
+    if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED && haveLists())
+    {
+        if (const SvxAutocorrWord* pFound = searchLists())
+            return pFound;
+    }
+
+    // 3. the general list
+    aLanguageTag.reset(LANGUAGE_UNDETERMINED);
+    if (haveLists())
+    {
+        if (const SvxAutocorrWord* pFound = searchLists())
+            return pFound;
+    }
+    return nullptr;
+}
+
+// Returns true if sWord is contained in a word-start exception list.
+// The lists are tried in this order: eLang, the primary language of eLang
+// (if it differs), LANGUAGE_UNDETERMINED. Lists that are not loaded yet are
+// loaded via CreateLanguageFile(); unavailable lists are skipped.
+bool SvxAutoCorrect::FindInWordStartExceptList( LanguageType eLang,
+                                                const OUString& sWord )
+{
+    LanguageTag aLanguageTag( eLang);
+
+    /* TODO-BCP47: again horrible ugliness */
+
+    // true if the lists for the current aLanguageTag are loaded or could be
+    // loaded now
+    auto haveLists = [this, &aLanguageTag]() -> bool
+    {
+        return m_aLangTable.find(aLanguageTag) != m_aLangTable.end()
+               || CreateLanguageFile(aLanguageTag, false);
+    };
+
+    // looks sWord up in the (existing) lists of the current aLanguageTag
+    auto isListed = [this, &aLanguageTag, &sWord]() -> bool
+    {
+        auto const& pList = m_aLangTable.find(aLanguageTag)->second;
+        auto* pExceptions = pList->GetWordStartExceptList();
+        return pExceptions->find(sWord) != pExceptions->end();
+    };
+
+    // the language itself
+    if (haveLists() && isListed())
+        return true;
+
+    // the primary language for example EN
+    aLanguageTag.reset(aLanguageTag.getLanguage());
+    const LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
+    if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
+        haveLists() && isListed())
+        return true;
+
+    // the general list
+    aLanguageTag.reset(LANGUAGE_UNDETERMINED);
+    return haveLists() && isListed();
+}
+
+// Checks whether sWord ends with one of the abbreviation patterns of pList.
+//
+// The patterns are the consecutive entries starting with '~' that begin at
+// the position of the entry "~". For each of them the text behind the '~' is
+// compared (after ASCII lowercasing) with the end of sWord. Patterns of two
+// or fewer characters ("~" and "~.") are not allowed and are skipped, as are
+// patterns longer than the word.
+static bool lcl_FindAbbreviation(const SvStringsISortDtor* pList, const OUString& sWord)
+{
+    SvStringsISortDtor::size_type nPos = pList->find( "~" ) - pList->begin();
+    if( nPos < pList->size() )
+    {
+        const OUString sLowerWord(sWord.toAsciiLowerCase());
+        for( SvStringsISortDtor::size_type nEntry = nPos; nEntry < pList->size(); ++nEntry )
+        {
+            const OUString& rAbr = (*pList)[ nEntry ];
+            if (rAbr[0] != '~')
+                break; // end of the block of patterns
+
+            const bool bUsable = 2 < rAbr.getLength() &&
+                                 rAbr.getLength() - 1 <= sWord.getLength();
+            if( !bUsable )
+                continue;
+
+            // Compare backwards; index 0 of the pattern is the '~' itself.
+            const OUString sLowerAbk(rAbr.toAsciiLowerCase());
+            sal_Int32 nAbkIdx = sLowerAbk.getLength();
+            sal_Int32 nWordIdx = sLowerWord.getLength();
+            bool bAgrees = true;
+            while( nAbkIdx > 1 )
+            {
+                --nAbkIdx;
+                --nWordIdx;
+                if( sLowerAbk[nAbkIdx] != sLowerWord[nWordIdx] )
+                {
+                    bAgrees = false;
+                    break;
+                }
+            }
+            if( bAgrees )
+                return true;
+        }
+    }
+    OSL_ENSURE( !(nPos && '~' == (*pList)[ --nPos ][ 0 ] ),
+            "Wrongly sorted exception list?" );
+    return false;
+}
+
+// Returns true if sWord matches an entry of a CplStt exception list: by
+// exact lookup, or via lcl_FindAbbreviation() if bAbbreviation is set.
+// The lists are tried in this order: eLang, the primary language of eLang
+// (if it differs), LANGUAGE_UNDETERMINED. Lists that are not loaded yet are
+// loaded via CreateLanguageFile(); unavailable lists are skipped.
+bool SvxAutoCorrect::FindInCplSttExceptList(LanguageType eLang,
+                                const OUString& sWord, bool bAbbreviation)
+{
+    LanguageTag aLanguageTag( eLang);
+
+    /* TODO-BCP47: did I mention terrible horrible ugliness? */
+
+    // true if the lists for the current aLanguageTag are loaded or could be
+    // loaded now
+    auto haveLists = [this, &aLanguageTag]() -> bool
+    {
+        return m_aLangTable.find(aLanguageTag) != m_aLangTable.end()
+               || CreateLanguageFile(aLanguageTag, false);
+    };
+
+    // checks sWord against the (existing) list of the current aLanguageTag
+    auto isListed = [this, &aLanguageTag, &sWord, bAbbreviation]() -> bool
+    {
+        const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList();
+        if( bAbbreviation )
+            return lcl_FindAbbreviation(pList, sWord);
+        return pList->find(sWord) != pList->end();
+    };
+
+    // the language itself
+    if (haveLists() && isListed())
+        return true;
+
+    // the primary language for example EN
+    aLanguageTag.reset(aLanguageTag.getLanguage());
+    const LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
+    if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
+        haveLists() && isListed())
+        return true;
+
+    // the general list
+    aLanguageTag.reset(LANGUAGE_UNDETERMINED);
+    return haveLists() && isListed();
+}
+
+// Builds the name of the autocorrection file of a language:
+// <base>_<language>.dat. bNewFile selects the user directory, !bTst the share
+// directory; otherwise the user file is preferred if it exists. With
+// bUnlocalized the first fallback string of the tag is used as language part.
+OUString SvxAutoCorrect::GetAutoCorrFileName( const LanguageTag& rLanguageTag,
+                                            bool bNewFile, bool bTst, bool bUnlocalized ) const
+{
+    OUString sLangPart( rLanguageTag.getBcp47() );
+    if (bUnlocalized)
+    {
+        // we don't want variant, so we'll take "fr" instead of "fr-CA" for example
+        const std::vector< OUString > aFallbacks = rLanguageTag.getFallbackStrings(false);
+        if (!aFallbacks.empty())
+            sLangPart = aFallbacks.front();
+    }
+    const OUString sSuffix = "_" + sLangPart + ".dat";
+
+    if( bNewFile )
+        return sUserAutoCorrFile + sSuffix;
+    if( !bTst )
+        return sShareAutoCorrFile + sSuffix;
+
+    // test first in the user directory - if it does not exist there, use share
+    const OUString sUserPath = sUserAutoCorrFile + sSuffix;
+    if( FStatHelper::IsDocument( sUserPath ))
+        return sUserPath;
+    return sShareAutoCorrFile + sSuffix;
+}
+
+// Remembers the share and user file names and the owning SvxAutoCorrect.
+// No list is loaded yet: the time stamps start out empty and no load flag is set.
+SvxAutoCorrectLanguageLists::SvxAutoCorrectLanguageLists(
+                SvxAutoCorrect& rParent,
+                OUString aShareAutoCorrectFile,
+                OUString aUserAutoCorrectFile)
+    : sShareAutoCorrFile{ std::move( aShareAutoCorrectFile ) }
+    , sUserAutoCorrFile{ std::move( aUserAutoCorrectFile ) }
+    , aModifiedDate{ Date::EMPTY }
+    , aModifiedTime{ tools::Time::EMPTY }
+    , aLastCheckTime{ tools::Time::EMPTY }
+    , rAutoCorrect{ rParent }
+    , nFlags{ ACFlags::NONE }
+{
+}
+
+// Nothing to release by hand: the members clean up after themselves.
+SvxAutoCorrectLanguageLists::~SvxAutoCorrectLanguageLists() = default;
+
+// Checks whether the share autocorrection file was modified since the lists
+// were loaded. The file system is accessed at most every 2 minutes.
+//
+// If the modification date/time of sShareAutoCorrFile differs from the
+// remembered one, all loaded lists are dropped and their load flags cleared so
+// that the next Get...List() call reloads them.
+//
+// @return true if a change was detected and the lists were discarded
+bool SvxAutoCorrectLanguageLists::IsFileChanged_Imp()
+{
+    bool bRet = false;
+
+    const tools::Time nMinTime( 0, 2 );
+    tools::Time nAktTime( tools::Time::SYSTEM );
+
+    // A last check time that lies after "now" means the clock wrapped around
+    // (presumably past midnight - tools::Time carries no date): check at once.
+    // The previous code returned early for the opposite (normal) case, so the
+    // file was never checked at all.
+    const bool bOverflow = aLastCheckTime > nAktTime;
+    if( !bOverflow )
+        nAktTime -= aLastCheckTime; // time elapsed since the last check
+
+    if( bOverflow || nAktTime > nMinTime ) // min time past
+    {
+        Date aTstDate( Date::EMPTY ); tools::Time aTstTime( tools::Time::EMPTY );
+        if( FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
+                                            &aTstDate, &aTstTime ) &&
+            ( aModifiedDate != aTstDate || aModifiedTime != aTstTime ))
+        {
+            bRet = true;
+            // then remove all the lists fast!
+            if( (ACFlags::CplSttLstLoad & nFlags) && pCplStt_ExcptLst )
+            {
+                pCplStt_ExcptLst.reset();
+            }
+            if( (ACFlags::WordStartLstLoad & nFlags) && pWordStart_ExcptLst )
+            {
+                pWordStart_ExcptLst.reset();
+            }
+            if( (ACFlags::ChgWordLstLoad & nFlags) && pAutocorr_List )
+            {
+                pAutocorr_List.reset();
+            }
+            nFlags &= ~ACFlags(ACFlags::CplSttLstLoad | ACFlags::WordStartLstLoad | ACFlags::ChgWordLstLoad );
+        }
+        // restart the two minute interval
+        aLastCheckTime = tools::Time( tools::Time::SYSTEM );
+    }
+    return bRet;
+}
+
+// Fills rpLst (created on demand, emptied otherwise) with the exception list
+// stored as XML stream sStrmName in rStg. A stream that cannot be opened is
+// removed from the user file and rStg is released. Parse errors are ignored.
+// Afterwards the time stamp of the share file is remembered.
+void SvxAutoCorrectLanguageLists::LoadXMLExceptList_Imp(
+                                        std::unique_ptr<SvStringsISortDtor>& rpLst,
+                                        const OUString& sStrmName,
+                                        tools::SvRef<SotStorage>& rStg)
+{
+    if( !rpLst )
+        rpLst.reset( new SvStringsISortDtor );
+    else
+        rpLst->clear();
+
+    const bool bHasStream = rStg.is() && rStg->IsStream( sStrmName );
+    if( bHasStream )
+    {
+        tools::SvRef<SotStorageStream> xInput = rStg->OpenSotStream( sStrmName,
+            ( StreamMode::READ | StreamMode::SHARE_DENYWRITE | StreamMode::NOCREATE ) );
+        if( xInput->GetError() == ERRCODE_NONE )
+        {
+            uno::Reference< uno::XComponentContext > xCtx =
+                comphelper::getProcessComponentContext();
+
+            // describe the input for the parser
+            xml::sax::InputSource aSource;
+            aSource.sSystemId = sStrmName;
+
+            xInput->Seek( 0 );
+            xInput->SetBufferSize( 8 * 1024 );
+            aSource.aInputStream = new utl::OInputStreamWrapper( *xInput );
+
+            // the import filter writes the entries into *rpLst
+            uno::Reference< xml::sax::XFastDocumentHandler > xImport = new SvXMLExceptionListImport ( xCtx, *rpLst );
+
+            // wire parser, filter and token handler together
+            uno::Reference< xml::sax::XFastParser > xFastParser = xml::sax::FastParser::create( xCtx );
+            uno::Reference<xml::sax::XFastTokenHandler> xTokens = new SvXMLAutoCorrectTokenHandler;
+            xFastParser->setFastDocumentHandler( xImport );
+            xFastParser->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE );
+            xFastParser->setTokenHandler( xTokens );
+
+            try
+            {
+                xFastParser->parseStream( aSource );
+            }
+            catch( const xml::sax::SAXParseException& )
+            {
+                // re throw ?
+            }
+            catch( const xml::sax::SAXException& )
+            {
+                // re throw ?
+            }
+            catch( const io::IOException& )
+            {
+                // re throw ?
+            }
+        }
+        else
+        {
+            // unreadable stream: let go of everything and drop it from the user file
+            xInput.clear();
+            rStg.clear();
+            RemoveStream_Imp( sStrmName );
+        }
+    }
+
+    // Set time stamp
+    FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
+                                    &aModifiedDate, &aModifiedTime );
+    aLastCheckTime = tools::Time( tools::Time::SYSTEM );
+}
+
+// Writes rLst as XML stream sStrmName into rStg; an empty list removes the
+// stream instead. Unless bConvert is set the storage is committed afterwards,
+// and the stream is removed again if that commit reports an error.
+void SvxAutoCorrectLanguageLists::SaveExceptList_Imp(
+                            const SvStringsISortDtor& rLst,
+                            const OUString& sStrmName,
+                            tools::SvRef<SotStorage> const &rStg,
+                            bool bConvert )
+{
+    if( !rStg.is() )
+        return;
+
+    if( rLst.empty() )
+    {
+        // nothing to store: get rid of a stream that may still exist
+        rStg->Remove( sStrmName );
+        rStg->Commit();
+        return;
+    }
+
+    tools::SvRef<SotStorageStream> xOutStrm = rStg->OpenSotStream( sStrmName,
+            ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) );
+    if( !xOutStrm.is() )
+        return;
+
+    // start with an empty stream
+    xOutStrm->SetSize( 0 );
+    xOutStrm->SetBufferSize( 8192 );
+    xOutStrm->SetProperty( "MediaType", Any(OUString( "text/xml" )) );
+
+    uno::Reference< uno::XComponentContext > xCtx =
+        comphelper::getProcessComponentContext();
+
+    uno::Reference < xml::sax::XWriter > xSaxWriter = xml::sax::Writer::create(xCtx);
+    uno::Reference < io::XOutputStream> xSink = new utl::OOutputStreamWrapper( *xOutStrm );
+    xSaxWriter->setOutputStream(xSink);
+
+    uno::Reference < xml::sax::XDocumentHandler > xDocHandler(xSaxWriter, UNO_QUERY_THROW);
+    rtl::Reference< SvXMLExceptionListExport > xExport( new SvXMLExceptionListExport( xCtx, rLst, sStrmName, xDocHandler ) );
+
+    xExport->exportDoc( XML_BLOCK_LIST );
+
+    xOutStrm->Commit();
+    if( xOutStrm->GetError() != ERRCODE_NONE )
+        return;
+
+    xOutStrm.clear();
+    if (bConvert)
+        return; // no storage commit in this mode
+
+    rStg->Commit();
+    if( rStg->GetError() != ERRCODE_NONE )
+    {
+        rStg->Remove( sStrmName );
+        rStg->Commit();
+    }
+}
+
+// (Re)loads the replacement word list from the share file into pAutocorr_List
+// and remembers the time stamp of that file. Any UNO exception while loading
+// is logged and leaves the list with whatever was read so far.
+SvxAutocorrWordList* SvxAutoCorrectLanguageLists::LoadAutocorrWordList()
+{
+    if( !pAutocorr_List )
+        pAutocorr_List.reset( new SvxAutocorrWordList() );
+    else
+        pAutocorr_List->DeleteAndDestroyAll();
+
+    try
+    {
+        uno::Reference < embed::XStorage > xStorage = comphelper::OStorageHelper::GetStorageFromURL( sShareAutoCorrFile, embed::ElementModes::READ );
+        uno::Reference < io::XStream > xListStream = xStorage->openStreamElement( pXMLImplAutocorr_ListStr, embed::ElementModes::READ );
+        uno::Reference< uno::XComponentContext > xCtx = comphelper::getProcessComponentContext();
+
+        // describe the input for the parser
+        xml::sax::InputSource aSource;
+        aSource.sSystemId = pXMLImplAutocorr_ListStr;
+        aSource.aInputStream = xListStream->getInputStream();
+
+        // create parser, import filter and token handler
+        uno::Reference< xml::sax::XFastParser > xFastParser = xml::sax::FastParser::create(xCtx);
+        SAL_INFO("editeng", "AutoCorrect Import" );
+        uno::Reference< xml::sax::XFastDocumentHandler > xImport = new SvXMLAutoCorrectImport( xCtx, pAutocorr_List.get(), rAutoCorrect, xStorage );
+        uno::Reference<xml::sax::XFastTokenHandler> xTokens = new SvXMLAutoCorrectTokenHandler;
+
+        // wire them together
+        xFastParser->setFastDocumentHandler( xImport );
+        xFastParser->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE );
+        xFastParser->setTokenHandler(xTokens);
+
+        // and read the list
+        xFastParser->parseStream( aSource );
+    }
+    catch ( const uno::Exception& )
+    {
+        TOOLS_WARN_EXCEPTION("editeng", "when loading " << sShareAutoCorrFile);
+    }
+
+    // Set time stamp
+    FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
+                                    &aModifiedDate, &aModifiedTime );
+    aLastCheckTime = tools::Time( tools::Time::SYSTEM );
+
+    return pAutocorr_List.get();
+}
+
+// Returns the replacement word list, loading it first if that has not
+// happened yet or if the file was found to be changed.
+const SvxAutocorrWordList* SvxAutoCorrectLanguageLists::GetAutocorrWordList()
+{
+    const bool bMustLoad = !( ACFlags::ChgWordLstLoad & nFlags ) || IsFileChanged_Imp();
+    if( !bMustLoad )
+        return pAutocorr_List.get();
+
+    LoadAutocorrWordList();
+    if( !pAutocorr_List )
+    {
+        // never hand out a null list
+        OSL_ENSURE( false, "No valid list" );
+        pAutocorr_List.reset( new SvxAutocorrWordList() );
+    }
+    nFlags |= ACFlags::ChgWordLstLoad;
+    return pAutocorr_List.get();
+}
+
+// Returns the "capital at sentence start" exception list, loading it first if
+// that has not happened yet or if the file was found to be changed.
+SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetCplSttExceptList()
+{
+    const bool bMustLoad = !( ACFlags::CplSttLstLoad & nFlags ) || IsFileChanged_Imp();
+    if( !bMustLoad )
+        return pCplStt_ExcptLst.get();
+
+    LoadCplSttExceptList();
+    if( !pCplStt_ExcptLst )
+    {
+        // never hand out a null list
+        OSL_ENSURE( false, "No valid list" );
+        pCplStt_ExcptLst.reset( new SvStringsISortDtor );
+    }
+    nFlags |= ACFlags::CplSttLstLoad;
+    return pCplStt_ExcptLst.get();
+}
+
+// Adds rNew to the "capital at sentence start" exception list and stores the
+// list in the user file. Returns false for an empty string or if the entry
+// was already in the list.
+bool SvxAutoCorrectLanguageLists::AddToCplSttExceptList(const OUString& rNew)
+{
+    if( rNew.isEmpty() || !GetCplSttExceptList()->insert( rNew ).second )
+        return false;
+
+    MakeUserStorage_Impl();
+    tools::SvRef<SotStorage> xUserStorage = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
+
+    SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xUserStorage );
+
+    // let go of the storage before asking for the file's time stamp
+    xUserStorage.clear();
+    FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
+                                    &aModifiedDate, &aModifiedTime );
+    aLastCheckTime = tools::Time( tools::Time::SYSTEM );
+    return true;
+}
+
+// Adds rNew to the word start exception list and stores the list in the user
+// file. Returns false for an empty string or if the entry was already in the
+// list.
+bool SvxAutoCorrectLanguageLists::AddToWordStartExceptList(const OUString& rNew)
+{
+    if( rNew.isEmpty() || !GetWordStartExceptList()->insert( rNew ).second )
+        return false;
+
+    MakeUserStorage_Impl();
+    tools::SvRef<SotStorage> xUserStorage = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
+
+    SaveExceptList_Imp( *pWordStart_ExcptLst, pXMLImplWordStart_ExcptLstStr, xUserStorage );
+
+    // let go of the storage before asking for the file's time stamp
+    xUserStorage.clear();
+    FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
+                                    &aModifiedDate, &aModifiedTime );
+    aLastCheckTime = tools::Time( tools::Time::SYSTEM );
+    return true;
+}
+
+// Loads the "capital at sentence start" exception list from the share file
+// into pCplStt_ExcptLst, provided the storage contains such a stream.
+//
+// A ContentCreationException raised while opening the storage is logged (as
+// LoadWordStartExceptList does) instead of being dropped silently; the list
+// is left as it was in that case.
+//
+// @return the list, which may be null if nothing has been loaded so far
+SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadCplSttExceptList()
+{
+    try
+    {
+        tools::SvRef<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE );
+        if( xStg.is() && xStg->IsContained( pXMLImplCplStt_ExcptLstStr ) )
+            LoadXMLExceptList_Imp( pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
+    }
+    catch (const css::ucb::ContentCreationException&)
+    {
+        TOOLS_WARN_EXCEPTION("editeng", "SvxAutoCorrectLanguageLists::LoadCplSttExceptList");
+    }
+    return pCplStt_ExcptLst.get();
+}
+
+// Stores the "capital at sentence start" exception list in the user file and
+// remembers the new time stamp of that file.
+void SvxAutoCorrectLanguageLists::SaveCplSttExceptList()
+{
+    MakeUserStorage_Impl();
+
+    {
+        // the storage is released at the end of this scope, before the time
+        // stamp of the file is read
+        tools::SvRef<SotStorage> xUserStorage = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
+        SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xUserStorage );
+    }
+
+    // Set time stamp
+    FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
+                                    &aModifiedDate, &aModifiedTime );
+    aLastCheckTime = tools::Time( tools::Time::SYSTEM );
+}
+
+// Loads the word start exception list from the share file into
+// pWordStart_ExcptLst, provided the storage contains such a stream. A
+// ContentCreationException is logged and otherwise ignored.
+SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadWordStartExceptList()
+{
+    try
+    {
+        tools::SvRef<SotStorage> xShareStorage = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE );
+        const bool bHasList = xShareStorage.is() && xShareStorage->IsContained( pXMLImplWordStart_ExcptLstStr );
+        if( bHasList )
+        {
+            LoadXMLExceptList_Imp( pWordStart_ExcptLst, pXMLImplWordStart_ExcptLstStr, xShareStorage );
+        }
+    }
+    catch (const css::ucb::ContentCreationException &)
+    {
+        TOOLS_WARN_EXCEPTION("editeng", "SvxAutoCorrectLanguageLists::LoadWordStartExceptList");
+    }
+    return pWordStart_ExcptLst.get();
+}
+
+// Stores the word start exception list in the user file and remembers the
+// new time stamp of that file.
+void SvxAutoCorrectLanguageLists::SaveWordStartExceptList()
+{
+    MakeUserStorage_Impl();
+
+    {
+        // the storage is released at the end of this scope, before the time
+        // stamp of the file is read
+        tools::SvRef<SotStorage> xUserStorage = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
+        SaveExceptList_Imp( *pWordStart_ExcptLst, pXMLImplWordStart_ExcptLstStr, xUserStorage );
+    }
+
+    // Set time stamp
+    FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
+                                    &aModifiedDate, &aModifiedTime );
+    aLastCheckTime = tools::Time( tools::Time::SYSTEM );
+}
+
+// Returns the word start exception list, loading it first if that has not
+// happened yet or if the file was found to be changed.
+SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetWordStartExceptList()
+{
+    const bool bMustLoad = !( ACFlags::WordStartLstLoad & nFlags ) || IsFileChanged_Imp();
+    if( !bMustLoad )
+        return pWordStart_ExcptLst.get();
+
+    LoadWordStartExceptList();
+    if( !pWordStart_ExcptLst )
+    {
+        // never hand out a null list
+        OSL_ENSURE( false, "No valid list" );
+        pWordStart_ExcptLst.reset( new SvStringsISortDtor );
+    }
+    nFlags |= ACFlags::WordStartLstLoad;
+    return pWordStart_ExcptLst.get();
+}
+
+// Removes the stream rName from the user file. Nothing happens while share
+// and user file are one and the same.
+void SvxAutoCorrectLanguageLists::RemoveStream_Imp( const OUString& rName )
+{
+    if( sShareAutoCorrFile == sUserAutoCorrFile )
+        return;
+
+    tools::SvRef<SotStorage> xUserStorage = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
+    const bool bUsable = xUserStorage.is() && ERRCODE_NONE == xUserStorage->GetError();
+    if( bUsable && xUserStorage->IsStream( rName ) )
+    {
+        xUserStorage->Remove( rName );
+        xUserStorage->Commit();
+
+        xUserStorage.clear();
+    }
+}
+
+/* Prepares the user autocorrection file before it is written to.
+ *
+ * If user and share file differ, the share file is copied to the user
+ * location. If the source is an OLE storage, it is copied to a ".bak" file
+ * instead; the two exception lists and the replacement word list are then
+ * written into the user file (SaveExceptList_Imp / MakeBlocklist_Imp) and the
+ * ".bak" copy is deleted again. After a successful copy or conversion
+ * sShareAutoCorrFile is set to sUserAutoCorrFile.
+ */ void SvxAutoCorrectLanguageLists::MakeUserStorage_Impl()
+{
+ // The conversion needs to happen if the file is already in the user
+ // directory and is in the old format. Additionally it needs to
+ // happen when the file is being copied from share to user.
+
+ bool bError = false, bConvert = false, bCopy = false;
+ INetURLObject aDest;
+ INetURLObject aSource;
+
+ if (sUserAutoCorrFile != sShareAutoCorrFile )
+ {
+ aSource = INetURLObject ( sShareAutoCorrFile );
+ aDest = INetURLObject ( sUserAutoCorrFile );
+ if ( SotStorage::IsOLEStorage ( sShareAutoCorrFile ) ) // OLE source: copy to a ".bak" file and convert from there
+ {
+ aDest.SetExtension ( u"bak" );
+ bConvert = true;
+ }
+ bCopy = true;
+ }
+ else if ( SotStorage::IsOLEStorage ( sUserAutoCorrFile ) ) // user file itself is an OLE storage: convert via a ".bak" copy
+ {
+ aSource = INetURLObject ( sUserAutoCorrFile );
+ aDest = INetURLObject ( sUserAutoCorrFile );
+ aDest.SetExtension ( u"bak" );
+ bCopy = bConvert = true;
+ }
+ if (bCopy)
+ {
+ try
+ {
+ OUString sMain(aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ));
+ sal_Int32 nSlashPos = sMain.lastIndexOf('/');
+ sMain = sMain.copy(0, nSlashPos); // keep everything before the last '/' of the destination URL
+ ::ucbhelper::Content aNewContent( sMain, uno::Reference< XCommandEnvironment >(), comphelper::getProcessComponentContext() );
+ TransferInfo aInfo;
+ aInfo.NameClash = NameClash::OVERWRITE;
+ aInfo.NewTitle = aDest.GetLastName();
+ aInfo.SourceURL = aSource.GetMainURL( INetURLObject::DecodeMechanism::ToIUri );
+ aInfo.MoveData = false;
+ aNewContent.executeCommand( "transfer", Any(aInfo));
+ }
+ catch (...)
+ {
+ bError = true; // copy failed: no conversion below, sShareAutoCorrFile stays as it is
+ }
+ }
+ if (bConvert && !bError)
+ {
+ tools::SvRef<SotStorage> xSrcStg = new SotStorage( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), StreamMode::READ );
+ tools::SvRef<SotStorage> xDstStg = new SotStorage( sUserAutoCorrFile, StreamMode::WRITE );
+
+ if( xSrcStg.is() && xDstStg.is() )
+ {
+ std::unique_ptr<SvStringsISortDtor> pTmpWordList;
+
+ if (xSrcStg->IsContained( pXMLImplWordStart_ExcptLstStr ) )
+ LoadXMLExceptList_Imp( pTmpWordList, pXMLImplWordStart_ExcptLstStr, xSrcStg );
+
+ if (pTmpWordList)
+ {
+ SaveExceptList_Imp( *pTmpWordList, pXMLImplWordStart_ExcptLstStr, xDstStg, true );
+ pTmpWordList.reset();
+ }
+
+
+ if (xSrcStg->IsContained( pXMLImplCplStt_ExcptLstStr ) )
+ LoadXMLExceptList_Imp( pTmpWordList, pXMLImplCplStt_ExcptLstStr, xSrcStg );
+
+ if (pTmpWordList)
+ {
+ SaveExceptList_Imp( *pTmpWordList, pXMLImplCplStt_ExcptLstStr, xDstStg, true );
+ pTmpWordList->clear();
+ }
+
+ GetAutocorrWordList(); // have pAutocorr_List loaded before MakeBlocklist_Imp writes it out
+ MakeBlocklist_Imp( *xDstStg );
+ sShareAutoCorrFile = sUserAutoCorrFile;
+ xDstStg = nullptr;
+ try
+ {
+ ::ucbhelper::Content aContent ( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), uno::Reference < XCommandEnvironment >(), comphelper::getProcessComponentContext() );
+ aContent.executeCommand ( "delete", Any ( true ) );
+ }
+ catch (...) // a failure to delete the ".bak" copy is ignored
+ {
+ }
+ }
+ }
+ else if( bCopy && !bError )
+ sShareAutoCorrFile = sUserAutoCorrFile;
+}
+
+// Writes the replacement word list as XML stream into rStg and commits the
+// storage. An empty or missing list removes the stream instead, as does a
+// failing storage commit. Returns false if the stream could not be opened,
+// written or committed.
+bool SvxAutoCorrectLanguageLists::MakeBlocklist_Imp( SotStorage& rStg )
+{
+    bool bSuccess = true;
+    bool bDropStream = !pAutocorr_List || pAutocorr_List->empty();
+    if( !bDropStream )
+    {
+        tools::SvRef<SotStorageStream> xListStrm = rStg.OpenSotStream( pXMLImplAutocorr_ListStr,
+                    ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) );
+        if( !xListStrm.is() )
+        {
+            bSuccess = false;
+        }
+        else
+        {
+            // start with an empty stream
+            xListStrm->SetSize( 0 );
+            xListStrm->SetBufferSize( 8192 );
+            xListStrm->SetProperty( "MediaType", Any(OUString( "text/xml" )) );
+
+            uno::Reference< uno::XComponentContext > xCtx =
+                comphelper::getProcessComponentContext();
+
+            uno::Reference < xml::sax::XWriter > xSaxWriter = xml::sax::Writer::create(xCtx);
+            uno::Reference < io::XOutputStream> xSink = new utl::OOutputStreamWrapper( *xListStrm );
+            xSaxWriter->setOutputStream(xSink);
+
+            rtl::Reference< SvXMLAutoCorrectExport > xExport( new SvXMLAutoCorrectExport( xCtx, pAutocorr_List.get(), pXMLImplAutocorr_ListStr, xSaxWriter ) );
+
+            xExport->exportDoc( XML_BLOCK_LIST );
+
+            xListStrm->Commit();
+            bSuccess = ERRCODE_NONE == xListStrm->GetError();
+            if( bSuccess )
+            {
+                xListStrm.clear();
+                rStg.Commit();
+                if( rStg.GetError() != ERRCODE_NONE )
+                {
+                    // the storage could not be committed: drop the stream again
+                    bDropStream = true;
+                    bSuccess = false;
+                }
+            }
+        }
+    }
+
+    if( bDropStream )
+    {
+        rStg.Remove( pXMLImplAutocorr_ListStr );
+        rStg.Commit();
+    }
+
+    return bSuccess;
+}
+
+// Applies a batch of changes to the replacement word list: removes all
+// aDeleteEntries, (re)inserts all aNewEntries as text-only entries and
+// finally writes the list into the user file. Sub-storages belonging to
+// removed or replaced entries that are not text-only are removed as well.
+bool SvxAutoCorrectLanguageLists::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries, std::vector<SvxAutocorrWord>& aDeleteEntries )
+{
+    // First get the current list!
+    GetAutocorrWordList();
+
+    MakeUserStorage_Impl();
+    tools::SvRef<SotStorage> xStorage = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
+
+    bool bOk = xStorage.is() && ERRCODE_NONE == xStorage->GetError();
+    if( !bOk )
+        return false;
+
+    // name of the sub-storage that belongs to the entry with the given short text
+    auto const storageNameOf = [&]( const OUString& rShortName )
+    {
+        OUString sName( rShortName );
+        if (xStorage->IsOLEStorage())
+            sName = EncryptBlockName_Imp(sName);
+        else
+            GeneratePackageName ( rShortName, sName );
+        return sName;
+    };
+
+    for (SvxAutocorrWord & rDoomed : aDeleteEntries)
+    {
+        std::optional<SvxAutocorrWord> xGone = pAutocorr_List->FindAndRemove( &rDoomed );
+        if( !xGone || xGone->IsTextOnly() )
+            continue;
+
+        const OUString sName = storageNameOf( rDoomed.GetShort() );
+        if( xStorage->IsContained( sName ) )
+        {
+            xStorage->Remove( sName );
+            bOk = xStorage->Commit();
+        }
+    }
+
+    for (const SvxAutocorrWord & rRequested : aNewEntries)
+    {
+        SvxAutocorrWord aFresh(rRequested.GetShort(), rRequested.GetLong(), true );
+        std::optional<SvxAutocorrWord> xReplaced = pAutocorr_List->FindAndRemove( &aFresh );
+        if( xReplaced && !xReplaced->IsTextOnly() )
+        {
+            // Still have to remove the Storage
+            const OUString sName = storageNameOf( aFresh.GetShort() );
+            if( xStorage->IsContained( sName ) )
+                xStorage->Remove( sName );
+        }
+
+        bOk = pAutocorr_List->Insert( std::move(aFresh) );
+        if ( !bOk )
+            break;
+    }
+
+    if ( bOk )
+        bOk = MakeBlocklist_Imp( *xStorage );
+    return bOk;
+}
+
+// Adds or replaces the text-only entry rShort -> rLong and writes the
+// replacement word list into the user file. If the replaced entry was not
+// text-only, its sub-storage is removed too.
+bool SvxAutoCorrectLanguageLists::PutText( const OUString& rShort, const OUString& rLong )
+{
+    // First get the current list!
+    GetAutocorrWordList();
+
+    MakeUserStorage_Impl();
+    tools::SvRef<SotStorage> xUserStorage = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
+
+    if( !xUserStorage.is() || ERRCODE_NONE != xUserStorage->GetError() )
+        return false;
+
+    // Update the word list
+    SvxAutocorrWord aEntry(rShort, rLong, true );
+    std::optional<SvxAutocorrWord> xPrevious = pAutocorr_List->FindAndRemove( &aEntry );
+    if( xPrevious && !xPrevious->IsTextOnly() )
+    {
+        // Still have to remove the Storage
+        OUString sStorageName( rShort );
+        if (xUserStorage->IsOLEStorage())
+            sStorageName = EncryptBlockName_Imp(sStorageName);
+        else
+            GeneratePackageName ( rShort, sStorageName);
+
+        if( xUserStorage->IsContained( sStorageName ) )
+            xUserStorage->Remove( sStorageName );
+    }
+
+    if( !pAutocorr_List->Insert( std::move(aEntry) ) )
+        return false;
+
+    const bool bWritten = MakeBlocklist_Imp( *xUserStorage );
+    xUserStorage.clear();
+    return bWritten;
+}
+
+// Stores the content of rShell as replacement for rShort in the user file
+// (via SvxAutoCorrect::PutText) and, if that succeeded, adds a matching entry
+// that is not text-only to the word list and writes the list out.
+//
+// UNO exceptions raised on the way are logged instead of being dropped
+// silently; the function still does not propagate them.
+//
+// @param rShort  the short text (key) of the entry
+// @param rShell  the document shell providing the replacement content
+void SvxAutoCorrectLanguageLists::PutText( const OUString& rShort,
+                                        SfxObjectShell& rShell )
+{
+    // First get the current list!
+    GetAutocorrWordList();
+
+    MakeUserStorage_Impl();
+
+    try
+    {
+        uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sUserAutoCorrFile, embed::ElementModes::READWRITE );
+        OUString sLong;
+        bool bRet = rAutoCorrect.PutText( xStg, sUserAutoCorrFile, rShort, rShell, sLong );
+        // release the storage before the file is opened again below
+        xStg = nullptr;
+
+        // Update the word list
+        if( bRet )
+        {
+            if( pAutocorr_List->Insert( SvxAutocorrWord(rShort, sLong, false) ) )
+            {
+                tools::SvRef<SotStorage> xStor = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
+                MakeBlocklist_Imp( *xStor );
+            }
+        }
+    }
+    catch ( const uno::Exception& )
+    {
+        TOOLS_WARN_EXCEPTION("editeng", "when storing entry in " << sUserAutoCorrFile);
+    }
+}
+
+// Ordering used to keep the list sorted: entries are compared by their
+// 'Short' text with the collator.
+struct SvxAutocorrWordList::CompareSvxAutocorrWordList
+{
+    bool operator()( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs ) const
+    {
+        return ::GetCollatorWrapper().compareString( lhs.GetShort(), rhs.GetShort() ) < 0;
+    }
+};
+
+namespace {
+
+// entries keyed by their 'Short' text
+using AutocorrWordHashType = std::unordered_map<OUString, SvxAutocorrWord>;
+
+}
+
+// Storage of SvxAutocorrWordList. The entries live either in the hash or in
+// the sorted vector, never in both.
+struct SvxAutocorrWordList::Impl
+{
+    // maSortedVector is manually sorted so we can optimise data movement
+    mutable AutocorrWordSetType maSortedVector;
+    // key is 'Short'
+    mutable AutocorrWordHashType maHash;
+
+    // empties both containers
+    void DeleteAndDestroyAll()
+    {
+        maSortedVector.clear();
+        maHash.clear();
+    }
+};
+
+// Creates an empty list.
+SvxAutocorrWordList::SvxAutocorrWordList()
+    : mpImpl(new Impl)
+{
+}
+
+// Defined here, where Impl is a complete type.
+SvxAutocorrWordList::~SvxAutocorrWordList() = default;
+
+// Removes all entries from the list.
+void SvxAutocorrWordList::DeleteAndDestroyAll()
+{
+    Impl& rImpl = *mpImpl;
+    rImpl.DeleteAndDestroyAll();
+}
+
+// Inserts aWord unless an entry with an equal 'Short' text exists already.
+// Returns a pointer to the stored entry if it was inserted, nullptr otherwise.
+const SvxAutocorrWord* SvxAutocorrWordList::Insert(SvxAutocorrWord aWord) const
+{
+    auto& rSorted = mpImpl->maSortedVector;
+    if ( !rSorted.empty() )
+    {
+        // the sorted vector is in use: insert at the position that keeps it sorted
+        auto itPos = std::lower_bound(rSorted.begin(), rSorted.end(), aWord, CompareSvxAutocorrWordList());
+        CollatorWrapper& rCollator = ::GetCollatorWrapper();
+        const bool bAlreadyThere = itPos != rSorted.end()
+            && rCollator.compareString( aWord.GetShort(), itPos->GetShort() ) == 0;
+        if (bAlreadyThere)
+            return nullptr;
+
+        itPos = rSorted.insert(itPos, std::move(aWord));
+        return &*itPos;
+    }
+
+    // otherwise use the hash
+    OUString aKey = aWord.GetShort();
+    auto aResult = mpImpl->maHash.emplace( std::move(aKey), std::move(aWord) );
+    if (!aResult.second)
+        return nullptr;
+    return &(aResult.first->second);
+}
+
+// Adds the entry sWrong -> sRight; whether it was really inserted is of no
+// interest here.
+void SvxAutocorrWordList::LoadEntry(const OUString& sWrong, const OUString& sRight, bool bOnlyTxt)
+{
+    SvxAutocorrWord aEntry( sWrong, sRight, bOnlyTxt );
+    (void)Insert( std::move(aEntry) );
+}
+
+// True if neither the hash nor the sorted vector holds an entry.
+bool SvxAutocorrWordList::empty() const
+{
+    if ( !mpImpl->maHash.empty() )
+        return false;
+    return mpImpl->maSortedVector.empty();
+}
+
+// Removes the entry that has the same 'Short' text as *pWord and hands it to
+// the caller. The result is empty if there is no such entry.
+std::optional<SvxAutocorrWord> SvxAutocorrWordList::FindAndRemove(const SvxAutocorrWord *pWord)
+{
+    auto& rSorted = mpImpl->maSortedVector;
+    if ( !rSorted.empty() )
+    {
+        // binary search in the sorted vector
+        const CompareSvxAutocorrWordList aLess;
+        auto itPos = std::lower_bound(rSorted.begin(), rSorted.end(), *pWord, aLess);
+        if (itPos == rSorted.end() || aLess(*pWord, *itPos))
+            return std::nullopt;
+
+        SvxAutocorrWord aFound = std::move(*itPos);
+        rSorted.erase (itPos);
+        return aFound;
+    }
+
+    // use the hash
+    auto& rHash = mpImpl->maHash;
+    AutocorrWordHashType::iterator itHash = rHash.find( pWord->GetShort() );
+    if( itHash == rHash.end() )
+        return std::nullopt;
+
+    SvxAutocorrWord aFound = std::move(itHash->second);
+    rHash.erase (itHash);
+    return aFound;
+}
+
+// Returns the entries in sorted order. Sorting is deferred until it is asked
+// for: on first use the entries are moved from the hash into the sorted
+// vector, where they stay from then on.
+const SvxAutocorrWordList::AutocorrWordSetType& SvxAutocorrWordList::getSortedContent() const
+{
+    auto& rSorted = mpImpl->maSortedVector;
+    if ( !rSorted.empty() )
+        return rSorted;
+
+    // convert from hash to set permanently
+    auto& rHash = mpImpl->maHash;
+    std::vector<SvxAutocorrWord> aEntries;
+    aEntries.reserve(rHash.size());
+    for (auto itHash = rHash.begin(); itHash != rHash.end(); ++itHash)
+        aEntries.emplace_back(std::move(itHash->second));
+    rHash.clear();
+
+    // First pass: cheap plain string comparison. This gets the list into
+    // mostly-sorted order, which reduces the number of times we need to
+    // invoke the expensive ICU collate fn.
+    auto const plainLess = [] ( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs )
+    {
+        return lhs.GetShort() < rhs.GetShort();
+    };
+    std::sort(aEntries.begin(), aEntries.end(), plainLess);
+
+    // Second pass with the collator. stable_sort is twice as fast as sort in
+    // this situation because it does fewer comparison operations.
+    std::stable_sort(aEntries.begin(), aEntries.end(), CompareSvxAutocorrWordList());
+
+    rSorted = std::move(aEntries);
+    return rSorted;
+}
+
+/* Tests whether the entry pFnd matches the text of rTxt that ends at nEndPos.
+ *
+ * A 'Short' text that starts and/or ends with ".*" is treated as a wildcard
+ * pattern. For a plain match pFnd itself is returned. For a wildcard match a
+ * new entry, made of the matched text and its replacement, is inserted into
+ * this list and returned. In both cases rStt is set to the start of the
+ * match. Returns nullptr if there is no match; this includes the case that
+ * the entry built for a wildcard match could not be inserted.
+ */ const SvxAutocorrWord* SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *pFnd,
+ const OUString &rTxt,
+ sal_Int32 &rStt,
+ sal_Int32 nEndPos) const
+{
+ const OUString& rChk = pFnd->GetShort();
+
+ sal_Int32 left_wildcard = rChk.startsWith( ".*" ) ? 2 : 0; // ".*word" pattern?
+ sal_Int32 right_wildcard = rChk.endsWith( ".*" ) ? 2 : 0; // "word.*" pattern?
+ sal_Int32 nSttWdPos = nEndPos;
+
+ // direct replacement of keywords surrounded by colons (for example, ":name:")
+ bool bColonNameColon = rTxt.getLength() > nEndPos &&
+ rTxt[nEndPos] == ':' && rChk[0] == ':' && rChk.endsWith(":");
+ if ( nEndPos + (bColonNameColon ? 1 : 0) < rChk.getLength() - left_wildcard - right_wildcard ) // not enough text in front of nEndPos for the pattern
+ return nullptr;
+
+ bool bWasWordDelim = false;
+ sal_Int32 nCalcStt = nEndPos - rChk.getLength() + left_wildcard; // where the pattern (without a leading ".*") would have to start
+ if (bColonNameColon)
+ nCalcStt++;
+ if( !right_wildcard && ( !nCalcStt || nCalcStt == rStt || left_wildcard || bColonNameColon ||
+ ( nCalcStt < rStt &&
+ IsWordDelim( rTxt[ nCalcStt - 1 ] ))) )
+ {
+ TransliterationWrapper& rCmp = GetIgnoreTranslWrapper();
+ OUString sWord = rTxt.copy(nCalcStt, rChk.getLength() - left_wildcard);
+ if( (!left_wildcard && rCmp.isEqual( rChk, sWord )) || (left_wildcard && rCmp.isEqual( rChk.copy(left_wildcard), sWord) ))
+ {
+ rStt = nCalcStt;
+ if (!left_wildcard)
+ {
+ // fdo#33899 avoid "1/2", "1/3".. to be replaced by fractions in dates, eg. 1/2/14
+ if (rTxt.getLength() > nEndPos && rTxt[nEndPos] == '/' && rChk.indexOf('/') != -1)
+ return nullptr;
+ return pFnd;
+ }
+ // get the first word delimiter position before the matching ".*word" pattern
+ while( rStt && !(bWasWordDelim = IsWordDelim( rTxt[ --rStt ])))
+ ;
+ if (bWasWordDelim) rStt++;
+ OUString left_pattern = rTxt.copy(rStt, nEndPos - rStt - rChk.getLength() + left_wildcard);
+ // avoid double spaces before simple "word" replacement
+ left_pattern += (left_pattern.getLength() == 0 && pFnd->GetLong()[0] == 0x20) ? pFnd->GetLong().subView(1) : pFnd->GetLong();
+ if( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(rTxt.copy(rStt, nEndPos - rStt), left_pattern) ) )
+ return pNew;
+ }
+ } else
+ // match "word.*" or ".*word.*" patterns, eg. "i18n.*", ".*---.*", TODO: add transliteration support
+ if ( right_wildcard )
+ {
+
+ OUString sTmp( rChk.copy( left_wildcard, rChk.getLength() - left_wildcard - right_wildcard ) ); // the pattern without its wildcards
+ // Get the last word delimiter position
+ bool not_suffix;
+
+ while( nSttWdPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ])))
+ ;
+ // search the first occurrence (with a left word delimitation, if needed)
+ sal_Int32 nFndPos = -1;
+ do {
+ nFndPos = rTxt.indexOf( sTmp, nFndPos + 1);
+ if (nFndPos == -1)
+ break;
+ not_suffix = bWasWordDelim && (nSttWdPos >= (nFndPos + sTmp.getLength()));
+ } while ( (!left_wildcard && nFndPos && !IsWordDelim( rTxt[ nFndPos - 1 ])) || not_suffix );
+
+ if ( nFndPos != -1 )
+ {
+ sal_Int32 extra_repl = nFndPos + sTmp.getLength() > nEndPos ? 1: 0; // for patterns with terminating characters, eg. "a:"
+
+ if ( left_wildcard )
+ {
+ // get the first word delimiter position before the matching ".*word.*" pattern
+ while( nFndPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nFndPos ])))
+ ;
+ if (bWasWordDelim) nFndPos++;
+ }
+ if (nEndPos + extra_repl <= nFndPos)
+ {
+ return nullptr;
+ }
+ // store matching pattern and its replacement as a new list item, eg. "i18ns" -> "internationalizations"
+ OUString aShort = rTxt.copy(nFndPos, nEndPos - nFndPos + extra_repl);
+
+ OUString aLong;
+ rStt = nFndPos;
+ if ( !left_wildcard )
+ {
+ sal_Int32 siz = nEndPos - nFndPos - sTmp.getLength();
+ aLong = pFnd->GetLong() + (siz > 0 ? rTxt.subView(nFndPos + sTmp.getLength(), siz) : u"");
+ } else {
+ OUStringBuffer buf;
+ do {
+ nSttWdPos = rTxt.indexOf( sTmp, nFndPos);
+ if (nSttWdPos != -1)
+ {
+ sal_Int32 nTmp(nFndPos);
+ while (nTmp < nSttWdPos && !IsWordDelim(rTxt[nTmp]))
+ nTmp++;
+ if (nTmp < nSttWdPos)
+ break; // word delimiter found
+ buf.append(rTxt.subView(nFndPos, nSttWdPos - nFndPos)).append(pFnd->GetLong());
+ nFndPos = nSttWdPos + sTmp.getLength();
+ }
+ } while (nSttWdPos != -1);
+ if (nEndPos - nFndPos > extra_repl)
+ buf.append(rTxt.subView(nFndPos, nEndPos - nFndPos));
+ aLong = buf.makeStringAndClear();
+ }
+ if ( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(aShort, aLong) ) )
+ {
+ if ( (rTxt.getLength() > nEndPos && IsWordDelim(rTxt[nEndPos])) || rTxt.getLength() == nEndPos ) // report the new entry only at a word delimiter or at the end of the text
+ return pNew;
+ }
+ }
+ }
+ return nullptr;
+}
+
+// Returns the first entry for which WordMatches() reports a match with the
+// text of rTxt ending at nEndPos, or nullptr if there is none. The hash is
+// searched first, then the sorted vector.
+const SvxAutocorrWord* SvxAutocorrWordList::SearchWordsInList(const OUString& rTxt, sal_Int32& rStt,
+                                                              sal_Int32 nEndPos) const
+{
+    auto const tryEntry = [&]( const SvxAutocorrWord& rCandidate ) -> const SvxAutocorrWord*
+    {
+        return WordMatches( &rCandidate, rTxt, rStt, nEndPos );
+    };
+
+    for (auto const& rPair : mpImpl->maHash)
+    {
+        const SvxAutocorrWord* pHit = tryEntry( rPair.second );
+        if( pHit )
+            return pHit;
+    }
+
+    for (auto const& rEntry : mpImpl->maSortedVector)
+    {
+        const SvxAutocorrWord* pHit = tryEntry( rEntry );
+        if( pHit )
+            return pHit;
+    }
+    return nullptr;
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */