diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:06:44 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:06:44 +0000 |
commit | ed5640d8b587fbcfed7dd7967f3de04b37a76f26 (patch) | |
tree | 7a5f7c6c9d02226d7471cb3cc8fbbf631b415303 /editeng/source/misc/svxacorr.cxx | |
parent | Initial commit. (diff) | |
download | libreoffice-ed5640d8b587fbcfed7dd7967f3de04b37a76f26.tar.xz libreoffice-ed5640d8b587fbcfed7dd7967f3de04b37a76f26.zip |
Adding upstream version 4:7.4.7.upstream/4%7.4.7upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'editeng/source/misc/svxacorr.cxx')
-rw-r--r-- | editeng/source/misc/svxacorr.cxx | 3069 |
1 files changed, 3069 insertions, 0 deletions
diff --git a/editeng/source/misc/svxacorr.cxx b/editeng/source/misc/svxacorr.cxx new file mode 100644 index 000000000..ff93ecee8 --- /dev/null +++ b/editeng/source/misc/svxacorr.cxx @@ -0,0 +1,3069 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <memory> +#include <string_view> +#include <sal/config.h> + +#include <com/sun/star/linguistic2/XSpellChecker1.hpp> +#include <com/sun/star/embed/XStorage.hpp> +#include <com/sun/star/io/IOException.hpp> +#include <com/sun/star/io/XStream.hpp> +#include <tools/urlobj.hxx> +#include <i18nlangtag/mslangid.hxx> +#include <i18nutil/transliteration.hxx> +#include <sal/log.hxx> +#include <osl/diagnose.h> +#include <utility> +#include <vcl/svapp.hxx> +#include <vcl/settings.hxx> +#include <svl/fstathelper.hxx> +#include <svl/urihelper.hxx> +#include <unotools/charclass.hxx> +#include <com/sun/star/i18n/UnicodeType.hpp> +#include <unotools/collatorwrapper.hxx> +#include <com/sun/star/i18n/UnicodeScript.hpp> +#include <com/sun/star/i18n/OrdinalSuffix.hpp> +#include <unotools/localedatawrapper.hxx> +#include <unotools/transliterationwrapper.hxx> +#include <comphelper/processfactory.hxx> +#include <comphelper/storagehelper.hxx> +#include <o3tl/string_view.hxx> +#include <editeng/editids.hrc> +#include <sot/storage.hxx> +#include <editeng/udlnitem.hxx> +#include <editeng/wghtitem.hxx> +#include <editeng/postitem.hxx> +#include <editeng/crossedoutitem.hxx> +#include <editeng/escapementitem.hxx> +#include <editeng/svxacorr.hxx> +#include <editeng/unolingu.hxx> +#include <vcl/window.hxx> +#include <com/sun/star/xml/sax/InputSource.hpp> +#include <com/sun/star/xml/sax/FastParser.hpp> +#include <com/sun/star/xml/sax/Writer.hpp> +#include <com/sun/star/xml/sax/SAXParseException.hpp> +#include <unotools/streamwrap.hxx> +#include "SvXMLAutoCorrectImport.hxx" +#include "SvXMLAutoCorrectExport.hxx" +#include "SvXMLAutoCorrectTokenHandler.hxx" +#include <ucbhelper/content.hxx> +#include <com/sun/star/ucb/ContentCreationException.hpp> +#include <com/sun/star/ucb/XCommandEnvironment.hpp> +#include <com/sun/star/ucb/TransferInfo.hpp> +#include <com/sun/star/ucb/NameClash.hpp> +#include <tools/diagnose_ex.h> +#include <xmloff/xmltoken.hxx> +#include <unordered_map> +#include <rtl/character.hxx> + +using namespace ::com::sun::star::ucb; +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::xml::sax; +using namespace ::com::sun::star; +using namespace ::xmloff::token; +using namespace ::utl; + +namespace { + +enum class Flags { + NONE = 0x00, + FullStop = 0x01, + ExclamationMark = 0x02, + QuestionMark = 0x04, +}; + +} + +namespace o3tl { + template<> struct typed_flags<Flags> : is_typed_flags<Flags, 0x07> {}; +} +const sal_Unicode cNonBreakingSpace = 0xA0; // UNICODE code for no break space + +constexpr OUStringLiteral pXMLImplWordStart_ExcptLstStr = u"WordExceptList.xml"; +constexpr OUStringLiteral pXMLImplCplStt_ExcptLstStr = u"SentenceExceptList.xml"; +constexpr OUStringLiteral pXMLImplAutocorr_ListStr = u"DocumentList.xml"; + +const char + /* also at these beginnings - Brackets and all kinds of begin characters */ + sImplSttSkipChars[] = "\"\'([{\x83\x84\x89\x91\x92\x93\x94", + /* also at these ends - Brackets and all kinds of begin characters */ + sImplEndSkipChars[] = "\"\')]}\x83\x84\x89\x91\x92\x93\x94"; + +static OUString EncryptBlockName_Imp(const OUString& rName); + +static bool NonFieldWordDelim( const sal_Unicode c ) +{ + return ' ' == c || '\t' == c || 0x0a == c || + cNonBreakingSpace == c || 0x2011 == c; +} + +static bool IsWordDelim( const sal_Unicode c ) +{ + return c == 0x1 || NonFieldWordDelim(c); +} + + +static bool IsLowerLetter( sal_Int32 nCharType ) +{ + return CharClass::isLetterType( nCharType ) && + ( css::i18n::KCharacterType::LOWER & nCharType); +} + +static bool IsUpperLetter( sal_Int32 nCharType ) +{ + return CharClass::isLetterType( nCharType ) && + ( css::i18n::KCharacterType::UPPER & nCharType); +} + +static bool lcl_IsUnsupportedUnicodeChar( CharClass const & rCC, const OUString& rTxt, + sal_Int32 nStt, sal_Int32 nEnd ) +{ + for( ; nStt < nEnd; ++nStt ) + { + css::i18n::UnicodeScript nScript = rCC.getScript( rTxt, nStt ); + switch( nScript ) + { + case css::i18n::UnicodeScript_kCJKRadicalsSupplement: + case css::i18n::UnicodeScript_kHangulJamo: + case css::i18n::UnicodeScript_kCJKSymbolPunctuation: + case css::i18n::UnicodeScript_kHiragana: + case css::i18n::UnicodeScript_kKatakana: + case css::i18n::UnicodeScript_kHangulCompatibilityJamo: + case css::i18n::UnicodeScript_kEnclosedCJKLetterMonth: + case css::i18n::UnicodeScript_kCJKCompatibility: + case css::i18n::UnicodeScript_kCJKUnifiedIdeographsExtensionA: + case css::i18n::UnicodeScript_kCJKUnifiedIdeograph: + case css::i18n::UnicodeScript_kHangulSyllable: + case css::i18n::UnicodeScript_kCJKCompatibilityIdeograph: + case css::i18n::UnicodeScript_kHalfwidthFullwidthForm: + return true; + default: ; //do nothing + } + } + return false; +} + +static bool lcl_IsSymbolChar( CharClass const & rCC, const OUString& rTxt, + sal_Int32 nStt, sal_Int32 nEnd ) +{ + for( ; nStt < nEnd; ++nStt ) + { + if( css::i18n::UnicodeType::PRIVATE_USE == rCC.getType( rTxt, nStt )) + return true; + } + return false; +} + +static bool lcl_IsInAsciiArr( const char* pArr, const sal_Unicode c ) +{ + // tdf#54409 check also typographical quotation marks in the case of skipped ASCII quotation marks + if ( 0x2018 <= c && c <= 0x201F && (pArr == sImplSttSkipChars || pArr == sImplEndSkipChars) ) + return true; + + bool bRet = false; + for( ; *pArr; ++pArr ) + if( *pArr == c ) + { + bRet = true; + break; + } + return bRet; +} + +SvxAutoCorrDoc::~SvxAutoCorrDoc() +{ +} + +// Called by the functions: +// - FnCapitalStartWord +// - FnCapitalStartSentence +// after the exchange of characters. Then the words, if necessary, can be inserted +// into the exception list. +void SvxAutoCorrDoc::SaveCpltSttWord( ACFlags, sal_Int32, const OUString&, + sal_Unicode ) +{ +} + +LanguageType SvxAutoCorrDoc::GetLanguage( sal_Int32 ) const +{ + return LANGUAGE_SYSTEM; +} + +static const LanguageTag& GetAppLang() +{ + return Application::GetSettings().GetLanguageTag(); +} + +/// Never use an unresolved LANGUAGE_SYSTEM. +static LanguageType GetDocLanguage( const SvxAutoCorrDoc& rDoc, sal_Int32 nPos ) +{ + LanguageType eLang = rDoc.GetLanguage( nPos ); + if (eLang == LANGUAGE_SYSTEM) + eLang = GetAppLang().getLanguageType(); // the current work locale + return eLang; +} + +static LocaleDataWrapper& GetLocaleDataWrapper( LanguageType nLang ) +{ + static std::unique_ptr<LocaleDataWrapper> xLclDtWrp; + LanguageTag aLcl( nLang ); + if (!xLclDtWrp || xLclDtWrp->getLoadedLanguageTag() != aLcl) + xLclDtWrp.reset(new LocaleDataWrapper(std::move(aLcl))); + return *xLclDtWrp; +} +static TransliterationWrapper& GetIgnoreTranslWrapper() +{ + static int bIsInit = 0; + static TransliterationWrapper aWrp( ::comphelper::getProcessComponentContext(), + TransliterationFlags::IGNORE_KANA | + TransliterationFlags::IGNORE_WIDTH ); + if( !bIsInit ) + { + aWrp.loadModuleIfNeeded( GetAppLang().getLanguageType() ); + bIsInit = 1; + } + return aWrp; +} +static CollatorWrapper& GetCollatorWrapper() +{ + static CollatorWrapper aCollWrp = []() + { + CollatorWrapper tmp( ::comphelper::getProcessComponentContext() ); + tmp.loadDefaultCollator( GetAppLang().getLocale(), 0 ); + return tmp; + }(); + return aCollWrp; +} + +bool SvxAutoCorrect::IsAutoCorrectChar( sal_Unicode cChar ) +{ + return cChar == '\0' || cChar == '\t' || cChar == 0x0a || + cChar == ' ' || cChar == '\'' || cChar == '\"' || + cChar == '*' || cChar == '_' || cChar == '%' || + cChar == '.' || cChar == ',' || cChar == ';' || + cChar == ':' || cChar == '?' || cChar == '!' || + cChar == '<' || cChar == '>' || + cChar == '/' || cChar == '-'; +} + +namespace +{ + bool IsCompoundWordDelimChar(sal_Unicode cChar) + { + return cChar == '-' || SvxAutoCorrect::IsAutoCorrectChar(cChar); + } +} + +bool SvxAutoCorrect::NeedsHardspaceAutocorr( sal_Unicode cChar ) +{ + return cChar == '%' || cChar == ';' || cChar == ':' || cChar == '?' || cChar == '!' || + cChar == '/' /*case for the urls exception*/; +} + +ACFlags SvxAutoCorrect::GetDefaultFlags() +{ + ACFlags nRet = ACFlags::Autocorrect + | ACFlags::CapitalStartSentence + | ACFlags::CapitalStartWord + | ACFlags::ChgOrdinalNumber + | ACFlags::ChgToEnEmDash + | ACFlags::AddNonBrkSpace + | ACFlags::TransliterateRTL + | ACFlags::ChgAngleQuotes + | ACFlags::ChgWeightUnderl + | ACFlags::SetINetAttr + | ACFlags::ChgQuotes + | ACFlags::SaveWordCplSttLst + | ACFlags::SaveWordWordStartLst + | ACFlags::CorrectCapsLock; + LanguageType eLang = GetAppLang().getLanguageType(); + if( eLang.anyOf( + LANGUAGE_ENGLISH, + LANGUAGE_ENGLISH_US, + LANGUAGE_ENGLISH_UK, + LANGUAGE_ENGLISH_AUS, + LANGUAGE_ENGLISH_CAN, + LANGUAGE_ENGLISH_NZ, + LANGUAGE_ENGLISH_EIRE, + LANGUAGE_ENGLISH_SAFRICA, + LANGUAGE_ENGLISH_JAMAICA, + LANGUAGE_ENGLISH_CARIBBEAN)) + nRet &= ~ACFlags(ACFlags::ChgQuotes|ACFlags::ChgSglQuotes); + return nRet; +} + +constexpr sal_Unicode cEmDash = 0x2014; +constexpr sal_Unicode cEnDash = 0x2013; +constexpr sal_Unicode cApostrophe = 0x2019; +constexpr sal_Unicode cLeftDoubleAngleQuote = 0xAB; +constexpr sal_Unicode cRightDoubleAngleQuote = 0xBB; +constexpr sal_Unicode cLeftSingleAngleQuote = 0x2039; +constexpr sal_Unicode cRightSingleAngleQuote = 0x203A; +// stop characters for searching preceding quotes +// (the first character is also the opening quote we are looking for) +const sal_Unicode aStopDoubleAngleQuoteStart[] = { 0x201E, 0x201D, 0x201C, 0 }; // preceding ,, +const sal_Unicode aStopDoubleAngleQuoteEnd[] = { cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0x201D, 0x201E, 0 }; // preceding >> +// preceding << for Romanian, handle also alternative primary closing quotation mark U+201C +const sal_Unicode aStopDoubleAngleQuoteEndRo[] = { cLeftDoubleAngleQuote, cRightDoubleAngleQuote, 0x201D, 0x201E, 0x201C, 0 }; +const sal_Unicode aStopSingleQuoteEnd[] = { 0x201A, 0x2018, 0x201C, 0x201E, 0 }; +const sal_Unicode aStopSingleQuoteEndRuUa[] = { 0x201E, 0x201C, cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0 }; + +SvxAutoCorrect::SvxAutoCorrect( OUString aShareAutocorrFile, + OUString aUserAutocorrFile ) + : sShareAutoCorrFile(std::move( aShareAutocorrFile )) + , sUserAutoCorrFile(std::move( aUserAutocorrFile )) + , eCharClassLang( LANGUAGE_DONTKNOW ) + , nFlags(SvxAutoCorrect::GetDefaultFlags()) + , cStartDQuote( 0 ) + , cEndDQuote( 0 ) + , cStartSQuote( 0 ) + , cEndSQuote( 0 ) +{ +} + +SvxAutoCorrect::SvxAutoCorrect( const SvxAutoCorrect& rCpy ) + : sShareAutoCorrFile( rCpy.sShareAutoCorrFile ) + , sUserAutoCorrFile( rCpy.sUserAutoCorrFile ) + , aSwFlags( rCpy.aSwFlags ) + , eCharClassLang(rCpy.eCharClassLang) + , nFlags( rCpy.nFlags & ~ACFlags(ACFlags::ChgWordLstLoad|ACFlags::CplSttLstLoad|ACFlags::WordStartLstLoad)) + , cStartDQuote( rCpy.cStartDQuote ) + , cEndDQuote( rCpy.cEndDQuote ) + , cStartSQuote( rCpy.cStartSQuote ) + , cEndSQuote( rCpy.cEndSQuote ) +{ +} + + +SvxAutoCorrect::~SvxAutoCorrect() +{ +} + +void SvxAutoCorrect::GetCharClass_( LanguageType eLang ) +{ + pCharClass.reset( new CharClass( LanguageTag( eLang)) ); + eCharClassLang = eLang; +} + +void SvxAutoCorrect::SetAutoCorrFlag( ACFlags nFlag, bool bOn ) +{ + ACFlags nOld = nFlags; + nFlags = bOn ? nFlags | nFlag + : nFlags & ~nFlag; + + if( !bOn ) + { + if( (nOld & ACFlags::CapitalStartSentence) != (nFlags & ACFlags::CapitalStartSentence) ) + nFlags &= ~ACFlags::CplSttLstLoad; + if( (nOld & ACFlags::CapitalStartWord) != (nFlags & ACFlags::CapitalStartWord) ) + nFlags &= ~ACFlags::WordStartLstLoad; + if( (nOld & ACFlags::Autocorrect) != (nFlags & ACFlags::Autocorrect) ) + nFlags &= ~ACFlags::ChgWordLstLoad; + } +} + + +// Correct TWo INitial CApitals +void SvxAutoCorrect::FnCapitalStartWord( SvxAutoCorrDoc& rDoc, const OUString& rTxt, + sal_Int32 nSttPos, sal_Int32 nEndPos, + LanguageType eLang ) +{ + CharClass& rCC = GetCharClass( eLang ); + + // Delete all non alphanumeric. Test the characters at the beginning/end of + // the word ( recognizes: "(min.", "/min.", and so on.) + for( ; nSttPos < nEndPos; ++nSttPos ) + if( rCC.isLetterNumeric( rTxt, nSttPos )) + break; + for( ; nSttPos < nEndPos; --nEndPos ) + if( rCC.isLetterNumeric( rTxt, nEndPos - 1 )) + break; + + // Is the word a compounded word separated by delimiters? + // If so, keep track of all delimiters so each constituent + // word can be checked for two initial capital letters. + std::deque<sal_Int32> aDelimiters; + + // Always check for two capitals at the beginning + // of the entire word, so start at nSttPos. + aDelimiters.push_back(nSttPos); + + // Find all compound word delimiters + for (sal_Int32 n = nSttPos; n < nEndPos; ++n) + { + if (IsCompoundWordDelimChar(rTxt[ n ])) + { + aDelimiters.push_back( n + 1 ); // Get position of char after delimiter + } + } + + // Decide where to put the terminating delimiter. + // If the last AutoCorrect char was a newline, then the AutoCorrect + // char will not be included in rTxt. + // If the last AutoCorrect char was not a newline, then the AutoCorrect + // character will be the last character in rTxt. + if (!IsCompoundWordDelimChar(rTxt[nEndPos-1])) + aDelimiters.push_back(nEndPos); + + // Iterate through the word and all words that compose it. + // Two capital letters at the beginning of word? + for (size_t nI = 0; nI < aDelimiters.size() - 1; ++nI) + { + nSttPos = aDelimiters[nI]; + nEndPos = aDelimiters[nI + 1]; + + if( nSttPos+2 < nEndPos && + IsUpperLetter( rCC.getCharacterType( rTxt, nSttPos )) && + IsUpperLetter( rCC.getCharacterType( rTxt, ++nSttPos )) && + // Is the third character a lower case + IsLowerLetter( rCC.getCharacterType( rTxt, nSttPos +1 )) && + // Do not replace special attributes + 0x1 != rTxt[ nSttPos ] && 0x2 != rTxt[ nSttPos ]) + { + // test if the word is in an exception list + OUString sWord( rTxt.copy( nSttPos - 1, nEndPos - nSttPos + 1 )); + if( !FindInWordStartExceptList(eLang, sWord) ) + { + // Check that word isn't correctly spelt before correcting: + css::uno::Reference< css::linguistic2::XSpellChecker1 > xSpeller = + LinguMgr::GetSpellChecker(); + if( xSpeller->hasLanguage(static_cast<sal_uInt16>(eLang)) ) + { + Sequence< css::beans::PropertyValue > aEmptySeq; + if (xSpeller->isValid(sWord, static_cast<sal_uInt16>(eLang), aEmptySeq)) + { + return; + } + } + sal_Unicode cSave = rTxt[ nSttPos ]; + OUString sChar = rCC.lowercase( OUString(cSave) ); + if( sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar )) + { + if( ACFlags::SaveWordWordStartLst & nFlags ) + rDoc.SaveCpltSttWord( ACFlags::CapitalStartWord, nSttPos, sWord, cSave ); + } + } + } + } +} + +// Format ordinal numbers suffixes (1st -> 1^st) +bool SvxAutoCorrect::FnChgOrdinalNumber( + SvxAutoCorrDoc& rDoc, const OUString& rTxt, + sal_Int32 nSttPos, sal_Int32 nEndPos, + LanguageType eLang) +{ + // 1st, 2nd, 3rd, 4 - 0th + // 201th or 201st + // 12th or 12nd + bool bChg = false; + + // In some languages ordinal suffixes should never be + // changed to superscript. Let's break for those languages. + if (!eLang.anyOf( + LANGUAGE_SWEDISH, + LANGUAGE_SWEDISH_FINLAND)) + { + CharClass& rCC = GetCharClass(eLang); + + for (; nSttPos < nEndPos; ++nSttPos) + if (!lcl_IsInAsciiArr(sImplSttSkipChars, rTxt[nSttPos])) + break; + for (; nSttPos < nEndPos; --nEndPos) + if (!lcl_IsInAsciiArr(sImplEndSkipChars, rTxt[nEndPos - 1])) + break; + + + // Get the last number in the string to check + sal_Int32 nNumEnd = nEndPos; + bool bFoundEnd = false; + bool isValidNumber = true; + sal_Int32 i = nEndPos; + while (i > nSttPos) + { + i--; + bool isDigit = rCC.isDigit(rTxt, i); + if (bFoundEnd) + isValidNumber &= (isDigit || !rCC.isLetter(rTxt, i)); + + if (isDigit && !bFoundEnd) + { + bFoundEnd = true; + nNumEnd = i; + } + } + + if (bFoundEnd && isValidNumber) { + sal_Int32 nNum = o3tl::toInt32(rTxt.subView(nSttPos, nNumEnd - nSttPos + 1)); + + // Check if the characters after that number correspond to the ordinal suffix + uno::Reference< i18n::XOrdinalSuffix > xOrdSuffix + = i18n::OrdinalSuffix::create(comphelper::getProcessComponentContext()); + + const uno::Sequence< OUString > aSuffixes = xOrdSuffix->getOrdinalSuffix(nNum, rCC.getLanguageTag().getLocale()); + for (OUString const & sSuffix : aSuffixes) + { + std::u16string_view sEnd = rTxt.subView(nNumEnd + 1, nEndPos - nNumEnd - 1); + + if (sSuffix == sEnd) + { + // Check if the ordinal suffix has to be set as super script + if (rCC.isLetter(sSuffix)) + { + // Do the change + SvxEscapementItem aSvxEscapementItem(DFLT_ESC_AUTO_SUPER, + DFLT_ESC_PROP, SID_ATTR_CHAR_ESCAPEMENT); + rDoc.SetAttr(nNumEnd + 1, nEndPos, + SID_ATTR_CHAR_ESCAPEMENT, + aSvxEscapementItem); + bChg = true; + } + } + } + } + } + return bChg; +} + +// Replace dashes +bool SvxAutoCorrect::FnChgToEnEmDash( + SvxAutoCorrDoc& rDoc, std::u16string_view rTxt, + sal_Int32 nSttPos, sal_Int32 nEndPos, + LanguageType eLang ) +{ + bool bRet = false; + CharClass& rCC = GetCharClass( eLang ); + if (eLang == LANGUAGE_SYSTEM) + eLang = GetAppLang().getLanguageType(); + bool bAlwaysUseEmDash = (eLang == LANGUAGE_RUSSIAN || eLang == LANGUAGE_UKRAINIAN); + + // replace " - " or " --" with "enDash" + if( 1 < nSttPos && 1 <= nEndPos - nSttPos ) + { + sal_Unicode cCh = rTxt[ nSttPos ]; + if( '-' == cCh ) + { + if( 1 < nEndPos - nSttPos && + ' ' == rTxt[ nSttPos-1 ] && + '-' == rTxt[ nSttPos+1 ]) + { + sal_Int32 n; + for( n = nSttPos+2; n < nEndPos && lcl_IsInAsciiArr( + sImplSttSkipChars,(cCh = rTxt[ n ])); + ++n ) + ; + + // found: " --[<AnySttChars>][A-z0-9] + if( rCC.isLetterNumeric( OUString(cCh) ) ) + { + for( n = nSttPos-1; n && lcl_IsInAsciiArr( + sImplEndSkipChars,(cCh = rTxt[ --n ])); ) + ; + + // found: "[A-z0-9][<AnyEndChars>] --[<AnySttChars>][A-z0-9] + if( rCC.isLetterNumeric( OUString(cCh) )) + { + rDoc.Delete( nSttPos, nSttPos + 2 ); + rDoc.Insert( nSttPos, bAlwaysUseEmDash ? OUString(cEmDash) : OUString(cEnDash) ); + bRet = true; + } + } + } + } + else if( 3 < nSttPos && + ' ' == rTxt[ nSttPos-1 ] && + '-' == rTxt[ nSttPos-2 ]) + { + sal_Int32 n, nLen = 1, nTmpPos = nSttPos - 2; + if( '-' == ( cCh = rTxt[ nTmpPos-1 ]) ) + { + --nTmpPos; + ++nLen; + cCh = rTxt[ nTmpPos-1 ]; + } + if( ' ' == cCh ) + { + for( n = nSttPos; n < nEndPos && lcl_IsInAsciiArr( + sImplSttSkipChars,(cCh = rTxt[ n ])); + ++n ) + ; + + // found: " - [<AnySttChars>][A-z0-9] + if( rCC.isLetterNumeric( OUString(cCh) ) ) + { + cCh = ' '; + for( n = nTmpPos-1; n && lcl_IsInAsciiArr( + sImplEndSkipChars,(cCh = rTxt[ --n ])); ) + ; + // found: "[A-z0-9][<AnyEndChars>] - [<AnySttChars>][A-z0-9] + if( rCC.isLetterNumeric( OUString(cCh) )) + { + rDoc.Delete( nTmpPos, nTmpPos + nLen ); + rDoc.Insert( nTmpPos, bAlwaysUseEmDash ? OUString(cEmDash) : OUString(cEnDash) ); + bRet = true; + } + } + } + } + } + + // Replace [A-z0-9]--[A-z0-9] double dash with "emDash" or "enDash" + // [0-9]--[0-9] double dash always replaced with "enDash" + // Finnish and Hungarian use enDash instead of emDash. + bool bEnDash = (eLang == LANGUAGE_HUNGARIAN || eLang == LANGUAGE_FINNISH); + if( 4 <= nEndPos - nSttPos ) + { + OUString sTmp( rTxt.substr( nSttPos, nEndPos - nSttPos ) ); + sal_Int32 nFndPos = sTmp.indexOf("--"); + if( nFndPos != -1 && nFndPos && + nFndPos + 2 < sTmp.getLength() && + ( rCC.isLetterNumeric( sTmp, nFndPos - 1 ) || + lcl_IsInAsciiArr( sImplEndSkipChars, rTxt[ nFndPos - 1 ] )) && + ( rCC.isLetterNumeric( sTmp, nFndPos + 2 ) || + lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nFndPos + 2 ] ))) + { + nSttPos = nSttPos + nFndPos; + rDoc.Delete( nSttPos, nSttPos + 2 ); + rDoc.Insert( nSttPos, (bEnDash || (rCC.isDigit( sTmp, nFndPos - 1 ) && + rCC.isDigit( sTmp, nFndPos + 2 )) ? OUString(cEnDash) : OUString(cEmDash)) ); + bRet = true; + } + } + return bRet; +} + +// Add non-breaking space before specific punctuation marks in French text +bool SvxAutoCorrect::FnAddNonBrkSpace( + SvxAutoCorrDoc& rDoc, const OUString& rTxt, + sal_Int32 nEndPos, + LanguageType eLang, bool& io_bNbspRunNext ) +{ + bool bRet = false; + + CharClass& rCC = GetCharClass( eLang ); + + if ( rCC.getLanguageTag().getLanguage() == "fr" ) + { + bool bFrCA = (rCC.getLanguageTag().getCountry() == "CA"); + OUString allChars = ":;?!%"; + OUString chars( allChars ); + if ( bFrCA ) + chars = ":"; + + sal_Unicode cChar = rTxt[ nEndPos ]; + bool bHasSpace = chars.indexOf( cChar ) != -1; + bool bIsSpecial = allChars.indexOf( cChar ) != -1; + if ( bIsSpecial ) + { + // Get the last word delimiter position + sal_Int32 nSttWdPos = nEndPos; + bool bWasWordDelim = false; + while( nSttWdPos ) + { + bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ]); + if (bWasWordDelim) + break; + } + + //See if the text is the start of a protocol string, e.g. have text of + //"http" see if it is the start of "http:" and if so leave it alone + sal_Int32 nIndex = nSttWdPos + (bWasWordDelim ? 1 : 0); + sal_Int32 nProtocolLen = nEndPos - nSttWdPos + 1; + if (nIndex + nProtocolLen <= rTxt.getLength()) + { + if (INetURLObject::CompareProtocolScheme(rTxt.subView(nIndex, nProtocolLen)) != INetProtocol::NotValid) + return false; + } + + // Check the presence of "://" in the word + sal_Int32 nStrPos = rTxt.indexOf( "://", nSttWdPos + 1 ); + if ( nStrPos == -1 && nEndPos > 0 ) + { + // Check the previous char + sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ]; + if ( ( chars.indexOf( cPrevChar ) == -1 ) && cPrevChar != '\t' ) + { + // Remove any previous normal space + sal_Int32 nPos = nEndPos - 1; + while ( cPrevChar == ' ' || cPrevChar == cNonBreakingSpace ) + { + if ( nPos == 0 ) break; + nPos--; + cPrevChar = rTxt[ nPos ]; + } + + nPos++; + if ( nEndPos - nPos > 0 ) + rDoc.Delete( nPos, nEndPos ); + + // Add the non-breaking space at the end pos + if ( bHasSpace ) + rDoc.Insert( nPos, OUString(cNonBreakingSpace) ); + io_bNbspRunNext = true; + bRet = true; + } + else if ( chars.indexOf( cPrevChar ) != -1 ) + io_bNbspRunNext = true; + } + } + else if ( cChar == '/' && nEndPos > 1 && rTxt.getLength() > (nEndPos - 1) ) + { + // Remove the hardspace right before to avoid formatting URLs + sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ]; + sal_Unicode cMaybeSpaceChar = rTxt[ nEndPos - 2 ]; + if ( cPrevChar == ':' && cMaybeSpaceChar == cNonBreakingSpace ) + { + rDoc.Delete( nEndPos - 2, nEndPos - 1 ); + bRet = true; + } + } + } + + return bRet; +} + +// URL recognition +bool SvxAutoCorrect::FnSetINetAttr( SvxAutoCorrDoc& rDoc, const OUString& rTxt, + sal_Int32 nSttPos, sal_Int32 nEndPos, + LanguageType eLang ) +{ + OUString sURL( URIHelper::FindFirstURLInText( rTxt, nSttPos, nEndPos, + GetCharClass( eLang ) )); + bool bRet = !sURL.isEmpty(); + if( bRet ) // so, set attribute: + rDoc.SetINetAttr( nSttPos, nEndPos, sURL ); + return bRet; +} + +// Automatic *bold*, /italic/, -strikeout- and _underline_ +bool SvxAutoCorrect::FnChgWeightUnderl( SvxAutoCorrDoc& rDoc, const OUString& rTxt, + sal_Int32 nEndPos ) +{ + // Condition: + // at the beginning: _, *, / or ~ after Space with the following !Space + // at the end: _, *, / or ~ before Space (word delimiter?) + + sal_Unicode cInsChar = rTxt[ nEndPos ]; // underline, bold, italic or strikeout + if( ++nEndPos != rTxt.getLength() && + !IsWordDelim( rTxt[ nEndPos ] ) ) + return false; + + --nEndPos; + + bool bAlphaNum = false; + sal_Int32 nPos = nEndPos; + sal_Int32 nFndPos = -1; + CharClass& rCC = GetCharClass( LANGUAGE_SYSTEM ); + + while( nPos ) + { + switch( sal_Unicode c = rTxt[ --nPos ] ) + { + case '_': + case '-': + case '/': + case '*': + if( c == cInsChar ) + { + if( bAlphaNum && nPos+1 < nEndPos && ( !nPos || + IsWordDelim( rTxt[ nPos-1 ])) && + !IsWordDelim( rTxt[ nPos+1 ])) + nFndPos = nPos; + else + // Condition is not satisfied, so cancel + nFndPos = -1; + nPos = 0; + } + break; + default: + if( !bAlphaNum ) + bAlphaNum = rCC.isLetterNumeric( rTxt, nPos ); + } + } + + if( -1 != nFndPos ) + { + // first delete the Character at the end - this allows insertion + // of an empty hint in SetAttr which would be removed by Delete + // (fdo#62536, AUTOFMT in Writer) + rDoc.Delete( nEndPos, nEndPos + 1 ); + rDoc.Delete( nFndPos, nFndPos + 1 ); + // Span the Attribute over the area + // the end. + if( '*' == cInsChar ) // Bold + { + SvxWeightItem aSvxWeightItem( WEIGHT_BOLD, SID_ATTR_CHAR_WEIGHT ); + rDoc.SetAttr( nFndPos, nEndPos - 1, + SID_ATTR_CHAR_WEIGHT, + aSvxWeightItem); + } + else if( '/' == cInsChar ) // Italic + { + SvxPostureItem aSvxPostureItem( ITALIC_NORMAL, SID_ATTR_CHAR_POSTURE ); + rDoc.SetAttr( nFndPos, nEndPos - 1, + SID_ATTR_CHAR_POSTURE, + aSvxPostureItem); + } + else if( '-' == cInsChar ) // Strikeout + { + SvxCrossedOutItem aSvxCrossedOutItem( STRIKEOUT_SINGLE, SID_ATTR_CHAR_STRIKEOUT ); + rDoc.SetAttr( nFndPos, nEndPos - 1, + SID_ATTR_CHAR_STRIKEOUT, + aSvxCrossedOutItem); + } + else // Underline + { + SvxUnderlineItem aSvxUnderlineItem( LINESTYLE_SINGLE, SID_ATTR_CHAR_UNDERLINE ); + rDoc.SetAttr( nFndPos, nEndPos - 1, + SID_ATTR_CHAR_UNDERLINE, + aSvxUnderlineItem); + } + } + + return -1 != nFndPos; +} + +// Capitalize first letter of every sentence +void SvxAutoCorrect::FnCapitalStartSentence( SvxAutoCorrDoc& rDoc, + const OUString& rTxt, bool bNormalPos, + sal_Int32 nSttPos, sal_Int32 nEndPos, + LanguageType eLang ) +{ + + if( rTxt.isEmpty() || nEndPos <= nSttPos ) + return; + + CharClass& rCC = GetCharClass( eLang ); + OUString aText( rTxt ); + const sal_Unicode *pStart = aText.getStr(), + *pStr = pStart + nEndPos, + *pWordStt = nullptr, + *pDelim = nullptr; + + bool bAtStart = false; + do { + --pStr; + if (rCC.isLetter(aText, pStr - pStart)) + { + if( !pWordStt ) + pDelim = pStr+1; + pWordStt = pStr; + } + else if (pWordStt && !rCC.isDigit(aText, pStr - pStart)) + { + if( (lcl_IsInAsciiArr( "-'", *pStr ) || *pStr == cApostrophe) && // These characters are allowed in words + pWordStt - 1 == pStr && + // Installation at beginning of paragraph. Replaced < by <= (#i38971#) + (pStart + 1) <= pStr && + rCC.isLetter(aText, pStr-1 - pStart)) + pWordStt = --pStr; + else + break; + } + bAtStart = (pStart == pStr); + } while( !bAtStart ); + + if (!pWordStt) + return; // no character to be replaced + + + if (rCC.isDigit(aText, pStr - pStart)) + return; // already ok + + if (IsUpperLetter(rCC.getCharacterType(aText, pWordStt - pStart))) + return; // already ok + + //See if the text is the start of a protocol string, e.g. have text of + //"http" see if it is the start of "http:" and if so leave it alone + sal_Int32 nIndex = pWordStt - pStart; + sal_Int32 nProtocolLen = pDelim - pWordStt + 1; + if (nIndex + nProtocolLen <= rTxt.getLength()) + { + if (INetURLObject::CompareProtocolScheme(rTxt.subView(nIndex, nProtocolLen)) != INetProtocol::NotValid) + return; // already ok + } + + if (0x1 == *pWordStt || 0x2 == *pWordStt) + return; // already ok + + // Only capitalize, if string before specified characters is long enough + if( *pDelim && 2 >= pDelim - pWordStt && + lcl_IsInAsciiArr( ".-)>", *pDelim ) ) + return; + + // tdf#59666 don't capitalize single Greek letters (except in Greek texts) + if ( 1 == pDelim - pWordStt && 0x03B1 <= *pWordStt && *pWordStt <= 0x03C9 && eLang != LANGUAGE_GREEK ) + return; + + if( !bAtStart ) // Still no beginning of a paragraph? + { + if (NonFieldWordDelim(*pStr)) + { + for (;;) + { + bAtStart = (pStart == pStr--); + if (bAtStart || !NonFieldWordDelim(*pStr)) + break; + } + } + // Asian full stop, full width full stop, full width exclamation mark + // and full width question marks are treated as word delimiters + else if ( 0x3002 != *pStr && 0xFF0E != *pStr && 0xFF01 != *pStr && + 0xFF1F != *pStr ) + return; // no valid separator -> no replacement + } + + // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list + if (FindInWordStartExceptList(eLang, OUString(pWordStt, pDelim - pWordStt))) + return; + + if( bAtStart ) // at the beginning of a paragraph? + { + // Check out the previous paragraph, if it exists. + // If so, then check to paragraph separator at the end. + OUString const*const pPrevPara = rDoc.GetPrevPara(bNormalPos); + if (!pPrevPara) + { + // valid separator -> replace + OUString sChar( *pWordStt ); + sChar = rCC.titlecase(sChar); //see fdo#56740 + if (sChar != OUStringChar(*pWordStt)) + rDoc.ReplaceRange( pWordStt - pStart, 1, sChar ); + return; + } + + aText = *pPrevPara; + bAtStart = false; + pStart = aText.getStr(); + pStr = pStart + aText.getLength(); + + do { // overwrite all blanks + --pStr; + if (!NonFieldWordDelim(*pStr)) + break; + bAtStart = (pStart == pStr); + } while( !bAtStart ); + + if( bAtStart ) + return; // no valid separator -> no replacement + } + + // Found [ \t]+[A-Z0-9]+ until here. Test now on the paragraph separator. + // all three can happen, but not more than once! + const sal_Unicode* pExceptStt = nullptr; + bool bContinue = true; + Flags nFlag = Flags::NONE; + do + { + switch (*pStr) + { + // Western and Asian full stop + case '.': + case 0x3002: + case 0xFF0E: + { + if (pStr >= pStart + 2 && *(pStr - 2) == '.') + { + //e.g. text "f.o.o. word": Now currently considering + //capitalizing word but second last character of + //previous word is a . So probably last word is an + //anagram that ends in . and not truly the end of a + //previous sentence, so don't autocapitalize this word + return; + } + if (nFlag & Flags::FullStop) + return; // no valid separator -> no replacement + nFlag |= Flags::FullStop; + pExceptStt = pStr; + } + break; + case '!': + case 0xFF01: + { + if (nFlag & Flags::ExclamationMark) + return; // no valid separator -> no replacement + nFlag |= Flags::ExclamationMark; + } + break; + case '?': + case 0xFF1F: + { + if (nFlag & Flags::QuestionMark) + return; // no valid separator -> no replacement + nFlag |= Flags::QuestionMark; + } + break; + default: + if (nFlag == Flags::NONE) + return; // no valid separator -> no replacement + else + bContinue = false; + break; + } + + if (bContinue && pStr-- == pStart) + { + return; // no valid separator -> no replacement + } + } while (bContinue); + if (Flags::FullStop != nFlag) + pExceptStt = nullptr; + + // Only capitalize, if string is long enough + if( 2 > ( pStr - pStart ) ) + return; + + if (!rCC.isLetterNumeric(aText, pStr-- - pStart)) + { + bool bValid = false, bAlphaFnd = false; + const sal_Unicode* pTmpStr = pStr; + while( !bValid ) + { + if( rCC.isDigit( aText, pTmpStr - pStart ) ) + { + bValid = true; + pStr = pTmpStr - 1; + } + else if( rCC.isLetter( aText, pTmpStr - pStart ) ) + { + if( bAlphaFnd ) + { + bValid = true; + pStr = pTmpStr; + } + else + bAlphaFnd = true; + } + else if (bAlphaFnd || NonFieldWordDelim(*pTmpStr)) + break; + + if( pTmpStr == pStart ) + break; + + --pTmpStr; + } + + if( !bValid ) + return; // no valid separator -> no replacement + } + + bool bNumericOnly = '0' <= *(pStr+1) && *(pStr+1) <= '9'; + + // Search for the beginning of the word + while (!NonFieldWordDelim(*pStr)) + { + if( bNumericOnly && rCC.isLetter( aText, pStr - pStart ) ) + bNumericOnly = false; + + if( pStart == pStr ) + break; + + --pStr; + } + + if( bNumericOnly ) // consists of only numbers, then not + return; + + if (NonFieldWordDelim(*pStr)) + ++pStr; + + OUString sWord; + + // check on the basis of the exception list + if( pExceptStt ) + { + sWord = OUString(pStr, pExceptStt - pStr + 1); + if( FindInCplSttExceptList(eLang, sWord) ) + return; + + // Delete all non alphanumeric. Test the characters at the + // beginning/end of the word ( recognizes: "(min.", "/min.", and so on.) + OUString sTmp( sWord ); + while( !sTmp.isEmpty() && + !rCC.isLetterNumeric( sTmp, 0 ) ) + sTmp = sTmp.copy(1); + + // Remove all non alphanumeric characters towards the end up until + // the last one. + sal_Int32 nLen = sTmp.getLength(); + while( nLen && !rCC.isLetterNumeric( sTmp, nLen-1 ) ) + --nLen; + if( nLen + 1 < sTmp.getLength() ) + sTmp = sTmp.copy( 0, nLen + 1 ); + + if( !sTmp.isEmpty() && sTmp.getLength() != sWord.getLength() && + FindInCplSttExceptList(eLang, sTmp)) + return; + + if(FindInCplSttExceptList(eLang, sWord, true)) + return; + } + + // Ok, then replace + sal_Unicode cSave = *pWordStt; + nSttPos = pWordStt - rTxt.getStr(); + OUString sChar = rCC.titlecase(OUString(cSave)); //see fdo#56740 + bool bRet = sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar ); + + // Perhaps someone wants to have the word + if( bRet && ACFlags::SaveWordCplSttLst & nFlags ) + rDoc.SaveCpltSttWord( ACFlags::CapitalStartSentence, nSttPos, sWord, cSave ); +} + +// Correct accidental use of cAPS LOCK key +bool SvxAutoCorrect::FnCorrectCapsLock( SvxAutoCorrDoc& rDoc, const OUString& rTxt, + sal_Int32 nSttPos, sal_Int32 nEndPos, + LanguageType eLang ) +{ + if (nEndPos - nSttPos < 2) + // string must be at least 2-character long. + return false; + + CharClass& rCC = GetCharClass( eLang ); + + // Check the first 2 letters. + if ( !IsLowerLetter(rCC.getCharacterType(rTxt, nSttPos)) ) + return false; + + if ( !IsUpperLetter(rCC.getCharacterType(rTxt, nSttPos+1)) ) + return false; + + OUStringBuffer aConverted; + aConverted.append( rCC.uppercase(OUString(rTxt[nSttPos])) ); + aConverted.append( rCC.lowercase(OUString(rTxt[nSttPos+1])) ); + + // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list + if (FindInWordStartExceptList(eLang, rTxt.copy(nSttPos, nEndPos - nSttPos))) + return false; + + for( sal_Int32 i = nSttPos+2; i < nEndPos; ++i ) + { + if ( IsLowerLetter(rCC.getCharacterType(rTxt, i)) ) + // A lowercase letter disqualifies the whole text. + return false; + + if ( IsUpperLetter(rCC.getCharacterType(rTxt, i)) ) + // Another uppercase letter. Convert it. + aConverted.append( rCC.lowercase(OUString(rTxt[i])) ); + else + // This is not an alphabetic letter. Leave it as-is. + aConverted.append( rTxt[i] ); + } + + // Replace the word. + rDoc.Delete(nSttPos, nEndPos); + rDoc.Insert(nSttPos, aConverted.makeStringAndClear()); + + return true; +} + + +sal_Unicode SvxAutoCorrect::GetQuote( sal_Unicode cInsChar, bool bSttQuote, + LanguageType eLang ) const +{ + sal_Unicode cRet = bSttQuote ? ( '\"' == cInsChar + ? GetStartDoubleQuote() + : GetStartSingleQuote() ) + : ( '\"' == cInsChar + ? GetEndDoubleQuote() + : GetEndSingleQuote() ); + if( !cRet ) + { + // then through the Language find the right character + if( LANGUAGE_NONE == eLang ) + cRet = cInsChar; + else + { + LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang ); + OUString sRet( bSttQuote + ? ( '\"' == cInsChar + ? rLcl.getDoubleQuotationMarkStart() + : rLcl.getQuotationMarkStart() ) + : ( '\"' == cInsChar + ? rLcl.getDoubleQuotationMarkEnd() + : rLcl.getQuotationMarkEnd() )); + cRet = !sRet.isEmpty() ? sRet[0] : cInsChar; + } + } + return cRet; +} + +void SvxAutoCorrect::InsertQuote( SvxAutoCorrDoc& rDoc, sal_Int32 nInsPos, + sal_Unicode cInsChar, bool bSttQuote, + bool bIns, LanguageType eLang, ACQuotes eType ) const +{ + sal_Unicode cRet; + + if ( eType == ACQuotes::DoubleAngleQuote ) + { + bool bSwiss = eLang == LANGUAGE_FRENCH_SWISS; + // pressing " inside a quotation -> use second level angle quotes + bool bLeftQuote = '\"' == cInsChar && + // start position and Romanian OR + // not start position and Hungarian + bSttQuote == (eLang != LANGUAGE_HUNGARIAN); + cRet = ( '<' == cInsChar || bLeftQuote ) + ? ( bSwiss ? cLeftSingleAngleQuote : cLeftDoubleAngleQuote ) + : ( bSwiss ? cRightSingleAngleQuote : cRightDoubleAngleQuote ); + } + else if ( eType == ACQuotes::UseApostrophe ) + cRet = cApostrophe; + else + cRet = GetQuote( cInsChar, bSttQuote, eLang ); + + OUString sChg( cInsChar ); + if( bIns ) + rDoc.Insert( nInsPos, sChg ); + else + rDoc.Replace( nInsPos, sChg ); + + sChg = OUString(cRet); + + if( eType == ACQuotes::NonBreakingSpace ) + { + if( rDoc.Insert( bSttQuote ? nInsPos+1 : nInsPos, OUStringChar(cNonBreakingSpace) )) + { + if( !bSttQuote ) + ++nInsPos; + } + } + else if( eType == ACQuotes::DoubleAngleQuote && cInsChar != '\"' ) + { + rDoc.Delete( nInsPos-1, nInsPos); + --nInsPos; + } + + rDoc.Replace( nInsPos, sChg ); + + // i' -> I' in English (last step for the Undo) + if( eType == ACQuotes::CapitalizeIAm ) + rDoc.Replace( nInsPos-1, "I" ); +} + +OUString SvxAutoCorrect::GetQuote( SvxAutoCorrDoc const & rDoc, sal_Int32 nInsPos, + sal_Unicode cInsChar, bool bSttQuote ) +{ + const LanguageType eLang = GetDocLanguage( rDoc, nInsPos ); + sal_Unicode cRet = GetQuote( cInsChar, bSttQuote, eLang ); + + OUString sRet(cRet); + + if( '\"' == cInsChar ) + { + if (primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS) + { + if( bSttQuote ) + sRet += " "; + else + sRet = " " + sRet; + } + } + return sRet; +} + +// search preceding opening quote in the paragraph before the insert position +static bool lcl_HasPrecedingChar( std::u16string_view rTxt, sal_Int32 nPos, + const sal_Unicode sPrecedingChar, const sal_Unicode* aStopChars ) +{ + sal_Unicode cTmpChar; + + do { + cTmpChar = rTxt[ --nPos ]; + if ( cTmpChar == sPrecedingChar ) + return true; + + for ( const sal_Unicode* pCh = aStopChars; *pCh; ++pCh ) + if ( cTmpChar == *pCh ) + return false; + + } while ( nPos > 0 ); + + return false; +} + +// WARNING: rText may become invalid, see comment below +void SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc& rDoc, const OUString& rTxt, + sal_Int32 nInsPos, sal_Unicode cChar, + bool bInsert, bool& io_bNbspRunNext, vcl::Window const * pFrameWin ) +{ + bool bIsNextRun = io_bNbspRunNext; + io_bNbspRunNext = false; // if it was set, then it has to be turned off + + do{ // only for middle check loop !! + if( cChar ) + { + // Prevent double space + if( nInsPos && ' ' == cChar && + IsAutoCorrFlag( ACFlags::IgnoreDoubleSpace ) && + ' ' == rTxt[ nInsPos - 1 ]) + { + break; + } + + bool bSingle = '\'' == cChar; + bool bIsReplaceQuote = + (IsAutoCorrFlag( ACFlags::ChgQuotes ) && ('\"' == cChar )) || + (IsAutoCorrFlag( ACFlags::ChgSglQuotes ) && bSingle ); + if( bIsReplaceQuote ) + { + bool bSttQuote = !nInsPos; + ACQuotes eType = ACQuotes::NONE; + const LanguageType eLang = GetDocLanguage( rDoc, nInsPos ); + if (!bSttQuote) + { + sal_Unicode cPrev = rTxt[ nInsPos-1 ]; + bSttQuote = NonFieldWordDelim(cPrev) || + lcl_IsInAsciiArr( "([{", cPrev ) || + ( cEmDash == cPrev ) || + ( cEnDash == cPrev ); + // tdf#38394 use opening quotation mark << in French l'<<word>> + if ( !bSingle && !bSttQuote && cPrev == cApostrophe && + primary(eLang) == primary(LANGUAGE_FRENCH) && + ( ( ( nInsPos == 2 || ( nInsPos > 2 && IsWordDelim( rTxt[ nInsPos-3 ] ) ) ) && + // abbreviated form of ce, de, je, la, le, ne, me, te, se or si + OUString("cdjlnmtsCDJLNMTS").indexOf( rTxt[ nInsPos-2 ] ) > -1 ) || + ( ( nInsPos == 3 || (nInsPos > 3 && IsWordDelim( rTxt[ nInsPos-4 ] ) ) ) && + // abbreviated form of que + ( rTxt[ nInsPos-2 ] == 'u' || rTxt[ nInsPos-2 ] == 'U' ) && + ( rTxt[ nInsPos-3 ] == 'q' || rTxt[ nInsPos-3 ] == 'Q' ) ) ) ) + { + bSttQuote = true; + } + // tdf#108423 for capitalization of English i'm + else if ( bSingle && ( cPrev == 'i' ) && + primary(eLang) == primary(LANGUAGE_ENGLISH) && + ( nInsPos == 1 || IsWordDelim( rTxt[ nInsPos-2 ] ) ) ) + { + eType = ACQuotes::CapitalizeIAm; + } + // tdf#133524 support >>Hungarian<< and <<Romanian>> secondary level quotations + else if ( !bSingle && nInsPos && + ( ( eLang == LANGUAGE_HUNGARIAN && + lcl_HasPrecedingChar( rTxt, nInsPos, + bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEnd[0], + bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEnd + 1 ) ) || + ( eLang.anyOf( + LANGUAGE_ROMANIAN, + LANGUAGE_ROMANIAN_MOLDOVA ) && + lcl_HasPrecedingChar( rTxt, nInsPos, + bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEndRo[0], + bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEndRo + 1 ) ) ) ) + { + LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang ); + // only if the opening double quotation mark is the default one + if ( rLcl.getDoubleQuotationMarkStart() == OUStringChar(aStopDoubleAngleQuoteStart[0]) ) + eType = ACQuotes::DoubleAngleQuote; + } + else if ( bSingle && nInsPos && !bSttQuote && + // tdf#128860 use apostrophe outside of second level quotation in Czech, German, Icelandic, + // Slovak and Slovenian instead of the – in this case, bad – closing quotation mark U+2018. + // tdf#123786 the same for Russian and Ukrainian + ( ( eLang.anyOf ( + LANGUAGE_CZECH, + LANGUAGE_GERMAN, + LANGUAGE_GERMAN_SWISS, + LANGUAGE_GERMAN_AUSTRIAN, + LANGUAGE_GERMAN_LUXEMBOURG, + LANGUAGE_GERMAN_LIECHTENSTEIN, + LANGUAGE_ICELANDIC, + LANGUAGE_SLOVAK, + LANGUAGE_SLOVENIAN ) && + !lcl_HasPrecedingChar( rTxt, nInsPos, aStopSingleQuoteEnd[0], aStopSingleQuoteEnd + 1 ) ) || + ( eLang.anyOf ( + LANGUAGE_RUSSIAN, + LANGUAGE_UKRAINIAN ) && + !lcl_HasPrecedingChar( rTxt, nInsPos, aStopSingleQuoteEndRuUa[0], aStopSingleQuoteEndRuUa + 1 ) ) ) ) + { + LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang ); + CharClass& rCC = GetCharClass( eLang ); + if ( ( rLcl.getQuotationMarkStart() == OUStringChar(aStopSingleQuoteEnd[0]) || + rLcl.getQuotationMarkStart() == OUStringChar(aStopSingleQuoteEndRuUa[0]) ) && + // use apostrophe only after letters, not after digits or punctuation + rCC.isLetter(rTxt, nInsPos-1) ) + { + eType = ACQuotes::UseApostrophe; + } + } + } + + if ( eType == ACQuotes::NONE && !bSingle && + ( primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS ) ) + eType = ACQuotes::NonBreakingSpace; + + InsertQuote( rDoc, nInsPos, cChar, bSttQuote, bInsert, eLang, eType ); + break; + } + // tdf#133524 change "<<" and ">>" to double angle quotation marks + else if ( IsAutoCorrFlag( ACFlags::ChgQuotes ) && + IsAutoCorrFlag( ACFlags::ChgAngleQuotes ) && + ('<' == cChar || '>' == cChar) && + nInsPos > 0 && cChar == rTxt[ nInsPos-1 ] ) + { + const LanguageType eLang = GetDocLanguage( rDoc, nInsPos ); + if ( eLang.anyOf( + LANGUAGE_CATALAN, // primary level + LANGUAGE_CATALAN_VALENCIAN, // primary level + LANGUAGE_FINNISH, // alternative primary level + LANGUAGE_FRENCH_SWISS, // second level + LANGUAGE_GALICIAN, // primary level + LANGUAGE_HUNGARIAN, // second level + LANGUAGE_POLISH, // second level + LANGUAGE_PORTUGUESE, // primary level + LANGUAGE_PORTUGUESE_BRAZILIAN, // primary level + LANGUAGE_ROMANIAN, // second level + LANGUAGE_ROMANIAN_MOLDOVA, // second level + LANGUAGE_SWEDISH, // alternative primary level + LANGUAGE_SWEDISH_FINLAND, // alternative primary level + LANGUAGE_UKRAINIAN, // primary level + LANGUAGE_USER_ARAGONESE, // primary level + LANGUAGE_USER_ASTURIAN ) || // primary level + primary(eLang) == primary(LANGUAGE_GERMAN) || // alternative primary level + primary(eLang) == primary(LANGUAGE_SPANISH) ) // primary level + { + InsertQuote( rDoc, nInsPos, cChar, false, bInsert, eLang, ACQuotes::DoubleAngleQuote ); + break; + } + } + + if( bInsert ) + rDoc.Insert( nInsPos, OUString(cChar) ); + else + rDoc.Replace( nInsPos, OUString(cChar) ); + + // Hardspaces autocorrection + if ( IsAutoCorrFlag( ACFlags::AddNonBrkSpace ) ) + { + if ( NeedsHardspaceAutocorr( cChar ) && + FnAddNonBrkSpace( rDoc, rTxt, nInsPos, GetDocLanguage( rDoc, nInsPos ), io_bNbspRunNext ) ) + { + ; + } + else if ( bIsNextRun && !IsAutoCorrectChar( cChar ) ) + { + // Remove the NBSP if it wasn't an autocorrection + if ( nInsPos != 0 && NeedsHardspaceAutocorr( rTxt[ nInsPos - 1 ] ) && + cChar != ' ' && cChar != '\t' && cChar != cNonBreakingSpace ) + { + // Look for the last HARD_SPACE + sal_Int32 nPos = nInsPos - 1; + bool bContinue = true; + while ( bContinue ) + { + const sal_Unicode cTmpChar = rTxt[ nPos ]; + if ( cTmpChar == cNonBreakingSpace ) + { + rDoc.Delete( nPos, nPos + 1 ); + bContinue = false; + } + else if ( !NeedsHardspaceAutocorr( cTmpChar ) || nPos == 0 ) + bContinue = false; + nPos--; + } + } + } + } + } + + if( !nInsPos ) + break; + + sal_Int32 nPos = nInsPos - 1; + + if( IsWordDelim( rTxt[ nPos ])) + break; + + // Set bold or underline automatically? + if (('*' == cChar || '_' == cChar || '/' == cChar || '-' == cChar) && (nPos+1 < rTxt.getLength())) + { + if( IsAutoCorrFlag( ACFlags::ChgWeightUnderl ) ) + { + FnChgWeightUnderl( rDoc, rTxt, nPos+1 ); + } + break; + } + + while( nPos && !IsWordDelim( rTxt[ --nPos ])) + ; + + // Found a Paragraph-start or a Blank, search for the word shortcut in + // auto. + sal_Int32 nCapLttrPos = nPos+1; // on the 1st Character + if( !nPos && !IsWordDelim( rTxt[ 0 ])) + --nCapLttrPos; // begin of paragraph and no blank + + const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos ); + CharClass& rCC = GetCharClass( eLang ); + + // no symbol characters + if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nInsPos )) + break; + + if( IsAutoCorrFlag( ACFlags::Autocorrect ) && + // tdf#134940 fix regression of arrow "-->" resulted by premature + // replacement of "--" since '>' was added to IsAutoCorrectChar() + '>' != cChar ) + { + // WARNING ATTENTION: rTxt is an alias of the text node's OUString + // and becomes INVALID if ChgAutoCorrWord returns true! + // => use aPara/pPara to create a valid copy of the string! + OUString aPara; + OUString* pPara = IsAutoCorrFlag(ACFlags::CapitalStartSentence) ? &aPara : nullptr; + + bool bChgWord = rDoc.ChgAutoCorrWord( nCapLttrPos, nInsPos, + *this, pPara ); + if( !bChgWord ) + { + sal_Int32 nCapLttrPos1 = nCapLttrPos, nInsPos1 = nInsPos; + while( nCapLttrPos1 < nInsPos && + lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nCapLttrPos1 ] ) + ) + ++nCapLttrPos1; + while( nCapLttrPos1 < nInsPos1 && nInsPos1 && + lcl_IsInAsciiArr( sImplEndSkipChars, rTxt[ nInsPos1-1 ] ) + ) + --nInsPos1; + + if( (nCapLttrPos1 != nCapLttrPos || nInsPos1 != nInsPos ) && + nCapLttrPos1 < nInsPos1 && + rDoc.ChgAutoCorrWord( nCapLttrPos1, nInsPos1, *this, pPara )) + { + bChgWord = true; + nCapLttrPos = nCapLttrPos1; + } + } + + if( bChgWord ) + { + if( !aPara.isEmpty() ) + { + sal_Int32 nEnd = nCapLttrPos; + while( nEnd < aPara.getLength() && + !IsWordDelim( aPara[ nEnd ])) + ++nEnd; + + // Capital letter at beginning of paragraph? + if( IsAutoCorrFlag( ACFlags::CapitalStartSentence ) ) + { + FnCapitalStartSentence( rDoc, aPara, false, + nCapLttrPos, nEnd, eLang ); + } + + if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) ) + { + FnChgToEnEmDash( rDoc, aPara, nCapLttrPos, nEnd, eLang ); + } + } + break; + } + } + + if( IsAutoCorrFlag( ACFlags::TransliterateRTL ) && GetDocLanguage( rDoc, nInsPos ) == LANGUAGE_HUNGARIAN ) + { + // WARNING ATTENTION: rTxt is an alias of the text node's OUString + // and becomes INVALID if TransliterateRTLWord returns true! + if ( rDoc.TransliterateRTLWord( nCapLttrPos, nInsPos ) ) + break; + } + + if( ( IsAutoCorrFlag( ACFlags::ChgOrdinalNumber ) && + (nInsPos >= 2 ) && // fdo#69762 avoid autocorrect for 2e-3 + ( '-' != cChar || 'E' != rtl::toAsciiUpperCase(rTxt[nInsPos-1]) || '0' > rTxt[nInsPos-2] || '9' < rTxt[nInsPos-2] ) && + FnChgOrdinalNumber( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) || + ( IsAutoCorrFlag( ACFlags::SetINetAttr ) && + ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) && + FnSetINetAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ) + ; + else + { + bool bLockKeyOn = pFrameWin && (pFrameWin->GetIndicatorState() & KeyIndicatorState::CAPSLOCK); + bool bUnsupported = lcl_IsUnsupportedUnicodeChar( rCC, rTxt, nCapLttrPos, nInsPos ); + + if ( bLockKeyOn && IsAutoCorrFlag( ACFlags::CorrectCapsLock ) && + FnCorrectCapsLock( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) + { + // Correct accidental use of cAPS LOCK key (do this only when + // the caps or shift lock key is pressed). Turn off the caps + // lock afterwards. + pFrameWin->SimulateKeyPress( KEY_CAPSLOCK ); + } + + // Capital letter at beginning of paragraph ? + if( !bUnsupported && + IsAutoCorrFlag( ACFlags::CapitalStartSentence ) ) + { + FnCapitalStartSentence( rDoc, rTxt, true, nCapLttrPos, nInsPos, eLang ); + } + + // Two capital letters at beginning of word ?? + if( !bUnsupported && + IsAutoCorrFlag( ACFlags::CapitalStartWord ) ) + { + FnCapitalStartWord( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ); + } + + if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) ) + { + FnChgToEnEmDash( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ); + } + } + + } while( false ); +} + +SvxAutoCorrectLanguageLists& SvxAutoCorrect::GetLanguageList_( + LanguageType eLang ) +{ + LanguageTag aLanguageTag( eLang); + if (m_aLangTable.find(aLanguageTag) == m_aLangTable.end()) + (void)CreateLanguageFile(aLanguageTag); + return *(m_aLangTable.find(aLanguageTag)->second); +} + +void SvxAutoCorrect::SaveCplSttExceptList( LanguageType eLang ) +{ + auto const iter = m_aLangTable.find(LanguageTag(eLang)); + if (iter != m_aLangTable.end() && iter->second) + iter->second->SaveCplSttExceptList(); + else + { + SAL_WARN("editeng", "Save an empty list? "); + } +} + +void SvxAutoCorrect::SaveWordStartExceptList(LanguageType eLang) +{ + auto const iter = m_aLangTable.find(LanguageTag(eLang)); + if (iter != m_aLangTable.end() && iter->second) + iter->second->SaveWordStartExceptList(); + else + { + SAL_WARN("editeng", "Save an empty list? "); + } +} + +// Adds a single word. The list will immediately be written to the file! +bool SvxAutoCorrect::AddCplSttException( const OUString& rNew, + LanguageType eLang ) +{ + SvxAutoCorrectLanguageLists* pLists = nullptr; + // either the right language is present or it will be this in the general list + auto iter = m_aLangTable.find(LanguageTag(eLang)); + if (iter != m_aLangTable.end()) + pLists = iter->second.get(); + else + { + LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED); + iter = m_aLangTable.find(aLangTagUndetermined); + if (iter != m_aLangTable.end()) + pLists = iter->second.get(); + else if(CreateLanguageFile(aLangTagUndetermined)) + pLists = m_aLangTable.find(aLangTagUndetermined)->second.get(); + } + OSL_ENSURE(pLists, "No auto correction data"); + return pLists && pLists->AddToCplSttExceptList(rNew); +} + +// Adds a single word. The list will immediately be written to the file! +bool SvxAutoCorrect::AddWordStartException( const OUString& rNew, + LanguageType eLang ) +{ + SvxAutoCorrectLanguageLists* pLists = nullptr; + //either the right language is present or it is set in the general list + auto iter = m_aLangTable.find(LanguageTag(eLang)); + if (iter != m_aLangTable.end()) + pLists = iter->second.get(); + else + { + LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED); + iter = m_aLangTable.find(aLangTagUndetermined); + if (iter != m_aLangTable.end()) + pLists = iter->second.get(); + else if(CreateLanguageFile(aLangTagUndetermined)) + pLists = m_aLangTable.find(aLangTagUndetermined)->second.get(); + } + OSL_ENSURE(pLists, "No auto correction file!"); + return pLists && pLists->AddToWordStartExceptList(rNew); +} + +OUString SvxAutoCorrect::GetPrevAutoCorrWord(SvxAutoCorrDoc const& rDoc, const OUString& rTxt, + sal_Int32 nPos) +{ + OUString sRet; + if( !nPos ) + return sRet; + + sal_Int32 nEnd = nPos; + + // it must be followed by a blank or tab! + if( ( nPos < rTxt.getLength() && + !IsWordDelim( rTxt[ nPos ])) || + IsWordDelim( rTxt[ --nPos ])) + return sRet; + + while( nPos && !IsWordDelim( rTxt[ --nPos ])) + ; + + // Found a Paragraph-start or a Blank, search for the word shortcut in + // auto. + sal_Int32 nCapLttrPos = nPos+1; // on the 1st Character + if( !nPos && !IsWordDelim( rTxt[ 0 ])) + --nCapLttrPos; // Beginning of paragraph and no Blank! + + while( lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nCapLttrPos ]) ) + if( ++nCapLttrPos >= nEnd ) + return sRet; + + if( 3 > nEnd - nCapLttrPos ) + return sRet; + + const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos ); + + CharClass& rCC = GetCharClass(eLang); + + if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nEnd )) + return sRet; + + sRet = rTxt.copy( nCapLttrPos, nEnd - nCapLttrPos ); + return sRet; +} + +// static +std::vector<OUString> SvxAutoCorrect::GetChunkForAutoText(std::u16string_view rTxt, + const sal_Int32 nPos) +{ + constexpr sal_Int32 nMinLen = 3; + constexpr sal_Int32 nMaxLen = 9; + std::vector<OUString> aRes; + if (nPos >= nMinLen) + { + sal_Int32 nBegin = std::max<sal_Int32>(nPos - nMaxLen, 0); + // TODO: better detect word boundaries (not only whitespaces, but also e.g. punctuation) + if (nBegin > 0 && !IsWordDelim(rTxt[nBegin-1])) + { + while (nBegin + nMinLen <= nPos && !IsWordDelim(rTxt[nBegin])) + ++nBegin; + } + if (nBegin + nMinLen <= nPos) + { + OUString sRes( rTxt.substr(nBegin, nPos - nBegin) ); + aRes.push_back(sRes); + bool bLastStartedWithDelim = IsWordDelim(sRes[0]); + for (sal_Int32 i = 1; i <= sRes.getLength() - nMinLen; ++i) + { + bool bAdd = bLastStartedWithDelim; + bLastStartedWithDelim = IsWordDelim(sRes[i]); + bAdd = bAdd || bLastStartedWithDelim; + if (bAdd) + aRes.push_back(sRes.copy(i)); + } + } + } + return aRes; +} + +bool SvxAutoCorrect::CreateLanguageFile( const LanguageTag& rLanguageTag, bool bNewFile ) +{ + OSL_ENSURE(m_aLangTable.find(rLanguageTag) == m_aLangTable.end(), "Language already exists "); + + OUString sUserDirFile( GetAutoCorrFileName( rLanguageTag, true )); + OUString sShareDirFile( sUserDirFile ); + + SvxAutoCorrectLanguageLists* pLists = nullptr; + + tools::Time nMinTime( 0, 2 ), nAktTime( tools::Time::SYSTEM ), nLastCheckTime( tools::Time::EMPTY ); + + auto nFndPos = aLastFileTable.find(rLanguageTag); + if(nFndPos != aLastFileTable.end() && + (nLastCheckTime.SetTime(nFndPos->second), nLastCheckTime < nAktTime) && + nAktTime - nLastCheckTime < nMinTime) + { + // no need to test the file, because the last check is not older then + // 2 minutes. + if( bNewFile ) + { + sShareDirFile = sUserDirFile; + pLists = new SvxAutoCorrectLanguageLists( *this, sShareDirFile, sUserDirFile ); + LanguageTag aTmp(rLanguageTag); // this insert() needs a non-const reference + m_aLangTable.insert(std::make_pair(aTmp, std::unique_ptr<SvxAutoCorrectLanguageLists>(pLists))); + aLastFileTable.erase(nFndPos); + } + } + else if( + ( FStatHelper::IsDocument( sUserDirFile ) || + FStatHelper::IsDocument( sShareDirFile = + GetAutoCorrFileName( rLanguageTag ) ) || + FStatHelper::IsDocument( sShareDirFile = + GetAutoCorrFileName( rLanguageTag, false, false, true) ) + ) || + ( sShareDirFile = sUserDirFile, bNewFile ) + ) + { + pLists = new SvxAutoCorrectLanguageLists( *this, sShareDirFile, sUserDirFile ); + LanguageTag aTmp(rLanguageTag); // this insert() needs a non-const reference + m_aLangTable.insert(std::make_pair(aTmp, std::unique_ptr<SvxAutoCorrectLanguageLists>(pLists))); + if (nFndPos != aLastFileTable.end()) + aLastFileTable.erase(nFndPos); + } + else if( !bNewFile ) + { + aLastFileTable[rLanguageTag] = nAktTime.GetTime(); + } + return pLists != nullptr; +} + +bool SvxAutoCorrect::PutText( const OUString& rShort, const OUString& rLong, + LanguageType eLang ) +{ + LanguageTag aLanguageTag( eLang); + auto const iter = m_aLangTable.find(aLanguageTag); + if (iter != m_aLangTable.end()) + return iter->second->PutText(rShort, rLong); + if(CreateLanguageFile(aLanguageTag)) + return m_aLangTable.find(aLanguageTag)->second->PutText(rShort, rLong); + return false; +} + +void SvxAutoCorrect::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries, + std::vector<SvxAutocorrWord>& aDeleteEntries, + LanguageType eLang ) +{ + LanguageTag aLanguageTag( eLang); + auto const iter = m_aLangTable.find(aLanguageTag); + if (iter != m_aLangTable.end()) + { + iter->second->MakeCombinedChanges( aNewEntries, aDeleteEntries ); + } + else if(CreateLanguageFile( aLanguageTag )) + { + m_aLangTable.find( aLanguageTag )->second->MakeCombinedChanges( aNewEntries, aDeleteEntries ); + } +} + +// - return the replacement text (only for SWG-Format, all other +// can be taken from the word list!) +bool SvxAutoCorrect::GetLongText( const OUString&, OUString& ) +{ + return false; +} + +void SvxAutoCorrect::refreshBlockList( const uno::Reference< embed::XStorage >& ) +{ +} + +// Text with attribution (only the SWG - SWG format!) +bool SvxAutoCorrect::PutText( const css::uno::Reference < css::embed::XStorage >&, + const OUString&, const OUString&, SfxObjectShell&, OUString& ) +{ + return false; +} + +OUString EncryptBlockName_Imp(const OUString& rName) +{ + OUStringBuffer aName; + aName.append('#').append(rName); + for (sal_Int32 nLen = rName.getLength(), nPos = 1; nPos < nLen; ++nPos) + { + if (lcl_IsInAsciiArr( "!/:.\\", aName[nPos])) + aName[nPos] &= 0x0f; + } + return aName.makeStringAndClear(); +} + +/* This code is copied from SwXMLTextBlocks::GeneratePackageName */ +static void GeneratePackageName ( std::u16string_view rShort, OUString& rPackageName ) +{ + OString sByte(OUStringToOString(rShort, RTL_TEXTENCODING_UTF7)); + OUStringBuffer aBuf(OStringToOUString(sByte, RTL_TEXTENCODING_ASCII_US)); + + for (sal_Int32 nPos = 0; nPos < aBuf.getLength(); ++nPos) + { + switch (aBuf[nPos]) + { + case '!': + case '/': + case ':': + case '.': + case '\\': + aBuf[nPos] = '_'; + break; + default: + break; + } + } + + rPackageName = aBuf.makeStringAndClear(); +} + +static const SvxAutocorrWord* lcl_SearchWordsInList( + SvxAutoCorrectLanguageLists* pList, const OUString& rTxt, + sal_Int32& rStt, sal_Int32 nEndPos) +{ + const SvxAutocorrWordList* pAutoCorrWordList = pList->GetAutocorrWordList(); + return pAutoCorrWordList->SearchWordsInList( rTxt, rStt, nEndPos ); +} + +// the search for the words in the substitution table +const SvxAutocorrWord* SvxAutoCorrect::SearchWordsInList( + const OUString& rTxt, sal_Int32& rStt, sal_Int32 nEndPos, + SvxAutoCorrDoc&, LanguageTag& rLang ) +{ + const SvxAutocorrWord* pRet = nullptr; + LanguageTag aLanguageTag( rLang); + if( aLanguageTag.isSystemLocale() ) + aLanguageTag.reset( MsLangId::getConfiguredSystemLanguage()); + + /* TODO-BCP47: this is so ugly, should all maybe be a proper fallback + * list instead? */ + + // First search for eLang, then US-English -> English + // and last in LANGUAGE_UNDETERMINED + if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false)) + { + //the language is available - so bring it on + std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second; + pRet = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos ); + if( pRet ) + { + rLang = aLanguageTag; + return pRet; + } + else + return nullptr; + } + + // If it still could not be found here, then keep on searching + LanguageType eLang = aLanguageTag.getLanguageType(); + // the primary language for example EN + aLanguageTag.reset(aLanguageTag.getLanguage()); + LanguageType nTmpKey = aLanguageTag.getLanguageType(false); + if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED && + (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || + CreateLanguageFile(aLanguageTag, false))) + { + //the language is available - so bring it on + std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second; + pRet = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos ); + if( pRet ) + { + rLang = aLanguageTag; + return pRet; + } + } + + if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() || + CreateLanguageFile(aLanguageTag, false)) + { + //the language is available - so bring it on + std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second; + pRet = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos ); + if( pRet ) + { + rLang = aLanguageTag; + return pRet; + } + } + return nullptr; +} + +bool SvxAutoCorrect::FindInWordStartExceptList( LanguageType eLang, + const OUString& sWord ) +{ + LanguageTag aLanguageTag( eLang); + + /* TODO-BCP47: again horrible ugliness */ + + // First search for eLang, then primary language of eLang + // and last in LANGUAGE_UNDETERMINED + + if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false)) + { + //the language is available - so bring it on + auto const& pList = m_aLangTable.find(aLanguageTag)->second; + if(pList->GetWordStartExceptList()->find(sWord) != pList->GetWordStartExceptList()->end() ) + return true; + } + + // If it still could not be found here, then keep on searching + // the primary language for example EN + aLanguageTag.reset(aLanguageTag.getLanguage()); + LanguageType nTmpKey = aLanguageTag.getLanguageType(false); + if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED && + (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || + CreateLanguageFile(aLanguageTag, false))) + { + //the language is available - so bring it on + auto const& pList = m_aLangTable.find(aLanguageTag)->second; + if(pList->GetWordStartExceptList()->find(sWord) != pList->GetWordStartExceptList()->end() ) + return true; + } + + if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() || + CreateLanguageFile(aLanguageTag, false)) + { + //the language is available - so bring it on + auto const& pList = m_aLangTable.find(aLanguageTag)->second; + if(pList->GetWordStartExceptList()->find(sWord) != pList->GetWordStartExceptList()->end() ) + return true; + } + return false; +} + +static bool lcl_FindAbbreviation(const SvStringsISortDtor* pList, const OUString& sWord) +{ + SvStringsISortDtor::const_iterator it = pList->find( "~" ); + SvStringsISortDtor::size_type nPos = it - pList->begin(); + if( nPos < pList->size() ) + { + OUString sLowerWord(sWord.toAsciiLowerCase()); + OUString sAbr; + for( SvStringsISortDtor::size_type n = nPos; n < pList->size(); ++n ) + { + sAbr = (*pList)[ n ]; + if (sAbr[0] != '~') + break; + // ~ and ~. are not allowed! + if( 2 < sAbr.getLength() && sAbr.getLength() - 1 <= sWord.getLength() ) + { + OUString sLowerAbk(sAbr.toAsciiLowerCase()); + for (sal_Int32 i = sLowerAbk.getLength(), ii = sLowerWord.getLength(); i;) + { + if( !--i ) // agrees + return true; + + if( sLowerAbk[i] != sLowerWord[--ii]) + break; + } + } + } + } + OSL_ENSURE( !(nPos && '~' == (*pList)[ --nPos ][ 0 ] ), + "Wrongly sorted exception list?" ); + return false; +} + +bool SvxAutoCorrect::FindInCplSttExceptList(LanguageType eLang, + const OUString& sWord, bool bAbbreviation) +{ + LanguageTag aLanguageTag( eLang); + + /* TODO-BCP47: did I mention terrible horrible ugliness? */ + + // First search for eLang, then primary language of eLang + // and last in LANGUAGE_UNDETERMINED + + if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false)) + { + //the language is available - so bring it on + const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList(); + if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() ) + return true; + } + + // If it still could not be found here, then keep on searching + // the primary language for example EN + aLanguageTag.reset(aLanguageTag.getLanguage()); + LanguageType nTmpKey = aLanguageTag.getLanguageType(false); + if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED && + (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || + CreateLanguageFile(aLanguageTag, false))) + { + //the language is available - so bring it on + const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList(); + if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() ) + return true; + } + + if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() || + CreateLanguageFile(aLanguageTag, false)) + { + //the language is available - so bring it on + const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList(); + if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() ) + return true; + } + return false; +} + +OUString SvxAutoCorrect::GetAutoCorrFileName( const LanguageTag& rLanguageTag, + bool bNewFile, bool bTst, bool bUnlocalized ) const +{ + OUString sRet, sExt( rLanguageTag.getBcp47() ); + if (bUnlocalized) + { + // we don't want variant, so we'll take "fr" instead of "fr-CA" for example + std::vector< OUString > vecFallBackStrings = rLanguageTag.getFallbackStrings(false); + if (!vecFallBackStrings.empty()) + sExt = vecFallBackStrings[0]; + } + + sExt = "_" + sExt + ".dat"; + if( bNewFile ) + sRet = sUserAutoCorrFile + sExt; + else if( !bTst ) + sRet = sShareAutoCorrFile + sExt; + else + { + // test first in the user directory - if not exist, then + sRet = sUserAutoCorrFile + sExt; + if( !FStatHelper::IsDocument( sRet )) + sRet = sShareAutoCorrFile + sExt; + } + return sRet; +} + +SvxAutoCorrectLanguageLists::SvxAutoCorrectLanguageLists( + SvxAutoCorrect& rParent, + OUString aShareAutoCorrectFile, + OUString aUserAutoCorrectFile) +: sShareAutoCorrFile(std::move( aShareAutoCorrectFile )), + sUserAutoCorrFile(std::move( aUserAutoCorrectFile )), + aModifiedDate( Date::EMPTY ), + aModifiedTime( tools::Time::EMPTY ), + aLastCheckTime( tools::Time::EMPTY ), + rAutoCorrect(rParent), + nFlags(ACFlags::NONE) +{ +} + +SvxAutoCorrectLanguageLists::~SvxAutoCorrectLanguageLists() +{ +} + +bool SvxAutoCorrectLanguageLists::IsFileChanged_Imp() +{ + // Access the file system only every 2 minutes to check the date stamp + bool bRet = false; + + tools::Time nMinTime( 0, 2 ); + tools::Time nAktTime( tools::Time::SYSTEM ); + if( aLastCheckTime <= nAktTime) // overflow? + return false; + nAktTime -= aLastCheckTime; + if( nAktTime > nMinTime ) // min time past + { + Date aTstDate( Date::EMPTY ); tools::Time aTstTime( tools::Time::EMPTY ); + if( FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile, + &aTstDate, &aTstTime ) && + ( aModifiedDate != aTstDate || aModifiedTime != aTstTime )) + { + bRet = true; + // then remove all the lists fast! + if( (ACFlags::CplSttLstLoad & nFlags) && pCplStt_ExcptLst ) + { + pCplStt_ExcptLst.reset(); + } + if( (ACFlags::WordStartLstLoad & nFlags) && pWordStart_ExcptLst ) + { + pWordStart_ExcptLst.reset(); + } + if( (ACFlags::ChgWordLstLoad & nFlags) && pAutocorr_List ) + { + pAutocorr_List.reset(); + } + nFlags &= ~ACFlags(ACFlags::CplSttLstLoad | ACFlags::WordStartLstLoad | ACFlags::ChgWordLstLoad ); + } + aLastCheckTime = tools::Time( tools::Time::SYSTEM ); + } + return bRet; +} + +void SvxAutoCorrectLanguageLists::LoadXMLExceptList_Imp( + std::unique_ptr<SvStringsISortDtor>& rpLst, + const OUString& sStrmName, + tools::SvRef<SotStorage>& rStg) +{ + if( rpLst ) + rpLst->clear(); + else + rpLst.reset( new SvStringsISortDtor ); + + { + if( rStg.is() && rStg->IsStream( sStrmName ) ) + { + tools::SvRef<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName, + ( StreamMode::READ | StreamMode::SHARE_DENYWRITE | StreamMode::NOCREATE ) ); + if( ERRCODE_NONE != xStrm->GetError()) + { + xStrm.clear(); + rStg.clear(); + RemoveStream_Imp( sStrmName ); + } + else + { + uno::Reference< uno::XComponentContext > xContext = + comphelper::getProcessComponentContext(); + + xml::sax::InputSource aParserInput; + aParserInput.sSystemId = sStrmName; + + xStrm->Seek( 0 ); + xStrm->SetBufferSize( 8 * 1024 ); + aParserInput.aInputStream = new utl::OInputStreamWrapper( *xStrm ); + + // get filter + uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLExceptionListImport ( xContext, *rpLst ); + + // connect parser and filter + uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create( xContext ); + uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler; + xParser->setFastDocumentHandler( xFilter ); + xParser->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE ); + xParser->setTokenHandler( xTokenHandler ); + + // parse + try + { + xParser->parseStream( aParserInput ); + } + catch( const xml::sax::SAXParseException& ) + { + // re throw ? + } + catch( const xml::sax::SAXException& ) + { + // re throw ? + } + catch( const io::IOException& ) + { + // re throw ? + } + } + } + + // Set time stamp + FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile, + &aModifiedDate, &aModifiedTime ); + aLastCheckTime = tools::Time( tools::Time::SYSTEM ); + } + +} + +void SvxAutoCorrectLanguageLists::SaveExceptList_Imp( + const SvStringsISortDtor& rLst, + const OUString& sStrmName, + tools::SvRef<SotStorage> const &rStg, + bool bConvert ) +{ + if( !rStg.is() ) + return; + + if( rLst.empty() ) + { + rStg->Remove( sStrmName ); + rStg->Commit(); + } + else + { + tools::SvRef<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName, + ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) ); + if( xStrm.is() ) + { + xStrm->SetSize( 0 ); + xStrm->SetBufferSize( 8192 ); + xStrm->SetProperty( "MediaType", Any(OUString( "text/xml" )) ); + + + uno::Reference< uno::XComponentContext > xContext = + comphelper::getProcessComponentContext(); + + uno::Reference < xml::sax::XWriter > xWriter = xml::sax::Writer::create(xContext); + uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *xStrm ); + xWriter->setOutputStream(xOut); + + uno::Reference < xml::sax::XDocumentHandler > xHandler(xWriter, UNO_QUERY_THROW); + rtl::Reference< SvXMLExceptionListExport > xExp( new SvXMLExceptionListExport( xContext, rLst, sStrmName, xHandler ) ); + + xExp->exportDoc( XML_BLOCK_LIST ); + + xStrm->Commit(); + if( xStrm->GetError() == ERRCODE_NONE ) + { + xStrm.clear(); + if (!bConvert) + { + rStg->Commit(); + if( ERRCODE_NONE != rStg->GetError() ) + { + rStg->Remove( sStrmName ); + rStg->Commit(); + } + } + } + } + } +} + +SvxAutocorrWordList* SvxAutoCorrectLanguageLists::LoadAutocorrWordList() +{ + if( pAutocorr_List ) + pAutocorr_List->DeleteAndDestroyAll(); + else + pAutocorr_List.reset( new SvxAutocorrWordList() ); + + try + { + uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sShareAutoCorrFile, embed::ElementModes::READ ); + uno::Reference < io::XStream > xStrm = xStg->openStreamElement( pXMLImplAutocorr_ListStr, embed::ElementModes::READ ); + uno::Reference< uno::XComponentContext > xContext = comphelper::getProcessComponentContext(); + + xml::sax::InputSource aParserInput; + aParserInput.sSystemId = pXMLImplAutocorr_ListStr; + aParserInput.aInputStream = xStrm->getInputStream(); + + // get parser + uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create(xContext); + SAL_INFO("editeng", "AutoCorrect Import" ); + uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLAutoCorrectImport( xContext, pAutocorr_List.get(), rAutoCorrect, xStg ); + uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler; + + // connect parser and filter + xParser->setFastDocumentHandler( xFilter ); + xParser->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE ); + xParser->setTokenHandler(xTokenHandler); + + // parse + xParser->parseStream( aParserInput ); + } + catch ( const uno::Exception& ) + { + TOOLS_WARN_EXCEPTION("editeng", "when loading " << sShareAutoCorrFile); + } + + // Set time stamp + FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile, + &aModifiedDate, &aModifiedTime ); + aLastCheckTime = tools::Time( tools::Time::SYSTEM ); + + return pAutocorr_List.get(); +} + +const SvxAutocorrWordList* SvxAutoCorrectLanguageLists::GetAutocorrWordList() +{ + if( !( ACFlags::ChgWordLstLoad & nFlags ) || IsFileChanged_Imp() ) + { + LoadAutocorrWordList(); + if( !pAutocorr_List ) + { + OSL_ENSURE( false, "No valid list" ); + pAutocorr_List.reset( new SvxAutocorrWordList() ); + } + nFlags |= ACFlags::ChgWordLstLoad; + } + return pAutocorr_List.get(); +} + +SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetCplSttExceptList() +{ + if( !( ACFlags::CplSttLstLoad & nFlags ) || IsFileChanged_Imp() ) + { + LoadCplSttExceptList(); + if( !pCplStt_ExcptLst ) + { + OSL_ENSURE( false, "No valid list" ); + pCplStt_ExcptLst.reset( new SvStringsISortDtor ); + } + nFlags |= ACFlags::CplSttLstLoad; + } + return pCplStt_ExcptLst.get(); +} + +bool SvxAutoCorrectLanguageLists::AddToCplSttExceptList(const OUString& rNew) +{ + bool bRet = false; + if( !rNew.isEmpty() && GetCplSttExceptList()->insert( rNew ).second ) + { + MakeUserStorage_Impl(); + tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); + + SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg ); + + xStg = nullptr; + // Set time stamp + FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile, + &aModifiedDate, &aModifiedTime ); + aLastCheckTime = tools::Time( tools::Time::SYSTEM ); + bRet = true; + } + return bRet; +} + +bool SvxAutoCorrectLanguageLists::AddToWordStartExceptList(const OUString& rNew) +{ + bool bRet = false; + if( !rNew.isEmpty() && GetWordStartExceptList()->insert( rNew ).second ) + { + MakeUserStorage_Impl(); + tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); + + SaveExceptList_Imp( *pWordStart_ExcptLst, pXMLImplWordStart_ExcptLstStr, xStg ); + + xStg = nullptr; + // Set time stamp + FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile, + &aModifiedDate, &aModifiedTime ); + aLastCheckTime = tools::Time( tools::Time::SYSTEM ); + bRet = true; + } + return bRet; +} + +SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadCplSttExceptList() +{ + try + { + tools::SvRef<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE ); + if( xStg.is() && xStg->IsContained( pXMLImplCplStt_ExcptLstStr ) ) + LoadXMLExceptList_Imp( pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg ); + } + catch (const css::ucb::ContentCreationException&) + { + } + return pCplStt_ExcptLst.get(); +} + +void SvxAutoCorrectLanguageLists::SaveCplSttExceptList() +{ + MakeUserStorage_Impl(); + tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); + + SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg ); + + xStg = nullptr; + + // Set time stamp + FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile, + &aModifiedDate, &aModifiedTime ); + aLastCheckTime = tools::Time( tools::Time::SYSTEM ); +} + +SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadWordStartExceptList() +{ + try + { + tools::SvRef<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE ); + if( xStg.is() && xStg->IsContained( pXMLImplWordStart_ExcptLstStr ) ) + LoadXMLExceptList_Imp( pWordStart_ExcptLst, pXMLImplWordStart_ExcptLstStr, xStg ); + } + catch (const css::ucb::ContentCreationException &) + { + TOOLS_WARN_EXCEPTION("editeng", "SvxAutoCorrectLanguageLists::LoadWordStartExceptList"); + } + return pWordStart_ExcptLst.get(); +} + +void SvxAutoCorrectLanguageLists::SaveWordStartExceptList() +{ + MakeUserStorage_Impl(); + tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); + + SaveExceptList_Imp( *pWordStart_ExcptLst, pXMLImplWordStart_ExcptLstStr, xStg ); + + xStg = nullptr; + // Set time stamp + FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile, + &aModifiedDate, &aModifiedTime ); + aLastCheckTime = tools::Time( tools::Time::SYSTEM ); +} + +SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetWordStartExceptList() +{ + if( !( ACFlags::WordStartLstLoad & nFlags ) || IsFileChanged_Imp() ) + { + LoadWordStartExceptList(); + if( !pWordStart_ExcptLst ) + { + OSL_ENSURE( false, "No valid list" ); + pWordStart_ExcptLst.reset( new SvStringsISortDtor ); + } + nFlags |= ACFlags::WordStartLstLoad; + } + return pWordStart_ExcptLst.get(); +} + +void SvxAutoCorrectLanguageLists::RemoveStream_Imp( const OUString& rName ) +{ + if( sShareAutoCorrFile != sUserAutoCorrFile ) + { + tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); + if( xStg.is() && ERRCODE_NONE == xStg->GetError() && + xStg->IsStream( rName ) ) + { + xStg->Remove( rName ); + xStg->Commit(); + + xStg = nullptr; + } + } +} + +void SvxAutoCorrectLanguageLists::MakeUserStorage_Impl() +{ + // The conversion needs to happen if the file is already in the user + // directory and is in the old format. Additionally it needs to + // happen when the file is being copied from share to user. + + bool bError = false, bConvert = false, bCopy = false; + INetURLObject aDest; + INetURLObject aSource; + + if (sUserAutoCorrFile != sShareAutoCorrFile ) + { + aSource = INetURLObject ( sShareAutoCorrFile ); + aDest = INetURLObject ( sUserAutoCorrFile ); + if ( SotStorage::IsOLEStorage ( sShareAutoCorrFile ) ) + { + aDest.SetExtension ( u"bak" ); + bConvert = true; + } + bCopy = true; + } + else if ( SotStorage::IsOLEStorage ( sUserAutoCorrFile ) ) + { + aSource = INetURLObject ( sUserAutoCorrFile ); + aDest = INetURLObject ( sUserAutoCorrFile ); + aDest.SetExtension ( u"bak" ); + bCopy = bConvert = true; + } + if (bCopy) + { + try + { + OUString sMain(aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri )); + sal_Int32 nSlashPos = sMain.lastIndexOf('/'); + sMain = sMain.copy(0, nSlashPos); + ::ucbhelper::Content aNewContent( sMain, uno::Reference< XCommandEnvironment >(), comphelper::getProcessComponentContext() ); + TransferInfo aInfo; + aInfo.NameClash = NameClash::OVERWRITE; + aInfo.NewTitle = aDest.GetLastName(); + aInfo.SourceURL = aSource.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ); + aInfo.MoveData = false; + aNewContent.executeCommand( "transfer", Any(aInfo)); + } + catch (...) + { + bError = true; + } + } + if (bConvert && !bError) + { + tools::SvRef<SotStorage> xSrcStg = new SotStorage( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), StreamMode::READ ); + tools::SvRef<SotStorage> xDstStg = new SotStorage( sUserAutoCorrFile, StreamMode::WRITE ); + + if( xSrcStg.is() && xDstStg.is() ) + { + std::unique_ptr<SvStringsISortDtor> pTmpWordList; + + if (xSrcStg->IsContained( pXMLImplWordStart_ExcptLstStr ) ) + LoadXMLExceptList_Imp( pTmpWordList, pXMLImplWordStart_ExcptLstStr, xSrcStg ); + + if (pTmpWordList) + { + SaveExceptList_Imp( *pTmpWordList, pXMLImplWordStart_ExcptLstStr, xDstStg, true ); + pTmpWordList.reset(); + } + + + if (xSrcStg->IsContained( pXMLImplCplStt_ExcptLstStr ) ) + LoadXMLExceptList_Imp( pTmpWordList, pXMLImplCplStt_ExcptLstStr, xSrcStg ); + + if (pTmpWordList) + { + SaveExceptList_Imp( *pTmpWordList, pXMLImplCplStt_ExcptLstStr, xDstStg, true ); + pTmpWordList->clear(); + } + + GetAutocorrWordList(); + MakeBlocklist_Imp( *xDstStg ); + sShareAutoCorrFile = sUserAutoCorrFile; + xDstStg = nullptr; + try + { + ::ucbhelper::Content aContent ( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), uno::Reference < XCommandEnvironment >(), comphelper::getProcessComponentContext() ); + aContent.executeCommand ( "delete", Any ( true ) ); + } + catch (...) + { + } + } + } + else if( bCopy && !bError ) + sShareAutoCorrFile = sUserAutoCorrFile; +} + +bool SvxAutoCorrectLanguageLists::MakeBlocklist_Imp( SotStorage& rStg ) +{ + bool bRet = true, bRemove = !pAutocorr_List || pAutocorr_List->empty(); + if( !bRemove ) + { + tools::SvRef<SotStorageStream> refList = rStg.OpenSotStream( pXMLImplAutocorr_ListStr, + ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) ); + if( refList.is() ) + { + refList->SetSize( 0 ); + refList->SetBufferSize( 8192 ); + refList->SetProperty( "MediaType", Any(OUString( "text/xml" )) ); + + uno::Reference< uno::XComponentContext > xContext = + comphelper::getProcessComponentContext(); + + uno::Reference < xml::sax::XWriter > xWriter = xml::sax::Writer::create(xContext); + uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *refList ); + xWriter->setOutputStream(xOut); + + rtl::Reference< SvXMLAutoCorrectExport > xExp( new SvXMLAutoCorrectExport( xContext, pAutocorr_List.get(), pXMLImplAutocorr_ListStr, xWriter ) ); + + xExp->exportDoc( XML_BLOCK_LIST ); + + refList->Commit(); + bRet = ERRCODE_NONE == refList->GetError(); + if( bRet ) + { + refList.clear(); + rStg.Commit(); + if( ERRCODE_NONE != rStg.GetError() ) + { + bRemove = true; + bRet = false; + } + } + } + else + bRet = false; + } + + if( bRemove ) + { + rStg.Remove( pXMLImplAutocorr_ListStr ); + rStg.Commit(); + } + + return bRet; +} + +bool SvxAutoCorrectLanguageLists::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries, std::vector<SvxAutocorrWord>& aDeleteEntries ) +{ + // First get the current list! + GetAutocorrWordList(); + + MakeUserStorage_Impl(); + tools::SvRef<SotStorage> xStorage = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); + + bool bRet = xStorage.is() && ERRCODE_NONE == xStorage->GetError(); + + if( bRet ) + { + for (SvxAutocorrWord & aWordToDelete : aDeleteEntries) + { + std::optional<SvxAutocorrWord> xFoundEntry = pAutocorr_List->FindAndRemove( &aWordToDelete ); + if( xFoundEntry ) + { + if( !xFoundEntry->IsTextOnly() ) + { + OUString aName( aWordToDelete.GetShort() ); + if (xStorage->IsOLEStorage()) + aName = EncryptBlockName_Imp(aName); + else + GeneratePackageName ( aWordToDelete.GetShort(), aName ); + + if( xStorage->IsContained( aName ) ) + { + xStorage->Remove( aName ); + bRet = xStorage->Commit(); + } + } + } + } + + for (const SvxAutocorrWord & aNewEntrie : aNewEntries) + { + SvxAutocorrWord aWordToAdd(aNewEntrie.GetShort(), aNewEntrie.GetLong(), true ); + std::optional<SvxAutocorrWord> xRemoved = pAutocorr_List->FindAndRemove( &aWordToAdd ); + if( xRemoved ) + { + if( !xRemoved->IsTextOnly() ) + { + // Still have to remove the Storage + OUString sStorageName( aWordToAdd.GetShort() ); + if (xStorage->IsOLEStorage()) + sStorageName = EncryptBlockName_Imp(sStorageName); + else + GeneratePackageName ( aWordToAdd.GetShort(), sStorageName); + + if( xStorage->IsContained( sStorageName ) ) + xStorage->Remove( sStorageName ); + } + } + bRet = pAutocorr_List->Insert( std::move(aWordToAdd) ); + + if ( !bRet ) + { + break; + } + } + + if ( bRet ) + { + bRet = MakeBlocklist_Imp( *xStorage ); + } + } + return bRet; +} + +bool SvxAutoCorrectLanguageLists::PutText( const OUString& rShort, const OUString& rLong ) +{ + // First get the current list! + GetAutocorrWordList(); + + MakeUserStorage_Impl(); + tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); + + bool bRet = xStg.is() && ERRCODE_NONE == xStg->GetError(); + + // Update the word list + if( bRet ) + { + SvxAutocorrWord aNew(rShort, rLong, true ); + std::optional<SvxAutocorrWord> xRemove = pAutocorr_List->FindAndRemove( &aNew ); + if( xRemove ) + { + if( !xRemove->IsTextOnly() ) + { + // Still have to remove the Storage + OUString sStgNm( rShort ); + if (xStg->IsOLEStorage()) + sStgNm = EncryptBlockName_Imp(sStgNm); + else + GeneratePackageName ( rShort, sStgNm); + + if( xStg->IsContained( sStgNm ) ) + xStg->Remove( sStgNm ); + } + } + + if( pAutocorr_List->Insert( std::move(aNew) ) ) + { + bRet = MakeBlocklist_Imp( *xStg ); + xStg = nullptr; + } + else + { + bRet = false; + } + } + return bRet; +} + +void SvxAutoCorrectLanguageLists::PutText( const OUString& rShort, + SfxObjectShell& rShell ) +{ + // First get the current list! + GetAutocorrWordList(); + + MakeUserStorage_Impl(); + + try + { + uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sUserAutoCorrFile, embed::ElementModes::READWRITE ); + OUString sLong; + bool bRet = rAutoCorrect.PutText( xStg, sUserAutoCorrFile, rShort, rShell, sLong ); + xStg = nullptr; + + // Update the word list + if( bRet ) + { + if( pAutocorr_List->Insert( SvxAutocorrWord(rShort, sLong, false) ) ) + { + tools::SvRef<SotStorage> xStor = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); + MakeBlocklist_Imp( *xStor ); + } + } + } + catch ( const uno::Exception& ) + { + } +} + +// Keep the list sorted ... +struct SvxAutocorrWordList::CompareSvxAutocorrWordList +{ + bool operator()( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs ) const + { + CollatorWrapper& rCmp = ::GetCollatorWrapper(); + return rCmp.compareString( lhs.GetShort(), rhs.GetShort() ) < 0; + } +}; + +namespace { + +typedef std::unordered_map<OUString, SvxAutocorrWord> AutocorrWordHashType; + +} + +struct SvxAutocorrWordList::Impl +{ + + // only one of these contains the data + // maSortedVector is manually sorted so we can optimise data movement + mutable AutocorrWordSetType maSortedVector; + mutable AutocorrWordHashType maHash; // key is 'Short' + + void DeleteAndDestroyAll() + { + maHash.clear(); + maSortedVector.clear(); + } +}; + +SvxAutocorrWordList::SvxAutocorrWordList() : mpImpl(new Impl) {} + +SvxAutocorrWordList::~SvxAutocorrWordList() +{ +} + +void SvxAutocorrWordList::DeleteAndDestroyAll() +{ + mpImpl->DeleteAndDestroyAll(); +} + +// returns true if inserted +const SvxAutocorrWord* SvxAutocorrWordList::Insert(SvxAutocorrWord aWord) const +{ + if ( mpImpl->maSortedVector.empty() ) // use the hash + { + OUString aShort = aWord.GetShort(); + auto [it,inserted] = mpImpl->maHash.emplace( std::move(aShort), std::move(aWord) ); + if (inserted) + return &(it->second); + return nullptr; + } + else + { + auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), aWord, CompareSvxAutocorrWordList()); + CollatorWrapper& rCmp = ::GetCollatorWrapper(); + if (it == mpImpl->maSortedVector.end() || rCmp.compareString( aWord.GetShort(), it->GetShort() ) != 0) + { + it = mpImpl->maSortedVector.insert(it, std::move(aWord)); + return &*it; + } + return nullptr; + } +} + +void SvxAutocorrWordList::LoadEntry(const OUString& sWrong, const OUString& sRight, bool bOnlyTxt) +{ + (void)Insert(SvxAutocorrWord( sWrong, sRight, bOnlyTxt )); +} + +bool SvxAutocorrWordList::empty() const +{ + return mpImpl->maHash.empty() && mpImpl->maSortedVector.empty(); +} + +std::optional<SvxAutocorrWord> SvxAutocorrWordList::FindAndRemove(const SvxAutocorrWord *pWord) +{ + + if ( mpImpl->maSortedVector.empty() ) // use the hash + { + AutocorrWordHashType::iterator it = mpImpl->maHash.find( pWord->GetShort() ); + if( it != mpImpl->maHash.end() ) + { + SvxAutocorrWord pMatch = std::move(it->second); + mpImpl->maHash.erase (it); + return pMatch; + } + } + else + { + auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), *pWord, CompareSvxAutocorrWordList()); + if (it != mpImpl->maSortedVector.end() && !CompareSvxAutocorrWordList()(*pWord, *it)) + { + SvxAutocorrWord pMatch = std::move(*it); + mpImpl->maSortedVector.erase (it); + return pMatch; + } + } + return std::optional<SvxAutocorrWord>(); +} + +// return the sorted contents - defer sorting until we have to. +const SvxAutocorrWordList::AutocorrWordSetType& SvxAutocorrWordList::getSortedContent() const +{ + // convert from hash to set permanently + if ( mpImpl->maSortedVector.empty() ) + { + std::vector<SvxAutocorrWord> tmp; + tmp.reserve(mpImpl->maHash.size()); + for (auto & rPair : mpImpl->maHash) + tmp.emplace_back(std::move(rPair.second)); + mpImpl->maHash.clear(); + // sort twice - this gets the list into mostly-sorted order, which + // reduces the number of times we need to invoke the expensive ICU collate fn. + std::sort(tmp.begin(), tmp.end(), + [] ( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs ) + { + return lhs.GetShort() < rhs.GetShort(); + }); + // This beast has some O(N log(N)) in a terribly slow ICU collate fn. + // stable_sort is twice as fast as sort in this situation because it does + // fewer comparison operations. + std::stable_sort(tmp.begin(), tmp.end(), CompareSvxAutocorrWordList()); + mpImpl->maSortedVector = std::move(tmp); + } + return mpImpl->maSortedVector; +} + +const SvxAutocorrWord* SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *pFnd, + const OUString &rTxt, + sal_Int32 &rStt, + sal_Int32 nEndPos) const +{ + const OUString& rChk = pFnd->GetShort(); + + sal_Int32 left_wildcard = rChk.startsWith( ".*" ) ? 2 : 0; // ".*word" pattern? + sal_Int32 right_wildcard = rChk.endsWith( ".*" ) ? 2 : 0; // "word.*" pattern? + sal_Int32 nSttWdPos = nEndPos; + + // direct replacement of keywords surrounded by colons (for example, ":name:") + bool bColonNameColon = rTxt.getLength() > nEndPos && + rTxt[nEndPos] == ':' && rChk[0] == ':' && rChk.endsWith(":"); + if ( nEndPos + (bColonNameColon ? 1 : 0) < rChk.getLength() - left_wildcard - right_wildcard ) + return nullptr; + + bool bWasWordDelim = false; + sal_Int32 nCalcStt = nEndPos - rChk.getLength() + left_wildcard; + if (bColonNameColon) + nCalcStt++; + if( !right_wildcard && ( !nCalcStt || nCalcStt == rStt || left_wildcard || bColonNameColon || + ( nCalcStt < rStt && + IsWordDelim( rTxt[ nCalcStt - 1 ] ))) ) + { + TransliterationWrapper& rCmp = GetIgnoreTranslWrapper(); + OUString sWord = rTxt.copy(nCalcStt, rChk.getLength() - left_wildcard); + if( (!left_wildcard && rCmp.isEqual( rChk, sWord )) || (left_wildcard && rCmp.isEqual( rChk.copy(left_wildcard), sWord) )) + { + rStt = nCalcStt; + if (!left_wildcard) + { + // fdo#33899 avoid "1/2", "1/3".. to be replaced by fractions in dates, eg. 1/2/14 + if (rTxt.getLength() > nEndPos && rTxt[nEndPos] == '/' && rChk.indexOf('/') != -1) + return nullptr; + return pFnd; + } + // get the first word delimiter position before the matching ".*word" pattern + while( rStt && !(bWasWordDelim = IsWordDelim( rTxt[ --rStt ]))) + ; + if (bWasWordDelim) rStt++; + OUString left_pattern = rTxt.copy(rStt, nEndPos - rStt - rChk.getLength() + left_wildcard); + // avoid double spaces before simple "word" replacement + left_pattern += (left_pattern.getLength() == 0 && pFnd->GetLong()[0] == 0x20) ? pFnd->GetLong().subView(1) : pFnd->GetLong(); + if( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(rTxt.copy(rStt, nEndPos - rStt), left_pattern) ) ) + return pNew; + } + } else + // match "word.*" or ".*word.*" patterns, eg. "i18n.*", ".*---.*", TODO: add transliteration support + if ( right_wildcard ) + { + + OUString sTmp( rChk.copy( left_wildcard, rChk.getLength() - left_wildcard - right_wildcard ) ); + // Get the last word delimiter position + bool not_suffix; + + while( nSttWdPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ]))) + ; + // search the first occurrence (with a left word delimitation, if needed) + sal_Int32 nFndPos = -1; + do { + nFndPos = rTxt.indexOf( sTmp, nFndPos + 1); + if (nFndPos == -1) + break; + not_suffix = bWasWordDelim && (nSttWdPos >= (nFndPos + sTmp.getLength())); + } while ( (!left_wildcard && nFndPos && !IsWordDelim( rTxt[ nFndPos - 1 ])) || not_suffix ); + + if ( nFndPos != -1 ) + { + sal_Int32 extra_repl = nFndPos + sTmp.getLength() > nEndPos ? 1: 0; // for patterns with terminating characters, eg. "a:" + + if ( left_wildcard ) + { + // get the first word delimiter position before the matching ".*word.*" pattern + while( nFndPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nFndPos ]))) + ; + if (bWasWordDelim) nFndPos++; + } + if (nEndPos + extra_repl <= nFndPos) + { + return nullptr; + } + // store matching pattern and its replacement as a new list item, eg. "i18ns" -> "internationalizations" + OUString aShort = rTxt.copy(nFndPos, nEndPos - nFndPos + extra_repl); + + OUString aLong; + rStt = nFndPos; + if ( !left_wildcard ) + { + sal_Int32 siz = nEndPos - nFndPos - sTmp.getLength(); + aLong = pFnd->GetLong() + (siz > 0 ? rTxt.subView(nFndPos + sTmp.getLength(), siz) : u""); + } else { + OUStringBuffer buf; + do { + nSttWdPos = rTxt.indexOf( sTmp, nFndPos); + if (nSttWdPos != -1) + { + sal_Int32 nTmp(nFndPos); + while (nTmp < nSttWdPos && !IsWordDelim(rTxt[nTmp])) + nTmp++; + if (nTmp < nSttWdPos) + break; // word delimiter found + buf.append(rTxt.subView(nFndPos, nSttWdPos - nFndPos)).append(pFnd->GetLong()); + nFndPos = nSttWdPos + sTmp.getLength(); + } + } while (nSttWdPos != -1); + if (nEndPos - nFndPos > extra_repl) + buf.append(rTxt.subView(nFndPos, nEndPos - nFndPos)); + aLong = buf.makeStringAndClear(); + } + if ( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(aShort, aLong) ) ) + { + if ( (rTxt.getLength() > nEndPos && IsWordDelim(rTxt[nEndPos])) || rTxt.getLength() == nEndPos ) + return pNew; + } + } + } + return nullptr; +} + +const SvxAutocorrWord* SvxAutocorrWordList::SearchWordsInList(const OUString& rTxt, sal_Int32& rStt, + sal_Int32 nEndPos) const +{ + for (auto const& elem : mpImpl->maHash) + { + if( const SvxAutocorrWord *pTmp = WordMatches( &elem.second, rTxt, rStt, nEndPos ) ) + return pTmp; + } + + for (auto const& elem : mpImpl->maSortedVector) + { + if( const SvxAutocorrWord *pTmp = WordMatches( &elem, rTxt, rStt, nEndPos ) ) + return pTmp; + } + return nullptr; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |