libreoffice/editeng/source/misc/svxacorr.cxx

/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */

#include <memory>
#include <utility>
#include <algorithm>
#include <string_view>
#include <sal/config.h>

#include <com/sun/star/linguistic2/XSpellChecker1.hpp>
#include <com/sun/star/embed/XStorage.hpp>
#include <com/sun/star/io/IOException.hpp>
#include <com/sun/star/io/XStream.hpp>
#include <tools/urlobj.hxx>
#include <i18nlangtag/mslangid.hxx>
#include <i18nutil/transliteration.hxx>
#include <sal/log.hxx>
#include <osl/diagnose.h>
#include <vcl/svapp.hxx>
#include <vcl/settings.hxx>
#include <svl/fstathelper.hxx>
#include <svl/urihelper.hxx>
#include <unotools/charclass.hxx>
#include <com/sun/star/i18n/UnicodeType.hpp>
#include <unotools/collatorwrapper.hxx>
#include <com/sun/star/i18n/UnicodeScript.hpp>
#include <com/sun/star/i18n/OrdinalSuffix.hpp>
#include <unotools/localedatawrapper.hxx>
#include <unotools/transliterationwrapper.hxx>
#include <comphelper/processfactory.hxx>
#include <comphelper/sequence.hxx>
#include <comphelper/storagehelper.hxx>
#include <o3tl/string_view.hxx>
#include <editeng/editids.hrc>
#include <sot/storage.hxx>
#include <editeng/udlnitem.hxx>
#include <editeng/wghtitem.hxx>
#include <editeng/postitem.hxx>
#include <editeng/crossedoutitem.hxx>
#include <editeng/escapementitem.hxx>
#include <editeng/svxacorr.hxx>
#include <editeng/unolingu.hxx>
#include <vcl/window.hxx>
#include <com/sun/star/xml/sax/InputSource.hpp>
#include <com/sun/star/xml/sax/FastParser.hpp>
#include <com/sun/star/xml/sax/Writer.hpp>
#include <com/sun/star/xml/sax/SAXParseException.hpp>
#include <unotools/streamwrap.hxx>
#include "SvXMLAutoCorrectImport.hxx"
#include "SvXMLAutoCorrectExport.hxx"
#include "SvXMLAutoCorrectTokenHandler.hxx"
#include <ucbhelper/content.hxx>
#include <com/sun/star/ucb/ContentCreationException.hpp>
#include <com/sun/star/ucb/XCommandEnvironment.hpp>
#include <com/sun/star/ucb/TransferInfo.hpp>
#include <com/sun/star/ucb/NameClash.hpp>
#include <comphelper/diagnose_ex.hxx>
#include <xmloff/xmltoken.hxx>
#include <unordered_map>
#include <rtl/character.hxx>

using namespace ::com::sun::star::ucb;
using namespace ::com::sun::star::uno;
using namespace ::com::sun::star::xml::sax;
using namespace ::com::sun::star;
using namespace ::xmloff::token;
using namespace ::utl;

namespace {

enum class Flags {
    NONE            = 0x00,
    FullStop        = 0x01,
    ExclamationMark = 0x02,
    QuestionMark    = 0x04,
};

}

namespace o3tl {
    template<> struct typed_flags<Flags> : is_typed_flags<Flags, 0x07> {};
}
const sal_Unicode cNonBreakingSpace = 0xA0; // UNICODE code for no break space

constexpr OUString pXMLImplWordStart_ExcptLstStr = u"WordExceptList.xml"_ustr;
constexpr OUString pXMLImplCplStt_ExcptLstStr = u"SentenceExceptList.xml"_ustr;
constexpr OUString pXMLImplAutocorr_ListStr = u"DocumentList.xml"_ustr;

// tdf#54409 check also typographical quotation marks in the case of skipped ASCII quotation marks
// Curious, why these \u0083\u0084\u0089\u0091\u0092\u0093\u0094 are handled as "begin characters"?
constexpr std::u16string_view
    /* also at these beginnings - Brackets and all kinds of begin characters */
    sImplSttSkipChars = u"\"'([{\u2018\u2019\u201a\u201b\u201c\u201d\u201e\u201f\u0083\u0084\u0089\u0091\u0092\u0093\u0094",
    /* also at these ends - Brackets and all kinds of begin characters */
    sImplEndSkipChars = u"\"')]}\u2018\u2019\u201a\u201b\u201c\u201d\u201e\u201f\u0083\u0084\u0089\u0091\u0092\u0093\u0094";

static OUString EncryptBlockName_Imp(std::u16string_view rName);

static bool NonFieldWordDelim( const sal_Unicode c )
{
    return ' ' == c || '\t' == c || 0x0a == c ||
            cNonBreakingSpace == c || 0x2011 == c;
}

static bool IsWordDelim( const sal_Unicode c )
{
    return c == 0x1 || NonFieldWordDelim(c);
}


static bool IsLowerLetter( sal_Int32 nCharType )
{
    return CharClass::isLetterType( nCharType ) &&
           ( css::i18n::KCharacterType::LOWER & nCharType);
}

static bool IsUpperLetter( sal_Int32 nCharType )
{
    return CharClass::isLetterType( nCharType ) &&
            ( css::i18n::KCharacterType::UPPER & nCharType);
}

static bool lcl_IsUnsupportedUnicodeChar( CharClass const & rCC, const OUString& rTxt,
                                   sal_Int32 nStt, sal_Int32 nEnd )
{
    for( ; nStt < nEnd; ++nStt )
    {
        css::i18n::UnicodeScript nScript = rCC.getScript( rTxt, nStt );
        switch( nScript )
        {
            case css::i18n::UnicodeScript_kCJKRadicalsSupplement:
            case css::i18n::UnicodeScript_kHangulJamo:
            case css::i18n::UnicodeScript_kCJKSymbolPunctuation:
            case css::i18n::UnicodeScript_kHiragana:
            case css::i18n::UnicodeScript_kKatakana:
            case css::i18n::UnicodeScript_kHangulCompatibilityJamo:
            case css::i18n::UnicodeScript_kEnclosedCJKLetterMonth:
            case css::i18n::UnicodeScript_kCJKCompatibility:
            case css::i18n::UnicodeScript_kCJKUnifiedIdeographsExtensionA:
            case css::i18n::UnicodeScript_kCJKUnifiedIdeograph:
            case css::i18n::UnicodeScript_kHangulSyllable:
            case css::i18n::UnicodeScript_kCJKCompatibilityIdeograph:
            case css::i18n::UnicodeScript_kHalfwidthFullwidthForm:
                return true;
            default: ; //do nothing
        }
    }
    return false;
}

static bool lcl_IsSymbolChar( CharClass const & rCC, const OUString& rTxt,
                                  sal_Int32 nStt, sal_Int32 nEnd )
{
    for( ; nStt < nEnd; ++nStt )
    {
        if( css::i18n::UnicodeType::PRIVATE_USE == rCC.getType( rTxt, nStt ))
            return true;
    }
    return false;
}

static bool lcl_IsInArr(std::u16string_view arr, const sal_uInt32 c)
{
    return std::any_of(arr.begin(), arr.end(), [c](const auto c1) { return c1 == c; });
}

SvxAutoCorrDoc::~SvxAutoCorrDoc()
{
}

// Called by the functions:
//  - FnCapitalStartWord
//  - FnCapitalStartSentence
// after the exchange of characters. Then the words, if necessary, can be inserted
// into the exception list.
void SvxAutoCorrDoc::SaveCpltSttWord( ACFlags, sal_Int32, const OUString&,
                                        sal_Unicode )
{
}

LanguageType SvxAutoCorrDoc::GetLanguage( sal_Int32 ) const
{
    return LANGUAGE_SYSTEM;
}

static const LanguageTag& GetAppLang()
{
    return Application::GetSettings().GetLanguageTag();
}

/// Never use an unresolved LANGUAGE_SYSTEM.
static LanguageType GetDocLanguage( const SvxAutoCorrDoc& rDoc, sal_Int32 nPos )
{
    LanguageType eLang = rDoc.GetLanguage( nPos );
    if (eLang == LANGUAGE_SYSTEM)
        eLang = GetAppLang().getLanguageType();     // the current work locale
    return eLang;
}

static LocaleDataWrapper& GetLocaleDataWrapper( LanguageType nLang )
{
    static std::unique_ptr<LocaleDataWrapper> xLclDtWrp;
    LanguageTag aLcl( nLang );
    if (!xLclDtWrp || xLclDtWrp->getLoadedLanguageTag() != aLcl)
        xLclDtWrp.reset(new LocaleDataWrapper(std::move(aLcl)));
    return *xLclDtWrp;
}
static TransliterationWrapper& GetIgnoreTranslWrapper()
{
    static int bIsInit = 0;
    static TransliterationWrapper aWrp( ::comphelper::getProcessComponentContext(),
                TransliterationFlags::IGNORE_KANA |
                TransliterationFlags::IGNORE_WIDTH );
    if( !bIsInit )
    {
        aWrp.loadModuleIfNeeded( GetAppLang().getLanguageType() );
        bIsInit = 1;
    }
    return aWrp;
}
static CollatorWrapper& GetCollatorWrapper()
{
    static CollatorWrapper aCollWrp = []()
    {
        CollatorWrapper tmp( ::comphelper::getProcessComponentContext() );
        tmp.loadDefaultCollator( GetAppLang().getLocale(), 0 );
        return tmp;
    }();
    return aCollWrp;
}

bool SvxAutoCorrect::IsAutoCorrectChar( sal_Unicode cChar )
{
    return  cChar == '\0' || cChar == '\t' || cChar == 0x0a ||
            cChar == ' '  || cChar == '\'' || cChar == '\"' ||
            cChar == '*'  || cChar == '_'  || cChar == '%' ||
            cChar == '.'  || cChar == ','  || cChar == ';' ||
            cChar == ':'  || cChar == '?' || cChar == '!' ||
            cChar == '<'  || cChar == '>' ||
            cChar == '/'  || cChar == '-';
}

namespace
{
    bool IsCompoundWordDelimChar(sal_Unicode cChar)
    {
        return  cChar == '-' || SvxAutoCorrect::IsAutoCorrectChar(cChar);
    }
}

bool SvxAutoCorrect::NeedsHardspaceAutocorr( sal_Unicode cChar )
{
    return cChar == '%' || cChar == ';' || cChar == ':'  || cChar == '?' || cChar == '!' ||
        cChar == '/' /*case for the urls exception*/;
}

ACFlags SvxAutoCorrect::GetDefaultFlags()
{
    ACFlags nRet = ACFlags::Autocorrect
                    | ACFlags::CapitalStartSentence
                    | ACFlags::CapitalStartWord
                    | ACFlags::ChgOrdinalNumber
                    | ACFlags::ChgToEnEmDash
                    | ACFlags::AddNonBrkSpace
                    | ACFlags::TransliterateRTL
                    | ACFlags::ChgAngleQuotes
                    | ACFlags::ChgWeightUnderl
                    | ACFlags::SetINetAttr
                    | ACFlags::SetDOIAttr
                    | ACFlags::ChgQuotes
                    | ACFlags::SaveWordCplSttLst
                    | ACFlags::SaveWordWordStartLst
                    | ACFlags::CorrectCapsLock;
    LanguageType eLang = GetAppLang().getLanguageType();
    if( eLang.anyOf(
        LANGUAGE_ENGLISH,
        LANGUAGE_ENGLISH_US,
        LANGUAGE_ENGLISH_UK,
        LANGUAGE_ENGLISH_AUS,
        LANGUAGE_ENGLISH_CAN,
        LANGUAGE_ENGLISH_NZ,
        LANGUAGE_ENGLISH_EIRE,
        LANGUAGE_ENGLISH_SAFRICA,
        LANGUAGE_ENGLISH_JAMAICA,
        LANGUAGE_ENGLISH_CARIBBEAN))
        nRet &= ~ACFlags(ACFlags::ChgQuotes|ACFlags::ChgSglQuotes);
    return nRet;
}

constexpr sal_Unicode cEmDash = 0x2014;
constexpr sal_Unicode cEnDash = 0x2013;
constexpr OUString sEmDash(u"\u2014"_ustr);
constexpr OUString sEnDash(u"\u2013"_ustr);
constexpr sal_Unicode cApostrophe = 0x2019;
constexpr sal_Unicode cLeftDoubleAngleQuote = 0xAB;
constexpr sal_Unicode cRightDoubleAngleQuote = 0xBB;
constexpr sal_Unicode cLeftSingleAngleQuote = 0x2039;
constexpr sal_Unicode cRightSingleAngleQuote = 0x203A;
// stop characters for searching preceding quotes
// (the first character is also the opening quote we are looking for)
const sal_Unicode aStopDoubleAngleQuoteStart[] = { 0x201E, 0x201D, 0x201C, 0 }; // preceding ,,
const sal_Unicode aStopDoubleAngleQuoteEnd[] = { cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0x201D, 0x201E, 0 }; // preceding >>
// preceding << for Romanian, handle also alternative primary closing quotation mark U+201C
const sal_Unicode aStopDoubleAngleQuoteEndRo[] = { cLeftDoubleAngleQuote, cRightDoubleAngleQuote, 0x201D, 0x201E, 0x201C, 0 };
const sal_Unicode aStopSingleQuoteEnd[] = { 0x201A, 0x2018, 0x201C, 0x201E, 0 };
const sal_Unicode aStopSingleQuoteEndRuUa[] = { 0x201E, 0x201C, cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0 };

SvxAutoCorrect::SvxAutoCorrect( OUString aShareAutocorrFile,
                                OUString aUserAutocorrFile )
    : sShareAutoCorrFile(std::move( aShareAutocorrFile ))
    , sUserAutoCorrFile(std::move( aUserAutocorrFile ))
    , eCharClassLang( LANGUAGE_DONTKNOW )
    , nFlags(SvxAutoCorrect::GetDefaultFlags())
    , cStartDQuote( 0 )
    , cEndDQuote( 0 )
    , cStartSQuote( 0 )
    , cEndSQuote( 0 )
{
}

SvxAutoCorrect::SvxAutoCorrect( const SvxAutoCorrect& rCpy )
    : sShareAutoCorrFile( rCpy.sShareAutoCorrFile )
    , sUserAutoCorrFile( rCpy.sUserAutoCorrFile )
    , aSwFlags( rCpy.aSwFlags )
    , eCharClassLang(rCpy.eCharClassLang)
    , nFlags( rCpy.nFlags & ~ACFlags(ACFlags::ChgWordLstLoad|ACFlags::CplSttLstLoad|ACFlags::WordStartLstLoad))
    , cStartDQuote( rCpy.cStartDQuote )
    , cEndDQuote( rCpy.cEndDQuote )
    , cStartSQuote( rCpy.cStartSQuote )
    , cEndSQuote( rCpy.cEndSQuote )
{
}


SvxAutoCorrect::~SvxAutoCorrect()
{
}

void SvxAutoCorrect::GetCharClass_( LanguageType eLang )
{
    moCharClass.emplace( LanguageTag( eLang) );
    eCharClassLang = eLang;
}

void SvxAutoCorrect::SetAutoCorrFlag( ACFlags nFlag, bool bOn )
{
    ACFlags nOld = nFlags;
    nFlags = bOn ? nFlags | nFlag
                 : nFlags & ~nFlag;

    if( !bOn )
    {
        if( (nOld & ACFlags::CapitalStartSentence) != (nFlags & ACFlags::CapitalStartSentence) )
            nFlags &= ~ACFlags::CplSttLstLoad;
        if( (nOld & ACFlags::CapitalStartWord) != (nFlags & ACFlags::CapitalStartWord) )
            nFlags &= ~ACFlags::WordStartLstLoad;
        if( (nOld & ACFlags::Autocorrect) != (nFlags & ACFlags::Autocorrect) )
            nFlags &= ~ACFlags::ChgWordLstLoad;
    }
}


// Correct TWo INitial CApitals
void SvxAutoCorrect::FnCapitalStartWord( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
                                    sal_Int32 nSttPos, sal_Int32 nEndPos,
                                    LanguageType eLang )
{
    CharClass& rCC = GetCharClass( eLang );

    // Delete all non alphanumeric. Test the characters at the beginning/end of
    // the word ( recognizes: "(min.", "/min.", and so on.)
    for( ; nSttPos < nEndPos; ++nSttPos )
        if( rCC.isLetterNumeric( rTxt, nSttPos ))
            break;
    for( ; nSttPos < nEndPos; --nEndPos )
        if( rCC.isLetterNumeric( rTxt, nEndPos - 1 ))
            break;

    // Is the word a compounded word separated by delimiters?
    // If so, keep track of all delimiters so each constituent
    // word can be checked for two initial capital letters.
    std::deque<sal_Int32> aDelimiters;

    // Always check for two capitals at the beginning
    // of the entire word, so start at nSttPos.
    aDelimiters.push_back(nSttPos);

    // Find all compound word delimiters
    for (sal_Int32 n = nSttPos; n < nEndPos; ++n)
    {
        if (IsCompoundWordDelimChar(rTxt[ n ]))
        {
            aDelimiters.push_back( n + 1 ); // Get position of char after delimiter
        }
    }

    // Decide where to put the terminating delimiter.
    // If the last AutoCorrect char was a newline, then the AutoCorrect
    // char will not be included in rTxt.
    // If the last AutoCorrect char was not a newline, then the AutoCorrect
    // character will be the last character in rTxt.
    if (!IsCompoundWordDelimChar(rTxt[nEndPos-1]))
        aDelimiters.push_back(nEndPos);

    // Iterate through the word and all words that compose it.
    // Two capital letters at the beginning of word?
    for (size_t nI = 0; nI < aDelimiters.size() - 1; ++nI)
    {
        nSttPos = aDelimiters[nI];
        nEndPos = aDelimiters[nI + 1];

        if( nSttPos+2 < nEndPos &&
            IsUpperLetter( rCC.getCharacterType( rTxt, nSttPos )) &&
            IsUpperLetter( rCC.getCharacterType( rTxt, ++nSttPos )) &&
            // Is the third character a lower case
            IsLowerLetter( rCC.getCharacterType( rTxt, nSttPos +1 )) &&
            // Do not replace special attributes
            0x1 != rTxt[ nSttPos ] && 0x2 != rTxt[ nSttPos ])
        {
            // test if the word is in an exception list
            OUString sWord( rTxt.copy( nSttPos - 1, nEndPos - nSttPos + 1 ));
            if( !FindInWordStartExceptList(eLang, sWord) )
            {
                // Check that word isn't correctly spelt before correcting:
                css::uno::Reference< css::linguistic2::XSpellChecker1 > xSpeller =
                    LinguMgr::GetSpellChecker();
                if( xSpeller->hasLanguage(static_cast<sal_uInt16>(eLang)) )
                {
                    Sequence< css::beans::PropertyValue > aEmptySeq;
                    if (xSpeller->isValid(sWord, static_cast<sal_uInt16>(eLang), aEmptySeq))
                    {
                        return;
                    }
                }
                sal_Unicode cSave = rTxt[ nSttPos ];
                OUString sChar = rCC.lowercase( OUString(cSave) );
                if( sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar ))
                {
                    if( ACFlags::SaveWordWordStartLst & nFlags )
                        rDoc.SaveCpltSttWord( ACFlags::CapitalStartWord, nSttPos, sWord, cSave );
                }
            }
        }
    }
}

// Format ordinal numbers suffixes (1st -> 1^st)
bool SvxAutoCorrect::FnChgOrdinalNumber(
    SvxAutoCorrDoc& rDoc, const OUString& rTxt,
    sal_Int32 nSttPos, sal_Int32 nEndPos,
    LanguageType eLang)
{
    // 1st, 2nd, 3rd, 4 - 0th
    // 201th or 201st
    // 12th or 12nd
    bool bChg = false;

    // In some languages ordinal suffixes should never be
    // changed to superscript. Let's break for those languages.
    if (!eLang.anyOf(
         LANGUAGE_CATALAN,              // tdf#156792
         LANGUAGE_CATALAN_VALENCIAN,
         LANGUAGE_SWEDISH,
         LANGUAGE_SWEDISH_FINLAND))
    {
        CharClass& rCC = GetCharClass(eLang);

        for (; nSttPos < nEndPos; ++nSttPos)
            if (!lcl_IsInArr(sImplSttSkipChars, rTxt[nSttPos]))
                break;
        for (; nSttPos < nEndPos; --nEndPos)
            if (!lcl_IsInArr(sImplEndSkipChars, rTxt[nEndPos - 1]))
                break;


        // Get the last number in the string to check
        sal_Int32 nNumEnd = nEndPos;
        bool bFoundEnd = false;
        bool isValidNumber = true;
        sal_Int32 i = nEndPos;
        while (i > nSttPos)
        {
            i--;
            bool isDigit = rCC.isDigit(rTxt, i);
            if (bFoundEnd)
                isValidNumber &= (isDigit || !rCC.isLetter(rTxt, i));

            if (isDigit && !bFoundEnd)
            {
                bFoundEnd = true;
                nNumEnd = i;
            }
        }

        if (bFoundEnd && isValidNumber) {
            sal_Int32 nNum = o3tl::toInt32(rTxt.subView(nSttPos, nNumEnd - nSttPos + 1));
            std::u16string_view sEnd = rTxt.subView(nNumEnd + 1, nEndPos - nNumEnd - 1);

            // Check if the characters after that number correspond to the ordinal suffix
            uno::Reference< i18n::XOrdinalSuffix > xOrdSuffix
                = i18n::OrdinalSuffix::create(comphelper::getProcessComponentContext());

            uno::Sequence< OUString > aSuffixes = xOrdSuffix->getOrdinalSuffix(nNum, rCC.getLanguageTag().getLocale());

            // add extra suffixes for languages not handled by i18npool/ICU
            if ( primary(eLang) == primary(LANGUAGE_PORTUGUESE) &&
                            ( nEndPos == nNumEnd + 3 || nEndPos == nNumEnd + 4 ) &&
                            ( sEnd[0] == 'a' || sEnd[0] == 'o' || sEnd[0] == 'r' ) )
            {
               auto aExtendedSuffixes = comphelper::sequenceToContainer< std::vector<OUString> >(aSuffixes);
               aExtendedSuffixes.push_back(u"as"_ustr); // plural form of 'a'
               aExtendedSuffixes.push_back(u"os"_ustr); // plural form of 'o'
               aExtendedSuffixes.push_back(u"ra"_ustr); // alternative form of 'a'
               aExtendedSuffixes.push_back(u"ro"_ustr); // alternative form of 'o'
               aExtendedSuffixes.push_back(u"ras"_ustr); // alternative form of "as"
               aExtendedSuffixes.push_back(u"ros"_ustr); // alternative form of "os"
               aSuffixes = comphelper::containerToSequence(aExtendedSuffixes);
            }

            for (OUString const & sSuffix : aSuffixes)
            {
                if (sSuffix == sEnd)
                {
                    // Check if the ordinal suffix has to be set as super script
                    if (rCC.isLetter(sSuffix))
                    {
                        sal_Int32 nNumberChanged = 0;
                        sal_Int32 nSuffixChanged = 0;
                        // exceptions for Portuguese
                        // add missing dot: 1a -> 1.ª
                        // and remove optional 'r': 1ro -> 1.º
                        if ( primary(eLang) == primary(LANGUAGE_PORTUGUESE) )
                        {
                            if ( sSuffix.startsWith("r") )
                            {
                                rDoc.Delete( nNumEnd + 1, nNumEnd + 2 );
                                nSuffixChanged = -1;
                            }
                            rDoc.Insert( nNumEnd + 1, u"."_ustr );
                            nNumberChanged = 1;
                        }

                        // Do the change
                        SvxEscapementItem aSvxEscapementItem(DFLT_ESC_AUTO_SUPER,
                            DFLT_ESC_PROP, SID_ATTR_CHAR_ESCAPEMENT);
                        rDoc.SetAttr(nNumEnd + 1 + nNumberChanged,
                            nEndPos + nNumberChanged + nSuffixChanged,
                            SID_ATTR_CHAR_ESCAPEMENT,
                            aSvxEscapementItem);
                        bChg = true;
                        break;
                    }
                }
            }
        }
    }
    return bChg;
}

// Replace dashes
bool SvxAutoCorrect::FnChgToEnEmDash(
                                SvxAutoCorrDoc& rDoc, const OUString& rTxt,
                                sal_Int32 nSttPos, sal_Int32 nEndPos,
                                LanguageType eLang )
{
    bool bRet = false;
    CharClass& rCC = GetCharClass( eLang );
    if (eLang == LANGUAGE_SYSTEM)
        eLang = GetAppLang().getLanguageType();
    bool bAlwaysUseEmDash = (eLang == LANGUAGE_RUSSIAN || eLang == LANGUAGE_UKRAINIAN);

    // rTxt may refer to the frame text that will change in the calls to rDoc.Delete / rDoc.Insert;
    // keep a local copy for later use
    OUString aOrigTxt = rTxt;
    sal_Int32 nFirstReplacementTextLengthChange = 0;

    // replace " - " or " --" with "enDash"
    if( 1 < nSttPos && 1 <= nEndPos - nSttPos )
    {
        sal_Unicode cCh = rTxt[ nSttPos ];
        if( '-' == cCh )
        {
            if( 1 < nEndPos - nSttPos &&
                ' ' == rTxt[ nSttPos-1 ] &&
                '-' == rTxt[ nSttPos+1 ])
            {
                sal_Int32 n;
                for( n = nSttPos+2; n < nEndPos && lcl_IsInArr(
                            sImplSttSkipChars,(cCh = rTxt[ n ]));
                        ++n )
                    ;

                // found: " --[<AnySttChars>][A-z0-9]
                if( rCC.isLetterNumeric( OUString(cCh) ) )
                {
                    for( n = nSttPos-1; n && lcl_IsInArr(
                            sImplEndSkipChars,(cCh = rTxt[ --n ])); )
                        ;

                    // found: "[A-z0-9][<AnyEndChars>] --[<AnySttChars>][A-z0-9]
                    if( rCC.isLetterNumeric( OUString(cCh) ))
                    {
                        rDoc.Delete( nSttPos, nSttPos + 2 );
                        rDoc.Insert( nSttPos, bAlwaysUseEmDash ? sEmDash : sEnDash );
                        nFirstReplacementTextLengthChange = -1; // 2 ch -> 1 ch
                        bRet = true;
                    }
                }
            }
        }
        else if( 3 < nSttPos &&
                 ' ' == rTxt[ nSttPos-1 ] &&
                 '-' == rTxt[ nSttPos-2 ])
        {
            sal_Int32 n, nLen = 1, nTmpPos = nSttPos - 2;
            if( '-' == ( cCh = rTxt[ nTmpPos-1 ]) )
            {
                --nTmpPos;
                ++nLen;
                cCh = rTxt[ nTmpPos-1 ];
            }
            if( ' ' == cCh )
            {
                for( n = nSttPos; n < nEndPos && lcl_IsInArr(
                            sImplSttSkipChars,(cCh = rTxt[ n ]));
                        ++n )
                    ;

                // found: " - [<AnySttChars>][A-z0-9]
                if( rCC.isLetterNumeric( OUString(cCh) ) )
                {
                    cCh = ' ';
                    for( n = nTmpPos-1; n && lcl_IsInArr(
                            sImplEndSkipChars,(cCh = rTxt[ --n ])); )
                            ;
                    // found: "[A-z0-9][<AnyEndChars>] - [<AnySttChars>][A-z0-9]
                    if (rCC.isLetterNumeric(OUString(cCh)) || lcl_IsInArr(u".!?", cCh))
                    {
                        rDoc.Delete( nTmpPos, nTmpPos + nLen );
                        rDoc.Insert( nTmpPos, bAlwaysUseEmDash ? sEmDash : sEnDash );
                        nFirstReplacementTextLengthChange = 1 - nLen; // nLen ch -> 1 ch
                        bRet = true;
                    }
                }
            }
        }
    }

    // Replace [A-z0-9]--[A-z0-9] double dash with "emDash" or "enDash"
    // [0-9]--[0-9] double dash always replaced with "enDash"
    // Finnish and Hungarian use enDash instead of emDash.
    bool bEnDash = (eLang == LANGUAGE_HUNGARIAN || eLang == LANGUAGE_FINNISH);
    if( 4 <= nEndPos - nSttPos )
    {
        std::u16string_view sTmpView( aOrigTxt.subView( nSttPos, nEndPos - nSttPos ) );
        size_t nFndPos = sTmpView.find(u"--");
        if (nFndPos > 0 && nFndPos < sTmpView.size() - 2)
        {
            // Use proper codepoints. Currently, CharClass::isLetterNumeric is broken, it
            // uses the index *both* as code unit index (when checking it as ASCII), *and*
            // as code point index (when passes to css::i18n::XCharacterClassification).
            // Oh well... Anyway, single-codepoint strings will workaround it.
            sal_Int32 nStart = nSttPos + nFndPos;
            sal_uInt32 chStart = aOrigTxt.iterateCodePoints(&nStart, -1);
            OUString sStart(&chStart, 1);
            // No idea why sImplEndSkipChars is checked at start
            if (rCC.isLetterNumeric(sStart, 0) || lcl_IsInArr(sImplEndSkipChars, chStart))
            {
                sal_Int32 nEnd = nSttPos + nFndPos + 2;
                sal_uInt32 chEnd = aOrigTxt.iterateCodePoints(&nEnd, 1);
                OUString sEnd(&chEnd, 1);
                // No idea why sImplSttSkipChars is checked at end
                if (rCC.isLetterNumeric(sEnd, 0) || lcl_IsInArr(sImplSttSkipChars, chEnd))
                {
                    nSttPos = nSttPos + nFndPos + nFirstReplacementTextLengthChange;
                    rDoc.Delete(nSttPos, nSttPos + 2);
                    rDoc.Insert(nSttPos,
                                (bEnDash || (rCC.isDigit(sStart, 0) && rCC.isDigit(sEnd, 0))
                                     ? sEnDash
                                     : sEmDash));
                    bRet = true;
                }
            }
        }
    }
    return bRet;
}

// Add non-breaking space before specific punctuation marks in French text
sal_Int32 SvxAutoCorrect::FnAddNonBrkSpace(
                                SvxAutoCorrDoc& rDoc, std::u16string_view rTxt,
                                sal_Int32 nEndPos,
                                LanguageType eLang, bool& io_bNbspRunNext )
{
    sal_Int32 nRet = -1;

    CharClass& rCC = GetCharClass( eLang );

    if ( rCC.getLanguageTag().getLanguage() == "fr" )
    {
        bool bFrCA = (rCC.getLanguageTag().getCountry() == "CA");
        OUString allChars = u":;?!%"_ustr;
        OUString chars( allChars );
        if ( bFrCA )
            chars = ":";

        sal_Unicode cChar = rTxt[ nEndPos ];
        bool bHasSpace = chars.indexOf( cChar ) != -1;
        bool bIsSpecial = allChars.indexOf( cChar ) != -1;
        if ( bIsSpecial )
        {
            // Get the last word delimiter position
            sal_Int32 nSttWdPos = nEndPos;
            bool bWasWordDelim = false;
            while( nSttWdPos )
            {
                bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ]);
                if (bWasWordDelim)
                    break;
            }

            //See if the text is the start of a protocol string, e.g. have text of
            //"http" see if it is the start of "http:" and if so leave it alone
            size_t nIndex = nSttWdPos + (bWasWordDelim ? 1 : 0);
            size_t nProtocolLen = nEndPos - nSttWdPos + 1;
            if (nIndex + nProtocolLen <= rTxt.size())
            {
                if (INetURLObject::CompareProtocolScheme(rTxt.substr(nIndex, nProtocolLen)) != INetProtocol::NotValid)
                    return -1;
            }

            // Check the presence of "://" in the word
            size_t nStrPos = rTxt.find( u"://", nSttWdPos + 1 );
            if ( nStrPos == std::u16string_view::npos && nEndPos > 0 )
            {
                // Check the previous char
                sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ];
                if ( ( chars.indexOf( cPrevChar ) == -1 ) && cPrevChar != '\t' )
                {
                    // Remove any previous normal space
                    sal_Int32 nPos = nEndPos - 1;
                    while ( cPrevChar == ' ' || cPrevChar == cNonBreakingSpace )
                    {
                        if ( nPos == 0 ) break;
                        nPos--;
                        cPrevChar = rTxt[ nPos ];
                    }

                    nPos++;
                    if ( nEndPos - nPos > 0 )
                        rDoc.Delete( nPos, nEndPos );

                    // Add the non-breaking space at the end pos
                    if ( bHasSpace )
                        rDoc.Insert( nPos, OUString(cNonBreakingSpace) );
                    io_bNbspRunNext = true;
                    nRet = nPos;
                }
                else if ( chars.indexOf( cPrevChar ) != -1 )
                    io_bNbspRunNext = true;
            }
        }
        else if ( cChar == '/' && nEndPos > 1 && static_cast<sal_Int32>(rTxt.size()) > (nEndPos - 1) )
        {
            // Remove the hardspace right before to avoid formatting URLs
            sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ];
            sal_Unicode cMaybeSpaceChar = rTxt[ nEndPos - 2 ];
            if ( cPrevChar == ':' && cMaybeSpaceChar == cNonBreakingSpace )
            {
                rDoc.Delete( nEndPos - 2, nEndPos - 1 );
                nRet = nEndPos - 1;
            }
        }
    }

    return nRet;
}

// URL recognition
bool SvxAutoCorrect::FnSetINetAttr( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
                                    sal_Int32 nSttPos, sal_Int32 nEndPos,
                                    LanguageType eLang )
{
    OUString sURL( URIHelper::FindFirstURLInText( rTxt, nSttPos, nEndPos,
                                                GetCharClass( eLang ) ));
    bool bRet = !sURL.isEmpty();
    if( bRet )          // so, set attribute:
        rDoc.SetINetAttr( nSttPos, nEndPos, sURL );
    return bRet;
}

// DOI citation recognition
bool SvxAutoCorrect::FnSetDOIAttr( SvxAutoCorrDoc& rDoc, std::u16string_view rTxt,
                                    sal_Int32 nSttPos, sal_Int32 nEndPos,
                                    LanguageType eLang )
{
    OUString sURL( URIHelper::FindFirstDOIInText( rTxt, nSttPos, nEndPos, GetCharClass( eLang ) ));
    bool bRet = !sURL.isEmpty();
    if( bRet )          // so, set attribute:
        rDoc.SetINetAttr( nSttPos, nEndPos, sURL );
    return bRet;
}

// Automatic *bold*, /italic/, -strikeout- and _underline_
bool SvxAutoCorrect::FnChgWeightUnderl( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
                                        sal_Int32 nEndPos )
{
    // Condition:
    //  at the beginning:   _, *, / or ~ after Space with the following !Space
    //  at the end:         _, *, / or ~ before Space (word delimiter?)

    sal_Unicode cInsChar = rTxt[ nEndPos ];  // underline, bold, italic or strikeout
    if( ++nEndPos != rTxt.getLength() &&
        !IsWordDelim( rTxt[ nEndPos ] ) )
        return false;

    --nEndPos;

    bool bAlphaNum = false;
    sal_Int32 nPos = nEndPos;
    sal_Int32  nFndPos = -1;
    CharClass& rCC = GetCharClass( LANGUAGE_SYSTEM );

    while( nPos )
    {
        switch( sal_Unicode c = rTxt[ --nPos ] )
        {
        case '_':
        case '-':
        case '/':
        case '*':
            if( c == cInsChar )
            {
                if( bAlphaNum && nPos+1 < nEndPos && ( !nPos ||
                    IsWordDelim( rTxt[ nPos-1 ])) &&
                    !IsWordDelim( rTxt[ nPos+1 ]))
                        nFndPos = nPos;
                else
                    // Condition is not satisfied, so cancel
                    nFndPos = -1;
                nPos = 0;
            }
            break;
        default:
            if( !bAlphaNum )
                bAlphaNum = rCC.isLetterNumeric( rTxt, nPos );
        }
    }

    if( -1 != nFndPos )
    {
        // first delete the Character at the end - this allows insertion
        // of an empty hint in SetAttr which would be removed by Delete
        // (fdo#62536, AUTOFMT in Writer)
        rDoc.Delete( nEndPos, nEndPos + 1 );

        // Span the Attribute over the area
        // the end.
        if( '*' == cInsChar )           // Bold
        {
            SvxWeightItem aSvxWeightItem( WEIGHT_BOLD, SID_ATTR_CHAR_WEIGHT );
            rDoc.SetAttr( nFndPos + 1, nEndPos,
                          SID_ATTR_CHAR_WEIGHT,
                          aSvxWeightItem);
        }
        else if( '/' == cInsChar )           // Italic
        {
            SvxPostureItem aSvxPostureItem( ITALIC_NORMAL, SID_ATTR_CHAR_POSTURE );
            rDoc.SetAttr( nFndPos + 1, nEndPos,
                          SID_ATTR_CHAR_POSTURE,
                          aSvxPostureItem);
        }
        else if( '-' == cInsChar )           // Strikeout
        {
            SvxCrossedOutItem aSvxCrossedOutItem( STRIKEOUT_SINGLE, SID_ATTR_CHAR_STRIKEOUT );
            rDoc.SetAttr( nFndPos + 1, nEndPos,
                          SID_ATTR_CHAR_STRIKEOUT,
                          aSvxCrossedOutItem);
        }
        else                            // Underline
        {
            SvxUnderlineItem aSvxUnderlineItem( LINESTYLE_SINGLE, SID_ATTR_CHAR_UNDERLINE );
            rDoc.SetAttr( nFndPos + 1, nEndPos,
                          SID_ATTR_CHAR_UNDERLINE,
                          aSvxUnderlineItem);
        }
        rDoc.Delete( nFndPos, nFndPos + 1 );
    }

    return -1 != nFndPos;
}

// Capitalize first letter of every sentence
void SvxAutoCorrect::FnCapitalStartSentence( SvxAutoCorrDoc& rDoc,
                                    const OUString& rTxt, bool bNormalPos,
                                    sal_Int32 nSttPos, sal_Int32 nEndPos,
                                    LanguageType eLang )
{

    if( rTxt.isEmpty() || nEndPos <= nSttPos )
        return;

    CharClass& rCC = GetCharClass( eLang );
    OUString aText( rTxt );
    const sal_Unicode *pStart = aText.getStr(),
                      *pStr = pStart + nEndPos,
                      *pWordStt = nullptr,
                      *pDelim = nullptr;

    bool bAtStart = false;
    do {
        --pStr;
        if (rCC.isLetter(aText, pStr - pStart))
        {
            if( !pWordStt )
                pDelim = pStr+1;
            pWordStt = pStr;
        }
        else if (pWordStt && !rCC.isDigit(aText, pStr - pStart))
        {
            if( (lcl_IsInArr( u"-'", *pStr ) || *pStr == cApostrophe) && // These characters are allowed in words
                pWordStt - 1 == pStr &&
                // Installation at beginning of paragraph. Replaced < by <= (#i38971#)
                (pStart + 1) <= pStr &&
                rCC.isLetter(aText, pStr-1 - pStart))
                pWordStt = --pStr;
            else
                break;
        }
        bAtStart = (pStart == pStr);
    } while( !bAtStart );

    if (!pWordStt)
        return;    // no character to be replaced


    if (rCC.isDigit(aText, pStr - pStart))
        return; // already ok

    if (IsUpperLetter(rCC.getCharacterType(aText, pWordStt - pStart)))
        return; // already ok

    //See if the text is the start of a protocol string, e.g. have text of
    //"http" see if it is the start of "http:" and if so leave it alone
    sal_Int32 nIndex = pWordStt - pStart;
    sal_Int32 nProtocolLen = pDelim - pWordStt + 1;
    if (nIndex + nProtocolLen <= rTxt.getLength())
    {
        if (INetURLObject::CompareProtocolScheme(rTxt.subView(nIndex, nProtocolLen)) != INetProtocol::NotValid)
            return; // already ok
    }

    if (0x1 == *pWordStt || 0x2 == *pWordStt)
        return; // already ok

    // Only capitalize, if string before specified characters is long enough
    if( *pDelim && 2 >= pDelim - pWordStt &&
        lcl_IsInArr( u".-)>", *pDelim ) )
        return;

    // tdf#59666 don't capitalize single Greek letters (except in Greek texts)
    if ( 1 == pDelim - pWordStt && 0x03B1 <= *pWordStt && *pWordStt <= 0x03C9 && eLang != LANGUAGE_GREEK )
        return;

    if( !bAtStart ) // Still no beginning of a paragraph?
    {
        if (NonFieldWordDelim(*pStr))
        {
            for (;;)
            {
                bAtStart = (pStart == pStr--);
                if (bAtStart || !NonFieldWordDelim(*pStr))
                    break;
            }
        }
        // Asian full stop, full width full stop, full width exclamation mark
        // and full width question marks are treated as word delimiters
        else if ( 0x3002 != *pStr && 0xFF0E != *pStr && 0xFF01 != *pStr &&
                  0xFF1F != *pStr )
            return; // no valid separator -> no replacement
    }

    // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list
    if (FindInWordStartExceptList(eLang, OUString(pWordStt, pDelim - pWordStt)))
        return;

    if( bAtStart )  // at the beginning of a paragraph?
    {
        // Check out the previous paragraph, if it exists.
        // If so, then check to paragraph separator at the end.
        OUString const*const pPrevPara = rDoc.GetPrevPara(bNormalPos);
        if (!pPrevPara)
        {
            // valid separator -> replace
            OUString sChar( *pWordStt );
            sChar = rCC.titlecase(sChar); //see fdo#56740
            if (sChar != OUStringChar(*pWordStt))
               rDoc.ReplaceRange( pWordStt - pStart, 1, sChar );
            return;
        }

        aText = *pPrevPara;
        bAtStart = false;
        pStart = aText.getStr();
        pStr = pStart + aText.getLength();

        do {            // overwrite all blanks
            --pStr;
            if (!NonFieldWordDelim(*pStr))
                break;
            bAtStart = (pStart == pStr);
        } while( !bAtStart );

        if( bAtStart )
            return;  // no valid separator -> no replacement
    }

    // Found [ \t]+[A-Z0-9]+ until here. Test now on the paragraph separator.
    // all three can happen, but not more than once!
    const sal_Unicode* pExceptStt = nullptr;
    bool bContinue = true;
    Flags nFlag = Flags::NONE;
    do
    {
        switch (*pStr)
        {
            // Western and Asian full stop
            case '.':
            case 0x3002:
            case 0xFF0E:
            {
                if (pStr >= pStart + 2 && *(pStr - 2) == '.')
                {
                    //e.g. text "f.o.o. word": Now currently considering
                    //capitalizing word but second last character of
                    //previous word is a .  So probably last word is an
                    //anagram that ends in . and not truly the end of a
                    //previous sentence, so don't autocapitalize this word
                    return;
                }
                if (nFlag & Flags::FullStop)
                    return; // no valid separator -> no replacement
                nFlag |= Flags::FullStop;
                pExceptStt = pStr;
            }
            break;
            case '!':
            case 0xFF01:
            {
                if (nFlag & Flags::ExclamationMark)
                    return; // no valid separator -> no replacement
                nFlag |= Flags::ExclamationMark;
            }
            break;
            case '?':
            case 0xFF1F:
            {
                if (nFlag & Flags::QuestionMark)
                    return; // no valid separator -> no replacement
                nFlag |= Flags::QuestionMark;
            }
            break;
            default:
                if (nFlag == Flags::NONE)
                    return; // no valid separator -> no replacement
                else
                    bContinue = false;
                break;
        }

        if (bContinue && pStr-- == pStart)
        {
            return; // no valid separator -> no replacement
        }
    } while (bContinue);
    if (Flags::FullStop != nFlag)
        pExceptStt = nullptr;

    // Only capitalize, if string is long enough
    if( 2 > ( pStr - pStart ) )
        return;

    if (!rCC.isLetterNumeric(aText, pStr-- - pStart))
    {
        bool bValid = false, bAlphaFnd = false;
        const sal_Unicode* pTmpStr = pStr;
        while( !bValid )
        {
            if( rCC.isDigit( aText, pTmpStr - pStart ) )
            {
                bValid = true;
                pStr = pTmpStr - 1;
            }
            else if( rCC.isLetter( aText, pTmpStr - pStart ) )
            {
                if( bAlphaFnd )
                {
                    bValid = true;
                    pStr = pTmpStr;
                }
                else
                    bAlphaFnd = true;
            }
            else if (bAlphaFnd || NonFieldWordDelim(*pTmpStr))
                break;

            if( pTmpStr == pStart )
                break;

            --pTmpStr;
        }

        if( !bValid )
            return;       // no valid separator -> no replacement
    }

    bool bNumericOnly = '0' <= *(pStr+1) && *(pStr+1) <= '9';

    // Search for the beginning of the word
    while (!NonFieldWordDelim(*pStr))
    {
        if( bNumericOnly && rCC.isLetter( aText, pStr - pStart ) )
            bNumericOnly = false;

        if( pStart == pStr )
            break;

        --pStr;
    }

    if( bNumericOnly )      // consists of only numbers, then not
        return;

    if (NonFieldWordDelim(*pStr))
        ++pStr;

    OUString sWord;

    // check on the basis of the exception list
    if( pExceptStt )
    {
        sWord = OUString(pStr, pExceptStt - pStr + 1);
        if( FindInCplSttExceptList(eLang, sWord) )
            return;

        // Delete all non alphanumeric. Test the characters at the
        // beginning/end of the word ( recognizes: "(min.", "/min.", and so on.)
        OUString sTmp( sWord );
        while( !sTmp.isEmpty() &&
                !rCC.isLetterNumeric( sTmp, 0 ) )
            sTmp = sTmp.copy(1);

        // Remove all non alphanumeric characters towards the end up until
        // the last one.
        sal_Int32 nLen = sTmp.getLength();
        while( nLen && !rCC.isLetterNumeric( sTmp, nLen-1 ) )
            --nLen;
        if( nLen + 1 < sTmp.getLength() )
            sTmp = sTmp.copy( 0, nLen + 1 );

        if( !sTmp.isEmpty() && sTmp.getLength() != sWord.getLength() &&
            FindInCplSttExceptList(eLang, sTmp))
            return;

        if(FindInCplSttExceptList(eLang, sWord, true))
            return;
    }

    // Ok, then replace
    sal_Unicode cSave = *pWordStt;
    nSttPos = pWordStt - rTxt.getStr();
    OUString sChar = rCC.titlecase(OUString(cSave)); //see fdo#56740
    bool bRet = sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar );

    // Perhaps someone wants to have the word
    if( bRet && ACFlags::SaveWordCplSttLst & nFlags )
        rDoc.SaveCpltSttWord( ACFlags::CapitalStartSentence, nSttPos, sWord, cSave );
}

// Correct accidental use of cAPS LOCK key
bool SvxAutoCorrect::FnCorrectCapsLock( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
                                        sal_Int32 nSttPos, sal_Int32 nEndPos,
                                        LanguageType eLang )
{
    if (nEndPos - nSttPos < 2)
        // string must be at least 2-character long.
        return false;

    CharClass& rCC = GetCharClass( eLang );

    // Check the first 2 letters.
    if ( !IsLowerLetter(rCC.getCharacterType(rTxt, nSttPos)) )
        return false;

    if ( !IsUpperLetter(rCC.getCharacterType(rTxt, nSttPos+1)) )
        return false;

    OUStringBuffer aConverted;
    aConverted.append( rCC.uppercase(OUString(rTxt[nSttPos])) );
    aConverted.append( rCC.lowercase(OUString(rTxt[nSttPos+1])) );

    // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list
    if (FindInWordStartExceptList(eLang, rTxt.copy(nSttPos, nEndPos - nSttPos)))
        return false;

    for( sal_Int32 i = nSttPos+2; i < nEndPos; ++i )
    {
        if ( IsLowerLetter(rCC.getCharacterType(rTxt, i)) )
            // A lowercase letter disqualifies the whole text.
            return false;

        if ( IsUpperLetter(rCC.getCharacterType(rTxt, i)) )
            // Another uppercase letter.  Convert it.
            aConverted.append( rCC.lowercase(OUString(rTxt[i])) );
        else
            // This is not an alphabetic letter.  Leave it as-is.
            aConverted.append( rTxt[i] );
    }

    // Replace the word.
    rDoc.Delete(nSttPos, nEndPos);
    rDoc.Insert(nSttPos, aConverted.makeStringAndClear());

    return true;
}


sal_Unicode SvxAutoCorrect::GetQuote( sal_Unicode cInsChar, bool bSttQuote,
                                        LanguageType eLang ) const
{
    sal_Unicode cRet = bSttQuote ? ( '\"' == cInsChar
                                    ? GetStartDoubleQuote()
                                    : GetStartSingleQuote() )
                                   : ( '\"' == cInsChar
                                    ? GetEndDoubleQuote()
                                    : GetEndSingleQuote() );
    if( !cRet )
    {
        // then through the Language find the right character
        if( LANGUAGE_NONE == eLang )
            cRet = cInsChar;
        else
        {
            LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
            OUString sRet( bSttQuote
                            ? ( '\"' == cInsChar
                                ? rLcl.getDoubleQuotationMarkStart()
                                : rLcl.getQuotationMarkStart() )
                            : ( '\"' == cInsChar
                                ? rLcl.getDoubleQuotationMarkEnd()
                                : rLcl.getQuotationMarkEnd() ));
            cRet = !sRet.isEmpty() ? sRet[0] : cInsChar;
        }
    }
    return cRet;
}

void SvxAutoCorrect::InsertQuote( SvxAutoCorrDoc& rDoc, sal_Int32 nInsPos,
                                    sal_Unicode cInsChar, bool bSttQuote,
                                    bool bIns, LanguageType eLang, ACQuotes eType ) const
{
    sal_Unicode cRet;

    if ( eType == ACQuotes::DoubleAngleQuote )
    {
        bool bSwiss = eLang == LANGUAGE_FRENCH_SWISS;
        // pressing " inside a quotation -> use second level angle quotes
        bool bLeftQuote = '\"' == cInsChar &&
                // start position and Romanian OR
                // not start position and Hungarian
                bSttQuote == (eLang != LANGUAGE_HUNGARIAN);
        cRet = ( '<' == cInsChar || bLeftQuote )
                ? ( bSwiss ? cLeftSingleAngleQuote : cLeftDoubleAngleQuote )
                : ( bSwiss ? cRightSingleAngleQuote : cRightDoubleAngleQuote );
    }
    else if ( eType == ACQuotes::UseApostrophe )
        cRet = cApostrophe;
    else
        cRet = GetQuote( cInsChar, bSttQuote, eLang );

    OUString sChg( cInsChar );
    if( bIns )
        rDoc.Insert( nInsPos, sChg );
    else
        rDoc.Replace( nInsPos, sChg );

    sChg = OUString(cRet);

    if( eType == ACQuotes::NonBreakingSpace )
    {
        if( rDoc.Insert( bSttQuote ? nInsPos+1 : nInsPos, OUStringChar(cNonBreakingSpace) ))
        {
            if( !bSttQuote )
                ++nInsPos;
        }
    }
    else if( eType == ACQuotes::DoubleAngleQuote && cInsChar != '\"' )
    {
        rDoc.Delete( nInsPos-1, nInsPos);
        --nInsPos;
    }

    rDoc.Replace( nInsPos, sChg );

    // i' -> I' in English (last step for the Undo)
    if( eType == ACQuotes::CapitalizeIAm )
        rDoc.Replace( nInsPos-1, u"I"_ustr );
}

OUString SvxAutoCorrect::GetQuote( SvxAutoCorrDoc const & rDoc, sal_Int32 nInsPos,
                                sal_Unicode cInsChar, bool bSttQuote )
{
    const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
    sal_Unicode cRet = GetQuote( cInsChar, bSttQuote, eLang );

    OUString sRet(cRet);

    if( '\"' == cInsChar )
    {
        if (primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS)
        {
            if( bSttQuote )
                sRet += " ";
            else
                sRet = " " + sRet;
        }
    }
    return sRet;
}

// search preceding opening quote in the paragraph before the insert position
static bool lcl_HasPrecedingChar( std::u16string_view rTxt, sal_Int32 nPos,
                const sal_Unicode sPrecedingChar, const sal_Unicode sStopChar, const sal_Unicode* aStopChars )
{
    sal_Unicode cTmpChar;

    do {
        cTmpChar = rTxt[ --nPos ];
        if ( cTmpChar == sPrecedingChar )
            return true;

        if ( cTmpChar == sStopChar )
            return false;

        for ( const sal_Unicode* pCh = aStopChars; *pCh; ++pCh )
            if ( cTmpChar == *pCh )
                return false;

    } while ( nPos > 0 );

    return false;
}

// WARNING: rText may become invalid, see comment below
void SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
                                    sal_Int32 nInsPos, sal_Unicode cChar,
                                    bool bInsert, bool& io_bNbspRunNext, vcl::Window const * pFrameWin )
{
    bool bIsNextRun = io_bNbspRunNext;
    io_bNbspRunNext = false;  // if it was set, then it has to be turned off

    do{                                 // only for middle check loop !!
        if( cChar )
        {
            // Prevent double space
            if( nInsPos && ' ' == cChar &&
                IsAutoCorrFlag( ACFlags::IgnoreDoubleSpace ) &&
                ' ' == rTxt[ nInsPos - 1 ])
            {
                break;
            }

            bool bSingle = '\'' == cChar;
            bool bIsReplaceQuote =
                        (IsAutoCorrFlag( ACFlags::ChgQuotes ) && ('\"' == cChar )) ||
                        (IsAutoCorrFlag( ACFlags::ChgSglQuotes ) && bSingle );
            if( bIsReplaceQuote )
            {
                bool bSttQuote = !nInsPos;
                ACQuotes eType = ACQuotes::NONE;
                const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
                if (!bSttQuote)
                {
                    sal_Unicode cPrev = rTxt[ nInsPos-1 ];
                    bSttQuote = NonFieldWordDelim(cPrev) ||
                        lcl_IsInArr( u"([{", cPrev ) ||
                        ( cEmDash == cPrev ) ||
                        ( cEnDash == cPrev );
                    // tdf#38394 use opening quotation mark << in French l'<<word>>
                    if ( !bSingle && !bSttQuote && cPrev == cApostrophe &&
                        primary(eLang) == primary(LANGUAGE_FRENCH) &&
                        ( ( ( nInsPos == 2 || ( nInsPos > 2 && IsWordDelim( rTxt[ nInsPos-3 ] ) ) ) &&
                               // abbreviated form of ce, de, je, la, le, ne, me, te, se or si
                               u"cdjlnmtsCDJLNMTS"_ustr.indexOf( rTxt[ nInsPos-2 ] ) > -1 ) ||
                          ( ( nInsPos == 3 || (nInsPos > 3 && IsWordDelim( rTxt[ nInsPos-4 ] ) ) ) &&
                               // abbreviated form of que
                               ( rTxt[ nInsPos-2 ] == 'u' || rTxt[ nInsPos-2 ] == 'U' ) &&
                               ( rTxt[ nInsPos-3 ] == 'q' || rTxt[ nInsPos-3 ] == 'Q' ) ) ) )
                    {
                        bSttQuote = true;
                    }
                    // tdf#108423 for capitalization of English i'm
                    else if ( bSingle && ( cPrev == 'i' ) &&
                        primary(eLang) == primary(LANGUAGE_ENGLISH) &&
                        ( nInsPos == 1 || IsWordDelim( rTxt[ nInsPos-2 ] ) ) )
                    {
                        eType = ACQuotes::CapitalizeIAm;
                    }
                    // tdf#133524 support >>Hungarian<< and <<Romanian>> secondary level quotations
                    else if ( !bSingle && nInsPos &&
                        ( ( eLang == LANGUAGE_HUNGARIAN &&
                            lcl_HasPrecedingChar( rTxt, nInsPos,
                                bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEnd[0],
                                bSttQuote ? aStopDoubleAngleQuoteStart[1] : aStopDoubleAngleQuoteEnd[1],
                                bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEnd + 2 ) ) ||
                          ( eLang.anyOf(
                                LANGUAGE_ROMANIAN,
                                LANGUAGE_ROMANIAN_MOLDOVA ) &&
                            lcl_HasPrecedingChar( rTxt, nInsPos,
                                bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEndRo[0],
                                bSttQuote ? aStopDoubleAngleQuoteStart[1] : aStopDoubleAngleQuoteEndRo[1],
                                bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEndRo + 2 ) ) ) )
                    {
                        LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
                        // only if the opening double quotation mark is the default one
                        if ( rLcl.getDoubleQuotationMarkStart() == OUStringChar(aStopDoubleAngleQuoteStart[0]) )
                            eType = ACQuotes::DoubleAngleQuote;
                    }
                    else if ( bSingle && nInsPos && !bSttQuote &&
                        // tdf#128860 use apostrophe outside of second level quotation in Czech, German, Icelandic,
                        // Slovak and Slovenian instead of the – in this case, bad – closing quotation mark U+2018.
                        // tdf#123786 the same for Russian and Ukrainian
                        ( eLang.anyOf (
                                 LANGUAGE_CZECH,
                                 LANGUAGE_GERMAN,
                                 LANGUAGE_GERMAN_SWISS,
                                 LANGUAGE_GERMAN_AUSTRIAN,
                                 LANGUAGE_GERMAN_LUXEMBOURG,
                                 LANGUAGE_GERMAN_LIECHTENSTEIN,
                                 LANGUAGE_ICELANDIC,
                                 LANGUAGE_SLOVAK,
                                 LANGUAGE_SLOVENIAN ) ) )
                    {
                        sal_Unicode sStartChar = GetStartSingleQuote();
                        sal_Unicode sEndChar = GetEndSingleQuote();
                        if ( !sStartChar || !sEndChar ) {
                            LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
                            if ( !sStartChar ) sStartChar = rLcl.getQuotationMarkStart()[0];
                            if ( !sEndChar ) sEndChar = rLcl.getQuotationMarkStart()[0];
                        }
                        if ( !lcl_HasPrecedingChar( rTxt, nInsPos, sStartChar, sEndChar, aStopSingleQuoteEnd + 1 ) )
                        {
                            CharClass& rCC = GetCharClass( eLang );
                            if ( rCC.isLetter(rTxt, nInsPos-1) )
                            {
                                eType = ACQuotes::UseApostrophe;
                            }
                        }
                    }
                    else if ( bSingle && nInsPos && !bSttQuote &&
                          ( eLang.anyOf (
                                 LANGUAGE_RUSSIAN,
                                 LANGUAGE_UKRAINIAN ) &&
                            !lcl_HasPrecedingChar( rTxt, nInsPos, aStopSingleQuoteEndRuUa[0], aStopSingleQuoteEndRuUa[1],  aStopSingleQuoteEndRuUa + 2 ) ) )
                    {
                        LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
                        CharClass& rCC = GetCharClass( eLang );
                        if ( rLcl.getQuotationMarkStart() == OUStringChar(aStopSingleQuoteEndRuUa[0]) &&
                             // use apostrophe only after letters, not after digits or punctuation
                             rCC.isLetter(rTxt, nInsPos-1) )
                        {
                            eType = ACQuotes::UseApostrophe;
                        }
                    }
                }

                if ( eType == ACQuotes::NONE && !bSingle &&
                    ( primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS ) )
                    eType = ACQuotes::NonBreakingSpace;

                InsertQuote( rDoc, nInsPos, cChar, bSttQuote, bInsert, eLang, eType );
                break;
            }
            // tdf#133524 change "<<" and ">>" to double angle quotation marks
            else if ( IsAutoCorrFlag( ACFlags::ChgQuotes ) &&
                IsAutoCorrFlag( ACFlags::ChgAngleQuotes ) &&
                ('<' == cChar || '>' == cChar) &&
                nInsPos > 0 && cChar == rTxt[ nInsPos-1 ] )
            {
                const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
                if ( eLang.anyOf(
                        LANGUAGE_CATALAN,              // primary level
                        LANGUAGE_CATALAN_VALENCIAN,    // primary level
                        LANGUAGE_FINNISH,              // alternative primary level
                        LANGUAGE_FRENCH_SWISS,         // second level
                        LANGUAGE_GALICIAN,             // primary level
                        LANGUAGE_HUNGARIAN,            // second level
                        LANGUAGE_POLISH,               // second level
                        LANGUAGE_PORTUGUESE,           // primary level
                        LANGUAGE_PORTUGUESE_BRAZILIAN, // primary level
                        LANGUAGE_ROMANIAN,             // second level
                        LANGUAGE_ROMANIAN_MOLDOVA,     // second level
                        LANGUAGE_SWEDISH,              // alternative primary level
                        LANGUAGE_SWEDISH_FINLAND,      // alternative primary level
                        LANGUAGE_UKRAINIAN,            // primary level
                        LANGUAGE_USER_ARAGONESE,       // primary level
                        LANGUAGE_USER_ASTURIAN ) ||    // primary level
                    primary(eLang) == primary(LANGUAGE_GERMAN) ||  // alternative primary level
                    primary(eLang) == primary(LANGUAGE_SPANISH) )  // primary level
                {
                    InsertQuote( rDoc, nInsPos, cChar, false, bInsert, eLang, ACQuotes::DoubleAngleQuote );
                    break;
                }
            }

            if( bInsert )
                rDoc.Insert( nInsPos, OUString(cChar) );
            else
                rDoc.Replace( nInsPos, OUString(cChar) );

            // Hardspaces autocorrection
            if ( IsAutoCorrFlag( ACFlags::AddNonBrkSpace ) )
            {
                // WARNING ATTENTION: rTxt is an alias of the text node's OUString
                // and its length may change (even become shorter) if FnAddNonBrkSpace succeeds!
                sal_Int32 nUpdatedPos = -1;
                if (NeedsHardspaceAutocorr(cChar))
                    nUpdatedPos = FnAddNonBrkSpace( rDoc, rTxt, nInsPos, GetDocLanguage( rDoc, nInsPos ), io_bNbspRunNext );
                if (nUpdatedPos >= 0)
                {
                    nInsPos = nUpdatedPos;
                }
                else if ( bIsNextRun && !IsAutoCorrectChar( cChar ) )
                {
                    // Remove the NBSP if it wasn't an autocorrection
                    if ( nInsPos != 0 && NeedsHardspaceAutocorr( rTxt[ nInsPos - 1 ] ) &&
                            cChar != ' ' && cChar != '\t' && cChar != cNonBreakingSpace )
                    {
                        // Look for the last HARD_SPACE
                        sal_Int32 nPos = nInsPos - 1;
                        bool bContinue = true;
                        while ( bContinue )
                        {
                            const sal_Unicode cTmpChar = rTxt[ nPos ];
                            if ( cTmpChar == cNonBreakingSpace )
                            {
                                rDoc.Delete( nPos, nPos + 1 );
                                bContinue = false;
                            }
                            else if ( !NeedsHardspaceAutocorr( cTmpChar ) || nPos == 0 )
                                bContinue = false;
                            nPos--;
                        }
                    }
                }
            }
        }

        if( !nInsPos )
            break;

        sal_Int32 nPos = nInsPos - 1;

        if( IsWordDelim( rTxt[ nPos ]))
            break;

        // Set bold or underline automatically?
        if (('*' == cChar || '_' == cChar || '/' == cChar || '-' == cChar) && (nPos+1 < rTxt.getLength()))
        {
            if( IsAutoCorrFlag( ACFlags::ChgWeightUnderl ) )
            {
                FnChgWeightUnderl( rDoc, rTxt, nPos+1 );
            }
            break;
        }

        while( nPos && !IsWordDelim( rTxt[ --nPos ]))
            ;

        // Found a Paragraph-start or a Blank, search for the word shortcut in
        // auto.
        sal_Int32 nCapLttrPos = nPos+1;        // on the 1st Character
        if( !nPos && !IsWordDelim( rTxt[ 0 ]))
            --nCapLttrPos;          // begin of paragraph and no blank

        const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos );
        CharClass& rCC = GetCharClass( eLang );

        // no symbol characters
        if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nInsPos ))
            break;

        if( IsAutoCorrFlag( ACFlags::Autocorrect ) &&
            // tdf#134940 fix regression of arrow "-->" resulted by premature
            // replacement of "--" since '>' was added to IsAutoCorrectChar()
            '>' != cChar )
        {
            // WARNING ATTENTION: rTxt is an alias of the text node's OUString
            // and becomes INVALID if ChgAutoCorrWord returns true!
            // => use aPara/pPara to create a valid copy of the string!
            OUString aPara;
            OUString* pPara = IsAutoCorrFlag(ACFlags::CapitalStartSentence) ? &aPara : nullptr;

            bool bChgWord = rDoc.ChgAutoCorrWord( nCapLttrPos, nInsPos,
                                                    *this, pPara );
            if( !bChgWord )
            {
                sal_Int32 nCapLttrPos1 = nCapLttrPos, nInsPos1 = nInsPos;
                while( nCapLttrPos1 < nInsPos &&
                        lcl_IsInArr( sImplSttSkipChars, rTxt[ nCapLttrPos1 ] )
                        )
                        ++nCapLttrPos1;
                while( nCapLttrPos1 < nInsPos1 && nInsPos1 &&
                        lcl_IsInArr( sImplEndSkipChars, rTxt[ nInsPos1-1 ] )
                        )
                        --nInsPos1;

                if( (nCapLttrPos1 != nCapLttrPos || nInsPos1 != nInsPos ) &&
                    nCapLttrPos1 < nInsPos1 &&
                    rDoc.ChgAutoCorrWord( nCapLttrPos1, nInsPos1, *this, pPara ))
                {
                    bChgWord = true;
                    nCapLttrPos = nCapLttrPos1;
                }
            }

            if( bChgWord )
            {
                if( !aPara.isEmpty() )
                {
                    sal_Int32 nEnd = nCapLttrPos;
                    while( nEnd < aPara.getLength() &&
                            !IsWordDelim( aPara[ nEnd ]))
                        ++nEnd;

                    // Capital letter at beginning of paragraph?
                    if( IsAutoCorrFlag( ACFlags::CapitalStartSentence ) )
                    {
                        FnCapitalStartSentence( rDoc, aPara, false,
                                                nCapLttrPos, nEnd, eLang );
                    }

                    if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) )
                    {
                        FnChgToEnEmDash( rDoc, aPara, nCapLttrPos, nEnd, eLang );
                    }
                }
                break;
            }
        }

        if( IsAutoCorrFlag( ACFlags::TransliterateRTL ) && GetDocLanguage( rDoc, nInsPos ) == LANGUAGE_HUNGARIAN )
        {
            // WARNING ATTENTION: rTxt is an alias of the text node's OUString
            // and becomes INVALID if TransliterateRTLWord returns true!
            if ( rDoc.TransliterateRTLWord( nCapLttrPos, nInsPos ) )
                break;
        }

        if( ( IsAutoCorrFlag( ACFlags::ChgOrdinalNumber ) &&
                (nInsPos >= 2 ) &&       // fdo#69762 avoid autocorrect for 2e-3
                ( '-' != cChar || 'E' != rtl::toAsciiUpperCase(rTxt[nInsPos-1]) || '0' > rTxt[nInsPos-2] || '9' < rTxt[nInsPos-2] ) &&
                FnChgOrdinalNumber( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ||
            ( IsAutoCorrFlag( ACFlags::SetINetAttr ) &&
                ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) &&
                FnSetINetAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ||
            ( IsAutoCorrFlag( ACFlags::SetDOIAttr ) &&
                ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) &&
                FnSetDOIAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) )
            ;
        else
        {
            bool bLockKeyOn = pFrameWin && (pFrameWin->GetIndicatorState() & KeyIndicatorState::CAPSLOCK);
            bool bUnsupported = lcl_IsUnsupportedUnicodeChar( rCC, rTxt, nCapLttrPos, nInsPos );

            if ( bLockKeyOn && IsAutoCorrFlag( ACFlags::CorrectCapsLock ) &&
                 FnCorrectCapsLock( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) )
            {
                // Correct accidental use of cAPS LOCK key (do this only when
                // the caps or shift lock key is pressed). Turn off the caps
                // lock afterwards.
                pFrameWin->SimulateKeyPress( KEY_CAPSLOCK );
            }

            // Capital letter at beginning of paragraph ?
            if( !bUnsupported &&
                IsAutoCorrFlag( ACFlags::CapitalStartSentence ) )
            {
                FnCapitalStartSentence( rDoc, rTxt, true, nCapLttrPos, nInsPos, eLang );
            }

            // Two capital letters at beginning of word ??
            if( !bUnsupported &&
                IsAutoCorrFlag( ACFlags::CapitalStartWord ) )
            {
                FnCapitalStartWord( rDoc, rTxt, nCapLttrPos, nInsPos, eLang );
            }

            if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) )
            {
                FnChgToEnEmDash( rDoc, rTxt, nCapLttrPos, nInsPos, eLang );
            }
        }

    } while( false );
}

SvxAutoCorrectLanguageLists& SvxAutoCorrect::GetLanguageList_(
                                                        LanguageType eLang )
{
    LanguageTag aLanguageTag( eLang);
    if (m_aLangTable.find(aLanguageTag) == m_aLangTable.end())
        (void)CreateLanguageFile(aLanguageTag);
    const auto iter = m_aLangTable.find(aLanguageTag);
    assert(iter != m_aLangTable.end());
    return iter->second;
}

void SvxAutoCorrect::SaveCplSttExceptList( LanguageType eLang )
{
    auto const iter = m_aLangTable.find(LanguageTag(eLang));
    if (iter != m_aLangTable.end())
        iter->second.SaveCplSttExceptList();
    else
    {
        SAL_WARN("editeng", "Save an empty list? ");
    }
}

void SvxAutoCorrect::SaveWordStartExceptList(LanguageType eLang)
{
    auto const iter = m_aLangTable.find(LanguageTag(eLang));
    if (iter != m_aLangTable.end())
        iter->second.SaveWordStartExceptList();
    else
    {
        SAL_WARN("editeng", "Save an empty list? ");
    }
}

// Adds a single word. The list will immediately be written to the file!
bool SvxAutoCorrect::AddCplSttException( const OUString& rNew,
                                        LanguageType eLang )
{
    SvxAutoCorrectLanguageLists* pLists = nullptr;
    // either the right language is present or it will be this in the general list
    auto iter = m_aLangTable.find(LanguageTag(eLang));
    if (iter != m_aLangTable.end())
        pLists = &iter->second;
    else
    {
        LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED);
        iter = m_aLangTable.find(aLangTagUndetermined);
        if (iter != m_aLangTable.end())
            pLists = &iter->second;
        else if(CreateLanguageFile(aLangTagUndetermined))
        {
            iter = m_aLangTable.find(aLangTagUndetermined);
            assert(iter != m_aLangTable.end());
            pLists = &iter->second;
        }
    }
    OSL_ENSURE(pLists, "No auto correction data");
    return pLists && pLists->AddToCplSttExceptList(rNew);
}

// Adds a single word. The list will immediately be written to the file!
bool SvxAutoCorrect::AddWordStartException( const OUString& rNew,
                                         LanguageType eLang )
{
    SvxAutoCorrectLanguageLists* pLists = nullptr;
    //either the right language is present or it is set in the general list
    auto iter = m_aLangTable.find(LanguageTag(eLang));
    if (iter != m_aLangTable.end())
        pLists = &iter->second;
    else
    {
        LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED);
        iter = m_aLangTable.find(aLangTagUndetermined);
        if (iter != m_aLangTable.end())
            pLists = &iter->second;
        else if(CreateLanguageFile(aLangTagUndetermined))
        {
            iter = m_aLangTable.find(aLangTagUndetermined);
            assert(iter != m_aLangTable.end());
            pLists = &iter->second;
        }
    }
    OSL_ENSURE(pLists, "No auto correction file!");
    return pLists && pLists->AddToWordStartExceptList(rNew);
}

OUString SvxAutoCorrect::GetPrevAutoCorrWord(SvxAutoCorrDoc const& rDoc, const OUString& rTxt,
                                             sal_Int32 nPos)
{
    OUString sRet;
    if( !nPos )
        return sRet;

    sal_Int32 nEnd = nPos;

    // it must be followed by a blank or tab!
    if( ( nPos < rTxt.getLength() &&
        !IsWordDelim( rTxt[ nPos ])) ||
        IsWordDelim( rTxt[ --nPos ]))
        return sRet;

    while( nPos && !IsWordDelim( rTxt[ --nPos ]))
        ;

    // Found a Paragraph-start or a Blank, search for the word shortcut in
    // auto.
    sal_Int32 nCapLttrPos = nPos+1;        // on the 1st Character
    if( !nPos && !IsWordDelim( rTxt[ 0 ]))
        --nCapLttrPos;          // Beginning of paragraph and no Blank!

    while( lcl_IsInArr( sImplSttSkipChars, rTxt[ nCapLttrPos ]) )
        if( ++nCapLttrPos >= nEnd )
            return sRet;

    if( 3 > nEnd - nCapLttrPos )
        return sRet;

    const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos );

    CharClass& rCC = GetCharClass(eLang);

    if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nEnd ))
        return sRet;

    sRet = rTxt.copy( nCapLttrPos, nEnd - nCapLttrPos );
    return sRet;
}

// static
std::vector<OUString> SvxAutoCorrect::GetChunkForAutoText(std::u16string_view rTxt,
                                                          const sal_Int32 nPos)
{
    constexpr sal_Int32 nMinLen = 3;
    constexpr sal_Int32 nMaxLen = 9;
    std::vector<OUString> aRes;
    if (nPos >= nMinLen)
    {
        sal_Int32 nBegin = std::max<sal_Int32>(nPos - nMaxLen, 0);
        // TODO: better detect word boundaries (not only whitespaces, but also e.g. punctuation)
        if (nBegin > 0 && !IsWordDelim(rTxt[nBegin-1]))
        {
            while (nBegin + nMinLen <= nPos && !IsWordDelim(rTxt[nBegin]))
                ++nBegin;
        }
        if (nBegin + nMinLen <= nPos)
        {
            OUString sRes( rTxt.substr(nBegin, nPos - nBegin) );
            aRes.push_back(sRes);
            bool bLastStartedWithDelim = IsWordDelim(sRes[0]);
            for (sal_Int32 i = 1; i <= sRes.getLength() - nMinLen; ++i)
            {
                bool bAdd = bLastStartedWithDelim;
                bLastStartedWithDelim = IsWordDelim(sRes[i]);
                bAdd = bAdd || bLastStartedWithDelim;
                if (bAdd)
                    aRes.push_back(sRes.copy(i));
            }
        }
    }
    return aRes;
}

bool SvxAutoCorrect::CreateLanguageFile( const LanguageTag& rLanguageTag, bool bNewFile )
{
    OSL_ENSURE(m_aLangTable.find(rLanguageTag) == m_aLangTable.end(), "Language already exists ");

    OUString sUserDirFile( GetAutoCorrFileName( rLanguageTag, true ));
    OUString sShareDirFile( sUserDirFile );

    SvxAutoCorrectLanguageLists* pLists = nullptr;

    tools::Time nAktTime(tools::Time::SYSTEM);

    auto nFndPos = aLastFileTable.find(rLanguageTag);
    bool lastCheckLessThan2MinutesAgo = nFndPos != aLastFileTable.end();
    if (lastCheckLessThan2MinutesAgo)
    {
        const tools::Time nLastCheckTime(tools::Time::fromEncodedTime(nFndPos->second));
        lastCheckLessThan2MinutesAgo
            = nLastCheckTime < nAktTime && nAktTime - nLastCheckTime < tools::Time(0, 2);
    }
    if (lastCheckLessThan2MinutesAgo)
    {
        // no need to test the file, because the last check is not older then
        // 2 minutes.
        if( bNewFile )
        {
            sShareDirFile = sUserDirFile;
            auto itBool = m_aLangTable.emplace(std::piecewise_construct,
                            std::forward_as_tuple(rLanguageTag),
                            std::forward_as_tuple(*this, sShareDirFile, sUserDirFile));
            pLists = &itBool.first->second;
            aLastFileTable.erase(nFndPos);
        }
    }
    else if(
             ( FStatHelper::IsDocument( sUserDirFile ) ||
               FStatHelper::IsDocument( sShareDirFile =
                   GetAutoCorrFileName( rLanguageTag ) ) ||
               FStatHelper::IsDocument( sShareDirFile =
                   GetAutoCorrFileName( rLanguageTag, false, false, true) )
             ) ||
        ( sShareDirFile = sUserDirFile, bNewFile )
          )
    {
        auto itBool = m_aLangTable.emplace(std::piecewise_construct,
                        std::forward_as_tuple(rLanguageTag),
                        std::forward_as_tuple(*this, sShareDirFile, sUserDirFile));
        pLists = &itBool.first->second;
        if (nFndPos != aLastFileTable.end())
            aLastFileTable.erase(nFndPos);
    }
    else if( !bNewFile )
    {
        aLastFileTable[rLanguageTag] = nAktTime.GetTime();
    }
    return pLists != nullptr;
}

bool SvxAutoCorrect::PutText( const OUString& rShort, const OUString& rLong,
                                LanguageType eLang )
{
    LanguageTag aLanguageTag( eLang);
    if (auto const iter = m_aLangTable.find(aLanguageTag); iter != m_aLangTable.end())
        return iter->second.PutText(rShort, rLong);
    if (CreateLanguageFile(aLanguageTag))
    {
        auto const iter = m_aLangTable.find(aLanguageTag);
        assert (iter != m_aLangTable.end());
        return iter->second.PutText(rShort, rLong);
    }
    return false;
}

void SvxAutoCorrect::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries,
                                              std::vector<SvxAutocorrWord>& aDeleteEntries,
                                              LanguageType eLang )
{
    LanguageTag aLanguageTag( eLang);
    auto iter = m_aLangTable.find(aLanguageTag);
    if (iter != m_aLangTable.end())
    {
        iter->second.MakeCombinedChanges( aNewEntries, aDeleteEntries );
    }
    else if(CreateLanguageFile( aLanguageTag ))
    {
        iter = m_aLangTable.find(aLanguageTag);
        assert(iter != m_aLangTable.end());
        iter->second.MakeCombinedChanges( aNewEntries, aDeleteEntries );
    }
}

//  - return the replacement text (only for SWG-Format, all other
//    can be taken from the word list!)
bool SvxAutoCorrect::GetLongText( const OUString&, OUString& )
{
    return false;
}

void SvxAutoCorrect::refreshBlockList( const uno::Reference< embed::XStorage >& )
{
}

// Text with attribution (only the SWG - SWG format!)
bool SvxAutoCorrect::PutText( const css::uno::Reference < css::embed::XStorage >&,
                              const OUString&, const OUString&, SfxObjectShell&, OUString& )
{
    return false;
}

OUString EncryptBlockName_Imp(std::u16string_view rName)
{
    OUStringBuffer aName;
    aName.append('#').append(rName);
    for (size_t nLen = rName.size(), nPos = 1; nPos < nLen; ++nPos)
    {
        if (lcl_IsInArr( u"!/:.\\", aName[nPos]))
            aName[nPos] &= 0x0f;
    }
    return aName.makeStringAndClear();
}

/* This code is copied from SwXMLTextBlocks::GeneratePackageName */
static void GeneratePackageName ( std::u16string_view rShort, OUString& rPackageName )
{
    OString sByte(OUStringToOString(rShort, RTL_TEXTENCODING_UTF7));
    OUStringBuffer aBuf(OStringToOUString(sByte, RTL_TEXTENCODING_ASCII_US));

    for (sal_Int32 nPos = 0; nPos < aBuf.getLength(); ++nPos)
    {
        switch (aBuf[nPos])
        {
            case '!':
            case '/':
            case ':':
            case '.':
            case '\\':
            // tdf#156769 - escape the question mark in the storage name
            case '?':
                aBuf[nPos] = '_';
                break;
            default:
                break;
        }
    }

    rPackageName = aBuf.makeStringAndClear();
}

static std::optional<SvxAutocorrWordList::WordSearchStatus>
lcl_SearchWordsInList( SvxAutoCorrectLanguageLists* pList,
                       std::u16string_view rTxt,
                       sal_Int32& rStt, sal_Int32 nEndPos )
{
    const SvxAutocorrWordList* pAutoCorrWordList = pList->GetAutocorrWordList();
    return pAutoCorrWordList->SearchWordsInList( rTxt, rStt, nEndPos );
}

// the search for the words in the substitution table
std::optional<SvxAutocorrWordList::WordSearchStatus>
SvxAutoCorrect::SearchWordsInList(
    std::u16string_view rTxt, sal_Int32& rStt, sal_Int32 nEndPos,
    SvxAutoCorrDoc&, LanguageTag& rLang )
{
    LanguageTag aLanguageTag( rLang);
    if( aLanguageTag.isSystemLocale() )
        aLanguageTag.reset( MsLangId::getConfiguredSystemLanguage());

    /* TODO-BCP47: this is so ugly, should all maybe be a proper fallback
     * list instead? */

    // First search for eLang, then US-English -> English
    // and last in LANGUAGE_UNDETERMINED
    if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
    {
        //the language is available - so bring it on
        const auto iter = m_aLangTable.find(aLanguageTag);
        assert(iter != m_aLangTable.end());
        SvxAutoCorrectLanguageLists & rList = iter->second;
        auto pRet = lcl_SearchWordsInList( &rList, rTxt, rStt, nEndPos );
        if( pRet )
        {
            rLang = aLanguageTag;
            return pRet;
        }
    }

    // If it still could not be found here, then keep on searching
    LanguageType eLang = aLanguageTag.getLanguageType();
    // the primary language for example EN
    aLanguageTag.reset(aLanguageTag.getLanguage());
    LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
    if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
                (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
                 CreateLanguageFile(aLanguageTag, false)))
    {
        //the language is available - so bring it on
        const auto iter = m_aLangTable.find(aLanguageTag);
        assert(iter != m_aLangTable.end());
        SvxAutoCorrectLanguageLists& rList = iter->second;
        auto pRet = lcl_SearchWordsInList( &rList, rTxt, rStt, nEndPos );
        if( pRet )
        {
            rLang = aLanguageTag;
            return pRet;
        }
    }

    if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
            CreateLanguageFile(aLanguageTag, false))
    {
        //the language is available - so bring it on
        const auto iter = m_aLangTable.find(aLanguageTag);
        assert(iter != m_aLangTable.end());
        SvxAutoCorrectLanguageLists& rList = iter->second;
        auto pRet = lcl_SearchWordsInList( &rList, rTxt, rStt, nEndPos );
        if( pRet )
        {
            rLang = std::move(aLanguageTag);
            return pRet;
        }
    }
    return std::nullopt;
}

bool SvxAutoCorrect::SearchWordsNext(
    std::u16string_view rTxt, sal_Int32& rStt, sal_Int32 nEndPos,
    SvxAutocorrWordList::WordSearchStatus& rStatus )
{
    const SvxAutocorrWordList* pWordList = rStatus.GetAutocorrWordList();
    return pWordList->SearchWordsNext( rTxt, rStt, nEndPos, rStatus );
}

bool SvxAutoCorrect::FindInWordStartExceptList( LanguageType eLang,
                                             const OUString& sWord )
{
    LanguageTag aLanguageTag( eLang);

    /* TODO-BCP47: again horrible ugliness */

    // First search for eLang, then primary language of eLang
    // and last in LANGUAGE_UNDETERMINED

    if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
    {
        //the language is available - so bring it on
        const auto iter = m_aLangTable.find(aLanguageTag);
        assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail");
        auto& rList = iter->second;
        if(rList.GetWordStartExceptList()->find(sWord) != rList.GetWordStartExceptList()->end() )
            return true;
    }

    // If it still could not be found here, then keep on searching
    // the primary language for example EN
    aLanguageTag.reset(aLanguageTag.getLanguage());
    LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
    if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
                (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
                 CreateLanguageFile(aLanguageTag, false)))
    {
        //the language is available - so bring it on
        const auto iter = m_aLangTable.find(aLanguageTag);
        assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail");
        auto& rList = iter->second;
        if(rList.GetWordStartExceptList()->find(sWord) != rList.GetWordStartExceptList()->end() )
            return true;
    }

    if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
            CreateLanguageFile(aLanguageTag, false))
    {
        //the language is available - so bring it on
        const auto iter = m_aLangTable.find(aLanguageTag);
        assert(iter != m_aLangTable.end());
        auto& rList = iter->second;
        if(rList.GetWordStartExceptList()->find(sWord) != rList.GetWordStartExceptList()->end() )
            return true;
    }
    return false;
}

static bool lcl_FindAbbreviation(const SvStringsISortDtor* pList, const OUString& sWord)
{
    SvStringsISortDtor::const_iterator it = pList->find(u"~"_ustr);
    SvStringsISortDtor::size_type nPos = it - pList->begin();
    if( nPos < pList->size() )
    {
        OUString sLowerWord(sWord.toAsciiLowerCase());
        OUString sAbr;
        for( SvStringsISortDtor::size_type n = nPos; n < pList->size(); ++n )
        {
            sAbr = (*pList)[ n ];
            if (sAbr[0] != '~')
                break;
            // ~ and ~. are not allowed!
            if( 2 < sAbr.getLength() && sAbr.getLength() - 1 <= sWord.getLength() )
            {
                OUString sLowerAbk(sAbr.toAsciiLowerCase());
                for (sal_Int32 i = sLowerAbk.getLength(), ii = sLowerWord.getLength(); i;)
                {
                    if( !--i )      // agrees
                        return true;

                    if( sLowerAbk[i] != sLowerWord[--ii])
                        break;
                }
            }
        }
    }
    OSL_ENSURE( !(nPos && '~' == (*pList)[ --nPos ][ 0 ] ),
            "Wrongly sorted exception list?" );
    return false;
}

bool SvxAutoCorrect::FindInCplSttExceptList(LanguageType eLang,
                                const OUString& sWord, bool bAbbreviation)
{
    LanguageTag aLanguageTag( eLang);

    /* TODO-BCP47: did I mention terrible horrible ugliness? */

    // First search for eLang, then primary language of eLang
    // and last in LANGUAGE_UNDETERMINED

    if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
    {
        //the language is available - so bring it on
        const auto iter = m_aLangTable.find(aLanguageTag);
        assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail");
        const SvStringsISortDtor* pList = iter->second.GetCplSttExceptList();
        if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
            return true;
    }

    // If it still could not be found here, then keep on searching
    // the primary language for example EN
    aLanguageTag.reset(aLanguageTag.getLanguage());
    LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
    if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
                (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
                 CreateLanguageFile(aLanguageTag, false)))
    {
        //the language is available - so bring it on
        const auto iter = m_aLangTable.find(aLanguageTag);
        assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail");
        const SvStringsISortDtor* pList = iter->second.GetCplSttExceptList();
        if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
            return true;
    }

    if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
            CreateLanguageFile(aLanguageTag, false))
    {
        //the language is available - so bring it on
        const auto iter = m_aLangTable.find(aLanguageTag);
        assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail");
        const SvStringsISortDtor* pList = iter->second.GetCplSttExceptList();
        if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
            return true;
    }
    return false;
}

OUString SvxAutoCorrect::GetAutoCorrFileName( const LanguageTag& rLanguageTag,
                                            bool bNewFile, bool bTst, bool bUnlocalized ) const
{
    OUString sRet, sExt( rLanguageTag.getBcp47() );
    if (bUnlocalized)
    {
        // we don't want variant, so we'll take "fr" instead of "fr-CA" for example
        std::vector< OUString > vecFallBackStrings = rLanguageTag.getFallbackStrings(false);
        if (!vecFallBackStrings.empty())
           sExt = vecFallBackStrings[0];
    }

    sExt = "_" + sExt + ".dat";
    if( bNewFile )
        sRet = sUserAutoCorrFile + sExt;
    else if( !bTst )
        sRet = sShareAutoCorrFile + sExt;
    else
    {
        // test first in the user directory - if not exist, then
        sRet = sUserAutoCorrFile + sExt;
        if( !FStatHelper::IsDocument( sRet ))
            sRet = sShareAutoCorrFile + sExt;
    }
    return sRet;
}

SvxAutoCorrectLanguageLists::SvxAutoCorrectLanguageLists(
                SvxAutoCorrect& rParent,
                OUString aShareAutoCorrectFile,
                OUString aUserAutoCorrectFile)
:   sShareAutoCorrFile(std::move( aShareAutoCorrectFile )),
    sUserAutoCorrFile(std::move( aUserAutoCorrectFile )),
    aModifiedDate( Date::EMPTY ),
    aModifiedTime( tools::Time::EMPTY ),
    aLastCheckTime( tools::Time::EMPTY ),
    rAutoCorrect(rParent),
    nFlags(ACFlags::NONE)
{
}

SvxAutoCorrectLanguageLists::~SvxAutoCorrectLanguageLists()
{
}

bool SvxAutoCorrectLanguageLists::IsFileChanged_Imp()
{
    // Access the file system only every 2 minutes to check the date stamp
    bool bRet = false;

    tools::Time nMinTime( 0, 2 );
    tools::Time nAktTime( tools::Time::SYSTEM );
    if( aLastCheckTime <= nAktTime) // overflow?
        return false;
    nAktTime -= aLastCheckTime;
    if( nAktTime > nMinTime )     // min time past
    {
        Date aTstDate( Date::EMPTY ); tools::Time aTstTime( tools::Time::EMPTY );
        if( FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
                                            &aTstDate, &aTstTime ) &&
            ( aModifiedDate != aTstDate || aModifiedTime != aTstTime ))
        {
            bRet = true;
            // then remove all the lists fast!
            if( (ACFlags::CplSttLstLoad & nFlags) && pCplStt_ExcptLst )
            {
                pCplStt_ExcptLst.reset();
            }
            if( (ACFlags::WordStartLstLoad & nFlags) && pWordStart_ExcptLst )
            {
                pWordStart_ExcptLst.reset();
            }
            if( (ACFlags::ChgWordLstLoad & nFlags) && pAutocorr_List )
            {
                pAutocorr_List.reset();
            }
            nFlags &= ~ACFlags(ACFlags::CplSttLstLoad | ACFlags::WordStartLstLoad | ACFlags::ChgWordLstLoad );
        }
        aLastCheckTime = tools::Time( tools::Time::SYSTEM );
    }
    return bRet;
}

void SvxAutoCorrectLanguageLists::LoadXMLExceptList_Imp(
                                        std::unique_ptr<SvStringsISortDtor>& rpLst,
                                        const OUString& sStrmName,
                                        rtl::Reference<SotStorage>& rStg)
{
    if( rpLst )
        rpLst->clear();
    else
        rpLst.reset( new SvStringsISortDtor );

    {
        if( rStg.is() && rStg->IsStream( sStrmName ) )
        {
            rtl::Reference<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName,
                ( StreamMode::READ | StreamMode::SHARE_DENYWRITE | StreamMode::NOCREATE ) );
            if( ERRCODE_NONE != xStrm->GetError())
            {
                xStrm.clear();
                rStg.clear();
                RemoveStream_Imp( sStrmName );
            }
            else
            {
                const uno::Reference< uno::XComponentContext >& xContext =
                    comphelper::getProcessComponentContext();

                xml::sax::InputSource aParserInput;
                aParserInput.sSystemId = sStrmName;

                xStrm->Seek( 0 );
                xStrm->SetBufferSize( 8 * 1024 );
                aParserInput.aInputStream = new utl::OInputStreamWrapper( *xStrm );

                // get filter
                uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLExceptionListImport ( xContext, *rpLst );

                // connect parser and filter
                uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create( xContext );
                uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler;
                xParser->setFastDocumentHandler( xFilter );
                xParser->registerNamespace( u"http://openoffice.org/2001/block-list"_ustr, SvXMLAutoCorrectToken::NAMESPACE );
                xParser->setTokenHandler( xTokenHandler );

                // parse
                try
                {
                    xParser->parseStream( aParserInput );
                }
                catch( const xml::sax::SAXParseException& )
                {
                    // re throw ?
                }
                catch( const xml::sax::SAXException& )
                {
                    // re throw ?
                }
                catch( const io::IOException& )
                {
                    // re throw ?
                }
            }
        }

        // Set time stamp
        FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
                                        &aModifiedDate, &aModifiedTime );
        aLastCheckTime = tools::Time( tools::Time::SYSTEM );
    }

}

void SvxAutoCorrectLanguageLists::SaveExceptList_Imp(
                            const SvStringsISortDtor& rLst,
                            const OUString& sStrmName,
                            rtl::Reference<SotStorage> const &rStg,
                            bool bConvert )
{
    if( !rStg.is() )
        return;

    if( rLst.empty() )
    {
        rStg->Remove( sStrmName );
        rStg->Commit();
    }
    else
    {
        rtl::Reference<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName,
                ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) );
        if( xStrm.is() )
        {
            xStrm->SetSize( 0 );
            xStrm->SetBufferSize( 8192 );
            xStrm->SetProperty( u"MediaType"_ustr, Any(u"text/xml"_ustr) );


            const uno::Reference< uno::XComponentContext >& xContext =
                comphelper::getProcessComponentContext();

            uno::Reference < xml::sax::XWriter > xWriter  = xml::sax::Writer::create(xContext);
            uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *xStrm );
            xWriter->setOutputStream(xOut);

            uno::Reference < xml::sax::XDocumentHandler > xHandler(xWriter, UNO_QUERY_THROW);
            rtl::Reference< SvXMLExceptionListExport > xExp( new SvXMLExceptionListExport( xContext, rLst, sStrmName, xHandler ) );

            xExp->exportDoc( XML_BLOCK_LIST );

            xStrm->Commit();
            if( xStrm->GetError() == ERRCODE_NONE )
            {
                xStrm.clear();
                if (!bConvert)
                {
                    rStg->Commit();
                    if( ERRCODE_NONE != rStg->GetError() )
                    {
                        rStg->Remove( sStrmName );
                        rStg->Commit();
                    }
                }
            }
        }
    }
}

SvxAutocorrWordList* SvxAutoCorrectLanguageLists::LoadAutocorrWordList()
{
    if( pAutocorr_List )
        pAutocorr_List->DeleteAndDestroyAll();
    else
        pAutocorr_List.reset( new SvxAutocorrWordList() );

    try
    {
        uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sShareAutoCorrFile, embed::ElementModes::READ );
        uno::Reference < io::XStream > xStrm = xStg->openStreamElement( pXMLImplAutocorr_ListStr, embed::ElementModes::READ );
        const uno::Reference< uno::XComponentContext >& xContext = comphelper::getProcessComponentContext();

        xml::sax::InputSource aParserInput;
        aParserInput.sSystemId = pXMLImplAutocorr_ListStr;
        aParserInput.aInputStream = xStrm->getInputStream();

        // get parser
        uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create(xContext);
        SAL_INFO("editeng", "AutoCorrect Import" );
        uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLAutoCorrectImport( xContext, pAutocorr_List.get(), rAutoCorrect, xStg );
        uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler;

        // connect parser and filter
        xParser->setFastDocumentHandler( xFilter );
        xParser->registerNamespace( u"http://openoffice.org/2001/block-list"_ustr, SvXMLAutoCorrectToken::NAMESPACE );
        xParser->setTokenHandler(xTokenHandler);

        // parse
        xParser->parseStream( aParserInput );
    }
    catch ( const uno::Exception& )
    {
        TOOLS_WARN_EXCEPTION("editeng", "when loading " << sShareAutoCorrFile);
    }

    // Set time stamp
    FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
                                    &aModifiedDate, &aModifiedTime );
    aLastCheckTime = tools::Time( tools::Time::SYSTEM );

    return pAutocorr_List.get();
}

const SvxAutocorrWordList* SvxAutoCorrectLanguageLists::GetAutocorrWordList()
{
    if( !( ACFlags::ChgWordLstLoad & nFlags ) || IsFileChanged_Imp() )
    {
        LoadAutocorrWordList();
        if( !pAutocorr_List )
        {
            OSL_ENSURE( false, "No valid list" );
            pAutocorr_List.reset( new SvxAutocorrWordList() );
        }
        nFlags |= ACFlags::ChgWordLstLoad;
    }
    return pAutocorr_List.get();
}

SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetCplSttExceptList()
{
    if( !( ACFlags::CplSttLstLoad & nFlags ) || IsFileChanged_Imp() )
    {
        LoadCplSttExceptList();
        if( !pCplStt_ExcptLst )
        {
            OSL_ENSURE( false, "No valid list" );
            pCplStt_ExcptLst.reset( new SvStringsISortDtor );
        }
        nFlags |= ACFlags::CplSttLstLoad;
    }
    return pCplStt_ExcptLst.get();
}

bool SvxAutoCorrectLanguageLists::AddToCplSttExceptList(const OUString& rNew)
{
    bool bRet = false;
    if( !rNew.isEmpty() && GetCplSttExceptList()->insert( rNew ).second )
    {
        MakeUserStorage_Impl();
        rtl::Reference<SotStorage> xStg = new SotStorage(sUserAutoCorrFile, StreamMode::READWRITE);

        SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );

        xStg = nullptr;
        // Set time stamp
        FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
                                            &aModifiedDate, &aModifiedTime );
        aLastCheckTime = tools::Time( tools::Time::SYSTEM );
        bRet = true;
    }
    return bRet;
}

bool SvxAutoCorrectLanguageLists::AddToWordStartExceptList(const OUString& rNew)
{
    bool bRet = false;
    if( !rNew.isEmpty() && GetWordStartExceptList()->insert( rNew ).second )
    {
        MakeUserStorage_Impl();
        rtl::Reference<SotStorage> xStg = new SotStorage(sUserAutoCorrFile, StreamMode::READWRITE);

        SaveExceptList_Imp( *pWordStart_ExcptLst, pXMLImplWordStart_ExcptLstStr, xStg );

        xStg = nullptr;
        // Set time stamp
        FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
                                            &aModifiedDate, &aModifiedTime );
        aLastCheckTime = tools::Time( tools::Time::SYSTEM );
        bRet = true;
    }
    return bRet;
}

SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadCplSttExceptList()
{
    try
    {
        rtl::Reference<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE );
        if( xStg.is() && xStg->IsContained( pXMLImplCplStt_ExcptLstStr ) )
            LoadXMLExceptList_Imp( pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
    }
    catch (const css::ucb::ContentCreationException&)
    {
    }
    return pCplStt_ExcptLst.get();
}

void SvxAutoCorrectLanguageLists::SaveCplSttExceptList()
{
    MakeUserStorage_Impl();
    rtl::Reference<SotStorage> xStg = new SotStorage(sUserAutoCorrFile, StreamMode::READWRITE);

    SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );

    xStg = nullptr;

    // Set time stamp
    FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
                                            &aModifiedDate, &aModifiedTime );
    aLastCheckTime = tools::Time( tools::Time::SYSTEM );
}

SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadWordStartExceptList()
{
    try
    {
        rtl::Reference<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE );
        if( xStg.is() && xStg->IsContained( pXMLImplWordStart_ExcptLstStr ) )
            LoadXMLExceptList_Imp( pWordStart_ExcptLst, pXMLImplWordStart_ExcptLstStr, xStg );
    }
    catch (const css::ucb::ContentCreationException &)
    {
        TOOLS_WARN_EXCEPTION("editeng", "SvxAutoCorrectLanguageLists::LoadWordStartExceptList");
    }
    return pWordStart_ExcptLst.get();
}

void SvxAutoCorrectLanguageLists::SaveWordStartExceptList()
{
    MakeUserStorage_Impl();
    rtl::Reference<SotStorage> xStg = new SotStorage(sUserAutoCorrFile, StreamMode::READWRITE);

    SaveExceptList_Imp( *pWordStart_ExcptLst, pXMLImplWordStart_ExcptLstStr, xStg );

    xStg = nullptr;
    // Set time stamp
    FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
                                            &aModifiedDate, &aModifiedTime );
    aLastCheckTime = tools::Time( tools::Time::SYSTEM );
}

SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetWordStartExceptList()
{
    if( !( ACFlags::WordStartLstLoad & nFlags ) || IsFileChanged_Imp() )
    {
        LoadWordStartExceptList();
        if( !pWordStart_ExcptLst )
        {
            OSL_ENSURE( false, "No valid list" );
            pWordStart_ExcptLst.reset( new SvStringsISortDtor );
        }
        nFlags |= ACFlags::WordStartLstLoad;
    }
    return pWordStart_ExcptLst.get();
}

void SvxAutoCorrectLanguageLists::RemoveStream_Imp( const OUString& rName )
{
    if( sShareAutoCorrFile != sUserAutoCorrFile )
    {
        rtl::Reference<SotStorage> xStg = new SotStorage(sUserAutoCorrFile, StreamMode::READWRITE);
        if( xStg.is() && ERRCODE_NONE == xStg->GetError() &&
            xStg->IsStream( rName ) )
        {
            xStg->Remove( rName );
            xStg->Commit();

            xStg = nullptr;
        }
    }
}

void SvxAutoCorrectLanguageLists::MakeUserStorage_Impl()
{
    // The conversion needs to happen if the file is already in the user
    // directory and is in the old format. Additionally it needs to
    // happen when the file is being copied from share to user.

    bool bError = false, bConvert = false, bCopy = false;
    INetURLObject aDest;
    INetURLObject aSource;

    if (sUserAutoCorrFile != sShareAutoCorrFile )
    {
        aSource = INetURLObject ( sShareAutoCorrFile );
        aDest = INetURLObject ( sUserAutoCorrFile );
        if ( SotStorage::IsOLEStorage ( sShareAutoCorrFile ) )
        {
            aDest.SetExtension ( u"bak" );
            bConvert = true;
        }
        bCopy = true;
    }
    else if ( SotStorage::IsOLEStorage ( sUserAutoCorrFile ) )
    {
        aSource = INetURLObject ( sUserAutoCorrFile );
        aDest = INetURLObject ( sUserAutoCorrFile );
        aDest.SetExtension ( u"bak" );
        bCopy = bConvert = true;
    }
    if (bCopy)
    {
        try
        {
            OUString sMain(aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ));
            sal_Int32 nSlashPos = sMain.lastIndexOf('/');
            sMain = sMain.copy(0, nSlashPos);
            ::ucbhelper::Content aNewContent( sMain, uno::Reference< XCommandEnvironment >(), comphelper::getProcessComponentContext() );
            TransferInfo aInfo;
            aInfo.NameClash = NameClash::OVERWRITE;
            aInfo.NewTitle = aDest.GetLastName();
            aInfo.SourceURL = aSource.GetMainURL( INetURLObject::DecodeMechanism::ToIUri );
            aInfo.MoveData  = false;
            aNewContent.executeCommand( u"transfer"_ustr, Any(aInfo));
        }
        catch (...)
        {
            bError = true;
        }
    }
    if (bConvert && !bError)
    {
        rtl::Reference<SotStorage> xSrcStg = new SotStorage( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), StreamMode::READ );
        rtl::Reference<SotStorage> xDstStg = new SotStorage(sUserAutoCorrFile, StreamMode::WRITE);

        if( xSrcStg.is() && xDstStg.is() )
        {
            std::unique_ptr<SvStringsISortDtor> pTmpWordList;

            if (xSrcStg->IsContained( pXMLImplWordStart_ExcptLstStr ) )
                LoadXMLExceptList_Imp( pTmpWordList, pXMLImplWordStart_ExcptLstStr, xSrcStg );

            if (pTmpWordList)
            {
                SaveExceptList_Imp( *pTmpWordList, pXMLImplWordStart_ExcptLstStr, xDstStg, true );
                pTmpWordList.reset();
            }


            if (xSrcStg->IsContained( pXMLImplCplStt_ExcptLstStr ) )
                LoadXMLExceptList_Imp( pTmpWordList, pXMLImplCplStt_ExcptLstStr, xSrcStg );

            if (pTmpWordList)
            {
                SaveExceptList_Imp( *pTmpWordList, pXMLImplCplStt_ExcptLstStr, xDstStg, true );
                pTmpWordList->clear();
            }

            GetAutocorrWordList();
            MakeBlocklist_Imp( *xDstStg );
            sShareAutoCorrFile = sUserAutoCorrFile;
            xDstStg = nullptr;
            try
            {
                ::ucbhelper::Content aContent ( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), uno::Reference < XCommandEnvironment >(), comphelper::getProcessComponentContext() );
                aContent.executeCommand ( u"delete"_ustr, Any ( true ) );
            }
            catch (...)
            {
            }
        }
    }
    else if( bCopy && !bError )
        sShareAutoCorrFile = sUserAutoCorrFile;
}

bool SvxAutoCorrectLanguageLists::MakeBlocklist_Imp( SotStorage& rStg )
{
    bool bRet = true, bRemove = !pAutocorr_List || pAutocorr_List->empty();
    if( !bRemove )
    {
        rtl::Reference<SotStorageStream> refList = rStg.OpenSotStream( pXMLImplAutocorr_ListStr,
                    ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) );
        if( refList.is() )
        {
            refList->SetSize( 0 );
            refList->SetBufferSize( 8192 );
            refList->SetProperty( u"MediaType"_ustr, Any(u"text/xml"_ustr) );

            const uno::Reference< uno::XComponentContext >& xContext =
                comphelper::getProcessComponentContext();

            uno::Reference < xml::sax::XWriter > xWriter = xml::sax::Writer::create(xContext);
            uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *refList );
            xWriter->setOutputStream(xOut);

            rtl::Reference< SvXMLAutoCorrectExport > xExp( new SvXMLAutoCorrectExport( xContext, pAutocorr_List.get(), pXMLImplAutocorr_ListStr, xWriter ) );

            xExp->exportDoc( XML_BLOCK_LIST );

            refList->Commit();
            bRet = ERRCODE_NONE == refList->GetError();
            if( bRet )
            {
                refList.clear();
                rStg.Commit();
                if( ERRCODE_NONE != rStg.GetError() )
                {
                    bRemove = true;
                    bRet = false;
                }
            }
        }
        else
            bRet = false;
    }

    if( bRemove )
    {
        rStg.Remove( pXMLImplAutocorr_ListStr );
        rStg.Commit();
    }

    return bRet;
}

bool SvxAutoCorrectLanguageLists::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries, std::vector<SvxAutocorrWord>& aDeleteEntries )
{
    // First get the current list!
    GetAutocorrWordList();

    MakeUserStorage_Impl();
    rtl::Reference<SotStorage> xStorage = new SotStorage(sUserAutoCorrFile, StreamMode::READWRITE);

    bool bRet = xStorage.is() && ERRCODE_NONE == xStorage->GetError();

    if( bRet )
    {
        for (SvxAutocorrWord & aWordToDelete : aDeleteEntries)
        {
            std::optional<SvxAutocorrWord> xFoundEntry = pAutocorr_List->FindAndRemove( &aWordToDelete );
            if( xFoundEntry )
            {
                if( !xFoundEntry->IsTextOnly() )
                {
                    OUString aName( aWordToDelete.GetShort() );
                    if (xStorage->IsOLEStorage())
                        aName = EncryptBlockName_Imp(aName);
                    else
                        GeneratePackageName ( aWordToDelete.GetShort(), aName );

                    if( xStorage->IsContained( aName ) )
                    {
                        xStorage->Remove( aName );
                        bRet = xStorage->Commit();
                    }
                }
            }
        }

        for (const SvxAutocorrWord & aNewEntrie : aNewEntries)
        {
            SvxAutocorrWord aWordToAdd(aNewEntrie.GetShort(), aNewEntrie.GetLong(), true );
            std::optional<SvxAutocorrWord> xRemoved = pAutocorr_List->FindAndRemove( &aWordToAdd );
            if( xRemoved )
            {
                if( !xRemoved->IsTextOnly() )
                {
                    // Still have to remove the Storage
                    OUString sStorageName( aWordToAdd.GetShort() );
                    if (xStorage->IsOLEStorage())
                        sStorageName = EncryptBlockName_Imp(sStorageName);
                    else
                        GeneratePackageName ( aWordToAdd.GetShort(), sStorageName);

                    if( xStorage->IsContained( sStorageName ) )
                        xStorage->Remove( sStorageName );
                }
            }
            bRet = pAutocorr_List->Insert( std::move(aWordToAdd) );

            if ( !bRet )
            {
                break;
            }
        }

        if ( bRet )
        {
            bRet = MakeBlocklist_Imp( *xStorage );
        }
    }
    return bRet;
}

bool SvxAutoCorrectLanguageLists::PutText( const OUString& rShort, const OUString& rLong )
{
    // First get the current list!
    GetAutocorrWordList();

    MakeUserStorage_Impl();
    rtl::Reference<SotStorage> xStg = new SotStorage(sUserAutoCorrFile, StreamMode::READWRITE);

    bool bRet = xStg.is() && ERRCODE_NONE == xStg->GetError();

    // Update the word list
    if( bRet )
    {
        SvxAutocorrWord aNew(rShort, rLong, true );
        std::optional<SvxAutocorrWord> xRemove = pAutocorr_List->FindAndRemove( &aNew );
        if( xRemove )
        {
            if( !xRemove->IsTextOnly() )
            {
                // Still have to remove the Storage
                OUString sStgNm( rShort );
                if (xStg->IsOLEStorage())
                    sStgNm = EncryptBlockName_Imp(sStgNm);
                else
                    GeneratePackageName ( rShort, sStgNm);

                if( xStg->IsContained( sStgNm ) )
                    xStg->Remove( sStgNm );
            }
        }

        if( pAutocorr_List->Insert( std::move(aNew) ) )
        {
            bRet = MakeBlocklist_Imp( *xStg );
            xStg = nullptr;
        }
        else
        {
            bRet = false;
        }
    }
    return bRet;
}

void SvxAutoCorrectLanguageLists::PutText( const OUString& rShort,
                                               SfxObjectShell& rShell )
{
    // First get the current list!
    GetAutocorrWordList();

    MakeUserStorage_Impl();

    try
    {
        uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sUserAutoCorrFile, embed::ElementModes::READWRITE );
        OUString sLong;
        bool bRet = rAutoCorrect.PutText( xStg, sUserAutoCorrFile, rShort, rShell, sLong );
        xStg = nullptr;

        // Update the word list
        if( bRet )
        {
            if( pAutocorr_List->Insert( SvxAutocorrWord(rShort, sLong, false) ) )
            {
                rtl::Reference<SotStorage> xStor = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
                MakeBlocklist_Imp( *xStor );
            }
        }
    }
    catch ( const uno::Exception& )
    {
    }
}

// Keep the list sorted ...
struct SvxAutocorrWordList::CompareSvxAutocorrWordList
{
    bool operator()( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs ) const
    {
        CollatorWrapper& rCmp = ::GetCollatorWrapper();
        return rCmp.compareString( lhs.GetShort(), rhs.GetShort() ) < 0;
    }
};

namespace {

typedef std::unordered_map<OUString, SvxAutocorrWord> AutocorrWordHashType;

}

struct SvxAutocorrWordList::Impl
{

    // only one of these contains the data
    // maSortedVector is manually sorted so we can optimise data movement
    mutable AutocorrWordSetType maSortedVector;
    mutable AutocorrWordHashType maHash; // key is 'Short'

    void DeleteAndDestroyAll()
    {
        maHash.clear();
        maSortedVector.clear();
    }
};

SvxAutocorrWordList::SvxAutocorrWordList() : mpImpl(new Impl) {}

SvxAutocorrWordList::~SvxAutocorrWordList()
{
}

void SvxAutocorrWordList::DeleteAndDestroyAll()
{
    mpImpl->DeleteAndDestroyAll();
}

struct SvxAutocorrWordList::Iterator::Impl {
    typedef SvxAutocorrWordList::AutocorrWordSetType::const_iterator VecIterType;
    typedef AutocorrWordHashType::const_iterator HashIterType;

    HashIterType  mHashIter, mHashEnd;
    VecIterType   mSortedVectorIter, mSortedVectorEnd;

    Impl(const HashIterType& hashIter, const HashIterType& hashEnd,
         const VecIterType& vecIter, const VecIterType& vecEnd)
        : mHashIter(hashIter), mHashEnd(hashEnd),
          mSortedVectorIter(vecIter), mSortedVectorEnd(vecEnd) {}

    bool Step() {
        // Iterate hash table, followed by sorted vector
        if (mHashIter != mHashEnd) {
            return ++mHashIter != mHashEnd
                   || mSortedVectorIter != mSortedVectorEnd;
        }
        return ++mSortedVectorIter != mSortedVectorEnd;
    }

    const SvxAutocorrWord& operator*() {
        return (mHashIter == mHashEnd) ? *mSortedVectorIter : mHashIter->second;
    }
    const SvxAutocorrWord* operator->() {
        return (mHashIter == mHashEnd) ? &*mSortedVectorIter : &mHashIter->second;
    }
};

SvxAutocorrWordList::Iterator::Iterator(
    std::unique_ptr<SvxAutocorrWordList::Iterator::Impl> pImpl
) : mpImpl(std::move(pImpl))
{
}

SvxAutocorrWordList::Iterator::Iterator(
    const SvxAutocorrWordList::Iterator& it
) : mpImpl(new Impl(*(it.mpImpl)))
{
}

SvxAutocorrWordList::Iterator::~Iterator()
{
}

bool SvxAutocorrWordList::Iterator::Step()
{
    return mpImpl->Step();
}

const SvxAutocorrWord& SvxAutocorrWordList::Iterator::operator*() const
{
    return **mpImpl;
}

const SvxAutocorrWord* SvxAutocorrWordList::Iterator::operator->() const
{
    return mpImpl->operator->();
}

bool SvxAutocorrWordList::ContainsPattern(const OUString& aShort) const
{
    // check hash table first
    if (mpImpl->maHash.contains(aShort)) {
        return true;
    }

    // then do binary search on sorted vector
    CollatorWrapper& rCmp = ::GetCollatorWrapper();
    auto it = std::lower_bound(mpImpl->maSortedVector.begin(),
                               mpImpl->maSortedVector.end(),
                               aShort,
                               [&](const SvxAutocorrWord& elm,
                                   const OUString& val) {
                                       return rCmp.compareString(elm.GetShort(),
                                                                 val) < 0;
                                   } );
    if (it != mpImpl->maSortedVector.end()
        && rCmp.compareString(aShort, it->GetShort()) == 0)
    {
        return true;
    }

    return false;
}

// returns true if inserted
const SvxAutocorrWord* SvxAutocorrWordList::Insert(SvxAutocorrWord aWord) const
{
    if ( mpImpl->maSortedVector.empty() ) // use the hash
    {
        OUString aShort = aWord.GetShort();
        auto [it,inserted] = mpImpl->maHash.emplace( std::move(aShort), std::move(aWord) );
        if (inserted)
            return &(it->second);
        return nullptr;
    }
    else
    {
        auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), aWord, CompareSvxAutocorrWordList());
        CollatorWrapper& rCmp = ::GetCollatorWrapper();
        if (it == mpImpl->maSortedVector.end() || rCmp.compareString( aWord.GetShort(), it->GetShort() ) != 0)
        {
            it = mpImpl->maSortedVector.insert(it, std::move(aWord));
            return &*it;
        }
        return nullptr;
    }
}

void SvxAutocorrWordList::LoadEntry(const OUString& sWrong, const OUString& sRight, bool bOnlyTxt)
{
    (void)Insert(SvxAutocorrWord( sWrong, sRight, bOnlyTxt ));
}

bool SvxAutocorrWordList::empty() const
{
    return mpImpl->maHash.empty() && mpImpl->maSortedVector.empty();
}

std::optional<SvxAutocorrWord> SvxAutocorrWordList::FindAndRemove(const SvxAutocorrWord *pWord)
{

    if ( mpImpl->maSortedVector.empty() ) // use the hash
    {
        AutocorrWordHashType::iterator it = mpImpl->maHash.find( pWord->GetShort() );
        if( it != mpImpl->maHash.end() )
        {
            SvxAutocorrWord pMatch = std::move(it->second);
            mpImpl->maHash.erase (it);
            return pMatch;
        }
    }
    else
    {
        auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), *pWord, CompareSvxAutocorrWordList());
        if (it != mpImpl->maSortedVector.end() && !CompareSvxAutocorrWordList()(*pWord, *it))
        {
            SvxAutocorrWord pMatch = std::move(*it);
            mpImpl->maSortedVector.erase (it);
            return pMatch;
        }
    }
    return std::optional<SvxAutocorrWord>();
}

// return the sorted contents - defer sorting until we have to.
const SvxAutocorrWordList::AutocorrWordSetType& SvxAutocorrWordList::getSortedContent() const
{
    // convert from hash to set permanently
    if ( mpImpl->maSortedVector.empty() )
    {
        std::vector<SvxAutocorrWord> tmp;
        tmp.reserve(mpImpl->maHash.size());
        for (auto & rPair : mpImpl->maHash)
            tmp.emplace_back(std::move(rPair.second));
        mpImpl->maHash.clear();
        // sort twice - this gets the list into mostly-sorted order, which
        // reduces the number of times we need to invoke the expensive ICU collate fn.
        std::sort(tmp.begin(), tmp.end(),
            [] ( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs )
            {
                return lhs.GetShort() < rhs.GetShort();
            });
        // This beast has some O(N log(N)) in a terribly slow ICU collate fn.
        // stable_sort is twice as fast as sort in this situation because it does
        // fewer comparison operations.
        std::stable_sort(tmp.begin(), tmp.end(), CompareSvxAutocorrWordList());
        mpImpl->maSortedVector = std::move(tmp);
    }
    return mpImpl->maSortedVector;
}

std::optional<SvxAutocorrWord>
SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *pFnd,
                                 std::u16string_view rTxt,
                                 sal_Int32 &rStt,
                                 sal_Int32 nEndPos) const
{
    const OUString& rChk = pFnd->GetShort();

    sal_Int32 left_wildcard = rChk.startsWith( ".*" ) ? 2 : 0; // ".*word" pattern?
    sal_Int32 right_wildcard = rChk.endsWith( ".*" ) ? 2 : 0; // "word.*" pattern?
    assert(nEndPos >= 0);
    size_t nSttWdPos = nEndPos;

    // direct replacement of keywords surrounded by colons (for example, ":name:")
    bool bColonNameColon = static_cast<sal_Int32>(rTxt.size()) > nEndPos &&
        rTxt[nEndPos] == ':' && rChk[0] == ':' && rChk.endsWith(":");
    if ( nEndPos + (bColonNameColon ? 1 : 0) < rChk.getLength() - left_wildcard - right_wildcard )
    {
        return std::nullopt;
    }

    bool bWasWordDelim = false;
    sal_Int32 nCalcStt = nEndPos - rChk.getLength() + left_wildcard;
    if (bColonNameColon)
        nCalcStt++;
    if( !right_wildcard && ( !nCalcStt || nCalcStt == rStt || left_wildcard || bColonNameColon ||
          ( nCalcStt < rStt &&
            IsWordDelim( rTxt[ nCalcStt - 1 ] ))) )
    {
        TransliterationWrapper& rCmp = GetIgnoreTranslWrapper();
        OUString sWord( rTxt.substr(nCalcStt, rChk.getLength() - left_wildcard) );
        if( (!left_wildcard && rCmp.isEqual( rChk, sWord )) || (left_wildcard && rCmp.isEqual( rChk.copy(left_wildcard), sWord) ))
        {
            rStt = nCalcStt;
            if (!left_wildcard)
            {
                // fdo#33899 avoid "1/2", "1/3".. to be replaced by fractions in dates, eg. 1/2/14
                if (static_cast<sal_Int32>(rTxt.size()) > nEndPos && rTxt[nEndPos] == '/' && rChk.indexOf('/') != -1)
                {
                    return std::nullopt;
                }
                return *pFnd;
            }
            // get the first word delimiter position before the matching ".*word" pattern
            while( rStt && !(bWasWordDelim = IsWordDelim( rTxt[ --rStt ])))
                ;
            if (bWasWordDelim) rStt++;

            // don't let wildcard pattern override non-wildcard one
            OUString aShort(rTxt.substr(rStt, nEndPos - rStt));
            if (ContainsPattern(aShort)) {
                return std::nullopt;
            }

            OUString left_pattern( rTxt.substr(rStt, nEndPos - rStt - rChk.getLength() + left_wildcard) );
            // avoid double spaces before simple "word" replacement
            left_pattern += (left_pattern.getLength() == 0 && pFnd->GetLong()[0] == 0x20) ? pFnd->GetLong().subView(1) : pFnd->GetLong();
            return SvxAutocorrWord(aShort, left_pattern);
        }
    } else
    // match "word.*" or ".*word.*" patterns, eg. "i18n.*", ".*---.*", TODO: add transliteration support
    if ( right_wildcard )
    {

        OUString sTmp( rChk.copy( left_wildcard, rChk.getLength() - left_wildcard - right_wildcard ) );
        // Get the last word delimiter position
        bool not_suffix;

        while( nSttWdPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ])))
            ;
        // search the first occurrence (with a left word delimitation, if needed)
        size_t nFndPos = rStt - 1;
        do {
            nFndPos = rTxt.find( sTmp, nFndPos + 1);
            if (nFndPos == std::u16string_view::npos)
                break;
            not_suffix = bWasWordDelim && (nSttWdPos >= (nFndPos + sTmp.getLength()));
        } while ( (!left_wildcard && nFndPos && !IsWordDelim( rTxt[ nFndPos - 1 ])) || not_suffix );

        if ( nFndPos != std::u16string_view::npos )
        {
            sal_Int32 extra_repl = static_cast<sal_Int32>(nFndPos) + sTmp.getLength() > nEndPos ? 1: 0; // for patterns with terminating characters, eg. "a:"

            if ( left_wildcard )
            {
                // get the first word delimiter position before the matching ".*word.*" pattern
                while( nFndPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nFndPos ])))
                    ;
                if (bWasWordDelim) nFndPos++;
            }
            if (nEndPos + extra_repl <= static_cast<sal_Int32>(nFndPos))
            {
                return std::nullopt;
            }
            // return matching pattern and its replacement as a new list item, eg. "i18ns" -> "internationalizations"
            OUString aShort( rTxt.substr(nFndPos, nEndPos - nFndPos + extra_repl) );
            // don't let wildcard pattern override non-wildcard one
            if (ContainsPattern(aShort)) {
                return std::nullopt;
            }

            OUString aLong;
            rStt = nFndPos;
            if ( !left_wildcard )
            {
                sal_Int32 siz = nEndPos - nFndPos - sTmp.getLength();
                aLong = pFnd->GetLong() + (siz > 0 ? rTxt.substr(nFndPos + sTmp.getLength(), siz) : u"");
            } else {
                OUStringBuffer buf;
                do {
                    nSttWdPos = rTxt.find( sTmp, nFndPos);
                    if (nSttWdPos != std::u16string_view::npos)
                    {
                        sal_Int32 nTmp(nFndPos);
                        while (nTmp < static_cast<sal_Int32>(nSttWdPos) && !IsWordDelim(rTxt[nTmp]))
                        {
                            nTmp++;
                        }
                        if (nTmp < static_cast<sal_Int32>(nSttWdPos)) {
                            break; // word delimiter found
                        }
                        buf.append(rTxt.substr(nFndPos, nSttWdPos - nFndPos)).append(pFnd->GetLong());
                        nFndPos = nSttWdPos + sTmp.getLength();
                    }
                } while (nSttWdPos != std::u16string_view::npos);
                if (static_cast<sal_Int32>(nEndPos - nFndPos) > extra_repl) {
                    buf.append(rTxt.substr(nFndPos, nEndPos - nFndPos));
                }
                aLong = buf.makeStringAndClear();
            }
            if ( (static_cast<sal_Int32>(rTxt.size()) > nEndPos && IsWordDelim(rTxt[nEndPos])) || static_cast<sal_Int32>(rTxt.size()) == nEndPos )
            {
                return SvxAutocorrWord(aShort, aLong);
            }
        }
    }
    return std::nullopt;
}

std::optional<SvxAutocorrWordList::WordSearchStatus>
SvxAutocorrWordList::SearchWordsInList(std::u16string_view rTxt,
                                       sal_Int32& rStt,
                                       sal_Int32 nEndPos) const
{
    for (auto it = mpImpl->maHash.begin(); it != mpImpl->maHash.end(); ++it)
    {
        if(auto pTmp = WordMatches(&it->second, rTxt, rStt, nEndPos))
        {
            return WordSearchStatus(
                *pTmp, this,
                Iterator(std::make_unique<Iterator::Impl>(
                    it, mpImpl->maHash.end(),
                    mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end()
                ))
            );
        }
    }

    for (auto it = mpImpl->maSortedVector.begin();
         it != mpImpl->maSortedVector.end(); ++it)
    {
        if(auto pTmp = WordMatches(&*it, rTxt, rStt, nEndPos))
        {
            return WordSearchStatus(
                *pTmp, this,
                Iterator(std::make_unique<Iterator::Impl>(
                    mpImpl->maHash.end(), mpImpl->maHash.end(),
                    it, mpImpl->maSortedVector.end()
                ))
            );
        }
    }

    return std::nullopt;
}

bool
SvxAutocorrWordList::SearchWordsNext(std::u16string_view rTxt,
                                     sal_Int32& rStt,
                                     sal_Int32 nEndPos,
                                     SvxAutocorrWordList::WordSearchStatus& rStatus) const
{
    while(rStatus.StepIter())
    {
        if(auto pTmp = WordMatches(rStatus.GetWordAtIter(),
                                   rTxt, rStt, nEndPos))
        {
            rStatus.mFnd = *pTmp;
            return true;
        }
    }

    return false;
}

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */