4 files changed, 1683 insertions, 0 deletions
diff --git a/i18npool/source/characterclassification/cclass_unicode.cxx b/i18npool/source/characterclassification/cclass_unicode.cxx
new file mode 100644
index 000000000..f07e9f812
--- /dev/null
+++ b/i18npool/source/characterclassification/cclass_unicode.cxx
@@ -0,0 +1,307 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ *   Licensed to the Apache Software Foundation (ASF) under one or more
+ *   contributor license agreements. See the NOTICE file distributed
+ *   with this work for additional information regarding copyright
+ *   ownership. The ASF licenses this file to you under the Apache
+ *   License, Version 2.0 (the "License"); you may not use this file
+ *   except in compliance with the License. You may obtain a copy of
+ *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <cclass_unicode.hxx>
+#include <com/sun/star/i18n/KCharacterType.hpp>
+#include <com/sun/star/i18n/WordType.hpp>
+#include <com/sun/star/lang/WrappedTargetRuntimeException.hpp>
+#include <unicode/uchar.h>
+#include <cppuhelper/exc_hlp.hxx>
+#include <cppuhelper/supportsservice.hxx>
+#include <breakiteratorImpl.hxx>
+#include <transliteration_body.hxx>
+#include <rtl/ref.hxx>
+#include <utility>
+
+using namespace ::com::sun::star;
+using namespace ::com::sun::star::uno;
+using namespace ::com::sun::star::i18n;
+using namespace ::com::sun::star::lang;
+
+namespace i18npool {
+
+//  class cclass_Unicode
+//  Constructor: creates one case-mapping transliterator per direction and sets the parser defaults.
+
+cclass_Unicode::cclass_Unicode( uno::Reference < XComponentContext > xContext ) :
+        transToUpper( new Transliteration_casemapping() ),
+        transToLower( new Transliteration_casemapping() ),
+        transToTitle( new Transliteration_casemapping() ),
+        m_xContext(std::move( xContext )),   // by-value parameter is moved into the member
+        nStartTypes( 0 ),
+        nContTypes( 0 ),
+        cGroupSep( ',' ),                    // defaults until initParserTable() reads the locale data
+        cDecimalSep( '.' ),
+        cDecimalSepAlt( 0 )                  // 0 means "no alternative decimal separator"
+{
+    transToUpper->setMappingType(MappingType::ToUpper);
+    transToLower->setMappingType(MappingType::ToLower);
+    transToTitle->setMappingType(MappingType::ToTitle);
+}
+
+cclass_Unicode::~cclass_Unicode() {
+    destroyParserTable();   // defined outside this excerpt; presumably releases the parser tables - TODO confirm
+}
+
+
+OUString SAL_CALL
+cclass_Unicode::toUpper( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) {
+    // Upper-cases up to nCount UTF-16 units of Text starting at nPos; an invalid range yields "".
+    const sal_Int32 len = Text.getLength();
+    if (nPos < 0 || nPos >= len || nCount <= 0)
+        return OUString();
+    if (nCount > len - nPos)    // clamp to the end without the signed overflow of nCount + nPos
+        nCount = len - nPos;
+    transToUpper->setLocale(rLocale);
+    return transToUpper->transliterateString2String(Text, nPos, nCount);
+}
+
+OUString SAL_CALL
+cclass_Unicode::toLower( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) {
+    // Lower-cases up to nCount UTF-16 units of Text starting at nPos; an invalid range yields "".
+    const sal_Int32 len = Text.getLength();
+    if (nPos < 0 || nPos >= len || nCount <= 0)
+        return OUString();
+    if (nCount > len - nPos)    // clamp to the end without the signed overflow of nCount + nPos
+        nCount = len - nPos;
+    transToLower->setLocale(rLocale);
+    return transToLower->transliterateString2String(Text, nPos, nCount);
+}
+
+OUString SAL_CALL
+cclass_Unicode::toTitle( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) {
+    // Title-cases the first UTF-16 unit of every word in [nPos, nPos + nCount); other units are copied.
+    try
+    {
+        const sal_Int32 len = Text.getLength();
+        if (nPos < 0 || nPos >= len || nCount <= 0)
+            return OUString();
+        if (nCount > len - nPos)    // clamp to the end without the signed overflow of nCount + nPos
+            nCount = len - nPos;
+        transToTitle->setLocale(rLocale);
+        rtl_uString* pStr = rtl_uString_alloc(nCount);
+        // Hand the buffer to an OUString right away so it is released if the break iterator throws.
+        OUString aResult( pStr, SAL_NO_ACQUIRE );
+        sal_Unicode* out = pStr->buffer;
+        rtl::Reference< BreakIteratorImpl > xBrk(new BreakIteratorImpl(m_xContext));
+        Boundary bdy = xBrk->getWordBoundary(Text, nPos, rLocale, WordType::ANYWORD_IGNOREWHITESPACES, true);
+        for (sal_Int32 i = nPos; i < nCount + nPos; i++, out++) {
+            if (i >= bdy.endPos)    // left the current word: look up the next one
+                bdy = xBrk->nextWord(Text, bdy.endPos, rLocale, WordType::ANYWORD_IGNOREWHITESPACES);
+            *out = (i == bdy.startPos) ? transToTitle->transliterateChar2Char(Text[i]) : Text[i];
+        }
+        *out = 0;
+        return aResult;
+    }
+    // RuntimeExceptions pass through unchanged; any other UNO exception is wrapped into one.
+    catch (const RuntimeException&)
+    {
+        throw;
+    }
+    catch (const Exception& e)
+    {
+        uno::Any a(cppu::getCaughtException());
+        throw lang::WrappedTargetRuntimeException(
+            "wrapped " + a.getValueTypeName() + ": " + e.Message,
+            uno::Reference<uno::XInterface>(), a);
+    }
+}
+
+// Returns the ICU general category (u_charType) of the code point at nPos, or 0 when nPos is out of range.
+sal_Int16 SAL_CALL cclass_Unicode::getType( const OUString& Text, sal_Int32 nPos ) {
+    const bool bInRange = nPos >= 0 && nPos < Text.getLength();
+    return bInRange ? static_cast<sal_Int16>(u_charType(Text.iterateCodePoints(&nPos, 0))) : sal_Int16(0);
+}
+
+// Returns the ICU bidi class (u_charDirection) of the code point at nPos, or 0 when nPos is out of range.
+sal_Int16 SAL_CALL cclass_Unicode::getCharacterDirection( const OUString& Text, sal_Int32 nPos ) {
+    const bool bInRange = nPos >= 0 && nPos < Text.getLength();
+    return bInRange ? static_cast<sal_Int16>(u_charDirection(Text.iterateCodePoints(&nPos, 0))) : sal_Int16(0);
+}
+
+
+sal_Int16 SAL_CALL
+cclass_Unicode::getScript( const OUString& Text, sal_Int32 nPos ) {
+    // ICU's UBlockCode numbers Basic Latin as 1 while the OO.o UnicodeScript enum starts at 0,
+    // so the ICU block code is shifted down by one. Out-of-range positions report 0.
+    const sal_Int32 nLen = Text.getLength();
+    if ( nPos < 0 || nPos >= nLen ) return 0;
+    return static_cast<sal_Int16>(ublock_getCode(Text.iterateCodePoints(&nPos, 0)))-1;
+}
+
+
+sal_Int32
+cclass_Unicode::getCharType( const OUString& Text, sal_Int32* nPos, sal_Int32 increment) {
+    // Translates the ICU general category of one code point into KCharacterType flags;
+    // iterateCodePoints() fetches the code point and moves *nPos by `increment` code points.
+    using namespace ::com::sun::star::i18n::KCharacterType;
+    const sal_uInt32 nCodePoint = Text.iterateCodePoints(nPos, increment);
+    sal_Int32 nFlags;
+    switch ( u_charType(nCodePoint) ) {
+    case U_UPPERCASE_LETTER :               // cased letters
+        nFlags = UPPER|LETTER|PRINTABLE|BASE_FORM;
+        break;
+    case U_LOWERCASE_LETTER :
+        nFlags = LOWER|LETTER|PRINTABLE|BASE_FORM;
+        break;
+    case U_TITLECASE_LETTER :
+        nFlags = TITLE_CASE|LETTER|PRINTABLE|BASE_FORM;
+        break;
+    case U_MODIFIER_LETTER :                // letters without case
+    case U_OTHER_LETTER :
+        nFlags = LETTER|PRINTABLE|BASE_FORM;
+        break;
+
+    case U_DECIMAL_DIGIT_NUMBER:            // every kind of number is reported as DIGIT
+    case U_LETTER_NUMBER:
+    case U_OTHER_NUMBER:
+        nFlags = DIGIT|PRINTABLE|BASE_FORM;
+        break;
+
+    case U_NON_SPACING_MARK:                // marks
+    case U_ENCLOSING_MARK:
+    case U_COMBINING_SPACING_MARK:
+        nFlags = BASE_FORM|PRINTABLE;
+        break;
+
+    // NOTE(review): U_START_PUNCTUATION / U_END_PUNCTUATION are not listed and take the default branch - confirm.
+    case U_SPACE_SEPARATOR:                 // space, punctuation and symbols: printable only
+    case U_DASH_PUNCTUATION:
+    case U_INITIAL_PUNCTUATION:
+    case U_FINAL_PUNCTUATION:
+    case U_CONNECTOR_PUNCTUATION:
+    case U_OTHER_PUNCTUATION:
+    case U_MATH_SYMBOL:
+    case U_CURRENCY_SYMBOL:
+    case U_MODIFIER_SYMBOL:
+    case U_OTHER_SYMBOL:
+        nFlags = PRINTABLE;
+        break;
+
+    case U_CONTROL_CHAR:                    // control and format characters
+    case U_FORMAT_CHAR:
+        nFlags = CONTROL;
+        break;
+
+    case U_LINE_SEPARATOR:                  // line/paragraph separators are both control and printable
+    case U_PARAGRAPH_SEPARATOR:
+        nFlags = CONTROL|PRINTABLE;
+        break;
+
+    default:                                // all remaining categories
+        nFlags = U_GENERAL_OTHER_TYPES;
+        break;
+    }
+    return nFlags;
+}
+
+// Returns the KCharacterType flags of the code point at nPos (0 when nPos is out of range); rLocale is unused.
+sal_Int32 SAL_CALL cclass_Unicode::getCharacterType( const OUString& Text, sal_Int32 nPos, const Locale& /*rLocale*/ ) {
+    const bool bValidPos = nPos >= 0 && nPos < Text.getLength();
+    if ( !bValidPos ) return 0;
+    return getCharType(Text, &nPos, 0);
+}
+
+sal_Int32 SAL_CALL
+cclass_Unicode::getStringType( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& /*rLocale*/ ) {
+    // ORs together the KCharacterType flags of the code points starting at nPos until nCount
+    // UTF-16 units are consumed or the text ends; an out-of-range nPos yields 0.
+    if ( nPos < 0 || nPos >= Text.getLength() )
+        return 0;
+
+    sal_Int32 nFlags = 0;
+    for (sal_Int32 nRemaining = nCount; nRemaining > 0 && nPos < Text.getLength(); )
+    {
+        const sal_Int32 nBefore = nPos;
+        nFlags |= getCharType(Text, &nPos, 1);   // moves nPos past one code point
+        nRemaining -= nPos - nBefore;            // count the UTF-16 units just consumed
+    }
+    return nFlags;
+}
+
+ParseResult SAL_CALL cclass_Unicode::parseAnyToken(
+            const OUString& Text,
+            sal_Int32 nPos,
+            const Locale& rLocale,
+            sal_Int32 startCharTokenType,
+            const OUString& userDefinedCharactersStart,
+            sal_Int32 contCharTokenType,
+            const OUString& userDefinedCharactersCont )
+{
+    // Parses one token of any type from Text at nPos using the given start/continuation character rules.
+    ParseResult r;
+    if ( nPos < 0 || Text.getLength() <= nPos )
+        return r;   // no valid position to parse: hand back the default-constructed result
+    // setupParserTable() re-initialises the table only if the locale or the rules changed.
+    setupParserTable( rLocale,
+        startCharTokenType, userDefinedCharactersStart,
+        contCharTokenType, userDefinedCharactersCont );
+    parseText( r, Text, nPos );
+    return r;
+}
+
+
+ParseResult SAL_CALL cclass_Unicode::parsePredefinedToken(
+            sal_Int32 nTokenType,
+            const OUString& Text,
+            sal_Int32 nPos,
+            const Locale& rLocale,
+            sal_Int32 startCharTokenType,
+            const OUString& userDefinedCharactersStart,
+            sal_Int32 contCharTokenType,
+            const OUString& userDefinedCharactersCont )
+{
+    // Like parseAnyToken(), but nTokenType is forwarded to parseText() as the requested token type.
+    ParseResult r;
+    if ( nPos < 0 || Text.getLength() <= nPos )
+        return r;   // no valid position to parse: hand back the default-constructed result
+    // setupParserTable() re-initialises the table only if the locale or the rules changed.
+    setupParserTable( rLocale,
+        startCharTokenType, userDefinedCharactersStart,
+        contCharTokenType, userDefinedCharactersCont );
+    parseText( r, Text, nPos, nTokenType );
+    return r;
+}
+
+// Returns the implementation name under which this component identifies itself.
+OUString SAL_CALL cclass_Unicode::getImplementationName() {
+    return "com.sun.star.i18n.CharacterClassification_Unicode";
+}
+
+// Delegates the service-name check to the cppu::supportsService() helper.
+sal_Bool SAL_CALL cclass_Unicode::supportsService(const OUString& rServiceName) {
+    return cppu::supportsService(this, rServiceName);
+}
+
+// Returns the single service name this implementation supports.
+Sequence< OUString > SAL_CALL cclass_Unicode::getSupportedServiceNames() {
+    return { "com.sun.star.i18n.CharacterClassification_Unicode" };
+}
+
+}
+
+// Exported factory: creates a cclass_Unicode for the given context and hands it to cppu::acquire().
+extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface *
+com_sun_star_i18n_CharacterClassification_Unicode_get_implementation(
+    css::uno::XComponentContext *context, css::uno::Sequence<css::uno::Any> const &)
+{
+    return cppu::acquire(new i18npool::cclass_Unicode(context));
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/characterclassification/cclass_unicode_parser.cxx b/i18npool/source/characterclassification/cclass_unicode_parser.cxx
new file mode 100644
index 000000000..313e42a0f
--- /dev/null
+++ b/i18npool/source/characterclassification/cclass_unicode_parser.cxx
@@ -0,0 +1,1066 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ *   Licensed to the Apache Software Foundation (ASF) under one or more
+ *   contributor license agreements. See the NOTICE file distributed
+ *   with this work for additional information regarding copyright
+ *   ownership. The ASF licenses this file to you under the Apache
+ *   License, Version 2.0 (the "License"); you may not use this file
+ *   except in compliance with the License. You may obtain a copy of
+ *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+
+#include <cclass_unicode.hxx>
+#include <unicode/uchar.h>
+#include <rtl/character.hxx>
+#include <rtl/math.hxx>
+#include <rtl/ustring.hxx>
+#include <com/sun/star/i18n/KParseTokens.hpp>
+#include <com/sun/star/i18n/KParseType.hpp>
+#include <com/sun/star/i18n/LocaleData2.hpp>
+#include <com/sun/star/i18n/NativeNumberMode.hpp>
+#include <com/sun/star/i18n/NativeNumberSupplier.hpp>
+
+#include <string.h>
+#include <string_view>
+
+using namespace ::com::sun::star::uno;
+using namespace ::com::sun::star::i18n;
+using namespace ::com::sun::star::lang;
+
+#define TOKEN_DIGIT_FLAGS (ParserFlags::CHAR_VALUE | ParserFlags::VALUE | ParserFlags::VALUE_EXP | ParserFlags::VALUE_EXP_VALUE | ParserFlags::VALUE_DIGIT)
+
+namespace i18npool {
+
+// Default identifier/name specification is [A-Za-z_][A-Za-z0-9_]*
+
+const sal_uInt8 cclass_Unicode::nDefCnt = 128;  // number of entries in each per-ASCII-character table
+const ParserFlags cclass_Unicode::pDefaultParserTable[ nDefCnt ] =  // default parser flags, indexed by ASCII code 0..127
+{
+// A trailing "// (...)" comment gives the Calc formula compiler specific flags, which were commented out and modified.
+
+    /* \0 */    ParserFlags::EXCLUDED,
+                ParserFlags::ILLEGAL,
+                ParserFlags::ILLEGAL,
+                ParserFlags::ILLEGAL,
+                ParserFlags::ILLEGAL,
+                ParserFlags::ILLEGAL,
+                ParserFlags::ILLEGAL,
+                ParserFlags::ILLEGAL,
+                ParserFlags::ILLEGAL,
+    /*  9 \t */ ParserFlags::CHAR_DONTCARE | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,     // (ParserFlags::ILLEGAL)
+    /* 10 \n */ ParserFlags::ILLEGAL,
+    /* 11 \v */ ParserFlags::CHAR_DONTCARE | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,     // (ParserFlags::ILLEGAL)
+    /* 12 \f */ ParserFlags::ILLEGAL,
+    /* 13 \r */ ParserFlags::ILLEGAL,
+                ParserFlags::ILLEGAL,
+                ParserFlags::ILLEGAL,
+                ParserFlags::ILLEGAL,
+                ParserFlags::ILLEGAL,
+                ParserFlags::ILLEGAL,
+                ParserFlags::ILLEGAL,
+                ParserFlags::ILLEGAL,
+                ParserFlags::ILLEGAL,
+                ParserFlags::ILLEGAL,
+                ParserFlags::ILLEGAL,
+                ParserFlags::ILLEGAL,
+                ParserFlags::ILLEGAL,
+                ParserFlags::ILLEGAL,
+                ParserFlags::ILLEGAL,
+                ParserFlags::ILLEGAL,
+                ParserFlags::ILLEGAL,
+                ParserFlags::ILLEGAL,
+                ParserFlags::ILLEGAL,
+    /*  32   */ ParserFlags::CHAR_DONTCARE | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
+    /*  33 ! */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
+    /*  34 " */ ParserFlags::CHAR_STRING | ParserFlags::STRING_SEP,
+    /*  35 # */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,  // (ParserFlags::WORD_SEP)
+    /*  36 $ */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,  // (ParserFlags::CHAR_WORD | ParserFlags::WORD)
+    /*  37 % */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,  // (ParserFlags::VALUE)
+    /*  38 & */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
+    /*  39 ' */ ParserFlags::NAME_SEP,
+    /*  40 ( */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
+    /*  41 ) */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
+    /*  42 * */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
+    /*  43 + */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP | ParserFlags::VALUE_EXP | ParserFlags::VALUE_SIGN,
+    /*  44 , */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,  // (ParserFlags::CHAR_VALUE | ParserFlags::VALUE)
+    /*  45 - */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP | ParserFlags::VALUE_EXP | ParserFlags::VALUE_SIGN,
+    /*  46 . */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,  // (ParserFlags::WORD | ParserFlags::CHAR_VALUE | ParserFlags::VALUE)
+    /*  47 / */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
+    // 48..57: the ASCII digits
+    /*  48 0 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
+    /*  49 1 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
+    /*  50 2 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
+    /*  51 3 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
+    /*  52 4 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
+    /*  53 5 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
+    /*  54 6 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
+    /*  55 7 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
+    /*  56 8 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
+    /*  57 9 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
+    /*  58 : */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,  // (ParserFlags::WORD)
+    /*  59 ; */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
+    /*  60 < */ ParserFlags::CHAR_BOOL | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
+    /*  61 = */ ParserFlags::CHAR | ParserFlags::BOOL | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
+    /*  62 > */ ParserFlags::CHAR_BOOL | ParserFlags::BOOL | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
+    /*  63 ? */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,  // (ParserFlags::CHAR_WORD | ParserFlags::WORD)
+    /*  64 @ */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,  // (ParserFlags::ILLEGAL // UNUSED)
+    // 65..90: the ASCII upper-case letters
+    /*  65 A */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /*  66 B */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /*  67 C */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /*  68 D */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /*  69 E */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /*  70 F */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /*  71 G */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /*  72 H */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /*  73 I */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /*  74 J */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /*  75 K */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /*  76 L */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /*  77 M */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /*  78 N */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /*  79 O */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /*  80 P */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /*  81 Q */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /*  82 R */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /*  83 S */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /*  84 T */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /*  85 U */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /*  86 V */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /*  87 W */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /*  88 X */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /*  89 Y */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /*  90 Z */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /*  91 [ */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,  // (ParserFlags::ILLEGAL // UNUSED)
+    /*  92 \ */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,  // (ParserFlags::ILLEGAL // UNUSED)
+    /*  93 ] */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,  // (ParserFlags::ILLEGAL // UNUSED)
+    /*  94 ^ */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
+    /*  95 _ */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /*  96 ` */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,  // (ParserFlags::ILLEGAL // UNUSED)
+    // 97..122: the ASCII lower-case letters
+    /*  97 a */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /*  98 b */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /*  99 c */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /* 100 d */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /* 101 e */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /* 102 f */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /* 103 g */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /* 104 h */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /* 105 i */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /* 106 j */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /* 107 k */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /* 108 l */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /* 109 m */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /* 110 n */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /* 111 o */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /* 112 p */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /* 113 q */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /* 114 r */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /* 115 s */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /* 116 t */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /* 117 u */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /* 118 v */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /* 119 w */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /* 120 x */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /* 121 y */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /* 122 z */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+    /* 123 { */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,  // (ParserFlags::ILLEGAL // UNUSED)
+    /* 124 | */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,  // (ParserFlags::ILLEGAL // UNUSED)
+    /* 125 } */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,  // (ParserFlags::ILLEGAL // UNUSED)
+    /* 126 ~ */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,  // (ParserFlags::ILLEGAL // UNUSED)
+    /* 127   */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP   // (ParserFlags::ILLEGAL // UNUSED)
+};
+
+
+const sal_Int32 cclass_Unicode::pParseTokensType[ nDefCnt ] =  // KParseTokens class of each ASCII code 0..127, read by getParseTokensType()
+{
+    /* \0 */    KParseTokens::ASC_OTHER,
+                KParseTokens::ASC_CONTROL,
+                KParseTokens::ASC_CONTROL,
+                KParseTokens::ASC_CONTROL,
+                KParseTokens::ASC_CONTROL,
+                KParseTokens::ASC_CONTROL,
+                KParseTokens::ASC_CONTROL,
+                KParseTokens::ASC_CONTROL,
+                KParseTokens::ASC_CONTROL,
+    /*  9 \t */ KParseTokens::ASC_CONTROL,
+    /* 10 \n */ KParseTokens::ASC_CONTROL,
+    /* 11 \v */ KParseTokens::ASC_CONTROL,
+    /* 12 \f */ KParseTokens::ASC_CONTROL,
+    /* 13 \r */ KParseTokens::ASC_CONTROL,
+                KParseTokens::ASC_CONTROL,
+                KParseTokens::ASC_CONTROL,
+                KParseTokens::ASC_CONTROL,
+                KParseTokens::ASC_CONTROL,
+                KParseTokens::ASC_CONTROL,
+                KParseTokens::ASC_CONTROL,
+                KParseTokens::ASC_CONTROL,
+                KParseTokens::ASC_CONTROL,
+                KParseTokens::ASC_CONTROL,
+                KParseTokens::ASC_CONTROL,
+                KParseTokens::ASC_CONTROL,
+                KParseTokens::ASC_CONTROL,
+                KParseTokens::ASC_CONTROL,
+                KParseTokens::ASC_CONTROL,
+                KParseTokens::ASC_CONTROL,
+                KParseTokens::ASC_CONTROL,
+                KParseTokens::ASC_CONTROL,
+                KParseTokens::ASC_CONTROL,
+    /*  32   */ KParseTokens::ASC_OTHER,
+    /*  33 ! */ KParseTokens::ASC_OTHER,
+    /*  34 " */ KParseTokens::ASC_OTHER,
+    /*  35 # */ KParseTokens::ASC_OTHER,
+    /*  36 $ */ KParseTokens::ASC_DOLLAR,
+    /*  37 % */ KParseTokens::ASC_OTHER,
+    /*  38 & */ KParseTokens::ASC_OTHER,
+    /*  39 ' */ KParseTokens::ASC_OTHER,
+    /*  40 ( */ KParseTokens::ASC_OTHER,
+    /*  41 ) */ KParseTokens::ASC_OTHER,
+    /*  42 * */ KParseTokens::ASC_OTHER,
+    /*  43 + */ KParseTokens::ASC_OTHER,
+    /*  44 , */ KParseTokens::ASC_OTHER,
+    /*  45 - */ KParseTokens::ASC_OTHER,
+    /*  46 . */ KParseTokens::ASC_DOT,
+    /*  47 / */ KParseTokens::ASC_OTHER,
+    // 48..57: the ASCII digits
+    /*  48 0 */ KParseTokens::ASC_DIGIT,
+    /*  49 1 */ KParseTokens::ASC_DIGIT,
+    /*  50 2 */ KParseTokens::ASC_DIGIT,
+    /*  51 3 */ KParseTokens::ASC_DIGIT,
+    /*  52 4 */ KParseTokens::ASC_DIGIT,
+    /*  53 5 */ KParseTokens::ASC_DIGIT,
+    /*  54 6 */ KParseTokens::ASC_DIGIT,
+    /*  55 7 */ KParseTokens::ASC_DIGIT,
+    /*  56 8 */ KParseTokens::ASC_DIGIT,
+    /*  57 9 */ KParseTokens::ASC_DIGIT,
+    /*  58 : */ KParseTokens::ASC_COLON,
+    /*  59 ; */ KParseTokens::ASC_OTHER,
+    /*  60 < */ KParseTokens::ASC_OTHER,
+    /*  61 = */ KParseTokens::ASC_OTHER,
+    /*  62 > */ KParseTokens::ASC_OTHER,
+    /*  63 ? */ KParseTokens::ASC_OTHER,
+    /*  64 @ */ KParseTokens::ASC_OTHER,
+    // 65..90: the ASCII upper-case letters
+    /*  65 A */ KParseTokens::ASC_UPALPHA,
+    /*  66 B */ KParseTokens::ASC_UPALPHA,
+    /*  67 C */ KParseTokens::ASC_UPALPHA,
+    /*  68 D */ KParseTokens::ASC_UPALPHA,
+    /*  69 E */ KParseTokens::ASC_UPALPHA,
+    /*  70 F */ KParseTokens::ASC_UPALPHA,
+    /*  71 G */ KParseTokens::ASC_UPALPHA,
+    /*  72 H */ KParseTokens::ASC_UPALPHA,
+    /*  73 I */ KParseTokens::ASC_UPALPHA,
+    /*  74 J */ KParseTokens::ASC_UPALPHA,
+    /*  75 K */ KParseTokens::ASC_UPALPHA,
+    /*  76 L */ KParseTokens::ASC_UPALPHA,
+    /*  77 M */ KParseTokens::ASC_UPALPHA,
+    /*  78 N */ KParseTokens::ASC_UPALPHA,
+    /*  79 O */ KParseTokens::ASC_UPALPHA,
+    /*  80 P */ KParseTokens::ASC_UPALPHA,
+    /*  81 Q */ KParseTokens::ASC_UPALPHA,
+    /*  82 R */ KParseTokens::ASC_UPALPHA,
+    /*  83 S */ KParseTokens::ASC_UPALPHA,
+    /*  84 T */ KParseTokens::ASC_UPALPHA,
+    /*  85 U */ KParseTokens::ASC_UPALPHA,
+    /*  86 V */ KParseTokens::ASC_UPALPHA,
+    /*  87 W */ KParseTokens::ASC_UPALPHA,
+    /*  88 X */ KParseTokens::ASC_UPALPHA,
+    /*  89 Y */ KParseTokens::ASC_UPALPHA,
+    /*  90 Z */ KParseTokens::ASC_UPALPHA,
+    /*  91 [ */ KParseTokens::ASC_OTHER,
+    /*  92 \ */ KParseTokens::ASC_OTHER,
+    /*  93 ] */ KParseTokens::ASC_OTHER,
+    /*  94 ^ */ KParseTokens::ASC_OTHER,
+    /*  95 _ */ KParseTokens::ASC_UNDERSCORE,
+    /*  96 ` */ KParseTokens::ASC_OTHER,
+    // 97..122: the ASCII lower-case letters
+    /*  97 a */ KParseTokens::ASC_LOALPHA,
+    /*  98 b */ KParseTokens::ASC_LOALPHA,
+    /*  99 c */ KParseTokens::ASC_LOALPHA,
+    /* 100 d */ KParseTokens::ASC_LOALPHA,
+    /* 101 e */ KParseTokens::ASC_LOALPHA,
+    /* 102 f */ KParseTokens::ASC_LOALPHA,
+    /* 103 g */ KParseTokens::ASC_LOALPHA,
+    /* 104 h */ KParseTokens::ASC_LOALPHA,
+    /* 105 i */ KParseTokens::ASC_LOALPHA,
+    /* 106 j */ KParseTokens::ASC_LOALPHA,
+    /* 107 k */ KParseTokens::ASC_LOALPHA,
+    /* 108 l */ KParseTokens::ASC_LOALPHA,
+    /* 109 m */ KParseTokens::ASC_LOALPHA,
+    /* 110 n */ KParseTokens::ASC_LOALPHA,
+    /* 111 o */ KParseTokens::ASC_LOALPHA,
+    /* 112 p */ KParseTokens::ASC_LOALPHA,
+    /* 113 q */ KParseTokens::ASC_LOALPHA,
+    /* 114 r */ KParseTokens::ASC_LOALPHA,
+    /* 115 s */ KParseTokens::ASC_LOALPHA,
+    /* 116 t */ KParseTokens::ASC_LOALPHA,
+    /* 117 u */ KParseTokens::ASC_LOALPHA,
+    /* 118 v */ KParseTokens::ASC_LOALPHA,
+    /* 119 w */ KParseTokens::ASC_LOALPHA,
+    /* 120 x */ KParseTokens::ASC_LOALPHA,
+    /* 121 y */ KParseTokens::ASC_LOALPHA,
+    /* 122 z */ KParseTokens::ASC_LOALPHA,
+    /* 123 { */ KParseTokens::ASC_OTHER,
+    /* 124 | */ KParseTokens::ASC_OTHER,
+    /* 125 } */ KParseTokens::ASC_OTHER,
+    /* 126 ~ */ KParseTokens::ASC_OTHER,
+    /* 127   */ KParseTokens::ASC_OTHER
+};
+
+
+// static
+// Returns a pointer to the first occurrence of code point c in the NUL-terminated UTF-16
+// string pStr, or nullptr when pStr is null or c does not occur.
+const sal_Unicode* cclass_Unicode::StrChr( const sal_Unicode* pStr, sal_uInt32 c )
+{
+    if ( pStr == nullptr )
+        return nullptr;
+    sal_Unicode aUnits[2];
+    const bool bSingleUnit = rtl::splitSurrogates(c, aUnits) == 1;
+    for ( const sal_Unicode* p = pStr; *p; ++p )
+        // p[1] may be read here: *p is not the terminator, so p[1] is at worst the terminator
+        if ( *p == aUnits[0] && (bSingleUnit || p[1] == aUnits[1]) )
+            return p;
+    return nullptr;
+}
+
+
+sal_Int32 cclass_Unicode::getParseTokensType(sal_uInt32 const c, bool const isFirst)
+{
+    // ASCII is looked up in pParseTokensType; every other code point is classified by ICU general category.
+    if ( c < nDefCnt )
+        return pParseTokensType[ sal_uInt8(c) ];
+    else
+    {
+        //! all KParseTokens::UNI_... must be matched
+        switch (u_charType(c))
+        {
+            case U_UPPERCASE_LETTER :
+                return KParseTokens::UNI_UPALPHA;
+            case U_LOWERCASE_LETTER :
+                return KParseTokens::UNI_LOALPHA;
+            case U_TITLECASE_LETTER :
+                return KParseTokens::UNI_TITLE_ALPHA;
+            case U_MODIFIER_LETTER :
+                return KParseTokens::UNI_MODIFIER_LETTER;
+            case U_NON_SPACING_MARK :
+                // A Non_Spacing_Mark cannot be the leading character: report UNI_OTHER then.
+                if (isFirst) break;
+                [[fallthrough]]; // treat it as Other_Letter.
+            case U_OTHER_LETTER :
+                return KParseTokens::UNI_OTHER_LETTER;
+            case U_DECIMAL_DIGIT_NUMBER :
+                return KParseTokens::UNI_DIGIT;
+            case U_LETTER_NUMBER :
+                return KParseTokens::UNI_LETTER_NUMBER;
+            case U_OTHER_NUMBER :
+                return KParseTokens::UNI_OTHER_NUMBER;
+        }
+
+        return KParseTokens::UNI_OTHER;
+    }
+}
+
+void cclass_Unicode::setupInternational( const Locale& rLocale )
+{
+    // Stores rLocale as the parser locale and creates the LocaleData2 service if it does not exist yet.
+    const bool bSameLocale = aParserLocale.Language == rLocale.Language
+        && aParserLocale.Country == rLocale.Country
+        && aParserLocale.Variant == rLocale.Variant;
+    if ( !bSameLocale )
+    {
+        aParserLocale.Language = rLocale.Language;
+        aParserLocale.Country = rLocale.Country;
+        aParserLocale.Variant = rLocale.Variant;
+    }
+    if ( mxLocaleData.is() )
+        return;
+    mxLocaleData.set( LocaleData2::create(m_xContext) );
+}
+
+
+void cclass_Unicode::setupParserTable( const Locale& rLocale, sal_Int32 startCharTokenType,
+            const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
+            const OUString& userDefinedCharactersCont )
+{
+    // Re-initialises the parser table only if it is missing or an input differs from the cached one.
+    const bool bLocaleChanged = rLocale.Language != aParserLocale.Language
+        || rLocale.Country != aParserLocale.Country
+        || rLocale.Variant != aParserLocale.Variant;
+    const bool bTypesChanged = startCharTokenType != nStartTypes || contCharTokenType != nContTypes;
+    const bool bUserCharsChanged = userDefinedCharactersStart != aStartChars
+        || userDefinedCharactersCont != aContChars;
+    if ( !pTable || bLocaleChanged || bTypesChanged || bUserCharsChanged )
+        initParserTable( rLocale, startCharTokenType, userDefinedCharactersStart,
+            contCharTokenType, userDefinedCharactersCont );
+}
+
+
+void cclass_Unicode::initParserTable( const Locale& rLocale, sal_Int32 startCharTokenType,
+            const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
+            const OUString& userDefinedCharactersCont )
+{   // (Re)builds pTable, pStart and pCont from the KParseTokens masks and the user-defined character strings.
+    // (Re)Init; setupInternational() presumably provides mxLocaleData/aParserLocale used below - TODO confirm.
+    setupInternational( rLocale );
+    // Memory of pTable is reused; its content always starts out as a copy of pDefaultParserTable.
+    if ( !pTable )
+        pTable.reset(new ParserFlags[nDefCnt]);
+    memcpy( pTable.get(), pDefaultParserTable, sizeof(ParserFlags) * nDefCnt );
+    // Start and cont tables only need reallocation if different length.
+    if ( pStart && userDefinedCharactersStart.getLength() != aStartChars.getLength() )
+    {
+        pStart.reset();
+    }
+    if ( pCont && userDefinedCharactersCont.getLength() != aContChars.getLength() )
+    {
+        pCont.reset();
+    }
+    nStartTypes = startCharTokenType;           // KParseTokens mask applied to start characters
+    nContTypes = contCharTokenType;             // KParseTokens mask applied to continuation characters
+    aStartChars = userDefinedCharactersStart;   // additionally allowed start characters
+    aContChars = userDefinedCharactersCont;     // additionally allowed continuation characters
+
+    // specials: the number separators of the locale
+    if( mxLocaleData.is() )
+    {
+        LocaleDataItem2 aItem =
+            mxLocaleData->getLocaleItem2( aParserLocale );
+//!TODO: theoretically separators may be a string, adjustment would have to be
+//! done here and in parsing and in ::rtl::math::stringToDouble()
+        cGroupSep = aItem.thousandSeparator[0];
+        cDecimalSep = aItem.decimalSeparator[0];
+        cDecimalSepAlt = aItem.decimalSeparatorAlternative.toChar();
+    }
+
+    if (nContTypes & KParseTokens::GROUP_SEPARATOR_IN_NUMBER)
+    {
+        if ( cGroupSep < nDefCnt )
+            pTable[cGroupSep] |= ParserFlags::VALUE;
+    }
+    else
+    {
+        cGroupSep = 0;  // group separators are not recognised in numbers
+    }
+    if ( cDecimalSep < nDefCnt )
+        pTable[cDecimalSep] |= ParserFlags::CHAR_VALUE | ParserFlags::VALUE;
+    if ( cDecimalSepAlt && cDecimalSepAlt < nDefCnt )
+        pTable[cDecimalSepAlt] |= ParserFlags::CHAR_VALUE | ParserFlags::VALUE;
+
+    // Modify characters according to KParseTokens definitions.
+    {
+        using namespace KParseTokens;
+        sal_uInt8 i;
+
+        if ( !(nStartTypes & ASC_UPALPHA) )
+            for ( i = 65; i < 91; i++ )         // 'A'..'Z'
+                pTable[i] &= ~ParserFlags::CHAR_WORD;  // not allowed as start character
+        if ( !(nContTypes & ASC_UPALPHA) )
+            for ( i = 65; i < 91; i++ )
+                pTable[i] &= ~ParserFlags::WORD;       // not allowed as cont character
+
+        if ( !(nStartTypes & ASC_LOALPHA) )
+            for ( i = 97; i < 123; i++ )        // 'a'..'z'
+                pTable[i] &= ~ParserFlags::CHAR_WORD;  // not allowed as start character
+        if ( !(nContTypes & ASC_LOALPHA) )
+            for ( i = 97; i < 123; i++ )
+                pTable[i] &= ~ParserFlags::WORD;       // not allowed as cont character
+
+        if ( nStartTypes & ASC_DIGIT )
+            for ( i = 48; i < 58; i++ )         // '0'..'9'
+                pTable[i] |= ParserFlags::CHAR_WORD;   // allowed as start character
+        if ( !(nContTypes & ASC_DIGIT) )
+            for ( i = 48; i < 58; i++ )
+                pTable[i] &= ~ParserFlags::WORD;       // not allowed as cont character
+
+        if ( !(nStartTypes & ASC_UNDERSCORE) )
+            pTable[95] &= ~ParserFlags::CHAR_WORD;     // not allowed as start character
+        if ( !(nContTypes & ASC_UNDERSCORE) )
+            pTable[95] &= ~ParserFlags::WORD;          // not allowed as cont character
+
+        if ( nStartTypes & ASC_DOLLAR )
+            pTable[36] |= ParserFlags::CHAR_WORD;      // allowed as start character
+        if ( nContTypes & ASC_DOLLAR )
+            pTable[36] |= ParserFlags::WORD;           // allowed as cont character
+
+        if ( nStartTypes & ASC_DOT )
+            pTable[46] |= ParserFlags::CHAR_WORD;      // allowed as start character
+        if ( nContTypes & ASC_DOT )
+            pTable[46] |= ParserFlags::WORD;           // allowed as cont character
+
+        if ( nStartTypes & ASC_COLON )
+            pTable[58] |= ParserFlags::CHAR_WORD;      // allowed as start character
+        if ( nContTypes & ASC_COLON )
+            pTable[58] |= ParserFlags::WORD;           // allowed as cont character
+
+        if ( nStartTypes & ASC_CONTROL )
+            for ( i = 1; i < 32; i++ )
+                pTable[i] |= ParserFlags::CHAR_WORD;   // allowed as start character
+        if ( nContTypes & ASC_CONTROL )
+            for ( i = 1; i < 32; i++ )
+                pTable[i] |= ParserFlags::WORD;        // allowed as cont character
+
+        if ( nStartTypes & ASC_ANY_BUT_CONTROL )
+            for ( i = 32; i < nDefCnt; i++ )
+                pTable[i] |= ParserFlags::CHAR_WORD;   // allowed as start character
+        if ( nContTypes & ASC_ANY_BUT_CONTROL )
+            for ( i = 32; i < nDefCnt; i++ )
+                pTable[i] |= ParserFlags::WORD;        // allowed as cont character
+
+    }
+
+    // Merge in (positively override with) user defined characters.
+    // StartChars: pStart gets one entry per code unit of aStartChars.
+    sal_Int32 nLen = aStartChars.getLength();
+    if ( nLen )
+    {
+        if ( !pStart )
+            pStart.reset(new ParserFlags[ nLen ]);
+        const sal_Unicode* p = aStartChars.getStr();
+        for ( sal_Int32 j=0; j<nLen; j++, p++ )
+        {
+            pStart[j] = ParserFlags::CHAR_WORD;
+            if ( *p < nDefCnt )
+                pTable[*p] |= ParserFlags::CHAR_WORD;
+        }
+    }
+    // ContChars: pCont gets one entry per code unit of aContChars.
+    nLen = aContChars.getLength();
+    if ( nLen )
+    {
+        if ( !pCont )
+            pCont.reset(new ParserFlags[ nLen ]);
+        const sal_Unicode* p = aContChars.getStr();
+        for ( sal_Int32 j=0; j<nLen; j++, p++ )    // p must advance with j, else only the first character is merged
+        {
+            pCont[j] = ParserFlags::WORD;
+            if ( *p < nDefCnt )
+                pTable[*p] |= ParserFlags::WORD;
+        }
+    }
+}
+
+
+void cclass_Unicode::destroyParserTable()
+{   // Releases all three parser tables; the unique pointers are empty afterwards.
+    pTable.reset();     // flags of the characters below nDefCnt
+    pStart.reset();     // flags of the user-defined start characters
+    pCont.reset();      // flags of the user-defined continuation characters
+}
+
+
+ParserFlags cclass_Unicode::getFlags(sal_uInt32 const c, const cclass_Unicode::ScanState eState)
+{
+    // Base classification: code points below nDefCnt are looked up in pTable,
+    // all others are classified by getFlagsExtended().
+    ParserFlags nFlags = (c < nDefCnt) ? pTable[ sal_uInt8(c) ] : getFlagsExtended(c, eState);
+
+    // Only these two groups of states consult the user-defined characters.
+    const bool bStartState = (eState == ssGetChar || eState == ssRewindFromValue ||
+            eState == ssIgnoreLeadingInRewind || eState == ssGetWordFirstChar);
+    const bool bContState = (eState == ssGetValue || eState == ssGetWord);
+    if ( !bStartState && !bContState )
+        return nFlags;  // other cases aren't needed
+
+    // The flag that decides whether the user-defined characters are asked:
+    // CHAR_WORD in the start states, WORD in the continuation states.
+    // If it is already set the table result is returned as is.
+    const ParserFlags nWanted = bStartState ? ParserFlags::CHAR_WORD : ParserFlags::WORD;
+    if ( nFlags & nWanted )
+        return nFlags;
+
+    // Merge in the flags of a matching user-defined character, if any.
+    // NOTE(review): getContCharsFlags() takes a sal_Unicode, so a code point
+    // above 0xFFFF is narrowed in that call - confirm this is intended.
+    if ( bStartState )
+        nFlags |= getStartCharsFlags( c );
+    else
+        nFlags |= getContCharsFlags( c );
+
+    // A character the user explicitly allowed must not stay excluded.
+    if ( nFlags & nWanted )
+        nFlags &= ~ParserFlags::EXCLUDED;
+
+    return nFlags;
+}
+
+
+ParserFlags cclass_Unicode::getFlagsExtended(sal_uInt32 const c, const cclass_Unicode::ScanState eState) const
+{
+    // Locale separators take precedence over the Unicode general category.
+    if ( c == cGroupSep )
+        return ParserFlags::VALUE;
+    if ( c == cDecimalSep || (cDecimalSepAlt && c == cDecimalSepAlt) )
+        return ParserFlags::CHAR_VALUE | ParserFlags::VALUE;
+
+    // A state in which the character would begin a token is judged by
+    // nStartTypes, every other state by nContTypes.
+    const bool bStart = (eState == ssGetChar || eState == ssGetWordFirstChar ||
+            eState == ssRewindFromValue || eState == ssIgnoreLeadingInRewind);
+    const sal_Int32 nTypes = bStart ? nStartTypes : nContTypes;
+
+    // Maps "category bit set in nTypes" to the word flag of the current position.
+    auto const wordFlagsIf = [bStart]( sal_Int32 nEnabled ) -> ParserFlags
+    {
+        if ( !nEnabled )
+            return ParserFlags::ILLEGAL;
+        return bStart ? ParserFlags::CHAR_WORD : ParserFlags::WORD;
+    };
+
+    //! all KParseTokens::UNI_... must be matched
+    switch (u_charType(c))
+    {
+        // Letters
+        case U_UPPERCASE_LETTER :
+            return wordFlagsIf( nTypes & KParseTokens::UNI_UPALPHA );
+        case U_LOWERCASE_LETTER :
+            return wordFlagsIf( nTypes & KParseTokens::UNI_LOALPHA );
+        case U_TITLECASE_LETTER :
+            return wordFlagsIf( nTypes & KParseTokens::UNI_TITLE_ALPHA );
+        case U_MODIFIER_LETTER :
+            return wordFlagsIf( nTypes & KParseTokens::UNI_MODIFIER_LETTER );
+        // Marks
+        case U_NON_SPACING_MARK :
+        case U_COMBINING_SPACING_MARK :
+            // Non_Spacing_Mark can't be a leading character,
+            // nor can a spacing combining mark.
+            if (bStart)
+                return ParserFlags::ILLEGAL;
+            [[fallthrough]]; // treat it as Other_Letter.
+        case U_OTHER_LETTER :
+            return wordFlagsIf( nTypes & KParseTokens::UNI_OTHER_LETTER );
+        // Numbers carry TOKEN_DIGIT_FLAGS whether or not their category is enabled.
+        case U_DECIMAL_DIGIT_NUMBER :
+            return wordFlagsIf( nTypes & KParseTokens::UNI_DIGIT ) | TOKEN_DIGIT_FLAGS;
+        case U_LETTER_NUMBER :
+            return wordFlagsIf( nTypes & KParseTokens::UNI_LETTER_NUMBER ) | TOKEN_DIGIT_FLAGS;
+        case U_OTHER_NUMBER :
+            return wordFlagsIf( nTypes & KParseTokens::UNI_OTHER_NUMBER ) | TOKEN_DIGIT_FLAGS;
+        // White space
+        case U_SPACE_SEPARATOR :
+            if ( nTypes & KParseTokens::IGNORE_LEADING_WS )
+                return ParserFlags::CHAR_DONTCARE;
+            if ( bStart )
+                return ParserFlags::CHAR_WORD;
+            return ParserFlags::CHAR_DONTCARE | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP;
+        case U_OTHER_PUNCTUATION:
+            // fdo#61754 Midletter punctuation, for example U+00B7 MIDDLE DOT,
+            // may continue a word (similar to U_NON_SPACING_MARK) but can
+            // never start one.
+            // tdf#123575 for U+30FB KATAKANA MIDDLE DOT the property is not
+            // U_WB_MIDLETTER but U_WB_KATAKANA instead, so it is tested
+            // explicitly, as is U+FF65 HALFWIDTH KATAKANA MIDDLE DOT.
+            if ( bStart )
+                return ParserFlags::ILLEGAL;
+            if ( U_WB_MIDLETTER != u_getIntPropertyValue(c, UCHAR_WORD_BREAK)
+                    && c != 0x30FB && c != 0xFF65 )
+                return ParserFlags::ILLEGAL;
+            //allowing it to continue the word
+            return (nTypes & KParseTokens::UNI_OTHER_LETTER) ?
+                ParserFlags::WORD : ParserFlags::ILLEGAL;
+    }
+
+    // Any other general category yields ILLEGAL.
+    return ParserFlags::ILLEGAL;
+}
+
+
+ParserFlags cclass_Unicode::getStartCharsFlags( sal_uInt32 c )
+{
+    // No pStart table: there is nothing user-defined to look up.
+    if ( !pStart )
+        return ParserFlags::ILLEGAL;
+    // pStart is indexed like aStartChars: the offset of the hit selects the flags.
+    const sal_Unicode* const pBegin = aStartChars.getStr();
+    const sal_Unicode* const pHit = StrChr( pBegin, c );
+    // A character that is not among the user-defined ones yields ILLEGAL.
+    return pHit ? pStart[ pHit - pBegin ] : ParserFlags::ILLEGAL;
+}
+
+
+ParserFlags cclass_Unicode::getContCharsFlags( sal_Unicode c )
+{
+    // No pCont table: there is nothing user-defined to look up.
+    if ( !pCont )
+        return ParserFlags::ILLEGAL;
+    // pCont is indexed like aContChars: the offset of the hit selects the flags.
+    const sal_Unicode* const pBegin = aContChars.getStr();
+    const sal_Unicode* const pHit = StrChr( pBegin, c );
+    // NOTE(review): c is a sal_Unicode here but a sal_uInt32 in getStartCharsFlags() - confirm this is intended.
+    return pHit ? pCont[ pHit - pBegin ] : ParserFlags::ILLEGAL;
+}
+
+
+void cclass_Unicode::parseText( ParseResult& r, const OUString& rText, sal_Int32 nPos, sal_Int32 nTokenType )
+{   // Scans one token of rText starting at nPos with a state machine and stores the outcome in r; nTokenType is the mask of acceptable KParseType values.
+    assert(r.LeadingWhiteSpace == 0);   // r is expected to arrive without leading white space recorded
+    ScanState eState = ssGetChar;
+
+    //! All the variables below (plus ParseResult) have to be reset on ssRewindFromValue!
+    OUStringBuffer aSymbol;             // collects the dequoted name or string
+    bool isFirst(true);                 // passed to getParseTokensType() for the first code point only
+    sal_Int32 index(nPos); // index of next code point after current
+    sal_Int32 postSymbolIndex(index); // index of code point following last quote
+    sal_uInt32 current((index < rText.getLength()) ? rText.iterateCodePoints(&index) : 0);
+    sal_uInt32 cLast = 0;               // code point processed in the previous iteration
+    sal_Int32 nCodePoints(0);           // code points counted for the token, becomes r.CharLen
+    int nDecSeps = 0;                   // decimal separators seen so far
+    bool bQuote = false;                // set while scanning a single quoted name
+    bool bMightBeWord = true;           // everything scanned so far carried the word flag of its position
+    bool bMightBeWordLast = true;       // bMightBeWord of the previous iteration, restored on put back
+    bool bDecSepAltUsed = false;        // the alternative decimal separator was matched
+    //! All the variables above (plus ParseResult) have to be reset on ssRewindFromValue!
+    sal_Int32 nextCharIndex(nPos); // == index of nextChar
+
+    while ((current != 0) && (eState != ssStop))
+    {
+        ++nCodePoints;
+        ParserFlags nMask = getFlags(current, eState);
+        if ( nMask & ParserFlags::EXCLUDED )
+            eState = ssBounce;
+        if ( bMightBeWord )
+        {   // only relevant for ssGetValue fall back
+            if ( eState == ssGetChar || eState == ssRewindFromValue ||
+                    eState == ssIgnoreLeadingInRewind )
+                bMightBeWord = bool(nMask & ParserFlags::CHAR_WORD);
+            else
+                bMightBeWord = bool(nMask & ParserFlags::WORD);
+        }
+        sal_Int32 nParseTokensType = getParseTokensType(current, isFirst);
+        isFirst = false;
+        sal_Int32 const nextIndex(nextCharIndex); // == index of char following current
+        nextCharIndex = index; // == index of nextChar
+        sal_uInt32 nextChar((index < rText.getLength()) ? rText.iterateCodePoints(&index) : 0);    // one code point of look-ahead, 0 at the end of rText
+        switch (eState)
+        {
+            case ssGetChar :
+            case ssRewindFromValue :
+            case ssIgnoreLeadingInRewind :
+            {
+                if ( (nMask & ParserFlags::CHAR_VALUE) && eState != ssRewindFromValue
+                        && eState != ssIgnoreLeadingInRewind )
+                {   //! must be first, may fall back to ssGetWord via bMightBeWord
+                    eState = ssGetValue;
+                    if ( nMask & ParserFlags::VALUE_DIGIT )
+                    {
+                        if (128 <= current)
+                            r.TokenType = KParseType::UNI_NUMBER;
+                        else
+                            r.TokenType = KParseType::ASC_NUMBER;
+                    }
+                    else if (current == cDecimalSep || (bDecSepAltUsed = (cDecimalSepAlt && current == cDecimalSepAlt)))
+                    {
+                        if (nextChar)
+                            ++nDecSeps;
+                        else
+                            eState = ssRewindFromValue;
+                            // retry for ONE_SINGLE_CHAR or others
+                    }
+                }
+                else if ( nMask & ParserFlags::CHAR_WORD )
+                {
+                    eState = ssGetWord;
+                    r.TokenType = KParseType::IDENTNAME;
+                }
+                else if ( nMask & ParserFlags::NAME_SEP )
+                {
+                    eState = ssGetWordFirstChar;
+                    bQuote = true;
+                    postSymbolIndex = nextCharIndex;
+                    nParseTokensType = 0;   // will be taken of first real character
+                    r.TokenType = KParseType::SINGLE_QUOTE_NAME;
+                }
+                else if ( nMask & ParserFlags::CHAR_STRING )
+                {
+                    eState = ssGetString;
+                    postSymbolIndex = nextCharIndex;
+                    nParseTokensType = 0;   // will be taken of first real character
+                    r.TokenType = KParseType::DOUBLE_QUOTE_STRING;
+                }
+                else if ( nMask & ParserFlags::CHAR_DONTCARE )
+                {
+                    if ( nStartTypes & KParseTokens::IGNORE_LEADING_WS )
+                    {
+                        if (eState == ssRewindFromValue)
+                            eState = ssIgnoreLeadingInRewind;
+                        r.LeadingWhiteSpace = nextCharIndex - nPos;
+                        nCodePoints--; // exclude leading whitespace
+                        postSymbolIndex = nextCharIndex;
+                        nParseTokensType = 0;   // wait until real character
+                        bMightBeWord = true;
+                    }
+                    else
+                        eState = ssBounce;
+                }
+                else if ( nMask & ParserFlags::CHAR_BOOL )
+                {
+                    eState = ssGetBool;
+                    r.TokenType = KParseType::BOOLEAN;
+                }
+                else if ( nMask & ParserFlags::CHAR )
+                {   //! must be last
+                    eState = ssStop;
+                    r.TokenType = KParseType::ONE_SINGLE_CHAR;
+                }
+                else
+                    eState = ssBounce;      // not known
+            }
+            break;
+            case ssGetValue :
+            {
+                if ( nMask & ParserFlags::VALUE_DIGIT )
+                {
+                    if (128 <= current)
+                        r.TokenType = KParseType::UNI_NUMBER;
+                    else if ( r.TokenType != KParseType::UNI_NUMBER )
+                        r.TokenType = KParseType::ASC_NUMBER;
+                }
+                if ( nMask & ParserFlags::VALUE )
+                {
+                    if (current == cGroupSep)
+                    {
+                        if (getFlags(nextChar, eState) & ParserFlags::VALUE_DIGIT)
+                            nParseTokensType |= KParseTokens::GROUP_SEPARATOR_IN_NUMBER;
+                        else
+                        {
+                            // Trailing group separator character is not a
+                            // group separator.
+                            eState = ssStopBack;
+                        }
+                    }
+                    else if ((current == cDecimalSep ||
+                                (bDecSepAltUsed = (cDecimalSepAlt && current == cDecimalSepAlt))) &&
+                            ++nDecSeps > 1)
+                    {
+                        if (nCodePoints == 2)
+                            eState = ssRewindFromValue;
+                            // consecutive separators
+                        else
+                            eState = ssStopBack;
+                    }
+                    // else keep it going
+                }
+                else if (current == 'E' || current == 'e')
+                {
+                    ParserFlags nNext = getFlags(nextChar, eState);
+                    if ( nNext & ParserFlags::VALUE_EXP )
+                        ;   // keep it going
+                    else if (bMightBeWord && ((nNext & ParserFlags::WORD) || !nextChar))
+                    {   // might be a numerical name (1.2efg)
+                        eState = ssGetWord;
+                        r.TokenType = KParseType::IDENTNAME;
+                    }
+                    else
+                        eState = ssStopBack;
+                }
+                else if ( nMask & ParserFlags::VALUE_SIGN )
+                {
+                    if ( (cLast == 'E') || (cLast == 'e') )
+                    {
+                        ParserFlags nNext = getFlags(nextChar, eState);
+                        if ( nNext & ParserFlags::VALUE_EXP_VALUE )
+                            ;   // keep it going
+                        else if (bMightBeWord && ((nNext & ParserFlags::WORD) || !nextChar))
+                        {   // might be a numerical name (1.2e+fg)
+                            eState = ssGetWord;
+                            r.TokenType = KParseType::IDENTNAME;
+                        }
+                        else
+                            eState = ssStopBack;
+                    }
+                    else if ( bMightBeWord )
+                    {   // might be a numerical name (1.2+fg)
+                        eState = ssGetWord;
+                        r.TokenType = KParseType::IDENTNAME;
+                    }
+                    else
+                        eState = ssStopBack;
+                }
+                else if ( bMightBeWord && (nMask & ParserFlags::WORD) )
+                {   // might be a numerical name (1995.A1)
+                    eState = ssGetWord;
+                    r.TokenType = KParseType::IDENTNAME;
+                }
+                else
+                    eState = ssStopBack;
+            }
+            break;
+            case ssGetWordFirstChar :
+                eState = ssGetWord;
+                [[fallthrough]];
+            case ssGetWord :
+            {
+                if ( nMask & ParserFlags::WORD )
+                    ;   // keep it going
+                else if ( nMask & ParserFlags::NAME_SEP )
+                {
+                    if ( bQuote )
+                    {
+                        if ( cLast == '\\' )
+                        {   // escaped
+                            aSymbol.append(rText.subView(postSymbolIndex, nextCharIndex - postSymbolIndex - 2));
+                            aSymbol.append(OUString(&current, 1));
+                        }
+                        else
+                        {
+                            eState = ssStop;
+                            aSymbol.append(rText.subView(postSymbolIndex, nextCharIndex - postSymbolIndex - 1));
+                        }
+                        postSymbolIndex = nextCharIndex;
+                    }
+                    else
+                        eState = ssStopBack;
+                }
+                else if ( bQuote )
+                    ;   // keep it going
+                else
+                    eState = ssStopBack;
+            }
+            break;
+            case ssGetString :
+            {
+                if ( nMask & ParserFlags::STRING_SEP )
+                {
+                    if ( cLast == '\\' )
+                    {   // escaped
+                        aSymbol.append(rText.subView(postSymbolIndex, nextCharIndex - postSymbolIndex - 2));
+                        aSymbol.append(OUString(&current, 1));
+                    }
+                    else if (current == nextChar &&
+                            !(nContTypes & KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING) )
+                    {   // "" => literal " escaped
+                        aSymbol.append(rText.subView(postSymbolIndex, nextCharIndex - postSymbolIndex));
+                        nextCharIndex = index;
+                        if (index < rText.getLength()) { ++nCodePoints; }
+                        nextChar = (index < rText.getLength()) ? rText.iterateCodePoints(&index) : 0;
+                    }
+                    else
+                    {
+                        eState = ssStop;
+                        aSymbol.append(rText.subView(postSymbolIndex, nextCharIndex - postSymbolIndex - 1));
+                    }
+                    postSymbolIndex = nextCharIndex;
+                }
+            }
+            break;
+            case ssGetBool :
+            {
+                if ( nMask & ParserFlags::BOOL )
+                    eState = ssStop;    // maximum 2: <, >, <>, <=, >=
+                else
+                    eState = ssStopBack;
+            }
+            break;
+            case ssStopBack :
+            case ssBounce :
+            case ssStop :
+                ;   // nothing, no compiler warning
+            break;
+        }
+        if ( eState == ssRewindFromValue )
+        {   // start over at nPos; the start states skip the value branch while in ssRewindFromValue
+            r = ParseResult();
+            index = nPos;
+            postSymbolIndex = nPos;
+            nextCharIndex = nPos;
+            aSymbol.setLength(0);
+            current = (index < rText.getLength()) ? rText.iterateCodePoints(&index) : 0;
+            nCodePoints = (nPos < rText.getLength()) ? 1 : 0;
+            isFirst = true;
+            cLast = 0;
+            nDecSeps = 0;
+            bQuote = false;
+            bMightBeWord = true;
+            bMightBeWordLast = true;
+            bDecSepAltUsed = false;
+        }
+        else
+        {
+            if ( !(r.TokenType & nTokenType) )
+            {   // the token type found so far is not among the requested ones
+                if ( (r.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER))
+                        && (nTokenType & KParseType::IDENTNAME) && bMightBeWord )
+                    ;   // keep a number that might be a word
+                else if (r.LeadingWhiteSpace == (nextCharIndex - nPos))
+                    ;   // keep ignored white space
+                else if ( !r.TokenType && eState == ssGetValue && (nMask & ParserFlags::VALUE_SEP) )
+                    ;   // keep uncertain value
+                else
+                    eState = ssBounce;
+            }
+            if ( eState == ssBounce )
+            {   // no token: clear the type and put the current code point back
+                r.TokenType = 0;
+                eState = ssStopBack;
+            }
+            if ( eState == ssStopBack )
+            {   // put back
+                nextChar = rText.iterateCodePoints(&index, -1);
+                nextCharIndex = nextIndex;
+                --nCodePoints;
+                bMightBeWord = bMightBeWordLast;
+                eState = ssStop;
+            }
+            if ( eState != ssStop )
+            {   // types go to StartFlags while that is still empty, to ContFlags afterwards
+                if ( !r.StartFlags )
+                    r.StartFlags |= nParseTokensType;
+                else
+                    r.ContFlags |= nParseTokensType;
+            }
+            bMightBeWordLast = bMightBeWord;
+            cLast = current;
+            current = nextChar;
+        }
+    }
+    // r.CharLen is the length in characters (not code units) of the parsed
+    // token not including any leading white space.
+    r.CharLen = nCodePoints;
+    r.EndPos = nextCharIndex;
+    if ( r.TokenType & KParseType::ASC_NUMBER )
+    {   // convert the scanned range, using whichever decimal separator was matched
+        r.Value = rtl_math_uStringToDouble(rText.getStr() + nPos + r.LeadingWhiteSpace,
+            rText.getStr() + r.EndPos, (bDecSepAltUsed ? cDecimalSepAlt : cDecimalSep), cGroupSep, nullptr, nullptr);
+        if ( bMightBeWord )
+            r.TokenType |= KParseType::IDENTNAME;
+    }
+    else if ( r.TokenType & KParseType::UNI_NUMBER )
+    {
+        if ( !xNatNumSup.is() )
+        {   // created on first use
+            if ( m_xContext.is() )
+            {
+                xNatNumSup = NativeNumberSupplier::create( m_xContext );
+            }
+        }
+        OUString aTmp(rText.getStr() + nPos + r.LeadingWhiteSpace,
+                r.EndPos - nPos - r.LeadingWhiteSpace);
+        // transliterate to ASCII
+        aTmp = xNatNumSup->getNativeNumberString( aTmp, aParserLocale,  // NOTE(review): xNatNumSup is not checked here; it is still empty if m_xContext was not set - confirm
+                NativeNumberMode::NATNUM0 );
+        r.Value = ::rtl::math::stringToDouble( aTmp, cDecimalSep, cGroupSep );
+        if ( bMightBeWord )
+            r.TokenType |= KParseType::IDENTNAME;
+    }
+    else if ( r.TokenType & (KParseType::SINGLE_QUOTE_NAME | KParseType::DOUBLE_QUOTE_STRING) )
+    {
+        if (postSymbolIndex < nextCharIndex)
+        {   //! open quote
+            aSymbol.append(rText.subView(postSymbolIndex, nextCharIndex - postSymbolIndex - 1));
+            r.TokenType |= KParseType::MISSING_QUOTE;
+        }
+        r.DequotedNameOrString = aSymbol.makeStringAndClear();
+    }
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/characterclassification/characterclassificationImpl.cxx b/i18npool/source/characterclassification/characterclassificationImpl.cxx
new file mode 100644
index 000000000..aff424d8b
--- /dev/null
+++ b/i18npool/source/characterclassification/characterclassificationImpl.cxx
@@ -0,0 +1,219 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ *   Licensed to the Apache Software Foundation (ASF) under one or more
+ *   contributor license agreements. See the NOTICE file distributed
+ *   with this work for additional information regarding copyright
+ *   ownership. The ASF licenses this file to you under the Apache
+ *   License, Version 2.0 (the "License"); you may not use this file
+ *   except in compliance with the License. You may obtain a copy of
+ *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <cppuhelper/supportsservice.hxx>
+#include <characterclassificationImpl.hxx>
+#include <localedata.hxx>
+
+#include <com/sun/star/uno/XComponentContext.hpp>
+
+using namespace com::sun::star::uno;
+using namespace ::com::sun::star::i18n;
+using namespace com::sun::star::lang;
+
+namespace i18npool {
+
+CharacterClassificationImpl::CharacterClassificationImpl(
+        const Reference < XComponentContext >& rxContext ) : m_xContext( rxContext )
+{   // Tries to create the "Unicode" implementation right away; xUCI is only set if that succeeds.
+    static constexpr OUStringLiteral sUnicode = u"Unicode";
+    if (createLocaleSpecificCharacterClassification(sUnicode, Locale()))
+        xUCI = cachedItem->xCI;     // on success cachedItem is the entry that was just added
+}
+
+CharacterClassificationImpl::~CharacterClassificationImpl() {   // empty body: no explicit cleanup is done here
+}
+
+
+OUString SAL_CALL
+CharacterClassificationImpl::toUpper( const OUString& Text, sal_Int32 nPos,
+        sal_Int32 nCount, const Locale& rLocale )
+{   // Forwards unchanged to the implementation selected for rLocale; RuntimeException if none can be obtained.
+    return getLocaleSpecificCharacterClassification(rLocale)->toUpper(Text, nPos, nCount, rLocale);
+}
+
+OUString SAL_CALL
+CharacterClassificationImpl::toLower( const OUString& Text, sal_Int32 nPos,
+        sal_Int32 nCount, const Locale& rLocale )
+{   // Forwards unchanged to the implementation selected for rLocale; RuntimeException if none can be obtained.
+    return getLocaleSpecificCharacterClassification(rLocale)->toLower(Text, nPos, nCount, rLocale);
+}
+
+OUString SAL_CALL
+CharacterClassificationImpl::toTitle( const OUString& Text, sal_Int32 nPos,
+        sal_Int32 nCount, const Locale& rLocale )
+{   // Forwards unchanged to the implementation selected for rLocale; RuntimeException if none can be obtained.
+    return getLocaleSpecificCharacterClassification(rLocale)->toTitle(Text, nPos, nCount, rLocale);
+}
+
+sal_Int16 SAL_CALL
+CharacterClassificationImpl::getType( const OUString& Text, sal_Int32 nPos )
+{   // No locale involved: answered by xUCI, the implementation loaded in the constructor.
+    if (!xUCI.is())
+        throw RuntimeException();
+    return xUCI->getType(Text, nPos);
+}
+
+sal_Int16 SAL_CALL
+CharacterClassificationImpl::getCharacterDirection( const OUString& Text, sal_Int32 nPos )
+{   // No locale involved: answered by xUCI, the implementation loaded in the constructor.
+    if (!xUCI.is())
+        throw RuntimeException();
+    return xUCI->getCharacterDirection(Text, nPos);
+}
+
+sal_Int16 SAL_CALL
+CharacterClassificationImpl::getScript( const OUString& Text, sal_Int32 nPos )
+{   // No locale involved: answered by xUCI, the implementation loaded in the constructor.
+    if (!xUCI.is())
+        throw RuntimeException();
+    return xUCI->getScript(Text, nPos);
+}
+
+sal_Int32 SAL_CALL
+CharacterClassificationImpl::getCharacterType( const OUString& Text, sal_Int32 nPos,
+        const Locale& rLocale )
+{   // Forwards unchanged to the implementation selected for rLocale; RuntimeException if none can be obtained.
+    return getLocaleSpecificCharacterClassification(rLocale)->getCharacterType(Text, nPos, rLocale);
+}
+
+sal_Int32 SAL_CALL
+CharacterClassificationImpl::getStringType( const OUString& Text, sal_Int32 nPos,
+        sal_Int32 nCount, const Locale& rLocale )
+{   // Forwards unchanged to the implementation selected for rLocale; RuntimeException if none can be obtained.
+    return getLocaleSpecificCharacterClassification(rLocale)->getStringType(Text, nPos, nCount, rLocale);
+}
+
+ParseResult SAL_CALL CharacterClassificationImpl::parseAnyToken(
+        const OUString& Text, sal_Int32 nPos, const Locale& rLocale,
+        sal_Int32 startCharTokenType, const OUString& userDefinedCharactersStart,
+        sal_Int32 contCharTokenType, const OUString& userDefinedCharactersCont )
+{   // Forwards all arguments unchanged to the implementation selected for rLocale and returns its result.
+    return getLocaleSpecificCharacterClassification(rLocale)->parseAnyToken(Text, nPos, rLocale,
+            startCharTokenType,userDefinedCharactersStart,
+            contCharTokenType, userDefinedCharactersCont);
+}
+
+
+ParseResult SAL_CALL CharacterClassificationImpl::parsePredefinedToken(
+        sal_Int32 nTokenType, const OUString& Text, sal_Int32 nPos,
+        const Locale& rLocale, sal_Int32 startCharTokenType,
+        const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
+        const OUString& userDefinedCharactersCont )
+{   // Forwards all arguments unchanged to the implementation selected for rLocale and returns its result.
+    return getLocaleSpecificCharacterClassification(rLocale)->parsePredefinedToken(
+            nTokenType, Text, nPos, rLocale, startCharTokenType, userDefinedCharactersStart,
+            contCharTokenType, userDefinedCharactersCont);
+}
+
+bool CharacterClassificationImpl::createLocaleSpecificCharacterClassification(const OUString& serviceName, const Locale& rLocale)
+{
+    // to share service between same Language but different Country code, like zh_CN and zh_SG
+    for (const auto & rKnown : lookupTable) {
+        cachedItem = rKnown;
+        if (serviceName != cachedItem->aName)
+            continue;
+        // rKnown must not be used past this point: emplace_back() may reallocate.
+        lookupTable.emplace_back( rLocale, serviceName, cachedItem->xCI );
+        cachedItem = lookupTable.back();
+        return true;
+    }
+
+    // Not in the table yet: instantiate the service named after serviceName.
+    Reference < XInterface > xI = m_xContext->getServiceManager()->createInstanceWithContext(
+            "com.sun.star.i18n.CharacterClassification_" + serviceName, m_xContext);
+    if ( !xI.is() )
+        return false;
+    Reference < XCharacterClassification > xCI;
+    xCI.set( xI, UNO_QUERY );
+    if ( !xCI.is() )
+        return false;
+    lookupTable.emplace_back( rLocale, serviceName, xCI );
+    cachedItem = lookupTable.back();
+    return true;
+}
+
+Reference < XCharacterClassification > const &
+CharacterClassificationImpl::getLocaleSpecificCharacterClassification(const Locale& rLocale)
+{
+    // Returns the implementation to use for rLocale, creating and
+    // registering it on first use; cachedItem refers to it afterwards.
+
+    // reuse instance if locale didn't change
+    if (cachedItem && cachedItem->equals(rLocale))
+        return cachedItem->xCI;
+
+    // Otherwise look for an entry that was registered for this locale earlier.
+    for (const auto & rEntry : lookupTable) {
+        cachedItem = rEntry;
+        if (cachedItem->equals(rLocale))
+            return cachedItem->xCI;
+    }
+
+    // Load service with name <base>_<lang>_<country> or
+    // <base>_<bcp47> and fallbacks.
+    bool bLoaded = createLocaleSpecificCharacterClassification(
+            LocaleDataImpl::getFirstLocaleServiceName( rLocale), rLocale);
+    if (!bLoaded)
+    {
+        const ::std::vector< OUString > aFallbacks( LocaleDataImpl::getFallbackLocaleServiceNames( rLocale));
+        for (auto it = aFallbacks.begin(); it != aFallbacks.end() && !bLoaded; ++it)
+            bLoaded = createLocaleSpecificCharacterClassification(*it, rLocale);
+    }
+    if (bLoaded)
+        return cachedItem->xCI;
+
+    // Nothing locale specific could be created: fall back to xUCI and
+    // register it for this locale.
+    if (!xUCI.is())
+        throw RuntimeException();
+    lookupTable.emplace_back( rLocale, "Unicode", xUCI );
+    cachedItem = lookupTable.back();
+    return cachedItem->xCI;
+}
+
+OUString SAL_CALL
+CharacterClassificationImpl::getImplementationName()
+{   // Returns the fixed implementation name of this class.
+    return "com.sun.star.i18n.CharacterClassification";
+}
+
+sal_Bool SAL_CALL
+CharacterClassificationImpl::supportsService(const OUString& rServiceName)
+{   // Delegates the check for rServiceName to the cppu::supportsService() helper.
+    return cppu::supportsService(this, rServiceName);
+}
+
+Sequence< OUString > SAL_CALL
+CharacterClassificationImpl::getSupportedServiceNames()
+{   // Returns a sequence holding the single supported service name.
+    return { "com.sun.star.i18n.CharacterClassification" };
+}
+
+}
+
+extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface *
+com_sun_star_i18n_CharacterClassification_get_implementation(
+    css::uno::XComponentContext *context,
+    css::uno::Sequence<css::uno::Any> const &)     // the argument sequence is unnamed and not used
+{   // Exported C entry point: creates a CharacterClassificationImpl for context and returns it via cppu::acquire().
+    return cppu::acquire(new i18npool::CharacterClassificationImpl(context));
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/characterclassification/unoscripttypedetector.cxx b/i18npool/source/characterclassification/unoscripttypedetector.cxx
new file mode 100644
index 000000000..afcd2708c
--- /dev/null
+++ b/i18npool/source/characterclassification/unoscripttypedetector.cxx
@@ -0,0 +1,91 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ *   Licensed to the Apache Software Foundation (ASF) under one or more
+ *   contributor license agreements. See the NOTICE file distributed
+ *   with this work for additional information regarding copyright
+ *   ownership. The ASF licenses this file to you under the Apache
+ *   License, Version 2.0 (the "License"); you may not use this file
+ *   except in compliance with the License. You may obtain a copy of
+ *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <unoscripttypedetector.hxx>
+#include <cppuhelper/supportsservice.hxx>
+#include <i18nutil/scripttypedetector.hxx>
+
+namespace com::sun::star::uno { class XComponentContext; }
+
+// UNO wrapper: forwards Text, nPos and defaultScriptDirection unchanged to
+// the static ScriptTypeDetector::getScriptDirection and returns its result.
+sal_Int16 SAL_CALL
+UnoScriptTypeDetector::getScriptDirection( const OUString& Text, sal_Int32 nPos, sal_Int16 defaultScriptDirection )
+{
+    const sal_Int16 nScriptDirection
+        = ScriptTypeDetector::getScriptDirection(Text, nPos, defaultScriptDirection);
+    return nScriptDirection;
+}
+
+// UNO wrapper around the static ScriptTypeDetector::beginOfScriptDirection;
+// all arguments are passed through unchanged.
+// A return value of -1 means that either the direction at nPos is not the
+// same as the requested direction, or nPos is out of range.
+sal_Int32 SAL_CALL
+UnoScriptTypeDetector::beginOfScriptDirection( const OUString& Text, sal_Int32 nPos, sal_Int16 direction )
+{
+    const sal_Int32 nBegin
+        = ScriptTypeDetector::beginOfScriptDirection(Text, nPos, direction);
+    return nBegin;
+}
+
+// UNO wrapper: forwards Text, nPos and direction unchanged to the static
+// ScriptTypeDetector::endOfScriptDirection and returns its result.
+sal_Int32 SAL_CALL
+UnoScriptTypeDetector::endOfScriptDirection( const OUString& Text, sal_Int32 nPos, sal_Int16 direction )
+{
+    const sal_Int32 nEnd
+        = ScriptTypeDetector::endOfScriptDirection(Text, nPos, direction);
+    return nEnd;
+}
+
+// UNO wrapper: forwards Text and nPos unchanged to the static
+// ScriptTypeDetector::getCTLScriptType and returns its result.
+sal_Int16 SAL_CALL
+UnoScriptTypeDetector::getCTLScriptType( const OUString& Text, sal_Int32 nPos )
+{
+    const sal_Int16 nScriptType = ScriptTypeDetector::getCTLScriptType(Text, nPos);
+    return nScriptType;
+}
+
+// UNO wrapper around the static ScriptTypeDetector::beginOfCTLScriptType;
+// Text and nPos are passed through unchanged.
+// The begin of a script type is inclusive.
+sal_Int32 SAL_CALL
+UnoScriptTypeDetector::beginOfCTLScriptType( const OUString& Text, sal_Int32 nPos )
+{
+    const sal_Int32 nBegin = ScriptTypeDetector::beginOfCTLScriptType(Text, nPos);
+    return nBegin;
+}
+
+// UNO wrapper around the static ScriptTypeDetector::endOfCTLScriptType;
+// Text and nPos are passed through unchanged.
+// The end of a script type is exclusive: the returned value points to the
+// begin of the next script type.
+sal_Int32 SAL_CALL
+UnoScriptTypeDetector::endOfCTLScriptType( const OUString& Text, sal_Int32 nPos )
+{
+    const sal_Int32 nEnd = ScriptTypeDetector::endOfCTLScriptType(Text, nPos);
+    return nEnd;
+}
+
+// XServiceInfo: report the implementation name of this component.
+// Returns the fixed string "com.sun.star.i18n.ScriptTypeDetector", the same
+// string that getSupportedServiceNames() lists as the service name.
+OUString SAL_CALL
+UnoScriptTypeDetector::getImplementationName()
+{
+    return "com.sun.star.i18n.ScriptTypeDetector";
+}
+
+// XServiceInfo: answer whether ServiceName is a service of this object.
+// The decision is delegated to the generic cppu::supportsService helper,
+// which is handed this object and the queried name.
+sal_Bool SAL_CALL
+UnoScriptTypeDetector::supportsService(const OUString& ServiceName)
+{
+    const sal_Bool bSupported = cppu::supportsService(this, ServiceName);
+    return bSupported;
+}
+
+// XServiceInfo: list the service names of this component.
+// Returns a one-element sequence holding
+// "com.sun.star.i18n.ScriptTypeDetector".
+css::uno::Sequence< OUString > SAL_CALL
+UnoScriptTypeDetector::getSupportedServiceNames()
+{
+    return { "com.sun.star.i18n.ScriptTypeDetector" };
+}
+
+// Exported C-linkage constructor function for the ScriptTypeDetector
+// component. Creates a new UnoScriptTypeDetector, passes it to cppu::acquire
+// and returns the result. Both parameters (component context and argument
+// sequence) are unnamed and not used.
+extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface *
+com_sun_star_i18n_ScriptTypeDetector_get_implementation(
+    css::uno::XComponentContext *,
+    css::uno::Sequence<css::uno::Any> const &)
+{
+    auto* pDetector = new UnoScriptTypeDetector;
+    return cppu::acquire(pDetector);
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */