summaryrefslogtreecommitdiffstats
path: root/i18npool/source/characterclassification/cclass_unicode_parser.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'i18npool/source/characterclassification/cclass_unicode_parser.cxx')
-rw-r--r--i18npool/source/characterclassification/cclass_unicode_parser.cxx1076
1 files changed, 1076 insertions, 0 deletions
diff --git a/i18npool/source/characterclassification/cclass_unicode_parser.cxx b/i18npool/source/characterclassification/cclass_unicode_parser.cxx
new file mode 100644
index 0000000000..9ed95e2f90
--- /dev/null
+++ b/i18npool/source/characterclassification/cclass_unicode_parser.cxx
@@ -0,0 +1,1076 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+
+#include <cclass_unicode.hxx>
+#include <unicode/uchar.h>
+#include <rtl/character.hxx>
+#include <rtl/math.hxx>
+#include <rtl/ustring.hxx>
+#include <com/sun/star/i18n/KParseTokens.hpp>
+#include <com/sun/star/i18n/KParseType.hpp>
+#include <com/sun/star/i18n/LocaleData2.hpp>
+#include <com/sun/star/i18n/NativeNumberMode.hpp>
+#include <com/sun/star/i18n/NativeNumberSupplier.hpp>
+
+#include <string.h>
+#include <string_view>
+
+using namespace ::com::sun::star::uno;
+using namespace ::com::sun::star::i18n;
+using namespace ::com::sun::star::lang;
+
+#define TOKEN_DIGIT_FLAGS (ParserFlags::CHAR_VALUE | ParserFlags::VALUE | ParserFlags::VALUE_EXP | ParserFlags::VALUE_EXP_VALUE | ParserFlags::VALUE_DIGIT)
+
+namespace i18npool {
+
+// Default identifier/name specification is [A-Za-z_][A-Za-z0-9_]*
+
+const sal_uInt8 cclass_Unicode::nDefCnt = 128; // number of entries in the ASCII tables below, one per code point 0..127
+const ParserFlags cclass_Unicode::pDefaultParserTable[ nDefCnt ] = // default flags per ASCII code point; initParserTable() copies this into pTable and then adjusts the copy
+{
+// A trailing "// (...)" comment on an entry shows the Calc formula compiler specific setting, which is commented out and replaced by the value on that line
+
+ /* \0 */ ParserFlags::EXCLUDED,
+ ParserFlags::ILLEGAL,
+ ParserFlags::ILLEGAL,
+ ParserFlags::ILLEGAL,
+ ParserFlags::ILLEGAL,
+ ParserFlags::ILLEGAL,
+ ParserFlags::ILLEGAL,
+ ParserFlags::ILLEGAL,
+ ParserFlags::ILLEGAL,
+ /* 9 \t */ ParserFlags::CHAR_DONTCARE | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::ILLEGAL)
+ ParserFlags::ILLEGAL,
+ /* 11 \v */ ParserFlags::CHAR_DONTCARE | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::ILLEGAL)
+ ParserFlags::ILLEGAL,
+ ParserFlags::ILLEGAL,
+ ParserFlags::ILLEGAL,
+ ParserFlags::ILLEGAL,
+ ParserFlags::ILLEGAL,
+ ParserFlags::ILLEGAL,
+ ParserFlags::ILLEGAL,
+ ParserFlags::ILLEGAL,
+ ParserFlags::ILLEGAL,
+ ParserFlags::ILLEGAL,
+ ParserFlags::ILLEGAL,
+ ParserFlags::ILLEGAL,
+ ParserFlags::ILLEGAL,
+ ParserFlags::ILLEGAL,
+ ParserFlags::ILLEGAL,
+ ParserFlags::ILLEGAL,
+ ParserFlags::ILLEGAL,
+ ParserFlags::ILLEGAL,
+ ParserFlags::ILLEGAL,
+ ParserFlags::ILLEGAL,
+ /* 32 */ ParserFlags::CHAR_DONTCARE | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
+ /* 33 ! */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
+ /* 34 " */ ParserFlags::CHAR_STRING | ParserFlags::STRING_SEP,
+ /* 35 # */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::WORD_SEP)
+ /* 36 $ */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::CHAR_WORD | ParserFlags::WORD)
+ /* 37 % */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::VALUE)
+ /* 38 & */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
+ /* 39 ' */ ParserFlags::NAME_SEP,
+ /* 40 ( */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
+ /* 41 ) */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
+ /* 42 * */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
+ /* 43 + */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP | ParserFlags::VALUE_EXP | ParserFlags::VALUE_SIGN,
+ /* 44 , */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::CHAR_VALUE | ParserFlags::VALUE)
+ /* 45 - */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP | ParserFlags::VALUE_EXP | ParserFlags::VALUE_SIGN,
+ /* 46 . */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::WORD | ParserFlags::CHAR_VALUE | ParserFlags::VALUE)
+ /* 47 / */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
+ //for ( i = 48; i < 58; i++ )
+ /* 48 0 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
+ /* 49 1 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
+ /* 50 2 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
+ /* 51 3 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
+ /* 52 4 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
+ /* 53 5 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
+ /* 54 6 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
+ /* 55 7 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
+ /* 56 8 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
+ /* 57 9 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
+ /* 58 : */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::WORD)
+ /* 59 ; */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
+ /* 60 < */ ParserFlags::CHAR_BOOL | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
+ /* 61 = */ ParserFlags::CHAR | ParserFlags::BOOL | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
+ /* 62 > */ ParserFlags::CHAR_BOOL | ParserFlags::BOOL | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
+ /* 63 ? */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::CHAR_WORD | ParserFlags::WORD)
+ /* 64 @ */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::ILLEGAL // UNUSED)
+ //for ( i = 65; i < 91; i++ )
+ /* 65 A */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 66 B */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 67 C */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 68 D */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 69 E */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 70 F */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 71 G */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 72 H */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 73 I */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 74 J */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 75 K */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 76 L */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 77 M */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 78 N */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 79 O */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 80 P */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 81 Q */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 82 R */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 83 S */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 84 T */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 85 U */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 86 V */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 87 W */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 88 X */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 89 Y */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 90 Z */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 91 [ */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::ILLEGAL // UNUSED)
+ /* 92 \ */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::ILLEGAL // UNUSED)
+ /* 93 ] */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::ILLEGAL // UNUSED)
+ /* 94 ^ */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
+ /* 95 _ */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 96 ` */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::ILLEGAL // UNUSED)
+ //for ( i = 97; i < 123; i++ )
+ /* 97 a */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 98 b */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 99 c */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 100 d */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 101 e */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 102 f */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 103 g */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 104 h */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 105 i */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 106 j */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 107 k */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 108 l */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 109 m */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 110 n */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 111 o */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 112 p */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 113 q */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 114 r */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 115 s */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 116 t */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 117 u */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 118 v */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 119 w */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 120 x */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 121 y */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 122 z */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
+ /* 123 { */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::ILLEGAL // UNUSED)
+ /* 124 | */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::ILLEGAL // UNUSED)
+ /* 125 } */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::ILLEGAL // UNUSED)
+ /* 126 ~ */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::ILLEGAL // UNUSED)
+ /* 127 */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP // (ParserFlags::ILLEGAL // UNUSED)
+};
+
+
+const sal_Int32 cclass_Unicode::pParseTokensType[ nDefCnt ] = // KParseTokens::ASC_* class per ASCII code point; getParseTokensType() indexes it directly for c < nDefCnt
+{
+ /* \0 */ KParseTokens::ASC_OTHER,
+ KParseTokens::ASC_CONTROL,
+ KParseTokens::ASC_CONTROL,
+ KParseTokens::ASC_CONTROL,
+ KParseTokens::ASC_CONTROL,
+ KParseTokens::ASC_CONTROL,
+ KParseTokens::ASC_CONTROL,
+ KParseTokens::ASC_CONTROL,
+ KParseTokens::ASC_CONTROL,
+ /* 9 \t */ KParseTokens::ASC_CONTROL,
+ KParseTokens::ASC_CONTROL,
+ /* 11 \v */ KParseTokens::ASC_CONTROL,
+ KParseTokens::ASC_CONTROL,
+ KParseTokens::ASC_CONTROL,
+ KParseTokens::ASC_CONTROL,
+ KParseTokens::ASC_CONTROL,
+ KParseTokens::ASC_CONTROL,
+ KParseTokens::ASC_CONTROL,
+ KParseTokens::ASC_CONTROL,
+ KParseTokens::ASC_CONTROL,
+ KParseTokens::ASC_CONTROL,
+ KParseTokens::ASC_CONTROL,
+ KParseTokens::ASC_CONTROL,
+ KParseTokens::ASC_CONTROL,
+ KParseTokens::ASC_CONTROL,
+ KParseTokens::ASC_CONTROL,
+ KParseTokens::ASC_CONTROL,
+ KParseTokens::ASC_CONTROL,
+ KParseTokens::ASC_CONTROL,
+ KParseTokens::ASC_CONTROL,
+ KParseTokens::ASC_CONTROL,
+ KParseTokens::ASC_CONTROL,
+ /* 32 */ KParseTokens::ASC_OTHER,
+ /* 33 ! */ KParseTokens::ASC_OTHER,
+ /* 34 " */ KParseTokens::ASC_OTHER,
+ /* 35 # */ KParseTokens::ASC_OTHER,
+ /* 36 $ */ KParseTokens::ASC_DOLLAR,
+ /* 37 % */ KParseTokens::ASC_OTHER,
+ /* 38 & */ KParseTokens::ASC_OTHER,
+ /* 39 ' */ KParseTokens::ASC_OTHER,
+ /* 40 ( */ KParseTokens::ASC_OTHER,
+ /* 41 ) */ KParseTokens::ASC_OTHER,
+ /* 42 * */ KParseTokens::ASC_OTHER,
+ /* 43 + */ KParseTokens::ASC_OTHER,
+ /* 44 , */ KParseTokens::ASC_OTHER,
+ /* 45 - */ KParseTokens::ASC_OTHER,
+ /* 46 . */ KParseTokens::ASC_DOT,
+ /* 47 / */ KParseTokens::ASC_OTHER,
+ //for ( i = 48; i < 58; i++ )
+ /* 48 0 */ KParseTokens::ASC_DIGIT,
+ /* 49 1 */ KParseTokens::ASC_DIGIT,
+ /* 50 2 */ KParseTokens::ASC_DIGIT,
+ /* 51 3 */ KParseTokens::ASC_DIGIT,
+ /* 52 4 */ KParseTokens::ASC_DIGIT,
+ /* 53 5 */ KParseTokens::ASC_DIGIT,
+ /* 54 6 */ KParseTokens::ASC_DIGIT,
+ /* 55 7 */ KParseTokens::ASC_DIGIT,
+ /* 56 8 */ KParseTokens::ASC_DIGIT,
+ /* 57 9 */ KParseTokens::ASC_DIGIT,
+ /* 58 : */ KParseTokens::ASC_COLON,
+ /* 59 ; */ KParseTokens::ASC_OTHER,
+ /* 60 < */ KParseTokens::ASC_OTHER,
+ /* 61 = */ KParseTokens::ASC_OTHER,
+ /* 62 > */ KParseTokens::ASC_OTHER,
+ /* 63 ? */ KParseTokens::ASC_OTHER,
+ /* 64 @ */ KParseTokens::ASC_OTHER,
+ //for ( i = 65; i < 91; i++ )
+ /* 65 A */ KParseTokens::ASC_UPALPHA,
+ /* 66 B */ KParseTokens::ASC_UPALPHA,
+ /* 67 C */ KParseTokens::ASC_UPALPHA,
+ /* 68 D */ KParseTokens::ASC_UPALPHA,
+ /* 69 E */ KParseTokens::ASC_UPALPHA,
+ /* 70 F */ KParseTokens::ASC_UPALPHA,
+ /* 71 G */ KParseTokens::ASC_UPALPHA,
+ /* 72 H */ KParseTokens::ASC_UPALPHA,
+ /* 73 I */ KParseTokens::ASC_UPALPHA,
+ /* 74 J */ KParseTokens::ASC_UPALPHA,
+ /* 75 K */ KParseTokens::ASC_UPALPHA,
+ /* 76 L */ KParseTokens::ASC_UPALPHA,
+ /* 77 M */ KParseTokens::ASC_UPALPHA,
+ /* 78 N */ KParseTokens::ASC_UPALPHA,
+ /* 79 O */ KParseTokens::ASC_UPALPHA,
+ /* 80 P */ KParseTokens::ASC_UPALPHA,
+ /* 81 Q */ KParseTokens::ASC_UPALPHA,
+ /* 82 R */ KParseTokens::ASC_UPALPHA,
+ /* 83 S */ KParseTokens::ASC_UPALPHA,
+ /* 84 T */ KParseTokens::ASC_UPALPHA,
+ /* 85 U */ KParseTokens::ASC_UPALPHA,
+ /* 86 V */ KParseTokens::ASC_UPALPHA,
+ /* 87 W */ KParseTokens::ASC_UPALPHA,
+ /* 88 X */ KParseTokens::ASC_UPALPHA,
+ /* 89 Y */ KParseTokens::ASC_UPALPHA,
+ /* 90 Z */ KParseTokens::ASC_UPALPHA,
+ /* 91 [ */ KParseTokens::ASC_OTHER,
+ /* 92 \ */ KParseTokens::ASC_OTHER,
+ /* 93 ] */ KParseTokens::ASC_OTHER,
+ /* 94 ^ */ KParseTokens::ASC_OTHER,
+ /* 95 _ */ KParseTokens::ASC_UNDERSCORE,
+ /* 96 ` */ KParseTokens::ASC_OTHER,
+ //for ( i = 97; i < 123; i++ )
+ /* 97 a */ KParseTokens::ASC_LOALPHA,
+ /* 98 b */ KParseTokens::ASC_LOALPHA,
+ /* 99 c */ KParseTokens::ASC_LOALPHA,
+ /* 100 d */ KParseTokens::ASC_LOALPHA,
+ /* 101 e */ KParseTokens::ASC_LOALPHA,
+ /* 102 f */ KParseTokens::ASC_LOALPHA,
+ /* 103 g */ KParseTokens::ASC_LOALPHA,
+ /* 104 h */ KParseTokens::ASC_LOALPHA,
+ /* 105 i */ KParseTokens::ASC_LOALPHA,
+ /* 106 j */ KParseTokens::ASC_LOALPHA,
+ /* 107 k */ KParseTokens::ASC_LOALPHA,
+ /* 108 l */ KParseTokens::ASC_LOALPHA,
+ /* 109 m */ KParseTokens::ASC_LOALPHA,
+ /* 110 n */ KParseTokens::ASC_LOALPHA,
+ /* 111 o */ KParseTokens::ASC_LOALPHA,
+ /* 112 p */ KParseTokens::ASC_LOALPHA,
+ /* 113 q */ KParseTokens::ASC_LOALPHA,
+ /* 114 r */ KParseTokens::ASC_LOALPHA,
+ /* 115 s */ KParseTokens::ASC_LOALPHA,
+ /* 116 t */ KParseTokens::ASC_LOALPHA,
+ /* 117 u */ KParseTokens::ASC_LOALPHA,
+ /* 118 v */ KParseTokens::ASC_LOALPHA,
+ /* 119 w */ KParseTokens::ASC_LOALPHA,
+ /* 120 x */ KParseTokens::ASC_LOALPHA,
+ /* 121 y */ KParseTokens::ASC_LOALPHA,
+ /* 122 z */ KParseTokens::ASC_LOALPHA,
+ /* 123 { */ KParseTokens::ASC_OTHER,
+ /* 124 | */ KParseTokens::ASC_OTHER,
+ /* 125 } */ KParseTokens::ASC_OTHER,
+ /* 126 ~ */ KParseTokens::ASC_OTHER,
+ /* 127 */ KParseTokens::ASC_OTHER
+};
+
+
+// static
+// Searches the NUL-terminated UTF-16 string pStr for the code point c and
+// returns a pointer to the first matching code unit, or nullptr when pStr is
+// null or c does not occur. c is split into UTF-16 units first; when the
+// split yields more than one unit, the unit following the match must equal
+// the second unit as well.
+const sal_Unicode* cclass_Unicode::StrChr( const sal_Unicode* pStr, sal_uInt32 c )
+{
+    if ( pStr == nullptr )
+        return nullptr;
+
+    sal_Unicode aUnits[2];
+    auto const nUnits = rtl::splitSurrogates(c, aUnits);
+
+    for ( const sal_Unicode* pCur = pStr; *pCur; pCur++ )
+    {
+        if ( *pCur != aUnits[0] )
+            continue;
+        // Reading pCur[1] is safe: *pCur is non-NUL, so at worst pCur[1]
+        // is the terminator.
+        if ( nUnits == 1 || pCur[1] == aUnits[1] )
+            return pCur;
+    }
+    return nullptr;
+}
+
+
+// Maps a code point to its KParseTokens classification.
+//
+// c        the code point to classify
+// isFirst  true when c is the first code point of the token being scanned
+//
+// ASCII code points (c < nDefCnt) are looked up in pParseTokensType; all
+// others are classified by their ICU general category. Categories without a
+// matching KParseTokens::UNI_... constant yield KParseTokens::UNI_OTHER.
+sal_Int32 cclass_Unicode::getParseTokensType(sal_uInt32 const c, bool const isFirst)
+{
+    if ( c < nDefCnt )
+        return pParseTokensType[ sal_uInt8(c) ];
+
+    //! all KParseTokens::UNI_... must be matched
+    switch (u_charType(c))
+    {
+        case U_UPPERCASE_LETTER :
+            return KParseTokens::UNI_UPALPHA;
+        case U_LOWERCASE_LETTER :
+            return KParseTokens::UNI_LOALPHA;
+        case U_TITLECASE_LETTER :
+            return KParseTokens::UNI_TITLE_ALPHA;
+        case U_MODIFIER_LETTER :
+            return KParseTokens::UNI_MODIFIER_LETTER;
+        case U_NON_SPACING_MARK :
+            // Non_Spacing_Mark could not be as leading character; only a
+            // mark that continues a token is reported as a letter. The guard
+            // belongs on the mark, not on U_OTHER_LETTER, which is a valid
+            // leading character.
+            if (isFirst) break;
+            [[fallthrough]]; // treat it as Other_Letter.
+        case U_OTHER_LETTER :
+            return KParseTokens::UNI_OTHER_LETTER;
+        case U_DECIMAL_DIGIT_NUMBER :
+            return KParseTokens::UNI_DIGIT;
+        case U_LETTER_NUMBER :
+            return KParseTokens::UNI_LETTER_NUMBER;
+        case U_OTHER_NUMBER :
+            return KParseTokens::UNI_OTHER_NUMBER;
+    }
+
+    return KParseTokens::UNI_OTHER;
+}
+
+// Remembers rLocale as the parser locale (Language, Country and Variant are
+// copied when any of them differs) and creates the LocaleData2 service on
+// first use.
+void cclass_Unicode::setupInternational( const Locale& rLocale )
+{
+    const bool bSameLocale = aParserLocale.Language == rLocale.Language
+        && aParserLocale.Country == rLocale.Country
+        && aParserLocale.Variant == rLocale.Variant;
+    if ( !bSameLocale )
+    {
+        aParserLocale.Language = rLocale.Language;
+        aParserLocale.Country = rLocale.Country;
+        aParserLocale.Variant = rLocale.Variant;
+    }
+
+    if ( mxLocaleData.is() )
+        return;
+    mxLocaleData.set( LocaleData2::create(m_xContext) );
+}
+
+
+// Makes sure the parser table matches the requested setup. The table is
+// rebuilt via initParserTable() when it does not exist yet, or when the
+// locale, either token type mask, or either user-defined character set
+// differs from what the current table was built with.
+void cclass_Unicode::setupParserTable( const Locale& rLocale, sal_Int32 startCharTokenType,
+        const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
+        const OUString& userDefinedCharactersCont )
+{
+    const bool bLocaleDiffers = rLocale.Language != aParserLocale.Language
+        || rLocale.Country != aParserLocale.Country
+        || rLocale.Variant != aParserLocale.Variant;
+    const bool bSetupDiffers = startCharTokenType != nStartTypes
+        || contCharTokenType != nContTypes
+        || userDefinedCharactersStart != aStartChars
+        || userDefinedCharactersCont != aContChars;
+
+    if ( pTable && !bLocaleDiffers && !bSetupDiffers )
+        return; // current table is still valid
+
+    initParserTable( rLocale, startCharTokenType, userDefinedCharactersStart,
+            contCharTokenType, userDefinedCharactersCont );
+}
+
+
+// (Re)builds the parser state for the given locale and token setup.
+//
+// rLocale                     locale whose separators are merged into the table
+// startCharTokenType          KParseTokens mask of classes allowed as first character
+// userDefinedCharactersStart  additional characters allowed as first character
+// contCharTokenType           KParseTokens mask of classes allowed as continuation
+// userDefinedCharactersCont   additional characters allowed as continuation
+//
+// Steps: copy pDefaultParserTable into pTable, merge the locale's group and
+// decimal separators, apply the ASC_... bits of both masks to the ASCII
+// entries, then merge the user-defined characters. pStart and pCont hold one
+// flag per code unit of the respective user-defined string.
+void cclass_Unicode::initParserTable( const Locale& rLocale, sal_Int32 startCharTokenType,
+        const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
+        const OUString& userDefinedCharactersCont )
+{
+    // (Re)Init
+    setupInternational( rLocale );
+    // Memory of pTable is reused.
+    if ( !pTable )
+        pTable.reset(new ParserFlags[nDefCnt]);
+    memcpy( pTable.get(), pDefaultParserTable, sizeof(ParserFlags) * nDefCnt );
+    // Start and cont tables only need reallocation if different length.
+    // The comparison must happen before aStartChars/aContChars are replaced.
+    if ( pStart && userDefinedCharactersStart.getLength() != aStartChars.getLength() )
+    {
+        pStart.reset();
+    }
+    if ( pCont && userDefinedCharactersCont.getLength() != aContChars.getLength() )
+    {
+        pCont.reset();
+    }
+    nStartTypes = startCharTokenType;
+    nContTypes = contCharTokenType;
+    aStartChars = userDefinedCharactersStart;
+    aContChars = userDefinedCharactersCont;
+
+    // specials
+    if( mxLocaleData.is() )
+    {
+        LocaleDataItem2 aItem =
+            mxLocaleData->getLocaleItem2( aParserLocale );
+//!TODO: theoretically separators may be a string, adjustment would have to be
+//! done here and in parsing and in ::rtl::math::stringToDouble()
+        cGroupSep = aItem.thousandSeparator[0];
+        cDecimalSep = aItem.decimalSeparator[0];
+        cDecimalSepAlt = aItem.decimalSeparatorAlternative.toChar();
+    }
+
+    if (nContTypes & KParseTokens::GROUP_SEPARATOR_IN_NUMBER)
+    {
+        if ( cGroupSep < nDefCnt )
+            pTable[cGroupSep] |= ParserFlags::VALUE;
+    }
+    else
+    {
+        // Group separators are not accepted in numbers for this setup.
+        cGroupSep = 0;
+    }
+    if ( cDecimalSep < nDefCnt )
+        pTable[cDecimalSep] |= ParserFlags::CHAR_VALUE | ParserFlags::VALUE;
+    if ( cDecimalSepAlt && cDecimalSepAlt < nDefCnt )
+        pTable[cDecimalSepAlt] |= ParserFlags::CHAR_VALUE | ParserFlags::VALUE;
+
+    // Modify characters according to KParseTokens definitions.
+    {
+        using namespace KParseTokens;
+        sal_uInt8 i;
+
+        if ( !(nStartTypes & ASC_UPALPHA) )
+            for ( i = 65; i < 91; i++ )
+                pTable[i] &= ~ParserFlags::CHAR_WORD; // not allowed as start character
+        if ( !(nContTypes & ASC_UPALPHA) )
+            for ( i = 65; i < 91; i++ )
+                pTable[i] &= ~ParserFlags::WORD; // not allowed as cont character
+
+        if ( !(nStartTypes & ASC_LOALPHA) )
+            for ( i = 97; i < 123; i++ )
+                pTable[i] &= ~ParserFlags::CHAR_WORD; // not allowed as start character
+        if ( !(nContTypes & ASC_LOALPHA) )
+            for ( i = 97; i < 123; i++ )
+                pTable[i] &= ~ParserFlags::WORD; // not allowed as cont character
+
+        // Digits carry WORD but not CHAR_WORD by default, hence the start
+        // bit is added rather than removed.
+        if ( nStartTypes & ASC_DIGIT )
+            for ( i = 48; i < 58; i++ )
+                pTable[i] |= ParserFlags::CHAR_WORD; // allowed as start character
+        if ( !(nContTypes & ASC_DIGIT) )
+            for ( i = 48; i < 58; i++ )
+                pTable[i] &= ~ParserFlags::WORD; // not allowed as cont character
+
+        if ( !(nStartTypes & ASC_UNDERSCORE) )
+            pTable[95] &= ~ParserFlags::CHAR_WORD; // not allowed as start character
+        if ( !(nContTypes & ASC_UNDERSCORE) )
+            pTable[95] &= ~ParserFlags::WORD; // not allowed as cont character
+
+        if ( nStartTypes & ASC_DOLLAR )
+            pTable[36] |= ParserFlags::CHAR_WORD; // allowed as start character
+        if ( nContTypes & ASC_DOLLAR )
+            pTable[36] |= ParserFlags::WORD; // allowed as cont character
+
+        if ( nStartTypes & ASC_DOT )
+            pTable[46] |= ParserFlags::CHAR_WORD; // allowed as start character
+        if ( nContTypes & ASC_DOT )
+            pTable[46] |= ParserFlags::WORD; // allowed as cont character
+
+        if ( nStartTypes & ASC_COLON )
+            pTable[58] |= ParserFlags::CHAR_WORD; // allowed as start character
+        if ( nContTypes & ASC_COLON )
+            pTable[58] |= ParserFlags::WORD; // allowed as cont character
+
+        if ( nStartTypes & ASC_CONTROL )
+            for ( i = 1; i < 32; i++ )
+                pTable[i] |= ParserFlags::CHAR_WORD; // allowed as start character
+        if ( nContTypes & ASC_CONTROL )
+            for ( i = 1; i < 32; i++ )
+                pTable[i] |= ParserFlags::WORD; // allowed as cont character
+
+        if ( nStartTypes & ASC_ANY_BUT_CONTROL )
+            for ( i = 32; i < nDefCnt; i++ )
+                pTable[i] |= ParserFlags::CHAR_WORD; // allowed as start character
+        if ( nContTypes & ASC_ANY_BUT_CONTROL )
+            for ( i = 32; i < nDefCnt; i++ )
+                pTable[i] |= ParserFlags::WORD; // allowed as cont character
+
+    }
+
+    // Merge in (positively override with) user defined characters.
+    // StartChars
+    sal_Int32 nLen = aStartChars.getLength();
+    if ( nLen )
+    {
+        if ( !pStart )
+            pStart.reset(new ParserFlags[ nLen ]);
+        const sal_Unicode* p = aStartChars.getStr();
+        for ( sal_Int32 j=0; j<nLen; j++, p++ )
+        {
+            pStart[j] = ParserFlags::CHAR_WORD;
+            if ( *p < nDefCnt )
+                pTable[*p] |= ParserFlags::CHAR_WORD;
+        }
+    }
+    // ContChars
+    nLen = aContChars.getLength();
+    if ( nLen )
+    {
+        if ( !pCont )
+            pCont.reset(new ParserFlags[ nLen ]);
+        const sal_Unicode* p = aContChars.getStr();
+        // p must advance together with j, otherwise only the first
+        // continuation character would ever be merged into pTable.
+        for ( sal_Int32 j=0; j<nLen; j++, p++ )
+        {
+            pCont[j] = ParserFlags::WORD;
+            if ( *p < nDefCnt )
+                pTable[*p] |= ParserFlags::WORD;
+        }
+    }
+}
+
+
+// Releases the ASCII flag table and both user-defined character flag arrays.
+void cclass_Unicode::destroyParserTable()
+{
+    pTable.reset();
+    pStart.reset();
+    pCont.reset();
+}
+
+
+// Returns the parser flags of code point c for scan state eState.
+//
+// The base flags come from pTable for ASCII code points and from
+// getFlagsExtended() otherwise. If the base flags lack the word bit relevant
+// for the state (CHAR_WORD in start-like states, WORD in ssGetValue and
+// ssGetWord), the user-defined start or continuation characters are
+// consulted; when they supply that bit, EXCLUDED is cleared.
+ParserFlags cclass_Unicode::getFlags(sal_uInt32 const c, const cclass_Unicode::ScanState eState)
+{
+    ParserFlags nFlags = ( c < nDefCnt ) ? pTable[ sal_uInt8(c) ] : getFlagsExtended(c, eState);
+
+    const bool bStartState = eState == ssGetChar || eState == ssRewindFromValue
+        || eState == ssIgnoreLeadingInRewind || eState == ssGetWordFirstChar;
+    const bool bContState = eState == ssGetValue || eState == ssGetWord;
+
+    if ( bStartState )
+    {
+        if ( !(nFlags & ParserFlags::CHAR_WORD) )
+        {
+            nFlags |= getStartCharsFlags( c );
+            if ( nFlags & ParserFlags::CHAR_WORD )
+                nFlags &= ~ParserFlags::EXCLUDED;
+        }
+    }
+    else if ( bContState )
+    {
+        if ( !(nFlags & ParserFlags::WORD) )
+        {
+            nFlags |= getContCharsFlags( c );
+            if ( nFlags & ParserFlags::WORD )
+                nFlags &= ~ParserFlags::EXCLUDED;
+        }
+    }
+    // all other states use the base flags unchanged
+    return nFlags;
+}
+
+
+// Computes parser flags for a code point from the locale separators and its
+// ICU general category; getFlags() uses this for code points outside the
+// ASCII table.
+//
+// In start-like states (ssGetChar, ssGetWordFirstChar, ssRewindFromValue,
+// ssIgnoreLeadingInRewind) nStartTypes is consulted and an accepted
+// character gets CHAR_WORD; in every other state nContTypes is consulted and
+// an accepted character gets WORD. Categories not handled below yield
+// ILLEGAL.
+ParserFlags cclass_Unicode::getFlagsExtended(sal_uInt32 const c, const cclass_Unicode::ScanState eState) const
+{
+    // Locale separators are checked before the Unicode category.
+    if ( c == cGroupSep )
+        return ParserFlags::VALUE;
+    if ( c == cDecimalSep )
+        return ParserFlags::CHAR_VALUE | ParserFlags::VALUE;
+    if ( cDecimalSepAlt && c == cDecimalSepAlt )
+        return ParserFlags::CHAR_VALUE | ParserFlags::VALUE;
+
+    const bool bStart = (eState == ssGetChar || eState == ssGetWordFirstChar ||
+            eState == ssRewindFromValue || eState == ssIgnoreLeadingInRewind);
+    const sal_Int32 nTypes = bStart ? nStartTypes : nContTypes;
+
+    // Word flag for the current position if the given KParseTokens bit is
+    // enabled in the active mask, ILLEGAL otherwise.
+    auto const wordFlagIf = [nTypes, bStart](sal_Int32 nTypeBit) -> ParserFlags
+    {
+        if ( !(nTypes & nTypeBit) )
+            return ParserFlags::ILLEGAL;
+        return bStart ? ParserFlags::CHAR_WORD : ParserFlags::WORD;
+    };
+
+    //! all KParseTokens::UNI_... must be matched
+    switch (u_charType(c))
+    {
+        case U_UPPERCASE_LETTER :
+            return wordFlagIf(KParseTokens::UNI_UPALPHA);
+        case U_LOWERCASE_LETTER :
+            return wordFlagIf(KParseTokens::UNI_LOALPHA);
+        case U_TITLECASE_LETTER :
+            return wordFlagIf(KParseTokens::UNI_TITLE_ALPHA);
+        case U_MODIFIER_LETTER :
+            return wordFlagIf(KParseTokens::UNI_MODIFIER_LETTER);
+        case U_NON_SPACING_MARK :
+        case U_COMBINING_SPACING_MARK :
+            // Neither kind of combining mark may lead a token.
+            if (bStart)
+                return ParserFlags::ILLEGAL;
+            [[fallthrough]]; // treat it as Other_Letter.
+        case U_OTHER_LETTER :
+            return wordFlagIf(KParseTokens::UNI_OTHER_LETTER);
+        case U_DECIMAL_DIGIT_NUMBER :
+            return wordFlagIf(KParseTokens::UNI_DIGIT) | TOKEN_DIGIT_FLAGS;
+        case U_LETTER_NUMBER :
+            return wordFlagIf(KParseTokens::UNI_LETTER_NUMBER) | TOKEN_DIGIT_FLAGS;
+        case U_OTHER_NUMBER :
+            return wordFlagIf(KParseTokens::UNI_OTHER_NUMBER) | TOKEN_DIGIT_FLAGS;
+        case U_SPACE_SEPARATOR :
+            if ( nTypes & KParseTokens::IGNORE_LEADING_WS )
+                return ParserFlags::CHAR_DONTCARE;
+            if ( bStart )
+                return ParserFlags::CHAR_WORD;
+            return ParserFlags::CHAR_DONTCARE | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP;
+        case U_OTHER_PUNCTUATION:
+        {
+            // fdo#61754 Mid-letter punctuation (for example U+00B7 MIDDLE
+            // DOT) may continue a word, similarly to U_NON_SPACING_MARK, but
+            // never starts one.
+            if (bStart)
+                return ParserFlags::ILLEGAL;
+            // tdf#123575 U+30FB KATAKANA MIDDLE DOT has word break property
+            // U_WB_KATAKANA rather than U_WB_MIDLETTER, so it and U+FF65
+            // HALFWIDTH KATAKANA MIDDLE DOT are tested explicitly.
+            const bool bMidLetter =
+                U_WB_MIDLETTER == u_getIntPropertyValue(c, UCHAR_WORD_BREAK)
+                || c == 0x30FB || c == 0xFF65;
+            if (!bMidLetter)
+                return ParserFlags::ILLEGAL;
+            // allowing it to continue the word
+            return (nTypes & KParseTokens::UNI_OTHER_LETTER) ?
+                ParserFlags::WORD : ParserFlags::ILLEGAL;
+        }
+    }
+
+    return ParserFlags::ILLEGAL;
+}
+
+
+// Returns the flags stored for c in the user-defined start characters, or
+// ILLEGAL when no such characters are set up or c is not among them.
+ParserFlags cclass_Unicode::getStartCharsFlags( sal_uInt32 c )
+{
+    if ( !pStart )
+        return ParserFlags::ILLEGAL;
+
+    const sal_Unicode* const pChars = aStartChars.getStr();
+    const sal_Unicode* const pHit = StrChr( pChars, c );
+    if ( pHit == nullptr )
+        return ParserFlags::ILLEGAL;
+    // pStart runs parallel to aStartChars, one entry per code unit.
+    return pStart[ pHit - pChars ];
+}
+
+
+// Returns the flags stored for c in the user-defined continuation
+// characters, or ILLEGAL when no such characters are set up or c is not
+// among them.
+// NOTE(review): c is a sal_Unicode here while getStartCharsFlags() takes a
+// sal_uInt32 and getFlags() passes a sal_uInt32 - confirm whether narrowing
+// of supplementary code points is intended.
+ParserFlags cclass_Unicode::getContCharsFlags( sal_Unicode c )
+{
+    if ( !pCont )
+        return ParserFlags::ILLEGAL;
+
+    const sal_Unicode* const pChars = aContChars.getStr();
+    const sal_Unicode* const pHit = StrChr( pChars, c );
+    if ( pHit == nullptr )
+        return ParserFlags::ILLEGAL;
+    // pCont runs parallel to aContChars, one entry per code unit.
+    return pCont[ pHit - pChars ];
+}
+
+
+void cclass_Unicode::parseText( ParseResult& r, const OUString& rText, sal_Int32 nPos, sal_Int32 nTokenType )
+{
+ assert(r.LeadingWhiteSpace == 0);
+ ScanState eState = ssGetChar;
+
+ //! All the variables below (plus ParseResult) have to be reset on ssRewindFromValue!
+ OUStringBuffer aSymbol;
+ bool isFirst(true);
+ sal_Int32 index(nPos); // index of next code point after current
+ sal_Int32 postSymbolIndex(index); // index of code point following last quote
+ sal_uInt32 current((index < rText.getLength()) ? rText.iterateCodePoints(&index) : 0);
+ sal_uInt32 cLast = 0;
+ sal_Int32 nCodePoints(0);
+ int nDecSeps = 0;
+ bool bQuote = false;
+ bool bMightBeWord = true;
+ bool bMightBeWordLast = true;
+ bool bDecSepAltUsed = false;
+ //! All the variables above (plus ParseResult) have to be reset on ssRewindFromValue!
+ sal_Int32 nextCharIndex(nPos); // == index of nextChar
+
+ while ((current != 0) && (eState != ssStop))
+ {
+ ++nCodePoints;
+ ParserFlags nMask = getFlags(current, eState);
+ if ( nMask & ParserFlags::EXCLUDED )
+ eState = ssBounce;
+ if ( bMightBeWord )
+ { // only relevant for ssGetValue fall back
+ if ( eState == ssGetChar || eState == ssRewindFromValue ||
+ eState == ssIgnoreLeadingInRewind )
+ bMightBeWord = bool(nMask & ParserFlags::CHAR_WORD);
+ else
+ bMightBeWord = bool(nMask & ParserFlags::WORD);
+ }
+ sal_Int32 nParseTokensType = getParseTokensType(current, isFirst);
+ isFirst = false;
+ sal_Int32 const nextIndex(nextCharIndex); // == index of char following current
+ nextCharIndex = index; // == index of nextChar
+ sal_uInt32 nextChar((index < rText.getLength()) ? rText.iterateCodePoints(&index) : 0);
+ switch (eState)
+ {
+ case ssGetChar :
+ case ssRewindFromValue :
+ case ssIgnoreLeadingInRewind :
+ {
+ if ( (nMask & ParserFlags::CHAR_VALUE) && eState != ssRewindFromValue
+ && eState != ssIgnoreLeadingInRewind )
+ { //! must be first, may fall back to ssGetWord via bMightBeWord
+ eState = ssGetValue;
+ if ( nMask & ParserFlags::VALUE_DIGIT )
+ {
+ if (128 <= current)
+ r.TokenType = KParseType::UNI_NUMBER;
+ else
+ r.TokenType = KParseType::ASC_NUMBER;
+ }
+ else if (current == cDecimalSep || (bDecSepAltUsed = (cDecimalSepAlt && current == cDecimalSepAlt)))
+ {
+ if (nextChar)
+ ++nDecSeps;
+ else
+ eState = ssRewindFromValue;
+ // retry for ONE_SINGLE_CHAR or others
+ }
+ }
+ else if ( nMask & ParserFlags::CHAR_WORD )
+ {
+ eState = ssGetWord;
+ r.TokenType = KParseType::IDENTNAME;
+ }
+ else if ( nMask & ParserFlags::NAME_SEP )
+ {
+ eState = ssGetWordFirstChar;
+ bQuote = true;
+ postSymbolIndex = nextCharIndex;
+ nParseTokensType = 0; // will be taken of first real character
+ r.TokenType = KParseType::SINGLE_QUOTE_NAME;
+ }
+ else if ( nMask & ParserFlags::CHAR_STRING )
+ {
+ eState = ssGetString;
+ postSymbolIndex = nextCharIndex;
+ nParseTokensType = 0; // will be taken of first real character
+ r.TokenType = KParseType::DOUBLE_QUOTE_STRING;
+ }
+ else if ( nMask & ParserFlags::CHAR_DONTCARE )
+ {
+ if ( nStartTypes & KParseTokens::IGNORE_LEADING_WS )
+ {
+ if (eState == ssRewindFromValue)
+ eState = ssIgnoreLeadingInRewind;
+ r.LeadingWhiteSpace = nextCharIndex - nPos;
+ nCodePoints--; // exclude leading whitespace
+ postSymbolIndex = nextCharIndex;
+ nParseTokensType = 0; // wait until real character
+ bMightBeWord = true;
+ }
+ else
+ eState = ssBounce;
+ }
+ else if ( nMask & ParserFlags::CHAR_BOOL )
+ {
+ eState = ssGetBool;
+ r.TokenType = KParseType::BOOLEAN;
+ }
+ else if ( nMask & ParserFlags::CHAR )
+ { //! must be last
+ eState = ssStop;
+ r.TokenType = KParseType::ONE_SINGLE_CHAR;
+ }
+ else
+ eState = ssBounce; // not known
+ }
+ break;
+ case ssGetValue :
+ {
+ if ( nMask & ParserFlags::VALUE_DIGIT )
+ {
+ if (128 <= current)
+ r.TokenType = KParseType::UNI_NUMBER;
+ else if ( r.TokenType != KParseType::UNI_NUMBER )
+ r.TokenType = KParseType::ASC_NUMBER;
+ }
+ if ( nMask & ParserFlags::VALUE )
+ {
+ if (current == cGroupSep)
+ {
+ // accept only if it is followed by 3 digits
+ sal_Int32 tempIndex(index);
+ sal_uInt32 const nextChar2((tempIndex < rText.getLength()) ? rText.iterateCodePoints(&tempIndex) : 0);
+ sal_uInt32 const nextChar3((tempIndex < rText.getLength()) ? rText.iterateCodePoints(&tempIndex) : 0);
+ if (getFlags(nextChar, eState) & ParserFlags::VALUE_DIGIT
+ && getFlags(nextChar2, eState) & ParserFlags::VALUE_DIGIT
+ && getFlags(nextChar3, eState) & ParserFlags::VALUE_DIGIT)
+ {
+ nParseTokensType |= KParseTokens::GROUP_SEPARATOR_IN_NUMBER;
+ }
+ else
+ {
+ // Trailing group separator character is not a
+ // group separator.
+ eState = ssStopBack;
+ }
+ }
+ else if ((current == cDecimalSep ||
+ (bDecSepAltUsed = (cDecimalSepAlt && current == cDecimalSepAlt))) &&
+ ++nDecSeps > 1)
+ {
+ if (nCodePoints == 2)
+ eState = ssRewindFromValue;
+ // consecutive separators
+ else
+ eState = ssStopBack;
+ }
+ // else keep it going
+ }
+ else if (current == 'E' || current == 'e')
+ {
+ ParserFlags nNext = getFlags(nextChar, eState);
+ if ( nNext & ParserFlags::VALUE_EXP )
+ ; // keep it going
+ else if (bMightBeWord && ((nNext & ParserFlags::WORD) || !nextChar))
+ { // might be a numerical name (1.2efg)
+ eState = ssGetWord;
+ r.TokenType = KParseType::IDENTNAME;
+ }
+ else
+ eState = ssStopBack;
+ }
+ else if ( nMask & ParserFlags::VALUE_SIGN )
+ {
+ if ( (cLast == 'E') || (cLast == 'e') )
+ {
+ ParserFlags nNext = getFlags(nextChar, eState);
+ if ( nNext & ParserFlags::VALUE_EXP_VALUE )
+ ; // keep it going
+ else if (bMightBeWord && ((nNext & ParserFlags::WORD) || !nextChar))
+ { // might be a numerical name (1.2e+fg)
+ eState = ssGetWord;
+ r.TokenType = KParseType::IDENTNAME;
+ }
+ else
+ eState = ssStopBack;
+ }
+ else if ( bMightBeWord )
+ { // might be a numerical name (1.2+fg)
+ eState = ssGetWord;
+ r.TokenType = KParseType::IDENTNAME;
+ }
+ else
+ eState = ssStopBack;
+ }
+ else if ( bMightBeWord && (nMask & ParserFlags::WORD) )
+ { // might be a numerical name (1995.A1)
+ eState = ssGetWord;
+ r.TokenType = KParseType::IDENTNAME;
+ }
+ else
+ eState = ssStopBack;
+ }
+ break;
+ case ssGetWordFirstChar :
+ eState = ssGetWord;
+ [[fallthrough]];
+ case ssGetWord :
+ {
+ if ( nMask & ParserFlags::WORD )
+ ; // keep it going
+ else if ( nMask & ParserFlags::NAME_SEP )
+ {
+ if ( bQuote )
+ {
+ if ( cLast == '\\' )
+ { // escaped
+ aSymbol.append(
+ OUString::Concat(rText.subView(postSymbolIndex, nextCharIndex - postSymbolIndex - 2))
+ + OUString(&current, 1));
+ }
+ else
+ {
+ eState = ssStop;
+ aSymbol.append(rText.subView(postSymbolIndex, nextCharIndex - postSymbolIndex - 1));
+ }
+ postSymbolIndex = nextCharIndex;
+ }
+ else
+ eState = ssStopBack;
+ }
+ else if ( bQuote )
+ ; // keep it going
+ else
+ eState = ssStopBack;
+ }
+ break;
+ case ssGetString :
+ {
+ if ( nMask & ParserFlags::STRING_SEP )
+ {
+ if ( cLast == '\\' )
+ { // escaped
+ aSymbol.append(
+ rText.subView(postSymbolIndex, nextCharIndex - postSymbolIndex - 2)
+ + OUString(&current, 1));
+ }
+ else if (current == nextChar &&
+ !(nContTypes & KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING) )
+ { // "" => literal " escaped
+ aSymbol.append(rText.subView(postSymbolIndex, nextCharIndex - postSymbolIndex));
+ nextCharIndex = index;
+ if (index < rText.getLength()) { ++nCodePoints; }
+ nextChar = (index < rText.getLength()) ? rText.iterateCodePoints(&index) : 0;
+ }
+ else
+ {
+ eState = ssStop;
+ aSymbol.append(rText.subView(postSymbolIndex, nextCharIndex - postSymbolIndex - 1));
+ }
+ postSymbolIndex = nextCharIndex;
+ }
+ }
+ break;
+ case ssGetBool :
+ {
+ if ( nMask & ParserFlags::BOOL )
+ eState = ssStop; // maximum 2: <, >, <>, <=, >=
+ else
+ eState = ssStopBack;
+ }
+ break;
+ case ssStopBack :
+ case ssBounce :
+ case ssStop :
+ ; // nothing, no compiler warning
+ break;
+ }
+ if ( eState == ssRewindFromValue )
+ {
+ r = ParseResult();
+ index = nPos;
+ postSymbolIndex = nPos;
+ nextCharIndex = nPos;
+ aSymbol.setLength(0);
+ current = (index < rText.getLength()) ? rText.iterateCodePoints(&index) : 0;
+ nCodePoints = (nPos < rText.getLength()) ? 1 : 0;
+ isFirst = true;
+ cLast = 0;
+ nDecSeps = 0;
+ bQuote = false;
+ bMightBeWord = true;
+ bMightBeWordLast = true;
+ bDecSepAltUsed = false;
+ }
+ else
+ {
+ if ( !(r.TokenType & nTokenType) )
+ {
+ if ( (r.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER))
+ && (nTokenType & KParseType::IDENTNAME) && bMightBeWord )
+ ; // keep a number that might be a word
+ else if (r.LeadingWhiteSpace == (nextCharIndex - nPos))
+ ; // keep ignored white space
+ else if ( !r.TokenType && eState == ssGetValue && (nMask & ParserFlags::VALUE_SEP) )
+ ; // keep uncertain value
+ else
+ eState = ssBounce;
+ }
+ if ( eState == ssBounce )
+ {
+ r.TokenType = 0;
+ eState = ssStopBack;
+ }
+ if ( eState == ssStopBack )
+ { // put back
+ nextChar = rText.iterateCodePoints(&index, -1);
+ nextCharIndex = nextIndex;
+ --nCodePoints;
+ bMightBeWord = bMightBeWordLast;
+ eState = ssStop;
+ }
+ if ( eState != ssStop )
+ {
+ if ( !r.StartFlags )
+ r.StartFlags |= nParseTokensType;
+ else
+ r.ContFlags |= nParseTokensType;
+ }
+ bMightBeWordLast = bMightBeWord;
+ cLast = current;
+ current = nextChar;
+ }
+ }
+ // r.CharLen is the length in characters (not code units) of the parsed
+ // token not including any leading white space.
+ r.CharLen = nCodePoints;
+ r.EndPos = nextCharIndex;
+ if ( r.TokenType & KParseType::ASC_NUMBER )
+ {
+ r.Value = rtl_math_uStringToDouble(rText.getStr() + nPos + r.LeadingWhiteSpace,
+ rText.getStr() + r.EndPos, (bDecSepAltUsed ? cDecimalSepAlt : cDecimalSep), cGroupSep, nullptr, nullptr);
+ if ( bMightBeWord )
+ r.TokenType |= KParseType::IDENTNAME;
+ }
+ else if ( r.TokenType & KParseType::UNI_NUMBER )
+ {
+ if ( !xNatNumSup.is() )
+ {
+ if ( m_xContext.is() )
+ {
+ xNatNumSup = NativeNumberSupplier::create( m_xContext );
+ }
+ }
+ OUString aTmp(rText.getStr() + nPos + r.LeadingWhiteSpace,
+ r.EndPos - nPos - r.LeadingWhiteSpace);
+ // transliterate to ASCII
+ aTmp = xNatNumSup->getNativeNumberString( aTmp, aParserLocale,
+ NativeNumberMode::NATNUM0 );
+ r.Value = ::rtl::math::stringToDouble( aTmp, cDecimalSep, cGroupSep );
+ if ( bMightBeWord )
+ r.TokenType |= KParseType::IDENTNAME;
+ }
+ else if ( r.TokenType & (KParseType::SINGLE_QUOTE_NAME | KParseType::DOUBLE_QUOTE_STRING) )
+ {
+ if (postSymbolIndex < nextCharIndex)
+ { //! open quote
+ aSymbol.append(rText.subView(postSymbolIndex, nextCharIndex - postSymbolIndex - 1));
+ r.TokenType |= KParseType::MISSING_QUOTE;
+ }
+ r.DequotedNameOrString = aSymbol.makeStringAndClear();
+ }
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */