From ed5640d8b587fbcfed7dd7967f3de04b37a76f26 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 11:06:44 +0200 Subject: Adding upstream version 4:7.4.7. Signed-off-by: Daniel Baumann --- starmath/source/parse5.cxx | 2773 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 2773 insertions(+) create mode 100644 starmath/source/parse5.cxx (limited to 'starmath/source/parse5.cxx') diff --git a/starmath/source/parse5.cxx b/starmath/source/parse5.cxx new file mode 100644 index 000000000..06aa373aa --- /dev/null +++ b/starmath/source/parse5.cxx @@ -0,0 +1,2773 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +using namespace ::com::sun::star::i18n; + +//Definition of math keywords +const SmTokenTableEntry aTokenTable[] + = { { u"abs", TABS, '\0', TG::UnOper, 13 }, + { u"acute", TACUTE, MS_ACUTE, TG::Attribute, 5 }, + { u"aleph", TALEPH, MS_ALEPH, TG::Standalone, 5 }, + { u"alignb", TALIGNC, '\0', TG::Align, 0 }, + { u"alignc", TALIGNC, '\0', TG::Align, 0 }, + { u"alignl", TALIGNL, '\0', TG::Align, 0 }, + { u"alignm", TALIGNC, '\0', TG::Align, 0 }, + { u"alignr", TALIGNR, '\0', TG::Align, 0 }, + { u"alignt", TALIGNC, '\0', TG::Align, 0 }, + { u"and", TAND, MS_AND, TG::Product, 0 }, + { u"approx", TAPPROX, MS_APPROX, TG::Relation, 0 }, + { u"arccos", TACOS, '\0', TG::Function, 5 }, + { u"arccot", TACOT, '\0', TG::Function, 5 }, + { u"arcosh", TACOSH, '\0', TG::Function, 5 }, + { u"arcoth", TACOTH, '\0', TG::Function, 5 }, + { u"arcsin", TASIN, '\0', TG::Function, 5 }, + { u"arctan", TATAN, '\0', TG::Function, 5 }, + { u"arsinh", TASINH, '\0', TG::Function, 5 }, + { u"artanh", TATANH, '\0', TG::Function, 5 }, + { u"backepsilon", TBACKEPSILON, MS_BACKEPSILON, TG::Standalone, 5 }, + { u"bar", TBAR, MS_BAR, TG::Attribute, 5 }, + { u"binom", TBINOM, '\0', TG::NONE, 5 }, + { u"bold", TBOLD, '\0', TG::FontAttr, 5 }, + { u"boper", TBOPER, '\0', TG::Product, 0 }, + { u"breve", TBREVE, MS_BREVE, TG::Attribute, 5 }, + { u"bslash", TBACKSLASH, MS_BACKSLASH, TG::Product, 0 }, + { u"cdot", TCDOT, MS_CDOT, TG::Product, 0 }, + { u"check", TCHECK, MS_CHECK, TG::Attribute, 5 }, + { u"circ", TCIRC, MS_CIRC, TG::Standalone, 5 }, + { u"circle", TCIRCLE, MS_CIRCLE, TG::Attribute, 5 }, + { u"color", TCOLOR, '\0', TG::FontAttr, 5 }, + { u"coprod", TCOPROD, MS_COPROD, TG::Oper, 5 }, + { u"cos", TCOS, '\0', TG::Function, 5 }, + { u"cosh", TCOSH, '\0', TG::Function, 5 }, + { u"cot", TCOT, '\0', TG::Function, 5 }, + { u"coth", TCOTH, '\0', 
TG::Function, 5 }, + { u"csub", TCSUB, '\0', TG::Power, 0 }, + { u"csup", TCSUP, '\0', TG::Power, 0 }, + { u"dddot", TDDDOT, MS_DDDOT, TG::Attribute, 5 }, + { u"ddot", TDDOT, MS_DDOT, TG::Attribute, 5 }, + { u"def", TDEF, MS_DEF, TG::Relation, 0 }, + { u"div", TDIV, MS_DIV, TG::Product, 0 }, + { u"divides", TDIVIDES, MS_LINE, TG::Relation, 0 }, + { u"dlarrow", TDLARROW, MS_DLARROW, TG::Standalone, 5 }, + { u"dlrarrow", TDLRARROW, MS_DLRARROW, TG::Standalone, 5 }, + { u"dot", TDOT, MS_DOT, TG::Attribute, 5 }, + { u"dotsaxis", TDOTSAXIS, MS_DOTSAXIS, TG::Standalone, 5 }, // 5 to continue expression + { u"dotsdiag", TDOTSDIAG, MS_DOTSUP, TG::Standalone, 5 }, + { u"dotsdown", TDOTSDOWN, MS_DOTSDOWN, TG::Standalone, 5 }, + { u"dotslow", TDOTSLOW, MS_DOTSLOW, TG::Standalone, 5 }, + { u"dotsup", TDOTSUP, MS_DOTSUP, TG::Standalone, 5 }, + { u"dotsvert", TDOTSVERT, MS_DOTSVERT, TG::Standalone, 5 }, + { u"downarrow", TDOWNARROW, MS_DOWNARROW, TG::Standalone, 5 }, + { u"drarrow", TDRARROW, MS_DRARROW, TG::Standalone, 5 }, + { u"emptyset", TEMPTYSET, MS_EMPTYSET, TG::Standalone, 5 }, + { u"equiv", TEQUIV, MS_EQUIV, TG::Relation, 0 }, + { u"evaluate", TEVALUATE, '\0', TG::NONE, 0 }, + { u"exists", TEXISTS, MS_EXISTS, TG::Standalone, 5 }, + { u"exp", TEXP, '\0', TG::Function, 5 }, + { u"fact", TFACT, MS_FACT, TG::UnOper, 5 }, + { u"fixed", TFIXED, '\0', TG::Font, 0 }, + { u"font", TFONT, '\0', TG::FontAttr, 5 }, + { u"forall", TFORALL, MS_FORALL, TG::Standalone, 5 }, + { u"fourier", TFOURIER, MS_FOURIER, TG::Standalone, 5 }, + { u"frac", TFRAC, '\0', TG::NONE, 5 }, + { u"from", TFROM, '\0', TG::Limit, 0 }, + { u"func", TFUNC, '\0', TG::Function, 5 }, + { u"ge", TGE, MS_GE, TG::Relation, 0 }, + { u"geslant", TGESLANT, MS_GESLANT, TG::Relation, 0 }, + { u"gg", TGG, MS_GG, TG::Relation, 0 }, + { u"grave", TGRAVE, MS_GRAVE, TG::Attribute, 5 }, + { u"gt", TGT, MS_GT, TG::Relation, 0 }, + { u"harpoon", THARPOON, MS_HARPOON, TG::Attribute, 5 }, + { u"hat", THAT, MS_HAT, TG::Attribute, 
5 }, + { u"hbar", THBAR, MS_HBAR, TG::Standalone, 5 }, + { u"hex", THEX, '\0', TG::NONE, 5 }, + { u"iiint", TIIINT, MS_IIINT, TG::Oper, 5 }, + { u"iint", TIINT, MS_IINT, TG::Oper, 5 }, + { u"im", TIM, MS_IM, TG::Standalone, 5 }, + { u"in", TIN, MS_IN, TG::Relation, 0 }, + { u"infinity", TINFINITY, MS_INFINITY, TG::Standalone, 5 }, + { u"infty", TINFINITY, MS_INFINITY, TG::Standalone, 5 }, + { u"int", TINT, MS_INT, TG::Oper, 5 }, + { u"intd", TINTD, MS_INT, TG::Oper, 5 }, + { u"intersection", TINTERSECT, MS_INTERSECT, TG::Product, 0 }, + { u"it", TIT, '\0', TG::Product, 0 }, + { u"ital", TITALIC, '\0', TG::FontAttr, 5 }, + { u"italic", TITALIC, '\0', TG::FontAttr, 5 }, + { u"lambdabar", TLAMBDABAR, MS_LAMBDABAR, TG::Standalone, 5 }, + { u"langle", TLANGLE, MS_LMATHANGLE, TG::LBrace, 5 }, + { u"laplace", TLAPLACE, MS_LAPLACE, TG::Standalone, 5 }, + { u"lbrace", TLBRACE, MS_LBRACE, TG::LBrace, 5 }, + { u"lceil", TLCEIL, MS_LCEIL, TG::LBrace, 5 }, + { u"ldbracket", TLDBRACKET, MS_LDBRACKET, TG::LBrace, 5 }, + { u"ldline", TLDLINE, MS_DVERTLINE, TG::LBrace, 5 }, + { u"le", TLE, MS_LE, TG::Relation, 0 }, + { u"left", TLEFT, '\0', TG::NONE, 5 }, + { u"leftarrow", TLEFTARROW, MS_LEFTARROW, TG::Standalone, 5 }, + { u"leslant", TLESLANT, MS_LESLANT, TG::Relation, 0 }, + { u"lfloor", TLFLOOR, MS_LFLOOR, TG::LBrace, 5 }, + { u"lim", TLIM, '\0', TG::Oper, 5 }, + { u"liminf", TLIMINF, '\0', TG::Oper, 5 }, + { u"limsup", TLIMSUP, '\0', TG::Oper, 5 }, + { u"lint", TLINT, MS_LINT, TG::Oper, 5 }, + { u"ll", TLL, MS_LL, TG::Relation, 0 }, + { u"lline", TLLINE, MS_VERTLINE, TG::LBrace, 5 }, + { u"llint", TLLINT, MS_LLINT, TG::Oper, 5 }, + { u"lllint", TLLLINT, MS_LLLINT, TG::Oper, 5 }, + { u"ln", TLN, '\0', TG::Function, 5 }, + { u"log", TLOG, '\0', TG::Function, 5 }, + { u"lrline", TLRLINE, MS_VERTLINE, TG::LBrace | TG::RBrace, 5 }, + { u"lrdline", TLRDLINE, MS_VERTLINE, TG::LBrace | TG::RBrace, 5 }, + { u"lsub", TLSUB, '\0', TG::Power, 0 }, + { u"lsup", TLSUP, '\0', TG::Power, 0 }, 
+ { u"lt", TLT, MS_LT, TG::Relation, 0 }, + { u"matrix", TMATRIX, '\0', TG::NONE, 5 }, + { u"minusplus", TMINUSPLUS, MS_MINUSPLUS, TG::UnOper | TG::Sum, 5 }, + { u"mline", TMLINE, MS_VERTLINE, TG::NONE, 0 }, //! not in TG::RBrace, Level 0 + { u"nabla", TNABLA, MS_NABLA, TG::Standalone, 5 }, + { u"nbold", TNBOLD, '\0', TG::FontAttr, 5 }, + { u"ndivides", TNDIVIDES, MS_NDIVIDES, TG::Relation, 0 }, + { u"neg", TNEG, MS_NEG, TG::UnOper, 5 }, + { u"neq", TNEQ, MS_NEQ, TG::Relation, 0 }, + { u"newline", TNEWLINE, '\0', TG::NONE, 0 }, + { u"ni", TNI, MS_NI, TG::Relation, 0 }, + { u"nitalic", TNITALIC, '\0', TG::FontAttr, 5 }, + { u"none", TNONE, '\0', TG::LBrace | TG::RBrace, 0 }, + { u"nospace", TNOSPACE, '\0', TG::Standalone, 5 }, + { u"notexists", TNOTEXISTS, MS_NOTEXISTS, TG::Standalone, 5 }, + { u"notin", TNOTIN, MS_NOTIN, TG::Relation, 0 }, + { u"nprec", TNOTPRECEDES, MS_NOTPRECEDES, TG::Relation, 0 }, + { u"nroot", TNROOT, MS_SQRT, TG::UnOper, 5 }, + { u"nsubset", TNSUBSET, MS_NSUBSET, TG::Relation, 0 }, + { u"nsubseteq", TNSUBSETEQ, MS_NSUBSETEQ, TG::Relation, 0 }, + { u"nsucc", TNOTSUCCEEDS, MS_NOTSUCCEEDS, TG::Relation, 0 }, + { u"nsupset", TNSUPSET, MS_NSUPSET, TG::Relation, 0 }, + { u"nsupseteq", TNSUPSETEQ, MS_NSUPSETEQ, TG::Relation, 0 }, + { u"odivide", TODIVIDE, MS_ODIVIDE, TG::Product, 0 }, + { u"odot", TODOT, MS_ODOT, TG::Product, 0 }, + { u"ominus", TOMINUS, MS_OMINUS, TG::Sum, 0 }, + { u"oper", TOPER, '\0', TG::Oper, 5 }, + { u"oplus", TOPLUS, MS_OPLUS, TG::Sum, 0 }, + { u"or", TOR, MS_OR, TG::Sum, 0 }, + { u"ortho", TORTHO, MS_ORTHO, TG::Relation, 0 }, + { u"otimes", TOTIMES, MS_OTIMES, TG::Product, 0 }, + { u"over", TOVER, '\0', TG::Product, 0 }, + { u"overbrace", TOVERBRACE, MS_OVERBRACE, TG::Product, 5 }, + { u"overline", TOVERLINE, '\0', TG::Attribute, 5 }, + { u"overstrike", TOVERSTRIKE, '\0', TG::Attribute, 5 }, + { u"owns", TNI, MS_NI, TG::Relation, 0 }, + { u"parallel", TPARALLEL, MS_DLINE, TG::Relation, 0 }, + { u"partial", TPARTIAL, 
MS_PARTIAL, TG::Standalone, 5 }, + { u"phantom", TPHANTOM, '\0', TG::FontAttr, 5 }, + { u"plusminus", TPLUSMINUS, MS_PLUSMINUS, TG::UnOper | TG::Sum, 5 }, + { u"prec", TPRECEDES, MS_PRECEDES, TG::Relation, 0 }, + { u"preccurlyeq", TPRECEDESEQUAL, MS_PRECEDESEQUAL, TG::Relation, 0 }, + { u"precsim", TPRECEDESEQUIV, MS_PRECEDESEQUIV, TG::Relation, 0 }, + { u"prod", TPROD, MS_PROD, TG::Oper, 5 }, + { u"prop", TPROP, MS_PROP, TG::Relation, 0 }, + { u"rangle", TRANGLE, MS_RMATHANGLE, TG::RBrace, 0 }, //! 0 to terminate expression + { u"rbrace", TRBRACE, MS_RBRACE, TG::RBrace, 0 }, + { u"rceil", TRCEIL, MS_RCEIL, TG::RBrace, 0 }, + { u"rdbracket", TRDBRACKET, MS_RDBRACKET, TG::RBrace, 0 }, + { u"rdline", TRDLINE, MS_DVERTLINE, TG::RBrace, 0 }, + { u"re", TRE, MS_RE, TG::Standalone, 5 }, + { u"rfloor", TRFLOOR, MS_RFLOOR, TG::RBrace, 0 }, //! 0 to terminate expression + { u"right", TRIGHT, '\0', TG::NONE, 0 }, + { u"rightarrow", TRIGHTARROW, MS_RIGHTARROW, TG::Standalone, 5 }, + { u"rline", TRLINE, MS_VERTLINE, TG::RBrace, 0 }, //! 
0 to terminate expression + { u"rsub", TRSUB, '\0', TG::Power, 0 }, + { u"rsup", TRSUP, '\0', TG::Power, 0 }, + { u"sans", TSANS, '\0', TG::Font, 0 }, + { u"serif", TSERIF, '\0', TG::Font, 0 }, + { u"setC", TSETC, MS_SETC, TG::Standalone, 5 }, + { u"setminus", TSETMINUS, MS_BACKSLASH, TG::Product, 0 }, + { u"setN", TSETN, MS_SETN, TG::Standalone, 5 }, + { u"setQ", TSETQ, MS_SETQ, TG::Standalone, 5 }, + { u"setquotient", TSETQUOTIENT, MS_SLASH, TG::Product, 0 }, + { u"setR", TSETR, MS_SETR, TG::Standalone, 5 }, + { u"setZ", TSETZ, MS_SETZ, TG::Standalone, 5 }, + { u"sim", TSIM, MS_SIM, TG::Relation, 0 }, + { u"simeq", TSIMEQ, MS_SIMEQ, TG::Relation, 0 }, + { u"sin", TSIN, '\0', TG::Function, 5 }, + { u"sinh", TSINH, '\0', TG::Function, 5 }, + { u"size", TSIZE, '\0', TG::FontAttr, 5 }, + { u"slash", TSLASH, MS_SLASH, TG::Product, 0 }, + { u"sqrt", TSQRT, MS_SQRT, TG::UnOper, 5 }, + { u"stack", TSTACK, '\0', TG::NONE, 5 }, + { u"sub", TRSUB, '\0', TG::Power, 0 }, + { u"subset", TSUBSET, MS_SUBSET, TG::Relation, 0 }, + { u"subseteq", TSUBSETEQ, MS_SUBSETEQ, TG::Relation, 0 }, + { u"succ", TSUCCEEDS, MS_SUCCEEDS, TG::Relation, 0 }, + { u"succcurlyeq", TSUCCEEDSEQUAL, MS_SUCCEEDSEQUAL, TG::Relation, 0 }, + { u"succsim", TSUCCEEDSEQUIV, MS_SUCCEEDSEQUIV, TG::Relation, 0 }, + { u"sum", TSUM, MS_SUM, TG::Oper, 5 }, + { u"sup", TRSUP, '\0', TG::Power, 0 }, + { u"supset", TSUPSET, MS_SUPSET, TG::Relation, 0 }, + { u"supseteq", TSUPSETEQ, MS_SUPSETEQ, TG::Relation, 0 }, + { u"tan", TTAN, '\0', TG::Function, 5 }, + { u"tanh", TTANH, '\0', TG::Function, 5 }, + { u"tilde", TTILDE, MS_TILDE, TG::Attribute, 5 }, + { u"times", TTIMES, MS_TIMES, TG::Product, 0 }, + { u"to", TTO, '\0', TG::Limit, 0 }, + { u"toward", TTOWARD, MS_RIGHTARROW, TG::Relation, 0 }, + { u"transl", TTRANSL, MS_TRANSL, TG::Relation, 0 }, + { u"transr", TTRANSR, MS_TRANSR, TG::Relation, 0 }, + { u"underbrace", TUNDERBRACE, MS_UNDERBRACE, TG::Product, 5 }, + { u"underline", TUNDERLINE, '\0', TG::Attribute, 5 }, + 
{ u"union", TUNION, MS_UNION, TG::Sum, 0 }, + { u"uoper", TUOPER, '\0', TG::UnOper, 5 }, + { u"uparrow", TUPARROW, MS_UPARROW, TG::Standalone, 5 }, + { u"vec", TVEC, MS_VEC, TG::Attribute, 5 }, + { u"widebslash", TWIDEBACKSLASH, MS_BACKSLASH, TG::Product, 0 }, + { u"wideharpoon", TWIDEHARPOON, MS_HARPOON, TG::Attribute, 5 }, + { u"widehat", TWIDEHAT, MS_HAT, TG::Attribute, 5 }, + { u"wideslash", TWIDESLASH, MS_SLASH, TG::Product, 0 }, + { u"widetilde", TWIDETILDE, MS_TILDE, TG::Attribute, 5 }, + { u"widevec", TWIDEVEC, MS_VEC, TG::Attribute, 5 }, + { u"wp", TWP, MS_WP, TG::Standalone, 5 } }; + +// First character may be any alphabetic +const sal_Int32 coStartFlags = KParseTokens::ANY_LETTER | KParseTokens::IGNORE_LEADING_WS; + +// Continuing characters may be any alphabetic +const sal_Int32 coContFlags = (coStartFlags & ~KParseTokens::IGNORE_LEADING_WS) + | KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING; +// First character for numbers, may be any numeric or dot +const sal_Int32 coNumStartFlags + = KParseTokens::ASC_DIGIT | KParseTokens::ASC_DOT | KParseTokens::IGNORE_LEADING_WS; +// Continuing characters for numbers, may be any numeric or dot or comma. +// tdf#127873: additionally accept ',' comma group separator as too many +// existing documents unwittingly may have used that as decimal separator +// in such locales (though it never was as this is always the en-US locale +// and the group separator is only parsed away). +const sal_Int32 coNumContFlags = (coNumStartFlags & ~KParseTokens::IGNORE_LEADING_WS) + | KParseTokens::GROUP_SEPARATOR_IN_NUMBER; +// First character for numbers hexadecimal +const sal_Int32 coNum16StartFlags + = KParseTokens::ASC_DIGIT | KParseTokens::ASC_UPALPHA | KParseTokens::IGNORE_LEADING_WS; + +// Continuing characters for numbers hexadecimal +const sal_Int32 coNum16ContFlags = (coNum16StartFlags & ~KParseTokens::IGNORE_LEADING_WS); +// user-defined char continuing characters may be any alphanumeric or dot. 
+const sal_Int32 coUserDefinedCharContFlags = KParseTokens::ANY_LETTER_OR_NUMBER + | KParseTokens::ASC_DOT + | KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING; + +//Checks if keyword is in the list. +static inline bool findCompare(const SmTokenTableEntry& lhs, const OUString& s) +{ + return s.compareToIgnoreAsciiCase(lhs.aIdent) > 0; +} + +//Returns the SmTokenTableEntry for a keyword +static const SmTokenTableEntry* GetTokenTableEntry(const OUString& rName) +{ + if (rName.isEmpty()) + return nullptr; //avoid null pointer exceptions + //Looks for the first keyword after or equal to rName in alphabetical order. + auto findIter + = std::lower_bound(std::begin(aTokenTable), std::end(aTokenTable), rName, findCompare); + if (findIter != std::end(aTokenTable) && rName.equalsIgnoreAsciiCase(findIter->aIdent)) + return &*findIter; //check is equal + return nullptr; //not found +} + +static bool IsDelimiter(const OUString& rTxt, sal_Int32 nPos) +{ // returns 'true' iff cChar is '\0' or a delimiter + + assert(nPos <= rTxt.getLength()); //index out of range + if (nPos == rTxt.getLength()) + return true; //This is EOF + sal_Unicode cChar = rTxt[nPos]; + + // check if 'cChar' is in the delimiter table + static const sal_Unicode aDelimiterTable[] = { + ' ', '{', '}', '(', ')', '\t', '\n', '\r', '+', '-', '*', '/', '=', '[', + ']', '^', '_', '#', '%', '>', '<', '&', '|', '\\', '"', '~', '`' + }; //reordered by usage (by eye) for nanoseconds saving. + + //checks the array + for (auto const& cDelimiter : aDelimiterTable) + { + if (cDelimiter == cChar) + return true; + } + + //special chars support + sal_Int16 nTypJp = SM_MOD()->GetSysLocale().GetCharClass().getType(rTxt, nPos); + return (nTypJp == css::i18n::UnicodeType::SPACE_SEPARATOR + || nTypJp == css::i18n::UnicodeType::CONTROL); +} + +// checks number used as arguments in Math formulas (e.g. 'size' command) +// Format: no negative numbers, must start with a digit, no exponent notation, ... 
+static bool lcl_IsNumber(const OUString& rText) +{ + bool bPoint = false; + const sal_Unicode* pBuffer = rText.getStr(); + for (sal_Int32 nPos = 0; nPos < rText.getLength(); nPos++, pBuffer++) + { + const sal_Unicode cChar = *pBuffer; + if (cChar == '.') + { + if (bPoint) + return false; + else + bPoint = true; + } + else if (!rtl::isAsciiDigit(cChar)) + return false; + } + return true; +} +// checks number used as arguments in Math formulas (e.g. 'size' command) +// Format: no negative numbers, must start with a digit, no exponent notation, ... +static bool lcl_IsNotWholeNumber(const OUString& rText) +{ + const sal_Unicode* pBuffer = rText.getStr(); + for (sal_Int32 nPos = 0; nPos < rText.getLength(); nPos++, pBuffer++) + if (!rtl::isAsciiDigit(*pBuffer)) + return true; + return false; +} +// checks hex number used as arguments in Math formulas (e.g. 'hex' command) +// Format: no negative numbers, must start with a digit, no exponent notation, ... +static bool lcl_IsNotWholeNumber16(const OUString& rText) +{ + const sal_Unicode* pBuffer = rText.getStr(); + for (sal_Int32 nPos = 0; nPos < rText.getLength(); nPos++, pBuffer++) + if (!rtl::isAsciiCanonicHexDigit(*pBuffer)) + return true; + return false; +} + +//Text replace onto m_aBufferString +void SmParser5::Replace(sal_Int32 nPos, sal_Int32 nLen, const OUString& rText) +{ + assert(nPos + nLen <= m_aBufferString.getLength()); //checks if length allows text replace + + m_aBufferString = m_aBufferString.replaceAt(nPos, nLen, rText); //replace and reindex + sal_Int32 nChg = rText.getLength() - nLen; + m_nBufferIndex = m_nBufferIndex + nChg; + m_nTokenIndex = m_nTokenIndex + nChg; +} + +void SmParser5::NextToken() //Central part of the parser +{ + sal_Int32 nBufLen = m_aBufferString.getLength(); + ParseResult aRes; + sal_Int32 nRealStart; + bool bCont; + do + { + // skip white spaces + while (UnicodeType::SPACE_SEPARATOR == m_pSysCC->getType(m_aBufferString, m_nBufferIndex)) + ++m_nBufferIndex; + + // Try to parse a 
number in a locale-independent manner using + // '.' as decimal separator. + // See https://bz.apache.org/ooo/show_bug.cgi?id=45779 + aRes + = m_aNumCC.parsePredefinedToken(KParseType::ASC_NUMBER, m_aBufferString, m_nBufferIndex, + coNumStartFlags, "", coNumContFlags, ""); + + if (aRes.TokenType == 0) + { + // Try again with the default token parsing. + aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex, coStartFlags, "", + coContFlags, ""); + } + + nRealStart = m_nBufferIndex + aRes.LeadingWhiteSpace; + m_nBufferIndex = nRealStart; + + bCont = false; + if (aRes.TokenType == 0 && nRealStart < nBufLen && '\n' == m_aBufferString[nRealStart]) + { + // keep data needed for tokens row and col entry up to date + ++m_nRow; + m_nBufferIndex = m_nColOff = nRealStart + 1; + bCont = true; + } + else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR) + { + if (nRealStart + 2 <= nBufLen && m_aBufferString.match("%%", nRealStart)) + { + //SkipComment + m_nBufferIndex = nRealStart + 2; + while (m_nBufferIndex < nBufLen && '\n' != m_aBufferString[m_nBufferIndex]) + ++m_nBufferIndex; + bCont = true; + } + } + + } while (bCont); + + // set index of current token + m_nTokenIndex = m_nBufferIndex; + sal_uInt32 nCol = nRealStart - m_nColOff; + + bool bHandled = true; + if (nRealStart >= nBufLen) + { + m_aCurToken.eType = TEND; + m_aCurToken.cMathChar = u""; + m_aCurToken.nGroup = TG::NONE; + m_aCurToken.nLevel = 0; + m_aCurToken.aText.clear(); + } + else if (aRes.TokenType & KParseType::ANY_NUMBER) + { + assert(aRes.EndPos > 0); + if (m_aBufferString[aRes.EndPos - 1] == ',' && aRes.EndPos < nBufLen + && m_pSysCC->getType(m_aBufferString, aRes.EndPos) != UnicodeType::SPACE_SEPARATOR) + { + // Comma followed by a non-space char is unlikely for decimal/thousands separator. 
+ --aRes.EndPos; + } + sal_Int32 n = aRes.EndPos - nRealStart; + assert(n >= 0); + m_aCurToken.eType = TNUMBER; + m_aCurToken.cMathChar = u""; + m_aCurToken.nGroup = TG::NONE; + m_aCurToken.nLevel = 5; + m_aCurToken.aText = m_aBufferString.copy(nRealStart, n); + + SAL_WARN_IF(!IsDelimiter(m_aBufferString, aRes.EndPos), "starmath", + "identifier really finished? (compatibility!)"); + } + else if (aRes.TokenType & KParseType::DOUBLE_QUOTE_STRING) + { + m_aCurToken.eType = TTEXT; + m_aCurToken.cMathChar = u""; + m_aCurToken.nGroup = TG::NONE; + m_aCurToken.nLevel = 5; + m_aCurToken.aText = aRes.DequotedNameOrString; + nCol++; + } + else if (aRes.TokenType & KParseType::IDENTNAME) + { + sal_Int32 n = aRes.EndPos - nRealStart; + assert(n >= 0); + OUString aName(m_aBufferString.copy(nRealStart, n)); + const SmTokenTableEntry* pEntry = GetTokenTableEntry(aName); + + if (pEntry) + { + m_aCurToken.eType = pEntry->eType; + m_aCurToken.setChar(pEntry->cMathChar); + m_aCurToken.nGroup = pEntry->nGroup; + m_aCurToken.nLevel = pEntry->nLevel; + m_aCurToken.aText = pEntry->aIdent; + } + else + { + m_aCurToken.eType = TIDENT; + m_aCurToken.cMathChar = u""; + m_aCurToken.nGroup = TG::NONE; + m_aCurToken.nLevel = 5; + m_aCurToken.aText = aName; + + SAL_WARN_IF(!IsDelimiter(m_aBufferString, aRes.EndPos), "starmath", + "identifier really finished? 
(compatibility!)"); + } + } + else if (aRes.TokenType == 0 && '_' == m_aBufferString[nRealStart]) + { + m_aCurToken.eType = TRSUB; + m_aCurToken.cMathChar = u""; + m_aCurToken.nGroup = TG::Power; + m_aCurToken.nLevel = 0; + m_aCurToken.aText = "_"; + + aRes.EndPos = nRealStart + 1; + } + else if (aRes.TokenType & KParseType::BOOLEAN) + { + sal_Int32& rnEndPos = aRes.EndPos; + if (rnEndPos - nRealStart <= 2) + { + sal_Unicode ch = m_aBufferString[nRealStart]; + switch (ch) + { + case '<': + { + if (m_aBufferString.match("<<", nRealStart)) + { + m_aCurToken.eType = TLL; + m_aCurToken.setChar(MS_LL); + m_aCurToken.nGroup = TG::Relation; + m_aCurToken.nLevel = 0; + m_aCurToken.aText = "<<"; + + rnEndPos = nRealStart + 2; + } + else if (m_aBufferString.match("<=", nRealStart)) + { + m_aCurToken.eType = TLE; + m_aCurToken.setChar(MS_LE); + m_aCurToken.nGroup = TG::Relation; + m_aCurToken.nLevel = 0; + m_aCurToken.aText = "<="; + + rnEndPos = nRealStart + 2; + } + else if (m_aBufferString.match("<-", nRealStart)) + { + m_aCurToken.eType = TLEFTARROW; + m_aCurToken.setChar(MS_LEFTARROW); + m_aCurToken.nGroup = TG::Standalone; + m_aCurToken.nLevel = 5; + m_aCurToken.aText = "<-"; + + rnEndPos = nRealStart + 2; + } + else if (m_aBufferString.match("<>", nRealStart)) + { + m_aCurToken.eType = TNEQ; + m_aCurToken.setChar(MS_NEQ); + m_aCurToken.nGroup = TG::Relation; + m_aCurToken.nLevel = 0; + m_aCurToken.aText = "<>"; + + rnEndPos = nRealStart + 2; + } + else if (m_aBufferString.match("", nRealStart)) + { + m_aCurToken.eType = TPLACE; + m_aCurToken.setChar(MS_PLACE); + m_aCurToken.nGroup = TG::NONE; + m_aCurToken.nLevel = 5; + m_aCurToken.aText = ""; + + rnEndPos = nRealStart + 3; + } + else + { + m_aCurToken.eType = TLT; + m_aCurToken.setChar(MS_LT); + m_aCurToken.nGroup = TG::Relation; + m_aCurToken.nLevel = 0; + m_aCurToken.aText = "<"; + } + } + break; + case '>': + { + if (m_aBufferString.match(">=", nRealStart)) + { + m_aCurToken.eType = TGE; + 
m_aCurToken.setChar(MS_GE); + m_aCurToken.nGroup = TG::Relation; + m_aCurToken.nLevel = 0; + m_aCurToken.aText = ">="; + + rnEndPos = nRealStart + 2; + } + else if (m_aBufferString.match(">>", nRealStart)) + { + m_aCurToken.eType = TGG; + m_aCurToken.setChar(MS_GG); + m_aCurToken.nGroup = TG::Relation; + m_aCurToken.nLevel = 0; + m_aCurToken.aText = ">>"; + + rnEndPos = nRealStart + 2; + } + else + { + m_aCurToken.eType = TGT; + m_aCurToken.setChar(MS_GT); + m_aCurToken.nGroup = TG::Relation; + m_aCurToken.nLevel = 0; + m_aCurToken.aText = ">"; + } + } + break; + default: + bHandled = false; + } + } + } + else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR) + { + sal_Int32& rnEndPos = aRes.EndPos; + if (rnEndPos - nRealStart == 1) + { + sal_Unicode ch = m_aBufferString[nRealStart]; + switch (ch) + { + case '%': + { + //! modifies aRes.EndPos + + OSL_ENSURE(rnEndPos >= nBufLen || '%' != m_aBufferString[rnEndPos], + "unexpected comment start"); + + // get identifier of user-defined character + ParseResult aTmpRes = m_pSysCC->parseAnyToken(m_aBufferString, rnEndPos, + KParseTokens::ANY_LETTER, "", + coUserDefinedCharContFlags, ""); + + sal_Int32 nTmpStart = rnEndPos + aTmpRes.LeadingWhiteSpace; + + // default setting for the case that no identifier + // i.e. a valid symbol-name is following the '%' + // character + m_aCurToken.eType = TTEXT; + m_aCurToken.cMathChar = u""; + m_aCurToken.nGroup = TG::NONE; + m_aCurToken.nLevel = 5; + m_aCurToken.aText = "%"; + + if (aTmpRes.TokenType & KParseType::IDENTNAME) + { + sal_Int32 n = aTmpRes.EndPos - nTmpStart; + m_aCurToken.eType = TSPECIAL; + m_aCurToken.aText = m_aBufferString.copy(nTmpStart - 1, n + 1); + + OSL_ENSURE(aTmpRes.EndPos > rnEndPos, "empty identifier"); + if (aTmpRes.EndPos > rnEndPos) + rnEndPos = aTmpRes.EndPos; + else + ++rnEndPos; + } + + // if no symbol-name was found we start-over with + // finding the next token right after the '%' sign. + // I.e. we leave rnEndPos unmodified. 
+ } + break; + case '[': + { + m_aCurToken.eType = TLBRACKET; + m_aCurToken.setChar(MS_LBRACKET); + m_aCurToken.nGroup = TG::LBrace; + m_aCurToken.nLevel = 5; + m_aCurToken.aText = "["; + } + break; + case '\\': + { + m_aCurToken.eType = TESCAPE; + m_aCurToken.cMathChar = u""; + m_aCurToken.nGroup = TG::NONE; + m_aCurToken.nLevel = 5; + m_aCurToken.aText = "\\"; + } + break; + case ']': + { + m_aCurToken.eType = TRBRACKET; + m_aCurToken.setChar(MS_RBRACKET); + m_aCurToken.nGroup = TG::RBrace; + m_aCurToken.nLevel = 0; + m_aCurToken.aText = "]"; + } + break; + case '^': + { + m_aCurToken.eType = TRSUP; + m_aCurToken.cMathChar = u""; + m_aCurToken.nGroup = TG::Power; + m_aCurToken.nLevel = 0; + m_aCurToken.aText = "^"; + } + break; + case '`': + { + m_aCurToken.eType = TSBLANK; + m_aCurToken.cMathChar = u""; + m_aCurToken.nGroup = TG::Blank; + m_aCurToken.nLevel = 5; + m_aCurToken.aText = "`"; + } + break; + case '{': + { + m_aCurToken.eType = TLGROUP; + m_aCurToken.setChar(MS_LBRACE); + m_aCurToken.nGroup = TG::NONE; + m_aCurToken.nLevel = 5; + m_aCurToken.aText = "{"; + } + break; + case '|': + { + m_aCurToken.eType = TOR; + m_aCurToken.setChar(MS_OR); + m_aCurToken.nGroup = TG::Sum; + m_aCurToken.nLevel = 0; + m_aCurToken.aText = "|"; + } + break; + case '}': + { + m_aCurToken.eType = TRGROUP; + m_aCurToken.setChar(MS_RBRACE); + m_aCurToken.nGroup = TG::NONE; + m_aCurToken.nLevel = 0; + m_aCurToken.aText = "}"; + } + break; + case '~': + { + m_aCurToken.eType = TBLANK; + m_aCurToken.cMathChar = u""; + m_aCurToken.nGroup = TG::Blank; + m_aCurToken.nLevel = 5; + m_aCurToken.aText = "~"; + } + break; + case '#': + { + if (m_aBufferString.match("##", nRealStart)) + { + m_aCurToken.eType = TDPOUND; + m_aCurToken.cMathChar = u""; + m_aCurToken.nGroup = TG::NONE; + m_aCurToken.nLevel = 0; + m_aCurToken.aText = "##"; + + rnEndPos = nRealStart + 2; + } + else + { + m_aCurToken.eType = TPOUND; + m_aCurToken.cMathChar = u""; + m_aCurToken.nGroup = TG::NONE; + 
m_aCurToken.nLevel = 0; + m_aCurToken.aText = "#"; + } + } + break; + case '&': + { + m_aCurToken.eType = TAND; + m_aCurToken.setChar(MS_AND); + m_aCurToken.nGroup = TG::Product; + m_aCurToken.nLevel = 0; + m_aCurToken.aText = "&"; + } + break; + case '(': + { + m_aCurToken.eType = TLPARENT; + m_aCurToken.setChar(MS_LPARENT); + m_aCurToken.nGroup = TG::LBrace; + m_aCurToken.nLevel = 5; //! 0 to continue expression + m_aCurToken.aText = "("; + } + break; + case ')': + { + m_aCurToken.eType = TRPARENT; + m_aCurToken.setChar(MS_RPARENT); + m_aCurToken.nGroup = TG::RBrace; + m_aCurToken.nLevel = 0; //! 0 to terminate expression + m_aCurToken.aText = ")"; + } + break; + case '*': + { + m_aCurToken.eType = TMULTIPLY; + m_aCurToken.setChar(MS_MULTIPLY); + m_aCurToken.nGroup = TG::Product; + m_aCurToken.nLevel = 0; + m_aCurToken.aText = "*"; + } + break; + case '+': + { + if (m_aBufferString.match("+-", nRealStart)) + { + m_aCurToken.eType = TPLUSMINUS; + m_aCurToken.setChar(MS_PLUSMINUS); + m_aCurToken.nGroup = TG::UnOper | TG::Sum; + m_aCurToken.nLevel = 5; + m_aCurToken.aText = "+-"; + + rnEndPos = nRealStart + 2; + } + else + { + m_aCurToken.eType = TPLUS; + m_aCurToken.setChar(MS_PLUS); + m_aCurToken.nGroup = TG::UnOper | TG::Sum; + m_aCurToken.nLevel = 5; + m_aCurToken.aText = "+"; + } + } + break; + case '-': + { + if (m_aBufferString.match("-+", nRealStart)) + { + m_aCurToken.eType = TMINUSPLUS; + m_aCurToken.setChar(MS_MINUSPLUS); + m_aCurToken.nGroup = TG::UnOper | TG::Sum; + m_aCurToken.nLevel = 5; + m_aCurToken.aText = "-+"; + + rnEndPos = nRealStart + 2; + } + else if (m_aBufferString.match("->", nRealStart)) + { + m_aCurToken.eType = TRIGHTARROW; + m_aCurToken.setChar(MS_RIGHTARROW); + m_aCurToken.nGroup = TG::Standalone; + m_aCurToken.nLevel = 5; + m_aCurToken.aText = "->"; + + rnEndPos = nRealStart + 2; + } + else + { + m_aCurToken.eType = TMINUS; + m_aCurToken.setChar(MS_MINUS); + m_aCurToken.nGroup = TG::UnOper | TG::Sum; + m_aCurToken.nLevel = 5; + 
m_aCurToken.aText = "-"; + } + } + break; + case '.': + { + // Only one character? Then it can't be a number. + if (m_nBufferIndex < m_aBufferString.getLength() - 1) + { + // for compatibility with SO5.2 + // texts like .34 ...56 ... h ...78..90 + // will be treated as numbers + m_aCurToken.eType = TNUMBER; + m_aCurToken.cMathChar = u""; + m_aCurToken.nGroup = TG::NONE; + m_aCurToken.nLevel = 5; + + sal_Int32 nTxtStart = m_nBufferIndex; + sal_Unicode cChar; + // if the equation ends with dot(.) then increment m_nBufferIndex till end of string only + do + { + cChar = m_aBufferString[++m_nBufferIndex]; + } while ((cChar == '.' || rtl::isAsciiDigit(cChar)) + && (m_nBufferIndex < m_aBufferString.getLength() - 1)); + + m_aCurToken.aText + = m_aBufferString.copy(nTxtStart, m_nBufferIndex - nTxtStart); + aRes.EndPos = m_nBufferIndex; + } + else + bHandled = false; + } + break; + case '/': + { + m_aCurToken.eType = TDIVIDEBY; + m_aCurToken.setChar(MS_SLASH); + m_aCurToken.nGroup = TG::Product; + m_aCurToken.nLevel = 0; + m_aCurToken.aText = "/"; + } + break; + case '=': + { + m_aCurToken.eType = TASSIGN; + m_aCurToken.setChar(MS_ASSIGN); + m_aCurToken.nGroup = TG::Relation; + m_aCurToken.nLevel = 0; + m_aCurToken.aText = "="; + } + break; + default: + bHandled = false; + } + } + } + else + bHandled = false; + + if (!bHandled) + { + m_aCurToken.eType = TCHARACTER; + m_aCurToken.cMathChar = u""; + m_aCurToken.nGroup = TG::NONE; + m_aCurToken.nLevel = 5; + + // tdf#129372: we may have to deal with surrogate pairs + // (see https://en.wikipedia.org/wiki/Universal_Character_Set_characters#Surrogates) + // in this case, we must read 2 sal_Unicode instead of 1 + int nOffset(rtl::isSurrogate(m_aBufferString[nRealStart]) ? 
2 : 1); + m_aCurToken.aText = m_aBufferString.copy(nRealStart, nOffset); + + aRes.EndPos = nRealStart + nOffset; + } + m_aCurESelection = ESelection(m_nRow, nCol, m_nRow, nCol + m_aCurToken.aText.getLength()); + + if (TEND != m_aCurToken.eType) + m_nBufferIndex = aRes.EndPos; +} + +void SmParser5::NextTokenColor(SmTokenType dvipload) +{ + sal_Int32 nBufLen = m_aBufferString.getLength(); + ParseResult aRes; + sal_Int32 nRealStart; + bool bCont; + + do + { + // skip white spaces + while (UnicodeType::SPACE_SEPARATOR == m_pSysCC->getType(m_aBufferString, m_nBufferIndex)) + ++m_nBufferIndex; + //parse, there are few options, so less strict. + aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex, coStartFlags, "", + coContFlags, ""); + nRealStart = m_nBufferIndex + aRes.LeadingWhiteSpace; + m_nBufferIndex = nRealStart; + bCont = false; + if (aRes.TokenType == 0 && nRealStart < nBufLen && '\n' == m_aBufferString[nRealStart]) + { + // keep data needed for tokens row and col entry up to date + ++m_nRow; + m_nBufferIndex = m_nColOff = nRealStart + 1; + bCont = true; + } + else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR) + { + if (nRealStart + 2 <= nBufLen && m_aBufferString.match("%%", nRealStart)) + { + //SkipComment + m_nBufferIndex = nRealStart + 2; + while (m_nBufferIndex < nBufLen && '\n' != m_aBufferString[m_nBufferIndex]) + ++m_nBufferIndex; + bCont = true; + } + } + } while (bCont); + + // set index of current token + m_nTokenIndex = m_nBufferIndex; + sal_uInt32 nCol = nRealStart - m_nColOff; + + if (nRealStart >= nBufLen) + m_aCurToken.eType = TEND; + else if (aRes.TokenType & KParseType::IDENTNAME) + { + sal_Int32 n = aRes.EndPos - nRealStart; + assert(n >= 0); + OUString aName(m_aBufferString.copy(nRealStart, n)); + switch (dvipload) + { + case TCOLOR: + m_aCurToken = starmathdatabase::Identify_ColorName_Parser(aName); + break; + case TDVIPSNAMESCOL: + m_aCurToken = starmathdatabase::Identify_ColorName_DVIPSNAMES(aName); + break; + default: + 
m_aCurToken = starmathdatabase::Identify_ColorName_Parser(aName); + break; + } + } + else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR) + { + if (m_aBufferString[nRealStart] == '#' && !m_aBufferString.match("##", nRealStart)) + { + m_aCurToken.eType = THEX; + m_aCurToken.cMathChar = u""; + m_aCurToken.nGroup = TG::Color; + m_aCurToken.nLevel = 0; + m_aCurToken.aText = "hex"; + } + } + else + m_aCurToken.eType = TNONE; + + m_aCurESelection = ESelection(m_nRow, nCol, m_nRow, nCol + m_aCurToken.aText.getLength()); + if (TEND != m_aCurToken.eType) + m_nBufferIndex = aRes.EndPos; +} + +void SmParser5::NextTokenFontSize() +{ + sal_Int32 nBufLen = m_aBufferString.getLength(); + ParseResult aRes; + sal_Int32 nRealStart; + bool bCont; + bool hex = false; + + do + { + // skip white spaces + while (UnicodeType::SPACE_SEPARATOR == m_pSysCC->getType(m_aBufferString, m_nBufferIndex)) + ++m_nBufferIndex; + //hexadecimal parser + aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex, coNum16StartFlags, ".", + coNum16ContFlags, ".,"); + if (aRes.TokenType == 0) + { + // Try again with the default token parsing. 
+ aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex, coStartFlags, "", + coContFlags, ""); + } + else + hex = true; + nRealStart = m_nBufferIndex + aRes.LeadingWhiteSpace; + m_nBufferIndex = nRealStart; + bCont = false; + if (aRes.TokenType == 0 && nRealStart < nBufLen && '\n' == m_aBufferString[nRealStart]) + { + // keep data needed for tokens row and col entry up to date + ++m_nRow; + m_nBufferIndex = m_nColOff = nRealStart + 1; + bCont = true; + } + else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR) + { + if (nRealStart + 2 <= nBufLen && m_aBufferString.match("%%", nRealStart)) + { + //SkipComment + m_nBufferIndex = nRealStart + 2; + while (m_nBufferIndex < nBufLen && '\n' != m_aBufferString[m_nBufferIndex]) + ++m_nBufferIndex; + bCont = true; + } + } + } while (bCont); + + // set index of current token + m_nTokenIndex = m_nBufferIndex; + sal_uInt32 nCol = nRealStart - m_nColOff; + + if (nRealStart >= nBufLen) + m_aCurToken.eType = TEND; + else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR) + { + if (aRes.EndPos - nRealStart == 1) + { + switch (m_aBufferString[nRealStart]) + { + case '*': + m_aCurToken.eType = TMULTIPLY; + m_aCurToken.setChar(MS_MULTIPLY); + m_aCurToken.nGroup = TG::Product; + m_aCurToken.nLevel = 0; + m_aCurToken.aText = "*"; + break; + case '+': + m_aCurToken.eType = TPLUS; + m_aCurToken.setChar(MS_PLUS); + m_aCurToken.nGroup = TG::UnOper | TG::Sum; + m_aCurToken.nLevel = 5; + m_aCurToken.aText = "+"; + break; + case '-': + m_aCurToken.eType = TMINUS; + m_aCurToken.setChar(MS_MINUS); + m_aCurToken.nGroup = TG::UnOper | TG::Sum; + m_aCurToken.nLevel = 5; + m_aCurToken.aText = "-"; + break; + case '/': + m_aCurToken.eType = TDIVIDEBY; + m_aCurToken.setChar(MS_SLASH); + m_aCurToken.nGroup = TG::Product; + m_aCurToken.nLevel = 0; + m_aCurToken.aText = "/"; + break; + default: + m_aCurToken.eType = TNONE; + break; + } + } + else + m_aCurToken.eType = TNONE; + } + else if (hex) + { + assert(aRes.EndPos > 0); + sal_Int32 n = 
// Parses an optional alignment token followed by an expression.
//
// bUseExtraSpaces is passed through unchanged to DoExpression().
//
// Returns the SmAlignNode (with the expression as its sub node 0) when an
// alignment token was present, otherwise the expression node itself. Two
// alignment tokens in a row yield DoError(SmParseError::DoubleAlign).
//
// NOTE(review): in this copy of the file the template arguments (e.g. after
// std::unique_ptr) appear to have been lost; the code is kept exactly as found.
std::unique_ptr SmParser5::DoAlign(bool bUseExtraSpaces)
// parse alignment info (if any), then go on with rest of expression
{
    // presumably bounds the recursion depth -- confirm against DepthProtect's definition
    DepthProtect aDepthGuard(m_nParseDepth);

    // stays empty unless the current token is an alignment token
    std::unique_ptr xSNode;

    if (TokenInGroup(TG::Align))
    {
        xSNode.reset(new SmAlignNode(m_aCurToken));
        xSNode->SetSelection(m_aCurESelection);

        NextToken();

        // allow for just one align statement in 5.0
        // (the align node created above is dropped on this error path)
        if (TokenInGroup(TG::Align))
            return DoError(SmParseError::DoubleAlign);
    }

    auto pNode = DoExpression(bUseExtraSpaces);

    if (xSNode)
    {
        // hand the expression over to the align node as its first sub node
        xSNode->SetSubNode(0, pNode.release());
        return xSNode;
    }
    return pNode;
}
// Parses a sequence of relations.
//
// One relation is always parsed; further ones are parsed for as long as the
// current token's nLevel is >= 4. If more than one relation was collected
// they are wrapped in an SmExpressionNode (which also receives
// bUseExtraSpaces); a single relation is returned as is, without a wrapper.
//
// NOTE(review): in this copy of the file the template arguments (e.g. after
// std::unique_ptr / std::vector) appear to have been lost; the code is kept
// exactly as found.
std::unique_ptr SmParser5::DoExpression(bool bUseExtraSpaces)
{
    // presumably bounds the recursion depth -- confirm against DepthProtect's definition
    DepthProtect aDepthGuard(m_nParseDepth);

    std::vector> RelationArray;
    RelationArray.push_back(DoRelation());
    // keep collecting relations while the current token's level is at least 4
    while (m_aCurToken.nLevel >= 4)
        RelationArray.push_back(DoRelation());

    if (RelationArray.size() > 1)
    {
        std::unique_ptr xSNode(new SmExpressionNode(m_aCurToken));
        // buildNodeArray() releases ownership of every collected node
        xSNode->SetSubNodes(buildNodeArray(RelationArray));
        xSNode->SetUseExtraSpaces(bUseExtraSpaces);
        return xSNode;
    }
    else
    {
        // This expression has only one node so just push this node.
        // (bUseExtraSpaces is not applied on this path.)
        return std::move(RelationArray[0]);
    }
}
// Parses all sub-/superscripts (nActiveGroup == TG::Power) or limits
// (nActiveGroup == TG::Limit) that follow xGivenNode and returns an
// SmSubSupNode holding them.
//
// The sub node array has 1 + SUBSUP_NUM_ENTRIES slots: slot 0 takes
// xGivenNode, slot (enum value + 1) takes the script for that position.
// Preconditions (asserted): nActiveGroup is TG::Power or TG::Limit and the
// current token belongs to that group.
//
// NOTE(review): in this copy of the file the template arguments (after
// std::unique_ptr / std::vector / static_cast) appear to have been lost; the
// code is kept exactly as found.
std::unique_ptr SmParser5::DoSubSup(TG nActiveGroup, std::unique_ptr xGivenNode)
{
    // presumably bounds the recursion depth -- confirm against DepthProtect's definition
    DepthProtect aDepthGuard(m_nParseDepth);

    assert(nActiveGroup == TG::Power || nActiveGroup == TG::Limit);
    assert(m_aCurToken.nGroup == nActiveGroup);

    std::unique_ptr pNode(new SmSubSupNode(m_aCurToken));
    pNode->SetSelection(m_aCurESelection);
    //! Of course 'm_aCurToken' is just the first sub-/supscript token.
    //! It should be of no further interest. The positions of the
    //! sub-/supscripts will be identified by the corresponding subnodes
    //! index in the 'aSubNodes' array (enum value from 'SmSubSup').

    pNode->SetUseLimits(nActiveGroup == TG::Limit);

    // initialize subnodes array
    std::vector> aSubNodes(1 + SUBSUP_NUM_ENTRIES);
    aSubNodes[0] = std::move(xGivenNode);

    // process all sub-/supscripts
    int nIndex = 0;
    while (TokenInGroup(nActiveGroup))
    {
        SmTokenType eType(m_aCurToken.eType);

        // map the token to its position; 'from'/'to' share the slots of csub/csup
        switch (eType)
        {
            case TRSUB:
                nIndex = static_cast(RSUB);
                break;
            case TRSUP:
                nIndex = static_cast(RSUP);
                break;
            case TFROM:
            case TCSUB:
                nIndex = static_cast(CSUB);
                break;
            case TTO:
            case TCSUP:
                nIndex = static_cast(CSUP);
                break;
            case TLSUB:
                nIndex = static_cast(LSUB);
                break;
            case TLSUP:
                nIndex = static_cast(LSUP);
                break;
            default:
                // nIndex is not reassigned here: it keeps the value left over
                // from the previous iteration (0 on the first one)
                SAL_WARN("starmath", "unknown case");
        }
        // shift by one because slot 0 is reserved for the body
        nIndex++;
        assert(1 <= nIndex && nIndex <= SUBSUP_NUM_ENTRIES);

        std::unique_ptr xENode;
        if (aSubNodes[nIndex]) // if already occupied at earlier iteration
        {
            // forget the earlier one, remember an error instead
            aSubNodes[nIndex].reset();
            xENode = DoError(SmParseError::DoubleSubsupscript); // this also skips current token.
        }
        else
        {
            // skip sub-/supscript token
            NextToken();
        }

        // get sub-/supscript node
        // (even when we saw a double-sub/supscript error in the above
        // in order to minimize mess and continue parsing.)
        std::unique_ptr xSNode;
        if (eType == TFROM || eType == TTO)
        {
            // parse limits in old 4.0 and 5.0 style
            xSNode = DoRelation();
        }
        else
            xSNode = DoTerm(true);

        // the error node wins; in that case the script parsed above is discarded
        aSubNodes[nIndex] = std::move(xENode ? xENode : xSNode);
    }

    // buildNodeArray() releases ownership of every entry (empty slots included)
    pNode->SetSubNodes(buildNodeArray(aSubNodes));
    return pNode;
}
// Collects a run of consecutive blank tokens (TG::Blank) into one SmBlankNode.
//
// Precondition (asserted): the current token is in TG::Blank.
// The node is cleared again when the run is directly followed by a newline,
// or by the end of input while IsIgnoreSpacesRight() is set (and we are not
// fuzzing).
//
// NOTE(review): in this copy of the file the template arguments (after
// std::unique_ptr) appear to have been lost; the code is kept exactly as found.
std::unique_ptr SmParser5::DoBlank()
{
    // presumably bounds the recursion depth -- confirm against DepthProtect's definition
    DepthProtect aDepthGuard(m_nParseDepth);

    assert(TokenInGroup(TG::Blank));
    std::unique_ptr pBlankNode(new SmBlankNode(m_aCurToken));
    pBlankNode->SetSelection(m_aCurESelection);

    // the first blank token is counted too, hence do/while
    do
    {
        pBlankNode->IncreaseBy(m_aCurToken);
        NextToken();
    } while (TokenInGroup(TG::Blank));

    // Ignore trailing spaces: always before a newline; at the end of the
    // input only if the corresponding option is set. The IsFuzzing() test
    // comes first, so SM_MOD() is not called at all while fuzzing.
    if (m_aCurToken.eType == TNEWLINE
        || (m_aCurToken.eType == TEND && !utl::ConfigManager::IsFuzzing()
            && SM_MOD()->GetConfig()->IsIgnoreSpacesRight()))
    {
        pBlankNode->Clear();
    }
    return pBlankNode;
}
NextToken(); + return std::unique_ptr(xSNode.release()); + } + + auto pNode = DoAlign(!bNoSpace); + if (m_aCurToken.eType == TRGROUP) + { + NextToken(); + return pNode; + } + auto xSNode = std::make_unique(m_aCurToken); + xSNode->SetSelection(m_aCurESelection); + std::unique_ptr xError(DoError(SmParseError::RgroupExpected)); + xSNode->SetSubNodes(std::move(pNode), std::move(xError)); + return std::unique_ptr(xSNode.release()); + } + + case TLEFT: + return DoBrace(); + case TEVALUATE: + return DoEvaluate(); + + case TBLANK: + case TSBLANK: + return DoBlank(); + + case TTEXT: + { + auto pNode = std::make_unique(m_aCurToken, FNT_TEXT); + pNode->SetSelection(m_aCurESelection); + NextToken(); + return std::unique_ptr(pNode.release()); + } + case TCHARACTER: + { + auto pNode = std::make_unique(m_aCurToken, FNT_VARIABLE); + pNode->SetSelection(m_aCurESelection); + NextToken(); + return std::unique_ptr(pNode.release()); + } + case TIDENT: + case TNUMBER: + { + auto pTextNode = std::make_unique( + m_aCurToken, m_aCurToken.eType == TNUMBER ? FNT_NUMBER : FNT_VARIABLE); + pTextNode->SetSelection(m_aCurESelection); + if (!bGroupNumberIdent) + { + NextToken(); + return std::unique_ptr(pTextNode.release()); + } + std::vector> aNodes; + // Some people want to be able to write "x_2n" for "x_{2n}" + // although e.g. LaTeX or AsciiMath interpret that as "x_2 n". + // The tokenizer skips whitespaces so we need some additional + // work to distinguish from "x_2 n". + // See https://bz.apache.org/ooo/show_bug.cgi?id=11752 and + // https://bugs.libreoffice.org/show_bug.cgi?id=55853 + sal_Int32 nBufLen = m_aBufferString.getLength(); + + // We need to be careful to call NextToken() only after having + // tested for a whitespace separator (otherwise it will be + // skipped!) 
+ bool moveToNextToken = true; + while (m_nBufferIndex < nBufLen + && m_pSysCC->getType(m_aBufferString, m_nBufferIndex) + != UnicodeType::SPACE_SEPARATOR) + { + NextToken(); + if (m_aCurToken.eType != TNUMBER && m_aCurToken.eType != TIDENT) + { + // Neither a number nor an identifier. We just moved to + // the next token, so no need to do that again. + moveToNextToken = false; + break; + } + aNodes.emplace_back(std::unique_ptr(new SmTextNode( + m_aCurToken, m_aCurToken.eType == TNUMBER ? FNT_NUMBER : FNT_VARIABLE))); + } + if (moveToNextToken) + NextToken(); + if (aNodes.empty()) + return std::unique_ptr(pTextNode.release()); + // We have several concatenated identifiers and numbers. + // Let's group them into one SmExpressionNode. + aNodes.insert(aNodes.begin(), std::move(pTextNode)); + std::unique_ptr xNode(new SmExpressionNode(SmToken())); + xNode->SetSubNodes(buildNodeArray(aNodes)); + return std::unique_ptr(xNode.release()); + } + case TLEFTARROW: + case TRIGHTARROW: + case TUPARROW: + case TDOWNARROW: + case TCIRC: + case TDRARROW: + case TDLARROW: + case TDLRARROW: + case TEXISTS: + case TNOTEXISTS: + case TFORALL: + case TPARTIAL: + case TNABLA: + case TLAPLACE: + case TFOURIER: + case TTOWARD: + case TDOTSAXIS: + case TDOTSDIAG: + case TDOTSDOWN: + case TDOTSLOW: + case TDOTSUP: + case TDOTSVERT: + { + auto pNode = std::make_unique(m_aCurToken); + pNode->SetSelection(m_aCurESelection); + NextToken(); + return std::unique_ptr(pNode.release()); + } + + case TSETN: + case TSETZ: + case TSETQ: + case TSETR: + case TSETC: + case THBAR: + case TLAMBDABAR: + case TBACKEPSILON: + case TALEPH: + case TIM: + case TRE: + case TWP: + case TEMPTYSET: + case TINFINITY: + { + auto pNode = std::make_unique(m_aCurToken); + pNode->SetSelection(m_aCurESelection); + NextToken(); + return std::unique_ptr(pNode.release()); + } + + case TPLACE: + { + auto pNode = std::make_unique(m_aCurToken); + pNode->SetSelection(m_aCurESelection); + NextToken(); + return 
std::unique_ptr(pNode.release()); + } + + case TSPECIAL: + return DoSpecial(); + + case TBINOM: + return DoBinom(); + + case TFRAC: + return DoFrac(); + + case TSTACK: + return DoStack(); + + case TMATRIX: + return DoMatrix(); + + case THEX: + NextTokenFontSize(); + if (m_aCurToken.eType == THEX) + { + auto pTextNode = std::make_unique(m_aCurToken, FNT_NUMBER); + pTextNode->SetSelection(m_aCurESelection); + NextToken(); + return pTextNode; + } + else + return DoError(SmParseError::NumberExpected); + default: + if (TokenInGroup(TG::LBrace)) + return DoBrace(); + if (TokenInGroup(TG::Oper)) + return DoOperator(); + if (TokenInGroup(TG::UnOper)) + return DoUnOper(); + if (TokenInGroup(TG::Attribute) || TokenInGroup(TG::FontAttr)) + { + std::stack, + std::vector>> + aStack; + bool bIsAttr; + for (;;) + { + bIsAttr = TokenInGroup(TG::Attribute); + if (!bIsAttr && !TokenInGroup(TG::FontAttr)) + break; + aStack.push(bIsAttr ? DoAttribute() : DoFontAttribute()); + } + + auto xFirstNode = DoPower(); + while (!aStack.empty()) + { + std::unique_ptr xNode = std::move(aStack.top()); + aStack.pop(); + xNode->SetSubNodes(nullptr, std::move(xFirstNode)); + xFirstNode = std::move(xNode); + } + return xFirstNode; + } + if (TokenInGroup(TG::Function)) + return DoFunction(); + return DoError(SmParseError::UnexpectedChar); + } +} + +std::unique_ptr SmParser5::DoEscape() +{ + DepthProtect aDepthGuard(m_nParseDepth); + + NextToken(); + + switch (m_aCurToken.eType) + { + case TLPARENT: + case TRPARENT: + case TLBRACKET: + case TRBRACKET: + case TLDBRACKET: + case TRDBRACKET: + case TLBRACE: + case TLGROUP: + case TRBRACE: + case TRGROUP: + case TLANGLE: + case TRANGLE: + case TLCEIL: + case TRCEIL: + case TLFLOOR: + case TRFLOOR: + case TLLINE: + case TRLINE: + case TLDLINE: + case TRDLINE: + { + auto pNode = std::make_unique(m_aCurToken); + pNode->SetSelection(m_aCurESelection); + NextToken(); + return std::unique_ptr(pNode.release()); + } + default: + return 
// Builds the node for the operator symbol at the current token and advances
// past it.
//
//  - sum/prod/coprod and the integral family: SmMathSymbolNode
//  - lim / lim sup / lim inf: SmTextNode (FNT_TEXT); the token text is
//    overwritten with the display string first
//  - oper: the *following* token is consumed and turned into an
//    SmGlyphSpecialNode
//
// NOTE(review): in this copy of the file the template arguments (after
// std::unique_ptr) appear to have been lost; the code is kept exactly as found.
std::unique_ptr SmParser5::DoOper()
{
    // presumably bounds the recursion depth -- confirm against DepthProtect's definition
    DepthProtect aDepthGuard(m_nParseDepth);

    // remembered up front: m_aCurToken is modified in some branches below
    SmTokenType eType(m_aCurToken.eType);
    std::unique_ptr pNode;

    switch (eType)
    {
        case TSUM:
        case TPROD:
        case TCOPROD:
        case TINT:
        case TINTD:
        case TIINT:
        case TIIINT:
        case TLINT:
        case TLLINT:
        case TLLLINT:
            pNode.reset(new SmMathSymbolNode(m_aCurToken));
            pNode->SetSelection(m_aCurESelection);
            break;

        case TLIM:
        case TLIMSUP:
        case TLIMINF:
            // replace the keyword by the text that is to be displayed
            m_aCurToken.aText
                = eType == TLIMSUP ? u"lim sup" : eType == TLIMINF ? u"lim inf" : u"lim";
            pNode.reset(new SmTextNode(m_aCurToken, FNT_TEXT));
            pNode->SetSelection(m_aCurESelection);
            break;

        case TOPER:
            // skip the 'oper' keyword; the token after it supplies the glyph
            NextToken();
            OSL_ENSURE(m_aCurToken.eType == TSPECIAL, "Sm: wrong token");
            // re-tag that token as TOPER before building the node from it
            m_aCurToken.eType = TOPER;
            pNode.reset(new SmGlyphSpecialNode(m_aCurToken));
            pNode->SetSelection(m_aCurESelection);
            break;

        default:
            // with assertions compiled out, pNode stays empty and is returned as such
            assert(false && "unknown case");
    }

    NextToken();
    return pNode;
}
+ m_aCurToken.eType = TUOPER; + m_aCurToken.nGroup = TG::UnOper; + xOper = DoGlyphSpecial(); + break; + + case TPLUS: + case TMINUS: + case TPLUSMINUS: + case TMINUSPLUS: + case TNEG: + case TFACT: + xOper = DoOpSubSup(); + break; + + default: + assert(false); + } + + // get argument + xArg = DoPower(); + + if (eType == TABS) + { + xSNode.reset(new SmBraceNode(aNodeToken)); + xSNode->SetSelection(aESelection); + xSNode->SetScaleMode(SmScaleMode::Height); + + // build nodes for left & right lines + // (text, group, level of the used token are of no interest here) + // we'll use row & column of the keyword for abs + aNodeToken.eType = TABS; + + aNodeToken.setChar(MS_VERTLINE); + std::unique_ptr xLeft(new SmMathSymbolNode(aNodeToken)); + xLeft->SetSelection(aESelection); + std::unique_ptr xRight(new SmMathSymbolNode(aNodeToken)); + xRight->SetSelection(aESelection); + + xSNode->SetSubNodes(std::move(xLeft), std::move(xArg), std::move(xRight)); + } + else if (eType == TSQRT || eType == TNROOT) + { + xSNode.reset(new SmRootNode(aNodeToken)); + xSNode->SetSelection(aESelection); + xOper.reset(new SmRootSymbolNode(aNodeToken)); + xOper->SetSelection(aESelection); + xSNode->SetSubNodes(std::move(xExtra), std::move(xOper), std::move(xArg)); + } + else + { + xSNode.reset(new SmUnHorNode(aNodeToken)); + xSNode->SetSelection(aESelection); + if (bIsPostfix) + xSNode->SetSubNodes(std::move(xArg), std::move(xOper)); + else + { + // prefix operator + xSNode->SetSubNodes(std::move(xOper), std::move(xArg)); + } + } + return xSNode; +} + +std::unique_ptr SmParser5::DoAttribute() +{ + DepthProtect aDepthGuard(m_nParseDepth); + + assert(TokenInGroup(TG::Attribute)); + + auto xSNode = std::make_unique(m_aCurToken); + xSNode->SetSelection(m_aCurESelection); + std::unique_ptr xAttr; + SmScaleMode eScaleMode = SmScaleMode::None; + + // get appropriate node for the attribute itself + switch (m_aCurToken.eType) + { + case TUNDERLINE: + case TOVERLINE: + case TOVERSTRIKE: + xAttr.reset(new 
// Parses the argument of a 'color' attribute and returns an SmFontNode for it.
//
// Precondition (asserted): the current token is TCOLOR.
// Accepted forms, as far as this function shows:
//  - a token in TG::Color as delivered by NextTokenColor() (optionally
//    preceded by a TDVIPSNAMESCOL token, which switches the name lookup)
//  - TRGB followed by three whole numbers 0..255 (r g b)
//  - TRGBA followed by four whole numbers 0..255 (r g b a)
//  - THEX followed by one hexadecimal number
// For the numeric forms the value is stored as a hex string in the token's
// cMathChar. Anything else yields DoError(SmParseError::ColorExpected).
//
// NOTE(review): in this copy of the file the template arguments (after
// std::unique_ptr) appear to have been lost; the code is kept exactly as found.
std::unique_ptr SmParser5::DoColor()
{
    // presumably bounds the recursion depth -- confirm against DepthProtect's definition
    DepthProtect aDepthGuard(m_nParseDepth);

    assert(m_aCurToken.eType == TCOLOR);
    // start of the colour specification; used below to copy its source text
    sal_Int32 nBufferIndex = m_nBufferIndex;
    NextTokenColor(TCOLOR);
    SmToken aToken;
    ESelection aESelection;

    // a TDVIPSNAMESCOL token is a prefix: read the actual name with the other table
    if (m_aCurToken.eType == TDVIPSNAMESCOL)
        NextTokenColor(TDVIPSNAMESCOL);
    if (m_aCurToken.eType == TERROR)
        return DoError(SmParseError::ColorExpected);
    if (TokenInGroup(TG::Color))
    {
        aToken = m_aCurToken;
        aESelection = m_aCurESelection;
        if (m_aCurToken.eType == TRGB) //loads r, g and b
        {
            sal_uInt32 nr, ng, nb, nc;
            // each channel: read a number token, require a whole number <= 255
            NextTokenFontSize();
            if (lcl_IsNotWholeNumber(m_aCurToken.aText))
                return DoError(SmParseError::ColorExpected);
            nr = m_aCurToken.aText.toUInt32();
            if (nr > 255)
                return DoError(SmParseError::ColorExpected);
            NextTokenFontSize();
            if (lcl_IsNotWholeNumber(m_aCurToken.aText))
                return DoError(SmParseError::ColorExpected);
            ng = m_aCurToken.aText.toUInt32();
            if (ng > 255)
                return DoError(SmParseError::ColorExpected);
            NextTokenFontSize();
            if (lcl_IsNotWholeNumber(m_aCurToken.aText))
                return DoError(SmParseError::ColorExpected);
            nb = m_aCurToken.aText.toUInt32();
            if (nb > 255)
                return DoError(SmParseError::ColorExpected);
            // pack as 0x00RRGGBB (top byte fixed to 0)
            nc = nb | ng << 8 | nr << 16 | sal_uInt32(0) << 24;
            aToken.cMathChar = OUString::number(nc, 16);
        }
        else if (m_aCurToken.eType == TRGBA) //loads r, g, b and a (in that order)
        {
            sal_uInt32 nr, na, ng, nb, nc;
            NextTokenFontSize();
            if (lcl_IsNotWholeNumber(m_aCurToken.aText))
                return DoError(SmParseError::ColorExpected);
            nr = m_aCurToken.aText.toUInt32();
            if (nr > 255)
                return DoError(SmParseError::ColorExpected);
            NextTokenFontSize();
            if (lcl_IsNotWholeNumber(m_aCurToken.aText))
                return DoError(SmParseError::ColorExpected);
            ng = m_aCurToken.aText.toUInt32();
            if (ng > 255)
                return DoError(SmParseError::ColorExpected);
            NextTokenFontSize();
            if (lcl_IsNotWholeNumber(m_aCurToken.aText))
                return DoError(SmParseError::ColorExpected);
            nb = m_aCurToken.aText.toUInt32();
            if (nb > 255)
                return DoError(SmParseError::ColorExpected);
            NextTokenFontSize();
            if (lcl_IsNotWholeNumber(m_aCurToken.aText))
                return DoError(SmParseError::ColorExpected);
            na = m_aCurToken.aText.toUInt32();
            if (na > 255)
                return DoError(SmParseError::ColorExpected);
            // pack as 0xAARRGGBB
            nc = nb | ng << 8 | nr << 16 | na << 24;
            aToken.cMathChar = OUString::number(nc, 16);
        }
        else if (m_aCurToken.eType == THEX) //loads hex code
        {
            sal_uInt32 nc;
            NextTokenFontSize();
            if (lcl_IsNotWholeNumber16(m_aCurToken.aText))
                return DoError(SmParseError::ColorExpected);
            nc = m_aCurToken.aText.toUInt32(16);
            aToken.cMathChar = OUString::number(nc, 16);
        }
        // the node's text is the source text from the remembered start up to
        // the current buffer position
        aToken.aText = m_aBufferString.subView(nBufferIndex, m_nBufferIndex - nBufferIndex);
        NextToken();
    }
    else
        return DoError(SmParseError::ColorExpected);

    std::unique_ptr xNode;
    xNode.reset(new SmFontNode(aToken));
    xNode->SetSelection(aESelection);
    return xNode;
}
// Parses a bracketed expression and returns an SmBraceNode with the three
// sub nodes (left symbol, body, right symbol).
//
// Two input forms are handled:
//  - 'left' <bracket> body 'right' <bracket>: either bracket may be any token
//    of TG::LBrace or TG::RBrace; the node gets SmScaleMode::Height.
//  - <opening bracket> body <closing bracket>: the closing token must be the
//    counterpart selected in the switch below; SmScaleMode::None.
// On any mismatch the nodes built so far are dropped and DoError() with the
// matching SmParseError is returned instead.
//
// Precondition (asserted): current token is TLEFT or in TG::LBrace.
//
// NOTE(review): in this copy of the file the template arguments (after
// std::unique_ptr) appear to have been lost; the code is kept exactly as found.
std::unique_ptr SmParser5::DoBrace()
{
    // presumably bounds the recursion depth -- confirm against DepthProtect's definition
    DepthProtect aDepthGuard(m_nParseDepth);

    assert(m_aCurToken.eType == TLEFT || TokenInGroup(TG::LBrace));

    std::unique_ptr xSNode(new SmBraceNode(m_aCurToken));
    xSNode->SetSelection(m_aCurESelection);
    std::unique_ptr pBody, pLeft, pRight;
    SmScaleMode eScaleMode = SmScaleMode::None;
    SmParseError eError = SmParseError::None;

    if (m_aCurToken.eType == TLEFT)
    {
        // skip the 'left' keyword
        NextToken();

        eScaleMode = SmScaleMode::Height;

        // check for left bracket
        if (TokenInGroup(TG::LBrace) || TokenInGroup(TG::RBrace))
        {
            pLeft.reset(new SmMathSymbolNode(m_aCurToken));
            pLeft->SetSelection(m_aCurESelection);

            NextToken();
            pBody = DoBracebody(true);

            if (m_aCurToken.eType == TRIGHT)
            {
                // skip the 'right' keyword
                NextToken();

                // check for right bracket
                if (TokenInGroup(TG::LBrace) || TokenInGroup(TG::RBrace))
                {
                    pRight.reset(new SmMathSymbolNode(m_aCurToken));
                    pRight->SetSelection(m_aCurESelection);
                    NextToken();
                }
                else
                    eError = SmParseError::RbraceExpected;
            }
            else
                eError = SmParseError::RightExpected;
        }
        else
            eError = SmParseError::LbraceExpected;
    }
    else
    {
        assert(TokenInGroup(TG::LBrace));

        pLeft.reset(new SmMathSymbolNode(m_aCurToken));
        pLeft->SetSelection(m_aCurESelection);

        NextToken();
        pBody = DoBracebody(false);

        // pick the closing token type that belongs to the opening one;
        // TLRLINE / TLRDLINE are closed by the same token type that opened them
        SmTokenType eExpectedType = TUNKNOWN;
        switch (pLeft->GetToken().eType)
        {
            case TLPARENT:
                eExpectedType = TRPARENT;
                break;
            case TLBRACKET:
                eExpectedType = TRBRACKET;
                break;
            case TLBRACE:
                eExpectedType = TRBRACE;
                break;
            case TLDBRACKET:
                eExpectedType = TRDBRACKET;
                break;
            case TLLINE:
                eExpectedType = TRLINE;
                break;
            case TLDLINE:
                eExpectedType = TRDLINE;
                break;
            case TLANGLE:
                eExpectedType = TRANGLE;
                break;
            case TLFLOOR:
                eExpectedType = TRFLOOR;
                break;
            case TLCEIL:
                eExpectedType = TRCEIL;
                break;
            case TLRLINE:
                eExpectedType = TLRLINE;
                break;
            case TLRDLINE:
                eExpectedType = TLRDLINE;
                break;
            default:
                // eExpectedType stays TUNKNOWN
                SAL_WARN("starmath", "unknown case");
        }

        if (m_aCurToken.eType == eExpectedType)
        {
            pRight.reset(new SmMathSymbolNode(m_aCurToken));
            pRight->SetSelection(m_aCurESelection);
            NextToken();
        }
        else
            eError = SmParseError::ParentMismatch;
    }

    if (eError == SmParseError::None)
    {
        // on the error-free paths above both bracket nodes have been created
        assert(pLeft);
        assert(pRight);
        xSNode->SetSubNodes(std::move(pLeft), std::move(pBody), std::move(pRight));
        xSNode->SetScaleMode(eScaleMode);
        return xSNode;
    }
    return DoError(eError);
}
aNodes.push_back(DoAlign()); + if (m_aCurToken.eType != TMLINE && !TokenInGroup(TG::RBrace)) + aNodes.emplace_back(DoError(SmParseError::RbraceExpected)); + } + } while (m_aCurToken.eType != TEND && !TokenInGroup(TG::RBrace)); + } + + pBody->SetSubNodes(buildNodeArray(aNodes)); + pBody->SetScaleMode(bIsLeftRight ? SmScaleMode::Height : SmScaleMode::None); + return pBody; +} + +std::unique_ptr SmParser5::DoEvaluate() +{ + DepthProtect aDepthGuard(m_nParseDepth); + + // Create node + std::unique_ptr xSNode(new SmBraceNode(m_aCurToken)); + xSNode->SetSelection(m_aCurESelection); + SmToken aToken(TRLINE, MS_VERTLINE, "evaluate", TG::RBrace, 5); + + // Parse body && left none + NextToken(); + std::unique_ptr pBody = DoPower(); + SmToken bToken(TNONE, '\0', "", TG::LBrace, 5); + std::unique_ptr pLeft; + pLeft.reset(new SmMathSymbolNode(bToken)); + + // Mount nodes + std::unique_ptr pRight; + pRight.reset(new SmMathSymbolNode(aToken)); + xSNode->SetSubNodes(std::move(pLeft), std::move(pBody), std::move(pRight)); + xSNode->SetScaleMode(SmScaleMode::Height); // scalable line + + // Parse from to + if (m_aCurToken.nGroup == TG::Limit) + { + std::unique_ptr rSNode; + rSNode = DoSubSupEvaluate(std::move(xSNode)); + rSNode->GetToken().eType = TEVALUATE; + return rSNode; + } + + return xSNode; +} + +std::unique_ptr SmParser5::DoFunction() +{ + DepthProtect aDepthGuard(m_nParseDepth); + + if (m_aCurToken.eType == TFUNC) + { + NextToken(); // skip "FUNC"-statement + m_aCurToken.eType = TFUNC; + m_aCurToken.nGroup = TG::Function; + } + auto pNode = std::make_unique(m_aCurToken, FNT_FUNCTION); + pNode->SetSelection(m_aCurESelection); + NextToken(); + return pNode; +} + +std::unique_ptr SmParser5::DoBinom() +{ + DepthProtect aDepthGuard(m_nParseDepth); + + auto xSNode = std::make_unique(m_aCurToken); + xSNode->SetSelection(m_aCurESelection); + + NextToken(); + + auto xFirst = DoSum(); + auto xSecond = DoSum(); + xSNode->SetSubNodes(std::move(xFirst), std::move(xSecond)); + return 
xSNode; +} + +std::unique_ptr SmParser5::DoFrac() +{ + DepthProtect aDepthGuard(m_nParseDepth); + + std::unique_ptr xSNode = std::make_unique(m_aCurToken); + xSNode->SetSelection(m_aCurESelection); + std::unique_ptr xOper = std::make_unique(m_aCurToken); + xOper->SetSelection(m_aCurESelection); + + NextToken(); + + auto xFirst = DoSum(); + auto xSecond = DoSum(); + xSNode->SetSubNodes(std::move(xFirst), std::move(xOper), std::move(xSecond)); + return xSNode; +} + +std::unique_ptr SmParser5::DoStack() +{ + DepthProtect aDepthGuard(m_nParseDepth); + + std::unique_ptr xSNode(new SmTableNode(m_aCurToken)); + xSNode->SetSelection(m_aCurESelection); + NextToken(); + if (m_aCurToken.eType != TLGROUP) + return DoError(SmParseError::LgroupExpected); + std::vector> aExprArr; + do + { + NextToken(); + aExprArr.push_back(DoAlign()); + } while (m_aCurToken.eType == TPOUND); + + if (m_aCurToken.eType == TRGROUP) + NextToken(); + else + aExprArr.emplace_back(DoError(SmParseError::RgroupExpected)); + + xSNode->SetSubNodes(buildNodeArray(aExprArr)); + return xSNode; +} + +std::unique_ptr SmParser5::DoMatrix() +{ + DepthProtect aDepthGuard(m_nParseDepth); + + std::unique_ptr xMNode(new SmMatrixNode(m_aCurToken)); + xMNode->SetSelection(m_aCurESelection); + NextToken(); + if (m_aCurToken.eType != TLGROUP) + return DoError(SmParseError::LgroupExpected); + + std::vector> aExprArr; + do + { + NextToken(); + aExprArr.push_back(DoAlign()); + } while (m_aCurToken.eType == TPOUND); + + size_t nCol = aExprArr.size(); + size_t nRow = 1; + while (m_aCurToken.eType == TDPOUND) + { + NextToken(); + for (size_t i = 0; i < nCol; i++) + { + auto xNode = DoAlign(); + if (i < (nCol - 1)) + { + if (m_aCurToken.eType == TPOUND) + NextToken(); + else + xNode = DoError(SmParseError::PoundExpected); + } + aExprArr.emplace_back(std::move(xNode)); + } + ++nRow; + } + + if (m_aCurToken.eType == TRGROUP) + NextToken(); + else + { + std::unique_ptr xENode(DoError(SmParseError::RgroupExpected)); + if 
(aExprArr.empty()) + nRow = nCol = 1; + else + aExprArr.pop_back(); + aExprArr.emplace_back(std::move(xENode)); + } + + xMNode->SetSubNodes(buildNodeArray(aExprArr)); + xMNode->SetRowCol(static_cast(nRow), static_cast(nCol)); + return std::unique_ptr(xMNode.release()); +} + +std::unique_ptr SmParser5::DoSpecial() +{ + DepthProtect aDepthGuard(m_nParseDepth); + + bool bReplace = false; + OUString& rName = m_aCurToken.aText; + OUString aNewName; + + // conversion of symbol names for 6.0 (XML) file format + // (name change on import / export. + // UI uses localized names XML file format does not.) + if (rName.startsWith("%")) + { + if (IsImportSymbolNames()) + { + aNewName = SmLocalizedSymbolData::GetUiSymbolName(rName.subView(1)); + bReplace = true; + } + else if (IsExportSymbolNames()) + { + aNewName = SmLocalizedSymbolData::GetExportSymbolName(rName.subView(1)); + bReplace = true; + } + } + if (!aNewName.isEmpty()) + aNewName = "%" + aNewName; + + if (bReplace && !aNewName.isEmpty() && rName != aNewName) + { + Replace(GetTokenIndex(), rName.getLength(), aNewName); + rName = aNewName; + } + + // add symbol name to list of used symbols + const OUString aSymbolName(m_aCurToken.aText.copy(1)); + if (!aSymbolName.isEmpty()) + m_aUsedSymbols.insert(aSymbolName); + + auto pNode = std::make_unique(m_aCurToken); + pNode->SetSelection(m_aCurESelection); + NextToken(); + return pNode; +} + +std::unique_ptr SmParser5::DoGlyphSpecial() +{ + DepthProtect aDepthGuard(m_nParseDepth); + + auto pNode = std::make_unique(m_aCurToken); + NextToken(); + return pNode; +} + +std::unique_ptr SmParser5::DoError(SmParseError eError) +{ + DepthProtect aDepthGuard(m_nParseDepth); + + // Identify error message + OUString sStrBuf(SmResId(RID_ERR_IDENT) + starmathdatabase::getParseErrorDesc(eError)); + + // Generate error node + m_aCurToken.eType = TERROR; + m_aCurToken.cMathChar = sStrBuf; + auto xSNode = std::make_unique(m_aCurToken); + SmErrorNode* pErr(new SmErrorNode(m_aCurToken)); + 
pErr->SetSelection(m_aCurESelection); + xSNode->SetSubNode(0, pErr); + + // Append error to the error list + SmErrorDesc aErrDesc(eError, xSNode.get(), m_aCurToken.cMathChar); + m_aErrDescList.push_back(aErrDesc); + + NextToken(); + + return xSNode; +} + +// end grammar + +SmParser5::SmParser5() + : m_nCurError(0) + , m_nBufferIndex(0) + , m_nTokenIndex(0) + , m_nRow(0) + , m_nColOff(0) + , m_bImportSymNames(false) + , m_bExportSymNames(false) + , m_nParseDepth(0) + , m_aNumCC(LanguageTag(LANGUAGE_ENGLISH_US)) + , m_pSysCC(&SM_MOD()->GetSysLocale().GetCharClass()) +{ +} + +SmParser5::~SmParser5() {} + +std::unique_ptr SmParser5::Parse(const OUString& rBuffer) +{ + m_aUsedSymbols.clear(); + + m_aBufferString = convertLineEnd(rBuffer, LINEEND_LF); + m_nBufferIndex = 0; + m_nTokenIndex = 0; + m_nRow = 0; + m_nColOff = 0; + m_nCurError = -1; + + m_aErrDescList.clear(); + + NextToken(); + return DoTable(); +} + +std::unique_ptr SmParser5::ParseExpression(const OUString& rBuffer) +{ + m_aBufferString = convertLineEnd(rBuffer, LINEEND_LF); + m_nBufferIndex = 0; + m_nTokenIndex = 0; + m_nRow = 0; + m_nColOff = 0; + m_nCurError = -1; + + m_aErrDescList.clear(); + + NextToken(); + return DoExpression(); +} + +const SmErrorDesc* SmParser5::NextError() +{ + if (!m_aErrDescList.empty()) + if (m_nCurError > 0) + return &m_aErrDescList[--m_nCurError]; + else + { + m_nCurError = 0; + return &m_aErrDescList[m_nCurError]; + } + else + return nullptr; +} + +const SmErrorDesc* SmParser5::PrevError() +{ + if (!m_aErrDescList.empty()) + if (m_nCurError < static_cast(m_aErrDescList.size() - 1)) + return &m_aErrDescList[++m_nCurError]; + else + { + m_nCurError = static_cast(m_aErrDescList.size() - 1); + return &m_aErrDescList[m_nCurError]; + } + else + return nullptr; +} + +const SmErrorDesc* SmParser5::GetError() const +{ + if (m_aErrDescList.empty()) + return nullptr; + return &m_aErrDescList.front(); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ -- cgit v1.2.3