summaryrefslogtreecommitdiffstats
path: root/sc/source/core/tool/stringutil.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'sc/source/core/tool/stringutil.cxx')
-rw-r--r--sc/source/core/tool/stringutil.cxx466
1 files changed, 466 insertions, 0 deletions
diff --git a/sc/source/core/tool/stringutil.cxx b/sc/source/core/tool/stringutil.cxx
new file mode 100644
index 000000000..493f3fdee
--- /dev/null
+++ b/sc/source/core/tool/stringutil.cxx
@@ -0,0 +1,466 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <stringutil.hxx>
+#include <svl/numformat.hxx>
+#include <svl/zforlist.hxx>
+
+#include <rtl/ustrbuf.hxx>
+#include <rtl/strbuf.hxx>
+#include <rtl/math.hxx>
+
+ScSetStringParam::ScSetStringParam() :
+ mpNumFormatter(nullptr),
+ mbDetectNumberFormat(true),
+ meSetTextNumFormat(Never),
+ mbHandleApostrophe(true),
+ meStartListening(sc::SingleCellListening),
+ mbCheckLinkFormula(false)
+{
+}
+
+void ScSetStringParam::setTextInput()
+{
+ mbDetectNumberFormat = false;
+ mbHandleApostrophe = false;
+ meSetTextNumFormat = Always;
+}
+
+void ScSetStringParam::setNumericInput()
+{
+ mbDetectNumberFormat = true;
+ mbHandleApostrophe = true;
+ meSetTextNumFormat = Never;
+}
+
+bool ScStringUtil::parseSimpleNumber(
+ const OUString& rStr, sal_Unicode dsep, sal_Unicode gsep, sal_Unicode dsepa, double& rVal)
+{
+ // Actually almost the entire pre-check is unnecessary and we could call
+ // rtl::math::stringToDouble() just after having exchanged ascii space with
+ // non-breaking space, if it wasn't for check of grouped digits. The NaN
+ // and Inf cases that are accepted by stringToDouble() could be detected
+ // using std::isfinite() on the result.
+
+ /* TODO: The grouped digits check isn't even valid for locales that do not
+ * group in thousands ... e.g. Indian locales. But that's something also
+ * the number scanner doesn't implement yet, only the formatter. */
+
+ OUStringBuffer aBuf;
+
+ sal_Int32 i = 0;
+ sal_Int32 n = rStr.getLength();
+ const sal_Unicode* p = rStr.getStr();
+ const sal_Unicode* pLast = p + (n-1);
+ sal_Int32 nPosDSep = -1, nPosGSep = -1;
+ sal_uInt32 nDigitCount = 0;
+ bool haveSeenDigit = false;
+ sal_Int32 nPosExponent = -1;
+
+ // Skip preceding spaces.
+ for (i = 0; i < n; ++i, ++p)
+ {
+ sal_Unicode c = *p;
+ if (c != 0x0020 && c != 0x00A0)
+ // first non-space character. Exit.
+ break;
+ }
+
+ if (i == n)
+ // the whole string is space. Fail.
+ return false;
+
+ n -= i; // Subtract the length of the preceding spaces.
+
+ // Determine the last non-space character.
+ for (; p != pLast; --pLast, --n)
+ {
+ sal_Unicode c = *pLast;
+ if (c != 0x0020 && c != 0x00A0)
+ // Non space character. Exit.
+ break;
+ }
+
+ for (i = 0; i < n; ++i, ++p)
+ {
+ sal_Unicode c = *p;
+ if (c == 0x0020 && gsep == 0x00A0)
+ // ascii space to unicode space if that is group separator
+ c = 0x00A0;
+
+ if ('0' <= c && c <= '9')
+ {
+ // this is a digit.
+ aBuf.append(c);
+ haveSeenDigit = true;
+ ++nDigitCount;
+ }
+ else if (c == dsep || (dsepa && c == dsepa))
+ {
+ // this is a decimal separator.
+
+ if (nPosDSep >= 0)
+ // a second decimal separator -> not a valid number.
+ return false;
+
+ if (nPosGSep >= 0 && i - nPosGSep != 4)
+ // the number has a group separator and the decimal sep is not
+ // positioned correctly.
+ return false;
+
+ nPosDSep = i;
+ nPosGSep = -1;
+ aBuf.append(dsep); // append the separator that is parsed in stringToDouble() below
+ nDigitCount = 0;
+ }
+ else if (c == gsep)
+ {
+ // this is a group (thousand) separator.
+
+ if (!haveSeenDigit)
+ // not allowed before digits.
+ return false;
+
+ if (nPosDSep >= 0)
+ // not allowed after the decimal separator.
+ return false;
+
+ if (nPosGSep >= 0 && nDigitCount != 3)
+ // must be exactly 3 digits since the last group separator.
+ return false;
+
+ if (nPosExponent >= 0)
+ // not allowed in exponent.
+ return false;
+
+ nPosGSep = i;
+ nDigitCount = 0;
+ }
+ else if (c == '-' || c == '+')
+ {
+ // A sign must be the first character if it's given, or immediately
+ // follow the exponent character if present.
+ if (i == 0 || (nPosExponent >= 0 && i == nPosExponent + 1))
+ aBuf.append(c);
+ else
+ return false;
+ }
+ else if (c == 'E' || c == 'e')
+ {
+ // this is an exponent designator.
+
+ if (nPosExponent >= 0)
+ // Only one exponent allowed.
+ return false;
+
+ if (nPosGSep >= 0 && nDigitCount != 3)
+ // must be exactly 3 digits since the last group separator.
+ return false;
+
+ aBuf.append(c);
+ nPosExponent = i;
+ nPosDSep = -1;
+ nPosGSep = -1;
+ nDigitCount = 0;
+ }
+ else
+ return false;
+ }
+
+ // finished parsing the number.
+
+ if (nPosGSep >= 0 && nDigitCount != 3)
+ // must be exactly 3 digits since the last group separator.
+ return false;
+
+ rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
+ sal_Int32 nParseEnd = 0;
+ rVal = ::rtl::math::stringToDouble( aBuf, dsep, gsep, &eStatus, &nParseEnd);
+ if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aBuf.getLength())
+ // Not a valid number or not entire string consumed.
+ return false;
+
+ return true;
+}
+
+bool ScStringUtil::parseSimpleNumber(
+ const char* p, size_t n, char dsep, char gsep, double& rVal)
+{
+ // Actually almost the entire pre-check is unnecessary and we could call
+ // rtl::math::stringToDouble() just after having exchanged ascii space with
+ // non-breaking space, if it wasn't for check of grouped digits. The NaN
+ // and Inf cases that are accepted by stringToDouble() could be detected
+ // using std::isfinite() on the result.
+
+ /* TODO: The grouped digits check isn't even valid for locales that do not
+ * group in thousands ... e.g. Indian locales. But that's something also
+ * the number scanner doesn't implement yet, only the formatter. */
+
+ OStringBuffer aBuf;
+
+ size_t i = 0;
+ const char* pLast = p + (n-1);
+ sal_Int32 nPosDSep = -1, nPosGSep = -1;
+ sal_uInt32 nDigitCount = 0;
+ bool haveSeenDigit = false;
+ sal_Int32 nPosExponent = -1;
+
+ // Skip preceding spaces.
+ for (i = 0; i < n; ++i, ++p)
+ {
+ char c = *p;
+ if (c != ' ')
+ // first non-space character. Exit.
+ break;
+ }
+
+ if (i == n)
+ // the whole string is space. Fail.
+ return false;
+
+ n -= i; // Subtract the length of the preceding spaces.
+
+ // Determine the last non-space character.
+ for (; p != pLast; --pLast, --n)
+ {
+ char c = *pLast;
+ if (c != ' ')
+ // Non space character. Exit.
+ break;
+ }
+
+ for (i = 0; i < n; ++i, ++p)
+ {
+ char c = *p;
+
+ if ('0' <= c && c <= '9')
+ {
+ // this is a digit.
+ aBuf.append(c);
+ haveSeenDigit = true;
+ ++nDigitCount;
+ }
+ else if (c == dsep)
+ {
+ // this is a decimal separator.
+
+ if (nPosDSep >= 0)
+ // a second decimal separator -> not a valid number.
+ return false;
+
+ if (nPosGSep >= 0 && i - nPosGSep != 4)
+ // the number has a group separator and the decimal sep is not
+ // positioned correctly.
+ return false;
+
+ nPosDSep = i;
+ nPosGSep = -1;
+ aBuf.append(c);
+ nDigitCount = 0;
+ }
+ else if (c == gsep)
+ {
+ // this is a group (thousand) separator.
+
+ if (!haveSeenDigit)
+ // not allowed before digits.
+ return false;
+
+ if (nPosDSep >= 0)
+ // not allowed after the decimal separator.
+ return false;
+
+ if (nPosGSep >= 0 && nDigitCount != 3)
+ // must be exactly 3 digits since the last group separator.
+ return false;
+
+ if (nPosExponent >= 0)
+ // not allowed in exponent.
+ return false;
+
+ nPosGSep = i;
+ nDigitCount = 0;
+ }
+ else if (c == '-' || c == '+')
+ {
+ // A sign must be the first character if it's given, or immediately
+ // follow the exponent character if present.
+ if (i == 0 || (nPosExponent >= 0 && i == static_cast<size_t>(nPosExponent+1)))
+ aBuf.append(c);
+ else
+ return false;
+ }
+ else if (c == 'E' || c == 'e')
+ {
+ // this is an exponent designator.
+
+ if (nPosExponent >= 0)
+ // Only one exponent allowed.
+ return false;
+
+ if (nPosGSep >= 0 && nDigitCount != 3)
+ // must be exactly 3 digits since the last group separator.
+ return false;
+
+ aBuf.append(c);
+ nPosExponent = i;
+ nPosDSep = -1;
+ nPosGSep = -1;
+ nDigitCount = 0;
+ }
+ else
+ return false;
+ }
+
+ // finished parsing the number.
+
+ if (nPosGSep >= 0 && nDigitCount != 3)
+ // must be exactly 3 digits since the last group separator.
+ return false;
+
+ rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
+ sal_Int32 nParseEnd = 0;
+ rVal = ::rtl::math::stringToDouble( aBuf, dsep, gsep, &eStatus, &nParseEnd);
+ if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aBuf.getLength())
+ // Not a valid number or not entire string consumed.
+ return false;
+
+ return true;
+}
+
+OUString ScStringUtil::GetQuotedToken(const OUString &rIn, sal_Int32 nToken, const OUString& rQuotedPairs,
+ sal_Unicode cTok, sal_Int32& rIndex )
+{
+ assert( !(rQuotedPairs.getLength()%2) );
+ assert( rQuotedPairs.indexOf(cTok) == -1 );
+
+ const sal_Unicode* pStr = rIn.getStr();
+ const sal_Unicode* pQuotedStr = rQuotedPairs.getStr();
+ sal_Unicode cQuotedEndChar = 0;
+ sal_Int32 nQuotedLen = rQuotedPairs.getLength();
+ sal_Int32 nLen = rIn.getLength();
+ sal_Int32 nTok = 0;
+ sal_Int32 nFirstChar = rIndex;
+ sal_Int32 i = nFirstChar;
+
+ // detect token position and length
+ pStr += i;
+ while ( i < nLen )
+ {
+ sal_Unicode c = *pStr;
+ if ( cQuotedEndChar )
+ {
+ // end of the quote reached ?
+ if ( c == cQuotedEndChar )
+ cQuotedEndChar = 0;
+ }
+ else
+ {
+ // Is the char a quote-begin char ?
+ sal_Int32 nQuoteIndex = 0;
+ while ( nQuoteIndex < nQuotedLen )
+ {
+ if ( pQuotedStr[nQuoteIndex] == c )
+ {
+ cQuotedEndChar = pQuotedStr[nQuoteIndex+1];
+ break;
+ }
+ else
+ nQuoteIndex += 2;
+ }
+
+ // If the token-char matches then increase TokCount
+ if ( c == cTok )
+ {
+ ++nTok;
+
+ if ( nTok == nToken )
+ nFirstChar = i+1;
+ else
+ {
+ if ( nTok > nToken )
+ break;
+ }
+ }
+ }
+
+ ++pStr;
+ ++i;
+ }
+
+ if ( nTok >= nToken )
+ {
+ if ( i < nLen )
+ rIndex = i+1;
+ else
+ rIndex = -1;
+ return rIn.copy( nFirstChar, i-nFirstChar );
+ }
+ else
+ {
+ rIndex = -1;
+ return OUString();
+ }
+}
+
+bool ScStringUtil::isMultiline( std::u16string_view rStr )
+{
+ return rStr.find_first_of(u"\n\r") != std::u16string_view::npos;
+}
+
+ScInputStringType ScStringUtil::parseInputString(
+ SvNumberFormatter& rFormatter, const OUString& rStr, LanguageType eLang )
+{
+ ScInputStringType aRet;
+ aRet.mnFormatType = SvNumFormatType::ALL;
+ aRet.meType = ScInputStringType::Unknown;
+ aRet.maText = rStr;
+ aRet.mfValue = 0.0;
+
+ if (rStr.getLength() > 1 && rStr[0] == '=')
+ {
+ aRet.meType = ScInputStringType::Formula;
+ }
+ else if (rStr.getLength() > 1 && rStr[0] == '\'')
+ {
+ // for bEnglish, "'" at the beginning is always interpreted as text
+ // marker and stripped
+ aRet.maText = rStr.copy(1);
+ aRet.meType = ScInputStringType::Text;
+ }
+ else // test for English number format (only)
+ {
+ sal_uInt32 nNumFormat = rFormatter.GetStandardIndex(eLang);
+
+ if (rFormatter.IsNumberFormat(rStr, nNumFormat, aRet.mfValue))
+ {
+ aRet.meType = ScInputStringType::Number;
+ aRet.mnFormatType = rFormatter.GetType(nNumFormat);
+ }
+ else if (!rStr.isEmpty())
+ aRet.meType = ScInputStringType::Text;
+
+ // the (English) number format is not set
+ //TODO: find and replace with matching local format???
+ }
+
+ return aRet;
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */