diff options
Diffstat (limited to 'sc/source/core/tool/stringutil.cxx')
-rw-r--r-- | sc/source/core/tool/stringutil.cxx | 466 |
1 files changed, 466 insertions, 0 deletions
diff --git a/sc/source/core/tool/stringutil.cxx b/sc/source/core/tool/stringutil.cxx new file mode 100644 index 000000000..493f3fdee --- /dev/null +++ b/sc/source/core/tool/stringutil.cxx @@ -0,0 +1,466 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <stringutil.hxx> +#include <svl/numformat.hxx> +#include <svl/zforlist.hxx> + +#include <rtl/ustrbuf.hxx> +#include <rtl/strbuf.hxx> +#include <rtl/math.hxx> + +ScSetStringParam::ScSetStringParam() : + mpNumFormatter(nullptr), + mbDetectNumberFormat(true), + meSetTextNumFormat(Never), + mbHandleApostrophe(true), + meStartListening(sc::SingleCellListening), + mbCheckLinkFormula(false) +{ +} + +void ScSetStringParam::setTextInput() +{ + mbDetectNumberFormat = false; + mbHandleApostrophe = false; + meSetTextNumFormat = Always; +} + +void ScSetStringParam::setNumericInput() +{ + mbDetectNumberFormat = true; + mbHandleApostrophe = true; + meSetTextNumFormat = Never; +} + +bool ScStringUtil::parseSimpleNumber( + const OUString& rStr, sal_Unicode dsep, sal_Unicode gsep, sal_Unicode dsepa, double& rVal) +{ + // Actually almost the entire pre-check is unnecessary and we could call + // rtl::math::stringToDouble() just after having exchanged ascii space with + // non-breaking space, if it wasn't for check of grouped digits. The NaN + // and Inf cases that are accepted by stringToDouble() could be detected + // using std::isfinite() on the result. + + /* TODO: The grouped digits check isn't even valid for locales that do not + * group in thousands ... e.g. Indian locales. But that's something also + * the number scanner doesn't implement yet, only the formatter. */ + + OUStringBuffer aBuf; + + sal_Int32 i = 0; + sal_Int32 n = rStr.getLength(); + const sal_Unicode* p = rStr.getStr(); + const sal_Unicode* pLast = p + (n-1); + sal_Int32 nPosDSep = -1, nPosGSep = -1; + sal_uInt32 nDigitCount = 0; + bool haveSeenDigit = false; + sal_Int32 nPosExponent = -1; + + // Skip preceding spaces. + for (i = 0; i < n; ++i, ++p) + { + sal_Unicode c = *p; + if (c != 0x0020 && c != 0x00A0) + // first non-space character. Exit. + break; + } + + if (i == n) + // the whole string is space. Fail. + return false; + + n -= i; // Subtract the length of the preceding spaces. + + // Determine the last non-space character. + for (; p != pLast; --pLast, --n) + { + sal_Unicode c = *pLast; + if (c != 0x0020 && c != 0x00A0) + // Non space character. Exit. + break; + } + + for (i = 0; i < n; ++i, ++p) + { + sal_Unicode c = *p; + if (c == 0x0020 && gsep == 0x00A0) + // ascii space to unicode space if that is group separator + c = 0x00A0; + + if ('0' <= c && c <= '9') + { + // this is a digit. + aBuf.append(c); + haveSeenDigit = true; + ++nDigitCount; + } + else if (c == dsep || (dsepa && c == dsepa)) + { + // this is a decimal separator. + + if (nPosDSep >= 0) + // a second decimal separator -> not a valid number. + return false; + + if (nPosGSep >= 0 && i - nPosGSep != 4) + // the number has a group separator and the decimal sep is not + // positioned correctly. + return false; + + nPosDSep = i; + nPosGSep = -1; + aBuf.append(dsep); // append the separator that is parsed in stringToDouble() below + nDigitCount = 0; + } + else if (c == gsep) + { + // this is a group (thousand) separator. + + if (!haveSeenDigit) + // not allowed before digits. + return false; + + if (nPosDSep >= 0) + // not allowed after the decimal separator. + return false; + + if (nPosGSep >= 0 && nDigitCount != 3) + // must be exactly 3 digits since the last group separator. + return false; + + if (nPosExponent >= 0) + // not allowed in exponent. + return false; + + nPosGSep = i; + nDigitCount = 0; + } + else if (c == '-' || c == '+') + { + // A sign must be the first character if it's given, or immediately + // follow the exponent character if present. + if (i == 0 || (nPosExponent >= 0 && i == nPosExponent + 1)) + aBuf.append(c); + else + return false; + } + else if (c == 'E' || c == 'e') + { + // this is an exponent designator. + + if (nPosExponent >= 0) + // Only one exponent allowed. + return false; + + if (nPosGSep >= 0 && nDigitCount != 3) + // must be exactly 3 digits since the last group separator. + return false; + + aBuf.append(c); + nPosExponent = i; + nPosDSep = -1; + nPosGSep = -1; + nDigitCount = 0; + } + else + return false; + } + + // finished parsing the number. + + if (nPosGSep >= 0 && nDigitCount != 3) + // must be exactly 3 digits since the last group separator. + return false; + + rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok; + sal_Int32 nParseEnd = 0; + rVal = ::rtl::math::stringToDouble( aBuf, dsep, gsep, &eStatus, &nParseEnd); + if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aBuf.getLength()) + // Not a valid number or not entire string consumed. + return false; + + return true; +} + +bool ScStringUtil::parseSimpleNumber( + const char* p, size_t n, char dsep, char gsep, double& rVal) +{ + // Actually almost the entire pre-check is unnecessary and we could call + // rtl::math::stringToDouble() just after having exchanged ascii space with + // non-breaking space, if it wasn't for check of grouped digits. The NaN + // and Inf cases that are accepted by stringToDouble() could be detected + // using std::isfinite() on the result. + + /* TODO: The grouped digits check isn't even valid for locales that do not + * group in thousands ... e.g. Indian locales. But that's something also + * the number scanner doesn't implement yet, only the formatter. */ + + OStringBuffer aBuf; + + size_t i = 0; + const char* pLast = p + (n-1); + sal_Int32 nPosDSep = -1, nPosGSep = -1; + sal_uInt32 nDigitCount = 0; + bool haveSeenDigit = false; + sal_Int32 nPosExponent = -1; + + // Skip preceding spaces. + for (i = 0; i < n; ++i, ++p) + { + char c = *p; + if (c != ' ') + // first non-space character. Exit. + break; + } + + if (i == n) + // the whole string is space. Fail. + return false; + + n -= i; // Subtract the length of the preceding spaces. + + // Determine the last non-space character. + for (; p != pLast; --pLast, --n) + { + char c = *pLast; + if (c != ' ') + // Non space character. Exit. + break; + } + + for (i = 0; i < n; ++i, ++p) + { + char c = *p; + + if ('0' <= c && c <= '9') + { + // this is a digit. + aBuf.append(c); + haveSeenDigit = true; + ++nDigitCount; + } + else if (c == dsep) + { + // this is a decimal separator. + + if (nPosDSep >= 0) + // a second decimal separator -> not a valid number. + return false; + + if (nPosGSep >= 0 && i - nPosGSep != 4) + // the number has a group separator and the decimal sep is not + // positioned correctly. + return false; + + nPosDSep = i; + nPosGSep = -1; + aBuf.append(c); + nDigitCount = 0; + } + else if (c == gsep) + { + // this is a group (thousand) separator. + + if (!haveSeenDigit) + // not allowed before digits. + return false; + + if (nPosDSep >= 0) + // not allowed after the decimal separator. + return false; + + if (nPosGSep >= 0 && nDigitCount != 3) + // must be exactly 3 digits since the last group separator. + return false; + + if (nPosExponent >= 0) + // not allowed in exponent. + return false; + + nPosGSep = i; + nDigitCount = 0; + } + else if (c == '-' || c == '+') + { + // A sign must be the first character if it's given, or immediately + // follow the exponent character if present. + if (i == 0 || (nPosExponent >= 0 && i == static_cast<size_t>(nPosExponent+1))) + aBuf.append(c); + else + return false; + } + else if (c == 'E' || c == 'e') + { + // this is an exponent designator. + + if (nPosExponent >= 0) + // Only one exponent allowed. + return false; + + if (nPosGSep >= 0 && nDigitCount != 3) + // must be exactly 3 digits since the last group separator. + return false; + + aBuf.append(c); + nPosExponent = i; + nPosDSep = -1; + nPosGSep = -1; + nDigitCount = 0; + } + else + return false; + } + + // finished parsing the number. + + if (nPosGSep >= 0 && nDigitCount != 3) + // must be exactly 3 digits since the last group separator. + return false; + + rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok; + sal_Int32 nParseEnd = 0; + rVal = ::rtl::math::stringToDouble( aBuf, dsep, gsep, &eStatus, &nParseEnd); + if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aBuf.getLength()) + // Not a valid number or not entire string consumed. + return false; + + return true; +} + +OUString ScStringUtil::GetQuotedToken(const OUString &rIn, sal_Int32 nToken, const OUString& rQuotedPairs, + sal_Unicode cTok, sal_Int32& rIndex ) +{ + assert( !(rQuotedPairs.getLength()%2) ); + assert( rQuotedPairs.indexOf(cTok) == -1 ); + + const sal_Unicode* pStr = rIn.getStr(); + const sal_Unicode* pQuotedStr = rQuotedPairs.getStr(); + sal_Unicode cQuotedEndChar = 0; + sal_Int32 nQuotedLen = rQuotedPairs.getLength(); + sal_Int32 nLen = rIn.getLength(); + sal_Int32 nTok = 0; + sal_Int32 nFirstChar = rIndex; + sal_Int32 i = nFirstChar; + + // detect token position and length + pStr += i; + while ( i < nLen ) + { + sal_Unicode c = *pStr; + if ( cQuotedEndChar ) + { + // end of the quote reached ? + if ( c == cQuotedEndChar ) + cQuotedEndChar = 0; + } + else + { + // Is the char a quote-begin char ? + sal_Int32 nQuoteIndex = 0; + while ( nQuoteIndex < nQuotedLen ) + { + if ( pQuotedStr[nQuoteIndex] == c ) + { + cQuotedEndChar = pQuotedStr[nQuoteIndex+1]; + break; + } + else + nQuoteIndex += 2; + } + + // If the token-char matches then increase TokCount + if ( c == cTok ) + { + ++nTok; + + if ( nTok == nToken ) + nFirstChar = i+1; + else + { + if ( nTok > nToken ) + break; + } + } + } + + ++pStr; + ++i; + } + + if ( nTok >= nToken ) + { + if ( i < nLen ) + rIndex = i+1; + else + rIndex = -1; + return rIn.copy( nFirstChar, i-nFirstChar ); + } + else + { + rIndex = -1; + return OUString(); + } +} + +bool ScStringUtil::isMultiline( std::u16string_view rStr ) +{ + return rStr.find_first_of(u"\n\r") != std::u16string_view::npos; +} + +ScInputStringType ScStringUtil::parseInputString( + SvNumberFormatter& rFormatter, const OUString& rStr, LanguageType eLang ) +{ + ScInputStringType aRet; + aRet.mnFormatType = SvNumFormatType::ALL; + aRet.meType = ScInputStringType::Unknown; + aRet.maText = rStr; + aRet.mfValue = 0.0; + + if (rStr.getLength() > 1 && rStr[0] == '=') + { + aRet.meType = ScInputStringType::Formula; + } + else if (rStr.getLength() > 1 && rStr[0] == '\'') + { + // for bEnglish, "'" at the beginning is always interpreted as text + // marker and stripped + aRet.maText = rStr.copy(1); + aRet.meType = ScInputStringType::Text; + } + else // test for English number format (only) + { + sal_uInt32 nNumFormat = rFormatter.GetStandardIndex(eLang); + + if (rFormatter.IsNumberFormat(rStr, nNumFormat, aRet.mfValue)) + { + aRet.meType = ScInputStringType::Number; + aRet.mnFormatType = rFormatter.GetType(nNumFormat); + } + else if (!rStr.isEmpty()) + aRet.meType = ScInputStringType::Text; + + // the (English) number format is not set + //TODO: find and replace with matching local format??? + } + + return aRet; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |