470 lines
14 KiB
C++
470 lines
14 KiB
C++
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
|
/*
|
|
* This file is part of the LibreOffice project.
|
|
*
|
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
*
|
|
* This file incorporates work covered by the following license notice:
|
|
*
|
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
* contributor license agreements. See the NOTICE file distributed
|
|
* with this work for additional information regarding copyright
|
|
* ownership. The ASF licenses this file to you under the Apache
|
|
* License, Version 2.0 (the "License"); you may not use this file
|
|
* except in compliance with the License. You may obtain a copy of
|
|
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
|
|
*/
|
|
|
|
#include <interpretercontext.hxx>
|
|
#include <stringutil.hxx>
|
|
#include <svl/numformat.hxx>
|
|
#include <svl/zforlist.hxx>
|
|
|
|
#include <rtl/ustrbuf.hxx>
|
|
#include <rtl/strbuf.hxx>
|
|
#include <rtl/math.hxx>
|
|
|
|
ScSetStringParam::ScSetStringParam() :
|
|
mpNumFormatter(nullptr),
|
|
mbDetectNumberFormat(true),
|
|
mbDetectScientificNumberFormat(true),
|
|
meSetTextNumFormat(Never),
|
|
mbHandleApostrophe(true),
|
|
meStartListening(sc::SingleCellListening),
|
|
mbCheckLinkFormula(false)
|
|
{
|
|
}
|
|
|
|
void ScSetStringParam::setTextInput()
|
|
{
|
|
mbDetectNumberFormat = false;
|
|
mbDetectScientificNumberFormat = false;
|
|
mbHandleApostrophe = false;
|
|
meSetTextNumFormat = Always;
|
|
}
|
|
|
|
void ScSetStringParam::setNumericInput()
|
|
{
|
|
mbDetectNumberFormat = true;
|
|
mbDetectScientificNumberFormat = true;
|
|
mbHandleApostrophe = true;
|
|
meSetTextNumFormat = Never;
|
|
}
|
|
|
|
bool ScStringUtil::parseSimpleNumber(
|
|
const OUString& rStr, sal_Unicode dsep, sal_Unicode gsep, sal_Unicode dsepa, double& rVal, bool bDetectScientificNumber)
|
|
{
|
|
// Actually almost the entire pre-check is unnecessary and we could call
|
|
// rtl::math::stringToDouble() just after having exchanged ascii space with
|
|
// non-breaking space, if it wasn't for check of grouped digits. The NaN
|
|
// and Inf cases that are accepted by stringToDouble() could be detected
|
|
// using std::isfinite() on the result.
|
|
|
|
/* TODO: The grouped digits check isn't even valid for locales that do not
|
|
* group in thousands ... e.g. Indian locales. But that's something also
|
|
* the number scanner doesn't implement yet, only the formatter. */
|
|
|
|
OUStringBuffer aBuf;
|
|
|
|
sal_Int32 i = 0;
|
|
sal_Int32 n = rStr.getLength();
|
|
const sal_Unicode* p = rStr.getStr();
|
|
const sal_Unicode* pLast = p + (n-1);
|
|
sal_Int32 nPosDSep = -1, nPosGSep = -1;
|
|
sal_uInt32 nDigitCount = 0;
|
|
bool haveSeenDigit = false;
|
|
sal_Int32 nPosExponent = -1;
|
|
|
|
// Skip preceding spaces.
|
|
for (i = 0; i < n; ++i, ++p)
|
|
{
|
|
sal_Unicode c = *p;
|
|
if (c != 0x0020 && c != 0x00A0)
|
|
// first non-space character. Exit.
|
|
break;
|
|
}
|
|
|
|
if (i == n)
|
|
// the whole string is space. Fail.
|
|
return false;
|
|
|
|
n -= i; // Subtract the length of the preceding spaces.
|
|
|
|
// Determine the last non-space character.
|
|
for (; p != pLast; --pLast, --n)
|
|
{
|
|
sal_Unicode c = *pLast;
|
|
if (c != 0x0020 && c != 0x00A0)
|
|
// Non space character. Exit.
|
|
break;
|
|
}
|
|
|
|
for (i = 0; i < n; ++i, ++p)
|
|
{
|
|
sal_Unicode c = *p;
|
|
if (c == 0x0020 && gsep == 0x00A0)
|
|
// ascii space to unicode space if that is group separator
|
|
c = 0x00A0;
|
|
|
|
if ('0' <= c && c <= '9')
|
|
{
|
|
// this is a digit.
|
|
aBuf.append(c);
|
|
haveSeenDigit = true;
|
|
++nDigitCount;
|
|
}
|
|
else if (c == dsep || (dsepa && c == dsepa))
|
|
{
|
|
// this is a decimal separator.
|
|
|
|
if (nPosDSep >= 0)
|
|
// a second decimal separator -> not a valid number.
|
|
return false;
|
|
|
|
if (nPosGSep >= 0 && i - nPosGSep != 4)
|
|
// the number has a group separator and the decimal sep is not
|
|
// positioned correctly.
|
|
return false;
|
|
|
|
nPosDSep = i;
|
|
nPosGSep = -1;
|
|
aBuf.append(dsep); // append the separator that is parsed in stringToDouble() below
|
|
nDigitCount = 0;
|
|
}
|
|
else if (c == gsep)
|
|
{
|
|
// this is a group (thousand) separator.
|
|
|
|
if (!haveSeenDigit)
|
|
// not allowed before digits.
|
|
return false;
|
|
|
|
if (nPosDSep >= 0)
|
|
// not allowed after the decimal separator.
|
|
return false;
|
|
|
|
if (nPosGSep >= 0 && nDigitCount != 3)
|
|
// must be exactly 3 digits since the last group separator.
|
|
return false;
|
|
|
|
if (nPosExponent >= 0)
|
|
// not allowed in exponent.
|
|
return false;
|
|
|
|
nPosGSep = i;
|
|
nDigitCount = 0;
|
|
}
|
|
else if (c == '-' || c == '+')
|
|
{
|
|
// A sign must be the first character if it's given, or immediately
|
|
// follow the exponent character if present.
|
|
if (i == 0 || (nPosExponent >= 0 && i == nPosExponent + 1))
|
|
aBuf.append(c);
|
|
else
|
|
return false;
|
|
}
|
|
else if (c == 'E' || c == 'e')
|
|
{
|
|
// this is an exponent designator.
|
|
|
|
if (nPosExponent >= 0 || !bDetectScientificNumber)
|
|
// Only one exponent allowed.
|
|
return false;
|
|
|
|
if (nPosGSep >= 0 && nDigitCount != 3)
|
|
// must be exactly 3 digits since the last group separator.
|
|
return false;
|
|
|
|
aBuf.append(c);
|
|
nPosExponent = i;
|
|
nPosDSep = -1;
|
|
nPosGSep = -1;
|
|
nDigitCount = 0;
|
|
}
|
|
else
|
|
return false;
|
|
}
|
|
|
|
// finished parsing the number.
|
|
|
|
if (nPosGSep >= 0 && nDigitCount != 3)
|
|
// must be exactly 3 digits since the last group separator.
|
|
return false;
|
|
|
|
rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
|
|
sal_Int32 nParseEnd = 0;
|
|
rVal = ::rtl::math::stringToDouble( aBuf, dsep, gsep, &eStatus, &nParseEnd);
|
|
if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aBuf.getLength())
|
|
// Not a valid number or not entire string consumed.
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
bool ScStringUtil::parseSimpleNumber(
|
|
const char* p, size_t n, char dsep, char gsep, double& rVal)
|
|
{
|
|
// Actually almost the entire pre-check is unnecessary and we could call
|
|
// rtl::math::stringToDouble() just after having exchanged ascii space with
|
|
// non-breaking space, if it wasn't for check of grouped digits. The NaN
|
|
// and Inf cases that are accepted by stringToDouble() could be detected
|
|
// using std::isfinite() on the result.
|
|
|
|
/* TODO: The grouped digits check isn't even valid for locales that do not
|
|
* group in thousands ... e.g. Indian locales. But that's something also
|
|
* the number scanner doesn't implement yet, only the formatter. */
|
|
|
|
OStringBuffer aBuf;
|
|
|
|
size_t i = 0;
|
|
const char* pLast = p + (n-1);
|
|
sal_Int32 nPosDSep = -1, nPosGSep = -1;
|
|
sal_uInt32 nDigitCount = 0;
|
|
bool haveSeenDigit = false;
|
|
sal_Int32 nPosExponent = -1;
|
|
|
|
// Skip preceding spaces.
|
|
for (i = 0; i < n; ++i, ++p)
|
|
{
|
|
char c = *p;
|
|
if (c != ' ')
|
|
// first non-space character. Exit.
|
|
break;
|
|
}
|
|
|
|
if (i == n)
|
|
// the whole string is space. Fail.
|
|
return false;
|
|
|
|
n -= i; // Subtract the length of the preceding spaces.
|
|
|
|
// Determine the last non-space character.
|
|
for (; p != pLast; --pLast, --n)
|
|
{
|
|
char c = *pLast;
|
|
if (c != ' ')
|
|
// Non space character. Exit.
|
|
break;
|
|
}
|
|
|
|
for (i = 0; i < n; ++i, ++p)
|
|
{
|
|
char c = *p;
|
|
|
|
if ('0' <= c && c <= '9')
|
|
{
|
|
// this is a digit.
|
|
aBuf.append(c);
|
|
haveSeenDigit = true;
|
|
++nDigitCount;
|
|
}
|
|
else if (c == dsep)
|
|
{
|
|
// this is a decimal separator.
|
|
|
|
if (nPosDSep >= 0)
|
|
// a second decimal separator -> not a valid number.
|
|
return false;
|
|
|
|
if (nPosGSep >= 0 && i - nPosGSep != 4)
|
|
// the number has a group separator and the decimal sep is not
|
|
// positioned correctly.
|
|
return false;
|
|
|
|
nPosDSep = i;
|
|
nPosGSep = -1;
|
|
aBuf.append(c);
|
|
nDigitCount = 0;
|
|
}
|
|
else if (c == gsep)
|
|
{
|
|
// this is a group (thousand) separator.
|
|
|
|
if (!haveSeenDigit)
|
|
// not allowed before digits.
|
|
return false;
|
|
|
|
if (nPosDSep >= 0)
|
|
// not allowed after the decimal separator.
|
|
return false;
|
|
|
|
if (nPosGSep >= 0 && nDigitCount != 3)
|
|
// must be exactly 3 digits since the last group separator.
|
|
return false;
|
|
|
|
if (nPosExponent >= 0)
|
|
// not allowed in exponent.
|
|
return false;
|
|
|
|
nPosGSep = i;
|
|
nDigitCount = 0;
|
|
}
|
|
else if (c == '-' || c == '+')
|
|
{
|
|
// A sign must be the first character if it's given, or immediately
|
|
// follow the exponent character if present.
|
|
if (i == 0 || (nPosExponent >= 0 && i == static_cast<size_t>(nPosExponent+1)))
|
|
aBuf.append(c);
|
|
else
|
|
return false;
|
|
}
|
|
else if (c == 'E' || c == 'e')
|
|
{
|
|
// this is an exponent designator.
|
|
|
|
if (nPosExponent >= 0)
|
|
// Only one exponent allowed.
|
|
return false;
|
|
|
|
if (nPosGSep >= 0 && nDigitCount != 3)
|
|
// must be exactly 3 digits since the last group separator.
|
|
return false;
|
|
|
|
aBuf.append(c);
|
|
nPosExponent = i;
|
|
nPosDSep = -1;
|
|
nPosGSep = -1;
|
|
nDigitCount = 0;
|
|
}
|
|
else
|
|
return false;
|
|
}
|
|
|
|
// finished parsing the number.
|
|
|
|
if (nPosGSep >= 0 && nDigitCount != 3)
|
|
// must be exactly 3 digits since the last group separator.
|
|
return false;
|
|
|
|
rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
|
|
sal_Int32 nParseEnd = 0;
|
|
rVal = ::rtl::math::stringToDouble( aBuf, dsep, gsep, &eStatus, &nParseEnd);
|
|
if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aBuf.getLength())
|
|
// Not a valid number or not entire string consumed.
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
OUString ScStringUtil::GetQuotedToken(const OUString &rIn, sal_Int32 nToken, const OUString& rQuotedPairs,
|
|
sal_Unicode cTok, sal_Int32& rIndex )
|
|
{
|
|
assert( !(rQuotedPairs.getLength()%2) );
|
|
assert( rQuotedPairs.indexOf(cTok) == -1 );
|
|
|
|
const sal_Unicode* pStr = rIn.getStr();
|
|
const sal_Unicode* pQuotedStr = rQuotedPairs.getStr();
|
|
sal_Unicode cQuotedEndChar = 0;
|
|
sal_Int32 nQuotedLen = rQuotedPairs.getLength();
|
|
sal_Int32 nLen = rIn.getLength();
|
|
sal_Int32 nTok = 0;
|
|
sal_Int32 nFirstChar = rIndex;
|
|
sal_Int32 i = nFirstChar;
|
|
|
|
// detect token position and length
|
|
pStr += i;
|
|
while ( i < nLen )
|
|
{
|
|
sal_Unicode c = *pStr;
|
|
if ( cQuotedEndChar )
|
|
{
|
|
// end of the quote reached ?
|
|
if ( c == cQuotedEndChar )
|
|
cQuotedEndChar = 0;
|
|
}
|
|
else
|
|
{
|
|
// Is the char a quote-begin char ?
|
|
sal_Int32 nQuoteIndex = 0;
|
|
while ( nQuoteIndex < nQuotedLen )
|
|
{
|
|
if ( pQuotedStr[nQuoteIndex] == c )
|
|
{
|
|
cQuotedEndChar = pQuotedStr[nQuoteIndex+1];
|
|
break;
|
|
}
|
|
else
|
|
nQuoteIndex += 2;
|
|
}
|
|
|
|
// If the token-char matches then increase TokCount
|
|
if ( c == cTok )
|
|
{
|
|
++nTok;
|
|
|
|
if ( nTok == nToken )
|
|
nFirstChar = i+1;
|
|
else
|
|
{
|
|
if ( nTok > nToken )
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
++pStr;
|
|
++i;
|
|
}
|
|
|
|
if ( nTok >= nToken )
|
|
{
|
|
if ( i < nLen )
|
|
rIndex = i+1;
|
|
else
|
|
rIndex = -1;
|
|
return rIn.copy( nFirstChar, i-nFirstChar );
|
|
}
|
|
else
|
|
{
|
|
rIndex = -1;
|
|
return OUString();
|
|
}
|
|
}
|
|
|
|
bool ScStringUtil::isMultiline( std::u16string_view rStr )
|
|
{
|
|
return rStr.find_first_of(u"\n\r") != std::u16string_view::npos;
|
|
}
|
|
|
|
ScInputStringType ScStringUtil::parseInputString(
|
|
ScInterpreterContext& rContext, const OUString& rStr, LanguageType eLang )
|
|
{
|
|
ScInputStringType aRet;
|
|
aRet.mnFormatType = SvNumFormatType::ALL;
|
|
aRet.meType = ScInputStringType::Unknown;
|
|
aRet.maText = rStr;
|
|
aRet.mfValue = 0.0;
|
|
|
|
if (rStr.getLength() > 1 && rStr[0] == '=')
|
|
{
|
|
aRet.meType = ScInputStringType::Formula;
|
|
}
|
|
else if (rStr.getLength() > 1 && rStr[0] == '\'')
|
|
{
|
|
// for bEnglish, "'" at the beginning is always interpreted as text
|
|
// marker and stripped
|
|
aRet.maText = rStr.copy(1);
|
|
aRet.meType = ScInputStringType::Text;
|
|
}
|
|
else // test for English number format (only)
|
|
{
|
|
sal_uInt32 nNumFormat = rContext.NFGetStandardIndex(eLang);
|
|
|
|
if (rContext.NFIsNumberFormat(rStr, nNumFormat, aRet.mfValue))
|
|
{
|
|
aRet.meType = ScInputStringType::Number;
|
|
aRet.mnFormatType = rContext.NFGetType(nNumFormat);
|
|
}
|
|
else if (!rStr.isEmpty())
|
|
aRet.meType = ScInputStringType::Text;
|
|
|
|
// the (English) number format is not set
|
|
//TODO: find and replace with matching local format???
|
|
}
|
|
|
|
return aRet;
|
|
}
|
|
|
|
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|