diff options
Diffstat (limited to 'svl/source/numbers/zforfind.hxx')
-rw-r--r-- | svl/source/numbers/zforfind.hxx | 443 |
1 files changed, 443 insertions, 0 deletions
diff --git a/svl/source/numbers/zforfind.hxx b/svl/source/numbers/zforfind.hxx new file mode 100644 index 0000000000..dea732b932 --- /dev/null +++ b/svl/source/numbers/zforfind.hxx @@ -0,0 +1,443 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_SVL_SOURCE_NUMBERS_ZFORFIND_HXX +#define INCLUDED_SVL_SOURCE_NUMBERS_ZFORFIND_HXX + +#include <com/sun/star/uno/Sequence.hxx> +#include <rtl/ustring.hxx> +#include <svl/zforlist.hxx> +#include <tools/date.hxx> +#include <memory> +#include <optional> + +class SvNumberformat; +class SvNumberFormatter; +enum class SvNumFormatType : sal_Int16; + +#define SV_MAX_COUNT_INPUT_STRINGS 20 // max count of substrings in input scanner + +class ImpSvNumberInputScan +{ +public: + explicit ImpSvNumberInputScan( SvNumberFormatter* pFormatter ); + ~ImpSvNumberInputScan(); + +/*!*/ void ChangeIntl(); // MUST be called if language changes + + /// set reference date for offset calculation + void ChangeNullDate( const sal_uInt16 nDay, + const sal_uInt16 nMonth, + const sal_Int16 nYear ); + + /// convert input string to number + bool IsNumberFormat( const OUString& rString, /// input string + SvNumFormatType& F_Type, /// format type (in + out) + double& fOutNumber, /// value determined (out) + const SvNumberformat* pFormat, /// number format to which compare against + SvNumInputOptions eInputOptions); + + /// after IsNumberFormat: get decimal position + short GetDecPos() const { return nDecPos; } + /// after IsNumberFormat: get count of numeric substrings in input string + sal_uInt16 GetNumericsCount() const { return nNumericsCnt; } + + /// set threshold of two-digit year input + void SetYear2000( sal_uInt16 nVal ) { nYear2000 = nVal; } + /// get threshold of two-digit year input + sal_uInt16 GetYear2000() const { return nYear2000; } + + /** Whether input can be forced to ISO 8601 format. + + Depends on locale's date separator and a specific date format order. + */ + bool CanForceToIso8601( DateOrder eDateOrder ); + + void InvalidateDateAcceptancePatterns(); + + /** Whether 'T' separator was detected in an ISO 8601 date+time format. + */ + bool HasIso8601Tsep() const { return bIso8601Tsep; } + +private: + SvNumberFormatter* pFormatter; + const SvNumberformat* mpFormat; //* The format to compare against, if any + std::unique_ptr<OUString[]> pUpperMonthText; //* Array of month names, uppercase + std::unique_ptr<OUString[]> pUpperAbbrevMonthText; //* Array of month names, abbreviated, uppercase + std::unique_ptr<OUString[]> pUpperGenitiveMonthText; //* Array of genitive month names, uppercase + std::unique_ptr<OUString[]> pUpperGenitiveAbbrevMonthText; //* Array of genitive month names, abbreviated, uppercase + std::unique_ptr<OUString[]> pUpperPartitiveMonthText; //* Array of partitive month names, uppercase + std::unique_ptr<OUString[]> pUpperPartitiveAbbrevMonthText;//* Array of partitive month names, abbreviated, uppercase + std::unique_ptr<OUString[]> pUpperDayText; //* Array of day of week names, uppercase + std::unique_ptr<OUString[]> pUpperAbbrevDayText; //* Array of day of week names, abbreviated, uppercase + OUString aUpperCurrSymbol; //* Currency symbol, uppercase + bool bTextInitialized; //* Whether days and months are initialized + bool bScanGenitiveMonths; //* Whether to scan an input for genitive months + bool bScanPartitiveMonths; //* Whether to scan an input for partitive months + std::optional<Date> moNullDate; //* 30Dec1899 + // Variables for provisional results: + OUString sStrArray[SV_MAX_COUNT_INPUT_STRINGS];//* Array of scanned substrings + bool IsNum[SV_MAX_COUNT_INPUT_STRINGS]; //* Whether a substring is numeric + sal_uInt16 nNums[SV_MAX_COUNT_INPUT_STRINGS]; //* Sequence of offsets to numeric strings + sal_uInt16 nStringsCnt; //* Total count of scanned substrings + sal_uInt16 nNumericsCnt; //* Count of numeric substrings + bool bDecSepInDateSeps; //* True <=> DecSep in {.,-,/,DateSep} + sal_uInt8 nMatchedAllStrings; //* Scan...String() matched all substrings, + + // bit mask of nMatched... constants + static const sal_uInt8 nMatchedEndString; // 0x01 + static const sal_uInt8 nMatchedMidString; // 0x02 + static const sal_uInt8 nMatchedStartString; // 0x04 + static const sal_uInt8 nMatchedVirgin; // 0x08 + static const sal_uInt8 nMatchedUsedAsReturn; // 0x10 + + int nSign; // Sign of number + int nMonth; // Month (1..x) if date + // negative => short format + short nMonthPos; // 1 = front, 2 = middle + // 3 = end + int nDayOfWeek; // Temporary (!) day of week (1..7,-1..-7) if date + sal_uInt16 nTimePos; // Index of first time separator (+1) + short nDecPos; // Index of substring containing "," (+1) + bool bNegCheck; // '( )' for negative + short nESign; // Sign of exponent + short nAmPm; // +1 AM, -1 PM, 0 if none + short nLogical; // -1 => False, 1 => True + bool mbEraCE; // Era if date, 0 => BCE, 1 => CE (currently only Gregorian) + sal_uInt16 nThousand; // Count of group (AKA thousand) separators + sal_uInt16 nPosThousandString; // Position of concatenated 000,000,000 string + SvNumFormatType eScannedType; // Scanned type + SvNumFormatType eSetType; // Preset Type + + sal_uInt16 nStringScanNumFor; // Fixed strings recognized in + // pFormat->NumFor[nNumForStringScan] + short nStringScanSign; // Sign resulting of FixString + sal_uInt16 nYear2000; // Two-digit threshold + // Year as 20xx + // default 18 + // number <= nYear2000 => 20xx + // number > nYear2000 => 19xx + + /** State of ISO 8601 detection. + + 0:= don't know yet + 1:= no + 2:= yes, <=2 digits in year + 3:= yes, 3 digits in year + 4:= yes, >=4 digits in year + + @see MayBeIso8601() + */ + sal_uInt8 nMayBeIso8601; + + /** Whether the 'T' time separator was detected in an ISO 8601 string. */ + bool bIso8601Tsep; + + /** State of dd-month-yy or yy-month-dd detection, with month name. + + 0:= don't know yet + 1:= no + 2:= yes, dd-month-yy + 3:= yes, yy-month-dd + + @see MayBeMonthDate() + */ + sal_uInt8 nMayBeMonthDate; + + /** Input matched this locale dependent date acceptance pattern. + -2 if not checked yet, -1 if no match, >=0 matched pattern. + + @see IsAcceptedDatePattern() + */ + sal_Int32 nAcceptedDatePattern; + css::uno::Sequence< OUString > sDateAcceptancePatterns; + + /** If input matched a date acceptance pattern that starts at input + particle sStrArray[nDatePatternStart]. + + @see IsAcceptedDatePattern() + */ + sal_uInt16 nDatePatternStart; + + /** Count of numbers that matched the accepted pattern, if any, else 0. + + @see GetDatePatternNumbers() + */ + sal_uInt16 nDatePatternNumbers; + + // Copy assignment is forbidden and not implemented. + ImpSvNumberInputScan (const ImpSvNumberInputScan &) = delete; + ImpSvNumberInputScan & operator= (const ImpSvNumberInputScan &) = delete; + + void Reset(); // Reset all variables before start of analysis + + void InitText(); // Init of months and days of week + + // Convert string to double. + // Only simple unsigned floating point values without any error detection, + // decimal separator has to be '.' + // If bForceFraction==true the string is taken to be the fractional part + // of 0.1234 without the leading 0. (thus being just "1234"). + static double StringToDouble( std::u16string_view aStr, + bool bForceFraction = false ); + + // Next number/string symbol + static bool NextNumberStringSymbol( const sal_Unicode*& pStr, + OUString& rSymbol ); + + // Concatenate ,000,23 blocks + // in input to 000123 + bool SkipThousands( const sal_Unicode*& pStr, OUString& rSymbol ) const; + + // Divide numbers/strings into + // arrays and variables above. + // Leading blanks and blanks + // after numbers are thrown away + void NumberStringDivision( const OUString& rString ); + + + /** Whether rString contains word (!) rWhat at nPos. + rWhat will not be matched if it is a substring of a word. + */ + bool StringContainsWord( const OUString& rWhat, + const OUString& rString, + sal_Int32 nPos ) const; + + // optimized substring versions + + // Whether rString contains rWhat at nPos + static bool StringContains( const OUString& rWhat, + const OUString& rString, + sal_Int32 nPos ) + { + if (rWhat.isEmpty() || rString.getLength() <= nPos) + { + return false; + } + // mostly used with one character + if ( rWhat[ 0 ] != rString[ nPos ] ) + { + return false; + } + return StringContainsImpl( rWhat, rString, nPos ); + } + + // Whether pString contains rWhat at nPos + static bool StringPtrContains( const OUString& rWhat, + const sal_Unicode* pString, + sal_Int32 nPos ) // nPos MUST be a valid offset from pString + { + // mostly used with one character + if ( rWhat[ 0 ] != pString[ nPos ] ) + { + return false; + } + return StringPtrContainsImpl( rWhat, pString, nPos ); + } + + //! DO NOT use directly + static bool StringContainsImpl( const OUString& rWhat, + const OUString& rString, + sal_Int32 nPos ); + //! DO NOT use directly + static bool StringPtrContainsImpl( const OUString& rWhat, + const sal_Unicode* pString, + sal_Int32 nPos ); + + // Skip a special character + static inline bool SkipChar( sal_Unicode c, + std::u16string_view rString, + sal_Int32& nPos ); + + // Skip blank + static inline bool SkipBlanks( const OUString& rString, + sal_Int32& nPos ); + + // Jump over rWhat in rString at nPos + static inline bool SkipString( const OUString& rWhat, + const OUString& rString, + sal_Int32& nPos ); + + // Recognizes exactly ,111 as group separator + inline bool GetThousandSep( std::u16string_view rString, + sal_Int32& nPos, + sal_uInt16 nStringPos ) const; + // Get boolean value + short GetLogical( std::u16string_view rString ) const; + + // Get month and advance string position + short GetMonth( const OUString& rString, + sal_Int32& nPos ); + + // Get day of week and advance string position + int GetDayOfWeek( const OUString& rString, + sal_Int32& nPos ); + + // Get currency symbol and advance string position + bool GetCurrency( const OUString& rString, + sal_Int32& nPos ); + + // Get symbol AM or PM and advance string position + bool GetTimeAmPm( const OUString& rString, + sal_Int32& nPos ); + + // Get decimal separator and advance string position + inline bool GetDecSep( std::u16string_view rString, + sal_Int32& nPos ) const; + + // Get hundredth seconds separator and advance string position + inline bool GetTime100SecSep( std::u16string_view rString, + sal_Int32& nPos ) const; + + // Get sign and advance string position + // Including special case '(' + int GetSign( std::u16string_view rString, + sal_Int32& nPos ); + + // Get sign of exponent and advance string position + static short GetESign( std::u16string_view rString, + sal_Int32& nPos ); + + // Get next number as array offset + inline bool GetNextNumber( sal_uInt16& i, + sal_uInt16& j ) const; + + /** Converts time -> double (only decimals) + + @return TRUE if time, FALSE if not (e.g. hours >12 with AM/PM) + */ + bool GetTimeRef( double& fOutNumber, // result as double + sal_uInt16 nIndex, // Index of hour in input + sal_uInt16 nCnt, // Count of time substrings in input + SvNumInputOptions eInputOptions ) const; + sal_uInt16 ImplGetDay ( sal_uInt16 nIndex ) const; // Day input, 0 if no match + sal_uInt16 ImplGetMonth( sal_uInt16 nIndex ) const; // Month input, zero based return, NumberOfMonths if no match + sal_uInt16 ImplGetYear ( sal_uInt16 nIndex ); // Year input, 0 if no match + + // Conversion of date to number + bool GetDateRef( double& fDays, // OUT: days diff to null date + sal_uInt16& nCounter ); // Count of date substrings + + // Analyze start of string + bool ScanStartString( const OUString& rString ); + + // Analyze middle substring + bool ScanMidString( const OUString& rString, + sal_uInt16 nStringPos, + sal_uInt16 nCurNumCount ); + + + // Analyze end of string + bool ScanEndString( const OUString& rString ); + + // Compare rString to substring of array indexed by nString + // nString == 0xFFFF => last substring + bool ScanStringNumFor( const OUString& rString, + sal_Int32 nPos, + sal_uInt16 nString, + bool bDontDetectNegation = false ); + + // if nMatchedAllStrings set nMatchedUsedAsReturn and return true, + // else do nothing and return false + bool MatchedReturn(); + + //! Be sure that the string to be analyzed is already converted to upper + //! case and if it contained native number digits that they are already + //! converted to ASCII. + + // Main analyzing function + bool IsNumberFormatMain( const OUString& rString, + const SvNumberformat* pFormat); // number format to match against + + /** Whether input matches locale dependent date acceptance pattern. + + @param nStartPatternAt + The pattern matching starts at input particle + sStrArray[nStartPatternAt]. + + NOTE: once called the result is remembered, subsequent calls with + different parameters do not check for a match and do not lead to a + different result. + */ + bool IsAcceptedDatePattern( sal_uInt16 nStartPatternAt ); + + /** Sets (not advances!) rPos to sStrArray[nParticle].getLength() if string + matches separator in pattern at nParticle. + + Also detects a signed year case like M/D/-Y + + @returns TRUE if separator matched. + */ + bool SkipDatePatternSeparator( sal_uInt16 nParticle, sal_Int32 & rPos, bool & rSignedYear ); + + /** Returns count of numbers in accepted date pattern. + */ + sal_uInt16 GetDatePatternNumbers(); + + /** Whether numeric string nNumber is of type cType in accepted date + pattern, 'Y', 'M' or 'D'. + */ + bool IsDatePatternNumberOfType( sal_uInt16 nNumber, sal_Unicode cType ); + + /** Obtain order of accepted date pattern coded as, for example, + ('D'<<16)|('M'<<8)|'Y' + */ + sal_uInt32 GetDatePatternOrder(); + + /** Obtain date format order, from accepted date pattern if available or + otherwise the locale's default order. + + @param bFromFormatIfNoPattern + If <TRUE/> and no pattern was matched, obtain date order from + format if available, instead from format's or current locale. + */ + DateOrder GetDateOrder( bool bFromFormatIfNoPattern = false ); + + /** Whether input may be an ISO 8601 date format, yyyy-mm-dd... + + Checks if input has at least 3 numbers for yyyy-mm-dd and the separator + is '-', and 1<=mm<=12 and 1<=dd<=31. + + @see nMayBeIso8601 + */ + bool MayBeIso8601(); + + /** Whether input may be a dd-month-yy format, with month name, not + number. + + @see nMayBeMonthDate + */ + bool MayBeMonthDate(); + + /** Whether input is acceptable as ISO 8601 date format in the current + NfEvalDateFormat setting. + */ + bool IsAcceptableIso8601(); + + /** If month name in the middle was parsed, get the corresponding + LongDateOrder in GetDateRef(). + */ + LongDateOrder GetMiddleMonthLongDateOrder( bool bFormatTurn, + const LocaleDataWrapper* pLoc, + DateOrder eDateOrder ); +}; + +#endif // INCLUDED_SVL_SOURCE_NUMBERS_ZFORFIND_HXX + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |