diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
commit | 36d22d82aa202bb199967e9512281e9a53db42c9 (patch) | |
tree | 105e8c98ddea1c1e4784a60a5a6410fa416be2de /intl/icu/source/i18n/number_patternstring.cpp | |
parent | Initial commit. (diff) | |
download | firefox-esr-upstream.tar.xz firefox-esr-upstream.zip |
Adding upstream version 115.7.0esr.upstream/115.7.0esrupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'intl/icu/source/i18n/number_patternstring.cpp')
-rw-r--r-- | intl/icu/source/i18n/number_patternstring.cpp | 1212 |
1 files changed, 1212 insertions, 0 deletions
diff --git a/intl/icu/source/i18n/number_patternstring.cpp b/intl/icu/source/i18n/number_patternstring.cpp new file mode 100644 index 0000000000..aa082be5a8 --- /dev/null +++ b/intl/icu/source/i18n/number_patternstring.cpp @@ -0,0 +1,1212 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +// Allow implicit conversion from char16_t* to UnicodeString for this file: +// Helpful in toString methods and elsewhere. +#define UNISTR_FROM_STRING_EXPLICIT +#define UNISTR_FROM_CHAR_EXPLICIT + +#include "uassert.h" +#include "number_patternstring.h" +#include "unicode/utf16.h" +#include "number_utils.h" +#include "number_roundingutils.h" +#include "number_mapper.h" + +using namespace icu; +using namespace icu::number; +using namespace icu::number::impl; + + +void PatternParser::parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo& patternInfo, + UErrorCode& status) { + patternInfo.consumePattern(patternString, status); +} + +DecimalFormatProperties +PatternParser::parseToProperties(const UnicodeString& pattern, IgnoreRounding ignoreRounding, + UErrorCode& status) { + DecimalFormatProperties properties; + parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status); + return properties; +} + +DecimalFormatProperties PatternParser::parseToProperties(const UnicodeString& pattern, + UErrorCode& status) { + return parseToProperties(pattern, IGNORE_ROUNDING_NEVER, status); +} + +void +PatternParser::parseToExistingProperties(const UnicodeString& pattern, DecimalFormatProperties& properties, + IgnoreRounding ignoreRounding, UErrorCode& status) { + parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status); +} + + +char16_t ParsedPatternInfo::charAt(int32_t flags, int32_t index) const { + const Endpoints& endpoints = getEndpoints(flags); + if (index < 0 || index >= endpoints.end - endpoints.start) { + UPRV_UNREACHABLE_EXIT; + } + return pattern.charAt(endpoints.start + index); +} + +int32_t ParsedPatternInfo::length(int32_t flags) const { + return getLengthFromEndpoints(getEndpoints(flags)); +} + +int32_t ParsedPatternInfo::getLengthFromEndpoints(const Endpoints& endpoints) { + return endpoints.end - endpoints.start; +} + +UnicodeString ParsedPatternInfo::getString(int32_t flags) const { + const Endpoints& endpoints = getEndpoints(flags); + if (endpoints.start == endpoints.end) { + return UnicodeString(); + } + // Create a new UnicodeString + return UnicodeString(pattern, endpoints.start, endpoints.end - endpoints.start); +} + +const Endpoints& ParsedPatternInfo::getEndpoints(int32_t flags) const { + bool prefix = (flags & AFFIX_PREFIX) != 0; + bool isNegative = (flags & AFFIX_NEGATIVE_SUBPATTERN) != 0; + bool padding = (flags & AFFIX_PADDING) != 0; + if (isNegative && padding) { + return negative.paddingEndpoints; + } else if (padding) { + return positive.paddingEndpoints; + } else if (prefix && isNegative) { + return negative.prefixEndpoints; + } else if (prefix) { + return positive.prefixEndpoints; + } else if (isNegative) { + return negative.suffixEndpoints; + } else { + return positive.suffixEndpoints; + } +} + +bool ParsedPatternInfo::positiveHasPlusSign() const { + return positive.hasPlusSign; +} + +bool ParsedPatternInfo::hasNegativeSubpattern() const { + return fHasNegativeSubpattern; +} + +bool ParsedPatternInfo::negativeHasMinusSign() const { + return negative.hasMinusSign; +} + +bool ParsedPatternInfo::hasCurrencySign() const { + return positive.hasCurrencySign || (fHasNegativeSubpattern && negative.hasCurrencySign); +} + +bool ParsedPatternInfo::containsSymbolType(AffixPatternType type, UErrorCode& status) const { + return AffixUtils::containsType(pattern, type, status); +} + +bool ParsedPatternInfo::hasBody() const { + return positive.integerTotal > 0; +} + +bool ParsedPatternInfo::currencyAsDecimal() const { + return positive.hasCurrencyDecimal; +} + +///////////////////////////////////////////////////// +/// BEGIN RECURSIVE DESCENT PARSER IMPLEMENTATION /// +///////////////////////////////////////////////////// + +UChar32 ParsedPatternInfo::ParserState::peek() { + if (offset == pattern.length()) { + return -1; + } else { + return pattern.char32At(offset); + } +} + +UChar32 ParsedPatternInfo::ParserState::peek2() { + if (offset == pattern.length()) { + return -1; + } + int32_t cp1 = pattern.char32At(offset); + int32_t offset2 = offset + U16_LENGTH(cp1); + if (offset2 == pattern.length()) { + return -1; + } + return pattern.char32At(offset2); +} + +UChar32 ParsedPatternInfo::ParserState::next() { + int32_t codePoint = peek(); + offset += U16_LENGTH(codePoint); + return codePoint; +} + +void ParsedPatternInfo::consumePattern(const UnicodeString& patternString, UErrorCode& status) { + if (U_FAILURE(status)) { return; } + this->pattern = patternString; + + // This class is not intended for writing twice! + // Use move assignment to overwrite instead. + U_ASSERT(state.offset == 0); + + // pattern := subpattern (';' subpattern)? + currentSubpattern = &positive; + consumeSubpattern(status); + if (U_FAILURE(status)) { return; } + if (state.peek() == u';') { + state.next(); // consume the ';' + // Don't consume the negative subpattern if it is empty (trailing ';') + if (state.peek() != -1) { + fHasNegativeSubpattern = true; + currentSubpattern = &negative; + consumeSubpattern(status); + if (U_FAILURE(status)) { return; } + } + } + if (state.peek() != -1) { + state.toParseException(u"Found unquoted special character"); + status = U_UNQUOTED_SPECIAL; + } +} + +void ParsedPatternInfo::consumeSubpattern(UErrorCode& status) { + // subpattern := literals? number exponent? literals? + consumePadding(PadPosition::UNUM_PAD_BEFORE_PREFIX, status); + if (U_FAILURE(status)) { return; } + consumeAffix(currentSubpattern->prefixEndpoints, status); + if (U_FAILURE(status)) { return; } + consumePadding(PadPosition::UNUM_PAD_AFTER_PREFIX, status); + if (U_FAILURE(status)) { return; } + consumeFormat(status); + if (U_FAILURE(status)) { return; } + consumeExponent(status); + if (U_FAILURE(status)) { return; } + consumePadding(PadPosition::UNUM_PAD_BEFORE_SUFFIX, status); + if (U_FAILURE(status)) { return; } + consumeAffix(currentSubpattern->suffixEndpoints, status); + if (U_FAILURE(status)) { return; } + consumePadding(PadPosition::UNUM_PAD_AFTER_SUFFIX, status); + if (U_FAILURE(status)) { return; } +} + +void ParsedPatternInfo::consumePadding(PadPosition paddingLocation, UErrorCode& status) { + if (state.peek() != u'*') { + return; + } + if (currentSubpattern->hasPadding) { + state.toParseException(u"Cannot have multiple pad specifiers"); + status = U_MULTIPLE_PAD_SPECIFIERS; + return; + } + currentSubpattern->paddingLocation = paddingLocation; + currentSubpattern->hasPadding = true; + state.next(); // consume the '*' + currentSubpattern->paddingEndpoints.start = state.offset; + consumeLiteral(status); + currentSubpattern->paddingEndpoints.end = state.offset; +} + +void ParsedPatternInfo::consumeAffix(Endpoints& endpoints, UErrorCode& status) { + // literals := { literal } + endpoints.start = state.offset; + while (true) { + switch (state.peek()) { + case u'#': + case u'@': + case u';': + case u'*': + case u'.': + case u',': + case u'0': + case u'1': + case u'2': + case u'3': + case u'4': + case u'5': + case u'6': + case u'7': + case u'8': + case u'9': + case -1: + // Characters that cannot appear unquoted in a literal + // break outer; + goto after_outer; + + case u'%': + currentSubpattern->hasPercentSign = true; + break; + + case u'‰': + currentSubpattern->hasPerMilleSign = true; + break; + + case u'¤': + currentSubpattern->hasCurrencySign = true; + break; + + case u'-': + currentSubpattern->hasMinusSign = true; + break; + + case u'+': + currentSubpattern->hasPlusSign = true; + break; + + default: + break; + } + consumeLiteral(status); + if (U_FAILURE(status)) { return; } + } + after_outer: + endpoints.end = state.offset; +} + +void ParsedPatternInfo::consumeLiteral(UErrorCode& status) { + if (state.peek() == -1) { + state.toParseException(u"Expected unquoted literal but found EOL"); + status = U_PATTERN_SYNTAX_ERROR; + return; + } else if (state.peek() == u'\'') { + state.next(); // consume the starting quote + while (state.peek() != u'\'') { + if (state.peek() == -1) { + state.toParseException(u"Expected quoted literal but found EOL"); + status = U_PATTERN_SYNTAX_ERROR; + return; + } else { + state.next(); // consume a quoted character + } + } + state.next(); // consume the ending quote + } else { + // consume a non-quoted literal character + state.next(); + } +} + +void ParsedPatternInfo::consumeFormat(UErrorCode& status) { + consumeIntegerFormat(status); + if (U_FAILURE(status)) { return; } + if (state.peek() == u'.') { + state.next(); // consume the decimal point + currentSubpattern->hasDecimal = true; + currentSubpattern->widthExceptAffixes += 1; + consumeFractionFormat(status); + if (U_FAILURE(status)) { return; } + } else if (state.peek() == u'¤') { + // Check if currency is a decimal separator + switch (state.peek2()) { + case u'#': + case u'0': + case u'1': + case u'2': + case u'3': + case u'4': + case u'5': + case u'6': + case u'7': + case u'8': + case u'9': + break; + default: + // Currency symbol followed by a non-numeric character; + // treat as a normal affix. + return; + } + // Currency symbol is followed by a numeric character; + // treat as a decimal separator. + currentSubpattern->hasCurrencySign = true; + currentSubpattern->hasCurrencyDecimal = true; + currentSubpattern->hasDecimal = true; + currentSubpattern->widthExceptAffixes += 1; + state.next(); // consume the symbol + consumeFractionFormat(status); + if (U_FAILURE(status)) { return; } + } +} + +void ParsedPatternInfo::consumeIntegerFormat(UErrorCode& status) { + // Convenience reference: + ParsedSubpatternInfo& result = *currentSubpattern; + + while (true) { + switch (state.peek()) { + case u',': + result.widthExceptAffixes += 1; + result.groupingSizes <<= 16; + break; + + case u'#': + if (result.integerNumerals > 0) { + state.toParseException(u"# cannot follow 0 before decimal point"); + status = U_UNEXPECTED_TOKEN; + return; + } + result.widthExceptAffixes += 1; + result.groupingSizes += 1; + if (result.integerAtSigns > 0) { + result.integerTrailingHashSigns += 1; + } else { + result.integerLeadingHashSigns += 1; + } + result.integerTotal += 1; + break; + + case u'@': + if (result.integerNumerals > 0) { + state.toParseException(u"Cannot mix 0 and @"); + status = U_UNEXPECTED_TOKEN; + return; + } + if (result.integerTrailingHashSigns > 0) { + state.toParseException(u"Cannot nest # inside of a run of @"); + status = U_UNEXPECTED_TOKEN; + return; + } + result.widthExceptAffixes += 1; + result.groupingSizes += 1; + result.integerAtSigns += 1; + result.integerTotal += 1; + break; + + case u'0': + case u'1': + case u'2': + case u'3': + case u'4': + case u'5': + case u'6': + case u'7': + case u'8': + case u'9': + if (result.integerAtSigns > 0) { + state.toParseException(u"Cannot mix @ and 0"); + status = U_UNEXPECTED_TOKEN; + return; + } + result.widthExceptAffixes += 1; + result.groupingSizes += 1; + result.integerNumerals += 1; + result.integerTotal += 1; + if (!result.rounding.isZeroish() || state.peek() != u'0') { + result.rounding.appendDigit(static_cast<int8_t>(state.peek() - u'0'), 0, true); + } + break; + + default: + goto after_outer; + } + state.next(); // consume the symbol + } + + after_outer: + // Disallow patterns with a trailing ',' or with two ',' next to each other + auto grouping1 = static_cast<int16_t> (result.groupingSizes & 0xffff); + auto grouping2 = static_cast<int16_t> ((result.groupingSizes >> 16) & 0xffff); + auto grouping3 = static_cast<int16_t> ((result.groupingSizes >> 32) & 0xffff); + if (grouping1 == 0 && grouping2 != -1) { + state.toParseException(u"Trailing grouping separator is invalid"); + status = U_UNEXPECTED_TOKEN; + return; + } + if (grouping2 == 0 && grouping3 != -1) { + state.toParseException(u"Grouping width of zero is invalid"); + status = U_PATTERN_SYNTAX_ERROR; + return; + } +} + +void ParsedPatternInfo::consumeFractionFormat(UErrorCode& status) { + // Convenience reference: + ParsedSubpatternInfo& result = *currentSubpattern; + + int32_t zeroCounter = 0; + while (true) { + switch (state.peek()) { + case u'#': + result.widthExceptAffixes += 1; + result.fractionHashSigns += 1; + result.fractionTotal += 1; + zeroCounter++; + break; + + case u'0': + case u'1': + case u'2': + case u'3': + case u'4': + case u'5': + case u'6': + case u'7': + case u'8': + case u'9': + if (result.fractionHashSigns > 0) { + state.toParseException(u"0 cannot follow # after decimal point"); + status = U_UNEXPECTED_TOKEN; + return; + } + result.widthExceptAffixes += 1; + result.fractionNumerals += 1; + result.fractionTotal += 1; + if (state.peek() == u'0') { + zeroCounter++; + } else { + result.rounding + .appendDigit(static_cast<int8_t>(state.peek() - u'0'), zeroCounter, false); + zeroCounter = 0; + } + break; + + default: + return; + } + state.next(); // consume the symbol + } +} + +void ParsedPatternInfo::consumeExponent(UErrorCode& status) { + // Convenience reference: + ParsedSubpatternInfo& result = *currentSubpattern; + + if (state.peek() != u'E') { + return; + } + if ((result.groupingSizes & 0xffff0000L) != 0xffff0000L) { + state.toParseException(u"Cannot have grouping separator in scientific notation"); + status = U_MALFORMED_EXPONENTIAL_PATTERN; + return; + } + state.next(); // consume the E + result.widthExceptAffixes++; + if (state.peek() == u'+') { + state.next(); // consume the + + result.exponentHasPlusSign = true; + result.widthExceptAffixes++; + } + while (state.peek() == u'0') { + state.next(); // consume the 0 + result.exponentZeros += 1; + result.widthExceptAffixes++; + } +} + +/////////////////////////////////////////////////// +/// END RECURSIVE DESCENT PARSER IMPLEMENTATION /// +/////////////////////////////////////////////////// + +void PatternParser::parseToExistingPropertiesImpl(const UnicodeString& pattern, + DecimalFormatProperties& properties, + IgnoreRounding ignoreRounding, UErrorCode& status) { + if (pattern.length() == 0) { + // Backwards compatibility requires that we reset to the default values. + // TODO: Only overwrite the properties that "saveToProperties" normally touches? + properties.clear(); + return; + } + + ParsedPatternInfo patternInfo; + parseToPatternInfo(pattern, patternInfo, status); + if (U_FAILURE(status)) { return; } + patternInfoToProperties(properties, patternInfo, ignoreRounding, status); +} + +void +PatternParser::patternInfoToProperties(DecimalFormatProperties& properties, ParsedPatternInfo& patternInfo, + IgnoreRounding _ignoreRounding, UErrorCode& status) { + // Translate from PatternParseResult to Properties. + // Note that most data from "negative" is ignored per the specification of DecimalFormat. + + const ParsedSubpatternInfo& positive = patternInfo.positive; + + bool ignoreRounding; + if (_ignoreRounding == IGNORE_ROUNDING_NEVER) { + ignoreRounding = false; + } else if (_ignoreRounding == IGNORE_ROUNDING_IF_CURRENCY) { + ignoreRounding = positive.hasCurrencySign; + } else { + U_ASSERT(_ignoreRounding == IGNORE_ROUNDING_ALWAYS); + ignoreRounding = true; + } + + // Grouping settings + auto grouping1 = static_cast<int16_t> (positive.groupingSizes & 0xffff); + auto grouping2 = static_cast<int16_t> ((positive.groupingSizes >> 16) & 0xffff); + auto grouping3 = static_cast<int16_t> ((positive.groupingSizes >> 32) & 0xffff); + if (grouping2 != -1) { + properties.groupingSize = grouping1; + properties.groupingUsed = true; + } else { + properties.groupingSize = -1; + properties.groupingUsed = false; + } + if (grouping3 != -1) { + properties.secondaryGroupingSize = grouping2; + } else { + properties.secondaryGroupingSize = -1; + } + + // For backwards compatibility, require that the pattern emit at least one min digit. + int minInt, minFrac; + if (positive.integerTotal == 0 && positive.fractionTotal > 0) { + // patterns like ".##" + minInt = 0; + minFrac = uprv_max(1, positive.fractionNumerals); + } else if (positive.integerNumerals == 0 && positive.fractionNumerals == 0) { + // patterns like "#.##" + minInt = 1; + minFrac = 0; + } else { + minInt = positive.integerNumerals; + minFrac = positive.fractionNumerals; + } + + // Rounding settings + // Don't set basic rounding when there is a currency sign; defer to CurrencyUsage + if (positive.integerAtSigns > 0) { + properties.minimumFractionDigits = -1; + properties.maximumFractionDigits = -1; + properties.roundingIncrement = 0.0; + properties.minimumSignificantDigits = positive.integerAtSigns; + properties.maximumSignificantDigits = positive.integerAtSigns + positive.integerTrailingHashSigns; + } else if (!positive.rounding.isZeroish()) { + if (!ignoreRounding) { + properties.minimumFractionDigits = minFrac; + properties.maximumFractionDigits = positive.fractionTotal; + properties.roundingIncrement = positive.rounding.toDouble(); + } else { + properties.minimumFractionDigits = -1; + properties.maximumFractionDigits = -1; + properties.roundingIncrement = 0.0; + } + properties.minimumSignificantDigits = -1; + properties.maximumSignificantDigits = -1; + } else { + if (!ignoreRounding) { + properties.minimumFractionDigits = minFrac; + properties.maximumFractionDigits = positive.fractionTotal; + properties.roundingIncrement = 0.0; + } else { + properties.minimumFractionDigits = -1; + properties.maximumFractionDigits = -1; + properties.roundingIncrement = 0.0; + } + properties.minimumSignificantDigits = -1; + properties.maximumSignificantDigits = -1; + } + + // If the pattern ends with a '.' then force the decimal point. + if (positive.hasDecimal && positive.fractionTotal == 0) { + properties.decimalSeparatorAlwaysShown = true; + } else { + properties.decimalSeparatorAlwaysShown = false; + } + + // Persist the currency as decimal separator + properties.currencyAsDecimal = positive.hasCurrencyDecimal; + + // Scientific notation settings + if (positive.exponentZeros > 0) { + properties.exponentSignAlwaysShown = positive.exponentHasPlusSign; + properties.minimumExponentDigits = positive.exponentZeros; + if (positive.integerAtSigns == 0) { + // patterns without '@' can define max integer digits, used for engineering notation + properties.minimumIntegerDigits = positive.integerNumerals; + properties.maximumIntegerDigits = positive.integerTotal; + } else { + // patterns with '@' cannot define max integer digits + properties.minimumIntegerDigits = 1; + properties.maximumIntegerDigits = -1; + } + } else { + properties.exponentSignAlwaysShown = false; + properties.minimumExponentDigits = -1; + properties.minimumIntegerDigits = minInt; + properties.maximumIntegerDigits = -1; + } + + // Compute the affix patterns (required for both padding and affixes) + UnicodeString posPrefix = patternInfo.getString(AffixPatternProvider::AFFIX_PREFIX); + UnicodeString posSuffix = patternInfo.getString(0); + + // Padding settings + if (positive.hasPadding) { + // The width of the positive prefix and suffix templates are included in the padding + int paddingWidth = positive.widthExceptAffixes + + AffixUtils::estimateLength(posPrefix, status) + + AffixUtils::estimateLength(posSuffix, status); + properties.formatWidth = paddingWidth; + UnicodeString rawPaddingString = patternInfo.getString(AffixPatternProvider::AFFIX_PADDING); + if (rawPaddingString.length() == 1) { + properties.padString = rawPaddingString; + } else if (rawPaddingString.length() == 2) { + if (rawPaddingString.charAt(0) == u'\'') { + properties.padString.setTo(u"'", -1); + } else { + properties.padString = rawPaddingString; + } + } else { + properties.padString = UnicodeString(rawPaddingString, 1, rawPaddingString.length() - 2); + } + properties.padPosition = positive.paddingLocation; + } else { + properties.formatWidth = -1; + properties.padString.setToBogus(); + properties.padPosition.nullify(); + } + + // Set the affixes + // Always call the setter, even if the prefixes are empty, especially in the case of the + // negative prefix pattern, to prevent default values from overriding the pattern. + properties.positivePrefixPattern = posPrefix; + properties.positiveSuffixPattern = posSuffix; + if (patternInfo.fHasNegativeSubpattern) { + properties.negativePrefixPattern = patternInfo.getString( + AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN | AffixPatternProvider::AFFIX_PREFIX); + properties.negativeSuffixPattern = patternInfo.getString( + AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN); + } else { + properties.negativePrefixPattern.setToBogus(); + properties.negativeSuffixPattern.setToBogus(); + } + + // Set the magnitude multiplier + if (positive.hasPercentSign) { + properties.magnitudeMultiplier = 2; + } else if (positive.hasPerMilleSign) { + properties.magnitudeMultiplier = 3; + } else { + properties.magnitudeMultiplier = 0; + } +} + +/////////////////////////////////////////////////////////////////// +/// End PatternStringParser.java; begin PatternStringUtils.java /// +/////////////////////////////////////////////////////////////////// + +// Determine whether a given roundingIncrement should be ignored for formatting +// based on the current maxFrac value (maximum fraction digits). For example a +// roundingIncrement of 0.01 should be ignored if maxFrac is 1, but not if maxFrac +// is 2 or more. Note that roundingIncrements are rounded in significance, so +// a roundingIncrement of 0.006 is treated like 0.01 for this determination, i.e. +// it should not be ignored if maxFrac is 2 or more (but a roundingIncrement of +// 0.005 is treated like 0.001 for significance). This is the reason for the +// initial doubling below. +// roundIncr must be non-zero. +bool PatternStringUtils::ignoreRoundingIncrement(double roundIncr, int32_t maxFrac) { + if (maxFrac < 0) { + return false; + } + int32_t frac = 0; + roundIncr *= 2.0; + for (frac = 0; frac <= maxFrac && roundIncr <= 1.0; frac++, roundIncr *= 10.0); + return (frac > maxFrac); +} + +UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatProperties& properties, + UErrorCode& status) { + UnicodeString sb; + + // Convenience references + // The uprv_min() calls prevent DoS + int32_t dosMax = 100; + int32_t grouping1 = uprv_max(0, uprv_min(properties.groupingSize, dosMax)); + int32_t grouping2 = uprv_max(0, uprv_min(properties.secondaryGroupingSize, dosMax)); + bool useGrouping = properties.groupingUsed; + int32_t paddingWidth = uprv_min(properties.formatWidth, dosMax); + NullableValue<PadPosition> paddingLocation = properties.padPosition; + UnicodeString paddingString = properties.padString; + int32_t minInt = uprv_max(0, uprv_min(properties.minimumIntegerDigits, dosMax)); + int32_t maxInt = uprv_min(properties.maximumIntegerDigits, dosMax); + int32_t minFrac = uprv_max(0, uprv_min(properties.minimumFractionDigits, dosMax)); + int32_t maxFrac = uprv_min(properties.maximumFractionDigits, dosMax); + int32_t minSig = uprv_min(properties.minimumSignificantDigits, dosMax); + int32_t maxSig = uprv_min(properties.maximumSignificantDigits, dosMax); + bool alwaysShowDecimal = properties.decimalSeparatorAlwaysShown; + int32_t exponentDigits = uprv_min(properties.minimumExponentDigits, dosMax); + bool exponentShowPlusSign = properties.exponentSignAlwaysShown; + + AutoAffixPatternProvider affixProvider(properties, status); + + // Prefixes + sb.append(affixProvider.get().getString(AffixPatternProvider::AFFIX_POS_PREFIX)); + int32_t afterPrefixPos = sb.length(); + + // Figure out the grouping sizes. + if (!useGrouping) { + grouping1 = 0; + grouping2 = 0; + } else if (grouping1 == grouping2) { + grouping1 = 0; + } + int32_t groupingLength = grouping1 + grouping2 + 1; + + // Figure out the digits we need to put in the pattern. + double increment = properties.roundingIncrement; + UnicodeString digitsString; + int32_t digitsStringScale = 0; + if (maxSig != uprv_min(dosMax, -1)) { + // Significant Digits. + while (digitsString.length() < minSig) { + digitsString.append(u'@'); + } + while (digitsString.length() < maxSig) { + digitsString.append(u'#'); + } + } else if (increment != 0.0 && !ignoreRoundingIncrement(increment,maxFrac)) { + // Rounding Increment. + DecimalQuantity incrementQuantity; + incrementQuantity.setToDouble(increment); + incrementQuantity.roundToInfinity(); + digitsStringScale = incrementQuantity.getLowerDisplayMagnitude(); + incrementQuantity.adjustMagnitude(-digitsStringScale); + incrementQuantity.setMinInteger(minInt - digitsStringScale); + UnicodeString str = incrementQuantity.toPlainString(); + if (str.charAt(0) == u'-') { + // TODO: Unsupported operation exception or fail silently? + digitsString.append(str, 1, str.length() - 1); + } else { + digitsString.append(str); + } + } + while (digitsString.length() + digitsStringScale < minInt) { + digitsString.insert(0, u'0'); + } + while (-digitsStringScale < minFrac) { + digitsString.append(u'0'); + digitsStringScale--; + } + + // Write the digits to the string builder + int32_t m0 = uprv_max(groupingLength, digitsString.length() + digitsStringScale); + m0 = (maxInt != dosMax) ? uprv_max(maxInt, m0) - 1 : m0 - 1; + int32_t mN = (maxFrac != dosMax) ? uprv_min(-maxFrac, digitsStringScale) : digitsStringScale; + for (int32_t magnitude = m0; magnitude >= mN; magnitude--) { + int32_t di = digitsString.length() + digitsStringScale - magnitude - 1; + if (di < 0 || di >= digitsString.length()) { + sb.append(u'#'); + } else { + sb.append(digitsString.charAt(di)); + } + // Decimal separator + if (magnitude == 0 && (alwaysShowDecimal || mN < 0)) { + if (properties.currencyAsDecimal) { + sb.append(u'¤'); + } else { + sb.append(u'.'); + } + } + if (!useGrouping) { + continue; + } + // Least-significant grouping separator + if (magnitude > 0 && magnitude == grouping1) { + sb.append(u','); + } + // All other grouping separators + if (magnitude > grouping1 && grouping2 > 0 && (magnitude - grouping1) % grouping2 == 0) { + sb.append(u','); + } + } + + // Exponential notation + if (exponentDigits != uprv_min(dosMax, -1)) { + sb.append(u'E'); + if (exponentShowPlusSign) { + sb.append(u'+'); + } + for (int32_t i = 0; i < exponentDigits; i++) { + sb.append(u'0'); + } + } + + // Suffixes + int32_t beforeSuffixPos = sb.length(); + sb.append(affixProvider.get().getString(AffixPatternProvider::AFFIX_POS_SUFFIX)); + + // Resolve Padding + if (paddingWidth > 0 && !paddingLocation.isNull()) { + while (paddingWidth - sb.length() > 0) { + sb.insert(afterPrefixPos, u'#'); + beforeSuffixPos++; + } + int32_t addedLength; + switch (paddingLocation.get(status)) { + case PadPosition::UNUM_PAD_BEFORE_PREFIX: + addedLength = escapePaddingString(paddingString, sb, 0, status); + sb.insert(0, u'*'); + afterPrefixPos += addedLength + 1; + beforeSuffixPos += addedLength + 1; + break; + case PadPosition::UNUM_PAD_AFTER_PREFIX: + addedLength = escapePaddingString(paddingString, sb, afterPrefixPos, status); + sb.insert(afterPrefixPos, u'*'); + afterPrefixPos += addedLength + 1; + beforeSuffixPos += addedLength + 1; + break; + case PadPosition::UNUM_PAD_BEFORE_SUFFIX: + escapePaddingString(paddingString, sb, beforeSuffixPos, status); + sb.insert(beforeSuffixPos, u'*'); + break; + case PadPosition::UNUM_PAD_AFTER_SUFFIX: + sb.append(u'*'); + escapePaddingString(paddingString, sb, sb.length(), status); + break; + } + if (U_FAILURE(status)) { return sb; } + } + + // Negative affixes + // Ignore if the negative prefix pattern is "-" and the negative suffix is empty + if (affixProvider.get().hasNegativeSubpattern()) { + sb.append(u';'); + sb.append(affixProvider.get().getString(AffixPatternProvider::AFFIX_NEG_PREFIX)); + // Copy the positive digit format into the negative. + // This is optional; the pattern is the same as if '#' were appended here instead. + // NOTE: It is not safe to append the UnicodeString to itself, so we need to copy. + // See https://unicode-org.atlassian.net/browse/ICU-13707 + UnicodeString copy(sb); + sb.append(copy, afterPrefixPos, beforeSuffixPos - afterPrefixPos); + sb.append(affixProvider.get().getString(AffixPatternProvider::AFFIX_NEG_SUFFIX)); + } + + return sb; +} + +int PatternStringUtils::escapePaddingString(UnicodeString input, UnicodeString& output, int startIndex, + UErrorCode& status) { + (void) status; + if (input.length() == 0) { + input.setTo(kFallbackPaddingString, -1); + } + int startLength = output.length(); + if (input.length() == 1) { + if (input.compare(u"'", -1) == 0) { + output.insert(startIndex, u"''", -1); + } else { + output.insert(startIndex, input); + } + } else { + output.insert(startIndex, u'\''); + int offset = 1; + for (int i = 0; i < input.length(); i++) { + // it's okay to deal in chars here because the quote mark is the only interesting thing. + char16_t ch = input.charAt(i); + if (ch == u'\'') { + output.insert(startIndex + offset, u"''", -1); + offset += 2; + } else { + output.insert(startIndex + offset, ch); + offset += 1; + } + } + output.insert(startIndex + offset, u'\''); + } + return output.length() - startLength; +} + +UnicodeString +PatternStringUtils::convertLocalized(const UnicodeString& input, const DecimalFormatSymbols& symbols, + bool toLocalized, UErrorCode& status) { + // Construct a table of strings to be converted between localized and standard. + static constexpr int32_t LEN = 21; + UnicodeString table[LEN][2]; + int standIdx = toLocalized ? 0 : 1; + int localIdx = toLocalized ? 1 : 0; + // TODO: Add approximately sign here? + table[0][standIdx] = u"%"; + table[0][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPercentSymbol); + table[1][standIdx] = u"‰"; + table[1][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol); + table[2][standIdx] = u"."; + table[2][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol); + table[3][standIdx] = u","; + table[3][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol); + table[4][standIdx] = u"-"; + table[4][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol); + table[5][standIdx] = u"+"; + table[5][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol); + table[6][standIdx] = u";"; + table[6][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPatternSeparatorSymbol); + table[7][standIdx] = u"@"; + table[7][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kSignificantDigitSymbol); + table[8][standIdx] = u"E"; + table[8][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol); + table[9][standIdx] = u"*"; + table[9][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPadEscapeSymbol); + table[10][standIdx] = u"#"; + table[10][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kDigitSymbol); + for (int i = 0; i < 10; i++) { + table[11 + i][standIdx] = u'0' + i; + table[11 + i][localIdx] = symbols.getConstDigitSymbol(i); + } + + // Special case: quotes are NOT allowed to be in any localIdx strings. + // Substitute them with '’' instead. + for (int32_t i = 0; i < LEN; i++) { + table[i][localIdx].findAndReplace(u'\'', u'’'); + } + + // Iterate through the string and convert. + // State table: + // 0 => base state + // 1 => first char inside a quoted sequence in input and output string + // 2 => inside a quoted sequence in input and output string + // 3 => first char after a close quote in input string; + // close quote still needs to be written to output string + // 4 => base state in input string; inside quoted sequence in output string + // 5 => first char inside a quoted sequence in input string; + // inside quoted sequence in output string + UnicodeString result; + int state = 0; + for (int offset = 0; offset < input.length(); offset++) { + char16_t ch = input.charAt(offset); + + // Handle a quote character (state shift) + if (ch == u'\'') { + if (state == 0) { + result.append(u'\''); + state = 1; + continue; + } else if (state == 1) { + result.append(u'\''); + state = 0; + continue; + } else if (state == 2) { + state = 3; + continue; + } else if (state == 3) { + result.append(u'\''); + result.append(u'\''); + state = 1; + continue; + } else if (state == 4) { + state = 5; + continue; + } else { + U_ASSERT(state == 5); + result.append(u'\''); + result.append(u'\''); + state = 4; + continue; + } + } + + if (state == 0 || state == 3 || state == 4) { + for (auto& pair : table) { + // Perform a greedy match on this symbol string + UnicodeString temp = input.tempSubString(offset, pair[0].length()); + if (temp == pair[0]) { + // Skip ahead past this region for the next iteration + offset += pair[0].length() - 1; + if (state == 3 || state == 4) { + result.append(u'\''); + state = 0; + } + result.append(pair[1]); + goto continue_outer; + } + } + // No replacement found. Check if a special quote is necessary + for (auto& pair : table) { + UnicodeString temp = input.tempSubString(offset, pair[1].length()); + if (temp == pair[1]) { + if (state == 0) { + result.append(u'\''); + state = 4; + } + result.append(ch); + goto continue_outer; + } + } + // Still nothing. Copy the char verbatim. (Add a close quote if necessary) + if (state == 3 || state == 4) { + result.append(u'\''); + state = 0; + } + result.append(ch); + } else { + U_ASSERT(state == 1 || state == 2 || state == 5); + result.append(ch); + state = 2; + } + continue_outer:; + } + // Resolve final quotes + if (state == 3 || state == 4) { + result.append(u'\''); + state = 0; + } + if (state != 0) { + // Malformed localized pattern: unterminated quote + status = U_PATTERN_SYNTAX_ERROR; + } + return result; +} + +void PatternStringUtils::patternInfoToStringBuilder(const AffixPatternProvider& patternInfo, bool isPrefix, + PatternSignType patternSignType, + bool approximately, + StandardPlural::Form plural, + bool perMilleReplacesPercent, + bool dropCurrencySymbols, + UnicodeString& output) { + + // Should the output render '+' where '-' would normally appear in the pattern? + bool plusReplacesMinusSign = (patternSignType == PATTERN_SIGN_TYPE_POS_SIGN) + && !patternInfo.positiveHasPlusSign(); + + // Should we use the affix from the negative subpattern? + // (If not, we will use the positive subpattern.) + bool useNegativeAffixPattern = patternInfo.hasNegativeSubpattern() + && (patternSignType == PATTERN_SIGN_TYPE_NEG + || (patternInfo.negativeHasMinusSign() && (plusReplacesMinusSign || approximately))); + + // Resolve the flags for the affix pattern. + int flags = 0; + if (useNegativeAffixPattern) { + flags |= AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN; + } + if (isPrefix) { + flags |= AffixPatternProvider::AFFIX_PREFIX; + } + if (plural != StandardPlural::Form::COUNT) { + U_ASSERT(plural == (AffixPatternProvider::AFFIX_PLURAL_MASK & plural)); + flags |= plural; + } + + // Should we prepend a sign to the pattern? + bool prependSign; + if (!isPrefix || useNegativeAffixPattern) { + prependSign = false; + } else if (patternSignType == PATTERN_SIGN_TYPE_NEG) { + prependSign = true; + } else { + prependSign = plusReplacesMinusSign || approximately; + } + + // What symbols should take the place of the sign placeholder? + const char16_t* signSymbols = u"-"; + if (approximately) { + if (plusReplacesMinusSign) { + signSymbols = u"~+"; + } else if (patternSignType == PATTERN_SIGN_TYPE_NEG) { + signSymbols = u"~-"; + } else { + signSymbols = u"~"; + } + } else if (plusReplacesMinusSign) { + signSymbols = u"+"; + } + + // Compute the number of tokens in the affix pattern (signSymbols is considered one token). + int length = patternInfo.length(flags) + (prependSign ? 1 : 0); + + // Finally, set the result into the StringBuilder. + output.remove(); + for (int index = 0; index < length; index++) { + char16_t candidate; + if (prependSign && index == 0) { + candidate = u'-'; + } else if (prependSign) { + candidate = patternInfo.charAt(flags, index - 1); + } else { + candidate = patternInfo.charAt(flags, index); + } + if (candidate == u'-') { + if (u_strlen(signSymbols) == 1) { + candidate = signSymbols[0]; + } else { + output.append(signSymbols[0]); + candidate = signSymbols[1]; + } + } + if (perMilleReplacesPercent && candidate == u'%') { + candidate = u'‰'; + } + if (dropCurrencySymbols && candidate == u'\u00A4') { + continue; + } + output.append(candidate); + } +} + +PatternSignType PatternStringUtils::resolveSignDisplay(UNumberSignDisplay signDisplay, Signum signum) { + switch (signDisplay) { + case UNUM_SIGN_AUTO: + case UNUM_SIGN_ACCOUNTING: + switch (signum) { + case SIGNUM_NEG: + case SIGNUM_NEG_ZERO: + return PATTERN_SIGN_TYPE_NEG; + case SIGNUM_POS_ZERO: + case SIGNUM_POS: + return PATTERN_SIGN_TYPE_POS; + default: + break; + } + break; + + case UNUM_SIGN_ALWAYS: + case UNUM_SIGN_ACCOUNTING_ALWAYS: + switch (signum) { + case SIGNUM_NEG: + case SIGNUM_NEG_ZERO: + return PATTERN_SIGN_TYPE_NEG; + case SIGNUM_POS_ZERO: + case SIGNUM_POS: + return PATTERN_SIGN_TYPE_POS_SIGN; + default: + break; + } + break; + + case UNUM_SIGN_EXCEPT_ZERO: + case UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO: + switch (signum) { + case SIGNUM_NEG: + return PATTERN_SIGN_TYPE_NEG; + case SIGNUM_NEG_ZERO: + case SIGNUM_POS_ZERO: + return PATTERN_SIGN_TYPE_POS; + case SIGNUM_POS: + return PATTERN_SIGN_TYPE_POS_SIGN; + default: + break; + } + break; + + case UNUM_SIGN_NEGATIVE: + case UNUM_SIGN_ACCOUNTING_NEGATIVE: + switch (signum) { + case SIGNUM_NEG: + return PATTERN_SIGN_TYPE_NEG; + case SIGNUM_NEG_ZERO: + case SIGNUM_POS_ZERO: + case SIGNUM_POS: + return PATTERN_SIGN_TYPE_POS; + default: + break; + } + break; + + case UNUM_SIGN_NEVER: + return PATTERN_SIGN_TYPE_POS; + + default: + break; + } + + UPRV_UNREACHABLE_EXIT; + return PATTERN_SIGN_TYPE_POS; +} + +#endif /* #if !UCONFIG_NO_FORMATTING */ |