// From 26a029d407be480d791972afb5975cf62c9360a6 Mon Sep 17 00:00:00 2001
// From: Daniel Baumann
// Date: Fri, 19 Apr 2024 02:47:55 +0200
// Subject: Adding upstream version 124.0.1.
//
// Signed-off-by: Daniel Baumann
// ---
//  intl/icu/source/i18n/choicfmt.cpp | 577 ++++++++++++++++++++++++++++++++++++++
//  1 file changed, 577 insertions(+)
//  create mode 100644 intl/icu/source/i18n/choicfmt.cpp
//
// (limited to 'intl/icu/source/i18n/choicfmt.cpp')
//
// diff --git a/intl/icu/source/i18n/choicfmt.cpp b/intl/icu/source/i18n/choicfmt.cpp
// new file mode 100644
// index 0000000000..96e73fabcf
// --- /dev/null
// +++ b/intl/icu/source/i18n/choicfmt.cpp
// @@ -0,0 +1,577 @@

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 1997-2013, International Business Machines Corporation and    *
* others. All Rights Reserved.                                                *
*******************************************************************************
*
* File CHOICFMT.CPP
*
* Modification History:
*
*   Date        Name        Description
*   02/19/97    aliu        Converted from java.
*   03/20/97    helena      Finished first cut of implementation and got rid
*                           of nextDouble/previousDouble and replaced with
*                           boolean array.
*   4/10/97     aliu        Clean up.  Modified to work on AIX.
*   06/04/97    helena      Fixed applyPattern(), toPattern() and not to include
*                           wchar.h.
*   07/09/97    helena      Made ParsePosition into a class.
*   08/06/97    nos         removed overloaded constructor, fixed 'format(array)'
*   07/22/98    stephen     JDK 1.2 Sync - removed UBool array (doubleFlags)
*   02/22/99    stephen     Removed character literals for EBCDIC safety
********************************************************************************
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_FORMATTING

#include "unicode/choicfmt.h"
#include "unicode/numfmt.h"
#include "unicode/locid.h"
#include "cpputils.h"
#include "cstring.h"
#include "messageimpl.h"
#include "putilimp.h"
#include "uassert.h"
#include <stdio.h>   // snprintf(), used by ChoiceFormat::dtos()
#include <float.h>   // DBL_DIG, used by ChoiceFormat::dtos()

// *****************************************************************************
// class ChoiceFormat
// *****************************************************************************

U_NAMESPACE_BEGIN

UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ChoiceFormat)

// Special characters used by ChoiceFormat.  There are two characters
// used interchangeably to indicate <=.  Either is parsed, but only
// LESS_EQUAL is generated by toPattern().
#define SINGLE_QUOTE ((char16_t)0x0027)   /*'*/
#define LESS_THAN    ((char16_t)0x003C)   /*<*/
#define LESS_EQUAL   ((char16_t)0x0023)   /*#*/
#define LESS_EQUAL2  ((char16_t)0x2264)
#define VERTICAL_BAR ((char16_t)0x007C)   /*|*/
#define MINUS        ((char16_t)0x002D)   /*-*/

static const char16_t LEFT_CURLY_BRACE = 0x7B;     /*{*/
static const char16_t RIGHT_CURLY_BRACE = 0x7D;    /*}*/

#ifdef INFINITY
#undef INFINITY
#endif
#define INFINITY     ((char16_t)0x221E)

//static const char16_t gPositiveInfinity[] = {INFINITY, 0};
//static const char16_t gNegativeInfinity[] = {MINUS, INFINITY, 0};
#define POSITIVE_INF_STRLEN 1
#define NEGATIVE_INF_STRLEN 2

// -------------------------------------
// Creates a ChoiceFormat instance based on the pattern.
+ +ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern, + UErrorCode& status) +: constructorErrorCode(status), + msgPattern(status) +{ + applyPattern(newPattern, status); +} + +// ------------------------------------- +// Creates a ChoiceFormat instance with the limit array and +// format strings for each limit. + +ChoiceFormat::ChoiceFormat(const double* limits, + const UnicodeString* formats, + int32_t cnt ) +: constructorErrorCode(U_ZERO_ERROR), + msgPattern(constructorErrorCode) +{ + setChoices(limits, nullptr, formats, cnt, constructorErrorCode); +} + +// ------------------------------------- + +ChoiceFormat::ChoiceFormat(const double* limits, + const UBool* closures, + const UnicodeString* formats, + int32_t cnt ) +: constructorErrorCode(U_ZERO_ERROR), + msgPattern(constructorErrorCode) +{ + setChoices(limits, closures, formats, cnt, constructorErrorCode); +} + +// ------------------------------------- +// copy constructor + +ChoiceFormat::ChoiceFormat(const ChoiceFormat& that) +: NumberFormat(that), + constructorErrorCode(that.constructorErrorCode), + msgPattern(that.msgPattern) +{ +} + +// ------------------------------------- +// Private constructor that creates a +// ChoiceFormat instance based on the +// pattern and populates UParseError + +ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern, + UParseError& parseError, + UErrorCode& status) +: constructorErrorCode(status), + msgPattern(status) +{ + applyPattern(newPattern,parseError, status); +} +// ------------------------------------- + +bool +ChoiceFormat::operator==(const Format& that) const +{ + if (this == &that) return true; + if (!NumberFormat::operator==(that)) return false; + const ChoiceFormat& thatAlias = static_cast(that); + return msgPattern == thatAlias.msgPattern; +} + +// ------------------------------------- +// copy constructor + +const ChoiceFormat& +ChoiceFormat::operator=(const ChoiceFormat& that) +{ + if (this != &that) { + NumberFormat::operator=(that); + 
constructorErrorCode = that.constructorErrorCode; + msgPattern = that.msgPattern; + } + return *this; +} + +// ------------------------------------- + +ChoiceFormat::~ChoiceFormat() +{ +} + +// ------------------------------------- + +/** + * Convert a double value to a string without the overhead of NumberFormat. + */ +UnicodeString& +ChoiceFormat::dtos(double value, + UnicodeString& string) +{ + /* Buffer to contain the digits and any extra formatting stuff. */ + char temp[DBL_DIG + 16]; + char *itrPtr = temp; + char *expPtr; + + snprintf(temp, sizeof(temp), "%.*g", DBL_DIG, value); + + /* Find and convert the decimal point. + Using setlocale on some machines will cause snprintf to use a comma for certain locales. + */ + while (*itrPtr && (*itrPtr == '-' || isdigit(*itrPtr))) { + itrPtr++; + } + if (*itrPtr != 0 && *itrPtr != 'e') { + /* We reached something that looks like a decimal point. + In case someone used setlocale(), which changes the decimal point. */ + *itrPtr = '.'; + itrPtr++; + } + /* Search for the exponent */ + while (*itrPtr && *itrPtr != 'e') { + itrPtr++; + } + if (*itrPtr == 'e') { + itrPtr++; + /* Verify the exponent sign */ + if (*itrPtr == '+' || *itrPtr == '-') { + itrPtr++; + } + /* Remove leading zeros. You will see this on Windows machines. */ + expPtr = itrPtr; + while (*itrPtr == '0') { + itrPtr++; + } + if (*itrPtr && expPtr != itrPtr) { + /* Shift the exponent without zeros. */ + while (*itrPtr) { + *(expPtr++) = *(itrPtr++); + } + // NUL terminate + *expPtr = 0; + } + } + + string = UnicodeString(temp, -1, US_INV); /* invariant codepage */ + return string; +} + +// ------------------------------------- +// calls the overloaded applyPattern method. + +void +ChoiceFormat::applyPattern(const UnicodeString& pattern, + UErrorCode& status) +{ + msgPattern.parseChoiceStyle(pattern, nullptr, status); + constructorErrorCode = status; +} + +// ------------------------------------- +// Applies the pattern to this ChoiceFormat instance. 
+ +void +ChoiceFormat::applyPattern(const UnicodeString& pattern, + UParseError& parseError, + UErrorCode& status) +{ + msgPattern.parseChoiceStyle(pattern, &parseError, status); + constructorErrorCode = status; +} +// ------------------------------------- +// Returns the input pattern string. + +UnicodeString& +ChoiceFormat::toPattern(UnicodeString& result) const +{ + return result = msgPattern.getPatternString(); +} + +// ------------------------------------- +// Sets the limit and format arrays. +void +ChoiceFormat::setChoices( const double* limits, + const UnicodeString* formats, + int32_t cnt ) +{ + UErrorCode errorCode = U_ZERO_ERROR; + setChoices(limits, nullptr, formats, cnt, errorCode); +} + +// ------------------------------------- +// Sets the limit and format arrays. +void +ChoiceFormat::setChoices( const double* limits, + const UBool* closures, + const UnicodeString* formats, + int32_t cnt ) +{ + UErrorCode errorCode = U_ZERO_ERROR; + setChoices(limits, closures, formats, cnt, errorCode); +} + +void +ChoiceFormat::setChoices(const double* limits, + const UBool* closures, + const UnicodeString* formats, + int32_t count, + UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { + return; + } + if (limits == nullptr || formats == nullptr) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + // Reconstruct the original input pattern. + // Modified version of the pre-ICU 4.8 toPattern() implementation. + UnicodeString result; + for (int32_t i = 0; i < count; ++i) { + if (i != 0) { + result += VERTICAL_BAR; + } + UnicodeString buf; + if (uprv_isPositiveInfinity(limits[i])) { + result += INFINITY; + } else if (uprv_isNegativeInfinity(limits[i])) { + result += MINUS; + result += INFINITY; + } else { + result += dtos(limits[i], buf); + } + if (closures != nullptr && closures[i]) { + result += LESS_THAN; + } else { + result += LESS_EQUAL; + } + // Append formats[i], using quotes if there are special + // characters. 
Single quotes themselves must be escaped in + // either case. + const UnicodeString& text = formats[i]; + int32_t textLength = text.length(); + int32_t nestingLevel = 0; + for (int32_t j = 0; j < textLength; ++j) { + char16_t c = text[j]; + if (c == SINGLE_QUOTE && nestingLevel == 0) { + // Double each top-level apostrophe. + result.append(c); + } else if (c == VERTICAL_BAR && nestingLevel == 0) { + // Surround each pipe symbol with apostrophes for quoting. + // If the next character is an apostrophe, then that will be doubled, + // and although the parser will see the apostrophe pairs beginning + // and ending one character earlier than our doubling, the result + // is as desired. + // | -> '|' + // |' -> '|''' + // |'' -> '|''''' etc. + result.append(SINGLE_QUOTE).append(c).append(SINGLE_QUOTE); + continue; // Skip the append(c) at the end of the loop body. + } else if (c == LEFT_CURLY_BRACE) { + ++nestingLevel; + } else if (c == RIGHT_CURLY_BRACE && nestingLevel > 0) { + --nestingLevel; + } + result.append(c); + } + } + // Apply the reconstructed pattern. + applyPattern(result, errorCode); +} + +// ------------------------------------- +// Gets the limit array. + +const double* +ChoiceFormat::getLimits(int32_t& cnt) const +{ + cnt = 0; + return nullptr; +} + +// ------------------------------------- +// Gets the closures array. + +const UBool* +ChoiceFormat::getClosures(int32_t& cnt) const +{ + cnt = 0; + return nullptr; +} + +// ------------------------------------- +// Gets the format array. + +const UnicodeString* +ChoiceFormat::getFormats(int32_t& cnt) const +{ + cnt = 0; + return nullptr; +} + +// ------------------------------------- +// Formats an int64 number, it's actually formatted as +// a double. The returned format string may differ +// from the input number because of this. 
+ +UnicodeString& +ChoiceFormat::format(int64_t number, + UnicodeString& appendTo, + FieldPosition& status) const +{ + return format((double) number, appendTo, status); +} + +// ------------------------------------- +// Formats an int32_t number, it's actually formatted as +// a double. + +UnicodeString& +ChoiceFormat::format(int32_t number, + UnicodeString& appendTo, + FieldPosition& status) const +{ + return format((double) number, appendTo, status); +} + +// ------------------------------------- +// Formats a double number. + +UnicodeString& +ChoiceFormat::format(double number, + UnicodeString& appendTo, + FieldPosition& /*pos*/) const +{ + if (msgPattern.countParts() == 0) { + // No pattern was applied, or it failed. + return appendTo; + } + // Get the appropriate sub-message. + int32_t msgStart = findSubMessage(msgPattern, 0, number); + if (!MessageImpl::jdkAposMode(msgPattern)) { + int32_t patternStart = msgPattern.getPart(msgStart).getLimit(); + int32_t msgLimit = msgPattern.getLimitPartIndex(msgStart); + appendTo.append(msgPattern.getPatternString(), + patternStart, + msgPattern.getPatternIndex(msgLimit) - patternStart); + return appendTo; + } + // JDK compatibility mode: Remove SKIP_SYNTAX. + return MessageImpl::appendSubMessageWithoutSkipSyntax(msgPattern, msgStart, appendTo); +} + +int32_t +ChoiceFormat::findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number) { + int32_t count = pattern.countParts(); + int32_t msgStart; + // Iterate over (ARG_INT|DOUBLE, ARG_SELECTOR, message) tuples + // until ARG_LIMIT or end of choice-only pattern. + // Ignore the first number and selector and start the loop on the first message. + partIndex += 2; + for (;;) { + // Skip but remember the current sub-message. + msgStart = partIndex; + partIndex = pattern.getLimitPartIndex(partIndex); + if (++partIndex >= count) { + // Reached the end of the choice-only pattern. + // Return with the last sub-message. 
+ break; + } + const MessagePattern::Part &part = pattern.getPart(partIndex++); + UMessagePatternPartType type = part.getType(); + if (type == UMSGPAT_PART_TYPE_ARG_LIMIT) { + // Reached the end of the ChoiceFormat style. + // Return with the last sub-message. + break; + } + // part is an ARG_INT or ARG_DOUBLE + U_ASSERT(MessagePattern::Part::hasNumericValue(type)); + double boundary = pattern.getNumericValue(part); + // Fetch the ARG_SELECTOR character. + int32_t selectorIndex = pattern.getPatternIndex(partIndex++); + char16_t boundaryChar = pattern.getPatternString().charAt(selectorIndex); + if (boundaryChar == LESS_THAN ? !(number > boundary) : !(number >= boundary)) { + // The number is in the interval between the previous boundary and the current one. + // Return with the sub-message between them. + // The !(a>b) and !(a>=b) comparisons are equivalent to + // (a<=b) and (a= 0) { + int32_t newIndex = start + len; + if (newIndex > furthest) { + furthest = newIndex; + bestNumber = tempNumber; + if (furthest == source.length()) { + break; + } + } + } + partIndex = msgLimit + 1; + } + if (furthest == start) { + pos.setErrorIndex(start); + } else { + pos.setIndex(furthest); + } + return bestNumber; +} + +int32_t +ChoiceFormat::matchStringUntilLimitPart( + const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex, + const UnicodeString &source, int32_t sourceOffset) { + int32_t matchingSourceLength = 0; + const UnicodeString &msgString = pattern.getPatternString(); + int32_t prevIndex = pattern.getPart(partIndex).getLimit(); + for (;;) { + const MessagePattern::Part &part = pattern.getPart(++partIndex); + if (partIndex == limitPartIndex || part.getType() == UMSGPAT_PART_TYPE_SKIP_SYNTAX) { + int32_t index = part.getIndex(); + int32_t length = index - prevIndex; + if (length != 0 && 0 != source.compare(sourceOffset, length, msgString, prevIndex, length)) { + return -1; // mismatch + } + matchingSourceLength += length; + if (partIndex == limitPartIndex) 
{ + return matchingSourceLength; + } + prevIndex = part.getLimit(); // SKIP_SYNTAX + } + } +} + +// ------------------------------------- + +ChoiceFormat* +ChoiceFormat::clone() const +{ + ChoiceFormat *aCopy = new ChoiceFormat(*this); + return aCopy; +} + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +//eof -- cgit v1.2.3