summary | refs | log | tree | commit | diff | stats
path: root/intl/icu/source/i18n/choicfmt.cpp
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 19:33:14 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 19:33:14 +0000
commit: 36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree: 105e8c98ddea1c1e4784a60a5a6410fa416be2de /intl/icu/source/i18n/choicfmt.cpp
parent: Initial commit. (diff)
download: firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz
download: firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip
Adding upstream version 115.7.0esr. (refs: upstream/115.7.0esr, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'intl/icu/source/i18n/choicfmt.cpp')
-rw-r--r-- intl/icu/source/i18n/choicfmt.cpp 577
1 files changed, 577 insertions, 0 deletions
diff --git a/intl/icu/source/i18n/choicfmt.cpp b/intl/icu/source/i18n/choicfmt.cpp
new file mode 100644
index 0000000000..96e73fabcf
--- /dev/null
+++ b/intl/icu/source/i18n/choicfmt.cpp
@@ -0,0 +1,577 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 1997-2013, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*
+* File CHOICFMT.CPP
+*
+* Modification History:
+*
+* Date Name Description
+* 02/19/97 aliu Converted from java.
+* 03/20/97 helena Finished first cut of implementation and got rid
+* of nextDouble/previousDouble and replaced with
+* boolean array.
+* 4/10/97 aliu Clean up. Modified to work on AIX.
+* 06/04/97 helena Fixed applyPattern(), toPattern() and not to include
+* wchar.h.
+* 07/09/97 helena Made ParsePosition into a class.
+* 08/06/97 nos removed overloaded constructor, fixed 'format(array)'
+* 07/22/98 stephen JDK 1.2 Sync - removed UBool array (doubleFlags)
+* 02/22/99 stephen Removed character literals for EBCDIC safety
+********************************************************************************
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/choicfmt.h"
+#include "unicode/numfmt.h"
+#include "unicode/locid.h"
+#include "cpputils.h"
+#include "cstring.h"
+#include "messageimpl.h"
+#include "putilimp.h"
+#include "uassert.h"
+#include <stdio.h>
+#include <float.h>
+
+// *****************************************************************************
+// class ChoiceFormat
+// *****************************************************************************
+
+U_NAMESPACE_BEGIN
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ChoiceFormat) // NOTE(review): macro is defined outside this file; presumably emits the RTTI boilerplate for ChoiceFormat -- confirm
+
+// Syntax characters of a choice pattern.
+// Two distinct characters can stand for "<=" in a pattern (LESS_EQUAL and
+// LESS_EQUAL2); the pattern builder in setChoices() only ever emits LESS_EQUAL.
+#define SINGLE_QUOTE (static_cast<char16_t>(0x0027)) /* ' */
+#define LESS_THAN    (static_cast<char16_t>(0x003C)) /* < */
+#define LESS_EQUAL   (static_cast<char16_t>(0x0023)) /* # */
+#define LESS_EQUAL2  (static_cast<char16_t>(0x2264))
+#define VERTICAL_BAR (static_cast<char16_t>(0x007C)) /* | */
+#define MINUS        (static_cast<char16_t>(0x002D)) /* - */
+
+static const char16_t LEFT_CURLY_BRACE  = 0x7B; /* { */
+static const char16_t RIGHT_CURLY_BRACE = 0x7D; /* } */
+
+// Discard any INFINITY macro that is already defined (undefining an unknown
+// name is a no-op) and make INFINITY mean the U+221E code unit in this file.
+#undef INFINITY
+#define INFINITY (static_cast<char16_t>(0x221E))
+
+// Disabled string forms of the two infinities, kept for reference:
+//static const char16_t gPositiveInfinity[] = {INFINITY, 0};
+//static const char16_t gNegativeInfinity[] = {MINUS, INFINITY, 0};
+#define POSITIVE_INF_STRLEN 1
+#define NEGATIVE_INF_STRLEN 2
+
+// -------------------------------------
+// Pattern constructor.
+// constructorErrorCode and msgPattern are both initialized from the incoming
+// status; applyPattern() then parses newPattern and stores the resulting
+// status in constructorErrorCode again.
+
+ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern, UErrorCode& status)
+    : constructorErrorCode(status),
+      msgPattern(status)
+{
+    applyPattern(newPattern, status);
+}
+
+// -------------------------------------
+// Array constructor without closures.
+// Delegates to the five-argument setChoices() with a null closures array;
+// any error is recorded in constructorErrorCode, which starts as U_ZERO_ERROR.
+
+ChoiceFormat::ChoiceFormat(const double* limits, const UnicodeString* formats, int32_t cnt)
+    : constructorErrorCode(U_ZERO_ERROR),
+      msgPattern(constructorErrorCode)
+{
+    setChoices(limits, nullptr, formats, cnt, constructorErrorCode);
+}
+
+// -------------------------------------
+// Array constructor with closures.
+// Forwards all three arrays to the five-argument setChoices(); any error is
+// recorded in constructorErrorCode, which starts as U_ZERO_ERROR.
+
+ChoiceFormat::ChoiceFormat(const double* limits,
+                           const UBool* closures,
+                           const UnicodeString* formats,
+                           int32_t cnt)
+    : constructorErrorCode(U_ZERO_ERROR),
+      msgPattern(constructorErrorCode)
+{
+    setChoices(limits, closures, formats, cnt, constructorErrorCode);
+}
+
+// -------------------------------------
+// Copy constructor: copies the NumberFormat base, the stored constructor
+// error code and the parsed message pattern from `that`.
+
+ChoiceFormat::ChoiceFormat(const ChoiceFormat& that)
+    : NumberFormat(that),
+      constructorErrorCode(that.constructorErrorCode),
+      msgPattern(that.msgPattern)
+{
+}
+
+// -------------------------------------
+// Private pattern constructor.
+// Same as the public pattern constructor, except that the parse goes through
+// the applyPattern() overload that also receives a UParseError.
+
+ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
+                           UParseError& parseError,
+                           UErrorCode& status)
+    : constructorErrorCode(status),
+      msgPattern(status)
+{
+    applyPattern(newPattern, parseError, status);
+}
+// -------------------------------------
+// Equality: true for the same object, or when the NumberFormat base compares
+// equal and both objects hold equal message patterns.
+
+bool
+ChoiceFormat::operator==(const Format& that) const
+{
+    if (this != &that) {
+        if (!NumberFormat::operator==(that)) {
+            return false;
+        }
+        // The downcast is only reached after the base-class comparison succeeded.
+        const ChoiceFormat& other = static_cast<const ChoiceFormat&>(that);
+        return msgPattern == other.msgPattern;
+    }
+    return true;
+}
+
+// -------------------------------------
+// Assignment operator: copies the NumberFormat base, the stored constructor
+// error code and the message pattern. Self-assignment is a no-op.
+
+const ChoiceFormat&
+ChoiceFormat::operator=(const ChoiceFormat& that)
+{
+    if (this == &that) {
+        return *this;
+    }
+    NumberFormat::operator=(that);
+    constructorErrorCode = that.constructorErrorCode;
+    msgPattern = that.msgPattern;
+    return *this;
+}
+
+// -------------------------------------
+// Destructor: nothing to release explicitly; members clean up after themselves.
+
+ChoiceFormat::~ChoiceFormat() {}
+
+// -------------------------------------
+
+/**
+ * Convert a double value to a string without the overhead of NumberFormat.
+ *
+ * The value is printed with "%.*g" using DBL_DIG significant digits. The
+ * output is then normalized in place: the first character after the leading
+ * sign/digits (unless it is the exponent marker) is forced to '.', and
+ * leading zeros of the exponent are removed.
+ *
+ * NOTE(review): non-finite values are not special-cased here; setChoices()
+ * handles the infinities itself before calling this function.
+ *
+ * @param value  the number to convert
+ * @param string receives the converted text (previous contents are replaced)
+ * @return       a reference to `string`
+ */
+UnicodeString&
+ChoiceFormat::dtos(double value,
+                   UnicodeString& string)
+{
+    /* Buffer to contain the digits and any extra formatting stuff. */
+    char temp[DBL_DIG + 16];
+    char *itrPtr = temp;
+    char *expPtr;
+
+    snprintf(temp, sizeof(temp), "%.*g", DBL_DIG, value);
+
+    /* Find and convert the decimal point.
+       Using setlocale on some machines will cause snprintf to use a comma for certain locales.
+       The digit test is an explicit '0'..'9' range check: passing a plain
+       (possibly negative) char to isdigit() is undefined behaviour, and the
+       range check also needs no <ctype.h>.
+    */
+    while (*itrPtr && (*itrPtr == '-' || (*itrPtr >= '0' && *itrPtr <= '9'))) {
+        itrPtr++;
+    }
+    if (*itrPtr != 0 && *itrPtr != 'e') {
+        /* We reached something that looks like a decimal point.
+           In case someone used setlocale(), which changes the decimal point. */
+        *itrPtr = '.';
+        itrPtr++;
+    }
+    /* Search for the exponent */
+    while (*itrPtr && *itrPtr != 'e') {
+        itrPtr++;
+    }
+    if (*itrPtr == 'e') {
+        itrPtr++;
+        /* Verify the exponent sign */
+        if (*itrPtr == '+' || *itrPtr == '-') {
+            itrPtr++;
+        }
+        /* Remove leading zeros. You will see this on Windows machines. */
+        expPtr = itrPtr;
+        while (*itrPtr == '0') {
+            itrPtr++;
+        }
+        if (*itrPtr && expPtr != itrPtr) {
+            /* Shift the exponent without zeros. */
+            while (*itrPtr) {
+                *(expPtr++) = *(itrPtr++);
+            }
+            // NUL terminate
+            *expPtr = 0;
+        }
+    }
+
+    string = UnicodeString(temp, -1, US_INV);    /* invariant codepage */
+    return string;
+}
+
+// -------------------------------------
+// Parses `pattern` as a choice-style message pattern without collecting
+// parse-error details, and remembers the resulting status in
+// constructorErrorCode.
+
+void
+ChoiceFormat::applyPattern(const UnicodeString& pattern, UErrorCode& status)
+{
+    msgPattern.parseChoiceStyle(pattern, /* parseError */ nullptr, status);
+    constructorErrorCode = status;
+}
+
+// -------------------------------------
+// Parses `pattern` as a choice-style message pattern, passing `parseError`
+// to the parser, and remembers the resulting status in constructorErrorCode.
+
+void
+ChoiceFormat::applyPattern(const UnicodeString& pattern,
+                           UParseError& parseError,
+                           UErrorCode& status)
+{
+    UParseError* const errorDetails = &parseError;
+    msgPattern.parseChoiceStyle(pattern, errorDetails, status);
+    constructorErrorCode = status;
+}
+// -------------------------------------
+// Copies the pattern string held by msgPattern into `result` and returns
+// a reference to `result`.
+
+UnicodeString&
+ChoiceFormat::toPattern(UnicodeString& result) const
+{
+    result = msgPattern.getPatternString();
+    return result;
+}
+
+// -------------------------------------
+// Replaces the choices from parallel limit/format arrays (no closures).
+// The status of the underlying call is local and not reported to the caller.
+void
+ChoiceFormat::setChoices(const double* limits,
+                         const UnicodeString* formats,
+                         int32_t cnt)
+{
+    UErrorCode localStatus = U_ZERO_ERROR;
+    setChoices(limits, nullptr, formats, cnt, localStatus);
+}
+
+// -------------------------------------
+// Replaces the choices from parallel limit/closure/format arrays.
+// The status of the underlying call is local and not reported to the caller.
+void
+ChoiceFormat::setChoices(const double* limits,
+                         const UBool* closures,
+                         const UnicodeString* formats,
+                         int32_t cnt)
+{
+    UErrorCode localStatus = U_ZERO_ERROR;
+    setChoices(limits, closures, formats, cnt, localStatus);
+}
+
+// Builds a choice pattern string from the given arrays and applies it.
+//
+// For each of the `count` entries the pattern receives: a '|' separator
+// (except before the first entry), the limit (the infinity sign, optionally
+// preceded by '-', or the dtos() text), a selector ('<' when closures is
+// non-null and closures[i] is set, '#' otherwise) and the escaped format text.
+//
+// Returns immediately if errorCode already indicates failure; sets
+// U_ILLEGAL_ARGUMENT_ERROR if limits or formats is null.
+void
+ChoiceFormat::setChoices(const double* limits,
+                         const UBool* closures,
+                         const UnicodeString* formats,
+                         int32_t count,
+                         UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+    if (limits == nullptr || formats == nullptr) {
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    // Rebuild a pattern string; adapted from the pre-ICU 4.8 toPattern().
+    UnicodeString rebuilt;
+    for (int32_t choice = 0; choice < count; ++choice) {
+        if (choice > 0) {
+            rebuilt += VERTICAL_BAR;
+        }
+
+        // Limit value.
+        const double limit = limits[choice];
+        if (uprv_isPositiveInfinity(limit)) {
+            rebuilt += INFINITY;
+        } else if (uprv_isNegativeInfinity(limit)) {
+            rebuilt += MINUS;
+            rebuilt += INFINITY;
+        } else {
+            UnicodeString numberText;
+            rebuilt += dtos(limit, numberText);
+        }
+
+        // Selector character.
+        const bool useLessThan = (closures != nullptr) && closures[choice];
+        if (useLessThan) {
+            rebuilt += LESS_THAN;
+        } else {
+            rebuilt += LESS_EQUAL;
+        }
+
+        // Format text. Outside of any {...} nesting, apostrophes are doubled
+        // and pipe symbols are wrapped in apostrophes.
+        const UnicodeString& text = formats[choice];
+        const int32_t textLength = text.length();
+        int32_t braceDepth = 0;
+        for (int32_t pos = 0; pos < textLength; ++pos) {
+            const char16_t ch = text[pos];
+            const bool atTopLevel = (braceDepth == 0);
+            if (atTopLevel && ch == VERTICAL_BAR) {
+                // Quote the pipe. If an apostrophe follows, it gets doubled,
+                // and although the parser then sees the apostrophe pairs
+                // starting one character earlier, the result is as desired:
+                //   |   -> '|'
+                //   |'  -> '|'''
+                //   |'' -> '|''''' etc.
+                rebuilt.append(SINGLE_QUOTE).append(ch).append(SINGLE_QUOTE);
+                continue;  // Already appended; skip the common append below.
+            }
+            if (atTopLevel && ch == SINGLE_QUOTE) {
+                // First half of the doubled apostrophe; second half follows below.
+                rebuilt.append(ch);
+            } else if (ch == LEFT_CURLY_BRACE) {
+                ++braceDepth;
+            } else if (ch == RIGHT_CURLY_BRACE && braceDepth > 0) {
+                --braceDepth;
+            }
+            rebuilt.append(ch);
+        }
+    }
+    // Parse the rebuilt pattern.
+    applyPattern(rebuilt, errorCode);
+}
+
+// -------------------------------------
+// Limit-array getter. This implementation exposes no array: it reports a
+// count of zero and returns a null pointer.
+
+const double*
+ChoiceFormat::getLimits(int32_t& cnt) const
+{
+    cnt = 0;
+    const double* const noLimits = nullptr;
+    return noLimits;
+}
+
+// -------------------------------------
+// Closure-array getter. This implementation exposes no array: it reports a
+// count of zero and returns a null pointer.
+
+const UBool*
+ChoiceFormat::getClosures(int32_t& cnt) const
+{
+    cnt = 0;
+    const UBool* const noClosures = nullptr;
+    return noClosures;
+}
+
+// -------------------------------------
+// Format-array getter. This implementation exposes no array: it reports a
+// count of zero and returns a null pointer.
+
+const UnicodeString*
+ChoiceFormat::getFormats(int32_t& cnt) const
+{
+    cnt = 0;
+    const UnicodeString* const noFormats = nullptr;
+    return noFormats;
+}
+
+// -------------------------------------
+// Formats a 64-bit integer by converting it to double and delegating to the
+// double overload. The conversion to double can change the value, so the
+// selected text may not correspond exactly to the input number.
+
+UnicodeString&
+ChoiceFormat::format(int64_t number,
+                     UnicodeString& appendTo,
+                     FieldPosition& status) const
+{
+    const double asDouble = static_cast<double>(number);
+    return format(asDouble, appendTo, status);
+}
+
+// -------------------------------------
+// Formats a 32-bit integer by converting it to double and delegating to the
+// double overload.
+
+UnicodeString&
+ChoiceFormat::format(int32_t number,
+                     UnicodeString& appendTo,
+                     FieldPosition& status) const
+{
+    const double asDouble = static_cast<double>(number);
+    return format(asDouble, appendTo, status);
+}
+
+// -------------------------------------
+// Formats a double: picks the sub-message whose interval contains `number`
+// and appends its text to `appendTo`. The FieldPosition argument is unused.
+// With an empty pattern (none applied, or parsing failed) nothing is appended.
+
+UnicodeString&
+ChoiceFormat::format(double number,
+                     UnicodeString& appendTo,
+                     FieldPosition& /*pos*/) const
+{
+    if (msgPattern.countParts() == 0) {
+        return appendTo;
+    }
+    // Locate the sub-message that applies to this number.
+    const int32_t msgStart = findSubMessage(msgPattern, 0, number);
+    if (MessageImpl::jdkAposMode(msgPattern)) {
+        // JDK compatibility mode: Remove SKIP_SYNTAX.
+        return MessageImpl::appendSubMessageWithoutSkipSyntax(msgPattern, msgStart, appendTo);
+    }
+    // Otherwise copy the raw pattern text between the sub-message's start
+    // part and its matching limit part.
+    const int32_t textStart = msgPattern.getPart(msgStart).getLimit();
+    const int32_t limitPart = msgPattern.getLimitPartIndex(msgStart);
+    const int32_t textLength = msgPattern.getPatternIndex(limitPart) - textStart;
+    appendTo.append(msgPattern.getPatternString(), textStart, textLength);
+    return appendTo;
+}
+
+// Returns the part index at which the sub-message selected by `number`
+// starts. `partIndex` is the index of the first numeric part of the choice
+// style; the parts are walked as (ARG_INT|DOUBLE, ARG_SELECTOR, message)
+// tuples until ARG_LIMIT or the end of a choice-only pattern.
+int32_t
+ChoiceFormat::findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number) {
+    const int32_t partCount = pattern.countParts();
+    // The first number and selector are ignored: start at the first message.
+    int32_t cursor = partIndex + 2;
+    for (;;) {
+        // Remember this sub-message, then step past it.
+        const int32_t candidate = cursor;
+        cursor = pattern.getLimitPartIndex(cursor) + 1;
+        if (cursor >= partCount) {
+            // End of a choice-only pattern: the last sub-message wins.
+            return candidate;
+        }
+        const MessagePattern::Part &numericPart = pattern.getPart(cursor);
+        ++cursor;
+        const UMessagePatternPartType partType = numericPart.getType();
+        if (partType == UMSGPAT_PART_TYPE_ARG_LIMIT) {
+            // End of the ChoiceFormat style: the last sub-message wins.
+            return candidate;
+        }
+        // Anything else here is an ARG_INT or ARG_DOUBLE.
+        U_ASSERT(MessagePattern::Part::hasNumericValue(partType));
+        const double boundary = pattern.getNumericValue(numericPart);
+        // The ARG_SELECTOR part follows; read its character from the pattern.
+        const int32_t selectorIndex = pattern.getPatternIndex(cursor);
+        ++cursor;
+        const char16_t selector = pattern.getPatternString().charAt(selectorIndex);
+        // Negated comparisons: !(a>b) and !(a>=b) behave like (a<=b) and
+        // (a<b) but also "catch" NaN.
+        bool beforeBoundary;
+        if (selector == LESS_THAN) {
+            beforeBoundary = !(number > boundary);
+        } else {
+            beforeBoundary = !(number >= boundary);
+        }
+        if (beforeBoundary) {
+            // The number lies between the previous boundary and this one.
+            return candidate;
+        }
+    }
+}
+
+// -------------------------------------
+// Formats an array of Formattable objects. Each object is converted with
+// getDouble(); when the status is still successful afterwards, the value is
+// formatted with the double overload and appended to `appendTo`.
+// Sets U_ILLEGAL_ARGUMENT_ERROR for a negative count and
+// U_INVALID_STATE_ERROR when no pattern parts are available.
+
+UnicodeString&
+ChoiceFormat::format(const Formattable* objs,
+                     int32_t cnt,
+                     UnicodeString& appendTo,
+                     FieldPosition& pos,
+                     UErrorCode& status) const
+{
+    if (cnt < 0) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return appendTo;
+    }
+    if (msgPattern.countParts() == 0) {
+        status = U_INVALID_STATE_ERROR;
+        return appendTo;
+    }
+
+    const Formattable* const end = objs + cnt;
+    for (const Formattable* obj = objs; obj != end; ++obj) {
+        const double value = obj->getDouble(status);
+        if (U_FAILURE(status)) {
+            continue;
+        }
+        format(value, appendTo, pos);
+    }
+
+    return appendTo;
+}
+
+// -------------------------------------
+// Parses `text` from the position in `pos` against this format's pattern
+// and stores the number found by parseArgument() in `result` as a double.
+
+void
+ChoiceFormat::parse(const UnicodeString& text,
+                    Formattable& result,
+                    ParsePosition& pos) const
+{
+    const double parsed = parseArgument(msgPattern, 0, text, pos);
+    result.setDouble(parsed);
+}
+
+// Tries every sub-message of the choice style starting at `partIndex`
+// against `source` at the position in `pos` and returns the number of the
+// sub-message with the longest match. On a match, `pos` is advanced past the
+// matched text; if nothing matched, the error index of `pos` is set to the
+// start position and the result is NaN.
+double
+ChoiceFormat::parseArgument(
+        const MessagePattern &pattern, int32_t partIndex,
+        const UnicodeString &source, ParsePosition &pos) {
+    const int32_t start = pos.getIndex();
+    const int32_t partCount = pattern.countParts();
+    int32_t furthest = start;
+    double bestNumber = uprv_getNaN();
+    int32_t cursor = partIndex;
+    while (cursor < partCount &&
+           pattern.getPartType(cursor) != UMSGPAT_PART_TYPE_ARG_LIMIT) {
+        const double candidate = pattern.getNumericValue(pattern.getPart(cursor));
+        // Step over the numeric part and the ARG_SELECTOR to the message.
+        const int32_t msgStart = cursor + 2;
+        const int32_t msgLimit = pattern.getLimitPartIndex(msgStart);
+        const int32_t matched =
+            matchStringUntilLimitPart(pattern, msgStart, msgLimit, source, start);
+        if (matched >= 0 && start + matched > furthest) {
+            // Longest match so far.
+            furthest = start + matched;
+            bestNumber = candidate;
+            if (furthest == source.length()) {
+                // Consumed the whole source; no longer match is possible.
+                break;
+            }
+        }
+        cursor = msgLimit + 1;
+    }
+    if (furthest != start) {
+        pos.setIndex(furthest);
+    } else {
+        pos.setErrorIndex(start);
+    }
+    return bestNumber;
+}
+
+// Matches the literal text of one sub-message against `source`.
+//
+// The sub-message spans the parts from `partIndex` (its start part) to
+// `limitPartIndex`. Its pattern text is compared in segments: each segment
+// ends at a SKIP_SYNTAX part (whose own text is skipped) or at the limit part.
+//
+// Each segment is compared at sourceOffset plus the length already matched,
+// so that the segments following a SKIP_SYNTAX part are checked against the
+// text that follows the earlier segments in `source`, not against the start
+// of the match again.
+//
+// @return the number of code units of `source` matched, or -1 on mismatch
+int32_t
+ChoiceFormat::matchStringUntilLimitPart(
+        const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex,
+        const UnicodeString &source, int32_t sourceOffset) {
+    int32_t matchingSourceLength = 0;
+    const UnicodeString &msgString = pattern.getPatternString();
+    int32_t prevIndex = pattern.getPart(partIndex).getLimit();
+    for (;;) {
+        const MessagePattern::Part &part = pattern.getPart(++partIndex);
+        if (partIndex == limitPartIndex || part.getType() == UMSGPAT_PART_TYPE_SKIP_SYNTAX) {
+            int32_t index = part.getIndex();
+            int32_t length = index - prevIndex;
+            if (length != 0 &&
+                    0 != source.compare(sourceOffset + matchingSourceLength, length,
+                                        msgString, prevIndex, length)) {
+                return -1;  // mismatch
+            }
+            matchingSourceLength += length;
+            if (partIndex == limitPartIndex) {
+                return matchingSourceLength;
+            }
+            prevIndex = part.getLimit();  // SKIP_SYNTAX
+        }
+    }
+}
+
+// -------------------------------------
+// Returns a newly allocated copy of this object, made with the copy
+// constructor.
+
+ChoiceFormat*
+ChoiceFormat::clone() const
+{
+    return new ChoiceFormat(*this);
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+//eof