From 36d22d82aa202bb199967e9512281e9a53db42c9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 21:33:14 +0200 Subject: Adding upstream version 115.7.0esr. Signed-off-by: Daniel Baumann --- intl/icu/source/i18n/number_modifiers.cpp | 494 ++++++++++++++++++++++++++++++ 1 file changed, 494 insertions(+) create mode 100644 intl/icu/source/i18n/number_modifiers.cpp (limited to 'intl/icu/source/i18n/number_modifiers.cpp') diff --git a/intl/icu/source/i18n/number_modifiers.cpp b/intl/icu/source/i18n/number_modifiers.cpp new file mode 100644 index 0000000000..0f6fdafb09 --- /dev/null +++ b/intl/icu/source/i18n/number_modifiers.cpp @@ -0,0 +1,494 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "umutex.h" +#include "ucln_cmn.h" +#include "ucln_in.h" +#include "number_modifiers.h" + +using namespace icu; +using namespace icu::number; +using namespace icu::number::impl; + +namespace { + +// TODO: This is copied from simpleformatter.cpp +const int32_t ARG_NUM_LIMIT = 0x100; + +// These are the default currency spacing UnicodeSets in CLDR. +// Pre-compute them for performance. +// The Java unit test testCurrencySpacingPatternStability() will start failing if these change in CLDR. +icu::UInitOnce gDefaultCurrencySpacingInitOnce {}; + +UnicodeSet *UNISET_DIGIT = nullptr; +UnicodeSet *UNISET_NOTSZ = nullptr; + +UBool U_CALLCONV cleanupDefaultCurrencySpacing() { + delete UNISET_DIGIT; + UNISET_DIGIT = nullptr; + delete UNISET_NOTSZ; + UNISET_NOTSZ = nullptr; + gDefaultCurrencySpacingInitOnce.reset(); + return true; +} + +void U_CALLCONV initDefaultCurrencySpacing(UErrorCode &status) { + ucln_i18n_registerCleanup(UCLN_I18N_CURRENCY_SPACING, cleanupDefaultCurrencySpacing); + UNISET_DIGIT = new UnicodeSet(UnicodeString(u"[:digit:]"), status); + UNISET_NOTSZ = new UnicodeSet(UnicodeString(u"[[:^S:]&[:^Z:]]"), status); + if (UNISET_DIGIT == nullptr || UNISET_NOTSZ == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + UNISET_DIGIT->freeze(); + UNISET_NOTSZ->freeze(); +} + +} // namespace + + +Modifier::~Modifier() = default; + +Modifier::Parameters::Parameters() + : obj(nullptr) {} + +Modifier::Parameters::Parameters( + const ModifierStore* _obj, Signum _signum, StandardPlural::Form _plural) + : obj(_obj), signum(_signum), plural(_plural) {} + +ModifierStore::~ModifierStore() = default; + +AdoptingSignumModifierStore::~AdoptingSignumModifierStore() { + for (const Modifier *mod : mods) { + delete mod; + } +} + +AdoptingSignumModifierStore& +AdoptingSignumModifierStore::operator=(AdoptingSignumModifierStore&& other) noexcept { + for (size_t i=0; imods[i] = other.mods[i]; + other.mods[i] = nullptr; + } + return *this; +} + + +int32_t ConstantAffixModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex, + UErrorCode &status) const { + // Insert the suffix first since inserting the prefix will change the rightIndex + int length = output.insert(rightIndex, fSuffix, fField, status); + length += output.insert(leftIndex, fPrefix, fField, status); + return length; +} + +int32_t ConstantAffixModifier::getPrefixLength() const { + return fPrefix.length(); +} + +int32_t ConstantAffixModifier::getCodePointCount() const { + return fPrefix.countChar32() + fSuffix.countChar32(); +} + +bool ConstantAffixModifier::isStrong() const { + return fStrong; +} + +bool ConstantAffixModifier::containsField(Field field) const { + (void)field; + // This method is not currently used. + UPRV_UNREACHABLE_EXIT; +} + +void ConstantAffixModifier::getParameters(Parameters& output) const { + (void)output; + // This method is not currently used. + UPRV_UNREACHABLE_EXIT; +} + +bool ConstantAffixModifier::semanticallyEquivalent(const Modifier& other) const { + auto* _other = dynamic_cast(&other); + if (_other == nullptr) { + return false; + } + return fPrefix == _other->fPrefix + && fSuffix == _other->fSuffix + && fField == _other->fField + && fStrong == _other->fStrong; +} + + +SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong) + : SimpleModifier(simpleFormatter, field, strong, {}) {} + +SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong, + const Modifier::Parameters parameters) + : fCompiledPattern(simpleFormatter.compiledPattern), fField(field), fStrong(strong), + fParameters(parameters) { + int32_t argLimit = SimpleFormatter::getArgumentLimit( + fCompiledPattern.getBuffer(), fCompiledPattern.length()); + if (argLimit == 0) { + // No arguments in compiled pattern + fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT; + U_ASSERT(2 + fPrefixLength == fCompiledPattern.length()); + // Set suffixOffset = -1 to indicate no arguments in compiled pattern. + fSuffixOffset = -1; + fSuffixLength = 0; + } else { + U_ASSERT(argLimit == 1); + if (fCompiledPattern.charAt(1) != 0) { + // Found prefix + fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT; + fSuffixOffset = 3 + fPrefixLength; + } else { + // No prefix + fPrefixLength = 0; + fSuffixOffset = 2; + } + if (3 + fPrefixLength < fCompiledPattern.length()) { + // Found suffix + fSuffixLength = fCompiledPattern.charAt(fSuffixOffset) - ARG_NUM_LIMIT; + } else { + // No suffix + fSuffixLength = 0; + } + } +} + +SimpleModifier::SimpleModifier() + : fField(kUndefinedField), fStrong(false), fPrefixLength(0), fSuffixLength(0) { +} + +int32_t SimpleModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex, + UErrorCode &status) const { + return formatAsPrefixSuffix(output, leftIndex, rightIndex, status); +} + +int32_t SimpleModifier::getPrefixLength() const { + return fPrefixLength; +} + +int32_t SimpleModifier::getCodePointCount() const { + int32_t count = 0; + if (fPrefixLength > 0) { + count += fCompiledPattern.countChar32(2, fPrefixLength); + } + if (fSuffixLength > 0) { + count += fCompiledPattern.countChar32(1 + fSuffixOffset, fSuffixLength); + } + return count; +} + +bool SimpleModifier::isStrong() const { + return fStrong; +} + +bool SimpleModifier::containsField(Field field) const { + (void)field; + // This method is not currently used. + UPRV_UNREACHABLE_EXIT; +} + +void SimpleModifier::getParameters(Parameters& output) const { + output = fParameters; +} + +bool SimpleModifier::semanticallyEquivalent(const Modifier& other) const { + auto* _other = dynamic_cast(&other); + if (_other == nullptr) { + return false; + } + if (fParameters.obj != nullptr) { + return fParameters.obj == _other->fParameters.obj; + } + return fCompiledPattern == _other->fCompiledPattern + && fField == _other->fField + && fStrong == _other->fStrong; +} + + +int32_t +SimpleModifier::formatAsPrefixSuffix(FormattedStringBuilder &result, int32_t startIndex, int32_t endIndex, + UErrorCode &status) const { + if (fSuffixOffset == -1 && fPrefixLength + fSuffixLength > 0) { + // There is no argument for the inner number; overwrite the entire segment with our string. + return result.splice(startIndex, endIndex, fCompiledPattern, 2, 2 + fPrefixLength, fField, status); + } else { + if (fPrefixLength > 0) { + result.insert(startIndex, fCompiledPattern, 2, 2 + fPrefixLength, fField, status); + } + if (fSuffixLength > 0) { + result.insert( + endIndex + fPrefixLength, + fCompiledPattern, + 1 + fSuffixOffset, + 1 + fSuffixOffset + fSuffixLength, + fField, + status); + } + return fPrefixLength + fSuffixLength; + } +} + + +int32_t +SimpleModifier::formatTwoArgPattern(const SimpleFormatter& compiled, FormattedStringBuilder& result, + int32_t index, int32_t* outPrefixLength, int32_t* outSuffixLength, + Field field, UErrorCode& status) { + const UnicodeString& compiledPattern = compiled.compiledPattern; + int32_t argLimit = SimpleFormatter::getArgumentLimit( + compiledPattern.getBuffer(), compiledPattern.length()); + if (argLimit != 2) { + status = U_INTERNAL_PROGRAM_ERROR; + return 0; + } + int32_t offset = 1; // offset into compiledPattern + int32_t length = 0; // chars added to result + + int32_t prefixLength = compiledPattern.charAt(offset); + offset++; + if (prefixLength < ARG_NUM_LIMIT) { + // No prefix + prefixLength = 0; + } else { + prefixLength -= ARG_NUM_LIMIT; + result.insert(index + length, compiledPattern, offset, offset + prefixLength, field, status); + offset += prefixLength; + length += prefixLength; + offset++; + } + + int32_t infixLength = compiledPattern.charAt(offset); + offset++; + if (infixLength < ARG_NUM_LIMIT) { + // No infix + infixLength = 0; + } else { + infixLength -= ARG_NUM_LIMIT; + result.insert(index + length, compiledPattern, offset, offset + infixLength, field, status); + offset += infixLength; + length += infixLength; + offset++; + } + + int32_t suffixLength; + if (offset == compiledPattern.length()) { + // No suffix + suffixLength = 0; + } else { + suffixLength = compiledPattern.charAt(offset) - ARG_NUM_LIMIT; + offset++; + result.insert(index + length, compiledPattern, offset, offset + suffixLength, field, status); + length += suffixLength; + } + + *outPrefixLength = prefixLength; + *outSuffixLength = suffixLength; + + return length; +} + + +int32_t ConstantMultiFieldModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex, + UErrorCode &status) const { + int32_t length = output.insert(leftIndex, fPrefix, status); + if (fOverwrite) { + length += output.splice( + leftIndex + length, + rightIndex + length, + UnicodeString(), 0, 0, + kUndefinedField, status); + } + length += output.insert(rightIndex + length, fSuffix, status); + return length; +} + +int32_t ConstantMultiFieldModifier::getPrefixLength() const { + return fPrefix.length(); +} + +int32_t ConstantMultiFieldModifier::getCodePointCount() const { + return fPrefix.codePointCount() + fSuffix.codePointCount(); +} + +bool ConstantMultiFieldModifier::isStrong() const { + return fStrong; +} + +bool ConstantMultiFieldModifier::containsField(Field field) const { + return fPrefix.containsField(field) || fSuffix.containsField(field); +} + +void ConstantMultiFieldModifier::getParameters(Parameters& output) const { + output = fParameters; +} + +bool ConstantMultiFieldModifier::semanticallyEquivalent(const Modifier& other) const { + auto* _other = dynamic_cast(&other); + if (_other == nullptr) { + return false; + } + if (fParameters.obj != nullptr) { + return fParameters.obj == _other->fParameters.obj; + } + return fPrefix.contentEquals(_other->fPrefix) + && fSuffix.contentEquals(_other->fSuffix) + && fOverwrite == _other->fOverwrite + && fStrong == _other->fStrong; +} + + +CurrencySpacingEnabledModifier::CurrencySpacingEnabledModifier(const FormattedStringBuilder &prefix, + const FormattedStringBuilder &suffix, + bool overwrite, + bool strong, + const DecimalFormatSymbols &symbols, + UErrorCode &status) + : ConstantMultiFieldModifier(prefix, suffix, overwrite, strong) { + // Check for currency spacing. Do not build the UnicodeSets unless there is + // a currency code point at a boundary. + if (prefix.length() > 0 && prefix.fieldAt(prefix.length() - 1) == Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) { + int prefixCp = prefix.getLastCodePoint(); + UnicodeSet prefixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, PREFIX, status); + if (prefixUnicodeSet.contains(prefixCp)) { + fAfterPrefixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, PREFIX, status); + fAfterPrefixUnicodeSet.freeze(); + fAfterPrefixInsert = getInsertString(symbols, PREFIX, status); + } else { + fAfterPrefixUnicodeSet.setToBogus(); + fAfterPrefixInsert.setToBogus(); + } + } else { + fAfterPrefixUnicodeSet.setToBogus(); + fAfterPrefixInsert.setToBogus(); + } + if (suffix.length() > 0 && suffix.fieldAt(0) == Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) { + int suffixCp = suffix.getFirstCodePoint(); + UnicodeSet suffixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, SUFFIX, status); + if (suffixUnicodeSet.contains(suffixCp)) { + fBeforeSuffixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, SUFFIX, status); + fBeforeSuffixUnicodeSet.freeze(); + fBeforeSuffixInsert = getInsertString(symbols, SUFFIX, status); + } else { + fBeforeSuffixUnicodeSet.setToBogus(); + fBeforeSuffixInsert.setToBogus(); + } + } else { + fBeforeSuffixUnicodeSet.setToBogus(); + fBeforeSuffixInsert.setToBogus(); + } +} + +int32_t CurrencySpacingEnabledModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex, + UErrorCode &status) const { + // Currency spacing logic + int length = 0; + if (rightIndex - leftIndex > 0 && !fAfterPrefixUnicodeSet.isBogus() && + fAfterPrefixUnicodeSet.contains(output.codePointAt(leftIndex))) { + // TODO: Should we use the CURRENCY field here? + length += output.insert( + leftIndex, + fAfterPrefixInsert, + kUndefinedField, + status); + } + if (rightIndex - leftIndex > 0 && !fBeforeSuffixUnicodeSet.isBogus() && + fBeforeSuffixUnicodeSet.contains(output.codePointBefore(rightIndex))) { + // TODO: Should we use the CURRENCY field here? + length += output.insert( + rightIndex + length, + fBeforeSuffixInsert, + kUndefinedField, + status); + } + + // Call super for the remaining logic + length += ConstantMultiFieldModifier::apply(output, leftIndex, rightIndex + length, status); + return length; +} + +int32_t +CurrencySpacingEnabledModifier::applyCurrencySpacing(FormattedStringBuilder &output, int32_t prefixStart, + int32_t prefixLen, int32_t suffixStart, + int32_t suffixLen, + const DecimalFormatSymbols &symbols, + UErrorCode &status) { + int length = 0; + bool hasPrefix = (prefixLen > 0); + bool hasSuffix = (suffixLen > 0); + bool hasNumber = (suffixStart - prefixStart - prefixLen > 0); // could be empty string + if (hasPrefix && hasNumber) { + length += applyCurrencySpacingAffix(output, prefixStart + prefixLen, PREFIX, symbols, status); + } + if (hasSuffix && hasNumber) { + length += applyCurrencySpacingAffix(output, suffixStart + length, SUFFIX, symbols, status); + } + return length; +} + +int32_t +CurrencySpacingEnabledModifier::applyCurrencySpacingAffix(FormattedStringBuilder &output, int32_t index, + EAffix affix, + const DecimalFormatSymbols &symbols, + UErrorCode &status) { + // NOTE: For prefix, output.fieldAt(index-1) gets the last field type in the prefix. + // This works even if the last code point in the prefix is 2 code units because the + // field value gets populated to both indices in the field array. + Field affixField = (affix == PREFIX) ? output.fieldAt(index - 1) : output.fieldAt(index); + if (affixField != Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) { + return 0; + } + int affixCp = (affix == PREFIX) ? output.codePointBefore(index) : output.codePointAt(index); + UnicodeSet affixUniset = getUnicodeSet(symbols, IN_CURRENCY, affix, status); + if (!affixUniset.contains(affixCp)) { + return 0; + } + int numberCp = (affix == PREFIX) ? output.codePointAt(index) : output.codePointBefore(index); + UnicodeSet numberUniset = getUnicodeSet(symbols, IN_NUMBER, affix, status); + if (!numberUniset.contains(numberCp)) { + return 0; + } + UnicodeString spacingString = getInsertString(symbols, affix, status); + + // NOTE: This next line *inserts* the spacing string, triggering an arraycopy. + // It would be more efficient if this could be done before affixes were attached, + // so that it could be prepended/appended instead of inserted. + // However, the build code path is more efficient, and this is the most natural + // place to put currency spacing in the non-build code path. + // TODO: Should we use the CURRENCY field here? + return output.insert(index, spacingString, kUndefinedField, status); +} + +UnicodeSet +CurrencySpacingEnabledModifier::getUnicodeSet(const DecimalFormatSymbols &symbols, EPosition position, + EAffix affix, UErrorCode &status) { + // Ensure the static defaults are initialized: + umtx_initOnce(gDefaultCurrencySpacingInitOnce, &initDefaultCurrencySpacing, status); + if (U_FAILURE(status)) { + return UnicodeSet(); + } + + const UnicodeString& pattern = symbols.getPatternForCurrencySpacing( + position == IN_CURRENCY ? UNUM_CURRENCY_MATCH : UNUM_CURRENCY_SURROUNDING_MATCH, + affix == SUFFIX, + status); + if (pattern.compare(u"[:digit:]", -1) == 0) { + return *UNISET_DIGIT; + } else if (pattern.compare(u"[[:^S:]&[:^Z:]]", -1) == 0) { + return *UNISET_NOTSZ; + } else { + return UnicodeSet(pattern, status); + } +} + +UnicodeString +CurrencySpacingEnabledModifier::getInsertString(const DecimalFormatSymbols &symbols, EAffix affix, + UErrorCode &status) { + return symbols.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, affix == SUFFIX, status); +} + +#endif /* #if !UCONFIG_NO_FORMATTING */ -- cgit v1.2.3