diff options
Diffstat (limited to 'intl/icu/source/i18n/number_longnames.cpp')
-rw-r--r-- | intl/icu/source/i18n/number_longnames.cpp | 350 |
1 files changed, 350 insertions, 0 deletions
diff --git a/intl/icu/source/i18n/number_longnames.cpp b/intl/icu/source/i18n/number_longnames.cpp new file mode 100644 index 0000000000..bb32d0381a --- /dev/null +++ b/intl/icu/source/i18n/number_longnames.cpp @@ -0,0 +1,350 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/simpleformatter.h" +#include "unicode/ures.h" +#include "ureslocs.h" +#include "charstr.h" +#include "uresimp.h" +#include "number_longnames.h" +#include "number_microprops.h" +#include <algorithm> +#include "cstring.h" +#include "util.h" + +using namespace icu; +using namespace icu::number; +using namespace icu::number::impl; + +namespace { + +constexpr int32_t DNAM_INDEX = StandardPlural::Form::COUNT; +constexpr int32_t PER_INDEX = StandardPlural::Form::COUNT + 1; +constexpr int32_t ARRAY_LENGTH = StandardPlural::Form::COUNT + 2; + +static int32_t getIndex(const char* pluralKeyword, UErrorCode& status) { + // pluralKeyword can also be "dnam" or "per" + if (uprv_strcmp(pluralKeyword, "dnam") == 0) { + return DNAM_INDEX; + } else if (uprv_strcmp(pluralKeyword, "per") == 0) { + return PER_INDEX; + } else { + StandardPlural::Form plural = StandardPlural::fromString(pluralKeyword, status); + return plural; + } +} + +static UnicodeString getWithPlural( + const UnicodeString* strings, + StandardPlural::Form plural, + UErrorCode& status) { + UnicodeString result = strings[plural]; + if (result.isBogus()) { + result = strings[StandardPlural::Form::OTHER]; + } + if (result.isBogus()) { + // There should always be data in the "other" plural variant. + status = U_INTERNAL_PROGRAM_ERROR; + } + return result; +} + + +////////////////////////// +/// BEGIN DATA LOADING /// +////////////////////////// + +class PluralTableSink : public ResourceSink { + public: + explicit PluralTableSink(UnicodeString *outArray) : outArray(outArray) { + // Initialize the array to bogus strings. + for (int32_t i = 0; i < ARRAY_LENGTH; i++) { + outArray[i].setToBogus(); + } + } + + void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) U_OVERRIDE { + ResourceTable pluralsTable = value.getTable(status); + if (U_FAILURE(status)) { return; } + for (int32_t i = 0; pluralsTable.getKeyAndValue(i, key, value); ++i) { + int32_t index = getIndex(key, status); + if (U_FAILURE(status)) { return; } + if (!outArray[index].isBogus()) { + continue; + } + outArray[index] = value.getUnicodeString(status); + if (U_FAILURE(status)) { return; } + } + } + + private: + UnicodeString *outArray; +}; + +// NOTE: outArray MUST have room for all StandardPlural values. No bounds checking is performed. + +void getMeasureData(const Locale &locale, const MeasureUnit &unit, const UNumberUnitWidth &width, + UnicodeString *outArray, UErrorCode &status) { + PluralTableSink sink(outArray); + LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status)); + if (U_FAILURE(status)) { return; } + + // Map duration-year-person, duration-week-person, etc. to duration-year, duration-week, ... + // TODO(ICU-20400): Get duration-*-person data properly with aliases. + StringPiece subtypeForResource; + int32_t subtypeLen = static_cast<int32_t>(uprv_strlen(unit.getSubtype())); + if (subtypeLen > 7 && uprv_strcmp(unit.getSubtype() + subtypeLen - 7, "-person") == 0) { + subtypeForResource = {unit.getSubtype(), subtypeLen - 7}; + } else { + subtypeForResource = unit.getSubtype(); + } + + CharString key; + key.append("units", status); + if (width == UNUM_UNIT_WIDTH_NARROW) { + key.append("Narrow", status); + } else if (width == UNUM_UNIT_WIDTH_SHORT) { + key.append("Short", status); + } + key.append("/", status); + key.append(unit.getType(), status); + key.append("/", status); + key.append(subtypeForResource, status); + + UErrorCode localStatus = U_ZERO_ERROR; + ures_getAllItemsWithFallback(unitsBundle.getAlias(), key.data(), sink, localStatus); + if (width == UNUM_UNIT_WIDTH_SHORT) { + if (U_FAILURE(localStatus)) { + status = localStatus; + } + return; + } + + // TODO(ICU-13353): The fallback to short does not work in ICU4C. + // Manually fall back to short (this is done automatically in Java). + key.clear(); + key.append("unitsShort/", status); + key.append(unit.getType(), status); + key.append("/", status); + key.append(subtypeForResource, status); + ures_getAllItemsWithFallback(unitsBundle.getAlias(), key.data(), sink, status); +} + +void getCurrencyLongNameData(const Locale &locale, const CurrencyUnit ¤cy, UnicodeString *outArray, + UErrorCode &status) { + // In ICU4J, this method gets a CurrencyData from CurrencyData.provider. + // TODO(ICU4J): Implement this without going through CurrencyData, like in ICU4C? + PluralTableSink sink(outArray); + LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_CURR, locale.getName(), &status)); + if (U_FAILURE(status)) { return; } + ures_getAllItemsWithFallback(unitsBundle.getAlias(), "CurrencyUnitPatterns", sink, status); + if (U_FAILURE(status)) { return; } + for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) { + UnicodeString &pattern = outArray[i]; + if (pattern.isBogus()) { + continue; + } + int32_t longNameLen = 0; + const char16_t *longName = ucurr_getPluralName( + currency.getISOCurrency(), + locale.getName(), + nullptr /* isChoiceFormat */, + StandardPlural::getKeyword(static_cast<StandardPlural::Form>(i)), + &longNameLen, + &status); + // Example pattern from data: "{0} {1}" + // Example output after find-and-replace: "{0} US dollars" + pattern.findAndReplace(UnicodeString(u"{1}"), UnicodeString(longName, longNameLen)); + } +} + +UnicodeString getPerUnitFormat(const Locale& locale, const UNumberUnitWidth &width, UErrorCode& status) { + LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status)); + if (U_FAILURE(status)) { return {}; } + CharString key; + key.append("units", status); + if (width == UNUM_UNIT_WIDTH_NARROW) { + key.append("Narrow", status); + } else if (width == UNUM_UNIT_WIDTH_SHORT) { + key.append("Short", status); + } + key.append("/compound/per", status); + int32_t len = 0; + const UChar* ptr = ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &len, &status); + return UnicodeString(ptr, len); +} + +//////////////////////// +/// END DATA LOADING /// +//////////////////////// + +} // namespace + +LongNameHandler* +LongNameHandler::forMeasureUnit(const Locale &loc, const MeasureUnit &unitRef, const MeasureUnit &perUnit, + const UNumberUnitWidth &width, const PluralRules *rules, + const MicroPropsGenerator *parent, UErrorCode &status) { + if (uprv_strlen(unitRef.getType()) == 0 || uprv_strlen(perUnit.getType()) == 0) { + // TODO(ICU-20941): Unsanctioned unit. Not yet fully supported. Set an error code. + status = U_UNSUPPORTED_ERROR; + return nullptr; + } + + MeasureUnit unit = unitRef; + if (uprv_strcmp(perUnit.getType(), "none") != 0) { + // Compound unit: first try to simplify (e.g., meters per second is its own unit). + bool isResolved = false; + MeasureUnit resolved = MeasureUnit::resolveUnitPerUnit(unit, perUnit, &isResolved); + if (isResolved) { + unit = resolved; + } else { + // No simplified form is available. + return forCompoundUnit(loc, unit, perUnit, width, rules, parent, status); + } + } + + auto* result = new LongNameHandler(rules, parent); + if (result == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + UnicodeString simpleFormats[ARRAY_LENGTH]; + getMeasureData(loc, unit, width, simpleFormats, status); + if (U_FAILURE(status)) { return result; } + result->simpleFormatsToModifiers(simpleFormats, {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status); + return result; +} + +LongNameHandler* +LongNameHandler::forCompoundUnit(const Locale &loc, const MeasureUnit &unit, const MeasureUnit &perUnit, + const UNumberUnitWidth &width, const PluralRules *rules, + const MicroPropsGenerator *parent, UErrorCode &status) { + auto* result = new LongNameHandler(rules, parent); + if (result == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + UnicodeString primaryData[ARRAY_LENGTH]; + getMeasureData(loc, unit, width, primaryData, status); + if (U_FAILURE(status)) { return result; } + UnicodeString secondaryData[ARRAY_LENGTH]; + getMeasureData(loc, perUnit, width, secondaryData, status); + if (U_FAILURE(status)) { return result; } + + UnicodeString perUnitFormat; + if (!secondaryData[PER_INDEX].isBogus()) { + perUnitFormat = secondaryData[PER_INDEX]; + } else { + UnicodeString rawPerUnitFormat = getPerUnitFormat(loc, width, status); + if (U_FAILURE(status)) { return result; } + // rawPerUnitFormat is something like "{0}/{1}"; we need to substitute in the secondary unit. + SimpleFormatter compiled(rawPerUnitFormat, 2, 2, status); + if (U_FAILURE(status)) { return result; } + UnicodeString secondaryFormat = getWithPlural(secondaryData, StandardPlural::Form::ONE, status); + if (U_FAILURE(status)) { return result; } + // Some "one" pattern may not contain "{0}". For example in "ar" or "ne" locale. + SimpleFormatter secondaryCompiled(secondaryFormat, 0, 1, status); + if (U_FAILURE(status)) { return result; } + UnicodeString secondaryString = secondaryCompiled.getTextWithNoArguments().trim(); + // TODO: Why does UnicodeString need to be explicit in the following line? + compiled.format(UnicodeString(u"{0}"), secondaryString, perUnitFormat, status); + if (U_FAILURE(status)) { return result; } + } + result->multiSimpleFormatsToModifiers(primaryData, perUnitFormat, {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status); + return result; +} + +UnicodeString LongNameHandler::getUnitDisplayName( + const Locale& loc, + const MeasureUnit& unit, + UNumberUnitWidth width, + UErrorCode& status) { + if (U_FAILURE(status)) { + return ICU_Utility::makeBogusString(); + } + UnicodeString simpleFormats[ARRAY_LENGTH]; + getMeasureData(loc, unit, width, simpleFormats, status); + return simpleFormats[DNAM_INDEX]; +} + +UnicodeString LongNameHandler::getUnitPattern( + const Locale& loc, + const MeasureUnit& unit, + UNumberUnitWidth width, + StandardPlural::Form pluralForm, + UErrorCode& status) { + if (U_FAILURE(status)) { + return ICU_Utility::makeBogusString(); + } + UnicodeString simpleFormats[ARRAY_LENGTH]; + getMeasureData(loc, unit, width, simpleFormats, status); + // The above already handles fallback from other widths to short + if (U_FAILURE(status)) { + return ICU_Utility::makeBogusString(); + } + // Now handle fallback from other plural forms to OTHER + return (!(simpleFormats[pluralForm]).isBogus())? simpleFormats[pluralForm]: + simpleFormats[StandardPlural::Form::OTHER]; +} + +LongNameHandler* LongNameHandler::forCurrencyLongNames(const Locale &loc, const CurrencyUnit ¤cy, + const PluralRules *rules, + const MicroPropsGenerator *parent, + UErrorCode &status) { + auto* result = new LongNameHandler(rules, parent); + if (result == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + UnicodeString simpleFormats[ARRAY_LENGTH]; + getCurrencyLongNameData(loc, currency, simpleFormats, status); + if (U_FAILURE(status)) { return nullptr; } + result->simpleFormatsToModifiers(simpleFormats, {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}, status); + return result; +} + +void LongNameHandler::simpleFormatsToModifiers(const UnicodeString *simpleFormats, Field field, + UErrorCode &status) { + for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) { + StandardPlural::Form plural = static_cast<StandardPlural::Form>(i); + UnicodeString simpleFormat = getWithPlural(simpleFormats, plural, status); + if (U_FAILURE(status)) { return; } + SimpleFormatter compiledFormatter(simpleFormat, 0, 1, status); + if (U_FAILURE(status)) { return; } + fModifiers[i] = SimpleModifier(compiledFormatter, field, false, {this, SIGNUM_POS_ZERO, plural}); + } +} + +void LongNameHandler::multiSimpleFormatsToModifiers(const UnicodeString *leadFormats, UnicodeString trailFormat, + Field field, UErrorCode &status) { + SimpleFormatter trailCompiled(trailFormat, 1, 1, status); + if (U_FAILURE(status)) { return; } + for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) { + StandardPlural::Form plural = static_cast<StandardPlural::Form>(i); + UnicodeString leadFormat = getWithPlural(leadFormats, plural, status); + if (U_FAILURE(status)) { return; } + UnicodeString compoundFormat; + trailCompiled.format(leadFormat, compoundFormat, status); + if (U_FAILURE(status)) { return; } + SimpleFormatter compoundCompiled(compoundFormat, 0, 1, status); + if (U_FAILURE(status)) { return; } + fModifiers[i] = SimpleModifier(compoundCompiled, field, false, {this, SIGNUM_POS_ZERO, plural}); + } +} + +void LongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps µs, + UErrorCode &status) const { + parent->processQuantity(quantity, micros, status); + StandardPlural::Form pluralForm = utils::getPluralSafe(micros.rounder, rules, quantity, status); + micros.modOuter = &fModifiers[pluralForm]; +} + +const Modifier* LongNameHandler::getModifier(Signum /*signum*/, StandardPlural::Form plural) const { + return &fModifiers[plural]; +} + +#endif /* #if !UCONFIG_NO_FORMATTING */ |