diff options
Diffstat (limited to 'intl/icu/source/i18n/plurfmt.cpp')
-rw-r--r-- | intl/icu/source/i18n/plurfmt.cpp | 607 |
1 files changed, 607 insertions, 0 deletions
diff --git a/intl/icu/source/i18n/plurfmt.cpp b/intl/icu/source/i18n/plurfmt.cpp new file mode 100644 index 0000000000..33a539cd19 --- /dev/null +++ b/intl/icu/source/i18n/plurfmt.cpp @@ -0,0 +1,607 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2009-2015, International Business Machines Corporation and +* others. All Rights Reserved. +******************************************************************************* +* +* File PLURFMT.CPP +******************************************************************************* +*/ + +#include "unicode/decimfmt.h" +#include "unicode/messagepattern.h" +#include "unicode/plurfmt.h" +#include "unicode/plurrule.h" +#include "unicode/utypes.h" +#include "cmemory.h" +#include "messageimpl.h" +#include "nfrule.h" +#include "plurrule_impl.h" +#include "uassert.h" +#include "uhash.h" +#include "number_decimalquantity.h" +#include "number_utils.h" +#include "number_utypes.h" + +#if !UCONFIG_NO_FORMATTING + +U_NAMESPACE_BEGIN + +using number::impl::DecimalQuantity; + +static const char16_t OTHER_STRING[] = { + 0x6F, 0x74, 0x68, 0x65, 0x72, 0 // "other" +}; + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat) + +PluralFormat::PluralFormat(UErrorCode& status) + : locale(Locale::getDefault()), + msgPattern(status), + numberFormat(nullptr), + offset(0) { + init(nullptr, UPLURAL_TYPE_CARDINAL, status); +} + +PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status) + : locale(loc), + msgPattern(status), + numberFormat(nullptr), + offset(0) { + init(nullptr, UPLURAL_TYPE_CARDINAL, status); +} + +PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status) + : locale(Locale::getDefault()), + msgPattern(status), + numberFormat(nullptr), + offset(0) { + init(&rules, UPLURAL_TYPE_COUNT, status); +} + +PluralFormat::PluralFormat(const Locale& loc, + const PluralRules& rules, + UErrorCode& status) + : locale(loc), + msgPattern(status), + numberFormat(nullptr), + offset(0) { + init(&rules, UPLURAL_TYPE_COUNT, status); +} + +PluralFormat::PluralFormat(const Locale& loc, + UPluralType type, + UErrorCode& status) + : locale(loc), + msgPattern(status), + numberFormat(nullptr), + offset(0) { + init(nullptr, type, status); +} + +PluralFormat::PluralFormat(const UnicodeString& pat, + UErrorCode& status) + : locale(Locale::getDefault()), + msgPattern(status), + numberFormat(nullptr), + offset(0) { + init(nullptr, UPLURAL_TYPE_CARDINAL, status); + applyPattern(pat, status); +} + +PluralFormat::PluralFormat(const Locale& loc, + const UnicodeString& pat, + UErrorCode& status) + : locale(loc), + msgPattern(status), + numberFormat(nullptr), + offset(0) { + init(nullptr, UPLURAL_TYPE_CARDINAL, status); + applyPattern(pat, status); +} + +PluralFormat::PluralFormat(const PluralRules& rules, + const UnicodeString& pat, + UErrorCode& status) + : locale(Locale::getDefault()), + msgPattern(status), + numberFormat(nullptr), + offset(0) { + init(&rules, UPLURAL_TYPE_COUNT, status); + applyPattern(pat, status); +} + +PluralFormat::PluralFormat(const Locale& loc, + const PluralRules& rules, + const UnicodeString& pat, + UErrorCode& status) + : locale(loc), + msgPattern(status), + numberFormat(nullptr), + offset(0) { + init(&rules, UPLURAL_TYPE_COUNT, status); + applyPattern(pat, status); +} + +PluralFormat::PluralFormat(const Locale& loc, + UPluralType type, + const UnicodeString& pat, + UErrorCode& status) + : locale(loc), + msgPattern(status), + numberFormat(nullptr), + offset(0) { + init(nullptr, type, status); + applyPattern(pat, status); +} + +PluralFormat::PluralFormat(const PluralFormat& other) + : Format(other), + locale(other.locale), + msgPattern(other.msgPattern), + numberFormat(nullptr), + offset(other.offset) { + copyObjects(other); +} + +void +PluralFormat::copyObjects(const PluralFormat& other) { + UErrorCode status = U_ZERO_ERROR; + if (numberFormat != nullptr) { + delete numberFormat; + } + if (pluralRulesWrapper.pluralRules != nullptr) { + delete pluralRulesWrapper.pluralRules; + } + + if (other.numberFormat == nullptr) { + numberFormat = NumberFormat::createInstance(locale, status); + } else { + numberFormat = other.numberFormat->clone(); + } + if (other.pluralRulesWrapper.pluralRules == nullptr) { + pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status); + } else { + pluralRulesWrapper.pluralRules = other.pluralRulesWrapper.pluralRules->clone(); + } +} + + +PluralFormat::~PluralFormat() { + delete numberFormat; +} + +void +PluralFormat::init(const PluralRules* rules, UPluralType type, UErrorCode& status) { + if (U_FAILURE(status)) { + return; + } + + if (rules==nullptr) { + pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, type, status); + } else { + pluralRulesWrapper.pluralRules = rules->clone(); + if (pluralRulesWrapper.pluralRules == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + } + + numberFormat= NumberFormat::createInstance(locale, status); +} + +void +PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) { + msgPattern.parsePluralStyle(newPattern, nullptr, status); + if (U_FAILURE(status)) { + msgPattern.clear(); + offset = 0; + return; + } + offset = msgPattern.getPluralOffset(0); +} + +UnicodeString& +PluralFormat::format(const Formattable& obj, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode& status) const +{ + if (U_FAILURE(status)) return appendTo; + + if (obj.isNumeric()) { + return format(obj, obj.getDouble(), appendTo, pos, status); + } else { + status = U_ILLEGAL_ARGUMENT_ERROR; + return appendTo; + } +} + +UnicodeString +PluralFormat::format(int32_t number, UErrorCode& status) const { + FieldPosition fpos(FieldPosition::DONT_CARE); + UnicodeString result; + return format(Formattable(number), number, result, fpos, status); +} + +UnicodeString +PluralFormat::format(double number, UErrorCode& status) const { + FieldPosition fpos(FieldPosition::DONT_CARE); + UnicodeString result; + return format(Formattable(number), number, result, fpos, status); +} + + +UnicodeString& +PluralFormat::format(int32_t number, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode& status) const { + return format(Formattable(number), (double)number, appendTo, pos, status); +} + +UnicodeString& +PluralFormat::format(double number, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode& status) const { + return format(Formattable(number), (double)number, appendTo, pos, status); +} + +UnicodeString& +PluralFormat::format(const Formattable& numberObject, double number, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode& status) const { + if (U_FAILURE(status)) { + return appendTo; + } + if (msgPattern.countParts() == 0) { + return numberFormat->format(numberObject, appendTo, pos, status); + } + + // Get the appropriate sub-message. + // Select it based on the formatted number-offset. + double numberMinusOffset = number - offset; + // Call NumberFormatter to get both the DecimalQuantity and the string. + // This call site needs to use more internal APIs than the Java equivalent. + number::impl::UFormattedNumberData data; + if (offset == 0) { + // could be BigDecimal etc. + numberObject.populateDecimalQuantity(data.quantity, status); + } else { + data.quantity.setToDouble(numberMinusOffset); + } + UnicodeString numberString; + auto *decFmt = dynamic_cast<DecimalFormat *>(numberFormat); + if(decFmt != nullptr) { + const number::LocalizedNumberFormatter* lnf = decFmt->toNumberFormatter(status); + if (U_FAILURE(status)) { + return appendTo; + } + lnf->formatImpl(&data, status); // mutates &data + if (U_FAILURE(status)) { + return appendTo; + } + numberString = data.getStringRef().toUnicodeString(); + } else { + if (offset == 0) { + numberFormat->format(numberObject, numberString, status); + } else { + numberFormat->format(numberMinusOffset, numberString, status); + } + } + + int32_t partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, &data.quantity, number, status); + if (U_FAILURE(status)) { return appendTo; } + // Replace syntactic # signs in the top level of this sub-message + // (not in nested arguments) with the formatted number-offset. + const UnicodeString& pattern = msgPattern.getPatternString(); + int32_t prevIndex = msgPattern.getPart(partIndex).getLimit(); + for (;;) { + const MessagePattern::Part& part = msgPattern.getPart(++partIndex); + const UMessagePatternPartType type = part.getType(); + int32_t index = part.getIndex(); + if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) { + return appendTo.append(pattern, prevIndex, index - prevIndex); + } else if ((type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) || + (type == UMSGPAT_PART_TYPE_SKIP_SYNTAX && MessageImpl::jdkAposMode(msgPattern))) { + appendTo.append(pattern, prevIndex, index - prevIndex); + if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) { + appendTo.append(numberString); + } + prevIndex = part.getLimit(); + } else if (type == UMSGPAT_PART_TYPE_ARG_START) { + appendTo.append(pattern, prevIndex, index - prevIndex); + prevIndex = index; + partIndex = msgPattern.getLimitPartIndex(partIndex); + index = msgPattern.getPart(partIndex).getLimit(); + MessageImpl::appendReducedApostrophes(pattern, prevIndex, index, appendTo); + prevIndex = index; + } + } +} + +UnicodeString& +PluralFormat::toPattern(UnicodeString& appendTo) { + if (0 == msgPattern.countParts()) { + appendTo.setToBogus(); + } else { + appendTo.append(msgPattern.getPatternString()); + } + return appendTo; +} + +void +PluralFormat::setLocale(const Locale& loc, UErrorCode& status) { + if (U_FAILURE(status)) { + return; + } + locale = loc; + msgPattern.clear(); + delete numberFormat; + offset = 0; + numberFormat = nullptr; + pluralRulesWrapper.reset(); + init(nullptr, UPLURAL_TYPE_CARDINAL, status); +} + +void +PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) { + if (U_FAILURE(status)) { + return; + } + NumberFormat* nf = format->clone(); + if (nf != nullptr) { + delete numberFormat; + numberFormat = nf; + } else { + status = U_MEMORY_ALLOCATION_ERROR; + } +} + +PluralFormat* +PluralFormat::clone() const +{ + return new PluralFormat(*this); +} + + +PluralFormat& +PluralFormat::operator=(const PluralFormat& other) { + if (this != &other) { + locale = other.locale; + msgPattern = other.msgPattern; + offset = other.offset; + copyObjects(other); + } + + return *this; +} + +bool +PluralFormat::operator==(const Format& other) const { + if (this == &other) { + return true; + } + if (!Format::operator==(other)) { + return false; + } + const PluralFormat& o = (const PluralFormat&)other; + return + locale == o.locale && + msgPattern == o.msgPattern && // implies same offset + (numberFormat == nullptr) == (o.numberFormat == nullptr) && + (numberFormat == nullptr || *numberFormat == *o.numberFormat) && + (pluralRulesWrapper.pluralRules == nullptr) == (o.pluralRulesWrapper.pluralRules == nullptr) && + (pluralRulesWrapper.pluralRules == nullptr || + *pluralRulesWrapper.pluralRules == *o.pluralRulesWrapper.pluralRules); +} + +bool +PluralFormat::operator!=(const Format& other) const { + return !operator==(other); +} + +void +PluralFormat::parseObject(const UnicodeString& /*source*/, + Formattable& /*result*/, + ParsePosition& pos) const +{ + // Parsing not supported. + pos.setErrorIndex(pos.getIndex()); +} + +int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex, + const PluralSelector& selector, void *context, + double number, UErrorCode& ec) { + if (U_FAILURE(ec)) { + return 0; + } + int32_t count=pattern.countParts(); + double offset; + const MessagePattern::Part* part=&pattern.getPart(partIndex); + if (MessagePattern::Part::hasNumericValue(part->getType())) { + offset=pattern.getNumericValue(*part); + ++partIndex; + } else { + offset=0; + } + // The keyword is empty until we need to match against a non-explicit, not-"other" value. + // Then we get the keyword from the selector. + // (In other words, we never call the selector if we match against an explicit value, + // or if the only non-explicit keyword is "other".) + UnicodeString keyword; + UnicodeString other(false, OTHER_STRING, 5); + // When we find a match, we set msgStart>0 and also set this boolean to true + // to avoid matching the keyword again (duplicates are allowed) + // while we continue to look for an explicit-value match. + UBool haveKeywordMatch=false; + // msgStart is 0 until we find any appropriate sub-message. + // We remember the first "other" sub-message if we have not seen any + // appropriate sub-message before. + // We remember the first matching-keyword sub-message if we have not seen + // one of those before. + // (The parser allows [does not check for] duplicate keywords. + // We just have to make sure to take the first one.) + // We avoid matching the keyword twice by also setting haveKeywordMatch=true + // at the first keyword match. + // We keep going until we find an explicit-value match or reach the end of the plural style. + int32_t msgStart=0; + // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples + // until ARG_LIMIT or end of plural-only pattern. + do { + part=&pattern.getPart(partIndex++); + const UMessagePatternPartType type = part->getType(); + if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) { + break; + } + U_ASSERT (type==UMSGPAT_PART_TYPE_ARG_SELECTOR); + // part is an ARG_SELECTOR followed by an optional explicit value, and then a message + if(MessagePattern::Part::hasNumericValue(pattern.getPartType(partIndex))) { + // explicit value like "=2" + part=&pattern.getPart(partIndex++); + if(number==pattern.getNumericValue(*part)) { + // matches explicit value + return partIndex; + } + } else if(!haveKeywordMatch) { + // plural keyword like "few" or "other" + // Compare "other" first and call the selector if this is not "other". + if(pattern.partSubstringMatches(*part, other)) { + if(msgStart==0) { + msgStart=partIndex; + if(0 == keyword.compare(other)) { + // This is the first "other" sub-message, + // and the selected keyword is also "other". + // Do not match "other" again. + haveKeywordMatch=true; + } + } + } else { + if(keyword.isEmpty()) { + keyword=selector.select(context, number-offset, ec); + if(msgStart!=0 && (0 == keyword.compare(other))) { + // We have already seen an "other" sub-message. + // Do not match "other" again. + haveKeywordMatch=true; + // Skip keyword matching but do getLimitPartIndex(). + } + } + if(!haveKeywordMatch && pattern.partSubstringMatches(*part, keyword)) { + // keyword matches + msgStart=partIndex; + // Do not match this keyword again. + haveKeywordMatch=true; + } + } + } + partIndex=pattern.getLimitPartIndex(partIndex); + } while(++partIndex<count); + return msgStart; +} + +void PluralFormat::parseType(const UnicodeString& source, const NFRule *rbnfLenientScanner, Formattable& result, FieldPosition& pos) const { + // If no pattern was applied, return null. + if (msgPattern.countParts() == 0) { + pos.setBeginIndex(-1); + pos.setEndIndex(-1); + return; + } + int partIndex = 0; + int currMatchIndex; + int count=msgPattern.countParts(); + int startingAt = pos.getBeginIndex(); + if (startingAt < 0) { + startingAt = 0; + } + + // The keyword is null until we need to match against a non-explicit, not-"other" value. + // Then we get the keyword from the selector. + // (In other words, we never call the selector if we match against an explicit value, + // or if the only non-explicit keyword is "other".) + UnicodeString keyword; + UnicodeString matchedWord; + const UnicodeString& pattern = msgPattern.getPatternString(); + int matchedIndex = -1; + // Iterate over (ARG_SELECTOR ARG_START message ARG_LIMIT) tuples + // until the end of the plural-only pattern. + while (partIndex < count) { + const MessagePattern::Part* partSelector = &msgPattern.getPart(partIndex++); + if (partSelector->getType() != UMSGPAT_PART_TYPE_ARG_SELECTOR) { + // Bad format + continue; + } + + const MessagePattern::Part* partStart = &msgPattern.getPart(partIndex++); + if (partStart->getType() != UMSGPAT_PART_TYPE_MSG_START) { + // Bad format + continue; + } + + const MessagePattern::Part* partLimit = &msgPattern.getPart(partIndex++); + if (partLimit->getType() != UMSGPAT_PART_TYPE_MSG_LIMIT) { + // Bad format + continue; + } + + UnicodeString currArg = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit()); + if (rbnfLenientScanner != nullptr) { + // Check if non-lenient rule finds the text before call lenient parsing + int32_t tempIndex = source.indexOf(currArg, startingAt); + if (tempIndex >= 0) { + currMatchIndex = tempIndex; + } else { + // If lenient parsing is turned ON, we've got some time consuming parsing ahead of us. + int32_t length = -1; + currMatchIndex = rbnfLenientScanner->findTextLenient(source, currArg, startingAt, &length); + } + } + else { + currMatchIndex = source.indexOf(currArg, startingAt); + } + if (currMatchIndex >= 0 && currMatchIndex >= matchedIndex && currArg.length() > matchedWord.length()) { + matchedIndex = currMatchIndex; + matchedWord = currArg; + keyword = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit()); + } + } + if (matchedIndex >= 0) { + pos.setBeginIndex(matchedIndex); + pos.setEndIndex(matchedIndex + matchedWord.length()); + result.setString(keyword); + return; + } + + // Not found! + pos.setBeginIndex(-1); + pos.setEndIndex(-1); +} + +PluralFormat::PluralSelector::~PluralSelector() {} + +PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() { + delete pluralRules; +} + +UnicodeString PluralFormat::PluralSelectorAdapter::select(void *context, double number, + UErrorCode& /*ec*/) const { + (void)number; // unused except in the assertion + IFixedDecimal *dec=static_cast<IFixedDecimal *>(context); + return pluralRules->select(*dec); +} + +void PluralFormat::PluralSelectorAdapter::reset() { + delete pluralRules; + pluralRules = nullptr; +} + + +U_NAMESPACE_END + + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +//eof |