diff options
Diffstat (limited to 'intl/icu/source/i18n/numparse_currency.cpp')
-rw-r--r-- | intl/icu/source/i18n/numparse_currency.cpp | 189 |
1 files changed, 189 insertions, 0 deletions
diff --git a/intl/icu/source/i18n/numparse_currency.cpp b/intl/icu/source/i18n/numparse_currency.cpp new file mode 100644 index 0000000000..7bbb060f3d --- /dev/null +++ b/intl/icu/source/i18n/numparse_currency.cpp @@ -0,0 +1,189 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +// Allow implicit conversion from char16_t* to UnicodeString for this file: +// Helpful in toString methods and elsewhere. +#define UNISTR_FROM_STRING_EXPLICIT + +#include "numparse_types.h" +#include "numparse_currency.h" +#include "ucurrimp.h" +#include "unicode/errorcode.h" +#include "numparse_utils.h" +#include "string_segment.h" + +using namespace icu; +using namespace icu::numparse; +using namespace icu::numparse::impl; + + +CombinedCurrencyMatcher::CombinedCurrencyMatcher(const CurrencySymbols& currencySymbols, const DecimalFormatSymbols& dfs, + parse_flags_t parseFlags, UErrorCode& status) + : fCurrency1(currencySymbols.getCurrencySymbol(status)), + fCurrency2(currencySymbols.getIntlCurrencySymbol(status)), + fUseFullCurrencyData(0 == (parseFlags & PARSE_FLAG_NO_FOREIGN_CURRENCY)), + afterPrefixInsert(dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, false, status)), + beforeSuffixInsert(dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, true, status)), + fLocaleName(dfs.getLocale().getName(), -1, status) { + utils::copyCurrencyCode(fCurrencyCode, currencySymbols.getIsoCode()); + + // Pre-load the long names for the current locale and currency + // if we are parsing without the full currency data. + if (!fUseFullCurrencyData) { + for (int32_t i=0; i<StandardPlural::COUNT; i++) { + auto plural = static_cast<StandardPlural::Form>(i); + fLocalLongNames[i] = currencySymbols.getPluralName(plural, status); + } + } + + // TODO: Figure out how to make this faster and re-enable. + // Computing the "lead code points" set for fastpathing is too slow to use in production. + // See https://unicode-org.atlassian.net/browse/ICU-13584 +// // Compute the full set of characters that could be the first in a currency to allow for +// // efficient smoke test. +// fLeadCodePoints.add(fCurrency1.char32At(0)); +// fLeadCodePoints.add(fCurrency2.char32At(0)); +// fLeadCodePoints.add(beforeSuffixInsert.char32At(0)); +// uprv_currencyLeads(fLocaleName.data(), fLeadCodePoints, status); +// // Always apply case mapping closure for currencies +// fLeadCodePoints.closeOver(USET_ADD_CASE_MAPPINGS); +// fLeadCodePoints.freeze(); +} + +bool +CombinedCurrencyMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const { + if (result.currencyCode[0] != 0) { + return false; + } + + // Try to match a currency spacing separator. + int32_t initialOffset = segment.getOffset(); + bool maybeMore = false; + if (result.seenNumber() && !beforeSuffixInsert.isEmpty()) { + int32_t overlap = segment.getCommonPrefixLength(beforeSuffixInsert); + if (overlap == beforeSuffixInsert.length()) { + segment.adjustOffset(overlap); + // Note: let currency spacing be a weak match. Don't update chars consumed. + } + maybeMore = maybeMore || overlap == segment.length(); + } + + // Match the currency string, and reset if we didn't find one. + maybeMore = maybeMore || matchCurrency(segment, result, status); + if (result.currencyCode[0] == 0) { + segment.setOffset(initialOffset); + return maybeMore; + } + + // Try to match a currency spacing separator. + if (!result.seenNumber() && !afterPrefixInsert.isEmpty()) { + int32_t overlap = segment.getCommonPrefixLength(afterPrefixInsert); + if (overlap == afterPrefixInsert.length()) { + segment.adjustOffset(overlap); + // Note: let currency spacing be a weak match. Don't update chars consumed. + } + maybeMore = maybeMore || overlap == segment.length(); + } + + return maybeMore; +} + +bool CombinedCurrencyMatcher::matchCurrency(StringSegment& segment, ParsedNumber& result, + UErrorCode& status) const { + bool maybeMore = false; + + int32_t overlap1; + if (!fCurrency1.isEmpty()) { + overlap1 = segment.getCaseSensitivePrefixLength(fCurrency1); + } else { + overlap1 = -1; + } + maybeMore = maybeMore || overlap1 == segment.length(); + if (overlap1 == fCurrency1.length()) { + utils::copyCurrencyCode(result.currencyCode, fCurrencyCode); + segment.adjustOffset(overlap1); + result.setCharsConsumed(segment); + return maybeMore; + } + + int32_t overlap2; + if (!fCurrency2.isEmpty()) { + // ISO codes should be accepted case-insensitive. + // https://unicode-org.atlassian.net/browse/ICU-13696 + overlap2 = segment.getCommonPrefixLength(fCurrency2); + } else { + overlap2 = -1; + } + maybeMore = maybeMore || overlap2 == segment.length(); + if (overlap2 == fCurrency2.length()) { + utils::copyCurrencyCode(result.currencyCode, fCurrencyCode); + segment.adjustOffset(overlap2); + result.setCharsConsumed(segment); + return maybeMore; + } + + if (fUseFullCurrencyData) { + // Use the full currency data. + // NOTE: This call site should be improved with #13584. + const UnicodeString segmentString = segment.toTempUnicodeString(); + + // Try to parse the currency + ParsePosition ppos(0); + int32_t partialMatchLen = 0; + uprv_parseCurrency( + fLocaleName.data(), + segmentString, + ppos, + UCURR_SYMBOL_NAME, // checks for both UCURR_SYMBOL_NAME and UCURR_LONG_NAME + &partialMatchLen, + result.currencyCode, + status); + maybeMore = maybeMore || partialMatchLen == segment.length(); + + if (U_SUCCESS(status) && ppos.getIndex() != 0) { + // Complete match. + // NOTE: The currency code should already be saved in the ParsedNumber. + segment.adjustOffset(ppos.getIndex()); + result.setCharsConsumed(segment); + return maybeMore; + } + + } else { + // Use the locale long names. + int32_t longestFullMatch = 0; + for (int32_t i=0; i<StandardPlural::COUNT; i++) { + const UnicodeString& name = fLocalLongNames[i]; + int32_t overlap = segment.getCommonPrefixLength(name); + if (overlap == name.length() && name.length() > longestFullMatch) { + longestFullMatch = name.length(); + } + maybeMore = maybeMore || overlap > 0; + } + if (longestFullMatch > 0) { + utils::copyCurrencyCode(result.currencyCode, fCurrencyCode); + segment.adjustOffset(longestFullMatch); + result.setCharsConsumed(segment); + return maybeMore; + } + } + + // No match found. + return maybeMore; +} + +bool CombinedCurrencyMatcher::smokeTest(const StringSegment&) const { + // TODO: See constructor + return true; + //return segment.startsWith(fLeadCodePoints); +} + +UnicodeString CombinedCurrencyMatcher::toString() const { + return u"<CombinedCurrencyMatcher>"; +} + + +#endif /* #if !UCONFIG_NO_FORMATTING */ |