diff options
Diffstat (limited to 'intl/icu/source/i18n/units_data.cpp')
-rw-r--r-- | intl/icu/source/i18n/units_data.cpp | 488 |
1 files changed, 488 insertions, 0 deletions
diff --git a/intl/icu/source/i18n/units_data.cpp b/intl/icu/source/i18n/units_data.cpp new file mode 100644 index 0000000000..5fa17567a1 --- /dev/null +++ b/intl/icu/source/i18n/units_data.cpp @@ -0,0 +1,488 @@ +// © 2020 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "bytesinkutil.h" +#include "cstring.h" +#include "number_decimalquantity.h" +#include "resource.h" +#include "uassert.h" +#include "unicode/locid.h" +#include "unicode/unistr.h" +#include "unicode/ures.h" +#include "units_data.h" +#include "uresimp.h" +#include "util.h" +#include <utility> + +U_NAMESPACE_BEGIN +namespace units { + +namespace { + +using icu::number::impl::DecimalQuantity; + +void trimSpaces(CharString& factor, UErrorCode& status){ + CharString trimmed; + for (int i = 0 ; i < factor.length(); i++) { + if (factor[i] == ' ') continue; + + trimmed.append(factor[i], status); + } + + factor = std::move(trimmed); +} + +/** + * A ResourceSink that collects conversion rate information. + * + * This class is for use by ures_getAllItemsWithFallback. + */ +class ConversionRateDataSink : public ResourceSink { + public: + /** + * Constructor. + * @param out The vector to which ConversionRateInfo instances are to be + * added. This vector must outlive the use of the ResourceSink. + */ + explicit ConversionRateDataSink(MaybeStackVector<ConversionRateInfo> *out) : outVector(out) {} + + /** + * Method for use by `ures_getAllItemsWithFallback`. Adds the unit + * conversion rates that are found in `value` to the output vector. + * + * @param source This string must be "convertUnits": the resource that this + * class supports reading. + * @param value The "convertUnits" resource, containing unit conversion rate + * information. + * @param noFallback Ignored. + * @param status The standard ICU error code output parameter. + */ + void put(const char *source, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override { + if (U_FAILURE(status)) { return; } + if (uprv_strcmp(source, "convertUnits") != 0) { + // This is very strict, however it is the cheapest way to be sure + // that with `value`, we're looking at the convertUnits table. + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + ResourceTable conversionRateTable = value.getTable(status); + const char *srcUnit; + // We're reusing `value`, which seems to be a common pattern: + for (int32_t unit = 0; conversionRateTable.getKeyAndValue(unit, srcUnit, value); unit++) { + ResourceTable unitTable = value.getTable(status); + const char *key; + UnicodeString baseUnit = ICU_Utility::makeBogusString(); + UnicodeString factor = ICU_Utility::makeBogusString(); + UnicodeString offset = ICU_Utility::makeBogusString(); + for (int32_t i = 0; unitTable.getKeyAndValue(i, key, value); i++) { + if (uprv_strcmp(key, "target") == 0) { + baseUnit = value.getUnicodeString(status); + } else if (uprv_strcmp(key, "factor") == 0) { + factor = value.getUnicodeString(status); + } else if (uprv_strcmp(key, "offset") == 0) { + offset = value.getUnicodeString(status); + } + } + if (U_FAILURE(status)) { return; } + if (baseUnit.isBogus() || factor.isBogus()) { + // We could not find a usable conversion rate: bad resource. + status = U_MISSING_RESOURCE_ERROR; + return; + } + + // We don't have this ConversionRateInfo yet: add it. + ConversionRateInfo *cr = outVector->emplaceBack(); + if (!cr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } else { + cr->sourceUnit.append(srcUnit, status); + cr->baseUnit.appendInvariantChars(baseUnit, status); + cr->factor.appendInvariantChars(factor, status); + trimSpaces(cr->factor, status); + if (!offset.isBogus()) cr->offset.appendInvariantChars(offset, status); + } + } + return; + } + + private: + MaybeStackVector<ConversionRateInfo> *outVector; +}; + +bool operator<(const UnitPreferenceMetadata &a, const UnitPreferenceMetadata &b) { + return a.compareTo(b) < 0; +} + +/** + * A ResourceSink that collects unit preferences information. + * + * This class is for use by ures_getAllItemsWithFallback. + */ +class UnitPreferencesSink : public ResourceSink { + public: + /** + * Constructor. + * @param outPrefs The vector to which UnitPreference instances are to be + * added. This vector must outlive the use of the ResourceSink. + * @param outMetadata The vector to which UnitPreferenceMetadata instances + * are to be added. This vector must outlive the use of the ResourceSink. + */ + explicit UnitPreferencesSink(MaybeStackVector<UnitPreference> *outPrefs, + MaybeStackVector<UnitPreferenceMetadata> *outMetadata) + : preferences(outPrefs), metadata(outMetadata) {} + + /** + * Method for use by `ures_getAllItemsWithFallback`. Adds the unit + * preferences info that are found in `value` to the output vector. + * + * @param source This string must be "unitPreferenceData": the resource that + * this class supports reading. + * @param value The "unitPreferenceData" resource, containing unit + * preferences data. + * @param noFallback Ignored. + * @param status The standard ICU error code output parameter. Note: if an + * error is returned, outPrefs and outMetadata may be inconsistent. + */ + void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override { + if (U_FAILURE(status)) { return; } + if (uprv_strcmp(key, "unitPreferenceData") != 0) { + // This is very strict, however it is the cheapest way to be sure + // that with `value`, we're looking at the convertUnits table. + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + // The unitPreferenceData structure (see data/misc/units.txt) contains a + // hierarchy of category/usage/region, within which are a set of + // preferences. Hence three for-loops and another loop for the + // preferences themselves: + ResourceTable unitPreferenceDataTable = value.getTable(status); + const char *category; + for (int32_t i = 0; unitPreferenceDataTable.getKeyAndValue(i, category, value); i++) { + ResourceTable categoryTable = value.getTable(status); + const char *usage; + for (int32_t j = 0; categoryTable.getKeyAndValue(j, usage, value); j++) { + ResourceTable regionTable = value.getTable(status); + const char *region; + for (int32_t k = 0; regionTable.getKeyAndValue(k, region, value); k++) { + // `value` now contains the set of preferences for + // category/usage/region. + ResourceArray unitPrefs = value.getArray(status); + if (U_FAILURE(status)) { return; } + int32_t prefLen = unitPrefs.getSize(); + + // Update metadata for this set of preferences. + UnitPreferenceMetadata *meta = metadata->emplaceBack( + category, usage, region, preferences->length(), prefLen, status); + if (!meta) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + if (U_FAILURE(status)) { return; } + if (metadata->length() > 1) { + // Verify that unit preferences are sorted and + // without duplicates. + if (!(*(*metadata)[metadata->length() - 2] < + *(*metadata)[metadata->length() - 1])) { + status = U_INVALID_FORMAT_ERROR; + return; + } + } + + // Collect the individual preferences. + for (int32_t i = 0; unitPrefs.getValue(i, value); i++) { + UnitPreference *up = preferences->emplaceBack(); + if (!up) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + ResourceTable unitPref = value.getTable(status); + if (U_FAILURE(status)) { return; } + for (int32_t i = 0; unitPref.getKeyAndValue(i, key, value); ++i) { + if (uprv_strcmp(key, "unit") == 0) { + int32_t length; + const char16_t *u = value.getString(length, status); + up->unit.appendInvariantChars(u, length, status); + } else if (uprv_strcmp(key, "geq") == 0) { + int32_t length; + const char16_t *g = value.getString(length, status); + CharString geq; + geq.appendInvariantChars(g, length, status); + DecimalQuantity dq; + dq.setToDecNumber(geq.data(), status); + up->geq = dq.toDouble(); + } else if (uprv_strcmp(key, "skeleton") == 0) { + up->skeleton = value.getUnicodeString(status); + } + } + } + } + } + } + } + + private: + MaybeStackVector<UnitPreference> *preferences; + MaybeStackVector<UnitPreferenceMetadata> *metadata; +}; + +int32_t binarySearch(const MaybeStackVector<UnitPreferenceMetadata> *metadata, + const UnitPreferenceMetadata &desired, bool *foundCategory, bool *foundUsage, + bool *foundRegion, UErrorCode &status) { + if (U_FAILURE(status)) { return -1; } + int32_t start = 0; + int32_t end = metadata->length(); + *foundCategory = false; + *foundUsage = false; + *foundRegion = false; + while (start < end) { + int32_t mid = (start + end) / 2; + int32_t cmp = (*metadata)[mid]->compareTo(desired, foundCategory, foundUsage, foundRegion); + if (cmp < 0) { + start = mid + 1; + } else if (cmp > 0) { + end = mid; + } else { + return mid; + } + } + return -1; +} + +/** + * Finds the UnitPreferenceMetadata instance that matches the given category, + * usage and region: if missing, region falls back to "001", and usage + * repeatedly drops tailing components, eventually trying "default" + * ("land-agriculture-grain" -> "land-agriculture" -> "land" -> "default"). + * + * @param metadata The full list of UnitPreferenceMetadata instances. + * @param category The category to search for. See getUnitCategory(). + * @param usage The usage for which formatting preferences is needed. If the + * given usage is not known, automatic fallback occurs, see function description + * above. + * @param region The region for which preferences are needed. If there are no + * region-specific preferences, this function automatically falls back to the + * "001" region (global). + * @param status The standard ICU error code output parameter. + * * If an invalid category is given, status will be U_ILLEGAL_ARGUMENT_ERROR. + * * If fallback to "default" or "001" didn't resolve, status will be + * U_MISSING_RESOURCE. + * @return The index into the metadata vector which represents the appropriate + * preferences. If appropriate preferences are not found, -1 is returned. + */ +int32_t getPreferenceMetadataIndex(const MaybeStackVector<UnitPreferenceMetadata> *metadata, + StringPiece category, StringPiece usage, StringPiece region, + UErrorCode &status) { + if (U_FAILURE(status)) { return -1; } + bool foundCategory, foundUsage, foundRegion; + UnitPreferenceMetadata desired(category, usage, region, -1, -1, status); + int32_t idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status); + if (U_FAILURE(status)) { return -1; } + if (idx >= 0) { return idx; } + if (!foundCategory) { + // TODO: failures can happen if units::getUnitCategory returns a category + // that does not appear in unitPreferenceData. Do we want a unit test that + // checks unitPreferenceData has full coverage of categories? Or just trust + // CLDR? + status = U_ILLEGAL_ARGUMENT_ERROR; + return -1; + } + U_ASSERT(foundCategory); + while (!foundUsage) { + int32_t lastDashIdx = desired.usage.lastIndexOf('-'); + if (lastDashIdx > 0) { + desired.usage.truncate(lastDashIdx); + } else if (uprv_strcmp(desired.usage.data(), "default") != 0) { + desired.usage.truncate(0).append("default", status); + } else { + // "default" is not supposed to be missing for any valid category. + status = U_MISSING_RESOURCE_ERROR; + return -1; + } + idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status); + if (U_FAILURE(status)) { return -1; } + } + U_ASSERT(foundCategory); + U_ASSERT(foundUsage); + if (!foundRegion) { + if (uprv_strcmp(desired.region.data(), "001") != 0) { + desired.region.truncate(0).append("001", status); + idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status); + } + if (!foundRegion) { + // "001" is not supposed to be missing for any valid usage. + status = U_MISSING_RESOURCE_ERROR; + return -1; + } + } + U_ASSERT(foundCategory); + U_ASSERT(foundUsage); + U_ASSERT(foundRegion); + U_ASSERT(idx >= 0); + return idx; +} + +} // namespace + +UnitPreferenceMetadata::UnitPreferenceMetadata(StringPiece category, StringPiece usage, + StringPiece region, int32_t prefsOffset, + int32_t prefsCount, UErrorCode &status) { + this->category.append(category, status); + this->usage.append(usage, status); + this->region.append(region, status); + this->prefsOffset = prefsOffset; + this->prefsCount = prefsCount; +} + +int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other) const { + int32_t cmp = uprv_strcmp(category.data(), other.category.data()); + if (cmp == 0) { + cmp = uprv_strcmp(usage.data(), other.usage.data()); + } + if (cmp == 0) { + cmp = uprv_strcmp(region.data(), other.region.data()); + } + return cmp; +} + +int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other, bool *foundCategory, + bool *foundUsage, bool *foundRegion) const { + int32_t cmp = uprv_strcmp(category.data(), other.category.data()); + if (cmp == 0) { + *foundCategory = true; + cmp = uprv_strcmp(usage.data(), other.usage.data()); + } + if (cmp == 0) { + *foundUsage = true; + cmp = uprv_strcmp(region.data(), other.region.data()); + } + if (cmp == 0) { + *foundRegion = true; + } + return cmp; +} + +// TODO: this may be unnecessary. Fold into ConversionRates class? Or move to anonymous namespace? +void U_I18N_API getAllConversionRates(MaybeStackVector<ConversionRateInfo> &result, UErrorCode &status) { + LocalUResourceBundlePointer unitsBundle(ures_openDirect(nullptr, "units", &status)); + ConversionRateDataSink sink(&result); + ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", sink, status); +} + +const ConversionRateInfo *ConversionRates::extractConversionInfo(StringPiece source, + UErrorCode &status) const { + for (size_t i = 0, n = conversionInfo_.length(); i < n; ++i) { + if (conversionInfo_[i]->sourceUnit.toStringPiece() == source) return conversionInfo_[i]; + } + + status = U_INTERNAL_PROGRAM_ERROR; + return nullptr; +} + +U_I18N_API UnitPreferences::UnitPreferences(UErrorCode &status) { + LocalUResourceBundlePointer unitsBundle(ures_openDirect(nullptr, "units", &status)); + UnitPreferencesSink sink(&unitPrefs_, &metadata_); + ures_getAllItemsWithFallback(unitsBundle.getAlias(), "unitPreferenceData", sink, status); +} + +CharString getKeyWordValue(const Locale &locale, StringPiece kw, UErrorCode &status) { + CharString result; + if (U_FAILURE(status)) { return result; } + { + CharStringByteSink sink(&result); + locale.getKeywordValue(kw, sink, status); + } + if (U_SUCCESS(status) && result.isEmpty()) { + status = U_MISSING_RESOURCE_ERROR; + } + return result; +} + +MaybeStackVector<UnitPreference> + U_I18N_API UnitPreferences::getPreferencesFor(StringPiece category, StringPiece usage, + const Locale &locale, UErrorCode &status) const { + + MaybeStackVector<UnitPreference> result; + + // TODO: remove this once all the categories are allowed. + UErrorCode internalMuStatus = U_ZERO_ERROR; + if (category.compare("temperature") == 0) { + CharString localeUnitCharString = getKeyWordValue(locale, "mu", internalMuStatus); + if (U_SUCCESS(internalMuStatus)) { + // TODO: use the unit category as Java especially when all the categories are allowed.. + if (localeUnitCharString == "celsius" // + || localeUnitCharString == "fahrenheit" // + || localeUnitCharString == "kelvin" // + ) { + UnitPreference unitPref; + unitPref.unit.append(localeUnitCharString, status); + result.emplaceBackAndCheckErrorCode(status, unitPref); + return result; + } + } + } + + CharString region(locale.getCountry(), status); + + // Check the locale system tag, e.g `ms=metric`. + UErrorCode internalMeasureTagStatus = U_ZERO_ERROR; + CharString localeSystem = getKeyWordValue(locale, "measure", internalMeasureTagStatus); + bool isLocaleSystem = false; + if (U_SUCCESS(internalMeasureTagStatus)) { + if (localeSystem == "metric") { + region.clear(); + region.append("001", status); + isLocaleSystem = true; + } else if (localeSystem == "ussystem") { + region.clear(); + region.append("US", status); + isLocaleSystem = true; + } else if (localeSystem == "uksystem") { + region.clear(); + region.append("GB", status); + isLocaleSystem = true; + } + } + + // Check the region tag, e.g. `rg=uszzz`. + if (!isLocaleSystem) { + UErrorCode internalRgTagStatus = U_ZERO_ERROR; + CharString localeRegion = getKeyWordValue(locale, "rg", internalRgTagStatus); + if (U_SUCCESS(internalRgTagStatus) && localeRegion.length() >= 3) { + if (localeRegion == "default") { + region.clear(); + region.append(localeRegion, status); + } else if (localeRegion[0] >= '0' && localeRegion[0] <= '9') { + region.clear(); + region.append(localeRegion.data(), 3, status); + } else { + // Take the first two character and capitalize them. + region.clear(); + region.append(uprv_toupper(localeRegion[0]), status); + region.append(uprv_toupper(localeRegion[1]), status); + } + } + } + + int32_t idx = + getPreferenceMetadataIndex(&metadata_, category, usage, region.toStringPiece(), status); + if (U_FAILURE(status)) { + return result; + } + + U_ASSERT(idx >= 0); // Failures should have been taken care of by `status`. + const UnitPreferenceMetadata *m = metadata_[idx]; + for (int32_t i = 0; i < m->prefsCount; i++) { + result.emplaceBackAndCheckErrorCode(status, *(unitPrefs_[i + m->prefsOffset])); + } + return result; +} + +} // namespace units +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ |