summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/i18n/units_data.cpp
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
commit36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree105e8c98ddea1c1e4784a60a5a6410fa416be2de /intl/icu/source/i18n/units_data.cpp
parentInitial commit. (diff)
downloadfirefox-esr-upstream.tar.xz
firefox-esr-upstream.zip
Adding upstream version 115.7.0esr.upstream/115.7.0esrupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'intl/icu/source/i18n/units_data.cpp')
-rw-r--r--intl/icu/source/i18n/units_data.cpp488
1 files changed, 488 insertions, 0 deletions
diff --git a/intl/icu/source/i18n/units_data.cpp b/intl/icu/source/i18n/units_data.cpp
new file mode 100644
index 0000000000..5fa17567a1
--- /dev/null
+++ b/intl/icu/source/i18n/units_data.cpp
@@ -0,0 +1,488 @@
+// © 2020 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "bytesinkutil.h"
+#include "cstring.h"
+#include "number_decimalquantity.h"
+#include "resource.h"
+#include "uassert.h"
+#include "unicode/locid.h"
+#include "unicode/unistr.h"
+#include "unicode/ures.h"
+#include "units_data.h"
+#include "uresimp.h"
+#include "util.h"
+#include <utility>
+
+U_NAMESPACE_BEGIN
+namespace units {
+
+namespace {
+
+using icu::number::impl::DecimalQuantity;
+
+void trimSpaces(CharString& factor, UErrorCode& status){
+ CharString trimmed;
+ for (int i = 0 ; i < factor.length(); i++) {
+ if (factor[i] == ' ') continue;
+
+ trimmed.append(factor[i], status);
+ }
+
+ factor = std::move(trimmed);
+}
+
+/**
+ * A ResourceSink that collects conversion rate information.
+ *
+ * This class is for use by ures_getAllItemsWithFallback.
+ */
+class ConversionRateDataSink : public ResourceSink {
+ public:
+ /**
+ * Constructor.
+ * @param out The vector to which ConversionRateInfo instances are to be
+ * added. This vector must outlive the use of the ResourceSink.
+ */
+ explicit ConversionRateDataSink(MaybeStackVector<ConversionRateInfo> *out) : outVector(out) {}
+
+ /**
+ * Method for use by `ures_getAllItemsWithFallback`. Adds the unit
+ * conversion rates that are found in `value` to the output vector.
+ *
+ * @param source This string must be "convertUnits": the resource that this
+ * class supports reading.
+ * @param value The "convertUnits" resource, containing unit conversion rate
+ * information.
+ * @param noFallback Ignored.
+ * @param status The standard ICU error code output parameter.
+ */
+ void put(const char *source, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
+ if (U_FAILURE(status)) { return; }
+ if (uprv_strcmp(source, "convertUnits") != 0) {
+ // This is very strict, however it is the cheapest way to be sure
+ // that with `value`, we're looking at the convertUnits table.
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ ResourceTable conversionRateTable = value.getTable(status);
+ const char *srcUnit;
+ // We're reusing `value`, which seems to be a common pattern:
+ for (int32_t unit = 0; conversionRateTable.getKeyAndValue(unit, srcUnit, value); unit++) {
+ ResourceTable unitTable = value.getTable(status);
+ const char *key;
+ UnicodeString baseUnit = ICU_Utility::makeBogusString();
+ UnicodeString factor = ICU_Utility::makeBogusString();
+ UnicodeString offset = ICU_Utility::makeBogusString();
+ for (int32_t i = 0; unitTable.getKeyAndValue(i, key, value); i++) {
+ if (uprv_strcmp(key, "target") == 0) {
+ baseUnit = value.getUnicodeString(status);
+ } else if (uprv_strcmp(key, "factor") == 0) {
+ factor = value.getUnicodeString(status);
+ } else if (uprv_strcmp(key, "offset") == 0) {
+ offset = value.getUnicodeString(status);
+ }
+ }
+ if (U_FAILURE(status)) { return; }
+ if (baseUnit.isBogus() || factor.isBogus()) {
+ // We could not find a usable conversion rate: bad resource.
+ status = U_MISSING_RESOURCE_ERROR;
+ return;
+ }
+
+ // We don't have this ConversionRateInfo yet: add it.
+ ConversionRateInfo *cr = outVector->emplaceBack();
+ if (!cr) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ } else {
+ cr->sourceUnit.append(srcUnit, status);
+ cr->baseUnit.appendInvariantChars(baseUnit, status);
+ cr->factor.appendInvariantChars(factor, status);
+ trimSpaces(cr->factor, status);
+ if (!offset.isBogus()) cr->offset.appendInvariantChars(offset, status);
+ }
+ }
+ return;
+ }
+
+ private:
+ MaybeStackVector<ConversionRateInfo> *outVector;
+};
+
+bool operator<(const UnitPreferenceMetadata &a, const UnitPreferenceMetadata &b) {
+ return a.compareTo(b) < 0;
+}
+
+/**
+ * A ResourceSink that collects unit preferences information.
+ *
+ * This class is for use by ures_getAllItemsWithFallback.
+ */
+class UnitPreferencesSink : public ResourceSink {
+ public:
+ /**
+ * Constructor.
+ * @param outPrefs The vector to which UnitPreference instances are to be
+ * added. This vector must outlive the use of the ResourceSink.
+ * @param outMetadata The vector to which UnitPreferenceMetadata instances
+ * are to be added. This vector must outlive the use of the ResourceSink.
+ */
+ explicit UnitPreferencesSink(MaybeStackVector<UnitPreference> *outPrefs,
+ MaybeStackVector<UnitPreferenceMetadata> *outMetadata)
+ : preferences(outPrefs), metadata(outMetadata) {}
+
+ /**
+ * Method for use by `ures_getAllItemsWithFallback`. Adds the unit
+ * preferences info that are found in `value` to the output vector.
+ *
+ * @param source This string must be "unitPreferenceData": the resource that
+ * this class supports reading.
+ * @param value The "unitPreferenceData" resource, containing unit
+ * preferences data.
+ * @param noFallback Ignored.
+ * @param status The standard ICU error code output parameter. Note: if an
+ * error is returned, outPrefs and outMetadata may be inconsistent.
+ */
+ void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
+ if (U_FAILURE(status)) { return; }
+ if (uprv_strcmp(key, "unitPreferenceData") != 0) {
+ // This is very strict, however it is the cheapest way to be sure
+ // that with `value`, we're looking at the convertUnits table.
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ // The unitPreferenceData structure (see data/misc/units.txt) contains a
+ // hierarchy of category/usage/region, within which are a set of
+ // preferences. Hence three for-loops and another loop for the
+ // preferences themselves:
+ ResourceTable unitPreferenceDataTable = value.getTable(status);
+ const char *category;
+ for (int32_t i = 0; unitPreferenceDataTable.getKeyAndValue(i, category, value); i++) {
+ ResourceTable categoryTable = value.getTable(status);
+ const char *usage;
+ for (int32_t j = 0; categoryTable.getKeyAndValue(j, usage, value); j++) {
+ ResourceTable regionTable = value.getTable(status);
+ const char *region;
+ for (int32_t k = 0; regionTable.getKeyAndValue(k, region, value); k++) {
+ // `value` now contains the set of preferences for
+ // category/usage/region.
+ ResourceArray unitPrefs = value.getArray(status);
+ if (U_FAILURE(status)) { return; }
+ int32_t prefLen = unitPrefs.getSize();
+
+ // Update metadata for this set of preferences.
+ UnitPreferenceMetadata *meta = metadata->emplaceBack(
+ category, usage, region, preferences->length(), prefLen, status);
+ if (!meta) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ if (U_FAILURE(status)) { return; }
+ if (metadata->length() > 1) {
+ // Verify that unit preferences are sorted and
+ // without duplicates.
+ if (!(*(*metadata)[metadata->length() - 2] <
+ *(*metadata)[metadata->length() - 1])) {
+ status = U_INVALID_FORMAT_ERROR;
+ return;
+ }
+ }
+
+ // Collect the individual preferences.
+ for (int32_t i = 0; unitPrefs.getValue(i, value); i++) {
+ UnitPreference *up = preferences->emplaceBack();
+ if (!up) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ ResourceTable unitPref = value.getTable(status);
+ if (U_FAILURE(status)) { return; }
+ for (int32_t i = 0; unitPref.getKeyAndValue(i, key, value); ++i) {
+ if (uprv_strcmp(key, "unit") == 0) {
+ int32_t length;
+ const char16_t *u = value.getString(length, status);
+ up->unit.appendInvariantChars(u, length, status);
+ } else if (uprv_strcmp(key, "geq") == 0) {
+ int32_t length;
+ const char16_t *g = value.getString(length, status);
+ CharString geq;
+ geq.appendInvariantChars(g, length, status);
+ DecimalQuantity dq;
+ dq.setToDecNumber(geq.data(), status);
+ up->geq = dq.toDouble();
+ } else if (uprv_strcmp(key, "skeleton") == 0) {
+ up->skeleton = value.getUnicodeString(status);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ private:
+ MaybeStackVector<UnitPreference> *preferences;
+ MaybeStackVector<UnitPreferenceMetadata> *metadata;
+};
+
+int32_t binarySearch(const MaybeStackVector<UnitPreferenceMetadata> *metadata,
+ const UnitPreferenceMetadata &desired, bool *foundCategory, bool *foundUsage,
+ bool *foundRegion, UErrorCode &status) {
+ if (U_FAILURE(status)) { return -1; }
+ int32_t start = 0;
+ int32_t end = metadata->length();
+ *foundCategory = false;
+ *foundUsage = false;
+ *foundRegion = false;
+ while (start < end) {
+ int32_t mid = (start + end) / 2;
+ int32_t cmp = (*metadata)[mid]->compareTo(desired, foundCategory, foundUsage, foundRegion);
+ if (cmp < 0) {
+ start = mid + 1;
+ } else if (cmp > 0) {
+ end = mid;
+ } else {
+ return mid;
+ }
+ }
+ return -1;
+}
+
+/**
+ * Finds the UnitPreferenceMetadata instance that matches the given category,
+ * usage and region: if missing, region falls back to "001", and usage
+ * repeatedly drops tailing components, eventually trying "default"
+ * ("land-agriculture-grain" -> "land-agriculture" -> "land" -> "default").
+ *
+ * @param metadata The full list of UnitPreferenceMetadata instances.
+ * @param category The category to search for. See getUnitCategory().
+ * @param usage The usage for which formatting preferences is needed. If the
+ * given usage is not known, automatic fallback occurs, see function description
+ * above.
+ * @param region The region for which preferences are needed. If there are no
+ * region-specific preferences, this function automatically falls back to the
+ * "001" region (global).
+ * @param status The standard ICU error code output parameter.
+ * * If an invalid category is given, status will be U_ILLEGAL_ARGUMENT_ERROR.
+ * * If fallback to "default" or "001" didn't resolve, status will be
+ * U_MISSING_RESOURCE.
+ * @return The index into the metadata vector which represents the appropriate
+ * preferences. If appropriate preferences are not found, -1 is returned.
+ */
+int32_t getPreferenceMetadataIndex(const MaybeStackVector<UnitPreferenceMetadata> *metadata,
+ StringPiece category, StringPiece usage, StringPiece region,
+ UErrorCode &status) {
+ if (U_FAILURE(status)) { return -1; }
+ bool foundCategory, foundUsage, foundRegion;
+ UnitPreferenceMetadata desired(category, usage, region, -1, -1, status);
+ int32_t idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
+ if (U_FAILURE(status)) { return -1; }
+ if (idx >= 0) { return idx; }
+ if (!foundCategory) {
+ // TODO: failures can happen if units::getUnitCategory returns a category
+ // that does not appear in unitPreferenceData. Do we want a unit test that
+ // checks unitPreferenceData has full coverage of categories? Or just trust
+ // CLDR?
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return -1;
+ }
+ U_ASSERT(foundCategory);
+ while (!foundUsage) {
+ int32_t lastDashIdx = desired.usage.lastIndexOf('-');
+ if (lastDashIdx > 0) {
+ desired.usage.truncate(lastDashIdx);
+ } else if (uprv_strcmp(desired.usage.data(), "default") != 0) {
+ desired.usage.truncate(0).append("default", status);
+ } else {
+ // "default" is not supposed to be missing for any valid category.
+ status = U_MISSING_RESOURCE_ERROR;
+ return -1;
+ }
+ idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
+ if (U_FAILURE(status)) { return -1; }
+ }
+ U_ASSERT(foundCategory);
+ U_ASSERT(foundUsage);
+ if (!foundRegion) {
+ if (uprv_strcmp(desired.region.data(), "001") != 0) {
+ desired.region.truncate(0).append("001", status);
+ idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
+ }
+ if (!foundRegion) {
+ // "001" is not supposed to be missing for any valid usage.
+ status = U_MISSING_RESOURCE_ERROR;
+ return -1;
+ }
+ }
+ U_ASSERT(foundCategory);
+ U_ASSERT(foundUsage);
+ U_ASSERT(foundRegion);
+ U_ASSERT(idx >= 0);
+ return idx;
+}
+
+} // namespace
+
+UnitPreferenceMetadata::UnitPreferenceMetadata(StringPiece category, StringPiece usage,
+ StringPiece region, int32_t prefsOffset,
+ int32_t prefsCount, UErrorCode &status) {
+ this->category.append(category, status);
+ this->usage.append(usage, status);
+ this->region.append(region, status);
+ this->prefsOffset = prefsOffset;
+ this->prefsCount = prefsCount;
+}
+
+int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other) const {
+ int32_t cmp = uprv_strcmp(category.data(), other.category.data());
+ if (cmp == 0) {
+ cmp = uprv_strcmp(usage.data(), other.usage.data());
+ }
+ if (cmp == 0) {
+ cmp = uprv_strcmp(region.data(), other.region.data());
+ }
+ return cmp;
+}
+
+int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other, bool *foundCategory,
+ bool *foundUsage, bool *foundRegion) const {
+ int32_t cmp = uprv_strcmp(category.data(), other.category.data());
+ if (cmp == 0) {
+ *foundCategory = true;
+ cmp = uprv_strcmp(usage.data(), other.usage.data());
+ }
+ if (cmp == 0) {
+ *foundUsage = true;
+ cmp = uprv_strcmp(region.data(), other.region.data());
+ }
+ if (cmp == 0) {
+ *foundRegion = true;
+ }
+ return cmp;
+}
+
+// TODO: this may be unnecessary. Fold into ConversionRates class? Or move to anonymous namespace?
+void U_I18N_API getAllConversionRates(MaybeStackVector<ConversionRateInfo> &result, UErrorCode &status) {
+ LocalUResourceBundlePointer unitsBundle(ures_openDirect(nullptr, "units", &status));
+ ConversionRateDataSink sink(&result);
+ ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", sink, status);
+}
+
+const ConversionRateInfo *ConversionRates::extractConversionInfo(StringPiece source,
+ UErrorCode &status) const {
+ for (size_t i = 0, n = conversionInfo_.length(); i < n; ++i) {
+ if (conversionInfo_[i]->sourceUnit.toStringPiece() == source) return conversionInfo_[i];
+ }
+
+ status = U_INTERNAL_PROGRAM_ERROR;
+ return nullptr;
+}
+
+U_I18N_API UnitPreferences::UnitPreferences(UErrorCode &status) {
+ LocalUResourceBundlePointer unitsBundle(ures_openDirect(nullptr, "units", &status));
+ UnitPreferencesSink sink(&unitPrefs_, &metadata_);
+ ures_getAllItemsWithFallback(unitsBundle.getAlias(), "unitPreferenceData", sink, status);
+}
+
+CharString getKeyWordValue(const Locale &locale, StringPiece kw, UErrorCode &status) {
+ CharString result;
+ if (U_FAILURE(status)) { return result; }
+ {
+ CharStringByteSink sink(&result);
+ locale.getKeywordValue(kw, sink, status);
+ }
+ if (U_SUCCESS(status) && result.isEmpty()) {
+ status = U_MISSING_RESOURCE_ERROR;
+ }
+ return result;
+}
+
+MaybeStackVector<UnitPreference>
+ U_I18N_API UnitPreferences::getPreferencesFor(StringPiece category, StringPiece usage,
+ const Locale &locale, UErrorCode &status) const {
+
+ MaybeStackVector<UnitPreference> result;
+
+ // TODO: remove this once all the categories are allowed.
+ UErrorCode internalMuStatus = U_ZERO_ERROR;
+ if (category.compare("temperature") == 0) {
+ CharString localeUnitCharString = getKeyWordValue(locale, "mu", internalMuStatus);
+ if (U_SUCCESS(internalMuStatus)) {
+ // TODO: use the unit category as Java especially when all the categories are allowed..
+ if (localeUnitCharString == "celsius" //
+ || localeUnitCharString == "fahrenheit" //
+ || localeUnitCharString == "kelvin" //
+ ) {
+ UnitPreference unitPref;
+ unitPref.unit.append(localeUnitCharString, status);
+ result.emplaceBackAndCheckErrorCode(status, unitPref);
+ return result;
+ }
+ }
+ }
+
+ CharString region(locale.getCountry(), status);
+
+ // Check the locale system tag, e.g `ms=metric`.
+ UErrorCode internalMeasureTagStatus = U_ZERO_ERROR;
+ CharString localeSystem = getKeyWordValue(locale, "measure", internalMeasureTagStatus);
+ bool isLocaleSystem = false;
+ if (U_SUCCESS(internalMeasureTagStatus)) {
+ if (localeSystem == "metric") {
+ region.clear();
+ region.append("001", status);
+ isLocaleSystem = true;
+ } else if (localeSystem == "ussystem") {
+ region.clear();
+ region.append("US", status);
+ isLocaleSystem = true;
+ } else if (localeSystem == "uksystem") {
+ region.clear();
+ region.append("GB", status);
+ isLocaleSystem = true;
+ }
+ }
+
+ // Check the region tag, e.g. `rg=uszzz`.
+ if (!isLocaleSystem) {
+ UErrorCode internalRgTagStatus = U_ZERO_ERROR;
+ CharString localeRegion = getKeyWordValue(locale, "rg", internalRgTagStatus);
+ if (U_SUCCESS(internalRgTagStatus) && localeRegion.length() >= 3) {
+ if (localeRegion == "default") {
+ region.clear();
+ region.append(localeRegion, status);
+ } else if (localeRegion[0] >= '0' && localeRegion[0] <= '9') {
+ region.clear();
+ region.append(localeRegion.data(), 3, status);
+ } else {
+ // Take the first two character and capitalize them.
+ region.clear();
+ region.append(uprv_toupper(localeRegion[0]), status);
+ region.append(uprv_toupper(localeRegion[1]), status);
+ }
+ }
+ }
+
+ int32_t idx =
+ getPreferenceMetadataIndex(&metadata_, category, usage, region.toStringPiece(), status);
+ if (U_FAILURE(status)) {
+ return result;
+ }
+
+ U_ASSERT(idx >= 0); // Failures should have been taken care of by `status`.
+ const UnitPreferenceMetadata *m = metadata_[idx];
+ for (int32_t i = 0; i < m->prefsCount; i++) {
+ result.emplaceBackAndCheckErrorCode(status, *(unitPrefs_[i + m->prefsOffset]));
+ }
+ return result;
+}
+
+} // namespace units
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */