summaryrefslogtreecommitdiffstats
path: root/intl/components/src
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 09:22:09 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 09:22:09 +0000
commit43a97878ce14b72f0981164f87f2e35e14151312 (patch)
tree620249daf56c0258faa40cbdcf9cfba06de2a846 /intl/components/src
parentInitial commit. (diff)
downloadfirefox-43a97878ce14b72f0981164f87f2e35e14151312.tar.xz
firefox-43a97878ce14b72f0981164f87f2e35e14151312.zip
Adding upstream version 110.0.1.upstream/110.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'intl/components/src')
-rw-r--r--intl/components/src/Bidi.cpp138
-rw-r--r--intl/components/src/Bidi.h160
-rw-r--r--intl/components/src/BidiClass.h47
-rw-r--r--intl/components/src/BidiEmbeddingLevel.cpp53
-rw-r--r--intl/components/src/BidiEmbeddingLevel.h113
-rw-r--r--intl/components/src/Calendar.cpp172
-rw-r--r--intl/components/src/Calendar.h133
-rw-r--r--intl/components/src/Collator.cpp305
-rw-r--r--intl/components/src/Collator.h322
-rw-r--r--intl/components/src/Currency.cpp22
-rw-r--r--intl/components/src/Currency.h30
-rw-r--r--intl/components/src/DateIntervalFormat.cpp266
-rw-r--r--intl/components/src/DateIntervalFormat.h106
-rw-r--r--intl/components/src/DateTimeFormat.cpp1140
-rw-r--r--intl/components/src/DateTimeFormat.h593
-rw-r--r--intl/components/src/DateTimeFormatUtils.cpp104
-rw-r--r--intl/components/src/DateTimeFormatUtils.h14
-rw-r--r--intl/components/src/DateTimePart.h84
-rw-r--r--intl/components/src/DateTimePatternGenerator.cpp49
-rw-r--r--intl/components/src/DateTimePatternGenerator.h161
-rw-r--r--intl/components/src/DisplayNames.cpp234
-rw-r--r--intl/components/src/DisplayNames.h971
-rw-r--r--intl/components/src/FormatBuffer.h77
-rw-r--r--intl/components/src/ICU4CGlue.cpp44
-rw-r--r--intl/components/src/ICU4CGlue.h722
-rw-r--r--intl/components/src/ICU4CLibrary.cpp41
-rw-r--r--intl/components/src/ICU4CLibrary.h74
-rw-r--r--intl/components/src/ICUError.h118
-rw-r--r--intl/components/src/IDNA.cpp26
-rw-r--r--intl/components/src/IDNA.h130
-rw-r--r--intl/components/src/ListFormat.cpp132
-rw-r--r--intl/components/src/ListFormat.h223
-rw-r--r--intl/components/src/Locale.cpp1471
-rw-r--r--intl/components/src/Locale.h773
-rw-r--r--intl/components/src/LocaleCanonicalizer.cpp36
-rw-r--r--intl/components/src/LocaleCanonicalizer.h43
-rw-r--r--intl/components/src/LocaleGenerated.cpp1129
-rw-r--r--intl/components/src/MeasureUnit.cpp110
-rw-r--r--intl/components/src/MeasureUnit.h155
-rw-r--r--intl/components/src/MeasureUnitGenerated.h70
-rw-r--r--intl/components/src/NumberFormat.cpp154
-rw-r--r--intl/components/src/NumberFormat.h426
-rw-r--r--intl/components/src/NumberFormatFields.cpp398
-rw-r--r--intl/components/src/NumberFormatFields.h91
-rw-r--r--intl/components/src/NumberFormatterSkeleton.cpp473
-rw-r--r--intl/components/src/NumberFormatterSkeleton.h110
-rw-r--r--intl/components/src/NumberParser.cpp45
-rw-r--r--intl/components/src/NumberParser.h46
-rw-r--r--intl/components/src/NumberPart.h53
-rw-r--r--intl/components/src/NumberRangeFormat.cpp215
-rw-r--r--intl/components/src/NumberRangeFormat.h237
-rw-r--r--intl/components/src/NumberingSystem.cpp38
-rw-r--r--intl/components/src/NumberingSystem.h56
-rw-r--r--intl/components/src/PluralRules.cpp180
-rw-r--r--intl/components/src/PluralRules.h221
-rw-r--r--intl/components/src/RelativeTimeFormat.cpp153
-rw-r--r--intl/components/src/RelativeTimeFormat.h146
-rw-r--r--intl/components/src/ScopedICUObject.h40
-rw-r--r--intl/components/src/String.cpp13
-rw-r--r--intl/components/src/String.h256
-rw-r--r--intl/components/src/TimeZone.cpp344
-rw-r--r--intl/components/src/TimeZone.h237
-rw-r--r--intl/components/src/UnicodeProperties.h306
-rw-r--r--intl/components/src/UnicodeScriptCodes.h261
64 files changed, 15090 insertions, 0 deletions
diff --git a/intl/components/src/Bidi.cpp b/intl/components/src/Bidi.cpp
new file mode 100644
index 0000000000..2ce355c8eb
--- /dev/null
+++ b/intl/components/src/Bidi.cpp
@@ -0,0 +1,138 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/intl/Bidi.h"
+#include "mozilla/Casting.h"
+#include "mozilla/intl/ICU4CGlue.h"
+
+#include "unicode/ubidi.h"
+
+namespace mozilla::intl {
+
+Bidi::Bidi() { mBidi = ubidi_open(); }
+Bidi::~Bidi() { ubidi_close(mBidi.GetMut()); }
+
+ICUResult Bidi::SetParagraph(Span<const char16_t> aParagraph,
+                             BidiEmbeddingLevel aLevel) {
+  // Do not allow any reordering of the runs, as this can change the
+  // performance characteristics of working with runs. In the default mode,
+  // the levels can be iterated over directly, rather than relying on computing
+  // logical runs on the fly, which can have negative performance
+  // characteristics compared to iterating over the levels.
+  // In the UBIDI_REORDER_RUNS_ONLY mode the levels are encoded with additional
+  // information which can be safely ignored in this Bidi implementation.
+  // Note that this check is here since setting the mode must be done before
+  // calls to setting the paragraph.
+  MOZ_ASSERT(ubidi_getReorderingMode(mBidi.GetMut()) == UBIDI_REORDER_DEFAULT);
+  UErrorCode status = U_ZERO_ERROR;
+  ubidi_setPara(mBidi.GetMut(), aParagraph.Elements(),
+                AssertedCast<int32_t>(aParagraph.Length()), aLevel, nullptr,
+                &status);
+
+  // Drop the cached levels AND their length, so that a stale length cannot
+  // pass GetLogicalRun()'s bounds check before CountRuns() is called again.
+  mLevels = nullptr;
+  mLength = 0;
+  return ToICUResult(status);
+}
+
+Bidi::ParagraphDirection Bidi::GetParagraphDirection() const {
+  switch (ubidi_getDirection(mBidi.GetConst())) {
+    case UBIDI_LTR:
+      return Bidi::ParagraphDirection::LTR;
+    case UBIDI_RTL:
+      return Bidi::ParagraphDirection::RTL;
+    case UBIDI_MIXED:
+      return Bidi::ParagraphDirection::Mixed;
+    case UBIDI_NEUTRAL:
+      // UBIDI_NEUTRAL is only reported by `ubidi_getBaseDirection` (see
+      // `Bidi::GetBaseDirection`), so it is not expected from this call.
+      MOZ_ASSERT_UNREACHABLE("Unexpected UBiDiDirection value.");
+  }
+  return Bidi::ParagraphDirection::Mixed;
+}
+
+/* static */
+void Bidi::ReorderVisual(const BidiEmbeddingLevel* aLevels, int32_t aLength,
+ int32_t* aIndexMap) {
+ ubidi_reorderVisual(reinterpret_cast<const uint8_t*>(aLevels), aLength,
+ aIndexMap);
+}
+
+/* static */
+Bidi::BaseDirection Bidi::GetBaseDirection(Span<const char16_t> aParagraph) {
+ UBiDiDirection direction = ubidi_getBaseDirection(
+ aParagraph.Elements(), AssertedCast<int32_t>(aParagraph.Length()));
+
+ switch (direction) {
+ case UBIDI_LTR:
+ return Bidi::BaseDirection::LTR;
+ case UBIDI_RTL:
+ return Bidi::BaseDirection::RTL;
+ case UBIDI_NEUTRAL:
+ return Bidi::BaseDirection::Neutral;
+ case UBIDI_MIXED:
+ MOZ_ASSERT_UNREACHABLE("Unexpected UBiDiDirection value.");
+ }
+
+ return Bidi::BaseDirection::Neutral;
+}
+
+static BidiDirection ToBidiDirection(UBiDiDirection aDirection) {
+ switch (aDirection) {
+ case UBIDI_LTR:
+ return BidiDirection::LTR;
+ case UBIDI_RTL:
+ return BidiDirection::RTL;
+ case UBIDI_MIXED:
+ case UBIDI_NEUTRAL:
+ MOZ_ASSERT_UNREACHABLE("Unexpected UBiDiDirection value.");
+ }
+ return BidiDirection::LTR;
+}
+
+Result<int32_t, ICUError> Bidi::CountRuns() {
+ UErrorCode status = U_ZERO_ERROR;
+ int32_t runCount = ubidi_countRuns(mBidi.GetMut(), &status);
+ if (U_FAILURE(status)) {
+ return Err(ToICUError(status));
+ }
+
+ mLength = ubidi_getProcessedLength(mBidi.GetConst());
+ mLevels = mLength > 0 ? reinterpret_cast<const BidiEmbeddingLevel*>(
+ ubidi_getLevels(mBidi.GetMut(), &status))
+ : nullptr;
+ if (U_FAILURE(status)) {
+ return Err(ToICUError(status));
+ }
+
+ return runCount;
+}
+
+void Bidi::GetLogicalRun(int32_t aLogicalStart, int32_t* aLogicalLimitOut,
+                         BidiEmbeddingLevel* aLevelOut) {
+  MOZ_ASSERT(mLevels, "CountRuns hasn't been run?");
+  // mLevels is indexed with aLogicalStart, so reject negative offsets too.
+  MOZ_RELEASE_ASSERT(aLogicalStart >= 0 && aLogicalStart < mLength,
+                     "Out of bound");
+  const BidiEmbeddingLevel level = mLevels[aLogicalStart];
+  int32_t limit = aLogicalStart + 1;
+  while (limit < mLength && mLevels[limit] == level) {
+    limit++;
+  }
+  *aLogicalLimitOut = limit;  // Exclusive end offset of the run.
+  *aLevelOut = level;
+}
+
+BidiEmbeddingLevel Bidi::GetParagraphEmbeddingLevel() const {
+ return BidiEmbeddingLevel(ubidi_getParaLevel(mBidi.GetConst()));
+}
+
+BidiDirection Bidi::GetVisualRun(int32_t aRunIndex, int32_t* aLogicalStart,
+ int32_t* aLength) {
+ return ToBidiDirection(
+ ubidi_getVisualRun(mBidi.GetMut(), aRunIndex, aLogicalStart, aLength));
+}
+
+} // namespace mozilla::intl
diff --git a/intl/components/src/Bidi.h b/intl/components/src/Bidi.h
new file mode 100644
index 0000000000..9b7fba73e2
--- /dev/null
+++ b/intl/components/src/Bidi.h
@@ -0,0 +1,160 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef intl_components_Bidi_h_
+#define intl_components_Bidi_h_
+
+#include "mozilla/intl/BidiEmbeddingLevel.h"
+#include "mozilla/intl/ICU4CGlue.h"
+
+struct UBiDi;
+
+namespace mozilla::intl {
+
+/**
+ * This component is a Mozilla-focused API for working with bidirectional (bidi)
+ * text. Text is commonly displayed left to right (LTR), especially for
+ * Latin-based alphabets. However, languages like Arabic and Hebrew display
+ * text right to left (RTL). When displaying text, LTR and RTL text can be
+ * combined together in the same paragraph. This class gives tools for working
+ * with unidirectional, and mixed direction paragraphs.
+ *
+ * See the Unicode Bidirectional Algorithm document for implementation details:
+ * https://unicode.org/reports/tr9/
+ */
+class Bidi final {
+ public:
+ Bidi();
+ ~Bidi();
+
+ // Not copyable or movable
+ Bidi(const Bidi&) = delete;
+ Bidi& operator=(const Bidi&) = delete;
+
+ /**
+ * This enum indicates the text direction for the set paragraph. Some
+ * paragraphs are unidirectional, where they only have one direction, or a
+ * paragraph could use both LTR and RTL. In this case the paragraph's
+ * direction would be mixed.
+ */
+ enum class ParagraphDirection { LTR, RTL, Mixed };
+
+ /**
+ * Set the current paragraph of text to analyze for its bidi properties. This
+ * performs the Unicode bidi algorithm as specified by:
+ * https://unicode.org/reports/tr9/
+ *
+ * After setting the text, the other getter methods can be used to find out
+ * the directionality of the paragraph text.
+ */
+ ICUResult SetParagraph(Span<const char16_t> aParagraph,
+ BidiEmbeddingLevel aLevel);
+
+ /**
+ * Get the embedding level for the paragraph that was set by SetParagraph.
+ */
+ BidiEmbeddingLevel GetParagraphEmbeddingLevel() const;
+
+ /**
+ * Get the directionality of the paragraph text that was set by SetParagraph.
+ */
+ ParagraphDirection GetParagraphDirection() const;
+
+ /**
+ * Get the number of runs. This function may invoke the actual reordering on
+ * the Bidi object, after SetParagraph may have resolved only the levels of
+ * the text. Therefore, `CountRuns` may have to allocate memory, and may fail
+ * doing so.
+ */
+ Result<int32_t, ICUError> CountRuns();
+
+ /**
+ * Get the next logical run. The logical runs are a run of text that has the
+ * same directionality and embedding level. These runs are in memory order,
+ * and not in display order.
+ *
+ * Important! `Bidi::CountRuns` must be called before calling this method.
+ *
+ * @param aLogicalStart is the offset into the paragraph text that marks the
+ * logical start of the text.
+ * @param aLogicalLimitOut is an out param that is the logical limit of the
+ * run, i.e. the offset just past the last character of the logical run.
+ * @param aLevelOut is an out parameter that returns the embedding level for
+ * the run
+ */
+ void GetLogicalRun(int32_t aLogicalStart, int32_t* aLogicalLimitOut,
+ BidiEmbeddingLevel* aLevelOut);
+
+ /**
+ * This is a convenience function that does not use the ICU Bidi object.
+ * It is intended to be used for when an application has determined the
+ * embedding levels of objects (character sequences) and just needs to have
+ * them reordered (L2).
+ *
+ * @param aLevels is an array with `aLength` levels that have been
+ * determined by the application.
+ *
+ * @param aLength is the number of levels in the array, or, semantically,
+ * the number of objects to be reordered. It must be greater than 0.
+ *
+ * @param aIndexMap is a pointer to an array of `aLength`
+ * indexes which will reflect the reordering of the characters.
+ * The array does not need to be initialized.
+ * The index map will result in
+ * `aIndexMap[aVisualIndex]==aLogicalIndex`.
+ */
+ static void ReorderVisual(const BidiEmbeddingLevel* aLevels, int32_t aLength,
+ int32_t* aIndexMap);
+
+ /**
+ * This enum indicates the bidi character type of the first strong character
+ * for the set paragraph.
+ * LTR: bidi character type 'L'.
+ * RTL: bidi character type 'R' or 'AL'.
+ * Neutral: The rest of bidi character types.
+ */
+ enum class BaseDirection { LTR, RTL, Neutral };
+
+ /**
+ * Get the base direction of the paragraph.
+ */
+ static BaseDirection GetBaseDirection(Span<const char16_t> aParagraph);
+
+ /**
+ * Get one run's logical start, length, and directionality. In an RTL run, the
+ * character at the logical start is visually on the right of the displayed
+ * run. The length is the number of characters in the run.
+ * `Bidi::CountRuns` should be called before the runs are retrieved.
+ *
+ * @param aRunIndex is the number of the run in visual order, in the
+ * range `[0..CountRuns-1]`.
+ *
+ * @param aLogicalStart is the first logical character index in the text.
+ * The pointer may be `nullptr` if this index is not needed.
+ *
+ * @param aLength is the number of characters (at least one) in the run.
+ * The pointer may be `nullptr` if this is not needed.
+ *
+ * Note that in right-to-left runs, the code places modifier letters before
+ * base characters and second surrogates before first ones.
+ */
+ BidiDirection GetVisualRun(int32_t aRunIndex, int32_t* aLogicalStart,
+ int32_t* aLength);
+
+ private:
+ ICUPointer<UBiDi> mBidi = ICUPointer<UBiDi>(nullptr);
+
+ /**
+ * An array of levels that is the same length as the paragraph from
+ * `Bidi::SetParagraph`.
+ */
+ const BidiEmbeddingLevel* mLevels = nullptr;
+
+ /**
+ * The length of the paragraph from `Bidi::SetParagraph`.
+ */
+ int32_t mLength = 0;
+};
+
+} // namespace mozilla::intl
+#endif
diff --git a/intl/components/src/BidiClass.h b/intl/components/src/BidiClass.h
new file mode 100644
index 0000000000..f4d31e9e95
--- /dev/null
+++ b/intl/components/src/BidiClass.h
@@ -0,0 +1,47 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef intl_components_BidiClass_h_
+#define intl_components_BidiClass_h_
+
+namespace mozilla::intl {
+
+/**
+ * Read ftp://ftp.unicode.org/Public/UNIDATA/ReadMe-Latest.txt
+ * section BIDIRECTIONAL PROPERTIES
+ * for the detailed definition of the following categories
+ *
+ * The values here must match the equivalents in %bidicategorycode in
+ * mozilla/intl/unicharutil/tools/genUnicodePropertyData.pl,
+ * and must also match the values used by ICU's UCharDirection.
+ */
+enum class BidiClass : uint8_t {
+ LeftToRight = 0,
+ RightToLeft = 1,
+ EuropeanNumber = 2,
+ EuropeanNumberSeparator = 3,
+ EuropeanNumberTerminator = 4,
+ ArabicNumber = 5,
+ CommonNumberSeparator = 6,
+ BlockSeparator = 7,
+ SegmentSeparator = 8,
+ WhiteSpaceNeutral = 9,
+ OtherNeutral = 10,
+ LeftToRightEmbedding = 11,
+ LeftToRightOverride = 12,
+ RightToLeftArabic = 13,
+ RightToLeftEmbedding = 14,
+ RightToLeftOverride = 15,
+ PopDirectionalFormat = 16,
+ DirNonSpacingMark = 17,
+ BoundaryNeutral = 18,
+ FirstStrongIsolate = 19,
+ LeftToRightIsolate = 20,
+ RightToLeftIsolate = 21,
+ PopDirectionalIsolate = 22,
+ BidiClassCount
+};
+
+} // namespace mozilla::intl
+
+#endif
diff --git a/intl/components/src/BidiEmbeddingLevel.cpp b/intl/components/src/BidiEmbeddingLevel.cpp
new file mode 100644
index 0000000000..d3ef5da937
--- /dev/null
+++ b/intl/components/src/BidiEmbeddingLevel.cpp
@@ -0,0 +1,53 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/intl/BidiEmbeddingLevel.h"
+#include "mozilla/Casting.h"
+#include "mozilla/intl/ICU4CGlue.h"
+
+#include "unicode/ubidi.h"
+
+namespace mozilla::intl {
+
+bool BidiEmbeddingLevel::IsDefaultLTR() const {
+ return mValue == UBIDI_DEFAULT_LTR;
+};
+
+bool BidiEmbeddingLevel::IsDefaultRTL() const {
+ return mValue == UBIDI_DEFAULT_RTL;
+};
+
+bool BidiEmbeddingLevel::IsRTL() const {
+ // If the least significant bit is 1, then the embedding level
+ // is right-to-left.
+ // If the least significant bit is 0, then the embedding level
+ // is left-to-right.
+ return (mValue & 0x1) == 1;
+};
+
+bool BidiEmbeddingLevel::IsLTR() const { return !IsRTL(); };
+
+bool BidiEmbeddingLevel::IsSameDirection(BidiEmbeddingLevel aOther) const {
+ return (((mValue ^ aOther) & 1) == 0);
+}
+
+BidiEmbeddingLevel BidiEmbeddingLevel::LTR() { return BidiEmbeddingLevel(0); };
+
+BidiEmbeddingLevel BidiEmbeddingLevel::RTL() { return BidiEmbeddingLevel(1); };
+
+BidiEmbeddingLevel BidiEmbeddingLevel::DefaultLTR() {
+ return BidiEmbeddingLevel(UBIDI_DEFAULT_LTR);
+};
+
+BidiEmbeddingLevel BidiEmbeddingLevel::DefaultRTL() {
+ return BidiEmbeddingLevel(UBIDI_DEFAULT_RTL);
+};
+
+BidiDirection BidiEmbeddingLevel::Direction() {
+ return IsRTL() ? BidiDirection::RTL : BidiDirection::LTR;
+};
+
+uint8_t BidiEmbeddingLevel::Value() const { return mValue; }
+
+} // namespace mozilla::intl
diff --git a/intl/components/src/BidiEmbeddingLevel.h b/intl/components/src/BidiEmbeddingLevel.h
new file mode 100644
index 0000000000..1628b6392f
--- /dev/null
+++ b/intl/components/src/BidiEmbeddingLevel.h
@@ -0,0 +1,113 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef intl_components_BidiEmbeddingLevel_h_
+#define intl_components_BidiEmbeddingLevel_h_
+
+#include <cstdint>
+
+/**
+ * This file has the BidiEmbeddingLevel and BidiDirection enum broken out from
+ * the main Bidi class for faster includes. This code is used in Layout which
+ * could trigger long build times when changing core mozilla::intl files.
+ */
+namespace mozilla::intl {
+
+/**
+ * This enum unambiguously classifies text runs as either being left to right,
+ * or right to left.
+ */
+enum class BidiDirection : uint8_t {
+ // Left to right text.
+ LTR = 0,
+ // Right to left text.
+ RTL = 1,
+};
+
+/**
+ * Embedding levels are numbers that indicate how deeply the bidi text is
+ * embedded, and the direction of text on that embedding level. When switching
+ * between strongly LTR code points and strongly RTL code points the embedding
+ * level normally switches between an embedding level of 0 (LTR) and 1 (RTL).
+ * The only time the embedding level increases is if the embedding code points
+ * are used. This is the Left-to-Right Embedding (LRE) code point (U+202A), or
+ * the Right-to-Left Embedding (RLE) code point (U+202B). The minimum
+ * embedding level of text is zero, and the maximum explicit depth is 125.
+ *
+ * The most significant bit is reserved for additional meaning. It can be used
+ * to signify in certain APIs that the text should by default be LTR or RTL if
+ * no strongly directional code points are found.
+ *
+ * Bug 1736595: At the time of this writing, some places in Gecko code use a 1
+ * in the most significant bit to indicate that an embedding level has not
+ * been set. This leads to an ambiguous understanding of what the most
+ * significant bit actually means.
+ */
+class BidiEmbeddingLevel {
+ public:
+ explicit BidiEmbeddingLevel(uint8_t aValue) : mValue(aValue) {}
+ explicit BidiEmbeddingLevel(int aValue)
+ : mValue(static_cast<uint8_t>(aValue)) {}
+
+ BidiEmbeddingLevel() = default;
+
+ // Enable the copy operators, but disable move as this is only a uint8_t.
+ BidiEmbeddingLevel(const BidiEmbeddingLevel& other) = default;
+ BidiEmbeddingLevel& operator=(const BidiEmbeddingLevel& other) = default;
+
+ /**
+ * Determine the direction of the embedding level by looking at the least
+ * significant bit. If it is 0, then it is LTR. If it is 1, then it is RTL.
+ */
+ BidiDirection Direction();
+
+ /**
+ * Create a left-to-right embedding level.
+ */
+ static BidiEmbeddingLevel LTR();
+
+ /**
+ * Create a right-to-left embedding level.
+ */
+ static BidiEmbeddingLevel RTL();
+
+ /**
+ * When passed into `SetParagraph`, the direction is determined by first
+ * strongly directional character, with the default set to left-to-right if
+ * none is found.
+ *
+ * This is encoded with the highest bit set to 1.
+ */
+ static BidiEmbeddingLevel DefaultLTR();
+
+ /**
+ * When passed into `SetParagraph`, the direction is determined by first
+ * strongly directional character, with the default set to right-to-left if
+ * none is found.
+ *
+ * This is encoded with the highest and lowest bits set to 1.
+ */
+ static BidiEmbeddingLevel DefaultRTL();
+
+ bool IsDefaultLTR() const;
+ bool IsDefaultRTL() const;
+ bool IsLTR() const;
+ bool IsRTL() const;
+ bool IsSameDirection(BidiEmbeddingLevel aOther) const;
+
+ /**
+ * Get the underlying value as a uint8_t.
+ */
+ uint8_t Value() const;
+
+ /**
+ * Implicitly convert to the underlying value.
+ */
+ operator uint8_t() const { return mValue; }
+
+ private:
+ uint8_t mValue = 0;
+};
+
+} // namespace mozilla::intl
+#endif
diff --git a/intl/components/src/Calendar.cpp b/intl/components/src/Calendar.cpp
new file mode 100644
index 0000000000..d44dedaaae
--- /dev/null
+++ b/intl/components/src/Calendar.cpp
@@ -0,0 +1,172 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/intl/Calendar.h"
+
+#include "unicode/ucal.h"
+#include "unicode/uloc.h"
+#include "unicode/utypes.h"
+
+namespace mozilla::intl {
+
+/* static */
+Result<UniquePtr<Calendar>, ICUError> Calendar::TryCreate(
+ const char* aLocale, Maybe<Span<const char16_t>> aTimeZoneOverride) {
+ UErrorCode status = U_ZERO_ERROR;
+ const UChar* zoneID = nullptr;
+ int32_t zoneIDLen = 0;
+ if (aTimeZoneOverride) {
+ zoneIDLen = static_cast<int32_t>(aTimeZoneOverride->Length());
+ zoneID = aTimeZoneOverride->Elements();
+ }
+
+ UCalendar* calendar =
+ ucal_open(zoneID, zoneIDLen, aLocale, UCAL_DEFAULT, &status);
+
+ if (U_FAILURE(status)) {
+ return Err(ToICUError(status));
+ }
+
+ return MakeUnique<Calendar>(calendar);
+}
+
+Result<Span<const char>, ICUError> Calendar::GetBcp47Type() {
+ UErrorCode status = U_ZERO_ERROR;
+ const char* oldType = ucal_getType(mCalendar, &status);
+ if (U_FAILURE(status)) {
+ return Err(ToICUError(status));
+ }
+ const char* bcp47Type = uloc_toUnicodeLocaleType("calendar", oldType);
+
+ if (!bcp47Type) {
+ return Err(ICUError::InternalError);
+ }
+
+ return MakeStringSpan(bcp47Type);
+}
+
+static Weekday WeekdayFromDaysOfWeek(UCalendarDaysOfWeek weekday) {
+ switch (weekday) {
+ case UCAL_MONDAY:
+ return Weekday::Monday;
+ case UCAL_TUESDAY:
+ return Weekday::Tuesday;
+ case UCAL_WEDNESDAY:
+ return Weekday::Wednesday;
+ case UCAL_THURSDAY:
+ return Weekday::Thursday;
+ case UCAL_FRIDAY:
+ return Weekday::Friday;
+ case UCAL_SATURDAY:
+ return Weekday::Saturday;
+ case UCAL_SUNDAY:
+ return Weekday::Sunday;
+ }
+ MOZ_CRASH("unexpected weekday value");
+}
+
+Result<EnumSet<Weekday>, ICUError> Calendar::GetWeekend() {
+ static_assert(static_cast<int32_t>(UCAL_SUNDAY) == 1);
+ static_assert(static_cast<int32_t>(UCAL_SATURDAY) == 7);
+
+ UErrorCode status = U_ZERO_ERROR;
+
+ EnumSet<Weekday> weekend;
+ for (int32_t i = UCAL_SUNDAY; i <= UCAL_SATURDAY; i++) {
+ auto dayOfWeek = static_cast<UCalendarDaysOfWeek>(i);
+ auto type = ucal_getDayOfWeekType(mCalendar, dayOfWeek, &status);
+ if (U_FAILURE(status)) {
+ return Err(ToICUError(status));
+ }
+
+ switch (type) {
+ case UCAL_WEEKEND_ONSET:
+ // Treat days which start as a weekday as weekdays.
+ [[fallthrough]];
+ case UCAL_WEEKDAY:
+ break;
+
+ case UCAL_WEEKEND_CEASE:
+ // Treat days which start as a weekend day as weekend days.
+ [[fallthrough]];
+ case UCAL_WEEKEND:
+ weekend += WeekdayFromDaysOfWeek(dayOfWeek);
+ break;
+ }
+ }
+
+ return weekend;
+}
+
+Weekday Calendar::GetFirstDayOfWeek() {
+ int32_t firstDayOfWeek = ucal_getAttribute(mCalendar, UCAL_FIRST_DAY_OF_WEEK);
+ MOZ_ASSERT(UCAL_SUNDAY <= firstDayOfWeek && firstDayOfWeek <= UCAL_SATURDAY);
+
+ return WeekdayFromDaysOfWeek(
+ static_cast<UCalendarDaysOfWeek>(firstDayOfWeek));
+}
+
+int32_t Calendar::GetMinimalDaysInFirstWeek() {
+ int32_t minimalDays =
+ ucal_getAttribute(mCalendar, UCAL_MINIMAL_DAYS_IN_FIRST_WEEK);
+ MOZ_ASSERT(1 <= minimalDays && minimalDays <= 7);
+
+ return minimalDays;
+}
+
+Result<Ok, ICUError> Calendar::SetTimeInMs(double aUnixEpoch) {
+ UErrorCode status = U_ZERO_ERROR;
+ ucal_setMillis(mCalendar, aUnixEpoch, &status);
+ if (U_FAILURE(status)) {
+ return Err(ToICUError(status));
+ }
+ return Ok{};
+}
+
+/* static */
+Result<SpanEnumeration<char>, ICUError>
+Calendar::GetLegacyKeywordValuesForLocale(const char* aLocale) {
+ UErrorCode status = U_ZERO_ERROR;
+ UEnumeration* enumeration = ucal_getKeywordValuesForLocale(
+ "calendar", aLocale, /* commonlyUsed */ false, &status);
+
+ if (U_SUCCESS(status)) {
+ return SpanEnumeration<char>(enumeration);
+ }
+
+ return Err(ToICUError(status));
+}
+
+/* static */
+SpanResult<char> Calendar::LegacyIdentifierToBcp47(const char* aIdentifier,
+ int32_t aLength) {
+ if (aIdentifier == nullptr) {
+ return Err(InternalError{});
+ }
+ // aLength is not needed here, as the ICU call uses the null terminated
+ // string.
+ return MakeStringSpan(uloc_toUnicodeLocaleType("ca", aIdentifier));
+}
+
+/* static */
+Result<Calendar::Bcp47IdentifierEnumeration, ICUError>
+Calendar::GetBcp47KeywordValuesForLocale(const char* aLocale,
+ CommonlyUsed aCommonlyUsed) {
+ UErrorCode status = U_ZERO_ERROR;
+ UEnumeration* enumeration = ucal_getKeywordValuesForLocale(
+ "calendar", aLocale, static_cast<bool>(aCommonlyUsed), &status);
+
+ if (U_SUCCESS(status)) {
+ return Bcp47IdentifierEnumeration(enumeration);
+ }
+
+ return Err(ToICUError(status));
+}
+
+Calendar::~Calendar() {
+ MOZ_ASSERT(mCalendar);
+ ucal_close(mCalendar);
+}
+
+} // namespace mozilla::intl
diff --git a/intl/components/src/Calendar.h b/intl/components/src/Calendar.h
new file mode 100644
index 0000000000..32975bc376
--- /dev/null
+++ b/intl/components/src/Calendar.h
@@ -0,0 +1,133 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef intl_components_Calendar_h_
+#define intl_components_Calendar_h_
+
+#include "mozilla/Assertions.h"
+#include "mozilla/EnumSet.h"
+#include "mozilla/intl/ICU4CGlue.h"
+#include "mozilla/intl/ICUError.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/Result.h"
+#include "mozilla/Span.h"
+#include "mozilla/UniquePtr.h"
+
+using UCalendar = void*;
+
+namespace mozilla::intl {
+
+/**
+ * Weekdays in the ISO-8601 calendar.
+ */
+enum class Weekday : uint8_t {
+ Monday = 1,
+ Tuesday,
+ Wednesday,
+ Thursday,
+ Friday,
+ Saturday,
+ Sunday,
+};
+
+/**
+ * This component is a Mozilla-focused API for working with calendar systems in
+ * internationalization code. It is used in coordination with other operations
+ * such as datetime formatting.
+ */
+class Calendar final {
+ public:
+ explicit Calendar(UCalendar* aCalendar) : mCalendar(aCalendar) {
+ MOZ_ASSERT(aCalendar);
+ };
+
+ // Do not allow copy as this class owns the ICU resource. Move is not
+ // currently implemented, but a custom move operator could be created if
+ // needed.
+ Calendar(const Calendar&) = delete;
+ Calendar& operator=(const Calendar&) = delete;
+
+ /**
+ * Create a Calendar.
+ */
+ static Result<UniquePtr<Calendar>, ICUError> TryCreate(
+ const char* aLocale,
+ Maybe<Span<const char16_t>> aTimeZoneOverride = Nothing{});
+
+ /**
+ * Get the BCP 47 keyword value string designating the calendar type. For
+ * instance "gregory", "chinese", "islamic-civil", etc.
+ */
+ Result<Span<const char>, ICUError> GetBcp47Type();
+
+ /**
+ * Return the set of weekdays which are considered as part of the weekend.
+ */
+ Result<EnumSet<Weekday>, ICUError> GetWeekend();
+
+ /**
+ * Return the weekday which is considered the first day of the week.
+ */
+ Weekday GetFirstDayOfWeek();
+
+ /**
+ * Return the minimal number of days in the first week of a year.
+ */
+ int32_t GetMinimalDaysInFirstWeek();
+
+ /**
+ * Set the time for the calendar relative to the number of milliseconds since
+ * 1 January 1970, UTC.
+ */
+ Result<Ok, ICUError> SetTimeInMs(double aUnixEpoch);
+
+ /**
+ * Return ICU legacy keywords, such as "gregorian", "islamic",
+ * "islamic-civil", "hebrew", etc.
+ */
+ static Result<SpanEnumeration<char>, ICUError>
+ GetLegacyKeywordValuesForLocale(const char* aLocale);
+
+ private:
+ /**
+ * Internal function to convert a legacy calendar identifier to the newer
+ * BCP 47 identifier.
+ */
+ static SpanResult<char> LegacyIdentifierToBcp47(const char* aIdentifier,
+ int32_t aLength);
+
+ public:
+ enum class CommonlyUsed : bool {
+ /**
+ * Select all possible values, even when not commonly used by a locale.
+ */
+ No,
+
+ /**
+ * Only select the values which are commonly used by a locale.
+ */
+ Yes,
+ };
+
+ using Bcp47IdentifierEnumeration =
+ Enumeration<char, SpanResult<char>, Calendar::LegacyIdentifierToBcp47>;
+
+ /**
+ * Return BCP 47 Unicode locale extension type keywords.
+ */
+ static Result<Bcp47IdentifierEnumeration, ICUError>
+ GetBcp47KeywordValuesForLocale(const char* aLocale,
+ CommonlyUsed aCommonlyUsed = CommonlyUsed::No);
+
+ ~Calendar();
+
+ private:
+ friend class DateIntervalFormat;
+ UCalendar* GetUCalendar() const { return mCalendar; }
+
+ UCalendar* mCalendar = nullptr;
+};
+
+} // namespace mozilla::intl
+
+#endif
diff --git a/intl/components/src/Collator.cpp b/intl/components/src/Collator.cpp
new file mode 100644
index 0000000000..93052932de
--- /dev/null
+++ b/intl/components/src/Collator.cpp
@@ -0,0 +1,305 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <algorithm>
+#include <string.h>
+#include "mozilla/intl/Collator.h"
+
+namespace mozilla::intl {
+
+// Wrap a raw, non-null UCollator. The wrapped collator is closed by
+// ~Collator via ucol_close.
+Collator::Collator(UCollator* aCollator) : mCollator(aCollator) {
+ MOZ_ASSERT(aCollator);
+}
+
+// Close the wrapped ICU collator, if one is held.
+Collator::~Collator() {
+  if (auto* collator = mCollator.GetMut()) {
+    ucol_close(collator);
+  }
+}
+
+/**
+ * Open an ICU collator for |aLocale| and wrap it in a heap-allocated Collator.
+ *
+ * Returns the converted ICU error when ucol_open reports a failure.
+ */
+Result<UniquePtr<Collator>, ICUError> Collator::TryCreate(const char* aLocale) {
+  UErrorCode status = U_ZERO_ERROR;
+  UCollator* collator = ucol_open(IcuLocale(aLocale), &status);
+  if (U_FAILURE(status)) {
+    return Err(ToICUError(status));
+  }
+  return MakeUnique<Collator>(collator);
+}
+
+/**
+ * Compare two UTF-16 strings with the wrapped collator.
+ *
+ * Returns -1, 0 or 1 when |aSource| sorts before, equal to, or after
+ * |aTarget|.
+ */
+int32_t Collator::CompareStrings(Span<const char16_t> aSource,
+                                 Span<const char16_t> aTarget) const {
+  const auto sourceLength = static_cast<int32_t>(aSource.size());
+  const auto targetLength = static_cast<int32_t>(aTarget.size());
+
+  const UCollationResult order =
+      ucol_strcoll(mCollator.GetConst(), aSource.data(), sourceLength,
+                   aTarget.data(), targetLength);
+  if (order == UCOL_LESS) {
+    return -1;
+  }
+  if (order == UCOL_EQUAL) {
+    return 0;
+  }
+  if (order == UCOL_GREATER) {
+    return 1;
+  }
+
+  MOZ_ASSERT_UNREACHABLE("ucol_strcoll returned bad UCollationResult");
+  return 0;
+}
+
+/**
+ * Compare two sort keys byte-wise over their common prefix (via strncmp). If
+ * the prefixes are equal, the longer key sorts later.
+ *
+ * Returns -1, 0 or 1.
+ */
+int32_t Collator::CompareSortKeys(Span<const uint8_t> aKey1,
+                                  Span<const uint8_t> aKey2) const {
+  const auto length1 = aKey1.Length();
+  const auto length2 = aKey2.Length();
+  const size_t commonLength = std::min(length1, length2);
+
+  const int32_t prefixOrder =
+      strncmp(reinterpret_cast<const char*>(aKey1.Elements()),
+              reinterpret_cast<const char*>(aKey2.Elements()), commonLength);
+  if (prefixOrder != 0) {
+    return prefixOrder < 0 ? -1 : 1;
+  }
+
+  // The common prefix is identical, so only the lengths can break the tie.
+  if (length1 == length2) {
+    return 0;
+  }
+  // A key that extends the other one comes later.
+  return length1 > length2 ? 1 : -1;
+}
+
+// Map a Collator::CaseFirst option to the ICU attribute value that is passed
+// for UCOL_CASE_FIRST. The switch deliberately has no default: branch; the
+// trailing return is only reached if |caseFirst| matches no enumerator.
+static UColAttributeValue CaseFirstToICU(Collator::CaseFirst caseFirst) {
+ switch (caseFirst) {
+ case Collator::CaseFirst::False:
+ return UCOL_OFF;
+ case Collator::CaseFirst::Upper:
+ return UCOL_UPPER_FIRST;
+ case Collator::CaseFirst::Lower:
+ return UCOL_LOWER_FIRST;
+ }
+
+ MOZ_ASSERT_UNREACHABLE();
+ return UCOL_DEFAULT;
+}
+
+// Define this as a macro to work around exposing the UColAttributeValue type to
+// the header file. Collator::Feature is private to the class.
+//
+// Expands to a switch statement that assigns to |featureICU| the
+// UColAttributeValue (UCOL_ON, UCOL_OFF or UCOL_DEFAULT) matching |feature|.
+#define FEATURE_TO_ICU(featureICU, feature) \
+ switch (feature) { \
+ case Collator::Feature::On: \
+ (featureICU) = UCOL_ON; \
+ break; \
+ case Collator::Feature::Off: \
+ (featureICU) = UCOL_OFF; \
+ break; \
+ case Collator::Feature::Default: \
+ (featureICU) = UCOL_DEFAULT; \
+ break; \
+ }
+
+/**
+ * Translate |aStrength| into the matching ICU strength value and apply it to
+ * the wrapped collator.
+ */
+void Collator::SetStrength(Collator::Strength aStrength) {
+  // Start from the ICU default so that |strength| is never read while
+  // indeterminate, even if |aStrength| holds a value outside the enumerators.
+  // The switch intentionally has no default: branch so the compiler keeps
+  // checking it for exhaustiveness.
+  UColAttributeValue strength = UCOL_DEFAULT_STRENGTH;
+  switch (aStrength) {
+    case Collator::Strength::Default:
+      strength = UCOL_DEFAULT_STRENGTH;
+      break;
+    case Collator::Strength::Primary:
+      strength = UCOL_PRIMARY;
+      break;
+    case Collator::Strength::Secondary:
+      strength = UCOL_SECONDARY;
+      break;
+    case Collator::Strength::Tertiary:
+      strength = UCOL_TERTIARY;
+      break;
+    case Collator::Strength::Quaternary:
+      strength = UCOL_QUATERNARY;
+      break;
+    case Collator::Strength::Identical:
+      strength = UCOL_IDENTICAL;
+      break;
+  }
+
+  ucol_setStrength(mCollator.GetMut(), strength);
+}
+
+/**
+ * Set the ICU UCOL_CASE_LEVEL attribute according to |aFeature|.
+ *
+ * Returns the ICU status of the attribute update converted to an ICUResult.
+ */
+ICUResult Collator::SetCaseLevel(Collator::Feature aFeature) {
+  UErrorCode status = U_ZERO_ERROR;
+  // Initialized so the value is well-defined even if |aFeature| matches none
+  // of the cases handled by FEATURE_TO_ICU.
+  UColAttributeValue featureICU = UCOL_DEFAULT;
+  FEATURE_TO_ICU(featureICU, aFeature);
+  ucol_setAttribute(mCollator.GetMut(), UCOL_CASE_LEVEL, featureICU, &status);
+  return ToICUResult(status);
+}
+
+/**
+ * Set the ICU UCOL_ALTERNATE_HANDLING attribute according to
+ * |aAlternateHandling|.
+ *
+ * Returns the ICU status of the attribute update converted to an ICUResult.
+ */
+ICUResult Collator::SetAlternateHandling(
+    Collator::AlternateHandling aAlternateHandling) {
+  UErrorCode status = U_ZERO_ERROR;
+  // Initialized so the value is well-defined even if |aAlternateHandling|
+  // matches no enumerator. The switch has no default: branch so that it stays
+  // exhaustively checked.
+  UColAttributeValue handling = UCOL_DEFAULT;
+  switch (aAlternateHandling) {
+    case Collator::AlternateHandling::NonIgnorable:
+      handling = UCOL_NON_IGNORABLE;
+      break;
+    case Collator::AlternateHandling::Shifted:
+      handling = UCOL_SHIFTED;
+      break;
+    case Collator::AlternateHandling::Default:
+      handling = UCOL_DEFAULT;
+      break;
+  }
+
+  ucol_setAttribute(mCollator.GetMut(), UCOL_ALTERNATE_HANDLING, handling,
+                    &status);
+  return ToICUResult(status);
+}
+
+/**
+ * Set the ICU UCOL_NUMERIC_COLLATION attribute according to |aFeature|.
+ *
+ * Returns the ICU status of the attribute update converted to an ICUResult.
+ */
+ICUResult Collator::SetNumericCollation(Collator::Feature aFeature) {
+  UErrorCode status = U_ZERO_ERROR;
+  // Initialized so the value is well-defined even if |aFeature| matches none
+  // of the cases handled by FEATURE_TO_ICU.
+  UColAttributeValue featureICU = UCOL_DEFAULT;
+  FEATURE_TO_ICU(featureICU, aFeature);
+
+  ucol_setAttribute(mCollator.GetMut(), UCOL_NUMERIC_COLLATION, featureICU,
+                    &status);
+  return ToICUResult(status);
+}
+
+/**
+ * Set the ICU UCOL_NORMALIZATION_MODE attribute according to |aFeature|.
+ *
+ * Returns the ICU status of the attribute update converted to an ICUResult.
+ */
+ICUResult Collator::SetNormalizationMode(Collator::Feature aFeature) {
+  UErrorCode status = U_ZERO_ERROR;
+  // Initialized so the value is well-defined even if |aFeature| matches none
+  // of the cases handled by FEATURE_TO_ICU.
+  UColAttributeValue featureICU = UCOL_DEFAULT;
+  FEATURE_TO_ICU(featureICU, aFeature);
+  ucol_setAttribute(mCollator.GetMut(), UCOL_NORMALIZATION_MODE, featureICU,
+                    &status);
+  return ToICUResult(status);
+}
+
+/**
+ * Set the ICU UCOL_CASE_FIRST attribute from |aCaseFirst|.
+ *
+ * Returns the ICU status of the attribute update converted to an ICUResult.
+ */
+ICUResult Collator::SetCaseFirst(Collator::CaseFirst aCaseFirst) {
+  const UColAttributeValue caseFirstICU = CaseFirstToICU(aCaseFirst);
+
+  UErrorCode status = U_ZERO_ERROR;
+  ucol_setAttribute(mCollator.GetMut(), UCOL_CASE_FIRST, caseFirstICU,
+                    &status);
+  return ToICUResult(status);
+}
+
+/**
+ * Apply |aOptions| to the wrapped ICU collator.
+ *
+ * When |aPrevOptions| is provided and all of its fields equal those of
+ * |aOptions|, nothing is changed and Ok is returned. Otherwise the strength,
+ * alternate handling, case level, numeric collation, normalization mode and
+ * case-first attributes are set in that order; the first failing update is
+ * returned as the error.
+ */
+ICUResult Collator::SetOptions(const Options& aOptions,
+                               const Maybe<Options&> aPrevOptions) {
+  if (aPrevOptions) {
+    // Skip all ICU calls when the previous options are identical.
+    const Options& previous = *aPrevOptions;
+    const bool unchanged =
+        previous.sensitivity == aOptions.sensitivity &&
+        previous.caseFirst == aOptions.caseFirst &&
+        previous.ignorePunctuation == aOptions.ignorePunctuation &&
+        previous.numeric == aOptions.numeric;
+    if (unchanged) {
+      return Ok();
+    }
+  }
+
+  // Only the "case" sensitivity enables the extra case level.
+  const Collator::Feature caseLevel =
+      aOptions.sensitivity == Collator::Sensitivity::Case
+          ? Collator::Feature::On
+          : Collator::Feature::Off;
+
+  Collator::Strength strength = Collator::Strength::Default;
+  switch (aOptions.sensitivity) {
+    case Collator::Sensitivity::Base:
+    case Collator::Sensitivity::Case:
+      strength = Collator::Strength::Primary;
+      break;
+    case Collator::Sensitivity::Accent:
+      strength = Collator::Strength::Secondary;
+      break;
+    case Collator::Sensitivity::Variant:
+      strength = Collator::Strength::Tertiary;
+      break;
+  }
+
+  SetStrength(strength);
+
+  // According to the ICU team, UCOL_SHIFTED causes punctuation to be
+  // ignored. Looking at Unicode Technical Report 35, Unicode Locale Data
+  // Markup Language, "shifted" causes whitespace and punctuation to be
+  // ignored - that's a bit more than asked for, but there's no way to get
+  // less.
+  const Collator::AlternateHandling alternateHandling =
+      aOptions.ignorePunctuation ? Collator::AlternateHandling::Shifted
+                                 : Collator::AlternateHandling::Default;
+  MOZ_TRY(SetAlternateHandling(alternateHandling));
+
+  MOZ_TRY(SetCaseLevel(caseLevel));
+
+  const Collator::Feature numeric =
+      aOptions.numeric ? Collator::Feature::On : Collator::Feature::Off;
+  MOZ_TRY(SetNumericCollation(numeric));
+
+  // Normalization is always on to meet the canonical equivalence requirement.
+  MOZ_TRY(SetNormalizationMode(Collator::Feature::On));
+
+  MOZ_TRY(SetCaseFirst(aOptions.caseFirst));
+
+  return Ok();
+}
+
+#undef FEATURE_TO_ICU
+
+/**
+ * Read the UCOL_CASE_FIRST attribute of the wrapped collator and map it back
+ * to a Collator::CaseFirst value, or return the converted ICU error.
+ */
+Result<Collator::CaseFirst, ICUError> Collator::GetCaseFirst() const {
+  UErrorCode status = U_ZERO_ERROR;
+  const UColAttributeValue attribute =
+      ucol_getAttribute(mCollator.GetConst(), UCOL_CASE_FIRST, &status);
+  if (U_FAILURE(status)) {
+    return Err(ToICUError(status));
+  }
+
+  switch (attribute) {
+    case UCOL_OFF:
+      return CaseFirst::False;
+    case UCOL_UPPER_FIRST:
+      return CaseFirst::Upper;
+    default:
+      // Anything else is expected to be "lower first".
+      MOZ_ASSERT(attribute == UCOL_LOWER_FIRST);
+      return CaseFirst::Lower;
+  }
+}
+
+/* static */
+// Ask ICU for the "collation" keyword values of |aLocale| and wrap the
+// resulting enumeration; |aCommonlyUsed| is forwarded to ICU as a boolean.
+Result<Collator::Bcp47ExtEnumeration, ICUError>
+Collator::GetBcp47KeywordValuesForLocale(const char* aLocale,
+                                         CommonlyUsed aCommonlyUsed) {
+  const bool commonlyUsed = static_cast<bool>(aCommonlyUsed);
+
+  UErrorCode status = U_ZERO_ERROR;
+  UEnumeration* values = ucol_getKeywordValuesForLocale("collation", aLocale,
+                                                        commonlyUsed, &status);
+  if (U_FAILURE(status)) {
+    return Err(ToICUError(status));
+  }
+
+  return Bcp47ExtEnumeration(values);
+}
+
+/* static */
+// Ask ICU for all values of the "collation" keyword and wrap the resulting
+// enumeration.
+Result<Collator::Bcp47ExtEnumeration, ICUError>
+Collator::GetBcp47KeywordValues() {
+  UErrorCode status = U_ZERO_ERROR;
+  UEnumeration* values = ucol_getKeywordValues("collation", &status);
+  if (U_FAILURE(status)) {
+    return Err(ToICUError(status));
+  }
+
+  return Bcp47ExtEnumeration(values);
+}
+
+/* static */
+// Convert a collation keyword value to its BCP 47 "co" extension type via
+// uloc_toUnicodeLocaleType. A null |aKeyword| is reported as an internal
+// error. |aLength| is not used by this function.
+SpanResult<char> Collator::KeywordValueToBcp47Extension(const char* aKeyword,
+                                                        int32_t aLength) {
+  if (!aKeyword) {
+    return Err(InternalError{});
+  }
+
+  const char* bcp47Type = uloc_toUnicodeLocaleType("co", aKeyword);
+  return MakeStringSpan(bcp47Type);
+}
+
+} // namespace mozilla::intl
diff --git a/intl/components/src/Collator.h b/intl/components/src/Collator.h
new file mode 100644
index 0000000000..dcb5a12a4f
--- /dev/null
+++ b/intl/components/src/Collator.h
@@ -0,0 +1,322 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef intl_components_Collator_h_
+#define intl_components_Collator_h_
+
+#ifndef JS_STANDALONE
+# include "gtest/MozGtestFriend.h"
+#endif
+
+#include "unicode/ucol.h"
+
+#include "mozilla/intl/ICU4CGlue.h"
+#include "mozilla/intl/ICUError.h"
+#include "mozilla/Result.h"
+#include "mozilla/Span.h"
+
+namespace mozilla::intl {
+
+class Collator final {
+ public:
+ /**
+ * Construct from a raw UCollator. This is public so that the UniquePtr can
+ * access it.
+ */
+ explicit Collator(UCollator* aCollator);
+
+ // Do not allow copy as this class owns the ICU resource. Move is not
+ // currently implemented, but a custom move operator could be created if
+ // needed.
+ Collator(const Collator&) = delete;
+ Collator& operator=(const Collator&) = delete;
+
+ /**
+ * Attempt to initialize a new collator.
+ */
+ static Result<UniquePtr<Collator>, ICUError> TryCreate(const char* aLocale);
+
+ ~Collator();
+
+ /**
+ * Get a sort key with the provided UTF-16 string, and store the sort key into
+ * the provided buffer of byte array.
+ * Every sort key ends with 0x00, and the terminating 0x00 byte is counted
+ * when calculating the length of buffer. For the purpose of other byte
+ * values, check the "Special Byte Values" document from ICU.
+ *
+ * https://icu.unicode.org/design/collation/bytes
+ */
+ template <typename B>
+ ICUResult GetSortKey(Span<const char16_t> aString, B& aBuffer) const {
+ return FillBufferWithICUCall(
+ aBuffer,
+ [this, aString](uint8_t* target, int32_t length, UErrorCode* status) {
+ // ucol_getSortKey doesn't use the error code to report
+ // U_BUFFER_OVERFLOW_ERROR, instead it uses the return value to
+ // indicate the desired length to store the key. So we update the
+ // UErrorCode accordingly to let FillBufferWithICUCall resize the
+ // buffer.
+ int32_t len = ucol_getSortKey(mCollator.GetConst(), aString.data(),
+ static_cast<int32_t>(aString.size()),
+ target, length);
+ if (len == 0) {
+ // Returns 0 means there's an internal error.
+ *status = U_INTERNAL_PROGRAM_ERROR;
+ } else if (len > length) {
+ *status = U_BUFFER_OVERFLOW_ERROR;
+ } else {
+ *status = U_ZERO_ERROR;
+ }
+ return len;
+ });
+ }
+
+ int32_t CompareStrings(Span<const char16_t> aSource,
+ Span<const char16_t> aTarget) const;
+
+ int32_t CompareSortKeys(Span<const uint8_t> aKey1,
+ Span<const uint8_t> aKey2) const;
+
+ /**
+ * Determine how casing affects sorting. These options map to ECMA 402
+ * collator options.
+ *
+ * https://tc39.es/ecma402/#sec-initializecollator
+ */
+ enum class CaseFirst {
+ // Sort upper case first.
+ Upper,
+ // Sort lower case first.
+ Lower,
+ // Orders upper and lower case letters in accordance to their tertiary
+ // weights.
+ False,
+ };
+
+ /**
+ * Which differences in the strings should lead to differences in collation
+ * comparisons.
+ *
+ * This setting needs to be ECMA 402 compliant.
+ * https://tc39.es/ecma402/#sec-collator-comparestrings
+ */
+ enum class Sensitivity {
+ // Only strings that differ in base letters compare as unequal.
+ // Examples: a ≠ b, a = á, a = A.
+ Base,
+ // Only strings that differ in base letters or accents and other diacritic
+ // marks compare as unequal.
+ // Examples: a ≠ b, a ≠ á, a = A.
+ Accent,
+ // Only strings that differ in base letters or case compare as unequal.
+ // Examples: a ≠ b, a = á, a ≠ A.
+ Case,
+ // Strings that differ in base letters, accents and other diacritic marks,
+ // or case compare as unequal. Other differences may also be taken into
+ // consideration.
+ // Examples: a ≠ b, a ≠ á, a ≠ A.
+ Variant,
+ };
+
+ /**
+ * These options map to ECMA 402 collator options. Make sure the defaults map
+ * to the default initialized values of ECMA 402.
+ *
+ * https://tc39.es/ecma402/#sec-initializecollator
+ */
+ struct Options {
+ Sensitivity sensitivity = Sensitivity::Variant;
+ CaseFirst caseFirst = CaseFirst::False;
+ bool ignorePunctuation = false;
+ bool numeric = false;
+ };
+
+ /**
+ * Change the configuration of the options.
+ */
+ ICUResult SetOptions(const Options& aOptions,
+ const Maybe<Options&> aPrevOptions = Nothing());
+
+ /**
+ * Return the case first option of this collator.
+ */
+ Result<CaseFirst, ICUError> GetCaseFirst() const;
+
+ /**
+ * Map keywords to their BCP 47 equivalents.
+ */
+ static SpanResult<char> KeywordValueToBcp47Extension(const char* aKeyword,
+ int32_t aLength);
+
+ enum class CommonlyUsed : bool {
+ /**
+ * Select all possible values, even when not commonly used by a locale.
+ */
+ No,
+
+ /**
+ * Only select the values which are commonly used by a locale.
+ */
+ Yes,
+ };
+
+ using Bcp47ExtEnumeration =
+ Enumeration<char, SpanResult<char>,
+ Collator::KeywordValueToBcp47Extension>;
+
+ /**
+ * Returns an iterator of collator locale extensions in the preferred order.
+ * These extensions can be used in BCP 47 locales. For instance this
+ * iterator could return "phonebk" and could be applied to the German locale
+ * "de" as "de-co-phonebk" for a phonebook-style collation.
+ *
+ * The collation extensions can be found here:
+ * http://cldr.unicode.org/core-spec/#Key_Type_Definitions
+ */
+ static Result<Bcp47ExtEnumeration, ICUError> GetBcp47KeywordValuesForLocale(
+ const char* aLocale, CommonlyUsed aCommonlyUsed = CommonlyUsed::No);
+
+ /**
+ * Returns an iterator over all possible collator locale extensions.
+ * These extensions can be used in BCP 47 locales. For instance this
+ * iterator could return "phonebk" and could be applied to the German locale
+ * "de" as "de-co-phonebk" for a phonebook-style collation.
+ *
+ * The collation extensions can be found here:
+ * http://cldr.unicode.org/core-spec/#Key_Type_Definitions
+ */
+ static Result<Bcp47ExtEnumeration, ICUError> GetBcp47KeywordValues();
+
+ /**
+ * Returns an iterator over all supported collator locales.
+ *
+ * The returned strings are ICU locale identifiers and NOT BCP 47 language
+ * tags.
+ *
+ * Also see <https://unicode-org.github.io/icu/userguide/locale>.
+ */
+ static auto GetAvailableLocales() {
+ return AvailableLocalesEnumeration<ucol_countAvailable,
+ ucol_getAvailable>();
+ }
+
+ private:
+ /**
+ * Toggle features, or use the default setting.
+ */
+ enum class Feature {
+ // Turn the feature on.
+ On,
+ // Turn the feature off.
+ Off,
+ // Use the default setting for the feature.
+ Default,
+ };
+
+ /**
+ * Attribute for handling variable elements.
+ */
+ enum class AlternateHandling {
+ // Treats all the codepoints with non-ignorable primary weights in the
+ // same way (default)
+ NonIgnorable,
+ // Causes codepoints with primary weights that are equal or below the
+ // variable top value to be ignored on primary level and moved to the
+ // quaternary level.
+ Shifted,
+ Default,
+ };
+
+ /**
+ * The strength attribute.
+ *
+ * The usual strength for most locales (except Japanese) is tertiary.
+ *
+ * Quaternary strength is useful when combined with shifted setting for
+ * alternate handling attribute and for JIS X 4061 collation, when it is used
+ * to distinguish between Katakana and Hiragana. Otherwise, quaternary level
+ * is affected only by the number of non-ignorable code points in the string.
+ *
+ * Identical strength is rarely useful, as it amounts to codepoints of the NFD
+ * form of the string.
+ */
+ enum class Strength {
+ // Primary collation strength.
+ Primary,
+ // Secondary collation strength.
+ Secondary,
+ // Tertiary collation strength.
+ Tertiary,
+ // Quaternary collation strength.
+ Quaternary,
+ // Identical collation strength.
+ Identical,
+ Default,
+ };
+
+ /**
+ * Configure the Collator::Strength.
+ */
+ void SetStrength(Strength strength);
+
+ /**
+ * Configure Collator::AlternateHandling.
+ */
+ ICUResult SetAlternateHandling(AlternateHandling aAlternateHandling);
+
+ /**
+ * Controls whether an extra case level (positioned before the third level) is
+ * generated or not.
+ *
+ * Contents of the case level are affected by the value of CaseFirst
+ * attribute. A simple way to ignore accent differences in a string is to set
+ * the strength to Primary and enable case level.
+ */
+ ICUResult SetCaseLevel(Feature aFeature);
+
+ /**
+ * When turned on, this attribute makes substrings of digits sort according to
+ * their numeric values.
+ *
+ * This is a way to get '100' to sort AFTER '2'. Note that the longest digit
+ * substring that can be treated as a single unit is 254 digits (not counting
+ * leading zeros). If a digit substring is longer than that, the digits beyond
+ * the limit will be treated as a separate digit substring.
+ *
+ * A "digit" in this sense is a code point with General_Category=Nd, which
+ * does not include circled numbers, roman numerals, etc. Only a contiguous
+ * digit substring is considered, that is, non-negative integers without
+ * separators. There is no support for plus/minus signs, decimals, exponents,
+ * etc.
+ */
+ ICUResult SetNumericCollation(Feature aFeature);
+
+ /**
+ * Controls whether the normalization check and necessary normalizations are
+ * performed.
+ *
+ * When off (default), no normalization check is performed. The correctness of
+ * the result is guaranteed only if the input data is in so-called FCD form.
+ * When set to on, an incremental check is performed to see whether the input
+ * data is in the FCD form. If the data is not in the FCD form, incremental
+ * NFD normalization is performed.
+ */
+ ICUResult SetNormalizationMode(Feature aFeature);
+
+ /**
+ * Configure Collator::CaseFirst.
+ */
+ ICUResult SetCaseFirst(CaseFirst aCaseFirst);
+
+#ifndef JS_STANDALONE
+ FRIEND_TEST(IntlCollator, SetAttributesInternal);
+#endif
+
+ ICUPointer<UCollator> mCollator = ICUPointer<UCollator>(nullptr);
+ Maybe<Sensitivity> mLastStrategy = Nothing();
+};
+
+} // namespace mozilla::intl
+
+#endif
diff --git a/intl/components/src/Currency.cpp b/intl/components/src/Currency.cpp
new file mode 100644
index 0000000000..4db8e0919c
--- /dev/null
+++ b/intl/components/src/Currency.cpp
@@ -0,0 +1,22 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/intl/Currency.h"
+
+#include "unicode/ucurr.h"
+#include "unicode/uenum.h"
+#include "unicode/utypes.h"
+
+namespace mozilla::intl {
+
+// Open an ICU enumeration over the ISO currencies selected by UCURR_ALL and
+// wrap it, or return the converted ICU error.
+Result<SpanEnumeration<char>, ICUError> Currency::GetISOCurrencies() {
+  UErrorCode status = U_ZERO_ERROR;
+  UEnumeration* currencies = ucurr_openISOCurrencies(UCURR_ALL, &status);
+  if (U_SUCCESS(status)) {
+    return SpanEnumeration<char>(currencies);
+  }
+
+  return Err(ToICUError(status));
+}
+
+} // namespace mozilla::intl
diff --git a/intl/components/src/Currency.h b/intl/components/src/Currency.h
new file mode 100644
index 0000000000..d0f8eb6ee8
--- /dev/null
+++ b/intl/components/src/Currency.h
@@ -0,0 +1,30 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef intl_components_Currency_h_
+#define intl_components_Currency_h_
+
+#include "mozilla/intl/ICU4CGlue.h"
+#include "mozilla/intl/ICUError.h"
+#include "mozilla/Result.h"
+
+namespace mozilla::intl {
+
+/**
+ * This component is a Mozilla-focused API for working with currencies in
+ * internationalization code.
+ */
+class Currency final {
+ public:
+ // Not constructible: the class only exposes static member functions.
+ Currency() = delete;
+
+ /**
+ * Returns an enumeration of all supported ISO currency codes, or an ICUError
+ * on failure.
+ */
+ static Result<SpanEnumeration<char>, ICUError> GetISOCurrencies();
+};
+
+} // namespace mozilla::intl
+
+#endif
diff --git a/intl/components/src/DateIntervalFormat.cpp b/intl/components/src/DateIntervalFormat.cpp
new file mode 100644
index 0000000000..0097668f8b
--- /dev/null
+++ b/intl/components/src/DateIntervalFormat.cpp
@@ -0,0 +1,266 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "DateTimeFormat.h" // for DATE_TIME_FORMAT_REPLACE_SPECIAL_SPACES
+#include "DateTimeFormatUtils.h"
+#include "ScopedICUObject.h"
+
+#include "mozilla/intl/Calendar.h"
+#include "mozilla/intl/DateIntervalFormat.h"
+
+namespace mozilla::intl {
+
+/**
+ * PartitionDateTimeRangePattern ( dateTimeFormat, x, y ), steps 9-11.
+ *
+ * Look for a date interval span field in |aFormattedValue|. |*aEqual| is set
+ * to true when no such field exists, and to false otherwise (including when
+ * an ICU call fails after the initial null check).
+ *
+ * https://tc39.es/ecma402/#sec-partitiondatetimerangepattern
+ */
+static ICUResult DateFieldsPracticallyEqual(
+    const UFormattedValue* aFormattedValue, bool* aEqual) {
+  if (aFormattedValue == nullptr) {
+    return Err(ICUError::InternalError);
+  }
+
+  MOZ_ASSERT(aEqual);
+  *aEqual = false;
+
+  UErrorCode errorCode = U_ZERO_ERROR;
+  UConstrainedFieldPosition* position = ucfpos_open(&errorCode);
+  if (U_FAILURE(errorCode)) {
+    return Err(ToICUError(errorCode));
+  }
+  ScopedICUObject<UConstrainedFieldPosition, ucfpos_close> closePosition(
+      position);
+
+  // Restrict the iteration to UFIELD_CATEGORY_DATE_INTERVAL_SPAN fields; no
+  // other category matters here.
+  ucfpos_constrainCategory(position, UFIELD_CATEGORY_DATE_INTERVAL_SPAN,
+                           &errorCode);
+  if (U_FAILURE(errorCode)) {
+    return Err(ToICUError(errorCode));
+  }
+
+  const bool foundSpan =
+      ufmtval_nextPosition(aFormattedValue, position, &errorCode);
+  if (U_FAILURE(errorCode)) {
+    return Err(ToICUError(errorCode));
+  }
+
+  // Without any date interval span field, both dates are "practically equal"
+  // per PartitionDateTimeRangePattern.
+  *aEqual = !foundSpan;
+  return Ok();
+}
+
+/* static */
+// Open an ICU date interval formatter for the given locale, skeleton and time
+// zone and wrap it in a DateIntervalFormat, or return the converted ICU error.
+Result<UniquePtr<DateIntervalFormat>, ICUError> DateIntervalFormat::TryCreate(
+    Span<const char> aLocale, Span<const char16_t> aSkeleton,
+    Span<const char16_t> aTimeZone) {
+  const auto skeletonLength = AssertedCast<int32_t>(aSkeleton.size());
+  const auto timeZoneLength = AssertedCast<int32_t>(aTimeZone.size());
+
+  UErrorCode status = U_ZERO_ERROR;
+  UDateIntervalFormat* formatter =
+      udtitvfmt_open(IcuLocale(aLocale), aSkeleton.data(), skeletonLength,
+                     aTimeZone.data(), timeZoneLength, &status);
+  if (U_FAILURE(status)) {
+    return Err(ToICUError(status));
+  }
+
+  // The constructor is private, so MakeUnique cannot be used here.
+  return UniquePtr<DateIntervalFormat>(new DateIntervalFormat(formatter));
+}
+
+// Close the wrapped ICU date interval formatter. The pointer is asserted to be
+// non-null.
+DateIntervalFormat::~DateIntervalFormat() {
+ MOZ_ASSERT(mDateIntervalFormat);
+ udtitvfmt_close(mDateIntervalFormat.GetMut());
+}
+
+#if DATE_TIME_FORMAT_REPLACE_SPECIAL_SPACES
+// Overwrite every special space in the string owned by |aValue| with a plain
+// ASCII space. This reaches inside the UFormattedValue and edits its internal
+// string; it is crucial that this is a pure in-place, one-for-one replacement
+// so that no field positions are altered.
+static void ReplaceSpecialSpaces(const UFormattedValue* aValue) {
+  UErrorCode status = U_ZERO_ERROR;
+  int32_t len;
+  const UChar* str = ufmtval_getString(aValue, &len, &status);
+  if (U_FAILURE(status)) {
+    return;
+  }
+
+  const auto text = Span(str, len);
+  // ICU hands out a const pointer; cast it away to patch the characters.
+  UChar* const first = const_cast<UChar*>(text.data());
+  UChar* const last = first + text.size();
+  for (UChar* cur = first; cur != last; ++cur) {
+    if (IsSpecialSpace(*cur)) {
+      *cur = ' ';
+    }
+  }
+}
+#endif
+
+// Format the range between the two calendars into |aFormatted| and report via
+// |aPracticallyEqual| whether the result has no date interval span field.
+ICUResult DateIntervalFormat::TryFormatCalendar(
+    const Calendar& aStart, const Calendar& aEnd,
+    AutoFormattedDateInterval& aFormatted, bool* aPracticallyEqual) const {
+  MOZ_ASSERT(aFormatted.IsValid());
+
+  UErrorCode errorCode = U_ZERO_ERROR;
+  udtitvfmt_formatCalendarToResult(mDateIntervalFormat.GetConst(),
+                                   aStart.GetUCalendar(), aEnd.GetUCalendar(),
+                                   aFormatted.GetFormatted(), &errorCode);
+  if (U_FAILURE(errorCode)) {
+    return Err(ToICUError(errorCode));
+  }
+
+#if DATE_TIME_FORMAT_REPLACE_SPECIAL_SPACES
+  ReplaceSpecialSpaces(aFormatted.Value());
+#endif
+
+  return DateFieldsPracticallyEqual(aFormatted.Value(), aPracticallyEqual);
+}
+
+// Format the range between the two time values into |aFormatted| and report
+// via |aPracticallyEqual| whether the result has no date interval span field.
+ICUResult DateIntervalFormat::TryFormatDateTime(
+    double aStart, double aEnd, AutoFormattedDateInterval& aFormatted,
+    bool* aPracticallyEqual) const {
+  MOZ_ASSERT(aFormatted.IsValid());
+
+  UErrorCode errorCode = U_ZERO_ERROR;
+  udtitvfmt_formatToResult(mDateIntervalFormat.GetConst(), aStart, aEnd,
+                           aFormatted.GetFormatted(), &errorCode);
+  if (U_FAILURE(errorCode)) {
+    return Err(ToICUError(errorCode));
+  }
+
+#if DATE_TIME_FORMAT_REPLACE_SPECIAL_SPACES
+  ReplaceSpecialSpaces(aFormatted.Value());
+#endif
+
+  return DateFieldsPracticallyEqual(aFormatted.Value(), aPracticallyEqual);
+}
+
+// Walk the fields of |aFormatted| and append one entry per part to |aParts|.
+// Each entry records the part type, the end index of the part in the
+// formatted string, and the source (start range, end range or shared). Gaps
+// between fields are appended as Literal parts.
+//
+// Returns an ICU error if a field-iteration call fails, and InternalError if
+// |aFormatted| has no value or appending to |aParts| fails.
+ICUResult DateIntervalFormat::TryFormattedToParts(
+ const AutoFormattedDateInterval& aFormatted,
+ DateTimePartVector& aParts) const {
+ MOZ_ASSERT(aFormatted.IsValid());
+ const UFormattedValue* value = aFormatted.Value();
+ if (!value) {
+ return Err(ICUError::InternalError);
+ }
+
+ // End index of the most recently appended part; everything before it is
+ // already covered by |aParts|.
+ size_t lastEndIndex = 0;
+ auto AppendPart = [&](DateTimePartType type, size_t endIndex,
+ DateTimePartSource source) {
+ if (!aParts.emplaceBack(type, endIndex, source)) {
+ return false;
+ }
+
+ lastEndIndex = endIndex;
+ return true;
+ };
+
+ UErrorCode status = U_ZERO_ERROR;
+ UConstrainedFieldPosition* fpos = ucfpos_open(&status);
+ if (U_FAILURE(status)) {
+ return Err(ToICUError(status));
+ }
+ ScopedICUObject<UConstrainedFieldPosition, ucfpos_close> toCloseFpos(fpos);
+
+ // End index of the current date interval span, and the source that parts
+ // appended right now belong to.
+ size_t categoryEndIndex = 0;
+ DateTimePartSource source = DateTimePartSource::Shared;
+
+ while (true) {
+ bool hasMore = ufmtval_nextPosition(value, fpos, &status);
+ if (U_FAILURE(status)) {
+ return Err(ToICUError(status));
+ }
+ if (!hasMore) {
+ break;
+ }
+
+ int32_t category = ucfpos_getCategory(fpos, &status);
+ if (U_FAILURE(status)) {
+ return Err(ToICUError(status));
+ }
+
+ int32_t field = ucfpos_getField(fpos, &status);
+ if (U_FAILURE(status)) {
+ return Err(ToICUError(status));
+ }
+
+ int32_t beginIndexInt, endIndexInt;
+ ucfpos_getIndexes(fpos, &beginIndexInt, &endIndexInt, &status);
+ if (U_FAILURE(status)) {
+ return Err(ToICUError(status));
+ }
+
+ MOZ_ASSERT(beginIndexInt <= endIndexInt,
+ "field iterator returning invalid range");
+
+ size_t beginIndex = AssertedCast<size_t>(beginIndexInt);
+ size_t endIndex = AssertedCast<size_t>(endIndexInt);
+
+ // Indices are guaranteed to be returned in order (from left to right).
+ MOZ_ASSERT(lastEndIndex <= beginIndex,
+ "field iteration didn't return fields in order start to "
+ "finish as expected");
+
+ if (category == UFIELD_CATEGORY_DATE_INTERVAL_SPAN) {
+ // Append any remaining literal parts before changing the source kind.
+ if (lastEndIndex < beginIndex) {
+ if (!AppendPart(DateTimePartType::Literal, beginIndex, source)) {
+ return Err(ICUError::InternalError);
+ }
+ }
+
+ // The special field category UFIELD_CATEGORY_DATE_INTERVAL_SPAN has only
+ // two allowed values (0 or 1), indicating the begin of the start- resp.
+ // end-date.
+ MOZ_ASSERT(field == 0 || field == 1,
+ "span category has unexpected value");
+
+ source = field == 0 ? DateTimePartSource::StartRange
+ : DateTimePartSource::EndRange;
+ categoryEndIndex = endIndex;
+ continue;
+ }
+
+ // Ignore categories other than UFIELD_CATEGORY_DATE.
+ if (category != UFIELD_CATEGORY_DATE) {
+ continue;
+ }
+
+ DateTimePartType type =
+ ConvertUFormatFieldToPartType(static_cast<UDateFormatField>(field));
+ // Emit the text between the previous part and this field as a literal.
+ if (lastEndIndex < beginIndex) {
+ if (!AppendPart(DateTimePartType::Literal, beginIndex, source)) {
+ return Err(ICUError::InternalError);
+ }
+ }
+
+ if (!AppendPart(type, endIndex, source)) {
+ return Err(ICUError::InternalError);
+ }
+
+ // This field ends exactly where the current span ends, so later parts are
+ // shared again until another span starts.
+ if (endIndex == categoryEndIndex) {
+ // Append any remaining literal parts before changing the source kind.
+ // NOTE(review): the successful AppendPart call just above sets
+ // lastEndIndex to endIndex, so this condition looks like it can never be
+ // true here.
+ if (lastEndIndex < endIndex) {
+ if (!AppendPart(DateTimePartType::Literal, endIndex, source)) {
+ return Err(ICUError::InternalError);
+ }
+ }
+
+ source = DateTimePartSource::Shared;
+ }
+ }
+
+ // Append any final literal.
+ auto spanResult = aFormatted.ToSpan();
+ if (spanResult.isErr()) {
+ return spanResult.propagateErr();
+ }
+ size_t formattedSize = spanResult.unwrap().size();
+ if (lastEndIndex < formattedSize) {
+ if (!AppendPart(DateTimePartType::Literal, formattedSize, source)) {
+ return Err(ICUError::InternalError);
+ }
+ }
+
+ return Ok();
+}
+
+} // namespace mozilla::intl
diff --git a/intl/components/src/DateIntervalFormat.h b/intl/components/src/DateIntervalFormat.h
new file mode 100644
index 0000000000..aa551ed345
--- /dev/null
+++ b/intl/components/src/DateIntervalFormat.h
@@ -0,0 +1,106 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef intl_components_DateIntervalFormat_h_
+#define intl_components_DateIntervalFormat_h_
+
+#include "mozilla/intl/DateTimePart.h"
+#include "mozilla/intl/ICU4CGlue.h"
+#include "mozilla/intl/ICUError.h"
+#include "mozilla/Result.h"
+#include "mozilla/Span.h"
+#include "mozilla/UniquePtr.h"
+
+#include "unicode/udateintervalformat.h"
+#include "unicode/utypes.h"
+
+namespace mozilla::intl {
+class Calendar;
+
+using AutoFormattedDateInterval =
+ AutoFormattedResult<UFormattedDateInterval, udtitvfmt_openResult,
+ udtitvfmt_resultAsValue, udtitvfmt_closeResult>;
+
+/**
+ * This component is a Mozilla-focused API for the date range formatting
+ * provided by ICU. This DateIntervalFormat class helps to format the range
+ * between two date-time values.
+ *
+ * https://tc39.es/ecma402/#sec-formatdatetimerange
+ * https://tc39.es/ecma402/#sec-formatdatetimerangetoparts
+ */
+class DateIntervalFormat final {
+ public:
+ /**
+ * Create a DateIntervalFormat object from locale, skeleton and time zone.
+ * The format of skeleton can be found in [1].
+ *
+ * Note: Skeleton will be removed in the future.
+ *
+ * [1]: https://unicode.org/reports/tr35/tr35-dates.html#Date_Format_Patterns
+ */
+ static Result<UniquePtr<DateIntervalFormat>, ICUError> TryCreate(
+ Span<const char> aLocale, Span<const char16_t> aSkeleton,
+ Span<const char16_t> aTimeZone);
+
+ ~DateIntervalFormat();
+
+ /**
+ * Format a date-time range between two Calendar objects.
+ *
+ * DateIntervalFormat cannot be changed to use a proleptic Gregorian
+ * calendar, so use this method if the start date is before the Gregorian
+ * calendar was introduced (October 15, 1582); otherwise use TryFormatDateTime
+ * instead.
+ *
+ * The result will be stored in aFormatted, caller can use
+ * AutoFormattedDateInterval::ToSpan() to get the formatted string, or pass
+ * the aFormatted to TryFormattedToParts to get the parts vector.
+ *
+ * aPracticallyEqual will be set to true if the date times of the two
+ * calendars are equal.
+ */
+ ICUResult TryFormatCalendar(const Calendar& aStart, const Calendar& aEnd,
+ AutoFormattedDateInterval& aFormatted,
+ bool* aPracticallyEqual) const;
+
+ /**
+ * Format a date-time range between two Unix epoch times in milliseconds.
+ *
+ * The result will be stored in aFormatted, caller can use
+ * AutoFormattedDateInterval::ToSpan() to get the formatted string, or pass
+ * the aFormatted to TryFormattedToParts to get the parts vector.
+ *
+ * aPracticallyEqual will be set to true if the date times of the two
+ * Unix epoch times are equal.
+ */
+ ICUResult TryFormatDateTime(double aStart, double aEnd,
+ AutoFormattedDateInterval& aFormatted,
+ bool* aPracticallyEqual) const;
+
+ /**
+ * Convert the formatted DateIntervalFormat into several parts.
+ *
+ * The caller gets the formatted result from either the TryFormatCalendar or
+ * the TryFormatDateTime method and instantiates the DateTimePartVector. This
+ * method will generate the parts and insert them into the vector.
+ *
+ * See:
+ * https://tc39.es/ecma402/#sec-formatdatetimerangetoparts
+ */
+ ICUResult TryFormattedToParts(const AutoFormattedDateInterval& aFormatted,
+ DateTimePartVector& aParts) const;
+
+ private:
+ DateIntervalFormat() = delete;
+ explicit DateIntervalFormat(UDateIntervalFormat* aDif)
+ : mDateIntervalFormat(aDif) {}
+ DateIntervalFormat(const DateIntervalFormat&) = delete;
+ DateIntervalFormat& operator=(const DateIntervalFormat&) = delete;
+
+ ICUPointer<UDateIntervalFormat> mDateIntervalFormat =
+ ICUPointer<UDateIntervalFormat>(nullptr);
+};
+} // namespace mozilla::intl
+
+#endif
diff --git a/intl/components/src/DateTimeFormat.cpp b/intl/components/src/DateTimeFormat.cpp
new file mode 100644
index 0000000000..5a6429e976
--- /dev/null
+++ b/intl/components/src/DateTimeFormat.cpp
@@ -0,0 +1,1140 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <cstring>
+
+#include "unicode/ucal.h"
+#include "unicode/udat.h"
+#include "unicode/udatpg.h"
+#include "unicode/ures.h"
+
+#include "DateTimeFormatUtils.h"
+#include "ScopedICUObject.h"
+
+#include "mozilla/EnumSet.h"
+#include "mozilla/intl/Calendar.h"
+#include "mozilla/intl/DateTimeFormat.h"
+#include "mozilla/intl/DateTimePatternGenerator.h"
+
+namespace mozilla::intl {
+
+// Closes the ICU date formatter held in mDateFormat.
+DateTimeFormat::~DateTimeFormat() {
+  // A formatter is expected to be present for the whole lifetime of the
+  // wrapper; check that before handing it to ICU.
+  MOZ_ASSERT(mDateFormat);
+
+  udat_close(mDateFormat);
+}
+
+// Translates an optional DateTimeFormat::Style into the matching ICU
+// UDateFormatStyle. An absent style maps to UDAT_NONE.
+static UDateFormatStyle ToUDateFormatStyle(
+    Maybe<DateTimeFormat::Style> aLength) {
+  if (aLength.isNothing()) {
+    return UDAT_NONE;
+  }
+
+  // There is deliberately no default: branch, so that the compiler can check
+  // that every enumerator is handled.
+  const DateTimeFormat::Style style = *aLength;
+  switch (style) {
+    case DateTimeFormat::Style::Short:
+      return UDAT_SHORT;
+    case DateTimeFormat::Style::Medium:
+      return UDAT_MEDIUM;
+    case DateTimeFormat::Style::Long:
+      return UDAT_LONG;
+    case DateTimeFormat::Style::Full:
+      return UDAT_FULL;
+  }
+
+  MOZ_ASSERT_UNREACHABLE();
+  return UDAT_NONE;
+}
+
+/**
+ * Walks a date pattern in the format specified in
+ * <https://unicode.org/reports/tr35/tr35-dates.html#Date_Format_Patterns>,
+ * yielding only the characters that are outside of single-quoted literal
+ * text. The quote characters themselves are never yielded.
+ */
+template <typename CharT>
+class PatternIterator {
+  // Next character to inspect; reset to nullptr once the pattern is exhausted.
+  CharT* iter;
+  // One past the last character of the pattern.
+  const CharT* const end;
+
+ public:
+  explicit PatternIterator(mozilla::Span<CharT> aPattern)
+      : iter(aPattern.data()), end(aPattern.data() + aPattern.size()) {}
+
+  /**
+   * Returns a pointer to the next unquoted pattern character, or nullptr when
+   * the end of the pattern has been reached. Must not be called again after
+   * it has returned nullptr (asserted).
+   */
+  CharT* next() {
+    MOZ_ASSERT(iter != nullptr);
+
+    for (bool quoted = false; iter < end;) {
+      CharT* const current = iter;
+      ++iter;
+
+      if (*current == '\'') {
+        // Entering or leaving literal text.
+        quoted = !quoted;
+        continue;
+      }
+      if (!quoted) {
+        return current;
+      }
+    }
+
+    iter = nullptr;
+    return nullptr;
+  }
+};
+
+// Returns the hour cycle selected by the first unquoted hour symbol ('K', 'h',
+// 'H', or 'k') in |aPattern|, or Nothing() when the pattern has none.
+Maybe<DateTimeFormat::HourCycle> DateTimeFormat::HourCycleFromPattern(
+    Span<const char16_t> aPattern) {
+  PatternIterator<const char16_t> symbols(aPattern);
+  for (const char16_t* sym = symbols.next(); sym; sym = symbols.next()) {
+    const char16_t ch = *sym;
+    if (ch == 'K') {
+      return Some(DateTimeFormat::HourCycle::H11);
+    }
+    if (ch == 'h') {
+      return Some(DateTimeFormat::HourCycle::H12);
+    }
+    if (ch == 'H') {
+      return Some(DateTimeFormat::HourCycle::H23);
+    }
+    if (ch == 'k') {
+      return Some(DateTimeFormat::HourCycle::H24);
+    }
+  }
+  return Nothing();
+}
+
+// True for the two 12-hour cycles (h11 and h12), false otherwise.
+static bool IsHour12(DateTimeFormat::HourCycle aHourCycle) {
+  switch (aHourCycle) {
+    case DateTimeFormat::HourCycle::H11:
+    case DateTimeFormat::HourCycle::H12:
+      return true;
+    default:
+      return false;
+  }
+}
+
+// Returns the pattern symbol for |aHourCycle|: 'K' (h11), 'h' (h12),
+// 'H' (h23), or 'k' (h24). Crashes on any other value.
+static char16_t HourSymbol(DateTimeFormat::HourCycle aHourCycle) {
+  char16_t symbol = 0;
+  switch (aHourCycle) {
+    case DateTimeFormat::HourCycle::H11:
+      symbol = 'K';
+      break;
+    case DateTimeFormat::HourCycle::H12:
+      symbol = 'h';
+      break;
+    case DateTimeFormat::HourCycle::H23:
+      symbol = 'H';
+      break;
+    case DateTimeFormat::HourCycle::H24:
+      symbol = 'k';
+      break;
+  }
+  if (symbol == 0) {
+    MOZ_CRASH("unexpected hour cycle");
+  }
+  return symbol;
+}
+
+// The pattern/skeleton field classes this file distinguishes.
+enum class PatternField { Hour, Minute, Second, Other };
+
+// Classifies a single pattern or skeleton character: the hour symbols
+// 'K', 'h', 'H', 'k', and 'j' map to Hour, 'm' to Minute, 's' to Second, and
+// everything else to Other.
+template <typename CharT>
+static PatternField ToPatternField(CharT aCh) {
+  switch (aCh) {
+    case 'K':
+    case 'h':
+    case 'H':
+    case 'k':
+    case 'j':
+      return PatternField::Hour;
+    case 'm':
+      return PatternField::Minute;
+    case 's':
+      return PatternField::Second;
+    default:
+      return PatternField::Other;
+  }
+}
+
+/**
+ * Rewrites, in place, every unquoted hour field character in
+ * |aPatternOrSkeleton| to the hour symbol that represents |aHourCycle|.
+ */
+/* static */
+void DateTimeFormat::ReplaceHourSymbol(
+    mozilla::Span<char16_t> aPatternOrSkeleton,
+    DateTimeFormat::HourCycle aHourCycle) {
+  const char16_t hourSymbol = HourSymbol(aHourCycle);
+
+  PatternIterator<char16_t> fields(aPatternOrSkeleton);
+  for (char16_t* ch = fields.next(); ch; ch = fields.next()) {
+    if (ToPatternField(*ch) == PatternField::Hour) {
+      *ch = hourSymbol;
+    }
+  }
+}
+
+/**
+ * Replace |aPattern| with a pattern that honours the requested hour-12
+ * setting.
+ *
+ * The skeleton of |aPattern| is written to |aSkeleton|, its hour symbols are
+ * switched to "h" (when |aHour12| is true) or "H" (otherwise), and the best
+ * pattern for that adjusted skeleton is stored back into |aPattern|.
+ *
+ * This function is needed to work around the following two issues.
+ * - https://unicode-org.atlassian.net/browse/ICU-21023
+ * - https://unicode-org.atlassian.net/browse/CLDR-13425
+ *
+ * The workaround is relatively simple and doesn't give the most accurate
+ * results. For example:
+ *
+ * ```
+ * var dtf = new Intl.DateTimeFormat("en", {
+ *   timeZone: "UTC",
+ *   dateStyle: "long",
+ *   timeStyle: "long",
+ *   hourCycle: "h12",
+ * });
+ * print(dtf.format(new Date("2020-01-01T00:00Z")));
+ * ```
+ *
+ * Returns the pattern "MMMM d, y 'at' h:mm:ss a z", but when going through
+ * |DateTimePatternGenerator::GetSkeleton| and then
+ * |DateTimePatternGenerator::GetBestPattern| to find an equivalent pattern for
+ * "h23", we'll end up with the pattern "MMMM d, y, HH:mm:ss z", so the
+ * combinator element " 'at' " was lost in the process.
+ */
+/* static */
+ICUResult DateTimeFormat::FindPatternWithHourCycle(
+    DateTimePatternGenerator& aDateTimePatternGenerator,
+    DateTimeFormat::PatternVector& aPattern, bool aHour12,
+    DateTimeFormat::SkeletonVector& aSkeleton) {
+  // Step 1: derive the skeleton of the incoming pattern.
+  MOZ_TRY(DateTimePatternGenerator::GetSkeleton(aPattern, aSkeleton));
+
+  // Step 2: force the hour symbol. Input skeletons don't differentiate between
+  // "K" and "h" resp. "k" and "H", so only h12 and h23 are used here.
+  const DateTimeFormat::HourCycle skeletonCycle =
+      aHour12 ? DateTimeFormat::HourCycle::H12
+              : DateTimeFormat::HourCycle::H23;
+  DateTimeFormat::ReplaceHourSymbol(aSkeleton, skeletonCycle);
+
+  // Step 3: ask the generator for the best pattern of the adjusted skeleton.
+  MOZ_TRY(aDateTimePatternGenerator.GetBestPattern(aSkeleton, aPattern));
+
+  return Ok();
+}
+
+// Computes the DateTimePatternGenerator match options for |aSkeleton|: the
+// field-length option of hour, minute, or second is included when that field's
+// symbol occurs exactly twice in the skeleton.
+static auto PatternMatchOptions(mozilla::Span<const char16_t> aSkeleton) {
+  using PatternMatchOption =
+      mozilla::intl::DateTimePatternGenerator::PatternMatchOption;
+
+  // Number of symbols seen per field:
+  // - 0: absent
+  // - 1: numeric
+  // - 2: 2-digit
+  int32_t hour = 0;
+  int32_t minute = 0;
+  int32_t second = 0;
+
+  PatternIterator<const char16_t> fields(aSkeleton);
+  for (const char16_t* ch = fields.next(); ch; ch = fields.next()) {
+    const PatternField field = ToPatternField(*ch);
+    if (field == PatternField::Hour) {
+      MOZ_ASSERT(hour < 2);
+      ++hour;
+    } else if (field == PatternField::Minute) {
+      MOZ_ASSERT(minute < 2);
+      ++minute;
+    } else if (field == PatternField::Second) {
+      MOZ_ASSERT(second < 2);
+      ++second;
+    }
+    // PatternField::Other does not influence the options.
+  }
+
+  // The field length is only adjusted when the user explicitly requested the
+  // '2-digit' representation. Always adjusting it is not possible, because
+  //   1. The default value for hour, minute, and second fields is 'numeric'.
+  //      If the length is always adjusted, |date.toLocaleTime()| will start to
+  //      return strings like "1:5:9 AM" instead of "1:05:09 AM".
+  //   2. ICU doesn't support to adjust the field length to 'numeric' in
+  //      certain cases. For example when the locale is "de" (German):
+  //        a. hour='numeric' and minute='2-digit' will return "1:05".
+  //        b. whereas hour='numeric' and minute='numeric' will return "01:05".
+  mozilla::EnumSet<PatternMatchOption> options;
+  if (hour == 2) {
+    options += PatternMatchOption::HourField;
+  }
+  if (minute == 2) {
+    options += PatternMatchOption::MinuteField;
+  }
+  if (second == 2) {
+    options += PatternMatchOption::SecondField;
+  }
+  return options;
+}
+
+/**
+ * Creates a DateTimeFormat from the date/time styles in |aStyleBag|.
+ *
+ * When neither a date nor a time style is given, ICU's default style is used
+ * for both. When a time style is combined with an hour12 or hourCycle
+ * override, the style pattern is adjusted to the requested hour cycle and a
+ * pattern-based formatter is returned instead.
+ *
+ * |aDateTimePatternGenerator| is only needed when the pattern's hour cycle
+ * has to switch between 12-hour and 24-hour; ICUError::InternalError is
+ * returned if it is null in that case. |aTimeZoneOverride| is optional.
+ */
+/* static */
+Result<UniquePtr<DateTimeFormat>, ICUError> DateTimeFormat::TryCreateFromStyle(
+    Span<const char> aLocale, const StyleBag& aStyleBag,
+    DateTimePatternGenerator* aDateTimePatternGenerator,
+    Maybe<Span<const char16_t>> aTimeZoneOverride) {
+  auto dateStyle = ToUDateFormatStyle(aStyleBag.date);
+  auto timeStyle = ToUDateFormatStyle(aStyleBag.time);
+
+  if (dateStyle == UDAT_NONE && timeStyle == UDAT_NONE) {
+    dateStyle = UDAT_DEFAULT;
+    timeStyle = UDAT_DEFAULT;
+  }
+
+  // The time zone is optional.
+  int32_t tzIDLength = -1;
+  const UChar* tzID = nullptr;
+  if (aTimeZoneOverride) {
+    tzIDLength = static_cast<int32_t>(aTimeZoneOverride->size());
+    tzID = aTimeZoneOverride->Elements();
+  }
+
+  UErrorCode status = U_ZERO_ERROR;
+  UDateFormat* dateFormat =
+      udat_open(timeStyle, dateStyle, IcuLocale(aLocale), tzID, tzIDLength,
+                /* pattern */ nullptr, /* pattern length */ -1, &status);
+  if (U_FAILURE(status)) {
+    return Err(ToICUError(status));
+  }
+
+  // From here on |df| owns |dateFormat|.
+  auto df = UniquePtr<DateTimeFormat>(new DateTimeFormat(dateFormat));
+
+  if (aStyleBag.time && (aStyleBag.hour12 || aStyleBag.hourCycle)) {
+    // Only adjust the style pattern for time if there is an override.
+    // Extract the pattern and adjust it for the preferred hour cycle.
+    DateTimeFormat::PatternVector pattern{};
+
+    VectorToBufferAdaptor buffer(pattern);
+    MOZ_TRY(df->GetPattern(buffer));
+
+    Maybe<DateTimeFormat::HourCycle> hcPattern = HourCycleFromPattern(pattern);
+    DateTimeFormat::SkeletonVector skeleton{};
+
+    if (hcPattern) {
+      // hour12 takes precedence over hourCycle; at least one of them is set.
+      bool wantHour12 =
+          aStyleBag.hour12 ? *aStyleBag.hour12 : IsHour12(*aStyleBag.hourCycle);
+      if (wantHour12 == IsHour12(*hcPattern)) {
+        // Return the date-time format when its hour-cycle settings match the
+        // requested options.
+        if (aStyleBag.hour12 || *hcPattern == *aStyleBag.hourCycle) {
+          return df;
+        }
+      } else {
+        // Switching between 12-hour and 24-hour needs the pattern generator.
+        // Fail with an error instead of dereferencing a null pointer in
+        // builds where the assertion is compiled out; this is the same error
+        // TryCreateFromSkeleton reports for a missing generator.
+        MOZ_ASSERT(aDateTimePatternGenerator);
+        if (!aDateTimePatternGenerator) {
+          return Err(ICUError::InternalError);
+        }
+        MOZ_TRY(DateTimeFormat::FindPatternWithHourCycle(
+            *aDateTimePatternGenerator, pattern, wantHour12, skeleton));
+      }
+      // Replace the hourCycle, if present, in the pattern string. But only do
+      // this if no hour12 option is present, because the latter takes
+      // precedence over hourCycle.
+      if (!aStyleBag.hour12) {
+        DateTimeFormat::ReplaceHourSymbol(pattern, *aStyleBag.hourCycle);
+      }
+
+      auto result = DateTimeFormat::TryCreateFromPattern(aLocale, pattern,
+                                                         aTimeZoneOverride);
+      if (result.isErr()) {
+        return Err(result.unwrapErr());
+      }
+      auto dateTimeFormat = result.unwrap();
+      MOZ_TRY(dateTimeFormat->CacheSkeleton(skeleton));
+      return dateTimeFormat;
+    }
+  }
+
+  return df;
+}
+
+// Wraps an already opened ICU formatter. A null |aDateFormat| is a fatal
+// error in all build types.
+DateTimeFormat::DateTimeFormat(UDateFormat* aDateFormat)
+    : mDateFormat(aDateFormat) {
+  MOZ_RELEASE_ASSERT(aDateFormat, "Expected aDateFormat to not be a nullptr.");
+}
+
+// Appends the string literal |aString| (without its terminating NUL) to the
+// string vector |aVec|. Yields ICUError::OutOfMemory if the append fails.
+template <typename V, size_t N>
+static ICUResult PushString(V& aVec, const char16_t (&aString)[N]) {
+  // N counts the trailing NUL of the literal, which must not be copied.
+  constexpr size_t length = N - 1;
+  if (aVec.append(aString, length)) {
+    return Ok();
+  }
+  return Err(ICUError::OutOfMemory);
+}
+
+// Appends the single code unit |aCh| to the string vector |aVec|. Yields
+// ICUError::OutOfMemory if the append fails.
+template <typename V>
+static ICUResult PushChar(V& aVec, char16_t aCh) {
+  if (aVec.append(aCh)) {
+    return Ok();
+  }
+  return Err(ICUError::OutOfMemory);
+}
+
+/**
+ * Appends to |aSkeleton| the ICU skeleton symbols that represent the options
+ * set in |aBag|. Options that are not set contribute nothing. See
+ * http://unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table
+ *
+ * Returns ICUError::OutOfMemory when appending to |aSkeleton| fails.
+ */
+ICUResult ToICUSkeleton(const DateTimeFormat::ComponentsBag& aBag,
+                        DateTimeFormat::SkeletonVector& aSkeleton) {
+  if (aBag.weekday) {
+    switch (*aBag.weekday) {
+      case DateTimeFormat::Text::Narrow:
+        MOZ_TRY(PushString(aSkeleton, u"EEEEE"));
+        break;
+      case DateTimeFormat::Text::Short:
+        MOZ_TRY(PushString(aSkeleton, u"E"));
+        break;
+      case DateTimeFormat::Text::Long:
+        MOZ_TRY(PushString(aSkeleton, u"EEEE"));
+        break;
+    }
+  }
+  if (aBag.era) {
+    switch (*aBag.era) {
+      case DateTimeFormat::Text::Narrow:
+        MOZ_TRY(PushString(aSkeleton, u"GGGGG"));
+        break;
+      case DateTimeFormat::Text::Short:
+        // Use "GGG" instead of "G" to return the same results as other
+        // browsers. This is exploiting the following ICU bug
+        // <https://unicode-org.atlassian.net/browse/ICU-22138>. As soon as that
+        // bug has been fixed, we can change this back to "G".
+        //
+        // In practice the bug only affects "G", so we only apply it for "G"
+        // and not for other symbols like "B" or "z".
+        MOZ_TRY(PushString(aSkeleton, u"GGG"));
+        break;
+      case DateTimeFormat::Text::Long:
+        MOZ_TRY(PushString(aSkeleton, u"GGGG"));
+        break;
+    }
+  }
+  if (aBag.year) {
+    switch (*aBag.year) {
+      case DateTimeFormat::Numeric::TwoDigit:
+        MOZ_TRY(PushString(aSkeleton, u"yy"));
+        break;
+      case DateTimeFormat::Numeric::Numeric:
+        MOZ_TRY(PushString(aSkeleton, u"y"));
+        break;
+    }
+  }
+  if (aBag.month) {
+    switch (*aBag.month) {
+      case DateTimeFormat::Month::TwoDigit:
+        MOZ_TRY(PushString(aSkeleton, u"MM"));
+        break;
+      case DateTimeFormat::Month::Numeric:
+        MOZ_TRY(PushString(aSkeleton, u"M"));
+        break;
+      case DateTimeFormat::Month::Narrow:
+        MOZ_TRY(PushString(aSkeleton, u"MMMMM"));
+        break;
+      case DateTimeFormat::Month::Short:
+        MOZ_TRY(PushString(aSkeleton, u"MMM"));
+        break;
+      case DateTimeFormat::Month::Long:
+        MOZ_TRY(PushString(aSkeleton, u"MMMM"));
+        break;
+    }
+  }
+  if (aBag.day) {
+    switch (*aBag.day) {
+      case DateTimeFormat::Numeric::TwoDigit:
+        MOZ_TRY(PushString(aSkeleton, u"dd"));
+        break;
+      case DateTimeFormat::Numeric::Numeric:
+        MOZ_TRY(PushString(aSkeleton, u"d"));
+        break;
+    }
+  }
+
+  // Pick the hour symbol. 'j' is used when neither hour12 nor hourCycle is
+  // set. If hour12 and hourCycle are both present, hour12 takes precedence.
+  char16_t hourSkeletonChar = 'j';
+  if (aBag.hour12) {
+    if (*aBag.hour12) {
+      hourSkeletonChar = 'h';
+    } else {
+      hourSkeletonChar = 'H';
+    }
+  } else if (aBag.hourCycle) {
+    switch (*aBag.hourCycle) {
+      case DateTimeFormat::HourCycle::H11:
+      case DateTimeFormat::HourCycle::H12:
+        hourSkeletonChar = 'h';
+        break;
+      case DateTimeFormat::HourCycle::H23:
+      case DateTimeFormat::HourCycle::H24:
+        hourSkeletonChar = 'H';
+        break;
+    }
+  }
+  if (aBag.hour) {
+    switch (*aBag.hour) {
+      case DateTimeFormat::Numeric::TwoDigit:
+        MOZ_TRY(PushChar(aSkeleton, hourSkeletonChar));
+        MOZ_TRY(PushChar(aSkeleton, hourSkeletonChar));
+        break;
+      case DateTimeFormat::Numeric::Numeric:
+        MOZ_TRY(PushChar(aSkeleton, hourSkeletonChar));
+        break;
+    }
+  }
+  // ICU requires that "B" is set after the "j" hour skeleton symbol.
+  // https://unicode-org.atlassian.net/browse/ICU-20731
+  if (aBag.dayPeriod) {
+    switch (*aBag.dayPeriod) {
+      case DateTimeFormat::Text::Narrow:
+        MOZ_TRY(PushString(aSkeleton, u"BBBBB"));
+        break;
+      case DateTimeFormat::Text::Short:
+        MOZ_TRY(PushString(aSkeleton, u"B"));
+        break;
+      case DateTimeFormat::Text::Long:
+        MOZ_TRY(PushString(aSkeleton, u"BBBB"));
+        break;
+    }
+  }
+  if (aBag.minute) {
+    switch (*aBag.minute) {
+      case DateTimeFormat::Numeric::TwoDigit:
+        MOZ_TRY(PushString(aSkeleton, u"mm"));
+        break;
+      case DateTimeFormat::Numeric::Numeric:
+        MOZ_TRY(PushString(aSkeleton, u"m"));
+        break;
+    }
+  }
+  if (aBag.second) {
+    switch (*aBag.second) {
+      case DateTimeFormat::Numeric::TwoDigit:
+        MOZ_TRY(PushString(aSkeleton, u"ss"));
+        break;
+      case DateTimeFormat::Numeric::Numeric:
+        MOZ_TRY(PushString(aSkeleton, u"s"));
+        break;
+    }
+  }
+  if (aBag.fractionalSecondDigits) {
+    // Any value other than 1 or 2 is emitted as three fractional digits.
+    switch (*aBag.fractionalSecondDigits) {
+      case 1:
+        MOZ_TRY(PushString(aSkeleton, u"S"));
+        break;
+      case 2:
+        MOZ_TRY(PushString(aSkeleton, u"SS"));
+        break;
+      default:
+        MOZ_TRY(PushString(aSkeleton, u"SSS"));
+        break;
+    }
+  }
+  if (aBag.timeZoneName) {
+    switch (*aBag.timeZoneName) {
+      case DateTimeFormat::TimeZoneName::Short:
+        MOZ_TRY(PushString(aSkeleton, u"z"));
+        break;
+      case DateTimeFormat::TimeZoneName::Long:
+        MOZ_TRY(PushString(aSkeleton, u"zzzz"));
+        break;
+      case DateTimeFormat::TimeZoneName::ShortOffset:
+        MOZ_TRY(PushString(aSkeleton, u"O"));
+        break;
+      case DateTimeFormat::TimeZoneName::LongOffset:
+        MOZ_TRY(PushString(aSkeleton, u"OOOO"));
+        break;
+      case DateTimeFormat::TimeZoneName::ShortGeneric:
+        MOZ_TRY(PushString(aSkeleton, u"v"));
+        break;
+      case DateTimeFormat::TimeZoneName::LongGeneric:
+        MOZ_TRY(PushString(aSkeleton, u"vvvv"));
+        break;
+    }
+  }
+  return Ok();
+}
+
+// Creates a DateTimeFormat from a components bag: the bag is first converted
+// to an ICU skeleton, then the skeleton-based factory does the rest. The
+// bag's hourCycle is forwarded to that factory.
+/* static */
+Result<UniquePtr<DateTimeFormat>, ICUError>
+DateTimeFormat::TryCreateFromComponents(
+    Span<const char> aLocale, const DateTimeFormat::ComponentsBag& aBag,
+    DateTimePatternGenerator* aDateTimePatternGenerator,
+    Maybe<Span<const char16_t>> aTimeZoneOverride) {
+  DateTimeFormat::SkeletonVector componentsSkeleton;
+  MOZ_TRY(ToICUSkeleton(aBag, componentsSkeleton));
+
+  return TryCreateFromSkeleton(aLocale, componentsSkeleton,
+                               aDateTimePatternGenerator, aBag.hourCycle,
+                               aTimeZoneOverride);
+}
+
+// Creates a DateTimeFormat that formats with the explicit ICU pattern
+// |aPattern| for |aLocale|, optionally using |aTimeZoneOverride| as its time
+// zone. ICU failures are converted to an ICUError.
+/* static */
+Result<UniquePtr<DateTimeFormat>, ICUError>
+DateTimeFormat::TryCreateFromPattern(
+    Span<const char> aLocale, Span<const char16_t> aPattern,
+    Maybe<Span<const char16_t>> aTimeZoneOverride) {
+  // Without an override, ICU receives a null id and a length of -1.
+  const UChar* tzID = nullptr;
+  int32_t tzIDLength = -1;
+  if (aTimeZoneOverride) {
+    tzID = aTimeZoneOverride->data();
+    tzIDLength = static_cast<int32_t>(aTimeZoneOverride->size());
+  }
+
+  const int32_t patternLength = static_cast<int32_t>(aPattern.size());
+
+  UErrorCode status = U_ZERO_ERROR;
+  UDateFormat* dateFormat =
+      udat_open(UDAT_PATTERN, UDAT_PATTERN, IcuLocale(aLocale), tzID,
+                tzIDLength, aPattern.data(), patternLength, &status);
+  if (U_FAILURE(status)) {
+    return Err(ToICUError(status));
+  }
+
+  // From now on the wrapper controls the life cycle of the ICU object.
+  return UniquePtr<DateTimeFormat>(new DateTimeFormat(dateFormat));
+}
+
+// Creates a DateTimeFormat from an ICU skeleton: the pattern generator picks
+// the best pattern for |aSkeleton|, the hour symbols of that pattern are
+// rewritten when |aHourCycle| is given, and the skeleton is cached on the
+// new formatter. A null generator yields ICUError::InternalError.
+/* static */
+Result<UniquePtr<DateTimeFormat>, ICUError>
+DateTimeFormat::TryCreateFromSkeleton(
+    Span<const char> aLocale, Span<const char16_t> aSkeleton,
+    DateTimePatternGenerator* aDateTimePatternGenerator,
+    Maybe<DateTimeFormat::HourCycle> aHourCycle,
+    Maybe<Span<const char16_t>> aTimeZoneOverride) {
+  if (aDateTimePatternGenerator == nullptr) {
+    return Err(ICUError::InternalError);
+  }
+
+  // Look up the best pattern for the skeleton.
+  DateTimeFormat::PatternVector bestPattern;
+  MOZ_TRY(aDateTimePatternGenerator->GetBestPattern(
+      aSkeleton, bestPattern, PatternMatchOptions(aSkeleton)));
+
+  if (aHourCycle) {
+    DateTimeFormat::ReplaceHourSymbol(bestPattern, *aHourCycle);
+  }
+
+  auto created = DateTimeFormat::TryCreateFromPattern(aLocale, bestPattern,
+                                                      aTimeZoneOverride);
+  if (created.isErr()) {
+    return Err(created.unwrapErr());
+  }
+
+  auto formatter = created.unwrap();
+  MOZ_TRY(formatter->CacheSkeleton(aSkeleton));
+  return formatter;
+}
+
+// Appends |aSkeleton| to mOriginalSkeleton. Yields ICUError::OutOfMemory if
+// the append fails.
+ICUResult DateTimeFormat::CacheSkeleton(Span<const char16_t> aSkeleton) {
+  const bool appended =
+      mOriginalSkeleton.append(aSkeleton.Elements(), aSkeleton.Length());
+  if (!appended) {
+    return Err(ICUError::OutOfMemory);
+  }
+  return Ok();
+}
+
+// Sets the Gregorian change date of this formatter's calendar to |aTime|.
+// Failures are deliberately ignored.
+void DateTimeFormat::SetStartTimeIfGregorian(double aTime) {
+  // udat_getCalendar hands out a const calendar; the setter below needs a
+  // mutable one.
+  const UCalendar* formatCalendar = udat_getCalendar(mDateFormat);
+  UCalendar* mutableCalendar = const_cast<UCalendar*>(formatCalendar);
+
+  // An error here means the calendar is not Gregorian, and can be ignored.
+  UErrorCode ignored = U_ZERO_ERROR;
+  ucal_setGregorianChange(mutableCalendar, aTime, &ignored);
+}
+
+// Returns a copy of this formatter's calendar with its time set to
+// |aUnixEpoch| (milliseconds). ICU failures are converted to an ICUError.
+Result<UniquePtr<Calendar>, ICUError> DateTimeFormat::CloneCalendar(
+    double aUnixEpoch) const {
+  UErrorCode status = U_ZERO_ERROR;
+  const UCalendar* source = udat_getCalendar(mDateFormat);
+  UCalendar* calendarRaw = ucal_clone(source, &status);
+  if (U_FAILURE(status)) {
+    return Err(ToICUError(status));
+  }
+
+  // Wrap the raw clone before doing anything that can fail.
+  auto clone = MakeUnique<Calendar>(calendarRaw);
+  MOZ_TRY(clone->SetTimeInMs(aUnixEpoch));
+  return clone;
+}
+
+/**
+ * ICU locale identifier consisting of a language and a region subtag, stored
+ * as "<language>_<region>" in a fixed-size, zero-terminated buffer.
+ */
+class LanguageRegionLocaleId {
+  // Longest language subtag:
+  //   unicode_language_subtag = alpha{2,3} | alpha{5,8} ;
+  static constexpr size_t LanguageLength = 8;
+
+  // Longest region subtag:
+  //   unicode_region_subtag = (alpha{2} | digit{3}) ;
+  static constexpr size_t RegionLength = 3;
+
+  // Language and region plus one character for the separator.
+  static constexpr size_t LRLength = LanguageLength + RegionLength + 1;
+
+  // One extra character so the string is always zero terminated.
+  char mLocale[LRLength + 1] = {};
+
+  // Points at the first character of the region subtag inside |mLocale|.
+  char* mRegion = nullptr;
+
+ public:
+  LanguageRegionLocaleId(Span<const char> aLanguage,
+                         Maybe<Span<const char>> aRegion);
+
+  // The full "<language>_<region>" identifier.
+  const char* languageRegion() const { return mLocale; }
+
+  // Only the region subtag.
+  const char* region() const { return mRegion; }
+};
+
+// Builds "<language>_<region>" in mLocale. Subtags longer than the buffer
+// allows are a fatal error. A missing region is replaced by "001".
+LanguageRegionLocaleId::LanguageRegionLocaleId(
+    Span<const char> aLanguage, Maybe<Span<const char>> aRegion) {
+  MOZ_RELEASE_ASSERT(aLanguage.Length() <= LanguageLength);
+  MOZ_RELEASE_ASSERT(!aRegion || aRegion->Length() <= RegionLength);
+
+  // Language subtag, followed by the underscore ICU uses as the separator.
+  const size_t separatorIndex = aLanguage.Length();
+  std::memcpy(mLocale, aLanguage.Elements(), separatorIndex);
+  mLocale[separatorIndex] = '_';
+
+  // The region subtag starts right after the separator.
+  mRegion = &mLocale[separatorIndex + 1];
+  if (!aRegion) {
+    // Use "001" (UN M.49 code for the World) as the fallback to match ICU.
+    std::strcpy(mRegion, "001");
+    return;
+  }
+  // mLocale is zero-initialised, so the copied region stays terminated.
+  std::memcpy(mRegion, aRegion->Elements(), aRegion->Length());
+}
+
+/**
+ * Returns the hour cycles of the given language/region, preferred cycle
+ * first, followed by the additionally allowed cycles, without duplicates.
+ * When no time data is found for the locale, the result is just h23.
+ */
+/* static */
+Result<DateTimeFormat::HourCyclesVector, ICUError>
+DateTimeFormat::GetAllowedHourCycles(Span<const char> aLanguage,
+                                     Maybe<Span<const char>> aRegion) {
+  // ICU doesn't expose a public API to retrieve the hour cycles for a locale,
+  // so |DateTimePatternGenerator::getAllowedHourFormats()| is reconstructed
+  // here using the public UResourceBundle API.
+  //
+  // The time data format is specified in UTS 35 at [1] and the data itself is
+  // located at [2].
+  //
+  // [1] https://unicode.org/reports/tr35/tr35-dates.html#Time_Data
+  // [2]
+  // https://github.com/unicode-org/cldr/blob/master/common/supplemental/supplementalData.xml
+
+  HourCyclesVector result;
+
+  // Reserving the inline capacity always succeeds, and it is the maximum
+  // number of hour cycles, so every later append can be infallible.
+  MOZ_ALWAYS_TRUE(result.reserve(HourCyclesVector::InlineLength));
+
+  LanguageRegionLocaleId localeId(aLanguage, aRegion);
+
+  UErrorCode status = U_ZERO_ERROR;
+
+  // Open the "supplementalData" resource bundle.
+  UResourceBundle* res = ures_openDirect(nullptr, "supplementalData", &status);
+  if (U_FAILURE(status)) {
+    return Err(ToICUError(status));
+  }
+  ScopedICUObject<UResourceBundle, ures_close> closeRes(res);
+  MOZ_ASSERT(ures_getType(res) == URES_TABLE);
+
+  // Locate "timeData" within the "supplementalData" resource bundle.
+  UResourceBundle* timeData = ures_getByKey(res, "timeData", nullptr, &status);
+  if (U_FAILURE(status)) {
+    return Err(ToICUError(status));
+  }
+  ScopedICUObject<UResourceBundle, ures_close> closeTimeData(timeData);
+  MOZ_ASSERT(ures_getType(timeData) == URES_TABLE);
+
+  // "timeData" can be keyed by `language_region` or by `region`. Try the more
+  // specific `language_region` first and fall back to `region`.
+  UResourceBundle* hclocale =
+      ures_getByKey(timeData, localeId.languageRegion(), nullptr, &status);
+  if (status == U_MISSING_RESOURCE_ERROR) {
+    status = U_ZERO_ERROR;
+    hclocale = ures_getByKey(timeData, localeId.region(), nullptr, &status);
+  }
+  if (status == U_MISSING_RESOURCE_ERROR) {
+    // Default to "h23" if no resource was found at all. This matches ICU.
+    result.infallibleAppend(HourCycle::H23);
+    return result;
+  }
+  if (U_FAILURE(status)) {
+    return Err(ToICUError(status));
+  }
+  ScopedICUObject<UResourceBundle, ures_close> closeHcLocale(hclocale);
+  MOZ_ASSERT(ures_getType(hclocale) == URES_TABLE);
+
+  // Hour cycles which are already part of |result|.
+  EnumSet<HourCycle> seen{};
+
+  auto appendUnique = [&](const UChar* str, int32_t len) {
+    // An hour cycle string is one of "K", "h", "H", or "k"; optionally
+    // followed by the suffix "b" or "B". The suffix is ignored because day
+    // periods can't be expressed in the "hc" Unicode extension.
+    MOZ_ASSERT(len == 1 || len == 2);
+
+    // Unsupported hour cycle strings are treated as "h23".
+    HourCycle cycle = HourCycle::H23;
+    const UChar symbol = str[0];
+    if (symbol == 'K') {
+      cycle = HourCycle::H11;
+    } else if (symbol == 'h') {
+      cycle = HourCycle::H12;
+    } else if (symbol == 'k') {
+      cycle = HourCycle::H24;
+    }
+
+    // Each hour cycle is added to the result only once.
+    if (seen.contains(cycle)) {
+      return;
+    }
+    seen += cycle;
+    result.infallibleAppend(cycle);
+  };
+
+  // The preferred hour cycle of the locale comes first.
+  int32_t preferredLength = 0;
+  const UChar* preferred =
+      ures_getStringByKey(hclocale, "preferred", &preferredLength, &status);
+  if (U_FAILURE(status)) {
+    return Err(ToICUError(status));
+  }
+  appendUnique(preferred, preferredLength);
+
+  // Then any additionally allowed hour cycles of the locale.
+  UResourceBundle* allowed =
+      ures_getByKey(hclocale, "allowed", nullptr, &status);
+  if (U_FAILURE(status)) {
+    return Err(ToICUError(status));
+  }
+  ScopedICUObject<UResourceBundle, ures_close> closeAllowed(allowed);
+  MOZ_ASSERT(ures_getType(allowed) == URES_ARRAY ||
+             ures_getType(allowed) == URES_STRING);
+
+  while (ures_hasNext(allowed)) {
+    int32_t entryLength = 0;
+    const UChar* entry =
+        ures_getNextString(allowed, &entryLength, nullptr, &status);
+    if (U_FAILURE(status)) {
+      return Err(ToICUError(status));
+    }
+    appendUnique(entry, entryLength);
+  }
+
+  return result;
+}
+
+/**
+ * Maps this formatter's ICU pattern string to the corresponding set of
+ * date-time components and their values, as reported by the resolvedOptions
+ * method. For the interpretation of ICU pattern characters, see
+ * http://unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table
+ */
+Result<DateTimeFormat::ComponentsBag, ICUError>
+DateTimeFormat::ResolveComponents() {
+  using Text = DateTimeFormat::Text;
+  using HourCycle = DateTimeFormat::HourCycle;
+  using Numeric = DateTimeFormat::Numeric;
+  using Month = DateTimeFormat::Month;
+
+  DateTimeFormat::PatternVector pattern{};
+  VectorToBufferAdaptor buffer(pattern);
+  MOZ_TRY(GetPattern(buffer));
+
+  DateTimeFormat::ComponentsBag bag{};
+
+  // Widths decoded from the most recent field of the matching kind.
+  Text textWidth = Text::Long;
+  Numeric numericWidth = Numeric::Numeric;
+  Month monthWidth = Month::Long;
+  uint8_t fractionalSecondDigits = 0;
+
+  const size_t len = pattern.length();
+  size_t pos = 0;
+  while (pos < len) {
+    const char16_t symbol = pattern[pos++];
+
+    if (symbol == u'\'') {
+      // Skip past string literals, including the closing quote.
+      while (pos < len && pattern[pos] != u'\'') {
+        ++pos;
+      }
+      ++pos;
+      continue;
+    }
+
+    // Length of the run of identical symbols starting at |symbol|.
+    size_t runLength = 1;
+    for (; pos < len && pattern[pos] == symbol; ++pos) {
+      ++runLength;
+    }
+
+    // First decode the width of the field from the run length.
+    switch (symbol) {
+      // "text" cases
+      case u'G':
+      case u'E':
+      case u'c':
+      case u'B':
+      case u'z':
+      case u'O':
+      case u'v':
+      case u'V':
+        if (runLength == 4) {
+          textWidth = Text::Long;
+        } else if (runLength <= 3) {
+          textWidth = Text::Short;
+        } else {
+          textWidth = Text::Narrow;
+        }
+        break;
+      // "number" cases
+      case u'y':
+      case u'd':
+      case u'h':
+      case u'H':
+      case u'm':
+      case u's':
+      case u'k':
+      case u'K':
+        numericWidth = runLength == 2 ? Numeric::TwoDigit : Numeric::Numeric;
+        break;
+      // "text & number" cases
+      case u'M':
+      case u'L':
+        switch (runLength) {
+          case 1:
+            monthWidth = Month::Numeric;
+            break;
+          case 2:
+            monthWidth = Month::TwoDigit;
+            break;
+          case 3:
+            monthWidth = Month::Short;
+            break;
+          case 4:
+            monthWidth = Month::Long;
+            break;
+          default:
+            monthWidth = Month::Narrow;
+            break;
+        }
+        break;
+      case u'S':
+        fractionalSecondDigits = static_cast<uint8_t>(runLength);
+        break;
+      default:
+        // Other pattern characters and literal text carry no width.
+        break;
+    }
+
+    // Then map the ICU pattern character back to the corresponding date-time
+    // component of DateTimeFormat. See
+    // http://unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table
+    switch (symbol) {
+      case u'E':
+      case u'c':
+        bag.weekday = Some(textWidth);
+        break;
+      case u'G':
+        bag.era = Some(textWidth);
+        break;
+      case u'y':
+        bag.year = Some(numericWidth);
+        break;
+      case u'M':
+      case u'L':
+        bag.month = Some(monthWidth);
+        break;
+      case u'd':
+        bag.day = Some(numericWidth);
+        break;
+      case u'B':
+        bag.dayPeriod = Some(textWidth);
+        break;
+      case u'K':
+        bag.hourCycle = Some(HourCycle::H11);
+        bag.hour = Some(numericWidth);
+        bag.hour12 = Some(true);
+        break;
+      case u'h':
+        bag.hourCycle = Some(HourCycle::H12);
+        bag.hour = Some(numericWidth);
+        bag.hour12 = Some(true);
+        break;
+      case u'H':
+        bag.hourCycle = Some(HourCycle::H23);
+        bag.hour = Some(numericWidth);
+        bag.hour12 = Some(false);
+        break;
+      case u'k':
+        bag.hourCycle = Some(HourCycle::H24);
+        bag.hour = Some(numericWidth);
+        bag.hour12 = Some(false);
+        break;
+      case u'm':
+        bag.minute = Some(numericWidth);
+        break;
+      case u's':
+        bag.second = Some(numericWidth);
+        break;
+      case u'S':
+        bag.fractionalSecondDigits = Some(fractionalSecondDigits);
+        break;
+      // For the time zone symbols only the long width is distinguished; short
+      // and narrow both map to the short variant.
+      case u'z':
+        bag.timeZoneName = Some(textWidth == Text::Long ? TimeZoneName::Long
+                                                        : TimeZoneName::Short);
+        break;
+      case u'O':
+        bag.timeZoneName =
+            Some(textWidth == Text::Long ? TimeZoneName::LongOffset
+                                         : TimeZoneName::ShortOffset);
+        break;
+      case u'v':
+      case u'V':
+        bag.timeZoneName =
+            Some(textWidth == Text::Long ? TimeZoneName::LongGeneric
+                                         : TimeZoneName::ShortGeneric);
+        break;
+    }
+  }
+
+  return bag;
+}
+
+// Returns the option string for |aTimeZoneName|; crashes on an unknown value.
+const char* DateTimeFormat::ToString(
+    DateTimeFormat::TimeZoneName aTimeZoneName) {
+  const char* name = nullptr;
+  switch (aTimeZoneName) {
+    case TimeZoneName::Short:
+      name = "short";
+      break;
+    case TimeZoneName::Long:
+      name = "long";
+      break;
+    case TimeZoneName::ShortOffset:
+      name = "shortOffset";
+      break;
+    case TimeZoneName::LongOffset:
+      name = "longOffset";
+      break;
+    case TimeZoneName::ShortGeneric:
+      name = "shortGeneric";
+      break;
+    case TimeZoneName::LongGeneric:
+      name = "longGeneric";
+      break;
+  }
+  if (!name) {
+    MOZ_CRASH("Unexpected DateTimeFormat::TimeZoneName");
+  }
+  return name;
+}
+
+// Returns the option string for |aMonth|; crashes on an unknown value.
+const char* DateTimeFormat::ToString(DateTimeFormat::Month aMonth) {
+  const char* name = nullptr;
+  switch (aMonth) {
+    case Month::Numeric:
+      name = "numeric";
+      break;
+    case Month::TwoDigit:
+      name = "2-digit";
+      break;
+    case Month::Narrow:
+      name = "narrow";
+      break;
+    case Month::Short:
+      name = "short";
+      break;
+    case Month::Long:
+      name = "long";
+      break;
+  }
+  if (!name) {
+    MOZ_CRASH("Unexpected DateTimeFormat::Month");
+  }
+  return name;
+}
+
+// Returns the option string for |aText|; crashes on an unknown value.
+const char* DateTimeFormat::ToString(DateTimeFormat::Text aText) {
+  const char* name = nullptr;
+  switch (aText) {
+    case Text::Narrow:
+      name = "narrow";
+      break;
+    case Text::Short:
+      name = "short";
+      break;
+    case Text::Long:
+      name = "long";
+      break;
+  }
+  if (!name) {
+    MOZ_CRASH("Unexpected DateTimeFormat::Text");
+  }
+  return name;
+}
+
+// Returns the option string for |aNumeric|; crashes on an unknown value.
+const char* DateTimeFormat::ToString(DateTimeFormat::Numeric aNumeric) {
+  const char* name = nullptr;
+  switch (aNumeric) {
+    case Numeric::TwoDigit:
+      name = "2-digit";
+      break;
+    case Numeric::Numeric:
+      name = "numeric";
+      break;
+  }
+  if (!name) {
+    MOZ_CRASH("Unexpected DateTimeFormat::Numeric");
+  }
+  return name;
+}
+
+// Returns the option string for |aStyle|; crashes on an unknown value.
+const char* DateTimeFormat::ToString(DateTimeFormat::Style aStyle) {
+  const char* name = nullptr;
+  switch (aStyle) {
+    case Style::Short:
+      name = "short";
+      break;
+    case Style::Medium:
+      name = "medium";
+      break;
+    case Style::Long:
+      name = "long";
+      break;
+    case Style::Full:
+      name = "full";
+      break;
+  }
+  if (!name) {
+    MOZ_CRASH("Unexpected DateTimeFormat::Style");
+  }
+  return name;
+}
+
+// Returns the option string for |aHourCycle|; crashes on an unknown value.
+const char* DateTimeFormat::ToString(DateTimeFormat::HourCycle aHourCycle) {
+  const char* name = nullptr;
+  switch (aHourCycle) {
+    case HourCycle::H11:
+      name = "h11";
+      break;
+    case HourCycle::H12:
+      name = "h12";
+      break;
+    case HourCycle::H23:
+      name = "h23";
+      break;
+    case HourCycle::H24:
+      name = "h24";
+      break;
+  }
+  if (!name) {
+    MOZ_CRASH("Unexpected DateTimeFormat::HourCycle");
+  }
+  return name;
+}
+
+/**
+ * Converts the fields reported by |aFieldPositionIterator| into parts and
+ * appends them to |aParts|.
+ *
+ * Gaps between fields, and any text after the last field up to |aSpanSize|,
+ * become Literal parts. Each part records the index at which it ends.
+ *
+ * The iterator is closed when this method returns. Returns
+ * ICUError::OutOfMemory when a part can't be appended to |aParts|.
+ */
+ICUResult DateTimeFormat::TryFormatToParts(
+    UFieldPositionIterator* aFieldPositionIterator, size_t aSpanSize,
+    DateTimePartVector& aParts) const {
+  ScopedICUObject<UFieldPositionIterator, ufieldpositer_close> toClose(
+      aFieldPositionIterator);
+
+  size_t lastEndIndex = 0;
+  auto AppendPart = [&](DateTimePartType type, size_t endIndex) {
+    // For the part defined in FormatDateTimeToParts, it doesn't have ||Source||
+    // property, we store Shared for simplicity,
+    if (!aParts.emplaceBack(type, endIndex, DateTimePartSource::Shared)) {
+      return false;
+    }
+
+    lastEndIndex = endIndex;
+    return true;
+  };
+
+  int32_t fieldInt, beginIndexInt, endIndexInt;
+  while ((fieldInt = ufieldpositer_next(aFieldPositionIterator, &beginIndexInt,
+                                        &endIndexInt)) >= 0) {
+    MOZ_ASSERT(beginIndexInt <= endIndexInt,
+               "field iterator returning invalid range");
+
+    size_t beginIndex = AssertedCast<size_t>(beginIndexInt);
+    size_t endIndex = AssertedCast<size_t>(endIndexInt);
+
+    // Technically this isn't guaranteed. But it appears true in practice,
+    // and http://bugs.icu-project.org/trac/ticket/12024 is expected to
+    // correct the documentation lapse.
+    MOZ_ASSERT(lastEndIndex <= beginIndex,
+               "field iteration didn't return fields in order start to "
+               "finish as expected");
+
+    DateTimePartType type =
+        ConvertUFormatFieldToPartType(static_cast<UDateFormatField>(fieldInt));
+
+    // Text between the previous field and this one is a literal.
+    if (lastEndIndex < beginIndex) {
+      if (!AppendPart(DateTimePartType::Literal, beginIndex)) {
+        // A failed append is an allocation failure; report it like the other
+        // vector appends in this file do.
+        return Err(ICUError::OutOfMemory);
+      }
+    }
+
+    if (!AppendPart(type, endIndex)) {
+      return Err(ICUError::OutOfMemory);
+    }
+  }
+
+  // Append any final literal.
+  if (lastEndIndex < aSpanSize) {
+    if (!AppendPart(DateTimePartType::Literal, aSpanSize)) {
+      return Err(ICUError::OutOfMemory);
+    }
+  }
+
+  return Ok();
+}
+
+} // namespace mozilla::intl
diff --git a/intl/components/src/DateTimeFormat.h b/intl/components/src/DateTimeFormat.h
new file mode 100644
index 0000000000..b3e32cd276
--- /dev/null
+++ b/intl/components/src/DateTimeFormat.h
@@ -0,0 +1,593 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef intl_components_DateTimeFormat_h_
+#define intl_components_DateTimeFormat_h_
+#include <functional>
+#include "unicode/udat.h"
+
+#include "mozilla/Assertions.h"
+#include "mozilla/intl/ICU4CGlue.h"
+#include "mozilla/intl/ICUError.h"
+
+#include "mozilla/intl/DateTimePart.h"
+#include "mozilla/intl/DateTimePatternGenerator.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/Result.h"
+#include "mozilla/Span.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/Utf8.h"
+#include "mozilla/Variant.h"
+#include "mozilla/Vector.h"
+
+/*
+ * To work around webcompat problems caused by Narrow No-Break Space in
+ * formatted date/time output, where existing code on the web naively
+ * assumes there will be a normal Space, we replace any occurrences of
+ * U+202F (and U+2009 Thin Space) in the formatted results with U+0020.
+ *
+ * The intention is to undo this hack once other major browsers are also
+ * ready to ship with the updated (ICU72) i18n data that uses NNBSP.
+ *
+ * See https://bugzilla.mozilla.org/show_bug.cgi?id=1806042 for details,
+ * and see DateIntervalFormat.cpp for the other piece of this hack.
+ */
+#define DATE_TIME_FORMAT_REPLACE_SPECIAL_SPACES 1
+
+namespace mozilla::intl {
+
+#if DATE_TIME_FORMAT_REPLACE_SPECIAL_SPACES
+static inline bool IsSpecialSpace(char16_t c) {
+ // U+202F NARROW NO-BREAK SPACE and U+2009 THIN SPACE
+ return c == 0x202F || c == 0x2009;
+}
+#endif
+
+class Calendar;
+
+/**
+ * Intro to mozilla::intl::DateTimeFormat
+ * ======================================
+ *
+ * This component is a Mozilla-focused API for the date formatting provided by
+ * ICU. The methods internally call out to ICU4C. This is responsible for and
+ * owns any resources opened through ICU, through RAII.
+ *
+ * The construction of a DateTimeFormat contains the majority of the cost
+ * of the DateTimeFormat operation. DateTimeFormat::TryFormat should be
+ * relatively inexpensive after the initial construction.
+ *
+ * This class supports creating from Styles (a fixed set of options) and from a
+ * components bag (a list of components and their lengths).
+ *
+ * This API serves to back the ECMA-402 Intl.DateTimeFormat API.
+ * https://tc39.es/ecma402/#datetimeformat-objects
+ *
+ *
+ * ECMA-402 Intl.DateTimeFormat API and implementation details with ICU
+ * skeletons and patterns.
+ * ====================================================================
+ *
+ * Different locales have different ways to display dates using the same
+ * basic components. For example, en-US might use "Sept. 24, 2012" while
+ * fr-FR might use "24 Sept. 2012". The intent of Intl.DateTimeFormat is to
+ * permit production of a format for the locale that best matches the
+ * set of date-time components and their desired representation as specified
+ * by the API client.
+ *
+ * ICU4C supports specification of date and time formats in three ways:
+ *
+ * 1) A style is just one of the identifiers FULL, LONG, MEDIUM, or SHORT.
+ * The date-time components included in each style and their representation
+ * are defined by ICU using CLDR locale data (CLDR is the Unicode
+ * Consortium's Common Locale Data Repository).
+ *
+ * 2) A skeleton is a string specifying which date-time components to include,
+ * and which representations to use for them. For example, "yyyyMMMMdd"
+ * specifies a year with at least four digits, a full month name, and a
+ * two-digit day. It does not specify in which order the components appear,
+ * how they are separated, the localized strings for textual components
+ * (such as weekday or month), whether the month is in format or
+ * stand-alone form¹, or the numbering system used for numeric components.
+ * All that information is filled in by ICU using CLDR locale data.
+ * ¹ The format form is the one used in formatted strings that include a
+ * day; the stand-alone form is used when not including days, e.g., in
+ * calendar headers. The two forms differ at least in some Slavic languages,
+ * e.g. Russian: "22 марта 2013 г." vs. "Март 2013".
+ *
+ * 3) A pattern is a string specifying which date-time components to include,
+ * in which order, with which separators, in which grammatical case. For
+ * example, "EEEE, d MMMM y" specifies the full localized weekday name,
+ * followed by comma and space, followed by the day, followed by space,
+ * followed by the full month name in format form, followed by space,
+ * followed by the full year. It
+ * still does not specify localized strings for textual components and the
+ * numbering system - these are determined by ICU using CLDR locale data or
+ * possibly API parameters.
+ *
+ * All actual formatting in ICU4C is done with patterns; styles and skeletons
+ * have to be mapped to patterns before processing.
+ *
+ * The options of Intl.DateTimeFormat most closely correspond to ICU skeletons.
+ * This implementation therefore converts DateTimeFormat options to ICU
+ * skeletons, and then lets ICU map skeletons to actual ICU patterns. The
+ * pattern may not directly correspond to what the skeleton requests, as the
+ * mapper (UDateTimePatternGenerator) is constrained by the available locale
+ * data for the locale.
+ *
+ * An ICU pattern represents the information of the following DateTimeFormat
+ * internal properties described in the specification, which therefore don't
+ * exist separately in the implementation:
+ * - [[weekday]], [[era]], [[year]], [[month]], [[day]], [[hour]], [[minute]],
+ * [[second]], [[timeZoneName]]
+ * - [[hour12]]
+ * - [[hourCycle]]
+ * - [[hourNo0]]
+ * When needed for the resolvedOptions method, the resolveICUPattern function
+ * queries the UDateFormat's internal pattern and then maps it back to the
+ * specified properties of the object returned by resolvedOptions.
+ *
+ * ICU date-time skeletons and patterns aren't fully documented in the ICU
+ * documentation (see http://bugs.icu-project.org/trac/ticket/9627). The best
+ * documentation at this point is in UTR 35:
+ * http://unicode.org/reports/tr35/tr35-dates.html#Date_Format_Patterns
+ *
+ * Future support for ICU4X
+ * ========================
+ * This implementation exposes a components bag, and internally handles the
+ * complexity of working with skeletons and patterns to generate the correct
+ * results. In the future, if and when we switch to ICU4X, the complexities of
+ * manipulating patterns will be able to be removed, as ICU4X will directly know
+ * how to apply the components bag.
+ */
+class DateTimeFormat final {
+ public:
+ /**
+ * The hour cycle for components.
+ */
+ enum class HourCycle {
+ H11,
+ H12,
+ H23,
+ H24,
+ };
+
+ /**
+ * The style for dates or times.
+ */
+ enum class Style {
+ Full,
+ Long,
+ Medium,
+ Short,
+ };
+
+ /**
+ * A bag of options to determine the length of the time and date styles. The
+ * hour cycle can be overridden.
+ */
+ struct StyleBag {
+ Maybe<Style> date = Nothing();
+ Maybe<Style> time = Nothing();
+ Maybe<HourCycle> hourCycle = Nothing();
+ Maybe<bool> hour12 = Nothing();
+ };
+
+ /**
+ * How to display numeric components such as the year and the day.
+ */
+ enum class Numeric {
+ Numeric,
+ TwoDigit,
+ };
+
+ /**
+ * How to display the text components, such as the weekday or day period.
+ */
+ enum class Text {
+ Long,
+ Short,
+ Narrow,
+ };
+
+ /**
+ * How to display the month.
+ */
+ enum class Month {
+ Numeric,
+ TwoDigit,
+ Long,
+ Short,
+ Narrow,
+ };
+
+ /**
+ * How to display the time zone name.
+ */
+ enum class TimeZoneName {
+ Long,
+ Short,
+ ShortOffset,
+ LongOffset,
+ ShortGeneric,
+ LongGeneric,
+ };
+
+ /**
+ * Get static strings representing the enums. These match ECMA-402's resolved
+ * options.
+ * https://tc39.es/ecma402/#sec-intl.datetimeformat.prototype.resolvedoptions
+ */
+ static const char* ToString(DateTimeFormat::HourCycle aHourCycle);
+ static const char* ToString(DateTimeFormat::Style aStyle);
+ static const char* ToString(DateTimeFormat::Numeric aNumeric);
+ static const char* ToString(DateTimeFormat::Text aText);
+ static const char* ToString(DateTimeFormat::Month aMonth);
+ static const char* ToString(DateTimeFormat::TimeZoneName aTimeZoneName);
+
+ /**
+ * A components bag specifies the components used to display a DateTime. Each
+ * component can be styled individually, and ICU will attempt to create a best
+ * match for a given locale.
+ */
+ struct ComponentsBag {
+ Maybe<Text> era = Nothing();
+ Maybe<Numeric> year = Nothing();
+ Maybe<Month> month = Nothing();
+ Maybe<Numeric> day = Nothing();
+ Maybe<Text> weekday = Nothing();
+ Maybe<Numeric> hour = Nothing();
+ Maybe<Numeric> minute = Nothing();
+ Maybe<Numeric> second = Nothing();
+ Maybe<TimeZoneName> timeZoneName = Nothing();
+ Maybe<bool> hour12 = Nothing();
+ Maybe<HourCycle> hourCycle = Nothing();
+ Maybe<Text> dayPeriod = Nothing();
+ Maybe<uint8_t> fractionalSecondDigits = Nothing();
+ };
+
+ // Do not allow copy as this class owns the ICU resource. Move is not
+ // currently implemented, but a custom move operator could be created if
+ // needed.
+ DateTimeFormat(const DateTimeFormat&) = delete;
+ DateTimeFormat& operator=(const DateTimeFormat&) = delete;
+
+ // mozilla::Vector can avoid heap allocations for small transient buffers.
+ using PatternVector = Vector<char16_t, 128>;
+ using SkeletonVector = Vector<char16_t, 16>;
+
+ /**
+ * Create a DateTimeFormat from styles.
+ *
+ * The "style" model uses different options for formatting a date or time
+ * based on how the result will be styled, rather than picking specific
+ * fields or lengths.
+ *
+ * Takes an optional time zone which will override the user's default
+ * time zone. This is a UTF-16 string that takes the form "GMT±hh:mm", or
+ * an IANA time zone identifier, e.g. "America/Chicago".
+ */
+ static Result<UniquePtr<DateTimeFormat>, ICUError> TryCreateFromStyle(
+ Span<const char> aLocale, const StyleBag& aStyleBag,
+ DateTimePatternGenerator* aDateTimePatternGenerator,
+ Maybe<Span<const char16_t>> aTimeZoneOverride = Nothing{});
+
+ private:
+ /**
+ * Create a DateTimeFormat from a UTF-16 skeleton.
+ *
+ * A skeleton is an unordered list of fields that are used to find an
+ * appropriate date time format pattern. Example skeletons would be "yMd",
+ * "yMMMd", "EBhm". If the skeleton includes string literals or other
+ * information, it will be discarded when matching against skeletons.
+ *
+ * Takes an optional time zone which will override the user's default
+ * time zone. This is a string that takes the form "GMT±hh:mm", or
+ * an IANA time zone identifier, e.g. "America/Chicago".
+ */
+ static Result<UniquePtr<DateTimeFormat>, ICUError> TryCreateFromSkeleton(
+ Span<const char> aLocale, Span<const char16_t> aSkeleton,
+ DateTimePatternGenerator* aDateTimePatternGenerator,
+ Maybe<DateTimeFormat::HourCycle> aHourCycle,
+ Maybe<Span<const char16_t>> aTimeZoneOverride);
+
+ public:
+ /**
+ * Create a DateTimeFormat from a ComponentsBag.
+ *
+ * See the ComponentsBag for additional documentation.
+ *
+ * Takes an optional time zone which will override the user's default
+ * time zone. This is a string that takes the form "GMT±hh:mm", or
+ * an IANA time zone identifier, e.g. "America/Chicago".
+ */
+ static Result<UniquePtr<DateTimeFormat>, ICUError> TryCreateFromComponents(
+ Span<const char> aLocale, const ComponentsBag& bag,
+ DateTimePatternGenerator* aDateTimePatternGenerator,
+ Maybe<Span<const char16_t>> aTimeZoneOverride = Nothing{});
+
+ /**
+ * Create a DateTimeFormat from a raw pattern.
+ *
+ * Warning: This method should not be added to new code. In the near future we
+ * plan to remove it.
+ */
+ static Result<UniquePtr<DateTimeFormat>, ICUError> TryCreateFromPattern(
+ Span<const char> aLocale, Span<const char16_t> aPattern,
+ Maybe<Span<const char16_t>> aTimeZoneOverride = Nothing{});
+
+ /**
+ * Use the format settings to format a date time into a string. The non-null
+ * terminated string will be placed into the provided buffer. The idea behind
+ * this API is that the constructor is expensive, and then the format
+ * operation is cheap.
+ *
+ * aUnixEpoch is the number of milliseconds since 1 January 1970, UTC.
+ */
+ template <typename B>
+ ICUResult TryFormat(double aUnixEpoch, B& aBuffer) const {
+ static_assert(
+ std::is_same_v<typename B::CharType, unsigned char> ||
+ std::is_same_v<typename B::CharType, char> ||
+ std::is_same_v<typename B::CharType, char16_t>,
+ "The only buffer CharTypes supported by DateTimeFormat are char or "
+ "unsigned char (for UTF-8 support) and char16_t (for UTF-16 support).");
+
+ if constexpr (std::is_same_v<typename B::CharType, char> ||
+ std::is_same_v<typename B::CharType, unsigned char>) {
+ // The output buffer is UTF-8, but ICU uses UTF-16 internally.
+
+ // Write the formatted date into the intermediate UTF-16 vector u16Vec.
+ PatternVector u16Vec;
+
+ auto result = FillBufferWithICUCall(
+ u16Vec, [this, &aUnixEpoch](UChar* target, int32_t length,
+ UErrorCode* status) {
+ return udat_format(mDateFormat, aUnixEpoch, target, length,
+ /* UFieldPosition* */ nullptr, status);
+ });
+ if (result.isErr()) {
+ return result;
+ }
+
+#if DATE_TIME_FORMAT_REPLACE_SPECIAL_SPACES
+ for (auto& c : u16Vec) {  // Replace before the copy into aBuffer below.
+ if (IsSpecialSpace(c)) {
+ c = ' ';
+ }
+ }
+#endif
+
+ if (!FillBuffer(u16Vec, aBuffer)) {  // Copies u16Vec into the UTF-8 buffer.
+ return Err(ICUError::OutOfMemory);
+ }
+ return Ok{};
+ } else {
+ static_assert(std::is_same_v<typename B::CharType, char16_t>);
+
+ // The output buffer is UTF-16. ICU can output directly into this buffer.
+ auto result = FillBufferWithICUCall(
+ aBuffer, [&](UChar* target, int32_t length, UErrorCode* status) {
+ return udat_format(mDateFormat, aUnixEpoch, target, length, nullptr,
+ status);
+ });
+ if (result.isErr()) {
+ return result;
+ }
+
+#if DATE_TIME_FORMAT_REPLACE_SPECIAL_SPACES
+ for (auto& c : Span(aBuffer.data(), aBuffer.length())) {  // In place.
+ if (IsSpecialSpace(c)) {
+ c = ' ';
+ }
+ }
+#endif
+
+ return Ok{};
+ }
+ }
+
+ /**
+ * Format the Unix epoch time into a DateTimePartVector.
+ *
+ * The caller has to create the buffer and the vector and pass to this method.
+ * The formatted string will be stored in the buffer and formatted parts in
+ * the vector.
+ *
+ * aUnixEpoch is the number of milliseconds since 1 January 1970, UTC.
+ *
+ * See:
+ * https://tc39.es/ecma402/#sec-formatdatetimetoparts
+ */
+ template <typename B>
+ ICUResult TryFormatToParts(double aUnixEpoch, B& aBuffer,
+ DateTimePartVector& aParts) const {
+ static_assert(std::is_same_v<typename B::CharType, char16_t>,
+ "Only char16_t is supported (for UTF-16 support) now.");
+
+ UErrorCode status = U_ZERO_ERROR;
+ UFieldPositionIterator* fpositer = ufieldpositer_open(&status);
+ if (U_FAILURE(status)) {
+ return Err(ToICUError(status));
+ }
+
+ auto result = FillBufferWithICUCall(
+ aBuffer, [this, aUnixEpoch, fpositer](UChar* chars, int32_t size,
+ UErrorCode* status) {
+ return udat_formatForFields(mDateFormat, aUnixEpoch, chars, size,
+ fpositer, status);
+ });
+ if (result.isErr()) {
+ ufieldpositer_close(fpositer);  // Not handed off yet, so close it here.
+ return result.propagateErr();
+ }
+
+#if DATE_TIME_FORMAT_REPLACE_SPECIAL_SPACES
+ for (auto& c : Span(aBuffer.data(), aBuffer.length())) {  // 1:1, in place.
+ if (IsSpecialSpace(c)) {
+ c = ' ';
+ }
+ }
+#endif
+
+ return TryFormatToParts(fpositer, aBuffer.length(), aParts);  // The private overload wraps fpositer in a ScopedICUObject.
+ }
+
+ /**
+ * Copies the pattern for the current DateTimeFormat to a buffer.
+ *
+ * Warning: This method should not be added to new code. In the near future we
+ * plan to remove it.
+ */
+ template <typename B>
+ ICUResult GetPattern(B& aBuffer) const {
+ return FillBufferWithICUCall(
+ aBuffer, [&](UChar* target, int32_t length, UErrorCode* status) {
+ return udat_toPattern(mDateFormat, /* localized */ false, target,
+ length, status);
+ });
+ }
+
+ /**
+ * Copies the skeleton that was used to generate the current DateTimeFormat to
+ * the given buffer. If no skeleton was used, then a skeleton is generated
+ * from the resolved pattern. Note that going from skeleton -> resolved
+ * pattern -> skeleton is not a 1:1 mapping, as the resolved pattern can
+ * contain different symbols than the requested skeleton.
+ *
+ * Warning: This method should not be added to new code. In the near future we
+ * plan to remove it.
+ */
+ template <typename B>
+ ICUResult GetOriginalSkeleton(B& aBuffer) {
+ static_assert(std::is_same_v<typename B::CharType, char16_t>);
+ if (mOriginalSkeleton.length() == 0) {
+ // No skeleton is cached yet, so generate one from the resolved pattern
+ // and store it in mOriginalSkeleton.
+ PatternVector pattern{};
+ VectorToBufferAdaptor buffer(pattern);
+ MOZ_TRY(GetPattern(buffer));
+
+ VectorToBufferAdaptor skeleton(mOriginalSkeleton);  // Output goes to the member.
+ MOZ_TRY(DateTimePatternGenerator::GetSkeleton(pattern, skeleton));
+ }
+
+ if (!FillBuffer(mOriginalSkeleton, aBuffer)) {
+ return Err(ICUError::OutOfMemory);
+ }
+ return Ok();
+ }
+ /**
+ * Set the start time of the Gregorian calendar. This is useful for
+ * ensuring the consistent use of a proleptic Gregorian calendar for ECMA-402.
+ * https://en.wikipedia.org/wiki/Proleptic_Gregorian_calendar
+ */
+ void SetStartTimeIfGregorian(double aTime);
+
+ /**
+ * Determines the resolved components for the current DateTimeFormat.
+ *
+ * When a DateTimeFormat is created, even from a components bag, the resolved
+ * formatter may tweak the resolved components depending on the configuration
+ * and the locale.
+ *
+ * For the implementation, with ICU4C, this takes a string pattern and maps it
+ * back to a ComponentsBag.
+ */
+ Result<ComponentsBag, ICUError> ResolveComponents();
+
+ ~DateTimeFormat();
+
+ /**
+ * Clones the Calendar from a DateTimeFormat, and sets its time with the
+ * relative milliseconds since 1 January 1970, UTC.
+ */
+ Result<UniquePtr<Calendar>, ICUError> CloneCalendar(double aUnixEpoch) const;
+
+ /**
+ * Return the hour cycle used in the input pattern or Nothing if none was
+ * found.
+ */
+ static Maybe<DateTimeFormat::HourCycle> HourCycleFromPattern(
+ Span<const char16_t> aPattern);
+
+ using HourCyclesVector = Vector<HourCycle, 4>;
+
+ /**
+ * Returns the allowed hour cycles for the input locale.
+ *
+ * NOTE: This function currently takes a language subtag and an optional
+ * region subtag. This is a restriction until bug 1719746 has migrated
+ * language tag processing into the unified Intl component. After bug 1719746,
+ * this function should be changed to accept a single locale tag.
+ */
+ static Result<HourCyclesVector, ICUError> GetAllowedHourCycles(
+ Span<const char> aLanguage, Maybe<Span<const char>> aRegion);
+
+ /**
+ * Returns an iterator over all supported date-time formatter locales.
+ *
+ * The returned strings are ICU locale identifiers and NOT BCP 47 language
+ * tags.
+ *
+ * Also see <https://unicode-org.github.io/icu/userguide/locale>.
+ */
+ static auto GetAvailableLocales() {
+ return AvailableLocalesEnumeration<udat_countAvailable,
+ udat_getAvailable>();
+ }
+
+ private:
+ explicit DateTimeFormat(UDateFormat* aDateFormat);
+
+ ICUResult CacheSkeleton(Span<const char16_t> aSkeleton);
+
+ ICUResult TryFormatToParts(UFieldPositionIterator* aFieldPositionIterator,
+ size_t aSpanSize,
+ DateTimePartVector& aParts) const;
+ /**
+ * Replaces all hour pattern characters in |patternOrSkeleton| to use the
+ * matching hour representation for |hourCycle|.
+ */
+ static void ReplaceHourSymbol(Span<char16_t> aPatternOrSkeleton,
+ DateTimeFormat::HourCycle aHourCycle);
+
+ /**
+ * Find a matching pattern using the requested hour-12 options.
+ *
+ * This function is needed to work around the following two issues.
+ * - https://unicode-org.atlassian.net/browse/ICU-21023
+ * - https://unicode-org.atlassian.net/browse/CLDR-13425
+ *
+ * We're currently using a relatively simple workaround, which doesn't give
+ * the most accurate results. For example:
+ *
+ * ```
+ * var dtf = new Intl.DateTimeFormat("en", {
+ * timeZone: "UTC",
+ * dateStyle: "long",
+ * timeStyle: "long",
+ * hourCycle: "h12",
+ * });
+ * print(dtf.format(new Date("2020-01-01T00:00Z")));
+ * ```
+ *
+ * Returns the pattern "MMMM d, y 'at' h:mm:ss a z", but when going through
+ * |DateTimePatternGenerator::GetSkeleton| and then
+ * |DateTimePatternGenerator::GetBestPattern| to find an equivalent pattern
+ * for "h23", we'll end up with the pattern "MMMM d, y, HH:mm:ss z", so the
+ * combinator element " 'at' " was lost in the process.
+ */
+ static ICUResult FindPatternWithHourCycle(
+ DateTimePatternGenerator& aDateTimePatternGenerator,
+ DateTimeFormat::PatternVector& aPattern, bool aHour12,
+ DateTimeFormat::SkeletonVector& aSkeleton);
+
+ UDateFormat* mDateFormat = nullptr;
+
+ SkeletonVector mOriginalSkeleton;
+};
+
+} // namespace mozilla::intl
+
+#endif
diff --git a/intl/components/src/DateTimeFormatUtils.cpp b/intl/components/src/DateTimeFormatUtils.cpp
new file mode 100644
index 0000000000..fd0649461e
--- /dev/null
+++ b/intl/components/src/DateTimeFormatUtils.cpp
@@ -0,0 +1,104 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/Assertions.h"
+
+#include "DateTimeFormatUtils.h"
+
+namespace mozilla::intl {
+
+DateTimePartType ConvertUFormatFieldToPartType(UDateFormatField fieldName) {
+ // See intl/icu/source/i18n/unicode/udat.h for a detailed field list. This
+ // switch is deliberately exhaustive: cases might have to be added/removed
+ // if this code is compiled with a different ICU with more
+ // UDateFormatField enum initializers. Please guard such cases with
+ // appropriate ICU version-testing #ifdefs, should cross-version divergence
+ // occur.
+ switch (fieldName) {
+ case UDAT_ERA_FIELD:
+ return DateTimePartType::Era;
+
+ case UDAT_YEAR_FIELD:
+ case UDAT_YEAR_WOY_FIELD:
+ case UDAT_EXTENDED_YEAR_FIELD:
+ return DateTimePartType::Year;
+
+ case UDAT_YEAR_NAME_FIELD:
+ return DateTimePartType::YearName;
+
+ case UDAT_MONTH_FIELD:
+ case UDAT_STANDALONE_MONTH_FIELD:
+ return DateTimePartType::Month;
+
+ case UDAT_DATE_FIELD:
+ case UDAT_JULIAN_DAY_FIELD:
+ return DateTimePartType::Day;
+
+ case UDAT_HOUR_OF_DAY1_FIELD:
+ case UDAT_HOUR_OF_DAY0_FIELD:
+ case UDAT_HOUR1_FIELD:
+ case UDAT_HOUR0_FIELD:
+ return DateTimePartType::Hour;
+
+ case UDAT_MINUTE_FIELD:
+ return DateTimePartType::Minute;
+
+ case UDAT_SECOND_FIELD:
+ return DateTimePartType::Second;
+
+ case UDAT_DAY_OF_WEEK_FIELD:
+ case UDAT_STANDALONE_DAY_FIELD:
+ case UDAT_DOW_LOCAL_FIELD:
+ case UDAT_DAY_OF_WEEK_IN_MONTH_FIELD:
+ return DateTimePartType::Weekday;
+
+ case UDAT_AM_PM_FIELD:
+ case UDAT_FLEXIBLE_DAY_PERIOD_FIELD:
+ return DateTimePartType::DayPeriod;
+
+ case UDAT_TIMEZONE_FIELD:
+ case UDAT_TIMEZONE_GENERIC_FIELD:
+ case UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD:
+ return DateTimePartType::TimeZoneName;
+
+ case UDAT_FRACTIONAL_SECOND_FIELD:
+ return DateTimePartType::FractionalSecondDigits;
+
+#ifndef U_HIDE_INTERNAL_API
+ case UDAT_RELATED_YEAR_FIELD:
+ return DateTimePartType::RelatedYear;
+#endif
+
+ case UDAT_DAY_OF_YEAR_FIELD:
+ case UDAT_WEEK_OF_YEAR_FIELD:
+ case UDAT_WEEK_OF_MONTH_FIELD:
+ case UDAT_MILLISECONDS_IN_DAY_FIELD:
+ case UDAT_TIMEZONE_RFC_FIELD:
+ case UDAT_QUARTER_FIELD:
+ case UDAT_STANDALONE_QUARTER_FIELD:
+ case UDAT_TIMEZONE_SPECIAL_FIELD:
+ case UDAT_TIMEZONE_ISO_FIELD:
+ case UDAT_TIMEZONE_ISO_LOCAL_FIELD:
+ case UDAT_AM_PM_MIDNIGHT_NOON_FIELD:
+#ifndef U_HIDE_INTERNAL_API
+ case UDAT_TIME_SEPARATOR_FIELD:
+#endif
+ // These fields have no matching part type; report them as Unknown.
+ return DateTimePartType::Unknown;
+
+#ifndef U_HIDE_DEPRECATED_API
+ case UDAT_FIELD_COUNT:
+ MOZ_ASSERT_UNREACHABLE(
+ "format field sentinel value returned by "
+ "iterator!");  // No return: control leaves the switch to the code below.
+#endif
+ }
+
+ MOZ_ASSERT_UNREACHABLE(
+ "unenumerated, undocumented format field returned "
+ "by iterator");
+ return DateTimePartType::Unknown;  // Reached only if the assertions above do not abort.
+}
+
+} // namespace mozilla::intl
diff --git a/intl/components/src/DateTimeFormatUtils.h b/intl/components/src/DateTimeFormatUtils.h
new file mode 100644
index 0000000000..89187b9871
--- /dev/null
+++ b/intl/components/src/DateTimeFormatUtils.h
@@ -0,0 +1,14 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef intl_components_DateTimeFormatUtils_h_
+#define intl_components_DateTimeFormatUtils_h_
+#include "unicode/udat.h"
+
+#include "mozilla/intl/DateTimePart.h"
+
+namespace mozilla::intl {
+DateTimePartType ConvertUFormatFieldToPartType(UDateFormatField fieldName);
+} // namespace mozilla::intl
+
+#endif
diff --git a/intl/components/src/DateTimePart.h b/intl/components/src/DateTimePart.h
new file mode 100644
index 0000000000..4de2c22996
--- /dev/null
+++ b/intl/components/src/DateTimePart.h
@@ -0,0 +1,84 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef intl_components_DateTimePart_h_
+#define intl_components_DateTimePart_h_
+
+#include <cstddef>
+#include <cstdint>
+
+#include "mozilla/Vector.h"
+
+namespace mozilla::intl {
+
+enum class DateTimePartType : int16_t {
+ Literal,
+ Weekday,
+ Era,
+ Year,
+ YearName,
+ RelatedYear,
+ Month,
+ Day,
+ DayPeriod,
+ Hour,
+ Minute,
+ Second,
+ FractionalSecondDigits,
+ TimeZoneName,
+ Unknown
+};
+
+enum class DateTimePartSource : int16_t { Shared, StartRange, EndRange };
+
+/**
+ * The 'Part' object defined in FormatDateTimeToParts and
+ * FormatDateTimeRangeToParts
+ *
+ * Each part consists of three properties: ||Type||, ||Value|| and ||Source||,
+ * where the ||Source|| property is set to DateTimePartSource::Shared by default.
+ * (Note: From the spec, the part from FormatDateTimeToParts doesn't have the
+ * ||Source|| property, so if the caller is FormatDateTimeToParts, it should
+ * ignore the ||Source|| property).
+ *
+ * To store DateTimePart more efficiently, it doesn't store the ||Value|| of
+ * type string in this struct. Instead, it stores the end index of the string
+ * in the buffer (which is passed to DateTimeFormat::TryFormatToParts() or
+ * can be obtained by calling AutoFormattedDateInterval::ToSpan()). The begin index
+ * of the ||Value|| is the mEndIndex of the previous part.
+ *
+ * Buffer
+ * 0 i j
+ * +---------------+---------------+---------------+
+ * | Part[0].Value | Part[1].Value | Part[2].Value | ....
+ * +---------------+---------------+---------------+
+ *
+ * Part[0].mEndIndex is i. Part[0].Value is stored in the Buffer[0..i].
+ * Part[1].mEndIndex is j. Part[1].Value is stored in the Buffer[i..j].
+ *
+ * See:
+ * https://tc39.es/ecma402/#sec-formatdatetimetoparts
+ * https://tc39.es/ecma402/#sec-formatdatetimerangetoparts
+ */
+struct DateTimePart {
+ DateTimePart(DateTimePartType type, size_t endIndex,
+ DateTimePartSource source)
+ : mEndIndex(endIndex), mType(type), mSource(source) {}
+
+ // End index of this part's value in the formatted buffer. mEndIndex is
+ // placed first to reduce padding.
+ size_t mEndIndex;
+ DateTimePartType mType;  // Which date/time field (or literal) this part is.
+ DateTimePartSource mSource;  // Shared unless the part belongs to a range end.
+};
+
+// The common parts are 'month', 'literal', 'day', 'literal', 'year', 'literal',
+// 'hour', 'literal', 'minute', 'literal', which are 10 parts, for DateTimeRange
+// the number will be doubled, so choosing 32 as the initial length to prevent
+// heap allocation.
+constexpr size_t INITIAL_DATETIME_PART_VECTOR_SIZE = 32;
+using DateTimePartVector =
+ mozilla::Vector<DateTimePart, INITIAL_DATETIME_PART_VECTOR_SIZE>;
+
+} // namespace mozilla::intl
+#endif
diff --git a/intl/components/src/DateTimePatternGenerator.cpp b/intl/components/src/DateTimePatternGenerator.cpp
new file mode 100644
index 0000000000..4362061172
--- /dev/null
+++ b/intl/components/src/DateTimePatternGenerator.cpp
@@ -0,0 +1,49 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include "mozilla/intl/DateTimePatternGenerator.h"
+
+namespace mozilla::intl {
+
+DateTimePatternGenerator::~DateTimePatternGenerator() {
+ // mGenerator is null once this object has been moved from (the move
+ // operations reset it), so there is nothing to close in that case.
+ if (mGenerator) {
+ udatpg_close(mGenerator.GetMut());
+ }
+}
+
+/* static */ // Opens an ICU pattern generator for aLocale and wraps it.
+Result<UniquePtr<DateTimePatternGenerator>, ICUError>
+DateTimePatternGenerator::TryCreate(const char* aLocale) {
+ UErrorCode status = U_ZERO_ERROR;
+ UDateTimePatternGenerator* generator =
+ udatpg_open(IcuLocale(aLocale), &status);
+ if (U_FAILURE(status)) {
+ return Err(ToICUError(status));  // ICU reported a failure opening it.
+ }
+ return MakeUnique<DateTimePatternGenerator>(generator);  // The wrapper's destructor calls udatpg_close.
+}
+
+DateTimePatternGenerator::DateTimePatternGenerator(
+ DateTimePatternGenerator&& other) noexcept
+ : mGenerator(other.mGenerator.GetMut()) {
+ other.mGenerator = nullptr;  // So other's destructor skips udatpg_close.
+}
+
+DateTimePatternGenerator& DateTimePatternGenerator::operator=(
+ DateTimePatternGenerator&& other) noexcept {
+ if (this == &other) {  // Self-assignment: nothing to transfer.
+ return *this;
+ }
+
+ if (mGenerator) {  // Release the generator currently held before replacing it.
+ udatpg_close(mGenerator.GetMut());
+ }
+ mGenerator = other.mGenerator.GetMut();
+ other.mGenerator = nullptr;  // So other's destructor skips udatpg_close.
+
+ return *this;
+}
+
+} // namespace mozilla::intl
diff --git a/intl/components/src/DateTimePatternGenerator.h b/intl/components/src/DateTimePatternGenerator.h
new file mode 100644
index 0000000000..d9d6de3928
--- /dev/null
+++ b/intl/components/src/DateTimePatternGenerator.h
@@ -0,0 +1,161 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef intl_components_DateTimePatternGenerator_h_
+#define intl_components_DateTimePatternGenerator_h_
+
+#include "unicode/udatpg.h"
+#include "mozilla/EnumSet.h"
+#include "mozilla/Result.h"
+#include "mozilla/Span.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/intl/ICU4CGlue.h"
+#include "mozilla/intl/ICUError.h"
+
+namespace mozilla::intl {
+
+class DisplayNames;
+
+/**
+ * The DateTimePatternGenerator is the machinery used to work with DateTime
+ * pattern manipulation. It is expensive to create one, and so generally it is
+ * created once and then cached. It may need to be passed in as an argument to
+ * different mozilla::intl APIs.
+ *
+ * An instance wraps a UDateTimePatternGenerator in mGenerator. Instances can be
+ * moved, but not copied.
+ */
+class DateTimePatternGenerator final {
+ public:
+  /**
+   * Wrap an already opened UDateTimePatternGenerator. aGenerator must not be
+   * null. Use TryCreate to open a generator for a locale.
+   */
+  explicit DateTimePatternGenerator(UDateTimePatternGenerator* aGenerator)
+      : mGenerator(aGenerator) {
+    MOZ_ASSERT(aGenerator);
+  }
+
+  // Transfer ownership of the UDateTimePatternGenerator in the move
+  // constructor.
+  DateTimePatternGenerator(DateTimePatternGenerator&& other) noexcept;
+
+  // Transfer ownership of the UDateTimePatternGenerator in the move assignment
+  // operator.
+  DateTimePatternGenerator& operator=(
+      DateTimePatternGenerator&& other) noexcept;
+
+  // Disallow copy.
+  DateTimePatternGenerator(const DateTimePatternGenerator&) = delete;
+  DateTimePatternGenerator& operator=(const DateTimePatternGenerator&) = delete;
+
+  ~DateTimePatternGenerator();
+
+  /**
+   * Create a DateTimePatternGenerator for the given locale, or return the
+   * ICUError reported while opening it.
+   */
+  static Result<UniquePtr<DateTimePatternGenerator>, ICUError> TryCreate(
+      const char* aLocale);
+
+  enum class PatternMatchOption {
+    /**
+     * Adjust the 'hour' field in the resolved pattern to match the input
+     * skeleton width.
+     */
+    HourField,
+
+    /**
+     * Adjust the 'minute' field in the resolved pattern to match the input
+     * skeleton width.
+     */
+    MinuteField,
+
+    /**
+     * Adjust the 'second' field in the resolved pattern to match the input
+     * skeleton width.
+     */
+    SecondField,
+  };
+
+  /**
+   * Given a skeleton (a string with unordered datetime fields), get a best
+   * pattern that will fit for that locale. This pattern will be filled into the
+   * buffer. e.g. The skeleton "yMd" would return the pattern "M/d/y" for en-US,
+   * or "dd/MM/y" for en-GB.
+   *
+   * The optional |options| set selects which fields are adjusted to the width
+   * used in the skeleton; see PatternMatchOption.
+   */
+  template <typename B>
+  ICUResult GetBestPattern(Span<const char16_t> aSkeleton, B& aBuffer,
+                           EnumSet<PatternMatchOption> options = {}) {
+    return FillBufferWithICUCall(
+        aBuffer, [&](UChar* target, int32_t length, UErrorCode* status) {
+          return udatpg_getBestPatternWithOptions(
+              mGenerator.GetMut(), aSkeleton.data(),
+              static_cast<int32_t>(aSkeleton.Length()),
+              toUDateTimePatternMatchOptions(options), target, length, status);
+        });
+  }
+
+  /**
+   * Get a skeleton (a string with unordered datetime fields) from a pattern.
+   * For example, both "MMM-dd" and "dd/MMM" produce the skeleton "MMMdd".
+   */
+  template <typename B>
+  static ICUResult GetSkeleton(Span<const char16_t> aPattern, B& aBuffer) {
+    // At one time udatpg_getSkeleton required a UDateTimePatternGenerator*, but
+    // now it is valid to pass in a nullptr.
+    return FillBufferWithICUCall(
+        aBuffer, [&](UChar* target, int32_t length, UErrorCode* status) {
+          return udatpg_getSkeleton(nullptr, aPattern.data(),
+                                    static_cast<int32_t>(aPattern.Length()),
+                                    target, length, status);
+        });
+  }
+
+  /**
+   * Get a pattern of the form "{1} {0}" to combine separate date and time
+   * patterns into a single pattern. The "{0}" part is the placeholder for the
+   * time pattern and "{1}" is the placeholder for the date pattern.
+   *
+   * See dateTimeFormat from
+   * https://unicode.org/reports/tr35/tr35-dates.html#dateTimeFormat
+   *
+   * Note:
+   *   In CLDR, it's called Date-Time Combined Format
+   *   https://cldr.unicode.org/translation/date-time/datetime-patterns#h.x7ca7qwzh4m
+   *
+   *   The naming 'placeholder pattern' is from ICU4X.
+   *   https://unicode-org.github.io/icu4x-docs/doc/icu_pattern/index.html
+   *
+   * NOTE(review): the returned span wraps the pointer handed back by
+   * udatpg_getDateTimeFormat; presumably that storage belongs to the generator
+   * and must not outlive it -- confirm against the ICU documentation.
+   */
+  Span<const char16_t> GetPlaceholderPattern() const {
+    // Initialized so the span length is well defined even if ICU were to leave
+    // the out-parameter untouched.
+    int32_t length = 0;
+    const char16_t* combined =
+        udatpg_getDateTimeFormat(mGenerator.GetConst(), &length);
+    return Span{combined, static_cast<size_t>(length)};
+  }
+
+ private:
+  // Allow other mozilla::intl components to access the underlying
+  // UDateTimePatternGenerator.
+  friend class DisplayNames;
+
+  UDateTimePatternGenerator* GetUDateTimePatternGenerator() {
+    return mGenerator.GetMut();
+  }
+
+  ICUPointer<UDateTimePatternGenerator> mGenerator =
+      ICUPointer<UDateTimePatternGenerator>(nullptr);
+
+  /**
+   * Translate a set of PatternMatchOption values into the ICU bit flags.
+   *
+   * When U_HIDE_INTERNAL_API is defined, only HourField has a mapping;
+   * MinuteField and SecondField are then ignored.
+   */
+  static UDateTimePatternMatchOptions toUDateTimePatternMatchOptions(
+      EnumSet<PatternMatchOption> options) {
+    struct OptionMap {
+      PatternMatchOption from;
+      UDateTimePatternMatchOptions to;
+    } static constexpr map[] = {
+        {PatternMatchOption::HourField, UDATPG_MATCH_HOUR_FIELD_LENGTH},
+#ifndef U_HIDE_INTERNAL_API
+        {PatternMatchOption::MinuteField, UDATPG_MATCH_MINUTE_FIELD_LENGTH},
+        {PatternMatchOption::SecondField, UDATPG_MATCH_SECOND_FIELD_LENGTH},
+#endif
+    };
+
+    UDateTimePatternMatchOptions result = UDATPG_MATCH_NO_OPTIONS;
+    for (const auto& entry : map) {
+      if (options.contains(entry.from)) {
+        result = UDateTimePatternMatchOptions(result | entry.to);
+      }
+    }
+    return result;
+  }
+};
+
+} // namespace mozilla::intl
+#endif
diff --git a/intl/components/src/DisplayNames.cpp b/intl/components/src/DisplayNames.cpp
new file mode 100644
index 0000000000..252969ccbb
--- /dev/null
+++ b/intl/components/src/DisplayNames.cpp
@@ -0,0 +1,234 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include "mozilla/intl/DisplayNames.h"
+#include "ScopedICUObject.h"
+
+namespace mozilla::intl {
+
+// Closes the ULocaleDisplayNames handle held in mULocaleDisplayNames.
+DisplayNames::~DisplayNames() {
+  // Only a non-null handle is passed on to ICU.
+  ULocaleDisplayNames* displayNames = mULocaleDisplayNames.GetMut();
+  if (displayNames) {
+    uldn_close(displayNames);
+  }
+}
+
+// Map a generic ICUError onto the DisplayNames specific error type. Everything
+// other than an out-of-memory condition is reported as an internal error.
+DisplayNamesError DisplayNames::ToError(ICUError aError) const {
+  switch (aError) {
+    case ICUError::OutOfMemory:
+      return DisplayNamesError::OutOfMemory;
+    case ICUError::OverflowError:
+    case ICUError::InternalError:
+      return DisplayNamesError::InternalError;
+  }
+
+  // Only reachable when aError holds a value that is not an ICUError
+  // enumerator.
+  MOZ_ASSERT_UNREACHABLE();
+  return DisplayNamesError::InternalError;
+}
+
+// Map a locale canonicalization error onto the DisplayNames specific error
+// type.
+DisplayNamesError DisplayNames::ToError(
+    Locale::CanonicalizationError aError) const {
+  using CanonicalizationError = Locale::CanonicalizationError;
+
+  switch (aError) {
+    case CanonicalizationError::OutOfMemory:
+      return DisplayNamesError::OutOfMemory;
+    case CanonicalizationError::InternalError:
+      return DisplayNamesError::InternalError;
+    case CanonicalizationError::DuplicateVariant:
+      return DisplayNamesError::DuplicateVariantSubtag;
+  }
+
+  // Only reachable when aError holds a value that is not a
+  // CanonicalizationError enumerator.
+  MOZ_ASSERT_UNREACHABLE();
+  return DisplayNamesError::InternalError;
+}
+
+/**
+ * Open ICU locale display names for aLocale, using display contexts derived
+ * from aOptions, and wrap the result in a DisplayNames instance. A failing ICU
+ * status is returned as an ICUError.
+ */
+/* static */
+Result<UniquePtr<DisplayNames>, ICUError> DisplayNames::TryCreate(
+    const char* aLocale, Options aOptions) {
+  // Use either standard or dialect names.
+  // For example either "English (GB)" or "British English".
+  const UDisplayContext nameKind =
+      aOptions.languageDisplay == DisplayNames::LanguageDisplay::Standard
+          ? UDISPCTX_STANDARD_NAMES
+          : UDISPCTX_DIALECT_NAMES;
+
+  // Select either the long or short form. There's no separate narrow form
+  // available in ICU, therefore we equate "narrow"/"short" styles here.
+  const UDisplayContext nameLength =
+      aOptions.style == DisplayNames::Style::Long ? UDISPCTX_LENGTH_FULL
+                                                  : UDISPCTX_LENGTH_SHORT;
+
+  UDisplayContext contexts[] = {
+      nameKind,
+
+      // Assume the display names are used in a stand-alone context.
+      UDISPCTX_CAPITALIZATION_FOR_STANDALONE,
+
+      nameLength,
+
+      // Don't apply substitutes, because we need to apply our own fallbacks.
+      UDISPCTX_NO_SUBSTITUTE,
+  };
+
+  const char* icuLocale = IcuLocale(aLocale);
+
+  UErrorCode errorCode = U_ZERO_ERROR;
+  ULocaleDisplayNames* handle = uldn_openForContext(
+      icuLocale, contexts, std::size(contexts), &errorCode);
+  if (U_FAILURE(errorCode)) {
+    return Err(ToICUError(errorCode));
+  }
+
+  return MakeUnique<DisplayNames>(handle, MakeStringSpan(icuLocale), aOptions);
+}
+
+#ifdef DEBUG
+// Debug-only helper used by the assertion in ComputeDateTimeDisplayNames.
+//
+// Returns true for the three stand-alone month symbol types (wide, short and
+// narrow) and false for every other enumerator listed below. The switch has no
+// default label and lists each enumerator explicitly; a value that matches
+// none of the cases triggers the unreachable assertion after the switch.
+static bool IsStandaloneMonth(UDateFormatSymbolType symbolType) {
+ switch (symbolType) {
+ case UDAT_STANDALONE_MONTHS:
+ case UDAT_STANDALONE_SHORT_MONTHS:
+ case UDAT_STANDALONE_NARROW_MONTHS:
+ return true;
+
+ case UDAT_ERAS:
+ case UDAT_MONTHS:
+ case UDAT_SHORT_MONTHS:
+ case UDAT_WEEKDAYS:
+ case UDAT_SHORT_WEEKDAYS:
+ case UDAT_AM_PMS:
+ case UDAT_LOCALIZED_CHARS:
+ case UDAT_ERA_NAMES:
+ case UDAT_NARROW_MONTHS:
+ case UDAT_NARROW_WEEKDAYS:
+ case UDAT_STANDALONE_WEEKDAYS:
+ case UDAT_STANDALONE_SHORT_WEEKDAYS:
+ case UDAT_STANDALONE_NARROW_WEEKDAYS:
+ case UDAT_QUARTERS:
+ case UDAT_SHORT_QUARTERS:
+ case UDAT_STANDALONE_QUARTERS:
+ case UDAT_STANDALONE_SHORT_QUARTERS:
+ case UDAT_SHORTER_WEEKDAYS:
+ case UDAT_STANDALONE_SHORTER_WEEKDAYS:
+ case UDAT_CYCLIC_YEARS_WIDE:
+ case UDAT_CYCLIC_YEARS_ABBREVIATED:
+ case UDAT_CYCLIC_YEARS_NARROW:
+ case UDAT_ZODIAC_NAMES_WIDE:
+ case UDAT_ZODIAC_NAMES_ABBREVIATED:
+ case UDAT_ZODIAC_NAMES_NARROW:
+ case UDAT_NARROW_QUARTERS:
+ case UDAT_STANDALONE_NARROW_QUARTERS:
+ return false;
+ }
+
+ MOZ_ASSERT_UNREACHABLE("unenumerated, undocumented symbol type");
+ return false;
+}
+#endif
+
+/**
+ * Fill mDateTimeDisplayNames with one display name per entry of |indices|,
+ * each obtained from udat_getSymbols for |symbolType|.
+ *
+ * The date formatter is opened for mLocale. When aCalendar is non-empty, the
+ * locale's Unicode extension is first replaced by "u-ca-" followed by
+ * aCalendar. If mDateTimeDisplayNames already holds entries, the function
+ * returns Ok() without doing any work.
+ *
+ * Errors:
+ *  - InvalidLanguageTag: mLocale could not be parsed.
+ *  - OutOfMemory: appending to one of the vectors failed.
+ *  - InternalError: udat_open reported a failure.
+ *  - Anything ToError produces for a failed SetUnicodeExtension, ToString or
+ *    udat_getSymbols call.
+ *
+ * NOTE(review): the early return only checks whether the cache is non-empty;
+ * it does not compare symbolType, indices or aCalendar with the call that
+ * filled it. Presumably each instance is only ever asked for one kind of
+ * list -- confirm with the callers.
+ * NOTE(review): an error inside the loop returns with the names appended so
+ * far still in mDateTimeDisplayNames, so a later call would take the early
+ * return -- confirm whether callers discard the instance after an error.
+ */
+Result<Ok, DisplayNamesError> DisplayNames::ComputeDateTimeDisplayNames(
+ UDateFormatSymbolType symbolType, mozilla::Span<const int32_t> indices,
+ Span<const char> aCalendar) {
+ if (!mDateTimeDisplayNames.empty()) {
+ // No need to re-compute the display names.
+ return Ok();
+ }
+ mozilla::intl::Locale tag;
+ // Do not use mLocale.AsSpan() as it includes the null terminator inside the
+ // span.
+ if (LocaleParser::TryParse(Span(mLocale.Elements(), mLocale.Length() - 1),
+ tag)
+ .isErr()) {
+ return Err(DisplayNamesError::InvalidLanguageTag);
+ }
+
+ if (!aCalendar.empty()) {
+ // Add the calendar extension to the locale. This is only available via
+ // the MozExtension.
+ Vector<char, 32> extension;
+ Span<const char> prefix = MakeStringSpan("u-ca-");
+ if (!extension.append(prefix.data(), prefix.size()) ||
+ !extension.append(aCalendar.data(), aCalendar.size())) {
+ return Err(DisplayNamesError::OutOfMemory);
+ }
+ // This overwrites any other Unicode extensions, but should be okay to do
+ // here.
+ if (auto result = tag.SetUnicodeExtension(extension); result.isErr()) {
+ return Err(ToError(result.unwrapErr()));
+ }
+ }
+
+ // No time zone and no pattern are passed to udat_open below.
+ constexpr char16_t* timeZone = nullptr;
+ constexpr int32_t timeZoneLength = 0;
+
+ constexpr char16_t* pattern = nullptr;
+ constexpr int32_t patternLength = 0;
+
+ // Serialize the (possibly calendar-extended) tag and null terminate it.
+ Vector<char, DisplayNames::LocaleVecLength> localeWithCalendar;
+ VectorToBufferAdaptor buffer(localeWithCalendar);
+ if (auto result = tag.ToString(buffer); result.isErr()) {
+ return Err(ToError(result.unwrapErr()));
+ }
+ if (!localeWithCalendar.append('\0')) {
+ return Err(DisplayNamesError::OutOfMemory);
+ }
+
+ UErrorCode status = U_ZERO_ERROR;
+ UDateFormat* fmt = udat_open(
+ UDAT_DEFAULT, UDAT_DEFAULT,
+ IcuLocale(
+ // IcuLocale takes a Span that does not include the null terminator.
+ Span(localeWithCalendar.begin(), localeWithCalendar.length() - 1)),
+ timeZone, timeZoneLength, pattern, patternLength, &status);
+ if (U_FAILURE(status)) {
+ return Err(DisplayNamesError::InternalError);
+ }
+ // Hands fmt to a scoped guard parameterized with udat_close.
+ ScopedICUObject<UDateFormat, udat_close> datToClose(fmt);
+
+ // Scratch buffer that receives each symbol before it is copied into
+ // mDateTimeDisplayNames.
+ Vector<char16_t, DisplayNames::LocaleVecLength> name;
+ for (int32_t index : indices) {
+ auto result = FillBufferWithICUCall(name, [&](UChar* target, int32_t length,
+ UErrorCode* status) {
+ return udat_getSymbols(fmt, symbolType, index, target, length, status);
+ });
+ if (result.isErr()) {
+ return Err(ToError(result.unwrapErr()));
+ }
+
+ // Everything except Undecimber should always have a non-empty name.
+ MOZ_ASSERT_IF(!IsStandaloneMonth(symbolType) || index != UCAL_UNDECIMBER,
+ !name.empty());
+
+ if (!mDateTimeDisplayNames.emplaceBack(Span(name.begin(), name.length()))) {
+ return Err(DisplayNamesError::OutOfMemory);
+ }
+ }
+ return Ok();
+}
+
+/**
+ * Convert a Month to its numeric code as a string: "1" for January through
+ * "13" for Undecimber.
+ */
+Span<const char> DisplayNames::ToCodeString(Month aMonth) {
+  // Indexed by the Month value minus one.
+  static constexpr const char* kMonthCodes[] = {
+      "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
+  };
+  static_assert(static_cast<size_t>(Month::January) == 1);
+  static_assert(static_cast<size_t>(Month::Undecimber) ==
+                std::size(kMonthCodes));
+
+  // A value of zero wraps around to a huge index and fails the bounds check
+  // just like any other value outside the enum.
+  const size_t index = static_cast<size_t>(aMonth) - 1;
+  if (index < std::size(kMonthCodes)) {
+    return MakeStringSpan(kMonthCodes[index]);
+  }
+
+  MOZ_ASSERT_UNREACHABLE();
+  return MakeStringSpan("1");
+}
+
+} // namespace mozilla::intl
diff --git a/intl/components/src/DisplayNames.h b/intl/components/src/DisplayNames.h
new file mode 100644
index 0000000000..ae519f61ce
--- /dev/null
+++ b/intl/components/src/DisplayNames.h
@@ -0,0 +1,971 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef intl_components_DisplayNames_h_
+#define intl_components_DisplayNames_h_
+
+#include <string>
+#include <string_view>
+#include "unicode/udat.h"
+#include "unicode/udatpg.h"
+#include "unicode/uldnames.h"
+#include "unicode/uloc.h"
+#include "unicode/ucurr.h"
+#include "mozilla/intl/Calendar.h"
+#include "mozilla/intl/DateTimePatternGenerator.h"
+#include "mozilla/intl/ICU4CGlue.h"
+#include "mozilla/intl/Locale.h"
+#include "mozilla/Buffer.h"
+#include "mozilla/Casting.h"
+#include "mozilla/PodOperations.h"
+#include "mozilla/Result.h"
+#include "mozilla/Span.h"
+#include "mozilla/TextUtils.h"
+#include "mozilla/UniquePtr.h"
+
+namespace mozilla::intl {
+/**
+ * Provide more granular errors for DisplayNames rather than use the generic
+ * ICUError type. This helps with providing more actionable feedback for
+ * errors with input validation.
+ *
+ * This type can't be nested in the DisplayNames class because it needs the
+ * UnusedZero and HasFreeLSB definitions.
+ */
+enum class DisplayNamesError {
+ // Since we claim UnusedZero<DisplayNamesError>::value and
+ // HasFreeLSB<DisplayNamesError>::value == true below, we must only use
+ // positive, even enum values.
+
+ // An ICU call or a canonicalization step failed for a reason other than
+ // running out of memory.
+ InternalError = 2,
+ // An allocation failed.
+ OutOfMemory = 4,
+ // The code passed to one of the Get* methods failed validation.
+ InvalidOption = 6,
+ // Canonicalization reported a duplicate variant subtag.
+ DuplicateVariantSubtag = 8,
+ // The locale stored in the DisplayNames instance could not be parsed.
+ InvalidLanguageTag = 10,
+};
+} // namespace mozilla::intl
+
+namespace mozilla::detail {
+// Ensure the efficient packing of the error types into the result. See
+// ICUError.h and the ICUError comments for more information.
+//
+// Both specializations depend on every DisplayNamesError enumerator being a
+// positive, even value: zero is never a valid error, and the least significant
+// bit of every enumerator is zero.
+// NOTE(review): presumably mozilla::Result uses these traits to pick a more
+// compact representation -- see mozilla/Result.h to confirm the details.
+template <>
+struct UnusedZero<intl::DisplayNamesError>
+ : UnusedZeroEnum<intl::DisplayNamesError> {};
+
+template <>
+struct HasFreeLSB<intl::DisplayNamesError> {
+ static constexpr bool value = true;
+};
+} // namespace mozilla::detail
+
+namespace mozilla::intl {
+
+// NOTE: The UTS #35 canonical "code" values for months and quarters are 1-based
+// integers, so some of the following enums are 1-based for consistency with
+// that. For simplicity, we make all of the following enums 1-based, but use
+// `EnumToIndex` (see below) to convert to zero based if indexing into internal
+// (non-ICU) tables.
+
+/**
+ * Month choices for display names.
+ *
+ * The enumerators are numbered consecutively from 1 (January) to 13
+ * (Undecimber).
+ */
+enum class Month : uint8_t {
+ January = 1,
+ February,
+ March,
+ April,
+ May,
+ June,
+ July,
+ August,
+ September,
+ October,
+ November,
+ December,
+ // Some calendar systems feature a 13th month.
+ // https://en.wikipedia.org/wiki/Undecimber
+ Undecimber
+};
+
+/**
+ * Quarter choices for display names.
+ *
+ * The enumerators are numbered consecutively from 1 (Q1) to 4 (Q4).
+ */
+enum class Quarter : uint8_t {
+ Q1 = 1,
+ Q2,
+ Q3,
+ Q4,
+};
+
+/**
+ * Day period choices for display names.
+ *
+ * The enumerators are numbered from 1: AM is 1 and PM is 2.
+ */
+enum class DayPeriod : uint8_t {
+ AM = 1,
+ PM,
+};
+
+/**
+ * DateTimeField choices for display names.
+ *
+ * The enumerators are numbered consecutively from 1 (Era) to 12
+ * (TimeZoneName).
+ */
+enum class DateTimeField : uint8_t {
+ Era = 1,
+ Year,
+ Quarter,
+ Month,
+ WeekOfYear,
+ Weekday,
+ Day,
+ DayPeriod,
+ Hour,
+ Minute,
+ Second,
+ TimeZoneName,
+};
+
+/**
+ * DisplayNames provide a way to get the localized names of various types of
+ * information such as the names of the day of the week, months, currency etc.
+ *
+ * This class backs SpiderMonkey's implementation of Intl.DisplayNames
+ * https://tc39.es/ecma402/#intl-displaynames-objects
+ */
+class DisplayNames final {
+ public:
+ /**
+ * The style of the display name, specified by the amount of space available
+ * for displaying the text.
+ *
+ * Not every style has its own ICU form: TryCreate requests the full-length
+ * ICU names for Long and the short-length names for every other style.
+ */
+ enum class Style {
+ Narrow,
+ Short,
+ Long,
+ // Note: Abbreviated is not part of ECMA-402, but it is available for
+ // internal Mozilla usage.
+ Abbreviated,
+ };
+
+ /**
+ * Use either standard or dialect names for the "Language" type.
+ */
+ enum class LanguageDisplay {
+ // Standard names, for example "English (GB)".
+ Standard,
+ // Dialect names, for example "British English".
+ Dialect,
+ };
+
+ /**
+ * Determines the fallback behavior if no match is found.
+ *
+ * HandleFallback only applies Code when the lookup left the buffer empty.
+ */
+ enum class Fallback {
+ // The buffer will contain an empty string.
+ None,
+ // The buffer will contain the code, but typically in a canonicalized form.
+ Code
+ };
+
+ /**
+ * These options correlate to the ECMA-402 DisplayNames options. The default
+ * values must match the default initialized values of ECMA-402. The type
+ * option is omitted as the C++ API relies on directly calling the
+ * DisplayNames::Get* methods.
+ *
+ * https://tc39.es/ecma402/#intl-displaynames-objects
+ * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/DisplayNames
+ */
+ struct Options {
+ // Length of the requested names; defaults to Style::Long.
+ Style style = Style::Long;
+ // Standard or dialect language names; defaults to LanguageDisplay::Standard.
+ LanguageDisplay languageDisplay = LanguageDisplay::Standard;
+ };
+
+  /**
+   * Wrap an already opened ULocaleDisplayNames handle, which must not be null,
+   * and store a private copy of the locale it was opened for. Use TryCreate to
+   * open the handle from a locale and options.
+   */
+  DisplayNames(ULocaleDisplayNames* aDisplayNames, Span<const char> aLocale,
+               Options aOptions)
+      : mOptions(aOptions), mULocaleDisplayNames(aDisplayNames) {
+    MOZ_ASSERT(aDisplayNames);
+
+    // mLocale holds the characters of aLocale followed by a null terminator.
+    const size_t localeLength = aLocale.size();
+    mLocale = Buffer<char>(localeLength + 1);
+    PodCopy(mLocale.begin(), aLocale.data(), localeLength);
+    mLocale[localeLength] = '\0';
+  }
+
+ /**
+ * Initialize a new DisplayNames for the provided locale and using the
+ * provided options.
+ *
+ * https://tc39.es/ecma402/#sec-Intl.DisplayNames
+ */
+ static Result<UniquePtr<DisplayNames>, ICUError> TryCreate(
+ const char* aLocale, Options aOptions);
+
+ // Not copyable or movable
+ DisplayNames(const DisplayNames&) = delete;
+ DisplayNames& operator=(const DisplayNames&) = delete;
+
+ ~DisplayNames();
+
+ /**
+ * Easily convert to a more specific DisplayNames error.
+ */
+ DisplayNamesError ToError(ICUError aError) const;
+
+ /**
+ * Easily convert to a more specific DisplayNames error.
+ */
+ DisplayNamesError ToError(Locale::CanonicalizationError aError) const;
+
+ private:
+  /**
+   * Apply the fallback behavior after a lookup: when the lookup left aBuffer
+   * empty and aFallback is Fallback::Code, fill aBuffer with the span returned
+   * by aGetFallbackSpan (the "code", often in canonicalized form). In every
+   * other case aBuffer is left untouched and aGetFallbackSpan is not called.
+   */
+  template <typename B, typename Fn>
+  static Result<Ok, DisplayNamesError> HandleFallback(B& aBuffer,
+                                                      Fallback aFallback,
+                                                      Fn aGetFallbackSpan) {
+    // Nothing to do when a name was found or no fallback was requested.
+    if (aBuffer.length() != 0 ||
+        aFallback != mozilla::intl::DisplayNames::Fallback::Code) {
+      return Ok();
+    }
+
+    if (FillBuffer(aGetFallbackSpan(), aBuffer)) {
+      return Ok();
+    }
+    return Err(DisplayNamesError::OutOfMemory);
+  }
+
+  /**
+   * This is a specialized form of the FillBufferWithICUCall for DisplayNames.
+   * Different APIs report that no display name is found with different
+   * statuses. When aCallback reports aNoDisplayNameStatus, the status is reset
+   * to U_ZERO_ERROR and a length of 0 is returned, which signals that no
+   * display name was found.
+   *
+   * The display name APIs such as `uldn_scriptDisplayName`,
+   * `uloc_getDisplayScript`, and `uldn_regionDisplayName` report
+   * U_ILLEGAL_ARGUMENT_ERROR when no display name was found. In order to
+   * accommodate the fallback handling, return an empty string in this case.
+   */
+  template <typename B, typename F>
+  static ICUResult FillBufferWithICUDisplayNames(
+      B& aBuffer, UErrorCode aNoDisplayNameStatus, F aCallback) {
+    return FillBufferWithICUCall(
+        aBuffer, [&](UChar* target, int32_t length, UErrorCode* status) {
+          const int32_t written = aCallback(target, length, status);
+          if (*status != aNoDisplayNameStatus) {
+            return written;
+          }
+
+          // "Not found" is not an error for the caller: report success with an
+          // empty result instead.
+          *status = U_ZERO_ERROR;
+          return int32_t(0);
+        });
+  }
+
+ /**
+ * An internal helper to compute the list of display names for various
+ * DateTime options.
+ */
+ Result<Ok, DisplayNamesError> ComputeDateTimeDisplayNames(
+ UDateFormatSymbolType symbolType, mozilla::Span<const int32_t> indices,
+ Span<const char> aCalendar);
+
+ // The following are the stack-allocated sizes for various strings using the
+ // mozilla::Vector. The numbers should be large enough to fit the common
+ // cases, and when the strings are too large they will fall back to heap
+ // allocations.
+
+ // Fit BCP 47 locales such as "en-US", "zh-Hant". Locales can get quite long,
+ // but 32 should fit most smaller locales without a lot of extensions.
+ static constexpr size_t LocaleVecLength = 32;
+ // Fit calendar names such as "gregory", "buddhist", "islamic-civil".
+ // "islamic-umalqura" is 16 bytes + 1 for null termination, so round up to 32.
+ static constexpr size_t CalendarVecLength = 32;
+
+  /**
+   * Convert an ASCII letter to upper case. The input must be an ASCII alpha
+   * character.
+   */
+  static inline char16_t AsciiAlphaToUpperCase(char16_t aCh) {
+    MOZ_ASSERT(IsAsciiAlpha(aCh));
+    const char16_t upperCased = AsciiToUpperCase(aCh);
+    return upperCased;
+  }
+
+  /**
+   * Convert a 1-based enum value into a 0-based index for a table with aSize
+   * entries, release-asserting that the index is within bounds.
+   *
+   * Note: The enums we support here are all defined starting from 1.
+   */
+  template <typename T>
+  inline int32_t EnumToIndex(size_t aSize, T aEnum) {
+    // An enum value of zero wraps around to a huge number here, which the
+    // bounds check below rejects as well.
+    const size_t zeroBased = static_cast<size_t>(aEnum) - 1;
+    MOZ_RELEASE_ASSERT(zeroBased < aSize,
+                       "Enum indexing mismatch for display names.");
+    return static_cast<int32_t>(zeroBased);
+  }
+
+ /**
+ * Convert the month to a numeric code as a string.
+ */
+ static Span<const char> ToCodeString(Month aMonth);
+
+ public:
+  /**
+   * Get the localized name of a language. Part of ECMA-402.
+   *
+   * Accepts:
+   *  languageCode ["-" scriptCode] ["-" regionCode ] *("-" variant )
+   *  Where the language code is:
+   *    1. A two-letter ISO 639-1 language code
+   *       https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
+   *    2. A three-letter ISO 639-2 language code
+   *       https://en.wikipedia.org/wiki/List_of_ISO_639-2_codes
+   *
+   * Examples:
+   *  "es-ES" => "European Spanish" (en-US), "español de España" (es-ES)
+   *  "zh-Hant" => "Traditional Chinese" (en-US), "chino tradicional" (es-ES)
+   *
+   * Returns InvalidOption when aLanguage cannot be parsed. When no name is
+   * found, aBuffer is empty, or holds the canonicalized tag if aFallback is
+   * Fallback::Code.
+   */
+  template <typename B>
+  Result<Ok, DisplayNamesError> GetLanguage(
+      B& aBuffer, Span<const char> aLanguage,
+      Fallback aFallback = Fallback::None) const {
+    static_assert(std::is_same<typename B::CharType, char16_t>::value);
+
+    mozilla::intl::Locale locale;
+    if (LocaleParser::TryParseBaseName(aLanguage, locale).isErr()) {
+      return Err(DisplayNamesError::InvalidOption);
+    }
+
+    // ICU always canonicalizes the input locale, but since we know that ICU's
+    // canonicalization is incomplete, we need to perform our own
+    // canonicalization to ensure consistent result.
+    if (auto canonicalized = locale.CanonicalizeBaseName();
+        canonicalized.isErr()) {
+      return Err(ToError(canonicalized.unwrapErr()));
+    }
+
+    // Serialize the canonical tag into a null-terminated string for ICU.
+    Vector<char, DisplayNames::LocaleVecLength> localeChars;
+    {
+      VectorToBufferAdaptor adaptor(localeChars);
+      if (auto serialized = locale.ToString(adaptor); serialized.isErr()) {
+        return Err(ToError(serialized.unwrapErr()));
+      }
+      if (!localeChars.append('\0')) {
+        return Err(DisplayNamesError::OutOfMemory);
+      }
+    }
+
+    auto lookup = FillBufferWithICUDisplayNames(
+        aBuffer, U_ILLEGAL_ARGUMENT_ERROR,
+        [&](UChar* target, int32_t length, UErrorCode* status) {
+          return uldn_localeDisplayName(mULocaleDisplayNames.GetConst(),
+                                        localeChars.begin(), target, length,
+                                        status);
+        });
+    if (lookup.isErr()) {
+      return Err(ToError(lookup.unwrapErr()));
+    }
+
+    return HandleFallback(aBuffer, aFallback, [&] {
+      // The fallback code is the canonical tag without its null terminator.
+      return Span(localeChars.begin(), localeChars.length() - 1);
+    });
+  }
+
+  /**
+   * Get the localized name of a region. Part of ECMA-402.
+   *
+   * Accepts:
+   *  1. A two-letter ISO 3166 region code:
+   *     https://www.iso.org/iso-3166-country-codes.html
+   *  2. A three-digit UN M49 geographic region code:
+   *     https://unstats.un.org/unsd/methodology/m49/
+   *
+   * Examples
+   *  "US"  => "United States" (en-US), "Estados Unidos", (es-ES)
+   *  "158" => "Taiwan" (en-US), "Taiwán", (es-ES)
+   *
+   * Returns InvalidOption when aCode is not a structurally valid region
+   * subtag. When no name is found, aBuffer is empty, or holds the upper-cased
+   * input code if aFallback is Fallback::Code.
+   */
+  template <typename B>
+  Result<Ok, DisplayNamesError> GetRegion(
+      B& aBuffer, Span<const char> aCode,
+      Fallback aFallback = Fallback::None) const {
+    static_assert(std::is_same<typename B::CharType, char16_t>::value);
+
+    mozilla::intl::RegionSubtag inputRegion;
+    if (!IsStructurallyValidRegionTag(aCode)) {
+      return Err(DisplayNamesError::InvalidOption);
+    }
+    inputRegion.Set(aCode);
+
+    // Canonicalize the region as part of an otherwise undetermined locale.
+    mozilla::intl::Locale locale;
+    locale.SetLanguage("und");
+    locale.SetRegion(inputRegion);
+
+    // ICU always canonicalizes the input locale, but since we know that ICU's
+    // canonicalization is incomplete, we need to perform our own
+    // canonicalization to ensure consistent result.
+    if (auto canonicalized = locale.CanonicalizeBaseName();
+        canonicalized.isErr()) {
+      return Err(ToError(canonicalized.unwrapErr()));
+    }
+
+    MOZ_ASSERT(locale.Region().Present());
+
+    // Note: ICU requires the region subtag to be in canonical case.
+    const mozilla::intl::RegionSubtag& canonicalRegion = locale.Region();
+
+    // Zero-initialized, so the copied subtag stays null terminated.
+    char icuRegion[mozilla::intl::LanguageTagLimits::RegionLength + 1] = {};
+    std::copy_n(canonicalRegion.Span().data(), canonicalRegion.Length(),
+                icuRegion);
+
+    auto lookup = FillBufferWithICUDisplayNames(
+        aBuffer, U_ILLEGAL_ARGUMENT_ERROR,
+        [&](UChar* chars, uint32_t size, UErrorCode* status) {
+          return uldn_regionDisplayName(
+              mULocaleDisplayNames.GetConst(), icuRegion, chars,
+              AssertedCast<int32_t, uint32_t>(size), status);
+        });
+    if (lookup.isErr()) {
+      return Err(ToError(lookup.unwrapErr()));
+    }
+
+    return HandleFallback(aBuffer, aFallback, [&] {
+      // The fallback is the input code in upper case, not the canonicalized
+      // region.
+      inputRegion.ToUpperCase();
+      return inputRegion.Span();
+    });
+  }
+
+  /**
+   * Get the localized name of a currency. Part of ECMA-402.
+   *
+   * Accepts:
+   *  A 3-letter ISO 4217 currency code.
+   *  https://en.wikipedia.org/wiki/ISO_4217
+   *
+   * Examples:
+   *  "EUR" => "Euro" (en-US), "euro" (es_ES), "欧元", (zh)
+   *  "JPY" => "Japanese Yen" (en-US), "yen" (es_ES), "日元", (zh)
+   *
+   * Returns InvalidOption unless aCurrency consists of exactly three ASCII
+   * letters, InternalError when ICU reports a failure, and OutOfMemory when
+   * aBuffer cannot grow. When ICU only has a root-locale result, aBuffer is
+   * empty, or holds the upper-cased code if aFallback is Fallback::Code.
+   */
+  template <typename B>
+  Result<Ok, DisplayNamesError> GetCurrency(
+      B& aBuffer, Span<const char> aCurrency,
+      Fallback aFallback = Fallback::None) const {
+    static_assert(std::is_same<typename B::CharType, char16_t>::value);
+    if (aCurrency.size() != 3) {
+      return Err(DisplayNamesError::InvalidOption);
+    }
+
+    if (!mozilla::IsAsciiAlpha(aCurrency[0]) ||
+        !mozilla::IsAsciiAlpha(aCurrency[1]) ||
+        !mozilla::IsAsciiAlpha(aCurrency[2])) {
+      return Err(DisplayNamesError::InvalidOption);
+    }
+
+    // Normally this type of operation wouldn't be safe, but ASCII characters
+    // all take 1 byte in UTF-8 encoding, and can be zero padded to be valid
+    // UTF-16. Currency codes are all three ASCII letters.
+    char16_t currency[] = {static_cast<char16_t>(aCurrency[0]),
+                           static_cast<char16_t>(aCurrency[1]),
+                           static_cast<char16_t>(aCurrency[2]), u'\0'};
+
+    // Initialized to the mapping of the default style (Style::Long), so that
+    // |style| is never read uninitialized even if mOptions.style were to hold
+    // a value that matches none of the cases below.
+    UCurrNameStyle style = UCURR_LONG_NAME;
+    switch (mOptions.style) {
+      case Style::Long:
+        style = UCURR_LONG_NAME;
+        break;
+      case Style::Abbreviated:
+      case Style::Short:
+        style = UCURR_SYMBOL_NAME;
+        break;
+      case Style::Narrow:
+        style = UCURR_NARROW_SYMBOL_NAME;
+        break;
+    }
+
+    int32_t length = 0;
+    UErrorCode status = U_ZERO_ERROR;
+    const char16_t* name = ucurr_getName(currency, IcuLocale(mLocale), style,
+                                         nullptr, &length, &status);
+    if (U_FAILURE(status)) {
+      return Err(DisplayNamesError::InternalError);
+    }
+
+    if (status == U_USING_DEFAULT_WARNING) {
+      // A resource bundle lookup returned a result from the root locale.
+      if (aFallback == DisplayNames::Fallback::Code) {
+        // Return the canonicalized input when no localized currency name was
+        // found. Canonical case for currency is upper case.
+        if (!aBuffer.reserve(3)) {
+          return Err(DisplayNamesError::OutOfMemory);
+        }
+        aBuffer.data()[0] = AsciiAlphaToUpperCase(currency[0]);
+        aBuffer.data()[1] = AsciiAlphaToUpperCase(currency[1]);
+        aBuffer.data()[2] = AsciiAlphaToUpperCase(currency[2]);
+        aBuffer.written(3);
+      } else if (aBuffer.length() != 0) {
+        // Ensure an empty string is in the buffer when there is no fallback.
+        aBuffer.written(0);
+      }
+      return Ok();
+    }
+
+    if (!FillBuffer(Span(name, length), aBuffer)) {
+      return Err(DisplayNamesError::OutOfMemory);
+    }
+
+    return Ok();
+  }
+
+  /**
+   * Get the localized name of a script. Part of ECMA-402.
+   *
+   * Accepts:
+   *  ECMA-402 expects the ISO-15924 four letters script code.
+   *  https://unicode.org/iso15924/iso15924-codes.html
+   *  e.g. "Latn"
+   *
+   * Examples:
+   *  "Cher" => "Cherokee" (en-US), "cherokee" (es-ES)
+   *  "Latn" => "Latin" (en-US), "latino" (es-ES)
+   *
+   * Returns InvalidOption when aScript is not a structurally valid script
+   * subtag. When no name is found, aBuffer is empty, or holds the title-cased
+   * input code if aFallback is Fallback::Code.
+   */
+  template <typename B>
+  Result<Ok, DisplayNamesError> GetScript(
+      B& aBuffer, Span<const char> aScript,
+      Fallback aFallback = Fallback::None) const {
+    static_assert(std::is_same<typename B::CharType, char16_t>::value);
+    mozilla::intl::ScriptSubtag script;
+    if (!IsStructurallyValidScriptTag(aScript)) {
+      return Err(DisplayNamesError::InvalidOption);
+    }
+    script.Set(aScript);
+
+    mozilla::intl::Locale tag;
+    tag.SetLanguage("und");
+
+    tag.SetScript(script);
+
+    {
+      // ICU always canonicalizes the input locale, but since we know that ICU's
+      // canonicalization is incomplete, we need to perform our own
+      // canonicalization to ensure consistent result.
+      auto result = tag.CanonicalizeBaseName();
+      if (result.isErr()) {
+        return Err(ToError(result.unwrapErr()));
+      }
+    }
+
+    MOZ_ASSERT(tag.Script().Present());
+
+    switch (mOptions.style) {
+      case Style::Long: {
+        // |uldn_scriptDisplayName| doesn't use the stand-alone form for script
+        // subtags, so we're using |uloc_getDisplayScript| instead. (This only
+        // applies to the long form.)
+        //
+        // ICU bug: https://unicode-org.atlassian.net/browse/ICU-9301
+
+        // |uloc_getDisplayScript| expects a full locale identifier as its
+        // input.
+        mozilla::Vector<char, DisplayNames::LocaleVecLength> tagString;
+        VectorToBufferAdaptor buffer(tagString);
+        if (auto result = tag.ToString(buffer); result.isErr()) {
+          return Err(ToError(result.unwrapErr()));
+        }
+
+        // Null terminate the tag string.
+        if (!tagString.append('\0')) {
+          return Err(DisplayNamesError::OutOfMemory);
+        }
+
+        // |uloc_getDisplayScript| signals "no display name" with
+        // U_USING_DEFAULT_WARNING instead of U_ILLEGAL_ARGUMENT_ERROR.
+        auto result = FillBufferWithICUDisplayNames(
+            aBuffer, U_USING_DEFAULT_WARNING,
+            [&](UChar* target, int32_t length, UErrorCode* status) {
+              return uloc_getDisplayScript(tagString.begin(),
+                                           IcuLocale(mLocale), target, length,
+                                           status);
+            });
+
+        if (result.isErr()) {
+          return Err(ToError(result.unwrapErr()));
+        }
+        break;
+      }
+      case Style::Abbreviated:
+      case Style::Short:
+      case Style::Narrow: {
+        // Note: ICU requires the script subtag to be in canonical case.
+        const mozilla::intl::ScriptSubtag& canonicalScript = tag.Script();
+
+        // Zero-initialized with one extra element for the null terminator.
+        char scriptChars[mozilla::intl::LanguageTagLimits::ScriptLength + 1] =
+            {};
+        // The subtag must leave the last element untouched, otherwise the
+        // string handed to ICU would not be null terminated.
+        MOZ_ASSERT(canonicalScript.Length() <=
+                   mozilla::intl::LanguageTagLimits::ScriptLength);
+        std::copy_n(canonicalScript.Span().data(), canonicalScript.Length(),
+                    scriptChars);
+
+        auto result = FillBufferWithICUDisplayNames(
+            aBuffer, U_ILLEGAL_ARGUMENT_ERROR,
+            [&](UChar* target, int32_t length, UErrorCode* status) {
+              return uldn_scriptDisplayName(mULocaleDisplayNames.GetConst(),
+                                            scriptChars, target, length,
+                                            status);
+            });
+
+        if (result.isErr()) {
+          return Err(ToError(result.unwrapErr()));
+        }
+        break;
+      }
+    }
+
+    return HandleFallback(aBuffer, aFallback, [&] {
+      // The fallback is the input code in title case, not the canonicalized
+      // script.
+      script.ToTitleCase();
+      return script.Span();
+    });
+  }
+
+ /**
+ * Get the localized name of a calendar.
+ * Part of Intl.DisplayNames V2. https://tc39.es/intl-displaynames-v2/
+ * Accepts:
+ * Unicode calendar key:
+ * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Locale/calendar#unicode_calendar_keys
+ *
+ * Returns DisplayNamesError::InvalidOption when |aCalendar| is empty, is not
+ * ASCII, or is rejected by LocaleParser::CanParseUnicodeExtensionType, and
+ * DisplayNamesError::OutOfMemory when the lower-cased copy cannot be
+ * allocated. When ICU has no legacy type for the calendar, or the canonical
+ * name is longer than |maximumCalendarLength|, |aBuffer| is marked as holding
+ * zero written elements and HandleFallback decides the result, with the
+ * canonical calendar name as the fallback value.
+ */
+ template <typename B>
+ Result<Ok, DisplayNamesError> GetCalendar(
+ B& aBuffer, Span<const char> aCalendar,
+ Fallback aFallback = Fallback::None) const {
+ if (aCalendar.empty() || !IsAscii(aCalendar)) {
+ return Err(DisplayNamesError::InvalidOption);
+ }
+
+ if (LocaleParser::CanParseUnicodeExtensionType(aCalendar).isErr()) {
+ return Err(DisplayNamesError::InvalidOption);
+ }
+
+ // Convert into canonical case before searching for replacements.
+ Vector<char, DisplayNames::CalendarVecLength> lowerCaseCalendar;
+ for (size_t i = 0; i < aCalendar.size(); i++) {
+ if (!lowerCaseCalendar.append(AsciiToLowerCase(aCalendar[i]))) {
+ return Err(DisplayNamesError::OutOfMemory);
+ }
+ }
+ // Null terminate the copy, because it is handed to ICU as a C string below.
+ if (!lowerCaseCalendar.append('\0')) {
+ return Err(DisplayNamesError::OutOfMemory);
+ }
+
+ // The span excludes the trailing NUL character appended above.
+ Span<const char> canonicalCalendar = mozilla::Span(
+ lowerCaseCalendar.begin(), lowerCaseCalendar.length() - 1);
+
+ // Search if there's a replacement for the Unicode calendar keyword.
+ {
+ Span<const char> key = mozilla::MakeStringSpan("ca");
+ Span<const char> type = canonicalCalendar;
+ if (const char* replacement =
+ mozilla::intl::Locale::ReplaceUnicodeExtensionType(key, type)) {
+ canonicalCalendar = MakeStringSpan(replacement);
+ }
+ }
+
+ // The input calendar name is user-controlled, so be extra cautious before
+ // passing arbitrarily large strings to ICU.
+ static constexpr size_t maximumCalendarLength = 100;
+
+ if (canonicalCalendar.size() <= maximumCalendarLength) {
+ // |uldn_keyValueDisplayName| expects old-style keyword values.
+ if (const char* legacyCalendar =
+ uloc_toLegacyType("calendar", canonicalCalendar.Elements())) {
+ auto result = FillBufferWithICUDisplayNames(
+ aBuffer, U_ILLEGAL_ARGUMENT_ERROR,
+ // ICU takes the capacity as a signed |int32_t|, the same type the
+ // other display name callbacks in this class declare.
+ [&](UChar* target, int32_t length, UErrorCode* status) {
+ return uldn_keyValueDisplayName(mULocaleDisplayNames.GetConst(),
+ "calendar", legacyCalendar, target,
+ length, status);
+ });
+ if (result.isErr()) {
+ return Err(ToError(result.unwrapErr()));
+ }
+ } else {
+ // ICU doesn't know this calendar; leave the decision to the fallback.
+ aBuffer.written(0);
+ }
+ } else {
+ // Too long to hand to ICU; leave the decision to the fallback.
+ aBuffer.written(0);
+ }
+
+ return HandleFallback(aBuffer, aFallback,
+ [&] { return canonicalCalendar; });
+ }
+
+ /**
+ * Get the localized name of a weekday. This is a MozExtension, and not
+ * currently part of ECMA-402.
+ *
+ * The ICU symbol type is chosen from |mOptions.style|. The names for
+ * |aCalendar| are obtained through ComputeDateTimeDisplayNames, and the entry
+ * for |aWeekday| is then copied from |mDateTimeDisplayNames| into |aBuffer|.
+ *
+ * Errors from ComputeDateTimeDisplayNames are propagated, and
+ * DisplayNamesError::OutOfMemory is returned when the copy into |aBuffer|
+ * fails. |aFallback| is accepted but not consulted by this method.
+ */
+ template <typename B>
+ Result<Ok, DisplayNamesError> GetWeekday(
+ B& aBuffer, Weekday aWeekday, Span<const char> aCalendar,
+ Fallback aFallback = Fallback::None) {
+ // SpiderMonkey static casts the enum, so ensure it is correctly in range.
+ MOZ_ASSERT(aWeekday >= Weekday::Monday && aWeekday <= Weekday::Sunday);
+
+ UDateFormatSymbolType symbolType;
+ switch (mOptions.style) {
+ case DisplayNames::Style::Long:
+ symbolType = UDAT_STANDALONE_WEEKDAYS;
+ break;
+
+ case DisplayNames::Style::Abbreviated:
+ // ICU "short" is CLDR "abbreviated" format.
+ symbolType = UDAT_STANDALONE_SHORT_WEEKDAYS;
+ break;
+
+ case DisplayNames::Style::Short:
+ // ICU "shorter" is CLDR "short" format.
+ symbolType = UDAT_STANDALONE_SHORTER_WEEKDAYS;
+ break;
+
+ case DisplayNames::Style::Narrow:
+ symbolType = UDAT_STANDALONE_NARROW_WEEKDAYS;
+ break;
+ }
+
+ static constexpr int32_t indices[] = {
+ UCAL_MONDAY, UCAL_TUESDAY, UCAL_WEDNESDAY, UCAL_THURSDAY,
+ UCAL_FRIDAY, UCAL_SATURDAY, UCAL_SUNDAY};
+
+ if (auto result = ComputeDateTimeDisplayNames(
+ symbolType, mozilla::Span(indices), aCalendar);
+ result.isErr()) {
+ return result.propagateErr();
+ }
+ MOZ_ASSERT(mDateTimeDisplayNames.length() == std::size(indices));
+
+ auto& name =
+ mDateTimeDisplayNames[EnumToIndex(std::size(indices), aWeekday)];
+ if (!FillBuffer(name.AsSpan(), aBuffer)) {
+ return Err(DisplayNamesError::OutOfMemory);
+ }
+
+ // There is no need to fallback, as invalid options are
+ // DisplayNamesError::InvalidOption.
+ return Ok();
+ }
+
+ /**
+ * Get the localized name of a month. This is a MozExtension, and not
+ * currently part of ECMA-402.
+ *
+ * The ICU symbol type is chosen from |mOptions.style|; the Abbreviated and
+ * Short styles both map to UDAT_STANDALONE_SHORT_MONTHS. Thirteen month
+ * indices are requested, UCAL_JANUARY through UCAL_UNDECIMBER.
+ *
+ * Errors from ComputeDateTimeDisplayNames are propagated, and
+ * DisplayNamesError::OutOfMemory is returned when the copy into |aBuffer|
+ * fails. The final result comes from HandleFallback, which is given
+ * ToCodeString(aMonth) as the fallback value.
+ */
+ template <typename B>
+ Result<Ok, DisplayNamesError> GetMonth(B& aBuffer, Month aMonth,
+ Span<const char> aCalendar,
+ Fallback aFallback = Fallback::None) {
+ // SpiderMonkey static casts the enum, so ensure it is correctly in range.
+ MOZ_ASSERT(aMonth >= Month::January && aMonth <= Month::Undecimber);
+
+ UDateFormatSymbolType symbolType;
+ switch (mOptions.style) {
+ case DisplayNames::Style::Long:
+ symbolType = UDAT_STANDALONE_MONTHS;
+ break;
+
+ case DisplayNames::Style::Abbreviated:
+ case DisplayNames::Style::Short:
+ symbolType = UDAT_STANDALONE_SHORT_MONTHS;
+ break;
+
+ case DisplayNames::Style::Narrow:
+ symbolType = UDAT_STANDALONE_NARROW_MONTHS;
+ break;
+ }
+
+ static constexpr int32_t indices[] = {
+ UCAL_JANUARY, UCAL_FEBRUARY, UCAL_MARCH, UCAL_APRIL,
+ UCAL_MAY, UCAL_JUNE, UCAL_JULY, UCAL_AUGUST,
+ UCAL_SEPTEMBER, UCAL_OCTOBER, UCAL_NOVEMBER, UCAL_DECEMBER,
+ UCAL_UNDECIMBER};
+
+ if (auto result = ComputeDateTimeDisplayNames(
+ symbolType, mozilla::Span(indices), aCalendar);
+ result.isErr()) {
+ return result.propagateErr();
+ }
+ MOZ_ASSERT(mDateTimeDisplayNames.length() == std::size(indices));
+ auto& name = mDateTimeDisplayNames[EnumToIndex(std::size(indices), aMonth)];
+ if (!FillBuffer(Span(name.AsSpan()), aBuffer)) {
+ return Err(DisplayNamesError::OutOfMemory);
+ }
+
+ return HandleFallback(aBuffer, aFallback,
+ [&] { return ToCodeString(aMonth); });
+ }
+
+ /**
+ * Get the localized name of a quarter. This is a MozExtension, and not
+ * currently part of ECMA-402.
+ *
+ * The ICU symbol type is chosen from |mOptions.style|; the Abbreviated and
+ * Short styles both map to UDAT_STANDALONE_SHORT_QUARTERS. The four quarters
+ * are requested with the plain indices 0 to 3.
+ *
+ * Errors from ComputeDateTimeDisplayNames are propagated, and
+ * DisplayNamesError::OutOfMemory is returned when the copy into |aBuffer|
+ * fails. |aFallback| is accepted but not consulted by this method.
+ */
+ template <typename B>
+ Result<Ok, DisplayNamesError> GetQuarter(
+ B& aBuffer, Quarter aQuarter, Span<const char> aCalendar,
+ Fallback aFallback = Fallback::None) {
+ // SpiderMonkey static casts the enum, so ensure it is correctly in range.
+ MOZ_ASSERT(aQuarter >= Quarter::Q1 && aQuarter <= Quarter::Q4);
+
+ UDateFormatSymbolType symbolType;
+ switch (mOptions.style) {
+ case DisplayNames::Style::Long:
+ symbolType = UDAT_STANDALONE_QUARTERS;
+ break;
+
+ case DisplayNames::Style::Abbreviated:
+ case DisplayNames::Style::Short:
+ symbolType = UDAT_STANDALONE_SHORT_QUARTERS;
+ break;
+
+ case DisplayNames::Style::Narrow:
+ symbolType = UDAT_STANDALONE_NARROW_QUARTERS;
+ break;
+ }
+
+ // ICU doesn't provide an enum for quarters.
+ static constexpr int32_t indices[] = {0, 1, 2, 3};
+
+ if (auto result = ComputeDateTimeDisplayNames(
+ symbolType, mozilla::Span(indices), aCalendar);
+ result.isErr()) {
+ return result.propagateErr();
+ }
+ MOZ_ASSERT(mDateTimeDisplayNames.length() == std::size(indices));
+
+ auto& name =
+ mDateTimeDisplayNames[EnumToIndex(std::size(indices), aQuarter)];
+ if (!FillBuffer(Span(name.AsSpan()), aBuffer)) {
+ return Err(DisplayNamesError::OutOfMemory);
+ }
+
+ // There is no need to fallback, as invalid options are
+ // DisplayNamesError::InvalidOption.
+ return Ok();
+ }
+
+ /**
+ * Get the localized name of a day period. This is a MozExtension, and not
+ * currently part of ECMA-402.
+ *
+ * The symbol type is always UDAT_AM_PMS; |mOptions.style| is not consulted
+ * here. Two entries are requested, UCAL_AM and UCAL_PM.
+ *
+ * Errors from ComputeDateTimeDisplayNames are propagated, and
+ * DisplayNamesError::OutOfMemory is returned when the copy into |aBuffer|
+ * fails. |aFallback| is accepted but not consulted by this method.
+ */
+ template <typename B>
+ Result<Ok, DisplayNamesError> GetDayPeriod(
+ B& aBuffer, DayPeriod aDayPeriod, Span<const char> aCalendar,
+ Fallback aFallback = Fallback::None) {
+ UDateFormatSymbolType symbolType = UDAT_AM_PMS;
+
+ static constexpr int32_t indices[] = {UCAL_AM, UCAL_PM};
+
+ if (auto result = ComputeDateTimeDisplayNames(
+ symbolType, mozilla::Span(indices), aCalendar);
+ result.isErr()) {
+ return result.propagateErr();
+ }
+ MOZ_ASSERT(mDateTimeDisplayNames.length() == std::size(indices));
+
+ auto& name =
+ mDateTimeDisplayNames[EnumToIndex(std::size(indices), aDayPeriod)];
+ if (!FillBuffer(name.AsSpan(), aBuffer)) {
+ return Err(DisplayNamesError::OutOfMemory);
+ }
+
+ // There is no need to fallback, as invalid options are
+ // DisplayNamesError::InvalidOption.
+ return Ok();
+ }
+
+ /**
+ * Get the localized name of a date time field.
+ * Part of Intl.DisplayNames V2. https://tc39.es/intl-displaynames-v2/
+ * Accepts:
+ * "era", "year", "quarter", "month", "weekOfYear", "weekday", "day",
+ * "dayPeriod", "hour", "minute", "second", "timeZoneName"
+ * Examples:
+ * "weekday" => "day of the week"
+ * "dayPeriod" => "AM/PM"
+ *
+ * |aField| is mapped to the matching UDateTimePatternField and
+ * |mOptions.style| to a UDateTimePGDisplayWidth; the Abbreviated and Short
+ * styles both map to UDATPG_ABBREVIATED. The name is then written into
+ * |aBuffer| by calling udatpg_getFieldDisplayName on the generator owned by
+ * |aDateTimePatternGen|.
+ *
+ * An ICU failure is converted with ToError and returned. |aFallback| is
+ * accepted but not consulted by this method.
+ */
+ template <typename B>
+ Result<Ok, DisplayNamesError> GetDateTimeField(
+ B& aBuffer, DateTimeField aField,
+ DateTimePatternGenerator& aDateTimePatternGen,
+ Fallback aFallback = Fallback::None) {
+ UDateTimePatternField field;
+ switch (aField) {
+ case DateTimeField::Era:
+ field = UDATPG_ERA_FIELD;
+ break;
+ case DateTimeField::Year:
+ field = UDATPG_YEAR_FIELD;
+ break;
+ case DateTimeField::Quarter:
+ field = UDATPG_QUARTER_FIELD;
+ break;
+ case DateTimeField::Month:
+ field = UDATPG_MONTH_FIELD;
+ break;
+ case DateTimeField::WeekOfYear:
+ field = UDATPG_WEEK_OF_YEAR_FIELD;
+ break;
+ case DateTimeField::Weekday:
+ field = UDATPG_WEEKDAY_FIELD;
+ break;
+ case DateTimeField::Day:
+ field = UDATPG_DAY_FIELD;
+ break;
+ case DateTimeField::DayPeriod:
+ field = UDATPG_DAYPERIOD_FIELD;
+ break;
+ case DateTimeField::Hour:
+ field = UDATPG_HOUR_FIELD;
+ break;
+ case DateTimeField::Minute:
+ field = UDATPG_MINUTE_FIELD;
+ break;
+ case DateTimeField::Second:
+ field = UDATPG_SECOND_FIELD;
+ break;
+ case DateTimeField::TimeZoneName:
+ field = UDATPG_ZONE_FIELD;
+ break;
+ }
+
+ UDateTimePGDisplayWidth width;
+ switch (mOptions.style) {
+ case DisplayNames::Style::Long:
+ width = UDATPG_WIDE;
+ break;
+ case DisplayNames::Style::Abbreviated:
+ case DisplayNames::Style::Short:
+ width = UDATPG_ABBREVIATED;
+ break;
+ case DisplayNames::Style::Narrow:
+ width = UDATPG_NARROW;
+ break;
+ }
+
+ auto result = FillBufferWithICUCall(
+ aBuffer, [&](UChar* target, int32_t length, UErrorCode* status) {
+ return udatpg_getFieldDisplayName(
+ aDateTimePatternGen.GetUDateTimePatternGenerator(), field, width,
+ target, length, status);
+ });
+
+ if (result.isErr()) {
+ return Err(ToError(result.unwrapErr()));
+ }
+ // There is no need to fallback, as invalid options are
+ // DisplayNamesError::InvalidOption.
+ return Ok();
+ }
+
+ Options mOptions;
+ Buffer<char> mLocale;
+ Vector<Buffer<char16_t>> mDateTimeDisplayNames;
+ ICUPointer<ULocaleDisplayNames> mULocaleDisplayNames =
+ ICUPointer<ULocaleDisplayNames>(nullptr);
+};
+
+} // namespace mozilla::intl
+
+#endif
diff --git a/intl/components/src/FormatBuffer.h b/intl/components/src/FormatBuffer.h
new file mode 100644
index 0000000000..774e74d2ba
--- /dev/null
+++ b/intl/components/src/FormatBuffer.h
@@ -0,0 +1,77 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef intl_components_FormatBuffer_h
+#define intl_components_FormatBuffer_h
+
+/**
+ * This file contains public adaptors for the mozilla::intl Buffer template
+ * argument. Adaptors that can automatically be deduced are kept as private
+ * in ICU4CGlue.h. There is also the SpiderMonkey specific adaptor
+ * js::intl::FormatBuffer in js/src/builtin/intl/FormatBuffer.h.
+ */
+
+#include "nsTString.h"
+
+namespace mozilla::intl {
+
+/**
+ * mozilla::intl APIs require sizeable buffers. This class abstracts over
+ * the nsTSubstring.
+ */
+template <typename T>
+class nsTStringToBufferAdapter {
+ public:
+ using CharType = T;
+
+ // Do not allow copy or move. Move could be added in the future if needed.
+ nsTStringToBufferAdapter(const nsTStringToBufferAdapter&) = delete;
+ nsTStringToBufferAdapter& operator=(const nsTStringToBufferAdapter&) = delete;
+
+ explicit nsTStringToBufferAdapter(nsTSubstring<CharType>& aString)
+ : mString(aString) {}
+
+ /**
+ * Ensures the buffer has enough space to accommodate |size| elements.
+ */
+ [[nodiscard]] bool reserve(size_t size) {
+ return mString.SetLength(size, fallible);
+ }
+
+ /**
+ * Returns the raw data inside the buffer.
+ */
+ CharType* data() { return mString.BeginWriting(); }
+
+ /**
+ * Returns the count of elements written into the buffer.
+ */
+ size_t length() const { return mString.Length(); }
+
+ /**
+ * Returns the buffer's overall capacity.
+ */
+ size_t capacity() const {
+ // nsString's Capacity() method is protected, so just return the length.
+ return mString.Length();
+ }
+
+ /**
+ * Resizes the buffer to the given amount of written elements.
+ */
+ void written(size_t amount) {
+ MOZ_ASSERT(amount <= mString.Length());
+ // This sets |mString|'s internal size so that it matches how much was
+ // written. This is necessary because the write happens across FFI
+ // boundaries.
+ mString.SetLength(amount);
+ }
+
+ private:
+ nsTSubstring<CharType>& mString;
+};
+
+} // namespace mozilla::intl
+
+#endif /* intl_components_FormatBuffer_h */
diff --git a/intl/components/src/ICU4CGlue.cpp b/intl/components/src/ICU4CGlue.cpp
new file mode 100644
index 0000000000..6b9e0c0c58
--- /dev/null
+++ b/intl/components/src/ICU4CGlue.cpp
@@ -0,0 +1,44 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/intl/ICU4CGlue.h"
+#include "unicode/uformattedvalue.h"
+
+namespace mozilla::intl {
+
+// Starting with ICU 59, UChar defaults to char16_t.
+static_assert(std::is_same_v<UChar, char16_t>,
+ "Gecko doesn't support redefining UChar to a different type");
+
+ // Translate a failing ICU status code into an ICUError. Only
+ // U_MEMORY_ALLOCATION_ERROR is reported as OutOfMemory; every other status
+ // becomes InternalError.
+ ICUError ToICUError(UErrorCode status) {
+ MOZ_ASSERT(!U_SUCCESS(status));
+ if (status == U_MEMORY_ALLOCATION_ERROR) {
+ return ICUError::OutOfMemory;
+ }
+ return ICUError::InternalError;
+ }
+
+ // Wrap an ICU status code in an ICUResult: a failure is converted with
+ // ToICUError, anything else is reported as Ok.
+ ICUResult ToICUResult(UErrorCode status) {
+ if (U_FAILURE(status)) {
+ return Err(ToICUError(status));
+ }
+ return Ok();
+ }
+
+ // static
+ Result<Span<const char16_t>, ICUError> FormattedResult::ToSpanImpl(
+ const UFormattedValue* value) {
+ // Ask ICU for the formatted string together with its length.
+ int32_t codeUnits;
+ UErrorCode errorCode = U_ZERO_ERROR;
+ const char16_t* chars = ufmtval_getString(value, &codeUnits, &errorCode);
+ if (U_FAILURE(errorCode)) {
+ return Err(ToICUError(errorCode));
+ }
+
+ return Span{chars, AssertedCast<size_t>(codeUnits)};
+ }
+
+} // namespace mozilla::intl
diff --git a/intl/components/src/ICU4CGlue.h b/intl/components/src/ICU4CGlue.h
new file mode 100644
index 0000000000..af1590680b
--- /dev/null
+++ b/intl/components/src/ICU4CGlue.h
@@ -0,0 +1,722 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef intl_components_ICUUtils_h
+#define intl_components_ICUUtils_h
+
+#include "unicode/uenum.h"
+#include "unicode/utypes.h"
+#include "mozilla/Buffer.h"
+#include "mozilla/DebugOnly.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/Result.h"
+#include "mozilla/Span.h"
+#include "mozilla/Utf8.h"
+#include "mozilla/Vector.h"
+#include "mozilla/intl/ICUError.h"
+
+// When building standalone js shell, it will include headers from
+// intl/components if JS_HAS_INTL_API is true (the default value), but js shell
+// won't include headers from XPCOM, so don't include nsTArray.h when building
+// standalone js shell.
+#ifndef JS_STANDALONE
+# include "nsTArray.h"
+#endif
+
+#include <cstring>
+#include <iterator>
+#include <stddef.h>
+#include <stdint.h>
+#include <string>
+#include <string_view>
+
+struct UFormattedValue;
+namespace mozilla::intl {
+
+ /**
+ * Debug-assert that the span is followed by a NUL character and contains no
+ * embedded NUL characters, then return the span's data pointer.
+ */
+ template <typename CharType>
+ static inline CharType* AssertNullTerminatedString(Span<CharType> aSpan) {
+ CharType* chars = aSpan.data();
+
+ // The terminator is expected one past the last character, because the NUL
+ // character isn't part of the string itself.
+ MOZ_ASSERT(chars[aSpan.size()] == '\0');
+
+ // No other NUL character may appear before that terminator.
+ MOZ_ASSERT(std::char_traits<CharType>::length(chars) == aSpan.size());
+
+ return chars;
+ }
+
+ /**
+ * Debug-assert that the view is followed by a NUL character and contains no
+ * embedded NUL characters, then return the view's data pointer.
+ */
+ static inline const char* AssertNullTerminatedString(std::string_view aView) {
+ const char* chars = aView.data();
+
+ // The terminator is expected one past the last character, because the NUL
+ // character isn't part of the string itself.
+ MOZ_ASSERT(chars[aView.size()] == '\0');
+
+ // No other NUL character may appear before that terminator.
+ MOZ_ASSERT(std::strlen(chars) == aView.size());
+
+ return chars;
+ }
+
+/**
+ * Map the "und" locale to an empty string, which ICU uses internally.
+ */
+static inline const char* IcuLocale(const char* aLocale) {
+ // Return the empty string if the input is exactly equal to the string "und".
+ const char* locale = aLocale;
+ if (!std::strcmp(locale, "und")) {
+ locale = ""; // ICU root locale
+ }
+ return locale;
+}
+
+/**
+ * Ensure a locale is null-terminated, and map the "und" locale to an empty
+ * string, which ICU uses internally.
+ */
+static inline const char* IcuLocale(Span<const char> aLocale) {
+ return IcuLocale(AssertNullTerminatedString(aLocale));
+}
+
+/**
+ * Ensure a locale in the buffer is null-terminated, and map the "und" locale to
+ * an empty string, which ICU uses internally.
+ */
+static inline const char* IcuLocale(const Buffer<char>& aLocale) {
+ return IcuLocale(Span(aLocale.begin(), aLocale.Length() - 1));
+}
+
+using ICUResult = Result<Ok, ICUError>;
+
+/**
+ * Convert a UErrorCode to ICUError. This will correctly apply the OutOfMemory
+ * case.
+ */
+ICUError ToICUError(UErrorCode status);
+
+/**
+ * Convert a UErrorCode to ICUResult. This will correctly apply the OutOfMemory
+ * case.
+ */
+ICUResult ToICUResult(UErrorCode status);
+
+/**
+ * The ICU status can complain about a string not being terminated, but this
+ * is fine for this API, as it deals with the mozilla::Span that has a pointer
+ * and a length.
+ */
+static inline bool ICUSuccessForStringSpan(UErrorCode status) {
+ return U_SUCCESS(status) || status == U_STRING_NOT_TERMINATED_WARNING;
+}
+
+/**
+ * This class enforces that the unified mozilla::intl methods match the
+ * const-ness of the underlying ICU4C API calls. const ICU4C APIs take a const
+ * pointer, while mutable ones take a non-const pointer.
+ *
+ * For const ICU4C calls use:
+ * ICUPointer::GetConst().
+ *
+ * For non-const ICU4C calls use:
+ * ICUPointer::GetMut().
+ *
+ * This will propagate the `const` specifier from the ICU4C API call to the
+ * unified method, and it will be enforced by the compiler. This helps ensures
+ * a consistence and correct implementation.
+ */
+template <typename T>
+class ICUPointer {
+ public:
+ explicit ICUPointer(T* aPointer) : mPointer(aPointer) {}
+
+ // Only allow moves of ICUPointers, no copies.
+ ICUPointer(ICUPointer&& other) noexcept = default;
+ ICUPointer& operator=(ICUPointer&& other) noexcept = default;
+
+ // Implicitly take ownership of a raw pointer through copy assignment.
+ ICUPointer& operator=(T* aPointer) noexcept {
+ mPointer = aPointer;
+ return *this;
+ };
+
+ const T* GetConst() const { return const_cast<const T*>(mPointer); }
+ T* GetMut() { return mPointer; }
+
+ explicit operator bool() const { return !!mPointer; }
+
+ private:
+ T* mPointer;
+};
+
+/**
+ * Calling into ICU with the C-API can be a bit tricky. This function wraps up
+ * the relatively risky operations involving pointers, lengths, and buffers into
+ * a simpler call. This function accepts a lambda that performs the ICU call,
+ * and returns the length of characters in the buffer. When using a temporary
+ * stack-based buffer, the calls can often be done in one trip. However, if
+ * additional memory is needed, this function will call the C-API twice, in
+ * order to first get the size of the result, and then second to copy the result
+ * over to the buffer.
+ */
+template <typename ICUStringFunction, typename Buffer>
+static ICUResult FillBufferWithICUCall(Buffer& buffer,
+ const ICUStringFunction& strFn) {
+ static_assert(std::is_same_v<typename Buffer::CharType, char16_t> ||
+ std::is_same_v<typename Buffer::CharType, char> ||
+ std::is_same_v<typename Buffer::CharType, uint8_t>);
+
+ UErrorCode status = U_ZERO_ERROR;
+ int32_t length = strFn(buffer.data(), buffer.capacity(), &status);
+ if (status == U_BUFFER_OVERFLOW_ERROR) {
+ MOZ_ASSERT(length >= 0);
+
+ if (!buffer.reserve(length)) {
+ return Err(ICUError::OutOfMemory);
+ }
+
+ status = U_ZERO_ERROR;
+ mozilla::DebugOnly<int32_t> length2 = strFn(buffer.data(), length, &status);
+ MOZ_ASSERT(length == length2);
+ }
+ if (!ICUSuccessForStringSpan(status)) {
+ return Err(ToICUError(status));
+ }
+
+ buffer.written(length);
+
+ return Ok{};
+}
+
+/**
+ * Adaptor for mozilla::Vector to implement the Buffer interface.
+ */
+template <typename T, size_t N>
+class VectorToBufferAdaptor {
+ mozilla::Vector<T, N>& vector;
+
+ public:
+ using CharType = T;
+
+ explicit VectorToBufferAdaptor(mozilla::Vector<T, N>& vector)
+ : vector(vector) {}
+
+ T* data() { return vector.begin(); }
+
+ size_t capacity() const { return vector.capacity(); }
+
+ bool reserve(size_t length) { return vector.reserve(length); }
+
+ void written(size_t length) {
+ mozilla::DebugOnly<bool> result = vector.resizeUninitialized(length);
+ MOZ_ASSERT(result);
+ }
+};
+
+/**
+ * An overload of FillBufferWithICUCall that accepts a mozilla::Vector rather
+ * than a Buffer.
+ */
+template <typename ICUStringFunction, size_t InlineSize, typename CharType>
+static ICUResult FillBufferWithICUCall(Vector<CharType, InlineSize>& vector,
+ const ICUStringFunction& strFn) {
+ VectorToBufferAdaptor buffer(vector);
+ return FillBufferWithICUCall(buffer, strFn);
+}
+
+#ifndef JS_STANDALONE
+/**
+ * mozilla::intl APIs require sizeable buffers. This class abstracts over
+ * the nsTArray.
+ */
+template <typename T>
+class nsTArrayToBufferAdapter {
+ public:
+ using CharType = T;
+
+ // Do not allow copy or move. Move could be added in the future if needed.
+ nsTArrayToBufferAdapter(const nsTArrayToBufferAdapter&) = delete;
+ nsTArrayToBufferAdapter& operator=(const nsTArrayToBufferAdapter&) = delete;
+
+ explicit nsTArrayToBufferAdapter(nsTArray<CharType>& aArray)
+ : mArray(aArray) {}
+
+ /**
+ * Ensures the buffer has enough space to accommodate |size| elements.
+ */
+ [[nodiscard]] bool reserve(size_t size) {
+ // Use fallible behavior here.
+ return mArray.SetCapacity(size, fallible);
+ }
+
+ /**
+ * Returns the raw data inside the buffer.
+ */
+ CharType* data() { return mArray.Elements(); }
+
+ /**
+ * Returns the count of elements written into the buffer.
+ */
+ size_t length() const { return mArray.Length(); }
+
+ /**
+ * Returns the buffer's overall capacity.
+ */
+ size_t capacity() const { return mArray.Capacity(); }
+
+ /**
+ * Resizes the buffer to the given amount of written elements.
+ */
+ void written(size_t amount) {
+ MOZ_ASSERT(amount <= mArray.Capacity());
+ // This sets |mArray|'s internal size so that it matches how much was
+ // written. This is necessary because the write happens across FFI
+ // boundaries.
+ mArray.SetLengthAndRetainStorage(amount);
+ }
+
+ private:
+ nsTArray<CharType>& mArray;
+};
+
+ /**
+ * Buffer adapter used by the AutoTArray<T, N> overload of
+ * FillBufferWithICUCall. It inherits the constructors of
+ * nsTArrayToBufferAdapter<T> and adds no members of its own.
+ */
+ template <typename T, size_t N>
+ class AutoTArrayToBufferAdapter : public nsTArrayToBufferAdapter<T> {
+ using nsTArrayToBufferAdapter<T>::nsTArrayToBufferAdapter;
+ };
+
+/**
+ * An overload of FillBufferWithICUCall that accepts a nsTArray.
+ */
+template <typename ICUStringFunction, typename CharType>
+static ICUResult FillBufferWithICUCall(nsTArray<CharType>& array,
+ const ICUStringFunction& strFn) {
+ nsTArrayToBufferAdapter<CharType> buffer(array);
+ return FillBufferWithICUCall(buffer, strFn);
+}
+
+ /**
+ * Convenience overload of FillBufferWithICUCall for callers that hold an
+ * AutoTArray. The array is wrapped in an AutoTArrayToBufferAdapter and handed
+ * to the Buffer-based implementation.
+ */
+ template <typename ICUStringFunction, typename CharType, size_t N>
+ static ICUResult FillBufferWithICUCall(AutoTArray<CharType, N>& array,
+ const ICUStringFunction& strFn) {
+ AutoTArrayToBufferAdapter<CharType, N> adapter(array);
+ return FillBufferWithICUCall(adapter, strFn);
+ }
+#endif
+
+/**
+ * Fill a UTF-8 or a UTF-16 buffer with a UTF-16 span. ICU4C mostly uses UTF-16
+ * internally, but different consumers may have different situations with their
+ * buffers.
+ */
+template <typename Buffer>
+[[nodiscard]] bool FillBuffer(Span<const char16_t> utf16Span,
+ Buffer& targetBuffer) {
+ static_assert(std::is_same_v<typename Buffer::CharType, char> ||
+ std::is_same_v<typename Buffer::CharType, unsigned char> ||
+ std::is_same_v<typename Buffer::CharType, char16_t>);
+
+ if constexpr (std::is_same_v<typename Buffer::CharType, char> ||
+ std::is_same_v<typename Buffer::CharType, unsigned char>) {
+ if (utf16Span.Length() & mozilla::tl::MulOverflowMask<3>::value) {
+ // Tripling the size of the buffer overflows the size_t.
+ return false;
+ }
+
+ if (!targetBuffer.reserve(3 * utf16Span.Length())) {
+ return false;
+ }
+
+ size_t amount = ConvertUtf16toUtf8(
+ utf16Span, Span(reinterpret_cast<char*>(targetBuffer.data()),
+ targetBuffer.capacity()));
+
+ targetBuffer.written(amount);
+ }
+ if constexpr (std::is_same_v<typename Buffer::CharType, char16_t>) {
+ size_t amount = utf16Span.Length();
+ if (!targetBuffer.reserve(amount)) {
+ return false;
+ }
+ for (size_t i = 0; i < amount; i++) {
+ targetBuffer.data()[i] = utf16Span[i];
+ }
+ targetBuffer.written(amount);
+ }
+
+ return true;
+}
+
+/**
+ * Fill a UTF-8 or a UTF-16 buffer with a UTF-8 span. ICU4C mostly uses UTF-16
+ * internally, but different consumers may have different situations with their
+ * buffers.
+ */
+template <typename Buffer>
+[[nodiscard]] bool FillBuffer(Span<const char> utf8Span, Buffer& targetBuffer) {
+ static_assert(std::is_same_v<typename Buffer::CharType, char> ||
+ std::is_same_v<typename Buffer::CharType, unsigned char> ||
+ std::is_same_v<typename Buffer::CharType, char16_t>);
+
+ if constexpr (std::is_same_v<typename Buffer::CharType, char> ||
+ std::is_same_v<typename Buffer::CharType, unsigned char>) {
+ size_t amount = utf8Span.Length();
+ if (!targetBuffer.reserve(amount)) {
+ return false;
+ }
+ for (size_t i = 0; i < amount; i++) {
+ targetBuffer.data()[i] =
+ // Static cast in case of a mismatch between `unsigned char` and
+ // `char`
+ static_cast<typename Buffer::CharType>(utf8Span[i]);
+ }
+ targetBuffer.written(amount);
+ }
+ if constexpr (std::is_same_v<typename Buffer::CharType, char16_t>) {
+ if (!targetBuffer.reserve(utf8Span.Length() + 1)) {
+ return false;
+ }
+
+ size_t amount = ConvertUtf8toUtf16(
+ utf8Span, Span(targetBuffer.data(), targetBuffer.capacity()));
+
+ targetBuffer.written(amount);
+ }
+
+ return true;
+}
+
+/**
+ * It is convenient for callers to be able to pass in UTF-8 strings to the API.
+ * This function can be used to convert that to a stack-allocated UTF-16
+ * mozilla::Vector that can then be passed into ICU calls. The string will be
+ * null terminated.
+ */
+template <size_t StackSize>
+[[nodiscard]] static bool FillUTF16Vector(
+ Span<const char> utf8Span,
+ mozilla::Vector<char16_t, StackSize>& utf16TargetVec) {
+ // Per ConvertUtf8toUtf16: The length of aDest must be at least one greater
+ // than the length of aSource. This additional length will be used for null
+ // termination.
+ if (!utf16TargetVec.reserve(utf8Span.Length() + 1)) {
+ return false;
+ }
+
+ // ConvertUtf8toUtf16 fills the buffer with the data, but the length of the
+ // vector is unchanged.
+ size_t length = ConvertUtf8toUtf16(
+ utf8Span, Span(utf16TargetVec.begin(), utf16TargetVec.capacity()));
+
+ // Assert that the last element is free for writing a null terminator.
+ MOZ_ASSERT(length < utf16TargetVec.capacity());
+ utf16TargetVec.begin()[length] = '\0';
+
+ // The call to resizeUninitialized notifies the vector of how much was written
+ // exclusive of the null terminated character.
+ return utf16TargetVec.resizeUninitialized(length);
+}
+
+/**
+ * An iterable class that wraps calls to the ICU UEnumeration C API.
+ *
+ * Usage:
+ *
+ * // Make sure the range expression is non-temporary, otherwise there is a
+ * // risk of undefined behavior:
+ * auto result = Calendar::GetBcp47KeywordValuesForLocale("en-US");
+ *
+ * for (auto name : result.unwrap()) {
+ * MOZ_ASSERT(name.unwrap(), "An iterable value exists".);
+ * }
+ */
+template <typename CharType, typename T, T(Mapper)(const CharType*, int32_t)>
+class Enumeration {
+ public:
+ class Iterator;
+ friend class Iterator;
+
+ // Transfer ownership of the UEnumeration in the move constructor.
+ Enumeration(Enumeration&& other) noexcept
+ : mUEnumeration(other.mUEnumeration) {
+ other.mUEnumeration = nullptr;
+ }
+
+ // Transfer ownership of the UEnumeration in the move assignment operator.
+ Enumeration& operator=(Enumeration&& other) noexcept {
+ if (this == &other) {
+ return *this;
+ }
+ if (mUEnumeration) {
+ uenum_close(mUEnumeration);
+ }
+ mUEnumeration = other.mUEnumeration;
+ other.mUEnumeration = nullptr;
+ return *this;
+ }
+
+ class Iterator {
+ Enumeration& mEnumeration;
+ // `Nothing` signifies that no enumeration has been loaded through ICU yet.
+ Maybe<int32_t> mIteration = Nothing{};
+ const CharType* mNext = nullptr;
+ int32_t mNextLength = 0;
+
+ public:
+ using value_type = const CharType*;
+ using reference = T;
+ using iterator_category = std::input_iterator_tag;
+
+ explicit Iterator(Enumeration& aEnumeration, bool aIsBegin)
+ : mEnumeration(aEnumeration) {
+ if (aIsBegin) {
+ AdvanceUEnum();
+ }
+ }
+
+ Iterator& operator++() {
+ AdvanceUEnum();
+ return *this;
+ }
+
+ Iterator operator++(int) {
+ Iterator retval = *this;
+ ++(*this);
+ return retval;
+ }
+
+ bool operator==(Iterator other) const {
+ return mIteration == other.mIteration;
+ }
+
+ bool operator!=(Iterator other) const { return !(*this == other); }
+
+ T operator*() const {
+ // Map the iterated value to something new.
+ return Mapper(mNext, mNextLength);
+ }
+
+ private:
+ void AdvanceUEnum() {
+ if (mIteration.isNothing()) {
+ mIteration = Some(-1);
+ }
+ UErrorCode status = U_ZERO_ERROR;
+ if constexpr (std::is_same_v<CharType, char16_t>) {
+ mNext = uenum_unext(mEnumeration.mUEnumeration, &mNextLength, &status);
+ } else {
+ static_assert(std::is_same_v<CharType, char>,
+ "Only char16_t and char are supported by "
+ "mozilla::intl::Enumeration.");
+ mNext = uenum_next(mEnumeration.mUEnumeration, &mNextLength, &status);
+ }
+ if (U_FAILURE(status)) {
+ mNext = nullptr;
+ }
+
+ if (mNext) {
+ (*mIteration)++;
+ } else {
+ // The iterator is complete.
+ mIteration = Nothing{};
+ }
+ }
+ };
+
+ Iterator begin() { return Iterator(*this, true); }
+ Iterator end() { return Iterator(*this, false); }
+
+ explicit Enumeration(UEnumeration* aUEnumeration)
+ : mUEnumeration(aUEnumeration) {}
+
+ ~Enumeration() {
+ if (mUEnumeration) {
+ // Only close when the object is being destructed, not moved.
+ uenum_close(mUEnumeration);
+ }
+ }
+
+ private:
+ UEnumeration* mUEnumeration = nullptr;
+};
+
+ /**
+ * Mapper for Enumeration that exposes each enumerated string as a Span.
+ * A null string is reported as an InternalError.
+ */
+ template <typename CharType>
+ Result<Span<const CharType>, InternalError> SpanMapper(const CharType* string,
+ int32_t length) {
+ if (!string) {
+ return Err(InternalError{});
+ }
+
+ // Hand the raw string of this Iterator back to the caller, unmodified.
+ MOZ_ASSERT(length >= 0);
+ const auto size = static_cast<size_t>(length);
+ return Span<const CharType>(string, size);
+ }
+
+template <typename CharType>
+using SpanResult = Result<Span<const CharType>, InternalError>;
+
+template <typename CharType>
+using SpanEnumeration = Enumeration<CharType, SpanResult<CharType>, SpanMapper>;
+
+/**
+ * An iterable class that wraps calls to ICU's available locales API.
+ */
+template <int32_t(CountAvailable)(), const char*(GetAvailable)(int32_t)>
+class AvailableLocalesEnumeration final {
+ // The overall count of available locales.
+ int32_t mLocalesCount = 0;
+
+ public:
+ AvailableLocalesEnumeration() { mLocalesCount = CountAvailable(); }
+
+ class Iterator {
+ public:
+ // std::iterator traits.
+ using iterator_category = std::input_iterator_tag;
+ using value_type = const char*;
+ using difference_type = ptrdiff_t;
+ using pointer = value_type*;
+ using reference = value_type&;
+
+ private:
+ // The current position in the list of available locales.
+ int32_t mLocalesPos = 0;
+
+ public:
+ explicit Iterator(int32_t aLocalesPos) : mLocalesPos(aLocalesPos) {}
+
+ Iterator& operator++() {
+ mLocalesPos++;
+ return *this;
+ }
+
+ Iterator operator++(int) {
+ Iterator result = *this;
+ ++(*this);
+ return result;
+ }
+
+ bool operator==(const Iterator& aOther) const {
+ return mLocalesPos == aOther.mLocalesPos;
+ }
+
+ bool operator!=(const Iterator& aOther) const { return !(*this == aOther); }
+
+ value_type operator*() const { return GetAvailable(mLocalesPos); }
+ };
+
+ // std::iterator begin() and end() methods.
+
+ /**
+ * Return an iterator pointing to the first available locale.
+ */
+ Iterator begin() const { return Iterator(0); }
+
+ /**
+ * Return an iterator pointing to one past the last available locale.
+ */
+ Iterator end() const { return Iterator(mLocalesCount); }
+};
+
+/**
+ * A helper class to wrap calling ICU function in cpp file so we don't have to
+ * include the ICU header here.
+ */
+class FormattedResult {
+ protected:
+ static Result<Span<const char16_t>, ICUError> ToSpanImpl(
+ const UFormattedValue* value);
+};
+
+/**
+ * A RAII class to hold the formatted value of format result.
+ *
+ * The caller will need to create this AutoFormattedResult on the stack, with
+ * the following parameters:
+ * 1. Native ICU type.
+ * 2. An ICU function which opens the result.
+ * 3. An ICU function which can get the result as UFormattedValue.
+ * 4. An ICU function which closes the result.
+ *
+ * After the object is created, caller needs to call IsValid() method to check
+ * if the native object has been created properly, and then passes this
+ * object to other format interfaces.
+ * The format result will be stored in this object, the caller can use ToSpan()
+ * method to get the formatted string.
+ *
+ * The methods GetFormatted() and Value() are private methods since they expose
+ * native ICU types. If the caller wants to call these methods, the caller needs
+ * to register itself as a friend class in AutoFormattedResult.
+ *
+ * The formatted value and the native ICU object will be released once this
+ * class is destructed.
+ */
+template <typename T, T*(Open)(UErrorCode*),
+ const UFormattedValue*(GetValue)(const T*, UErrorCode*),
+ void(Close)(T*)>
+class MOZ_RAII AutoFormattedResult : FormattedResult {
+ public:
+ AutoFormattedResult() {
+ mFormatted = Open(&mError);
+ if (U_FAILURE(mError)) {
+ mFormatted = nullptr;
+ }
+ }
+ ~AutoFormattedResult() {
+ if (mFormatted) {
+ Close(mFormatted);
+ }
+ }
+
+ AutoFormattedResult(const AutoFormattedResult& other) = delete;
+ AutoFormattedResult& operator=(const AutoFormattedResult& other) = delete;
+
+ AutoFormattedResult(AutoFormattedResult&& other) = delete;
+ AutoFormattedResult& operator=(AutoFormattedResult&& other) = delete;
+
+ /**
+ * Check if the native UFormattedDateInterval was created successfully.
+ */
+ bool IsValid() const { return !!mFormatted; }
+
+ /**
+ * Get error code if IsValid() returns false.
+ */
+ ICUError GetError() const { return ToICUError(mError); }
+
+ /**
+ * Get the formatted result.
+ */
+ Result<Span<const char16_t>, ICUError> ToSpan() const {
+ if (!IsValid()) {
+ return Err(GetError());
+ }
+
+ const UFormattedValue* value = Value();
+ if (!value) {
+ return Err(ICUError::InternalError);
+ }
+
+ return ToSpanImpl(value);
+ }
+
+ private:
+ friend class DateIntervalFormat;
+ friend class ListFormat;
+ T* GetFormatted() const { return mFormatted; }
+
+ const UFormattedValue* Value() const {
+ if (!IsValid()) {
+ return nullptr;
+ }
+
+ UErrorCode status = U_ZERO_ERROR;
+ const UFormattedValue* value = GetValue(mFormatted, &status);
+ if (U_FAILURE(status)) {
+ return nullptr;
+ }
+
+ return value;
+ };
+
+ T* mFormatted = nullptr;
+ UErrorCode mError = U_ZERO_ERROR;
+};
+} // namespace mozilla::intl
+
+#endif /* intl_components_ICUUtils_h */
diff --git a/intl/components/src/ICU4CLibrary.cpp b/intl/components/src/ICU4CLibrary.cpp
new file mode 100644
index 0000000000..d13bc40ad4
--- /dev/null
+++ b/intl/components/src/ICU4CLibrary.cpp
@@ -0,0 +1,41 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/intl/ICU4CLibrary.h"
+
+#include "unicode/putil.h"
+#include "unicode/uclean.h"
+#include "unicode/utypes.h"
+#include "unicode/uversion.h"
+
+namespace mozilla::intl {
+
+ICUResult ICU4CLibrary::Initialize() {
+#if !MOZ_SYSTEM_ICU
+  // Only when the in-tree ICU copy is used: explicitly reset the data
+  // directory to its default value. See bug 1527879 and ICU bug
+  // report <https://unicode-org.atlassian.net/browse/ICU-20491>.
+  u_setDataDirectory("");
+#endif
+
+  // Run ICU's own initialization and hand its status back to the caller.
+  UErrorCode initStatus = U_ZERO_ERROR;
+  u_init(&initStatus);
+  return ToICUResult(initStatus);
+}
+
+void ICU4CLibrary::Cleanup() {
+  // Forward directly to ICU's global cleanup routine.
+  u_cleanup();
+}
+
+ICUResult ICU4CLibrary::SetMemoryFunctions(MemoryFunctions aMemoryFunctions) {
+  // The three callbacks are installed together; no context pointer is used.
+  const void* const context = nullptr;
+  const auto allocFn = aMemoryFunctions.mAllocFn;
+  const auto reallocFn = aMemoryFunctions.mReallocFn;
+  const auto freeFn = aMemoryFunctions.mFreeFn;
+
+  UErrorCode status = U_ZERO_ERROR;
+  u_setMemoryFunctions(context, allocFn, reallocFn, freeFn, &status);
+  return ToICUResult(status);
+}
+
+Span<const char> ICU4CLibrary::GetVersion() {
+  // U_ICU_VERSION is a compile-time string supplied by the ICU headers.
+  const char* const version = U_ICU_VERSION;
+  return MakeStringSpan(version);
+}
+
+} // namespace mozilla::intl
diff --git a/intl/components/src/ICU4CLibrary.h b/intl/components/src/ICU4CLibrary.h
new file mode 100644
index 0000000000..67cd1e205f
--- /dev/null
+++ b/intl/components/src/ICU4CLibrary.h
@@ -0,0 +1,74 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef intl_components_ICU4CLibrary_h
+#define intl_components_ICU4CLibrary_h
+
+#include "mozilla/intl/ICU4CGlue.h"
+#include "mozilla/Span.h"
+
+#include <stddef.h>
+
+namespace mozilla::intl {
+/**
+ * Wrapper around non-portable, ICU4C specific functions.
+ */
+// All members are static and the constructor is deleted, so this class only
+// serves as a scope for the library-level entry points below.
+class ICU4CLibrary final {
+ public:
+ ICU4CLibrary() = delete;
+
+ /**
+ * Initializes the ICU4C library.
+ *
+ * When MOZ_SYSTEM_ICU is not set, the ICU data directory is first reset to
+ * the empty string; afterwards u_init() is called and its status is
+ * returned as an ICUResult.
+ *
+ * Note: This function should only be called once.
+ */
+ static ICUResult Initialize();
+
+ /**
+ * Releases any memory held by ICU. Any open ICU objects and resources are
+ * left in an undefined state after this operation.
+ *
+ * Implemented as a direct call to u_cleanup().
+ *
+ * NOTE: This function is not thread-safe.
+ */
+ static void Cleanup();
+
+ struct MemoryFunctions {
+ // These are equivalent to ICU's |UMemAllocFn|, |UMemReallocFn|, and
+ // |UMemFreeFn| types. The first argument (called |context| in the ICU
+ // docs) will always be nullptr and should be ignored.
+ using AllocFn = void* (*)(const void*, size_t);
+ using ReallocFn = void* (*)(const void*, void*, size_t);
+ using FreeFn = void (*)(const void*, void*);
+
+ /**
+ * Function called when allocating memory.
+ */
+ AllocFn mAllocFn = nullptr;
+
+ /**
+ * Function called when reallocating memory.
+ */
+ ReallocFn mReallocFn = nullptr;
+
+ /**
+ * Function called when freeing memory.
+ */
+ FreeFn mFreeFn = nullptr;
+ };
+
+ /**
+ * Sets the ICU memory functions.
+ *
+ * The three callbacks are forwarded to u_setMemoryFunctions() with a null
+ * context pointer; the resulting ICU status is returned as an ICUResult.
+ *
+ * This function can only be called before the initial call to Initialize()!
+ */
+ static ICUResult SetMemoryFunctions(MemoryFunctions aMemoryFunctions);
+
+ /**
+ * Return the ICU version number.
+ *
+ * The span covers the compile-time U_ICU_VERSION string.
+ */
+ static Span<const char> GetVersion();
+};
+} // namespace mozilla::intl
+
+#endif
diff --git a/intl/components/src/ICUError.h b/intl/components/src/ICUError.h
new file mode 100644
index 0000000000..c3ef236210
--- /dev/null
+++ b/intl/components/src/ICUError.h
@@ -0,0 +1,118 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef intl_components_ICUError_h
+#define intl_components_ICUError_h
+
+#include "mozilla/Attributes.h"
+#include "mozilla/Result.h"
+
+#include <cstdint>
+#include <type_traits>
+
+namespace mozilla::intl {
+
+/**
+ * General purpose error type for operations that can result in an ICU error.
+ */
+enum class ICUError : uint8_t {
+ // Since we claim UnusedZero<ICUError>::value and
+ // HasFreeLSB<ICUError>::value == true below, we must only use positive,
+ // even enum values.
+
+ // An allocation failed (e.g. a vector could not be grown).
+ OutOfMemory = 2,
+ // Any other failure, typically an ICU call returning an error status.
+ InternalError = 4,
+ // A computed length does not fit the integer type required by ICU.
+ OverflowError = 6,
+};
+
+/**
+ * Error type when a method call can only result in an internal ICU error.
+ */
+struct InternalError {
+ // Since we claim UnusedZero<InternalError>::value and
+ // HasFreeLSB<InternalError>::value == true below, we must only use positive,
+ // even enum values.
+ enum class ErrorKind : uint8_t { Unspecified = 2 };
+
+ // The only state carried by this error; defaults to the single enumerator.
+ const ErrorKind kind = ErrorKind::Unspecified;
+
+ constexpr InternalError() = default;
+
+ private:
+ // The UnusedZero specialization converts a stored integer back into an
+ // InternalError and needs the private converting constructor below.
+ friend struct mozilla::detail::UnusedZero<InternalError>;
+
+ constexpr MOZ_IMPLICIT InternalError(ErrorKind aKind) : kind(aKind) {}
+};
+
+} // namespace mozilla::intl
+
+namespace mozilla::detail {
+
+// Provide specializations for UnusedZero and HasFreeLSB to enable more
+// efficient packing for mozilla::Result. This also avoids having to include
+// the ResultVariant.h header.
+//
+// UnusedZero specialization:
+//
+// The UnusedZero specialization makes it possible to use CompactPair as the
+// underlying storage type for Result. For this optimization to work, it is
+// necessary that a distinct null-value is present for the error type. The
+// null-value represents the success case and must be different from all actual
+// error values.
+// This optimization can be easily enabled when the error type is a scoped enum.
+// No enum value must use zero as its value and UnusedZero must be specialized
+// through the helper struct UnusedZeroEnum.
+// For non-enum error types, a more complicated setup is necessary. The
+// UnusedZero specialization must implement all necessary interface methods
+// (i.e. `Inspect`, `Unwrap`, and `Store`) as well as all necessary constants
+// and types (i.e. `StorageType`, `value`, and `nullValue`).
+//
+// HasFreeLSB specialization:
+//
+// When the value and the error type are both providing specializations for
+// HasFreeLSB, Result uses an optimization to store both types within a single
+// storage location. This optimization uses the least significant bit as a tag
+// bit to mark the error case. And because the least significant bit is used for
+// tagging, it can't be used by the error type. That means for example when the
+// error type is an enum, all enum values must be even, because odd integer
+// values have the least significant bit set.
+// The actual HasFreeLSB specialization just needs to define `value` as a static
+// constant with the value `true`.
+
+// ICUError is a scoped enum without a zero enumerator, so the generic
+// UnusedZeroEnum helper provides the whole specialization.
+template <>
+struct UnusedZero<mozilla::intl::ICUError>
+ : UnusedZeroEnum<mozilla::intl::ICUError> {};
+
+// InternalError is not an enum, so the full UnusedZero interface is spelled
+// out here: the error is stored as the underlying integer of its ErrorKind,
+// with zero reserved as the null (success) value.
+template <>
+struct UnusedZero<mozilla::intl::InternalError> {
+  using Error = mozilla::intl::InternalError;
+  using StorageType = std::underlying_type_t<Error::ErrorKind>;
+
+  static constexpr bool value = true;
+  static constexpr StorageType nullValue = 0;
+
+  // Rebuild the error from its stored representation without consuming it.
+  static constexpr Error Inspect(const StorageType& aValue) {
+    return Error(static_cast<Error::ErrorKind>(aValue));
+  }
+
+  // Rebuild the error from its stored representation.
+  static constexpr Error Unwrap(StorageType aValue) {
+    return Error(static_cast<Error::ErrorKind>(aValue));
+  }
+
+  // Convert the error into its stored representation.
+  static constexpr StorageType Store(Error aValue) {
+    const Error::ErrorKind errorKind = aValue.kind;
+    return static_cast<StorageType>(errorKind);
+  }
+};
+
+// All ICUError enumerators are even, so the least significant bit is free to
+// be used as a tag bit.
+template <>
+struct HasFreeLSB<mozilla::intl::ICUError> {
+ static constexpr bool value = true;
+};
+
+// InternalError::ErrorKind only has an even enumerator, so the least
+// significant bit is free to be used as a tag bit.
+template <>
+struct HasFreeLSB<mozilla::intl::InternalError> {
+ static constexpr bool value = true;
+};
+
+} // namespace mozilla::detail
+
+#endif
diff --git a/intl/components/src/IDNA.cpp b/intl/components/src/IDNA.cpp
new file mode 100644
index 0000000000..9b5303f4e8
--- /dev/null
+++ b/intl/components/src/IDNA.cpp
@@ -0,0 +1,26 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/intl/IDNA.h"
+
+namespace mozilla::intl {
+
+// static
+Result<UniquePtr<IDNA>, ICUError> IDNA::TryCreate(ProcessingType aProcessing) {
+  // CheckBidi and CheckJoiners are always requested; non-transitional
+  // processing additionally sets the matching ToUnicode option.
+  const bool nonTransitional =
+      aProcessing == ProcessingType::NonTransitional;
+  const uint32_t options =
+      UIDNA_CHECK_BIDI | UIDNA_CHECK_CONTEXTJ |
+      (nonTransitional ? UIDNA_NONTRANSITIONAL_TO_UNICODE : 0);
+
+  UErrorCode openStatus = U_ZERO_ERROR;
+  UIDNA* handle = uidna_openUTS46(options, &openStatus);
+  if (U_FAILURE(openStatus)) {
+    return Err(ToICUError(openStatus));
+  }
+
+  // The new IDNA object takes over the UIDNA handle.
+  return UniquePtr<IDNA>(new IDNA(handle));
+}
+
+IDNA::~IDNA() {
+  // Close the UIDNA handle that was handed to the constructor.
+  UIDNA* const handle = mIDNA.GetMut();
+  uidna_close(handle);
+}
+} // namespace mozilla::intl
diff --git a/intl/components/src/IDNA.h b/intl/components/src/IDNA.h
new file mode 100644
index 0000000000..9f18661403
--- /dev/null
+++ b/intl/components/src/IDNA.h
@@ -0,0 +1,130 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef intl_components_IDNA_h_
+#define intl_components_IDNA_h_
+
+#include "mozilla/intl/ICU4CGlue.h"
+
+#include "unicode/uidna.h"
+
+namespace mozilla::intl {
+
+/**
+ * This component is a Mozilla-focused API for the Internationalizing Domain
+ * Names in Applications (IDNA).
+ *
+ * See UTS #46 for details.
+ * http://unicode.org/reports/tr46/
+ */
+class IDNA final {
+ public:
+  ~IDNA();
+
+  /**
+   * The two kinds of processing specified by UTS #46: Transitional Processing
+   * and NonTransitional Processing.
+   *
+   * See http://unicode.org/reports/tr46/#Compatibility_Processing
+   */
+  enum class ProcessingType {
+    Transitional,
+    NonTransitional,
+  };
+
+  /**
+   * Create an IDNA object using the processing type selected through
+   * |aProcessing|.
+   *
+   * The implementation currently always enables the CheckBidi and
+   * CheckJoiners flags.
+   *
+   * See UTS #46, '4 Processing' for details.
+   * http://unicode.org/reports/tr46/#Processing
+   */
+  static Result<UniquePtr<IDNA>, ICUError> TryCreate(
+      ProcessingType aProcessing);
+
+  /**
+   * Holds the error flags reported by an IDNA processing call.
+   */
+  class Info final {
+   public:
+    /**
+     * Check if any error was reported.
+     */
+    bool HasErrors() const { return mErrorCode != 0; }
+
+    /**
+     * A domain name label starting with "xn--" contains Punycode. This
+     * checks if the label's Punycode is invalid.
+     *
+     * See https://www.rfc-editor.org/rfc/rfc3492.html
+     */
+    bool HasInvalidPunycode() const { return HasAny(UIDNA_ERROR_PUNYCODE); }
+
+    /**
+     * The label was successfully ACE (Punycode) decoded but the resulting
+     * string had severe validation errors. For example, it might contain
+     * characters that are not allowed in ACE labels, or it might not be
+     * normalized.
+     */
+    bool HasInvalidAceLabel() const {
+      return HasAny(UIDNA_ERROR_INVALID_ACE_LABEL);
+    }
+
+    /**
+     * Checks if the domain name label has any invalid hyphen characters.
+     *
+     * See CheckHyphens flag for details in UTS #46[1].
+     * - The label must not contain a U+002D HYPHEN-MINUS character in both the
+     *   third and fourth positions.
+     * - The label must neither begin nor end with a U+002D HYPHEN-MINUS
+     *   character.
+     *
+     * [1]: http://unicode.org/reports/tr46/#Validity_Criteria
+     */
+    bool HasInvalidHyphen() const {
+      return HasAny(UIDNA_ERROR_LEADING_HYPHEN | UIDNA_ERROR_TRAILING_HYPHEN |
+                    UIDNA_ERROR_HYPHEN_3_4);
+    }
+
+   private:
+    friend class IDNA;
+    explicit Info(const UIDNAInfo* aUinfo) : mErrorCode(aUinfo->errors) {}
+
+    // True if at least one of the error bits in |aMask| is set.
+    bool HasAny(uint32_t aMask) const { return (mErrorCode & aMask) != 0; }
+
+    uint32_t mErrorCode = 0;
+  };
+
+  /**
+   * Converts a domain name label to its Unicode form for human-readable
+   * display, writes that form into |aBuffer|, and returns an IDNA::Info
+   * object.
+   *
+   * A successful return only means the ICU call itself succeeded. The
+   * IDNA::Info object carries the detailed processing result, so callers
+   * should also check IDNA::Info::HasErrors().
+   */
+  template <typename Buffer>
+  Result<Info, ICUError> LabelToUnicode(Span<const char16_t> aLabel,
+                                        Buffer& aBuffer) {
+    UIDNAInfo uinfo = UIDNA_INFO_INITIALIZER;
+
+    // ICU fills |uinfo| as a side effect of the conversion.
+    auto labelToUnicode = [&](UChar* target, int32_t length,
+                              UErrorCode* status) {
+      return uidna_labelToUnicode(mIDNA.GetConst(), aLabel.data(),
+                                  aLabel.size(), target, length, &uinfo,
+                                  status);
+    };
+    MOZ_TRY(FillBufferWithICUCall(aBuffer, labelToUnicode));
+
+    return Info{&uinfo};
+  }
+
+ private:
+  explicit IDNA(UIDNA* aIDNA) : mIDNA(aIDNA) {}
+
+  ICUPointer<UIDNA> mIDNA = ICUPointer<UIDNA>(nullptr);
+};
+} // namespace mozilla::intl
+#endif // intl_components_IDNA_h_
diff --git a/intl/components/src/ListFormat.cpp b/intl/components/src/ListFormat.cpp
new file mode 100644
index 0000000000..6d1e10826a
--- /dev/null
+++ b/intl/components/src/ListFormat.cpp
@@ -0,0 +1,132 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include "mozilla/intl/ListFormat.h"
+
+#include "ScopedICUObject.h"
+
+namespace mozilla::intl {
+
+/*static*/ Result<UniquePtr<ListFormat>, ICUError> ListFormat::TryCreate(
+    mozilla::Span<const char> aLocale, const Options& aOptions) {
+  // Translate the ECMA-402 options into their ICU counterparts.
+  UListFormatterType utype = ToUListFormatterType(aOptions.mType);
+  UListFormatterWidth uwidth = ToUListFormatterWidth(aOptions.mStyle);
+
+  UErrorCode status = U_ZERO_ERROR;
+  UListFormatter* fmt =
+      ulistfmt_openForType(IcuLocale(aLocale), utype, uwidth, &status);
+  if (U_FAILURE(status)) {
+    // Map the ICU status instead of collapsing every failure into
+    // InternalError, so that allocation failures are reported as such.
+    return Err(ToICUError(status));
+  }
+
+  // The ListFormat takes over the formatter and closes it in its destructor.
+  return UniquePtr<ListFormat>(new ListFormat(fmt));
+}
+
+ListFormat::~ListFormat() {
+  // Nothing to close when no formatter is held.
+  if (!mListFormatter) {
+    return;
+  }
+  ulistfmt_close(mListFormatter.GetMut());
+}
+
+/* static */ UListFormatterType ListFormat::ToUListFormatterType(Type type) {
+  // Map each ListFormat type onto the matching ICU formatter type.
+  if (type == Type::Conjunction) {
+    return ULISTFMT_TYPE_AND;
+  }
+  if (type == Type::Disjunction) {
+    return ULISTFMT_TYPE_OR;
+  }
+  if (type == Type::Unit) {
+    return ULISTFMT_TYPE_UNITS;
+  }
+
+  // Not reachable for valid enum values; fall back to the conjunction type.
+  MOZ_ASSERT_UNREACHABLE();
+  return ULISTFMT_TYPE_AND;
+}
+
+/* static */ UListFormatterWidth ListFormat::ToUListFormatterWidth(
+    Style style) {
+  // Map each ListFormat style onto the matching ICU formatter width.
+  if (style == Style::Long) {
+    return ULISTFMT_WIDTH_WIDE;
+  }
+  if (style == Style::Short) {
+    return ULISTFMT_WIDTH_SHORT;
+  }
+  if (style == Style::Narrow) {
+    return ULISTFMT_WIDTH_NARROW;
+  }
+
+  // Not reachable for valid enum values; fall back to the wide width.
+  MOZ_ASSERT_UNREACHABLE();
+  return ULISTFMT_WIDTH_WIDE;
+}
+
+/**
+ * Split the formatted list into "element" and "literal" parts.
+ *
+ * Iterates over the ULISTFMT_ELEMENT_FIELD positions of |formattedValue| and
+ * appends one Element part per field; any text between two fields, and any
+ * text after the last field up to |formattedSize|, is appended as a Literal
+ * part. Each part only records its end index.
+ *
+ * Returns ICUError::OutOfMemory when |parts| can't be grown, and the mapped
+ * ICU error when an ICU call fails.
+ */
+ICUResult ListFormat::FormattedToParts(const UFormattedValue* formattedValue,
+                                       size_t formattedSize,
+                                       PartVector& parts) {
+  size_t lastEndIndex = 0;
+
+  // Appends a part ending at |endIndex| and remembers where it ended. The
+  // only failure reported here is the vector append returning false.
+  auto AppendPart = [&](PartType type, size_t endIndex) {
+    if (!parts.emplaceBack(type, endIndex)) {
+      return false;
+    }
+
+    lastEndIndex = endIndex;
+    return true;
+  };
+
+  UErrorCode status = U_ZERO_ERROR;
+  UConstrainedFieldPosition* fpos = ucfpos_open(&status);
+  if (U_FAILURE(status)) {
+    return Err(ToICUError(status));
+  }
+  ScopedICUObject<UConstrainedFieldPosition, ucfpos_close> toCloseFpos(fpos);
+
+  // We're only interested in ULISTFMT_ELEMENT_FIELD fields.
+  ucfpos_constrainField(fpos, UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD,
+                        &status);
+  if (U_FAILURE(status)) {
+    return Err(ToICUError(status));
+  }
+
+  while (true) {
+    bool hasMore = ufmtval_nextPosition(formattedValue, fpos, &status);
+    if (U_FAILURE(status)) {
+      return Err(ToICUError(status));
+    }
+    if (!hasMore) {
+      break;
+    }
+
+    int32_t beginIndexInt, endIndexInt;
+    ucfpos_getIndexes(fpos, &beginIndexInt, &endIndexInt, &status);
+    if (U_FAILURE(status)) {
+      return Err(ToICUError(status));
+    }
+
+    MOZ_ASSERT(beginIndexInt <= endIndexInt,
+               "field iterator returning invalid range");
+
+    size_t beginIndex = AssertedCast<size_t>(beginIndexInt);
+    size_t endIndex = AssertedCast<size_t>(endIndexInt);
+
+    // Indices are guaranteed to be returned in order (from left to right).
+    MOZ_ASSERT(lastEndIndex <= beginIndex,
+               "field iteration didn't return fields in order start to "
+               "finish as expected");
+
+    // Text between the previous field and this one is a literal.
+    if (lastEndIndex < beginIndex) {
+      if (!AppendPart(PartType::Literal, beginIndex)) {
+        return Err(ICUError::OutOfMemory);
+      }
+    }
+
+    if (!AppendPart(PartType::Element, endIndex)) {
+      return Err(ICUError::OutOfMemory);
+    }
+  }
+
+  // Append any final literal.
+  if (lastEndIndex < formattedSize) {
+    if (!AppendPart(PartType::Literal, formattedSize)) {
+      return Err(ICUError::OutOfMemory);
+    }
+  }
+
+  return Ok();
+}
+} // namespace mozilla::intl
diff --git a/intl/components/src/ListFormat.h b/intl/components/src/ListFormat.h
new file mode 100644
index 0000000000..4952512f97
--- /dev/null
+++ b/intl/components/src/ListFormat.h
@@ -0,0 +1,223 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef intl_components_ListFormat_h_
+#define intl_components_ListFormat_h_
+
+#include "mozilla/CheckedInt.h"
+#include "mozilla/intl/ICU4CGlue.h"
+#include "mozilla/PodOperations.h"
+#include "mozilla/Result.h"
+#include "mozilla/Vector.h"
+#include "unicode/ulistformatter.h"
+
+struct UListFormatter;
+
+namespace mozilla::intl {
+
+// Passed as the second template argument of the mozilla::Vector types used by
+// ListFormat (string lists, part lists, and the temporary ICU argument arrays).
+static constexpr size_t DEFAULT_LIST_LENGTH = 8;
+
+/**
+ * This component is a Mozilla-focused API for the list formatting provided by
+ * ICU. It implements the API provided by the ECMA-402 Intl.ListFormat object.
+ *
+ * https://tc39.es/ecma402/#listformat-objects
+ */
+class ListFormat final {
+ public:
+  /**
+   * The [[Type]] and [[Style]] properties of ListFormat instances.
+   *
+   * https://tc39.es/ecma402/#sec-properties-of-intl-listformat-instances
+   */
+  // [[Type]]
+  enum class Type { Conjunction, Disjunction, Unit };
+  // [[Style]]
+  enum class Style { Long, Short, Narrow };
+
+  /**
+   * The 'options' object to create Intl.ListFormat instance.
+   *
+   * https://tc39.es/ecma402/#sec-Intl.ListFormat
+   */
+  struct Options {
+    // "conjunction" is the default fallback value.
+    Type mType = Type::Conjunction;
+
+    // "long" is the default fallback value.
+    Style mStyle = Style::Long;
+  };
+
+  /**
+   * Create a ListFormat object for the provided locale and options.
+   *
+   * https://tc39.es/ecma402/#sec-Intl.ListFormat
+   */
+  static Result<UniquePtr<ListFormat>, ICUError> TryCreate(
+      mozilla::Span<const char> aLocale, const Options& aOptions);
+
+  ~ListFormat();
+
+  /**
+   * The list of String values for FormatList and FormatListToParts.
+   *
+   * https://tc39.es/ecma402/#sec-formatlist
+   * https://tc39.es/ecma402/#sec-formatlisttoparts
+   */
+  using StringList =
+      mozilla::Vector<mozilla::Span<const char16_t>, DEFAULT_LIST_LENGTH>;
+
+  /**
+   * Format the list according to the locale and options of this ListFormat
+   * and write the result into |buffer|.
+   *
+   * https://tc39.es/ecma402/#sec-Intl.ListFormat.prototype.format
+   * https://tc39.es/ecma402/#sec-formatlist
+   */
+  template <typename Buffer>
+  ICUResult Format(const StringList& list, Buffer& buffer) const {
+    static_assert(std::is_same_v<typename Buffer::CharType, char16_t>,
+                  "Currently only UTF-16 buffers are supported.");
+
+    mozilla::Vector<const char16_t*, DEFAULT_LIST_LENGTH> u16strings;
+    mozilla::Vector<int32_t, DEFAULT_LIST_LENGTH> u16stringLens;
+    MOZ_TRY(ConvertStringListToVectors(list, u16strings, u16stringLens));
+
+    int32_t u16stringCount = mozilla::AssertedCast<int32_t>(list.length());
+    MOZ_TRY(FillBufferWithICUCall(
+        buffer, [this, &u16strings, &u16stringLens, u16stringCount](
+                    char16_t* chars, int32_t size, UErrorCode* status) {
+          return ulistfmt_format(mListFormatter.GetConst(), u16strings.begin(),
+                                 u16stringLens.begin(), u16stringCount, chars,
+                                 size, status);
+        }));
+
+    return Ok{};
+  }
+
+  /**
+   * The corresponding list of parts according to the effective locale and the
+   * formatting options of ListFormat.
+   * Each part has a [[Type]] field, which must be "element" or "literal", and a
+   * [[Value]] field.
+   *
+   * To store Part more efficiently, it doesn't store the ||Value|| of type
+   * string in this struct. Instead, it stores the end index of the string in
+   * the buffer (which is passed to ListFormat::FormatToParts()). The begin
+   * index of the ||Value|| is the end index of the previous part.
+   *
+   *  Buffer
+   *  0               i               j
+   * +---------------+---------------+---------------+
+   * | Part[0].Value | Part[1].Value | Part[2].Value | ....
+   * +---------------+---------------+---------------+
+   *
+   *     Part[0].index is i. Part[0].Value is stored in the Buffer[0..i].
+   *     Part[1].index is j. Part[1].Value is stored in the Buffer[i..j].
+   *
+   * See https://tc39.es/ecma402/#sec-createpartsfromlist
+   */
+  enum class PartType {
+    Element,
+    Literal,
+  };
+  // The 2nd field is the end index to the buffer as mentioned above.
+  using Part = std::pair<PartType, size_t>;
+  using PartVector = mozilla::Vector<Part, DEFAULT_LIST_LENGTH>;
+
+  /**
+   * Format the list to a list of parts, and store the formatted result of
+   * UTF-16 string into buffer, and formatted parts into the vector 'parts'.
+   *
+   * See:
+   * https://tc39.es/ecma402/#sec-Intl.ListFormat.prototype.formatToParts
+   * https://tc39.es/ecma402/#sec-formatlisttoparts
+   */
+  template <typename Buffer>
+  ICUResult FormatToParts(const StringList& list, Buffer& buffer,
+                          PartVector& parts) {
+    static_assert(std::is_same_v<typename Buffer::CharType, char16_t>,
+                  "Currently only UTF-16 buffers are supported.");
+
+    mozilla::Vector<const char16_t*, DEFAULT_LIST_LENGTH> u16strings;
+    mozilla::Vector<int32_t, DEFAULT_LIST_LENGTH> u16stringLens;
+    MOZ_TRY(ConvertStringListToVectors(list, u16strings, u16stringLens));
+
+    AutoFormattedList formatted;
+    if (!formatted.IsValid()) {
+      // Report the error recorded while opening the UFormattedList rather
+      // than handing a null result object to ICU.
+      return Err(formatted.GetError());
+    }
+
+    // Same checked conversion as in Format().
+    int32_t u16stringCount = mozilla::AssertedCast<int32_t>(list.length());
+
+    UErrorCode status = U_ZERO_ERROR;
+    ulistfmt_formatStringsToResult(
+        mListFormatter.GetConst(), u16strings.begin(), u16stringLens.begin(),
+        u16stringCount, formatted.GetFormatted(), &status);
+    if (U_FAILURE(status)) {
+      return Err(ToICUError(status));
+    }
+
+    // Copy the formatted string into the caller's buffer first; the parts
+    // computed below index into that buffer.
+    auto spanResult = formatted.ToSpan();
+    if (spanResult.isErr()) {
+      return spanResult.propagateErr();
+    }
+    auto formattedSpan = spanResult.unwrap();
+    if (!FillBuffer(formattedSpan, buffer)) {
+      return Err(ICUError::OutOfMemory);
+    }
+
+    const UFormattedValue* value = formatted.Value();
+    if (!value) {
+      return Err(ICUError::InternalError);
+    }
+    return FormattedToParts(value, buffer.length(), parts);
+  }
+
+ private:
+  ListFormat() = delete;
+  explicit ListFormat(UListFormatter* fmt) : mListFormatter(fmt) {}
+  ListFormat(const ListFormat&) = delete;
+  ListFormat& operator=(const ListFormat&) = delete;
+
+  ICUPointer<UListFormatter> mListFormatter =
+      ICUPointer<UListFormatter>(nullptr);
+
+  // Convert StringList to an array of type 'const char16_t*' and an array of
+  // int32 for ICU-API.
+  //
+  // Returns ICUError::OutOfMemory when one of the output vectors can't be
+  // grown, and ICUError::OverflowError when a conservative estimate of the
+  // formatted length doesn't fit into int32_t.
+  ICUResult ConvertStringListToVectors(
+      const StringList& list,
+      mozilla::Vector<const char16_t*, DEFAULT_LIST_LENGTH>& u16strings,
+      mozilla::Vector<int32_t, DEFAULT_LIST_LENGTH>& u16stringLens) const {
+    // Keep a conservative running count of overall length.
+    mozilla::CheckedInt<int32_t> stringLengthTotal(0);
+    for (const auto& string : list) {
+      if (!u16strings.append(string.data())) {
+        return Err(ICUError::OutOfMemory);
+      }
+
+      int32_t len = mozilla::AssertedCast<int32_t>(string.size());
+      if (!u16stringLens.append(len)) {
+        return Err(ICUError::OutOfMemory);
+      }
+
+      stringLengthTotal += len;
+    }
+
+    // Add space for N unrealistically large conjunctions.
+    constexpr int32_t MaxConjunctionLen = 100;
+    stringLengthTotal += CheckedInt<int32_t>(list.length()) * MaxConjunctionLen;
+    // If the overestimate exceeds ICU length limits, don't try to format.
+    if (!stringLengthTotal.isValid()) {
+      return Err(ICUError::OverflowError);
+    }
+
+    return Ok{};
+  }
+
+  // Owns the UFormattedList result object for the duration of a
+  // FormatToParts() call.
+  using AutoFormattedList =
+      AutoFormattedResult<UFormattedList, ulistfmt_openResult,
+                          ulistfmt_resultAsValue, ulistfmt_closeResult>;
+
+  // Split |formattedValue| into element and literal parts; |formattedSize| is
+  // the length of the complete formatted string.
+  ICUResult FormattedToParts(const UFormattedValue* formattedValue,
+                             size_t formattedSize, PartVector& parts);
+
+  static UListFormatterType ToUListFormatterType(Type type);
+  static UListFormatterWidth ToUListFormatterWidth(Style style);
+};
+
+} // namespace mozilla::intl
+#endif // intl_components_ListFormat_h_
diff --git a/intl/components/src/Locale.cpp b/intl/components/src/Locale.cpp
new file mode 100644
index 0000000000..9a043518cf
--- /dev/null
+++ b/intl/components/src/Locale.cpp
@@ -0,0 +1,1471 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/intl/Locale.h"
+
+#include "mozilla/Assertions.h"
+#include "mozilla/DebugOnly.h"
+#include "mozilla/MathAlgorithms.h"
+#include "mozilla/Span.h"
+#include "mozilla/TextUtils.h"
+#include "mozilla/Variant.h"
+
+#include "ICU4CGlue.h"
+
+#include <algorithm>
+#include <iterator>
+#include <stddef.h>
+#include <stdint.h>
+#include <string>
+#include <string.h>
+#include <type_traits>
+#include <utility>
+
+#include "unicode/uloc.h"
+#include "unicode/utypes.h"
+
+namespace mozilla::intl {
+
+using namespace intl::LanguageTagLimits;
+
+/**
+ * Check |aLanguage| against the grammar production
+ *
+ *   unicode_language_subtag = alpha{2,3} | alpha{5,8};
+ */
+template <typename CharT>
+bool IsStructurallyValidLanguageTag(Span<const CharT> aLanguage) {
+  const size_t length = aLanguage.size();
+
+  const bool validLength =
+      (length >= 2 && length <= 3) || (length >= 5 && length <= 8);
+  if (!validLength) {
+    return false;
+  }
+
+  const CharT* const begin = aLanguage.data();
+  return std::all_of(begin, begin + length, IsAsciiAlpha<CharT>);
+}
+
+template bool IsStructurallyValidLanguageTag(Span<const char> aLanguage);
+template bool IsStructurallyValidLanguageTag(Span<const Latin1Char> aLanguage);
+template bool IsStructurallyValidLanguageTag(Span<const char16_t> aLanguage);
+
+/**
+ * Check |aScript| against the grammar production
+ *
+ *   unicode_script_subtag = alpha{4} ;
+ */
+template <typename CharT>
+bool IsStructurallyValidScriptTag(Span<const CharT> aScript) {
+  const size_t length = aScript.size();
+  if (length != 4) {
+    return false;
+  }
+
+  const CharT* const begin = aScript.data();
+  return std::all_of(begin, begin + length, IsAsciiAlpha<CharT>);
+}
+
+template bool IsStructurallyValidScriptTag(Span<const char> aScript);
+template bool IsStructurallyValidScriptTag(Span<const Latin1Char> aScript);
+template bool IsStructurallyValidScriptTag(Span<const char16_t> aScript);
+
+/**
+ * Check |aRegion| against the grammar production
+ *
+ *   unicode_region_subtag = (alpha{2} | digit{3}) ;
+ */
+template <typename CharT>
+bool IsStructurallyValidRegionTag(Span<const CharT> aRegion) {
+  const size_t length = aRegion.size();
+  const CharT* const begin = aRegion.data();
+  const CharT* const end = begin + length;
+
+  switch (length) {
+    case 2:
+      // Two letters.
+      return std::all_of(begin, end, IsAsciiAlpha<CharT>);
+    case 3:
+      // Three digits.
+      return std::all_of(begin, end, IsAsciiDigit<CharT>);
+    default:
+      return false;
+  }
+}
+
+template bool IsStructurallyValidRegionTag(Span<const char> aRegion);
+template bool IsStructurallyValidRegionTag(Span<const Latin1Char> aRegion);
+template bool IsStructurallyValidRegionTag(Span<const char16_t> aRegion);
+
+#ifdef DEBUG
+// Check |aVariant| against the grammar production
+//
+//   unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3}) ;
+bool IsStructurallyValidVariantTag(Span<const char> aVariant) {
+  const size_t length = aVariant.size();
+  const char* const begin = aVariant.data();
+
+  const bool isLongForm = length >= 5 && length <= 8;
+  // The first character is only inspected when the length is exactly four.
+  const bool isDigitForm = length == 4 && IsAsciiDigit(begin[0]);
+  if (!isLongForm && !isDigitForm) {
+    return false;
+  }
+
+  return std::all_of(begin, begin + length, IsAsciiAlphanumeric<char>);
+}
+
+// A Unicode extension is structurally valid exactly when the locale parser
+// accepts it.
+bool IsStructurallyValidUnicodeExtensionTag(Span<const char> aExtension) {
+  const auto parsed = LocaleParser::CanParseUnicodeExtension(aExtension);
+  return parsed.isOk();
+}
+
+// Check |aExtension| against the grammar production
+//
+//   other_extensions = sep [alphanum-[tTuUxX]] (sep alphanum{2,8})+ ;
+//
+// NB: Any extension is allowed here, including Unicode and Transform, because
+// this function is only used for an assertion.
+static bool IsStructurallyValidExtensionTag(Span<const char> aExtension) {
+  const size_t total = aExtension.size();
+  const char* cursor = aExtension.data();
+  const char* const end = cursor + total;
+
+  // Need at least the singleton, a separator, and one more character.
+  if (total <= 2) {
+    return false;
+  }
+
+  // The singleton: any alphanumeric except the private-use marker.
+  const char singleton = *cursor++;
+  if (!IsAsciiAlphanumeric(singleton) || singleton == 'x' ||
+      singleton == 'X') {
+    return false;
+  }
+  if (*cursor++ != '-') {
+    return false;
+  }
+
+  // Every remaining '-'-separated subtag must be alphanum{2,8}.
+  for (;;) {
+    const char* const subtagEnd = std::find(cursor, end, '-');
+    const size_t subtagLength = subtagEnd - cursor;
+    if (subtagLength < 2 || subtagLength > 8 ||
+        !std::all_of(cursor, subtagEnd, IsAsciiAlphanumeric<char>)) {
+      return false;
+    }
+    if (subtagEnd == end) {
+      return true;
+    }
+    cursor = subtagEnd + 1;
+  }
+}
+
+// Check |aPrivateUse| against the grammar production
+//
+//   pu_extensions = sep [xX] (sep alphanum{1,8})+ ;
+bool IsStructurallyValidPrivateUseTag(Span<const char> aPrivateUse) {
+  const size_t total = aPrivateUse.size();
+  const char* cursor = aPrivateUse.data();
+  const char* const end = cursor + total;
+
+  // Need at least the marker, a separator, and one more character.
+  if (total <= 2) {
+    return false;
+  }
+
+  // The private-use marker followed by a separator.
+  const char marker = *cursor++;
+  if (marker != 'x' && marker != 'X') {
+    return false;
+  }
+  if (*cursor++ != '-') {
+    return false;
+  }
+
+  // Every remaining '-'-separated subtag must be alphanum{1,8}.
+  for (;;) {
+    const char* const subtagEnd = std::find(cursor, end, '-');
+    const size_t subtagLength = subtagEnd - cursor;
+    if (subtagLength == 0 || subtagLength > 8 ||
+        !std::all_of(cursor, subtagEnd, IsAsciiAlphanumeric<char>)) {
+      return false;
+    }
+    if (subtagEnd == end) {
+      return true;
+    }
+    cursor = subtagEnd + 1;
+  }
+}
+#endif
+
+// Return the position of the Unicode ('u') extension in mExtensions, or -1
+// if there is none.
+ptrdiff_t Locale::UnicodeExtensionIndex() const {
+  // The extension subtags aren't necessarily sorted, so a linear scan is
+  // used instead of a binary search.
+  ptrdiff_t index = 0;
+  for (const auto& extension : mExtensions) {
+    const char singleton = extension[0];
+    if (singleton == 'u' || singleton == 'U') {
+      return index;
+    }
+    index++;
+  }
+  return -1;
+}
+
+// Return a span over the Unicode extension subtag, or Nothing() when the
+// locale has no Unicode extension.
+Maybe<Span<const char>> Locale::GetUnicodeExtension() const {
+  const ptrdiff_t index = UnicodeExtensionIndex();
+  if (index < 0) {
+    return Nothing();
+  }
+  return Some(MakeStringSpan(mExtensions[index].get()));
+}
+
+// Store a copy of |aExtension| as the Unicode extension, replacing an
+// existing one or appending it when the locale doesn't have one yet.
+ICUResult Locale::SetUnicodeExtension(Span<const char> aExtension) {
+  MOZ_ASSERT(IsStructurallyValidUnicodeExtensionTag(aExtension));
+
+  auto copy = DuplicateStringToUniqueChars(aExtension);
+
+  const ptrdiff_t existing = UnicodeExtensionIndex();
+  if (existing < 0) {
+    // No Unicode extension present yet: append a new one.
+    if (!mExtensions.append(std::move(copy))) {
+      return Err(ICUError::OutOfMemory);
+    }
+    return Ok();
+  }
+
+  // Replace the existing Unicode extension subtag.
+  mExtensions[existing] = std::move(copy);
+  return Ok();
+}
+
+// Remove the Unicode extension subtag, if the locale has one.
+void Locale::ClearUnicodeExtension() {
+  const ptrdiff_t index = UnicodeExtensionIndex();
+  if (index < 0) {
+    return;
+  }
+  mExtensions.erase(mExtensions.begin() + index);
+}
+
+// Sort |aSubtags| in ascending strcmp() order. Returns false only if the
+// scratch vector needed for lists of three or more elements can't be
+// resized; |aSubtags| is left untouched in that case.
+template <size_t InitialCapacity>
+static bool SortAlphabetically(Vector<UniqueChars, InitialCapacity>& aSubtags) {
+  const size_t count = aSubtags.length();
+
+  switch (count) {
+    case 0:
+    case 1:
+      // Zero or one element lists are already sorted.
+      return true;
+    case 2:
+      // Handle two element lists inline.
+      if (strcmp(aSubtags[0].get(), aSubtags[1].get()) > 0) {
+        aSubtags[0].swap(aSubtags[1]);
+      }
+      return true;
+    default:
+      break;
+  }
+
+  Vector<char*, 8> rawSubtags;
+  if (!rawSubtags.resizeUninitialized(count)) {
+    return false;
+  }
+
+  // Move ownership of every string into the scratch vector of raw pointers.
+  size_t next = 0;
+  for (UniqueChars& subtag : aSubtags) {
+    rawSubtags[next++] = subtag.release();
+  }
+
+  auto sortsBefore = [](const char* a, const char* b) {
+    return strcmp(a, b) < 0;
+  };
+  std::stable_sort(rawSubtags.begin(), rawSubtags.end(), sortsBefore);
+
+  // Hand ownership back to |aSubtags| in sorted order.
+  for (size_t i = 0; i < count; i++) {
+    aSubtags[i] = UniqueChars(rawSubtags[i]);
+  }
+  return true;
+}
+
+Result<Ok, Locale::CanonicalizationError> Locale::CanonicalizeBaseName() {
+ /* Canonicalizes the language, script, region, and variant subtags in place.
+  * Fails with OutOfMemory when sorting the variants, UpdateLegacyMappings(),
+  * or PerformVariantMappings() reports failure, and with DuplicateVariant
+  * when two variants compare equal after lowercasing.
+  */
+ // Per 6.2.3 CanonicalizeUnicodeLocaleId, the very first step is to canonicalize the
+ // syntax by normalizing the case and ordering all subtags; see UTS 35, 3.2.1.
+
+ // Language codes need to be in lower case. "JA" -> "ja"
+ mLanguage.ToLowerCase();
+ MOZ_ASSERT(IsStructurallyValidLanguageTag(Language().Span()));
+
+ // The first character of a script code needs to be capitalized.
+ // "hans" -> "Hans"
+ mScript.ToTitleCase();
+ MOZ_ASSERT(Script().Missing() ||
+ IsStructurallyValidScriptTag(Script().Span()));
+
+ // Region codes need to be in upper case. "bu" -> "BU"
+ mRegion.ToUpperCase();
+ MOZ_ASSERT(Region().Missing() ||
+ IsStructurallyValidRegionTag(Region().Span()));
+
+ // The canonical case for variant subtags is lowercase.
+ for (UniqueChars& variant : mVariants) {
+ char* variantChars = variant.get();
+ size_t variantLength = strlen(variantChars);
+ AsciiToLowerCase(variantChars, variantLength, variantChars);
+
+ MOZ_ASSERT(IsStructurallyValidVariantTag({variantChars, variantLength}));
+ }
+
+ // Extensions and privateuse subtags are case normalized in the
+ // |CanonicalizeExtensions| method.
+
+ // The second step in UTS 35, 3.2.1, is to order all subtags.
+
+ if (mVariants.length() > 1) {
+ // 1. Any variants are in alphabetical order.
+ if (!SortAlphabetically(mVariants)) {
+ return Err(CanonicalizationError::OutOfMemory);
+ }
+
+ // Reject the Locale identifier if a duplicate variant was found, e.g.
+ // "en-variant-Variant". (Sorting made equal variants adjacent.)
+ const UniqueChars* duplicate = std::adjacent_find(
+ mVariants.begin(), mVariants.end(), [](const auto& a, const auto& b) {
+ return strcmp(a.get(), b.get()) == 0;
+ });
+ if (duplicate != mVariants.end()) {
+ return Err(CanonicalizationError::DuplicateVariant);
+ }
+ }
+
+ // 2. Any extensions are in alphabetical order by their singleton.
+ // 3. All attributes are sorted in alphabetical order.
+ // 4. All keywords and tfields are sorted by alphabetical order of their keys,
+ // within their respective extensions.
+ // 5. Any type or tfield value "true" is removed.
+ // - A subsequent call to CanonicalizeExtensions() will perform these steps.
+
+ // 6.2.3 CanonicalizeUnicodeLocaleId, step 2 transforms the locale identifier
+ // into its canonical form per UTS 35, 3.2.1.
+
+ // 1. Use the bcp47 data to replace keys, types, tfields, and tvalues by their
+ // canonical forms.
+ // - A subsequent call to CanonicalizeExtensions() will perform this step.
+
+ // 2. Replace aliases in the unicode_language_id and tlang (if any).
+ // - tlang is handled in CanonicalizeExtensions().
+
+ // Replace deprecated language, region, and variant subtags with their
+ // preferred mappings.
+
+ if (!UpdateLegacyMappings()) {
+ return Err(CanonicalizationError::OutOfMemory);
+ }
+
+ // Replace deprecated language subtags with their preferred values.
+ if (!LanguageMapping(mLanguage) && ComplexLanguageMapping(mLanguage)) {
+ PerformComplexLanguageMappings();
+ }
+
+ // Replace deprecated script subtags with their preferred values.
+ if (Script().Present()) {
+ ScriptMapping(mScript);
+ }
+
+ // Replace deprecated region subtags with their preferred values.
+ if (Region().Present()) {
+ if (!RegionMapping(mRegion) && ComplexRegionMapping(mRegion)) {
+ PerformComplexRegionMappings();
+ }
+ }
+
+ // Replace deprecated variant subtags with their preferred values.
+ if (!PerformVariantMappings()) {
+ return Err(CanonicalizationError::OutOfMemory);
+ }
+
+ // No extension replacements are currently present.
+ // Private use sequences are left as is.
+
+ // 3. Replace aliases in special key values.
+ // - A subsequent call to CanonicalizeExtensions() will perform this step.
+
+ return Ok();
+}
+
+#ifdef DEBUG
+// Debug-only predicate: true iff every character of |aSpan| is an ASCII
+// lowercase letter, an ASCII digit, or '-'. An empty span yields true.
+static bool IsAsciiLowercaseAlphanumericOrDash(Span<const char> aSpan) {
+  for (char ch : aSpan) {
+    if (!IsAsciiLowercaseAlpha(ch) && !IsAsciiDigit(ch) && ch != '-') {
+      return false;
+    }
+  }
+  return true;
+}
+#endif
+
+// Canonicalizes the extension subtags and the private-use subtag in place:
+// everything is lowercased, extensions are ordered alphabetically, and the
+// "u" and "t" extensions get their dedicated canonicalization. Errors from
+// those steps are propagated; a failed sort is reported as OutOfMemory.
+Result<Ok, Locale::CanonicalizationError> Locale::CanonicalizeExtensions() {
+  // Lowercase is the canonical case for every extension subtag.
+  for (UniqueChars& ext : mExtensions) {
+    char* chars = ext.get();
+    const size_t length = strlen(chars);
+    AsciiToLowerCase(chars, length, chars);
+
+    MOZ_ASSERT(IsStructurallyValidExtensionTag({chars, length}));
+  }
+
+  // Extensions are ordered alphabetically by singleton, e.g.
+  // "u-ca-chinese-t-zh-latn" -> "t-zh-latn-u-ca-chinese"
+  if (!SortAlphabetically(mExtensions)) {
+    return Err(CanonicalizationError::OutOfMemory);
+  }
+
+  for (UniqueChars& ext : mExtensions) {
+    switch (ext[0]) {
+      case 'u':
+        MOZ_TRY(CanonicalizeUnicodeExtension(ext));
+        break;
+      case 't':
+        MOZ_TRY(CanonicalizeTransformExtension(ext));
+        break;
+      default:
+        // Other extensions need no further processing.
+        break;
+    }
+
+    MOZ_ASSERT(IsAsciiLowercaseAlphanumericOrDash(MakeStringSpan(ext.get())));
+  }
+
+  // Lowercase is also the canonical case for the privateuse subtag.
+  char* privateUseChars = mPrivateUse.get();
+  if (!privateUseChars) {
+    return Ok();
+  }
+
+  const size_t privateUseLength = strlen(privateUseChars);
+  AsciiToLowerCase(privateUseChars, privateUseLength, privateUseChars);
+
+  MOZ_ASSERT(
+      IsStructurallyValidPrivateUseTag({privateUseChars, privateUseLength}));
+  return Ok();
+}
+
+// Appends every character of |aSpan| to |vector| and forwards the success
+// flag returned by Vector::append().
+template <size_t N>
+static inline bool AppendSpan(Vector<char, N>& vector, Span<const char> aSpan) {
+  const char* chars = aSpan.data();
+  const size_t count = aSpan.size();
+  return vector.append(chars, count);
+}
+
+/**
+ * CanonicalizeUnicodeExtension( attributes, keywords )
+ *
+ * Canonical syntax per
+ * <https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers>:
+ *
+ * - All attributes and keywords are in lowercase.
+ * - Note: The parser already converted keywords to lowercase.
+ * - All attributes are sorted in alphabetical order.
+ * - All keywords are sorted by alphabetical order of their keys.
+ * - Any type value "true" is removed.
+ *
+ * Canonical form:
+ * - All keys and types use the canonical form (from the name attribute;
+ * see Section 3.6.4 U Extension Data Files).
+ *
+ * Implementation notes:
+ * - |aUnicodeExtension| is replaced only when the canonical string differs
+ *   from the input; otherwise the existing allocation is kept.
+ * - Duplicate attributes and keywords with a duplicate key are dropped; the
+ *   stable sort ensures the first occurrence in the input is the one kept.
+ * - Returns InternalError when the extension cannot be parsed and
+ *   OutOfMemory when appending to the buffer or duplicating it fails.
+ */
+Result<Ok, Locale::CanonicalizationError> Locale::CanonicalizeUnicodeExtension(
+ UniqueChars& aUnicodeExtension) {
+ Span<const char> extension = MakeStringSpan(aUnicodeExtension.get());
+ MOZ_ASSERT(extension[0] == 'u');
+ MOZ_ASSERT(extension[1] == '-');
+ MOZ_ASSERT(IsStructurallyValidExtensionTag(extension));
+
+ LocaleParser::AttributesVector attributes;
+ LocaleParser::KeywordsVector keywords;
+
+ using Attribute = LocaleParser::AttributesVector::ElementType;
+ using Keyword = LocaleParser::KeywordsVector::ElementType;
+
+ if (LocaleParser::ParseUnicodeExtension(extension, attributes, keywords)
+ .isErr()) {
+ MOZ_ASSERT_UNREACHABLE("unexpected invalid Unicode extension subtag");
+ return Err(CanonicalizationError::InternalError);
+ }
+
+ auto attributesLess = [extension](const Attribute& a, const Attribute& b) {
+ auto astr = extension.Subspan(a.Begin(), a.Length());
+ auto bstr = extension.Subspan(b.Begin(), b.Length());
+ return astr < bstr;
+ };
+
+ // All attributes are sorted in alphabetical order.
+ if (attributes.length() > 1) {
+ std::stable_sort(attributes.begin(), attributes.end(), attributesLess);
+ }
+
+ auto keywordsLess = [extension](const Keyword& a, const Keyword& b) {
+ auto astr = extension.Subspan(a.Begin(), UnicodeKeyLength);
+ auto bstr = extension.Subspan(b.Begin(), UnicodeKeyLength);
+ return astr < bstr;
+ };
+
+ // All keywords are sorted by alphabetical order of keys.
+ if (keywords.length() > 1) {
+ // Using a stable sort algorithm, guarantees that two keywords using the
+ // same key are never reordered. That means for example
+ // when we have the input "u-nu-thai-kf-false-nu-latn", we are guaranteed to
+ // get the result "u-kf-false-nu-thai-nu-latn", i.e. "nu-thai" still occurs
+ // before "nu-latn".
+ // This is required so that deduplication below preserves the first keyword
+ // for a given key and discards the rest.
+ std::stable_sort(keywords.begin(), keywords.end(), keywordsLess);
+ }
+
+ Vector<char, 32> sb;
+ if (!sb.append('u')) {
+ return Err(CanonicalizationError::OutOfMemory);
+ }
+
+ // Append all Unicode extension attributes.
+ for (size_t i = 0; i < attributes.length(); i++) {
+ const auto& attribute = attributes[i];
+ auto span = extension.Subspan(attribute.Begin(), attribute.Length());
+
+ // Skip duplicate attributes (equal attributes are adjacent after sorting).
+ if (i > 0) {
+ const auto& lastAttribute = attributes[i - 1];
+ if (span ==
+ extension.Subspan(lastAttribute.Begin(), lastAttribute.Length())) {
+ continue;
+ }
+ MOZ_ASSERT(attributesLess(lastAttribute, attribute));
+ }
+
+ if (!sb.append('-')) {
+ return Err(CanonicalizationError::OutOfMemory);
+ }
+ if (!AppendSpan(sb, span)) {
+ return Err(CanonicalizationError::OutOfMemory);
+ }
+ }
+
+ static constexpr size_t UnicodeKeyWithSepLength = UnicodeKeyLength + 1;
+
+ using StringSpan = Span<const char>;
+
+ static constexpr StringSpan True = MakeStringSpan("true");
+
+ // Append all Unicode extension keywords.
+ for (size_t i = 0; i < keywords.length(); i++) {
+ const auto& keyword = keywords[i];
+
+ // Skip duplicate keywords, i.e. keywords whose key equals the previous key.
+ if (i > 0) {
+ const auto& lastKeyword = keywords[i - 1];
+ if (extension.Subspan(keyword.Begin(), UnicodeKeyLength) ==
+ extension.Subspan(lastKeyword.Begin(), UnicodeKeyLength)) {
+ continue;
+ }
+ MOZ_ASSERT(keywordsLess(lastKeyword, keyword));
+ }
+
+ if (!sb.append('-')) {
+ return Err(CanonicalizationError::OutOfMemory);
+ }
+
+ StringSpan span = extension.Subspan(keyword.Begin(), keyword.Length());
+ if (span.size() == UnicodeKeyLength) {
+ // Keyword without type value.
+ if (!AppendSpan(sb, span)) {
+ return Err(CanonicalizationError::OutOfMemory);
+ }
+ } else {
+ StringSpan key = span.To(UnicodeKeyLength);
+ StringSpan type = span.From(UnicodeKeyWithSepLength);
+
+ // Search if there's a replacement for the current Unicode keyword.
+ if (const char* replacement = ReplaceUnicodeExtensionType(key, type)) {
+ StringSpan repl = MakeStringSpan(replacement);
+ if (repl == True) {
+ // Elide the type "true" if present in the replacement.
+ if (!AppendSpan(sb, key)) {
+ return Err(CanonicalizationError::OutOfMemory);
+ }
+ } else {
+ // Otherwise append the Unicode key (including the separator) and the
+ // replaced type.
+ if (!AppendSpan(sb, span.To(UnicodeKeyWithSepLength))) {
+ return Err(CanonicalizationError::OutOfMemory);
+ }
+ if (!AppendSpan(sb, repl)) {
+ return Err(CanonicalizationError::OutOfMemory);
+ }
+ }
+ } else {
+ if (type == True) {
+ // Elide the Unicode extension type "true".
+ if (!AppendSpan(sb, key)) {
+ return Err(CanonicalizationError::OutOfMemory);
+ }
+ } else {
+ // Otherwise append the complete Unicode extension keyword.
+ if (!AppendSpan(sb, span)) {
+ return Err(CanonicalizationError::OutOfMemory);
+ }
+ }
+ }
+ }
+ }
+
+ // We can keep the previous extension when canonicalization didn't modify it.
+ if (static_cast<Span<const char>>(sb) != extension) {
+ // Otherwise replace the previous extension with the canonical extension.
+ UniqueChars canonical = DuplicateStringToUniqueChars(sb);
+ if (!canonical) {
+ return Err(CanonicalizationError::OutOfMemory);
+ }
+ aUnicodeExtension = std::move(canonical);
+ }
+
+ return Ok();
+}
+
+// Serializes |aTag| into |aBuffer| in the order language, script, region,
+// variants, extensions, private-use, joining the parts with '-'. Returns
+// false as soon as any append to |aBuffer| reports failure.
+template <class Buffer>
+static bool LocaleToString(const Locale& aTag, Buffer& aBuffer) {
+  auto putDash = [&aBuffer]() { return aBuffer.append('-'); };
+
+  // Writes a subtag object that exposes its characters through Span().
+  auto putSubtag = [&aBuffer](const auto& subtag) {
+    auto chars = subtag.Span();
+    MOZ_ASSERT(!chars.empty());
+    return aBuffer.append(chars.data(), chars.size());
+  };
+
+  // Writes a subtag that is already available as a character span.
+  auto putChars = [&aBuffer](Span<const char> chars) {
+    MOZ_ASSERT(!chars.empty());
+    return aBuffer.append(chars.data(), chars.size());
+  };
+
+  // Writes each element of |list| preceded by a separator.
+  auto putList = [&putDash, &putChars](const auto& list) {
+    for (const auto& item : list) {
+      if (!(putDash() && putChars(item))) {
+        return false;
+      }
+    }
+    return true;
+  };
+
+  // The language subtag always comes first.
+  if (!putSubtag(aTag.Language())) {
+    return false;
+  }
+
+  // Optional script subtag.
+  if (aTag.Script().Present() && !(putDash() && putSubtag(aTag.Script()))) {
+    return false;
+  }
+
+  // Optional region subtag.
+  if (aTag.Region().Present() && !(putDash() && putSubtag(aTag.Region()))) {
+    return false;
+  }
+
+  // Variant subtags followed by extension subtags, if any.
+  if (!putList(aTag.Variants()) || !putList(aTag.Extensions())) {
+    return false;
+  }
+
+  // Optional private-use subtag.
+  if (auto privateUse = aTag.PrivateUse()) {
+    if (!(putDash() && putChars(privateUse.value()))) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+/**
+ * CanonicalizeTransformExtension
+ *
+ * Canonical form per <https://unicode.org/reports/tr35/#BCP47_T_Extension>:
+ *
+ * - These subtags are all in lowercase (that is the canonical casing for these
+ * subtags), [...].
+ *
+ * And per
+ * <https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers>:
+ *
+ * - All keywords and tfields are sorted by alphabetical order of their keys,
+ * within their respective extensions.
+ *
+ * Implementation notes:
+ * - The `tlang` part, if present, is canonicalized through
+ *   CanonicalizeBaseName() and then has its script and region lowercased.
+ * - |aTransformExtension| is replaced only when the canonical string differs
+ *   from the input.
+ * - Returns InternalError when the extension cannot be parsed, propagates
+ *   errors from canonicalizing `tlang`, and returns OutOfMemory when
+ *   appending to the buffer or duplicating it fails.
+ */
+Result<Ok, Locale::CanonicalizationError>
+Locale::CanonicalizeTransformExtension(UniqueChars& aTransformExtension) {
+ Span<const char> extension = MakeStringSpan(aTransformExtension.get());
+ MOZ_ASSERT(extension[0] == 't');
+ MOZ_ASSERT(extension[1] == '-');
+ MOZ_ASSERT(IsStructurallyValidExtensionTag(extension));
+
+ Locale tag;
+ LocaleParser::TFieldVector fields;
+
+ using TField = LocaleParser::TFieldVector::ElementType;
+
+ if (LocaleParser::ParseTransformExtension(extension, tag, fields).isErr()) {
+ MOZ_ASSERT_UNREACHABLE("unexpected invalid transform extension subtag");
+ return Err(CanonicalizationError::InternalError);
+ }
+
+ auto tfieldLess = [extension](const TField& a, const TField& b) {
+ auto astr = extension.Subspan(a.Begin(), TransformKeyLength);
+ auto bstr = extension.Subspan(b.Begin(), TransformKeyLength);
+ return astr < bstr;
+ };
+
+ // All tfields are sorted by alphabetical order of their keys.
+ if (fields.length() > 1) {
+ std::stable_sort(fields.begin(), fields.end(), tfieldLess);
+ }
+
+ Vector<char, 32> sb;
+ if (!sb.append('t')) {
+ return Err(CanonicalizationError::OutOfMemory);
+ }
+
+ // Append the language subtag if present.
+ //
+ // Replace aliases in tlang per
+ // <https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers>.
+ if (tag.Language().Present()) {
+ if (!sb.append('-')) {
+ return Err(CanonicalizationError::OutOfMemory);
+ }
+
+ MOZ_TRY(tag.CanonicalizeBaseName());
+
+ // The canonical case for Transform extensions is lowercase per
+ // <https://unicode.org/reports/tr35/#BCP47_T_Extension>. Convert the two
+ // subtags which don't use lowercase for their canonical syntax.
+ tag.mScript.ToLowerCase();
+ tag.mRegion.ToLowerCase();
+
+ if (!LocaleToString(tag, sb)) {
+ return Err(CanonicalizationError::OutOfMemory);
+ }
+ }
+
+ static constexpr size_t TransformKeyWithSepLength = TransformKeyLength + 1;
+
+ using StringSpan = Span<const char>;
+
+ // Append all fields.
+ //
+ // UTS 35, 3.2.1 specifies:
+ // - Any type or tfield value "true" is removed.
+ //
+ // But the `tvalue` subtag is mandatory in `tfield: tkey tvalue`, so ignore
+ // this apparently invalid part of the UTS 35 specification and simply
+ // append all `tfield` subtags.
+ for (const auto& field : fields) {
+ if (!sb.append('-')) {
+ return Err(CanonicalizationError::OutOfMemory);
+ }
+
+ StringSpan span = extension.Subspan(field.Begin(), field.Length());
+ StringSpan key = span.To(TransformKeyLength);
+ StringSpan value = span.From(TransformKeyWithSepLength);
+
+ // Search if there's a replacement for the current transform keyword.
+ if (const char* replacement = ReplaceTransformExtensionType(key, value)) {
+ if (!AppendSpan(sb, span.To(TransformKeyWithSepLength))) {
+ return Err(CanonicalizationError::OutOfMemory);
+ }
+ if (!AppendSpan(sb, MakeStringSpan(replacement))) {
+ return Err(CanonicalizationError::OutOfMemory);
+ }
+ } else {
+ if (!AppendSpan(sb, span)) {
+ return Err(CanonicalizationError::OutOfMemory);
+ }
+ }
+ }
+
+ // We can keep the previous extension when canonicalization didn't modify it.
+ if (static_cast<Span<const char>>(sb) != extension) {
+ // Otherwise replace the previous extension with the canonical extension.
+ UniqueChars canonical = DuplicateStringToUniqueChars(sb);
+ if (!canonical) {
+ return Err(CanonicalizationError::OutOfMemory);
+ }
+ aTransformExtension = std::move(canonical);
+ }
+
+ return Ok();
+}
+
+/* Zero-terminated ICU Locale ID. The inline capacity covers a language, a
+ * script, and a region subtag plus two separators and the terminator. */
+using LocaleId =
+ Vector<char, LanguageLength + 1 + ScriptLength + 1 + RegionLength + 1>;
+
+enum class LikelySubtags : bool { Add, Remove };  // Add: maximize; Remove: minimize.
+
+// Reports whether |aTag| is already in the requested form, i.e. already
+// maximized for LikelySubtags::Add or already minimized otherwise.
+static bool HasLikelySubtags(LikelySubtags aLikelySubtags, const Locale& aTag) {
+  // The placeholder language "und" is never a finished result, regardless of
+  // whether subtags are being added or removed.
+  if (aTag.Language().EqualTo("und")) {
+    return false;
+  }
+
+  if (aLikelySubtags == LikelySubtags::Add) {
+    // Maximized: script and region are both present and neither one is a
+    // placeholder ("Zzzz" resp. "ZZ").
+    const bool realScript =
+        aTag.Script().Present() && !aTag.Script().EqualTo("Zzzz");
+    const bool realRegion =
+        aTag.Region().Present() && !aTag.Region().EqualTo("ZZ");
+    return realScript && realRegion;
+  }
+
+  // Minimized: nothing but the language subtag is left.
+  return aTag.Script().Missing() && aTag.Region().Missing();
+}
+
+// Builds a zero-terminated ICU locale ID ("lang[_Script][_REGION]") from the
+// language, script, and region subtags of |aTag|. |aLocale| must be empty on
+// entry. Returns false as soon as an append fails.
+static bool CreateLocaleForLikelySubtags(const Locale& aTag,
+                                         LocaleId& aLocale) {
+  MOZ_ASSERT(aLocale.length() == 0);
+
+  auto put = [&aLocale](const auto& subtag) {
+    auto chars = subtag.Span();
+    MOZ_ASSERT(!chars.empty());
+    return aLocale.append(chars.data(), chars.size());
+  };
+
+  // ICU locale IDs separate their parts with '_'.
+  auto putWithSeparator = [&aLocale, &put](const auto& subtag) {
+    return aLocale.append('_') && put(subtag);
+  };
+
+  // Language is always written.
+  if (!put(aTag.Language())) {
+    return false;
+  }
+
+  // Script and region are optional.
+  if (aTag.Script().Present() && !putWithSeparator(aTag.Script())) {
+    return false;
+  }
+  if (aTag.Region().Present() && !putWithSeparator(aTag.Region())) {
+    return false;
+  }
+
+  // ICU expects a zero-terminated string.
+  return aLocale.append('\0');
+}
+
+static ICUError ParserErrorToICUError(LocaleParser::ParserError aErr) {
+ using ParserError = LocaleParser::ParserError;
+
+ switch (aErr) {
+ case ParserError::NotParseable:
+ return ICUError::InternalError;
+ case ParserError::OutOfMemory:
+ return ICUError::OutOfMemory;
+ }
+ MOZ_CRASH("Unexpected parser error");
+}
+
+static ICUError CanonicalizationErrorToICUError(
+ Locale::CanonicalizationError aErr) {
+ using CanonicalizationError = Locale::CanonicalizationError;
+
+ switch (aErr) {
+ case CanonicalizationError::DuplicateVariant:
+ case CanonicalizationError::InternalError:
+ return ICUError::InternalError;
+ case CanonicalizationError::OutOfMemory:
+ return ICUError::OutOfMemory;
+ }
+ MOZ_CRASH("Unexpected canonicalization error");
+}
+
+// Assign the language, script, and region subtags from an ICU locale ID.
+//
+// |aLocaleId| is rewritten in place: every '_' separator becomes '-', and
+// "und" is inserted in front when the ID is empty or starts with a separator.
+// The rewritten ID is then parsed as a base name and its language, script,
+// and region are copied into |aTag|.
+//
+// Returns OutOfMemory if |aLocaleId| cannot grow, or the parser error mapped
+// through ParserErrorToICUError if the rewritten ID does not parse.
+//
+// ICU provides |uloc_getLanguage|, |uloc_getScript|, and |uloc_getCountry| to
+// retrieve these subtags, but unfortunately these functions are rather slow, so
+// we use our own implementation.
+static ICUResult AssignFromLocaleId(LocaleId& aLocaleId, Locale& aTag) {
+  // Replace the ICU locale ID separator.
+  std::replace(aLocaleId.begin(), aLocaleId.end(), '_', '-');
+
+  // ICU replaces "und" with the empty string, which means "und" becomes "" and
+  // "und-Latn" becomes "-Latn". Handle this case separately.
+  if (aLocaleId.empty() || aLocaleId[0] == '-') {
+    static constexpr auto und = MakeStringSpan("und");
+    constexpr size_t length = und.size();
+
+    // Record how many characters have to be shifted *before* growing the
+    // vector: growBy() increases length(), so using the post-growth length as
+    // the move count would copy |length| characters too many and write past
+    // the end of the vector's contents.
+    const size_t oldLength = aLocaleId.length();
+
+    // Insert "und" in front of the locale ID.
+    if (!aLocaleId.growBy(length)) {
+      return Err(ICUError::OutOfMemory);
+    }
+    memmove(aLocaleId.begin() + length, aLocaleId.begin(), oldLength);
+    memmove(aLocaleId.begin(), und.data(), length);
+  }
+
+  // Retrieve the language, script, and region subtags from the locale ID
+  Locale localeTag;
+  MOZ_TRY(LocaleParser::TryParseBaseName(aLocaleId, localeTag)
+              .mapErr(ParserErrorToICUError));
+
+  aTag.SetLanguage(localeTag.Language());
+  aTag.SetScript(localeTag.Script());
+  aTag.SetRegion(localeTag.Region());
+
+  return Ok();
+}
+
+// Invokes the ICU function |likelySubtagsFn| on the zero-terminated locale ID
+// |aLocaleId| and stores its output in |aResult|, which must be empty on
+// entry. The ICU call is driven through FillBufferWithICUCall().
+template <decltype(uloc_addLikelySubtags) likelySubtagsFn>
+static ICUResult CallLikelySubtags(const LocaleId& aLocaleId,
+                                   LocaleId& aResult) {
+  // ICU requires zero-terminated input.
+  MOZ_ASSERT(aLocaleId.back() == '\0');
+  MOZ_ASSERT(aResult.length() == 0);
+
+  // Start out with the full inline storage as the output buffer.
+  MOZ_ALWAYS_TRUE(aResult.resize(LocaleId::InlineLength));
+
+  const char* input = aLocaleId.begin();
+  auto invokeICU = [input](char* chars, int32_t size, UErrorCode* status) {
+    return likelySubtagsFn(input, chars, size, status);
+  };
+  return FillBufferWithICUCall(aResult, invokeICU);
+}
+
+// Adds or removes likely subtags on |aTag| in place.
+//
+// The canonical way to compute the Unicode BCP 47 locale identifier with
+// likely subtags would be:
+//
+// 1. uloc_forLanguageTag() to turn the locale identifier into an ICU locale
+//    ID.
+// 2. uloc_addLikelySubtags() to add the likely subtags to that locale ID.
+// 3. uloc_toLanguageTag() to turn the resulting locale ID back into a Unicode
+//    BCP 47 locale identifier.
+//
+// uloc_forLanguageTag() and uloc_toLanguageTag() are both kind of slow, and by
+// construction the input only contains valid language, script, and region
+// subtags, so both conversions are done by hand instead; see
+// CreateLocaleForLikelySubtags() and AssignFromLocaleId(). (Where "slow" means
+// about 50% of the execution time of |Intl.Locale.prototype.maximize|.)
+static ICUResult LikelySubtags(LikelySubtags aLikelySubtags, Locale& aTag) {
+  // Nothing to do when the input is already maximized/minimized.
+  if (HasLikelySubtags(aLikelySubtags, aTag)) {
+    return Ok();
+  }
+
+  // Build the ICU locale ID for the input.
+  LocaleId input;
+  if (!CreateLocaleForLikelySubtags(aTag, input)) {
+    return Err(ICUError::OutOfMemory);
+  }
+
+  // Let ICU add or remove the likely subtags.
+  LocaleId output;
+  const bool maximize = aLikelySubtags == LikelySubtags::Add;
+  MOZ_TRY(maximize ? CallLikelySubtags<uloc_addLikelySubtags>(input, output)
+                   : CallLikelySubtags<uloc_minimizeSubtags>(input, output));
+
+  // Copy language, script, and region from ICU's answer into |aTag|.
+  MOZ_TRY(AssignFromLocaleId(output, aTag));
+
+  // ICU may have returned a non-canonical locale, so update the mappings.
+  MOZ_TRY(aTag.CanonicalizeBaseName().mapErr(CanonicalizationErrorToICUError));
+
+  return Ok();
+}
+
+// Runs the likely-subtags algorithm in "Add" mode on this locale.
+ICUResult Locale::AddLikelySubtags() {
+  Locale& self = *this;
+  return LikelySubtags(LikelySubtags::Add, self);
+}
+
+// Runs the likely-subtags algorithm in "Remove" mode on this locale.
+ICUResult Locale::RemoveLikelySubtags() {
+  Locale& self = *this;
+  return LikelySubtags(LikelySubtags::Remove, self);
+}
+
+// Returns a newly allocated copy of the zero-terminated string |aStr|.
+UniqueChars Locale::DuplicateStringToUniqueChars(const char* aStr) {
+  // Include the terminating NUL in the byte count so it is copied as well.
+  const size_t bytes = strlen(aStr) + 1;
+  auto copy = MakeUnique<char[]>(bytes);
+  char* out = copy.get();
+  memcpy(out, aStr, bytes);
+  return copy;
+}
+
+// Returns a newly allocated, zero-terminated copy of the characters in
+// |aStr|.
+UniqueChars Locale::DuplicateStringToUniqueChars(Span<const char> aStr) {
+  const size_t count = aStr.size();
+  // One extra byte for the terminator.
+  auto copy = MakeUnique<char[]>(count + 1);
+  char* out = copy.get();
+  memcpy(out, aStr.data(), count);
+  out[count] = '\0';
+  return copy;
+}
+
+// Computes the number of characters ToStringAppend() writes for this locale:
+// the length of every subtag plus one '-' in front of each subtag after the
+// language.
+size_t Locale::ToStringCapacity() const {
+  // The buffer class currently can't be resized, so the required size has to
+  // be known up front and reserved all at once.
+
+  // Length of a subtag that exposes its characters through Span().
+  auto spanLength = [](const auto& subtag) {
+    auto chars = subtag.Span();
+    MOZ_ASSERT(!chars.empty());
+    return chars.size();
+  };
+
+  // Length of a zero-terminated subtag.
+  auto zLength = [](const char* chars) {
+    const size_t n = strlen(chars);
+    MOZ_ASSERT(n > 0);
+    return n;
+  };
+
+  // Combined length of a list of zero-terminated subtags, counting one
+  // separator per entry.
+  auto listLength = [&zLength](const auto& list) {
+    size_t sum = 0;
+    for (const auto& entry : list) {
+      sum += 1 + zLength(entry.get());
+    }
+    return sum;
+  };
+
+  size_t total = spanLength(mLanguage);
+
+  if (mScript.Present()) {
+    total += 1 + spanLength(mScript);
+  }
+
+  if (mRegion.Present()) {
+    total += 1 + spanLength(mRegion);
+  }
+
+  total += listLength(mVariants);
+  total += listLength(mExtensions);
+
+  if (const char* privateUse = mPrivateUse.get()) {
+    total += 1 + zLength(privateUse);
+  }
+
+  return total;
+}
+
+// Writes this locale into |aBuffer| as language, script, region, variants,
+// extensions, and private-use subtags joined by '-', and returns the number
+// of characters written. No terminator is written and no bounds are checked
+// here.
+size_t Locale::ToStringAppend(char* aBuffer) const {
+  // Next position to write to.
+  char* cursor = aBuffer;
+
+  auto putDash = [&cursor]() { *cursor++ = '-'; };
+
+  // Copies a subtag that exposes its characters through Span().
+  auto putSpan = [&cursor](const auto& subtag) {
+    auto chars = subtag.Span();
+    memcpy(cursor, chars.data(), chars.size());
+    cursor += chars.size();
+  };
+
+  // Copies a zero-terminated subtag (without its terminator).
+  auto putZ = [&cursor](const char* chars) {
+    const size_t n = strlen(chars);
+    memcpy(cursor, chars, n);
+    cursor += n;
+  };
+
+  // Copies every entry of a list, each preceded by a separator.
+  auto putListZ = [&putDash, &putZ](const auto& list) {
+    for (const auto& entry : list) {
+      putDash();
+      putZ(entry.get());
+    }
+  };
+
+  // Language always comes first.
+  putSpan(mLanguage);
+
+  // Optional script.
+  if (mScript.Present()) {
+    putDash();
+    putSpan(mScript);
+  }
+
+  // Optional region.
+  if (mRegion.Present()) {
+    putDash();
+    putSpan(mRegion);
+  }
+
+  // Variants, then extensions.
+  putListZ(mVariants);
+  putListZ(mExtensions);
+
+  // Optional private-use subtag.
+  if (const char* privateUse = mPrivateUse.get()) {
+    putDash();
+    putZ(privateUse);
+  }
+
+  return static_cast<size_t>(cursor - aBuffer);
+}
+
+// Scans the next subtag starting at |mIndex| and returns it as a token whose
+// kind records whether letters and/or digits were seen. A '-' ends the token
+// only when it is neither the first nor the last character scanned; any other
+// unexpected character yields an Error token. On success |mIndex| is moved
+// one past the end of the token, i.e. beyond the separator.
+LocaleParser::Token LocaleParser::NextToken() {
+  MOZ_ASSERT(mIndex <= mLength + 1, "called after 'None' token was read");
+
+  TokenKind kind = TokenKind::None;
+  size_t end = mIndex;
+  while (end < mLength) {
+    // UTS 35, section 3.1.
+    // alpha = [A-Z a-z] ;
+    // digit = [0-9] ;
+    const char c = CharAt(end);
+    if (IsAsciiAlpha(c)) {
+      kind |= TokenKind::Alpha;
+    } else if (IsAsciiDigit(c)) {
+      kind |= TokenKind::Digit;
+    } else {
+      // Only a separator with characters on both sides may end the token.
+      const bool isSeparator = c == '-' && end > mIndex && end + 1 < mLength;
+      if (!isSeparator) {
+        return {TokenKind::Error, 0, 0};
+      }
+      break;
+    }
+    end++;
+  }
+
+  const size_t start = mIndex;
+  const size_t tokenLength = end - start;
+
+  // Continue after the separator on the next call.
+  mIndex = end + 1;
+  return Token{kind, start, tokenLength};
+}
+
+// Returns a newly allocated, zero-terminated copy of the |aLength| characters
+// of the input that start at |aIndex|.
+UniqueChars LocaleParser::Chars(size_t aIndex, size_t aLength) const {
+  // One extra character for the terminator.
+  auto copy = MakeUnique<char[]>(aLength + 1);
+  auto source = mLocale + aIndex;
+  char* terminator = std::copy_n(source, aLength, copy.get());
+  *terminator = '\0';
+  return copy;
+}
+
+// Parses the `unicode_language_id` production:
+//
+// unicode_language_id = unicode_language_subtag
+//                       (sep unicode_script_subtag)?
+//                       (sep unicode_region_subtag)?
+//                       (sep unicode_variant_subtag)* ;
+//
+// sep = "-"
+//
+// Note: Unicode CLDR locale identifier backward compatibility extensions
+// removed from `unicode_language_id`.
+//
+// |aTok| is the current token of |aLocaleParser|; on return it holds the
+// first token that is not part of the base name.
+//
+// Subtags are stored in |aTag| exactly as written: their case is not
+// canonicalized and duplicate variants are neither detected nor rejected.
+// Callers must run |CanonicalizeBaseName| afterwards for that.
+//
+// Do not use this function directly: use |ParseBaseName| or
+// |ParseTlangFromTransformExtension| instead.
+Result<Ok, LocaleParser::ParserError> LocaleParser::InternalParseBaseName(
+    LocaleParser& aLocaleParser, Locale& aTag, Token& aTok) {
+  // The language subtag is mandatory.
+  if (!aLocaleParser.IsLanguage(aTok)) {
+    return Err(ParserError::NotParseable);
+  }
+  aLocaleParser.CopyChars(aTok, aTag.mLanguage);
+  aTok = aLocaleParser.NextToken();
+
+  // Optional script subtag.
+  if (aLocaleParser.IsScript(aTok)) {
+    aLocaleParser.CopyChars(aTok, aTag.mScript);
+    aTok = aLocaleParser.NextToken();
+  }
+
+  // Optional region subtag.
+  if (aLocaleParser.IsRegion(aTok)) {
+    aLocaleParser.CopyChars(aTok, aTag.mRegion);
+    aTok = aLocaleParser.NextToken();
+  }
+
+  // Any number of variant subtags.
+  MOZ_ASSERT(aTag.mVariants.length() == 0);
+  for (; aLocaleParser.IsVariant(aTok); aTok = aLocaleParser.NextToken()) {
+    if (!aTag.mVariants.append(aLocaleParser.Chars(aTok))) {
+      return Err(ParserError::OutOfMemory);
+    }
+  }
+
+  return Ok();
+}
+
+Result<Ok, LocaleParser::ParserError> LocaleParser::TryParse(
+ mozilla::Span<const char> aLocale, Locale& aTag) {
+ /* Parses |aLocale| as a complete `unicode_locale_id` and stores the base
+  * name, the extensions, and the private-use sequence in |aTag|.
+  *
+  * Returns NotParseable for a duplicate singleton, a singleton or "x" that
+  * is not followed by at least one subtag, a `tkey` without `tvalue`, or any
+  * input left over after the last recognized part. Returns OutOfMemory when
+  * an extension cannot be appended. Errors from ParseBaseName() are
+  * propagated.
+  *
+  * |aTag| must be a new, empty Locale.
+  */
+ MOZ_ASSERT(aTag.Language().Missing());
+ MOZ_ASSERT(aTag.Script().Missing());
+ MOZ_ASSERT(aTag.Region().Missing());
+ MOZ_ASSERT(aTag.Variants().empty());
+ MOZ_ASSERT(aTag.Extensions().empty());
+ MOZ_ASSERT(aTag.PrivateUse().isNothing());
+
+ // unicode_locale_id = unicode_language_id
+ // extensions*
+ // pu_extensions? ;
+
+ LocaleParser ts(aLocale);
+ Token tok = ts.NextToken();
+
+ MOZ_TRY(ParseBaseName(ts, aTag, tok));
+
+ // extensions = unicode_locale_extensions
+ // | transformed_extensions
+ // | other_extensions ;
+
+ // Bit set of seen singletons.
+ uint64_t seenSingletons = 0;
+
+ auto& extensions = aTag.mExtensions;
+ while (ts.IsExtensionStart(tok)) {
+ char singleton = ts.SingletonKey(tok);
+
+ // Reject the input if a duplicate singleton was found.
+ uint64_t hash = 1ULL << (AsciiAlphanumericToNumber(singleton) + 1);
+ if (seenSingletons & hash) {
+ return Err(ParserError::NotParseable);
+ }
+ seenSingletons |= hash;
+
+ Token start = tok;
+ tok = ts.NextToken();
+
+ // We'll check for missing non-singleton subtags after this block by
+ // comparing |startValue| with the then-current position.
+ size_t startValue = tok.Index();
+
+ if (singleton == 'u') {
+ while (ts.IsUnicodeExtensionPart(tok)) {
+ tok = ts.NextToken();
+ }
+ } else if (singleton == 't') {
+ // transformed_extensions = sep [tT]
+ // ((sep tlang (sep tfield)*)
+ // | (sep tfield)+) ;
+
+ // tlang = unicode_language_subtag
+ // (sep unicode_script_subtag)?
+ // (sep unicode_region_subtag)?
+ // (sep unicode_variant_subtag)* ;
+ if (ts.IsLanguage(tok)) {
+ tok = ts.NextToken();
+
+ if (ts.IsScript(tok)) {
+ tok = ts.NextToken();
+ }
+
+ if (ts.IsRegion(tok)) {
+ tok = ts.NextToken();
+ }
+
+ while (ts.IsVariant(tok)) {
+ tok = ts.NextToken();
+ }
+ }
+
+ // tfield = tkey tvalue;
+ while (ts.IsTransformExtensionKey(tok)) {
+ tok = ts.NextToken();
+
+ size_t startTValue = tok.Index();
+ while (ts.IsTransformExtensionPart(tok)) {
+ tok = ts.NextToken();
+ }
+
+ // `tfield` requires at least one `tvalue`.
+ if (tok.Index() <= startTValue) {
+ return Err(ParserError::NotParseable);
+ }
+ }
+ } else {
+ while (ts.IsOtherExtensionPart(tok)) {
+ tok = ts.NextToken();
+ }
+ }
+
+ // Singletons must be followed by a non-singleton subtag, "en-a-b" is not
+ // allowed.
+ if (tok.Index() <= startValue) {
+ return Err(ParserError::NotParseable);
+ }
+
+ UniqueChars extension = ts.Extension(start, tok);
+ if (!extensions.append(std::move(extension))) {
+ return Err(ParserError::OutOfMemory);
+ }
+ }
+
+ // Trailing `pu_extension` component of the `unicode_locale_id` production.
+ if (ts.IsPrivateUseStart(tok)) {
+ Token start = tok;
+ tok = ts.NextToken();
+
+ size_t startValue = tok.Index();
+ while (ts.IsPrivateUsePart(tok)) {
+ tok = ts.NextToken();
+ }
+
+ // There must be at least one subtag after the "-x-".
+ if (tok.Index() <= startValue) {
+ return Err(ParserError::NotParseable);
+ }
+
+ UniqueChars privateUse = ts.Extension(start, tok);
+ aTag.mPrivateUse = std::move(privateUse);
+ }
+
+ if (!tok.IsNone()) {  // Unconsumed input remains, so the identifier is invalid.
+ return Err(ParserError::NotParseable);
+ }
+
+ return Ok();
+}
+
+// Parses |aLocale| as a bare `unicode_language_id` (language, script, region,
+// variants) into |aTag|. Errors from ParseBaseName() are propagated, and any
+// input remaining after the base name yields NotParseable.
+Result<Ok, LocaleParser::ParserError> LocaleParser::TryParseBaseName(
+    Span<const char> aLocale, Locale& aTag) {
+  // The caller has to hand in a fresh, empty Locale.
+  MOZ_ASSERT(aTag.Language().Missing());
+  MOZ_ASSERT(aTag.Script().Missing());
+  MOZ_ASSERT(aTag.Region().Missing());
+  MOZ_ASSERT(aTag.Variants().empty());
+  MOZ_ASSERT(aTag.Extensions().empty());
+  MOZ_ASSERT(aTag.PrivateUse().isNothing());
+
+  LocaleParser parser(aLocale);
+  Token current = parser.NextToken();
+
+  MOZ_TRY(ParseBaseName(parser, aTag, current));
+
+  // The base name must span the entire input.
+  if (current.IsNone()) {
+    return Ok();
+  }
+  return Err(ParserError::NotParseable);
+}
+
+/* Parse |aExtension|, which must be a valid `transformed_extensions` subtag,
+ * and fill |aTag| and |aFields| from the `tlang` and `tfield` components.
+ * Each entry appended to |aFields| is the (begin, length) range of one
+ * complete `tfield` (its key plus all of its values) inside |aExtension|. */
+// Returns NotParseable on malformed input and OutOfMemory if |aFields| can't grow.
+Result<Ok, LocaleParser::ParserError> LocaleParser::ParseTransformExtension(
+ Span<const char> aExtension, Locale& aTag, TFieldVector& aFields) {
+ LocaleParser ts(aExtension);
+ Token tok = ts.NextToken();
+
+ if (!ts.IsExtensionStart(tok) || ts.SingletonKey(tok) != 't') {
+ return Err(ParserError::NotParseable);
+ }
+
+ tok = ts.NextToken();
+
+ if (tok.IsNone()) {
+ return Err(ParserError::NotParseable);
+ }
+
+ if (ts.IsLanguage(tok)) {
+ // We're parsing a possible `tlang` in a known-valid transform extension, so
+ // use the special-purpose function that takes advantage of this to compute
+ // lowercased |tag| contents in an optimal manner.
+ MOZ_TRY(ParseTlangInTransformExtension(ts, aTag, tok));
+
+ // After `tlang` we must have a `tfield` and its `tkey`, or we're at the end
+ // of the transform extension.
+ MOZ_ASSERT(ts.IsTransformExtensionKey(tok) || tok.IsNone());
+ } else {
+ // If there's no `tlang` subtag, at least one `tfield` must be present.
+ MOZ_ASSERT(ts.IsTransformExtensionKey(tok));
+ }
+
+ // Trailing `tfield` subtags. (Any other trailing subtags are an error,
+ // because we're guaranteed to only see a valid transform extension here.)
+ while (ts.IsTransformExtensionKey(tok)) {
+ size_t begin = tok.Index();
+ tok = ts.NextToken();
+
+ size_t startTValue = tok.Index();
+ while (ts.IsTransformExtensionPart(tok)) {
+ tok = ts.NextToken();
+ }
+
+ // `tfield` requires at least one `tvalue`.
+ if (tok.Index() <= startTValue) {
+ return Err(ParserError::NotParseable);
+ }
+
+ size_t length = tok.Index() - 1 - begin;  // Exclude the separator before |tok|.
+ if (!aFields.emplaceBack(begin, length)) {
+ return Err(ParserError::OutOfMemory);
+ }
+ }
+
+ if (!tok.IsNone()) {
+ return Err(ParserError::NotParseable);
+ }
+
+ return Ok();
+}
+
+// Split |aExtension|, which must be a valid `unicode_locale_extensions`
+// subtag, into its `attribute` components (recorded in |aAttributes|) and its
+// `keyword` components (recorded in |aKeywords|). Both vectors receive
+// begin/length ranges into |aExtension|.
+Result<Ok, LocaleParser::ParserError> LocaleParser::ParseUnicodeExtension(
+    Span<const char> aExtension, AttributesVector& aAttributes,
+    KeywordsVector& aKeywords) {
+  LocaleParser parser(aExtension);
+  Token token = parser.NextToken();
+
+  // unicode_locale_extensions = sep [uU] ((sep keyword)+ |
+  //                                       (sep attribute)+ (sep keyword)*) ;
+
+  // The extension has to open with the 'u' singleton.
+  if (!parser.IsExtensionStart(token) || parser.SingletonKey(token) != 'u') {
+    return Err(ParserError::NotParseable);
+  }
+
+  // A singleton that is not followed by any subtag is malformed.
+  token = parser.NextToken();
+  if (token.IsNone()) {
+    return Err(ParserError::NotParseable);
+  }
+
+  // Leading attributes: every attribute is a single token.
+  while (parser.IsUnicodeExtensionAttribute(token)) {
+    if (!aAttributes.emplaceBack(token.Index(), token.Length())) {
+      return Err(ParserError::OutOfMemory);
+    }
+    token = parser.NextToken();
+  }
+
+  // keyword = key (sep type)? ;
+  while (parser.IsUnicodeExtensionKey(token)) {
+    const size_t keywordBegin = token.Index();
+    token = parser.NextToken();
+
+    // Skip over the subtags that make up the optional type.
+    while (parser.IsUnicodeExtensionType(token)) {
+      token = parser.NextToken();
+    }
+
+    if (token.IsError()) {
+      return Err(ParserError::NotParseable);
+    }
+
+    // NOTE(review): the "- 1" presumably drops the separator in front of
+    // |token|; confirm against NextToken().
+    const size_t keywordLength = token.Index() - 1 - keywordBegin;
+    if (!aKeywords.emplaceBack(keywordBegin, keywordLength)) {
+      return Err(ParserError::OutOfMemory);
+    }
+  }
+
+  // Every subtag must have been consumed by now.
+  if (token.IsNone()) {
+    return Ok();
+  }
+  return Err(ParserError::NotParseable);
+}
+
+// Check whether |aExtension| is a well-formed `unicode_locale_extensions`
+// subtag, without recording any of its components.
+//
+// Returns Ok() when the whole input parses, ParserError::NotParseable
+// otherwise. This function stores nothing, so it never reports
+// ParserError::OutOfMemory.
+Result<Ok, LocaleParser::ParserError> LocaleParser::CanParseUnicodeExtension(
+    Span<const char> aExtension) {
+  LocaleParser ts(aExtension);
+  Token tok = ts.NextToken();
+
+  // unicode_locale_extensions = sep [uU] ((sep keyword)+ |
+  //                                       (sep attribute)+ (sep keyword)*) ;
+
+  // The extension has to open with the 'u' singleton.
+  if (!ts.IsExtensionStart(tok) || ts.SingletonKey(tok) != 'u') {
+    return Err(ParserError::NotParseable);
+  }
+
+  tok = ts.NextToken();
+
+  // A singleton that is not followed by any subtag is malformed.
+  if (tok.IsNone()) {
+    return Err(ParserError::NotParseable);
+  }
+
+  // Leading attributes.
+  while (ts.IsUnicodeExtensionAttribute(tok)) {
+    tok = ts.NextToken();
+  }
+
+  // keyword = key (sep type)? ;
+  while (ts.IsUnicodeExtensionKey(tok)) {
+    tok = ts.NextToken();
+
+    while (ts.IsUnicodeExtensionType(tok)) {
+      tok = ts.NextToken();
+    }
+
+    if (tok.IsError()) {
+      return Err(ParserError::NotParseable);
+    }
+  }
+
+  // Leftover input is a syntax error, not an allocation failure, so report it
+  // the same way ParseUnicodeExtension does.
+  if (!tok.IsNone()) {
+    return Err(ParserError::NotParseable);
+  }
+
+  return Ok();
+}
+
+// Check whether |aUnicodeType| consists solely of subtags that are valid in
+// the `type` part of a Unicode extension keyword. The caller has to exclude
+// the empty string.
+Result<Ok, LocaleParser::ParserError>
+LocaleParser::CanParseUnicodeExtensionType(Span<const char> aUnicodeType) {
+  MOZ_ASSERT(!aUnicodeType.empty(), "caller must exclude empty strings");
+
+  LocaleParser parser(aUnicodeType);
+
+  // Walk over all leading type subtags.
+  Token token = parser.NextToken();
+  while (parser.IsUnicodeExtensionType(token)) {
+    token = parser.NextToken();
+  }
+
+  // Only a fully consumed input is a valid type.
+  if (token.IsNone()) {
+    return Ok();
+  }
+  return Err(ParserError::NotParseable);
+}
+
+} // namespace mozilla::intl
diff --git a/intl/components/src/Locale.h b/intl/components/src/Locale.h
new file mode 100644
index 0000000000..478d5f4a9e
--- /dev/null
+++ b/intl/components/src/Locale.h
@@ -0,0 +1,773 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* Structured representation of Unicode locale IDs used with Intl functions. */
+
+#ifndef intl_components_Locale_h
+#define intl_components_Locale_h
+
+#include "mozilla/Assertions.h"
+#include "mozilla/intl/ICUError.h"
+#include "mozilla/intl/ICU4CGlue.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/Span.h"
+#include "mozilla/TextUtils.h"
+#include "mozilla/TypedEnumBits.h"
+#include "mozilla/Variant.h"
+#include "mozilla/Vector.h"
+#include "mozilla/Result.h"
+
+#include <algorithm>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <utility>
+
+#include "unicode/uloc.h"
+
+namespace mozilla::intl {
+
+/**
+ * Return true if |aLanguage| is a valid language subtag.
+ */
+template <typename CharT>
+bool IsStructurallyValidLanguageTag(mozilla::Span<const CharT> aLanguage);
+
+/**
+ * Return true if |aScript| is a valid script subtag.
+ */
+template <typename CharT>
+bool IsStructurallyValidScriptTag(mozilla::Span<const CharT> aScript);
+
+/**
+ * Return true if |aRegion| is a valid region subtag.
+ */
+template <typename CharT>
+bool IsStructurallyValidRegionTag(mozilla::Span<const CharT> aRegion);
+
+#ifdef DEBUG
+/**
+ * Return true if |aVariant| is a valid variant subtag.
+ */
+bool IsStructurallyValidVariantTag(mozilla::Span<const char> aVariant);
+
+/**
+ * Return true if |aExtension| is a valid Unicode extension subtag.
+ */
+bool IsStructurallyValidUnicodeExtensionTag(
+ mozilla::Span<const char> aExtension);
+
+/**
+ * Return true if |aPrivateUse| is a valid private-use subtag.
+ */
+bool IsStructurallyValidPrivateUseTag(mozilla::Span<const char> aPrivateUse);
+
+#endif
+
+// Map an ASCII upper case letter to its lower case form; every other ASCII
+// character is returned unchanged. |aChar| must be ASCII.
+template <typename CharT>
+char AsciiToLowerCase(CharT aChar) {
+  MOZ_ASSERT(mozilla::IsAscii(aChar));
+  if (mozilla::IsAsciiUppercaseAlpha(aChar)) {
+    return aChar + 0x20;
+  }
+  return aChar;
+}
+
+// Map an ASCII lower case letter to its upper case form; every other ASCII
+// character is returned unchanged. |aChar| must be ASCII.
+template <typename CharT>
+char AsciiToUpperCase(CharT aChar) {
+  MOZ_ASSERT(mozilla::IsAscii(aChar));
+  if (mozilla::IsAsciiLowercaseAlpha(aChar)) {
+    return aChar - 0x20;
+  }
+  return aChar;
+}
+
+// Write the lower case form of the |aLength| ASCII characters starting at
+// |aChars| to |aDest|.
+template <typename CharT>
+void AsciiToLowerCase(CharT* aChars, size_t aLength, char* aDest) {
+  std::transform(aChars, aChars + aLength, aDest,
+                 [](CharT aChar) { return AsciiToLowerCase(aChar); });
+}
+
+// Write the upper case form of the |aLength| ASCII characters starting at
+// |aChars| to |aDest|.
+template <typename CharT>
+void AsciiToUpperCase(CharT* aChars, size_t aLength, char* aDest) {
+  std::transform(aChars, aChars + aLength, aDest,
+                 [](CharT aChar) { return AsciiToUpperCase(aChar); });
+}
+
+// Write the title case form of the |aLength| ASCII characters starting at
+// |aChars| to |aDest|: the first character is upper-cased, all remaining
+// characters are lower-cased. An empty input writes nothing.
+template <typename CharT>
+void AsciiToTitleCase(CharT* aChars, size_t aLength, char* aDest) {
+  if (aLength == 0) {
+    return;
+  }
+
+  AsciiToUpperCase(aChars, 1, aDest);
+  AsciiToLowerCase(aChars + 1, aLength - 1, aDest + 1);
+}
+
+// Constants for language subtag lengths. The grammar production each group of constants is derived from is quoted above it.
+namespace LanguageTagLimits {
+
+// unicode_language_subtag = alpha{2,3} | alpha{5,8} ;
+static constexpr size_t LanguageLength = 8; // longest alternative: alpha{5,8}
+
+// unicode_script_subtag = alpha{4} ;
+static constexpr size_t ScriptLength = 4;
+
+// unicode_region_subtag = (alpha{2} | digit{3}) ;
+static constexpr size_t RegionLength = 3; // longest alternative: digit{3}
+static constexpr size_t AlphaRegionLength = 2; // the alpha{2} alternative
+static constexpr size_t DigitRegionLength = 3; // the digit{3} alternative
+
+// key = alphanum alpha ;
+static constexpr size_t UnicodeKeyLength = 2;
+
+// tkey = alpha digit ;
+static constexpr size_t TransformKeyLength = 2;
+
+} // namespace LanguageTagLimits
+
+// A language subtag of at most |SubtagLength| characters. The characters live
+// in a fixed size buffer, so the subtag can be stored inline in Locale.
+template <size_t SubtagLength>
+class LanguageTagSubtag final {
+  uint8_t mLength = 0;
+  char mChars[SubtagLength] = {};  // zero initialize
+
+ public:
+  LanguageTagSubtag() = default;
+
+  // Copying always transfers the complete buffer, not just the used part.
+  LanguageTagSubtag(const LanguageTagSubtag& aOther)
+      : mLength(aOther.mLength) {
+    std::copy_n(aOther.mChars, SubtagLength, mChars);
+  }
+
+  LanguageTagSubtag& operator=(const LanguageTagSubtag& aOther) {
+    mLength = aOther.mLength;
+    std::copy_n(aOther.mChars, SubtagLength, mChars);
+    return *this;
+  }
+
+  // Number of characters currently stored.
+  size_t Length() const { return mLength; }
+
+  // A subtag with no characters counts as missing.
+  bool Present() const { return mLength != 0; }
+  bool Missing() const { return !Present(); }
+
+  // View of the stored characters; there is no terminating null character.
+  mozilla::Span<const char> Span() const {
+    return mozilla::Span<const char>(mChars, mLength);
+  }
+
+  // Replace the contents with |str|, which must not be longer than
+  // |SubtagLength|.
+  template <typename CharT>
+  void Set(mozilla::Span<const CharT> str) {
+    const size_t length = str.size();
+    MOZ_ASSERT(length <= SubtagLength);
+    std::copy_n(str.data(), length, mChars);
+    mLength = length;
+  }
+
+  // The case conversion methods below always process |SubtagLength|
+  // characters rather than |Length()| characters. Current compilers (tested
+  // GCC and Clang) can't infer the maximum string length, even with hints
+  // like |std::min|, and emit SIMD optimized code for a variable length. SIMD
+  // code doesn't make sense here, because it only kicks in for long strings.
+  // A fixed length avoids it and additionally ensures the compiler unrolls
+  // the loop in the case conversion code.
+
+  void ToLowerCase() { AsciiToLowerCase(mChars, SubtagLength, mChars); }
+
+  void ToUpperCase() { AsciiToUpperCase(mChars, SubtagLength, mChars); }
+
+  void ToTitleCase() { AsciiToTitleCase(mChars, SubtagLength, mChars); }
+
+  // Compare the subtag against |str|. One element of |str| is not compared;
+  // for a string literal that is the terminating null character.
+  template <size_t N>
+  bool EqualTo(const char (&str)[N]) const {
+    constexpr size_t literalLength = N - 1;
+    static_assert(literalLength <= SubtagLength,
+                  "subtag literals must not exceed the maximum subtag length");
+
+    if (mLength != literalLength) {
+      return false;
+    }
+    return memcmp(mChars, str, literalLength) == 0;
+  }
+};
+
+using LanguageSubtag = LanguageTagSubtag<LanguageTagLimits::LanguageLength>;
+using ScriptSubtag = LanguageTagSubtag<LanguageTagLimits::ScriptLength>;
+using RegionSubtag = LanguageTagSubtag<LanguageTagLimits::RegionLength>;
+
+using Latin1Char = unsigned char;
+using UniqueChars = UniquePtr<char[]>;
+
+/**
+ * Object representing a Unicode BCP 47 locale identifier.
+ *
+ * All subtags are already in canonicalized case.
+ *
+ * The language, script and region subtags are stored inline. Variants,
+ * extensions and the private-use part are stored as owned strings
+ * (UniqueChars). The class can be moved but not copied.
+ */
+class MOZ_STACK_CLASS Locale final {
+ LanguageSubtag mLanguage = {};
+ ScriptSubtag mScript = {};
+ RegionSubtag mRegion = {};
+
+ using VariantsVector = Vector<UniqueChars, 2>;
+ using ExtensionsVector = Vector<UniqueChars, 2>;
+
+ VariantsVector mVariants;
+ ExtensionsVector mExtensions;
+ UniqueChars mPrivateUse = nullptr;
+
+ friend class LocaleParser;
+
+ public:
+ enum class CanonicalizationError : uint8_t {
+ DuplicateVariant,
+ InternalError,
+ OutOfMemory,
+ };
+
+ private:
+ Result<Ok, CanonicalizationError> CanonicalizeUnicodeExtension(
+ UniqueChars& unicodeExtension);
+
+ Result<Ok, CanonicalizationError> CanonicalizeTransformExtension(
+ UniqueChars& transformExtension);
+
+ public:
+ static bool LanguageMapping(LanguageSubtag& aLanguage);
+ static bool ComplexLanguageMapping(const LanguageSubtag& aLanguage);
+
+ private:
+ static bool ScriptMapping(ScriptSubtag& aScript);
+ static bool RegionMapping(RegionSubtag& aRegion);
+ static bool ComplexRegionMapping(const RegionSubtag& aRegion);
+
+ void PerformComplexLanguageMappings();
+ void PerformComplexRegionMappings();
+ [[nodiscard]] bool PerformVariantMappings();
+
+ [[nodiscard]] bool UpdateLegacyMappings();
+
+ static bool SignLanguageMapping(LanguageSubtag& aLanguage,
+ const RegionSubtag& aRegion);
+
+ static const char* ReplaceTransformExtensionType(
+ mozilla::Span<const char> aKey, mozilla::Span<const char> aType);
+
+ public:
+ /**
+ * Given a Unicode key and type, return the null-terminated preferred
+ * replacement for that type if there is one, or null if there is none, e.g.
+ * in effect
+ * |ReplaceUnicodeExtensionType("ca", "islamicc") == "islamic-civil"|
+ * and
+ * |ReplaceUnicodeExtensionType("ca", "islamic-civil") == nullptr|.
+ */
+ static const char* ReplaceUnicodeExtensionType(
+ mozilla::Span<const char> aKey, mozilla::Span<const char> aType);
+
+ public:
+ Locale() = default;
+ Locale(const Locale&) = delete;
+ Locale& operator=(const Locale&) = delete;
+ Locale(Locale&&) = default;
+ Locale& operator=(Locale&&) = default;
+
+ template <class Vec>
+ class SubtagIterator {
+ using Iter = decltype(std::declval<const Vec>().begin());
+
+ Iter mIter;
+
+ public:
+ explicit SubtagIterator(Iter iter) : mIter(iter) {}
+
+ // std::iterator traits.
+ using iterator_category = std::input_iterator_tag;
+ using value_type = Span<const char>;
+ using difference_type = ptrdiff_t;
+ using pointer = value_type*;
+ using reference = value_type&;
+
+ SubtagIterator& operator++() {
+ mIter++;
+ return *this;
+ }
+
+ SubtagIterator operator++(int) {
+ SubtagIterator result = *this;
+ ++(*this);
+ return result;
+ }
+
+ bool operator==(const SubtagIterator& aOther) const {
+ return mIter == aOther.mIter;
+ }
+
+ bool operator!=(const SubtagIterator& aOther) const {
+ return !(*this == aOther);
+ }
+
+ value_type operator*() const { return MakeStringSpan(mIter->get()); }
+ };
+
+ template <size_t N>
+ class SubtagEnumeration {
+ using Vec = Vector<UniqueChars, N>;
+
+ const Vec& mVector;
+
+ public:
+ explicit SubtagEnumeration(const Vec& aVector) : mVector(aVector) {}
+
+ size_t length() const { return mVector.length(); }
+ bool empty() const { return mVector.empty(); }
+
+ auto begin() const { return SubtagIterator<Vec>(mVector.begin()); }
+ auto end() const { return SubtagIterator<Vec>(mVector.end()); }
+
+ Span<const char> operator[](size_t aIndex) const {
+ return MakeStringSpan(mVector[aIndex].get());
+ }
+ };
+
+ const LanguageSubtag& Language() const { return mLanguage; }
+ const ScriptSubtag& Script() const { return mScript; }
+ const RegionSubtag& Region() const { return mRegion; }
+ auto Variants() const { return SubtagEnumeration(mVariants); }
+ auto Extensions() const { return SubtagEnumeration(mExtensions); }
+ Maybe<Span<const char>> PrivateUse() const {
+ if (const char* p = mPrivateUse.get()) {
+ return Some(MakeStringSpan(p));
+ }
+ return Nothing();
+ }
+
+ /**
+ * Return the Unicode extension subtag or Nothing if not present.
+ */
+ Maybe<Span<const char>> GetUnicodeExtension() const;
+
+ private:
+ ptrdiff_t UnicodeExtensionIndex() const;
+
+ public:
+ /**
+ * Set the language subtag. The input must be a valid language subtag.
+ */
+ template <size_t N>
+ void SetLanguage(const char (&aLanguage)[N]) {
+ mozilla::Span<const char> span(aLanguage, N - 1);
+ MOZ_ASSERT(IsStructurallyValidLanguageTag(span));
+ mLanguage.Set(span);
+ }
+
+ /**
+ * Set the language subtag. The input must be a valid language subtag.
+ */
+ void SetLanguage(const LanguageSubtag& aLanguage) {
+ MOZ_ASSERT(IsStructurallyValidLanguageTag(aLanguage.Span()));
+ mLanguage.Set(aLanguage.Span());
+ }
+
+ /**
+ * Set the script subtag. The input must be a valid script subtag.
+ */
+ template <size_t N>
+ void SetScript(const char (&aScript)[N]) {
+ mozilla::Span<const char> span(aScript, N - 1);
+ MOZ_ASSERT(IsStructurallyValidScriptTag(span));
+ mScript.Set(span);
+ }
+
+ /**
+ * Set the script subtag. The input must be a valid script subtag or the empty
+ * string.
+ */
+ void SetScript(const ScriptSubtag& aScript) {
+ MOZ_ASSERT(aScript.Missing() ||
+ IsStructurallyValidScriptTag(aScript.Span()));
+ mScript.Set(aScript.Span());
+ }
+
+ /**
+ * Set the region subtag. The input must be a valid region subtag.
+ */
+ template <size_t N>
+ void SetRegion(const char (&aRegion)[N]) {
+ mozilla::Span<const char> span(aRegion, N - 1);
+ MOZ_ASSERT(IsStructurallyValidRegionTag(span));
+ mRegion.Set(span);
+ }
+
+ /**
+ * Set the region subtag. The input must be a valid region subtag or the empty
+ * string.
+ */
+ void SetRegion(const RegionSubtag& aRegion) {
+ MOZ_ASSERT(aRegion.Missing() ||
+ IsStructurallyValidRegionTag(aRegion.Span()));
+ mRegion.Set(aRegion.Span());
+ }
+
+ /**
+ * Removes all variant subtags.
+ */
+ void ClearVariants() { mVariants.clearAndFree(); }
+
+ /**
+ * Set the Unicode extension subtag. The input must be a valid Unicode
+ * extension subtag.
+ */
+ ICUResult SetUnicodeExtension(Span<const char> aExtension);
+
+ /**
+ * Remove any Unicode extension subtag if present.
+ */
+ void ClearUnicodeExtension();
+
+ /** Canonicalize the base-name (language, script, region, variant) subtags. */
+ Result<Ok, CanonicalizationError> CanonicalizeBaseName();
+
+ /**
+ * Canonicalize all extension subtags.
+ */
+ Result<Ok, CanonicalizationError> CanonicalizeExtensions();
+
+ /**
+ * Canonicalizes the given structurally valid Unicode BCP 47 locale
+ * identifier, including regularized case of subtags. For example, the
+ * locale Zh-haNS-bu-variant2-Variant1-u-ca-chinese-t-Zh-laTN-x-PRIVATE,
+ * where
+ *
+ * Zh ; 2*3ALPHA
+ * -haNS ; ["-" script]
+ * -bu ; ["-" region]
+ * -variant2 ; *("-" variant)
+ * -Variant1
+ * -u-ca-chinese ; *("-" extension)
+ * -t-Zh-laTN
+ * -x-PRIVATE ; ["-" privateuse]
+ *
+ * becomes zh-Hans-MM-variant1-variant2-t-zh-latn-u-ca-chinese-x-private
+ *
+ * The base name is canonicalized first; the extensions are only processed
+ * when that step succeeds.
+ *
+ * Spec: ECMAScript Internationalization API Specification, 6.2.3.
+ */
+ Result<Ok, CanonicalizationError> Canonicalize() {
+ MOZ_TRY(CanonicalizeBaseName());
+ return CanonicalizeExtensions();
+ }
+
+ /**
+ * Fill the buffer with a string representation of the locale.
+ *
+ * The buffer's character type must be |char|. Returns
+ * ICUError::OutOfMemory when the required capacity cannot be reserved.
+ */
+ template <typename B>
+ ICUResult ToString(B& aBuffer) const {
+ static_assert(std::is_same_v<typename B::CharType, char>);
+
+ size_t capacity = ToStringCapacity();
+
+ // Attempt to reserve needed capacity
+ if (!aBuffer.reserve(capacity)) {
+ return Err(ICUError::OutOfMemory);
+ }
+
+ size_t offset = ToStringAppend(aBuffer.data());
+
+ MOZ_ASSERT(capacity == offset);
+ aBuffer.written(offset);
+
+ return Ok();
+ }
+
+ /**
+ * Add likely-subtags to the locale.
+ *
+ * Spec: <https://www.unicode.org/reports/tr35/#Likely_Subtags>
+ */
+ ICUResult AddLikelySubtags();
+
+ /**
+ * Remove likely-subtags from the locale.
+ *
+ * Spec: <https://www.unicode.org/reports/tr35/#Likely_Subtags>
+ */
+ ICUResult RemoveLikelySubtags();
+
+ /**
+ * Returns the default locale as an ICU locale identifier. The returned string
+ * is NOT a valid BCP 47 locale!
+ *
+ * Also see <https://unicode-org.github.io/icu/userguide/locale>.
+ */
+ static const char* GetDefaultLocale() { return uloc_getDefault(); }
+
+ /**
+ * Returns an iterator over all supported locales.
+ *
+ * The returned strings are ICU locale identifiers and NOT BCP 47 language
+ * tags.
+ *
+ * Also see <https://unicode-org.github.io/icu/userguide/locale>.
+ */
+ static auto GetAvailableLocales() {
+ return AvailableLocalesEnumeration<uloc_countAvailable,
+ uloc_getAvailable>();
+ }
+
+ private:
+ static UniqueChars DuplicateStringToUniqueChars(const char* aStr);
+ static UniqueChars DuplicateStringToUniqueChars(Span<const char> aStr);
+ size_t ToStringCapacity() const;
+ size_t ToStringAppend(char* aBuffer) const;
+};
+
+/**
+ * Parser for Unicode BCP 47 locale identifiers.
+ *
+ * Instances are only created internally; callers use the static TryParse*
+ * and CanParse* entry points.
+ *
+ * <https://unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers>
+ */
+class MOZ_STACK_CLASS LocaleParser final {
+ public:
+ enum class ParserError : uint8_t {
+ // Input was not parseable as a locale, subtag or extension.
+ NotParseable,
+ // Unable to allocate memory for the parser to operate.
+ OutOfMemory,
+ };
+
+ // Exposed as |public| for |MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS|.
+ enum class TokenKind : uint8_t {
+ None = 0b000,
+ Alpha = 0b001,
+ Digit = 0b010,
+ AlphaDigit = 0b011,
+ Error = 0b100
+ };
+
+ private:
+ class Token final {
+ size_t mIndex;
+ size_t mLength;
+ TokenKind mKind;
+
+ public:
+ Token(TokenKind aKind, size_t aIndex, size_t aLength)
+ : mIndex(aIndex), mLength(aLength), mKind(aKind) {}
+
+ TokenKind Kind() const { return mKind; }
+ size_t Index() const { return mIndex; }
+ size_t Length() const { return mLength; }
+
+ bool IsError() const { return mKind == TokenKind::Error; }
+ bool IsNone() const { return mKind == TokenKind::None; }
+ bool IsAlpha() const { return mKind == TokenKind::Alpha; }
+ bool IsDigit() const { return mKind == TokenKind::Digit; }
+ bool IsAlphaDigit() const { return mKind == TokenKind::AlphaDigit; }
+ };
+
+ const char* mLocale;
+ size_t mLength;
+ size_t mIndex = 0;
+
+ explicit LocaleParser(Span<const char> aLocale)
+ : mLocale(aLocale.data()), mLength(aLocale.size()) {}
+
+ char CharAt(size_t aIndex) const { return mLocale[aIndex]; }
+
+ // Copy the token characters into |aSubtag|.
+ template <size_t N>
+ void CopyChars(const Token& aTok, LanguageTagSubtag<N>& aSubtag) const {
+ aSubtag.Set(mozilla::Span(mLocale + aTok.Index(), aTok.Length()));
+ }
+
+ // Create a string copy of |aLength| characters starting at |aIndex|.
+ UniqueChars Chars(size_t aIndex, size_t aLength) const;
+
+ // Create a string copy of the token characters.
+ UniqueChars Chars(const Token& aTok) const {
+ return Chars(aTok.Index(), aTok.Length());
+ }
+
+ UniqueChars Extension(const Token& aStart, const Token& aEnd) const {
+ MOZ_ASSERT(aStart.Index() < aEnd.Index());
+
+ size_t length = aEnd.Index() - 1 - aStart.Index();
+ return Chars(aStart.Index(), length);
+ }
+
+ Token NextToken();
+
+ // unicode_language_subtag = alpha{2,3} | alpha{5,8} ;
+ //
+ // Four character language subtags are not allowed in Unicode BCP 47 locale
+ // identifiers. Also see the comparison to Unicode CLDR locale identifiers in
+ // <https://unicode.org/reports/tr35/#BCP_47_Conformance>.
+ bool IsLanguage(const Token& aTok) const {
+ return aTok.IsAlpha() && ((2 <= aTok.Length() && aTok.Length() <= 3) ||
+ (5 <= aTok.Length() && aTok.Length() <= 8));
+ }
+
+ // unicode_script_subtag = alpha{4} ;
+ bool IsScript(const Token& aTok) const {
+ return aTok.IsAlpha() && aTok.Length() == 4;
+ }
+
+ // unicode_region_subtag = (alpha{2} | digit{3}) ;
+ bool IsRegion(const Token& aTok) const {
+ return (aTok.IsAlpha() && aTok.Length() == 2) ||
+ (aTok.IsDigit() && aTok.Length() == 3);
+ }
+
+ // unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3}) ;
+ bool IsVariant(const Token& aTok) const {
+ return (5 <= aTok.Length() && aTok.Length() <= 8) ||
+ (aTok.Length() == 4 && mozilla::IsAsciiDigit(CharAt(aTok.Index())));
+ }
+
+ // Returns the code unit of the first character at the given singleton token.
+ // Always returns the lower case form of an alphabetical character.
+ char SingletonKey(const Token& aTok) const {
+ MOZ_ASSERT(aTok.Length() == 1);
+ return AsciiToLowerCase(CharAt(aTok.Index()));
+ }
+
+ // extensions = unicode_locale_extensions |
+ // transformed_extensions |
+ // other_extensions ;
+ //
+ // unicode_locale_extensions = sep [uU] ((sep keyword)+ |
+ // (sep attribute)+ (sep keyword)*) ;
+ //
+ // transformed_extensions = sep [tT] ((sep tlang (sep tfield)*) |
+ // (sep tfield)+) ;
+ //
+ // other_extensions = sep [alphanum-[tTuUxX]] (sep alphanum{2,8})+ ;
+ bool IsExtensionStart(const Token& aTok) const {
+ return aTok.Length() == 1 && SingletonKey(aTok) != 'x';
+ }
+
+ // other_extensions = sep [alphanum-[tTuUxX]] (sep alphanum{2,8})+ ;
+ bool IsOtherExtensionPart(const Token& aTok) const {
+ return 2 <= aTok.Length() && aTok.Length() <= 8;
+ }
+
+ // unicode_locale_extensions = sep [uU] ((sep keyword)+ |
+ // (sep attribute)+ (sep keyword)*) ;
+ // keyword = key (sep type)? ;
+ bool IsUnicodeExtensionPart(const Token& aTok) const {
+ return IsUnicodeExtensionKey(aTok) || IsUnicodeExtensionType(aTok) ||
+ IsUnicodeExtensionAttribute(aTok);
+ }
+
+ // attribute = alphanum{3,8} ;
+ bool IsUnicodeExtensionAttribute(const Token& aTok) const {
+ return 3 <= aTok.Length() && aTok.Length() <= 8;
+ }
+
+ // key = alphanum alpha ;
+ bool IsUnicodeExtensionKey(const Token& aTok) const {
+ return aTok.Length() == 2 &&
+ mozilla::IsAsciiAlpha(CharAt(aTok.Index() + 1));
+ }
+
+ // type = alphanum{3,8} (sep alphanum{3,8})* ;
+ bool IsUnicodeExtensionType(const Token& aTok) const {
+ return 3 <= aTok.Length() && aTok.Length() <= 8;
+ }
+
+ // tkey = alpha digit ;
+ bool IsTransformExtensionKey(const Token& aTok) const {
+ return aTok.Length() == 2 && mozilla::IsAsciiAlpha(CharAt(aTok.Index())) &&
+ mozilla::IsAsciiDigit(CharAt(aTok.Index() + 1));
+ }
+
+ // tvalue = (sep alphanum{3,8})+ ;
+ bool IsTransformExtensionPart(const Token& aTok) const {
+ return 3 <= aTok.Length() && aTok.Length() <= 8;
+ }
+
+ // pu_extensions = sep [xX] (sep alphanum{1,8})+ ;
+ bool IsPrivateUseStart(const Token& aTok) const {
+ return aTok.Length() == 1 && SingletonKey(aTok) == 'x';
+ }
+
+ // pu_extensions = sep [xX] (sep alphanum{1,8})+ ;
+ bool IsPrivateUsePart(const Token& aTok) const {
+ return 1 <= aTok.Length() && aTok.Length() <= 8;
+ }
+
+ // Helper function for use in |ParseBaseName| and
+ // |ParseTlangInTransformExtension|. Do not use this directly!
+ static Result<Ok, ParserError> InternalParseBaseName(
+ LocaleParser& aLocaleParser, Locale& aTag, Token& aTok);
+
+ // Parse the `unicode_language_id` production, i.e. the
+ // language/script/region/variants portion of a locale, into |aTag|.
+ // |aTok| must be the current token.
+ static Result<Ok, ParserError> ParseBaseName(LocaleParser& aLocaleParser,
+ Locale& aTag, Token& aTok) {
+ return InternalParseBaseName(aLocaleParser, aTag, aTok);
+ }
+
+ // Parse the `tlang` production within a parsed 't' transform extension.
+ // The precise requirements for "previously parsed" are:
+ //
+ // * the input begins from current token |aTok| with a valid `tlang`
+ // * the `tlang` is wholly lowercase (*not* canonical case)
+ // * variant subtags in the `tlang` may contain duplicates and be
+ // unordered
+ //
+ // Return an error on internal failure. Otherwise, return a success value. If
+ // there was no `tlang`, then |aTag.Language().Missing()|. But if there was a
+ // `tlang`, then |aTag| is filled with subtags exactly as they appeared in the
+ // parse input.
+ static Result<Ok, ParserError> ParseTlangInTransformExtension(
+ LocaleParser& aLocaleParser, Locale& aTag, Token& aTok) {
+ MOZ_ASSERT(aLocaleParser.IsLanguage(aTok));
+ return InternalParseBaseName(aLocaleParser, aTag, aTok);
+ }
+
+ friend class Locale;
+
+ class Range final {
+ size_t mBegin;
+ size_t mLength;
+
+ public:
+ Range(size_t aBegin, size_t aLength) : mBegin(aBegin), mLength(aLength) {}
+
+ size_t Begin() const { return mBegin; }
+ size_t Length() const { return mLength; }
+ };
+
+ using TFieldVector = Vector<Range, 8>;
+ using AttributesVector = Vector<Range, 8>;
+ using KeywordsVector = Vector<Range, 8>;
+
+ // Parse |aExtension|, which must be a validated, fully lowercase
+ // `transformed_extensions` subtag, and fill |aTag| and |aFields| from the
+ // `tlang` and `tfield` components. Data in |aTag| is lowercase, consistent
+ // with |aExtension|.
+ static Result<Ok, ParserError> ParseTransformExtension(
+ mozilla::Span<const char> aExtension, Locale& aTag,
+ TFieldVector& aFields);
+
+ // Parse |aExtension|, which must be a validated, fully lowercase
+ // `unicode_locale_extensions` subtag, and fill |aAttributes| and |aKeywords|
+ // from the `attribute` and `keyword` components.
+ static Result<Ok, ParserError> ParseUnicodeExtension(
+ mozilla::Span<const char> aExtension, AttributesVector& aAttributes,
+ KeywordsVector& aKeywords);
+
+ public:
+ // Parse the input string as a locale.
+ //
+ // NOTE: |aTag| must be a new, empty Locale.
+ static Result<Ok, ParserError> TryParse(Span<const char> aLocale,
+ Locale& aTag);
+
+ // Parse the input string as the base-name parts (language, script, region,
+ // variants) of a locale.
+ //
+ // NOTE: |aTag| must be a new, empty Locale.
+ static Result<Ok, ParserError> TryParseBaseName(Span<const char> aLocale,
+ Locale& aTag);
+
+ // Return Ok() iff |aExtension| can be parsed as a Unicode extension subtag.
+ static Result<Ok, ParserError> CanParseUnicodeExtension(
+ Span<const char> aExtension);
+
+ // Return Ok() iff |aUnicodeType| can be parsed as a Unicode extension type.
+ static Result<Ok, ParserError> CanParseUnicodeExtensionType(
+ Span<const char> aUnicodeType);
+};
+
+MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(LocaleParser::TokenKind)
+
+} // namespace mozilla::intl
+
+#endif /* intl_components_Locale_h */
diff --git a/intl/components/src/LocaleCanonicalizer.cpp b/intl/components/src/LocaleCanonicalizer.cpp
new file mode 100644
index 0000000000..8a83874390
--- /dev/null
+++ b/intl/components/src/LocaleCanonicalizer.cpp
@@ -0,0 +1,36 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/intl/LocaleCanonicalizer.h"
+#include <cstdio>
+#include "unicode/uloc.h"
+
+namespace mozilla::intl {
+
+/* static */
+ICUResult LocaleCanonicalizer::CanonicalizeICULevel1(
+    const char* aLocaleIn, LocaleCanonicalizer::Vector& aLocaleOut) {
+  // Have ICU write the canonicalized form of |aLocaleIn| into |aLocaleOut|.
+  auto fillResult = FillBufferWithICUCall(
+      aLocaleOut,
+      [&aLocaleIn](char* target, int32_t length, UErrorCode* status) {
+        return uloc_canonicalize(aLocaleIn, target, length, status);
+      });
+  if (fillResult.isErr()) {
+    return Err(fillResult.unwrapErr());
+  }
+
+  // ICU4C's own canonicalization does not include this check, but consumers
+  // expect an ASCII-only result, so any byte outside the ASCII range is
+  // rejected.
+  for (char ch : aLocaleOut) {
+    if (static_cast<unsigned char>(ch) >= 0x80) {
+      return Err(ICUError::InternalError);
+    }
+  }
+
+  return Ok();
+}
+
+} // namespace mozilla::intl
diff --git a/intl/components/src/LocaleCanonicalizer.h b/intl/components/src/LocaleCanonicalizer.h
new file mode 100644
index 0000000000..bd17c9dfd4
--- /dev/null
+++ b/intl/components/src/LocaleCanonicalizer.h
@@ -0,0 +1,43 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef intl_components_LocaleCanonicalizer_h_
+#define intl_components_LocaleCanonicalizer_h_
+
+#include "mozilla/intl/ICU4CGlue.h"
+#include "mozilla/Span.h"
+#include "mozilla/Vector.h"
+
+namespace mozilla::intl {
+
+/**
+ * 32 is a somewhat arbitrary size, but it should fit most locales on the
+ * stack to avoid heap allocations.
+ */
+constexpr size_t INITIAL_LOCALE_CANONICALIZER_BUFFER_SIZE = 32;
+
+/**
+ * Eventually this class will unify the behaviors of Locale Canonicalization.
+ * See Bug 1723586.
+ */
+class LocaleCanonicalizer {
+ public:
+ using Vector =
+ mozilla::Vector<char, INITIAL_LOCALE_CANONICALIZER_BUFFER_SIZE>;
+
+ /**
+ * This static method will canonicalize a locale string, per the Level 1
+ * canonicalization steps outlined in:
+ * https://unicode-org.github.io/icu/userguide/locale/#canonicalization
+ *
+ * For instance it will turn the string "en-US" to "en_US". It guarantees that
+ * the string span targeted will be in the ASCII range. The canonicalization
+ * process on ICU is somewhat permissive in what it accepts as input, but only
+ * ASCII locales are technically correct.
+ *
+ * The canonicalized form of |aLocale| is written to |aLocaleOut|. An error
+ * is returned when the underlying ICU call fails, and
+ * ICUError::InternalError when the result contains a non-ASCII byte.
+ */
+ static ICUResult CanonicalizeICULevel1(
+ const char* aLocale, LocaleCanonicalizer::Vector& aLocaleOut);
+};
+
+} // namespace mozilla::intl
+#endif
diff --git a/intl/components/src/LocaleGenerated.cpp b/intl/components/src/LocaleGenerated.cpp
new file mode 100644
index 0000000000..421384b0c3
--- /dev/null
+++ b/intl/components/src/LocaleGenerated.cpp
@@ -0,0 +1,1129 @@
+// Generated by make_intl_data.py. DO NOT EDIT.
+// Version: CLDR-42
+// URL: https://unicode.org/Public/cldr/42/core.zip
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Span.h"
+#include "mozilla/TextUtils.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <cstring>
+#include <iterator>
+#include <string>
+#include <type_traits>
+
+#include "mozilla/intl/Locale.h"
+
+using namespace mozilla::intl::LanguageTagLimits;
+
+// Reports whether |subtag| occurs in |subtags|, a sorted table of
+// fixed-length, null-terminated entries. The subtag must be exactly as long
+// as the table entries (TagLength - 1 characters).
+template <size_t Length, size_t TagLength, size_t SubtagLength>
+static inline bool HasReplacement(
+    const char (&subtags)[Length][TagLength],
+    const mozilla::intl::LanguageTagSubtag<SubtagLength>& subtag) {
+  MOZ_ASSERT(subtag.Length() == TagLength - 1,
+             "subtag must have the same length as the list of subtags");
+
+  // Only the first TagLength - 1 bytes are compared, so the subtag's storage
+  // doesn't need to be null-terminated.
+  auto sortsBefore = [](const char* lhs, const char* rhs) {
+    return memcmp(lhs, rhs, TagLength - 1) < 0;
+  };
+
+  const char* needle = subtag.Span().data();
+  auto* tableEnd = std::end(subtags);
+  auto* candidate =
+      std::lower_bound(std::begin(subtags), tableEnd, needle, sortsBefore);
+
+  // The entry at the lower bound is a match iff the needle doesn't sort
+  // before it.
+  return candidate != tableEnd && !sortsBefore(needle, *candidate);
+}
+
+// Looks up |subtag| in the sorted table |subtags| and returns the entry of
+// |aliases| stored at the same index, or nullptr when the subtag isn't
+// listed. The subtag must be exactly as long as the table entries
+// (TagLength - 1 characters).
+template <size_t Length, size_t TagLength, size_t SubtagLength>
+static inline const char* SearchReplacement(
+    const char (&subtags)[Length][TagLength], const char* (&aliases)[Length],
+    const mozilla::intl::LanguageTagSubtag<SubtagLength>& subtag) {
+  MOZ_ASSERT(subtag.Length() == TagLength - 1,
+             "subtag must have the same length as the list of subtags");
+
+  const char* needle = subtag.Span().data();
+  auto* tableBegin = std::begin(subtags);
+  auto* tableEnd = std::end(subtags);
+
+  auto* slot = std::lower_bound(tableBegin, tableEnd, needle,
+                                [](const char* lhs, const char* rhs) {
+                                  return memcmp(lhs, rhs, TagLength - 1) < 0;
+                                });
+
+  // The lower bound is only a candidate; confirm it really is the subtag.
+  if (slot == tableEnd || memcmp(*slot, needle, TagLength - 1) != 0) {
+    return nullptr;
+  }
+  return aliases[std::distance(tableBegin, slot)];
+}
+
+#ifdef DEBUG
+// True for the characters [a-z0-9].
+static bool IsAsciiLowercaseAlphanumeric(char c) {
+  if (mozilla::IsAsciiDigit(c)) {
+    return true;
+  }
+  return mozilla::IsAsciiLowercaseAlpha(c);
+}
+
+// True for the characters [a-z0-9] and '-'.
+static bool IsAsciiLowercaseAlphanumericOrDash(char c) {
+  return c == '-' || IsAsciiLowercaseAlphanumeric(c);
+}
+
+// A language subtag is canonically cased when every character is a lowercase
+// ASCII letter.
+static bool IsCanonicallyCasedLanguageTag(mozilla::Span<const char> span) {
+  for (char c : span) {
+    if (!mozilla::IsAsciiLowercaseAlpha(c)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// A script subtag is canonically cased when it is title-cased: an uppercase
+// ASCII letter followed only by lowercase ASCII letters.
+static bool IsCanonicallyCasedScriptTag(mozilla::Span<const char> span) {
+  if (!mozilla::IsAsciiUppercaseAlpha(span[0])) {
+    return false;
+  }
+  return std::all_of(span.begin() + 1, span.end(), [](char c) {
+    return mozilla::IsAsciiLowercaseAlpha(c);
+  });
+}
+
+// A region subtag is canonically cased when it consists solely of uppercase
+// ASCII letters or solely of ASCII digits.
+static bool IsCanonicallyCasedRegionTag(mozilla::Span<const char> span) {
+  const auto everyChar = [&span](auto predicate) {
+    return std::all_of(span.begin(), span.end(), predicate);
+  };
+
+  if (everyChar(mozilla::IsAsciiUppercaseAlpha<char>)) {
+    return true;
+  }
+  return everyChar(mozilla::IsAsciiDigit<char>);
+}
+
+// A variant subtag is canonically cased when it only contains [a-z0-9].
+static bool IsCanonicallyCasedVariantTag(mozilla::Span<const char> span) {
+  return std::none_of(span.begin(), span.end(), [](char c) {
+    return !IsAsciiLowercaseAlphanumeric(c);
+  });
+}
+
+// A Unicode extension key is canonically cased when it only contains
+// [a-z0-9].
+static bool IsCanonicallyCasedUnicodeKey(mozilla::Span<const char> key) {
+  for (char c : key) {
+    if (!IsAsciiLowercaseAlphanumeric(c)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// A Unicode extension type is canonically cased when it only contains
+// [a-z0-9] and '-'.
+static bool IsCanonicallyCasedUnicodeType(mozilla::Span<const char> type) {
+  auto firstInvalid = std::find_if_not(type.begin(), type.end(),
+                                       IsAsciiLowercaseAlphanumericOrDash);
+  return firstInvalid == type.end();
+}
+
+// A transform extension key is canonically cased when it only contains
+// [a-z0-9].
+static bool IsCanonicallyCasedTransformKey(mozilla::Span<const char> key) {
+  auto firstInvalid =
+      std::find_if_not(key.begin(), key.end(), IsAsciiLowercaseAlphanumeric);
+  return firstInvalid == key.end();
+}
+
+// A transform extension type is canonically cased when it only contains
+// [a-z0-9] and '-'.
+static bool IsCanonicallyCasedTransformType(mozilla::Span<const char> type) {
+  for (char c : type) {
+    if (!IsAsciiLowercaseAlphanumericOrDash(c)) {
+      return false;
+    }
+  }
+  return true;
+}
+#endif
+
+// Mappings from language subtags to preferred values.
+// Derived from CLDR Supplemental Data, version 42.
+// https://unicode.org/Public/cldr/42/core.zip
+//
+// When |language| is listed in one of the tables below, it is overwritten in
+// place with its replacement and true is returned. Otherwise |language| is
+// not modified and false is returned. Only two- and three-letter language
+// subtags have table entries.
+bool mozilla::intl::Locale::LanguageMapping(LanguageSubtag& language) {
+ MOZ_ASSERT(IsStructurallyValidLanguageTag(language.Span()));
+ MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language.Span()));
+
+ if (language.Length() == 2) {
+ // |languages| is kept sorted because SearchReplacement binary searches it;
+ // |aliases[i]| is the replacement for |languages[i]|.
+ static const char languages[8][3] = {
+ "bh", "in", "iw", "ji", "jw", "mo", "tl", "tw",
+ };
+ static const char* aliases[8] = {
+ "bho", "id", "he", "yi", "jv", "ro", "fil", "ak",
+ };
+
+ if (const char* replacement = SearchReplacement(languages, aliases, language)) {
+ language.Set(mozilla::MakeStringSpan(replacement));
+ return true;
+ }
+ return false;
+ }
+
+ if (language.Length() == 3) {
+ // Same layout as above: a sorted key table and a parallel table holding the
+ // replacement at the matching index.
+ static const char languages[408][4] = {
+ "aam", "aar", "abk", "adp", "afr", "agp", "ais", "ajt", "aju", "aka",
+ "alb", "als", "amh", "ara", "arb", "arg", "arm", "asd", "asm", "aue",
+ "ava", "ave", "aym", "ayr", "ayx", "aze", "azj", "bak", "bam", "baq",
+ "baz", "bcc", "bcl", "bel", "ben", "bgm", "bhk", "bic", "bih", "bis",
+ "bjd", "bjq", "bkb", "blg", "bod", "bos", "bre", "btb", "bul", "bur",
+ "bxk", "bxr", "cat", "ccq", "ces", "cha", "che", "chi", "chu", "chv",
+ "cjr", "cka", "cld", "cmk", "cmn", "cor", "cos", "coy", "cqu", "cre",
+ "cwd", "cym", "cze", "daf", "dan", "dap", "deu", "dgo", "dhd", "dik",
+ "diq", "dit", "div", "djl", "dkl", "drh", "drr", "dud", "duj", "dut",
+ "dwl", "dzo", "ekk", "ell", "elp", "emk", "eng", "epo", "esk", "est",
+ "eus", "ewe", "fao", "fas", "fat", "fij", "fin", "fra", "fre", "fry",
+ "fuc", "ful", "gav", "gaz", "gbc", "gbo", "geo", "ger", "gfx", "ggn",
+ "ggo", "ggr", "gio", "gla", "gle", "glg", "gli", "glv", "gno", "gre",
+ "grn", "gti", "gug", "guj", "guv", "gya", "hat", "hau", "hdn", "hea",
+ "heb", "her", "him", "hin", "hmo", "hrr", "hrv", "hun", "hye", "ibi",
+ "ibo", "ice", "ido", "iii", "ike", "iku", "ile", "ill", "ilw", "ina",
+ "ind", "ipk", "isl", "ita", "izi", "jar", "jav", "jeg", "jpn", "kal",
+ "kan", "kas", "kat", "kau", "kaz", "kdv", "kgc", "kgd", "kgh", "khk",
+ "khm", "kik", "kin", "kir", "kmr", "knc", "kng", "knn", "koj", "kom",
+ "kon", "kor", "kpp", "kpv", "krm", "ktr", "kua", "kur", "kvs", "kwq",
+ "kxe", "kxl", "kzh", "kzj", "kzt", "lak", "lao", "lat", "lav", "lbk",
+ "leg", "lii", "lim", "lin", "lit", "llo", "lmm", "ltz", "lub", "lug",
+ "lvs", "mac", "mah", "mal", "mao", "mar", "may", "meg", "mgx", "mhr",
+ "mkd", "mlg", "mlt", "mnk", "mnt", "mof", "mol", "mon", "mri", "msa",
+ "mst", "mup", "mwd", "mwj", "mya", "myd", "myt", "nad", "nau", "nav",
+ "nbf", "nbl", "nbx", "ncp", "nde", "ndo", "nep", "nld", "nln", "nlr",
+ "nno", "nns", "nnx", "nob", "noo", "nor", "npi", "nts", "nxu", "nya",
+ "oci", "ojg", "oji", "ori", "orm", "ory", "oss", "oun", "pan", "pat",
+ "pbu", "pcr", "per", "pes", "pli", "plt", "pmc", "pmu", "pnb", "pol",
+ "por", "ppa", "ppr", "pry", "pus", "puz", "que", "quz", "rmr", "rmy",
+ "roh", "ron", "rum", "run", "rus", "sag", "san", "sap", "sca", "scc",
+ "scr", "sgl", "sin", "skk", "slk", "slo", "slv", "smd", "sme", "smo",
+ "sna", "snb", "snd", "som", "sot", "spa", "spy", "sqi", "src", "srd",
+ "srp", "ssw", "sul", "sum", "sun", "swa", "swe", "swh", "tah", "tam",
+ "tat", "tdu", "tel", "tgg", "tgk", "tgl", "tha", "thc", "thw", "thx",
+ "tib", "tid", "tie", "tir", "tkk", "tlw", "tmp", "tne", "ton", "tsf",
+ "tsn", "tso", "ttq", "tuk", "tur", "twi", "uig", "ukr", "umu", "unp",
+ "uok", "urd", "uzb", "uzn", "ven", "vie", "vol", "wel", "wgw", "wit",
+ "wiw", "wln", "wol", "xba", "xho", "xia", "xkh", "xpe", "xrq", "xsj",
+ "xsl", "ybd", "ydd", "yen", "yid", "yiy", "yma", "ymt", "yor", "yos",
+ "yuu", "zai", "zha", "zho", "zir", "zsm", "zul", "zyb",
+ };
+ static const char* aliases[408] = {
+ "aas", "aa", "ab", "dz", "af", "apf", "ami", "aeb", "jrb", "ak",
+ "sq", "sq", "am", "ar", "ar", "an", "hy", "snz", "as", "ktz",
+ "av", "ae", "ay", "ay", "nun", "az", "az", "ba", "bm", "eu",
+ "nvo", "bal", "bik", "be", "bn", "bcg", "fbl", "bir", "bho", "bi",
+ "drl", "bzc", "ebk", "iba", "bo", "bs", "br", "beb", "bg", "my",
+ "luy", "bua", "ca", "rki", "cs", "ch", "ce", "zh", "cu", "cv",
+ "mom", "cmr", "syr", "xch", "zh", "kw", "co", "pij", "quh", "cr",
+ "cr", "cy", "cs", "dnj", "da", "njz", "de", "doi", "mwr", "din",
+ "zza", "dif", "dv", "dze", "aqd", "mn", "kzk", "uth", "dwu", "nl",
+ "dbt", "dz", "et", "el", "amq", "man", "en", "eo", "ik", "et",
+ "eu", "ee", "fo", "fa", "ak", "fj", "fi", "fr", "fr", "fy",
+ "ff", "ff", "dev", "om", "wny", "grb", "ka", "de", "vaj", "gvr",
+ "esg", "gtu", "aou", "gd", "ga", "gl", "kzk", "gv", "gon", "el",
+ "gn", "nyc", "gn", "gu", "duz", "gba", "ht", "ha", "hai", "hmn",
+ "he", "hz", "srx", "hi", "ho", "jal", "hr", "hu", "hy", "opa",
+ "ig", "is", "io", "ii", "iu", "iu", "ie", "ilm", "gal", "ia",
+ "id", "ik", "is", "it", "eza", "jgk", "jv", "oyb", "ja", "kl",
+ "kn", "ks", "ka", "kr", "kk", "zkd", "tdf", "ncq", "kml", "mn",
+ "km", "ki", "rw", "ky", "ku", "kr", "kg", "kok", "kwv", "kv",
+ "kg", "ko", "jkm", "kv", "bmf", "dtp", "kj", "ku", "gdj", "yam",
+ "tvd", "kru", "dgl", "dtp", "dtp", "ksp", "lo", "la", "lv", "bnc",
+ "enl", "raq", "li", "ln", "lt", "ngt", "rmx", "lb", "lu", "lg",
+ "lv", "mk", "mh", "ml", "mi", "mr", "ms", "cir", "jbk", "chm",
+ "mk", "mg", "mt", "man", "wnn", "xnt", "ro", "mn", "mi", "ms",
+ "mry", "raj", "dmw", "vaj", "my", "aog", "mry", "xny", "na", "nv",
+ "nru", "nr", "ekc", "kdz", "nd", "ng", "ne", "nl", "azd", "nrk",
+ "nn", "nbr", "ngv", "nb", "dtd", "no", "ne", "pij", "bpp", "ny",
+ "oc", "oj", "oj", "or", "om", "or", "os", "vaj", "pa", "kxr",
+ "ps", "adx", "fa", "fa", "pi", "mg", "huw", "phr", "lah", "pl",
+ "pt", "bfy", "lcq", "prt", "ps", "pub", "qu", "qu", "emx", "rom",
+ "rm", "ro", "ro", "rn", "ru", "sg", "sa", "aqt", "hle", "sr",
+ "hr", "isk", "si", "oyb", "sk", "sk", "sl", "kmb", "se", "sm",
+ "sn", "iba", "sd", "so", "st", "es", "kln", "sq", "sc", "sc",
+ "sr", "ss", "sgd", "ulw", "su", "sw", "sv", "sw", "ty", "ta",
+ "tt", "dtp", "te", "bjp", "tg", "fil", "th", "tpo", "ola", "oyb",
+ "bo", "itd", "ras", "ti", "twm", "weo", "tyj", "kak", "to", "taj",
+ "tn", "ts", "tmh", "tk", "tr", "ak", "ug", "uk", "del", "wro",
+ "ema", "ur", "uz", "uz", "ve", "vi", "vo", "cy", "wgb", "nol",
+ "nwo", "wa", "wo", "cax", "xh", "acn", "waw", "kpe", "dmw", "suj",
+ "den", "rki", "yi", "ynq", "yi", "yrm", "lrr", "mtm", "yo", "zom",
+ "yug", "zap", "za", "zh", "scv", "ms", "zu", "za",
+ };
+
+ if (const char* replacement = SearchReplacement(languages, aliases, language)) {
+ language.Set(mozilla::MakeStringSpan(replacement));
+ return true;
+ }
+ return false;
+ }
+
+ // Language subtags of any other length have no table and are never mapped.
+ return false;
+}
+
+// Language subtags with complex mappings.
+// Derived from CLDR Supplemental Data, version 42.
+// https://unicode.org/Public/cldr/42/core.zip
+//
+// Reports whether |language| is one of the subtags listed here. This function
+// only answers the question; it doesn't modify the subtag.
+bool mozilla::intl::Locale::ComplexLanguageMapping(const LanguageSubtag& language) {
+  MOZ_ASSERT(IsStructurallyValidLanguageTag(language.Span()));
+  MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language.Span()));
+
+  switch (language.Length()) {
+    case 2:
+      // "sh" is the only two-letter language with a complex mapping.
+      return language.EqualTo("sh");
+
+    case 3: {
+      static const char languages[6][4] = {
+          "cnr", "drw", "hbs", "prs", "swc", "tnf",
+      };
+      return HasReplacement(languages, language);
+    }
+
+    default:
+      return false;
+  }
+}
+
+// Mappings from script subtags to preferred values.
+// Derived from CLDR Supplemental Data, version 42.
+// https://unicode.org/Public/cldr/42/core.zip
+//
+// Rewrites |script| in place and returns true when it has a preferred value;
+// returns false without touching |script| otherwise.
+bool mozilla::intl::Locale::ScriptMapping(ScriptSubtag& script) {
+  MOZ_ASSERT(IsStructurallyValidScriptTag(script.Span()));
+  MOZ_ASSERT(IsCanonicallyCasedScriptTag(script.Span()));
+
+  // "Qaai" is the only script subtag with a replacement.
+  if (!script.EqualTo("Qaai")) {
+    return false;
+  }
+
+  script.Set(mozilla::MakeStringSpan("Zinh"));
+  return true;
+}
+
+// Mappings from region subtags to preferred values.
+// Derived from CLDR Supplemental Data, version 42.
+// https://unicode.org/Public/cldr/42/core.zip
+//
+// When |region| is listed in one of the tables below, it is overwritten in
+// place with its replacement and true is returned. Otherwise |region| is not
+// modified and false is returned.
+bool mozilla::intl::Locale::RegionMapping(RegionSubtag& region) {
+ MOZ_ASSERT(IsStructurallyValidRegionTag(region.Span()));
+ MOZ_ASSERT(IsCanonicallyCasedRegionTag(region.Span()));
+
+ if (region.Length() == 2) {
+ // |regions| is kept sorted because SearchReplacement binary searches it;
+ // |aliases[i]| is the replacement for |regions[i]|.
+ static const char regions[23][3] = {
+ "BU", "CS", "CT", "DD", "DY", "FQ", "FX", "HV", "JT", "MI",
+ "NH", "NQ", "PU", "PZ", "QU", "RH", "TP", "UK", "VD", "WK",
+ "YD", "YU", "ZR",
+ };
+ static const char* aliases[23] = {
+ "MM", "RS", "KI", "DE", "BJ", "AQ", "FR", "BF", "UM", "UM",
+ "VU", "AQ", "UM", "PA", "EU", "ZW", "TL", "GB", "VN", "UM",
+ "YE", "RS", "CD",
+ };
+
+ if (const char* replacement = SearchReplacement(regions, aliases, region)) {
+ region.Set(mozilla::MakeStringSpan(replacement));
+ return true;
+ }
+ return false;
+ }
+
+ // Every region that isn't two characters long is looked up in the
+ // three-character table; SearchReplacement asserts the length matches.
+ {
+ static const char regions[299][4] = {
+ "004", "008", "010", "012", "016", "020", "024", "028", "031", "032",
+ "036", "040", "044", "048", "050", "051", "052", "056", "060", "064",
+ "068", "070", "072", "074", "076", "084", "086", "090", "092", "096",
+ "100", "104", "108", "112", "116", "120", "124", "132", "136", "140",
+ "144", "148", "152", "156", "158", "162", "166", "170", "174", "175",
+ "178", "180", "184", "188", "191", "192", "196", "203", "204", "208",
+ "212", "214", "218", "222", "226", "230", "231", "232", "233", "234",
+ "238", "239", "242", "246", "248", "249", "250", "254", "258", "260",
+ "262", "266", "268", "270", "275", "276", "278", "280", "288", "292",
+ "296", "300", "304", "308", "312", "316", "320", "324", "328", "332",
+ "334", "336", "340", "344", "348", "352", "356", "360", "364", "368",
+ "372", "376", "380", "384", "388", "392", "398", "400", "404", "408",
+ "410", "414", "417", "418", "422", "426", "428", "430", "434", "438",
+ "440", "442", "446", "450", "454", "458", "462", "466", "470", "474",
+ "478", "480", "484", "492", "496", "498", "499", "500", "504", "508",
+ "512", "516", "520", "524", "528", "531", "533", "534", "535", "540",
+ "548", "554", "558", "562", "566", "570", "574", "578", "580", "581",
+ "583", "584", "585", "586", "591", "598", "600", "604", "608", "612",
+ "616", "620", "624", "626", "630", "634", "638", "642", "643", "646",
+ "652", "654", "659", "660", "662", "663", "666", "670", "674", "678",
+ "682", "686", "688", "690", "694", "702", "703", "704", "705", "706",
+ "710", "716", "720", "724", "728", "729", "732", "736", "740", "744",
+ "748", "752", "756", "760", "762", "764", "768", "772", "776", "780",
+ "784", "788", "792", "795", "796", "798", "800", "804", "807", "818",
+ "826", "830", "831", "832", "833", "834", "840", "850", "854", "858",
+ "860", "862", "876", "882", "886", "887", "891", "894", "958", "959",
+ "960", "962", "963", "964", "965", "966", "967", "968", "969", "970",
+ "971", "972", "973", "974", "975", "976", "977", "978", "979", "980",
+ "981", "982", "983", "984", "985", "986", "987", "988", "989", "990",
+ "991", "992", "993", "994", "995", "996", "997", "998", "999",
+ };
+ static const char* aliases[299] = {
+ "AF", "AL", "AQ", "DZ", "AS", "AD", "AO", "AG", "AZ", "AR",
+ "AU", "AT", "BS", "BH", "BD", "AM", "BB", "BE", "BM", "BT",
+ "BO", "BA", "BW", "BV", "BR", "BZ", "IO", "SB", "VG", "BN",
+ "BG", "MM", "BI", "BY", "KH", "CM", "CA", "CV", "KY", "CF",
+ "LK", "TD", "CL", "CN", "TW", "CX", "CC", "CO", "KM", "YT",
+ "CG", "CD", "CK", "CR", "HR", "CU", "CY", "CZ", "BJ", "DK",
+ "DM", "DO", "EC", "SV", "GQ", "ET", "ET", "ER", "EE", "FO",
+ "FK", "GS", "FJ", "FI", "AX", "FR", "FR", "GF", "PF", "TF",
+ "DJ", "GA", "GE", "GM", "PS", "DE", "DE", "DE", "GH", "GI",
+ "KI", "GR", "GL", "GD", "GP", "GU", "GT", "GN", "GY", "HT",
+ "HM", "VA", "HN", "HK", "HU", "IS", "IN", "ID", "IR", "IQ",
+ "IE", "IL", "IT", "CI", "JM", "JP", "KZ", "JO", "KE", "KP",
+ "KR", "KW", "KG", "LA", "LB", "LS", "LV", "LR", "LY", "LI",
+ "LT", "LU", "MO", "MG", "MW", "MY", "MV", "ML", "MT", "MQ",
+ "MR", "MU", "MX", "MC", "MN", "MD", "ME", "MS", "MA", "MZ",
+ "OM", "NA", "NR", "NP", "NL", "CW", "AW", "SX", "BQ", "NC",
+ "VU", "NZ", "NI", "NE", "NG", "NU", "NF", "NO", "MP", "UM",
+ "FM", "MH", "PW", "PK", "PA", "PG", "PY", "PE", "PH", "PN",
+ "PL", "PT", "GW", "TL", "PR", "QA", "RE", "RO", "RU", "RW",
+ "BL", "SH", "KN", "AI", "LC", "MF", "PM", "VC", "SM", "ST",
+ "SA", "SN", "RS", "SC", "SL", "SG", "SK", "VN", "SI", "SO",
+ "ZA", "ZW", "YE", "ES", "SS", "SD", "EH", "SD", "SR", "SJ",
+ "SZ", "SE", "CH", "SY", "TJ", "TH", "TG", "TK", "TO", "TT",
+ "AE", "TN", "TR", "TM", "TC", "TV", "UG", "UA", "MK", "EG",
+ "GB", "JE", "GG", "JE", "IM", "TZ", "US", "VI", "BF", "UY",
+ "UZ", "VE", "WF", "WS", "YE", "YE", "RS", "ZM", "AA", "QM",
+ "QN", "QP", "QQ", "QR", "QS", "QT", "EU", "QV", "QW", "QX",
+ "QY", "QZ", "XA", "XB", "XC", "XD", "XE", "XF", "XG", "XH",
+ "XI", "XJ", "XK", "XL", "XM", "XN", "XO", "XP", "XQ", "XR",
+ "XS", "XT", "XU", "XV", "XW", "XX", "XY", "XZ", "ZZ",
+ };
+
+ if (const char* replacement = SearchReplacement(regions, aliases, region)) {
+ region.Set(mozilla::MakeStringSpan(replacement));
+ return true;
+ }
+ return false;
+ }
+}
+
+// Region subtags with complex mappings.
+// Derived from CLDR Supplemental Data, version 42.
+// https://unicode.org/Public/cldr/42/core.zip
+//
+// Reports whether |region| is one of the subtags listed here. This function
+// only answers the question; it doesn't modify the subtag.
+bool mozilla::intl::Locale::ComplexRegionMapping(const RegionSubtag& region) {
+  MOZ_ASSERT(IsStructurallyValidRegionTag(region.Span()));
+  MOZ_ASSERT(IsCanonicallyCasedRegionTag(region.Span()));
+
+  // Regions that aren't two characters long go through the three-character
+  // table.
+  if (region.Length() != 2) {
+    static const char regions[9][4] = {
+        "062", "172", "200", "530", "532", "536", "582", "810", "890",
+    };
+    return HasReplacement(regions, region);
+  }
+
+  return region.EqualTo("AN") || region.EqualTo("NT") ||
+         region.EqualTo("PC") || region.EqualTo("SU");
+}
+
+// Language subtags with complex mappings.
+// Derived from CLDR Supplemental Data, version 42.
+// https://unicode.org/Public/cldr/42/core.zip
+//
+// Replaces the language subtag of this locale and, when the script or region
+// subtag is missing, fills in the one that goes with the replacement. Locales
+// whose language isn't listed are left alone.
+void mozilla::intl::Locale::PerformComplexLanguageMappings() {
+  MOZ_ASSERT(IsStructurallyValidLanguageTag(Language().Span()));
+  MOZ_ASSERT(IsCanonicallyCasedLanguageTag(Language().Span()));
+
+  // At most one mapping applies, so return as soon as it has been performed.
+
+  if (Language().EqualTo("cnr")) {
+    SetLanguage("sr");
+    if (Region().Missing()) {
+      SetRegion("ME");
+    }
+    return;
+  }
+
+  if (Language().EqualTo("drw") || Language().EqualTo("prs") ||
+      Language().EqualTo("tnf")) {
+    SetLanguage("fa");
+    if (Region().Missing()) {
+      SetRegion("AF");
+    }
+    return;
+  }
+
+  if (Language().EqualTo("hbs") || Language().EqualTo("sh")) {
+    SetLanguage("sr");
+    if (Script().Missing()) {
+      SetScript("Latn");
+    }
+    return;
+  }
+
+  if (Language().EqualTo("swc")) {
+    SetLanguage("sw");
+    if (Region().Missing()) {
+      SetRegion("CD");
+    }
+    return;
+  }
+}
+
+// Region subtags with complex mappings.
+// Derived from CLDR Supplemental Data, version 42.
+// https://unicode.org/Public/cldr/42/core.zip
+//
+// Replaces the region subtag of this locale when it is one of the regions
+// handled below. Each outer branch covers one such region (or a group of
+// regions sharing the same replacements). Inside a branch the replacement is
+// selected from the language subtag, in some cases together with the script
+// subtag, and the trailing |else| supplies the fallback region used when no
+// language/script condition matches. Locales with any other region are left
+// unchanged.
+void mozilla::intl::Locale::PerformComplexRegionMappings() {
+ MOZ_ASSERT(IsStructurallyValidLanguageTag(Language().Span()));
+ MOZ_ASSERT(IsCanonicallyCasedLanguageTag(Language().Span()));
+ MOZ_ASSERT(IsStructurallyValidRegionTag(Region().Span()));
+ MOZ_ASSERT(IsCanonicallyCasedRegionTag(Region().Span()));
+
+ if (Region().EqualTo("062")) {
+ if (Language().EqualTo("oui") ||
+ (Language().EqualTo("und") && Script().EqualTo("Ougr"))) {
+ SetRegion("143");
+ }
+ else {
+ SetRegion("034");
+ }
+ }
+ else if (Region().EqualTo("172")) {
+ if (Language().EqualTo("hy") ||
+ (Language().EqualTo("und") && Script().EqualTo("Armn"))) {
+ SetRegion("AM");
+ }
+ else if (Language().EqualTo("az") ||
+ Language().EqualTo("tkr") ||
+ Language().EqualTo("tly") ||
+ Language().EqualTo("ttt")) {
+ SetRegion("AZ");
+ }
+ else if (Language().EqualTo("be")) {
+ SetRegion("BY");
+ }
+ else if (Language().EqualTo("ab") ||
+ Language().EqualTo("ka") ||
+ (Language().EqualTo("ku") && Script().EqualTo("Yezi")) ||
+ Language().EqualTo("os") ||
+ (Language().EqualTo("und") && Script().EqualTo("Geor")) ||
+ (Language().EqualTo("und") && Script().EqualTo("Yezi")) ||
+ Language().EqualTo("xmf")) {
+ SetRegion("GE");
+ }
+ else if (Language().EqualTo("ky")) {
+ SetRegion("KG");
+ }
+ else if (Language().EqualTo("kk") ||
+ (Language().EqualTo("ug") && Script().EqualTo("Cyrl"))) {
+ SetRegion("KZ");
+ }
+ else if (Language().EqualTo("gag")) {
+ SetRegion("MD");
+ }
+ else if (Language().EqualTo("tg")) {
+ SetRegion("TJ");
+ }
+ else if (Language().EqualTo("tk")) {
+ SetRegion("TM");
+ }
+ else if (Language().EqualTo("crh") ||
+ Language().EqualTo("got") ||
+ Language().EqualTo("ji") ||
+ Language().EqualTo("rue") ||
+ Language().EqualTo("uk") ||
+ (Language().EqualTo("und") && Script().EqualTo("Goth"))) {
+ SetRegion("UA");
+ }
+ else if (Language().EqualTo("kaa") ||
+ Language().EqualTo("sog") ||
+ (Language().EqualTo("und") && Script().EqualTo("Chrs")) ||
+ (Language().EqualTo("und") && Script().EqualTo("Sogd")) ||
+ (Language().EqualTo("und") && Script().EqualTo("Sogo")) ||
+ Language().EqualTo("uz") ||
+ Language().EqualTo("xco")) {
+ SetRegion("UZ");
+ }
+ else {
+ SetRegion("RU");
+ }
+ }
+ else if (Region().EqualTo("200")) {
+ if (Language().EqualTo("sk")) {
+ SetRegion("SK");
+ }
+ else {
+ SetRegion("CZ");
+ }
+ }
+ else if (Region().EqualTo("530") ||
+ Region().EqualTo("532") ||
+ Region().EqualTo("AN")) {
+ if (Language().EqualTo("vic")) {
+ SetRegion("SX");
+ }
+ else {
+ SetRegion("CW");
+ }
+ }
+ else if (Region().EqualTo("536") ||
+ Region().EqualTo("NT")) {
+ if (Language().EqualTo("akk") ||
+ Language().EqualTo("ckb") ||
+ (Language().EqualTo("ku") && Script().EqualTo("Arab")) ||
+ Language().EqualTo("syr") ||
+ (Language().EqualTo("und") && Script().EqualTo("Syrc")) ||
+ (Language().EqualTo("und") && Script().EqualTo("Xsux"))) {
+ SetRegion("IQ");
+ }
+ else {
+ SetRegion("SA");
+ }
+ }
+ else if (Region().EqualTo("582") ||
+ Region().EqualTo("PC")) {
+ if (Language().EqualTo("mh")) {
+ SetRegion("MH");
+ }
+ else if (Language().EqualTo("pau")) {
+ SetRegion("PW");
+ }
+ else {
+ SetRegion("FM");
+ }
+ }
+ // This branch repeats the language/script conditions of the "172" branch
+ // above and additionally handles EE, LT and LV.
+ else if (Region().EqualTo("810") ||
+ Region().EqualTo("SU")) {
+ if (Language().EqualTo("hy") ||
+ (Language().EqualTo("und") && Script().EqualTo("Armn"))) {
+ SetRegion("AM");
+ }
+ else if (Language().EqualTo("az") ||
+ Language().EqualTo("tkr") ||
+ Language().EqualTo("tly") ||
+ Language().EqualTo("ttt")) {
+ SetRegion("AZ");
+ }
+ else if (Language().EqualTo("be")) {
+ SetRegion("BY");
+ }
+ else if (Language().EqualTo("et") ||
+ Language().EqualTo("vro")) {
+ SetRegion("EE");
+ }
+ else if (Language().EqualTo("ab") ||
+ Language().EqualTo("ka") ||
+ (Language().EqualTo("ku") && Script().EqualTo("Yezi")) ||
+ Language().EqualTo("os") ||
+ (Language().EqualTo("und") && Script().EqualTo("Geor")) ||
+ (Language().EqualTo("und") && Script().EqualTo("Yezi")) ||
+ Language().EqualTo("xmf")) {
+ SetRegion("GE");
+ }
+ else if (Language().EqualTo("ky")) {
+ SetRegion("KG");
+ }
+ else if (Language().EqualTo("kk") ||
+ (Language().EqualTo("ug") && Script().EqualTo("Cyrl"))) {
+ SetRegion("KZ");
+ }
+ else if (Language().EqualTo("lt") ||
+ Language().EqualTo("sgs")) {
+ SetRegion("LT");
+ }
+ else if (Language().EqualTo("ltg") ||
+ Language().EqualTo("lv")) {
+ SetRegion("LV");
+ }
+ else if (Language().EqualTo("gag")) {
+ SetRegion("MD");
+ }
+ else if (Language().EqualTo("tg")) {
+ SetRegion("TJ");
+ }
+ else if (Language().EqualTo("tk")) {
+ SetRegion("TM");
+ }
+ else if (Language().EqualTo("crh") ||
+ Language().EqualTo("got") ||
+ Language().EqualTo("ji") ||
+ Language().EqualTo("rue") ||
+ Language().EqualTo("uk") ||
+ (Language().EqualTo("und") && Script().EqualTo("Goth"))) {
+ SetRegion("UA");
+ }
+ else if (Language().EqualTo("kaa") ||
+ Language().EqualTo("sog") ||
+ (Language().EqualTo("und") && Script().EqualTo("Chrs")) ||
+ (Language().EqualTo("und") && Script().EqualTo("Sogd")) ||
+ (Language().EqualTo("und") && Script().EqualTo("Sogo")) ||
+ Language().EqualTo("uz") ||
+ Language().EqualTo("xco")) {
+ SetRegion("UZ");
+ }
+ else {
+ SetRegion("RU");
+ }
+ }
+ else if (Region().EqualTo("890")) {
+ if (Language().EqualTo("bs")) {
+ SetRegion("BA");
+ }
+ else if (Language().EqualTo("hr")) {
+ SetRegion("HR");
+ }
+ else if (Language().EqualTo("mk")) {
+ SetRegion("MK");
+ }
+ else if (Language().EqualTo("sl")) {
+ SetRegion("SI");
+ }
+ else {
+ SetRegion("RS");
+ }
+ }
+}
+
+// ToCharPointer exposes the underlying C string of its argument, so that
+// IsLessThan can compare plain C strings and UniqueChars interchangeably.
+static const char* ToCharPointer(const char* str) { return str; }
+
+static const char* ToCharPointer(const mozilla::intl::UniqueChars& str) {
+  return str.get();
+}
+
+// strcmp-based "less than" predicate over any two types accepted by
+// ToCharPointer.
+template <typename T, typename U = T>
+static bool IsLessThan(const T& a, const U& b) {
+  const char* lhs = ToCharPointer(a);
+  const char* rhs = ToCharPointer(b);
+  return strcmp(lhs, rhs) < 0;
+}
+
+// Mappings from variant subtags to preferred values.
+// Derived from CLDR Supplemental Data, version 42.
+// https://unicode.org/Public/cldr/42/core.zip
+//
+// Walks over |mVariants| and, for every variant listed below, either drops
+// it, drops it and sets the region instead, or drops it and inserts its
+// replacement variant. Returns false only when inserting a replacement
+// variant fails; returns true otherwise.
+bool mozilla::intl::Locale::PerformVariantMappings() {
+ // The variant subtags need to be sorted for binary search.
+ MOZ_ASSERT(std::is_sorted(mVariants.begin(), mVariants.end(),
+ IsLessThan<decltype(mVariants)::ElementType>));
+
+ auto removeVariantAt = [&](size_t index) {
+ mVariants.erase(mVariants.begin() + index);
+ };
+
+ // Returns true when |variant| is already present or was inserted, and false
+ // when the insertion failed.
+ auto insertVariantSortedIfNotPresent = [&](const char* variant) {
+ auto* p = std::lower_bound(
+ mVariants.begin(), mVariants.end(), variant,
+ IsLessThan<decltype(mVariants)::ElementType, decltype(variant)>);
+
+ // Don't insert the replacement when already present.
+ if (p != mVariants.end() && strcmp(p->get(), variant) == 0) {
+ return true;
+ }
+
+ // Insert the preferred variant in sort order.
+ auto preferred = DuplicateStringToUniqueChars(variant);
+ return !!mVariants.insert(p, std::move(preferred));
+ };
+
+ // |i| is deliberately not advanced in the loop header: it is only
+ // incremented when the current variant is kept. After a removal, index |i|
+ // is examined again on the next iteration.
+ for (size_t i = 0; i < mVariants.length();) {
+ const char* variant = mVariants[i].get();
+ MOZ_ASSERT(IsCanonicallyCasedVariantTag(mozilla::MakeStringSpan(variant)));
+
+ // These variants are removed without a replacement.
+ if (strcmp(variant, "arevela") == 0 ||
+ strcmp(variant, "arevmda") == 0 ||
+ strcmp(variant, "bokmal") == 0 ||
+ strcmp(variant, "hakka") == 0 ||
+ strcmp(variant, "lojban") == 0 ||
+ strcmp(variant, "nynorsk") == 0 ||
+ strcmp(variant, "saaho") == 0 ||
+ strcmp(variant, "xiang") == 0) {
+ removeVariantAt(i);
+ }
+ // This variant is replaced by a region subtag.
+ else if (strcmp(variant, "aaland") == 0) {
+ removeVariantAt(i);
+ SetRegion("AX");
+ }
+ // These variants are replaced by another variant.
+ else if (strcmp(variant, "heploc") == 0) {
+ removeVariantAt(i);
+ if (!insertVariantSortedIfNotPresent("alalc97")) {
+ return false;
+ }
+ }
+ else if (strcmp(variant, "polytoni") == 0) {
+ removeVariantAt(i);
+ if (!insertVariantSortedIfNotPresent("polyton")) {
+ return false;
+ }
+ }
+ else {
+ i++;
+ }
+ }
+ return true;
+}
+
+// Canonicalize legacy locale identifiers.
+// Derived from CLDR Supplemental Data, version 42.
+// https://unicode.org/Public/cldr/42/core.zip
+//
+// Returns false only when inserting a replacement variant fails; returns true
+// in every other case, including when nothing had to be changed.
+bool mozilla::intl::Locale::UpdateLegacyMappings() {
+ // We're mapping legacy tags to non-legacy form here.
+ // Other tags remain unchanged.
+ //
+ // Legacy tags are either sign language tags ("sgn") or have one or multiple
+ // variant subtags. Therefore we can quickly exclude most tags by checking
+ // these two subtags.
+
+ MOZ_ASSERT(IsCanonicallyCasedLanguageTag(Language().Span()));
+
+ if (!Language().EqualTo("sgn") && mVariants.length() == 0) {
+ return true;
+ }
+
+#ifdef DEBUG
+ for (const auto& variant : Variants()) {
+ MOZ_ASSERT(IsStructurallyValidVariantTag(variant));
+ MOZ_ASSERT(IsCanonicallyCasedVariantTag(variant));
+ }
+#endif
+
+ // The variant subtags need to be sorted for binary search.
+ MOZ_ASSERT(std::is_sorted(mVariants.begin(), mVariants.end(),
+ IsLessThan<decltype(mVariants)::ElementType>));
+
+ // Returns a pointer to the entry of |mVariants| equal to |variant|, or
+ // nullptr when there is no such entry.
+ auto findVariant = [this](const char* variant) {
+ auto* p = std::lower_bound(mVariants.begin(), mVariants.end(), variant,
+ IsLessThan<decltype(mVariants)::ElementType,
+ decltype(variant)>);
+
+ if (p != mVariants.end() && strcmp(p->get(), variant) == 0) {
+ return p;
+ }
+ return static_cast<decltype(p)>(nullptr);
+ };
+
+ // Returns true when |variant| is already present or was inserted, and false
+ // when the insertion failed.
+ auto insertVariantSortedIfNotPresent = [&](const char* variant) {
+ auto* p = std::lower_bound(mVariants.begin(), mVariants.end(), variant,
+ IsLessThan<decltype(mVariants)::ElementType,
+ decltype(variant)>);
+
+ // Don't insert the replacement when already present.
+ if (p != mVariants.end() && strcmp(p->get(), variant) == 0) {
+ return true;
+ }
+
+ // Insert the preferred variant in sort order.
+ auto preferred = DuplicateStringToUniqueChars(variant);
+ return !!mVariants.insert(p, std::move(preferred));
+ };
+
+ // Removes the entry |p| points at.
+ auto removeVariant = [&](auto* p) {
+ size_t index = std::distance(mVariants.begin(), p);
+ mVariants.erase(mVariants.begin() + index);
+ };
+
+ // Removes the two entries |p| and |q| point at. Both indices are computed
+ // up front and the entry at the higher index is erased first, so the lower
+ // index still refers to the intended entry for the second erase.
+ auto removeVariants = [&](auto* p, auto* q) {
+ size_t pIndex = std::distance(mVariants.begin(), p);
+ size_t qIndex = std::distance(mVariants.begin(), q);
+ MOZ_ASSERT(pIndex < qIndex, "variant subtags are sorted");
+
+ mVariants.erase(mVariants.begin() + qIndex);
+ mVariants.erase(mVariants.begin() + pIndex);
+ };
+
+ // The variant pair "hepburn" + "heploc" is replaced by the single variant
+ // "alalc97", independent of the language subtag.
+ if (mVariants.length() >= 2) {
+ if (auto* hepburn = findVariant("hepburn")) {
+ if (auto* heploc = findVariant("heploc")) {
+ removeVariants(hepburn, heploc);
+
+ if (!insertVariantSortedIfNotPresent("alalc97")) {
+ return false;
+ }
+ }
+ }
+ }
+
+ // The remaining mappings depend on the language subtag.
+ if (Language().EqualTo("sgn")) {
+ // When the sign language mapping succeeds, it has rewritten |mLanguage|
+ // and the region is cleared.
+ if (Region().Present() && SignLanguageMapping(mLanguage, Region())) {
+ mRegion.Set(mozilla::MakeStringSpan(""));
+ }
+ }
+ else if (Language().EqualTo("aa") ||
+ Language().EqualTo("aar")) {
+ if (auto* saaho = findVariant("saaho")) {
+ removeVariant(saaho);
+ SetLanguage("ssy");
+ }
+ }
+ else if (Language().EqualTo("arm") ||
+ Language().EqualTo("hy") ||
+ Language().EqualTo("hye")) {
+ if (auto* arevmda = findVariant("arevmda")) {
+ removeVariant(arevmda);
+ SetLanguage("hyw");
+ }
+ }
+ else if (Language().EqualTo("art")) {
+ if (auto* lojban = findVariant("lojban")) {
+ removeVariant(lojban);
+ SetLanguage("jbo");
+ }
+ }
+ else if (Language().EqualTo("cel")) {
+ if (auto* gaulish = findVariant("gaulish")) {
+ removeVariant(gaulish);
+ SetLanguage("xtg");
+ }
+ }
+ else if (Language().EqualTo("chi") ||
+ Language().EqualTo("cmn") ||
+ Language().EqualTo("zh") ||
+ Language().EqualTo("zho")) {
+ // The two-variant combinations are checked before the single variants.
+ if (auto* guoyu = findVariant("guoyu")) {
+ if (auto* hakka = findVariant("hakka")) {
+ removeVariants(guoyu, hakka);
+ SetLanguage("hak");
+ return true;
+ }
+ }
+ if (auto* guoyu = findVariant("guoyu")) {
+ if (auto* xiang = findVariant("xiang")) {
+ removeVariants(guoyu, xiang);
+ SetLanguage("hsn");
+ return true;
+ }
+ }
+ if (auto* guoyu = findVariant("guoyu")) {
+ removeVariant(guoyu);
+ SetLanguage("zh");
+ }
+ else if (auto* hakka = findVariant("hakka")) {
+ removeVariant(hakka);
+ SetLanguage("hak");
+ }
+ else if (auto* xiang = findVariant("xiang")) {
+ removeVariant(xiang);
+ SetLanguage("hsn");
+ }
+ }
+ else if (Language().EqualTo("no") ||
+ Language().EqualTo("nor")) {
+ if (auto* bokmal = findVariant("bokmal")) {
+ removeVariant(bokmal);
+ SetLanguage("nb");
+ }
+ else if (auto* nynorsk = findVariant("nynorsk")) {
+ removeVariant(nynorsk);
+ SetLanguage("nn");
+ }
+ }
+
+ return true;
+}
+
+// Mappings from legacy sign languages.
+// Derived from CLDR Supplemental Data, version 42.
+// https://unicode.org/Public/cldr/42/core.zip
+//
+// |language| must be "sgn". When |region| has an entry in one of the tables
+// below, |language| is overwritten with the mapped language and true is
+// returned. Otherwise |language| is not modified and false is returned.
+bool mozilla::intl::Locale::SignLanguageMapping(LanguageSubtag& language,
+                                                const RegionSubtag& region) {
+  MOZ_ASSERT(language.EqualTo("sgn"));
+  MOZ_ASSERT(IsStructurallyValidRegionTag(region.Span()));
+  MOZ_ASSERT(IsCanonicallyCasedRegionTag(region.Span()));
+
+  // In both tables |regions| is sorted for binary search and |aliases| holds
+  // the replacement language at the matching index.
+  const char* preferred = nullptr;
+  if (region.Length() == 2) {
+    static const char regions[22][3] = {
+        "BR", "CO", "DD", "DE", "DK", "ES", "FR", "FX", "GB", "GR",
+        "IE", "IT", "JP", "MX", "NI", "NL", "NO", "PT", "SE", "UK",
+        "US", "ZA",
+    };
+    static const char* aliases[22] = {
+        "bzs", "csn", "gsg", "gsg", "dsl", "ssp", "fsl", "fsl", "bfi", "gss",
+        "isg", "ise", "jsl", "mfs", "ncs", "dse", "nsi", "psr", "swl", "bfi",
+        "ase", "sfs",
+    };
+
+    preferred = SearchReplacement(regions, aliases, region);
+  } else {
+    static const char regions[22][4] = {
+        "076", "170", "208", "249", "250", "276", "278", "280", "300", "372",
+        "380", "392", "484", "528", "558", "578", "620", "710", "724", "752",
+        "826", "840",
+    };
+    static const char* aliases[22] = {
+        "bzs", "csn", "dsl", "fsl", "fsl", "gsg", "gsg", "gsg", "gss", "isg",
+        "ise", "jsl", "mfs", "dse", "ncs", "nsi", "psr", "sfs", "ssp", "swl",
+        "bfi", "ase",
+    };
+
+    preferred = SearchReplacement(regions, aliases, region);
+  }
+
+  if (!preferred) {
+    return false;
+  }
+
+  language.Set(mozilla::MakeStringSpan(preferred));
+  return true;
+}
+
+// Compares the leading characters of |key| with the string literal |str|,
+// which must be exactly UnicodeKeyLength characters long. The size of |key|
+// isn't checked here.
+template <size_t Length>
+static inline bool IsUnicodeKey(mozilla::Span<const char> key, const char (&str)[Length]) {
+  static_assert(Length == UnicodeKeyLength + 1,
+                "Unicode extension key is two characters long");
+
+  // |Length| counts the literal's null-terminator, which isn't compared.
+  const int order = memcmp(key.data(), str, Length - 1);
+  return order == 0;
+}
+
+ // Returns true when the Unicode extension |type| has exactly the same length
+ // and characters as the literal |str|.
+ template <size_t Length>
+ static inline bool IsUnicodeType(mozilla::Span<const char> type,
+                                  const char (&str)[Length]) {
+   // |Length| counts the literal's terminating null character as well.
+   static_assert(Length > UnicodeKeyLength + 1,
+                 "Unicode extension type contains more than two characters");
+
+   constexpr size_t typeLength = Length - 1;
+   if (type.size() != typeLength) {
+     return false;
+   }
+   return memcmp(type.data(), str, typeLength) == 0;
+ }
+
+ // Three-way comparison of the zero-terminated string |a| with the span |b|,
+ // comparing characters as unsigned values.
+ //
+ // Returns a negative number when |a| sorts before |b|, zero when both are
+ // equal, and a positive number when |a| sorts after |b|.
+ static int32_t CompareUnicodeType(const char* a, mozilla::Span<const char> b) {
+   MOZ_ASSERT(!std::char_traits<char>::find(b.data(), b.size(), '\0'),
+              "unexpected null-character in string");
+
+   using UnsignedChar = unsigned char;
+
+   const size_t length = b.size();
+   size_t index = 0;
+   while (index < length) {
+     // |b| holds no null character, so the terminator of |a| always differs
+     // from b[index]. The loop therefore returns at the terminator at the
+     // latest and never reads past the end of |a|.
+     const int32_t difference =
+         UnsignedChar(a[index]) - UnsignedChar(b[index]);
+     if (difference != 0) {
+       return difference;
+     }
+     ++index;
+   }
+
+   // All of |b| matched: a[length] is the terminator (zero) when the strings
+   // are equal, or a positive character value when |b| is a prefix of |a|.
+   return int32_t(UnsignedChar(a[length]));
+ }
+
+ // Binary-searches the sorted table |types| for |type| and returns the entry
+ // at the same index in |aliases|, or nullptr when |type| is not listed.
+ template <size_t Length>
+ static inline const char* SearchUnicodeReplacement(
+     const char* (&types)[Length], const char* (&aliases)[Length],
+     mozilla::Span<const char> type) {
+   const auto sortsBefore = [](const auto& candidate, const auto& wanted) {
+     return CompareUnicodeType(candidate, wanted) < 0;
+   };
+
+   const auto first = std::begin(types);
+   const auto last = std::end(types);
+   const auto found = std::lower_bound(first, last, type, sortsBefore);
+
+   // lower_bound only yields the first entry that is not less than |type|, so
+   // an exact match still has to be confirmed.
+   if (found == last || CompareUnicodeType(*found, type) != 0) {
+     return nullptr;
+   }
+   return aliases[std::distance(first, found)];
+ }
+
+/**
+ * Mapping from deprecated BCP 47 Unicode extension types to their preferred
+ * values.
+ *
+ * Spec: https://www.unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files
+ * Spec: https://www.unicode.org/reports/tr35/#t_Extension
+ */
+const char* mozilla::intl::Locale::ReplaceUnicodeExtensionType(
+ mozilla::Span<const char> key, mozilla::Span<const char> type) {
+ MOZ_ASSERT(key.size() == UnicodeKeyLength);
+ MOZ_ASSERT(IsCanonicallyCasedUnicodeKey(key));
+
+ MOZ_ASSERT(type.size() > UnicodeKeyLength);
+ MOZ_ASSERT(IsCanonicallyCasedUnicodeType(type));
+
+ if (IsUnicodeKey(key, "ca")) {
+ if (IsUnicodeType(type, "ethiopic-amete-alem")) {
+ return "ethioaa";
+ }
+ if (IsUnicodeType(type, "islamicc")) {
+ return "islamic-civil";
+ }
+ }
+ else if (IsUnicodeKey(key, "kb") ||
+ IsUnicodeKey(key, "kc") ||
+ IsUnicodeKey(key, "kh") ||
+ IsUnicodeKey(key, "kk") ||
+ IsUnicodeKey(key, "kn")) {
+ if (IsUnicodeType(type, "yes")) {
+ return "true";
+ }
+ }
+ else if (IsUnicodeKey(key, "ks")) {
+ if (IsUnicodeType(type, "primary")) {
+ return "level1";
+ }
+ if (IsUnicodeType(type, "tertiary")) {
+ return "level3";
+ }
+ }
+ else if (IsUnicodeKey(key, "ms")) {
+ if (IsUnicodeType(type, "imperial")) {
+ return "uksystem";
+ }
+ }
+ else if (IsUnicodeKey(key, "rg") ||
+ IsUnicodeKey(key, "sd")) {
+ static const char* types[144] = {
+ "cn11" , "cn12" , "cn13" , "cn14" , "cn15" , "cn21" , "cn22" ,
+ "cn23" , "cn31" , "cn32" , "cn33" , "cn34" , "cn35" , "cn36" ,
+ "cn37" , "cn41" , "cn42" , "cn43" , "cn44" , "cn45" , "cn46" ,
+ "cn50" , "cn51" , "cn52" , "cn53" , "cn54" , "cn61" , "cn62" ,
+ "cn63" , "cn64" , "cn65" , "cn71" , "cn91" , "cn92" , "cz10a" ,
+ "cz10b" , "cz10c" , "cz10d" , "cz10e" , "cz10f" , "cz611" , "cz612" ,
+ "cz613" , "cz614" , "cz615" , "cz621" , "cz622" , "cz623" , "cz624" ,
+ "cz626" , "cz627" , "czjc" , "czjm" , "czka" , "czkr" , "czli" ,
+ "czmo" , "czol" , "czpa" , "czpl" , "czpr" , "czst" , "czus" ,
+ "czvy" , "czzl" , "fi01" , "fra" , "frb" , "frbl" , "frc" ,
+ "frcp" , "frd" , "fre" , "frf" , "frg" , "frgf" , "frgp" ,
+ "frh" , "fri" , "frj" , "frk" , "frl" , "frm" , "frmf" ,
+ "frmq" , "frn" , "frnc" , "fro" , "frp" , "frpf" , "frpm" ,
+ "frq" , "frr" , "frre" , "frs" , "frt" , "frtf" , "fru" ,
+ "frv" , "frwf" , "fryt" , "laxn" , "lud" , "lug" , "lul" ,
+ "mrnkc" , "nlaw" , "nlcw" , "nlsx" , "no23" , "nzn" , "nzs" ,
+ "omba" , "omsh" , "plds" , "plkp" , "pllb" , "plld" , "pllu" ,
+ "plma" , "plmz" , "plop" , "plpd" , "plpk" , "plpm" , "plsk" ,
+ "plsl" , "plwn" , "plwp" , "plzp" , "shta" , "tteto" , "ttrcm" ,
+ "ttwto" , "twkhq" , "twtnq" , "twtpq" , "twtxq" , "usas" , "usgu" ,
+ "usmp" , "uspr" , "usum" , "usvi" ,
+ };
+ static const char* aliases[144] = {
+ "cnbj" , "cntj" , "cnhe" , "cnsx" , "cnmn" , "cnln" , "cnjl" ,
+ "cnhl" , "cnsh" , "cnjs" , "cnzj" , "cnah" , "cnfj" , "cnjx" ,
+ "cnsd" , "cnha" , "cnhb" , "cnhn" , "cngd" , "cngx" , "cnhi" ,
+ "cncq" , "cnsc" , "cngz" , "cnyn" , "cnxz" , "cnsn" , "cngs" ,
+ "cnqh" , "cnnx" , "cnxj" , "twzzzz", "hkzzzz", "mozzzz", "cz110" ,
+ "cz111" , "cz112" , "cz113" , "cz114" , "cz115" , "cz663" , "cz632" ,
+ "cz633" , "cz634" , "cz635" , "cz641" , "cz642" , "cz643" , "cz644" ,
+ "cz646" , "cz647" , "cz31" , "cz64" , "cz41" , "cz52" , "cz51" ,
+ "cz80" , "cz71" , "cz53" , "cz32" , "cz10" , "cz20" , "cz42" ,
+ "cz63" , "cz72" , "axzzzz", "frges" , "frnaq" , "blzzzz", "frara" ,
+ "cpzzzz", "frbfc" , "frbre" , "frcvl" , "frges" , "gfzzzz", "gpzzzz",
+ "frcor" , "frbfc" , "fridf" , "frocc" , "frnaq" , "frges" , "mfzzzz",
+ "mqzzzz", "frocc" , "nczzzz", "frhdf" , "frnor" , "pfzzzz", "pmzzzz",
+ "frnor" , "frpdl" , "rezzzz", "frhdf" , "frnaq" , "tfzzzz", "frpac" ,
+ "frara" , "wfzzzz", "ytzzzz", "laxs" , "lucl" , "luec" , "luca" ,
+ "mr13" , "awzzzz", "cwzzzz", "sxzzzz", "no50" , "nzauk" , "nzcan" ,
+ "ombj" , "omsj" , "pl02" , "pl04" , "pl08" , "pl10" , "pl06" ,
+ "pl12" , "pl14" , "pl16" , "pl20" , "pl18" , "pl22" , "pl26" ,
+ "pl24" , "pl28" , "pl30" , "pl32" , "tazzzz", "tttob" , "ttmrc" ,
+ "tttob" , "twkhh" , "twtnn" , "twnwt" , "twtxg" , "aszzzz", "guzzzz",
+ "mpzzzz", "przzzz", "umzzzz", "vizzzz",
+ };
+ return SearchUnicodeReplacement(types, aliases, type);
+ }
+ else if (IsUnicodeKey(key, "tz")) {
+ static const char* types[29] = {
+ "aqams" , "cnckg" , "cnhrb" , "cnkhg" , "cuba" , "egypt" ,
+ "eire" , "est" , "gaza" , "gmt0" , "hongkong", "hst" ,
+ "iceland" , "iran" , "israel" , "jamaica" , "japan" , "libya" ,
+ "mst" , "navajo" , "poland" , "portugal", "prc" , "roc" ,
+ "rok" , "turkey" , "uct" , "usnavajo", "zulu" ,
+ };
+ static const char* aliases[29] = {
+ "nzakl" , "cnsha" , "cnsha" , "cnurc" , "cuhav" , "egcai" ,
+ "iedub" , "utcw05" , "gazastrp", "gmt" , "hkhkg" , "utcw10" ,
+ "isrey" , "irthr" , "jeruslm" , "jmkin" , "jptyo" , "lytip" ,
+ "utcw07" , "usden" , "plwaw" , "ptlis" , "cnsha" , "twtpe" ,
+ "krsel" , "trist" , "utc" , "usden" , "utc" ,
+ };
+ return SearchUnicodeReplacement(types, aliases, type);
+ }
+ return nullptr;
+}
+
+ // Returns true when the Transform extension |key| equals the literal |str|.
+ //
+ // Only the first |Length - 1| characters of |key| are inspected; the size of
+ // |key| itself is not checked here.
+ template <size_t Length>
+ static inline bool IsTransformKey(mozilla::Span<const char> key,
+                                   const char (&str)[Length]) {
+   // |Length| counts the literal's terminating null character as well.
+   static_assert(Length == TransformKeyLength + 1,
+                 "Transform extension key is two characters long");
+
+   constexpr size_t keyLength = Length - 1;
+   const int difference = memcmp(key.data(), str, keyLength);
+   return difference == 0;
+ }
+
+ // Returns true when the Transform extension |type| has exactly the same
+ // length and characters as the literal |str|.
+ template <size_t Length>
+ static inline bool IsTransformType(mozilla::Span<const char> type,
+                                    const char (&str)[Length]) {
+   // |Length| counts the literal's terminating null character as well.
+   static_assert(Length > TransformKeyLength + 1,
+                 "Transform extension type contains more than two characters");
+
+   constexpr size_t typeLength = Length - 1;
+   if (type.size() != typeLength) {
+     return false;
+   }
+   return memcmp(type.data(), str, typeLength) == 0;
+ }
+
+/**
+ * Mapping from deprecated BCP 47 Transform extension types to their preferred
+ * values.
+ *
+ * Spec: https://www.unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files
+ * Spec: https://www.unicode.org/reports/tr35/#t_Extension
+ */
+const char* mozilla::intl::Locale::ReplaceTransformExtensionType(
+ mozilla::Span<const char> key, mozilla::Span<const char> type) {
+ MOZ_ASSERT(key.size() == TransformKeyLength);
+ MOZ_ASSERT(IsCanonicallyCasedTransformKey(key));
+
+ MOZ_ASSERT(type.size() > TransformKeyLength);
+ MOZ_ASSERT(IsCanonicallyCasedTransformType(type));
+
+ if (IsTransformKey(key, "d0")) {
+ if (IsTransformType(type, "name")) {
+ return "charname";
+ }
+ }
+ else if (IsTransformKey(key, "m0")) {
+ if (IsTransformType(type, "beta-metsehaf")) {
+ return "betamets";
+ }
+ if (IsTransformType(type, "ies-jes")) {
+ return "iesjes";
+ }
+ if (IsTransformType(type, "names")) {
+ return "prprname";
+ }
+ if (IsTransformType(type, "tekie-alibekit")) {
+ return "tekieali";
+ }
+ }
+ return nullptr;
+}
diff --git a/intl/components/src/MeasureUnit.cpp b/intl/components/src/MeasureUnit.cpp
new file mode 100644
index 0000000000..3b932c9168
--- /dev/null
+++ b/intl/components/src/MeasureUnit.cpp
@@ -0,0 +1,110 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/intl/MeasureUnit.h"
+
+#include "unicode/udata.h"
+#include "unicode/ures.h"
+#include "unicode/utypes.h"
+
+namespace mozilla::intl {
+
+ // Deleter call operator: closes the resource bundle |aPtr| with ures_close().
+ void MeasureUnit::UResourceBundleDeleter::operator()(UResourceBundle* aPtr) {
+ ures_close(aPtr);
+ }
+
+ // Takes ownership of both resource bundles and caches the number of entries
+ // in the "units" table.
+ //
+ // |mUnits| is declared before |mUnitsSize|, so it is already initialized when
+ // the size is queried in the initializer list.
+ MeasureUnit::Enumeration::Enumeration(UniqueUResourceBundle aRootLocale,
+                                       UniqueUResourceBundle aUnits)
+     : mRootLocale(std::move(aRootLocale)),
+       mUnits(std::move(aUnits)),
+       mUnitsSize(ures_getSize(mUnits.get())) {}
+
+ // Returns the identifier of the measurement unit the iterator currently
+ // refers to, or an error result once an ICU error has occurred.
+ MeasureUnit::Enumeration::Iterator::value_type
+ MeasureUnit::Enumeration::Iterator::operator*() const {
+   if (!mHasError) {
+     // The key of the current subtype bundle is the unit's name.
+     const char* unitIdentifier = ures_getKey(mSubtype.get());
+     MOZ_ASSERT(unitIdentifier);
+     return MakeStringSpan(unitIdentifier);
+   }
+
+   // A previous ICU failure makes every dereference report an error.
+   return Err(InternalError{});
+ }
+
+ // Moves this iterator to the next measurement unit.
+ //
+ // The subtypes of the current |mType| table are visited first. Once they are
+ // exhausted, the next type table is fetched from the enumeration's "units"
+ // table and the search restarts there. When nothing is left, the |mType*|
+ // fields are zeroed, which is the end-iterator state. An ICU failure sets
+ // |mHasError| and leaves all other fields untouched.
+ void MeasureUnit::Enumeration::Iterator::advance() {
+ // Reject any attempts to modify this iterator after an error has occurred.
+ if (mHasError) {
+ return;
+ }
+
+ while (true) {
+ // Read the next measurement unit in the types table.
+ if (mTypePos < mTypeSize) {
+ UErrorCode status = U_ZERO_ERROR;
+ UResourceBundle* rawSubtype =
+ ures_getByIndex(mType.get(), mTypePos, nullptr, &status);
+ if (U_FAILURE(status)) {
+ mHasError = true;
+ return;
+ }
+
+ mTypePos += 1;
+ // |mSubtype| takes ownership of the new bundle and releases the old one.
+ mSubtype.reset(rawSubtype);
+ return;
+ }
+
+ // Read the next measurement unit type in the "units" table.
+ if (mUnitsPos < mEnumeration.mUnitsSize) {
+ UErrorCode status = U_ZERO_ERROR;
+ UResourceBundle* rawType = ures_getByIndex(mEnumeration.mUnits.get(),
+ mUnitsPos, nullptr, &status);
+ if (U_FAILURE(status)) {
+ mHasError = true;
+ return;
+ }
+
+ mUnitsPos += 1;
+ mType.reset(rawType);
+ mTypeSize = ures_getSize(rawType);
+ mTypePos = 0;
+ // Loop again: the first branch now reads the first subtype of the new
+ // table, or is skipped when that table is empty.
+ continue;
+ }
+
+ // All measurement units have been processed. Reset the two |mType*| fields
+ // to zero to match the end-iterator state and then return.
+ MOZ_ASSERT(mUnitsPos == mEnumeration.mUnitsSize);
+ mTypePos = 0;
+ mTypeSize = 0;
+ return;
+ }
+ }
+
+ // Opens the "unit" resource bundle of the root locale together with its
+ // "units" table and wraps both in an Enumeration. Any ICU failure is
+ // returned as an ICUError.
+ Result<MeasureUnit::Enumeration, ICUError>
+ MeasureUnit::Enumeration::TryCreate() {
+   // The available measurement units are listed in the root locale's bundle.
+   static const char packageName[] =
+       U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "unit";
+   static const char rootLocale[] = "";
+
+   UErrorCode status = U_ZERO_ERROR;
+
+   UniqueUResourceBundle rootBundle(
+       ures_open(packageName, rootLocale, &status));
+   if (U_FAILURE(status)) {
+     return Err(ToICUError(status));
+   }
+
+   UniqueUResourceBundle unitsBundle(
+       ures_getByKey(rootBundle.get(), "units", nullptr, &status));
+   if (U_FAILURE(status)) {
+     return Err(ToICUError(status));
+   }
+
+   return MeasureUnit::Enumeration(std::move(rootBundle),
+                                   std::move(unitsBundle));
+ }
+
+} // namespace mozilla::intl
diff --git a/intl/components/src/MeasureUnit.h b/intl/components/src/MeasureUnit.h
new file mode 100644
index 0000000000..8a8cf1629a
--- /dev/null
+++ b/intl/components/src/MeasureUnit.h
@@ -0,0 +1,155 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef intl_components_MeasureUnit_h_
+#define intl_components_MeasureUnit_h_
+
+#include "mozilla/Assertions.h"
+#include "mozilla/intl/ICU4CGlue.h"
+#include "mozilla/intl/ICUError.h"
+#include "mozilla/Result.h"
+#include "mozilla/Span.h"
+#include "mozilla/UniquePtr.h"
+
+#include <iterator>
+#include <stddef.h>
+#include <stdint.h>
+#include <utility>
+
+struct UResourceBundle;
+
+namespace mozilla::intl {
+
+/**
+ * This component is a Mozilla-focused API for working with measurement units in
+ * internationalization code. It is used in coordination with other operations
+ * such as number formatting.
+ */
+class MeasureUnit final {
+ class UResourceBundleDeleter {
+ public:
+ void operator()(UResourceBundle* aPtr);
+ };
+
+ using UniqueUResourceBundle =
+ UniquePtr<UResourceBundle, UResourceBundleDeleter>;
+
+ public:
+ MeasureUnit() = delete;
+
+ class Enumeration final {
+ // Resource bundle for the root locale.
+ UniqueUResourceBundle mRootLocale = nullptr;
+
+ // Resource bundle for the root locale's "units" resource table.
+ UniqueUResourceBundle mUnits = nullptr;
+
+ // The overall amount of available units.
+ int32_t mUnitsSize = 0;
+
+ public:
+ Enumeration(UniqueUResourceBundle aRootLocale,
+ UniqueUResourceBundle aUnits);
+
+ class Iterator {
+ public:
+ // std::iterator traits.
+ using iterator_category = std::input_iterator_tag;
+ using value_type = SpanResult<char>;
+ using difference_type = ptrdiff_t;
+ using pointer = value_type*;
+ using reference = value_type&;
+
+ private:
+ const Enumeration& mEnumeration;
+
+ // Resource bundle to a measurement type within the "units" table.
+ //
+ // Measurement types describe various categories, like "area", "length",
+ // or "mass".
+ UniqueUResourceBundle mType = nullptr;
+
+ // Resource bundle to a specific subtype within the type table.
+ //
+ // Measurement subtypes describe concrete measure units, like "acre",
+ // "meter", or "kilogram".
+ UniqueUResourceBundle mSubtype = nullptr;
+
+ // The next position within the "units" table.
+ int32_t mUnitsPos = 0;
+
+ // The overall amount of types within the |mType| table.
+ int32_t mTypeSize = 0;
+
+ // The next position within the |mType| table.
+ int32_t mTypePos = 0;
+
+ // Flag set when an ICU error has occurred. All further operations on this
+ // iterator will return an error result when this flag is set.
+ bool mHasError = false;
+
+ void advance();
+
+ public:
+ Iterator(const Enumeration& aEnumeration, int32_t aUnitsPos)
+ : mEnumeration(aEnumeration), mUnitsPos(aUnitsPos) {
+ advance();
+ }
+
+ Iterator& operator++() {
+ advance();
+ return *this;
+ }
+
+ // The post-increment operator would return an invalid iterator, so it's
+ // not implemented.
+ Iterator operator++(int) = delete;
+
+ bool operator==(const Iterator& aOther) const {
+ // It's an error to compare an iterator against an iterator from a
+ // different enumeration.
+ MOZ_ASSERT(&mEnumeration == &aOther.mEnumeration);
+
+ return mUnitsPos == aOther.mUnitsPos && mTypeSize == aOther.mTypeSize &&
+ mTypePos == aOther.mTypePos && mHasError == aOther.mHasError;
+ }
+
+ bool operator!=(const Iterator& aOther) const {
+ return !(*this == aOther);
+ }
+
+ value_type operator*() const;
+ };
+
+ friend class Iterator;
+
+ // std::iterator begin() and end() methods.
+
+ /**
+ * Return an iterator pointing to the start of the "units" table.
+ */
+ Iterator begin() { return Iterator(*this, 0); }
+
+ /**
+ * Return an iterator pointing to the end of the "units" table.
+ */
+ Iterator end() { return Iterator(*this, mUnitsSize); }
+
+ /**
+ * Create a new measurement unit enumeration.
+ */
+ static Result<Enumeration, ICUError> TryCreate();
+ };
+
+ /**
+ * Return an enumeration over all available measurement units.
+ */
+ static Result<Enumeration, ICUError> GetAvailable() {
+ return Enumeration::TryCreate();
+ }
+};
+
+} // namespace mozilla::intl
+
+#endif
diff --git a/intl/components/src/MeasureUnitGenerated.h b/intl/components/src/MeasureUnitGenerated.h
new file mode 100644
index 0000000000..8febc88649
--- /dev/null
+++ b/intl/components/src/MeasureUnitGenerated.h
@@ -0,0 +1,70 @@
+// Generated by make_intl_data.py. DO NOT EDIT.
+
+#ifndef intl_components_MeasureUnitGenerated_h
+#define intl_components_MeasureUnitGenerated_h
+
+namespace mozilla::intl {
+
+ // A simple measurement unit together with the measurement type it belongs to.
+ struct SimpleMeasureUnit {
+ // Measurement type of the unit, for example "length" or "duration".
+ const char* const type;
+ // Identifier of the unit itself, for example "meter" or "day".
+ const char* const name;
+ };
+
+/**
+ * The list of currently supported simple unit identifiers.
+ *
+ * The list must be kept in alphabetical order of |name|.
+ */
+inline constexpr SimpleMeasureUnit simpleMeasureUnits[] = {
+ // clang-format off
+ {"area", "acre"},
+ {"digital", "bit"},
+ {"digital", "byte"},
+ {"temperature", "celsius"},
+ {"length", "centimeter"},
+ {"duration", "day"},
+ {"angle", "degree"},
+ {"temperature", "fahrenheit"},
+ {"volume", "fluid-ounce"},
+ {"length", "foot"},
+ {"volume", "gallon"},
+ {"digital", "gigabit"},
+ {"digital", "gigabyte"},
+ {"mass", "gram"},
+ {"area", "hectare"},
+ {"duration", "hour"},
+ {"length", "inch"},
+ {"digital", "kilobit"},
+ {"digital", "kilobyte"},
+ {"mass", "kilogram"},
+ {"length", "kilometer"},
+ {"volume", "liter"},
+ {"digital", "megabit"},
+ {"digital", "megabyte"},
+ {"length", "meter"},
+ {"duration", "microsecond"},
+ {"length", "mile"},
+ {"length", "mile-scandinavian"},
+ {"volume", "milliliter"},
+ {"length", "millimeter"},
+ {"duration", "millisecond"},
+ {"duration", "minute"},
+ {"duration", "month"},
+ {"duration", "nanosecond"},
+ {"mass", "ounce"},
+ {"concentr", "percent"},
+ {"digital", "petabyte"},
+ {"mass", "pound"},
+ {"duration", "second"},
+ {"mass", "stone"},
+ {"digital", "terabit"},
+ {"digital", "terabyte"},
+ {"duration", "week"},
+ {"length", "yard"},
+ {"duration", "year"},
+ // clang-format on
+};
+
+} // namespace mozilla::intl
+
+#endif
diff --git a/intl/components/src/NumberFormat.cpp b/intl/components/src/NumberFormat.cpp
new file mode 100644
index 0000000000..cbbf7278c9
--- /dev/null
+++ b/intl/components/src/NumberFormat.cpp
@@ -0,0 +1,154 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include "mozilla/intl/NumberFormat.h"
+#include "NumberFormatFields.h"
+#include "NumberFormatterSkeleton.h"
+#include "ScopedICUObject.h"
+
+#include "unicode/unumberformatter.h"
+#include "unicode/upluralrules.h"
+
+namespace mozilla::intl {
+
+ // Allocates a NumberFormat and initializes it for |aLocale| and |aOptions|.
+ // Returns the formatter on success and the initialization error otherwise.
+ /*static*/ Result<UniquePtr<NumberFormat>, ICUError> NumberFormat::TryCreate(
+     std::string_view aLocale, const NumberFormatOptions& aOptions) {
+   UniquePtr<NumberFormat> formatter = MakeUnique<NumberFormat>();
+
+   Result<Ok, ICUError> initialized = formatter->initialize(aLocale, aOptions);
+   if (initialized.isErr()) {
+     return Err(initialized.unwrapErr());
+   }
+
+   return formatter;
+ }
+
+ // Closes the ICU formatted-result and number-formatter handles, skipping any
+ // handle that is null.
+ NumberFormat::~NumberFormat() {
+ if (mFormattedNumber) {
+ unumf_closeResult(mFormattedNumber);
+ }
+ if (mNumberFormatter) {
+ unumf_close(mNumberFormatter);
+ }
+ }
+
+ // Builds the ICU number formatter for |aLocale| from |aOptions| and opens the
+ // result object which later format calls write into.
+ //
+ // Returns InternalError when no formatter could be created, and the mapped
+ // ICU error when opening the result object fails.
+ Result<Ok, ICUError> NumberFormat::initialize(
+     std::string_view aLocale, const NumberFormatOptions& aOptions) {
+   mFormatForUnit = aOptions.mUnit.isSome();
+
+   NumberFormatterSkeleton skeleton(aOptions);
+   mNumberFormatter = skeleton.toFormatter(aLocale);
+   if (!mNumberFormatter) {
+     return Err(ICUError::InternalError);
+   }
+
+   UErrorCode status = U_ZERO_ERROR;
+   mFormattedNumber = unumf_openResult(&status);
+   if (U_FAILURE(status)) {
+     return Err(ToICUError(status));
+   }
+
+   return Ok();
+ }
+
+ // Formats the double |number| and splits the result into |parts|.
+ Result<std::u16string_view, ICUError> NumberFormat::formatToParts(
+     double number, NumberPartVector& parts) const {
+   const bool formatted = formatInternal(number);
+   if (!formatted) {
+     return Err(ICUError::InternalError);
+   }
+
+   // NaN is never treated as negative here.
+   bool isNegative = false;
+   if (!IsNaN(number)) {
+     isNegative = IsNegative(number);
+   }
+
+   return FormatResultToParts(mFormattedNumber, Some(number), isNegative,
+                              mFormatForUnit, parts);
+ }
+
+ // Formats the integer |number| and splits the result into |parts|.
+ Result<std::u16string_view, ICUError> NumberFormat::formatToParts(
+     int64_t number, NumberPartVector& parts) const {
+   const bool formatted = formatInternal(number);
+   if (!formatted) {
+     return Err(ICUError::InternalError);
+   }
+
+   // No double value is available for integers, hence Nothing().
+   const bool isNegative = number < 0;
+   return FormatResultToParts(mFormattedNumber, Nothing(), isNegative,
+                              mFormatForUnit, parts);
+ }
+
+ // Formats the string-encoded decimal |number| and splits the result into
+ // |parts|.
+ Result<std::u16string_view, ICUError> NumberFormat::formatToParts(
+     std::string_view number, NumberPartVector& parts) const {
+   const bool formatted = formatInternal(number);
+   if (!formatted) {
+     return Err(ICUError::InternalError);
+   }
+
+   // Non-finite numbers aren't currently supported here. Should that ever be
+   // needed, the |Maybe<double>| argument below has to be computed instead of
+   // passing Nothing().
+   MOZ_ASSERT(number != "Infinity");
+   MOZ_ASSERT(number != "+Infinity");
+   MOZ_ASSERT(number != "-Infinity");
+   MOZ_ASSERT(number != "NaN");
+
+   // A leading minus sign marks a negative number.
+   bool isNegative = false;
+   if (!number.empty()) {
+     isNegative = number[0] == '-';
+   }
+
+   return FormatResultToParts(mFormattedNumber, Nothing(), isNegative,
+                              mFormatForUnit, parts);
+ }
+
+ // Formats |number| and asks |pluralRules| for the keyword of the formatted
+ // value. The keyword is written to |keyword|, which has room for
+ // |keywordSize| UTF-16 code units.
+ //
+ // Returns the value reported by uplrules_selectFormatted() or an ICU error.
+ Result<int32_t, ICUError> NumberFormat::selectFormatted(
+     double number, char16_t* keyword, int32_t keywordSize,
+     UPluralRules* pluralRules) const {
+   MOZ_ASSERT(keyword && pluralRules);
+
+   // Formatting fills |mFormattedNumber|, which is read below.
+   MOZ_TRY(format(number));
+
+   UErrorCode status = U_ZERO_ERROR;
+   const int32_t keywordLength = uplrules_selectFormatted(
+       pluralRules, mFormattedNumber, keyword, keywordSize, &status);
+   if (U_FAILURE(status)) {
+     return Err(ToICUError(status));
+   }
+
+   return keywordLength;
+ }
+
+ // Formats the double |number| with |mNumberFormatter| into |mFormattedNumber|.
+ // Returns true when ICU reports success.
+ bool NumberFormat::formatInternal(double number) const {
+ // ICU incorrectly formats NaN values with the sign bit set, as if they
+ // were negative. Replace all NaNs with a single pattern with sign bit
+ // unset ("positive", that is) until ICU is fixed.
+ if (MOZ_UNLIKELY(IsNaN(number))) {
+ number = SpecificNaN<double>(0, 1);
+ }
+
+ UErrorCode status = U_ZERO_ERROR;
+ unumf_formatDouble(mNumberFormatter, number, mFormattedNumber, &status);
+ return U_SUCCESS(status);
+ }
+
+ // Formats the integer |number| with |mNumberFormatter| into
+ // |mFormattedNumber|. Returns true when ICU reports success.
+ bool NumberFormat::formatInternal(int64_t number) const {
+ UErrorCode status = U_ZERO_ERROR;
+ unumf_formatInt(mNumberFormatter, number, mFormattedNumber, &status);
+ return U_SUCCESS(status);
+ }
+
+ // Formats the string-encoded decimal |number| with |mNumberFormatter| into
+ // |mFormattedNumber|. Returns true when ICU reports success.
+ bool NumberFormat::formatInternal(std::string_view number) const {
+ UErrorCode status = U_ZERO_ERROR;
+ // The view's pointer and length are passed explicitly.
+ unumf_formatDecimal(mNumberFormatter, number.data(), number.size(),
+ mFormattedNumber, &status);
+ return U_SUCCESS(status);
+ }
+
+ // Returns a view of the UTF-16 string currently held by |mFormattedNumber|,
+ // or the ICU error raised while accessing it.
+ Result<std::u16string_view, ICUError> NumberFormat::formatResult() const {
+   UErrorCode status = U_ZERO_ERROR;
+
+   const UFormattedValue* value =
+       unumf_resultAsValue(mFormattedNumber, &status);
+   if (U_FAILURE(status)) {
+     return Err(ToICUError(status));
+   }
+
+   int32_t length;
+   const char16_t* chars = ufmtval_getString(value, &length, &status);
+   if (U_FAILURE(status)) {
+     return Err(ToICUError(status));
+   }
+
+   return std::u16string_view(chars, static_cast<size_t>(length));
+ }
+
+} // namespace mozilla::intl
diff --git a/intl/components/src/NumberFormat.h b/intl/components/src/NumberFormat.h
new file mode 100644
index 0000000000..684b772e30
--- /dev/null
+++ b/intl/components/src/NumberFormat.h
@@ -0,0 +1,426 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef intl_components_NumberFormat_h_
+#define intl_components_NumberFormat_h_
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "mozilla/FloatingPoint.h"
+#include "mozilla/intl/ICU4CGlue.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/PodOperations.h"
+#include "mozilla/Result.h"
+#include "mozilla/Utf8.h"
+#include "mozilla/Vector.h"
+#include "mozilla/intl/ICUError.h"
+#include "mozilla/intl/NumberPart.h"
+
+#include "unicode/ustring.h"
+#include "unicode/unum.h"
+#include "unicode/unumberformatter.h"
+
+struct UPluralRules;
+
+namespace mozilla::intl {
+
+struct PluralRulesOptions;
+
+/**
+ * Configure NumberFormat options.
+ * The supported display styles are:
+ * * Decimal (default)
+ * * Currency (controlled by mCurrency)
+ * * Unit (controlled by mUnit)
+ * * Percent (controlled by mPercent)
+ *
+ * Only one of mCurrency, mUnit or mPercent should be set. If none are set,
+ * the number will formatted as a decimal.
+ *
+ * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#unit
+ */
+struct MOZ_STACK_CLASS NumberFormatOptions {
+ /**
+ * Display a currency amount. |currency| must be a three-letter currency code.
+ *
+ * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#unit
+ * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#unit-width
+ */
+ enum class CurrencyDisplay {
+ Symbol,
+ Code,
+ Name,
+ NarrowSymbol,
+ };
+ Maybe<std::pair<std::string_view, CurrencyDisplay>> mCurrency;
+
+ /**
+ * Set the fraction digits settings. |min| can be zero, |max| must be
+ * larger-or-equal to |min|.
+ *
+ * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#fraction-precision
+ */
+ Maybe<std::pair<uint32_t, uint32_t>> mFractionDigits;
+
+ /**
+ * Set the minimum number of integer digits. |min| must be a non-zero
+ * number.
+ *
+ * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#integer-width
+ */
+ Maybe<uint32_t> mMinIntegerDigits;
+
+ /**
+ * Set the significant digits settings. |min| must be a non-zero number, |max|
+ * must be larger-or-equal to |min|.
+ *
+ * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#significant-digits-precision
+ */
+ Maybe<std::pair<uint32_t, uint32_t>> mSignificantDigits;
+
+ /**
+ * Display a unit amount. |unit| must be a well-formed unit identifier.
+ *
+ * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#unit
+ * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#per-unit
+ * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#unit-width
+ */
+ enum class UnitDisplay { Short, Narrow, Long };
+ Maybe<std::pair<std::string_view, UnitDisplay>> mUnit;
+
+ /**
+ * Display a percent number.
+ *
+ * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#unit
+ * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#scale
+ */
+ bool mPercent = false;
+
+ /**
+ * Set to true to strip trailing zeros after the decimal point for integer
+ * values.
+ *
+ * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#trailing-zero-display
+ */
+ bool mStripTrailingZero = false;
+
+ /**
+ * Enable or disable grouping.
+ *
+ * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#grouping
+ */
+ enum class Grouping {
+ Auto,
+ Always,
+ Min2,
+ Never,
+ } mGrouping = Grouping::Auto;
+
+ /**
+ * Set the notation style.
+ *
+ * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#notation
+ */
+ enum class Notation {
+ Standard,
+ Scientific,
+ Engineering,
+ CompactShort,
+ CompactLong
+ } mNotation = Notation::Standard;
+
+ /**
+ * Set the sign-display.
+ *
+ * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#sign-display
+ */
+ enum class SignDisplay {
+ Auto,
+ Never,
+ Always,
+ ExceptZero,
+ Negative,
+ Accounting,
+ AccountingAlways,
+ AccountingExceptZero,
+ AccountingNegative,
+ } mSignDisplay = SignDisplay::Auto;
+
+ /**
+ * Set the rounding increment, which must be a non-zero number.
+ *
+ * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#precision
+ */
+ uint32_t mRoundingIncrement = 1;
+
+ /**
+ * Set the rounding mode.
+ *
+ * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#rounding-mode
+ */
+ enum class RoundingMode {
+ Ceil,
+ Floor,
+ Expand,
+ Trunc,
+ HalfCeil,
+ HalfFloor,
+ HalfExpand,
+ HalfTrunc,
+ HalfEven,
+ HalfOdd,
+ } mRoundingMode = RoundingMode::HalfExpand;
+
+ /**
+ * Set the rounding priority. |mFractionDigits| and |mSignificantDigits| must
+ * both be set if the rounding priority isn't equal to "auto".
+ *
+ * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#fraction-precision
+ */
+ enum class RoundingPriority {
+ Auto,
+ MorePrecision,
+ LessPrecision,
+ } mRoundingPriority = RoundingPriority::Auto;
+};
+
+/**
+ * According to http://userguide.icu-project.org/design, as long as we constrain
+ * ourselves to const APIs ICU is const-correct.
+ */
+
+/**
+ * A NumberFormat implementation that roughly mirrors the API provided by
+ * the ECMA-402 Intl.NumberFormat object.
+ *
+ * https://tc39.es/ecma402/#numberformat-objects
+ */
+class NumberFormat final {
+ public:
+ /**
+ * Initialize a new NumberFormat for the provided locale and using the
+ * provided options.
+ *
+ * https://tc39.es/ecma402/#sec-initializenumberformat
+ */
+ static Result<UniquePtr<NumberFormat>, ICUError> TryCreate(
+ std::string_view aLocale, const NumberFormatOptions& aOptions);
+
+ NumberFormat() = default;
+ NumberFormat(const NumberFormat&) = delete;
+ NumberFormat& operator=(const NumberFormat&) = delete;
+ ~NumberFormat();
+
+ /**
+ * Formats a double to a utf-16 string. The string view is valid until
+ * another number is formatted. Accessing the string view after this event
+ * is undefined behavior.
+ *
+ * https://tc39.es/ecma402/#sec-formatnumberstring
+ */
+ Result<std::u16string_view, ICUError> format(double number) const {
+ if (!formatInternal(number)) {
+ return Err(ICUError::InternalError);
+ }
+
+ return formatResult();
+ }
+
+ /**
+ * Formats a double to a utf-16 string, and fills the provided parts vector.
+ * The string view is valid until another number is formatted. Accessing the
+ * string view after this event is undefined behavior.
+ *
+ * This is utf-16 only because the only current use case is in
+ * SpiderMonkey. Supporting utf-8 would require recalculating the offsets
+ * in NumberPartVector from fixed width to variable width, which might be
+ * tricky to get right and is work that won't be necessary if we switch to
+ * ICU4X (see Bug 1707035).
+ *
+ * https://tc39.es/ecma402/#sec-partitionnumberpattern
+ */
+ Result<std::u16string_view, ICUError> formatToParts(
+ double number, NumberPartVector& parts) const;
+
+ /**
+ * Formats a double to the provided buffer (either utf-8 or utf-16).
+ *
+ * https://tc39.es/ecma402/#sec-formatnumberstring
+ */
+ template <typename B>
+ Result<Ok, ICUError> format(double number, B& buffer) const {
+ if (!formatInternal(number)) {
+ return Err(ICUError::InternalError);
+ }
+
+ return formatResult<typename B::CharType, B>(buffer);
+ }
+
+ /**
+ * Formats an int64_t to a utf-16 string. The string view is valid until
+ * another number is formatted. Accessing the string view after this event is
+ * undefined behavior.
+ *
+ * https://tc39.es/ecma402/#sec-formatnumberstring
+ */
+ Result<std::u16string_view, ICUError> format(int64_t number) const {
+ if (!formatInternal(number)) {
+ return Err(ICUError::InternalError);
+ }
+
+ return formatResult();
+ }
+
+ /**
+ * Formats an int64_t to a utf-16 string, and fills the provided parts vector.
+ * The string view is valid until another number is formatted. Accessing the
+ * string view after this event is undefined behavior.
+ *
+ * This is utf-16 only because the only current use case is in
+ * SpiderMonkey. Supporting utf-8 would require recalculating the offsets
+ * in NumberPartVector from fixed width to variable width, which might be
+ * tricky to get right and is work that won't be necessary if we switch to
+ * ICU4X (see Bug 1707035).
+ *
+ * https://tc39.es/ecma402/#sec-partitionnumberpattern
+ */
+ Result<std::u16string_view, ICUError> formatToParts(
+ int64_t number, NumberPartVector& parts) const;
+
+ /**
+ * Formats an int64_t to the provided buffer (either utf-8 or utf-16).
+ *
+ * https://tc39.es/ecma402/#sec-formatnumberstring
+ */
+ template <typename B>
+ Result<Ok, ICUError> format(int64_t number, B& buffer) const {
+ if (!formatInternal(number)) {
+ return Err(ICUError::InternalError);
+ }
+
+ return formatResult<typename B::CharType, B>(buffer);
+ }
+
+ /**
+ * Formats a string-encoded decimal number to a utf-16 string. The string view
+ * is valid until another number is formatted. Accessing the string view
+ * after this event is undefined behavior.
+ *
+ * https://tc39.es/ecma402/#sec-formatnumberstring
+ */
+ Result<std::u16string_view, ICUError> format(std::string_view number) const {
+ if (!formatInternal(number)) {
+ return Err(ICUError::InternalError);
+ }
+
+ return formatResult();
+ }
+
+ /**
+ * Formats a string-encoded decimal number to a utf-16 string, and fills the
+ * provided parts vector. The string view is valid until another number is
+ * formatted. Accessing the string view after this event is undefined
+ * behavior.
+ *
+ * This is utf-16 only because the only current use case is in
+ * SpiderMonkey. Supporting utf-8 would require recalculating the offsets
+ * in NumberPartVector from fixed width to variable width, which might be
+ * tricky to get right and is work that won't be necessary if we switch to
+ * ICU4X (see Bug 1707035).
+ *
+ * https://tc39.es/ecma402/#sec-partitionnumberpattern
+ */
+ Result<std::u16string_view, ICUError> formatToParts(
+ std::string_view number, NumberPartVector& parts) const;
+
+ /**
+ * Formats a string-encoded decimal number to the provided buffer
+ * (either utf-8 or utf-16).
+ *
+ * https://tc39.es/ecma402/#sec-formatnumberstring
+ */
+ template <typename B>
+ Result<Ok, ICUError> format(std::string_view number, B& buffer) const {
+ if (!formatInternal(number)) {
+ return Err(ICUError::InternalError);
+ }
+
+ return formatResult<typename B::CharType, B>(buffer);
+ }
+
+ /**
+ * Formats the number and selects the keyword by using a provided
+ * UPluralRules object.
+ *
+ * https://tc39.es/ecma402/#sec-intl.pluralrules.prototype.select
+ *
+ * TODO(1713917) This is necessary because both PluralRules and
+ * NumberFormat have a shared dependency on the raw UFormattedNumber
+ * type. Once we transition to using ICU4X, the FFI calls should no
+ * longer require such shared dependencies. At that time, this
+ * functionality should be removed from NumberFormat and invoked
+ * solely from PluralRules.
+ */
+ Result<int32_t, ICUError> selectFormatted(double number, char16_t* keyword,
+ int32_t keywordSize,
+ UPluralRules* pluralRules) const;
+
+ /**
+ * Returns an iterator over all supported number formatter locales.
+ *
+ * The returned strings are ICU locale identifiers and NOT BCP 47 language
+ * tags.
+ *
+ * Also see <https://unicode-org.github.io/icu/userguide/locale>.
+ */
+ static auto GetAvailableLocales() {
+ return AvailableLocalesEnumeration<unum_countAvailable,
+ unum_getAvailable>();
+ }
+
+ private:
+ UNumberFormatter* mNumberFormatter = nullptr;
+ UFormattedNumber* mFormattedNumber = nullptr;
+ bool mFormatForUnit = false;
+
+ Result<Ok, ICUError> initialize(std::string_view aLocale,
+ const NumberFormatOptions& aOptions);
+
+ [[nodiscard]] bool formatInternal(double number) const;
+ [[nodiscard]] bool formatInternal(int64_t number) const;
+ [[nodiscard]] bool formatInternal(std::string_view number) const;
+
+ Result<std::u16string_view, ICUError> formatResult() const;
+
+ template <typename C, typename B>
+ Result<Ok, ICUError> formatResult(B& buffer) const {
+ // We only support buffers with char or char16_t.
+ static_assert(std::is_same_v<C, char> || std::is_same_v<C, char16_t>);
+
+ return formatResult().andThen(
+ [&buffer](std::u16string_view result) -> Result<Ok, ICUError> {
+ if constexpr (std::is_same_v<C, char>) {
+ if (!FillBuffer(Span(result.data(), result.size()), buffer)) {
+ return Err(ICUError::OutOfMemory);
+ }
+ return Ok();
+ } else {
+ // ICU provides APIs which accept a buffer, but they just copy from
+ // an internal buffer behind the scenes anyway.
+ if (!buffer.reserve(result.size())) {
+ return Err(ICUError::OutOfMemory);
+ }
+ PodCopy(static_cast<char16_t*>(buffer.data()), result.data(),
+ result.size());
+ buffer.written(result.size());
+
+ return Ok();
+ }
+ });
+ }
+};
+
+} // namespace mozilla::intl
+
+#endif
diff --git a/intl/components/src/NumberFormatFields.cpp b/intl/components/src/NumberFormatFields.cpp
new file mode 100644
index 0000000000..654d936bf3
--- /dev/null
+++ b/intl/components/src/NumberFormatFields.cpp
@@ -0,0 +1,398 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include "ICU4CGlue.h"
+#include "NumberFormatFields.h"
+#include "ScopedICUObject.h"
+
+#include "mozilla/FloatingPoint.h"
+#include "unicode/uformattedvalue.h"
+#include "unicode/unum.h"
+#include "unicode/unumberformatter.h"
+
+namespace mozilla::intl {
+
+bool NumberFormatFields::append(NumberPartType type, int32_t begin,
+ int32_t end) {
+ MOZ_ASSERT(begin >= 0);
+ MOZ_ASSERT(end >= 0);
+ MOZ_ASSERT(begin < end, "erm, aren't fields always non-empty?");
+
+ return fields_.emplaceBack(uint32_t(begin), uint32_t(end), type);
+}
+
+bool NumberFormatFields::toPartsVector(size_t overallLength,
+ const NumberPartSourceMap& sourceMap,
+ NumberPartVector& parts) {
+ std::sort(fields_.begin(), fields_.end(),
+ [](const NumberFormatField& left, const NumberFormatField& right) {
+ // Sort by ascending begin index; on ties, by descending end
+ // index so enclosing fields precede the fields nested in them.
+ return left.begin < right.begin ||
+ (left.begin == right.begin && left.end > right.end);
+ });
+
+ // Then iterate over the sorted field list to generate a sequence of parts
+ // (what ECMA-402 actually exposes). A part is a maximal character sequence
+ // entirely within no field or a single most-nested field.
+ //
+ // Diagrams may be helpful to illustrate how fields map to parts. Consider
+ // formatting -19,766,580,028,249.41, the US national surplus (negative
+ // because it's actually a debt) on October 18, 2016.
+ //
+ // var options =
+ // { style: "currency", currency: "USD", currencyDisplay: "name" };
+ // var usdFormatter = new Intl.NumberFormat("en-US", options);
+ // usdFormatter.format(-19766580028249.41);
+ //
+ // The formatted result is "-19,766,580,028,249.41 US dollars". ICU
+ // identifies these fields in the string:
+ //
+ // UNUM_GROUPING_SEPARATOR_FIELD
+ // |
+ // UNUM_SIGN_FIELD | UNUM_DECIMAL_SEPARATOR_FIELD
+ // | __________/| |
+ // | / | | | |
+ // "-19,766,580,028,249.41 US dollars"
+ // \________________/ |/ \_______/
+ // | | |
+ // UNUM_INTEGER_FIELD | UNUM_CURRENCY_FIELD
+ // |
+ // UNUM_FRACTION_FIELD
+ //
+ // These fields map to parts as follows:
+ //
+ // integer decimal
+ // _____|________ |
+ // / /| |\ |\ |\ | literal
+ // /| / | | \ | \ | \| |
+ // "-19,766,580,028,249.41 US dollars"
+ // | \___|___|___/ |/ \________/
+ // | | | |
+ // | group | currency
+ // | |
+ // minusSign fraction
+ //
+ // The sign is a part. Each comma is a part, splitting the integer field
+ // into parts for trillions/billions/&c. digits. The decimal point is a
+ // part. Cents are a part. The space between cents and currency is a part
+ // (outside any field). Last, the currency field is a part.
+
+ class PartGenerator {
+ // The fields in order from start to end, then least to most nested.
+ const FieldsVector& fields;
+
+ // Index of the current field, in |fields|, being considered to
+ // determine part boundaries. |lastEnd <= fields[index].begin| is an
+ // invariant.
+ size_t index = 0;
+
+ // The end index of the last part produced, always less than or equal
+ // to |limit|, strictly increasing.
+ uint32_t lastEnd = 0;
+
+ // The length of the overall formatted string.
+ const uint32_t limit = 0;
+
+ NumberPartSourceMap sourceMap;
+
+ Vector<size_t, 4> enclosingFields;
+
+ void popEnclosingFieldsEndingAt(uint32_t end) {
+ MOZ_ASSERT_IF(enclosingFields.length() > 0,
+ fields[enclosingFields.back()].end >= end);
+
+ while (enclosingFields.length() > 0 &&
+ fields[enclosingFields.back()].end == end) {
+ enclosingFields.popBack();
+ }
+ }
+
+ bool nextPartInternal(NumberPart* part) {
+ size_t len = fields.length();
+ MOZ_ASSERT(index <= len);
+
+ // If we're out of fields, all that remains are part(s) consisting
+ // of trailing portions of enclosing fields, and maybe a final
+ // literal part.
+ if (index == len) {
+ if (enclosingFields.length() > 0) {
+ const auto& enclosing = fields[enclosingFields.popCopy()];
+ *part = {enclosing.type, sourceMap.source(enclosing), enclosing.end};
+
+ // If additional enclosing fields end where this part ends,
+ // pop them as well.
+ popEnclosingFieldsEndingAt(part->endIndex);
+ } else {
+ *part = {NumberPartType::Literal, sourceMap.source(limit), limit};
+ }
+
+ return true;
+ }
+
+ // Otherwise we still have a field to process.
+ const NumberFormatField* current = &fields[index];
+ MOZ_ASSERT(lastEnd <= current->begin);
+ MOZ_ASSERT(current->begin < current->end);
+
+ // But first, deal with inter-field space.
+ if (lastEnd < current->begin) {
+ if (enclosingFields.length() > 0) {
+ // Space between fields, within an enclosing field, is part
+ // of that enclosing field, until the start of the current
+ // field or the end of the enclosing field, whichever is
+ // earlier.
+ const auto& enclosing = fields[enclosingFields.back()];
+ *part = {enclosing.type, sourceMap.source(enclosing),
+ std::min(enclosing.end, current->begin)};
+ popEnclosingFieldsEndingAt(part->endIndex);
+ } else {
+ // If there's no enclosing field, the space is a literal.
+ *part = {NumberPartType::Literal, sourceMap.source(current->begin),
+ current->begin};
+ }
+
+ return true;
+ }
+
+ // Otherwise, the part spans a prefix of the current field. Find
+ // the most-nested field containing that prefix.
+ const NumberFormatField* next;
+ do {
+ current = &fields[index];
+
+ // If the current field is last, the part extends to its end.
+ if (++index == len) {
+ *part = {current->type, sourceMap.source(*current), current->end};
+ return true;
+ }
+
+ next = &fields[index];
+ MOZ_ASSERT(current->begin <= next->begin);
+ MOZ_ASSERT(current->begin < next->end);
+
+ // If the next field nests within the current field, push an
+ // enclosing field. (If there are no nested fields, don't
+ // bother pushing a field that'd be immediately popped.)
+ if (current->end > next->begin) {
+ if (!enclosingFields.append(index - 1)) {
+ return false;
+ }
+ }
+
+ // Do so until the next field begins after this one.
+ } while (current->begin == next->begin);
+
+ if (current->end <= next->begin) {
+ // The next field begins after the current field ends. Therefore
+ // the current part ends at the end of the current field.
+ *part = {current->type, sourceMap.source(*current), current->end};
+ popEnclosingFieldsEndingAt(part->endIndex);
+ } else {
+ // The current field encloses the next one. The current part
+ // ends where the next field/part will start.
+ *part = {current->type, sourceMap.source(*current), next->begin};
+ }
+
+ return true;
+ }
+
+ public:
+ PartGenerator(const FieldsVector& vec, uint32_t limit,
+ const NumberPartSourceMap& sourceMap)
+ : fields(vec), limit(limit), sourceMap(sourceMap), enclosingFields() {}
+
+ bool nextPart(bool* hasPart, NumberPart* part) {
+ // There are no parts left if we've partitioned the entire string.
+ if (lastEnd == limit) {
+ MOZ_ASSERT(enclosingFields.length() == 0);
+ *hasPart = false;
+ return true;
+ }
+
+ if (!nextPartInternal(part)) {
+ return false;
+ }
+
+ *hasPart = true;
+ lastEnd = part->endIndex;
+ return true;
+ }
+ };
+
+ // Finally, generate the result array.
+ size_t lastEndIndex = 0;
+
+ PartGenerator gen(fields_, overallLength, sourceMap);
+ do {
+ bool hasPart;
+ NumberPart part;
+ if (!gen.nextPart(&hasPart, &part)) {
+ return false;
+ }
+
+ if (!hasPart) {
+ break;
+ }
+
+ MOZ_ASSERT(lastEndIndex < part.endIndex);
+
+ if (!parts.append(part)) {
+ return false;
+ }
+
+ lastEndIndex = part.endIndex;
+ } while (true);
+
+ MOZ_ASSERT(lastEndIndex == overallLength,
+ "result array must partition the entire string");
+
+ return lastEndIndex == overallLength;
+}
+
+Result<std::u16string_view, ICUError> FormatResultToParts(
+ const UFormattedNumber* value, Maybe<double> number, bool isNegative,
+ bool formatForUnit, NumberPartVector& parts) {
+ UErrorCode status = U_ZERO_ERROR;
+
+ const UFormattedValue* formattedValue = unumf_resultAsValue(value, &status);
+ if (U_FAILURE(status)) {
+ return Err(ToICUError(status));
+ }
+
+ return FormatResultToParts(formattedValue, number, isNegative, formatForUnit,
+ parts);
+}
+
+Result<std::u16string_view, ICUError> FormatResultToParts(
+ const UFormattedValue* value, Maybe<double> number, bool isNegative,
+ bool formatForUnit, NumberPartVector& parts) {
+ UErrorCode status = U_ZERO_ERROR;
+
+ int32_t utf16Length;
+ const char16_t* utf16Str = ufmtval_getString(value, &utf16Length, &status);
+ if (U_FAILURE(status)) {
+ return Err(ToICUError(status));
+ }
+
+ UConstrainedFieldPosition* fpos = ucfpos_open(&status);
+ if (U_FAILURE(status)) {
+ return Err(ToICUError(status));
+ }
+ ScopedICUObject<UConstrainedFieldPosition, ucfpos_close> toCloseFpos(fpos);
+
+ // We're only interested in UFIELD_CATEGORY_NUMBER fields.
+ ucfpos_constrainCategory(fpos, UFIELD_CATEGORY_NUMBER, &status);
+ if (U_FAILURE(status)) {
+ return Err(ToICUError(status));
+ }
+
+ // Vacuum up fields in the overall formatted string.
+ NumberFormatFields fields;
+
+ while (true) {
+ bool hasMore = ufmtval_nextPosition(value, fpos, &status);
+ if (U_FAILURE(status)) {
+ return Err(ToICUError(status));
+ }
+ if (!hasMore) {
+ break;
+ }
+
+ int32_t fieldName = ucfpos_getField(fpos, &status);
+ if (U_FAILURE(status)) {
+ return Err(ToICUError(status));
+ }
+
+ int32_t beginIndex, endIndex;
+ ucfpos_getIndexes(fpos, &beginIndex, &endIndex, &status);
+ if (U_FAILURE(status)) {
+ return Err(ToICUError(status));
+ }
+
+ Maybe<NumberPartType> partType = GetPartTypeForNumberField(
+ UNumberFormatFields(fieldName), number, isNegative, formatForUnit);
+ if (!partType || !fields.append(*partType, beginIndex, endIndex)) {
+ return Err(ICUError::InternalError);
+ }
+ }
+
+ if (!fields.toPartsVector(utf16Length, parts)) {
+ return Err(ICUError::InternalError);
+ }
+
+ return std::u16string_view(utf16Str, static_cast<size_t>(utf16Length));
+}
+
+// See intl/icu/source/i18n/unicode/unum.h for a detailed field list. This
+// list is deliberately exhaustive: cases might have to be added/removed if
+// this code is compiled with a different ICU with more UNumberFormatFields
+// enum initializers. Please guard such cases with appropriate ICU
+// version-testing #ifdefs, should cross-version divergence occur.
+Maybe<NumberPartType> GetPartTypeForNumberField(UNumberFormatFields fieldName,
+ Maybe<double> number,
+ bool isNegative,
+ bool formatForUnit) {
+ switch (fieldName) {
+ case UNUM_INTEGER_FIELD:
+ if (number.isSome()) {
+ if (IsNaN(*number)) {
+ return Some(NumberPartType::Nan);
+ }
+ if (!IsFinite(*number)) {
+ return Some(NumberPartType::Infinity);
+ }
+ }
+ return Some(NumberPartType::Integer);
+ case UNUM_FRACTION_FIELD:
+ return Some(NumberPartType::Fraction);
+ case UNUM_DECIMAL_SEPARATOR_FIELD:
+ return Some(NumberPartType::Decimal);
+ case UNUM_EXPONENT_SYMBOL_FIELD:
+ return Some(NumberPartType::ExponentSeparator);
+ case UNUM_EXPONENT_SIGN_FIELD:
+ return Some(NumberPartType::ExponentMinusSign);
+ case UNUM_EXPONENT_FIELD:
+ return Some(NumberPartType::ExponentInteger);
+ case UNUM_GROUPING_SEPARATOR_FIELD:
+ return Some(NumberPartType::Group);
+ case UNUM_CURRENCY_FIELD:
+ return Some(NumberPartType::Currency);
+ case UNUM_PERCENT_FIELD:
+ if (formatForUnit) {
+ return Some(NumberPartType::Unit);
+ }
+ return Some(NumberPartType::Percent);
+ case UNUM_PERMILL_FIELD:
+ MOZ_ASSERT_UNREACHABLE(
+ "unexpected permill field found, even though "
+ "we don't use any user-defined patterns that "
+ "would require a permill field");
+ break;
+ case UNUM_SIGN_FIELD:
+ if (isNegative) {
+ return Some(NumberPartType::MinusSign);
+ }
+ return Some(NumberPartType::PlusSign);
+ case UNUM_MEASURE_UNIT_FIELD:
+ return Some(NumberPartType::Unit);
+ case UNUM_COMPACT_FIELD:
+ return Some(NumberPartType::Compact);
+#ifndef U_HIDE_DRAFT_API
+ case UNUM_APPROXIMATELY_SIGN_FIELD:
+ return Some(NumberPartType::ApproximatelySign);
+#endif
+#ifndef U_HIDE_DEPRECATED_API
+ case UNUM_FIELD_COUNT:
+ MOZ_ASSERT_UNREACHABLE(
+ "format field sentinel value returned by iterator!");
+ break;
+#endif
+ }
+
+ MOZ_ASSERT_UNREACHABLE(
+ "unenumerated, undocumented format field returned by iterator");
+ return Nothing();
+}
+
+} // namespace mozilla::intl
diff --git a/intl/components/src/NumberFormatFields.h b/intl/components/src/NumberFormatFields.h
new file mode 100644
index 0000000000..4f05d4e98b
--- /dev/null
+++ b/intl/components/src/NumberFormatFields.h
@@ -0,0 +1,91 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef intl_components_NumberFormatFields_h_
+#define intl_components_NumberFormatFields_h_
+#include "mozilla/intl/ICUError.h"
+#include "mozilla/intl/NumberPart.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/Result.h"
+#include "mozilla/Vector.h"
+
+#include "unicode/unum.h"
+
+struct UFormattedNumber;
+struct UFormattedValue;
+
+namespace mozilla::intl {
+
+struct NumberFormatField {
+ uint32_t begin;
+ uint32_t end;
+ NumberPartType type;
+
+ // Needed for vector-resizing scratch space.
+ NumberFormatField() = default;
+
+ NumberFormatField(uint32_t begin, uint32_t end, NumberPartType type)
+ : begin(begin), end(end), type(type) {}
+};
+
+struct NumberPartSourceMap {
+ struct Range {
+ uint32_t begin = 0;
+ uint32_t end = 0;
+ };
+
+ // Begin and end position of the start range.
+ Range start;
+
+ // Begin and end position of the end range.
+ Range end;
+
+ NumberPartSource source(uint32_t endIndex) {
+ if (start.begin < endIndex && endIndex <= start.end) {
+ return NumberPartSource::Start;
+ }
+ if (end.begin < endIndex && endIndex <= end.end) {
+ return NumberPartSource::End;
+ }
+ return NumberPartSource::Shared;
+ }
+
+ NumberPartSource source(const NumberFormatField& field) {
+ return source(field.end);
+ }
+};
+
+class NumberFormatFields {
+ using FieldsVector = Vector<NumberFormatField, 16>;
+
+ FieldsVector fields_;
+
+ public:
+ [[nodiscard]] bool append(NumberPartType type, int32_t begin, int32_t end);
+
+ [[nodiscard]] bool toPartsVector(size_t overallLength,
+ NumberPartVector& parts) {
+ return toPartsVector(overallLength, {}, parts);
+ }
+
+ [[nodiscard]] bool toPartsVector(size_t overallLength,
+ const NumberPartSourceMap& sourceMap,
+ NumberPartVector& parts);
+};
+
+Result<std::u16string_view, ICUError> FormatResultToParts(
+ const UFormattedNumber* value, Maybe<double> number, bool isNegative,
+ bool formatForUnit, NumberPartVector& parts);
+
+Result<std::u16string_view, ICUError> FormatResultToParts(
+ const UFormattedValue* value, Maybe<double> number, bool isNegative,
+ bool formatForUnit, NumberPartVector& parts);
+
+Maybe<NumberPartType> GetPartTypeForNumberField(UNumberFormatFields fieldName,
+ Maybe<double> number,
+ bool isNegative,
+ bool formatForUnit);
+
+} // namespace mozilla::intl
+
+#endif
diff --git a/intl/components/src/NumberFormatterSkeleton.cpp b/intl/components/src/NumberFormatterSkeleton.cpp
new file mode 100644
index 0000000000..5f62d77c2b
--- /dev/null
+++ b/intl/components/src/NumberFormatterSkeleton.cpp
@@ -0,0 +1,473 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include "NumberFormatterSkeleton.h"
+#include "NumberFormat.h"
+
+#include "MeasureUnitGenerated.h"
+
+#include "mozilla/RangedPtr.h"
+
+#include <algorithm>
+#include <limits>
+
+#include "unicode/unumberrangeformatter.h"
+
+namespace mozilla::intl {
+
+NumberFormatterSkeleton::NumberFormatterSkeleton(
+ const NumberFormatOptions& options) {
+ if (options.mCurrency.isSome()) {
+ if (!currency(options.mCurrency->first) ||
+ !currencyDisplay(options.mCurrency->second)) {
+ return;
+ }
+ } else if (options.mUnit.isSome()) {
+ if (!unit(options.mUnit->first) || !unitDisplay(options.mUnit->second)) {
+ return;
+ }
+ } else if (options.mPercent) {
+ if (!percent()) {
+ return;
+ }
+ }
+
+ if (options.mRoundingIncrement != 1) {
+ auto fd = options.mFractionDigits.valueOr(std::pair{0, 0});
+ if (!roundingIncrement(options.mRoundingIncrement, fd.first, fd.second,
+ options.mStripTrailingZero)) {
+ return;
+ }
+ } else if (options.mRoundingPriority ==
+ NumberFormatOptions::RoundingPriority::Auto) {
+ if (options.mFractionDigits.isSome()) {
+ if (!fractionDigits(options.mFractionDigits->first,
+ options.mFractionDigits->second,
+ options.mStripTrailingZero)) {
+ return;
+ }
+ }
+
+ if (options.mSignificantDigits.isSome()) {
+ if (!significantDigits(options.mSignificantDigits->first,
+ options.mSignificantDigits->second,
+ options.mStripTrailingZero)) {
+ return;
+ }
+ }
+ } else {
+ MOZ_ASSERT(options.mFractionDigits);
+ MOZ_ASSERT(options.mSignificantDigits);
+
+ bool relaxed = options.mRoundingPriority ==
+ NumberFormatOptions::RoundingPriority::MorePrecision;
+ if (!fractionWithSignificantDigits(options.mFractionDigits->first,
+ options.mFractionDigits->second,
+ options.mSignificantDigits->first,
+ options.mSignificantDigits->second,
+ relaxed, options.mStripTrailingZero)) {
+ return;
+ }
+ }
+
+ if (options.mMinIntegerDigits.isSome()) {
+ if (!minIntegerDigits(*options.mMinIntegerDigits)) {
+ return;
+ }
+ }
+
+ if (!grouping(options.mGrouping)) {
+ return;
+ }
+
+ if (!notation(options.mNotation)) {
+ return;
+ }
+
+ if (!signDisplay(options.mSignDisplay)) {
+ return;
+ }
+
+ if (!roundingMode(options.mRoundingMode)) {
+ return;
+ }
+
+ mValidSkeleton = true;
+}
+
+bool NumberFormatterSkeleton::currency(std::string_view currency) {
+ MOZ_ASSERT(currency.size() == 3,
+ "IsWellFormedCurrencyCode permits only length-3 strings");
+
+ char16_t currencyChars[] = {static_cast<char16_t>(currency[0]),
+ static_cast<char16_t>(currency[1]),
+ static_cast<char16_t>(currency[2]), '\0'};
+ return append(u"currency/") && append(currencyChars) && append(' ');
+}
+
+bool NumberFormatterSkeleton::currencyDisplay(
+ NumberFormatOptions::CurrencyDisplay display) {
+ switch (display) {
+ case NumberFormatOptions::CurrencyDisplay::Code:
+ return appendToken(u"unit-width-iso-code");
+ case NumberFormatOptions::CurrencyDisplay::Name:
+ return appendToken(u"unit-width-full-name");
+ case NumberFormatOptions::CurrencyDisplay::Symbol:
+ // Default, no additional tokens needed.
+ return true;
+ case NumberFormatOptions::CurrencyDisplay::NarrowSymbol:
+ return appendToken(u"unit-width-narrow");
+ }
+ MOZ_ASSERT_UNREACHABLE("unexpected currency display type");
+ return false;
+}
+
+static const SimpleMeasureUnit& FindSimpleMeasureUnit(std::string_view name) {
+ const auto* measureUnit = std::lower_bound(
+ std::begin(simpleMeasureUnits), std::end(simpleMeasureUnits), name,
+ [](const auto& measureUnit, std::string_view name) {
+ return name.compare(measureUnit.name) > 0;
+ });
+ MOZ_ASSERT(measureUnit != std::end(simpleMeasureUnits),
+ "unexpected unit identifier: unit not found");
+ MOZ_ASSERT(measureUnit->name == name,
+ "unexpected unit identifier: wrong unit found");
+ return *measureUnit;
+}
+
+static constexpr size_t MaxUnitLength() {
+ size_t length = 0;
+ for (const auto& unit : simpleMeasureUnits) {
+ length = std::max(length, std::char_traits<char>::length(unit.name));
+ }
+ return length * 2 + std::char_traits<char>::length("-per-");
+}
+
+bool NumberFormatterSkeleton::unit(std::string_view unit) {
+ MOZ_RELEASE_ASSERT(unit.length() <= MaxUnitLength());
+
+ auto appendUnit = [this](const SimpleMeasureUnit& unit) {
+ return append(unit.type, strlen(unit.type)) && append('-') &&
+ append(unit.name, strlen(unit.name));
+ };
+
+ // |unit| can be a compound unit identifier, separated by "-per-".
+ static constexpr char separator[] = "-per-";
+ size_t separator_len = strlen(separator);
+ size_t offset = unit.find(separator);
+ if (offset != std::string_view::npos) {
+ const auto& numerator = FindSimpleMeasureUnit(unit.substr(0, offset));
+ const auto& denominator = FindSimpleMeasureUnit(
+ std::string_view(unit.data() + offset + separator_len,
+ unit.length() - offset - separator_len));
+ return append(u"measure-unit/") && appendUnit(numerator) && append(' ') &&
+ append(u"per-measure-unit/") && appendUnit(denominator) &&
+ append(' ');
+ }
+
+ const auto& simple = FindSimpleMeasureUnit(unit);
+ return append(u"measure-unit/") && appendUnit(simple) && append(' ');
+}
+
+bool NumberFormatterSkeleton::unitDisplay(
+ NumberFormatOptions::UnitDisplay display) {
+ switch (display) {
+ case NumberFormatOptions::UnitDisplay::Short:
+ return appendToken(u"unit-width-short");
+ case NumberFormatOptions::UnitDisplay::Narrow:
+ return appendToken(u"unit-width-narrow");
+ case NumberFormatOptions::UnitDisplay::Long:
+ return appendToken(u"unit-width-full-name");
+ }
+ MOZ_ASSERT_UNREACHABLE("unexpected unit display type");
+ return false;
+}
+
+bool NumberFormatterSkeleton::percent() {
+ return appendToken(u"percent scale/100");
+}
+
+bool NumberFormatterSkeleton::fractionDigits(uint32_t min, uint32_t max,
+ bool stripTrailingZero) {
+ // Note: |min| can be zero here.
+ MOZ_ASSERT(min <= max);
+ if (!append('.') || !appendN('0', min) || !appendN('#', max - min)) {
+ return false;
+ }
+ if (stripTrailingZero) {
+ if (!append(u"/w")) {
+ return false;
+ }
+ }
+ return append(' ');
+}
+
+bool NumberFormatterSkeleton::fractionWithSignificantDigits(
+ uint32_t mnfd, uint32_t mxfd, uint32_t mnsd, uint32_t mxsd, bool relaxed,
+ bool stripTrailingZero) {
+ // Note: |mnfd| can be zero here.
+ MOZ_ASSERT(mnfd <= mxfd);
+ MOZ_ASSERT(mnsd > 0);
+ MOZ_ASSERT(mnsd <= mxsd);
+
+ if (!append('.') || !appendN('0', mnfd) || !appendN('#', mxfd - mnfd)) {
+ return false;
+ }
+ if (!append('/') || !appendN('@', mnsd) || !appendN('#', mxsd - mnsd)) {
+ return false;
+ }
+ if (!append(relaxed ? 'r' : 's')) {
+ return false;
+ }
+ if (stripTrailingZero) {
+ if (!append(u"/w")) {
+ return false;
+ }
+ }
+ return append(' ');
+}
+
+bool NumberFormatterSkeleton::minIntegerDigits(uint32_t min) {
+ MOZ_ASSERT(min > 0);
+ return append(u"integer-width/+") && appendN('0', min) && append(' ');
+}
+
+bool NumberFormatterSkeleton::significantDigits(uint32_t min, uint32_t max,
+ bool stripTrailingZero) {
+ MOZ_ASSERT(min > 0);
+ MOZ_ASSERT(min <= max);
+ if (!appendN('@', min) || !appendN('#', max - min)) {
+ return false;
+ }
+ if (stripTrailingZero) {
+ if (!append(u"/w")) {
+ return false;
+ }
+ }
+ return append(' ');
+}
+
+bool NumberFormatterSkeleton::grouping(NumberFormatOptions::Grouping grouping) {
+ switch (grouping) {
+ case NumberFormatOptions::Grouping::Auto:
+ // Default, no additional tokens needed.
+ return true;
+ case NumberFormatOptions::Grouping::Always:
+ return appendToken(u"group-on-aligned");
+ case NumberFormatOptions::Grouping::Min2:
+ return appendToken(u"group-min2");
+ case NumberFormatOptions::Grouping::Never:
+ return appendToken(u"group-off");
+ }
+ MOZ_ASSERT_UNREACHABLE("unexpected grouping mode");
+ return false;
+}
+
+bool NumberFormatterSkeleton::notation(NumberFormatOptions::Notation style) {
+ switch (style) {
+ case NumberFormatOptions::Notation::Standard:
+ // Default, no additional tokens needed.
+ return true;
+ case NumberFormatOptions::Notation::Scientific:
+ return appendToken(u"scientific");
+ case NumberFormatOptions::Notation::Engineering:
+ return appendToken(u"engineering");
+ case NumberFormatOptions::Notation::CompactShort:
+ return appendToken(u"compact-short");
+ case NumberFormatOptions::Notation::CompactLong:
+ return appendToken(u"compact-long");
+ }
+ MOZ_ASSERT_UNREACHABLE("unexpected notation style");
+ return false;
+}
+
+bool NumberFormatterSkeleton::signDisplay(
+ NumberFormatOptions::SignDisplay display) {
+ switch (display) {
+ case NumberFormatOptions::SignDisplay::Auto:
+ // Default, no additional tokens needed.
+ return true;
+ case NumberFormatOptions::SignDisplay::Always:
+ return appendToken(u"sign-always");
+ case NumberFormatOptions::SignDisplay::Never:
+ return appendToken(u"sign-never");
+ case NumberFormatOptions::SignDisplay::ExceptZero:
+ return appendToken(u"sign-except-zero");
+ case NumberFormatOptions::SignDisplay::Negative:
+ return appendToken(u"sign-negative");
+ case NumberFormatOptions::SignDisplay::Accounting:
+ return appendToken(u"sign-accounting");
+ case NumberFormatOptions::SignDisplay::AccountingAlways:
+ return appendToken(u"sign-accounting-always");
+ case NumberFormatOptions::SignDisplay::AccountingExceptZero:
+ return appendToken(u"sign-accounting-except-zero");
+ case NumberFormatOptions::SignDisplay::AccountingNegative:
+ return appendToken(u"sign-accounting-negative");
+ }
+ MOZ_ASSERT_UNREACHABLE("unexpected sign display type");
+ return false;
+}
+
+bool NumberFormatterSkeleton::roundingIncrement(uint32_t increment,
+ uint32_t mnfd, uint32_t mxfd,
+ bool stripTrailingZero) {
+ // Note: |mnfd| can be zero here.
+ MOZ_ASSERT(mnfd <= mxfd);
+ MOZ_ASSERT(increment > 1);
+
+ // Limit |mxfd| to 100. (20 is the current limit for ECMA-402, but there are
+ // plans to change it to 100.)
+ constexpr size_t maxFracDigits = 100;
+ MOZ_RELEASE_ASSERT(mxfd <= maxFracDigits);
+
+ static constexpr char digits[] = "0123456789";
+
+ // We need enough space to print any uint32_t, which is possibly shifted by
+ // |mxfd| decimal places. And additionally we need to reserve space for "0.".
+ static_assert(std::numeric_limits<uint32_t>::digits10 + 1 < maxFracDigits);
+ constexpr size_t maxLength = maxFracDigits + 2;
+
+ char chars[maxLength];
+ RangedPtr<char> ptr(chars + maxLength, chars, maxLength);
+ const RangedPtr<char> end = ptr;
+
+ // Convert to a signed integer, so we don't have to worry about underflows.
+ int32_t maxFrac = int32_t(mxfd);
+
+ // Write |increment| from back to front.
+ while (increment != 0) {
+ *--ptr = digits[increment % 10];
+ increment /= 10;
+ maxFrac -= 1;
+
+ if (maxFrac == 0) {
+ *--ptr = '.';
+ }
+ }
+
+ // Write any remaining zeros from |mxfd| and prepend '0' if we last wrote the
+ // decimal point.
+ while (maxFrac >= 0) {
+ MOZ_ASSERT_IF(maxFrac == 0, *ptr == '.');
+
+ *--ptr = '0';
+ maxFrac -= 1;
+
+ if (maxFrac == 0) {
+ *--ptr = '.';
+ }
+ }
+
+ MOZ_ASSERT(ptr < end, "At least one character is written.");
+ MOZ_ASSERT(*ptr != '.', "First character is a digit.");
+
+ if (!append(u"precision-increment/") || !append(ptr.get(), end - ptr)) {
+ return false;
+ }
+ if (stripTrailingZero) {
+ if (!append(u"/w")) {
+ return false;
+ }
+ }
+ return append(' ');
+}
+
+bool NumberFormatterSkeleton::roundingMode(
+ NumberFormatOptions::RoundingMode rounding) {
+ switch (rounding) {
+ case NumberFormatOptions::RoundingMode::Ceil:
+ return appendToken(u"rounding-mode-ceiling");
+ case NumberFormatOptions::RoundingMode::Floor:
+ return appendToken(u"rounding-mode-floor");
+ case NumberFormatOptions::RoundingMode::Expand:
+ return appendToken(u"rounding-mode-up");
+ case NumberFormatOptions::RoundingMode::Trunc:
+ return appendToken(u"rounding-mode-down");
+ case NumberFormatOptions::RoundingMode::HalfCeil:
+ return appendToken(u"rounding-mode-half-ceiling");
+ case NumberFormatOptions::RoundingMode::HalfFloor:
+ return appendToken(u"rounding-mode-half-floor");
+ case NumberFormatOptions::RoundingMode::HalfExpand:
+ return appendToken(u"rounding-mode-half-up");
+ case NumberFormatOptions::RoundingMode::HalfTrunc:
+ return appendToken(u"rounding-mode-half-down");
+ case NumberFormatOptions::RoundingMode::HalfEven:
+ return appendToken(u"rounding-mode-half-even");
+ case NumberFormatOptions::RoundingMode::HalfOdd:
+ return appendToken(u"rounding-mode-half-odd");
+ }
+ MOZ_ASSERT_UNREACHABLE("unexpected rounding mode");
+ return false;
+}
+
+UNumberFormatter* NumberFormatterSkeleton::toFormatter(
+ std::string_view locale) {
+ if (!mValidSkeleton) {
+ return nullptr;
+ }
+
+ UErrorCode status = U_ZERO_ERROR;
+ UNumberFormatter* nf = unumf_openForSkeletonAndLocale(
+ mVector.begin(), mVector.length(), AssertNullTerminatedString(locale),
+ &status);
+ if (U_FAILURE(status)) {
+ return nullptr;
+ }
+ return nf;
+}
+
+static UNumberRangeCollapse ToUNumberRangeCollapse(
+ NumberRangeFormatOptions::RangeCollapse collapse) {
+ using RangeCollapse = NumberRangeFormatOptions::RangeCollapse;
+ switch (collapse) {
+ case RangeCollapse::Auto:
+ return UNUM_RANGE_COLLAPSE_AUTO;
+ case RangeCollapse::None:
+ return UNUM_RANGE_COLLAPSE_NONE;
+ case RangeCollapse::Unit:
+ return UNUM_RANGE_COLLAPSE_UNIT;
+ case RangeCollapse::All:
+ return UNUM_RANGE_COLLAPSE_ALL;
+ }
+ MOZ_ASSERT_UNREACHABLE("unexpected range collapse");
+ return UNUM_RANGE_COLLAPSE_NONE;
+}
+
+static UNumberRangeIdentityFallback ToUNumberRangeIdentityFallback(
+ NumberRangeFormatOptions::RangeIdentityFallback identity) {
+ using RangeIdentityFallback = NumberRangeFormatOptions::RangeIdentityFallback;
+ switch (identity) {
+ case RangeIdentityFallback::SingleValue:
+ return UNUM_IDENTITY_FALLBACK_SINGLE_VALUE;
+ case RangeIdentityFallback::ApproximatelyOrSingleValue:
+ return UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE;
+ case RangeIdentityFallback::Approximately:
+ return UNUM_IDENTITY_FALLBACK_APPROXIMATELY;
+ case RangeIdentityFallback::Range:
+ return UNUM_IDENTITY_FALLBACK_RANGE;
+ }
+ MOZ_ASSERT_UNREACHABLE("unexpected range identity fallback");
+ return UNUM_IDENTITY_FALLBACK_RANGE;
+}
+
+UNumberRangeFormatter* NumberFormatterSkeleton::toRangeFormatter(
+ std::string_view locale, NumberRangeFormatOptions::RangeCollapse collapse,
+ NumberRangeFormatOptions::RangeIdentityFallback identity) {
+ if (!mValidSkeleton) {
+ return nullptr;
+ }
+
+ UParseError* perror = nullptr;
+ UErrorCode status = U_ZERO_ERROR;
+ UNumberRangeFormatter* nrf =
+ unumrf_openForSkeletonWithCollapseAndIdentityFallback(
+ mVector.begin(), mVector.length(), ToUNumberRangeCollapse(collapse),
+ ToUNumberRangeIdentityFallback(identity),
+ AssertNullTerminatedString(locale), perror, &status);
+ if (U_FAILURE(status)) {
+ return nullptr;
+ }
+ return nrf;
+}
+
+} // namespace mozilla::intl
diff --git a/intl/components/src/NumberFormatterSkeleton.h b/intl/components/src/NumberFormatterSkeleton.h
new file mode 100644
index 0000000000..134e9e0860
--- /dev/null
+++ b/intl/components/src/NumberFormatterSkeleton.h
@@ -0,0 +1,110 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef intl_components_NumberFormatterSkeleton_h_
+#define intl_components_NumberFormatterSkeleton_h_
+#include <string_view>
+#include "mozilla/intl/NumberFormat.h"
+#include "mozilla/intl/NumberRangeFormat.h"
+#include "mozilla/Vector.h"
+#include "unicode/unumberformatter.h"
+#include "unicode/utypes.h"
+
+struct UNumberRangeFormatter;
+
+namespace mozilla::intl {
+
+/**
+ * Class to create a number formatter skeleton.
+ *
+ * The skeleton syntax is documented at:
+ * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md
+ */
+ class MOZ_STACK_CLASS NumberFormatterSkeleton final {
+  public:
+   explicit NumberFormatterSkeleton(const NumberFormatOptions& options);
+
+   /**
+    * Create a new UNumberFormatter for |locale| from this skeleton.
+    */
+   UNumberFormatter* toFormatter(std::string_view locale);
+
+   /**
+    * Create a new UNumberRangeFormatter for |locale| from this skeleton, using
+    * the given collapse and identity-fallback behavior.
+    */
+   UNumberRangeFormatter* toRangeFormatter(
+       std::string_view locale, NumberRangeFormatOptions::RangeCollapse collapse,
+       NumberRangeFormatOptions::RangeIdentityFallback identity);
+
+  private:
+   static constexpr size_t DefaultVectorSize = 128;
+
+   // The UTF-16 skeleton text that is handed to ICU.
+   mozilla::Vector<char16_t, DefaultVectorSize> mVector;
+
+   // toRangeFormatter() refuses to create a formatter while this is false.
+   bool mValidSkeleton = false;
+
+   // Append a single code unit.
+   [[nodiscard]] bool append(char16_t c) { return mVector.append(c); }
+
+   // Append |times| copies of |c|.
+   [[nodiscard]] bool appendN(char16_t c, size_t times) {
+     return mVector.appendN(c, times);
+   }
+
+   // Append a null-terminated array (typically a string literal) without its
+   // terminator.
+   template <size_t N>
+   [[nodiscard]] bool append(const char16_t (&chars)[N]) {
+     static_assert(N > 0,
+                   "should only be used with string literals or properly "
+                   "null-terminated arrays");
+     MOZ_ASSERT(chars[N - 1] == '\0',
+                "should only be used with string literals or properly "
+                "null-terminated arrays");
+
+     // Skip the trailing \0.
+     constexpr size_t length = N - 1;
+     return mVector.append(chars, length);
+   }
+
+   // Append |token| followed by a single space.
+   template <size_t N>
+   [[nodiscard]] bool appendToken(const char16_t (&token)[N]) {
+     if (!append(token)) {
+       return false;
+     }
+     return append(' ');
+   }
+
+   // Append |length| elements starting at |chars|.
+   [[nodiscard]] bool append(const char* chars, size_t length) {
+     return mVector.append(chars, length);
+   }
+
+   // Option-specific helpers. Their definitions are not part of this header;
+   // presumably each one appends the skeleton tokens for its option and
+   // returns false on failure -- confirm against the implementation file.
+
+   [[nodiscard]] bool currency(std::string_view currency);
+
+   [[nodiscard]] bool currencyDisplay(
+       NumberFormatOptions::CurrencyDisplay display);
+
+   [[nodiscard]] bool unit(std::string_view unit);
+
+   [[nodiscard]] bool unitDisplay(NumberFormatOptions::UnitDisplay display);
+
+   [[nodiscard]] bool percent();
+
+   [[nodiscard]] bool fractionDigits(uint32_t min, uint32_t max,
+                                     bool stripTrailingZero);
+
+   [[nodiscard]] bool fractionWithSignificantDigits(uint32_t mnfd, uint32_t mxfd,
+                                                    uint32_t mnsd, uint32_t mxsd,
+                                                    bool relaxed,
+                                                    bool stripTrailingZero);
+
+   [[nodiscard]] bool minIntegerDigits(uint32_t min);
+
+   [[nodiscard]] bool significantDigits(uint32_t min, uint32_t max,
+                                        bool stripTrailingZero);
+
+   [[nodiscard]] bool grouping(NumberFormatOptions::Grouping grouping);
+
+   [[nodiscard]] bool notation(NumberFormatOptions::Notation style);
+
+   [[nodiscard]] bool signDisplay(NumberFormatOptions::SignDisplay display);
+
+   [[nodiscard]] bool roundingIncrement(uint32_t increment, uint32_t mnfd,
+                                        uint32_t mxfd, bool stripTrailingZero);
+
+   [[nodiscard]] bool roundingMode(NumberFormatOptions::RoundingMode rounding);
+ };
+
+} // namespace mozilla::intl
+
+#endif
diff --git a/intl/components/src/NumberParser.cpp b/intl/components/src/NumberParser.cpp
new file mode 100644
index 0000000000..fb97393783
--- /dev/null
+++ b/intl/components/src/NumberParser.cpp
@@ -0,0 +1,45 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include "mozilla/intl/NumberParser.h"
+
+namespace mozilla::intl {
+
+ // Opens a decimal ICU number format for |aLocale| and wraps it in a
+ // NumberParser. Grouping is switched off when |aUseGrouping| is false.
+ /*static*/ Result<UniquePtr<NumberParser>, ICUError> NumberParser::TryCreate(
+     const char* aLocale, bool aUseGrouping) {
+   UniquePtr<NumberParser> parser = MakeUnique<NumberParser>();
+
+   UErrorCode status = U_ZERO_ERROR;
+   parser->mNumberFormat =
+       unum_open(UNUM_DECIMAL, nullptr, 0, aLocale, nullptr, &status);
+   if (U_FAILURE(status)) {
+     return Err(ToICUError(status));
+   }
+
+   // The grouping attribute is only touched when grouping is not wanted.
+   if (aUseGrouping) {
+     return parser;
+   }
+
+   unum_setAttribute(parser->mNumberFormat.GetMut(), UNUM_GROUPING_USED,
+                     UBool(0));
+   return parser;
+ }
+
+ // Closes the ICU number format, if one was opened.
+ NumberParser::~NumberParser() {
+   if (!mNumberFormat) {
+     return;
+   }
+   unum_close(mNumberFormat.GetMut());
+ }
+
+ // Parses |aDouble| with the ICU number format and returns the parsed value
+ // together with the parse position reported by ICU.
+ Result<std::pair<double, int32_t>, ICUError> NumberParser::ParseDouble(
+     Span<const char16_t> aDouble) const {
+   const auto length = static_cast<int32_t>(aDouble.size());
+
+   int32_t stoppedAt = 0;
+   UErrorCode status = U_ZERO_ERROR;
+   const double parsed = unum_parseDouble(
+       mNumberFormat.GetConst(), aDouble.data(), length, &stoppedAt, &status);
+   if (U_SUCCESS(status)) {
+     return std::make_pair(parsed, stoppedAt);
+   }
+   return Err(ToICUError(status));
+ }
+
+} // namespace mozilla::intl
diff --git a/intl/components/src/NumberParser.h b/intl/components/src/NumberParser.h
new file mode 100644
index 0000000000..97efec0836
--- /dev/null
+++ b/intl/components/src/NumberParser.h
@@ -0,0 +1,46 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef intl_components_NumberParser_h_
+#define intl_components_NumberParser_h_
+
+#include "mozilla/intl/ICUError.h"
+#include "mozilla/intl/ICU4CGlue.h"
+#include "mozilla/Span.h"
+#include "mozilla/UniquePtr.h"
+
+#include "unicode/unum.h"
+
+namespace mozilla::intl {
+
+ class NumberParser {
+  public:
+   /**
+    * Create a new NumberParser for |aLocale|. When |aUseGrouping| is false,
+    * grouping is disabled on the underlying ICU number format.
+    */
+   static Result<UniquePtr<NumberParser>, ICUError> TryCreate(
+       const char* aLocale, bool aUseGrouping);
+
+   NumberParser() : mNumberFormat(nullptr) {}
+   NumberParser(const NumberParser&) = delete;
+   NumberParser& operator=(const NumberParser&) = delete;
+   ~NumberParser();
+
+   /**
+    * Attempts to parse a string representing a double. On success, returns the
+    * parsed double together with the parse position; otherwise returns an
+    * error.
+    *
+    * The parse position is the index into the input string where parsing
+    * stopped because a non-numeric character was encountered.
+    */
+   Result<std::pair<double, int32_t>, ICUError> ParseDouble(
+       Span<const char16_t> aDouble) const;
+
+  private:
+   // The ICU number format used for parsing; closed in the destructor.
+   ICUPointer<UNumberFormat> mNumberFormat;
+ };
+
+} // namespace mozilla::intl
+
+#endif
diff --git a/intl/components/src/NumberPart.h b/intl/components/src/NumberPart.h
new file mode 100644
index 0000000000..8639db5768
--- /dev/null
+++ b/intl/components/src/NumberPart.h
@@ -0,0 +1,53 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef intl_components_NumberPart_h_
+#define intl_components_NumberPart_h_
+
+#include <cstddef>
+#include <cstdint>
+
+#include "mozilla/Vector.h"
+
+namespace mozilla::intl {
+
+ enum class NumberPartType : int16_t {  // Kind of a single NumberPart.
+ ApproximatelySign,
+ Compact,
+ Currency,
+ Decimal,
+ ExponentInteger,
+ ExponentMinusSign,
+ ExponentSeparator,
+ Fraction,
+ Group,
+ Infinity,
+ Integer,
+ Literal,
+ MinusSign,
+ Nan,
+ Percent,
+ PlusSign,
+ Unit,
+ };
+
+ // Source tag stored in every NumberPart.
+ // NOTE(review): presumably tells whether a part of a formatted range stems
+ // from the start number, the end number, or is shared by both -- confirm
+ // against the code that fills in NumberPart::source.
+ enum class NumberPartSource : int16_t {
+   Shared,
+   Start,
+   End,
+ };
+
+ // The parts of a formatted string are contiguous and cover it completely, so
+ // each part only records where it ends; it begins where the preceding part
+ // ended.
+ struct NumberPart {
+   NumberPartType type;
+   NumberPartSource source;
+   size_t endIndex;
+
+   // Two parts are equal when all three fields match.
+   bool operator==(const NumberPart& rhs) const {
+     if (type != rhs.type) {
+       return false;
+     }
+     if (source != rhs.source) {
+       return false;
+     }
+     return endIndex == rhs.endIndex;
+   }
+   bool operator!=(const NumberPart& rhs) const { return !operator==(rhs); }
+ };
+
+using NumberPartVector = mozilla::Vector<NumberPart, 8>;
+
+} // namespace mozilla::intl
+#endif
diff --git a/intl/components/src/NumberRangeFormat.cpp b/intl/components/src/NumberRangeFormat.cpp
new file mode 100644
index 0000000000..844f633bc4
--- /dev/null
+++ b/intl/components/src/NumberRangeFormat.cpp
@@ -0,0 +1,215 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/intl/NumberRangeFormat.h"
+
+#include "mozilla/intl/ICU4CGlue.h"
+#include "mozilla/intl/NumberFormat.h"
+#include "NumberFormatFields.h"
+#include "NumberFormatterSkeleton.h"
+#include "ScopedICUObject.h"
+
+#include "unicode/uformattedvalue.h"
+#include "unicode/unumberrangeformatter.h"
+#include "unicode/upluralrules.h"
+
+namespace mozilla::intl {
+
+ // Allocates a NumberRangeFormat and initializes it for |aLocale| and
+ // |aOptions|; an initialization error is passed on to the caller.
+ /*static*/ Result<UniquePtr<NumberRangeFormat>, ICUError>
+ NumberRangeFormat::TryCreate(std::string_view aLocale,
+                              const NumberRangeFormatOptions& aOptions) {
+   UniquePtr<NumberRangeFormat> formatter = MakeUnique<NumberRangeFormat>();
+
+   auto initialized = formatter->initialize(aLocale, aOptions);
+   if (initialized.isErr()) {
+     return Err(initialized.unwrapErr());
+   }
+
+   return formatter;
+ }
+
+ // Releases the ICU result object and the ICU formatter, whichever of them
+ // was created.
+ NumberRangeFormat::~NumberRangeFormat() {
+   if (mFormattedNumberRange != nullptr) {
+     unumrf_closeResult(mFormattedNumberRange);
+   }
+
+   if (mNumberRangeFormatter != nullptr) {
+     unumrf_close(mNumberRangeFormatter);
+   }
+ }
+
+ // Creates the ICU range formatter from a skeleton built out of |aOptions| and
+ // opens the result object that later format calls write into.
+ Result<Ok, ICUError> NumberRangeFormat::initialize(
+     std::string_view aLocale, const NumberRangeFormatOptions& aOptions) {
+   mFormatForUnit = aOptions.mUnit.isSome();
+
+   NumberFormatterSkeleton skeleton(aOptions);
+   mNumberRangeFormatter = skeleton.toRangeFormatter(
+       aLocale, aOptions.mRangeCollapse, aOptions.mRangeIdentityFallback);
+   if (!mNumberRangeFormatter) {
+     // The skeleton doesn't say why it failed, so report a generic error.
+     return Err(ICUError::InternalError);
+   }
+
+   UErrorCode status = U_ZERO_ERROR;
+   mFormattedNumberRange = unumrf_openResult(&status);
+   if (U_FAILURE(status)) {
+     return Err(ToICUError(status));
+   }
+
+   return Ok();
+ }
+
+ // Formats the range [start, end] and asks |pluralRules| for the plural
+ // keyword of the formatted range. The keyword is written to |keyword| (a
+ // buffer of |keywordSize| code units) and its length is returned.
+ Result<int32_t, ICUError> NumberRangeFormat::selectForRange(
+     double start, double end, char16_t* keyword, int32_t keywordSize,
+     const UPluralRules* pluralRules) const {
+   MOZ_ASSERT(keyword);
+   MOZ_ASSERT(pluralRules);
+
+   // Formatting fills mFormattedNumberRange, which the selection reads below.
+   auto formatted = format(start, end);
+   if (formatted.isErr()) {
+     return Err(formatted.unwrapErr());
+   }
+
+   UErrorCode status = U_ZERO_ERROR;
+   const int32_t keywordLength = uplrules_selectForRange(
+       pluralRules, mFormattedNumberRange, keyword, keywordSize, &status);
+   if (U_SUCCESS(status)) {
+     return keywordLength;
+   }
+
+   return Err(ToICUError(status));
+ }
+
+ // Formats the double range into mFormattedNumberRange. Returns false when
+ // ICU reports a failure.
+ bool NumberRangeFormat::formatInternal(double start, double end) const {
+   // ICU incorrectly formats NaN values with the sign bit set, as if they
+   // were negative. Until ICU is fixed, substitute every NaN with one fixed
+   // NaN pattern whose sign bit is unset.
+   const double rangeStart =
+       MOZ_UNLIKELY(IsNaN(start)) ? SpecificNaN<double>(0, 1) : start;
+   const double rangeEnd =
+       MOZ_UNLIKELY(IsNaN(end)) ? SpecificNaN<double>(0, 1) : end;
+
+   UErrorCode status = U_ZERO_ERROR;
+   unumrf_formatDoubleRange(mNumberRangeFormatter, rangeStart, rangeEnd,
+                            mFormattedNumberRange, &status);
+   return !U_FAILURE(status);
+ }
+
+ // Formats a range given as two decimal strings into mFormattedNumberRange.
+ // Returns false when ICU reports a failure.
+ bool NumberRangeFormat::formatInternal(std::string_view start,
+                                        std::string_view end) const {
+   const char* const startChars = start.data();
+   const char* const endChars = end.data();
+
+   UErrorCode status = U_ZERO_ERROR;
+   unumrf_formatDecimalRange(mNumberRangeFormatter, startChars, start.size(),
+                             endChars, end.size(), mFormattedNumberRange,
+                             &status);
+   return !U_FAILURE(status);
+ }
+
+ // Returns a view of the string held by mFormattedNumberRange, i.e. the
+ // output of the most recent format call.
+ Result<std::u16string_view, ICUError> NumberRangeFormat::formatResult() const {
+   UErrorCode status = U_ZERO_ERROR;
+
+   const UFormattedValue* value =
+       unumrf_resultAsValue(mFormattedNumberRange, &status);
+   if (U_FAILURE(status)) {
+     return Err(ToICUError(status));
+   }
+
+   int32_t length;
+   const char16_t* chars = ufmtval_getString(value, &length, &status);
+   if (U_FAILURE(status)) {
+     return Err(ToICUError(status));
+   }
+
+   const auto viewLength = static_cast<size_t>(length);
+   return std::u16string_view(chars, viewLength);
+ }
+
+ /**
+  * Returns the string held by mFormattedNumberRange (the output of the most
+  * recent format call) and fills |parts| with the parts of that string.
+  *
+  * |start| and |end| carry the numeric value of each boundary when it is
+  * known, |startIsNegative| and |endIsNegative| its sign. They are forwarded
+  * to GetPartTypeForNumberField() to classify the fields reported by ICU.
+  *
+  * Returns an error when an ICU call fails, when a field can't be mapped to a
+  * part type, or when the parts can't be collected.
+  */
+ Result<std::u16string_view, ICUError> NumberRangeFormat::formatResultToParts(
+     Maybe<double> start, bool startIsNegative, Maybe<double> end,
+     bool endIsNegative, NumberPartVector& parts) const {
+   UErrorCode status = U_ZERO_ERROR;
+
+   const UFormattedValue* formattedValue =
+       unumrf_resultAsValue(mFormattedNumberRange, &status);
+   if (U_FAILURE(status)) {
+     return Err(ToICUError(status));
+   }
+
+   int32_t utf16Length;
+   const char16_t* utf16Str =
+       ufmtval_getString(formattedValue, &utf16Length, &status);
+   if (U_FAILURE(status)) {
+     return Err(ToICUError(status));
+   }
+
+   UConstrainedFieldPosition* fpos = ucfpos_open(&status);
+   if (U_FAILURE(status)) {
+     return Err(ToICUError(status));
+   }
+   ScopedICUObject<UConstrainedFieldPosition, ucfpos_close> toCloseFpos(fpos);
+
+   // The number whose fields are currently being visited. Until a span field
+   // says otherwise, fields are attributed to the start number.
+   Maybe<double> number = start;
+   bool isNegative = startIsNegative;
+
+   NumberPartSourceMap sourceMap;
+
+   // Vacuum up fields in the overall formatted string.
+   NumberFormatFields fields;
+
+   while (true) {
+     bool hasMore = ufmtval_nextPosition(formattedValue, fpos, &status);
+     if (U_FAILURE(status)) {
+       return Err(ToICUError(status));
+     }
+     if (!hasMore) {
+       break;
+     }
+
+     int32_t category = ucfpos_getCategory(fpos, &status);
+     if (U_FAILURE(status)) {
+       return Err(ToICUError(status));
+     }
+
+     int32_t fieldName = ucfpos_getField(fpos, &status);
+     if (U_FAILURE(status)) {
+       return Err(ToICUError(status));
+     }
+
+     int32_t beginIndex, endIndex;
+     ucfpos_getIndexes(fpos, &beginIndex, &endIndex, &status);
+     if (U_FAILURE(status)) {
+       return Err(ToICUError(status));
+     }
+
+     if (category == UFIELD_CATEGORY_NUMBER_RANGE_SPAN) {
+       // The special field category UFIELD_CATEGORY_NUMBER_RANGE_SPAN has only
+       // two allowed values (0 or 1), indicating the begin of the start resp.
+       // end number.
+       MOZ_ASSERT(fieldName == 0 || fieldName == 1,
+                  "span category has unexpected value");
+
+       if (fieldName == 0) {
+         number = start;
+         isNegative = startIsNegative;
+
+         sourceMap.start = {uint32_t(beginIndex), uint32_t(endIndex)};
+       } else {
+         number = end;
+         isNegative = endIsNegative;
+
+         sourceMap.end = {uint32_t(beginIndex), uint32_t(endIndex)};
+       }
+
+       continue;
+     }
+
+     // Ignore categories other than UFIELD_CATEGORY_NUMBER.
+     if (category != UFIELD_CATEGORY_NUMBER) {
+       continue;
+     }
+
+     Maybe<NumberPartType> partType = GetPartTypeForNumberField(
+         UNumberFormatFields(fieldName), number, isNegative, mFormatForUnit);
+     // These failures don't come from ICU: |status| holds no failure code at
+     // this point, so it must not be converted into an error. Report the
+     // failure explicitly instead.
+     if (!partType || !fields.append(*partType, beginIndex, endIndex)) {
+       return Err(ICUError::InternalError);
+     }
+   }
+
+   // As above, a failure here is not reflected in |status|.
+   if (!fields.toPartsVector(utf16Length, sourceMap, parts)) {
+     return Err(ICUError::InternalError);
+   }
+
+   return std::u16string_view(utf16Str, static_cast<size_t>(utf16Length));
+ }
+
+} // namespace mozilla::intl
diff --git a/intl/components/src/NumberRangeFormat.h b/intl/components/src/NumberRangeFormat.h
new file mode 100644
index 0000000000..60de8399ec
--- /dev/null
+++ b/intl/components/src/NumberRangeFormat.h
@@ -0,0 +1,237 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef intl_components_NumberRangeFormat_h_
+#define intl_components_NumberRangeFormat_h_
+
+#include "mozilla/FloatingPoint.h"
+#include "mozilla/intl/ICUError.h"
+#include "mozilla/intl/NumberFormat.h"
+#include "mozilla/Result.h"
+#include "mozilla/UniquePtr.h"
+
+#include <stdint.h>
+#include <string_view>
+
+#include "unicode/utypes.h"
+
+struct UFormattedNumberRange;
+struct UNumberRangeFormatter;
+struct UPluralRules;
+
+namespace mozilla::intl {
+
+/**
+ * NumberRangeFormatOptions supports the same set of options as
+ * NumberFormatOptions and additionally allows to control how to display ranges.
+ */
+ struct MOZ_STACK_CLASS NumberRangeFormatOptions : public NumberFormatOptions {
+ /**
+ * Controls if and how to collapse identical parts in a range.
+ *
+ * Defaults to RangeCollapse::Auto.
+ */
+ enum class RangeCollapse {
+ /**
+ * Apply locale-specific heuristics.
+ */
+ Auto,
+
+ /**
+ * Never collapse identical parts.
+ */
+ None,
+
+ /**
+ * Collapse identical unit parts.
+ */
+ Unit,
+
+ /**
+ * Collapse all identical parts.
+ */
+ All,
+ } mRangeCollapse = RangeCollapse::Auto;
+
+ /**
+ * Controls how to display a range whose two numbers are identical.
+ *
+ * Defaults to RangeIdentityFallback::SingleValue.
+ */
+ enum class RangeIdentityFallback {
+ /**
+ * Display the range as a single value.
+ */
+ SingleValue,
+
+ /**
+ * Display the range as a single value if both numbers were equal before
+ * rounding. Otherwise display with a locale-sensitive approximation
+ * pattern.
+ */
+ ApproximatelyOrSingleValue,
+
+ /**
+ * Display with a locale-sensitive approximation pattern.
+ */
+ Approximately,
+
+ /**
+ * Display as a range expression.
+ */
+ Range,
+ } mRangeIdentityFallback = RangeIdentityFallback::SingleValue;
+ };
+
+/**
+ * A NumberRangeFormat implementation that roughly mirrors the API provided by
+ * the ECMA-402 Intl.NumberFormat object for formatting number ranges.
+ *
+ * https://tc39.es/ecma402/#numberformat-objects
+ */
+ class NumberRangeFormat final {
+ public:
+ /**
+ * Initialize a new NumberRangeFormat for the provided locale and using the
+ * provided options. Returns an error if the underlying ICU formatter or its
+ * result object can't be created.
+ *
+ * https://tc39.es/ecma402/#sec-initializenumberformat
+ */
+ static Result<UniquePtr<NumberRangeFormat>, ICUError> TryCreate(
+ std::string_view aLocale, const NumberRangeFormatOptions& aOptions);
+
+ NumberRangeFormat() = default;
+ NumberRangeFormat(const NumberRangeFormat&) = delete;
+ NumberRangeFormat& operator=(const NumberRangeFormat&) = delete;
+
+ ~NumberRangeFormat();
+
+ /**
+ * Formats a double range to a utf-16 string. The string view is valid until
+ * another number range is formatted. Accessing the string view after this
+ * event is undefined behavior.
+ *
+ * https://tc39.es/ecma402/#sec-formatnumericrange
+ */
+ Result<std::u16string_view, ICUError> format(double start, double end) const {
+ if (!formatInternal(start, end)) {
+ return Err(ICUError::InternalError);
+ }
+
+ return formatResult();
+ }
+
+ /**
+ * Formats a double range to a utf-16 string, and fills the provided parts
+ * vector. The string view is valid until another number range is formatted.
+ * Accessing the string view after this event is undefined behavior.
+ *
+ * https://tc39.es/ecma402/#sec-partitionnumberrangepattern
+ */
+ Result<std::u16string_view, ICUError> formatToParts(
+ double start, double end, NumberPartVector& parts) const {
+ if (!formatInternal(start, end)) {
+ return Err(ICUError::InternalError);
+ }
+
+ bool isNegativeStart = !IsNaN(start) && IsNegative(start);
+ bool isNegativeEnd = !IsNaN(end) && IsNegative(end);
+
+ return formatResultToParts(Some(start), isNegativeStart, Some(end),
+ isNegativeEnd, parts);
+ }
+
+ /**
+ * Formats a decimal number range to a utf-16 string. The string view is valid
+ * until another number range is formatted. Accessing the string view after
+ * this event is undefined behavior.
+ *
+ * https://tc39.es/ecma402/#sec-formatnumericrange
+ */
+ Result<std::u16string_view, ICUError> format(std::string_view start,
+ std::string_view end) const {
+ if (!formatInternal(start, end)) {
+ return Err(ICUError::InternalError);
+ }
+
+ return formatResult();
+ }
+
+ /**
+ * Formats a string encoded decimal number range to a utf-16 string, and fills
+ * the provided parts vector. The string view is valid until another number
+ * range is formatted. Accessing the string view after this event is undefined
+ * behavior.
+ *
+ * Only the infinity spellings are turned into a numeric value for part
+ * classification; for every other input no numeric value is passed along.
+ *
+ * https://tc39.es/ecma402/#sec-partitionnumberrangepattern
+ */
+ Result<std::u16string_view, ICUError> formatToParts(
+ std::string_view start, std::string_view end,
+ NumberPartVector& parts) const {
+ if (!formatInternal(start, end)) {
+ return Err(ICUError::InternalError);
+ }
+
+ Maybe<double> numStart = Nothing();
+ if (start == "Infinity" || start == "+Infinity") {
+ numStart.emplace(PositiveInfinity<double>());
+ } else if (start == "-Infinity") {
+ numStart.emplace(NegativeInfinity<double>());
+ } else {
+ // "NaN" is not currently expected, so we assert; numStart stays Nothing().
+ MOZ_ASSERT(start != "NaN");
+ }
+
+ Maybe<double> numEnd = Nothing();
+ if (end == "Infinity" || end == "+Infinity") {
+ numEnd.emplace(PositiveInfinity<double>());
+ } else if (end == "-Infinity") {
+ numEnd.emplace(NegativeInfinity<double>());
+ } else {
+ // "NaN" is not currently expected, so we assert; numEnd stays Nothing().
+ MOZ_ASSERT(end != "NaN");
+ }
+
+ bool isNegativeStart = !start.empty() && start[0] == '-';
+ bool isNegativeEnd = !end.empty() && end[0] == '-';
+
+ return formatResultToParts(numStart, isNegativeStart, numEnd, isNegativeEnd,
+ parts);
+ }
+
+ /**
+ * Formats the number range and selects the keyword by using a provided
+ * UPluralRules object. The keyword is written to |keyword| and its length is
+ * returned.
+ *
+ * https://tc39.es/ecma402/#sec-intl.pluralrules.prototype.selectrange
+ *
+ * TODO(1713917) This is necessary because both PluralRules and
+ * NumberRangeFormat have a shared dependency on the raw UFormattedNumberRange
+ * type. Once we transition to using ICU4X, the FFI calls should no
+ * longer require such shared dependencies. At that time, this
+ * functionality should be removed from NumberRangeFormat and invoked
+ * solely from PluralRules.
+ */
+ Result<int32_t, ICUError> selectForRange(
+ double start, double end, char16_t* keyword, int32_t keywordSize,
+ const UPluralRules* pluralRules) const;
+
+ private:
+ UNumberRangeFormatter* mNumberRangeFormatter = nullptr;  // Closed in dtor.
+ UFormattedNumberRange* mFormattedNumberRange = nullptr;  // Closed in dtor.
+ bool mFormatForUnit = false;  // Set when the options specify a unit.
+
+ Result<Ok, ICUError> initialize(std::string_view aLocale,
+ const NumberRangeFormatOptions& aOptions);
+
+ [[nodiscard]] bool formatInternal(double start, double end) const;
+
+ [[nodiscard]] bool formatInternal(std::string_view start,
+ std::string_view end) const;
+
+ Result<std::u16string_view, ICUError> formatResult() const;
+
+ Result<std::u16string_view, ICUError> formatResultToParts(
+ Maybe<double> start, bool startIsNegative, Maybe<double> end,
+ bool endIsNegative, NumberPartVector& parts) const;
+ };
+
+} // namespace mozilla::intl
+
+#endif
diff --git a/intl/components/src/NumberingSystem.cpp b/intl/components/src/NumberingSystem.cpp
new file mode 100644
index 0000000000..b86484a5f7
--- /dev/null
+++ b/intl/components/src/NumberingSystem.cpp
@@ -0,0 +1,38 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/intl/NumberingSystem.h"
+#include "mozilla/intl/ICU4CGlue.h"
+
+#include "unicode/unumsys.h"
+#include "unicode/utypes.h"
+
+namespace mozilla::intl {
+
+ NumberingSystem::~NumberingSystem() {
+ MOZ_ASSERT(mNumberingSystem);
+ unumsys_close(mNumberingSystem);  // Release the ICU numbering system.
+ }
+
+ // Opens the ICU numbering system for |aLocale| and wraps it in a
+ // NumberingSystem object.
+ Result<UniquePtr<NumberingSystem>, ICUError> NumberingSystem::TryCreate(
+     const char* aLocale) {
+   UErrorCode status = U_ZERO_ERROR;
+   UNumberingSystem* handle = unumsys_open(IcuLocale(aLocale), &status);
+   if (U_SUCCESS(status)) {
+     return MakeUnique<NumberingSystem>(handle);
+   }
+
+   return Err(ToICUError(status));
+ }
+
+Result<Span<const char>, ICUError> NumberingSystem::GetName() {
+ const char* name = unumsys_getName(mNumberingSystem);
+ if (!name) {
+ return Err(ICUError::InternalError);
+ }
+
+ return MakeStringSpan(name);
+}
+
+} // namespace mozilla::intl
diff --git a/intl/components/src/NumberingSystem.h b/intl/components/src/NumberingSystem.h
new file mode 100644
index 0000000000..a3d1903dd1
--- /dev/null
+++ b/intl/components/src/NumberingSystem.h
@@ -0,0 +1,56 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef intl_components_NumberingSystem_h_
+#define intl_components_NumberingSystem_h_
+
+#include "mozilla/intl/ICUError.h"
+#include "mozilla/Result.h"
+#include "mozilla/Span.h"
+#include "mozilla/UniquePtr.h"
+
+struct UNumberingSystem;
+
+namespace mozilla::intl {
+
+/**
+ * This component is a Mozilla-focused API for working with numbering systems in
+ * internationalization code. It is used in coordination with other operations
+ * such as number formatting.
+ */
+ class NumberingSystem final {
+  public:
+   // Wraps an already opened ICU numbering system, which must not be null. The
+   // destructor closes it.
+   explicit NumberingSystem(UNumberingSystem* aNumberingSystem)
+       : mNumberingSystem(aNumberingSystem) {
+     MOZ_ASSERT(aNumberingSystem);
+   }
+
+   // Copying is disallowed because this class owns the ICU resource. Moving is
+   // not implemented at the moment; a custom move operator could be added if
+   // it is ever needed.
+   NumberingSystem(const NumberingSystem&) = delete;
+   NumberingSystem& operator=(const NumberingSystem&) = delete;
+
+   ~NumberingSystem();
+
+   /**
+    * Create a NumberingSystem for |aLocale|.
+    */
+   static Result<UniquePtr<NumberingSystem>, ICUError> TryCreate(
+       const char* aLocale);
+
+   /**
+    * Returns the name of this numbering system.
+    *
+    * The returned string has the same lifetime as this NumberingSystem object.
+    */
+   Result<Span<const char>, ICUError> GetName();
+
+  private:
+   // The ICU numbering system; closed in the destructor.
+   UNumberingSystem* mNumberingSystem = nullptr;
+ };
+
+} // namespace mozilla::intl
+
+#endif
diff --git a/intl/components/src/PluralRules.cpp b/intl/components/src/PluralRules.cpp
new file mode 100644
index 0000000000..891ca45769
--- /dev/null
+++ b/intl/components/src/PluralRules.cpp
@@ -0,0 +1,180 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/intl/PluralRules.h"
+
+#include "mozilla/intl/ICU4CGlue.h"
+#include "mozilla/intl/NumberFormat.h"
+#include "mozilla/intl/NumberRangeFormat.h"
+#include "mozilla/Utf8.h"
+#include "mozilla/PodOperations.h"
+#include "mozilla/Span.h"
+#include "ScopedICUObject.h"
+
+#include "unicode/unum.h"
+#include "unicode/upluralrules.h"
+#include "unicode/ustring.h"
+
+namespace mozilla::intl {
+
+ PluralRules::PluralRules(UPluralRules*& aPluralRules,
+ UniquePtr<NumberFormat>&& aNumberFormat,
+ UniquePtr<NumberRangeFormat>&& aNumberRangeFormat)
+ : mPluralRules(aPluralRules),
+ mNumberFormat(std::move(aNumberFormat)),
+ mNumberRangeFormat(std::move(aNumberRangeFormat)) {
+ MOZ_ASSERT(aPluralRules);
+ aPluralRules = nullptr;  // mPluralRules holds it now; clear the caller's copy.
+ }
+
+ // Builds the number formatter, the number range formatter and the ICU plural
+ // rules for |aLocale|, in that order, and bundles them into a PluralRules
+ // object. The first failure is returned to the caller.
+ Result<UniquePtr<PluralRules>, ICUError> PluralRules::TryCreate(
+     const std::string_view aLocale, const PluralRulesOptions& aOptions) {
+   auto formatter =
+       NumberFormat::TryCreate(aLocale, aOptions.ToNumberFormatOptions());
+   if (formatter.isErr()) {
+     return Err(formatter.unwrapErr());
+   }
+
+   auto rangeFormatter = NumberRangeFormat::TryCreate(
+       aLocale, aOptions.ToNumberRangeFormatOptions());
+   if (rangeFormatter.isErr()) {
+     return Err(rangeFormatter.unwrapErr());
+   }
+
+   // Anything that is not cardinal is treated as ordinal.
+   UPluralType icuType = UPLURAL_TYPE_ORDINAL;
+   if (aOptions.mPluralType == PluralRules::Type::Cardinal) {
+     icuType = UPLURAL_TYPE_CARDINAL;
+   }
+
+   UErrorCode status = U_ZERO_ERROR;
+   UPluralRules* rules = uplrules_openForType(
+       AssertNullTerminatedString(aLocale), icuType, &status);
+   if (U_FAILURE(status)) {
+     return Err(ToICUError(status));
+   }
+
+   return UniquePtr<PluralRules>(
+       new PluralRules(rules, formatter.unwrap(), rangeFormatter.unwrap()));
+ }
+
+ // Formats |aNumber| with the number formatter, lets the plural rules pick the
+ // keyword for it, and converts that keyword into the Keyword enum.
+ Result<PluralRules::Keyword, ICUError> PluralRules::Select(
+     const double aNumber) const {
+   char16_t buffer[MAX_KEYWORD_LENGTH];
+
+   auto selected = mNumberFormat->selectFormatted(
+       aNumber, buffer, MAX_KEYWORD_LENGTH, mPluralRules);
+   if (!selected.isErr()) {
+     return KeywordFromUtf16(Span(buffer, selected.unwrap()));
+   }
+
+   return Err(selected.unwrapErr());
+ }
+
+ // Formats the range from |aStart| to |aEnd| with the number range formatter,
+ // lets the plural rules pick the keyword for it, and converts that keyword
+ // into the Keyword enum.
+ Result<PluralRules::Keyword, ICUError> PluralRules::SelectRange(
+     double aStart, double aEnd) const {
+   char16_t buffer[MAX_KEYWORD_LENGTH];
+
+   auto selected = mNumberRangeFormat->selectForRange(
+       aStart, aEnd, buffer, MAX_KEYWORD_LENGTH, mPluralRules);
+   if (!selected.isErr()) {
+     return KeywordFromUtf16(Span(buffer, selected.unwrap()));
+   }
+
+   return Err(selected.unwrapErr());
+ }
+
+ // Collects every keyword the ICU plural rules enumerate into an EnumSet.
+ Result<EnumSet<PluralRules::Keyword>, ICUError> PluralRules::Categories()
+     const {
+   UErrorCode status = U_ZERO_ERROR;
+   UEnumeration* keywords = uplrules_getKeywords(mPluralRules, &status);
+   if (U_FAILURE(status)) {
+     return Err(ToICUError(status));
+   }
+   ScopedICUObject<UEnumeration, uenum_close> closeKeywords(keywords);
+
+   EnumSet<PluralRules::Keyword> categories;
+   for (;;) {
+     int32_t length;
+     const char* name = uenum_next(keywords, &length, &status);
+     if (U_FAILURE(status)) {
+       return Err(ToICUError(status));
+     }
+
+     // A null name marks the end of the enumeration.
+     if (!name) {
+       break;
+     }
+
+     categories += KeywordFromAscii(Span(name, length));
+   }
+
+   return categories;
+ }
+
+ // Maps a UTF-16 plural keyword to the Keyword enum. Anything that is not one
+ // of the five named keywords is expected to be "other".
+ PluralRules::Keyword PluralRules::KeywordFromUtf16(
+     Span<const char16_t> aKeyword) {
+   static constexpr auto kZero = MakeStringSpan(u"zero");
+   static constexpr auto kOne = MakeStringSpan(u"one");
+   static constexpr auto kTwo = MakeStringSpan(u"two");
+   static constexpr auto kFew = MakeStringSpan(u"few");
+   static constexpr auto kMany = MakeStringSpan(u"many");
+
+   const auto matches = [&aKeyword](const auto& aName) {
+     return aKeyword == aName;
+   };
+
+   if (matches(kZero)) {
+     return PluralRules::Keyword::Zero;
+   }
+   if (matches(kOne)) {
+     return PluralRules::Keyword::One;
+   }
+   if (matches(kTwo)) {
+     return PluralRules::Keyword::Two;
+   }
+   if (matches(kFew)) {
+     return PluralRules::Keyword::Few;
+   }
+   if (matches(kMany)) {
+     return PluralRules::Keyword::Many;
+   }
+
+   MOZ_ASSERT(aKeyword == MakeStringSpan(u"other"));
+   return PluralRules::Keyword::Other;
+ }
+
+ // Maps a plural keyword given as narrow chars to the Keyword enum. Anything
+ // that is not one of the five named keywords is expected to be "other".
+ PluralRules::Keyword PluralRules::KeywordFromAscii(Span<const char> aKeyword) {
+   static constexpr auto kZero = MakeStringSpan("zero");
+   static constexpr auto kOne = MakeStringSpan("one");
+   static constexpr auto kTwo = MakeStringSpan("two");
+   static constexpr auto kFew = MakeStringSpan("few");
+   static constexpr auto kMany = MakeStringSpan("many");
+
+   const auto matches = [&aKeyword](const auto& aName) {
+     return aKeyword == aName;
+   };
+
+   if (matches(kZero)) {
+     return PluralRules::Keyword::Zero;
+   }
+   if (matches(kOne)) {
+     return PluralRules::Keyword::One;
+   }
+   if (matches(kTwo)) {
+     return PluralRules::Keyword::Two;
+   }
+   if (matches(kFew)) {
+     return PluralRules::Keyword::Few;
+   }
+   if (matches(kMany)) {
+     return PluralRules::Keyword::Many;
+   }
+
+   MOZ_ASSERT(aKeyword == MakeStringSpan("other"));
+   return PluralRules::Keyword::Other;
+ }
+
+ // Closes the ICU plural rules, if any, and clears the pointer.
+ PluralRules::~PluralRules() {
+   if (!mPluralRules) {
+     return;
+   }
+
+   uplrules_close(mPluralRules);
+   mPluralRules = nullptr;
+ }
+
+} // namespace mozilla::intl
diff --git a/intl/components/src/PluralRules.h b/intl/components/src/PluralRules.h
new file mode 100644
index 0000000000..a413d54279
--- /dev/null
+++ b/intl/components/src/PluralRules.h
@@ -0,0 +1,221 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef intl_components_PluralRules_h_
+#define intl_components_PluralRules_h_
+
+#include <string_view>
+#include <type_traits>
+#include <utility>
+
+#include "mozilla/intl/ICUError.h"
+#include "mozilla/intl/NumberFormat.h"
+#include "mozilla/intl/NumberRangeFormat.h"
+#include "mozilla/EnumSet.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/Result.h"
+#include "mozilla/Span.h"
+
+#include "unicode/utypes.h"
+
+namespace mozilla::intl {
+
+/**
+ * Selects plural categories for numbers and number ranges.
+ *
+ * Instances are created through TryCreate() (the constructor is private) and
+ * cannot be copied.
+ */
+class PluralRules final {
+ public:
+ /**
+ * The set of keywords that a PluralRules object uses.
+ *
+ * https://tc39.es/ecma402/#sec-intl.pluralrules.prototype.resolvedoptions
+ */
+ enum class Keyword : uint8_t {
+ Few,
+ Many,
+ One,
+ Other,
+ Two,
+ Zero,
+ };
+
+ /**
+ * The two different types of PluralRules objects that can be created.
+ *
+ * https://tc39.es/ecma402/#sec-properties-of-intl-pluralrules-instances
+ */
+ enum class Type : uint8_t {
+ Cardinal,
+ Ordinal,
+ };
+
+ // Not copyable: the object holds a raw UPluralRules handle that the
+ // destructor closes.
+ PluralRules(const PluralRules&) = delete;
+ PluralRules& operator=(const PluralRules&) = delete;
+
+ /**
+ * Attempts to construct a PluralRules with the given locale and options.
+ *
+ * Returns the new instance on success, or an ICUError on failure.
+ */
+ // TODO(1709880) use mozilla::Span instead of std::string_view.
+ static Result<UniquePtr<PluralRules>, ICUError> TryCreate(
+ std::string_view aLocale, const PluralRulesOptions& aOptions);
+
+ /**
+ * Returns the PluralRules keyword that corresponds to the |aNumber|.
+ *
+ * https://tc39.es/ecma402/#sec-intl.pluralrules.prototype.select
+ */
+ Result<PluralRules::Keyword, ICUError> Select(double aNumber) const;
+
+ /**
+ * Returns the PluralRules keyword that corresponds to the range from |aStart|
+ * to |aEnd|.
+ *
+ * https://tc39.es/ecma402/#sec-intl.pluralrules.prototype.selectrange
+ */
+ Result<PluralRules::Keyword, ICUError> SelectRange(double aStart,
+ double aEnd) const;
+
+ /**
+ * Returns an EnumSet with the plural-rules categories that are supported by
+ * the locale that the PluralRules instance was created with.
+ */
+ Result<EnumSet<PluralRules::Keyword>, ICUError> Categories() const;
+
+ /**
+ * Closes the underlying UPluralRules handle, if one is held.
+ */
+ ~PluralRules();
+
+ private:
+ // The longest keyword is "other"
+ static const size_t MAX_KEYWORD_LENGTH = 5;
+
+ // Raw ICU handle; closed in the destructor.
+ UPluralRules* mPluralRules = nullptr;
+ UniquePtr<NumberFormat> mNumberFormat;
+ UniquePtr<NumberRangeFormat> mNumberRangeFormat;
+
+ PluralRules(UPluralRules*&, UniquePtr<NumberFormat>&&,
+ UniquePtr<NumberRangeFormat>&&);
+
+ /**
+ * Returns the PluralRules::Keyword that matches the UTF-16 string.
+ * Strings must be [u"few", u"many", u"one", u"other", u"two", u"zero"]
+ *
+ * Any string other than the first five listed keywords maps to
+ * Keyword::Other; debug builds assert that it is u"other".
+ */
+ static PluralRules::Keyword KeywordFromUtf16(Span<const char16_t> aKeyword);
+
+ /**
+ * Returns the PluralRules::Keyword that matches the ASCII string.
+ * Strings must be ["few", "many", "one", "other", "two", "zero"]
+ *
+ * Any string other than the first five listed keywords maps to
+ * Keyword::Other; debug builds assert that it is "other".
+ */
+ static PluralRules::Keyword KeywordFromAscii(Span<const char> aKeyword);
+};
+
+/**
+ * Options required for constructing a PluralRules object.
+ */
+struct MOZ_STACK_CLASS PluralRulesOptions {
+ /**
+ * Creates a NumberFormatOptions from the PluralRulesOptions.
+ *
+ * The rounding mode is always HalfExpand. Digit settings are copied only
+ * when they are set, and the rounding priority is converted by value.
+ */
+ NumberFormatOptions ToNumberFormatOptions() const {
+ NumberFormatOptions options;
+ options.mRoundingMode = NumberFormatOptions::RoundingMode::HalfExpand;
+
+ if (mFractionDigits.isSome()) {
+ options.mFractionDigits.emplace(mFractionDigits.ref());
+ }
+
+ if (mMinIntegerDigits.isSome()) {
+ options.mMinIntegerDigits.emplace(mMinIntegerDigits.ref());
+ }
+
+ if (mSignificantDigits.isSome()) {
+ options.mSignificantDigits.emplace(mSignificantDigits.ref());
+ }
+
+ options.mRoundingPriority =
+ NumberFormatOptions::RoundingPriority(mRoundingPriority);
+
+ return options;
+ }
+ /**
+ * Creates a NumberRangeFormatOptions from the PluralRulesOptions.
+ *
+ * Uses the same rounding and digit settings as ToNumberFormatOptions(), with
+ * range collapsing set to None and the range identity fallback set to Range.
+ */
+ NumberRangeFormatOptions ToNumberRangeFormatOptions() const {
+ NumberRangeFormatOptions options;
+ options.mRoundingMode = NumberRangeFormatOptions::RoundingMode::HalfExpand;
+ options.mRangeCollapse = NumberRangeFormatOptions::RangeCollapse::None;
+ options.mRangeIdentityFallback =
+ NumberRangeFormatOptions::RangeIdentityFallback::Range;
+
+ if (mFractionDigits.isSome()) {
+ options.mFractionDigits.emplace(mFractionDigits.ref());
+ }
+
+ if (mMinIntegerDigits.isSome()) {
+ options.mMinIntegerDigits.emplace(mMinIntegerDigits.ref());
+ }
+
+ if (mSignificantDigits.isSome()) {
+ options.mSignificantDigits.emplace(mSignificantDigits.ref());
+ }
+
+ options.mRoundingPriority =
+ NumberFormatOptions::RoundingPriority(mRoundingPriority);
+
+ return options;
+ }
+
+ /**
+ * Set the plural type between cardinal and ordinal.
+ *
+ * https://tc39.es/ecma402/#sec-intl.pluralrules.prototype.resolvedoptions
+ */
+ PluralRules::Type mPluralType = PluralRules::Type::Cardinal;
+
+ /**
+ * Set the minimum number of integer digits. |min| must be a non-zero
+ * number.
+ *
+ * https://tc39.es/ecma402/#sec-intl.pluralrules.prototype.resolvedoptions
+ */
+ Maybe<uint32_t> mMinIntegerDigits;
+
+ /**
+ * Set the fraction digits settings. |min| can be zero, |max| must be
+ * larger-or-equal to |min|.
+ *
+ * https://tc39.es/ecma402/#sec-intl.pluralrules.prototype.resolvedoptions
+ */
+ Maybe<std::pair<uint32_t, uint32_t>> mFractionDigits;
+
+ /**
+ * Set the significant digits settings. |min| must be a non-zero number, |max|
+ * must be larger-or-equal to |min|.
+ *
+ * https://tc39.es/ecma402/#sec-intl.pluralrules.prototype.resolvedoptions
+ */
+ Maybe<std::pair<uint32_t, uint32_t>> mSignificantDigits;
+
+ /**
+ * Set the rounding priority. |mFractionDigits| and |mSignificantDigits| must
+ * both be set if the rounding priority isn't equal to "auto".
+ */
+ enum class RoundingPriority {
+ Auto,
+ MorePrecision,
+ LessPrecision,
+ } mRoundingPriority = RoundingPriority::Auto;
+
+ // Must be compatible with NumberFormatOptions::RoundingPriority.
+ // The conversion functions above cast between the two enums by value, so
+ // the underlying type and every enumerator value have to match.
+ static_assert(std::is_same_v<
+ std::underlying_type_t<RoundingPriority>,
+ std::underlying_type_t<NumberFormatOptions::RoundingPriority>>);
+ static_assert(RoundingPriority::Auto ==
+ RoundingPriority(NumberFormatOptions::RoundingPriority::Auto));
+ static_assert(
+ RoundingPriority::LessPrecision ==
+ RoundingPriority(NumberFormatOptions::RoundingPriority::LessPrecision));
+ static_assert(
+ RoundingPriority::MorePrecision ==
+ RoundingPriority(NumberFormatOptions::RoundingPriority::MorePrecision));
+};
+
+} // namespace mozilla::intl
+
+#endif
diff --git a/intl/components/src/RelativeTimeFormat.cpp b/intl/components/src/RelativeTimeFormat.cpp
new file mode 100644
index 0000000000..8fe62d0d59
--- /dev/null
+++ b/intl/components/src/RelativeTimeFormat.cpp
@@ -0,0 +1,153 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include "mozilla/intl/RelativeTimeFormat.h"
+#include "mozilla/FloatingPoint.h"
+
+#include "unicode/unum.h"
+
+#include "NumberFormatFields.h"
+#include "ICU4CGlue.h"
+#include "ScopedICUObject.h"
+
+namespace mozilla::intl {
+
+// Builds a RelativeTimeFormat for |aLocale|: opens the reusable ICU result
+// object, a decimal number format configured with default digit/grouping
+// settings, and the relative date-time formatter for the requested style.
+// Any ICU failure is returned as an ICUError.
+/*static*/ Result<UniquePtr<RelativeTimeFormat>, ICUError>
+RelativeTimeFormat::TryCreate(const char* aLocale,
+                              const RelativeTimeFormatOptions& aOptions) {
+  UErrorCode errorCode = U_ZERO_ERROR;
+
+  // Result object that the new instance keeps for formatToParts().
+  UFormattedRelativeDateTime* resultObject =
+      ureldatefmt_openResult(&errorCode);
+  if (U_FAILURE(errorCode)) {
+    return Err(ToICUError(errorCode));
+  }
+  ScopedICUObject<UFormattedRelativeDateTime, ureldatefmt_closeResult>
+      resultGuard(resultObject);
+
+  UNumberFormat* numberFormat = unum_open(
+      UNUM_DECIMAL, nullptr, 0, IcuLocale(aLocale), nullptr, &errorCode);
+  if (U_FAILURE(errorCode)) {
+    return Err(ToICUError(errorCode));
+  }
+  ScopedICUObject<UNumberFormat, unum_close> numberFormatGuard(numberFormat);
+
+  // Mirror the defaults of a freshly constructed Intl.NumberFormat.
+  unum_setAttribute(numberFormat, UNUM_MIN_INTEGER_DIGITS, 1);
+  unum_setAttribute(numberFormat, UNUM_MIN_FRACTION_DIGITS, 0);
+  unum_setAttribute(numberFormat, UNUM_MAX_FRACTION_DIGITS, 3);
+  unum_setAttribute(numberFormat, UNUM_GROUPING_USED, true);
+  unum_setAttribute(numberFormat, UNUM_MINIMUM_GROUPING_DIGITS,
+                    UNUM_MINIMUM_GROUPING_DIGITS_AUTO);
+
+  // Translate the requested style into ICU's enumeration.
+  UDateRelativeDateTimeFormatterStyle icuStyle;
+  switch (aOptions.style) {
+    case RelativeTimeFormatOptions::Style::Long:
+      icuStyle = UDAT_STYLE_LONG;
+      break;
+    case RelativeTimeFormatOptions::Style::Short:
+      icuStyle = UDAT_STYLE_SHORT;
+      break;
+    case RelativeTimeFormatOptions::Style::Narrow:
+      icuStyle = UDAT_STYLE_NARROW;
+      break;
+  }
+
+  URelativeDateTimeFormatter* icuFormatter = ureldatefmt_open(
+      IcuLocale(aLocale), numberFormat, icuStyle,
+      UDISPCTX_CAPITALIZATION_FOR_STANDALONE, &errorCode);
+  if (U_FAILURE(errorCode)) {
+    return Err(ToICUError(errorCode));
+  }
+
+  // The formatter adopted the number format, so the guard must let go of it.
+  numberFormatGuard.forget();
+
+  UniquePtr<RelativeTimeFormat> instance = MakeUnique<RelativeTimeFormat>(
+      aOptions.numeric, icuFormatter, resultObject);
+
+  // The new instance now owns the result object.
+  resultGuard.forget();
+  return instance;
+}
+
+// Stores the numeric mode and the two ICU handles; the destructor closes
+// both handles.
+RelativeTimeFormat::RelativeTimeFormat(
+    RelativeTimeFormatOptions::Numeric aNumeric,
+    URelativeDateTimeFormatter* aFormatter,
+    UFormattedRelativeDateTime* aFormattedRelativeDateTime)
+    : mNumeric{aNumeric},
+      mFormatter{aFormatter},
+      mFormattedRelativeDateTime{aFormattedRelativeDateTime} {}
+
+// Closes the formatted-result object first and the formatter second; each is
+// skipped when null, and both pointers are cleared afterwards.
+RelativeTimeFormat::~RelativeTimeFormat() {
+  if (UFormattedRelativeDateTime* result = mFormattedRelativeDateTime) {
+    ureldatefmt_closeResult(result);
+    mFormattedRelativeDateTime = nullptr;
+  }
+
+  if (URelativeDateTimeFormatter* formatter = mFormatter) {
+    ureldatefmt_close(formatter);
+    mFormatter = nullptr;
+  }
+}
+
+// Converts a FormatUnit into the matching ICU URelativeDateTimeUnit. A value
+// outside the enumeration asserts in debug builds and falls back to seconds.
+URelativeDateTimeUnit RelativeTimeFormat::ToURelativeDateTimeUnit(
+    FormatUnit unit) const {
+  switch (unit) {
+    case FormatUnit::Year:
+      return UDAT_REL_UNIT_YEAR;
+    case FormatUnit::Quarter:
+      return UDAT_REL_UNIT_QUARTER;
+    case FormatUnit::Month:
+      return UDAT_REL_UNIT_MONTH;
+    case FormatUnit::Week:
+      return UDAT_REL_UNIT_WEEK;
+    case FormatUnit::Day:
+      return UDAT_REL_UNIT_DAY;
+    case FormatUnit::Hour:
+      return UDAT_REL_UNIT_HOUR;
+    case FormatUnit::Minute:
+      return UDAT_REL_UNIT_MINUTE;
+    case FormatUnit::Second:
+      return UDAT_REL_UNIT_SECOND;
+  }
+
+  MOZ_ASSERT_UNREACHABLE();
+  return UDAT_REL_UNIT_SECOND;
+}
+
+// Formats |aNumber| in |aUnit| into the instance's ICU result object, fills
+// |aParts| from it and returns the formatted UTF-16 text. ICU failures are
+// returned as an ICUError.
+Result<Span<const char16_t>, ICUError> RelativeTimeFormat::formatToParts(
+    double aNumber, FormatUnit aUnit, NumberPartVector& aParts) const {
+  // Numeric::Auto uses ureldatefmt_formatToResult; every other mode uses the
+  // numeric-only variant.
+  const auto formatIntoResult =
+      mNumeric == RelativeTimeFormatOptions::Numeric::Auto
+          ? ureldatefmt_formatToResult
+          : ureldatefmt_formatNumericToResult;
+
+  UErrorCode errorCode = U_ZERO_ERROR;
+  formatIntoResult(mFormatter, aNumber, ToURelativeDateTimeUnit(aUnit),
+                   mFormattedRelativeDateTime, &errorCode);
+  if (U_FAILURE(errorCode)) {
+    return Err(ToICUError(errorCode));
+  }
+
+  const UFormattedValue* value =
+      ureldatefmt_resultAsValue(mFormattedRelativeDateTime, &errorCode);
+  if (U_FAILURE(errorCode)) {
+    return Err(ToICUError(errorCode));
+  }
+
+  // NaN is never reported as negative.
+  const bool negative = !IsNaN(aNumber) && IsNegative(aNumber);
+
+  // Converts the u16string_view result to a Span until all of intl is using
+  // Span (Bug 1709880).
+  auto toSpan =
+      [](std::u16string_view text) -> Result<Span<const char16_t>, ICUError> {
+    return Span<const char16_t>(text.data(), text.length());
+  };
+  return FormatResultToParts(value, Nothing(), negative,
+                             false /*formatForUnit*/, aParts)
+      .andThen(toSpan);
+}
+
+} // namespace mozilla::intl
diff --git a/intl/components/src/RelativeTimeFormat.h b/intl/components/src/RelativeTimeFormat.h
new file mode 100644
index 0000000000..94c2db6927
--- /dev/null
+++ b/intl/components/src/RelativeTimeFormat.h
@@ -0,0 +1,146 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef intl_components_RelativeTimeFormat_h_
+#define intl_components_RelativeTimeFormat_h_
+
+#include "mozilla/intl/ICU4CGlue.h"
+#include "mozilla/intl/ICUError.h"
+#include "mozilla/intl/NumberPart.h"
+
+#include "unicode/ureldatefmt.h"
+#include "unicode/utypes.h"
+
+namespace mozilla::intl {
+
+/**
+ * Options used when creating a RelativeTimeFormat.
+ */
+struct RelativeTimeFormatOptions {
+ // Formatting width; the default is Long.
+ enum class Style { Short, Narrow, Long };
+ Style style = Style::Long;
+
+ enum class Numeric {
+ /**
+ * Only strings with numeric components like `1 day ago`.
+ */
+ Always,
+ /**
+ * Natural-language strings like `yesterday` when possible,
+ * otherwise strings with numeric components as in `7 months ago`.
+ */
+ Auto,
+ };
+ // Numeric mode; the default is Always.
+ Numeric numeric = Numeric::Always;
+};
+
+/**
+ * A RelativeTimeFormat implementation that roughly mirrors the API provided by
+ * the ECMA-402 Intl.RelativeTimeFormat object.
+ *
+ * https://tc39.es/ecma402/#relativetimeformat-objects
+ */
+class RelativeTimeFormat final {
+ public:
+ /**
+ *
+ * Initialize a new RelativeTimeFormat for the provided locale and using the
+ * provided options.
+ *
+ * Returns the new instance on success, or an ICUError on failure.
+ *
+ * https://tc39.es/ecma402/#sec-InitializeRelativeTimeFormat
+ */
+ static Result<UniquePtr<RelativeTimeFormat>, ICUError> TryCreate(
+ const char* aLocale, const RelativeTimeFormatOptions& aOptions);
+
+ // A default-constructed instance holds no ICU objects (both handles are
+ // null).
+ RelativeTimeFormat() = default;
+
+ // Stores the given ICU handles; the destructor closes them.
+ RelativeTimeFormat(RelativeTimeFormatOptions::Numeric aNumeric,
+ URelativeDateTimeFormatter* aFormatter,
+ UFormattedRelativeDateTime* aFormattedRelativeDateTime);
+
+ RelativeTimeFormat(const RelativeTimeFormat&) = delete;
+ RelativeTimeFormat& operator=(const RelativeTimeFormat&) = delete;
+ ~RelativeTimeFormat();
+
+ enum class FormatUnit {
+ Second,
+ Minute,
+ Hour,
+ Day,
+ Week,
+ Month,
+ Quarter,
+ Year
+ };
+
+ /**
+ * Formats a double to the provided buffer (either utf-8 or utf-16)
+ *
+ * When the numeric option is Auto, ureldatefmt_format is used; otherwise
+ * ureldatefmt_formatNumeric is used. For char buffers the text is first
+ * formatted into a UTF-16 vector and then passed to FillBuffer; if that step
+ * fails, ICUError::OutOfMemory is returned.
+ *
+ * https://tc39.es/ecma402/#sec-FormatRelativeTime
+ */
+ template <typename B>
+ Result<Ok, ICUError> format(double aNumber, FormatUnit aUnit,
+ B& aBuffer) const {
+ static_assert(
+ std::is_same_v<typename B::CharType, char> ||
+ std::is_same_v<typename B::CharType, char16_t>,
+ "The only buffer CharTypes supported by RelativeTimeFormat are char "
+ "(for UTF-8 support) and char16_t (for UTF-16 support).");
+
+ auto fmt = mNumeric == RelativeTimeFormatOptions::Numeric::Auto
+ ? ureldatefmt_format
+ : ureldatefmt_formatNumeric;
+
+ if constexpr (std::is_same_v<typename B::CharType, char>) {
+ mozilla::Vector<char16_t, StackU16VectorSize> u16Vec;
+
+ MOZ_TRY(FillBufferWithICUCall(
+ u16Vec, [this, aNumber, aUnit, fmt](UChar* target, int32_t length,
+ UErrorCode* status) {
+ return fmt(mFormatter, aNumber, ToURelativeDateTimeUnit(aUnit),
+ target, length, status);
+ }));
+
+ if (!FillBuffer(u16Vec, aBuffer)) {
+ return Err(ICUError::OutOfMemory);
+ }
+ return Ok{};
+ } else {
+ static_assert(std::is_same_v<typename B::CharType, char16_t>);
+
+ return FillBufferWithICUCall(
+ aBuffer, [this, aNumber, aUnit, fmt](UChar* target, int32_t length,
+ UErrorCode* status) {
+ return fmt(mFormatter, aNumber, ToURelativeDateTimeUnit(aUnit),
+ target, length, status);
+ });
+ }
+ }
+
+ /**
+ * Formats the relative time to a utf-16 string, and fills the provided parts
+ * vector. The string view is valid until another time is formatted.
+ * Accessing the string view after this event is undefined behavior.
+ *
+ * This is utf-16 only because the only current use case is in
+ * SpiderMonkey. Supporting utf-8 would require recalculating the offsets
+ * in NumberPartVector from fixed width to variable width, which might be
+ * tricky to get right and is work that won't be necessary if we switch to
+ * ICU4X (see Bug 1723120).
+ *
+ * https://tc39.es/ecma402/#sec-FormatRelativeTimeToParts
+ */
+ Result<Span<const char16_t>, ICUError> formatToParts(
+ double aNumber, FormatUnit aUnit, NumberPartVector& aParts) const;
+
+ private:
+ RelativeTimeFormatOptions::Numeric mNumeric =
+ RelativeTimeFormatOptions::Numeric::Always;
+ // Raw ICU handles; both are closed in the destructor.
+ URelativeDateTimeFormatter* mFormatter = nullptr;
+ UFormattedRelativeDateTime* mFormattedRelativeDateTime = nullptr;
+
+ // Inline capacity of the temporary UTF-16 vector used by format() for char
+ // buffers.
+ static constexpr size_t StackU16VectorSize = 128;
+
+ // Maps a FormatUnit to the corresponding ICU unit constant.
+ URelativeDateTimeUnit ToURelativeDateTimeUnit(FormatUnit unit) const;
+};
+
+} // namespace mozilla::intl
+
+#endif
diff --git a/intl/components/src/ScopedICUObject.h b/intl/components/src/ScopedICUObject.h
new file mode 100644
index 0000000000..1aa79245dc
--- /dev/null
+++ b/intl/components/src/ScopedICUObject.h
@@ -0,0 +1,40 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef intl_components_ScopedICUObject_h
+#define intl_components_ScopedICUObject_h
+
+/*
+ * A simple RAII class to ensure ICU objects are automatically deallocated at
+ * scope end. Unfortunately, ICU's C++ API is uniformly unstable, so we can't
+ * use its smart pointers for this.
+ */
+
+namespace mozilla::intl {
+
+/**
+ * Holds a raw pointer to an ICU object and passes it to |Delete| when the
+ * guard goes out of scope, unless forget() was called first.
+ *
+ * The guard is not copyable: a copy would hold the same pointer and release
+ * the object a second time.
+ */
+template <typename T, void(Delete)(T*)>
+class ScopedICUObject {
+  T* ptr_;
+
+ public:
+  explicit ScopedICUObject(T* ptr) : ptr_(ptr) {}
+
+  // Two guards must never own the same object.
+  ScopedICUObject(const ScopedICUObject&) = delete;
+  ScopedICUObject& operator=(const ScopedICUObject&) = delete;
+
+  ~ScopedICUObject() {
+    if (ptr_) {
+      Delete(ptr_);
+    }
+  }
+
+  // In cases where an object should be deleted on abnormal exits,
+  // but returned to the caller if everything goes well, call forget()
+  // to transfer the object just before returning. Afterwards the guard
+  // holds nullptr and its destructor does nothing.
+  T* forget() {
+    T* tmp = ptr_;
+    ptr_ = nullptr;
+    return tmp;
+  }
+};
+
+} // namespace mozilla::intl
+
+#endif /* intl_components_ScopedICUObject_h */
diff --git a/intl/components/src/String.cpp b/intl/components/src/String.cpp
new file mode 100644
index 0000000000..de24ab804f
--- /dev/null
+++ b/intl/components/src/String.cpp
@@ -0,0 +1,13 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/intl/String.h"
+
+namespace mozilla::intl {
+
+// Returns the text of the U_UNICODE_VERSION macro as a span of characters.
+Span<const char> String::GetUnicodeVersion() {
+  const auto version = MakeStringSpan(U_UNICODE_VERSION);
+  return version;
+}
+
+} // namespace mozilla::intl
diff --git a/intl/components/src/String.h b/intl/components/src/String.h
new file mode 100644
index 0000000000..f07acd6578
--- /dev/null
+++ b/intl/components/src/String.h
@@ -0,0 +1,256 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef intl_components_String_h_
+#define intl_components_String_h_
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Casting.h"
+#include "mozilla/intl/ICU4CGlue.h"
+#include "mozilla/intl/ICUError.h"
+#include "mozilla/PodOperations.h"
+#include "mozilla/Result.h"
+#include "mozilla/Span.h"
+
+#include "unicode/uchar.h"
+#include "unicode/unorm2.h"
+#include "unicode/ustring.h"
+#include "unicode/utext.h"
+#include "unicode/utypes.h"
+
+namespace mozilla::intl {
+
+/**
+ * This component is a Mozilla-focused API for working with strings in
+ * internationalization code.
+ */
+class String final {
+ public:
+ // Static-only utility class; it cannot be instantiated.
+ String() = delete;
+
+ /**
+ * Return the locale-sensitive lower case string of the input.
+ *
+ * Space for aString.size() code units is reserved up front; if that fails,
+ * ICUError::OutOfMemory is returned.
+ */
+ template <typename B>
+ static Result<Ok, ICUError> ToLocaleLowerCase(const char* aLocale,
+ Span<const char16_t> aString,
+ B& aBuffer) {
+ if (!aBuffer.reserve(aString.size())) {
+ return Err(ICUError::OutOfMemory);
+ }
+ return FillBufferWithICUCall(
+ aBuffer, [&](UChar* target, int32_t length, UErrorCode* status) {
+ return u_strToLower(target, length, aString.data(), aString.size(),
+ aLocale, status);
+ });
+ }
+
+ /**
+ * Return the locale-sensitive upper case string of the input.
+ *
+ * Space for aString.size() code units is reserved up front; if that fails,
+ * ICUError::OutOfMemory is returned.
+ */
+ template <typename B>
+ static Result<Ok, ICUError> ToLocaleUpperCase(const char* aLocale,
+ Span<const char16_t> aString,
+ B& aBuffer) {
+ if (!aBuffer.reserve(aString.size())) {
+ return Err(ICUError::OutOfMemory);
+ }
+ return FillBufferWithICUCall(
+ aBuffer, [&](UChar* target, int32_t length, UErrorCode* status) {
+ return u_strToUpper(target, length, aString.data(), aString.size(),
+ aLocale, status);
+ });
+ }
+
+ /**
+ * Normalization form constants to describe which normalization algorithm
+ * should be performed.
+ *
+ * Also see:
+ * - Unicode Standard, §2.12 Equivalent Sequences
+ * - Unicode Standard, §3.11 Normalization Forms
+ * - https://unicode.org/reports/tr15/
+ */
+ enum class NormalizationForm {
+ /**
+ * Normalization Form C
+ */
+ NFC,
+
+ /**
+ * Normalization Form D
+ */
+ NFD,
+
+ /**
+ * Normalization Form KC
+ */
+ NFKC,
+
+ /**
+ * Normalization Form KD
+ */
+ NFKD,
+ };
+
+ enum class AlreadyNormalized : bool { No, Yes };
+
+ /**
+ * Normalize the input string according to requested normalization form.
+ *
+ * Returns `AlreadyNormalized::Yes` when the string is already in normalized
+ * form. The output buffer is unchanged in this case. Otherwise returns
+ * `AlreadyNormalized::No` and places the normalized string into the output
+ * buffer.
+ *
+ * ICU failures are returned as an ICUError; ICUError::OutOfMemory is
+ * returned when the output buffer cannot reserve aString.size() code units.
+ */
+ template <typename B>
+ static Result<AlreadyNormalized, ICUError> Normalize(
+ NormalizationForm aForm, Span<const char16_t> aString, B& aBuffer) {
+ // The unorm2_getXXXInstance() methods return a shared instance which must
+ // not be deleted.
+ UErrorCode status = U_ZERO_ERROR;
+ const UNormalizer2* normalizer;
+ switch (aForm) {
+ case NormalizationForm::NFC:
+ normalizer = unorm2_getNFCInstance(&status);
+ break;
+ case NormalizationForm::NFD:
+ normalizer = unorm2_getNFDInstance(&status);
+ break;
+ case NormalizationForm::NFKC:
+ normalizer = unorm2_getNFKCInstance(&status);
+ break;
+ case NormalizationForm::NFKD:
+ normalizer = unorm2_getNFKDInstance(&status);
+ break;
+ }
+ if (U_FAILURE(status)) {
+ return Err(ToICUError(status));
+ }
+
+ // Length of the leading part of the input that needs no normalization.
+ int32_t spanLengthInt = unorm2_spanQuickCheckYes(normalizer, aString.data(),
+ aString.size(), &status);
+ if (U_FAILURE(status)) {
+ return Err(ToICUError(status));
+ }
+
+ size_t spanLength = AssertedCast<size_t>(spanLengthInt);
+ MOZ_ASSERT(spanLength <= aString.size());
+
+ // Return if the input string is already normalized.
+ if (spanLength == aString.size()) {
+ return AlreadyNormalized::Yes;
+ }
+
+ if (!aBuffer.reserve(aString.size())) {
+ return Err(ICUError::OutOfMemory);
+ }
+
+ // Copy the already normalized prefix.
+ if (spanLength > 0) {
+ PodCopy(aBuffer.data(), aString.data(), spanLength);
+
+ aBuffer.written(spanLength);
+ }
+
+ // Normalize the rest of the input and append it to the copied prefix.
+ MOZ_TRY(FillBufferWithICUCall(
+ aBuffer, [&](UChar* target, int32_t length, UErrorCode* status) {
+ Span<const char16_t> remaining = aString.From(spanLength);
+ return unorm2_normalizeSecondAndAppend(normalizer, target, spanLength,
+ length, remaining.data(),
+ remaining.size(), status);
+ }));
+
+ return AlreadyNormalized::No;
+ }
+
+ /**
+ * Return true if the code point has the binary property "Cased".
+ */
+ static bool IsCased(char32_t codePoint) {
+ return u_hasBinaryProperty(static_cast<UChar32>(codePoint), UCHAR_CASED);
+ }
+
+ /**
+ * Return true if the code point has the binary property "Case_Ignorable".
+ */
+ static bool IsCaseIgnorable(char32_t codePoint) {
+ return u_hasBinaryProperty(static_cast<UChar32>(codePoint),
+ UCHAR_CASE_IGNORABLE);
+ }
+
+ /**
+ * Return the NFC pairwise composition of the two input characters, if any;
+ * returns 0 (which we know is not a composed char!) if none exists.
+ */
+ static char32_t ComposePairNFC(char32_t a, char32_t b) {
+ // unorm2_getNFCInstance returns a static instance that does not have to be
+ // released here. If it fails, we just return 0 (no composition) always.
+ static UErrorCode status = U_ZERO_ERROR;
+ static const UNormalizer2* normalizer = unorm2_getNFCInstance(&status);
+ if (U_FAILURE(status)) {
+ return 0;
+ }
+ UChar32 ch = unorm2_composePair(normalizer, static_cast<UChar32>(a),
+ static_cast<UChar32>(b));
+ // A negative result is reported to the caller as 0 (no composition).
+ return ch < 0 ? 0 : static_cast<char32_t>(ch);
+ }
+
+ /**
+ * Put the "raw" (single-level) canonical decomposition of the input char, if
+ * any, into the provided buffer. Canonical decomps are never more than two
+ * chars in length (although full normalization may result in longer output
+ * due to recursion).
+ * Returns the length of the decomposition (0 if none, else 1 or 2).
+ *
+ * |decomp| must have room for two code points; only the first |length|
+ * entries are written.
+ */
+ static int DecomposeRawNFD(char32_t ab, char32_t decomp[2]) {
+ // unorm2_getNFCInstance returns a static instance that does not have to be
+ // released here. If it fails, we just return 0 (no decomposition) always.
+ // Although we are using it to query for a decomposition, the mode of the
+ // Normalizer2 is irrelevant here, so we may as well use the same singleton
+ // instance as ComposePairNFC.
+ static UErrorCode status = U_ZERO_ERROR;
+ static const UNormalizer2* normalizer = unorm2_getNFCInstance(&status);
+ if (U_FAILURE(status)) {
+ return 0;
+ }
+
+ // Canonical decompositions are never more than two Unicode characters,
+ // or a maximum of 4 utf-16 code units.
+ const unsigned MAX_DECOMP_LENGTH = 4;
+ UErrorCode error = U_ZERO_ERROR;
+ UChar decompUtf16[MAX_DECOMP_LENGTH];
+ int32_t len =
+ unorm2_getRawDecomposition(normalizer, static_cast<UChar32>(ab),
+ decompUtf16, MAX_DECOMP_LENGTH, &error);
+ if (U_FAILURE(error) || len < 0) {
+ return 0;
+ }
+ // Read the UTF-16 result back as whole code points; |len| is reused below
+ // to count the code points written to |decomp|.
+ UText text = UTEXT_INITIALIZER;
+ utext_openUChars(&text, decompUtf16, len, &error);
+ MOZ_ASSERT(U_SUCCESS(error));
+ UChar32 ch = UTEXT_NEXT32(&text);
+ len = 0;
+ if (ch != U_SENTINEL) {
+ decomp[0] = static_cast<char32_t>(ch);
+ ++len;
+ ch = UTEXT_NEXT32(&text);
+ if (ch != U_SENTINEL) {
+ decomp[1] = static_cast<char32_t>(ch);
+ ++len;
+ }
+ }
+ utext_close(&text);
+ return len;
+ }
+
+ /**
+ * Return the Unicode version, for example "13.0".
+ */
+ static Span<const char> GetUnicodeVersion();
+};
+
+} // namespace mozilla::intl
+
+#endif
diff --git a/intl/components/src/TimeZone.cpp b/intl/components/src/TimeZone.cpp
new file mode 100644
index 0000000000..145dd3f071
--- /dev/null
+++ b/intl/components/src/TimeZone.cpp
@@ -0,0 +1,344 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/intl/TimeZone.h"
+
+#include "mozilla/Vector.h"
+
+#include <algorithm>
+#include <string_view>
+
+#include "unicode/uenum.h"
+#if MOZ_INTL_USE_ICU_CPP_TIMEZONE
+# include "unicode/basictz.h"
+#endif
+
+namespace mozilla::intl {
+
+/**
+ * Creates a TimeZone for |aTimeZoneOverride|, or for the default time zone
+ * when no override is given.
+ *
+ * With MOZ_INTL_USE_ICU_CPP_TIMEZONE the ICU C++ TimeZone class is used and
+ * ICUError::InternalError is returned when the result is ICU's unknown time
+ * zone. Otherwise a UCalendar in the root locale is opened and switched to
+ * the proleptic Gregorian calendar; ICU failures are returned as an ICUError.
+ */
+/* static */
+Result<UniquePtr<TimeZone>, ICUError> TimeZone::TryCreate(
+    Maybe<Span<const char16_t>> aTimeZoneOverride) {
+  // A null |zoneID| requests the default time zone.
+  const UChar* zoneID = nullptr;
+  int32_t zoneIDLen = 0;
+  if (aTimeZoneOverride) {
+    zoneIDLen = static_cast<int32_t>(aTimeZoneOverride->Length());
+    zoneID = aTimeZoneOverride->Elements();
+  }
+
+#if MOZ_INTL_USE_ICU_CPP_TIMEZONE
+  UniquePtr<icu::TimeZone> tz;
+  if (zoneID) {
+    tz.reset(
+        icu::TimeZone::createTimeZone(icu::UnicodeString(zoneID, zoneIDLen)));
+  } else {
+    tz.reset(icu::TimeZone::createDefault());
+  }
+  MOZ_ASSERT(tz);
+
+  if (*tz == icu::TimeZone::getUnknown()) {
+    return Err(ICUError::InternalError);
+  }
+
+  return MakeUnique<TimeZone>(std::move(tz));
+#else
+  // An empty string is used for the root locale. This is regarded as the base
+  // locale of all locales, and is used as the language/country neutral locale
+  // for locale sensitive operations.
+  const char* rootLocale = "";
+
+  UErrorCode status = U_ZERO_ERROR;
+  UCalendar* calendar =
+      ucal_open(zoneID, zoneIDLen, rootLocale, UCAL_DEFAULT, &status);
+
+  if (U_FAILURE(status)) {
+    return Err(ToICUError(status));
+  }
+
+  // https://tc39.es/ecma262/#sec-time-values-and-time-range
+  //
+  // A time value supports a slightly smaller range of -8,640,000,000,000,000 to
+  // 8,640,000,000,000,000 milliseconds.
+  constexpr double StartOfTime = -8.64e15;
+
+  // Ensure all computations are performed in the proleptic Gregorian calendar.
+  ucal_setGregorianChange(calendar, StartOfTime, &status);
+
+  if (U_FAILURE(status)) {
+    // The calendar has not been handed to a TimeZone yet, so it must be
+    // closed here; otherwise it would leak on this error path.
+    ucal_close(calendar);
+    return Err(ToICUError(status));
+  }
+
+  return MakeUnique<TimeZone>(calendar);
+#endif
+}
+
+// Returns the raw offset in milliseconds: getRawOffset() of the ICU C++ time
+// zone, or the calendar's UCAL_ZONE_OFFSET field at the current time.
+Result<int32_t, ICUError> TimeZone::GetRawOffsetMs() {
+#if MOZ_INTL_USE_ICU_CPP_TIMEZONE
+  return mTimeZone->getRawOffset();
+#else
+  UErrorCode errorCode = U_ZERO_ERROR;
+
+  // The calendar may have been modified, so reset it to the current time
+  // before reading the field.
+  ucal_setMillis(mCalendar, ucal_getNow(), &errorCode);
+  if (U_FAILURE(errorCode)) {
+    return Err(ToICUError(errorCode));
+  }
+
+  int32_t zoneOffset = ucal_get(mCalendar, UCAL_ZONE_OFFSET, &errorCode);
+  if (U_FAILURE(errorCode)) {
+    return Err(ToICUError(errorCode));
+  }
+  return zoneOffset;
+#endif
+}
+
+// Returns the daylight-saving offset in milliseconds that applies at the UTC
+// time |aUTCMilliseconds|.
+Result<int32_t, ICUError> TimeZone::GetDSTOffsetMs(int64_t aUTCMilliseconds) {
+  const UDate when = static_cast<UDate>(aUTCMilliseconds);
+  UErrorCode errorCode = U_ZERO_ERROR;
+
+#if MOZ_INTL_USE_ICU_CPP_TIMEZONE
+  // |when| is a UTC time, not a local one.
+  constexpr bool isLocalTime = false;
+  int32_t rawPart;
+  int32_t dstPart;
+
+  mTimeZone->getOffset(when, isLocalTime, rawPart, dstPart, errorCode);
+  if (U_FAILURE(errorCode)) {
+    return Err(ToICUError(errorCode));
+  }
+  return dstPart;
+#else
+  ucal_setMillis(mCalendar, when, &errorCode);
+  if (U_FAILURE(errorCode)) {
+    return Err(ToICUError(errorCode));
+  }
+
+  int32_t dstPart = ucal_get(mCalendar, UCAL_DST_OFFSET, &errorCode);
+  if (U_FAILURE(errorCode)) {
+    return Err(ToICUError(errorCode));
+  }
+  return dstPart;
+#endif
+}
+
+// Returns the total offset in milliseconds (raw offset plus daylight-saving
+// offset) that applies at the UTC time |aUTCMilliseconds|.
+Result<int32_t, ICUError> TimeZone::GetOffsetMs(int64_t aUTCMilliseconds) {
+  const UDate when = static_cast<UDate>(aUTCMilliseconds);
+  UErrorCode errorCode = U_ZERO_ERROR;
+
+#if MOZ_INTL_USE_ICU_CPP_TIMEZONE
+  // |when| is a UTC time, not a local one.
+  constexpr bool isLocalTime = false;
+  int32_t rawPart;
+  int32_t dstPart;
+
+  mTimeZone->getOffset(when, isLocalTime, rawPart, dstPart, errorCode);
+  if (U_FAILURE(errorCode)) {
+    return Err(ToICUError(errorCode));
+  }
+  return rawPart + dstPart;
+#else
+  ucal_setMillis(mCalendar, when, &errorCode);
+  if (U_FAILURE(errorCode)) {
+    return Err(ToICUError(errorCode));
+  }
+
+  int32_t rawPart = ucal_get(mCalendar, UCAL_ZONE_OFFSET, &errorCode);
+  if (U_FAILURE(errorCode)) {
+    return Err(ToICUError(errorCode));
+  }
+
+  int32_t dstPart = ucal_get(mCalendar, UCAL_DST_OFFSET, &errorCode);
+  if (U_FAILURE(errorCode)) {
+    return Err(ToICUError(errorCode));
+  }
+  return rawPart + dstPart;
+#endif
+}
+
+// Returns the total offset in milliseconds (raw offset plus daylight-saving
+// offset) for the local time |aLocalMilliseconds|.
+Result<int32_t, ICUError> TimeZone::GetUTCOffsetMs(int64_t aLocalMilliseconds) {
+  // https://tc39.es/ecma262/#sec-local-time-zone-adjustment
+  //
+  // LocalTZA ( t, isUTC )
+  //
+  // A local time can occur twice at a negative transition (daylight saving
+  // time ends, or a rule change decreases the offset) and can be skipped at a
+  // positive transition (daylight saving time starts, or a rule change
+  // increases the offset). In both cases the specification requires the local
+  // time to be interpreted with the offset in effect before the transition.
+  constexpr UTimeZoneLocalOption forSkippedTime = UCAL_TZ_LOCAL_FORMER;
+  constexpr UTimeZoneLocalOption forRepeatedTime = UCAL_TZ_LOCAL_FORMER;
+
+  const UDate when = static_cast<UDate>(aLocalMilliseconds);
+  UErrorCode errorCode = U_ZERO_ERROR;
+  int32_t rawPart;
+  int32_t dstPart;
+
+#if MOZ_INTL_USE_ICU_CPP_TIMEZONE
+  // All ICU TimeZone classes derive from BasicTimeZone, so we can safely
+  // perform the static_cast.
+  // Once <https://unicode-org.atlassian.net/browse/ICU-13705> is fixed we
+  // can remove this extra cast.
+  auto* basicTimeZone = static_cast<icu::BasicTimeZone*>(mTimeZone.get());
+  basicTimeZone->getOffsetFromLocal(when, forSkippedTime, forRepeatedTime,
+                                    rawPart, dstPart, errorCode);
+  if (U_FAILURE(errorCode)) {
+    return Err(ToICUError(errorCode));
+  }
+#else
+  ucal_setMillis(mCalendar, when, &errorCode);
+  if (U_FAILURE(errorCode)) {
+    return Err(ToICUError(errorCode));
+  }
+
+  ucal_getTimeZoneOffsetFromLocal(mCalendar, forSkippedTime, forRepeatedTime,
+                                  &rawPart, &dstPart, &errorCode);
+  if (U_FAILURE(errorCode)) {
+    return Err(ToICUError(errorCode));
+  }
+#endif
+
+  return rawPart + dstPart;
+}
+
+// UTF-16 buffer type used by the helpers in this file to hold time zone
+// identifiers. Its second template argument is
+// TimeZone::TimeZoneIdentifierLength.
+using TimeZoneIdentifierVector =
+ Vector<char16_t, TimeZone::TimeZoneIdentifierLength>;
+
+#if !MOZ_INTL_USE_ICU_CPP_TIMEZONE
+// Return true when |timeZone| holds exactly the characters of ICU's
+// UCAL_UNKNOWN_ZONE_ID.
+static bool IsUnknownTimeZone(const TimeZoneIdentifierVector& timeZone) {
+  constexpr std::string_view unknownTimeZone = UCAL_UNKNOWN_ZONE_ID;
+
+  // Different lengths can never match, so skip the element-wise comparison.
+  if (timeZone.length() != unknownTimeZone.length()) {
+    return false;
+  }
+
+  return std::equal(timeZone.begin(), timeZone.end(), unknownTimeZone.begin(),
+                    unknownTimeZone.end());
+}
+
+// Make |timeZone| ICU's default time zone. The vector is modified: a
+// terminating null character is appended to it.
+static ICUResult SetDefaultTimeZone(TimeZoneIdentifierVector& timeZone) {
+  // The string mustn't already be null-terminated.
+  MOZ_ASSERT_IF(!timeZone.empty(), timeZone.end()[-1] != '\0');
+
+  // ucal_setDefaultTimeZone expects a null-terminated identifier.
+  if (!timeZone.append('\0')) {
+    return Err(ICUError::OutOfMemory);
+  }
+
+  UErrorCode status = U_ZERO_ERROR;
+  ucal_setDefaultTimeZone(timeZone.begin(), &status);
+  if (U_SUCCESS(status)) {
+    return Ok{};
+  }
+
+  return Err(ToICUError(status));
+}
+#endif
+
+Result<bool, ICUError> TimeZone::SetDefaultTimeZone(
+    Span<const char> aTimeZone) {
+#if MOZ_INTL_USE_ICU_CPP_TIMEZONE
+  icu::UnicodeString tzid(aTimeZone.data(), aTimeZone.size(), US_INV);
+  if (tzid.isBogus()) {
+    return Err(ICUError::OutOfMemory);
+  }
+
+  UniquePtr<icu::TimeZone> newTimeZone(icu::TimeZone::createTimeZone(tzid));
+  MOZ_ASSERT(newTimeZone);
+
+  // ICU handed back its "unknown" zone: leave the current default untouched
+  // and tell the caller that nothing was applied.
+  if (*newTimeZone == icu::TimeZone::getUnknown()) {
+    return false;
+  }
+
+  // adoptDefault() takes ownership of the time zone.
+  icu::TimeZone::adoptDefault(newTimeZone.release());
+  return true;
+#else
+  TimeZoneIdentifierVector tzid;
+  if (!tzid.append(aTimeZone.data(), aTimeZone.size())) {
+    return Err(ICUError::OutOfMemory);
+  }
+
+  // Remember the current default time zone so it can be restored if needed.
+  TimeZoneIdentifierVector previousDefault;
+  MOZ_TRY(FillBufferWithICUCall(previousDefault, ucal_getDefaultTimeZone));
+
+  // Attempt to install the requested time zone.
+  MOZ_TRY(mozilla::intl::SetDefaultTimeZone(tzid));
+
+  // Read the default back to find out whether the request was honored.
+  TimeZoneIdentifierVector appliedTimeZone;
+  MOZ_TRY(FillBufferWithICUCall(appliedTimeZone, ucal_getDefaultTimeZone));
+
+  if (IsUnknownTimeZone(appliedTimeZone)) {
+    // The request was not applied; put the previous default back.
+    MOZ_TRY(mozilla::intl::SetDefaultTimeZone(previousDefault));
+    return false;
+  }
+
+  return true;
+#endif
+}
+
+ICUResult TimeZone::SetDefaultTimeZoneFromHostTimeZone() {
+#if MOZ_INTL_USE_ICU_CPP_TIMEZONE
+  // A null result from detectHostTimeZone() leaves the default unchanged and
+  // is still reported as success.
+  icu::TimeZone* hostZone = icu::TimeZone::detectHostTimeZone();
+  if (hostZone) {
+    icu::TimeZone::adoptDefault(hostZone);
+  }
+
+  return Ok{};
+#else
+  TimeZoneIdentifierVector hostTimeZone;
+  MOZ_TRY(FillBufferWithICUCall(hostTimeZone, ucal_getHostTimeZone));
+
+  // Forward the helper's result directly: success maps to Ok, failure to the
+  // helper's error.
+  return mozilla::intl::SetDefaultTimeZone(hostTimeZone);
+#endif
+}
+
+Result<Span<const char>, ICUError> TimeZone::GetTZDataVersion() {
+  UErrorCode status = U_ZERO_ERROR;
+  const char* version = ucal_getTZDataVersion(&status);
+
+  // Wrap the string returned by ICU in a span on success; otherwise report
+  // the ICU error.
+  if (U_SUCCESS(status)) {
+    return MakeStringSpan(version);
+  }
+  return Err(ToICUError(status));
+}
+
+Result<SpanEnumeration<char>, ICUError> TimeZone::GetAvailableTimeZones(
+    const char* aRegion) {
+  // Enumerate the time zones commonly used in |aRegion|. The
+  // UCAL_ZONE_TYPE_ANY filter is chosen on purpose: it gives finer control
+  // over the returned zones and keeps zones that ICU considers links but that
+  // IANA treats as proper zones.
+  UErrorCode status = U_ZERO_ERROR;
+  UEnumeration* zones = ucal_openTimeZoneIDEnumeration(
+      UCAL_ZONE_TYPE_ANY, aRegion, /* rawOffset */ nullptr, &status);
+
+  if (U_SUCCESS(status)) {
+    return SpanEnumeration<char>(zones);
+  }
+  return Err(ToICUError(status));
+}
+
+Result<SpanEnumeration<char>, ICUError> TimeZone::GetAvailableTimeZones() {
+  // Open an enumeration over every time zone via ucal_openTimeZones and hand
+  // it to SpanEnumeration.
+  UErrorCode status = U_ZERO_ERROR;
+  UEnumeration* zones = ucal_openTimeZones(&status);
+
+  if (U_SUCCESS(status)) {
+    return SpanEnumeration<char>(zones);
+  }
+  return Err(ToICUError(status));
+}
+
+#if !MOZ_INTL_USE_ICU_CPP_TIMEZONE
+// Destructor for builds that use the ICU C API: closes the UCalendar handle
+// stored in mCalendar.
+TimeZone::~TimeZone() {
+ MOZ_ASSERT(mCalendar);
+ ucal_close(mCalendar);
+}
+#endif
+
+} // namespace mozilla::intl
diff --git a/intl/components/src/TimeZone.h b/intl/components/src/TimeZone.h
new file mode 100644
index 0000000000..180092bd3f
--- /dev/null
+++ b/intl/components/src/TimeZone.h
@@ -0,0 +1,237 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef intl_components_TimeZone_h_
+#define intl_components_TimeZone_h_
+
+// ICU doesn't provide a separate C API for time zone functions, but instead
+// requires to use UCalendar. This adds a measurable overhead when compared to
+// using ICU's C++ TimeZone API, therefore we prefer to use the C++ API when
+// possible. Due to the lack of a stable ABI in C++, it's only possible to use
+// the C++ API when we use our in-tree ICU copy.
+#if !MOZ_SYSTEM_ICU
+# define MOZ_INTL_USE_ICU_CPP_TIMEZONE 1
+#else
+# define MOZ_INTL_USE_ICU_CPP_TIMEZONE 0
+#endif
+
+#include <stdint.h>
+#include <utility>
+
+#include "unicode/ucal.h"
+#include "unicode/utypes.h"
+#if MOZ_INTL_USE_ICU_CPP_TIMEZONE
+# include "unicode/locid.h"
+# include "unicode/timezone.h"
+# include "unicode/unistr.h"
+#endif
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Casting.h"
+#include "mozilla/intl/ICU4CGlue.h"
+#include "mozilla/intl/ICUError.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/Result.h"
+#include "mozilla/Span.h"
+#include "mozilla/UniquePtr.h"
+
+namespace mozilla::intl {
+
+/**
+ * A Mozilla-focused wrapper for working with time zones in
+ * internationalization code. It is meant to be used together with other
+ * operations such as datetime formatting.
+ *
+ * The backing ICU object depends on MOZ_INTL_USE_ICU_CPP_TIMEZONE: either an
+ * icu::TimeZone (C++ API) or a UCalendar (C API).
+ */
+class TimeZone final {
+ public:
+#if MOZ_INTL_USE_ICU_CPP_TIMEZONE
+  /**
+   * Take ownership of |aTimeZone|; a null pointer is asserted against.
+   */
+  explicit TimeZone(UniquePtr<icu::TimeZone> aTimeZone)
+      : mTimeZone(std::move(aTimeZone)) {
+    MOZ_ASSERT(mTimeZone);
+  }
+#else
+  /**
+   * Wrap |aCalendar|; a null pointer is asserted against.
+   */
+  explicit TimeZone(UCalendar* aCalendar) : mCalendar(aCalendar) {
+    MOZ_ASSERT(mCalendar);
+  }
+#endif
+
+  // Copying is disabled because this class owns the ICU resource. No move
+  // operations exist yet; custom ones could be written should the need arise.
+  TimeZone(const TimeZone&) = delete;
+  TimeZone& operator=(const TimeZone&) = delete;
+
+#if MOZ_INTL_USE_ICU_CPP_TIMEZONE
+  ~TimeZone() = default;
+#else
+  ~TimeZone();
+#endif
+
+  /**
+   * Create a TimeZone. |aTimeZoneOverride| is optional and defaults to
+   * Nothing.
+   */
+  static Result<UniquePtr<TimeZone>, ICUError> TryCreate(
+      Maybe<Span<const char16_t>> aTimeZoneOverride = Nothing{});
+
+  /**
+   * The raw offset from GMT, in milliseconds.
+   */
+  Result<int32_t, ICUError> GetRawOffsetMs();
+
+  /**
+   * The daylight saving offset, in milliseconds, at the given UTC time.
+   */
+  Result<int32_t, ICUError> GetDSTOffsetMs(int64_t aUTCMilliseconds);
+
+  /**
+   * The local offset, in milliseconds, at the given UTC time.
+   */
+  Result<int32_t, ICUError> GetOffsetMs(int64_t aUTCMilliseconds);
+
+  /**
+   * The UTC offset, in milliseconds, at the given local time.
+   */
+  Result<int32_t, ICUError> GetUTCOffsetMs(int64_t aLocalMilliseconds);
+
+  // Selects which variant of a display name GetDisplayName() produces.
+  enum class DaylightSavings : bool { No, Yes };
+
+  /**
+   * Write the display name of this time zone for |aLocale| into |aBuffer|.
+   * |aDaylightSavings| chooses between the daylight saving name and the
+   * standard name.
+   */
+  template <typename B>
+  ICUResult GetDisplayName(const char* aLocale,
+                           DaylightSavings aDaylightSavings, B& aBuffer) {
+    const bool isDaylight = static_cast<bool>(aDaylightSavings);
+
+#if MOZ_INTL_USE_ICU_CPP_TIMEZONE
+    icu::UnicodeString displayName;
+    mTimeZone->getDisplayName(isDaylight, icu::TimeZone::LONG,
+                              icu::Locale(aLocale), displayName);
+
+    // Make room for the whole name before extracting it.
+    int32_t length = displayName.length();
+    if (!aBuffer.reserve(AssertedCast<size_t>(length))) {
+      return Err(ICUError::OutOfMemory);
+    }
+
+    // Copy the characters out of the UnicodeString into the caller's buffer.
+    UErrorCode status = U_ZERO_ERROR;
+    int32_t written = displayName.extract(aBuffer.data(), length, status);
+    if (!ICUSuccessForStringSpan(status)) {
+      return Err(ToICUError(status));
+    }
+    MOZ_ASSERT(written == length);
+
+    // Tell the buffer how many code units are now valid.
+    aBuffer.written(written);
+
+    return Ok{};
+#else
+    const UCalendarDisplayNameType type = isDaylight ? UCAL_DST : UCAL_STANDARD;
+
+    return FillBufferWithICUCall(
+        aBuffer, [&](UChar* target, int32_t length, UErrorCode* status) {
+          return ucal_getTimeZoneDisplayName(mCalendar, type, aLocale, target,
+                                             length, status);
+        });
+#endif
+  }
+
+  /**
+   * Fill the buffer with the identifier of the current default time zone as
+   * reported by ucal_getDefaultTimeZone, e.g. "America/Chicago".
+   */
+  template <typename B>
+  static ICUResult GetDefaultTimeZone(B& aBuffer) {
+    return FillBufferWithICUCall(aBuffer, ucal_getDefaultTimeZone);
+  }
+
+  /**
+   * Fill the buffer with the identifier of the host system's time zone as
+   * reported by ucal_getHostTimeZone, e.g. "America/Chicago".
+   *
+   * NOTE: This function is not thread-safe.
+   */
+  template <typename B>
+  static ICUResult GetHostTimeZone(B& aBuffer) {
+    return FillBufferWithICUCall(aBuffer, ucal_getHostTimeZone);
+  }
+
+  /**
+   * Set the default time zone. The boolean result tells whether the requested
+   * time zone was applied.
+   */
+  static Result<bool, ICUError> SetDefaultTimeZone(Span<const char> aTimeZone);
+
+  /**
+   * Set the default time zone using the host system's time zone.
+   *
+   * NOTE: This function is not thread-safe.
+   */
+  static ICUResult SetDefaultTimeZoneFromHostTimeZone();
+
+  /**
+   * Return the tzdata version.
+   *
+   * The tzdata version is a string of the form "<year><release>", e.g. "2021a".
+   */
+  static Result<Span<const char>, ICUError> GetTZDataVersion();
+
+  /**
+   * Typical maximal length of a time zone identifier.
+   *
+   * At the time of this writing 32 characters fit every supported time zone:
+   *
+   * Intl.supportedValuesOf("timeZone")
+   *     .reduce((acc, v) => Math.max(acc, v.length), 0)
+   */
+  static constexpr size_t TimeZoneIdentifierLength = 32;
+
+  /**
+   * Write the canonical system time zone ID, or the normalized custom time
+   * zone ID, for the given time zone ID into |aBuffer|.
+   */
+  template <typename B>
+  static ICUResult GetCanonicalTimeZoneID(Span<const char16_t> inputTimeZone,
+                                          B& aBuffer) {
+    static_assert(std::is_same_v<typename B::CharType, char16_t>,
+                  "Currently only UTF-16 buffers are supported.");
+
+    // Unlike other ICU calls, ucal_getCanonicalTimeZoneID fails when it is
+    // handed a nullptr or a zero-length result buffer. Reserve some space up
+    // front so that a real pointer reaches the API.
+    if (aBuffer.capacity() == 0 &&
+        !aBuffer.reserve(TimeZoneIdentifierLength)) {
+      return Err(ICUError::OutOfMemory);
+    }
+
+    const auto* input = inputTimeZone.Elements();
+    const int32_t inputLength = static_cast<int32_t>(inputTimeZone.Length());
+
+    return FillBufferWithICUCall(
+        aBuffer,
+        [input, inputLength](UChar* target, int32_t length,
+                             UErrorCode* status) {
+          return ucal_getCanonicalTimeZoneID(input, inputLength, target, length,
+                                             /* isSystemID */ nullptr, status);
+        });
+  }
+
+  /**
+   * Return an enumeration over all time zones commonly used in the given
+   * region.
+   */
+  static Result<SpanEnumeration<char>, ICUError> GetAvailableTimeZones(
+      const char* aRegion);
+
+  /**
+   * Return an enumeration over all available time zones.
+   */
+  static Result<SpanEnumeration<char>, ICUError> GetAvailableTimeZones();
+
+ private:
+  // The ICU object backing this instance; which member exists depends on
+  // whether the ICU C++ time zone API is in use.
+#if MOZ_INTL_USE_ICU_CPP_TIMEZONE
+  UniquePtr<icu::TimeZone> mTimeZone = nullptr;
+#else
+  UCalendar* mCalendar = nullptr;
+#endif
+};
+
+} // namespace mozilla::intl
+
+#endif
diff --git a/intl/components/src/UnicodeProperties.h b/intl/components/src/UnicodeProperties.h
new file mode 100644
index 0000000000..785bc356f8
--- /dev/null
+++ b/intl/components/src/UnicodeProperties.h
@@ -0,0 +1,306 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef intl_components_UnicodeProperties_h_
+#define intl_components_UnicodeProperties_h_
+
+#include "mozilla/intl/BidiClass.h"
+#include "mozilla/intl/ICU4CGlue.h"
+#include "mozilla/intl/UnicodeScriptCodes.h"
+#include "mozilla/Vector.h"
+
+#include "unicode/uchar.h"
+#include "unicode/uscript.h"
+
+namespace mozilla::intl {
+
+/**
+ * This component is a Mozilla-focused API for working with text properties.
+ * All members are static and forward to ICU's character and script property
+ * functions.
+ */
+class UnicodeProperties final {
+ public:
+  /**
+   * Return the BidiClass for the character.
+   */
+  static inline BidiClass GetBidiClass(uint32_t aCh) {
+    return BidiClass(u_charDirection(aCh));
+  }
+
+  /**
+   * Maps the specified character to a "mirror-image" character.
+   */
+  static inline uint32_t CharMirror(uint32_t aCh) { return u_charMirror(aCh); }
+
+  /**
+   * Return the general category value for the code point.
+   */
+  static inline uint32_t CharType(uint32_t aCh) { return u_charType(aCh); }
+
+  /**
+   * Determine whether the code point has the Bidi_Mirrored property.
+   */
+  static inline bool IsMirrored(uint32_t aCh) { return u_isMirrored(aCh); }
+
+  /**
+   * Returns the combining class of the code point as specified in
+   * UnicodeData.txt.
+   */
+  static inline uint8_t GetCombiningClass(uint32_t aCh) {
+    return u_getCombiningClass(aCh);
+  }
+
+  // The enumerated/integer properties that GetIntPropertyValue() can query.
+  enum class IntProperty {
+    BidiPairedBracketType,
+    EastAsianWidth,
+    HangulSyllableType,
+    LineBreak,
+    NumericType,
+  };
+
+  /**
+   * Get the property value for an enumerated or integer Unicode property for a
+   * code point.
+   *
+   * Every IntProperty enumerator is handled explicitly. A value outside the
+   * enumeration asserts in debug builds and yields 0.
+   */
+  static inline int32_t GetIntPropertyValue(uint32_t aCh, IntProperty aProp) {
+    // Return from each case instead of going through a local UProperty, so no
+    // uninitialized value can ever reach ICU. There is deliberately no
+    // |default| label: a newly added enumerator then triggers a compiler
+    // warning here.
+    switch (aProp) {
+      case IntProperty::BidiPairedBracketType:
+        return u_getIntPropertyValue(aCh, UCHAR_BIDI_PAIRED_BRACKET_TYPE);
+      case IntProperty::EastAsianWidth:
+        return u_getIntPropertyValue(aCh, UCHAR_EAST_ASIAN_WIDTH);
+      case IntProperty::HangulSyllableType:
+        return u_getIntPropertyValue(aCh, UCHAR_HANGUL_SYLLABLE_TYPE);
+      case IntProperty::LineBreak:
+        return u_getIntPropertyValue(aCh, UCHAR_LINE_BREAK);
+      case IntProperty::NumericType:
+        return u_getIntPropertyValue(aCh, UCHAR_NUMERIC_TYPE);
+    }
+    MOZ_ASSERT_UNREACHABLE("Unexpected IntProperty value");
+    return 0;
+  }
+
+  /**
+   * Get the numeric value for a Unicode code point as defined in the
+   * Unicode Character Database if the input is decimal or a digit,
+   * otherwise, returns -1.
+   */
+  static inline int8_t GetNumericValue(uint32_t aCh) {
+    UNumericType type =
+        UNumericType(GetIntPropertyValue(aCh, IntProperty::NumericType));
+    return type == U_NT_DECIMAL || type == U_NT_DIGIT
+               ? int8_t(u_getNumericValue(aCh))
+               : -1;
+  }
+
+  /**
+   * Maps the specified character to its paired bracket character.
+   */
+  static inline uint32_t GetBidiPairedBracket(uint32_t aCh) {
+    return u_getBidiPairedBracket(aCh);
+  }
+
+  /**
+   * The given character is mapped to its uppercase equivalent according to
+   * UnicodeData.txt; if the character has no uppercase equivalent, the
+   * character itself is returned.
+   */
+  static inline uint32_t ToUpper(uint32_t aCh) { return u_toupper(aCh); }
+
+  /**
+   * The given character is mapped to its lowercase equivalent according to
+   * UnicodeData.txt; if the character has no lowercase equivalent, the
+   * character itself is returned.
+   */
+  static inline uint32_t ToLower(uint32_t aCh) { return u_tolower(aCh); }
+
+  /**
+   * Check if a code point has the Lowercase Unicode property.
+   */
+  static inline bool IsLowercase(uint32_t aCh) { return u_isULowercase(aCh); }
+
+  /**
+   * The given character is mapped to its titlecase equivalent according to
+   * UnicodeData.txt; if the character has no titlecase equivalent, the
+   * character itself is returned.
+   */
+  static inline uint32_t ToTitle(uint32_t aCh) { return u_totitle(aCh); }
+
+  /**
+   * The given character is mapped to its case folding equivalent according to
+   * UnicodeData.txt and CaseFolding.txt;
+   * if the character has no case folding equivalent, the character
+   * itself is returned.
+   */
+  static inline uint32_t FoldCase(uint32_t aCh) {
+    return u_foldCase(aCh, U_FOLD_CASE_DEFAULT);
+  }
+
+  // The binary properties that HasBinaryProperty() can query.
+  enum class BinaryProperty {
+    DefaultIgnorableCodePoint,
+    Emoji,
+    EmojiPresentation,
+  };
+
+  /**
+   * Check a binary Unicode property for a code point.
+   *
+   * Every BinaryProperty enumerator is handled explicitly. A value outside the
+   * enumeration asserts in debug builds and yields false.
+   */
+  static inline bool HasBinaryProperty(uint32_t aCh, BinaryProperty aProp) {
+    // Same structure as GetIntPropertyValue(): return per case so that no
+    // uninitialized UProperty can be passed to ICU.
+    switch (aProp) {
+      case BinaryProperty::DefaultIgnorableCodePoint:
+        return u_hasBinaryProperty(aCh, UCHAR_DEFAULT_IGNORABLE_CODE_POINT);
+      case BinaryProperty::Emoji:
+        return u_hasBinaryProperty(aCh, UCHAR_EMOJI);
+      case BinaryProperty::EmojiPresentation:
+        return u_hasBinaryProperty(aCh, UCHAR_EMOJI_PRESENTATION);
+    }
+    MOZ_ASSERT_UNREACHABLE("Unexpected BinaryProperty value");
+    return false;
+  }
+
+  /**
+   * Check if the width of aCh is full width, half width or wide
+   * excluding emoji.
+   */
+  static inline bool IsEastAsianWidthFHWexcludingEmoji(uint32_t aCh) {
+    switch (GetIntPropertyValue(aCh, IntProperty::EastAsianWidth)) {
+      case U_EA_FULLWIDTH:
+      case U_EA_HALFWIDTH:
+        return true;
+      case U_EA_WIDE:
+        // Wide characters only count when they are not emoji.
+        return !HasBinaryProperty(aCh, BinaryProperty::Emoji);
+      case U_EA_AMBIGUOUS:
+      case U_EA_NARROW:
+      case U_EA_NEUTRAL:
+        return false;
+    }
+    return false;
+  }
+
+  /**
+   * Check if the width of aCh is ambiguous, full width, or wide.
+   */
+  static inline bool IsEastAsianWidthAFW(uint32_t aCh) {
+    switch (GetIntPropertyValue(aCh, IntProperty::EastAsianWidth)) {
+      case U_EA_AMBIGUOUS:
+      case U_EA_FULLWIDTH:
+      case U_EA_WIDE:
+        return true;
+      case U_EA_HALFWIDTH:
+      case U_EA_NARROW:
+      case U_EA_NEUTRAL:
+        return false;
+    }
+    return false;
+  }
+
+  /**
+   * Check if the width of aCh is full width, or wide.
+   */
+  static inline bool IsEastAsianWidthFW(uint32_t aCh) {
+    switch (GetIntPropertyValue(aCh, IntProperty::EastAsianWidth)) {
+      case U_EA_FULLWIDTH:
+      case U_EA_WIDE:
+        return true;
+      case U_EA_AMBIGUOUS:
+      case U_EA_HALFWIDTH:
+      case U_EA_NARROW:
+      case U_EA_NEUTRAL:
+        return false;
+    }
+    return false;
+  }
+
+  /**
+   * Check if the general category of aCh (see CharType) is U_MATH_SYMBOL or
+   * U_OTHER_SYMBOL.
+   */
+  static inline bool IsMathOrMusicSymbol(uint32_t aCh) {
+    // Keep this function in sync with is_math_symbol in base_chars.py.
+    const uint32_t category = CharType(aCh);
+    return category == U_MATH_SYMBOL || category == U_OTHER_SYMBOL;
+  }
+
+  /**
+   * Return the script code of the code point as reported by
+   * uscript_getScript.
+   */
+  static inline Script GetScriptCode(uint32_t aCh) {
+    // We can safely ignore the error code here because uscript_getScript
+    // returns USCRIPT_INVALID_CODE in the event of an error.
+    UErrorCode err = U_ZERO_ERROR;
+    return Script(uscript_getScript(aCh, &err));
+  }
+
+  /**
+   * Return the result of uscript_hasScript for the code point and aScript.
+   */
+  static inline bool HasScript(uint32_t aCh, Script aScript) {
+    return uscript_hasScript(aCh, UScriptCode(aScript));
+  }
+
+  /**
+   * Return the short name of aScript as reported by uscript_getShortName.
+   */
+  static inline const char* GetScriptShortName(Script aScript) {
+    return uscript_getShortName(UScriptCode(aScript));
+  }
+
+  /**
+   * Return the maximum value ICU reports for the UCHAR_SCRIPT property.
+   */
+  static inline int32_t GetMaxNumberOfScripts() {
+    return u_getIntPropertyMaxValue(UCHAR_SCRIPT);
+  }
+
+  // The code point which has the most script extensions is 0x0965, which has 21
+  // script extensions, so choose the vector size as 32 to prevent heap
+  // allocation.
+  static constexpr size_t kMaxScripts = 32;
+
+  using ScriptExtensionVector = Vector<Script, kMaxScripts>;
+
+  /**
+   * Get the script extensions for the given code point, and write the script
+   * extensions to aExtensions vector. If the code point has script extensions,
+   * the script code (Script::COMMON or Script::INHERITED) will be excluded.
+   *
+   * If the code point doesn't have any script extension, then its script code
+   * will be written to aExtensions vector.
+   *
+   * If the code point is invalid, Script::UNKNOWN will be written to
+   * aExtensions vector.
+   *
+   * Note: aExtensions is always cleared first, so it is left empty when this
+   * method fails.
+   *
+   * See [1] for the script code of the code point, [2] for the script
+   * extensions.
+   *
+   * [1] https://www.unicode.org/Public/UNIDATA/Scripts.txt
+   * [2] https://www.unicode.org/Public/UNIDATA/ScriptExtensions.txt
+   */
+  static ICUResult GetExtensions(char32_t aCodePoint,
+                                 ScriptExtensionVector& aExtensions) {
+    // Clear the vector first.
+    aExtensions.clear();
+
+    // We cannot pass aExtensions to uscript_getScriptExtensions as UScriptCode
+    // takes 4 bytes, so create a local UScriptCode array to get the extensions.
+    UScriptCode ext[kMaxScripts];
+    UErrorCode status = U_ZERO_ERROR;
+    int32_t len = uscript_getScriptExtensions(static_cast<UChar32>(aCodePoint),
+                                              ext, kMaxScripts, &status);
+    if (U_FAILURE(status)) {
+      // kMaxScripts should be large enough to hold the maximum number of
+      // script extensions.
+      MOZ_DIAGNOSTIC_ASSERT(status != U_BUFFER_OVERFLOW_ERROR);
+      return Err(ToICUError(status));
+    }
+
+    if (!aExtensions.reserve(len)) {
+      return Err(ICUError::OutOfMemory);
+    }
+
+    // Space was reserved above, so the appends below cannot fail.
+    for (int32_t i = 0; i < len; i++) {
+      aExtensions.infallibleAppend(Script(ext[i]));
+    }
+
+    return Ok();
+  }
+};
+
+} // namespace mozilla::intl
+
+#endif
diff --git a/intl/components/src/UnicodeScriptCodes.h b/intl/components/src/UnicodeScriptCodes.h
new file mode 100644
index 0000000000..b5d6f490ee
--- /dev/null
+++ b/intl/components/src/UnicodeScriptCodes.h
@@ -0,0 +1,261 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * Derived from the Unicode Character Database by genUnicodePropertyData.pl
+ *
+ * For Unicode terms of use, see http://www.unicode.org/terms_of_use.html
+ */
+
+/*
+ * Created on Tue Oct 25 06:53:25 2022 from UCD data files with version info:
+ *
+
+# Unicode Character Database
+# Date: 2022-09-02
+# © 2022 Unicode®, Inc.
+# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+# For terms of use, see https://www.unicode.org/terms_of_use.html
+#
+# For documentation, see the following:
+# NamesList.html
+# UAX #38, "Unicode Han Database (Unihan)"
+# UAX #44, "Unicode Character Database"
+# UTS #51, "Unicode Emoji"
+#
+# The UAXes and UTS #51 can be accessed at https://www.unicode.org/versions/Unicode15.0.0/
+
+This directory contains the final data files
+for the Unicode Character Database, for Version 15.0.0 of the Unicode Standard.
+
+# IdentifierStatus.txt
+# Date: 2022-08-26, 16:49:09 GMT
+
+#
+# Unihan_Variants.txt
+# Date: 2022-08-01 16:36:07 GMT [JHJ]
+
+# VerticalOrientation-17.txt
+# Date: 2016-10-20, 07:00:00 GMT [EM, KI, LI]
+
+ *
+ * * * * * This file contains MACHINE-GENERATED DATA, do not edit! * * * * *
+ */
+
+#ifndef intl_components_UnicodeScriptCodes_h_
+#define intl_components_UnicodeScriptCodes_h_
+
+namespace mozilla::intl {
+enum class Script : int16_t {
+ COMMON = 0,
+ INHERITED = 1,
+ ARABIC = 2,
+ ARMENIAN = 3,
+ BENGALI = 4,
+ BOPOMOFO = 5,
+ CHEROKEE = 6,
+ COPTIC = 7,
+ CYRILLIC = 8,
+ DESERET = 9,
+ DEVANAGARI = 10,
+ ETHIOPIC = 11,
+ GEORGIAN = 12,
+ GOTHIC = 13,
+ GREEK = 14,
+ GUJARATI = 15,
+ GURMUKHI = 16,
+ HAN = 17,
+ HANGUL = 18,
+ HEBREW = 19,
+ HIRAGANA = 20,
+ KANNADA = 21,
+ KATAKANA = 22,
+ KHMER = 23,
+ LAO = 24,
+ LATIN = 25,
+ MALAYALAM = 26,
+ MONGOLIAN = 27,
+ MYANMAR = 28,
+ OGHAM = 29,
+ OLD_ITALIC = 30,
+ ORIYA = 31,
+ RUNIC = 32,
+ SINHALA = 33,
+ SYRIAC = 34,
+ TAMIL = 35,
+ TELUGU = 36,
+ THAANA = 37,
+ THAI = 38,
+ TIBETAN = 39,
+ CANADIAN_ABORIGINAL = 40,
+ YI = 41,
+ TAGALOG = 42,
+ HANUNOO = 43,
+ BUHID = 44,
+ TAGBANWA = 45,
+ BRAILLE = 46,
+ CYPRIOT = 47,
+ LIMBU = 48,
+ LINEAR_B = 49,
+ OSMANYA = 50,
+ SHAVIAN = 51,
+ TAI_LE = 52,
+ UGARITIC = 53,
+ KATAKANA_OR_HIRAGANA = 54,
+ BUGINESE = 55,
+ GLAGOLITIC = 56,
+ KHAROSHTHI = 57,
+ SYLOTI_NAGRI = 58,
+ NEW_TAI_LUE = 59,
+ TIFINAGH = 60,
+ OLD_PERSIAN = 61,
+ BALINESE = 62,
+ BATAK = 63,
+ BLISSYMBOLS = 64,
+ BRAHMI = 65,
+ CHAM = 66,
+ CIRTH = 67,
+ OLD_CHURCH_SLAVONIC_CYRILLIC = 68,
+ DEMOTIC_EGYPTIAN = 69,
+ HIERATIC_EGYPTIAN = 70,
+ EGYPTIAN_HIEROGLYPHS = 71,
+ KHUTSURI = 72,
+ SIMPLIFIED_HAN = 73,
+ TRADITIONAL_HAN = 74,
+ PAHAWH_HMONG = 75,
+ OLD_HUNGARIAN = 76,
+ HARAPPAN_INDUS = 77,
+ JAVANESE = 78,
+ KAYAH_LI = 79,
+ LATIN_FRAKTUR = 80,
+ LATIN_GAELIC = 81,
+ LEPCHA = 82,
+ LINEAR_A = 83,
+ MANDAIC = 84,
+ MAYAN_HIEROGLYPHS = 85,
+ MEROITIC_HIEROGLYPHS = 86,
+ NKO = 87,
+ OLD_TURKIC = 88,
+ OLD_PERMIC = 89,
+ PHAGS_PA = 90,
+ PHOENICIAN = 91,
+ MIAO = 92,
+ RONGORONGO = 93,
+ SARATI = 94,
+ ESTRANGELO_SYRIAC = 95,
+ WESTERN_SYRIAC = 96,
+ EASTERN_SYRIAC = 97,
+ TENGWAR = 98,
+ VAI = 99,
+ VISIBLE_SPEECH = 100,
+ CUNEIFORM = 101,
+ UNWRITTEN_LANGUAGES = 102,
+ UNKNOWN = 103,
+ CARIAN = 104,
+ JAPANESE = 105,
+ TAI_THAM = 106,
+ LYCIAN = 107,
+ LYDIAN = 108,
+ OL_CHIKI = 109,
+ REJANG = 110,
+ SAURASHTRA = 111,
+ SIGNWRITING = 112,
+ SUNDANESE = 113,
+ MOON = 114,
+ MEETEI_MAYEK = 115,
+ IMPERIAL_ARAMAIC = 116,
+ AVESTAN = 117,
+ CHAKMA = 118,
+ KOREAN = 119,
+ KAITHI = 120,
+ MANICHAEAN = 121,
+ INSCRIPTIONAL_PAHLAVI = 122,
+ PSALTER_PAHLAVI = 123,
+ BOOK_PAHLAVI = 124,
+ INSCRIPTIONAL_PARTHIAN = 125,
+ SAMARITAN = 126,
+ TAI_VIET = 127,
+ MATHEMATICAL_NOTATION = 128,
+ SYMBOLS = 129,
+ BAMUM = 130,
+ LISU = 131,
+ NAKHI_GEBA = 132,
+ OLD_SOUTH_ARABIAN = 133,
+ BASSA_VAH = 134,
+ DUPLOYAN = 135,
+ ELBASAN = 136,
+ GRANTHA = 137,
+ KPELLE = 138,
+ LOMA = 139,
+ MENDE_KIKAKUI = 140,
+ MEROITIC_CURSIVE = 141,
+ OLD_NORTH_ARABIAN = 142,
+ NABATAEAN = 143,
+ PALMYRENE = 144,
+ KHUDAWADI = 145,
+ WARANG_CITI = 146,
+ AFAKA = 147,
+ JURCHEN = 148,
+ MRO = 149,
+ NUSHU = 150,
+ SHARADA = 151,
+ SORA_SOMPENG = 152,
+ TAKRI = 153,
+ TANGUT = 154,
+ WOLEAI = 155,
+ ANATOLIAN_HIEROGLYPHS = 156,
+ KHOJKI = 157,
+ TIRHUTA = 158,
+ CAUCASIAN_ALBANIAN = 159,
+ MAHAJANI = 160,
+ AHOM = 161,
+ HATRAN = 162,
+ MODI = 163,
+ MULTANI = 164,
+ PAU_CIN_HAU = 165,
+ SIDDHAM = 166,
+ ADLAM = 167,
+ BHAIKSUKI = 168,
+ MARCHEN = 169,
+ NEWA = 170,
+ OSAGE = 171,
+ HAN_WITH_BOPOMOFO = 172,
+ JAMO = 173,
+ SYMBOLS_EMOJI = 174,
+ MASARAM_GONDI = 175,
+ SOYOMBO = 176,
+ ZANABAZAR_SQUARE = 177,
+ DOGRA = 178,
+ GUNJALA_GONDI = 179,
+ MAKASAR = 180,
+ MEDEFAIDRIN = 181,
+ HANIFI_ROHINGYA = 182,
+ SOGDIAN = 183,
+ OLD_SOGDIAN = 184,
+ ELYMAIC = 185,
+ NYIAKENG_PUACHUE_HMONG = 186,
+ NANDINAGARI = 187,
+ WANCHO = 188,
+ CHORASMIAN = 189,
+ DIVES_AKURU = 190,
+ KHITAN_SMALL_SCRIPT = 191,
+ YEZIDI = 192,
+ CYPRO_MINOAN = 193,
+ OLD_UYGHUR = 194,
+ TANGSA = 195,
+ TOTO = 196,
+ VITHKUQI = 197,
+ KAWI = 198,
+ NAG_MUNDARI = 199,
+
+ NUM_SCRIPT_CODES = 200,
+
+ INVALID = -1
+};
+} // namespace mozilla::intl
+
+#endif
+/*
+ * * * * * This file contains MACHINE-GENERATED DATA, do not edit! * * * * *
+ */