1 files changed, 773 insertions, 0 deletions
diff --git a/intl/components/src/Locale.h b/intl/components/src/Locale.h
new file mode 100644
index 0000000000..478d5f4a9e
--- /dev/null
+++ b/intl/components/src/Locale.h
@@ -0,0 +1,773 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* Structured representation of Unicode locale IDs used with Intl functions. */
+
+#ifndef intl_components_Locale_h
+#define intl_components_Locale_h
+
+#include "mozilla/Assertions.h"
+#include "mozilla/intl/ICUError.h"
+#include "mozilla/intl/ICU4CGlue.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/Span.h"
+#include "mozilla/TextUtils.h"
+#include "mozilla/TypedEnumBits.h"
+#include "mozilla/Variant.h"
+#include "mozilla/Vector.h"
+#include "mozilla/Result.h"
+
+#include <algorithm>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <utility>
+
+#include "unicode/uloc.h"
+
+namespace mozilla::intl {
+
+/**
+ * Return true if |language| is a valid language subtag.
+ */
+template <typename CharT>
+bool IsStructurallyValidLanguageTag(mozilla::Span<const CharT> aLanguage);
+
+/**
+ * Return true if |script| is a valid script subtag.
+ */
+template <typename CharT>
+bool IsStructurallyValidScriptTag(mozilla::Span<const CharT> aScript);
+
+/**
+ * Return true if |region| is a valid region subtag.
+ */
+template <typename CharT>
+bool IsStructurallyValidRegionTag(mozilla::Span<const CharT> aRegion);
+
+#ifdef DEBUG
+/**
+ * Return true if |variant| is a valid variant subtag.
+ */
+bool IsStructurallyValidVariantTag(mozilla::Span<const char> aVariant);
+
+/**
+ * Return true if |extension| is a valid Unicode extension subtag.
+ */
+bool IsStructurallyValidUnicodeExtensionTag(
+    mozilla::Span<const char> aExtension);
+
+/**
+ * Return true if |privateUse| is a valid private-use subtag.
+ */
+bool IsStructurallyValidPrivateUseTag(mozilla::Span<const char> aPrivateUse);
+
+#endif
+
+template <typename CharT>
+char AsciiToLowerCase(CharT aChar) {
+  MOZ_ASSERT(mozilla::IsAscii(aChar));
+  return mozilla::IsAsciiUppercaseAlpha(aChar) ? (aChar + 0x20) : aChar;
+}
+
+template <typename CharT>
+char AsciiToUpperCase(CharT aChar) {
+  MOZ_ASSERT(mozilla::IsAscii(aChar));
+  return mozilla::IsAsciiLowercaseAlpha(aChar) ? (aChar - 0x20) : aChar;
+}
+
+template <typename CharT>
+void AsciiToLowerCase(CharT* aChars, size_t aLength, char* aDest) {
+  char (&fn)(CharT) = AsciiToLowerCase;
+  std::transform(aChars, aChars + aLength, aDest, fn);
+}
+
+template <typename CharT>
+void AsciiToUpperCase(CharT* aChars, size_t aLength, char* aDest) {
+  char (&fn)(CharT) = AsciiToUpperCase;
+  std::transform(aChars, aChars + aLength, aDest, fn);
+}
+
+template <typename CharT>
+void AsciiToTitleCase(CharT* aChars, size_t aLength, char* aDest) {
+  if (aLength > 0) {
+    AsciiToUpperCase(aChars, 1, aDest);
+    AsciiToLowerCase(aChars + 1, aLength - 1, aDest + 1);
+  }
+}
+
+// Constants for language subtag lengths.
+namespace LanguageTagLimits {
+
+// unicode_language_subtag = alpha{2,3} | alpha{5,8} ;
+static constexpr size_t LanguageLength = 8;
+
+// unicode_script_subtag = alpha{4} ;
+static constexpr size_t ScriptLength = 4;
+
+// unicode_region_subtag = (alpha{2} | digit{3}) ;
+static constexpr size_t RegionLength = 3;
+static constexpr size_t AlphaRegionLength = 2;
+static constexpr size_t DigitRegionLength = 3;
+
+// key = alphanum alpha ;
+static constexpr size_t UnicodeKeyLength = 2;
+
+// tkey = alpha digit ;
+static constexpr size_t TransformKeyLength = 2;
+
+}  // namespace LanguageTagLimits
+
+// Fixed size language subtag which is stored inline in Locale.
+template <size_t SubtagLength>
+class LanguageTagSubtag final {
+  uint8_t mLength = 0;
+  char mChars[SubtagLength] = {};  // zero initialize
+
+ public:
+  LanguageTagSubtag() = default;
+
+  LanguageTagSubtag(const LanguageTagSubtag& aOther) {
+    std::copy_n(aOther.mChars, SubtagLength, mChars);
+    mLength = aOther.mLength;
+  }
+
+  LanguageTagSubtag& operator=(const LanguageTagSubtag& aOther) {
+    std::copy_n(aOther.mChars, SubtagLength, mChars);
+    mLength = aOther.mLength;
+    return *this;
+  }
+
+  size_t Length() const { return mLength; }
+  bool Missing() const { return mLength == 0; }
+  bool Present() const { return mLength > 0; }
+
+  mozilla::Span<const char> Span() const { return {mChars, mLength}; }
+
+  template <typename CharT>
+  void Set(mozilla::Span<const CharT> str) {
+    MOZ_ASSERT(str.size() <= SubtagLength);
+    std::copy_n(str.data(), str.size(), mChars);
+    mLength = str.size();
+  }
+
+  // The toXYZCase() methods are using |SubtagLength| instead of |length()|,
+  // because current compilers (tested GCC and Clang) can't infer the maximum
+  // string length - even when using hints like |std::min| - and instead are
+  // emitting SIMD optimized code. Using a fixed sized length avoids emitting
+  // the SIMD code. (Emitting SIMD code doesn't make sense here, because the
+  // SIMD code only kicks in for long strings.) A fixed length will
+  // additionally ensure the compiler unrolls the loop in the case conversion
+  // code.
+
+  void ToLowerCase() { AsciiToLowerCase(mChars, SubtagLength, mChars); }
+
+  void ToUpperCase() { AsciiToUpperCase(mChars, SubtagLength, mChars); }
+
+  void ToTitleCase() { AsciiToTitleCase(mChars, SubtagLength, mChars); }
+
+  template <size_t N>
+  bool EqualTo(const char (&str)[N]) const {
+    static_assert(N - 1 <= SubtagLength,
+                  "subtag literals must not exceed the maximum subtag length");
+
+    return mLength == N - 1 && memcmp(mChars, str, N - 1) == 0;
+  }
+};
+
+using LanguageSubtag = LanguageTagSubtag<LanguageTagLimits::LanguageLength>;
+using ScriptSubtag = LanguageTagSubtag<LanguageTagLimits::ScriptLength>;
+using RegionSubtag = LanguageTagSubtag<LanguageTagLimits::RegionLength>;
+
+using Latin1Char = unsigned char;
+using UniqueChars = UniquePtr<char[]>;
+
+/**
+ * Object representing a Unicode BCP 47 locale identifier.
+ *
+ * All subtags are already in canonicalized case.
+ */
+class MOZ_STACK_CLASS Locale final {
+  LanguageSubtag mLanguage = {};
+  ScriptSubtag mScript = {};
+  RegionSubtag mRegion = {};
+
+  using VariantsVector = Vector<UniqueChars, 2>;
+  using ExtensionsVector = Vector<UniqueChars, 2>;
+
+  VariantsVector mVariants;
+  ExtensionsVector mExtensions;
+  UniqueChars mPrivateUse = nullptr;
+
+  friend class LocaleParser;
+
+ public:
+  enum class CanonicalizationError : uint8_t {
+    DuplicateVariant,
+    InternalError,
+    OutOfMemory,
+  };
+
+ private:
+  Result<Ok, CanonicalizationError> CanonicalizeUnicodeExtension(
+      UniqueChars& unicodeExtension);
+
+  Result<Ok, CanonicalizationError> CanonicalizeTransformExtension(
+      UniqueChars& transformExtension);
+
+ public:
+  static bool LanguageMapping(LanguageSubtag& aLanguage);
+  static bool ComplexLanguageMapping(const LanguageSubtag& aLanguage);
+
+ private:
+  static bool ScriptMapping(ScriptSubtag& aScript);
+  static bool RegionMapping(RegionSubtag& aRegion);
+  static bool ComplexRegionMapping(const RegionSubtag& aRegion);
+
+  void PerformComplexLanguageMappings();
+  void PerformComplexRegionMappings();
+  [[nodiscard]] bool PerformVariantMappings();
+
+  [[nodiscard]] bool UpdateLegacyMappings();
+
+  static bool SignLanguageMapping(LanguageSubtag& aLanguage,
+                                  const RegionSubtag& aRegion);
+
+  static const char* ReplaceTransformExtensionType(
+      mozilla::Span<const char> aKey, mozilla::Span<const char> aType);
+
+ public:
+  /**
+   * Given a Unicode key and type, return the null-terminated preferred
+   * replacement for that type if there is one, or null if there is none, e.g.
+   * in effect
+   * |ReplaceUnicodeExtensionType("ca", "islamicc") == "islamic-civil"|
+   * and
+   * |ReplaceUnicodeExtensionType("ca", "islamic-civil") == nullptr|.
+   */
+  static const char* ReplaceUnicodeExtensionType(
+      mozilla::Span<const char> aKey, mozilla::Span<const char> aType);
+
+ public:
+  Locale() = default;
+  Locale(const Locale&) = delete;
+  Locale& operator=(const Locale&) = delete;
+  Locale(Locale&&) = default;
+  Locale& operator=(Locale&&) = default;
+
+  template <class Vec>
+  class SubtagIterator {
+    using Iter = decltype(std::declval<const Vec>().begin());
+
+    Iter mIter;
+
+   public:
+    explicit SubtagIterator(Iter iter) : mIter(iter) {}
+
+    // std::iterator traits.
+    using iterator_category = std::input_iterator_tag;
+    using value_type = Span<const char>;
+    using difference_type = ptrdiff_t;
+    using pointer = value_type*;
+    using reference = value_type&;
+
+    SubtagIterator& operator++() {
+      mIter++;
+      return *this;
+    }
+
+    SubtagIterator operator++(int) {
+      SubtagIterator result = *this;
+      ++(*this);
+      return result;
+    }
+
+    bool operator==(const SubtagIterator& aOther) const {
+      return mIter == aOther.mIter;
+    }
+
+    bool operator!=(const SubtagIterator& aOther) const {
+      return !(*this == aOther);
+    }
+
+    value_type operator*() const { return MakeStringSpan(mIter->get()); }
+  };
+
+  template <size_t N>
+  class SubtagEnumeration {
+    using Vec = Vector<UniqueChars, N>;
+
+    const Vec& mVector;
+
+   public:
+    explicit SubtagEnumeration(const Vec& aVector) : mVector(aVector) {}
+
+    size_t length() const { return mVector.length(); }
+    bool empty() const { return mVector.empty(); }
+
+    auto begin() const { return SubtagIterator<Vec>(mVector.begin()); }
+    auto end() const { return SubtagIterator<Vec>(mVector.end()); }
+
+    Span<const char> operator[](size_t aIndex) const {
+      return MakeStringSpan(mVector[aIndex].get());
+    }
+  };
+
+  const LanguageSubtag& Language() const { return mLanguage; }
+  const ScriptSubtag& Script() const { return mScript; }
+  const RegionSubtag& Region() const { return mRegion; }
+  auto Variants() const { return SubtagEnumeration(mVariants); }
+  auto Extensions() const { return SubtagEnumeration(mExtensions); }
+  Maybe<Span<const char>> PrivateUse() const {
+    if (const char* p = mPrivateUse.get()) {
+      return Some(MakeStringSpan(p));
+    }
+    return Nothing();
+  }
+
+  /**
+   * Return the Unicode extension subtag or Nothing if not present.
+   */
+  Maybe<Span<const char>> GetUnicodeExtension() const;
+
+ private:
+  ptrdiff_t UnicodeExtensionIndex() const;
+
+ public:
+  /**
+   * Set the language subtag. The input must be a valid language subtag.
+   */
+  template <size_t N>
+  void SetLanguage(const char (&aLanguage)[N]) {
+    mozilla::Span<const char> span(aLanguage, N - 1);
+    MOZ_ASSERT(IsStructurallyValidLanguageTag(span));
+    mLanguage.Set(span);
+  }
+
+  /**
+   * Set the language subtag. The input must be a valid language subtag.
+   */
+  void SetLanguage(const LanguageSubtag& aLanguage) {
+    MOZ_ASSERT(IsStructurallyValidLanguageTag(aLanguage.Span()));
+    mLanguage.Set(aLanguage.Span());
+  }
+
+  /**
+   * Set the script subtag. The input must be a valid script subtag.
+   */
+  template <size_t N>
+  void SetScript(const char (&aScript)[N]) {
+    mozilla::Span<const char> span(aScript, N - 1);
+    MOZ_ASSERT(IsStructurallyValidScriptTag(span));
+    mScript.Set(span);
+  }
+
+  /**
+   * Set the script subtag. The input must be a valid script subtag or the empty
+   * string.
+   */
+  void SetScript(const ScriptSubtag& aScript) {
+    MOZ_ASSERT(aScript.Missing() ||
+               IsStructurallyValidScriptTag(aScript.Span()));
+    mScript.Set(aScript.Span());
+  }
+
+  /**
+   * Set the region subtag. The input must be a valid region subtag.
+   */
+  template <size_t N>
+  void SetRegion(const char (&aRegion)[N]) {
+    mozilla::Span<const char> span(aRegion, N - 1);
+    MOZ_ASSERT(IsStructurallyValidRegionTag(span));
+    mRegion.Set(span);
+  }
+
+  /**
+   * Set the region subtag. The input must be a valid region subtag or the empty
+   * empty string.
+   */
+  void SetRegion(const RegionSubtag& aRegion) {
+    MOZ_ASSERT(aRegion.Missing() ||
+               IsStructurallyValidRegionTag(aRegion.Span()));
+    mRegion.Set(aRegion.Span());
+  }
+
+  /**
+   * Removes all variant subtags.
+   */
+  void ClearVariants() { mVariants.clearAndFree(); }
+
+  /**
+   * Set the Unicode extension subtag. The input must be a valid Unicode
+   * extension subtag.
+   */
+  ICUResult SetUnicodeExtension(Span<const char> aExtension);
+
+  /**
+   * Remove any Unicode extension subtag if present.
+   */
+  void ClearUnicodeExtension();
+
+  /** Canonicalize the base-name (language, script, region, variant) subtags. */
+  Result<Ok, CanonicalizationError> CanonicalizeBaseName();
+
+  /**
+   * Canonicalize all extension subtags.
+   */
+  Result<Ok, CanonicalizationError> CanonicalizeExtensions();
+
+  /**
+   * Canonicalizes the given structurally valid Unicode BCP 47 locale
+   * identifier, including regularized case of subtags. For example, the
+   * locale Zh-haNS-bu-variant2-Variant1-u-ca-chinese-t-Zh-laTN-x-PRIVATE,
+   * where
+   *
+   *     Zh             ; 2*3ALPHA
+   *     -haNS          ; ["-" script]
+   *     -bu            ; ["-" region]
+   *     -variant2      ; *("-" variant)
+   *     -Variant1
+   *     -u-ca-chinese  ; *("-" extension)
+   *     -t-Zh-laTN
+   *     -x-PRIVATE     ; ["-" privateuse]
+   *
+   * becomes zh-Hans-MM-variant1-variant2-t-zh-latn-u-ca-chinese-x-private
+   *
+   * Spec: ECMAScript Internationalization API Specification, 6.2.3.
+   */
+  Result<Ok, CanonicalizationError> Canonicalize() {
+    MOZ_TRY(CanonicalizeBaseName());
+    return CanonicalizeExtensions();
+  }
+
+  /**
+   * Fill the buffer with a string representation of the locale.
+   */
+  template <typename B>
+  ICUResult ToString(B& aBuffer) const {
+    static_assert(std::is_same_v<typename B::CharType, char>);
+
+    size_t capacity = ToStringCapacity();
+
+    // Attempt to reserve needed capacity
+    if (!aBuffer.reserve(capacity)) {
+      return Err(ICUError::OutOfMemory);
+    }
+
+    size_t offset = ToStringAppend(aBuffer.data());
+
+    MOZ_ASSERT(capacity == offset);
+    aBuffer.written(offset);
+
+    return Ok();
+  }
+
+  /**
+   * Add likely-subtags to the locale.
+   *
+   * Spec: <https://www.unicode.org/reports/tr35/#Likely_Subtags>
+   */
+  ICUResult AddLikelySubtags();
+
+  /**
+   * Remove likely-subtags from the locale.
+   *
+   * Spec: <https://www.unicode.org/reports/tr35/#Likely_Subtags>
+   */
+  ICUResult RemoveLikelySubtags();
+
+  /**
+   * Returns the default locale as an ICU locale identifier. The returned string
+   * is NOT a valid BCP 47 locale!
+   *
+   * Also see <https://unicode-org.github.io/icu/userguide/locale>.
+   */
+  static const char* GetDefaultLocale() { return uloc_getDefault(); }
+
+  /**
+   * Returns an iterator over all supported locales.
+   *
+   * The returned strings are ICU locale identifiers and NOT BCP 47 language
+   * tags.
+   *
+   * Also see <https://unicode-org.github.io/icu/userguide/locale>.
+   */
+  static auto GetAvailableLocales() {
+    return AvailableLocalesEnumeration<uloc_countAvailable,
+                                       uloc_getAvailable>();
+  }
+
+ private:
+  static UniqueChars DuplicateStringToUniqueChars(const char* aStr);
+  static UniqueChars DuplicateStringToUniqueChars(Span<const char> aStr);
+  size_t ToStringCapacity() const;
+  size_t ToStringAppend(char* aBuffer) const;
+};
+
+/**
+ * Parser for Unicode BCP 47 locale identifiers.
+ *
+ * <https://unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers>
+ */
+class MOZ_STACK_CLASS LocaleParser final {
+ public:
+  enum class ParserError : uint8_t {
+    // Input was not parseable as a locale, subtag or extension.
+    NotParseable,
+    // Unable to allocate memory for the parser to operate.
+    OutOfMemory,
+  };
+
+  // Exposed as |public| for |MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS|.
+  enum class TokenKind : uint8_t {
+    None = 0b000,
+    Alpha = 0b001,
+    Digit = 0b010,
+    AlphaDigit = 0b011,
+    Error = 0b100
+  };
+
+ private:
+  class Token final {
+    size_t mIndex;
+    size_t mLength;
+    TokenKind mKind;
+
+   public:
+    Token(TokenKind aKind, size_t aIndex, size_t aLength)
+        : mIndex(aIndex), mLength(aLength), mKind(aKind) {}
+
+    TokenKind Kind() const { return mKind; }
+    size_t Index() const { return mIndex; }
+    size_t Length() const { return mLength; }
+
+    bool IsError() const { return mKind == TokenKind::Error; }
+    bool IsNone() const { return mKind == TokenKind::None; }
+    bool IsAlpha() const { return mKind == TokenKind::Alpha; }
+    bool IsDigit() const { return mKind == TokenKind::Digit; }
+    bool IsAlphaDigit() const { return mKind == TokenKind::AlphaDigit; }
+  };
+
+  const char* mLocale;
+  size_t mLength;
+  size_t mIndex = 0;
+
+  explicit LocaleParser(Span<const char> aLocale)
+      : mLocale(aLocale.data()), mLength(aLocale.size()) {}
+
+  char CharAt(size_t aIndex) const { return mLocale[aIndex]; }
+
+  // Copy the token characters into |subtag|.
+  template <size_t N>
+  void CopyChars(const Token& aTok, LanguageTagSubtag<N>& aSubtag) const {
+    aSubtag.Set(mozilla::Span(mLocale + aTok.Index(), aTok.Length()));
+  }
+
+  // Create a string copy of |length| characters starting at |index|.
+  UniqueChars Chars(size_t aIndex, size_t aLength) const;
+
+  // Create a string copy of the token characters.
+  UniqueChars Chars(const Token& aTok) const {
+    return Chars(aTok.Index(), aTok.Length());
+  }
+
+  UniqueChars Extension(const Token& aStart, const Token& aEnd) const {
+    MOZ_ASSERT(aStart.Index() < aEnd.Index());
+
+    size_t length = aEnd.Index() - 1 - aStart.Index();
+    return Chars(aStart.Index(), length);
+  }
+
+  Token NextToken();
+
+  // unicode_language_subtag = alpha{2,3} | alpha{5,8} ;
+  //
+  // Four character language subtags are not allowed in Unicode BCP 47 locale
+  // identifiers. Also see the comparison to Unicode CLDR locale identifiers in
+  // <https://unicode.org/reports/tr35/#BCP_47_Conformance>.
+  bool IsLanguage(const Token& aTok) const {
+    return aTok.IsAlpha() && ((2 <= aTok.Length() && aTok.Length() <= 3) ||
+                              (5 <= aTok.Length() && aTok.Length() <= 8));
+  }
+
+  // unicode_script_subtag = alpha{4} ;
+  bool IsScript(const Token& aTok) const {
+    return aTok.IsAlpha() && aTok.Length() == 4;
+  }
+
+  // unicode_region_subtag = (alpha{2} | digit{3}) ;
+  bool IsRegion(const Token& aTok) const {
+    return (aTok.IsAlpha() && aTok.Length() == 2) ||
+           (aTok.IsDigit() && aTok.Length() == 3);
+  }
+
+  // unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3}) ;
+  bool IsVariant(const Token& aTok) const {
+    return (5 <= aTok.Length() && aTok.Length() <= 8) ||
+           (aTok.Length() == 4 && mozilla::IsAsciiDigit(CharAt(aTok.Index())));
+  }
+
+  // Returns the code unit of the first character at the given singleton token.
+  // Always returns the lower case form of an alphabetical character.
+  char SingletonKey(const Token& aTok) const {
+    MOZ_ASSERT(aTok.Length() == 1);
+    return AsciiToLowerCase(CharAt(aTok.Index()));
+  }
+
+  // extensions = unicode_locale_extensions |
+  //              transformed_extensions |
+  //              other_extensions ;
+  //
+  // unicode_locale_extensions = sep [uU] ((sep keyword)+ |
+  //                                       (sep attribute)+ (sep keyword)*) ;
+  //
+  // transformed_extensions = sep [tT] ((sep tlang (sep tfield)*) |
+  //                                    (sep tfield)+) ;
+  //
+  // other_extensions = sep [alphanum-[tTuUxX]] (sep alphanum{2,8})+ ;
+  bool IsExtensionStart(const Token& aTok) const {
+    return aTok.Length() == 1 && SingletonKey(aTok) != 'x';
+  }
+
+  // other_extensions = sep [alphanum-[tTuUxX]] (sep alphanum{2,8})+ ;
+  bool IsOtherExtensionPart(const Token& aTok) const {
+    return 2 <= aTok.Length() && aTok.Length() <= 8;
+  }
+
+  // unicode_locale_extensions = sep [uU] ((sep keyword)+ |
+  //                                       (sep attribute)+ (sep keyword)*) ;
+  // keyword = key (sep type)? ;
+  bool IsUnicodeExtensionPart(const Token& aTok) const {
+    return IsUnicodeExtensionKey(aTok) || IsUnicodeExtensionType(aTok) ||
+           IsUnicodeExtensionAttribute(aTok);
+  }
+
+  // attribute = alphanum{3,8} ;
+  bool IsUnicodeExtensionAttribute(const Token& aTok) const {
+    return 3 <= aTok.Length() && aTok.Length() <= 8;
+  }
+
+  // key = alphanum alpha ;
+  bool IsUnicodeExtensionKey(const Token& aTok) const {
+    return aTok.Length() == 2 &&
+           mozilla::IsAsciiAlpha(CharAt(aTok.Index() + 1));
+  }
+
+  // type = alphanum{3,8} (sep alphanum{3,8})* ;
+  bool IsUnicodeExtensionType(const Token& aTok) const {
+    return 3 <= aTok.Length() && aTok.Length() <= 8;
+  }
+
+  // tkey = alpha digit ;
+  bool IsTransformExtensionKey(const Token& aTok) const {
+    return aTok.Length() == 2 && mozilla::IsAsciiAlpha(CharAt(aTok.Index())) &&
+           mozilla::IsAsciiDigit(CharAt(aTok.Index() + 1));
+  }
+
+  // tvalue = (sep alphanum{3,8})+ ;
+  bool IsTransformExtensionPart(const Token& aTok) const {
+    return 3 <= aTok.Length() && aTok.Length() <= 8;
+  }
+
+  // pu_extensions = sep [xX] (sep alphanum{1,8})+ ;
+  bool IsPrivateUseStart(const Token& aTok) const {
+    return aTok.Length() == 1 && SingletonKey(aTok) == 'x';
+  }
+
+  // pu_extensions = sep [xX] (sep alphanum{1,8})+ ;
+  bool IsPrivateUsePart(const Token& aTok) const {
+    return 1 <= aTok.Length() && aTok.Length() <= 8;
+  }
+
+  // Helper function for use in |ParseBaseName| and
+  // |ParseTlangInTransformExtension|.  Do not use this directly!
+  static Result<Ok, ParserError> InternalParseBaseName(
+      LocaleParser& aLocaleParser, Locale& aTag, Token& aTok);
+
+  // Parse the `unicode_language_id` production, i.e. the
+  // language/script/region/variants portion of a locale, into |aTag|.
+  // |aTok| must be the current token.
+  static Result<Ok, ParserError> ParseBaseName(LocaleParser& aLocaleParser,
+                                               Locale& aTag, Token& aTok) {
+    return InternalParseBaseName(aLocaleParser, aTag, aTok);
+  }
+
+  // Parse the `tlang` production within a parsed 't' transform extension.
+  // The precise requirements for "previously parsed" are:
+  //
+  //   * the input begins from current token |tok| with a valid `tlang`
+  //   * the `tlang` is wholly lowercase (*not* canonical case)
+  //   * variant subtags in the `tlang` may contain duplicates and be
+  //     unordered
+  //
+  // Return an error on internal failure. Otherwise, return a success value. If
+  // there was no `tlang`, then |tag.language().missing()|. But if there was a
+  // `tlang`, then |tag| is filled with subtags exactly as they appeared in the
+  // parse input.
+  static Result<Ok, ParserError> ParseTlangInTransformExtension(
+      LocaleParser& aLocaleParser, Locale& aTag, Token& aTok) {
+    MOZ_ASSERT(aLocaleParser.IsLanguage(aTok));
+    return InternalParseBaseName(aLocaleParser, aTag, aTok);
+  }
+
+  friend class Locale;
+
+  class Range final {
+    size_t mBegin;
+    size_t mLength;
+
+   public:
+    Range(size_t aBegin, size_t aLength) : mBegin(aBegin), mLength(aLength) {}
+
+    size_t Begin() const { return mBegin; }
+    size_t Length() const { return mLength; }
+  };
+
+  using TFieldVector = Vector<Range, 8>;
+  using AttributesVector = Vector<Range, 8>;
+  using KeywordsVector = Vector<Range, 8>;
+
+  // Parse |extension|, which must be a validated, fully lowercase
+  // `transformed_extensions` subtag, and fill |tag| and |fields| from the
+  // `tlang` and `tfield` components. Data in |tag| is lowercase, consistent
+  // with |extension|.
+  static Result<Ok, ParserError> ParseTransformExtension(
+      mozilla::Span<const char> aExtension, Locale& aTag,
+      TFieldVector& aFields);
+
+  // Parse |extension|, which must be a validated, fully lowercase
+  // `unicode_locale_extensions` subtag, and fill |attributes| and |keywords|
+  // from the `attribute` and `keyword` components.
+  static Result<Ok, ParserError> ParseUnicodeExtension(
+      mozilla::Span<const char> aExtension, AttributesVector& aAttributes,
+      KeywordsVector& aKeywords);
+
+ public:
+  // Parse the input string as a locale.
+  //
+  // NOTE: |aTag| must be a new, empty Locale.
+  static Result<Ok, ParserError> TryParse(Span<const char> aLocale,
+                                          Locale& aTag);
+
+  // Parse the input string as the base-name parts (language, script, region,
+  // variants) of a locale.
+  //
+  // NOTE: |aTag| must be a new, empty Locale.
+  static Result<Ok, ParserError> TryParseBaseName(Span<const char> aLocale,
+                                                  Locale& aTag);
+
+  // Return Ok() iff |extension| can be parsed as a Unicode extension subtag.
+  static Result<Ok, ParserError> CanParseUnicodeExtension(
+      Span<const char> aExtension);
+
+  // Return Ok() iff |unicodeType| can be parsed as a Unicode extension type.
+  static Result<Ok, ParserError> CanParseUnicodeExtensionType(
+      Span<const char> aUnicodeType);
+};
+
+MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(LocaleParser::TokenKind)
+
+}  // namespace mozilla::intl
+
+#endif /* intl_components_Locale_h */