Adding upstream version 1:115.7.0.upstream/1%115.7.0 upstream

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 17:32:43 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 17:32:43 +0000
commit: 6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch)
tree: a68f146d7fa01f0134297619fbe7e33db084e0aa /intl/components/src/Locale.cpp
parent: Initial commit. (diff)
download: thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.tar.xz
thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.zip
1 files changed, 1471 insertions, 0 deletions
diff --git a/intl/components/src/Locale.cpp b/intl/components/src/Locale.cpp
new file mode 100644
index 0000000000..9a043518cf
--- /dev/null
+++ b/intl/components/src/Locale.cpp
@@ -0,0 +1,1471 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/intl/Locale.h"
+
+#include "mozilla/Assertions.h"
+#include "mozilla/DebugOnly.h"
+#include "mozilla/MathAlgorithms.h"
+#include "mozilla/Span.h"
+#include "mozilla/TextUtils.h"
+#include "mozilla/Variant.h"
+
+#include "ICU4CGlue.h"
+
+#include <algorithm>
+#include <iterator>
+#include <stddef.h>
+#include <stdint.h>
+#include <string>
+#include <string.h>
+#include <type_traits>
+#include <utility>
+
+#include "unicode/uloc.h"
+#include "unicode/utypes.h"
+
+namespace mozilla::intl {
+
+using namespace intl::LanguageTagLimits;
+
+/**
+ * Structural check for a language subtag:
+ *
+ *   unicode_language_subtag = alpha{2,3} | alpha{5,8};
+ *
+ * Returns true iff |aLanguage| has one of the accepted lengths and every code
+ * unit is an ASCII letter.
+ */
+template <typename CharT>
+bool IsStructurallyValidLanguageTag(Span<const CharT> aLanguage) {
+  const size_t count = aLanguage.size();
+  const bool isShortForm = count == 2 || count == 3;
+  const bool isLongForm = count >= 5 && count <= 8;
+  if (!isShortForm && !isLongForm) {
+    return false;
+  }
+
+  const CharT* const first = aLanguage.data();
+  return std::all_of(first, first + count, IsAsciiAlpha<CharT>);
+}
+
+template bool IsStructurallyValidLanguageTag(Span<const char> aLanguage);
+template bool IsStructurallyValidLanguageTag(Span<const Latin1Char> aLanguage);
+template bool IsStructurallyValidLanguageTag(Span<const char16_t> aLanguage);
+
+/**
+ * Structural check for a script subtag:
+ *
+ *   unicode_script_subtag = alpha{4} ;
+ *
+ * Returns true iff |aScript| is exactly four ASCII letters.
+ */
+template <typename CharT>
+bool IsStructurallyValidScriptTag(Span<const CharT> aScript) {
+  if (aScript.size() != 4) {
+    return false;
+  }
+
+  const CharT* const first = aScript.data();
+  return std::all_of(first, first + 4, IsAsciiAlpha<CharT>);
+}
+
+template bool IsStructurallyValidScriptTag(Span<const char> aScript);
+template bool IsStructurallyValidScriptTag(Span<const Latin1Char> aScript);
+template bool IsStructurallyValidScriptTag(Span<const char16_t> aScript);
+
+/**
+ * Structural check for a region subtag:
+ *
+ *   unicode_region_subtag = (alpha{2} | digit{3}) ;
+ *
+ * Returns true iff |aRegion| is either two ASCII letters or three ASCII
+ * digits.
+ */
+template <typename CharT>
+bool IsStructurallyValidRegionTag(Span<const CharT> aRegion) {
+  const CharT* const first = aRegion.data();
+  const CharT* const last = first + aRegion.size();
+
+  switch (aRegion.size()) {
+    case 2:
+      // Two-letter region code.
+      return std::all_of(first, last, IsAsciiAlpha<CharT>);
+    case 3:
+      // Three-digit region code.
+      return std::all_of(first, last, IsAsciiDigit<CharT>);
+    default:
+      return false;
+  }
+}
+
+template bool IsStructurallyValidRegionTag(Span<const char> aRegion);
+template bool IsStructurallyValidRegionTag(Span<const Latin1Char> aRegion);
+template bool IsStructurallyValidRegionTag(Span<const char16_t> aRegion);
+
+#ifdef DEBUG
+/**
+ * Structural check for a variant subtag (debug builds only):
+ *
+ *   unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3}) ;
+ *
+ * A four character variant must start with a digit; variants of five to eight
+ * characters may start with any ASCII alphanumeric character.
+ */
+bool IsStructurallyValidVariantTag(Span<const char> aVariant) {
+  const size_t count = aVariant.size();
+  const char* const first = aVariant.data();
+
+  bool shapeOk;
+  if (count == 4) {
+    shapeOk = IsAsciiDigit(first[0]);
+  } else {
+    shapeOk = count >= 5 && count <= 8;
+  }
+  if (!shapeOk) {
+    return false;
+  }
+
+  return std::all_of(first, first + count, IsAsciiAlphanumeric<char>);
+}
+
+// Debug-only helper: a Unicode extension is considered structurally valid
+// exactly when LocaleParser::CanParseUnicodeExtension() accepts it.
+bool IsStructurallyValidUnicodeExtensionTag(Span<const char> aExtension) {
+  const auto parsed = LocaleParser::CanParseUnicodeExtension(aExtension);
+  return parsed.isOk();
+}
+
+/**
+ * Structural check for a non-private-use extension sequence (debug builds
+ * only):
+ *
+ *   other_extensions = sep [alphanum-[tTuUxX]] (sep alphanum{2,8})+ ;
+ *
+ * NB: Unicode ("u") and Transform ("t") extensions are accepted here as well,
+ * because this function is only used for an assertion. Only the private-use
+ * singleton "x"/"X" is rejected.
+ */
+static bool IsStructurallyValidExtensionTag(Span<const char> aExtension) {
+  const size_t total = aExtension.size();
+  if (total <= 2) {
+    // Too short to hold a singleton, a separator, and a subtag.
+    return false;
+  }
+
+  const char* cursor = aExtension.data();
+  const char* const limit = cursor + total;
+
+  const char singleton = cursor[0];
+  const bool isPrivateUse = singleton == 'x' || singleton == 'X';
+  if (isPrivateUse || !IsAsciiAlphanumeric(singleton)) {
+    return false;
+  }
+  if (cursor[1] != '-') {
+    return false;
+  }
+  cursor += 2;
+
+  // Walk the dash-separated subtags following the singleton.
+  for (;;) {
+    const char* const dash =
+        reinterpret_cast<const char*>(memchr(cursor, '-', limit - cursor));
+    const char* const subtagEnd = dash ? dash : limit;
+    const size_t subtagLength = subtagEnd - cursor;
+
+    if (subtagLength < 2 || subtagLength > 8) {
+      return false;
+    }
+    if (!std::all_of(cursor, subtagEnd, IsAsciiAlphanumeric<char>)) {
+      return false;
+    }
+    if (!dash) {
+      // The last subtag was valid, too.
+      return true;
+    }
+    cursor = dash + 1;
+  }
+}
+
+/**
+ * Structural check for a private-use sequence (debug builds only):
+ *
+ *   pu_extensions = sep [xX] (sep alphanum{1,8})+ ;
+ *
+ * |aPrivateUse| must start with "x-" or "X-" and be followed by one or more
+ * dash-separated alphanumeric subtags of one to eight characters each.
+ */
+bool IsStructurallyValidPrivateUseTag(Span<const char> aPrivateUse) {
+  const size_t total = aPrivateUse.size();
+  if (total <= 2) {
+    // Too short to hold "x-" plus at least one character.
+    return false;
+  }
+
+  const char* cursor = aPrivateUse.data();
+  const char* const limit = cursor + total;
+
+  const bool hasSingleton = cursor[0] == 'x' || cursor[0] == 'X';
+  if (!hasSingleton || cursor[1] != '-') {
+    return false;
+  }
+  cursor += 2;
+
+  // Walk the dash-separated subtags following the singleton.
+  for (;;) {
+    const char* const dash =
+        reinterpret_cast<const char*>(memchr(cursor, '-', limit - cursor));
+    const char* const subtagEnd = dash ? dash : limit;
+    const size_t subtagLength = subtagEnd - cursor;
+
+    if (subtagLength == 0 || subtagLength > 8) {
+      return false;
+    }
+    if (!std::all_of(cursor, subtagEnd, IsAsciiAlphanumeric<char>)) {
+      return false;
+    }
+    if (!dash) {
+      // The last subtag was valid, too.
+      return true;
+    }
+    cursor = dash + 1;
+  }
+}
+#endif
+
+// Returns the position within |mExtensions| of the first extension whose
+// singleton is "u" or "U", or -1 when there is no such extension.
+ptrdiff_t Locale::UnicodeExtensionIndex() const {
+  // The extension subtags aren't necessarily sorted, so a linear scan is
+  // needed instead of a binary search.
+  ptrdiff_t index = 0;
+  for (const auto& ext : mExtensions) {
+    if (ext[0] == 'u' || ext[0] == 'U') {
+      return index;
+    }
+    index++;
+  }
+  return -1;
+}
+
+// Returns a span over the stored Unicode extension string, or Nothing() when
+// this locale has no Unicode extension.
+Maybe<Span<const char>> Locale::GetUnicodeExtension() const {
+  const ptrdiff_t index = UnicodeExtensionIndex();
+  if (index < 0) {
+    return Nothing();
+  }
+  return Some(MakeStringSpan(mExtensions[index].get()));
+}
+
+/**
+ * Stores a copy of |aExtension| as this locale's Unicode extension.
+ *
+ * An already present Unicode extension is replaced in place; otherwise the
+ * copy is appended to the list of extensions. |aExtension| must be a
+ * structurally valid Unicode extension (asserted in debug builds).
+ *
+ * Returns Err(ICUError::OutOfMemory) when the copy can't be allocated or the
+ * extensions vector can't grow.
+ */
+ICUResult Locale::SetUnicodeExtension(Span<const char> aExtension) {
+  MOZ_ASSERT(IsStructurallyValidUnicodeExtensionTag(aExtension));
+
+  auto duplicated = DuplicateStringToUniqueChars(aExtension);
+  // Mirror the other DuplicateStringToUniqueChars() callers in this file and
+  // never store a null entry: later code reads the extension's characters
+  // unconditionally.
+  if (!duplicated) {
+    return Err(ICUError::OutOfMemory);
+  }
+
+  // Replace the existing Unicode extension subtag or append a new one.
+  ptrdiff_t index = UnicodeExtensionIndex();
+  if (index >= 0) {
+    mExtensions[index] = std::move(duplicated);
+    return Ok();
+  }
+  if (!mExtensions.append(std::move(duplicated))) {
+    return Err(ICUError::OutOfMemory);
+  }
+  return Ok();
+}
+
+// Removes the Unicode extension from |mExtensions|; does nothing when no
+// Unicode extension is present.
+void Locale::ClearUnicodeExtension() {
+  const ptrdiff_t index = UnicodeExtensionIndex();
+  if (index < 0) {
+    return;
+  }
+  mExtensions.erase(mExtensions.begin() + index);
+}
+
+/**
+ * Sorts the zero-terminated strings in |aSubtags| in ascending strcmp() order.
+ * The sort is stable.
+ *
+ * Returns false only when the temporary scratch vector can't be allocated; in
+ * that case |aSubtags| is left unmodified.
+ */
+template <size_t InitialCapacity>
+static bool SortAlphabetically(Vector<UniqueChars, InitialCapacity>& aSubtags) {
+  const size_t count = aSubtags.length();
+
+  switch (count) {
+    case 0:
+    case 1:
+      // Nothing to reorder.
+      return true;
+    case 2:
+      // Two elements need at most a single swap.
+      if (strcmp(aSubtags[0].get(), aSubtags[1].get()) > 0) {
+        aSubtags[0].swap(aSubtags[1]);
+      }
+      return true;
+    default:
+      break;
+  }
+
+  // Sort raw pointers instead of the owning pointers. Allocate the scratch
+  // space first, so that ownership is only given up once nothing can fail
+  // anymore.
+  Vector<char*, 8> rawPointers;
+  if (!rawPointers.resizeUninitialized(count)) {
+    return false;
+  }
+
+  size_t next = 0;
+  for (UniqueChars& owner : aSubtags) {
+    rawPointers[next++] = owner.release();
+  }
+
+  auto precedes = [](const char* a, const char* b) { return strcmp(a, b) < 0; };
+  std::stable_sort(rawPointers.begin(), rawPointers.end(), precedes);
+
+  // Hand ownership back in sorted order.
+  next = 0;
+  for (UniqueChars& owner : aSubtags) {
+    owner = UniqueChars(rawPointers[next++]);
+  }
+  return true;
+}
+
+/**
+ * Canonicalizes the base name of this locale, i.e. the language, script,
+ * region, and variant subtags. Extension and private-use subtags are not
+ * touched here.
+ *
+ * The subtags are first case-normalized, then the variants are sorted and
+ * checked for duplicates, and finally deprecated subtags are replaced through
+ * the various mapping functions.
+ *
+ * Returns:
+ * - Err(CanonicalizationError::DuplicateVariant) if the same variant subtag
+ *   occurs more than once (compared after lowercasing),
+ * - Err(CanonicalizationError::OutOfMemory) if sorting the variants,
+ *   UpdateLegacyMappings(), or PerformVariantMappings() reports failure,
+ * - Ok() otherwise.
+ */
+Result<Ok, Locale::CanonicalizationError> Locale::CanonicalizeBaseName() {
+  // Per 6.2.3 CanonicalizeUnicodeLocaleId, the very first step is to
+  // canonicalize the syntax by normalizing the case and ordering all subtags.
+  // The canonical syntax form is specified in UTS 35, 3.2.1.
+
+  // Language codes need to be in lower case. "JA" -> "ja"
+  mLanguage.ToLowerCase();
+  MOZ_ASSERT(IsStructurallyValidLanguageTag(Language().Span()));
+
+  // The first character of a script code needs to be capitalized.
+  // "hans" -> "Hans"
+  mScript.ToTitleCase();
+  MOZ_ASSERT(Script().Missing() ||
+             IsStructurallyValidScriptTag(Script().Span()));
+
+  // Region codes need to be in upper case. "bu" -> "BU"
+  mRegion.ToUpperCase();
+  MOZ_ASSERT(Region().Missing() ||
+             IsStructurallyValidRegionTag(Region().Span()));
+
+  // The canonical case for variant subtags is lowercase.
+  for (UniqueChars& variant : mVariants) {
+    char* variantChars = variant.get();
+    size_t variantLength = strlen(variantChars);
+    // Lowercase in place: source and destination are the same buffer.
+    AsciiToLowerCase(variantChars, variantLength, variantChars);
+
+    MOZ_ASSERT(IsStructurallyValidVariantTag({variantChars, variantLength}));
+  }
+
+  // Extensions and privateuse subtags are case normalized in the
+  // |CanonicalizeExtensions| method.
+
+  // The second step in UTS 35, 3.2.1, is to order all subtags.
+
+  if (mVariants.length() > 1) {
+    // 1. Any variants are in alphabetical order.
+    if (!SortAlphabetically(mVariants)) {
+      return Err(CanonicalizationError::OutOfMemory);
+    }
+
+    // Reject the Locale identifier if a duplicate variant was found, e.g.
+    // "en-variant-Variant".
+    // The variants are sorted at this point, so duplicates are adjacent.
+    const UniqueChars* duplicate = std::adjacent_find(
+        mVariants.begin(), mVariants.end(), [](const auto& a, const auto& b) {
+          return strcmp(a.get(), b.get()) == 0;
+        });
+    if (duplicate != mVariants.end()) {
+      return Err(CanonicalizationError::DuplicateVariant);
+    }
+  }
+
+  // 2. Any extensions are in alphabetical order by their singleton.
+  // 3. All attributes are sorted in alphabetical order.
+  // 4. All keywords and tfields are sorted by alphabetical order of their keys,
+  //    within their respective extensions.
+  // 5. Any type or tfield value "true" is removed.
+  // - A subsequent call to CanonicalizeExtensions() will perform these steps.
+
+  // 6.2.3 CanonicalizeUnicodeLocaleId, step 2 transforms the locale identifier
+  // into its canonical form per UTS 35, 3.2.1.
+
+  // 1. Use the bcp47 data to replace keys, types, tfields, and tvalues by their
+  // canonical forms.
+  // - A subsequent call to CanonicalizeExtensions() will perform this step.
+
+  // 2. Replace aliases in the unicode_language_id and tlang (if any).
+  // - tlang is handled in CanonicalizeExtensions().
+
+  // Replace deprecated language, region, and variant subtags with their
+  // preferred mappings.
+
+  if (!UpdateLegacyMappings()) {
+    return Err(CanonicalizationError::OutOfMemory);
+  }
+
+  // Replace deprecated language subtags with their preferred values.
+  // The complex mapping is only consulted when the simple mapping didn't
+  // apply.
+  if (!LanguageMapping(mLanguage) && ComplexLanguageMapping(mLanguage)) {
+    PerformComplexLanguageMappings();
+  }
+
+  // Replace deprecated script subtags with their preferred values.
+  if (Script().Present()) {
+    ScriptMapping(mScript);
+  }
+
+  // Replace deprecated region subtags with their preferred values.
+  if (Region().Present()) {
+    if (!RegionMapping(mRegion) && ComplexRegionMapping(mRegion)) {
+      PerformComplexRegionMappings();
+    }
+  }
+
+  // Replace deprecated variant subtags with their preferred values.
+  if (!PerformVariantMappings()) {
+    return Err(CanonicalizationError::OutOfMemory);
+  }
+
+  // No extension replacements are currently present.
+  // Private use sequences are left as is.
+
+  // 3. Replace aliases in special key values.
+  // - A subsequent call to CanonicalizeExtensions() will perform this step.
+
+  return Ok();
+}
+
+#ifdef DEBUG
+// Debug-only helper: returns true iff every character of |aSpan| is an ASCII
+// lowercase letter, an ASCII digit, or '-'. An empty span yields true.
+static bool IsAsciiLowercaseAlphanumericOrDash(Span<const char> aSpan) {
+  for (char c : aSpan) {
+    const bool allowed =
+        IsAsciiLowercaseAlpha(c) || IsAsciiDigit(c) || c == '-';
+    if (!allowed) {
+      return false;
+    }
+  }
+  return true;
+}
+#endif
+
+/**
+ * Canonicalizes the extension and private-use subtags of this locale:
+ *
+ * 1. All extensions are converted to lowercase.
+ * 2. The extensions are sorted alphabetically.
+ * 3. Unicode ("u") and Transform ("t") extensions are canonicalized through
+ *    their dedicated methods; other extensions are left as is.
+ * 4. The private-use sequence, if present, is converted to lowercase.
+ *
+ * Returns Err(CanonicalizationError::OutOfMemory) if sorting fails, or
+ * forwards the error of the Unicode/Transform canonicalization.
+ */
+Result<Ok, Locale::CanonicalizationError> Locale::CanonicalizeExtensions() {
+  // The canonical case for all extension subtags is lowercase.
+  for (UniqueChars& extension : mExtensions) {
+    char* const chars = extension.get();
+    const size_t length = strlen(chars);
+    AsciiToLowerCase(chars, length, chars);
+
+    MOZ_ASSERT(IsStructurallyValidExtensionTag({chars, length}));
+  }
+
+  // Any extensions are in alphabetical order by their singleton.
+  // "u-ca-chinese-t-zh-latn" -> "t-zh-latn-u-ca-chinese"
+  if (!SortAlphabetically(mExtensions)) {
+    return Err(CanonicalizationError::OutOfMemory);
+  }
+
+  // Dispatch on the (already lowercased) singleton.
+  for (UniqueChars& extension : mExtensions) {
+    switch (extension[0]) {
+      case 'u':
+        MOZ_TRY(CanonicalizeUnicodeExtension(extension));
+        break;
+      case 't':
+        MOZ_TRY(CanonicalizeTransformExtension(extension));
+        break;
+      default:
+        break;
+    }
+
+    MOZ_ASSERT(
+        IsAsciiLowercaseAlphanumericOrDash(MakeStringSpan(extension.get())));
+  }
+
+  // The canonical case for privateuse subtags is lowercase.
+  char* const privateuse = mPrivateUse.get();
+  if (!privateuse) {
+    return Ok();
+  }
+
+  const size_t privateuseLength = strlen(privateuse);
+  AsciiToLowerCase(privateuse, privateuseLength, privateuse);
+
+  MOZ_ASSERT(IsStructurallyValidPrivateUseTag({privateuse, privateuseLength}));
+  return Ok();
+}
+
+// Appends all characters of |aSpan| to |vector| and returns the result of the
+// underlying append() call.
+template <size_t N>
+static inline bool AppendSpan(Vector<char, N>& vector, Span<const char> aSpan) {
+  const char* const chars = aSpan.data();
+  const size_t count = aSpan.size();
+  return vector.append(chars, count);
+}
+
+/**
+ * CanonicalizeUnicodeExtension( attributes, keywords )
+ *
+ * Canonical syntax per
+ * <https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers>:
+ *
+ * - All attributes and keywords are in lowercase.
+ *   - Note: The parser already converted keywords to lowercase.
+ * - All attributes are sorted in alphabetical order.
+ * - All keywords are sorted by alphabetical order of their keys.
+ * - Any type value "true" is removed.
+ *
+ * Canonical form:
+ * - All keys and types use the canonical form (from the name attribute;
+ *   see Section 3.6.4 U Extension Data Files).
+ *
+ * |aUnicodeExtension| is the zero-terminated extension string starting with
+ * "u-". It is replaced by a newly allocated string only when the canonical
+ * form differs from the input.
+ *
+ * Returns Err(CanonicalizationError::InternalError) if the extension can't be
+ * parsed and Err(CanonicalizationError::OutOfMemory) if building or copying
+ * the canonical string fails.
+ */
+Result<Ok, Locale::CanonicalizationError> Locale::CanonicalizeUnicodeExtension(
+    UniqueChars& aUnicodeExtension) {
+  Span<const char> extension = MakeStringSpan(aUnicodeExtension.get());
+  MOZ_ASSERT(extension[0] == 'u');
+  MOZ_ASSERT(extension[1] == '-');
+  MOZ_ASSERT(IsStructurallyValidExtensionTag(extension));
+
+  LocaleParser::AttributesVector attributes;
+  LocaleParser::KeywordsVector keywords;
+
+  using Attribute = LocaleParser::AttributesVector::ElementType;
+  using Keyword = LocaleParser::KeywordsVector::ElementType;
+
+  // The parsed attributes and keywords are (begin, length) ranges into
+  // |extension|; they are resolved through |extension.Subspan()| below.
+  if (LocaleParser::ParseUnicodeExtension(extension, attributes, keywords)
+          .isErr()) {
+    MOZ_ASSERT_UNREACHABLE("unexpected invalid Unicode extension subtag");
+    return Err(CanonicalizationError::InternalError);
+  }
+
+  // Orders attributes by their complete text.
+  auto attributesLess = [extension](const Attribute& a, const Attribute& b) {
+    auto astr = extension.Subspan(a.Begin(), a.Length());
+    auto bstr = extension.Subspan(b.Begin(), b.Length());
+    return astr < bstr;
+  };
+
+  // All attributes are sorted in alphabetical order.
+  if (attributes.length() > 1) {
+    std::stable_sort(attributes.begin(), attributes.end(), attributesLess);
+  }
+
+  // Orders keywords by their key only, i.e. the first |UnicodeKeyLength|
+  // characters; the type is ignored.
+  auto keywordsLess = [extension](const Keyword& a, const Keyword& b) {
+    auto astr = extension.Subspan(a.Begin(), UnicodeKeyLength);
+    auto bstr = extension.Subspan(b.Begin(), UnicodeKeyLength);
+    return astr < bstr;
+  };
+
+  // All keywords are sorted by alphabetical order of keys.
+  if (keywords.length() > 1) {
+    // Using a stable sort algorithm, guarantees that two keywords using the
+    // same key are never reordered. That means for example
+    // when we have the input "u-nu-thai-kf-false-nu-latn", we are guaranteed to
+    // get the result "u-kf-false-nu-thai-nu-latn", i.e. "nu-thai" still occurs
+    // before "nu-latn".
+    // This is required so that deduplication below preserves the first keyword
+    // for a given key and discards the rest.
+    std::stable_sort(keywords.begin(), keywords.end(), keywordsLess);
+  }
+
+  // |sb| accumulates the canonical extension string (without a terminating
+  // zero).
+  Vector<char, 32> sb;
+  if (!sb.append('u')) {
+    return Err(CanonicalizationError::OutOfMemory);
+  }
+
+  // Append all Unicode extension attributes.
+  for (size_t i = 0; i < attributes.length(); i++) {
+    const auto& attribute = attributes[i];
+    auto span = extension.Subspan(attribute.Begin(), attribute.Length());
+
+    // Skip duplicate attributes.
+    // The attributes are sorted, so comparing against the direct predecessor
+    // is sufficient.
+    if (i > 0) {
+      const auto& lastAttribute = attributes[i - 1];
+      if (span ==
+          extension.Subspan(lastAttribute.Begin(), lastAttribute.Length())) {
+        continue;
+      }
+      MOZ_ASSERT(attributesLess(lastAttribute, attribute));
+    }
+
+    if (!sb.append('-')) {
+      return Err(CanonicalizationError::OutOfMemory);
+    }
+    if (!AppendSpan(sb, span)) {
+      return Err(CanonicalizationError::OutOfMemory);
+    }
+  }
+
+  // Length of a Unicode key including the following separator.
+  static constexpr size_t UnicodeKeyWithSepLength = UnicodeKeyLength + 1;
+
+  using StringSpan = Span<const char>;
+
+  static constexpr StringSpan True = MakeStringSpan("true");
+
+  // Append all Unicode extension keywords.
+  for (size_t i = 0; i < keywords.length(); i++) {
+    const auto& keyword = keywords[i];
+
+    // Skip duplicate keywords.
+    // Only the keys are compared, so the first keyword for a key wins.
+    if (i > 0) {
+      const auto& lastKeyword = keywords[i - 1];
+      if (extension.Subspan(keyword.Begin(), UnicodeKeyLength) ==
+          extension.Subspan(lastKeyword.Begin(), UnicodeKeyLength)) {
+        continue;
+      }
+      MOZ_ASSERT(keywordsLess(lastKeyword, keyword));
+    }
+
+    if (!sb.append('-')) {
+      return Err(CanonicalizationError::OutOfMemory);
+    }
+
+    StringSpan span = extension.Subspan(keyword.Begin(), keyword.Length());
+    if (span.size() == UnicodeKeyLength) {
+      // Keyword without type value.
+      if (!AppendSpan(sb, span)) {
+        return Err(CanonicalizationError::OutOfMemory);
+      }
+    } else {
+      // Split the keyword into its key and its type, dropping the separator
+      // between them.
+      StringSpan key = span.To(UnicodeKeyLength);
+      StringSpan type = span.From(UnicodeKeyWithSepLength);
+
+      // Search if there's a replacement for the current Unicode keyword.
+      if (const char* replacement = ReplaceUnicodeExtensionType(key, type)) {
+        StringSpan repl = MakeStringSpan(replacement);
+        if (repl == True) {
+          // Elide the type "true" if present in the replacement.
+          if (!AppendSpan(sb, key)) {
+            return Err(CanonicalizationError::OutOfMemory);
+          }
+        } else {
+          // Otherwise append the Unicode key (including the separator) and the
+          // replaced type.
+          if (!AppendSpan(sb, span.To(UnicodeKeyWithSepLength))) {
+            return Err(CanonicalizationError::OutOfMemory);
+          }
+          if (!AppendSpan(sb, repl)) {
+            return Err(CanonicalizationError::OutOfMemory);
+          }
+        }
+      } else {
+        if (type == True) {
+          // Elide the Unicode extension type "true".
+          if (!AppendSpan(sb, key)) {
+            return Err(CanonicalizationError::OutOfMemory);
+          }
+        } else {
+          // Otherwise append the complete Unicode extension keyword.
+          if (!AppendSpan(sb, span)) {
+            return Err(CanonicalizationError::OutOfMemory);
+          }
+        }
+      }
+    }
+  }
+
+  // We can keep the previous extension when canonicalization didn't modify it.
+  if (static_cast<Span<const char>>(sb) != extension) {
+    // Otherwise replace the previous extension with the canonical extension.
+    UniqueChars canonical = DuplicateStringToUniqueChars(sb);
+    if (!canonical) {
+      return Err(CanonicalizationError::OutOfMemory);
+    }
+    aUnicodeExtension = std::move(canonical);
+  }
+
+  return Ok();
+}
+
+/**
+ * Serializes |aTag| into |aBuffer| in the order language, script, region,
+ * variants, extensions, private-use. Every subtag after the language is
+ * preceded by a '-' separator.
+ *
+ * Returns false as soon as an append to |aBuffer| fails; |aBuffer| may then
+ * contain a partial result.
+ */
+template <class Buffer>
+static bool LocaleToString(const Locale& aTag, Buffer& aBuffer) {
+  // Appends the characters of a single, non-empty subtag.
+  auto appendChars = [&aBuffer](Span<const char> chars) {
+    MOZ_ASSERT(!chars.empty());
+    return aBuffer.append(chars.data(), chars.size());
+  };
+
+  // Appends a separator followed by the subtag.
+  auto appendDashed = [&aBuffer, &appendChars](Span<const char> chars) {
+    return aBuffer.append('-') && appendChars(chars);
+  };
+
+  // The language subtag is always written and has no leading separator.
+  if (!appendChars(aTag.Language().Span())) {
+    return false;
+  }
+
+  // Script and region are optional.
+  if (aTag.Script().Present() && !appendDashed(aTag.Script().Span())) {
+    return false;
+  }
+  if (aTag.Region().Present() && !appendDashed(aTag.Region().Span())) {
+    return false;
+  }
+
+  // Variant subtags, if any.
+  for (const auto& variant : aTag.Variants()) {
+    if (!appendDashed(variant)) {
+      return false;
+    }
+  }
+
+  // Extension subtags, if any.
+  for (const auto& extension : aTag.Extensions()) {
+    if (!appendDashed(extension)) {
+      return false;
+    }
+  }
+
+  // Private-use subtag, if present.
+  if (auto privateuse = aTag.PrivateUse()) {
+    if (!appendDashed(privateuse.value())) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+/**
+ * CanonicalizeTransformExtension
+ *
+ * Canonical form per <https://unicode.org/reports/tr35/#BCP47_T_Extension>:
+ *
+ * - These subtags are all in lowercase (that is the canonical casing for these
+ *   subtags), [...].
+ *
+ * And per
+ * <https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers>:
+ *
+ * - All keywords and tfields are sorted by alphabetical order of their keys,
+ *   within their respective extensions.
+ *
+ * |aTransformExtension| is the zero-terminated extension string starting with
+ * "t-". It is replaced by a newly allocated string only when the canonical
+ * form differs from the input.
+ *
+ * Returns Err(CanonicalizationError::InternalError) if the extension can't be
+ * parsed, Err(CanonicalizationError::OutOfMemory) if building or copying the
+ * canonical string fails, and forwards any error from canonicalizing the
+ * embedded language tag.
+ */
+Result<Ok, Locale::CanonicalizationError>
+Locale::CanonicalizeTransformExtension(UniqueChars& aTransformExtension) {
+  Span<const char> extension = MakeStringSpan(aTransformExtension.get());
+  MOZ_ASSERT(extension[0] == 't');
+  MOZ_ASSERT(extension[1] == '-');
+  MOZ_ASSERT(IsStructurallyValidExtensionTag(extension));
+
+  // |tag| receives the optional tlang part, |fields| the tfield ranges into
+  // |extension|.
+  Locale tag;
+  LocaleParser::TFieldVector fields;
+
+  using TField = LocaleParser::TFieldVector::ElementType;
+
+  if (LocaleParser::ParseTransformExtension(extension, tag, fields).isErr()) {
+    MOZ_ASSERT_UNREACHABLE("unexpected invalid transform extension subtag");
+    return Err(CanonicalizationError::InternalError);
+  }
+
+  // Orders tfields by their key only, i.e. the first |TransformKeyLength|
+  // characters.
+  auto tfieldLess = [extension](const TField& a, const TField& b) {
+    auto astr = extension.Subspan(a.Begin(), TransformKeyLength);
+    auto bstr = extension.Subspan(b.Begin(), TransformKeyLength);
+    return astr < bstr;
+  };
+
+  // All tfields are sorted by alphabetical order of their keys.
+  if (fields.length() > 1) {
+    std::stable_sort(fields.begin(), fields.end(), tfieldLess);
+  }
+
+  // |sb| accumulates the canonical extension string (without a terminating
+  // zero).
+  Vector<char, 32> sb;
+  if (!sb.append('t')) {
+    return Err(CanonicalizationError::OutOfMemory);
+  }
+
+  // Append the language subtag if present.
+  //
+  // Replace aliases in tlang per
+  // <https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers>.
+  if (tag.Language().Present()) {
+    if (!sb.append('-')) {
+      return Err(CanonicalizationError::OutOfMemory);
+    }
+
+    MOZ_TRY(tag.CanonicalizeBaseName());
+
+    // The canonical case for Transform extensions is lowercase per
+    // <https://unicode.org/reports/tr35/#BCP47_T_Extension>. Convert the two
+    // subtags which don't use lowercase for their canonical syntax.
+    tag.mScript.ToLowerCase();
+    tag.mRegion.ToLowerCase();
+
+    if (!LocaleToString(tag, sb)) {
+      return Err(CanonicalizationError::OutOfMemory);
+    }
+  }
+
+  // Length of a transform key including the following separator.
+  static constexpr size_t TransformKeyWithSepLength = TransformKeyLength + 1;
+
+  using StringSpan = Span<const char>;
+
+  // Append all fields.
+  //
+  // UTS 35, 3.2.1 specifies:
+  // - Any type or tfield value "true" is removed.
+  //
+  // But the `tvalue` subtag is mandatory in `tfield: tkey tvalue`, so ignore
+  // this apparently invalid part of the UTS 35 specification and simply
+  // append all `tfield` subtags.
+  for (const auto& field : fields) {
+    if (!sb.append('-')) {
+      return Err(CanonicalizationError::OutOfMemory);
+    }
+
+    // Split the field into its key and its value, dropping the separator
+    // between them.
+    StringSpan span = extension.Subspan(field.Begin(), field.Length());
+    StringSpan key = span.To(TransformKeyLength);
+    StringSpan value = span.From(TransformKeyWithSepLength);
+
+    // Search if there's a replacement for the current transform keyword.
+    if (const char* replacement = ReplaceTransformExtensionType(key, value)) {
+      // Keep the key (including the separator) and append the replaced value.
+      if (!AppendSpan(sb, span.To(TransformKeyWithSepLength))) {
+        return Err(CanonicalizationError::OutOfMemory);
+      }
+      if (!AppendSpan(sb, MakeStringSpan(replacement))) {
+        return Err(CanonicalizationError::OutOfMemory);
+      }
+    } else {
+      if (!AppendSpan(sb, span)) {
+        return Err(CanonicalizationError::OutOfMemory);
+      }
+    }
+  }
+
+  // We can keep the previous extension when canonicalization didn't modify it.
+  if (static_cast<Span<const char>>(sb) != extension) {
+    // Otherwise replace the previous extension with the canonical extension.
+    UniqueChars canonical = DuplicateStringToUniqueChars(sb);
+    if (!canonical) {
+      return Err(CanonicalizationError::OutOfMemory);
+    }
+    aTransformExtension = std::move(canonical);
+  }
+
+  return Ok();
+}
+
+// Zero-terminated ICU Locale ID.
+//
+// The inline capacity covers the longest language, script, and region subtags
+// plus three more characters. This matches the most that
+// CreateLocaleForLikelySubtags() appends: the three subtags, two '_'
+// separators, and the trailing '\0'.
+using LocaleId =
+    Vector<char, LanguageLength + 1 + ScriptLength + 1 + RegionLength + 1>;
+
+// Selects the likely-subtags operation: |Add| maximizes the locale through
+// uloc_addLikelySubtags, |Remove| minimizes it through uloc_minimizeSubtags.
+enum class LikelySubtags : bool { Add, Remove };
+
+// Return true iff the locale is already maximized resp. minimized.
+//
+// - Maximized (LikelySubtags::Add): language, script, and region are all
+//   present and none of them is a placeholder ("und", "Zzzz", "ZZ").
+// - Minimized (otherwise): only a language subtag is present and it is not
+//   the placeholder "und".
+static bool HasLikelySubtags(LikelySubtags aLikelySubtags, const Locale& aTag) {
+  // The placeholder language rules out both states.
+  if (aTag.Language().EqualTo("und")) {
+    return false;
+  }
+
+  if (aLikelySubtags != LikelySubtags::Add) {
+    return aTag.Script().Missing() && aTag.Region().Missing();
+  }
+
+  if (!aTag.Script().Present() || aTag.Script().EqualTo("Zzzz")) {
+    return false;
+  }
+  return aTag.Region().Present() && !aTag.Region().EqualTo("ZZ");
+}
+
+// Create an ICU locale ID from the given locale.
+//
+// Writes "language[_script][_region]" followed by a terminating '\0' into
+// |aLocale|, which must be empty on entry. Returns false when an append
+// fails.
+static bool CreateLocaleForLikelySubtags(const Locale& aTag,
+                                         LocaleId& aLocale) {
+  MOZ_ASSERT(aLocale.length() == 0);
+
+  // Appends the characters of a single, non-empty subtag.
+  auto appendChars = [&aLocale](Span<const char> chars) {
+    MOZ_ASSERT(!chars.empty());
+    return aLocale.append(chars.data(), chars.size());
+  };
+
+  // Appends the ICU separator followed by the subtag.
+  auto appendWithSeparator = [&aLocale, &appendChars](Span<const char> chars) {
+    return aLocale.append('_') && appendChars(chars);
+  };
+
+  // The language subtag is always written.
+  if (!appendChars(aTag.Language().Span())) {
+    return false;
+  }
+
+  // Script and region are optional.
+  if (aTag.Script().Present() && !appendWithSeparator(aTag.Script().Span())) {
+    return false;
+  }
+  if (aTag.Region().Present() && !appendWithSeparator(aTag.Region().Span())) {
+    return false;
+  }
+
+  // Zero-terminated for use with ICU.
+  return aLocale.append('\0');
+}
+
+static ICUError ParserErrorToICUError(LocaleParser::ParserError aErr) {
+  using ParserError = LocaleParser::ParserError;
+
+  switch (aErr) {
+    case ParserError::NotParseable:
+      return ICUError::InternalError;
+    case ParserError::OutOfMemory:
+      return ICUError::OutOfMemory;
+  }
+  MOZ_CRASH("Unexpected parser error");
+}
+
+static ICUError CanonicalizationErrorToICUError(
+    Locale::CanonicalizationError aErr) {
+  using CanonicalizationError = Locale::CanonicalizationError;
+
+  switch (aErr) {
+    case CanonicalizationError::DuplicateVariant:
+    case CanonicalizationError::InternalError:
+      return ICUError::InternalError;
+    case CanonicalizationError::OutOfMemory:
+      return ICUError::OutOfMemory;
+  }
+  MOZ_CRASH("Unexpected canonicalization error");
+}
+
+// Assign the language, script, and region subtags from an ICU locale ID.
+//
+// ICU provides |uloc_getLanguage|, |uloc_getScript|, and |uloc_getCountry| to
+// retrieve these subtags, but unfortunately these functions are rather slow, so
+// we use our own implementation.
+//
+// |aLocaleId| is modified in place: '_' separators are replaced with '-' and
+// a missing language subtag is replaced with "und". On success the language,
+// script, and region of |aTag| are overwritten with the parsed subtags.
+//
+// Returns Err(ICUError::OutOfMemory) if |aLocaleId| can't be grown, or the
+// parser error mapped through ParserErrorToICUError().
+static ICUResult AssignFromLocaleId(LocaleId& aLocaleId, Locale& aTag) {
+  // Replace the ICU locale ID separator.
+  std::replace(aLocaleId.begin(), aLocaleId.end(), '_', '-');
+
+  // ICU replaces "und" with the empty string, which means "und" becomes "" and
+  // "und-Latn" becomes "-Latn". Handle this case separately.
+  if (aLocaleId.empty() || aLocaleId[0] == '-') {
+    static constexpr auto und = MakeStringSpan("und");
+    constexpr size_t length = und.size();
+
+    // Remember how many characters need to be shifted before growing the
+    // vector: growBy() already includes the new elements in length(), so
+    // using length() afterwards would read uninitialized elements and write
+    // past the end of the vector.
+    const size_t oldLength = aLocaleId.length();
+
+    // Insert "und" in front of the locale ID.
+    if (!aLocaleId.growBy(length)) {
+      return Err(ICUError::OutOfMemory);
+    }
+    memmove(aLocaleId.begin() + length, aLocaleId.begin(), oldLength);
+    memmove(aLocaleId.begin(), und.data(), length);
+  }
+
+  // Retrieve the language, script, and region subtags from the locale ID
+  Locale localeTag;
+  MOZ_TRY(LocaleParser::TryParseBaseName(aLocaleId, localeTag)
+              .mapErr(ParserErrorToICUError));
+
+  aTag.SetLanguage(localeTag.Language());
+  aTag.SetScript(localeTag.Script());
+  aTag.SetRegion(localeTag.Region());
+
+  return Ok();
+}
+
+// Invokes |likelySubtagsFn| (an ICU function with the signature of
+// uloc_addLikelySubtags) on the zero-terminated |aLocaleId| and stores the
+// output in |aResult|, which must be empty on entry.
+template <decltype(uloc_addLikelySubtags) likelySubtagsFn>
+static ICUResult CallLikelySubtags(const LocaleId& aLocaleId,
+                                   LocaleId& aResult) {
+  // Locale ID must be zero-terminated before passing it to ICU.
+  MOZ_ASSERT(aLocaleId.back() == '\0');
+  MOZ_ASSERT(aResult.length() == 0);
+
+  // Ensure there's enough room for the result.
+  MOZ_ALWAYS_TRUE(aResult.resize(LocaleId::InlineLength));
+
+  auto invokeICU = [&aLocaleId](char* chars, int32_t size,
+                                UErrorCode* status) {
+    return likelySubtagsFn(aLocaleId.begin(), chars, size, status);
+  };
+  return FillBufferWithICUCall(aResult, invokeICU);
+}
+
+// The canonical way to compute the Unicode BCP 47 locale identifier with likely
+// subtags is as follows:
+//
+// 1. Call uloc_forLanguageTag() to transform the locale identifier into an ICU
+//    locale ID.
+// 2. Call uloc_addLikelySubtags() to add the likely subtags to the locale ID.
+// 3. Call uloc_toLanguageTag() to transform the resulting locale ID back into
+//    a Unicode BCP 47 locale identifier.
+//
+// Since uloc_forLanguageTag() and uloc_toLanguageTag() are both kind of slow
+// and we know, by construction, that the input Unicode BCP 47 locale identifier
+// only contains valid language, script, and region subtags, we can avoid both
+// calls if we implement them ourselves, see CreateLocaleForLikelySubtags() and
+// AssignFromLocaleId(). (Where "slow" means about 50% of the execution time of
+// |Intl.Locale.prototype.maximize|.)
+//
+// On success the language, script, and region subtags of |aTag| are updated in
+// place and re-canonicalized.
+static ICUResult LikelySubtags(LikelySubtags aLikelySubtags, Locale& aTag) {
+  // Nothing to do when the input is already maximized/minimized.
+  if (HasLikelySubtags(aLikelySubtags, aTag)) {
+    return Ok();
+  }
+
+  // Build the ICU locale ID for the input.
+  LocaleId input;
+  if (!CreateLocaleForLikelySubtags(aTag, input)) {
+    return Err(ICUError::OutOfMemory);
+  }
+
+  // Let ICU either remove or add the likely subtags.
+  LocaleId output;
+  if (aLikelySubtags != LikelySubtags::Add) {
+    MOZ_TRY(CallLikelySubtags<uloc_minimizeSubtags>(input, output));
+  } else {
+    MOZ_TRY(CallLikelySubtags<uloc_addLikelySubtags>(input, output));
+  }
+
+  // Copy the language, script, and region subtags back into |aTag|.
+  MOZ_TRY(AssignFromLocaleId(output, aTag));
+
+  // Update mappings in case ICU returned a non-canonical locale.
+  MOZ_TRY(aTag.CanonicalizeBaseName().mapErr(CanonicalizationErrorToICUError));
+
+  return Ok();
+}
+
+// Add likely subtags to this locale in place. Delegates to LikelySubtags()
+// with LikelySubtags::Add and returns that call's result unchanged.
+ICUResult Locale::AddLikelySubtags() {
+  return LikelySubtags(LikelySubtags::Add, *this);
+}
+
+// Remove likely subtags from this locale in place. Delegates to
+// LikelySubtags() with LikelySubtags::Remove and returns that call's result
+// unchanged.
+ICUResult Locale::RemoveLikelySubtags() {
+  return LikelySubtags(LikelySubtags::Remove, *this);
+}
+
+// Return a newly allocated copy of the null-terminated string |aStr|.
+UniqueChars Locale::DuplicateStringToUniqueChars(const char* aStr) {
+  // The copied size includes the terminating null character.
+  const size_t sizeWithTerminator = strlen(aStr) + 1;
+  auto copy = MakeUnique<char[]>(sizeWithTerminator);
+  std::copy_n(aStr, sizeWithTerminator, copy.get());
+  return copy;
+}
+
+// Return a newly allocated, null-terminated copy of the characters in |aStr|.
+UniqueChars Locale::DuplicateStringToUniqueChars(Span<const char> aStr) {
+  const size_t count = aStr.size();
+
+  // Reserve one extra character for the terminator, which the span itself
+  // does not provide.
+  auto copy = MakeUnique<char[]>(count + 1);
+  char* out = copy.get();
+  std::copy_n(aStr.data(), count, out);
+  out[count] = '\0';
+  return copy;
+}
+
+// Compute the number of characters needed to serialize this locale: the
+// length of every present subtag plus one separator for each subtag that
+// follows the language subtag.
+size_t Locale::ToStringCapacity() const {
+  // The buffer class currently cannot be resized, so the complete size has to
+  // be known up front and reserved in a single step.
+
+  // Length of a subtag exposed through Span(); it must not be empty.
+  auto spanLength = [](const auto& aSubtag) {
+    auto chars = aSubtag.Span();
+    MOZ_ASSERT(!chars.empty());
+    return chars.size();
+  };
+
+  // Length of a null-terminated subtag; it must not be empty.
+  auto cstrLength = [](const char* aChars) {
+    size_t count = strlen(aChars);
+    MOZ_ASSERT(count > 0);
+    return count;
+  };
+
+  // Every entry of a subtag list contributes its length plus one separator.
+  auto listLength = [&cstrLength](const auto& aList) {
+    size_t total = 0;
+    for (const auto& entry : aList) {
+      total += 1 + cstrLength(entry.get());
+    }
+    return total;
+  };
+
+  // The language subtag comes first and therefore needs no separator.
+  size_t total = spanLength(mLanguage);
+
+  if (mScript.Present()) {
+    total += 1 + spanLength(mScript);
+  }
+
+  if (mRegion.Present()) {
+    total += 1 + spanLength(mRegion);
+  }
+
+  total += listLength(mVariants);
+
+  total += listLength(mExtensions);
+
+  if (const char* privateUse = mPrivateUse.get()) {
+    total += 1 + cstrLength(privateUse);
+  }
+
+  return total;
+}
+
+// Serialize this locale into |aBuffer| and return the number of characters
+// written. No terminating null character is written by this function.
+size_t Locale::ToStringAppend(char* aBuffer) const {
+  // Next position to write to.
+  char* cursor = aBuffer;
+
+  auto putHyphen = [&cursor]() {
+    *cursor = '-';
+    cursor += 1;
+  };
+
+  // Write a subtag that exposes its characters through Span().
+  auto putSpan = [&cursor](const auto& aSubtag) {
+    auto chars = aSubtag.Span();
+    memcpy(cursor, chars.data(), chars.size());
+    cursor += chars.size();
+  };
+
+  // Write a null-terminated subtag, excluding its terminator.
+  auto putCStr = [&cursor](const char* aChars) {
+    size_t count = strlen(aChars);
+    memcpy(cursor, aChars, count);
+    cursor += count;
+  };
+
+  // Write every entry of a subtag list, each preceded by a hyphen.
+  auto putList = [&putHyphen, &putCStr](const auto& aList) {
+    for (const auto& entry : aList) {
+      putHyphen();
+      putCStr(entry.get());
+    }
+  };
+
+  // Language subtag.
+  putSpan(mLanguage);
+
+  // Optional script subtag.
+  if (mScript.Present()) {
+    putHyphen();
+    putSpan(mScript);
+  }
+
+  // Optional region subtag.
+  if (mRegion.Present()) {
+    putHyphen();
+    putSpan(mRegion);
+  }
+
+  // Variant subtags, if any.
+  putList(mVariants);
+
+  // Extension subtags, if any.
+  putList(mExtensions);
+
+  // Optional private-use subtag.
+  if (const char* privateUse = mPrivateUse.get()) {
+    putHyphen();
+    putCStr(privateUse);
+  }
+
+  return static_cast<size_t>(cursor - aBuffer);
+}
+
+// Scan the next subtag starting at |mIndex| and return it as a token. The
+// token kind records whether letters and/or digits were seen; an Error token
+// is returned for any other character and for a misplaced hyphen.
+LocaleParser::Token LocaleParser::NextToken() {
+  MOZ_ASSERT(mIndex <= mLength + 1, "called after 'None' token was read");
+
+  const size_t start = mIndex;
+  size_t end = start;
+  TokenKind kind = TokenKind::None;
+
+  while (end < mLength) {
+    // UTS 35, section 3.1.
+    // alpha = [A-Z a-z] ;
+    // digit = [0-9] ;
+    char ch = CharAt(end);
+    if (IsAsciiAlpha(ch)) {
+      kind |= TokenKind::Alpha;
+    } else if (IsAsciiDigit(ch)) {
+      kind |= TokenKind::Digit;
+    } else {
+      // A hyphen only ends the token when it is neither the first character
+      // of the token nor the last character of the input.
+      const bool isSeparator = ch == '-' && end > start && end + 1 < mLength;
+      if (!isSeparator) {
+        return {TokenKind::Error, 0, 0};
+      }
+      break;
+    }
+    end += 1;
+  }
+
+  const size_t tokenLength = end - start;
+  Token token{kind, start, tokenLength};
+
+  // Continue after the separator (or one past the end of the input).
+  mIndex = end + 1;
+  return token;
+}
+
+// Return a newly allocated, null-terminated copy of the |aLength| characters
+// of the parsed input that start at |aIndex|.
+UniqueChars LocaleParser::Chars(size_t aIndex, size_t aLength) const {
+  // One additional character holds the null terminator.
+  auto copy = MakeUnique<char[]>(aLength + 1);
+  auto first = mLocale + aIndex;
+  std::copy(first, first + aLength, copy.get());
+  copy[aLength] = '\0';
+  return copy;
+}
+
+// Parse the `unicode_language_id` production.
+//
+// unicode_language_id = unicode_language_subtag
+//                       (sep unicode_script_subtag)?
+//                       (sep unicode_region_subtag)?
+//                       (sep unicode_variant_subtag)* ;
+//
+// sep                 = "-"
+//
+// Note: Unicode CLDR locale identifier backward compatibility extensions
+//       removed from `unicode_language_id`.
+//
+// |aTok| is the current token from |aLocaleParser|.
+//
+// All subtags will be added unaltered to |aTag|, without canonicalizing their
+// case or, in the case of variant subtags, detecting and rejecting duplicate
+// variants. Users must subsequently |CanonicalizeBaseName| to perform these
+// actions.
+//
+// Do not use this function directly: use |ParseBaseName| or
+// |ParseTlangInTransformExtension| instead.
+Result<Ok, LocaleParser::ParserError> LocaleParser::InternalParseBaseName(
+    LocaleParser& aLocaleParser, Locale& aTag, Token& aTok) {
+  // A language subtag is required; everything after it is optional.
+  if (!aLocaleParser.IsLanguage(aTok)) {
+    return Err(ParserError::NotParseable);
+  }
+  aLocaleParser.CopyChars(aTok, aTag.mLanguage);
+  aTok = aLocaleParser.NextToken();
+
+  // Optional script subtag.
+  if (aLocaleParser.IsScript(aTok)) {
+    aLocaleParser.CopyChars(aTok, aTag.mScript);
+    aTok = aLocaleParser.NextToken();
+  }
+
+  // Optional region subtag.
+  if (aLocaleParser.IsRegion(aTok)) {
+    aLocaleParser.CopyChars(aTok, aTag.mRegion);
+    aTok = aLocaleParser.NextToken();
+  }
+
+  // Any number of variant subtags, stored in the order they appear.
+  auto& variantList = aTag.mVariants;
+  MOZ_ASSERT(variantList.length() == 0);
+  for (; aLocaleParser.IsVariant(aTok); aTok = aLocaleParser.NextToken()) {
+    if (!variantList.append(aLocaleParser.Chars(aTok))) {
+      return Err(ParserError::OutOfMemory);
+    }
+  }
+
+  return Ok();
+}
+
+// Parse |aLocale| as a complete `unicode_locale_id` and store its components
+// in |aTag|, which must be empty on entry.
+//
+// Errors from ParseBaseName are propagated. Otherwise NotParseable is returned
+// when the input violates one of the checks below, and OutOfMemory when an
+// extension cannot be appended to |aTag|.
+Result<Ok, LocaleParser::ParserError> LocaleParser::TryParse(
+    mozilla::Span<const char> aLocale, Locale& aTag) {
+  // |aTag| must be a new, empty Locale.
+  MOZ_ASSERT(aTag.Language().Missing());
+  MOZ_ASSERT(aTag.Script().Missing());
+  MOZ_ASSERT(aTag.Region().Missing());
+  MOZ_ASSERT(aTag.Variants().empty());
+  MOZ_ASSERT(aTag.Extensions().empty());
+  MOZ_ASSERT(aTag.PrivateUse().isNothing());
+
+  // unicode_locale_id = unicode_language_id
+  //                     extensions*
+  //                     pu_extensions? ;
+
+  LocaleParser ts(aLocale);
+  Token tok = ts.NextToken();
+
+  MOZ_TRY(ParseBaseName(ts, aTag, tok));
+
+  // extensions = unicode_locale_extensions
+  //            | transformed_extensions
+  //            | other_extensions ;
+
+  // Bit set of seen singletons.
+  uint64_t seenSingletons = 0;
+
+  auto& extensions = aTag.mExtensions;
+  while (ts.IsExtensionStart(tok)) {
+    char singleton = ts.SingletonKey(tok);
+
+    // Reject the input if a duplicate singleton was found.
+    // The bit position is AsciiAlphanumericToNumber(singleton) + 1.
+    uint64_t hash = 1ULL << (AsciiAlphanumericToNumber(singleton) + 1);
+    if (seenSingletons & hash) {
+      return Err(ParserError::NotParseable);
+    }
+    seenSingletons |= hash;
+
+    // Remember the singleton token; it is passed to Extension() below once
+    // the end of this extension is known.
+    Token start = tok;
+    tok = ts.NextToken();
+
+    // We'll check for missing non-singleton subtags after this block by
+    // comparing |startValue| with the then-current position.
+    size_t startValue = tok.Index();
+
+    if (singleton == 'u') {
+      while (ts.IsUnicodeExtensionPart(tok)) {
+        tok = ts.NextToken();
+      }
+    } else if (singleton == 't') {
+      // transformed_extensions = sep [tT]
+      //                          ((sep tlang (sep tfield)*)
+      //                           | (sep tfield)+) ;
+
+      // tlang = unicode_language_subtag
+      //         (sep unicode_script_subtag)?
+      //         (sep unicode_region_subtag)?
+      //         (sep unicode_variant_subtag)* ;
+      // Only the structure is validated here; the `tlang` subtags are skipped
+      // without being stored.
+      if (ts.IsLanguage(tok)) {
+        tok = ts.NextToken();
+
+        if (ts.IsScript(tok)) {
+          tok = ts.NextToken();
+        }
+
+        if (ts.IsRegion(tok)) {
+          tok = ts.NextToken();
+        }
+
+        while (ts.IsVariant(tok)) {
+          tok = ts.NextToken();
+        }
+      }
+
+      // tfield = tkey tvalue;
+      while (ts.IsTransformExtensionKey(tok)) {
+        tok = ts.NextToken();
+
+        size_t startTValue = tok.Index();
+        while (ts.IsTransformExtensionPart(tok)) {
+          tok = ts.NextToken();
+        }
+
+        // `tfield` requires at least one `tvalue`.
+        if (tok.Index() <= startTValue) {
+          return Err(ParserError::NotParseable);
+        }
+      }
+    } else {
+      while (ts.IsOtherExtensionPart(tok)) {
+        tok = ts.NextToken();
+      }
+    }
+
+    // Singletons must be followed by a non-singleton subtag, "en-a-b" is not
+    // allowed.
+    if (tok.Index() <= startValue) {
+      return Err(ParserError::NotParseable);
+    }
+
+    // NOTE(review): Extension() presumably copies the text from |start| up to
+    // (but excluding) |tok| - confirm against its definition.
+    UniqueChars extension = ts.Extension(start, tok);
+    if (!extensions.append(std::move(extension))) {
+      return Err(ParserError::OutOfMemory);
+    }
+  }
+
+  // Trailing `pu_extension` component of the `unicode_locale_id` production.
+  if (ts.IsPrivateUseStart(tok)) {
+    Token start = tok;
+    tok = ts.NextToken();
+
+    size_t startValue = tok.Index();
+    while (ts.IsPrivateUsePart(tok)) {
+      tok = ts.NextToken();
+    }
+
+    // There must be at least one subtag after the "-x-".
+    if (tok.Index() <= startValue) {
+      return Err(ParserError::NotParseable);
+    }
+
+    UniqueChars privateUse = ts.Extension(start, tok);
+    aTag.mPrivateUse = std::move(privateUse);
+  }
+
+  // A token other than None at this point means input remains that none of
+  // the productions above consumed.
+  if (!tok.IsNone()) {
+    return Err(ParserError::NotParseable);
+  }
+
+  return Ok();
+}
+
+// Parse |aLocale| as a bare `unicode_language_id` into |aTag|. Input that
+// continues after the base name is rejected with NotParseable.
+Result<Ok, LocaleParser::ParserError> LocaleParser::TryParseBaseName(
+    Span<const char> aLocale, Locale& aTag) {
+  // The caller has to pass in a fresh Locale without any components set.
+  MOZ_ASSERT(aTag.Language().Missing());
+  MOZ_ASSERT(aTag.Script().Missing());
+  MOZ_ASSERT(aTag.Region().Missing());
+  MOZ_ASSERT(aTag.Variants().empty());
+  MOZ_ASSERT(aTag.Extensions().empty());
+  MOZ_ASSERT(aTag.PrivateUse().isNothing());
+
+  LocaleParser parser(aLocale);
+  Token current = parser.NextToken();
+
+  MOZ_TRY(ParseBaseName(parser, aTag, current));
+
+  // The base name must cover the entire input.
+  if (current.IsNone()) {
+    return Ok();
+  }
+  return Err(ParserError::NotParseable);
+}
+
+// Parse |aExtension|, which must be a valid `transformed_extensions` subtag,
+// and fill |aTag| and |aFields| from the `tlang` and `tfield` components.
+Result<Ok, LocaleParser::ParserError> LocaleParser::ParseTransformExtension(
+    Span<const char> aExtension, Locale& aTag, TFieldVector& aFields) {
+  LocaleParser parser(aExtension);
+  Token tok = parser.NextToken();
+
+  // The extension has to begin with the 't' singleton.
+  if (!(parser.IsExtensionStart(tok) && parser.SingletonKey(tok) == 't')) {
+    return Err(ParserError::NotParseable);
+  }
+
+  // Something must follow the singleton.
+  tok = parser.NextToken();
+  if (tok.IsNone()) {
+    return Err(ParserError::NotParseable);
+  }
+
+  if (!parser.IsLanguage(tok)) {
+    // Without a `tlang` subtag, at least one `tfield` has to be present.
+    MOZ_ASSERT(parser.IsTransformExtensionKey(tok));
+  } else {
+    // This is a possible `tlang` inside a known-valid transform extension, so
+    // the special-purpose function can be used to compute the lowercased
+    // |aTag| contents in an optimal manner.
+    MOZ_TRY(ParseTlangInTransformExtension(parser, aTag, tok));
+
+    // `tlang` is followed either by the `tkey` of a `tfield` or by the end of
+    // the transform extension.
+    MOZ_ASSERT(parser.IsTransformExtensionKey(tok) || tok.IsNone());
+  }
+
+  // Trailing `tfield` subtags. (Any other trailing subtags are an error,
+  // because only a valid transform extension is expected here.)
+  while (parser.IsTransformExtensionKey(tok)) {
+    const size_t fieldStart = tok.Index();
+    tok = parser.NextToken();
+
+    // Skip over all `tvalue` parts of this field.
+    const size_t valueStart = tok.Index();
+    while (parser.IsTransformExtensionPart(tok)) {
+      tok = parser.NextToken();
+    }
+
+    // A `tfield` without any `tvalue` is invalid.
+    if (tok.Index() <= valueStart) {
+      return Err(ParserError::NotParseable);
+    }
+
+    // Record the field as (start, length), where the length is the distance
+    // from the field start to one before the current token's index.
+    const size_t fieldLength = tok.Index() - 1 - fieldStart;
+    if (!aFields.emplaceBack(fieldStart, fieldLength)) {
+      return Err(ParserError::OutOfMemory);
+    }
+  }
+
+  // Nothing may remain after the last `tfield`.
+  if (tok.IsNone()) {
+    return Ok();
+  }
+  return Err(ParserError::NotParseable);
+}
+
+// Parse |aExtension|, which must be a valid `unicode_locale_extensions` subtag,
+// and fill |aAttributes| and |aKeywords| from the `attribute` and `keyword`
+// components.
+Result<Ok, LocaleParser::ParserError> LocaleParser::ParseUnicodeExtension(
+    Span<const char> aExtension, AttributesVector& aAttributes,
+    KeywordsVector& aKeywords) {
+  LocaleParser parser(aExtension);
+  Token tok = parser.NextToken();
+
+  // unicode_locale_extensions = sep [uU] ((sep keyword)+ |
+  //                                       (sep attribute)+ (sep keyword)*) ;
+
+  // The extension has to begin with the 'u' singleton.
+  if (!(parser.IsExtensionStart(tok) && parser.SingletonKey(tok) == 'u')) {
+    return Err(ParserError::NotParseable);
+  }
+
+  // Something must follow the singleton.
+  tok = parser.NextToken();
+  if (tok.IsNone()) {
+    return Err(ParserError::NotParseable);
+  }
+
+  // Leading attributes are recorded as (index, length) pairs.
+  for (; parser.IsUnicodeExtensionAttribute(tok); tok = parser.NextToken()) {
+    if (!aAttributes.emplaceBack(tok.Index(), tok.Length())) {
+      return Err(ParserError::OutOfMemory);
+    }
+  }
+
+  // keyword = key (sep type)? ;
+  while (parser.IsUnicodeExtensionKey(tok)) {
+    const size_t keywordStart = tok.Index();
+    tok = parser.NextToken();
+
+    // Skip over the type subtags that belong to this key.
+    while (parser.IsUnicodeExtensionType(tok)) {
+      tok = parser.NextToken();
+    }
+
+    if (tok.IsError()) {
+      return Err(ParserError::NotParseable);
+    }
+
+    // Record the keyword as (start, length), where the length is the distance
+    // from the keyword start to one before the current token's index.
+    const size_t keywordLength = tok.Index() - 1 - keywordStart;
+    if (!aKeywords.emplaceBack(keywordStart, keywordLength)) {
+      return Err(ParserError::OutOfMemory);
+    }
+  }
+
+  // Nothing may remain after the last keyword.
+  if (tok.IsNone()) {
+    return Ok();
+  }
+  return Err(ParserError::NotParseable);
+}
+
+// Check whether |aExtension| has the structure of a
+// `unicode_locale_extensions` subtag sequence without recording any of its
+// components. Performs the same structural checks as ParseUnicodeExtension
+// and reports every rejection as ParserError::NotParseable.
+Result<Ok, LocaleParser::ParserError> LocaleParser::CanParseUnicodeExtension(
+    Span<const char> aExtension) {
+  LocaleParser ts(aExtension);
+  Token tok = ts.NextToken();
+
+  // unicode_locale_extensions = sep [uU] ((sep keyword)+ |
+  //                                       (sep attribute)+ (sep keyword)*) ;
+
+  // The extension has to begin with the 'u' singleton.
+  if (!ts.IsExtensionStart(tok) || ts.SingletonKey(tok) != 'u') {
+    return Err(ParserError::NotParseable);
+  }
+
+  tok = ts.NextToken();
+
+  // Something must follow the singleton.
+  if (tok.IsNone()) {
+    return Err(ParserError::NotParseable);
+  }
+
+  // Skip over any leading attributes.
+  while (ts.IsUnicodeExtensionAttribute(tok)) {
+    tok = ts.NextToken();
+  }
+
+  // keyword = key (sep type)? ;
+  while (ts.IsUnicodeExtensionKey(tok)) {
+    tok = ts.NextToken();
+
+    while (ts.IsUnicodeExtensionType(tok)) {
+      tok = ts.NextToken();
+    }
+
+    if (tok.IsError()) {
+      return Err(ParserError::NotParseable);
+    }
+  }
+
+  // Leftover input is a syntax error rather than an allocation failure, so
+  // report NotParseable, matching ParseUnicodeExtension.
+  if (!tok.IsNone()) {
+    return Err(ParserError::NotParseable);
+  }
+
+  return Ok();
+}
+
+// Check whether |aUnicodeType| consists solely of subtags accepted by
+// IsUnicodeExtensionType. The caller must not pass an empty string.
+Result<Ok, LocaleParser::ParserError>
+LocaleParser::CanParseUnicodeExtensionType(Span<const char> aUnicodeType) {
+  MOZ_ASSERT(!aUnicodeType.empty(), "caller must exclude empty strings");
+
+  LocaleParser parser(aUnicodeType);
+
+  // Consume type subtags for as long as they match.
+  Token current = parser.NextToken();
+  while (parser.IsUnicodeExtensionType(current)) {
+    current = parser.NextToken();
+  }
+
+  // The type subtags must cover the entire input.
+  if (current.IsNone()) {
+    return Ok();
+  }
+  return Err(ParserError::NotParseable);
+}
+
+}  // namespace mozilla::intl
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 17:32:43 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 17:32:43 +0000
commit	6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch)
tree	a68f146d7fa01f0134297619fbe7e33db084e0aa /intl/components/src/Locale.cpp
parent	Initial commit. (diff)
download	thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.tar.xz thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.zip