summaryrefslogtreecommitdiffstats
path: root/intl/locale/nsLanguageAtomService.cpp
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
commit26a029d407be480d791972afb5975cf62c9360a6 (patch)
treef435a8308119effd964b339f76abb83a57c29483 /intl/locale/nsLanguageAtomService.cpp
parentInitial commit. (diff)
downloadfirefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'intl/locale/nsLanguageAtomService.cpp')
-rw-r--r--intl/locale/nsLanguageAtomService.cpp256
1 files changed, 256 insertions, 0 deletions
diff --git a/intl/locale/nsLanguageAtomService.cpp b/intl/locale/nsLanguageAtomService.cpp
new file mode 100644
index 0000000000..6c57fb8743
--- /dev/null
+++ b/intl/locale/nsLanguageAtomService.cpp
@@ -0,0 +1,256 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsLanguageAtomService.h"
+#include "nsUConvPropertySearch.h"
+#include "nsUnicharUtils.h"
+#include "nsAtom.h"
+#include "nsGkAtoms.h"
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/ClearOnShutdown.h"
+#include "mozilla/Encoding.h"
+#include "mozilla/intl/Locale.h"
+#include "mozilla/intl/OSPreferences.h"
+#include "mozilla/ServoBindings.h"
+#include "mozilla/ServoUtils.h"
+#include "mozilla/StaticPtr.h"
+
+using namespace mozilla;
+using mozilla::intl::OSPreferences;
+
+static constexpr nsUConvProp encodingsGroups[] = {
+#include "encodingsgroups.properties.h"
+};
+
+// List of mozilla internal x-* tags that map to themselves (see bug 256257)
+static constexpr nsStaticAtom* kLangGroups[] = {
+ // This list must be sorted!
+ nsGkAtoms::x_armn, nsGkAtoms::x_cyrillic, nsGkAtoms::x_devanagari,
+ nsGkAtoms::x_geor, nsGkAtoms::x_math, nsGkAtoms::x_tamil,
+ nsGkAtoms::Unicode, nsGkAtoms::x_western
+ // These self-mappings are not necessary unless somebody use them to specify
+ // lang in (X)HTML/XML documents, which they shouldn't. (see bug 256257)
+ // x-beng=x-beng
+ // x-cans=x-cans
+ // x-ethi=x-ethi
+ // x-guru=x-guru
+ // x-gujr=x-gujr
+ // x-khmr=x-khmr
+ // x-mlym=x-mlym
+};
+
+// Map ISO 15924 script codes from BCP47 lang tag to mozilla's langGroups.
+static constexpr struct {
+ const char* mTag;
+ nsStaticAtom* mAtom;
+} kScriptLangGroup[] = {
+ // This list must be sorted by script code!
+ {"Arab", nsGkAtoms::ar},
+ {"Armn", nsGkAtoms::x_armn},
+ {"Beng", nsGkAtoms::x_beng},
+ {"Cans", nsGkAtoms::x_cans},
+ {"Cyrl", nsGkAtoms::x_cyrillic},
+ {"Deva", nsGkAtoms::x_devanagari},
+ {"Ethi", nsGkAtoms::x_ethi},
+ {"Geok", nsGkAtoms::x_geor},
+ {"Geor", nsGkAtoms::x_geor},
+ {"Grek", nsGkAtoms::el},
+ {"Gujr", nsGkAtoms::x_gujr},
+ {"Guru", nsGkAtoms::x_guru},
+ {"Hang", nsGkAtoms::ko},
+ // Hani is not mapped to a specific langGroup, we prefer to look at the
+ // primary language subtag in this case
+ {"Hans", nsGkAtoms::Chinese},
+ // Hant is special-cased in code
+ // Hant=zh-HK
+ // Hant=zh-TW
+ {"Hebr", nsGkAtoms::he},
+ {"Hira", nsGkAtoms::Japanese},
+ {"Jpan", nsGkAtoms::Japanese},
+ {"Kana", nsGkAtoms::Japanese},
+ {"Khmr", nsGkAtoms::x_khmr},
+ {"Knda", nsGkAtoms::x_knda},
+ {"Kore", nsGkAtoms::ko},
+ {"Latn", nsGkAtoms::x_western},
+ {"Mlym", nsGkAtoms::x_mlym},
+ {"Orya", nsGkAtoms::x_orya},
+ {"Sinh", nsGkAtoms::x_sinh},
+ {"Taml", nsGkAtoms::x_tamil},
+ {"Telu", nsGkAtoms::x_telu},
+ {"Thai", nsGkAtoms::th},
+ {"Tibt", nsGkAtoms::x_tibt}};
+
+static StaticAutoPtr<nsLanguageAtomService> gLangAtomService;
+
+// static
+nsLanguageAtomService* nsLanguageAtomService::GetService() {
+ if (!gLangAtomService) {
+ gLangAtomService = new nsLanguageAtomService();
+ }
+ return gLangAtomService.get();
+}
+
+// static
+void nsLanguageAtomService::Shutdown() { gLangAtomService = nullptr; }
+
+nsStaticAtom* nsLanguageAtomService::LookupLanguage(
+ const nsACString& aLanguage) {
+ nsAutoCString lowered(aLanguage);
+ ToLowerCase(lowered);
+
+ RefPtr<nsAtom> lang = NS_Atomize(lowered);
+ return GetLanguageGroup(lang);
+}
+
+already_AddRefed<nsAtom> nsLanguageAtomService::LookupCharSet(
+ NotNull<const Encoding*> aEncoding) {
+ nsAutoCString charset;
+ aEncoding->Name(charset);
+ nsAutoCString group;
+ if (NS_FAILED(nsUConvPropertySearch::SearchPropertyValue(
+ encodingsGroups, ArrayLength(encodingsGroups), charset, group))) {
+ return RefPtr<nsAtom>(nsGkAtoms::Unicode).forget();
+ }
+ return NS_Atomize(group);
+}
+
+nsAtom* nsLanguageAtomService::GetLocaleLanguage() {
+ do {
+ if (!mLocaleLanguage) {
+ AutoTArray<nsCString, 10> regionalPrefsLocales;
+ if (NS_SUCCEEDED(OSPreferences::GetInstance()->GetRegionalPrefsLocales(
+ regionalPrefsLocales))) {
+ // use lowercase for all language atoms
+ ToLowerCase(regionalPrefsLocales[0]);
+ mLocaleLanguage = NS_Atomize(regionalPrefsLocales[0]);
+ } else {
+ nsAutoCString locale;
+ OSPreferences::GetInstance()->GetSystemLocale(locale);
+
+ ToLowerCase(locale); // use lowercase for all language atoms
+ mLocaleLanguage = NS_Atomize(locale);
+ }
+ }
+ } while (0);
+
+ return mLocaleLanguage;
+}
+
+nsStaticAtom* nsLanguageAtomService::GetLanguageGroup(nsAtom* aLanguage,
+ bool* aNeedsToCache) {
+ if (aNeedsToCache) {
+ if (nsStaticAtom* atom = mLangToGroup.Get(aLanguage)) {
+ return atom;
+ }
+ *aNeedsToCache = true;
+ return nullptr;
+ }
+
+ return mLangToGroup.LookupOrInsertWith(aLanguage, [&] {
+ AssertIsMainThreadOrServoFontMetricsLocked();
+ return GetUncachedLanguageGroup(aLanguage);
+ });
+}
+
+nsStaticAtom* nsLanguageAtomService::GetUncachedLanguageGroup(
+ nsAtom* aLanguage) const {
+ nsAutoCString langStr;
+ aLanguage->ToUTF8String(langStr);
+ ToLowerCase(langStr);
+
+ if (langStr[0] == 'x' && langStr[1] == '-') {
+ // Internal x-* langGroup codes map to themselves (see bug 256257)
+ for (nsStaticAtom* langGroup : kLangGroups) {
+ if (langGroup == aLanguage) {
+ return langGroup;
+ }
+ if (aLanguage->IsAsciiLowercase()) {
+ continue;
+ }
+ // Do the slow ascii-case-insensitive comparison just if needed.
+ nsDependentAtomString string(langGroup);
+ if (string.EqualsASCII(langStr.get(), langStr.Length())) {
+ return langGroup;
+ }
+ }
+ } else {
+ // If the lang code can be parsed as BCP47, look up its (likely) script.
+
+ // https://bugzilla.mozilla.org/show_bug.cgi?id=1618034:
+ // First strip any private subtags that would cause Locale to reject the
+ // tag as non-wellformed.
+ nsACString::const_iterator start, end;
+ langStr.BeginReading(start);
+ langStr.EndReading(end);
+ if (FindInReadable("-x-"_ns, start, end)) {
+ // The substring we want ends at the beginning of the "-x-" subtag.
+ langStr.Truncate(start.get() - langStr.BeginReading());
+ }
+
+ intl::Locale loc;
+ auto result = intl::LocaleParser::TryParse(langStr, loc);
+ if (!result.isOk()) {
+ // Did the author (wrongly) use '_' instead of '-' to separate subtags?
+ // If so, fix it up and re-try parsing.
+ if (langStr.Contains('_')) {
+ langStr.ReplaceChar('_', '-');
+
+ // Throw away the partially parsed locale and re-start parsing.
+ loc = {};
+ result = intl::LocaleParser::TryParse(langStr, loc);
+ }
+ }
+ if (result.isOk() && loc.Canonicalize().isOk()) {
+ // Fill in script subtag if not present.
+ if (loc.Script().Missing()) {
+ if (loc.AddLikelySubtags().isErr()) {
+ // Fall back to x-unicode if no match was found
+ return nsGkAtoms::Unicode;
+ }
+ }
+ // Traditional Chinese has separate prefs for Hong Kong / Taiwan;
+ // check the region subtag.
+ if (loc.Script().EqualTo("Hant")) {
+ if (loc.Region().EqualTo("HK")) {
+ return nsGkAtoms::HongKongChinese;
+ }
+ return nsGkAtoms::Taiwanese;
+ }
+ // Search list of known script subtags that map to langGroup codes.
+ size_t foundIndex;
+ Span<const char> scriptAsSpan = loc.Script().Span();
+ nsDependentCSubstring script(scriptAsSpan.data(), scriptAsSpan.size());
+ if (BinarySearchIf(
+ kScriptLangGroup, 0, ArrayLength(kScriptLangGroup),
+ [script](const auto& entry) -> int {
+ return Compare(script, nsDependentCString(entry.mTag));
+ },
+ &foundIndex)) {
+ return kScriptLangGroup[foundIndex].mAtom;
+ }
+ // Script subtag was not recognized (includes "Hani"); check the language
+ // subtag for CJK possibilities so that we'll prefer the appropriate font
+ // rather than falling back to the browser's hardcoded preference.
+ if (loc.Language().EqualTo("zh")) {
+ if (loc.Region().EqualTo("HK")) {
+ return nsGkAtoms::HongKongChinese;
+ }
+ if (loc.Region().EqualTo("TW")) {
+ return nsGkAtoms::Taiwanese;
+ }
+ return nsGkAtoms::Chinese;
+ }
+ if (loc.Language().EqualTo("ja")) {
+ return nsGkAtoms::Japanese;
+ }
+ if (loc.Language().EqualTo("ko")) {
+ return nsGkAtoms::ko;
+ }
+ }
+ }
+
+ // Fall back to x-unicode if no match was found
+ return nsGkAtoms::Unicode;
+}