/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/* Runtime-wide Intl data shared across compartments. */

#include "builtin/intl/SharedIntlData.h"

#include "mozilla/Assertions.h"
#include "mozilla/HashFunctions.h"
#include "mozilla/TextUtils.h"

// NOTE(review): the system header names below are reconstructed from what this
// file uses (std::replace, fixed-width integers, strlen/strcmp, std::move) --
// confirm against the upstream include list.
#include <algorithm>
#include <stdint.h>
#include <string.h>
#include <utility>

#include "builtin/intl/CommonFunctions.h"
#include "builtin/intl/ScopedICUObject.h"
#include "builtin/intl/TimeZoneDataGenerated.h"
#include "builtin/String.h"
#include "js/Utility.h"
#include "js/Vector.h"
#include "unicode/ucal.h"
#include "unicode/ucol.h"
#include "unicode/udat.h"
#include "unicode/udatpg.h"
#include "unicode/uenum.h"
#include "unicode/uloc.h"
#include "unicode/unum.h"
#include "unicode/utypes.h"
#include "vm/JSAtom.h"
#include "vm/StringType.h"

using js::HashNumber;
using js::intl::StringsAreEqual;

// Map an ASCII lower-case letter to its upper-case counterpart; every other
// code unit is returned unchanged.
template <typename Char>
static constexpr Char ToUpperASCII(Char c) {
  return mozilla::IsAsciiLowercaseAlpha(c) ? (c - 0x20) : c;
}

static_assert(ToUpperASCII('a') == 'A', "verifying 'a' uppercases correctly");
static_assert(ToUpperASCII('m') == 'M', "verifying 'm' uppercases correctly");
static_assert(ToUpperASCII('z') == 'Z', "verifying 'z' uppercases correctly");
static_assert(ToUpperASCII(u'a') == u'A',
              "verifying u'a' uppercases correctly");
static_assert(ToUpperASCII(u'k') == u'K',
              "verifying u'k' uppercases correctly");
static_assert(ToUpperASCII(u'z') == u'Z',
              "verifying u'z' uppercases correctly");

// Hash the |length| code units starting at |s|, upper-casing ASCII letters
// first so that strings differing only in ASCII case hash identically.
template <typename Char>
static HashNumber HashStringIgnoreCaseASCII(const Char* s, size_t length) {
  uint32_t hash = 0;
  for (size_t i = 0; i < length; i++) {
    hash = mozilla::AddToHash(hash, ToUpperASCII(s[i]));
  }
  return hash;
}

// Build a time zone lookup key whose hash ignores ASCII case, matching the
// comparison performed by TimeZoneHasher::match().
js::intl::SharedIntlData::TimeZoneHasher::Lookup::Lookup(
    JSLinearString* timeZone)
    : js::intl::SharedIntlData::LinearStringLookup(timeZone) {
  if (isLatin1) {
    hash = HashStringIgnoreCaseASCII(latin1Chars, length);
  } else {
    hash = HashStringIgnoreCaseASCII(twoByteChars, length);
  }
}

// Compare |len| code units of |s1| and |s2|, treating ASCII letters that
// differ only in case as equal. The two inputs may use different character
// widths.
template <typename Char1, typename Char2>
static bool EqualCharsIgnoreCaseASCII(const Char1* s1, const Char2* s2,
                                      size_t len) {
  for (const Char1* s1end = s1 + len; s1 < s1end; s1++, s2++) {
    if (ToUpperASCII(*s1) != ToUpperASCII(*s2)) {
      return false;
    }
  }
  return true;
}

// Hash policy equality: |key| matches |lookup| when both have the same length
// and the same characters modulo ASCII case.
bool js::intl::SharedIntlData::TimeZoneHasher::match(TimeZoneName key,
                                                     const Lookup& lookup) {
  if (key->length() != lookup.length) {
    return false;
  }

  // Compare time zone names ignoring ASCII case differences.
  if (key->hasLatin1Chars()) {
    const Latin1Char* keyChars = key->latin1Chars(lookup.nogc);
    if (lookup.isLatin1) {
      return EqualCharsIgnoreCaseASCII(keyChars, lookup.latin1Chars,
                                       lookup.length);
    }
    return EqualCharsIgnoreCaseASCII(keyChars, lookup.twoByteChars,
                                     lookup.length);
  }

  const char16_t* keyChars = key->twoByteChars(lookup.nogc);
  if (lookup.isLatin1) {
    return EqualCharsIgnoreCaseASCII(lookup.latin1Chars, keyChars,
                                     lookup.length);
  }
  return EqualCharsIgnoreCaseASCII(keyChars, lookup.twoByteChars,
                                   lookup.length);
}

// Return true if |timeZone| appears in js::timezone::legacyICUTimeZones.
static bool IsLegacyICUTimeZone(const char* timeZone) {
  for (const auto& legacyTimeZone : js::timezone::legacyICUTimeZones) {
    if (StringsAreEqual(timeZone, legacyTimeZone)) {
      return true;
    }
  }
  return false;
}

// Lazily populate the three time zone tables:
//   - availableTimeZones: every name ICU enumerates, minus legacy ICU names;
//   - ianaZonesTreatedAsLinksByICU: from the generated time zone data;
//   - ianaLinksCanonicalizedDifferentlyByICU: link -> target, likewise.
//
// Returns true on success (or if already initialized). On failure an error has
// been reported on |cx|, or Atomize() failed, and the tables may be partially
// filled; the next call starts over.
bool js::intl::SharedIntlData::ensureTimeZones(JSContext* cx) {
  if (timeZoneDataInitialized) {
    return true;
  }

  // If ensureTimeZones() was called previously, but didn't complete due to
  // OOM, clear all sets/maps and start from scratch.
  availableTimeZones.clearAndCompact();

  UErrorCode status = U_ZERO_ERROR;
  UEnumeration* values = ucal_openTimeZones(&status);
  if (U_FAILURE(status)) {
    ReportInternalError(cx);
    return false;
  }
  // Closes the enumeration on every exit path.
  ScopedICUObject<UEnumeration, uenum_close> toClose(values);

  RootedAtom timeZone(cx);
  while (true) {
    int32_t size;
    const char* rawTimeZone = uenum_next(values, &size, &status);
    if (U_FAILURE(status)) {
      ReportInternalError(cx);
      return false;
    }

    // A null result marks the end of the enumeration.
    if (rawTimeZone == nullptr) {
      break;
    }

    // Skip legacy ICU time zone names.
    if (IsLegacyICUTimeZone(rawTimeZone)) {
      continue;
    }

    MOZ_ASSERT(size >= 0);
    timeZone = Atomize(cx, rawTimeZone, size_t(size));
    if (!timeZone) {
      return false;
    }

    TimeZoneHasher::Lookup lookup(timeZone);
    TimeZoneSet::AddPtr p = availableTimeZones.lookupForAdd(lookup);

    // ICU shouldn't report any duplicate time zone names, but if it does,
    // just ignore the duplicate name.
    if (!p && !availableTimeZones.add(p, timeZone)) {
      ReportOutOfMemory(cx);
      return false;
    }
  }

  ianaZonesTreatedAsLinksByICU.clearAndCompact();

  for (const char* rawTimeZone : timezone::ianaZonesTreatedAsLinksByICU) {
    MOZ_ASSERT(rawTimeZone != nullptr);
    timeZone = Atomize(cx, rawTimeZone, strlen(rawTimeZone));
    if (!timeZone) {
      return false;
    }

    TimeZoneHasher::Lookup lookup(timeZone);
    TimeZoneSet::AddPtr p = ianaZonesTreatedAsLinksByICU.lookupForAdd(lookup);
    MOZ_ASSERT(!p, "Duplicate entry in timezone::ianaZonesTreatedAsLinksByICU");

    if (!ianaZonesTreatedAsLinksByICU.add(p, timeZone)) {
      ReportOutOfMemory(cx);
      return false;
    }
  }

  ianaLinksCanonicalizedDifferentlyByICU.clearAndCompact();

  RootedAtom linkName(cx);
  // Reuse the |timeZone| root for the link target instead of adding another.
  RootedAtom& target = timeZone;
  for (const auto& linkAndTarget :
       timezone::ianaLinksCanonicalizedDifferentlyByICU) {
    const char* rawLinkName = linkAndTarget.link;
    const char* rawTarget = linkAndTarget.target;

    MOZ_ASSERT(rawLinkName != nullptr);
    linkName = Atomize(cx, rawLinkName, strlen(rawLinkName));
    if (!linkName) {
      return false;
    }

    MOZ_ASSERT(rawTarget != nullptr);
    target = Atomize(cx, rawTarget, strlen(rawTarget));
    if (!target) {
      return false;
    }

    TimeZoneHasher::Lookup lookup(linkName);
    TimeZoneMap::AddPtr p =
        ianaLinksCanonicalizedDifferentlyByICU.lookupForAdd(lookup);
    MOZ_ASSERT(
        !p,
        "Duplicate entry in timezone::ianaLinksCanonicalizedDifferentlyByICU");

    if (!ianaLinksCanonicalizedDifferentlyByICU.add(p, linkName, target)) {
      ReportOutOfMemory(cx);
      return false;
    }
  }

  MOZ_ASSERT(!timeZoneDataInitialized,
             "ensureTimeZones is neither reentrant nor thread-safe");
  timeZoneDataInitialized = true;

  return true;
}

// Look |timeZone| up in availableTimeZones, ignoring ASCII case. On a hit,
// |result| is set to the atom stored in the set; on a miss |result| is left
// untouched. Returns false only if initialization or linearization fails.
bool js::intl::SharedIntlData::validateTimeZoneName(JSContext* cx,
                                                    HandleString timeZone,
                                                    MutableHandleAtom result) {
  if (!ensureTimeZones(cx)) {
    return false;
  }

  RootedLinearString timeZoneLinear(cx, timeZone->ensureLinear(cx));
  if (!timeZoneLinear) {
    return false;
  }

  TimeZoneHasher::Lookup lookup(timeZoneLinear);
  if (TimeZoneSet::Ptr p = availableTimeZones.lookup(lookup)) {
    result.set(*p);
  }

  return true;
}

// Consult the generated IANA tables for |timeZone|, which must already be a
// member of availableTimeZones (asserted). |result| is set when
//   - |timeZone| is a link in ianaLinksCanonicalizedDifferentlyByICU and its
//     target is itself available, or
//   - |timeZone| is in ianaZonesTreatedAsLinksByICU.
// Otherwise |result| is left untouched. Returns false only if initialization
// or linearization fails.
bool js::intl::SharedIntlData::tryCanonicalizeTimeZoneConsistentWithIANA(
    JSContext* cx, HandleString timeZone, MutableHandleAtom result) {
  if (!ensureTimeZones(cx)) {
    return false;
  }

  RootedLinearString timeZoneLinear(cx, timeZone->ensureLinear(cx));
  if (!timeZoneLinear) {
    return false;
  }

  TimeZoneHasher::Lookup lookup(timeZoneLinear);
  MOZ_ASSERT(availableTimeZones.has(lookup), "Invalid time zone name");

  if (TimeZoneMap::Ptr p =
          ianaLinksCanonicalizedDifferentlyByICU.lookup(lookup)) {
    // The effectively supported time zones aren't known at compile time,
    // when
    // 1. SpiderMonkey was compiled with "--with-system-icu".
    // 2. ICU's dynamic time zone data loading feature was used.
    //    (ICU supports loading time zone files at runtime through the
    //    ICU_TIMEZONE_FILES_DIR environment variable.)
    // Ensure ICU supports the new target zone before applying the update.
    TimeZoneName targetTimeZone = p->value();
    TimeZoneHasher::Lookup targetLookup(targetTimeZone);
    if (availableTimeZones.has(targetLookup)) {
      result.set(targetTimeZone);
    }
  } else if (TimeZoneSet::Ptr p = ianaZonesTreatedAsLinksByICU.lookup(lookup)) {
    result.set(*p);
  }

  return true;
}

// Build a locale lookup key from a linear string. Unlike time zone keys, the
// hash here is computed over the characters as-is.
js::intl::SharedIntlData::LocaleHasher::Lookup::Lookup(JSLinearString* locale)
    : js::intl::SharedIntlData::LinearStringLookup(locale) {
  if (isLatin1) {
    hash = mozilla::HashString(latin1Chars, length);
  } else {
    hash = mozilla::HashString(twoByteChars, length);
  }
}

// Build a locale lookup key from a raw character buffer; the hash is taken
// over the Latin-1 view of |chars|.
js::intl::SharedIntlData::LocaleHasher::Lookup::Lookup(const char* chars,
                                                       size_t length)
    : js::intl::SharedIntlData::LinearStringLookup(chars, length) {
  hash = mozilla::HashString(latin1Chars, length);
}

// Hash policy equality: |key| matches |lookup| when both have the same length
// and EqualChars() reports the same characters.
bool js::intl::SharedIntlData::LocaleHasher::match(Locale key,
                                                   const Lookup& lookup) {
  if (key->length() != lookup.length) {
    return false;
  }

  if (key->hasLatin1Chars()) {
    const Latin1Char* keyChars = key->latin1Chars(lookup.nogc);
    if (lookup.isLatin1) {
      return EqualChars(keyChars, lookup.latin1Chars, lookup.length);
    }
    return EqualChars(keyChars, lookup.twoByteChars, lookup.length);
  }

  const char16_t* keyChars = key->twoByteChars(lookup.nogc);
  if (lookup.isLatin1) {
    return EqualChars(lookup.latin1Chars, keyChars, lookup.length);
  }
  return EqualChars(keyChars, lookup.twoByteChars, lookup.length);
}

// Fill |locales| with the locales reported by |countAvailable| and
// |getAvailable| (with '_' replaced by '-'), plus old-style aliases whose
// modern form is present, plus the last-ditch locale.
//
// Returns false if atomizing, growing the scratch buffer, or inserting into
// the set fails.
bool js::intl::SharedIntlData::getAvailableLocales(
    JSContext* cx, LocaleSet& locales, CountAvailable countAvailable,
    GetAvailable getAvailable) {
  // Atomize |locale| and insert it into |locales| unless already present.
  auto addLocale = [cx, &locales](const char* locale, size_t length) {
    JSAtom* atom = Atomize(cx, locale, length);
    if (!atom) {
      return false;
    }

    LocaleHasher::Lookup lookup(atom);
    LocaleSet::AddPtr p = locales.lookupForAdd(lookup);

    // ICU shouldn't report any duplicate locales, but if it does, just
    // ignore the duplicated locale.
    if (!p && !locales.add(p, atom)) {
      ReportOutOfMemory(cx);
      return false;
    }

    return true;
  };

  // Scratch buffer reused across iterations for the '_' -> '-' rewrite.
  // NOTE(review): inline capacity restored as 16 -- confirm against upstream.
  js::Vector<char, 16> lang(cx);

  int32_t count = countAvailable();
  for (int32_t i = 0; i < count; i++) {
    const char* locale = getAvailable(i);
    size_t length = strlen(locale);

    lang.clear();
    if (!lang.append(locale, length)) {
      return false;
    }

    std::replace(lang.begin(), lang.end(), '_', '-');

    if (!addLocale(lang.begin(), length)) {
      return false;
    }
  }

  // Add old-style language tags without script code for locales that in current
  // usage would include a script subtag. Also add an entry for the last-ditch
  // locale, in case ICU doesn't directly support it (but does support it
  // through fallback, e.g. supporting "en-GB" indirectly using "en" support).

  // Certain old-style language tags lack a script code, but in current usage
  // they *would* include a script code. Map these over to modern forms.
  for (const auto& mapping : js::intl::oldStyleLanguageTagMappings) {
    const char* oldStyle = mapping.oldStyle;
    const char* modernStyle = mapping.modernStyle;

    LocaleHasher::Lookup lookup(modernStyle, strlen(modernStyle));
    if (locales.has(lookup)) {
      if (!addLocale(oldStyle, strlen(oldStyle))) {
        return false;
      }
    }
  }

  // Also forcibly provide the last-ditch locale.
  {
    const char* lastDitch = intl::LastDitchLocale();
    MOZ_ASSERT(strcmp(lastDitch, "en-GB") == 0);

#ifdef DEBUG
    static constexpr char lastDitchParent[] = "en";

    LocaleHasher::Lookup lookup(lastDitchParent, strlen(lastDitchParent));
    MOZ_ASSERT(locales.has(lookup),
               "shouldn't be a need to add every locale implied by the "
               "last-ditch locale, merely just the last-ditch locale");
#endif

    if (!addLocale(lastDitch, strlen(lastDitch))) {
      return false;
    }
  }

  return true;
}

#ifdef DEBUG
// Debug-only check that two (count, get) enumerator pairs agree: same count
// and, for every index, the same returned value. Note that the values are
// compared as pointers, not by string contents.
template <typename CountAvailable, typename GetAvailable>
static bool IsSameAvailableLocales(CountAvailable countAvailable1,
                                   GetAvailable getAvailable1,
                                   CountAvailable countAvailable2,
                                   GetAvailable getAvailable2) {
  int32_t count = countAvailable1();
  if (count != countAvailable2()) {
    return false;
  }

  for (int32_t i = 0; i < count; i++) {
    if (getAvailable1(i) != getAvailable2(i)) {
      return false;
    }
  }

  return true;
}
#endif

// Lazily populate supportedLocales (from uloc_*) and collatorSupportedLocales
// (from ucol_*). Debug builds additionally assert that the udat_* and unum_*
// enumerators agree with uloc_*, which is why a single set serves the
// non-collator kinds in isSupportedLocale().
bool js::intl::SharedIntlData::ensureSupportedLocales(JSContext* cx) {
  if (supportedLocalesInitialized) {
    return true;
  }

  // If ensureSupportedLocales() was called previously, but didn't complete due
  // to OOM, clear all data and start from scratch.
  supportedLocales.clearAndCompact();
  collatorSupportedLocales.clearAndCompact();

  if (!getAvailableLocales(cx, supportedLocales, uloc_countAvailable,
                           uloc_getAvailable)) {
    return false;
  }
  if (!getAvailableLocales(cx, collatorSupportedLocales, ucol_countAvailable,
                           ucol_getAvailable)) {
    return false;
  }

  MOZ_ASSERT(IsSameAvailableLocales(uloc_countAvailable, uloc_getAvailable,
                                    udat_countAvailable, udat_getAvailable));
  MOZ_ASSERT(IsSameAvailableLocales(uloc_countAvailable, uloc_getAvailable,
                                    unum_countAvailable, unum_getAvailable));

  MOZ_ASSERT(!supportedLocalesInitialized,
             "ensureSupportedLocales is neither reentrant nor thread-safe");
  supportedLocalesInitialized = true;

  return true;
}

// Set |*supported| to whether |locale| is in the locale set that backs |kind|.
// Collator has its own set; every other kind shares supportedLocales. Returns
// false only if initialization or linearization fails.
bool js::intl::SharedIntlData::isSupportedLocale(JSContext* cx,
                                                 SupportedLocaleKind kind,
                                                 HandleString locale,
                                                 bool* supported) {
  if (!ensureSupportedLocales(cx)) {
    return false;
  }

  RootedLinearString localeLinear(cx, locale->ensureLinear(cx));
  if (!localeLinear) {
    return false;
  }

  LocaleHasher::Lookup lookup(localeLinear);

  switch (kind) {
    case SupportedLocaleKind::Collator:
      *supported = collatorSupportedLocales.has(lookup);
      return true;
    case SupportedLocaleKind::DateTimeFormat:
    case SupportedLocaleKind::DisplayNames:
    case SupportedLocaleKind::ListFormat:
    case SupportedLocaleKind::NumberFormat:
    case SupportedLocaleKind::PluralRules:
    case SupportedLocaleKind::RelativeTimeFormat:
      *supported = supportedLocales.has(lookup);
      return true;
  }
  MOZ_CRASH("Invalid Intl constructor");
}

#if DEBUG || MOZ_SYSTEM_ICU
// Lazily populate upperCaseFirstLocales by opening a collator for every locale
// ICU's collation service enumerates and keeping those whose UCOL_CASE_FIRST
// attribute is UCOL_UPPER_FIRST.
bool js::intl::SharedIntlData::ensureUpperCaseFirstLocales(JSContext* cx) {
  if (upperCaseFirstInitialized) {
    return true;
  }

  // If ensureUpperCaseFirstLocales() was called previously, but didn't
  // complete due to OOM, clear all data and start from scratch.
  upperCaseFirstLocales.clearAndCompact();

  UErrorCode status = U_ZERO_ERROR;
  UEnumeration* available = ucol_openAvailableLocales(&status);
  if (U_FAILURE(status)) {
    ReportInternalError(cx);
    return false;
  }
  // Closes the enumeration on every exit path.
  ScopedICUObject<UEnumeration, uenum_close> toClose(available);

  RootedAtom locale(cx);
  while (true) {
    int32_t size;
    const char* rawLocale = uenum_next(available, &size, &status);
    if (U_FAILURE(status)) {
      ReportInternalError(cx);
      return false;
    }

    // A null result marks the end of the enumeration.
    if (rawLocale == nullptr) {
      break;
    }

    UCollator* collator = ucol_open(rawLocale, &status);
    if (U_FAILURE(status)) {
      ReportInternalError(cx);
      return false;
    }
    // Closes the collator at the end of each iteration.
    ScopedICUObject<UCollator, ucol_close> toCloseCollator(collator);

    UColAttributeValue caseFirst =
        ucol_getAttribute(collator, UCOL_CASE_FIRST, &status);
    if (U_FAILURE(status)) {
      ReportInternalError(cx);
      return false;
    }

    if (caseFirst != UCOL_UPPER_FIRST) {
      continue;
    }

    MOZ_ASSERT(size >= 0);
    locale = Atomize(cx, rawLocale, size_t(size));
    if (!locale) {
      return false;
    }

    LocaleHasher::Lookup lookup(locale);
    LocaleSet::AddPtr p = upperCaseFirstLocales.lookupForAdd(lookup);

    // ICU shouldn't report any duplicate locales, but if it does, just
    // ignore the duplicated locale.
    if (!p && !upperCaseFirstLocales.add(p, locale)) {
      ReportOutOfMemory(cx);
      return false;
    }
  }

  MOZ_ASSERT(
      !upperCaseFirstInitialized,
      "ensureUpperCaseFirstLocales is neither reentrant nor thread-safe");
  upperCaseFirstInitialized = true;

  return true;
}
#endif  // DEBUG || MOZ_SYSTEM_ICU

// Set |*isUpperFirst| to whether |locale| sorts upper case first.
//
// With MOZ_SYSTEM_ICU or DEBUG the answer comes from the set computed by
// ensureUpperCaseFirstLocales(); otherwise it comes from the hard-coded
// "da"/"mt" list. When both are compiled in (DEBUG without MOZ_SYSTEM_ICU)
// they are asserted to agree.
bool js::intl::SharedIntlData::isUpperCaseFirst(JSContext* cx,
                                                HandleString locale,
                                                bool* isUpperFirst) {
#if DEBUG || MOZ_SYSTEM_ICU
  if (!ensureUpperCaseFirstLocales(cx)) {
    return false;
  }
#endif

  RootedLinearString localeLinear(cx, locale->ensureLinear(cx));
  if (!localeLinear) {
    return false;
  }

#if !MOZ_SYSTEM_ICU
  // "da" (Danish) and "mt" (Maltese) are the only two supported locales using
  // upper-case first. CLDR also lists "cu" (Church Slavic) as an upper-case
  // first locale, but since it's not supported in ICU, we don't care about it
  // here.
  bool isDefaultUpperCaseFirstLocale =
      js::StringEqualsLiteral(localeLinear, "da") ||
      js::StringEqualsLiteral(localeLinear, "mt");
#endif

#if DEBUG || MOZ_SYSTEM_ICU
  LocaleHasher::Lookup lookup(localeLinear);
  *isUpperFirst = upperCaseFirstLocales.has(lookup);
#else
  *isUpperFirst = isDefaultUpperCaseFirstLocale;
#endif

#if !MOZ_SYSTEM_ICU
  MOZ_ASSERT(*isUpperFirst == isDefaultUpperCaseFirstLocale,
             "upper-case first locales don't match hard-coded list");
#endif

  return true;
}

// Deleter used by the unique pointer that owns a UDateTimePatternGenerator.
void js::intl::DateTimePatternGeneratorDeleter::operator()(
    UDateTimePatternGenerator* ptr) {
  udatpg_close(ptr);
}

// Return a date-time pattern generator for |locale|, reusing the single cached
// instance when it was created for the same locale string. Returns nullptr if
// ICU fails to open a generator (internal error reported) or if duplicating
// the locale string fails.
//
// The returned pointer is owned by this object and is replaced by the next
// call that requests a different locale.
UDateTimePatternGenerator*
js::intl::SharedIntlData::getDateTimePatternGenerator(JSContext* cx,
                                                      const char* locale) {
  // Return the cached instance if the requested locale matches the locale
  // of the cached generator.
  if (dateTimePatternGeneratorLocale &&
      StringsAreEqual(dateTimePatternGeneratorLocale.get(), locale)) {
    return dateTimePatternGenerator.get();
  }

  UErrorCode status = U_ZERO_ERROR;
  UniqueUDateTimePatternGenerator gen(udatpg_open(IcuLocale(locale), &status));
  if (U_FAILURE(status)) {
    intl::ReportInternalError(cx);
    return nullptr;
  }

  JS::UniqueChars localeCopy = js::DuplicateString(cx, locale);
  if (!localeCopy) {
    return nullptr;
  }

  // Only replace the cache once both the generator and the copied locale
  // string exist, so a failure above leaves the previous entry intact.
  dateTimePatternGenerator = std::move(gen);
  dateTimePatternGeneratorLocale = std::move(localeCopy);

  return dateTimePatternGenerator.get();
}

// Empty every locale and time zone table and release its storage.
void js::intl::SharedIntlData::destroyInstance() {
  availableTimeZones.clearAndCompact();
  ianaZonesTreatedAsLinksByICU.clearAndCompact();
  ianaLinksCanonicalizedDifferentlyByICU.clearAndCompact();
  supportedLocales.clearAndCompact();
  collatorSupportedLocales.clearAndCompact();
#if DEBUG || MOZ_SYSTEM_ICU
  upperCaseFirstLocales.clearAndCompact();
#endif
}

// Trace the atoms held by every table. Skipped during minor collections.
void js::intl::SharedIntlData::trace(JSTracer* trc) {
  // Atoms are always tenured.
  if (!JS::RuntimeHeapIsMinorCollecting()) {
    availableTimeZones.trace(trc);
    ianaZonesTreatedAsLinksByICU.trace(trc);
    ianaLinksCanonicalizedDifferentlyByICU.trace(trc);
    supportedLocales.trace(trc);
    collatorSupportedLocales.trace(trc);
#if DEBUG || MOZ_SYSTEM_ICU
    upperCaseFirstLocales.trace(trc);
#endif
  }
}

// Report the heap memory owned by the tables (shallow size of each) plus the
// cached date-time pattern generator locale string. The generator object
// itself is not measured here.
size_t js::intl::SharedIntlData::sizeOfExcludingThis(
    mozilla::MallocSizeOf mallocSizeOf) const {
  return availableTimeZones.shallowSizeOfExcludingThis(mallocSizeOf) +
         ianaZonesTreatedAsLinksByICU.shallowSizeOfExcludingThis(mallocSizeOf) +
         ianaLinksCanonicalizedDifferentlyByICU.shallowSizeOfExcludingThis(
             mallocSizeOf) +
         supportedLocales.shallowSizeOfExcludingThis(mallocSizeOf) +
         collatorSupportedLocales.shallowSizeOfExcludingThis(mallocSizeOf) +
#if DEBUG || MOZ_SYSTEM_ICU
         upperCaseFirstLocales.shallowSizeOfExcludingThis(mallocSizeOf) +
#endif
         mallocSizeOf(dateTimePatternGeneratorLocale.get());
}