1 files changed, 335 insertions, 0 deletions
diff --git a/js/src/builtin/intl/SharedIntlData.h b/js/src/builtin/intl/SharedIntlData.h
new file mode 100644
index 0000000000..8cada7c61b
--- /dev/null
+++ b/js/src/builtin/intl/SharedIntlData.h
@@ -0,0 +1,335 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef builtin_intl_SharedIntlData_h
+#define builtin_intl_SharedIntlData_h
+
+#include "mozilla/MemoryReporting.h"
+#include "mozilla/UniquePtr.h"
+
+#include <stddef.h>
+
+#include "js/AllocPolicy.h"
+#include "js/GCAPI.h"
+#include "js/GCHashTable.h"
+#include "js/Result.h"
+#include "js/RootingAPI.h"
+#include "js/Utility.h"
+#include "vm/StringType.h"
+
+namespace mozilla::intl {
+class DateTimePatternGenerator;
+}  // namespace mozilla::intl
+
+namespace js {
+
+class ArrayObject;
+
+namespace intl {
+
+/**
+ * This deleter class exists so that mozilla::intl::DateTimePatternGenerator
+ * can be a forward declaration, but still be used inside of a UniquePtr.
+ */
+class DateTimePatternGeneratorDeleter {
+ public:
+  void operator()(mozilla::intl::DateTimePatternGenerator* ptr);
+};
+
+/**
+ * Stores Intl data which can be shared across compartments (but not contexts).
+ *
+ * Used for data which is expensive when computed repeatedly or is not
+ * available through ICU.
+ */
+class SharedIntlData {
+  struct LinearStringLookup {
+    union {
+      const JS::Latin1Char* latin1Chars;
+      const char16_t* twoByteChars;
+    };
+    bool isLatin1;
+    size_t length;
+    JS::AutoCheckCannotGC nogc;
+    HashNumber hash = 0;
+
+    explicit LinearStringLookup(JSLinearString* string)
+        : isLatin1(string->hasLatin1Chars()), length(string->length()) {
+      if (isLatin1) {
+        latin1Chars = string->latin1Chars(nogc);
+      } else {
+        twoByteChars = string->twoByteChars(nogc);
+      }
+    }
+
+    LinearStringLookup(const char* chars, size_t length)
+        : isLatin1(true), length(length) {
+      latin1Chars = reinterpret_cast<const JS::Latin1Char*>(chars);
+    }
+  };
+
+ public:
+  /**
+   * Information tracking the set of the supported time zone names, derived
+   * from the IANA time zone database <https://www.iana.org/time-zones>.
+   *
+   * There are two kinds of IANA time zone names: Zone and Link (denoted as
+   * such in database source files). Zone names are the canonical, preferred
+   * name for a time zone, e.g. Asia/Kolkata. Link names simply refer to
+   * target Zone names for their meaning, e.g. Asia/Calcutta targets
+   * Asia/Kolkata. That a name is a Link doesn't *necessarily* reflect a
+   * sense of deprecation: some Link names also exist partly for convenience,
+   * e.g. UTC and GMT as Link names targeting the Zone name Etc/UTC.
+   *
+   * Two data sources determine the time zone names we support: those ICU
+   * supports and IANA's zone information.
+   *
+   * Unfortunately the names ICU and IANA support, and their Link
+   * relationships from name to target, aren't identical, so we can't simply
+   * implicitly trust ICU's name handling. We must perform various
+   * preprocessing of user-provided zone names and post-processing of
+   * ICU-provided zone names to implement ECMA-402's IANA-consistent behavior.
+   *
+   * Also see <https://ssl.icu-project.org/trac/ticket/12044> and
+   * <http://unicode.org/cldr/trac/ticket/9892>.
+   */
+
+  using TimeZoneName = JSAtom*;
+
+  struct TimeZoneHasher {
+    struct Lookup : LinearStringLookup {
+      explicit Lookup(JSLinearString* timeZone);
+    };
+
+    static js::HashNumber hash(const Lookup& lookup) { return lookup.hash; }
+    static bool match(TimeZoneName key, const Lookup& lookup);
+  };
+
+  using TimeZoneSet =
+      GCHashSet<TimeZoneName, TimeZoneHasher, SystemAllocPolicy>;
+  using TimeZoneMap =
+      GCHashMap<TimeZoneName, TimeZoneName, TimeZoneHasher, SystemAllocPolicy>;
+
+ private:
+  /**
+   * As a threshold matter, available time zones are those time zones ICU
+   * supports, via ucal_openTimeZones. But ICU supports additional non-IANA
+   * time zones described in intl/icu/source/tools/tzcode/icuzones (listed in
+   * IntlTimeZoneData.cpp's |legacyICUTimeZones|) for its own backwards
+   * compatibility purposes. This set consists of ICU's supported time zones,
+   * minus all backwards-compatibility time zones.
+   */
+  TimeZoneSet availableTimeZones;
+
+  /**
+   * IANA treats some time zone names as Zones, that ICU instead treats as
+   * Links. For example, IANA considers "America/Indiana/Indianapolis" to be
+   * a Zone and "America/Fort_Wayne" a Link that targets it, but ICU
+   * considers the former a Link that targets "America/Indianapolis" (which
+   * IANA treats as a Link).
+   *
+   * ECMA-402 requires that we respect IANA data, so if we're asked to
+   * canonicalize a time zone name in this set, we must *not* return ICU's
+   * canonicalization.
+   */
+  TimeZoneSet ianaZonesTreatedAsLinksByICU;
+
+  /**
+   * IANA treats some time zone names as Links to one target, that ICU
+   * instead treats as either Zones, or Links to different targets. An
+   * example of the former is "Asia/Calcutta, which IANA assigns the target
+   * "Asia/Kolkata" but ICU considers its own Zone. An example of the latter
+   * is "America/Virgin", which IANA assigns the target
+   * "America/Port_of_Spain" but ICU assigns the target "America/St_Thomas".
+   *
+   * ECMA-402 requires that we respect IANA data, so if we're asked to
+   * canonicalize a time zone name that's a key in this map, we *must* return
+   * the corresponding value and *must not* return ICU's canonicalization.
+   */
+  TimeZoneMap ianaLinksCanonicalizedDifferentlyByICU;
+
+  bool timeZoneDataInitialized = false;
+
+  /**
+   * Precomputes the available time zone names, because it's too expensive to
+   * call ucal_openTimeZones() repeatedly.
+   */
+  bool ensureTimeZones(JSContext* cx);
+
+ public:
+  /**
+   * Returns the validated time zone name in |result|. If the input time zone
+   * isn't a valid IANA time zone name, |result| remains unchanged.
+   */
+  bool validateTimeZoneName(JSContext* cx, JS::Handle<JSString*> timeZone,
+                            JS::MutableHandle<JSAtom*> result);
+
+  /**
+   * Returns the canonical time zone name in |result|. If no canonical name
+   * was found, |result| remains unchanged.
+   *
+   * This method only handles time zones which are canonicalized differently
+   * by ICU when compared to IANA.
+   */
+  bool tryCanonicalizeTimeZoneConsistentWithIANA(
+      JSContext* cx, JS::Handle<JSString*> timeZone,
+      JS::MutableHandle<JSAtom*> result);
+
+  /**
+   * Returns an iterator over all available time zones supported by ICU. The
+   * returned time zone names aren't canonicalized.
+   */
+  JS::Result<TimeZoneSet::Iterator> availableTimeZonesIteration(JSContext* cx);
+
+ private:
+  using Locale = JSAtom*;
+
+  struct LocaleHasher {
+    struct Lookup : LinearStringLookup {
+      explicit Lookup(JSLinearString* locale);
+      Lookup(const char* chars, size_t length);
+    };
+
+    static js::HashNumber hash(const Lookup& lookup) { return lookup.hash; }
+    static bool match(Locale key, const Lookup& lookup);
+  };
+
+  using LocaleSet = GCHashSet<Locale, LocaleHasher, SystemAllocPolicy>;
+
+  // Set of supported locales for all Intl service constructors except Collator,
+  // which uses its own set.
+  //
+  // UDateFormat:
+  // udat_[count,get]Available() return the same results as their
+  // uloc_[count,get]Available() counterparts.
+  //
+  // UNumberFormatter:
+  // unum_[count,get]Available() return the same results as their
+  // uloc_[count,get]Available() counterparts.
+  //
+  // UListFormatter, UPluralRules, and URelativeDateTimeFormatter:
+  // We're going to use ULocale availableLocales as per ICU recommendation:
+  // https://unicode-org.atlassian.net/browse/ICU-12756
+  LocaleSet supportedLocales;
+
+  // ucol_[count,get]Available() return different results compared to
+  // uloc_[count,get]Available(), we can't use |supportedLocales| here.
+  LocaleSet collatorSupportedLocales;
+
+  bool supportedLocalesInitialized = false;
+
+  // CountAvailable and GetAvailable describe the signatures used for ICU API
+  // to determine available locales for various functionality.
+  using CountAvailable = int32_t (*)();
+  using GetAvailable = const char* (*)(int32_t localeIndex);
+
+  template <class AvailableLocales>
+  static bool getAvailableLocales(JSContext* cx, LocaleSet& locales,
+                                  const AvailableLocales& availableLocales);
+
+  /**
+   * Precomputes the available locales sets.
+   */
+  bool ensureSupportedLocales(JSContext* cx);
+
+ public:
+  enum class SupportedLocaleKind {
+    Collator,
+    DateTimeFormat,
+    DisplayNames,
+    ListFormat,
+    NumberFormat,
+    PluralRules,
+    RelativeTimeFormat
+  };
+
+  /**
+   * Sets |supported| to true if |locale| is supported by the requested Intl
+   * service constructor. Otherwise sets |supported| to false.
+   */
+  [[nodiscard]] bool isSupportedLocale(JSContext* cx, SupportedLocaleKind kind,
+                                       JS::Handle<JSString*> locale,
+                                       bool* supported);
+
+  /**
+   * Returns all available locales for |kind|.
+   */
+  ArrayObject* availableLocalesOf(JSContext* cx, SupportedLocaleKind kind);
+
+ private:
+  /**
+   * The case first parameter (BCP47 key "kf") allows to switch the order of
+   * upper- and lower-case characters. ICU doesn't directly provide an API
+   * to query the default case first value of a given locale, but instead
+   * requires to instantiate a collator object and then query the case first
+   * attribute (UCOL_CASE_FIRST).
+   * To avoid instantiating an additional collator object whenever we need
+   * to retrieve the default case first value of a specific locale, we
+   * compute the default case first value for every supported locale only
+   * once and then keep a list of all locales which don't use the default
+   * case first setting.
+   * There is almost no difference between lower-case first and when case
+   * first is disabled (UCOL_LOWER_FIRST resp. UCOL_OFF), so we only need to
+   * track locales which use upper-case first as their default setting.
+   *
+   * Instantiating collator objects for each available locale is slow
+   * (bug 1527879), therefore we're hardcoding the two locales using upper-case
+   * first ("da" (Danish) and "mt" (Maltese)) and only assert in debug-mode
+   * these two locales match the upper-case first locales returned by ICU. A
+   * system-ICU may support a different set of locales, therefore we're always
+   * calling into ICU to find the upper-case first locales in that case.
+   */
+
+#if DEBUG || MOZ_SYSTEM_ICU
+  LocaleSet upperCaseFirstLocales;
+
+  bool upperCaseFirstInitialized = false;
+
+  /**
+   * Precomputes the available locales which use upper-case first sorting.
+   */
+  bool ensureUpperCaseFirstLocales(JSContext* cx);
+#endif
+
+ public:
+  /**
+   * Sets |isUpperFirst| to true if |locale| sorts upper-case characters
+   * before lower-case characters.
+   */
+  bool isUpperCaseFirst(JSContext* cx, JS::Handle<JSString*> locale,
+                        bool* isUpperFirst);
+
+ private:
+  using UniqueDateTimePatternGenerator =
+      mozilla::UniquePtr<mozilla::intl::DateTimePatternGenerator,
+                         DateTimePatternGeneratorDeleter>;
+
+  UniqueDateTimePatternGenerator dateTimePatternGenerator;
+  JS::UniqueChars dateTimePatternGeneratorLocale;
+
+ public:
+  /**
+   * Get a non-owned cached instance of the DateTimePatternGenerator, which is
+   * expensive to instantiate.
+   *
+   * See: https://bugzilla.mozilla.org/show_bug.cgi?id=1549578
+   */
+  mozilla::intl::DateTimePatternGenerator* getDateTimePatternGenerator(
+      JSContext* cx, const char* locale);
+
+ public:
+  void destroyInstance();
+
+  void trace(JSTracer* trc);
+
+  size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) const;
+};
+
+}  // namespace intl
+
+}  // namespace js
+
+#endif /* builtin_intl_SharedIntlData_h */