diff options
Diffstat (limited to 'js/src/builtin/intl')
44 files changed, 23660 insertions, 0 deletions
diff --git a/js/src/builtin/intl/Collator.cpp b/js/src/builtin/intl/Collator.cpp new file mode 100644 index 0000000000..f3deb50454 --- /dev/null +++ b/js/src/builtin/intl/Collator.cpp @@ -0,0 +1,472 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Intl.Collator implementation. */ + +#include "builtin/intl/Collator.h" + +#include "mozilla/Assertions.h" +#include "mozilla/Span.h" + +#include "jsapi.h" + +#include "builtin/Array.h" +#include "builtin/intl/CommonFunctions.h" +#include "builtin/intl/LanguageTag.h" +#include "builtin/intl/ScopedICUObject.h" +#include "builtin/intl/SharedIntlData.h" +#include "gc/FreeOp.h" +#include "js/CharacterEncoding.h" +#include "js/PropertySpec.h" +#include "js/StableStringChars.h" +#include "js/TypeDecls.h" +#include "unicode/ucol.h" +#include "unicode/uenum.h" +#include "unicode/uloc.h" +#include "unicode/utypes.h" +#include "vm/GlobalObject.h" +#include "vm/JSContext.h" +#include "vm/PlainObject.h" // js::PlainObject +#include "vm/Runtime.h" +#include "vm/StringType.h" + +#include "vm/JSObject-inl.h" + +using namespace js; + +using JS::AutoStableStringChars; + +using js::intl::IcuLocale; +using js::intl::ReportInternalError; +using js::intl::SharedIntlData; +using js::intl::StringsAreEqual; + +const JSClassOps CollatorObject::classOps_ = { + nullptr, // addProperty + nullptr, // delProperty + nullptr, // enumerate + nullptr, // newEnumerate + nullptr, // resolve + nullptr, // mayResolve + CollatorObject::finalize, // finalize + nullptr, // call + nullptr, // hasInstance + nullptr, // construct + nullptr, // trace +}; + +const JSClass CollatorObject::class_ = { + "Intl.Collator", + JSCLASS_HAS_RESERVED_SLOTS(CollatorObject::SLOT_COUNT) | + 
JSCLASS_HAS_CACHED_PROTO(JSProto_Collator) | + JSCLASS_FOREGROUND_FINALIZE, + &CollatorObject::classOps_, &CollatorObject::classSpec_}; + +const JSClass& CollatorObject::protoClass_ = PlainObject::class_; + +static bool collator_toSource(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + args.rval().setString(cx->names().Collator); + return true; +} + +static const JSFunctionSpec collator_static_methods[] = { + JS_SELF_HOSTED_FN("supportedLocalesOf", "Intl_Collator_supportedLocalesOf", + 1, 0), + JS_FS_END}; + +static const JSFunctionSpec collator_methods[] = { + JS_SELF_HOSTED_FN("resolvedOptions", "Intl_Collator_resolvedOptions", 0, 0), + JS_FN(js_toSource_str, collator_toSource, 0, 0), JS_FS_END}; + +static const JSPropertySpec collator_properties[] = { + JS_SELF_HOSTED_GET("compare", "$Intl_Collator_compare_get", 0), + JS_STRING_SYM_PS(toStringTag, "Intl.Collator", JSPROP_READONLY), JS_PS_END}; + +static bool Collator(JSContext* cx, unsigned argc, Value* vp); + +const ClassSpec CollatorObject::classSpec_ = { + GenericCreateConstructor<Collator, 0, gc::AllocKind::FUNCTION>, + GenericCreatePrototype<CollatorObject>, + collator_static_methods, + nullptr, + collator_methods, + collator_properties, + nullptr, + ClassSpec::DontDefineConstructor}; + +/** + * 10.1.2 Intl.Collator([ locales [, options]]) + * + * ES2017 Intl draft rev 94045d234762ad107a3d09bb6f7381a65f1a2f9b + */ +static bool Collator(JSContext* cx, const CallArgs& args) { + // Step 1 (Handled by OrdinaryCreateFromConstructor fallback code). + + // Steps 2-5 (Inlined 9.1.14, OrdinaryCreateFromConstructor). + RootedObject proto(cx); + if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_Collator, &proto)) { + return false; + } + + Rooted<CollatorObject*> collator( + cx, NewObjectWithClassProto<CollatorObject>(cx, proto)); + if (!collator) { + return false; + } + + HandleValue locales = args.get(0); + HandleValue options = args.get(1); + + // Step 6. 
+ if (!intl::InitializeObject(cx, collator, cx->names().InitializeCollator, + locales, options)) { + return false; + } + + args.rval().setObject(*collator); + return true; +} + +static bool Collator(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + return Collator(cx, args); +} + +bool js::intl_Collator(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 2); + MOZ_ASSERT(!args.isConstructing()); + + return Collator(cx, args); +} + +void js::CollatorObject::finalize(JSFreeOp* fop, JSObject* obj) { + MOZ_ASSERT(fop->onMainThread()); + + if (UCollator* coll = obj->as<CollatorObject>().getCollator()) { + intl::RemoveICUCellMemory(fop, obj, CollatorObject::EstimatedMemoryUse); + + ucol_close(coll); + } +} + +bool js::intl_availableCollations(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 1); + MOZ_ASSERT(args[0].isString()); + + UniqueChars locale = intl::EncodeLocale(cx, args[0].toString()); + if (!locale) { + return false; + } + UErrorCode status = U_ZERO_ERROR; + UEnumeration* values = + ucol_getKeywordValuesForLocale("co", locale.get(), false, &status); + if (U_FAILURE(status)) { + ReportInternalError(cx); + return false; + } + ScopedICUObject<UEnumeration, uenum_close> toClose(values); + + uint32_t count = uenum_count(values, &status); + if (U_FAILURE(status)) { + ReportInternalError(cx); + return false; + } + + RootedObject collations(cx, NewDenseEmptyArray(cx)); + if (!collations) { + return false; + } + + // The first element of the collations array must be |null| per + // ES2017 Intl, 10.2.3 Internal Slots. 
+ if (!NewbornArrayPush(cx, collations, NullValue())) { + return false; + } + + for (uint32_t i = 0; i < count; i++) { + const char* collation = uenum_next(values, nullptr, &status); + if (U_FAILURE(status)) { + ReportInternalError(cx); + return false; + } + + // Per ECMA-402, 10.2.3, we don't include standard and search: + // "The values 'standard' and 'search' must not be used as elements in + // any [[sortLocaleData]][locale].co and [[searchLocaleData]][locale].co + // array." + if (StringsAreEqual(collation, "standard") || + StringsAreEqual(collation, "search")) { + continue; + } + + // ICU returns old-style keyword values; map them to BCP 47 equivalents. + collation = uloc_toUnicodeLocaleType("co", collation); + if (!collation) { + ReportInternalError(cx); + return false; + } + + JSString* jscollation = NewStringCopyZ<CanGC>(cx, collation); + if (!jscollation) { + return false; + } + if (!NewbornArrayPush(cx, collations, StringValue(jscollation))) { + return false; + } + } + + args.rval().setObject(*collations); + return true; +} + +/** + * Returns a new UCollator with the locale and collation options + * of the given Collator. + */ +static UCollator* NewUCollator(JSContext* cx, + Handle<CollatorObject*> collator) { + RootedValue value(cx); + + RootedObject internals(cx, intl::GetInternalsObject(cx, collator)); + if (!internals) { + return nullptr; + } + + if (!GetProperty(cx, internals, internals, cx->names().locale, &value)) { + return nullptr; + } + UniqueChars locale = intl::EncodeLocale(cx, value.toString()); + if (!locale) { + return nullptr; + } + + // UCollator options with default values. + UColAttributeValue uStrength = UCOL_DEFAULT; + UColAttributeValue uCaseLevel = UCOL_OFF; + UColAttributeValue uAlternate = UCOL_DEFAULT; + UColAttributeValue uNumeric = UCOL_OFF; + // Normalization is always on to meet the canonical equivalence requirement. 
+ UColAttributeValue uNormalization = UCOL_ON; + UColAttributeValue uCaseFirst = UCOL_DEFAULT; + + if (!GetProperty(cx, internals, internals, cx->names().usage, &value)) { + return nullptr; + } + + { + JSLinearString* usage = value.toString()->ensureLinear(cx); + if (!usage) { + return nullptr; + } + if (StringEqualsLiteral(usage, "search")) { + // ICU expects search as a Unicode locale extension on locale. + intl::LanguageTag tag(cx); + if (!intl::LanguageTagParser::parse( + cx, mozilla::MakeStringSpan(locale.get()), tag)) { + return nullptr; + } + + JS::RootedVector<intl::UnicodeExtensionKeyword> keywords(cx); + + if (!keywords.emplaceBack("co", cx->names().search)) { + return nullptr; + } + + // |ApplyUnicodeExtensionToTag| applies the new keywords to the front of + // the Unicode extension subtag. We're then relying on ICU to follow RFC + // 6067, which states that any trailing keywords using the same key + // should be ignored. + if (!intl::ApplyUnicodeExtensionToTag(cx, tag, keywords)) { + return nullptr; + } + + locale = tag.toStringZ(cx); + if (!locale) { + return nullptr; + } + } else { + MOZ_ASSERT(StringEqualsLiteral(usage, "sort")); + } + } + + // We don't need to look at the collation property - it can only be set + // via the Unicode locale extension and is therefore already set on + // locale. 
+ + if (!GetProperty(cx, internals, internals, cx->names().sensitivity, &value)) { + return nullptr; + } + + { + JSLinearString* sensitivity = value.toString()->ensureLinear(cx); + if (!sensitivity) { + return nullptr; + } + if (StringEqualsLiteral(sensitivity, "base")) { + uStrength = UCOL_PRIMARY; + } else if (StringEqualsLiteral(sensitivity, "accent")) { + uStrength = UCOL_SECONDARY; + } else if (StringEqualsLiteral(sensitivity, "case")) { + uStrength = UCOL_PRIMARY; + uCaseLevel = UCOL_ON; + } else { + MOZ_ASSERT(StringEqualsLiteral(sensitivity, "variant")); + uStrength = UCOL_TERTIARY; + } + } + + if (!GetProperty(cx, internals, internals, cx->names().ignorePunctuation, + &value)) { + return nullptr; + } + // According to the ICU team, UCOL_SHIFTED causes punctuation to be + // ignored. Looking at Unicode Technical Report 35, Unicode Locale Data + // Markup Language, "shifted" causes whitespace and punctuation to be + // ignored - that's a bit more than asked for, but there's no way to get + // less. 
+ if (value.toBoolean()) { + uAlternate = UCOL_SHIFTED; + } + + if (!GetProperty(cx, internals, internals, cx->names().numeric, &value)) { + return nullptr; + } + if (!value.isUndefined() && value.toBoolean()) { + uNumeric = UCOL_ON; + } + + if (!GetProperty(cx, internals, internals, cx->names().caseFirst, &value)) { + return nullptr; + } + if (!value.isUndefined()) { + JSLinearString* caseFirst = value.toString()->ensureLinear(cx); + if (!caseFirst) { + return nullptr; + } + if (StringEqualsLiteral(caseFirst, "upper")) { + uCaseFirst = UCOL_UPPER_FIRST; + } else if (StringEqualsLiteral(caseFirst, "lower")) { + uCaseFirst = UCOL_LOWER_FIRST; + } else { + MOZ_ASSERT(StringEqualsLiteral(caseFirst, "false")); + uCaseFirst = UCOL_OFF; + } + } + + UErrorCode status = U_ZERO_ERROR; + UCollator* coll = ucol_open(IcuLocale(locale.get()), &status); + if (U_FAILURE(status)) { + ReportInternalError(cx); + return nullptr; + } + + ucol_setAttribute(coll, UCOL_STRENGTH, uStrength, &status); + ucol_setAttribute(coll, UCOL_CASE_LEVEL, uCaseLevel, &status); + ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, uAlternate, &status); + ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, uNumeric, &status); + ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, uNormalization, &status); + ucol_setAttribute(coll, UCOL_CASE_FIRST, uCaseFirst, &status); + if (U_FAILURE(status)) { + ucol_close(coll); + ReportInternalError(cx); + return nullptr; + } + + return coll; +} + +static bool intl_CompareStrings(JSContext* cx, UCollator* coll, + HandleString str1, HandleString str2, + MutableHandleValue result) { + MOZ_ASSERT(str1); + MOZ_ASSERT(str2); + + if (str1 == str2) { + result.setInt32(0); + return true; + } + + AutoStableStringChars stableChars1(cx); + if (!stableChars1.initTwoByte(cx, str1)) { + return false; + } + + AutoStableStringChars stableChars2(cx); + if (!stableChars2.initTwoByte(cx, str2)) { + return false; + } + + mozilla::Range<const char16_t> chars1 = stableChars1.twoByteRange(); + 
mozilla::Range<const char16_t> chars2 = stableChars2.twoByteRange(); + + UCollationResult uresult = + ucol_strcoll(coll, chars1.begin().get(), chars1.length(), + chars2.begin().get(), chars2.length()); + int32_t res; + switch (uresult) { + case UCOL_LESS: + res = -1; + break; + case UCOL_EQUAL: + res = 0; + break; + case UCOL_GREATER: + res = 1; + break; + default: + MOZ_CRASH("ucol_strcoll returned bad UCollationResult"); + } + result.setInt32(res); + return true; +} + +bool js::intl_CompareStrings(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 3); + MOZ_ASSERT(args[0].isObject()); + MOZ_ASSERT(args[1].isString()); + MOZ_ASSERT(args[2].isString()); + + Rooted<CollatorObject*> collator(cx, + &args[0].toObject().as<CollatorObject>()); + + // Obtain a cached UCollator object. + UCollator* coll = collator->getCollator(); + if (!coll) { + coll = NewUCollator(cx, collator); + if (!coll) { + return false; + } + collator->setCollator(coll); + + intl::AddICUCellMemory(collator, CollatorObject::EstimatedMemoryUse); + } + + // Use the UCollator to actually compare the strings. 
+ RootedString str1(cx, args[1].toString()); + RootedString str2(cx, args[2].toString()); + return intl_CompareStrings(cx, coll, str1, str2, args.rval()); +} + +bool js::intl_isUpperCaseFirst(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 1); + MOZ_ASSERT(args[0].isString()); + + SharedIntlData& sharedIntlData = cx->runtime()->sharedIntlData.ref(); + + RootedString locale(cx, args[0].toString()); + bool isUpperFirst; + if (!sharedIntlData.isUpperCaseFirst(cx, locale, &isUpperFirst)) { + return false; + } + + args.rval().setBoolean(isUpperFirst); + return true; +} diff --git a/js/src/builtin/intl/Collator.h b/js/src/builtin/intl/Collator.h new file mode 100644 index 0000000000..997d490821 --- /dev/null +++ b/js/src/builtin/intl/Collator.h @@ -0,0 +1,104 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef builtin_intl_Collator_h +#define builtin_intl_Collator_h + +#include "mozilla/Attributes.h" + +#include <stdint.h> + +#include "builtin/SelfHostingDefines.h" +#include "js/Class.h" +#include "vm/NativeObject.h" + +struct UCollator; + +namespace js { + +/******************** Collator ********************/ + +class CollatorObject : public NativeObject { + public: + static const JSClass class_; + static const JSClass& protoClass_; + + static constexpr uint32_t INTERNALS_SLOT = 0; + static constexpr uint32_t UCOLLATOR_SLOT = 1; + static constexpr uint32_t SLOT_COUNT = 2; + + static_assert(INTERNALS_SLOT == INTL_INTERNALS_OBJECT_SLOT, + "INTERNALS_SLOT must match self-hosting define for internals " + "object slot"); + + // Estimated memory use for UCollator (see IcuMemoryUsage). 
+ static constexpr size_t EstimatedMemoryUse = 1128; + + UCollator* getCollator() const { + const auto& slot = getFixedSlot(UCOLLATOR_SLOT); + if (slot.isUndefined()) { + return nullptr; + } + return static_cast<UCollator*>(slot.toPrivate()); + } + + void setCollator(UCollator* collator) { + setFixedSlot(UCOLLATOR_SLOT, PrivateValue(collator)); + } + + private: + static const JSClassOps classOps_; + static const ClassSpec classSpec_; + + static void finalize(JSFreeOp* fop, JSObject* obj); +}; + +/** + * Returns a new instance of the standard built-in Collator constructor. + * Self-hosted code cannot cache this constructor (as it does for others in + * Utilities.js) because it is initialized after self-hosted code is compiled. + * + * Usage: collator = intl_Collator(locales, options) + */ +extern MOZ_MUST_USE bool intl_Collator(JSContext* cx, unsigned argc, + JS::Value* vp); + +/** + * Returns an array with the collation type identifiers per Unicode + * Technical Standard 35, Unicode Locale Data Markup Language, for the + * collations supported for the given locale. "standard" and "search" are + * excluded. + * + * Usage: collations = intl_availableCollations(locale) + */ +extern MOZ_MUST_USE bool intl_availableCollations(JSContext* cx, unsigned argc, + JS::Value* vp); + +/** + * Compares x and y (which must be String values), and returns a number less + * than 0 if x < y, 0 if x = y, or a number greater than 0 if x > y according + * to the sort order for the locale and collation options of the given + * Collator. + * + * Spec: ECMAScript Internationalization API Specification, 10.3.2. + * + * Usage: result = intl_CompareStrings(collator, x, y) + */ +extern MOZ_MUST_USE bool intl_CompareStrings(JSContext* cx, unsigned argc, + JS::Value* vp); + +/** + * Returns true if the given locale sorts upper-case before lower-case + * characters. 
+ * + * Usage: result = intl_isUpperCaseFirst(locale) + */ +extern MOZ_MUST_USE bool intl_isUpperCaseFirst(JSContext* cx, unsigned argc, + JS::Value* vp); + +} // namespace js + +#endif /* builtin_intl_Collator_h */ diff --git a/js/src/builtin/intl/Collator.js b/js/src/builtin/intl/Collator.js new file mode 100644 index 0000000000..9558d27795 --- /dev/null +++ b/js/src/builtin/intl/Collator.js @@ -0,0 +1,398 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Portions Copyright Norbert Lindenberg 2011-2012. */ + +/** + * Compute an internal properties object from |lazyCollatorData|. + */ +function resolveCollatorInternals(lazyCollatorData) { + assert(IsObject(lazyCollatorData), "lazy data not an object?"); + + var internalProps = std_Object_create(null); + + var Collator = collatorInternalProperties; + + // Step 5. + internalProps.usage = lazyCollatorData.usage; + + // Steps 6-7. + var collatorIsSorting = lazyCollatorData.usage === "sort"; + var localeData = collatorIsSorting + ? Collator.sortLocaleData + : Collator.searchLocaleData; + + // Compute effective locale. + // Step 16. + var relevantExtensionKeys = Collator.relevantExtensionKeys; + + // Step 17. + var r = ResolveLocale("Collator", + lazyCollatorData.requestedLocales, + lazyCollatorData.opt, + relevantExtensionKeys, + localeData); + + // Step 18. + internalProps.locale = r.locale; + + // Step 19. + var collation = r.co; + + // Step 20. + if (collation === null) + collation = "default"; + + // Step 21. + internalProps.collation = collation; + + // Step 22. + internalProps.numeric = r.kn === "true"; + + // Step 23. + internalProps.caseFirst = r.kf; + + // Compute remaining collation options. + // Step 25. 
+ var s = lazyCollatorData.rawSensitivity; + if (s === undefined) { + // In theory the default sensitivity for the "search" collator is + // locale dependent; in reality the CLDR/ICU default strength is + // always tertiary. Therefore use "variant" as the default value for + // both collation modes. + s = "variant"; + } + + // Step 26. + internalProps.sensitivity = s; + + // Step 28. + internalProps.ignorePunctuation = lazyCollatorData.ignorePunctuation; + + // The caller is responsible for associating |internalProps| with the right + // object using |setInternalProperties|. + return internalProps; +} + +/** + * Returns an object containing the Collator internal properties of |obj|. + */ +function getCollatorInternals(obj) { + assert(IsObject(obj), "getCollatorInternals called with non-object"); + assert(GuardToCollator(obj) !== null, "getCollatorInternals called with non-Collator"); + + var internals = getIntlObjectInternals(obj); + assert(internals.type === "Collator", "bad type escaped getIntlObjectInternals"); + + // If internal properties have already been computed, use them. + var internalProps = maybeInternalProperties(internals); + if (internalProps) + return internalProps; + + // Otherwise it's time to fully create them. + internalProps = resolveCollatorInternals(internals.lazyData); + setInternalProperties(internals, internalProps); + return internalProps; +} + +/** + * Initializes an object as a Collator. + * + * This method is complicated a moderate bit by its implementing initialization + * as a *lazy* concept. Everything that must happen now, does -- but we defer + * all the work we can until the object is actually used as a Collator. This + * later work occurs in |resolveCollatorInternals|; steps not noted here occur + * there. + * + * Spec: ECMAScript Internationalization API Specification, 10.1.1. 
+ */ +function InitializeCollator(collator, locales, options) { + assert(IsObject(collator), "InitializeCollator called with non-object"); + assert(GuardToCollator(collator) != null, "InitializeCollator called with non-Collator"); + + // Lazy Collator data has the following structure: + // + // { + // requestedLocales: List of locales, + // usage: "sort" / "search", + // opt: // opt object computed in InitializeCollator + // { + // localeMatcher: "lookup" / "best fit", + // co: string matching a Unicode extension type / undefined + // kn: true / false / undefined, + // kf: "upper" / "lower" / "false" / undefined + // } + // rawSensitivity: "base" / "accent" / "case" / "variant" / undefined, + // ignorePunctuation: true / false + // } + // + // Note that lazy data is only installed as a final step of initialization, + // so every Collator lazy data object has *all* these properties, never a + // subset of them. + var lazyCollatorData = std_Object_create(null); + + // Step 1. + var requestedLocales = CanonicalizeLocaleList(locales); + lazyCollatorData.requestedLocales = requestedLocales; + + // Steps 2-3. + // + // If we ever need more speed here at startup, we should try to detect the + // case where |options === undefined| and then directly use the default + // value for each option. For now, just keep it simple. + if (options === undefined) + options = std_Object_create(null); + else + options = ToObject(options); + + // Compute options that impact interpretation of locale. + // Step 4. + var u = GetOption(options, "usage", "string", ["sort", "search"], "sort"); + lazyCollatorData.usage = u; + + // Step 8. + var opt = new Record(); + lazyCollatorData.opt = opt; + + // Steps 9-10. 
+ var matcher = GetOption(options, "localeMatcher", "string", ["lookup", "best fit"], "best fit"); + opt.localeMatcher = matcher; + + // https://github.com/tc39/ecma402/pull/459 + var collation = GetOption(options, "collation", "string", undefined, undefined); + if (collation !== undefined) + collation = intl_ValidateAndCanonicalizeUnicodeExtensionType(collation, "collation", "co"); + opt.co = collation; + + // Steps 11-13. + var numericValue = GetOption(options, "numeric", "boolean", undefined, undefined); + if (numericValue !== undefined) + numericValue = numericValue ? "true" : "false"; + opt.kn = numericValue; + + // Steps 14-15. + var caseFirstValue = GetOption(options, "caseFirst", "string", ["upper", "lower", "false"], undefined); + opt.kf = caseFirstValue; + + // Compute remaining collation options. + // Step 24. + var s = GetOption(options, "sensitivity", "string", + ["base", "accent", "case", "variant"], undefined); + lazyCollatorData.rawSensitivity = s; + + // Step 27. + var ip = GetOption(options, "ignorePunctuation", "boolean", undefined, false); + lazyCollatorData.ignorePunctuation = ip; + + // Step 29. + // + // We've done everything that must be done now: mark the lazy data as fully + // computed and install it. + initializeIntlObject(collator, "Collator", lazyCollatorData); +} + +/** + * Returns the subset of the given locale list for which this locale list has a + * matching (possibly fallback) locale. Locales appear in the same order in the + * returned list as in the input list. + * + * Spec: ECMAScript Internationalization API Specification, 10.2.2. + */ +function Intl_Collator_supportedLocalesOf(locales /*, options*/) { + var options = arguments.length > 1 ? arguments[1] : undefined; + + // Step 1. + var availableLocales = "Collator"; + + // Step 2. + var requestedLocales = CanonicalizeLocaleList(locales); + + // Step 3. + return SupportedLocales(availableLocales, requestedLocales, options); +} + +/** + * Collator internal properties. 
+ * + * Spec: ECMAScript Internationalization API Specification, 9.1 and 10.2.3. + */ +var collatorInternalProperties = { + sortLocaleData: collatorSortLocaleData, + searchLocaleData: collatorSearchLocaleData, + relevantExtensionKeys: ["co", "kf", "kn"], +}; + +/** + * Returns the actual locale used when a collator for |locale| is constructed. + */ +function collatorActualLocale(locale) { + assert(typeof locale === "string", "locale should be string"); + + // If |locale| is the default locale (e.g. da-DK), but only supported + // through a fallback (da), we need to get the actual locale before we + // can call intl_isUpperCaseFirst. Also see intl_BestAvailableLocale. + return BestAvailableLocaleIgnoringDefault("Collator", locale); +} + +/** + * Returns the default caseFirst values for the given locale. The first + * element in the returned array denotes the default value per ES2017 Intl, + * 9.1 Internal slots of Service Constructors. + */ +function collatorSortCaseFirst(locale) { + var actualLocale = collatorActualLocale(locale); + if (intl_isUpperCaseFirst(actualLocale)) + return ["upper", "false", "lower"]; + + // Default caseFirst values for all other languages. + return ["false", "lower", "upper"]; +} + +/** + * Returns the default caseFirst value for the given locale. + */ +function collatorSortCaseFirstDefault(locale) { + var actualLocale = collatorActualLocale(locale); + if (intl_isUpperCaseFirst(actualLocale)) + return "upper"; + + // Default caseFirst value for all other languages. + return "false"; +} + +function collatorSortLocaleData() { + /* eslint-disable object-shorthand */ + return { + co: intl_availableCollations, + kn: function() { + return ["false", "true"]; + }, + kf: collatorSortCaseFirst, + default: { + co: function() { + // The first element of the collations array must be |null| + // per ES2017 Intl, 10.2.3 Internal Slots. 
+ return null; + }, + kn: function() { + return "false"; + }, + kf: collatorSortCaseFirstDefault, + }, + }; + /* eslint-enable object-shorthand */ +} + +function collatorSearchLocaleData() { + /* eslint-disable object-shorthand */ + return { + co: function() { + return [null]; + }, + kn: function() { + return ["false", "true"]; + }, + kf: function() { + return ["false", "lower", "upper"]; + }, + default: { + co: function() { + return null; + }, + kn: function() { + return "false"; + }, + kf: function() { + return "false"; + }, + }, + }; + /* eslint-enable object-shorthand */ +} + +/** + * Create function to be cached and returned by Intl.Collator.prototype.compare. + * + * Spec: ECMAScript Internationalization API Specification, 10.3.3.1. + */ +function createCollatorCompare(collator) { + // This function is not inlined in $Intl_Collator_compare_get to avoid + // creating a call-object on each call to $Intl_Collator_compare_get. + return function(x, y) { + // Step 1 (implicit). + + // Step 2. + assert(IsObject(collator), "collatorCompareToBind called with non-object"); + assert(GuardToCollator(collator) !== null, "collatorCompareToBind called with non-Collator"); + + // Steps 3-6 + var X = ToString(x); + var Y = ToString(y); + + // Step 7. + return intl_CompareStrings(collator, X, Y); + }; +} + +/** + * Returns a function bound to this Collator that compares x (converted to a + * String value) and y (converted to a String value), + * and returns a number less than 0 if x < y, 0 if x = y, or a number greater + * than 0 if x > y according to the sort order for the locale and collation + * options of this Collator object. + * + * Spec: ECMAScript Internationalization API Specification, 10.3.3. + */ +// Uncloned functions with `$` prefix are allocated as extended function +// to store the original name in `_SetCanonicalName`. +function $Intl_Collator_compare_get() { + // Step 1. + var collator = this; + + // Steps 2-3. 
+ if (!IsObject(collator) || (collator = GuardToCollator(collator)) === null) + return callFunction(CallCollatorMethodIfWrapped, this, "$Intl_Collator_compare_get"); + + var internals = getCollatorInternals(collator); + + // Step 4. + if (internals.boundCompare === undefined) { + // Steps 4.a-c. + internals.boundCompare = createCollatorCompare(collator); + } + + // Step 5. + return internals.boundCompare; +} +_SetCanonicalName($Intl_Collator_compare_get, "get compare"); + +/** + * Returns the resolved options for a Collator object. + * + * Spec: ECMAScript Internationalization API Specification, 10.3.4. + */ +function Intl_Collator_resolvedOptions() { + // Step 1. + var collator = this; + + // Steps 2-3. + if (!IsObject(collator) || (collator = GuardToCollator(collator)) === null) + return callFunction(CallCollatorMethodIfWrapped, this, "Intl_Collator_resolvedOptions"); + + var internals = getCollatorInternals(collator); + + // Steps 4-5. + var result = { + locale: internals.locale, + usage: internals.usage, + sensitivity: internals.sensitivity, + ignorePunctuation: internals.ignorePunctuation, + collation: internals.collation, + numeric: internals.numeric, + caseFirst: internals.caseFirst, + }; + + // Step 6. + return result; +} diff --git a/js/src/builtin/intl/CommonFunctions.cpp b/js/src/builtin/intl/CommonFunctions.cpp new file mode 100644 index 0000000000..f3dcf80522 --- /dev/null +++ b/js/src/builtin/intl/CommonFunctions.cpp @@ -0,0 +1,147 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Operations used to implement multiple Intl.* classes. 
*/ + +#include "builtin/intl/CommonFunctions.h" + +#include "mozilla/Assertions.h" +#include "mozilla/Casting.h" +#include "mozilla/TextUtils.h" + +#include <algorithm> + +#include "gc/GCEnum.h" +#include "gc/Zone.h" +#include "gc/ZoneAllocator.h" +#include "js/friend/ErrorMessages.h" // js::GetErrorMessage, JSMSG_INTERNAL_INTL_ERROR +#include "js/Value.h" +#include "unicode/uformattedvalue.h" +#include "vm/JSContext.h" +#include "vm/JSObject.h" +#include "vm/SelfHosting.h" +#include "vm/Stack.h" + +#include "vm/JSObject-inl.h" + +bool js::intl::InitializeObject(JSContext* cx, JS::Handle<JSObject*> obj, + JS::Handle<PropertyName*> initializer, + JS::Handle<JS::Value> locales, + JS::Handle<JS::Value> options) { + FixedInvokeArgs<3> args(cx); + + args[0].setObject(*obj); + args[1].set(locales); + args[2].set(options); + + RootedValue ignored(cx); + if (!CallSelfHostedFunction(cx, initializer, JS::NullHandleValue, args, + &ignored)) { + return false; + } + + MOZ_ASSERT(ignored.isUndefined(), + "Unexpected return value from non-legacy Intl object initializer"); + return true; +} + +bool js::intl::LegacyInitializeObject(JSContext* cx, JS::Handle<JSObject*> obj, + JS::Handle<PropertyName*> initializer, + JS::Handle<JS::Value> thisValue, + JS::Handle<JS::Value> locales, + JS::Handle<JS::Value> options, + DateTimeFormatOptions dtfOptions, + JS::MutableHandle<JS::Value> result) { + FixedInvokeArgs<5> args(cx); + + args[0].setObject(*obj); + args[1].set(thisValue); + args[2].set(locales); + args[3].set(options); + args[4].setBoolean(dtfOptions == DateTimeFormatOptions::EnableMozExtensions); + + if (!CallSelfHostedFunction(cx, initializer, NullHandleValue, args, result)) { + return false; + } + + MOZ_ASSERT(result.isObject(), + "Legacy Intl object initializer must return an object"); + return true; +} + +JSObject* js::intl::GetInternalsObject(JSContext* cx, + JS::Handle<JSObject*> obj) { + FixedInvokeArgs<1> args(cx); + + args[0].setObject(*obj); + + RootedValue v(cx); + if 
(!js::CallSelfHostedFunction(cx, cx->names().getInternals, NullHandleValue, + args, &v)) { + return nullptr; + } + + return &v.toObject(); +} + +void js::intl::ReportInternalError(JSContext* cx) { + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, + JSMSG_INTERNAL_INTL_ERROR); +} + +const js::intl::OldStyleLanguageTagMapping + js::intl::oldStyleLanguageTagMappings[] = { + {"pa-PK", "pa-Arab-PK"}, {"zh-CN", "zh-Hans-CN"}, + {"zh-HK", "zh-Hant-HK"}, {"zh-SG", "zh-Hans-SG"}, + {"zh-TW", "zh-Hant-TW"}, +}; + +js::UniqueChars js::intl::EncodeLocale(JSContext* cx, JSString* locale) { + MOZ_ASSERT(locale->length() > 0); + + js::UniqueChars chars = EncodeAscii(cx, locale); + +#ifdef DEBUG + // Ensure the returned value contains only valid BCP 47 characters. + // (Lambdas can't be placed inside MOZ_ASSERT, so move the checks into an + // #ifdef block.) + if (chars) { + auto alnumOrDash = [](char c) { + return mozilla::IsAsciiAlphanumeric(c) || c == '-'; + }; + MOZ_ASSERT(mozilla::IsAsciiAlpha(chars[0])); + MOZ_ASSERT( + std::all_of(chars.get(), chars.get() + locale->length(), alnumOrDash)); + } +#endif + + return chars; +} + +void js::intl::AddICUCellMemory(JSObject* obj, size_t nbytes) { + // Account the (estimated) number of bytes allocated by an ICU object against + // the JSObject's zone.
+ AddCellMemory(obj, nbytes, MemoryUse::ICUObject); +} + +void js::intl::RemoveICUCellMemory(JSFreeOp* fop, JSObject* obj, + size_t nbytes) { + fop->removeCellMemory(obj, nbytes, MemoryUse::ICUObject); +} + +JSString* js::intl::FormattedValueToString( + JSContext* cx, const UFormattedValue* formattedValue) { + UErrorCode status = U_ZERO_ERROR; + int32_t strLength; + const char16_t* str = ufmtval_getString(formattedValue, &strLength, &status); + if (U_FAILURE(status)) { + ReportInternalError(cx); + return nullptr; + } + + return NewStringCopyN<CanGC>(cx, str, + mozilla::AssertedCast<uint32_t>(strLength)); +} diff --git a/js/src/builtin/intl/CommonFunctions.h b/js/src/builtin/intl/CommonFunctions.h new file mode 100644 index 0000000000..f0208f14c4 --- /dev/null +++ b/js/src/builtin/intl/CommonFunctions.h @@ -0,0 +1,169 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef builtin_intl_CommonFunctions_h +#define builtin_intl_CommonFunctions_h + +#include "mozilla/Assertions.h" + +#include <stddef.h> +#include <stdint.h> +#include <string.h> +#include <type_traits> + +#include "js/RootingAPI.h" +#include "js/Vector.h" +#include "unicode/utypes.h" +#include "vm/StringType.h" + +struct UFormattedValue; + +namespace js { + +namespace intl { + +/** + * Initialize a new Intl.* object using the named self-hosted function. + * Returns false if calling the self-hosted function fails. + */ +extern bool InitializeObject(JSContext* cx, JS::Handle<JSObject*> obj, + JS::Handle<PropertyName*> initializer, + JS::Handle<JS::Value> locales, + JS::Handle<JS::Value> options); + +enum class DateTimeFormatOptions { + Standard, + EnableMozExtensions, +}; + +/** + * Initialize an existing object as an Intl.* object using the named + * self-hosted function.
This is only for a few old Intl.* constructors, for + * legacy reasons -- new ones should use the function above instead. + */ +extern bool LegacyInitializeObject(JSContext* cx, JS::Handle<JSObject*> obj, + JS::Handle<PropertyName*> initializer, + JS::Handle<JS::Value> thisValue, + JS::Handle<JS::Value> locales, + JS::Handle<JS::Value> options, + DateTimeFormatOptions dtfOptions, + JS::MutableHandle<JS::Value> result); + +/** + * Returns the object holding the internal properties for obj. + */ +extern JSObject* GetInternalsObject(JSContext* cx, JS::Handle<JSObject*> obj); + +/** Report an Intl internal error not directly tied to a spec step. */ +extern void ReportInternalError(JSContext* cx); + +static inline bool StringsAreEqual(const char* s1, const char* s2) { + return !strcmp(s1, s2); +} + +/** + * The last-ditch locale is used if none of the available locales satisfies a + * request. "en-GB" is used based on the assumptions that English is the most + * common second language, that both en-GB and en-US are normally available in + * an implementation, and that en-GB is more representative of the English used + * in other locales. + */ +static inline const char* LastDitchLocale() { return "en-GB"; } + +/** + * Certain old, commonly-used language tags that lack a script are nonetheless + * expected to imply one. This struct maps such an old-style tag to its modern + * equivalent. + */ +struct OldStyleLanguageTagMapping { + const char* const oldStyle; + const char* const modernStyle; + + // Provide a constructor to catch missing initializers in the mappings array.
+ constexpr OldStyleLanguageTagMapping(const char* oldStyle, + const char* modernStyle) + : oldStyle(oldStyle), modernStyle(modernStyle) {} +}; + +extern const OldStyleLanguageTagMapping oldStyleLanguageTagMappings[5]; + +static inline const char* IcuLocale(const char* locale) { + if (StringsAreEqual(locale, "und")) { + return ""; // ICU root locale + } + + return locale; +} + +extern UniqueChars EncodeLocale(JSContext* cx, JSString* locale); + +// Starting with ICU 59, UChar defaults to char16_t. +static_assert( + std::is_same_v<UChar, char16_t>, + "SpiderMonkey doesn't support redefining UChar to a different type"); + +// The inline capacity we use for a Vector<char16_t>. Use this to ensure that +// our uses of ICU string functions, below and elsewhere, will try to fill the +// buffer's entire inline capacity before growing it and heap-allocating. +constexpr size_t INITIAL_CHAR_BUFFER_SIZE = 32; + +template <typename ICUStringFunction, typename CharT, size_t InlineCapacity> +static int32_t CallICU(JSContext* cx, const ICUStringFunction& strFn, + Vector<CharT, InlineCapacity>& chars) { + MOZ_ASSERT(chars.length() >= InlineCapacity); + + UErrorCode status = U_ZERO_ERROR; + int32_t size = strFn(chars.begin(), chars.length(), &status); + if (status == U_BUFFER_OVERFLOW_ERROR) { + MOZ_ASSERT(size >= 0); + + // Some ICU functions (e.g. uloc_getDisplayName) return one less character + // than the actual minimum size when U_BUFFER_OVERFLOW_ERROR is raised, + // resulting in later reporting U_STRING_NOT_TERMINATED_WARNING. So add + // one here and then assert U_STRING_NOT_TERMINATED_WARNING isn't raised.
+ size++; + + if (!chars.resize(size_t(size))) { + return -1; + } + status = U_ZERO_ERROR; + size = strFn(chars.begin(), size, &status); + + MOZ_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING); + } + if (U_FAILURE(status)) { + ReportInternalError(cx); + return -1; + } + + MOZ_ASSERT(size >= 0); + return size; +} + +template <typename ICUStringFunction> +static JSString* CallICU(JSContext* cx, const ICUStringFunction& strFn) { + Vector<char16_t, INITIAL_CHAR_BUFFER_SIZE> chars(cx); + MOZ_ALWAYS_TRUE(chars.resize(INITIAL_CHAR_BUFFER_SIZE)); + + int32_t size = CallICU(cx, strFn, chars); + if (size < 0) { + return nullptr; + } + + return NewStringCopyN<CanGC>(cx, chars.begin(), size_t(size)); +} + +void AddICUCellMemory(JSObject* obj, size_t nbytes); + +void RemoveICUCellMemory(JSFreeOp* fop, JSObject* obj, size_t nbytes); + +JSString* FormattedValueToString(JSContext* cx, + const UFormattedValue* formattedValue); + +} // namespace intl + +} // namespace js + +#endif /* builtin_intl_CommonFunctions_h */ diff --git a/js/src/builtin/intl/CommonFunctions.js b/js/src/builtin/intl/CommonFunctions.js new file mode 100644 index 0000000000..e7f205147c --- /dev/null +++ b/js/src/builtin/intl/CommonFunctions.js @@ -0,0 +1,844 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Portions Copyright Norbert Lindenberg 2011-2012. */ + +#ifdef DEBUG +#define assertIsValidAndCanonicalLanguageTag(locale, desc) \ + do { \ + let canonical = intl_TryValidateAndCanonicalizeLanguageTag(locale); \ + assert(canonical !== null, \ + `${desc} is a structurally valid language tag`); \ + assert(canonical === locale, \ + `${desc} is a canonicalized language tag`); \ + } while (false) +#else +#define assertIsValidAndCanonicalLanguageTag(locale, desc) ; // Elided assertion in non-DEBUG builds.
+#endif + +/** + * Returns the start index of a "Unicode locale extension sequence", which the + * specification defines as: "any substring of a language tag that starts with + * a separator '-' and the singleton 'u' and includes the maximum sequence of + * following non-singleton subtags and their preceding '-' separators." + * + * Alternatively, this may be defined as: the components of a language tag that + * match the `unicode_locale_extensions` production in UTS 35. + * + * Spec: ECMAScript Internationalization API Specification, 6.2.1. + */ +function startOfUnicodeExtensions(locale) { + assert(typeof locale === "string", "locale is a string"); + + // Search for "-u-" marking the start of a Unicode extension sequence. + var start = callFunction(std_String_indexOf, locale, "-u-"); + if (start < 0) + return -1; + + // And search for "-x-" marking the start of any privateuse component to + // handle the case when "-u-" was only found within a privateuse subtag. + var privateExt = callFunction(std_String_indexOf, locale, "-x-"); + if (privateExt >= 0 && privateExt < start) + return -1; + + return start; +} + +/** + * Returns the end index of a Unicode locale extension sequence. + * + * |start| must be the index of the sequence's leading "-u-". The result is + * the index of the "-" that begins the next singleton or privateuse subtag, + * or |locale.length| if there is none. + */ +function endOfUnicodeExtensions(locale, start) { + assert(typeof locale === "string", "locale is a string"); + assert(0 <= start && start < locale.length, "start is an index into locale"); + assert(Substring(locale, start, 3) === "-u-", "start points to Unicode extension sequence"); + + #define HYPHEN 0x2D + assert(std_String_fromCharCode(HYPHEN) === "-", + "code unit constant should match the expected character"); + + // Search for the start of the next singleton or privateuse subtag. + // + // Begin searching after the smallest possible Unicode locale extension + // sequence, namely |"-u-" 2alphanum|. End searching once the remaining + // characters can't fit the smallest possible singleton or privateuse + // subtag, namely |"-x-" alphanum|.
Note the reduced end-limit means + // indexing inside the loop is always in-range. + for (var i = start + 5, end = locale.length - 4; i <= end; i++) { + if (callFunction(std_String_charCodeAt, locale, i) !== HYPHEN) + continue; + if (callFunction(std_String_charCodeAt, locale, i + 2) === HYPHEN) + return i; + + // Skip over (i + 1) and (i + 2) because we've just verified they + // aren't "-", so the next possible delimiter can only be at (i + 3). + i += 2; + } + + #undef HYPHEN + + // If no singleton or privateuse subtag was found, the Unicode extension + // sequence extends until the end of the string. + return locale.length; +} + +/** + * Removes Unicode locale extension sequences from the given language tag. + */ +function removeUnicodeExtensions(locale) { + assertIsValidAndCanonicalLanguageTag(locale, "locale with possible Unicode extension"); + + var start = startOfUnicodeExtensions(locale); + if (start < 0) + return locale; + + var end = endOfUnicodeExtensions(locale, start); + + var left = Substring(locale, 0, start); + var right = Substring(locale, end, locale.length - end); + var combined = left + right; + + assertIsValidAndCanonicalLanguageTag(combined, "the recombined locale"); + assert(startOfUnicodeExtensions(combined) < 0, + "recombination failed to remove all Unicode locale extension sequences"); + + return combined; +} + +/** + * Returns Unicode locale extension sequences from the given language tag. + * + * The tag must contain such a sequence; the returned substring begins at the + * sequence's leading "-u-". + */ +function getUnicodeExtensions(locale) { + assertIsValidAndCanonicalLanguageTag(locale, "locale with Unicode extension"); + + var start = startOfUnicodeExtensions(locale); + assert(start >= 0, "start of Unicode extension sequence not found"); + var end = endOfUnicodeExtensions(locale, start); + + return Substring(locale, start, end - start); +} + +/** + * Returns true if the input contains only ASCII alphabetical characters.
+ */ +function IsASCIIAlphaString(s) { + assert(typeof s === "string", "IsASCIIAlphaString"); + + for (var i = 0; i < s.length; i++) { + var c = callFunction(std_String_charCodeAt, s, i); + if (!((0x41 <= c && c <= 0x5A) || (0x61 <= c && c <= 0x7A))) + return false; + } + return true; +} + +var localeCache = { + runtimeDefaultLocale: undefined, + defaultLocale: undefined, +}; + +/** + * Returns the BCP 47 language tag for the host environment's current locale. + * + * Spec: ECMAScript Internationalization API Specification, 6.2.4. + */ +function DefaultLocale() { + if (IsRuntimeDefaultLocale(localeCache.runtimeDefaultLocale)) + return localeCache.defaultLocale; + + // If we didn't have a cache hit, compute the candidate default locale. + var runtimeDefaultLocale = RuntimeDefaultLocale(); + var locale = intl_supportedLocaleOrFallback(runtimeDefaultLocale); + + assertIsValidAndCanonicalLanguageTag(locale, "the computed default locale"); + assert(startOfUnicodeExtensions(locale) < 0, + "the computed default locale must not contain a Unicode extension sequence"); + + // Cache the computed locale until the runtime default locale changes. + localeCache.defaultLocale = locale; + localeCache.runtimeDefaultLocale = runtimeDefaultLocale; + + return locale; +} + +/** + * Canonicalizes a locale list. + * + * Spec: ECMAScript Internationalization API Specification, 9.2.1. + */ +function CanonicalizeLocaleList(locales) { + // Step 1. + if (locales === undefined) + return []; + + // Step 3 (and the remaining steps). When the helper returns a non-null tag + // for |locales| itself, the result is the one-element list of that tag. + var tag = intl_ValidateAndCanonicalizeLanguageTag(locales, false); + if (tag !== null) { + assert(typeof tag === "string", + "intl_ValidateAndCanonicalizeLanguageTag returns a string value"); + return [tag]; + } + + // Step 2. + var seen = []; + + // Step 4. + var O = ToObject(locales); + + // Step 5. + var len = ToLength(O.length); + + // Step 6. + var k = 0; + + // Step 7. + while (k < len) { + // Steps 7.a-c. + if (k in O) { + // Step 7.c.i.
+ var kValue = O[k]; + + // Step 7.c.ii. + if (!(typeof kValue === "string" || IsObject(kValue))) + ThrowTypeError(JSMSG_INVALID_LOCALES_ELEMENT); + + // Steps 7.c.iii-iv. + var tag = intl_ValidateAndCanonicalizeLanguageTag(kValue, true); + assert(typeof tag === "string", + "ValidateAndCanonicalizeLanguageTag returns a string value"); + + // Step 7.c.v. Append |tag| only if it is not already in |seen|. + if (callFunction(ArrayIndexOf, seen, tag) === -1) + _DefineDataProperty(seen, seen.length, tag); + } + + // Step 7.d. + k++; + } + + // Step 8. + return seen; +} + +/** + * Compares a BCP 47 language tag against the locales in availableLocales + * and returns the best available match. Uses the fallback + * mechanism of RFC 4647, section 3.4. + * + * Spec: ECMAScript Internationalization API Specification, 9.2.2. + * Spec: RFC 4647, section 3.4. + */ +function BestAvailableLocale(availableLocales, locale) { + return intl_BestAvailableLocale(availableLocales, locale, DefaultLocale()); +} + +/** + * Identical to BestAvailableLocale, but does not consider the default locale + * during computation. + */ +function BestAvailableLocaleIgnoringDefault(availableLocales, locale) { + return intl_BestAvailableLocale(availableLocales, locale, null); +} + +/** + * Compares a BCP 47 language priority list against the set of locales in + * availableLocales and determines the best available language to meet the + * request. Options specified through Unicode extension subsequences are + * ignored in the lookup, but information about such subsequences is returned + * separately. + * + * This variant is based on the Lookup algorithm of RFC 4647 section 3.4. + * + * Spec: ECMAScript Internationalization API Specification, 9.2.3. + * Spec: RFC 4647, section 3.4. + */ +function LookupMatcher(availableLocales, requestedLocales) { + // Step 1. + var result = new Record(); + + // Step 2. + for (var i = 0; i < requestedLocales.length; i++) { + var locale = requestedLocales[i]; + + // Step 2.a.
+ var noExtensionsLocale = removeUnicodeExtensions(locale); + + // Step 2.b. + var availableLocale = BestAvailableLocale(availableLocales, noExtensionsLocale); + + // Step 2.c. + if (availableLocale !== undefined) { + // Step 2.c.i. + result.locale = availableLocale; + + // Step 2.c.ii. + if (locale !== noExtensionsLocale) + result.extension = getUnicodeExtensions(locale); + + // Step 2.c.iii. + return result; + } + } + + // Steps 3-4. + result.locale = DefaultLocale(); + + // Step 5. + return result; +} + +/** + * Compares a BCP 47 language priority list against the set of locales in + * availableLocales and determines the best available language to meet the + * request. Options specified through Unicode extension subsequences are + * ignored in the lookup, but information about such subsequences is returned + * separately. + * + * Spec: ECMAScript Internationalization API Specification, 9.2.4. + */ +function BestFitMatcher(availableLocales, requestedLocales) { + // This implementation doesn't have anything better than the lookup matcher. + return LookupMatcher(availableLocales, requestedLocales); +} + +/** + * Returns the Unicode extension value subtags for the requested key subtag. + * + * Spec: ECMAScript Internationalization API Specification, 9.2.5. + */ +function UnicodeExtensionValue(extension, key) { + assert(typeof extension === "string", "extension is a string value"); + assert(callFunction(std_String_startsWith, extension, "-u-") && + getUnicodeExtensions("und" + extension) === extension, + "extension is a Unicode extension subtag"); + assert(typeof key === "string", "key is a string value"); + + // Step 1. + assert(key.length === 2, "key is a Unicode extension key subtag"); + + // Step 2. + var size = extension.length; + + // Step 3. + var searchValue = "-" + key + "-"; + + // Step 4. + var pos = callFunction(std_String_indexOf, extension, searchValue); + + // Step 5. + if (pos !== -1) { + // Step 5.a. + var start = pos + 4; + + // Step 5.b. + var end = start; + + // Step 5.c.
+ var k = start; + + // Steps 5.d-e. + while (true) { + // Step 5.e.i. + var e = callFunction(std_String_indexOf, extension, "-", k); + + // Step 5.e.ii. + var len = e === -1 ? size - k : e - k; + + // Step 5.e.iii. + if (len === 2) + break; + + // Step 5.e.iv. + if (e === -1) { + end = size; + break; + } + + // Step 5.e.v. + end = e; + k = e + 1; + } + + // Step 5.f. + return callFunction(String_substring, extension, start, end); + } + + // Step 6. + searchValue = "-" + key; + + // Steps 7-8. + if (callFunction(std_String_endsWith, extension, searchValue)) + return ""; + + // Step 9 (implicit): fall off the end and return undefined. +} + +/** + * Compares a BCP 47 language priority list against availableLocales and + * determines the best available language to meet the request. Options specified + * through Unicode extension subsequences are negotiated separately, taking the + * caller's relevant extensions and locale data as well as client-provided + * options into consideration. + * + * Spec: ECMAScript Internationalization API Specification, 9.2.6. + */ +function ResolveLocale(availableLocales, requestedLocales, options, relevantExtensionKeys, localeData) { + // Steps 1-3. + var matcher = options.localeMatcher; + var r = (matcher === "lookup") + ? LookupMatcher(availableLocales, requestedLocales) + : BestFitMatcher(availableLocales, requestedLocales); + + // Step 4. + var foundLocale = r.locale; + var extension = r.extension; + + // Step 5. + var result = new Record(); + + // Step 6. + result.dataLocale = foundLocale; + + // Step 7. + var supportedExtension = "-u"; + + // In this implementation, localeData is a function, not an object. + var localeDataProvider = localeData(); + + // Step 8. + for (var i = 0; i < relevantExtensionKeys.length; i++) { + var key = relevantExtensionKeys[i]; + + // Steps 8.a-h (The locale data is only computed when needed). + var keyLocaleData = undefined; + var value = undefined; + + // Locale tag may override. + + // Step 8.g.
+ var supportedExtensionAddition = ""; + + // Step 8.h. + if (extension !== undefined) { + // Step 8.h.i. + var requestedValue = UnicodeExtensionValue(extension, key); + + // Step 8.h.ii. + if (requestedValue !== undefined) { + // Steps 8.a-d. + keyLocaleData = callFunction(localeDataProvider[key], null, foundLocale); + + // Step 8.h.ii.1. + if (requestedValue !== "") { + // Step 8.h.ii.1.a. + if (callFunction(ArrayIndexOf, keyLocaleData, requestedValue) !== -1) { + value = requestedValue; + supportedExtensionAddition = "-" + key + "-" + value; + } + } else { + // Step 8.h.ii.2. + + // According to the LDML spec, if there's no type value, + // and true is an allowed value, it's used. + if (callFunction(ArrayIndexOf, keyLocaleData, "true") !== -1) { + value = "true"; + supportedExtensionAddition = "-" + key; + } + } + } + } + + // Options override all. + + // Step 8.i.i. + var optionsValue = options[key]; + + // Step 8.i.ii. + assert(typeof optionsValue === "string" || + optionsValue === undefined || + optionsValue === null, + "unexpected type for options value"); + + // Steps 8.i, 8.i.iii.1. + if (optionsValue !== undefined && optionsValue !== value) { + // Steps 8.a-d. + if (keyLocaleData === undefined) + keyLocaleData = callFunction(localeDataProvider[key], null, foundLocale); + + // Step 8.i.iii. A supported options value wins and drops any extension + // addition contributed by the locale tag. + if (callFunction(ArrayIndexOf, keyLocaleData, optionsValue) !== -1) { + value = optionsValue; + supportedExtensionAddition = ""; + } + } + + // Locale data provides the default value. + if (value === undefined) { + // Steps 8.a-f. + value = keyLocaleData === undefined + ? callFunction(localeDataProvider.default[key], null, foundLocale) + : keyLocaleData[0]; + } + + // Step 8.j. + assert(typeof value === "string" || value === null, "unexpected locale data value"); + result[key] = value; + + // Step 8.k. + supportedExtension += supportedExtensionAddition; + } + + // Step 9.
+ if (supportedExtension.length > 2) + foundLocale = addUnicodeExtension(foundLocale, supportedExtension); + + // Step 10. + result.locale = foundLocale; + + // Step 11. + return result; +} + +/** + * Adds a Unicode extension subtag to a locale. + * + * Spec: ECMAScript Internationalization API Specification, 9.2.6. + */ +function addUnicodeExtension(locale, extension) { + assert(typeof locale === "string", "locale is a string value"); + assert(!callFunction(std_String_startsWith, locale, "x-"), + "unexpected privateuse-only locale"); + assert(startOfUnicodeExtensions(locale) < 0, + "Unicode extension subtag already present in locale"); + + assert(typeof extension === "string", "extension is a string value"); + assert(callFunction(std_String_startsWith, extension, "-u-") && + getUnicodeExtensions("und" + extension) === extension, + "extension is a Unicode extension subtag"); + + // Step 9.a. + var privateIndex = callFunction(std_String_indexOf, locale, "-x-"); + + // Steps 9.b-c. Append the extension, or insert it before the privateuse + // sequence when the locale has one. + if (privateIndex === -1) { + locale += extension; + } else { + var preExtension = callFunction(String_substring, locale, 0, privateIndex); + var postExtension = callFunction(String_substring, locale, privateIndex); + locale = preExtension + extension + postExtension; + } + + // Steps 9.d-e (Step 9.e is not required in this implementation, because we don't canonicalize + // Unicode extension subtags). + assertIsValidAndCanonicalLanguageTag(locale, "locale after concatenation"); + + return locale; +} + +/** + * Returns the subset of requestedLocales for which availableLocales has a + * matching (possibly fallback) locale. Locales appear in the same order in the + * returned list as in the input list. + * + * Spec: ECMAScript Internationalization API Specification, 9.2.7. + */ +function LookupSupportedLocales(availableLocales, requestedLocales) { + // Step 1. + var subset = []; + + // Step 2.
+ for (var i = 0; i < requestedLocales.length; i++) { + var locale = requestedLocales[i]; + + // Step 2.a. + var noExtensionsLocale = removeUnicodeExtensions(locale); + + // Step 2.b. + var availableLocale = BestAvailableLocale(availableLocales, noExtensionsLocale); + + // Step 2.c. + if (availableLocale !== undefined) + _DefineDataProperty(subset, subset.length, locale); + } + + // Step 3. + return subset; +} + +/** + * Returns the subset of requestedLocales for which availableLocales has a + * matching (possibly fallback) locale. Locales appear in the same order in the + * returned list as in the input list. + * + * Spec: ECMAScript Internationalization API Specification, 9.2.8. + */ +function BestFitSupportedLocales(availableLocales, requestedLocales) { + // This implementation doesn't have anything better than the lookup algorithm. + return LookupSupportedLocales(availableLocales, requestedLocales); +} + +/** + * Returns the subset of requestedLocales for which availableLocales has a + * matching (possibly fallback) locale. Locales appear in the same order in the + * returned list as in the input list. + * + * Spec: ECMAScript Internationalization API Specification, 9.2.9. + */ +function SupportedLocales(availableLocales, requestedLocales, options) { + // Step 1. + var matcher; + if (options !== undefined) { + // Step 1.a. + options = ToObject(options); + + // Step 1.b + matcher = options.localeMatcher; + if (matcher !== undefined) { + matcher = ToString(matcher); + if (matcher !== "lookup" && matcher !== "best fit") + ThrowRangeError(JSMSG_INVALID_LOCALE_MATCHER, matcher); + } + } + + // Steps 2-5. + return (matcher === undefined || matcher === "best fit") + ? BestFitSupportedLocales(availableLocales, requestedLocales) + : LookupSupportedLocales(availableLocales, requestedLocales); +} + +/** + * Extracts a property value from the provided options object, converts it to + * the required type, checks whether it is one of a list of allowed values, + * and fills in a fallback value if necessary.
+ * + * Spec: ECMAScript Internationalization API Specification, 9.2.10. + */ +function GetOption(options, property, type, values, fallback) { + // Step 1. + var value = options[property]; + + // Step 2. + if (value !== undefined) { + // Steps 2.a-c. + if (type === "boolean") + value = ToBoolean(value); + else if (type === "string") + value = ToString(value); + else + assert(false, "GetOption"); + + // Step 2.d. + if (values !== undefined && callFunction(ArrayIndexOf, values, value) === -1) + ThrowRangeError(JSMSG_INVALID_OPTION_VALUE, property, `"${value}"`); + + // Step 2.e. + return value; + } + + // Step 3. + return fallback; +} + +/** + * The abstract operation DefaultNumberOption converts value to a Number value, + * checks whether it is in the allowed range, and fills in a fallback value if + * necessary. + * + * Throws a RangeError when the converted value is NaN or lies outside + * [minimum, maximum]. + * + * Spec: ECMAScript Internationalization API Specification, 9.2.11. + */ +function DefaultNumberOption(value, minimum, maximum, fallback) { + assert(typeof minimum === "number" && (minimum | 0) === minimum, "DefaultNumberOption"); + assert(typeof maximum === "number" && (maximum | 0) === maximum, "DefaultNumberOption"); + assert(fallback === undefined || (typeof fallback === "number" && (fallback | 0) === fallback), + "DefaultNumberOption"); + assert(fallback === undefined || (minimum <= fallback && fallback <= maximum), + "DefaultNumberOption"); + + // Step 1. + if (value === undefined) + return fallback; + + // Step 2. + value = ToNumber(value); + + // Step 3. + if (Number_isNaN(value) || value < minimum || value > maximum) + ThrowRangeError(JSMSG_INVALID_DIGITS_VALUE, value); + + // Step 4. + // Apply bitwise-or to convert -0 to +0 per ES2017, 5.2 and to ensure the + // result is an int32 value. + return std_Math_floor(value) | 0; +} + +/** + * Extracts a property value from the provided options object, converts it to a + * Number value, checks whether it is in the allowed range, and fills in a + * fallback value if necessary.
+ * + * Spec: ECMAScript Internationalization API Specification, 9.2.12. + */ +function GetNumberOption(options, property, minimum, maximum, fallback) { + // Steps 1-2. + return DefaultNumberOption(options[property], minimum, maximum, fallback); +} + +// Symbols in the self-hosting compartment can't be cloned, use a separate +// object to hold the actual symbol value. +// TODO: Can we add support to clone symbols? +var intlFallbackSymbolHolder = { value: undefined }; + +/** + * The [[FallbackSymbol]] symbol of the %Intl% intrinsic object. + * + * This symbol is used to implement the legacy constructor semantics for + * Intl.DateTimeFormat and Intl.NumberFormat. + * + * The symbol is created on first use and cached in |intlFallbackSymbolHolder|. + */ +function intlFallbackSymbol() { + var fallbackSymbol = intlFallbackSymbolHolder.value; + if (!fallbackSymbol) { + fallbackSymbol = std_Symbol("IntlLegacyConstructedSymbol"); + intlFallbackSymbolHolder.value = fallbackSymbol; + } + return fallbackSymbol; +} + +/** + * Initializes the INTL_INTERNALS_OBJECT_SLOT of the given object. + */ +function initializeIntlObject(obj, type, lazyData) { + assert(IsObject(obj), "Non-object passed to initializeIntlObject"); + assert((type === "Collator" && GuardToCollator(obj) !== null) || + (type === "DateTimeFormat" && GuardToDateTimeFormat(obj) !== null) || + (type === "DisplayNames" && GuardToDisplayNames(obj) !== null) || + (type === "ListFormat" && GuardToListFormat(obj) !== null) || + (type === "NumberFormat" && GuardToNumberFormat(obj) !== null) || + (type === "PluralRules" && GuardToPluralRules(obj) !== null) || + (type === "RelativeTimeFormat" && GuardToRelativeTimeFormat(obj) !== null), + "type must match the object's class"); + assert(IsObject(lazyData), "non-object lazy data"); + + // The meaning of an internals object for an object |obj| is as follows. + // + // The .type property indicates the type of Intl object that |obj| is.
It + // must be one of: + // - Collator + // - DateTimeFormat + // - DisplayNames + // - ListFormat + // - NumberFormat + // - PluralRules + // - RelativeTimeFormat + // + // The .lazyData property stores information needed to compute -- without + // observable side effects -- the actual internal Intl properties of + // |obj|. If it is non-null, then the actual internal properties haven't + // been computed, and .lazyData must be processed by + // |setInternalProperties| before internal Intl property values are + // available. If it is null, then the .internalProps property contains an + // object whose properties are the internal Intl properties of |obj|. + + var internals = std_Object_create(null); + internals.type = type; + internals.lazyData = lazyData; + internals.internalProps = null; + + assert(UnsafeGetReservedSlot(obj, INTL_INTERNALS_OBJECT_SLOT) === undefined, + "Internal slot already initialized?"); + UnsafeSetReservedSlot(obj, INTL_INTERNALS_OBJECT_SLOT, internals); +} + +/** + * Set the internal properties object for an |internals| object previously + * associated with lazy data. After this call .internalProps holds + * |internalProps| and .lazyData is null. + */ +function setInternalProperties(internals, internalProps) { + assert(IsObject(internals.lazyData), "lazy data must exist already"); + assert(IsObject(internalProps), "internalProps argument should be an object"); + + // Set in reverse order so that the .lazyData nulling is a barrier. + internals.internalProps = internalProps; + internals.lazyData = null; +} + +/** + * Get the existing internal properties out of a non-newborn |internals|, or + * null if none have been computed.
+ */ +function maybeInternalProperties(internals) { + assert(IsObject(internals), "non-object passed to maybeInternalProperties"); + var lazyData = internals.lazyData; + // Lazy data is still pending, so nothing has been computed yet. + if (lazyData) + return null; + assert(IsObject(internals.internalProps), "missing lazy data and computed internals"); + return internals.internalProps; +} + +/** + * Return |obj|'s internals object (*not* the object holding its internal + * properties!), with structure specified above. + * + * Spec: ECMAScript Internationalization API Specification, 10.3. + * Spec: ECMAScript Internationalization API Specification, 11.3. + * Spec: ECMAScript Internationalization API Specification, 12.3. + */ +function getIntlObjectInternals(obj) { + assert(IsObject(obj), "getIntlObjectInternals called with non-Object"); + assert(GuardToCollator(obj) !== null || + GuardToDateTimeFormat(obj) !== null || + GuardToDisplayNames(obj) !== null || + GuardToListFormat(obj) !== null || + GuardToNumberFormat(obj) !== null || + GuardToPluralRules(obj) !== null || + GuardToRelativeTimeFormat(obj) !== null, + "getIntlObjectInternals called with non-Intl object"); + + var internals = UnsafeGetReservedSlot(obj, INTL_INTERNALS_OBJECT_SLOT); + + assert(IsObject(internals), "internals not an object"); + assert(hasOwn("type", internals), "missing type"); + assert((internals.type === "Collator" && GuardToCollator(obj) !== null) || + (internals.type === "DateTimeFormat" && GuardToDateTimeFormat(obj) !== null) || + (internals.type === "DisplayNames" && GuardToDisplayNames(obj) !== null) || + (internals.type === "ListFormat" && GuardToListFormat(obj) !== null) || + (internals.type === "NumberFormat" && GuardToNumberFormat(obj) !== null) || + (internals.type === "PluralRules" && GuardToPluralRules(obj) !== null) || + (internals.type === "RelativeTimeFormat" && GuardToRelativeTimeFormat(obj) !== null), + "type must match the object's class"); + assert(hasOwn("lazyData", internals), "missing lazyData"); + assert(hasOwn("internalProps",
internals), "missing internalProps"); + + return internals; +} + +/** + * Get the internal properties of known-Intl object |obj|. For use only by + * C++ code that knows what it's doing! + */ +function getInternals(obj) { + var internals = getIntlObjectInternals(obj); + + // If internal properties have already been computed, use them. + var internalProps = maybeInternalProperties(internals); + if (internalProps) + return internalProps; + + // Otherwise it's time to fully create them. + var type = internals.type; + if (type === "Collator") + internalProps = resolveCollatorInternals(internals.lazyData); + else if (type === "DateTimeFormat") + internalProps = resolveDateTimeFormatInternals(internals.lazyData); + else if (type === "DisplayNames") + internalProps = resolveDisplayNamesInternals(internals.lazyData); + else if (type === "ListFormat") + internalProps = resolveListFormatInternals(internals.lazyData); + else if (type === "NumberFormat") + internalProps = resolveNumberFormatInternals(internals.lazyData); + else if (type === "PluralRules") + internalProps = resolvePluralRulesInternals(internals.lazyData); + else + internalProps = resolveRelativeTimeFormatInternals(internals.lazyData); + setInternalProperties(internals, internalProps); + return internalProps; +} diff --git a/js/src/builtin/intl/CurrencyDataGenerated.js b/js/src/builtin/intl/CurrencyDataGenerated.js new file mode 100644 index 0000000000..210aa2978b --- /dev/null +++ b/js/src/builtin/intl/CurrencyDataGenerated.js @@ -0,0 +1,78 @@ +// Generated by make_intl_data.py. DO NOT EDIT. +// Version: 2018-08-29 + +/** + * Mapping from currency codes to the number of decimal digits used for them. + * Default is 2 digits. + * + * Spec: ISO 4217 Currency and Funds Code List. 
+ * http://www.currency-iso.org/en/home/tables/table-a1.html + */ +var currencyDigits = { + // Bahraini Dinar (BAHRAIN) + BHD: 3, + // Burundi Franc (BURUNDI) + BIF: 0, + // Unidad de Fomento (CHILE) + CLF: 4, + // Chilean Peso (CHILE) + CLP: 0, + // Djibouti Franc (DJIBOUTI) + DJF: 0, + // Guinean Franc (GUINEA) + GNF: 0, + // Iraqi Dinar (IRAQ) + IQD: 3, + // Iceland Krona (ICELAND) + ISK: 0, + // Jordanian Dinar (JORDAN) + JOD: 3, + // Yen (JAPAN) + JPY: 0, + // Comorian Franc (COMOROS (THE)) + KMF: 0, + // Won (KOREA (THE REPUBLIC OF)) + KRW: 0, + // Kuwaiti Dinar (KUWAIT) + KWD: 3, + // Libyan Dinar (LIBYA) + LYD: 3, + // Rial Omani (OMAN) + OMR: 3, + // Guarani (PARAGUAY) + PYG: 0, + // Rwanda Franc (RWANDA) + RWF: 0, + // Tunisian Dinar (TUNISIA) + TND: 3, + // Uganda Shilling (UGANDA) + UGX: 0, + // Uruguay Peso en Unidades Indexadas (UI) (URUGUAY) + UYI: 0, + // Unidad Previsional (URUGUAY) + UYW: 4, + // Dong (VIET NAM) + VND: 0, + // Vatu (VANUATU) + VUV: 0, + // CFA Franc BEAC (CAMEROON) + // CFA Franc BEAC (CENTRAL AFRICAN REPUBLIC (THE)) + // CFA Franc BEAC (CHAD) + // CFA Franc BEAC (CONGO (THE)) + // CFA Franc BEAC (EQUATORIAL GUINEA) + // CFA Franc BEAC (GABON) + XAF: 0, + // CFA Franc BCEAO (BENIN) + // CFA Franc BCEAO (BURKINA FASO) + // CFA Franc BCEAO (CÔTE D'IVOIRE) + // CFA Franc BCEAO (GUINEA-BISSAU) + // CFA Franc BCEAO (MALI) + // CFA Franc BCEAO (NIGER (THE)) + // CFA Franc BCEAO (SENEGAL) + // CFA Franc BCEAO (TOGO) + XOF: 0, + // CFP Franc (FRENCH POLYNESIA) + // CFP Franc (NEW CALEDONIA) + // CFP Franc (WALLIS AND FUTUNA) + XPF: 0, +}; diff --git a/js/src/builtin/intl/DateTimeFormat.cpp b/js/src/builtin/intl/DateTimeFormat.cpp new file mode 100644 index 0000000000..678c90abb5 --- /dev/null +++ b/js/src/builtin/intl/DateTimeFormat.cpp @@ -0,0 +1,1884 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla 
Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Intl.DateTimeFormat implementation. */ + +#include "builtin/intl/DateTimeFormat.h" + +#include "mozilla/Assertions.h" +#include "mozilla/Range.h" +#include "mozilla/Span.h" + +#include "jsfriendapi.h" + +#include "builtin/Array.h" +#include "builtin/intl/CommonFunctions.h" +#include "builtin/intl/LanguageTag.h" +#include "builtin/intl/ScopedICUObject.h" +#include "builtin/intl/SharedIntlData.h" +#include "builtin/intl/TimeZoneDataGenerated.h" +#include "gc/FreeOp.h" +#include "js/CharacterEncoding.h" +#include "js/Date.h" +#include "js/experimental/Intl.h" // JS::AddMozDateTimeFormatConstructor +#include "js/friend/ErrorMessages.h" // js::GetErrorMessage, JSMSG_* +#include "js/GCAPI.h" +#include "js/PropertySpec.h" +#include "js/StableStringChars.h" +#include "unicode/ucal.h" +#include "unicode/udat.h" +#include "unicode/udateintervalformat.h" +#include "unicode/udatpg.h" +#include "unicode/uenum.h" +#include "unicode/ufieldpositer.h" +#include "unicode/uloc.h" +#include "unicode/utypes.h" +#include "vm/DateTime.h" +#include "vm/GlobalObject.h" +#include "vm/JSContext.h" +#include "vm/PlainObject.h" // js::PlainObject +#include "vm/Runtime.h" + +#include "vm/JSObject-inl.h" +#include "vm/NativeObject-inl.h" + +using namespace js; + +using JS::AutoStableStringChars; +using JS::ClippedTime; +using JS::TimeClip; + +using js::intl::CallICU; +using js::intl::DateTimeFormatOptions; +using js::intl::IcuLocale; +using js::intl::INITIAL_CHAR_BUFFER_SIZE; +using js::intl::SharedIntlData; +using js::intl::StringsAreEqual; + +const JSClassOps DateTimeFormatObject::classOps_ = { + nullptr, // addProperty + nullptr, // delProperty + nullptr, // enumerate + nullptr, // newEnumerate + nullptr, // resolve + nullptr, // mayResolve + DateTimeFormatObject::finalize, // finalize + nullptr, // call + nullptr, // hasInstance + nullptr, // 
construct + nullptr, // trace +}; + +const JSClass DateTimeFormatObject::class_ = { + "Intl.DateTimeFormat", + JSCLASS_HAS_RESERVED_SLOTS(DateTimeFormatObject::SLOT_COUNT) | + JSCLASS_HAS_CACHED_PROTO(JSProto_DateTimeFormat) | + JSCLASS_FOREGROUND_FINALIZE, + &DateTimeFormatObject::classOps_, &DateTimeFormatObject::classSpec_}; + +const JSClass& DateTimeFormatObject::protoClass_ = PlainObject::class_; + +static bool dateTimeFormat_toSource(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + args.rval().setString(cx->names().DateTimeFormat); + return true; +} + +static const JSFunctionSpec dateTimeFormat_static_methods[] = { + JS_SELF_HOSTED_FN("supportedLocalesOf", + "Intl_DateTimeFormat_supportedLocalesOf", 1, 0), + JS_FS_END}; + +static const JSFunctionSpec dateTimeFormat_methods[] = { + JS_SELF_HOSTED_FN("resolvedOptions", "Intl_DateTimeFormat_resolvedOptions", + 0, 0), + JS_SELF_HOSTED_FN("formatToParts", "Intl_DateTimeFormat_formatToParts", 1, + 0), +#ifdef NIGHTLY_BUILD +# ifndef U_HIDE_DRAFT_API + JS_SELF_HOSTED_FN("formatRange", "Intl_DateTimeFormat_formatRange", 2, 0), + JS_SELF_HOSTED_FN("formatRangeToParts", + "Intl_DateTimeFormat_formatRangeToParts", 2, 0), +# endif +#endif + JS_FN(js_toSource_str, dateTimeFormat_toSource, 0, 0), + JS_FS_END}; + +static const JSPropertySpec dateTimeFormat_properties[] = { + JS_SELF_HOSTED_GET("format", "$Intl_DateTimeFormat_format_get", 0), + JS_STRING_SYM_PS(toStringTag, "Intl.DateTimeFormat", JSPROP_READONLY), + JS_PS_END}; + +static bool DateTimeFormat(JSContext* cx, unsigned argc, Value* vp); + +const ClassSpec DateTimeFormatObject::classSpec_ = { + GenericCreateConstructor<DateTimeFormat, 0, gc::AllocKind::FUNCTION>, + GenericCreatePrototype<DateTimeFormatObject>, + dateTimeFormat_static_methods, + nullptr, + dateTimeFormat_methods, + dateTimeFormat_properties, + nullptr, + ClassSpec::DontDefineConstructor}; + +/** + * 12.2.1 Intl.DateTimeFormat([ locales [, options]]) + * + * 
ES2017 Intl draft rev 94045d234762ad107a3d09bb6f7381a65f1a2f9b + */ +static bool DateTimeFormat(JSContext* cx, const CallArgs& args, bool construct, + DateTimeFormatOptions dtfOptions) { + // Step 1 (Handled by OrdinaryCreateFromConstructor fallback code). + + // Step 2 (Inlined 9.1.14, OrdinaryCreateFromConstructor). + JSProtoKey protoKey = dtfOptions == DateTimeFormatOptions::Standard + ? JSProto_DateTimeFormat + : JSProto_Null; + RootedObject proto(cx); + if (!GetPrototypeFromBuiltinConstructor(cx, args, protoKey, &proto)) { + return false; + } + + Rooted<DateTimeFormatObject*> dateTimeFormat(cx); + dateTimeFormat = NewObjectWithClassProto<DateTimeFormatObject>(cx, proto); + if (!dateTimeFormat) { + return false; + } + + RootedValue thisValue( + cx, construct ? ObjectValue(*dateTimeFormat) : args.thisv()); + HandleValue locales = args.get(0); + HandleValue options = args.get(1); + + // Step 3. + return intl::LegacyInitializeObject( + cx, dateTimeFormat, cx->names().InitializeDateTimeFormat, thisValue, + locales, options, dtfOptions, args.rval()); +} + +static bool DateTimeFormat(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + return DateTimeFormat(cx, args, args.isConstructing(), + DateTimeFormatOptions::Standard); +} + +static bool MozDateTimeFormat(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + + // Don't allow to call mozIntl.DateTimeFormat as a function. That way we + // don't need to worry how to handle the legacy initialization semantics + // when applied on mozIntl.DateTimeFormat. 
+ if (!ThrowIfNotConstructing(cx, args, "mozIntl.DateTimeFormat")) { + return false; + } + + return DateTimeFormat(cx, args, true, + DateTimeFormatOptions::EnableMozExtensions); +} + +bool js::intl_DateTimeFormat(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 2); + MOZ_ASSERT(!args.isConstructing()); + // intl_DateTimeFormat is an intrinsic for self-hosted JavaScript, so it + // cannot be used with "new", but it still has to be treated as a + // constructor. + return DateTimeFormat(cx, args, true, DateTimeFormatOptions::Standard); +} + +void js::DateTimeFormatObject::finalize(JSFreeOp* fop, JSObject* obj) { + MOZ_ASSERT(fop->onMainThread()); + + auto* dateTimeFormat = &obj->as<DateTimeFormatObject>(); + UDateFormat* df = dateTimeFormat->getDateFormat(); + UDateIntervalFormat* dif = dateTimeFormat->getDateIntervalFormat(); + + if (df) { + intl::RemoveICUCellMemory( + fop, obj, DateTimeFormatObject::UDateFormatEstimatedMemoryUse); + + udat_close(df); + } + + if (dif) { + intl::RemoveICUCellMemory( + fop, obj, DateTimeFormatObject::UDateIntervalFormatEstimatedMemoryUse); + + udtitvfmt_close(dif); + } +} + +bool JS::AddMozDateTimeFormatConstructor(JSContext* cx, + JS::Handle<JSObject*> intl) { + RootedObject ctor( + cx, GlobalObject::createConstructor(cx, MozDateTimeFormat, + cx->names().DateTimeFormat, 0)); + if (!ctor) { + return false; + } + + RootedObject proto( + cx, GlobalObject::createBlankPrototype<PlainObject>(cx, cx->global())); + if (!proto) { + return false; + } + + if (!LinkConstructorAndPrototype(cx, ctor, proto)) { + return false; + } + + // 12.3.2 + if (!JS_DefineFunctions(cx, ctor, dateTimeFormat_static_methods)) { + return false; + } + + // 12.4.4 and 12.4.5 + if (!JS_DefineFunctions(cx, proto, dateTimeFormat_methods)) { + return false; + } + + // 12.4.2 and 12.4.3 + if (!JS_DefineProperties(cx, proto, dateTimeFormat_properties)) { + return false; + } + + RootedValue ctorValue(cx, 
ObjectValue(*ctor)); + return DefineDataProperty(cx, intl, cx->names().DateTimeFormat, ctorValue, 0); +} + +static bool DefaultCalendar(JSContext* cx, const UniqueChars& locale, + MutableHandleValue rval) { + UErrorCode status = U_ZERO_ERROR; + UCalendar* cal = ucal_open(nullptr, 0, locale.get(), UCAL_DEFAULT, &status); + + // This correctly handles nullptr |cal| when opening failed. + ScopedICUObject<UCalendar, ucal_close> closeCalendar(cal); + + const char* calendar = ucal_getType(cal, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + + // ICU returns old-style keyword values; map them to BCP 47 equivalents + calendar = uloc_toUnicodeLocaleType("ca", calendar); + if (!calendar) { + intl::ReportInternalError(cx); + return false; + } + + JSString* str = NewStringCopyZ<CanGC>(cx, calendar); + if (!str) { + return false; + } + + rval.setString(str); + return true; +} + +struct CalendarAlias { + const char* const calendar; + const char* const alias; +}; + +const CalendarAlias calendarAliases[] = {{"islamic-civil", "islamicc"}, + {"ethioaa", "ethiopic-amete-alem"}}; + +bool js::intl_availableCalendars(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 1); + MOZ_ASSERT(args[0].isString()); + + UniqueChars locale = intl::EncodeLocale(cx, args[0].toString()); + if (!locale) { + return false; + } + + RootedObject calendars(cx, NewDenseEmptyArray(cx)); + if (!calendars) { + return false; + } + + // We need the default calendar for the locale as the first result. + RootedValue defaultCalendar(cx); + if (!DefaultCalendar(cx, locale, &defaultCalendar)) { + return false; + } + + if (!NewbornArrayPush(cx, calendars, defaultCalendar)) { + return false; + } + + // Now get the calendars that "would make a difference", i.e., not the + // default. 
+ UErrorCode status = U_ZERO_ERROR; + UEnumeration* values = + ucal_getKeywordValuesForLocale("ca", locale.get(), false, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + ScopedICUObject<UEnumeration, uenum_close> toClose(values); + + uint32_t count = uenum_count(values, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + + for (; count > 0; count--) { + const char* calendar = uenum_next(values, nullptr, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + + // ICU returns old-style keyword values; map them to BCP 47 equivalents + calendar = uloc_toUnicodeLocaleType("ca", calendar); + if (!calendar) { + intl::ReportInternalError(cx); + return false; + } + + JSString* jscalendar = NewStringCopyZ<CanGC>(cx, calendar); + if (!jscalendar) { + return false; + } + if (!NewbornArrayPush(cx, calendars, StringValue(jscalendar))) { + return false; + } + + // ICU doesn't return calendar aliases, append them here. 
+ for (const auto& calendarAlias : calendarAliases) { + if (StringsAreEqual(calendar, calendarAlias.calendar)) { + JSString* jscalendar = NewStringCopyZ<CanGC>(cx, calendarAlias.alias); + if (!jscalendar) { + return false; + } + if (!NewbornArrayPush(cx, calendars, StringValue(jscalendar))) { + return false; + } + } + } + } + + args.rval().setObject(*calendars); + return true; +} + +bool js::intl_defaultCalendar(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 1); + MOZ_ASSERT(args[0].isString()); + + UniqueChars locale = intl::EncodeLocale(cx, args[0].toString()); + if (!locale) { + return false; + } + + return DefaultCalendar(cx, locale, args.rval()); +} + +bool js::intl_IsValidTimeZoneName(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 1); + MOZ_ASSERT(args[0].isString()); + + SharedIntlData& sharedIntlData = cx->runtime()->sharedIntlData.ref(); + + RootedString timeZone(cx, args[0].toString()); + RootedAtom validatedTimeZone(cx); + if (!sharedIntlData.validateTimeZoneName(cx, timeZone, &validatedTimeZone)) { + return false; + } + + if (validatedTimeZone) { + cx->markAtom(validatedTimeZone); + args.rval().setString(validatedTimeZone); + } else { + args.rval().setNull(); + } + + return true; +} + +bool js::intl_canonicalizeTimeZone(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 1); + MOZ_ASSERT(args[0].isString()); + + SharedIntlData& sharedIntlData = cx->runtime()->sharedIntlData.ref(); + + // Some time zone names are canonicalized differently by ICU -- handle + // those first: + RootedString timeZone(cx, args[0].toString()); + RootedAtom ianaTimeZone(cx); + if (!sharedIntlData.tryCanonicalizeTimeZoneConsistentWithIANA( + cx, timeZone, &ianaTimeZone)) { + return false; + } + + if (ianaTimeZone) { + cx->markAtom(ianaTimeZone); + 
args.rval().setString(ianaTimeZone); + return true; + } + + AutoStableStringChars stableChars(cx); + if (!stableChars.initTwoByte(cx, timeZone)) { + return false; + } + + mozilla::Range<const char16_t> tzchars = stableChars.twoByteRange(); + + JSString* str = CallICU(cx, [&tzchars](UChar* chars, uint32_t size, + UErrorCode* status) { + return ucal_getCanonicalTimeZoneID(tzchars.begin().get(), tzchars.length(), + chars, size, nullptr, status); + }); + if (!str) { + return false; + } + + args.rval().setString(str); + return true; +} + +bool js::intl_defaultTimeZone(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 0); + + // The current default might be stale, because JS::ResetTimeZone() doesn't + // immediately update ICU's default time zone. So perform an update if + // needed. + js::ResyncICUDefaultTimeZone(); + + JSString* str = CallICU(cx, ucal_getDefaultTimeZone); + if (!str) { + return false; + } + + args.rval().setString(str); + return true; +} + +bool js::intl_defaultTimeZoneOffset(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 0); + + UErrorCode status = U_ZERO_ERROR; + const UChar* uTimeZone = nullptr; + int32_t uTimeZoneLength = 0; + const char* rootLocale = ""; + UCalendar* cal = + ucal_open(uTimeZone, uTimeZoneLength, rootLocale, UCAL_DEFAULT, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + ScopedICUObject<UCalendar, ucal_close> toClose(cal); + + int32_t offset = ucal_get(cal, UCAL_ZONE_OFFSET, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + + args.rval().setInt32(offset); + return true; +} + +bool js::intl_isDefaultTimeZone(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 1); + MOZ_ASSERT(args[0].isString() || args[0].isUndefined()); + + // |undefined| is the 
default value when the Intl runtime caches haven't + // yet been initialized. Handle it the same way as a cache miss. + if (args[0].isUndefined()) { + args.rval().setBoolean(false); + return true; + } + + // The current default might be stale, because JS::ResetTimeZone() doesn't + // immediately update ICU's default time zone. So perform an update if + // needed. + js::ResyncICUDefaultTimeZone(); + + Vector<char16_t, INITIAL_CHAR_BUFFER_SIZE> chars(cx); + MOZ_ALWAYS_TRUE(chars.resize(INITIAL_CHAR_BUFFER_SIZE)); + + int32_t size = CallICU(cx, ucal_getDefaultTimeZone, chars); + if (size < 0) { + return false; + } + + JSLinearString* str = args[0].toString()->ensureLinear(cx); + if (!str) { + return false; + } + + bool equals; + if (str->length() == size_t(size)) { + JS::AutoCheckCannotGC nogc; + equals = + str->hasLatin1Chars() + ? EqualChars(str->latin1Chars(nogc), chars.begin(), str->length()) + : EqualChars(str->twoByteChars(nogc), chars.begin(), str->length()); + } else { + equals = false; + } + + args.rval().setBoolean(equals); + return true; +} + +enum class HourCycle { + // 12 hour cycle, from 0 to 11. + H11, + + // 12 hour cycle, from 1 to 12. + H12, + + // 24 hour cycle, from 0 to 23. + H23, + + // 24 hour cycle, from 1 to 24. + H24 +}; + +static bool IsHour12(HourCycle hc) { + return hc == HourCycle::H11 || hc == HourCycle::H12; +} + +static char16_t HourSymbol(HourCycle hc) { + switch (hc) { + case HourCycle::H11: + return 'K'; + case HourCycle::H12: + return 'h'; + case HourCycle::H23: + return 'H'; + case HourCycle::H24: + return 'k'; + } + MOZ_MAKE_COMPILER_ASSUME_IS_UNREACHABLE("unexpected hour cycle"); +} + +/** + * Parse a pattern according to the format specified in + * <https://unicode.org/reports/tr35/tr35-dates.html#Date_Format_Patterns>. 
+ */ +template <typename CharT> +class PatternIterator { + CharT* iter_; + const CharT* const end_; + + public: + explicit PatternIterator(mozilla::Span<CharT> pattern) + : iter_(pattern.data()), end_(pattern.data() + pattern.size()) {} + + CharT* next() { + MOZ_ASSERT(iter_ != nullptr); + + bool inQuote = false; + while (iter_ < end_) { + CharT* cur = iter_++; + if (*cur == '\'') { + inQuote = !inQuote; + } else if (!inQuote) { + return cur; + } + } + + iter_ = nullptr; + return nullptr; + } +}; + +/** + * Return the hour cycle for the given option string. + */ +static HourCycle HourCycleFromOption(JSLinearString* str) { + if (StringEqualsLiteral(str, "h11")) { + return HourCycle::H11; + } + if (StringEqualsLiteral(str, "h12")) { + return HourCycle::H12; + } + if (StringEqualsLiteral(str, "h23")) { + return HourCycle::H23; + } + MOZ_ASSERT(StringEqualsLiteral(str, "h24")); + return HourCycle::H24; +} + +/** + * Return the hour cycle used in the input pattern or Nothing if none was found. + */ +template <typename CharT> +static mozilla::Maybe<HourCycle> HourCycleFromPattern( + mozilla::Span<const CharT> pattern) { + PatternIterator<const CharT> iter(pattern); + while (const auto* ptr = iter.next()) { + switch (*ptr) { + case 'K': + return mozilla::Some(HourCycle::H11); + case 'h': + return mozilla::Some(HourCycle::H12); + case 'H': + return mozilla::Some(HourCycle::H23); + case 'k': + return mozilla::Some(HourCycle::H24); + } + } + return mozilla::Nothing(); +} + +/** + * Replaces all hour pattern characters in |patternOrSkeleton| to use the + * matching hour representation for |hourCycle|. 
+ */ +static void ReplaceHourSymbol(mozilla::Span<char16_t> patternOrSkeleton, + HourCycle hc) { + char16_t replacement = HourSymbol(hc); + PatternIterator<char16_t> iter(patternOrSkeleton); + while (auto* ptr = iter.next()) { + char16_t ch = *ptr; + if (ch == 'K' || ch == 'h' || ch == 'H' || ch == 'k' || ch == 'j') { + *ptr = replacement; + } + } +} + +bool js::intl_patternForSkeleton(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 3); + MOZ_ASSERT(args[0].isString()); + MOZ_ASSERT(args[1].isString()); + MOZ_ASSERT(args[2].isString() || args[2].isUndefined()); + + UniqueChars locale = intl::EncodeLocale(cx, args[0].toString()); + if (!locale) { + return false; + } + + AutoStableStringChars skeleton(cx); + if (!skeleton.initTwoByte(cx, args[1].toString())) { + return false; + } + + mozilla::Maybe<HourCycle> hourCycle; + if (args[2].isString()) { + JSLinearString* hourCycleStr = args[2].toString()->ensureLinear(cx); + if (!hourCycleStr) { + return false; + } + + hourCycle.emplace(HourCycleFromOption(hourCycleStr)); + } + + mozilla::Range<const char16_t> skelChars = skeleton.twoByteRange(); + + SharedIntlData& sharedIntlData = cx->runtime()->sharedIntlData.ref(); + UDateTimePatternGenerator* gen = + sharedIntlData.getDateTimePatternGenerator(cx, locale.get()); + if (!gen) { + return false; + } + + Vector<char16_t, intl::INITIAL_CHAR_BUFFER_SIZE> pattern(cx); + MOZ_ALWAYS_TRUE(pattern.resize(intl::INITIAL_CHAR_BUFFER_SIZE)); + + int32_t patternSize = CallICU( + cx, + [gen, &skelChars](UChar* chars, uint32_t size, UErrorCode* status) { + return udatpg_getBestPattern(gen, skelChars.begin().get(), + skelChars.length(), chars, size, status); + }, + pattern); + if (patternSize < 0) { + return false; + } + pattern.shrinkTo(size_t(patternSize)); + + // If the hourCycle option was set, adjust the resolved pattern to use the + // requested hour cycle representation. 
+ if (hourCycle) { + ReplaceHourSymbol(pattern, hourCycle.value()); + } + + JSString* str = NewStringCopyN<CanGC>(cx, pattern.begin(), pattern.length()); + if (!str) { + return false; + } + args.rval().setString(str); + return true; +} + +/** + * Find a matching pattern using the requested hour-12 options. + * + * This function is needed to work around the following two issues. + * - https://unicode-org.atlassian.net/browse/ICU-21023 + * - https://unicode-org.atlassian.net/browse/CLDR-13425 + * + * We're currently using a relatively simple workaround, which doesn't give the + * most accurate results. For example: + * + * ``` + * var dtf = new Intl.DateTimeFormat("en", { + * timeZone: "UTC", + * dateStyle: "long", + * timeStyle: "long", + * hourCycle: "h12", + * }); + * print(dtf.format(new Date("2020-01-01T00:00Z"))); + * ``` + * + * Returns the pattern "MMMM d, y 'at' h:mm:ss a z", but when going through + * |udatpg_getSkeleton| and then |udatpg_getBestPattern| to find an equivalent + * pattern for "h23", we'll end up with the pattern "MMMM d, y, HH:mm:ss z", so + * the combinator element " 'at' " was lost in the process. 
+ */ +template <size_t N> +static bool FindPatternWithHourCycle(JSContext* cx, const char* locale, + Vector<char16_t, N>& pattern, + bool hour12) { + SharedIntlData& sharedIntlData = cx->runtime()->sharedIntlData.ref(); + UDateTimePatternGenerator* gen = + sharedIntlData.getDateTimePatternGenerator(cx, locale); + if (!gen) { + return false; + } + + Vector<char16_t, intl::INITIAL_CHAR_BUFFER_SIZE> skeleton(cx); + MOZ_ALWAYS_TRUE(skeleton.resize(intl::INITIAL_CHAR_BUFFER_SIZE)); + + int32_t skeletonSize = CallICU( + cx, + [&pattern](UChar* chars, uint32_t size, UErrorCode* status) { + return udatpg_getSkeleton(nullptr, pattern.begin(), pattern.length(), + chars, size, status); + }, + skeleton); + if (skeletonSize < 0) { + return false; + } + skeleton.shrinkTo(size_t(skeletonSize)); + + // Input skeletons don't differentiate between "K" and "h" resp. "k" and "H". + ReplaceHourSymbol(skeleton, hour12 ? HourCycle::H12 : HourCycle::H23); + + MOZ_ALWAYS_TRUE(pattern.resize(N)); + + int32_t patternSize = CallICU( + cx, + [gen, &skeleton](UChar* chars, uint32_t size, UErrorCode* status) { + return udatpg_getBestPattern(gen, skeleton.begin(), skeleton.length(), + chars, size, status); + }, + pattern); + if (patternSize < 0) { + return false; + } + pattern.shrinkTo(size_t(patternSize)); + + return true; +} + +bool js::intl_patternForStyle(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 6); + MOZ_ASSERT(args[0].isString()); + MOZ_ASSERT(args[1].isString() || args[1].isUndefined()); + MOZ_ASSERT(args[2].isString() || args[2].isUndefined()); + MOZ_ASSERT(args[3].isString()); + MOZ_ASSERT(args[4].isBoolean() || args[4].isUndefined()); + MOZ_ASSERT(args[5].isString() || args[5].isUndefined()); + + UniqueChars locale = intl::EncodeLocale(cx, args[0].toString()); + if (!locale) { + return false; + } + + auto toDateFormatStyle = [](JSLinearString* str) { + if (StringEqualsLiteral(str, "full")) { + return UDAT_FULL; 
+ } + if (StringEqualsLiteral(str, "long")) { + return UDAT_LONG; + } + if (StringEqualsLiteral(str, "medium")) { + return UDAT_MEDIUM; + } + MOZ_ASSERT(StringEqualsLiteral(str, "short")); + return UDAT_SHORT; + }; + + UDateFormatStyle dateStyle = UDAT_NONE; + if (args[1].isString()) { + JSLinearString* dateStyleStr = args[1].toString()->ensureLinear(cx); + if (!dateStyleStr) { + return false; + } + + dateStyle = toDateFormatStyle(dateStyleStr); + } + + UDateFormatStyle timeStyle = UDAT_NONE; + if (args[2].isString()) { + JSLinearString* timeStyleStr = args[2].toString()->ensureLinear(cx); + if (!timeStyleStr) { + return false; + } + + timeStyle = toDateFormatStyle(timeStyleStr); + } + + AutoStableStringChars timeZone(cx); + if (!timeZone.initTwoByte(cx, args[3].toString())) { + return false; + } + + mozilla::Maybe<bool> hour12; + if (args[4].isBoolean()) { + hour12.emplace(args[4].toBoolean()); + } + + mozilla::Maybe<HourCycle> hourCycle; + if (args[5].isString()) { + JSLinearString* hourCycleStr = args[5].toString()->ensureLinear(cx); + if (!hourCycleStr) { + return false; + } + + hourCycle.emplace(HourCycleFromOption(hourCycleStr)); + } + + mozilla::Range<const char16_t> timeZoneChars = timeZone.twoByteRange(); + + UErrorCode status = U_ZERO_ERROR; + UDateFormat* df = udat_open(timeStyle, dateStyle, IcuLocale(locale.get()), + timeZoneChars.begin().get(), + timeZoneChars.length(), nullptr, -1, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + ScopedICUObject<UDateFormat, udat_close> toClose(df); + + Vector<char16_t, intl::INITIAL_CHAR_BUFFER_SIZE> pattern(cx); + MOZ_ALWAYS_TRUE(pattern.resize(intl::INITIAL_CHAR_BUFFER_SIZE)); + + int32_t patternSize = CallICU( + cx, + [df](UChar* chars, uint32_t size, UErrorCode* status) { + return udat_toPattern(df, false, chars, size, status); + }, + pattern); + if (patternSize < 0) { + return false; + } + pattern.shrinkTo(size_t(patternSize)); + + // If a specific hour cycle was 
requested and this hour cycle doesn't match + // the hour cycle used in the resolved pattern, find an equivalent pattern + // with the correct hour cycle. + if (timeStyle != UDAT_NONE && (hour12 || hourCycle)) { + if (auto hcPattern = HourCycleFromPattern<char16_t>(pattern)) { + bool wantHour12 = hour12 ? hour12.value() : IsHour12(hourCycle.value()); + if (wantHour12 != IsHour12(hcPattern.value())) { + if (!FindPatternWithHourCycle(cx, locale.get(), pattern, wantHour12)) { + return false; + } + } + } + } + + // If the hourCycle option was set, adjust the resolved pattern to use the + // requested hour cycle representation. + if (hourCycle) { + ReplaceHourSymbol(pattern, hourCycle.value()); + } + + JSString* str = NewStringCopyN<CanGC>(cx, pattern.begin(), pattern.length()); + if (!str) { + return false; + } + args.rval().setString(str); + return true; +} + +bool js::intl_skeletonForPattern(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 1); + MOZ_ASSERT(args[0].isString()); + + AutoStableStringChars pattern(cx); + if (!pattern.initTwoByte(cx, args[0].toString())) { + return false; + } + mozilla::Range<const char16_t> patternChars = pattern.twoByteRange(); + + JSString* skeleton = CallICU( + cx, [&patternChars](char16_t* chars, int32_t size, UErrorCode* status) { + return udatpg_getSkeleton(nullptr, patternChars.begin().get(), + patternChars.length(), chars, size, status); + }); + if (!skeleton) { + return false; + } + + args.rval().setString(skeleton); + return true; +} + +static UniqueChars DateTimeFormatLocale( + JSContext* cx, HandleObject internals, + mozilla::Maybe<HourCycle> hourCycle = mozilla::Nothing()) { + RootedValue value(cx); + if (!GetProperty(cx, internals, internals, cx->names().locale, &value)) { + return nullptr; + } + + // ICU expects calendar, numberingSystem, and hourCycle as Unicode locale + // extensions on locale. 
+ + intl::LanguageTag tag(cx); + { + JSLinearString* locale = value.toString()->ensureLinear(cx); + if (!locale) { + return nullptr; + } + + if (!intl::LanguageTagParser::parse(cx, locale, tag)) { + return nullptr; + } + } + + JS::RootedVector<intl::UnicodeExtensionKeyword> keywords(cx); + + if (!GetProperty(cx, internals, internals, cx->names().calendar, &value)) { + return nullptr; + } + + { + JSLinearString* calendar = value.toString()->ensureLinear(cx); + if (!calendar) { + return nullptr; + } + + if (!keywords.emplaceBack("ca", calendar)) { + return nullptr; + } + } + + if (!GetProperty(cx, internals, internals, cx->names().numberingSystem, + &value)) { + return nullptr; + } + + { + JSLinearString* numberingSystem = value.toString()->ensureLinear(cx); + if (!numberingSystem) { + return nullptr; + } + + if (!keywords.emplaceBack("nu", numberingSystem)) { + return nullptr; + } + } + + if (hourCycle) { + JSAtom* hourCycleStr; + switch (*hourCycle) { + case HourCycle::H11: + hourCycleStr = cx->names().h11; + break; + case HourCycle::H12: + hourCycleStr = cx->names().h12; + break; + case HourCycle::H23: + hourCycleStr = cx->names().h23; + break; + case HourCycle::H24: + hourCycleStr = cx->names().h24; + break; + } + + if (!keywords.emplaceBack("hc", hourCycleStr)) { + return nullptr; + } + } + + // |ApplyUnicodeExtensionToTag| applies the new keywords to the front of + // the Unicode extension subtag. We're then relying on ICU to follow RFC + // 6067, which states that any trailing keywords using the same key + // should be ignored. + if (!intl::ApplyUnicodeExtensionToTag(cx, tag, keywords)) { + return nullptr; + } + + return tag.toStringZ(cx); +} + +/** + * Returns a new UDateFormat with the locale and date-time formatting options + * of the given DateTimeFormat. 
+ */ +static UDateFormat* NewUDateFormat( + JSContext* cx, Handle<DateTimeFormatObject*> dateTimeFormat) { + RootedValue value(cx); + + RootedObject internals(cx, intl::GetInternalsObject(cx, dateTimeFormat)); + if (!internals) { + return nullptr; + } + + UniqueChars locale = DateTimeFormatLocale(cx, internals); + if (!locale) { + return nullptr; + } + + if (!GetProperty(cx, internals, internals, cx->names().timeZone, &value)) { + return nullptr; + } + + AutoStableStringChars timeZone(cx); + if (!timeZone.initTwoByte(cx, value.toString())) { + return nullptr; + } + + mozilla::Range<const char16_t> timeZoneChars = timeZone.twoByteRange(); + + if (!GetProperty(cx, internals, internals, cx->names().pattern, &value)) { + return nullptr; + } + + AutoStableStringChars pattern(cx); + if (!pattern.initTwoByte(cx, value.toString())) { + return nullptr; + } + + mozilla::Range<const char16_t> patternChars = pattern.twoByteRange(); + + UErrorCode status = U_ZERO_ERROR; + UDateFormat* df = + udat_open(UDAT_PATTERN, UDAT_PATTERN, IcuLocale(locale.get()), + timeZoneChars.begin().get(), timeZoneChars.length(), + patternChars.begin().get(), patternChars.length(), &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return nullptr; + } + + // ECMAScript requires the Gregorian calendar to be used from the beginning + // of ECMAScript time. + UCalendar* cal = const_cast<UCalendar*>(udat_getCalendar(df)); + ucal_setGregorianChange(cal, StartOfTime, &status); + + // An error here means the calendar is not Gregorian, so we don't care. 
+ + return df; +} + +static bool intl_FormatDateTime(JSContext* cx, const UDateFormat* df, + ClippedTime x, MutableHandleValue result) { + MOZ_ASSERT(x.isValid()); + + JSString* str = + CallICU(cx, [df, x](UChar* chars, int32_t size, UErrorCode* status) { + return udat_format(df, x.toDouble(), chars, size, nullptr, status); + }); + if (!str) { + return false; + } + + result.setString(str); + return true; +} + +using FieldType = js::ImmutablePropertyNamePtr JSAtomState::*; + +static FieldType GetFieldTypeForFormatField(UDateFormatField fieldName) { + // See intl/icu/source/i18n/unicode/udat.h for a detailed field list. This + // switch is deliberately exhaustive: cases might have to be added/removed + // if this code is compiled with a different ICU with more + // UDateFormatField enum initializers. Please guard such cases with + // appropriate ICU version-testing #ifdefs, should cross-version divergence + // occur. + switch (fieldName) { + case UDAT_ERA_FIELD: + return &JSAtomState::era; + + case UDAT_YEAR_FIELD: + case UDAT_YEAR_WOY_FIELD: + case UDAT_EXTENDED_YEAR_FIELD: + return &JSAtomState::year; + + case UDAT_YEAR_NAME_FIELD: + return &JSAtomState::yearName; + + case UDAT_MONTH_FIELD: + case UDAT_STANDALONE_MONTH_FIELD: + return &JSAtomState::month; + + case UDAT_DATE_FIELD: + case UDAT_JULIAN_DAY_FIELD: + return &JSAtomState::day; + + case UDAT_HOUR_OF_DAY1_FIELD: + case UDAT_HOUR_OF_DAY0_FIELD: + case UDAT_HOUR1_FIELD: + case UDAT_HOUR0_FIELD: + return &JSAtomState::hour; + + case UDAT_MINUTE_FIELD: + return &JSAtomState::minute; + + case UDAT_SECOND_FIELD: + return &JSAtomState::second; + + case UDAT_DAY_OF_WEEK_FIELD: + case UDAT_STANDALONE_DAY_FIELD: + case UDAT_DOW_LOCAL_FIELD: + case UDAT_DAY_OF_WEEK_IN_MONTH_FIELD: + return &JSAtomState::weekday; + + case UDAT_AM_PM_FIELD: + return &JSAtomState::dayPeriod; + + case UDAT_TIMEZONE_FIELD: + return &JSAtomState::timeZoneName; + + case UDAT_FRACTIONAL_SECOND_FIELD: + return &JSAtomState::fractionalSecond; 
+ + case UDAT_FLEXIBLE_DAY_PERIOD_FIELD: +#ifdef NIGHTLY_BUILD + return &JSAtomState::dayPeriod; +#else + // Currently restricted to Nightly. + return &JSAtomState::unknown; +#endif + +#ifndef U_HIDE_INTERNAL_API + case UDAT_RELATED_YEAR_FIELD: + return &JSAtomState::relatedYear; +#endif + + case UDAT_DAY_OF_YEAR_FIELD: + case UDAT_WEEK_OF_YEAR_FIELD: + case UDAT_WEEK_OF_MONTH_FIELD: + case UDAT_MILLISECONDS_IN_DAY_FIELD: + case UDAT_TIMEZONE_RFC_FIELD: + case UDAT_TIMEZONE_GENERIC_FIELD: + case UDAT_QUARTER_FIELD: + case UDAT_STANDALONE_QUARTER_FIELD: + case UDAT_TIMEZONE_SPECIAL_FIELD: + case UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD: + case UDAT_TIMEZONE_ISO_FIELD: + case UDAT_TIMEZONE_ISO_LOCAL_FIELD: + case UDAT_AM_PM_MIDNIGHT_NOON_FIELD: +#ifndef U_HIDE_INTERNAL_API + case UDAT_TIME_SEPARATOR_FIELD: +#endif + // These fields are all unsupported. + return &JSAtomState::unknown; + +#ifndef U_HIDE_DEPRECATED_API + case UDAT_FIELD_COUNT: + MOZ_ASSERT_UNREACHABLE( + "format field sentinel value returned by " + "iterator!"); +#endif + } + + MOZ_ASSERT_UNREACHABLE( + "unenumerated, undocumented format field returned " + "by iterator"); + return nullptr; +} + +static bool intl_FormatToPartsDateTime(JSContext* cx, const UDateFormat* df, + ClippedTime x, FieldType source, + MutableHandleValue result) { + MOZ_ASSERT(x.isValid()); + + UErrorCode status = U_ZERO_ERROR; + UFieldPositionIterator* fpositer = ufieldpositer_open(&status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + ScopedICUObject<UFieldPositionIterator, ufieldpositer_close> toClose( + fpositer); + + RootedString overallResult(cx); + overallResult = CallICU( + cx, [df, x, fpositer](UChar* chars, int32_t size, UErrorCode* status) { + return udat_formatForFields(df, x.toDouble(), chars, size, fpositer, + status); + }); + if (!overallResult) { + return false; + } + + RootedArrayObject partsArray(cx, NewDenseEmptyArray(cx)); + if (!partsArray) { + return false; + } + + if 
(overallResult->length() == 0) { + // An empty string contains no parts, so avoid extra work below. + result.setObject(*partsArray); + return true; + } + + size_t lastEndIndex = 0; + + RootedObject singlePart(cx); + RootedValue val(cx); + + auto AppendPart = [&](FieldType type, size_t beginIndex, size_t endIndex) { + singlePart = NewBuiltinClassInstance<PlainObject>(cx); + if (!singlePart) { + return false; + } + + val = StringValue(cx->names().*type); + if (!DefineDataProperty(cx, singlePart, cx->names().type, val)) { + return false; + } + + JSLinearString* partSubstr = NewDependentString( + cx, overallResult, beginIndex, endIndex - beginIndex); + if (!partSubstr) { + return false; + } + + val = StringValue(partSubstr); + if (!DefineDataProperty(cx, singlePart, cx->names().value, val)) { + return false; + } + + if (source) { + val = StringValue(cx->names().*source); + if (!DefineDataProperty(cx, singlePart, cx->names().source, val)) { + return false; + } + } + + if (!NewbornArrayPush(cx, partsArray, ObjectValue(*singlePart))) { + return false; + } + + lastEndIndex = endIndex; + return true; + }; + + int32_t fieldInt, beginIndexInt, endIndexInt; + while ((fieldInt = ufieldpositer_next(fpositer, &beginIndexInt, + &endIndexInt)) >= 0) { + MOZ_ASSERT(beginIndexInt >= 0); + MOZ_ASSERT(endIndexInt >= 0); + MOZ_ASSERT(beginIndexInt <= endIndexInt, + "field iterator returning invalid range"); + + size_t beginIndex(beginIndexInt); + size_t endIndex(endIndexInt); + + // Technically this isn't guaranteed. But it appears true in practice, + // and http://bugs.icu-project.org/trac/ticket/12024 is expected to + // correct the documentation lapse. 
+ MOZ_ASSERT(lastEndIndex <= beginIndex, + "field iteration didn't return fields in order start to " + "finish as expected"); + + if (FieldType type = GetFieldTypeForFormatField( + static_cast<UDateFormatField>(fieldInt))) { + if (lastEndIndex < beginIndex) { + if (!AppendPart(&JSAtomState::literal, lastEndIndex, beginIndex)) { + return false; + } + } + + if (!AppendPart(type, beginIndex, endIndex)) { + return false; + } + } + } + + // Append any final literal. + if (lastEndIndex < overallResult->length()) { + if (!AppendPart(&JSAtomState::literal, lastEndIndex, + overallResult->length())) { + return false; + } + } + + result.setObject(*partsArray); + return true; +} + +bool js::intl_FormatDateTime(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 3); + MOZ_ASSERT(args[0].isObject()); + MOZ_ASSERT(args[1].isNumber()); + MOZ_ASSERT(args[2].isBoolean()); + + Rooted<DateTimeFormatObject*> dateTimeFormat(cx); + dateTimeFormat = &args[0].toObject().as<DateTimeFormatObject>(); + + bool formatToParts = args[2].toBoolean(); + + ClippedTime x = TimeClip(args[1].toNumber()); + if (!x.isValid()) { + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, + JSMSG_DATE_NOT_FINITE, "DateTimeFormat", + formatToParts ? "formatToParts" : "format"); + return false; + } + + // Obtain a cached UDateFormat object. + UDateFormat* df = dateTimeFormat->getDateFormat(); + if (!df) { + df = NewUDateFormat(cx, dateTimeFormat); + if (!df) { + return false; + } + dateTimeFormat->setDateFormat(df); + + intl::AddICUCellMemory(dateTimeFormat, + DateTimeFormatObject::UDateFormatEstimatedMemoryUse); + } + + // Use the UDateFormat to actually format the time stamp. + FieldType source = nullptr; + return formatToParts + ? 
intl_FormatToPartsDateTime(cx, df, x, source, args.rval()) + : intl_FormatDateTime(cx, df, x, args.rval()); +} + +#ifndef U_HIDE_DRAFT_API +/** + * Returns a new UDateIntervalFormat with the locale and date-time formatting + * options of the given DateTimeFormat. + */ +static UDateIntervalFormat* NewUDateIntervalFormat( + JSContext* cx, Handle<DateTimeFormatObject*> dateTimeFormat) { + RootedValue value(cx); + + RootedObject internals(cx, intl::GetInternalsObject(cx, dateTimeFormat)); + if (!internals) { + return nullptr; + } + + if (!GetProperty(cx, internals, internals, cx->names().pattern, &value)) { + return nullptr; + } + + // Determine the hour cycle used in the resolved pattern. This is needed to + // work around <https://unicode-org.atlassian.net/browse/ICU-21154> and + // <https://unicode-org.atlassian.net/browse/ICU-21155>. + mozilla::Maybe<HourCycle> hcPattern; + { + JSLinearString* pattern = value.toString()->ensureLinear(cx); + if (!pattern) { + return nullptr; + } + + JS::AutoCheckCannotGC nogc; + if (pattern->hasLatin1Chars()) { + hcPattern = HourCycleFromPattern<Latin1Char>(pattern->latin1Range(nogc)); + } else { + hcPattern = HourCycleFromPattern<char16_t>(pattern->twoByteRange(nogc)); + } + } + + UniqueChars locale = DateTimeFormatLocale(cx, internals, hcPattern); + if (!locale) { + return nullptr; + } + + if (!GetProperty(cx, internals, internals, cx->names().timeZone, &value)) { + return nullptr; + } + + AutoStableStringChars timeZone(cx); + if (!timeZone.initTwoByte(cx, value.toString())) { + return nullptr; + } + mozilla::Span<const char16_t> timeZoneChars = timeZone.twoByteRange(); + + if (!GetProperty(cx, internals, internals, cx->names().skeleton, &value)) { + return nullptr; + } + + AutoStableStringChars skeleton(cx); + if (!skeleton.initTwoByte(cx, value.toString())) { + return nullptr; + } + mozilla::Span<const char16_t> skeletonChars = skeleton.twoByteRange(); + + Vector<char16_t, INITIAL_CHAR_BUFFER_SIZE> newSkeleton(cx); + if 
(hcPattern) { + if (!newSkeleton.append(skeletonChars.data(), skeletonChars.size())) { + return nullptr; + } + + ReplaceHourSymbol(newSkeleton, *hcPattern); + skeletonChars = newSkeleton; + } + + UErrorCode status = U_ZERO_ERROR; + UDateIntervalFormat* dif = udtitvfmt_open( + IcuLocale(locale.get()), skeletonChars.data(), skeletonChars.size(), + timeZoneChars.data(), timeZoneChars.size(), &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return nullptr; + } + + return dif; +} + +static UCalendar* CreateCalendar(JSContext* cx, const UCalendar* cal, + ClippedTime t) { + UErrorCode status = U_ZERO_ERROR; + UCalendar* clone = ucal_clone(cal, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return nullptr; + } + ScopedICUObject<UCalendar, ucal_close> toClose(clone); + + ucal_setMillis(clone, t.toDouble(), &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return nullptr; + } + return toClose.forget(); +} + +/** + * PartitionDateTimeRangePattern ( dateTimeFormat, x, y ) + */ +static const UFormattedValue* PartitionDateTimeRangePattern( + JSContext* cx, const UDateFormat* df, const UDateIntervalFormat* dif, + UFormattedDateInterval* formatted, ClippedTime x, ClippedTime y) { + MOZ_ASSERT(x.isValid()); + MOZ_ASSERT(y.isValid()); + MOZ_ASSERT(x.toDouble() <= y.toDouble()); + + // We can't access the calendar used by UDateIntervalFormat to change it to a + // proleptic Gregorian calendar. Instead we need to call a different formatter + // function which accepts UCalendar instead of UDate. + // But creating new UCalendar objects for each call is slow, so when we can + // ensure that the input dates are later than the Gregorian change date, + // directly call the formatter functions taking UDate. + + // The Gregorian change date "1582-10-15T00:00:00.000Z". + constexpr double GregorianChangeDate = -12219292800000.0; + + // Add a full day to account for time zone offsets. 
+ constexpr double GregorianChangeDatePlusOneDay = + GregorianChangeDate + msPerDay; + + UErrorCode status = U_ZERO_ERROR; + if (x.toDouble() < GregorianChangeDatePlusOneDay) { + // Create calendar objects for the start and end date by cloning the date + // formatter calendar. The date formatter calendar already has the correct + // time zone set and was changed to use a proleptic Gregorian calendar. + const UCalendar* cal = udat_getCalendar(df); + + UCalendar* startCal = CreateCalendar(cx, cal, x); + if (!startCal) { + return nullptr; + } + ScopedICUObject<UCalendar, ucal_close> toCloseStart(startCal); + + UCalendar* endCal = CreateCalendar(cx, cal, y); + if (!endCal) { + return nullptr; + } + ScopedICUObject<UCalendar, ucal_close> toCloseEnd(endCal); + + udtitvfmt_formatCalendarToResult(dif, startCal, endCal, formatted, &status); + } else { + // The common fast path which doesn't require creating calendar objects. + udtitvfmt_formatToResult(dif, x.toDouble(), y.toDouble(), formatted, + &status); + } + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return nullptr; + } + + const UFormattedValue* formattedValue = + udtitvfmt_resultAsValue(formatted, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return nullptr; + } + + return formattedValue; +} + +/** + * PartitionDateTimeRangePattern ( dateTimeFormat, x, y ), steps 9-11. + * + * Examine the formatted value to see if any interval span field is present. + */ +static bool DateFieldsPracticallyEqual(JSContext* cx, + const UFormattedValue* formattedValue, + bool* equal) { + UErrorCode status = U_ZERO_ERROR; + UConstrainedFieldPosition* fpos = ucfpos_open(&status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + ScopedICUObject<UConstrainedFieldPosition, ucfpos_close> toCloseFpos(fpos); + + // We're only interested in UFIELD_CATEGORY_DATE_INTERVAL_SPAN fields. 
+ ucfpos_constrainCategory(fpos, UFIELD_CATEGORY_DATE_INTERVAL_SPAN, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + + bool hasSpan = ufmtval_nextPosition(formattedValue, fpos, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + + // When no date interval span field was found, both dates are "practically + // equal" per PartitionDateTimeRangePattern. + *equal = !hasSpan; + return true; +} + +/** + * FormatDateTimeRange( dateTimeFormat, x, y ) + */ +static bool FormatDateTimeRange(JSContext* cx, const UDateFormat* df, + const UDateIntervalFormat* dif, ClippedTime x, + ClippedTime y, MutableHandleValue result) { + UErrorCode status = U_ZERO_ERROR; + UFormattedDateInterval* formatted = udtitvfmt_openResult(&status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + ScopedICUObject<UFormattedDateInterval, udtitvfmt_closeResult> toClose( + formatted); + + const UFormattedValue* formattedValue = + PartitionDateTimeRangePattern(cx, df, dif, formatted, x, y); + if (!formattedValue) { + return false; + } + + // PartitionDateTimeRangePattern, steps 9-11. + bool equal; + if (!DateFieldsPracticallyEqual(cx, formattedValue, &equal)) { + return false; + } + + // PartitionDateTimeRangePattern, step 12. 
+ if (equal) { + return intl_FormatDateTime(cx, df, x, result); + } + + JSString* resultStr = intl::FormattedValueToString(cx, formattedValue); + if (!resultStr) { + return false; + } + + result.setString(resultStr); + return true; +} + +/** + * FormatDateTimeRangeToParts ( dateTimeFormat, x, y ) + */ +static bool FormatDateTimeRangeToParts(JSContext* cx, const UDateFormat* df, + const UDateIntervalFormat* dif, + ClippedTime x, ClippedTime y, + MutableHandleValue result) { + UErrorCode status = U_ZERO_ERROR; + UFormattedDateInterval* formatted = udtitvfmt_openResult(&status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + ScopedICUObject<UFormattedDateInterval, udtitvfmt_closeResult> toClose( + formatted); + + const UFormattedValue* formattedValue = + PartitionDateTimeRangePattern(cx, df, dif, formatted, x, y); + if (!formattedValue) { + return false; + } + + // PartitionDateTimeRangePattern, steps 9-11. + bool equal; + if (!DateFieldsPracticallyEqual(cx, formattedValue, &equal)) { + return false; + } + + // PartitionDateTimeRangePattern, step 12. 
+ if (equal) { + FieldType source = &JSAtomState::shared; + return intl_FormatToPartsDateTime(cx, df, x, source, result); + } + + RootedString overallResult(cx, + intl::FormattedValueToString(cx, formattedValue)); + if (!overallResult) { + return false; + } + + RootedArrayObject partsArray(cx, NewDenseEmptyArray(cx)); + if (!partsArray) { + return false; + } + + size_t lastEndIndex = 0; + RootedObject singlePart(cx); + RootedValue val(cx); + + auto AppendPart = [&](FieldType type, size_t beginIndex, size_t endIndex, + FieldType source) { + singlePart = NewBuiltinClassInstance<PlainObject>(cx); + if (!singlePart) { + return false; + } + + val = StringValue(cx->names().*type); + if (!DefineDataProperty(cx, singlePart, cx->names().type, val)) { + return false; + } + + JSLinearString* partSubstr = NewDependentString( + cx, overallResult, beginIndex, endIndex - beginIndex); + if (!partSubstr) { + return false; + } + + val = StringValue(partSubstr); + if (!DefineDataProperty(cx, singlePart, cx->names().value, val)) { + return false; + } + + val = StringValue(cx->names().*source); + if (!DefineDataProperty(cx, singlePart, cx->names().source, val)) { + return false; + } + + if (!NewbornArrayPush(cx, partsArray, ObjectValue(*singlePart))) { + return false; + } + + lastEndIndex = endIndex; + return true; + }; + + UConstrainedFieldPosition* fpos = ucfpos_open(&status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + ScopedICUObject<UConstrainedFieldPosition, ucfpos_close> toCloseFpos(fpos); + + size_t categoryEndIndex = 0; + FieldType source = &JSAtomState::shared; + + while (true) { + bool hasMore = ufmtval_nextPosition(formattedValue, fpos, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + if (!hasMore) { + break; + } + + int32_t category = ucfpos_getCategory(fpos, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + + int32_t field = ucfpos_getField(fpos, 
&status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + + int32_t beginIndexInt, endIndexInt; + ucfpos_getIndexes(fpos, &beginIndexInt, &endIndexInt, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + + MOZ_ASSERT(beginIndexInt >= 0); + MOZ_ASSERT(endIndexInt >= 0); + MOZ_ASSERT(beginIndexInt <= endIndexInt, + "field iterator returning invalid range"); + + size_t beginIndex = size_t(beginIndexInt); + size_t endIndex = size_t(endIndexInt); + + // Indices are guaranteed to be returned in order (from left to right). + MOZ_ASSERT(lastEndIndex <= beginIndex, + "field iteration didn't return fields in order start to " + "finish as expected"); + + if (category == UFIELD_CATEGORY_DATE_INTERVAL_SPAN) { + // Append any remaining literal parts before changing the source kind. + if (lastEndIndex < beginIndex) { + if (!AppendPart(&JSAtomState::literal, lastEndIndex, beginIndex, + source)) { + return false; + } + } + + // The special field category UFIELD_CATEGORY_DATE_INTERVAL_SPAN has only + // two allowed values (0 or 1), indicating the beginning of the start date + // or the end date, respectively. + MOZ_ASSERT(field == 0 || field == 1, + "span category has unexpected value"); + + source = field == 0 ? &JSAtomState::startRange : &JSAtomState::endRange; + categoryEndIndex = endIndex; + continue; + } + + // Ignore categories other than UFIELD_CATEGORY_DATE. + if (category != UFIELD_CATEGORY_DATE) { + continue; + } + + // Append the field if supported. If not supported, append it as part of the + // next literal part. 
+ if (FieldType type = + GetFieldTypeForFormatField(static_cast<UDateFormatField>(field))) { + if (lastEndIndex < beginIndex) { + if (!AppendPart(&JSAtomState::literal, lastEndIndex, beginIndex, + source)) { + return false; + } + } + + if (!AppendPart(type, beginIndex, endIndex, source)) { + return false; + } + } + + if (endIndex == categoryEndIndex) { + // Append any remaining literal parts before changing the source kind. + if (lastEndIndex < endIndex) { + if (!AppendPart(&JSAtomState::literal, lastEndIndex, endIndex, + source)) { + return false; + } + } + + source = &JSAtomState::shared; + } + } + + // Append any final literal. + if (lastEndIndex < overallResult->length()) { + if (!AppendPart(&JSAtomState::literal, lastEndIndex, + overallResult->length(), source)) { + return false; + } + } + + result.setObject(*partsArray); + return true; +} + +bool js::intl_FormatDateTimeRange(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 4); + MOZ_ASSERT(args[0].isObject()); + MOZ_ASSERT(args[1].isNumber()); + MOZ_ASSERT(args[2].isNumber()); + MOZ_ASSERT(args[3].isBoolean()); + + Rooted<DateTimeFormatObject*> dateTimeFormat(cx); + dateTimeFormat = &args[0].toObject().as<DateTimeFormatObject>(); + + bool formatToParts = args[3].toBoolean(); + + // PartitionDateTimeRangePattern, steps 1-2. + ClippedTime x = TimeClip(args[1].toNumber()); + if (!x.isValid()) { + JS_ReportErrorNumberASCII( + cx, GetErrorMessage, nullptr, JSMSG_DATE_NOT_FINITE, "DateTimeFormat", + formatToParts ? "formatRangeToParts" : "formatRange"); + return false; + } + + // PartitionDateTimeRangePattern, steps 3-4. + ClippedTime y = TimeClip(args[2].toNumber()); + if (!y.isValid()) { + JS_ReportErrorNumberASCII( + cx, GetErrorMessage, nullptr, JSMSG_DATE_NOT_FINITE, "DateTimeFormat", + formatToParts ? "formatRangeToParts" : "formatRange"); + return false; + } + + // Self-hosted code should have checked this condition. 
+ MOZ_ASSERT(x.toDouble() <= y.toDouble(), + "start date mustn't be after the end date"); + + // Obtain a cached UDateFormat object. + UDateFormat* df = dateTimeFormat->getDateFormat(); + if (!df) { + df = NewUDateFormat(cx, dateTimeFormat); + if (!df) { + return false; + } + dateTimeFormat->setDateFormat(df); + + intl::AddICUCellMemory(dateTimeFormat, + DateTimeFormatObject::UDateFormatEstimatedMemoryUse); + } + + // Obtain a cached UDateIntervalFormat object. + UDateIntervalFormat* dif = dateTimeFormat->getDateIntervalFormat(); + if (!dif) { + dif = NewUDateIntervalFormat(cx, dateTimeFormat); + if (!dif) { + return false; + } + dateTimeFormat->setDateIntervalFormat(dif); + + intl::AddICUCellMemory( + dateTimeFormat, + DateTimeFormatObject::UDateIntervalFormatEstimatedMemoryUse); + } + + // Use the UDateIntervalFormat to actually format the time range. + return formatToParts + ? FormatDateTimeRangeToParts(cx, df, dif, x, y, args.rval()) + : FormatDateTimeRange(cx, df, dif, x, y, args.rval()); +} + +#else +bool js::intl_FormatDateTimeRange(JSContext* cx, unsigned argc, Value* vp) { + MOZ_CRASH("FormatDateTimeRange requires ICU draft APIs"); +} +#endif // U_HIDE_DRAFT_API diff --git a/js/src/builtin/intl/DateTimeFormat.h b/js/src/builtin/intl/DateTimeFormat.h new file mode 100644 index 0000000000..49eb8d6e0d --- /dev/null +++ b/js/src/builtin/intl/DateTimeFormat.h @@ -0,0 +1,233 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef builtin_intl_DateTimeFormat_h +#define builtin_intl_DateTimeFormat_h + +#include "mozilla/Attributes.h" + +#include "builtin/intl/CommonFunctions.h" +#include "builtin/SelfHostingDefines.h" +#include "js/Class.h" +#include "js/RootingAPI.h" +#include "vm/NativeObject.h" + +using UDateFormat = void*; +struct UDateIntervalFormat; + +namespace js { + +class DateTimeFormatObject : public NativeObject { + public: + static const JSClass class_; + static const JSClass& protoClass_; + + static constexpr uint32_t INTERNALS_SLOT = 0; + static constexpr uint32_t UDATE_FORMAT_SLOT = 1; + static constexpr uint32_t UDATE_INTERVAL_FORMAT_SLOT = 2; + static constexpr uint32_t SLOT_COUNT = 3; + + static_assert(INTERNALS_SLOT == INTL_INTERNALS_OBJECT_SLOT, + "INTERNALS_SLOT must match self-hosting define for internals " + "object slot"); + + // Estimated memory use for UDateFormat (see IcuMemoryUsage). + static constexpr size_t UDateFormatEstimatedMemoryUse = 91922; + + // Estimated memory use for UDateIntervalFormat (see IcuMemoryUsage). 
+ static constexpr size_t UDateIntervalFormatEstimatedMemoryUse = 119856; + + UDateFormat* getDateFormat() const { + const auto& slot = getFixedSlot(UDATE_FORMAT_SLOT); + if (slot.isUndefined()) { + return nullptr; + } + return static_cast<UDateFormat*>(slot.toPrivate()); + } + + void setDateFormat(UDateFormat* dateFormat) { + setFixedSlot(UDATE_FORMAT_SLOT, PrivateValue(dateFormat)); + } + + UDateIntervalFormat* getDateIntervalFormat() const { + const auto& slot = getFixedSlot(UDATE_INTERVAL_FORMAT_SLOT); + if (slot.isUndefined()) { + return nullptr; + } + return static_cast<UDateIntervalFormat*>(slot.toPrivate()); + } + + void setDateIntervalFormat(UDateIntervalFormat* dateIntervalFormat) { + setFixedSlot(UDATE_INTERVAL_FORMAT_SLOT, PrivateValue(dateIntervalFormat)); + } + + private: + static const JSClassOps classOps_; + static const ClassSpec classSpec_; + + static void finalize(JSFreeOp* fop, JSObject* obj); +}; + +/** + * Returns a new instance of the standard built-in DateTimeFormat constructor. + * Self-hosted code cannot cache this constructor (as it does for others in + * Utilities.js) because it is initialized after self-hosted code is compiled. + * + * Usage: dateTimeFormat = intl_DateTimeFormat(locales, options) + */ +extern MOZ_MUST_USE bool intl_DateTimeFormat(JSContext* cx, unsigned argc, + JS::Value* vp); + +/** + * Returns an array with the calendar type identifiers per Unicode + * Technical Standard 35, Unicode Locale Data Markup Language, for the + * supported calendars for the given locale. The default calendar is + * element 0. + * + * Usage: calendars = intl_availableCalendars(locale) + */ +extern MOZ_MUST_USE bool intl_availableCalendars(JSContext* cx, unsigned argc, + JS::Value* vp); + +/** + * Returns the calendar type identifier per Unicode Technical Standard 35, + * Unicode Locale Data Markup Language, for the default calendar for the given + * locale. 
+ * + * Usage: calendar = intl_defaultCalendar(locale) + */ +extern MOZ_MUST_USE bool intl_defaultCalendar(JSContext* cx, unsigned argc, + JS::Value* vp); + +/** + * 6.4.1 IsValidTimeZoneName ( timeZone ) + * + * Verifies that the given string is a valid time zone name. If it is a valid + * time zone name, its IANA time zone name is returned. Otherwise returns null. + * + * ES2017 Intl draft rev 4a23f407336d382ed5e3471200c690c9b020b5f3 + * + * Usage: ianaTimeZone = intl_IsValidTimeZoneName(timeZone) + */ +extern MOZ_MUST_USE bool intl_IsValidTimeZoneName(JSContext* cx, unsigned argc, + JS::Value* vp); + +/** + * Return the canonicalized time zone name. Canonicalization resolves link + * names to their target time zones. + * + * Usage: ianaTimeZone = intl_canonicalizeTimeZone(timeZone) + */ +extern MOZ_MUST_USE bool intl_canonicalizeTimeZone(JSContext* cx, unsigned argc, + JS::Value* vp); + +/** + * Return the default time zone name. The time zone name is not canonicalized. + * + * Usage: icuDefaultTimeZone = intl_defaultTimeZone() + */ +extern MOZ_MUST_USE bool intl_defaultTimeZone(JSContext* cx, unsigned argc, + JS::Value* vp); + +/** + * Return the raw offset from GMT in milliseconds for the default time zone. + * + * Usage: defaultTimeZoneOffset = intl_defaultTimeZoneOffset() + */ +extern MOZ_MUST_USE bool intl_defaultTimeZoneOffset(JSContext* cx, + unsigned argc, + JS::Value* vp); + +/** + * Return true if the given string is the default time zone as returned by + * intl_defaultTimeZone(). Otherwise return false. + * + * Usage: isIcuDefaultTimeZone = intl_isDefaultTimeZone(icuDefaultTimeZone) + */ +extern MOZ_MUST_USE bool intl_isDefaultTimeZone(JSContext* cx, unsigned argc, + JS::Value* vp); + +/** + * Return a pattern in the date-time format pattern language of Unicode + * Technical Standard 35, Unicode Locale Data Markup Language, for the + * best-fit date-time format pattern corresponding to skeleton for the + * given locale. 
+ * + * Usage: pattern = intl_patternForSkeleton(locale, skeleton, hourCycle) + */ +extern MOZ_MUST_USE bool intl_patternForSkeleton(JSContext* cx, unsigned argc, + JS::Value* vp); + +/** + * Return a pattern in the date-time format pattern language of Unicode + * Technical Standard 35, Unicode Locale Data Markup Language, for the + * best-fit date-time style for the given locale. + * The function takes six arguments: + * + * locale + * BCP47 compliant locale string + * dateStyle + * A string with values: full or long or medium or short, or `undefined` + * timeStyle + * A string with values: full or long or medium or short, or `undefined` + * timeZone + * IANA time zone name + * hour12 + * A boolean to request hour12 representation, or `undefined` + * hourCycle + * A string with values: h11, h12, h23, or h24, or `undefined` + * + * Date and time style categories map to CLDR time/date standard + * format patterns. + * + * For the definition of a pattern string, see LDML 4.8: + * http://unicode.org/reports/tr35/tr35-dates.html#Date_Format_Patterns + * + * If `undefined` is passed to `dateStyle` or `timeStyle`, the respective + * portions of the pattern will not be included in the result. + * + * Usage: pattern = intl_patternForStyle(locale, dateStyle, timeStyle, timeZone, + * hour12, hourCycle) + */ +extern MOZ_MUST_USE bool intl_patternForStyle(JSContext* cx, unsigned argc, + JS::Value* vp); + +/** + * Return a skeleton for the pattern in the date-time format pattern language of + * Unicode Technical Standard 35, Unicode Locale Data Markup Language. + * + * Usage: skeleton = intl_skeletonForPattern(pattern) + */ +extern MOZ_MUST_USE bool intl_skeletonForPattern(JSContext* cx, unsigned argc, + JS::Value* vp); + +/** + * Returns a String value representing x (which must be a Number value) + * according to the effective locale and the formatting options of the + * given DateTimeFormat. + * + * Spec: ECMAScript Internationalization API Specification, 12.3.2. 
+ * + * Usage: formatted = intl_FormatDateTime(dateTimeFormat, x, formatToParts) + */ +extern MOZ_MUST_USE bool intl_FormatDateTime(JSContext* cx, unsigned argc, + JS::Value* vp); + +/** + * Returns a String value representing the range between x and y (which both + * must be Number values) according to the effective locale and the formatting + * options of the given DateTimeFormat. + * + * Spec: Intl.DateTimeFormat.prototype.formatRange proposal + * + * Usage: formatted = intl_FormatDateTimeRange(dateTimeFmt, x, y, formatToParts) + */ +extern MOZ_MUST_USE bool intl_FormatDateTimeRange(JSContext* cx, unsigned argc, + JS::Value* vp); + +} // namespace js + +#endif /* builtin_intl_DateTimeFormat_h */ diff --git a/js/src/builtin/intl/DateTimeFormat.js b/js/src/builtin/intl/DateTimeFormat.js new file mode 100644 index 0000000000..7321d0f0fc --- /dev/null +++ b/js/src/builtin/intl/DateTimeFormat.js @@ -0,0 +1,1205 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Portions Copyright Norbert Lindenberg 2011-2012. */ + +/** + * Compute an internal properties object from |lazyDateTimeFormatData|. 
+ */ +function resolveDateTimeFormatInternals(lazyDateTimeFormatData) { + assert(IsObject(lazyDateTimeFormatData), "lazy data not an object?"); + + // Lazy DateTimeFormat data has the following structure: + // + // { + // requestedLocales: List of locales, + // + // localeOpt: // *first* opt computed in InitializeDateTimeFormat + // { + // localeMatcher: "lookup" / "best fit", + // + // ca: string matching a Unicode extension type, // optional + // + // nu: string matching a Unicode extension type, // optional + // + // hc: "h11" / "h12" / "h23" / "h24", // optional + // } + // + // timeZone: IANA time zone name, + // + // formatOpt: // *second* opt computed in InitializeDateTimeFormat + // { + // // all the properties/values listed in Table 3 + // // (weekday, era, year, month, day, &c.) + // + // hour12: true / false, // optional + // } + // + // formatMatcher: "basic" / "best fit", + // + // dateStyle: "full" / "long" / "medium" / "short" / undefined, + // + // timeStyle: "full" / "long" / "medium" / "short" / undefined, + // + // patternOption: + // String representing LDML Date Format pattern or undefined + // } + // + // Note that lazy data is only installed as a final step of initialization, + // so every DateTimeFormat lazy data object has *all* these properties, + // never a subset of them. + + var internalProps = std_Object_create(null); + + var DateTimeFormat = dateTimeFormatInternalProperties; + + // Compute effective locale. + + // Step 10. + var localeData = DateTimeFormat.localeData; + + // Step 11. + var r = ResolveLocale("DateTimeFormat", + lazyDateTimeFormatData.requestedLocales, + lazyDateTimeFormatData.localeOpt, + DateTimeFormat.relevantExtensionKeys, + localeData); + + // Steps 12-13, 15. + internalProps.locale = r.locale; + internalProps.calendar = r.ca; + internalProps.numberingSystem = r.nu; + + // Compute formatting options. + // Step 16. 
+ var dataLocale = r.dataLocale; + + // Allow the calendar field to modify the pattern selection choice. + dataLocale = addUnicodeExtension(dataLocale, "-u-ca-" + r.ca); + + // Step 20. + internalProps.timeZone = lazyDateTimeFormatData.timeZone; + + // Step 21. + var formatOpt = lazyDateTimeFormatData.formatOpt; + + // Step 14. + // Copy the hourCycle setting, if present, to the format options. But + // only do this if no hour12 option is present, because the latter takes + // precedence over hourCycle. + if (r.hc !== null && formatOpt.hour12 === undefined) + formatOpt.hourCycle = r.hc; + + // Steps 26-30, more or less - see comment after this function. + var skeleton; + var pattern; + if (lazyDateTimeFormatData.patternOption !== undefined) { + pattern = lazyDateTimeFormatData.patternOption; + skeleton = intl_skeletonForPattern(pattern); + + internalProps.patternOption = lazyDateTimeFormatData.patternOption; + } else if (lazyDateTimeFormatData.dateStyle !== undefined || + lazyDateTimeFormatData.timeStyle !== undefined) { + pattern = intl_patternForStyle(dataLocale, + lazyDateTimeFormatData.dateStyle, + lazyDateTimeFormatData.timeStyle, + lazyDateTimeFormatData.timeZone, + formatOpt.hour12, + formatOpt.hourCycle); + skeleton = intl_skeletonForPattern(pattern); + + internalProps.dateStyle = lazyDateTimeFormatData.dateStyle; + internalProps.timeStyle = lazyDateTimeFormatData.timeStyle; + } else { + skeleton = toICUSkeleton(formatOpt); + pattern = toBestICUPattern(dataLocale, skeleton, formatOpt); + } + + // Step 31. + internalProps.skeleton = skeleton; + internalProps.pattern = pattern; + + // The caller is responsible for associating |internalProps| with the right + // object using |setInternalProperties|. + return internalProps; +} + +/** + * Returns an object containing the DateTimeFormat internal properties of |obj|. 
+ */ +function getDateTimeFormatInternals(obj) { + assert(IsObject(obj), "getDateTimeFormatInternals called with non-object"); + assert(GuardToDateTimeFormat(obj) !== null, "getDateTimeFormatInternals called with non-DateTimeFormat"); + + var internals = getIntlObjectInternals(obj); + assert(internals.type === "DateTimeFormat", "bad type escaped getIntlObjectInternals"); + + // If internal properties have already been computed, use them. + var internalProps = maybeInternalProperties(internals); + if (internalProps) + return internalProps; + + // Otherwise it's time to fully create them. + internalProps = resolveDateTimeFormatInternals(internals.lazyData); + setInternalProperties(internals, internalProps); + return internalProps; +} + +/** + * 12.1.10 UnwrapDateTimeFormat( dtf ) + */ +function UnwrapDateTimeFormat(dtf) { + // Steps 2 and 4 (error handling moved to caller). + if (IsObject(dtf) && + GuardToDateTimeFormat(dtf) === null && + !IsWrappedDateTimeFormat(dtf) && + dtf instanceof GetBuiltinConstructor("DateTimeFormat")) + { + dtf = dtf[intlFallbackSymbol()]; + } + return dtf; +} + +/** + * 6.4.2 CanonicalizeTimeZoneName ( timeZone ) + * + * Canonicalizes the given IANA time zone name. + * + * ES2017 Intl draft rev 4a23f407336d382ed5e3471200c690c9b020b5f3 + */ +function CanonicalizeTimeZoneName(timeZone) { + assert(typeof timeZone === "string", "CanonicalizeTimeZoneName"); + + // Step 1. (Not applicable, the input is already a valid IANA time zone.) + assert(timeZone !== "Etc/Unknown", "Invalid time zone"); + assert(timeZone === intl_IsValidTimeZoneName(timeZone), "Time zone name not normalized"); + + // Step 2. + var ianaTimeZone = intl_canonicalizeTimeZone(timeZone); + assert(ianaTimeZone !== "Etc/Unknown", "Invalid canonical time zone"); + assert(ianaTimeZone === intl_IsValidTimeZoneName(ianaTimeZone), "Unsupported canonical time zone"); + + // Step 3. 
+ if (ianaTimeZone === "Etc/UTC" || ianaTimeZone === "Etc/GMT") { + ianaTimeZone = "UTC"; + } + + // Step 4. + return ianaTimeZone; +} + +var timeZoneCache = { + icuDefaultTimeZone: undefined, + defaultTimeZone: undefined, +}; + +/** + * 6.4.3 DefaultTimeZone () + * + * Returns the IANA time zone name for the host environment's current time zone. + * + * ES2017 Intl draft rev 4a23f407336d382ed5e3471200c690c9b020b5f3 + */ +function DefaultTimeZone() { + if (intl_isDefaultTimeZone(timeZoneCache.icuDefaultTimeZone)) + return timeZoneCache.defaultTimeZone; + + // Verify that the current ICU time zone is a valid ECMA-402 time zone. + var icuDefaultTimeZone = intl_defaultTimeZone(); + var timeZone = intl_IsValidTimeZoneName(icuDefaultTimeZone); + if (timeZone === null) { + // Before defaulting to "UTC", try to represent the default time zone + // using the Etc/GMT + offset format. This format only accepts full + // hour offsets. + const msPerHour = 60 * 60 * 1000; + var offset = intl_defaultTimeZoneOffset(); + assert(offset === (offset | 0), + "milliseconds offset shouldn't be able to exceed int32_t range"); + var offsetHours = offset / msPerHour, offsetHoursFraction = offset % msPerHour; + if (offsetHoursFraction === 0) { + // Etc/GMT + offset uses POSIX-style signs, i.e. a positive offset + // means a location west of GMT. + timeZone = "Etc/GMT" + (offsetHours < 0 ? "+" : "-") + std_Math_abs(offsetHours); + + // Check if the fallback is valid. + timeZone = intl_IsValidTimeZoneName(timeZone); + } + + // Fallback to "UTC" if everything else fails. + if (timeZone === null) + timeZone = "UTC"; + } + + // Canonicalize the ICU time zone, e.g. change Etc/UTC to UTC. + var defaultTimeZone = CanonicalizeTimeZoneName(timeZone); + + timeZoneCache.defaultTimeZone = defaultTimeZone; + timeZoneCache.icuDefaultTimeZone = icuDefaultTimeZone; + + return defaultTimeZone; +} + +/** + * Initializes an object as a DateTimeFormat. 
+ * + * This method is complicated a moderate bit by its implementing initialization + * as a *lazy* concept. Everything that must happen now, does -- but we defer + * all the work we can until the object is actually used as a DateTimeFormat. + * This later work occurs in |resolveDateTimeFormatInternals|; steps not noted + * here occur there. + * + * Spec: ECMAScript Internationalization API Specification, 12.1.1. + */ +function InitializeDateTimeFormat(dateTimeFormat, thisValue, locales, options, mozExtensions) { + assert(IsObject(dateTimeFormat), "InitializeDateTimeFormat called with non-Object"); + assert(GuardToDateTimeFormat(dateTimeFormat) !== null, + "InitializeDateTimeFormat called with non-DateTimeFormat"); + + // Lazy DateTimeFormat data has the following structure: + // + // { + // requestedLocales: List of locales, + // + // localeOpt: // *first* opt computed in InitializeDateTimeFormat + // { + // localeMatcher: "lookup" / "best fit", + // + // ca: string matching a Unicode extension type, // optional + // + // nu: string matching a Unicode extension type, // optional + // + // hc: "h11" / "h12" / "h23" / "h24", // optional + // } + // + // timeZone: IANA time zone name, + // + // formatOpt: // *second* opt computed in InitializeDateTimeFormat + // { + // // all the properties/values listed in Table 3 + // // (weekday, era, year, month, day, &c.) + // + // hour12: true / false, // optional + // } + // + // formatMatcher: "basic" / "best fit", + // } + // + // Note that lazy data is only installed as a final step of initialization, + // so every DateTimeFormat lazy data object has *all* these properties, + // never a subset of them. + var lazyDateTimeFormatData = std_Object_create(null); + + // Step 1. + var requestedLocales = CanonicalizeLocaleList(locales); + lazyDateTimeFormatData.requestedLocales = requestedLocales; + + // Step 2. + options = ToDateTimeOptions(options, "any", "date"); + + // Compute options that impact interpretation of locale. 
+ // Step 3. + var localeOpt = new Record(); + lazyDateTimeFormatData.localeOpt = localeOpt; + + // Steps 4-5. + var localeMatcher = + GetOption(options, "localeMatcher", "string", ["lookup", "best fit"], + "best fit"); + localeOpt.localeMatcher = localeMatcher; + + var calendar = GetOption(options, "calendar", "string", undefined, undefined); + + if (calendar !== undefined) { + calendar = intl_ValidateAndCanonicalizeUnicodeExtensionType(calendar, "calendar", "ca"); + } + + localeOpt.ca = calendar; + + var numberingSystem = GetOption(options, "numberingSystem", "string", undefined, undefined); + + if (numberingSystem !== undefined) { + numberingSystem = intl_ValidateAndCanonicalizeUnicodeExtensionType(numberingSystem, + "numberingSystem", + "nu"); + } + + localeOpt.nu = numberingSystem; + + // Step 6. + var hr12 = GetOption(options, "hour12", "boolean", undefined, undefined); + + // Step 7. + var hc = GetOption(options, "hourCycle", "string", ["h11", "h12", "h23", "h24"], undefined); + + // Step 8. + if (hr12 !== undefined) { + // The "hourCycle" option is ignored if "hr12" is also present. + hc = null; + } + + // Step 9. + localeOpt.hc = hc; + + // Steps 10-16 (see resolveDateTimeFormatInternals). + + // Steps 17-20. + var tz = options.timeZone; + if (tz !== undefined) { + // Step 18.a. + tz = ToString(tz); + + // Step 18.b. + var timeZone = intl_IsValidTimeZoneName(tz); + if (timeZone === null) + ThrowRangeError(JSMSG_INVALID_TIME_ZONE, tz); + + // Step 18.c. + tz = CanonicalizeTimeZoneName(timeZone); + } else { + // Step 19. + tz = DefaultTimeZone(); + } + lazyDateTimeFormatData.timeZone = tz; + + // Step 21. + var formatOpt = new Record(); + lazyDateTimeFormatData.formatOpt = formatOpt; + + if (mozExtensions) { + let pattern = GetOption(options, "pattern", "string", undefined, undefined); + lazyDateTimeFormatData.patternOption = pattern; + } + + // Step 22. + // 12.1, Table 5: Components of date and time formats. 
+ formatOpt.weekday = GetOption(options, "weekday", "string", ["narrow", "short", "long"], + undefined); + formatOpt.era = GetOption(options, "era", "string", ["narrow", "short", "long"], undefined); + formatOpt.year = GetOption(options, "year", "string", ["2-digit", "numeric"], undefined); + formatOpt.month = GetOption(options, "month", "string", + ["2-digit", "numeric", "narrow", "short", "long"], undefined); + formatOpt.day = GetOption(options, "day", "string", ["2-digit", "numeric"], undefined); +#ifdef NIGHTLY_BUILD + formatOpt.dayPeriod = GetOption(options, "dayPeriod", "string", ["narrow", "short", "long"], + undefined); +#endif + formatOpt.hour = GetOption(options, "hour", "string", ["2-digit", "numeric"], undefined); + formatOpt.minute = GetOption(options, "minute", "string", ["2-digit", "numeric"], undefined); + formatOpt.second = GetOption(options, "second", "string", ["2-digit", "numeric"], undefined); + formatOpt.fractionalSecondDigits = GetNumberOption(options, "fractionalSecondDigits", 1, 3, + undefined); + formatOpt.timeZoneName = GetOption(options, "timeZoneName", "string", ["short", "long"], + undefined); + + // Steps 23-24 provided by ICU - see comment after this function. + + // Step 25. + // + // For some reason (ICU not exposing enough interface?) we drop the + // requested format matcher on the floor after this. In any case, even if + // doing so is justified, we have to do this work here in case it triggers + // getters or similar. 
(bug 852837) + var formatMatcher = + GetOption(options, "formatMatcher", "string", ["basic", "best fit"], + "best fit"); + void formatMatcher; + + // "DateTimeFormat dateStyle & timeStyle" propsal + // https://github.com/tc39/proposal-intl-datetime-style + var dateStyle = GetOption(options, "dateStyle", "string", ["full", "long", "medium", "short"], + undefined); + lazyDateTimeFormatData.dateStyle = dateStyle; + + var timeStyle = GetOption(options, "timeStyle", "string", ["full", "long", "medium", "short"], + undefined); + lazyDateTimeFormatData.timeStyle = timeStyle; + + if (dateStyle !== undefined || timeStyle !== undefined) { + var optionsList = [ + "weekday", "era", "year", "month", "day", "hour", "minute", "second", + "fractionalSecondDigits", "timeZoneName", + ]; + + for (var i = 0; i < optionsList.length; i++) { + var option = optionsList[i]; + if (formatOpt[option] !== undefined) { + ThrowTypeError(JSMSG_INVALID_DATETIME_OPTION, option, + dateStyle !== undefined ? "dateStyle" : "timeStyle"); + } + } + } + + // Steps 26-28 provided by ICU, more or less - see comment after this function. + + // Steps 29-30. + // Pass hr12 on to ICU. + if (hr12 !== undefined) + formatOpt.hour12 = hr12; + + // Step 32. + // + // We've done everything that must be done now: mark the lazy data as fully + // computed and install it. + initializeIntlObject(dateTimeFormat, "DateTimeFormat", lazyDateTimeFormatData); + + // 12.2.1, steps 4-5. + // TODO: spec issue - The current spec doesn't have the IsObject check, + // which means |Intl.DateTimeFormat.call(null)| is supposed to throw here. + if (dateTimeFormat !== thisValue && IsObject(thisValue) && + thisValue instanceof GetBuiltinConstructor("DateTimeFormat")) + { + _DefineDataProperty(thisValue, intlFallbackSymbol(), dateTimeFormat, + ATTR_NONENUMERABLE | ATTR_NONCONFIGURABLE | ATTR_NONWRITABLE); + + return thisValue; + } + + // 12.2.1, step 6. 
+ return dateTimeFormat; +} + +// Intl.DateTimeFormat and ICU skeletons and patterns +// ================================================== +// +// Different locales have different ways to display dates using the same +// basic components. For example, en-US might use "Sept. 24, 2012" while +// fr-FR might use "24 Sept. 2012". The intent of Intl.DateTimeFormat is to +// permit production of a format for the locale that best matches the +// set of date-time components and their desired representation as specified +// by the API client. +// +// ICU supports specification of date and time formats in three ways: +// +// 1) A style is just one of the identifiers FULL, LONG, MEDIUM, or SHORT. +// The date-time components included in each style and their representation +// are defined by ICU using CLDR locale data (CLDR is the Unicode +// Consortium's Common Locale Data Repository). +// +// 2) A skeleton is a string specifying which date-time components to include, +// and which representations to use for them. For example, "yyyyMMMMdd" +// specifies a year with at least four digits, a full month name, and a +// two-digit day. It does not specify in which order the components appear, +// how they are separated, the localized strings for textual components +// (such as weekday or month), whether the month is in format or +// stand-alone form¹, or the numbering system used for numeric components. +// All that information is filled in by ICU using CLDR locale data. +// ¹ The format form is the one used in formatted strings that include a +// day; the stand-alone form is used when not including days, e.g., in +// calendar headers. The two forms differ at least in some Slavic languages, +// e.g. Russian: "22 марта 2013 г." vs. "Март 2013". +// +// 3) A pattern is a string specifying which date-time components to include, +// in which order, with which separators, in which grammatical case. 
For +// example, "EEEE, d MMMM y" specifies the full localized weekday name, +// followed by comma and space, followed by the day, followed by space, +// followed by the full month name in format form, followed by space, +// followed by the full year. It +// still does not specify localized strings for textual components and the +// numbering system - these are determined by ICU using CLDR locale data or +// possibly API parameters. +// +// All actual formatting in ICU is done with patterns; styles and skeletons +// have to be mapped to patterns before processing. +// +// The options of DateTimeFormat most closely correspond to ICU skeletons. This +// implementation therefore, in the toBestICUPattern function, converts +// DateTimeFormat options to ICU skeletons, and then lets ICU map skeletons to +// actual ICU patterns. The pattern may not directly correspond to what the +// skeleton requests, as the mapper (UDateTimePatternGenerator) is constrained +// by the available locale data for the locale. The resulting ICU pattern is +// kept as the DateTimeFormat's [[pattern]] internal property and passed to ICU +// in the format method. +// +// An ICU pattern represents the information of the following DateTimeFormat +// internal properties described in the specification, which therefore don't +// exist separately in the implementation: +// - [[weekday]], [[era]], [[year]], [[month]], [[day]], [[hour]], [[minute]], +// [[second]], [[timeZoneName]] +// - [[hour12]] +// - [[hourCycle]] +// - [[hourNo0]] +// When needed for the resolvedOptions method, the resolveICUPattern function +// maps the instance's ICU pattern back to the specified properties of the +// object returned by resolvedOptions. +// +// ICU date-time skeletons and patterns aren't fully documented in the ICU +// documentation (see http://bugs.icu-project.org/trac/ticket/9627). 
The best +// documentation at this point is in UTR 35: +// http://unicode.org/reports/tr35/tr35-dates.html#Date_Format_Patterns + +/* eslint-disable complexity */ +/** + * Returns an ICU skeleton string representing the specified options. + */ +function toICUSkeleton(options) { + // Create an ICU skeleton representing the specified options. See + // http://unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table + var skeleton = ""; + switch (options.weekday) { + case "narrow": + skeleton += "EEEEE"; + break; + case "short": + skeleton += "E"; + break; + case "long": + skeleton += "EEEE"; + } + switch (options.era) { + case "narrow": + skeleton += "GGGGG"; + break; + case "short": + skeleton += "G"; + break; + case "long": + skeleton += "GGGG"; + break; + } + switch (options.year) { + case "2-digit": + skeleton += "yy"; + break; + case "numeric": + skeleton += "y"; + break; + } + switch (options.month) { + case "2-digit": + skeleton += "MM"; + break; + case "numeric": + skeleton += "M"; + break; + case "narrow": + skeleton += "MMMMM"; + break; + case "short": + skeleton += "MMM"; + break; + case "long": + skeleton += "MMMM"; + break; + } + switch (options.day) { + case "2-digit": + skeleton += "dd"; + break; + case "numeric": + skeleton += "d"; + break; + } + // If hour12 and hourCycle are both present, hour12 takes precedence. + var hourSkeletonChar = "j"; + if (options.hour12 !== undefined) { + if (options.hour12) + hourSkeletonChar = "h"; + else + hourSkeletonChar = "H"; + } else { + switch (options.hourCycle) { + case "h11": + case "h12": + hourSkeletonChar = "h"; + break; + case "h23": + case "h24": + hourSkeletonChar = "H"; + break; + } + } + switch (options.hour) { + case "2-digit": + skeleton += hourSkeletonChar + hourSkeletonChar; + break; + case "numeric": + skeleton += hourSkeletonChar; + break; + } +#ifdef NIGHTLY_BUILD + // ICU requires that "B" is set after the "j" hour skeleton symbol. 
+ // https://unicode-org.atlassian.net/browse/ICU-20731 + switch (options.dayPeriod) { + case "narrow": + skeleton += "BBBBB"; + break; + case "short": + skeleton += "B"; + break; + case "long": + skeleton += "BBBB"; + break; + } +#endif + switch (options.minute) { + case "2-digit": + skeleton += "mm"; + break; + case "numeric": + skeleton += "m"; + break; + } + switch (options.second) { + case "2-digit": + skeleton += "ss"; + break; + case "numeric": + skeleton += "s"; + break; + } + switch (options.fractionalSecondDigits) { + case 1: + skeleton += "S"; + break; + case 2: + skeleton += "SS"; + break; + case 3: + skeleton += "SSS"; + break; + } + switch (options.timeZoneName) { + case "short": + skeleton += "z"; + break; + case "long": + skeleton += "zzzz"; + break; + } + return skeleton; +} +/* eslint-enable complexity */ + +/** + * Returns an ICU pattern string for the given locale and representing the + * specified skeleton as closely as possible given available locale data. + */ +function toBestICUPattern(locale, skeleton, options) { + // Let ICU convert the ICU skeleton to an ICU pattern for the given locale. + return intl_patternForSkeleton(locale, skeleton, options.hourCycle); +} + +/** + * Returns a new options object that includes the provided options (if any) + * and fills in default components if required components are not defined. + * Required can be "date", "time", or "any". + * Defaults can be "date", "time", or "all". + * + * Spec: ECMAScript Internationalization API Specification, 12.1.1. + */ +function ToDateTimeOptions(options, required, defaults) { + assert(typeof required === "string", "ToDateTimeOptions"); + assert(typeof defaults === "string", "ToDateTimeOptions"); + + // Steps 1-2. + if (options === undefined) + options = null; + else + options = ToObject(options); + options = std_Object_create(options); + + // Step 3. + var needDefaults = true; + + // Step 4. 
+ if (required === "date" || required === "any") { + if (options.weekday !== undefined) + needDefaults = false; + if (options.year !== undefined) + needDefaults = false; + if (options.month !== undefined) + needDefaults = false; + if (options.day !== undefined) + needDefaults = false; + } + + // Step 5. + if (required === "time" || required === "any") { +#ifdef NIGHTLY_BUILD + if (options.dayPeriod !== undefined) + needDefaults = false; +#endif + if (options.hour !== undefined) + needDefaults = false; + if (options.minute !== undefined) + needDefaults = false; + if (options.second !== undefined) + needDefaults = false; + if (options.fractionalSecondDigits !== undefined) + needDefaults = false; + } + + // "DateTimeFormat dateStyle & timeStyle" proposal + // https://github.com/tc39/proposal-intl-datetime-style + var dateStyle = options.dateStyle; + var timeStyle = options.timeStyle; + + if (dateStyle !== undefined || timeStyle !== undefined) + needDefaults = false; + + if (required === "date" && timeStyle !== undefined) + ThrowTypeError(JSMSG_INVALID_DATETIME_STYLE, "timeStyle", "toLocaleDateString"); + + if (required === "time" && dateStyle !== undefined) + ThrowTypeError(JSMSG_INVALID_DATETIME_STYLE, "dateStyle", "toLocaleTimeString"); + + // Step 6. + if (needDefaults && (defaults === "date" || defaults === "all")) { + // The specification says to call [[DefineOwnProperty]] with false for + // the Throw parameter, while Object.defineProperty uses true. For the + // calls here, the difference doesn't matter because we're adding + // properties to a new object. + _DefineDataProperty(options, "year", "numeric"); + _DefineDataProperty(options, "month", "numeric"); + _DefineDataProperty(options, "day", "numeric"); + } + + // Step 7. + if (needDefaults && (defaults === "time" || defaults === "all")) { + // See comment for step 6.
+ _DefineDataProperty(options, "hour", "numeric"); + _DefineDataProperty(options, "minute", "numeric"); + _DefineDataProperty(options, "second", "numeric"); + } + + // Step 8. + return options; +} + +/** + * Returns the subset of the given locale list for which this locale list has a + * matching (possibly fallback) locale. Locales appear in the same order in the + * returned list as in the input list. + * + * Spec: ECMAScript Internationalization API Specification, 12.3.2. + */ +function Intl_DateTimeFormat_supportedLocalesOf(locales /*, options*/) { + var options = arguments.length > 1 ? arguments[1] : undefined; + + // Step 1. + var availableLocales = "DateTimeFormat"; + + // Step 2. + var requestedLocales = CanonicalizeLocaleList(locales); + + // Step 3. + return SupportedLocales(availableLocales, requestedLocales, options); +} + +/** + * DateTimeFormat internal properties. + * + * Spec: ECMAScript Internationalization API Specification, 9.1 and 12.3.3. + */ +var dateTimeFormatInternalProperties = { + localeData: dateTimeFormatLocaleData, + relevantExtensionKeys: ["ca", "hc", "nu"], +}; + +function dateTimeFormatLocaleData() { + return { + ca: intl_availableCalendars, + nu: getNumberingSystems, + hc: () => { + return [null, "h11", "h12", "h23", "h24"]; + }, + default: { + ca: intl_defaultCalendar, + nu: intl_numberingSystem, + hc: () => { + return null; + }, + }, + }; +} + +/** + * Create function to be cached and returned by Intl.DateTimeFormat.prototype.format. + * + * Spec: ECMAScript Internationalization API Specification, 12.1.5. + */ +function createDateTimeFormatFormat(dtf) { + // This function is not inlined in $Intl_DateTimeFormat_format_get to avoid + // creating a call-object on each call to $Intl_DateTimeFormat_format_get. + return function(date) { + // Step 1 (implicit). + + // Step 2. 
+ assert(IsObject(dtf), "dateTimeFormatFormatToBind called with non-Object"); + assert(GuardToDateTimeFormat(dtf) !== null, "dateTimeFormatFormatToBind called with non-DateTimeFormat"); + + // Steps 3-4. + var x = (date === undefined) ? std_Date_now() : ToNumber(date); + + // Step 5. + return intl_FormatDateTime(dtf, x, /* formatToParts = */ false); + }; +} + +/** + * Returns a function bound to this DateTimeFormat that returns a String value + * representing the result of calling ToNumber(date) according to the + * effective locale and the formatting options of this DateTimeFormat. + * + * Spec: ECMAScript Internationalization API Specification, 12.4.3. + */ +// Uncloned functions with `$` prefix are allocated as extended function +// to store the original name in `_SetCanonicalName`. +function $Intl_DateTimeFormat_format_get() { + // Steps 1-3. + var thisArg = UnwrapDateTimeFormat(this); + var dtf = thisArg; + if (!IsObject(dtf) || (dtf = GuardToDateTimeFormat(dtf)) === null) { + return callFunction(CallDateTimeFormatMethodIfWrapped, thisArg, + "$Intl_DateTimeFormat_format_get"); + } + + var internals = getDateTimeFormatInternals(dtf); + + // Step 4. + if (internals.boundFormat === undefined) { + // Steps 4.a-c. + internals.boundFormat = createDateTimeFormatFormat(dtf); + } + + // Step 5. + return internals.boundFormat; +} +_SetCanonicalName($Intl_DateTimeFormat_format_get, "get format"); + +/** + * Intl.DateTimeFormat.prototype.formatToParts ( date ) + * + * Spec: ECMAScript Internationalization API Specification, 12.4.4. + */ +function Intl_DateTimeFormat_formatToParts(date) { + // Step 1. + var dtf = this; + + // Steps 2-3. + if (!IsObject(dtf) || (dtf = GuardToDateTimeFormat(dtf)) === null) { + return callFunction(CallDateTimeFormatMethodIfWrapped, this, date, + "Intl_DateTimeFormat_formatToParts"); + } + + // Steps 4-5. + var x = (date === undefined) ? std_Date_now() : ToNumber(date); + + // Ensure the DateTimeFormat internals are resolved. 
+ getDateTimeFormatInternals(dtf); + + // Step 6. + return intl_FormatDateTime(dtf, x, /* formatToParts = */ true); +} + +/** + * Intl.DateTimeFormat.prototype.formatRange ( startDate , endDate ) + * + * Spec: Intl.DateTimeFormat.prototype.formatRange proposal + */ +function Intl_DateTimeFormat_formatRange(startDate, endDate) { + // Step 1. + var dtf = this; + + // Steps 2-3. + if (!IsObject(dtf) || (dtf = GuardToDateTimeFormat(dtf)) === null) { + return callFunction(CallDateTimeFormatMethodIfWrapped, this, startDate, endDate, + "Intl_DateTimeFormat_formatRange"); + } + + // Step 4. + if (startDate === undefined || endDate === undefined) { + ThrowTypeError(JSMSG_UNDEFINED_DATE, startDate === undefined ? "start" : "end", + "formatRange"); + } + + // Step 5. + var x = ToNumber(startDate); + + // Step 6. + var y = ToNumber(endDate); + + // Step 7. + if (x > y) { + ThrowRangeError(JSMSG_START_AFTER_END_DATE, "formatRange"); + } + + // Ensure the DateTimeFormat internals are resolved. + getDateTimeFormatInternals(dtf); + + // Step 8. + return intl_FormatDateTimeRange(dtf, x, y, /* formatToParts = */ false); +} + +/** + * Intl.DateTimeFormat.prototype.formatRangeToParts ( startDate , endDate ) + * + * Spec: Intl.DateTimeFormat.prototype.formatRange proposal + */ +function Intl_DateTimeFormat_formatRangeToParts(startDate, endDate) { + // Step 1. + var dtf = this; + + // Steps 2-3. + if (!IsObject(dtf) || (dtf = GuardToDateTimeFormat(dtf)) === null) { + return callFunction(CallDateTimeFormatMethodIfWrapped, this, startDate, endDate, + "Intl_DateTimeFormat_formatRangeToParts"); + } + + // Step 4. + if (startDate === undefined || endDate === undefined) { + ThrowTypeError(JSMSG_UNDEFINED_DATE, startDate === undefined ? "start" : "end", + "formatRangeToParts"); + } + + // Step 5. + var x = ToNumber(startDate); + + // Step 6. + var y = ToNumber(endDate); + + // Step 7. 
    if (x > y) {
        ThrowRangeError(JSMSG_START_AFTER_END_DATE, "formatRangeToParts");
    }

    // Ensure the DateTimeFormat internals are resolved.
    getDateTimeFormatInternals(dtf);

    // Step 8.
    return intl_FormatDateTimeRange(dtf, x, y, /* formatToParts = */ true);
}

/**
 * Returns the resolved options for a DateTimeFormat object.
 *
 * Spec: ECMAScript Internationalization API Specification, 12.4.5.
 *
 * The returned object always has "locale", "calendar", "numberingSystem" and
 * "timeZone". "pattern" is added only when |internals.patternOption| is set.
 * When a dateStyle or timeStyle is present those styles are reported instead
 * of the individual date-time fields; otherwise the fields are derived from
 * the ICU pattern by resolveICUPattern.
 */
function Intl_DateTimeFormat_resolvedOptions() {
    // Steps 1-3.
    var thisArg = UnwrapDateTimeFormat(this);
    var dtf = thisArg;
    if (!IsObject(dtf) || (dtf = GuardToDateTimeFormat(dtf)) === null) {
        return callFunction(CallDateTimeFormatMethodIfWrapped, thisArg,
                            "Intl_DateTimeFormat_resolvedOptions");
    }

    var internals = getDateTimeFormatInternals(dtf);

    // Steps 4-5.
    var result = {
        locale: internals.locale,
        calendar: internals.calendar,
        numberingSystem: internals.numberingSystem,
        timeZone: internals.timeZone,
    };

    if (internals.patternOption !== undefined) {
        _DefineDataProperty(result, "pattern", internals.pattern);
    }

    var hasDateStyle = internals.dateStyle !== undefined;
    var hasTimeStyle = internals.timeStyle !== undefined;

    if (hasDateStyle || hasTimeStyle) {
        if (hasTimeStyle) {
            // timeStyle (unlike dateStyle) requires resolving the pattern to
            // ensure "hourCycle" and "hour12" properties are added to |result|.
            resolveICUPattern(internals.pattern, result, /* includeDateTimeFields = */ false);
        }
        if (hasDateStyle) {
            _DefineDataProperty(result, "dateStyle", internals.dateStyle);
        }
        if (hasTimeStyle) {
            _DefineDataProperty(result, "timeStyle", internals.timeStyle);
        }
    } else {
        resolveICUPattern(internals.pattern, result, /* includeDateTimeFields = */ true);
    }

    // Step 6.
    return result;
}

/* eslint-disable complexity */
/**
 * Maps an ICU pattern string to a corresponding set of date-time components
 * and their values, and adds properties for these components to the result
 * object, which will be returned by the resolvedOptions method. For the
 * interpretation of ICU pattern characters, see
 * http://unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table
 *
 * |pattern| is scanned once from left to right. Text enclosed in single quotes
 * is skipped, and a run of the same pattern character is counted to select the
 * component's width. When |includeDateTimeFields| is false, only "hourCycle"
 * and "hour12" are defined on |result| (and only if an hour field was seen).
 */
function resolveICUPattern(pattern, result, includeDateTimeFields) {
    assert(IsObject(result), "resolveICUPattern");

    var hourCycle, weekday, era, year, month, day, dayPeriod, hour, minute, second,
        fractionalSecondDigits, timeZoneName;
    var i = 0;
    while (i < pattern.length) {
        var c = pattern[i++];
        if (c === "'") {
            // Skip everything up to and including the closing quote.
            while (i < pattern.length && pattern[i] !== "'")
                i++;
            i++;
        } else {
            // Count how many times the same pattern character repeats.
            var count = 1;
            while (i < pattern.length && pattern[i] === c) {
                i++;
                count++;
            }

            var value;
            switch (c) {
            // "text" cases
            case "G":
            case "E":
            case "c":
            case "B":
            case "z":
            case "v":
            case "V":
                if (count <= 3)
                    value = "short";
                else if (count === 4)
                    value = "long";
                else
                    value = "narrow";
                break;
            // "number" cases
            case "y":
            case "d":
            case "h":
            case "H":
            case "m":
            case "s":
            case "k":
            case "K":
                if (count === 2)
                    value = "2-digit";
                else
                    value = "numeric";
                break;
            // "text & number" cases
            case "M":
            case "L":
                if (count === 1)
                    value = "numeric";
                else if (count === 2)
                    value = "2-digit";
                else if (count === 3)
                    value = "short";
                else if (count === 4)
                    value = "long";
                else
                    value = "narrow";
                break;
            case "S":
                // The number of "S" characters is itself the reported value.
                value = count;
                break;
            default:
                // skip other pattern characters and literal text
            }

            // Map ICU pattern characters back to the corresponding date-time
            // components of DateTimeFormat. See
            // http://unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table
            switch (c) {
            case "E":
            case "c":
                weekday = value;
                break;
            case "G":
                era = value;
                break;
            case "y":
                year = value;
                break;
            case "M":
            case "L":
                month = value;
                break;
            case "d":
                day = value;
                break;
            case "B":
                dayPeriod = value;
                break;
            case "h":
                hourCycle = "h12";
                hour = value;
                break;
            case "H":
                hourCycle = "h23";
                hour = value;
                break;
            case "k":
                hourCycle = "h24";
                hour = value;
                break;
            case "K":
                hourCycle = "h11";
                hour = value;
                break;
            case "m":
                minute = value;
                break;
            case "s":
                second = value;
                break;
            case "S":
                fractionalSecondDigits = value;
                break;
            case "z":
            case "v":
            case "V":
                timeZoneName = value;
                break;
            }
        }
    }

    if (hourCycle) {
        _DefineDataProperty(result, "hourCycle", hourCycle);
        _DefineDataProperty(result, "hour12", hourCycle === "h11" || hourCycle === "h12");
    }
    if (!includeDateTimeFields) {
        return;
    }
    if (weekday) {
        _DefineDataProperty(result, "weekday", weekday);
    }
    if (era) {
        _DefineDataProperty(result, "era", era);
    }
    if (year) {
        _DefineDataProperty(result, "year", year);
    }
    if (month) {
        _DefineDataProperty(result, "month", month);
    }
    if (day) {
        _DefineDataProperty(result, "day", day);
    }
#ifdef NIGHTLY_BUILD
    if (dayPeriod) {
        _DefineDataProperty(result, "dayPeriod", dayPeriod);
    }
#endif
    if (hour) {
        _DefineDataProperty(result, "hour", hour);
    }
    if (minute) {
        _DefineDataProperty(result, "minute", minute);
    }
    if (second) {
        _DefineDataProperty(result, "second", second);
    }
    if (fractionalSecondDigits) {
        _DefineDataProperty(result, "fractionalSecondDigits", fractionalSecondDigits);
    }
    if (timeZoneName) {
        _DefineDataProperty(result, "timeZoneName", timeZoneName);
    }
}
/* eslint-enable complexity */
diff --git a/js/src/builtin/intl/DisplayNames.cpp b/js/src/builtin/intl/DisplayNames.cpp
new file mode 100644
index
0000000000..06e4de67f2
--- /dev/null
+++ b/js/src/builtin/intl/DisplayNames.cpp
@@ -0,0 +1,1076 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/* Intl.DisplayNames implementation. */

#include "builtin/intl/DisplayNames.h"

#include "mozilla/Assertions.h"
#include "mozilla/Span.h"
#include "mozilla/TextUtils.h"

#include <algorithm>
#include <iterator>

#include "jsapi.h"
#include "jsfriendapi.h"
#include "jsnum.h"
#include "jspubtd.h"

#include "builtin/intl/CommonFunctions.h"
#include "builtin/intl/LanguageTag.h"
#include "builtin/intl/ScopedICUObject.h"
#include "builtin/intl/SharedIntlData.h"
#include "builtin/String.h"
#include "gc/AllocKind.h"
#include "gc/FreeOp.h"
#include "gc/Rooting.h"
#include "js/CallArgs.h"
#include "js/Class.h"
#include "js/experimental/Intl.h"  // JS::AddMozDisplayNamesConstructor
#include "js/friend/ErrorMessages.h"  // js::GetErrorMessage, JSMSG_*
#include "js/GCVector.h"
#include "js/PropertyDescriptor.h"
#include "js/PropertySpec.h"
#include "js/Result.h"
#include "js/RootingAPI.h"
#include "js/TypeDecls.h"
#include "js/Utility.h"
#include "unicode/ucal.h"
#include "unicode/ucurr.h"
#include "unicode/udat.h"
#include "unicode/udatpg.h"
#include "unicode/udisplaycontext.h"
#include "unicode/uldnames.h"
#include "unicode/uloc.h"
#include "unicode/umachine.h"
#include "unicode/utypes.h"
#include "vm/GlobalObject.h"
#include "vm/JSAtom.h"
#include "vm/JSContext.h"
#include "vm/JSObject.h"
#include "vm/List.h"
#include "vm/Printer.h"
#include "vm/Runtime.h"
#include "vm/SelfHosting.h"
#include "vm/Stack.h"
#include "vm/StringType.h"

#include "vm/JSObject-inl.h"
#include "vm/List-inl.h"
#include "vm/NativeObject-inl.h"

using namespace js;

using js::intl::CallICU;
using js::intl::IcuLocale;

// Class hooks for Intl.DisplayNames objects: every listed hook is null except
// the finalizer.
const JSClassOps DisplayNamesObject::classOps_ = {nullptr, /* addProperty */
                                                  nullptr, /* delProperty */
                                                  nullptr, /* enumerate */
                                                  nullptr, /* newEnumerate */
                                                  nullptr, /* resolve */
                                                  nullptr, /* mayResolve */
                                                  DisplayNamesObject::finalize};

const JSClass DisplayNamesObject::class_ = {
    "Intl.DisplayNames",
    JSCLASS_HAS_RESERVED_SLOTS(DisplayNamesObject::SLOT_COUNT) |
        JSCLASS_HAS_CACHED_PROTO(JSProto_DisplayNames) |
        JSCLASS_FOREGROUND_FINALIZE,
    &DisplayNamesObject::classOps_, &DisplayNamesObject::classSpec_};

const JSClass& DisplayNamesObject::protoClass_ = PlainObject::class_;

// Native registered under js_toSource_str in |displayNames_methods|; it always
// returns the "DisplayNames" name from cx->names().
static bool displayNames_toSource(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);
  args.rval().setString(cx->names().DisplayNames);
  return true;
}

// Self-hosted functions defined on the constructor.
static const JSFunctionSpec displayNames_static_methods[] = {
    JS_SELF_HOSTED_FN("supportedLocalesOf",
                      "Intl_DisplayNames_supportedLocalesOf", 1, 0),
    JS_FS_END};

// Functions defined on the prototype.
static const JSFunctionSpec displayNames_methods[] = {
    JS_SELF_HOSTED_FN("of", "Intl_DisplayNames_of", 1, 0),
    JS_SELF_HOSTED_FN("resolvedOptions", "Intl_DisplayNames_resolvedOptions", 0,
                      0),
    JS_FN(js_toSource_str, displayNames_toSource, 0, 0), JS_FS_END};

static const JSPropertySpec displayNames_properties[] = {
    JS_STRING_SYM_PS(toStringTag, "Intl.DisplayNames", JSPROP_READONLY),
    JS_PS_END};

// Forward declaration: the native constructor is referenced by |classSpec_|
// before it is defined.
static bool DisplayNames(JSContext* cx, unsigned argc, Value* vp);

const ClassSpec DisplayNamesObject::classSpec_ = {
    GenericCreateConstructor<DisplayNames, 2, gc::AllocKind::FUNCTION>,
    GenericCreatePrototype<DisplayNamesObject>,
    displayNames_static_methods,
    nullptr,
    displayNames_methods,
    displayNames_properties,
    nullptr,
    ClassSpec::DontDefineConstructor};

enum class DisplayNamesOptions {
  Standard,

  // Calendar display names are no longer available with the current spec
  //
proposal text, but may be re-enabled in the future. For our internal use
  // we still need to have them present, so use a feature guard for now.
  EnableMozExtensions,
};

/**
 * Initialize a new Intl.DisplayNames object using the named self-hosted
 * function.
 *
 * The self-hosted |initializer| is called with four arguments: |obj|,
 * |locales|, |options| and a boolean that is true only when |dnoptions| is
 * DisplayNamesOptions::EnableMozExtensions. Returns false if that call fails.
 */
static bool InitializeDisplayNamesObject(JSContext* cx, HandleObject obj,
                                         HandlePropertyName initializer,
                                         HandleValue locales,
                                         HandleValue options,
                                         DisplayNamesOptions dnoptions) {
  FixedInvokeArgs<4> args(cx);

  args[0].setObject(*obj);
  args[1].set(locales);
  args[2].set(options);
  args[3].setBoolean(dnoptions == DisplayNamesOptions::EnableMozExtensions);

  RootedValue ignored(cx);
  if (!CallSelfHostedFunction(cx, initializer, NullHandleValue, args,
                              &ignored)) {
    return false;
  }

  MOZ_ASSERT(ignored.isUndefined(),
             "Unexpected return value from non-legacy Intl object initializer");
  return true;
}

/**
 * Intl.DisplayNames ([ locales [ , options ]])
 *
 * Shared implementation behind both native constructors in this file;
 * |dnoptions| selects how the prototype is looked up and is forwarded to the
 * self-hosted initializer.
 */
static bool DisplayNames(JSContext* cx, const CallArgs& args,
                         DisplayNamesOptions dnoptions) {
  // Step 1.
  if (!ThrowIfNotConstructing(cx, args, "Intl.DisplayNames")) {
    return false;
  }

  // Step 2 (Inlined 9.1.14, OrdinaryCreateFromConstructor).
  RootedObject proto(cx);
  if (dnoptions == DisplayNamesOptions::Standard) {
    if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_DisplayNames,
                                            &proto)) {
      return false;
    }
  } else {
    // The extension variant passes JSProto_Null and takes the prototype from
    // |newTarget|.
    RootedObject newTarget(cx, &args.newTarget().toObject());
    if (!GetPrototypeFromConstructor(cx, newTarget, JSProto_Null, &proto)) {
      return false;
    }
  }

  // TypeError anyway, but this gives a better error message.
  if (!args.requireAtLeast(cx, "DisplayNames", 2)) {
    return false;
  }

  Rooted<DisplayNamesObject*> displayNames(cx);
  displayNames = NewObjectWithClassProto<DisplayNamesObject>(cx, proto);
  if (!displayNames) {
    return false;
  }

  HandleValue locales = args.get(0);
  HandleValue options = args.get(1);

  // Steps 3-26.
  if (!InitializeDisplayNamesObject(cx, displayNames,
                                    cx->names().InitializeDisplayNames, locales,
                                    options, dnoptions)) {
    return false;
  }

  // Step 27.
  args.rval().setObject(*displayNames);
  return true;
}

// Native entry point that constructs with DisplayNamesOptions::Standard.
static bool DisplayNames(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);
  return DisplayNames(cx, args, DisplayNamesOptions::Standard);
}

// Native entry point that constructs with
// DisplayNamesOptions::EnableMozExtensions.
static bool MozDisplayNames(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);
  return DisplayNames(cx, args, DisplayNamesOptions::EnableMozExtensions);
}

// Finalizer: if a ULocaleDisplayNames is stored on the object, remove its
// memory accounting and close it.
void js::DisplayNamesObject::finalize(JSFreeOp* fop, JSObject* obj) {
  MOZ_ASSERT(fop->onMainThread());

  if (ULocaleDisplayNames* ldn =
          obj->as<DisplayNamesObject>().getLocaleDisplayNames()) {
    intl::RemoveICUCellMemory(fop, obj, DisplayNamesObject::EstimatedMemoryUse);

    uldn_close(ldn);
  }
}

// Creates a constructor backed by MozDisplayNames with a blank PlainObject
// prototype, defines the static methods, prototype methods and properties on
// them, and finally defines the constructor on |intl| under the
// "DisplayNames" name. Returns false as soon as any step fails.
bool JS::AddMozDisplayNamesConstructor(JSContext* cx, HandleObject intl) {
  RootedObject ctor(cx, GlobalObject::createConstructor(
                            cx, MozDisplayNames, cx->names().DisplayNames, 2));
  if (!ctor) {
    return false;
  }

  RootedObject proto(
      cx, GlobalObject::createBlankPrototype<PlainObject>(cx, cx->global()));
  if (!proto) {
    return false;
  }

  if (!LinkConstructorAndPrototype(cx, ctor, proto)) {
    return false;
  }

  if (!JS_DefineFunctions(cx, ctor, displayNames_static_methods)) {
    return false;
  }

  if (!JS_DefineFunctions(cx, proto, displayNames_methods)) {
    return false;
  }

  if (!JS_DefineProperties(cx, proto, displayNames_properties)) {
    return false;
  }

  RootedValue ctorValue(cx, ObjectValue(*ctor));
  return DefineDataProperty(cx, intl, cx->names().DisplayNames, ctorValue, 0);
}

enum class DisplayNamesStyle { Long, Short, Narrow };

enum class DisplayNamesFallback { None, Code };

// Opens a ULocaleDisplayNames for |locale| with the display contexts listed
// below. On ICU failure an internal error is reported and nullptr is returned.
static ULocaleDisplayNames* NewULocaleDisplayNames(
    JSContext* cx, const char* locale, DisplayNamesStyle displayStyle) {
  UErrorCode status = U_ZERO_ERROR;

  UDisplayContext contexts[] = {
      // Use the standard names, not the dialect names.
      // For example "English (GB)" instead of "British English".
      UDISPCTX_STANDARD_NAMES,

      // Assume the display names are used in a stand-alone context.
      UDISPCTX_CAPITALIZATION_FOR_STANDALONE,

      // Select either the long or short form. There's no separate narrow form
      // available in ICU, therefore we equate "narrow"/"short" styles here.
      displayStyle == DisplayNamesStyle::Long ? UDISPCTX_LENGTH_FULL
                                              : UDISPCTX_LENGTH_SHORT,

      // Don't apply substitutes, because we need to apply our own fallbacks.
      UDISPCTX_NO_SUBSTITUTE,
  };

  ULocaleDisplayNames* ldn = uldn_openForContext(IcuLocale(locale), contexts,
                                                 std::size(contexts), &status);
  if (U_FAILURE(status)) {
    intl::ReportInternalError(cx);
    return nullptr;
  }
  return ldn;
}

// Returns the ULocaleDisplayNames stored on |displayNames|, creating and
// storing one (and recording its estimated memory use) when none exists yet.
static ULocaleDisplayNames* GetOrCreateLocaleDisplayNames(
    JSContext* cx, Handle<DisplayNamesObject*> displayNames, const char* locale,
    DisplayNamesStyle displayStyle) {
  // Obtain a cached ULocaleDisplayNames object.
  ULocaleDisplayNames* ldn = displayNames->getLocaleDisplayNames();
  if (!ldn) {
    ldn = NewULocaleDisplayNames(cx, locale, displayStyle);
    if (!ldn) {
      return nullptr;
    }
    displayNames->setLocaleDisplayNames(ldn);

    intl::AddICUCellMemory(displayNames,
                           DisplayNamesObject::EstimatedMemoryUse);
  }
  return ldn;
}

// Reports JSMSG_INVALID_OPTION_VALUE for |type| with the double-quoted
// |option| string. The report is only issued when QuoteString succeeds.
static void ReportInvalidOptionError(JSContext* cx, const char* type,
                                     HandleString option) {
  if (UniqueChars str = QuoteString(cx, option, '"')) {
    JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
                              JSMSG_INVALID_OPTION_VALUE, type, str.get());
  }
}

// Numeric overload: reports JSMSG_INVALID_DIGITS_VALUE with |option| converted
// to a string.
// NOTE(review): |type| is not used by this overload.
static void ReportInvalidOptionError(JSContext* cx, const char* type,
                                     double option) {
  ToCStringBuf cbuf;
  if (const char* str = NumberToCString(cx, &cbuf, option)) {
    JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
                              JSMSG_INVALID_DIGITS_VALUE, str);
  }
}

// Returns the display name for the language tag in |languageStr|.
//
// The input is parsed as a base name and canonicalized before ICU is asked.
// An unparsable input reports an invalid "language" option and returns
// nullptr. When ICU has no name, the result is the canonicalized tag for
// DisplayNamesFallback::Code and the empty string otherwise.
static JSString* GetLanguageDisplayName(
    JSContext* cx, Handle<DisplayNamesObject*> displayNames, const char* locale,
    DisplayNamesStyle displayStyle, DisplayNamesFallback fallback,
    HandleLinearString languageStr) {
  bool ok;
  intl::LanguageTag tag(cx);
  JS_TRY_VAR_OR_RETURN_NULL(
      cx, ok, intl::LanguageTagParser::tryParseBaseName(cx, languageStr, tag));
  if (!ok) {
    ReportInvalidOptionError(cx, "language", languageStr);
    return nullptr;
  }

  // ICU always canonicalizes the input locale, but since we know that ICU's
  // canonicalization is incomplete, we need to perform our own canonicalization
  // to ensure consistent result.
  if (!tag.canonicalizeBaseName(cx)) {
    return nullptr;
  }

  UniqueChars languageChars = tag.toStringZ(cx);
  if (!languageChars) {
    return nullptr;
  }

  ULocaleDisplayNames* ldn =
      GetOrCreateLocaleDisplayNames(cx, displayNames, locale, displayStyle);
  if (!ldn) {
    return nullptr;
  }

  JSString* str = CallICU(cx, [ldn, &languageChars](UChar* chars, uint32_t size,
                                                    UErrorCode* status) {
    int32_t res =
        uldn_localeDisplayName(ldn, languageChars.get(), chars, size, status);

    // |uldn_localeDisplayName| reports U_ILLEGAL_ARGUMENT_ERROR when no
    // display name was found.
    if (*status == U_ILLEGAL_ARGUMENT_ERROR) {
      *status = U_ZERO_ERROR;
      res = 0;
    }
    return res;
  });
  if (!str) {
    return nullptr;
  }

  // Return the canonicalized input when no localized language name was found.
  if (str->empty() && fallback == DisplayNamesFallback::Code) {
    return NewStringCopyZ<CanGC>(cx, languageChars.get());
  }

  return str;
}

// Creates a new string from the characters covered by |span|.
template <typename CharT>
static JSString* NewStringCopy(JSContext* cx, mozilla::Span<const CharT> span) {
  return NewStringCopyN<CanGC>(cx, span.data(), span.size());
}

// Returns the display name for the script subtag in |scriptStr|.
//
// An input that is not a standalone script subtag reports an invalid "script"
// option and returns nullptr. The subtag is canonicalized as part of an
// "und"-based language tag before ICU is consulted.
static JSString* GetScriptDisplayName(JSContext* cx,
                                      Handle<DisplayNamesObject*> displayNames,
                                      const char* locale,
                                      DisplayNamesStyle displayStyle,
                                      DisplayNamesFallback fallback,
                                      HandleLinearString scriptStr) {
  intl::ScriptSubtag script;
  if (!intl::ParseStandaloneScriptTag(scriptStr, script)) {
    ReportInvalidOptionError(cx, "script", scriptStr);
    return nullptr;
  }

  intl::LanguageTag tag(cx);
  tag.setLanguage("und");
  tag.setScript(script);

  // ICU always canonicalizes the input locale, but since we know that ICU's
  // canonicalization is incomplete, we need to perform our own canonicalization
  // to ensure consistent result.
  if (!tag.canonicalizeBaseName(cx)) {
    return nullptr;
  }
  MOZ_ASSERT(tag.script().present());

  // |uldn_scriptDisplayName| doesn't use the stand-alone form for script
  // subtags, so we're using |uloc_getDisplayScript| instead. (This only applies
  // to the long form.)
  //
  // ICU bug: https://unicode-org.atlassian.net/browse/ICU-9301
  if (displayStyle == DisplayNamesStyle::Long) {
    // |uloc_getDisplayScript| expects a full locale identifier as its input.
    UniqueChars scriptChars = tag.toStringZ(cx);
    if (!scriptChars) {
      return nullptr;
    }

    JSString* str =
        CallICU(cx, [locale, &scriptChars](UChar* chars, uint32_t size,
                                           UErrorCode* status) {
          int32_t res = uloc_getDisplayScript(scriptChars.get(), locale, chars,
                                              size, status);

          // |uloc_getDisplayScript| reports U_USING_DEFAULT_WARNING when no
          // display name was found.
          if (*status == U_USING_DEFAULT_WARNING) {
            *status = U_ZERO_ERROR;
            res = 0;
          }
          return res;
        });
    if (!str) {
      return nullptr;
    }

    // Return the case-canonicalized input when no localized name was found.
    // Note that this uses the parsed |script|, title-cased, not the subtag
    // stored in the canonicalized |tag|.
    if (str->empty() && fallback == DisplayNamesFallback::Code) {
      script.toTitleCase();
      return NewStringCopy(cx, script.span());
    }

    return str;
  }

  // Note: ICU requires the script subtag to be in canonical case.
  const intl::ScriptSubtag& canonicalScript = tag.script();

  // Zero-initialized buffer, one element longer than the maximum script
  // length, so the copied subtag stays null-terminated.
  char scriptChars[intl::LanguageTagLimits::ScriptLength + 1] = {};
  std::copy_n(canonicalScript.span().data(), canonicalScript.length(),
              scriptChars);

  ULocaleDisplayNames* ldn =
      GetOrCreateLocaleDisplayNames(cx, displayNames, locale, displayStyle);
  if (!ldn) {
    return nullptr;
  }

  JSString* str = CallICU(cx, [ldn, scriptChars](UChar* chars, uint32_t size,
                                                 UErrorCode* status) {
    int32_t res = uldn_scriptDisplayName(ldn, scriptChars, chars, size, status);

    // |uldn_scriptDisplayName| reports U_ILLEGAL_ARGUMENT_ERROR when no display
    // name was found.
    if (*status == U_ILLEGAL_ARGUMENT_ERROR) {
      *status = U_ZERO_ERROR;
      res = 0;
    }
    return res;
  });
  if (!str) {
    return nullptr;
  }

  // Return the case-canonicalized input when no localized name was found.
  if (str->empty() && fallback == DisplayNamesFallback::Code) {
    script.toTitleCase();
    return NewStringCopy(cx, script.span());
  }

  return str;
}

// Returns the display name for the region subtag in |regionStr|.
//
// An input that is not a standalone region subtag reports an invalid "region"
// option and returns nullptr. The subtag is canonicalized as part of an
// "und"-based language tag before ICU is consulted.
static JSString* GetRegionDisplayName(JSContext* cx,
                                      Handle<DisplayNamesObject*> displayNames,
                                      const char* locale,
                                      DisplayNamesStyle displayStyle,
                                      DisplayNamesFallback fallback,
                                      HandleLinearString regionStr) {
  intl::RegionSubtag region;
  if (!intl::ParseStandaloneRegionTag(regionStr, region)) {
    ReportInvalidOptionError(cx, "region", regionStr);
    return nullptr;
  }

  intl::LanguageTag tag(cx);
  tag.setLanguage("und");
  tag.setRegion(region);

  // ICU always canonicalizes the input locale, but since we know that ICU's
  // canonicalization is incomplete, we need to perform our own canonicalization
  // to ensure consistent result.
  if (!tag.canonicalizeBaseName(cx)) {
    return nullptr;
  }
  MOZ_ASSERT(tag.region().present());

  // Note: ICU requires the region subtag to be in canonical case.
  const intl::RegionSubtag& canonicalRegion = tag.region();

  // Zero-initialized buffer, one element longer than the maximum region
  // length, so the copied subtag stays null-terminated.
  char regionChars[intl::LanguageTagLimits::RegionLength + 1] = {};
  std::copy_n(canonicalRegion.span().data(), canonicalRegion.length(),
              regionChars);

  ULocaleDisplayNames* ldn =
      GetOrCreateLocaleDisplayNames(cx, displayNames, locale, displayStyle);
  if (!ldn) {
    return nullptr;
  }

  JSString* str = CallICU(cx, [ldn, regionChars](UChar* chars, uint32_t size,
                                                 UErrorCode* status) {
    int32_t res = uldn_regionDisplayName(ldn, regionChars, chars, size, status);

    // |uldn_regionDisplayName| reports U_ILLEGAL_ARGUMENT_ERROR when no display
    // name was found.
    if (*status == U_ILLEGAL_ARGUMENT_ERROR) {
      *status = U_ZERO_ERROR;
      res = 0;
    }
    return res;
  });
  if (!str) {
    return nullptr;
  }

  // Return the case-canonicalized input when no localized name was found.
  if (str->empty() && fallback == DisplayNamesFallback::Code) {
    region.toUpperCase();
    return NewStringCopy(cx, region.span());
  }

  return str;
}

// Returns the display name for the currency code in |currencyStr|.
//
// The code must be exactly three ASCII letters; anything else reports an
// invalid "currency" option and returns nullptr. When ICU signals
// U_USING_DEFAULT_WARNING, the result is the upper-cased input for
// DisplayNamesFallback::Code and the empty string otherwise.
static JSString* GetCurrencyDisplayName(JSContext* cx, const char* locale,
                                        DisplayNamesStyle displayStyle,
                                        DisplayNamesFallback fallback,
                                        HandleLinearString currencyStr) {
  // Inlined implementation of `IsWellFormedCurrencyCode ( currency )`.
  if (currencyStr->length() != 3) {
    ReportInvalidOptionError(cx, "currency", currencyStr);
    return nullptr;
  }

  // Null-terminated UTF-16 copy of the three code units, as passed to
  // |ucurr_getName| below.
  char16_t currency[] = {currencyStr->latin1OrTwoByteChar(0),
                         currencyStr->latin1OrTwoByteChar(1),
                         currencyStr->latin1OrTwoByteChar(2), '\0'};

  if (!mozilla::IsAsciiAlpha(currency[0]) ||
      !mozilla::IsAsciiAlpha(currency[1]) ||
      !mozilla::IsAsciiAlpha(currency[2])) {
    ReportInvalidOptionError(cx, "currency", currencyStr);
    return nullptr;
  }

  UCurrNameStyle currencyStyle;
  switch (displayStyle) {
    case DisplayNamesStyle::Long:
      currencyStyle = UCURR_LONG_NAME;
      break;
    case DisplayNamesStyle::Short:
      currencyStyle = UCURR_SYMBOL_NAME;
      break;
    case DisplayNamesStyle::Narrow:
      currencyStyle = UCURR_NARROW_SYMBOL_NAME;
      break;
  }

  int32_t length = 0;
  UErrorCode status = U_ZERO_ERROR;
  const char16_t* name =
      ucurr_getName(currency, locale, currencyStyle, nullptr, &length, &status);
  if (U_FAILURE(status)) {
    intl::ReportInternalError(cx);
    return nullptr;
  }
  MOZ_ASSERT(length >= 0);

  if (status == U_USING_DEFAULT_WARNING) {
    // Return the canonicalized input when no localized currency name was found.
    if (fallback == DisplayNamesFallback::Code) {
      // Canonical case for currency is upper case.
      return js::StringToUpperCase(cx, currencyStr);
    }
    return cx->emptyString();
  }

  return NewStringCopyN<CanGC>(cx, name, size_t(length));
}

#ifdef DEBUG
// Debug-only helper: true only for the three stand-alone month symbol types.
// Every other symbol type is listed explicitly and yields false.
static bool IsStandaloneMonth(UDateFormatSymbolType symbolType) {
  switch (symbolType) {
    case UDAT_STANDALONE_MONTHS:
    case UDAT_STANDALONE_SHORT_MONTHS:
    case UDAT_STANDALONE_NARROW_MONTHS:
      return true;

    case UDAT_ERAS:
    case UDAT_MONTHS:
    case UDAT_SHORT_MONTHS:
    case UDAT_WEEKDAYS:
    case UDAT_SHORT_WEEKDAYS:
    case UDAT_AM_PMS:
    case UDAT_LOCALIZED_CHARS:
    case UDAT_ERA_NAMES:
    case UDAT_NARROW_MONTHS:
    case UDAT_NARROW_WEEKDAYS:
    case UDAT_STANDALONE_WEEKDAYS:
    case UDAT_STANDALONE_SHORT_WEEKDAYS:
    case UDAT_STANDALONE_NARROW_WEEKDAYS:
    case UDAT_QUARTERS:
    case UDAT_SHORT_QUARTERS:
    case UDAT_STANDALONE_QUARTERS:
    case UDAT_STANDALONE_SHORT_QUARTERS:
    case UDAT_SHORTER_WEEKDAYS:
    case UDAT_STANDALONE_SHORTER_WEEKDAYS:
    case UDAT_CYCLIC_YEARS_WIDE:
    case UDAT_CYCLIC_YEARS_ABBREVIATED:
    case UDAT_CYCLIC_YEARS_NARROW:
    case UDAT_ZODIAC_NAMES_WIDE:
    case UDAT_ZODIAC_NAMES_ABBREVIATED:
    case UDAT_ZODIAC_NAMES_NARROW:
      return false;
  }

  MOZ_ASSERT_UNREACHABLE("unenumerated, undocumented symbol type");
  return false;
}
#endif

// Returns the list of date-time symbol names stored on |displayNames|,
// computing and storing it on first use.
//
// If a list is already stored it is returned as-is, without consulting
// |symbolType| or |indices|. Otherwise |calendar| is applied to |locale| as
// the "ca" Unicode extension keyword, a UDateFormat is opened for the
// resulting locale, and one symbol is fetched per entry of |indices|.
static ListObject* GetDateTimeDisplayNames(
    JSContext* cx, Handle<DisplayNamesObject*> displayNames, const char* locale,
    HandleLinearString calendar, UDateFormatSymbolType symbolType,
    mozilla::Span<const int32_t> indices) {
  if (auto* names = displayNames->getDateTimeNames()) {
    return names;
  }

  intl::LanguageTag tag(cx);
  if (!intl::LanguageTagParser::parse(cx, mozilla::MakeStringSpan(locale),
                                      tag)) {
    return nullptr;
  }

  JS::RootedVector<intl::UnicodeExtensionKeyword> keywords(cx);
  if (!keywords.emplaceBack("ca", calendar)) {
    return nullptr;
  }

  if (!intl::ApplyUnicodeExtensionToTag(cx, tag, keywords)) {
    return nullptr;
  }

  UniqueChars localeWithCalendar = tag.toStringZ(cx);
  if (!localeWithCalendar) {
    return nullptr;
  }

  // No explicit time zone or pattern is passed to |udat_open|.
  constexpr char16_t* timeZone = nullptr;
  constexpr int32_t timeZoneLength = 0;

  constexpr char16_t* pattern = nullptr;
  constexpr int32_t patternLength = 0;

  UErrorCode status = U_ZERO_ERROR;
  UDateFormat* fmt =
      udat_open(UDAT_DEFAULT, UDAT_DEFAULT, IcuLocale(localeWithCalendar.get()),
                timeZone, timeZoneLength, pattern, patternLength, &status);
  if (U_FAILURE(status)) {
    intl::ReportInternalError(cx);
    return nullptr;
  }
  // Closes |fmt| when this scope is left.
  ScopedICUObject<UDateFormat, udat_close> datToClose(fmt);

  Rooted<ListObject*> names(cx, ListObject::create(cx));
  if (!names) {
    return nullptr;
  }

  RootedValue value(cx);
  for (uint32_t i = 0; i < indices.size(); i++) {
    int32_t index = indices[i];
    JSString* name =
        CallICU(cx, [fmt, symbolType, index](UChar* chars, int32_t size,
                                             UErrorCode* status) {
          return udat_getSymbols(fmt, symbolType, index, chars, size, status);
        });
    if (!name) {
      return nullptr;
    }

    // Everything except Undecimber should always have a non-empty name.
    MOZ_ASSERT_IF(!IsStandaloneMonth(symbolType) || index != UCAL_UNDECIMBER,
                  !name->empty());

    value.setString(name);
    if (!names->append(cx, value)) {
      return nullptr;
    }
  }

  displayNames->setDateTimeNames(names);
  return names;
}

// Returns the display name for the weekday given by |code|.
//
// |code| is converted to a number and must be an integer from 1 to 7;
// anything else reports an invalid "weekday" option and returns nullptr.
// The names are requested in the order Monday..Sunday, so code 1 selects
// the first entry.
static JSString* GetWeekdayDisplayName(JSContext* cx,
                                       Handle<DisplayNamesObject*> displayNames,
                                       const char* locale,
                                       HandleLinearString calendar,
                                       DisplayNamesStyle displayStyle,
                                       HandleLinearString code) {
  uint8_t weekday;
  {
    double d;
    if (!StringToNumber(cx, code, &d)) {
      return nullptr;
    }

    // Inlined implementation of `IsValidWeekdayCode ( weekday )`.
    if (!IsInteger(d) || d < 1 || d > 7) {
      ReportInvalidOptionError(cx, "weekday", d);
      return nullptr;
    }

    weekday = uint8_t(d);
  }

  UDateFormatSymbolType symbolType;
  switch (displayStyle) {
    case DisplayNamesStyle::Long:
      symbolType = UDAT_STANDALONE_WEEKDAYS;
      break;

    case DisplayNamesStyle::Short:
      // ICU "short" is CLDR "abbreviated"; "shorter" is CLDR "short" format.
      symbolType = UDAT_STANDALONE_SHORTER_WEEKDAYS;
      break;

    case DisplayNamesStyle::Narrow:
      symbolType = UDAT_STANDALONE_NARROW_WEEKDAYS;
      break;
  }

  static constexpr int32_t indices[] = {
      UCAL_MONDAY, UCAL_TUESDAY,  UCAL_WEDNESDAY, UCAL_THURSDAY,
      UCAL_FRIDAY, UCAL_SATURDAY, UCAL_SUNDAY};

  ListObject* names = GetDateTimeDisplayNames(
      cx, displayNames, locale, calendar, symbolType, mozilla::Span(indices));
  if (!names) {
    return nullptr;
  }
  MOZ_ASSERT(names->length() == std::size(indices));

  return names->get(weekday - 1).toString();
}

// Returns the display name for the month given by |code|.
//
// |code| is converted to a number and must be an integer from 1 to 13;
// anything else reports an invalid "month" option and returns nullptr.
// When the stored name is empty and |fallback| is DisplayNamesFallback::Code,
// the month number itself is returned as a string.
static JSString* GetMonthDisplayName(
    JSContext* cx, Handle<DisplayNamesObject*> displayNames, const char* locale,
    HandleLinearString calendar, DisplayNamesStyle displayStyle,
    DisplayNamesFallback fallback, HandleLinearString code) {
  uint8_t month;
  {
    double d;
    if (!StringToNumber(cx, code, &d)) {
      return nullptr;
    }

    // Inlined implementation of `IsValidMonthCode ( month )`.
    if (!IsInteger(d) || d < 1 || d > 13) {
      ReportInvalidOptionError(cx, "month", d);
      return nullptr;
    }

    month = uint8_t(d);
  }

  UDateFormatSymbolType symbolType;
  switch (displayStyle) {
    case DisplayNamesStyle::Long:
      symbolType = UDAT_STANDALONE_MONTHS;
      break;

    case DisplayNamesStyle::Short:
      symbolType = UDAT_STANDALONE_SHORT_MONTHS;
      break;

    case DisplayNamesStyle::Narrow:
      symbolType = UDAT_STANDALONE_NARROW_MONTHS;
      break;
  }

  static constexpr int32_t indices[] = {
      UCAL_JANUARY,   UCAL_FEBRUARY, UCAL_MARCH,    UCAL_APRIL,
      UCAL_MAY,       UCAL_JUNE,     UCAL_JULY,     UCAL_AUGUST,
      UCAL_SEPTEMBER, UCAL_OCTOBER,  UCAL_NOVEMBER, UCAL_DECEMBER,
      UCAL_UNDECIMBER};

  ListObject* names = GetDateTimeDisplayNames(
      cx, displayNames, locale, calendar, symbolType, mozilla::Span(indices));
  if (!names) {
    return nullptr;
  }
  MOZ_ASSERT(names->length() == std::size(indices));

  JSString* str = names->get(month - 1).toString();
  if (str->empty() && fallback == DisplayNamesFallback::Code) {
    return cx->staticStrings().getInt(month);
  }
  return str;
}

// Returns the display name for the quarter given by |code|.
//
// |code| is converted to a number and must be an integer from 1 to 4;
// anything else reports an invalid "quarter" option and returns nullptr.
// The Short and Narrow styles both use the short quarter names.
static JSString* GetQuarterDisplayName(JSContext* cx,
                                       Handle<DisplayNamesObject*> displayNames,
                                       const char* locale,
                                       HandleLinearString calendar,
                                       DisplayNamesStyle displayStyle,
                                       HandleLinearString code) {
  uint8_t quarter;
  {
    double d;
    if (!StringToNumber(cx, code, &d)) {
      return nullptr;
    }

    // Inlined implementation of `IsValidQuarterCode ( quarter )`.
    if (!IsInteger(d) || d < 1 || d > 4) {
      ReportInvalidOptionError(cx, "quarter", d);
      return nullptr;
    }

    quarter = uint8_t(d);
  }

  UDateFormatSymbolType symbolType;
  switch (displayStyle) {
    case DisplayNamesStyle::Long:
      symbolType = UDAT_STANDALONE_QUARTERS;
      break;

    case DisplayNamesStyle::Short:
    case DisplayNamesStyle::Narrow:
      // CLDR "narrow" style not supported in ICU.
      symbolType = UDAT_STANDALONE_SHORT_QUARTERS;
      break;
  }

  // ICU doesn't provide an enum for quarters.
  static constexpr int32_t indices[] = {0, 1, 2, 3};

  ListObject* names = GetDateTimeDisplayNames(
      cx, displayNames, locale, calendar, symbolType, mozilla::Span(indices));
  if (!names) {
    return nullptr;
  }
  MOZ_ASSERT(names->length() == std::size(indices));

  return names->get(quarter - 1).toString();
}

// Returns the display name for the day period in |dayPeriod|.
//
// Only "am" and "pm" are accepted; any other value reports an invalid
// "dayPeriod" option and returns nullptr.
static JSString* GetDayPeriodDisplayName(
    JSContext* cx, Handle<DisplayNamesObject*> displayNames, const char* locale,
    HandleLinearString calendar, HandleLinearString dayPeriod) {
  // Inlined implementation of `IsValidDayPeriodCode ( dayperiod )`.
  uint32_t index;
  if (StringEqualsLiteral(dayPeriod, "am")) {
    index = 0;
  } else if (StringEqualsLiteral(dayPeriod, "pm")) {
    index = 1;
  } else {
    ReportInvalidOptionError(cx, "dayPeriod", dayPeriod);
    return nullptr;
  }

  UDateFormatSymbolType symbolType = UDAT_AM_PMS;

  static constexpr int32_t indices[] = {UCAL_AM, UCAL_PM};

  ListObject* names = GetDateTimeDisplayNames(
      cx, displayNames, locale, calendar, symbolType, mozilla::Span(indices));
  if (!names) {
    return nullptr;
  }
  MOZ_ASSERT(names->length() == std::size(indices));

  return names->get(index).toString();
}

// Returns the display name for the date-time field named by |dateTimeField|.
//
// The field name is mapped to a UDateTimePatternField; an unrecognized name
// reports an invalid "dateTimeField" option and returns nullptr.
static JSString* GetDateTimeFieldDisplayName(JSContext* cx, const char* locale,
                                             DisplayNamesStyle displayStyle,
                                             HandleLinearString dateTimeField) {
  // Inlined implementation of `IsValidDateTimeFieldCode ( field )`.
  UDateTimePatternField field;
  if (StringEqualsLiteral(dateTimeField, "era")) {
    field = UDATPG_ERA_FIELD;
  } else if (StringEqualsLiteral(dateTimeField, "year")) {
    field = UDATPG_YEAR_FIELD;
  } else if (StringEqualsLiteral(dateTimeField, "quarter")) {
    field = UDATPG_QUARTER_FIELD;
  } else if (StringEqualsLiteral(dateTimeField, "month")) {
    field = UDATPG_MONTH_FIELD;
  } else if (StringEqualsLiteral(dateTimeField, "weekOfYear")) {
    field = UDATPG_WEEK_OF_YEAR_FIELD;
  } else if (StringEqualsLiteral(dateTimeField, "weekday")) {
    field = UDATPG_WEEKDAY_FIELD;
  } else if (StringEqualsLiteral(dateTimeField, "day")) {
    field = UDATPG_DAY_FIELD;
  } else if (StringEqualsLiteral(dateTimeField, "dayPeriod")) {
    field = UDATPG_DAYPERIOD_FIELD;
  } else if (StringEqualsLiteral(dateTimeField, "hour")) {
    field = UDATPG_HOUR_FIELD;
  } else if (StringEqualsLiteral(dateTimeField, "minute")) {
    field = UDATPG_MINUTE_FIELD;
  } else if (StringEqualsLiteral(dateTimeField, "second")) {
    field = UDATPG_SECOND_FIELD;
  } else if (StringEqualsLiteral(dateTimeField, "timeZoneName")) {
    field = UDATPG_ZONE_FIELD;
  } else {
    ReportInvalidOptionError(cx, "dateTimeField", dateTimeField);
    return nullptr;
  }

  UDateTimePGDisplayWidth width;
  switch (displayStyle) {
    case DisplayNamesStyle::Long:
      width = UDATPG_WIDE;
      break;
    case DisplayNamesStyle::Short:
      width = UDATPG_ABBREVIATED;
      break;
    case DisplayNamesStyle::Narrow:
      width = UDATPG_NARROW;
      break;
  }

  // The pattern generator is obtained from the runtime's shared Intl data
  // rather than being opened here.
  intl::SharedIntlData& sharedIntlData = cx->runtime()->sharedIntlData.ref();
  UDateTimePatternGenerator* dtpg =
      sharedIntlData.getDateTimePatternGenerator(cx, locale);
  if (!dtpg) {
    return nullptr;
  }

  JSString* str = intl::CallICU(cx, [dtpg, field, width](UChar* chars,
                                                         uint32_t size,
                                                         UErrorCode* status) {
    return udatpg_getFieldDisplayName(dtpg, field, width, chars, size, status);
  });
  MOZ_ASSERT_IF(str, !str->empty());
  return str;
}

/**
 * intl_ComputeDisplayName(displayNames, locale, calendar, style, fallback,
 *                         type, code)
 *
 * Dispatches on |type| to the matching Get*DisplayName helper. |style| and
 * |fallback| are first decoded into DisplayNamesStyle and
 * DisplayNamesFallback; values other than the handled literals are only
 * checked by assertion.
 */
bool js::intl_ComputeDisplayName(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);
  MOZ_ASSERT(args.length() == 7);

  Rooted<DisplayNamesObject*> displayNames(
      cx, &args[0].toObject().as<DisplayNamesObject>());

  UniqueChars locale = intl::EncodeLocale(cx, args[1].toString());
  if (!locale) {
    return false;
  }

  RootedLinearString calendar(cx, args[2].toString()->ensureLinear(cx));
  if (!calendar) {
    return false;
  }

  RootedLinearString code(cx, args[6].toString()->ensureLinear(cx));
  if (!code) {
    return false;
  }

  DisplayNamesStyle displayStyle;
  {
    JSLinearString* style = args[3].toString()->ensureLinear(cx);
    if (!style) {
      return false;
    }

    if (StringEqualsLiteral(style, "long")) {
      displayStyle = DisplayNamesStyle::Long;
    } else if (StringEqualsLiteral(style, "short")) {
      displayStyle = DisplayNamesStyle::Short;
    } else {
      MOZ_ASSERT(StringEqualsLiteral(style, "narrow"));
      displayStyle = DisplayNamesStyle::Narrow;
    }
  }

  DisplayNamesFallback displayFallback;
  {
    JSLinearString* fallback = args[4].toString()->ensureLinear(cx);
    if (!fallback) {
      return false;
    }

    if (StringEqualsLiteral(fallback, "none")) {
      displayFallback = DisplayNamesFallback::None;
    } else {
      MOZ_ASSERT(StringEqualsLiteral(fallback, "code"));
      displayFallback = DisplayNamesFallback::Code;
    }
  }

  JSLinearString* type = args[5].toString()->ensureLinear(cx);
  if (!type) {
    return false;
  }

  JSString* result;
  if (StringEqualsLiteral(type, "language")) {
    result = GetLanguageDisplayName(cx, displayNames, locale.get(),
                                    displayStyle, displayFallback, code);
  } else if (StringEqualsLiteral(type, "script")) {
    result = GetScriptDisplayName(cx, displayNames, locale.get(), displayStyle,
                                  displayFallback, code);
  } else if (StringEqualsLiteral(type, "region")) {
    result = GetRegionDisplayName(cx, displayNames, locale.get(), displayStyle,
                                  displayFallback, code);
  } else if (StringEqualsLiteral(type, "currency")) {
    result = GetCurrencyDisplayName(cx, locale.get(), displayStyle,
                                    displayFallback, code);
  } else if (StringEqualsLiteral(type, "weekday")) {
    result = GetWeekdayDisplayName(cx, displayNames, locale.get(), calendar,
                                   displayStyle, code);
  } else if (StringEqualsLiteral(type, "month")) {
    result = GetMonthDisplayName(cx, displayNames, locale.get(), calendar,
                                 displayStyle, displayFallback, code);
  } else if (StringEqualsLiteral(type, "quarter")) {
    result = GetQuarterDisplayName(cx, displayNames, locale.get(), calendar,
                                   displayStyle, code);
  } else if (StringEqualsLiteral(type, "dayPeriod")) {
    result =
        GetDayPeriodDisplayName(cx, displayNames, locale.get(), calendar, code);
  } else {
    MOZ_ASSERT(StringEqualsLiteral(type, "dateTimeField"));
    result = GetDateTimeFieldDisplayName(cx, locale.get(), displayStyle, code);
  }
  if (!result) {
    return false;
  }

  // An empty result means no display name was found: return |code| itself
  // when the fallback is "code", otherwise return undefined.
  if (!result->empty()) {
    args.rval().setString(result);
  } else if (displayFallback == DisplayNamesFallback::Code) {
    args.rval().setString(code);
  } else {
    args.rval().setUndefined();
  }
  return true;
}
diff --git a/js/src/builtin/intl/DisplayNames.h b/js/src/builtin/intl/DisplayNames.h
new file mode 100644
index 0000000000..b96d8a2d9f
--- /dev/null
+++ b/js/src/builtin/intl/DisplayNames.h
@@ -0,0 +1,93 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +#ifndef builtin_intl_DisplayNames_h +#define builtin_intl_DisplayNames_h + +#include "mozilla/Attributes.h" + +#include <stddef.h> +#include <stdint.h> + +#include "jstypes.h" +#include "NamespaceImports.h" + +#include "builtin/SelfHostingDefines.h" +#include "js/Class.h" // JSClass, JSClassOps, js::ClassSpec +#include "js/Value.h" +#include "vm/JSObject.h" +#include "vm/List.h" +#include "vm/NativeObject.h" + +struct JS_PUBLIC_API JSContext; +class JS_PUBLIC_API JSFreeOp; + +struct ULocaleDisplayNames; + +namespace js { +struct ClassSpec; + /** + * Native object for Intl.DisplayNames. Its reserved slots hold the + * self-hosted internals object, a private ULocaleDisplayNames pointer and a + * ListObject of date-time names; the latter two stay undefined until set. + */ +class DisplayNamesObject : public NativeObject { + public: + static const JSClass class_; + static const JSClass& protoClass_; + + static constexpr uint32_t INTERNALS_SLOT = 0; + static constexpr uint32_t ULOCALE_DISPLAY_NAMES_SLOT = 1; + static constexpr uint32_t DATE_TIME_NAMES_SLOT = 2; + static constexpr uint32_t SLOT_COUNT = 3; + + static_assert(INTERNALS_SLOT == INTL_INTERNALS_OBJECT_SLOT, + "INTERNALS_SLOT must match self-hosting define for internals " + "object slot"); + + // Estimated memory use for ULocaleDisplayNames (see IcuMemoryUsage). 
+ static constexpr size_t EstimatedMemoryUse = 1256; + /* Returns the ULocaleDisplayNames stored in its slot, or nullptr while the slot is still undefined. */ + ULocaleDisplayNames* getLocaleDisplayNames() const { + const auto& slot = getFixedSlot(ULOCALE_DISPLAY_NAMES_SLOT); + if (slot.isUndefined()) { + return nullptr; + } + return static_cast<ULocaleDisplayNames*>(slot.toPrivate()); + } + /* Stores |localeDisplayNames| as a private value; nothing in this header frees a previously stored pointer. */ + void setLocaleDisplayNames(ULocaleDisplayNames* localeDisplayNames) { + setFixedSlot(ULOCALE_DISPLAY_NAMES_SLOT, PrivateValue(localeDisplayNames)); + } + /* Returns the ListObject stored in the date-time names slot, or nullptr while the slot is still undefined. */ + ListObject* getDateTimeNames() const { + const auto& slot = getFixedSlot(DATE_TIME_NAMES_SLOT); + if (slot.isUndefined()) { + return nullptr; + } + return &slot.toObject().as<ListObject>(); + } + /* |names| is dereferenced unconditionally and therefore must not be null. */ + void setDateTimeNames(ListObject* names) { + setFixedSlot(DATE_TIME_NAMES_SLOT, ObjectValue(*names)); + } + + private: + static const JSClassOps classOps_; + static const ClassSpec classSpec_; + + static void finalize(JSFreeOp* fop, JSObject* obj); +}; + +/** + * Return the display name for the requested code or undefined if no applicable + * display name was found. + * + * Usage: result = intl_ComputeDisplayName(displayNames, locale, calendar, + * style, fallback, type, code) + */ +extern MOZ_MUST_USE bool intl_ComputeDisplayName(JSContext* cx, unsigned argc, + Value* vp); + +} // namespace js + +#endif /* builtin_intl_DisplayNames_h */ diff --git a/js/src/builtin/intl/DisplayNames.js b/js/src/builtin/intl/DisplayNames.js new file mode 100644 index 0000000000..4d9aa29426 --- /dev/null +++ b/js/src/builtin/intl/DisplayNames.js @@ -0,0 +1,276 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * Intl.DisplayNames internal properties. + */ +var displayNamesInternalProperties = { + localeData: function() // eslint-disable-line object-shorthand + { + // Intl.DisplayNames doesn't support any extension keys. 
+ return {}; + }, + relevantExtensionKeys: [] +}; + /** + * Internal properties used instead of the above when |mozExtensions| is set: + * they add the "ca" Unicode extension key, with locale data taken from + * |intl_availableCalendars| and |intl_defaultCalendar|. + */ +var mozDisplayNamesInternalProperties = { + localeData: function() // eslint-disable-line object-shorthand + { + return { + ca: intl_availableCalendars, + default: { + ca: intl_defaultCalendar, + }, + }; + }, + relevantExtensionKeys: ["ca"] +}; + +/** + * Intl.DisplayNames ( [ locales [ , options ] ] ) + * + * Compute an internal properties object from |lazyDisplayNamesData|. + * + * The result carries |style|, |type|, |fallback| and the resolved |locale|; + * |calendar| is only present when |mozExtensions| is set. + */ +function resolveDisplayNamesInternals(lazyDisplayNamesData) { + assert(IsObject(lazyDisplayNamesData), "lazy data not an object?"); + + var internalProps = std_Object_create(null); + + var mozExtensions = lazyDisplayNamesData.mozExtensions; + + var DisplayNames = mozExtensions ? + mozDisplayNamesInternalProperties : + displayNamesInternalProperties; + + // Compute effective locale. + + // Step 7. + var localeData = DisplayNames.localeData; + + // Step 10. + var r = ResolveLocale("DisplayNames", + lazyDisplayNamesData.requestedLocales, + lazyDisplayNamesData.opt, + DisplayNames.relevantExtensionKeys, + localeData); + // Step 12. + internalProps.style = lazyDisplayNamesData.style; + + // Step 14. + internalProps.type = lazyDisplayNamesData.type; + + // Step 16. + internalProps.fallback = lazyDisplayNamesData.fallback; + + // Step 17. + internalProps.locale = r.locale; + + if (mozExtensions) { + internalProps.calendar = r.ca; + } + + // The caller is responsible for associating |internalProps| with the right + // object using |setInternalProperties|. + return internalProps; +} + +/** + * Returns an object containing the DisplayNames internal properties of |obj|. 
+ */ +function getDisplayNamesInternals(obj) { + assert(IsObject(obj), "getDisplayNamesInternals called with non-object"); + assert(GuardToDisplayNames(obj) !== null, "getDisplayNamesInternals called with non-DisplayNames"); + + var internals = getIntlObjectInternals(obj); + assert(internals.type === "DisplayNames", "bad type escaped getIntlObjectInternals"); + + // If internal properties have already been computed, use them. + var internalProps = maybeInternalProperties(internals); + if (internalProps) + return internalProps; + + // Otherwise it's time to fully create them from the lazy data and associate them with |internals|. + internalProps = resolveDisplayNamesInternals(internals.lazyData); + setInternalProperties(internals, internalProps); + return internalProps; +} + +/** + * Intl.DisplayNames ( [ locales [ , options ] ] ) + * + * Initializes an object as a DisplayNames. + * + * This method is complicated a moderate bit by its implementing initialization + * as a *lazy* concept. Everything that must happen now, does -- but we defer + * all the work we can until the object is actually used as a DisplayNames. + * This later work occurs in |resolveDisplayNamesInternals|; steps not noted + * here occur there. 
+ */ +function InitializeDisplayNames(displayNames, locales, options, mozExtensions) { + assert(IsObject(displayNames), "InitializeDisplayNames called with non-object"); + assert(GuardToDisplayNames(displayNames) !== null, "InitializeDisplayNames called with non-DisplayNames"); + + // Lazy DisplayNames data has the following structure: + // + // { + // requestedLocales: List of locales, + // + // opt: // opt object computed in InitializeDisplayNames + // { + // localeMatcher: "lookup" / "best fit", + // + // ca: string matching a Unicode extension type, // optional, only set with mozExtensions + // } + // + // style: "narrow" / "short" / "long", + // + // type: "language" / "region" / "script" / "currency" / "weekday" / + // "month" / "quarter" / "dayPeriod" / "dateTimeField" + // + // fallback: "code" / "none", + // + // mozExtensions: true / false, + // } + // + // Note that lazy data is only installed as a final step of initialization, + // so every DisplayNames lazy data object has *all* these properties, never a + // subset of them. + var lazyDisplayNamesData = std_Object_create(null); + + // Step 3. + var requestedLocales = CanonicalizeLocaleList(locales); + lazyDisplayNamesData.requestedLocales = requestedLocales; + + // Step 4. + options = ToObject(options); + + // Step 5. + var opt = new Record(); + lazyDisplayNamesData.opt = opt; + lazyDisplayNamesData.mozExtensions = mozExtensions; + + // Steps 7-8. + var matcher = GetOption(options, "localeMatcher", "string", ["lookup", "best fit"], "best fit"); + opt.localeMatcher = matcher; + + if (mozExtensions) { + var calendar = GetOption(options, "calendar", "string", undefined, undefined); + + if (calendar !== undefined) { + calendar = intl_ValidateAndCanonicalizeUnicodeExtensionType(calendar, "calendar", "ca"); + } + + opt.ca = calendar; + } + + // Step 10. + var style = GetOption(options, "style", "string", ["narrow", "short", "long"], "long"); + + // Step 11. 
+ lazyDisplayNamesData.style = style; + + // Step 12. + var type; + if (mozExtensions) { + type = GetOption(options, "type", "string", + ["language", "region", "script", "currency", "weekday", "month", + "quarter", "dayPeriod", "dateTimeField"], undefined); + } else { + type = GetOption(options, "type", "string", + ["language", "region", "script", "currency"], undefined); + } + + // Step 13. + if (type === undefined) { + ThrowTypeError(JSMSG_UNDEFINED_TYPE); + } + + // Step 14. + lazyDisplayNamesData.type = type; + + // Step 15. + var fallback = GetOption(options, "fallback", "string", ["code", "none"], "code"); + + // Step 16. + lazyDisplayNamesData.fallback = fallback; + + // We've done everything that must be done now: mark the lazy data as fully + // computed and install it. + initializeIntlObject(displayNames, "DisplayNames", lazyDisplayNamesData); +} + +/** + * Returns the subset of the given locale list for which this locale list has a + * matching (possibly fallback) locale. Locales appear in the same order in the + * returned list as in the input list. + */ +function Intl_DisplayNames_supportedLocalesOf(locales /*, options*/) { + var options = arguments.length > 1 ? arguments[1] : undefined; + + // Step 1. + var availableLocales = "DisplayNames"; + + // Step 2. + var requestedLocales = CanonicalizeLocaleList(locales); + + // Step 3. + return SupportedLocales(availableLocales, requestedLocales, options); +} + +/** + * Returns the display name for |code| (converted with ToString), as computed + * by |intl_ComputeDisplayName| from this object's locale, calendar, style, + * type and fallback. + */ +function Intl_DisplayNames_of(code) { + // Step 1. + var displayNames = this; + + // Steps 2-3. + if (!IsObject(displayNames) || (displayNames = GuardToDisplayNames(displayNames)) === null) { + return callFunction(CallDisplayNamesMethodIfWrapped, this, "Intl_DisplayNames_of"); + } + + code = ToString(code); + + var internals = getDisplayNamesInternals(displayNames); + + // Unpack the internals object to avoid a slow runtime to selfhosted JS call + // in |intl_ComputeDisplayName()|. 
+ var {locale, calendar = "", style, type, fallback} = internals; + + // Steps 5-10. + return intl_ComputeDisplayName(displayNames, locale, calendar, style, fallback, type, code); +} + +/** + * Returns the resolved options for a DisplayNames object. + */ +function Intl_DisplayNames_resolvedOptions() { + // Step 1. + var displayNames = this; + + // Steps 2-3. + if (!IsObject(displayNames) || (displayNames = GuardToDisplayNames(displayNames)) === null) { + return callFunction(CallDisplayNamesMethodIfWrapped, this, + "Intl_DisplayNames_resolvedOptions"); + } + + var internals = getDisplayNamesInternals(displayNames); + + // Steps 4-5. + var options = { + locale: internals.locale, + style: internals.style, + type: internals.type, + fallback: internals.fallback, + }; + + if (hasOwn("calendar", internals)) { + options.calendar = internals.calendar; + } + + // Step 6. + return options; +} diff --git a/js/src/builtin/intl/IcuMemoryUsage.java b/js/src/builtin/intl/IcuMemoryUsage.java new file mode 100644 index 0000000000..2e9b985b13 --- /dev/null +++ b/js/src/builtin/intl/IcuMemoryUsage.java @@ -0,0 +1,260 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +import java.io.*; +import java.nio.charset.StandardCharsets; +import java.util.*; +import java.util.regex.*; +import java.util.stream.Collectors; + +/** + * Java program to estimate the memory usage of ICU objects (bug 1585536). + * + * It computes for each Intl constructor the amount of allocated memory. We're + * currently using the maximum memory ("max" in the output) to estimate the + * memory consumption of ICU objects. 
+ * + * Insert before {@code JS_InitWithFailureDiagnostic} in "js.cpp": + * + * <pre> + * <code> + * JS_SetICUMemoryFunctions( + * [](const void*, size_t size) { + * void* ptr = malloc(size); + * if (ptr) { + * printf(" alloc: %p -> %zu\n", ptr, size); + * } + * return ptr; + * }, + * [](const void*, void* p, size_t size) { + * void* ptr = realloc(p, size); + * if (p) { + * printf(" realloc: %p -> %p -> %zu\n", p, ptr, size); + * } else { + * printf(" alloc: %p -> %zu\n", ptr, size); + * } + * return ptr; + * }, + * [](const void*, void* p) { + * if (p) { + * printf(" free: %p\n", p); + * } + * free(p); + * }); + * </code> + * </pre> + * + * Run this script with: + * {@code java --enable-preview --source=14 IcuMemoryUsage.java $MOZ_JS_SHELL}. + */ +@SuppressWarnings("preview") +public class IcuMemoryUsage { + /** Phase names printed by the driver script; Create..Collect repeat per object, Quit comes last. */ + private enum Phase { + None, Create, Init, Destroy, Collect, Quit + } + + /** Bookkeeping for the alloc/realloc/free lines reported on the shell's stdout. */ + private static final class Memory { + private Phase phase = Phase.None; + private HashMap<Long, Map.Entry<Phase, Long>> allocations = new HashMap<>(); + private HashSet<Long> freed = new HashSet<>(); + private HashMap<Long, Map.Entry<Phase, Long>> completeAllocations = new HashMap<>(); + private int allocCount = 0; + private ArrayList<Long> allocSizes = new ArrayList<>(); + + /** Moves to {@code nextPhase}; phases must advance one step at a time, or wrap from Collect to Create. */ + void transition(Phase nextPhase) { + assert phase.ordinal() + 1 == nextPhase.ordinal() || (phase == Phase.Collect && nextPhase == Phase.Create); + phase = nextPhase; + + // Create a clean slate when starting a new create cycle or before termination. + if (phase == Phase.Create || phase == Phase.Quit) { + transferAllocations(); + } + + // Only measure the allocation size when creating the second object with the + // same locale. 
+ if (phase == Phase.Collect && ++allocCount % 2 == 0) { + long size = allocations.values().stream().map(Map.Entry::getValue).reduce(0L, (a, c) -> a + c); + allocSizes.add(size); + } + } + + /** Folds the current cycle into the persistent set and drops every pointer freed during the cycle. */ + void transferAllocations() { + completeAllocations.putAll(allocations); + completeAllocations.keySet().removeAll(freed); + allocations.clear(); + freed.clear(); + } + + void alloc(long ptr, long size) { + allocations.put(ptr, Map.entry(phase, size)); + } + + void realloc(long oldPtr, long newPtr, long size) { + free(oldPtr); + allocations.put(newPtr, Map.entry(phase, size)); + } + + /** Pointers not allocated in the current cycle are remembered so they can be removed from the persistent set. */ + void free(long ptr) { + if (allocations.remove(ptr) == null) { + freed.add(ptr); + } + } + + LongSummaryStatistics statistics() { + return allocSizes.stream().collect(Collectors.summarizingLong(Long::valueOf)); + } + + /** Averages one (odd sample count) or two (even sample count) sorted samples starting at index {@code (size - 1) * p}. */ + double percentile(double p) { + var size = allocSizes.size(); + return allocSizes.stream().sorted().skip((long) ((size - 1) * p)).limit(2 - size % 2) + .mapToDouble(Long::doubleValue).average().getAsDouble(); + } + + /** Total size of allocations that outlived the cycle in which they were made. */ + long persistent() { + return completeAllocations.values().stream().map(Map.Entry::getValue).reduce(0L, (a, c) -> a + c); + } + } + + private static long parseSize(Matcher m, int group) { + return Long.parseLong(m.group(group), 10); + } + + /** + * Parses a hexadecimal pointer value. Unsigned parsing is required because + * addresses with the top bit set do not fit a signed long and would make + * {@code Long.parseLong} throw; the value is only used as a map key. + */ + private static long parsePointer(Matcher m, int group) { + return Long.parseUnsignedLong(m.group(group), 16); + } + + /** + * Runs the shell once for {@code constructor}, feeds it the driver script on + * stdin, replays the allocation log from its stdout and prints statistics. + */ + private static void measure(String exec, String constructor, String description, String initializer) throws IOException { + var locales = Arrays.stream(Locale.getAvailableLocales()).map(Locale::toLanguageTag).sorted() + .collect(Collectors.toUnmodifiableList()); + + var pb = new ProcessBuilder(exec, "--file=-", "--", constructor, initializer, + locales.stream().collect(Collectors.joining(","))); + // Let the shell write its stderr straight to ours. Draining stderr only after + // stdout reached EOF can deadlock once the child fills the stderr pipe. + pb.redirectError(ProcessBuilder.Redirect.INHERIT); + var process = pb.start(); + + try (var writer = new BufferedWriter( + new OutputStreamWriter(process.getOutputStream(), StandardCharsets.UTF_8))) { + writer.write(sourceCode); + writer.flush(); + } + + var memory = new Memory(); + +
try (var reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) { + var reAlloc = Pattern.compile("\\s+alloc: 0x(\\p{XDigit}+) -> (\\p{Digit}+)"); + var reRealloc = Pattern.compile("\\s+realloc: 0x(\\p{XDigit}+) -> 0x(\\p{XDigit}+) -> (\\p{Digit}+)"); + var reFree = Pattern.compile("\\s+free: 0x(\\p{XDigit}+)"); + + String line; + while ((line = reader.readLine()) != null) { + Matcher m; + if ((m = reAlloc.matcher(line)).matches()) { + var ptr = parsePointer(m, 1); + var size = parseSize(m, 2); + memory.alloc(ptr, size); + } else if ((m = reRealloc.matcher(line)).matches()) { + var oldPtr = parsePointer(m, 1); + var newPtr = parsePointer(m, 2); + var size = parseSize(m, 3); + memory.realloc(oldPtr, newPtr, size); + } else if ((m = reFree.matcher(line)).matches()) { + var ptr = parsePointer(m, 1); + memory.free(ptr); + } else { + // Every other line must be a phase name printed by the driver script. + memory.transition(Phase.valueOf(line)); + } + } + } + + var stats = memory.statistics(); + + System.out.printf("%s%n", description); + System.out.printf(" max: %d%n", stats.getMax()); + System.out.printf(" min: %d%n", stats.getMin()); + System.out.printf(" avg: %.0f%n", stats.getAverage()); + System.out.printf(" 50p: %.0f%n", memory.percentile(0.50)); + System.out.printf(" 75p: %.0f%n", memory.percentile(0.75)); + System.out.printf(" 85p: %.0f%n", memory.percentile(0.85)); + System.out.printf(" 95p: %.0f%n", memory.percentile(0.95)); + System.out.printf(" 99p: %.0f%n", memory.percentile(0.99)); + System.out.printf(" mem: %d%n", memory.persistent()); + + memory.transferAllocations(); + assert memory.persistent() == 0 : String.format("Leaked %d bytes", memory.persistent()); + } + + public static void main(String[] args) throws IOException { + if (args.length == 0) { + throw new RuntimeException("The first argument must 
point to the SpiderMonkey shell executable"); + } + + record Entry (String constructor, String description, String initializer) { + public static Entry of(String constructor, String description, String initializer) { + return new Entry(constructor, description, initializer); + } + + public static Entry of(String constructor, String initializer) { + return new Entry(constructor, constructor, initializer); + } + } + + var objects = new ArrayList<Entry>(); + objects.add(Entry.of("Collator", "o.compare('a', 'b')")); + objects.add(Entry.of("DateTimeFormat", "DateTimeFormat (UDateFormat)", "o.format(0)")); + objects.add(Entry.of("DateTimeFormat", "DateTimeFormat (UDateFormat+UDateIntervalFormat)", + "o.formatRange(0, 24*60*60*1000)")); + objects.add(Entry.of("DisplayNames", "o.of('en')")); + objects.add(Entry.of("ListFormat", "o.format(['a', 'b'])")); + objects.add(Entry.of("NumberFormat", "o.format(0)")); + // Instantiates UPluralRules and UNumberFormatter + // objects.add(Entry.of("PluralRules", "o.select(0)")); + // Instantiates only UPluralRules + objects.add(Entry.of("PluralRules", "o.resolvedOptions()")); + objects.add(Entry.of("RelativeTimeFormat", "o.format(0, 'hour')")); + + for (var entry : objects) { + measure(args[0], entry.constructor, entry.description, entry.initializer); + } + } + + private static final String sourceCode = """ +const constructorName = scriptArgs[0]; +const initializer = Function("o", scriptArgs[1]); +const locales = scriptArgs[2].split(","); + +const extras = {}; +addIntlExtras(extras); +if (extras.DisplayNames) { + Intl.DisplayNames = extras.DisplayNames; +} + +for (let i = 0; i < locales.length; ++i) { + // Loop twice in case the first time we create an object with a new locale + // allocates additional memory when loading the locale data. 
+ for (let j = 0; j < 2; ++j) { + let constructor = Intl[constructorName]; + + print("Create"); + let obj = new constructor(locales[i]); + + print("Init"); + initializer(obj); + + print("Destroy"); + gc(); + gc(); + print("Collect"); + } +} + +print("Quit"); +quit(); +"""; +} diff --git a/js/src/builtin/intl/IntlObject.cpp b/js/src/builtin/intl/IntlObject.cpp new file mode 100644 index 0000000000..35bb4035b3 --- /dev/null +++ b/js/src/builtin/intl/IntlObject.cpp @@ -0,0 +1,846 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Implementation of the Intl object and its non-constructor properties. */ + +#include "builtin/intl/IntlObject.h" + +#include "mozilla/Assertions.h" +#include "mozilla/Likely.h" +#include "mozilla/Range.h" + +#include <algorithm> +#include <iterator> + +#include "jsapi.h" + +#include "builtin/Array.h" +#include "builtin/intl/Collator.h" +#include "builtin/intl/CommonFunctions.h" +#include "builtin/intl/DateTimeFormat.h" +#include "builtin/intl/LanguageTag.h" +#include "builtin/intl/NumberFormat.h" +#include "builtin/intl/PluralRules.h" +#include "builtin/intl/RelativeTimeFormat.h" +#include "builtin/intl/ScopedICUObject.h" +#include "builtin/intl/SharedIntlData.h" +#include "js/CharacterEncoding.h" +#include "js/Class.h" +#include "js/friend/ErrorMessages.h" // js::GetErrorMessage, JSMSG_* +#include "js/PropertySpec.h" +#include "js/Result.h" +#include "js/StableStringChars.h" +#include "unicode/ucal.h" +#include "unicode/udat.h" +#include "unicode/udatpg.h" +#include "unicode/uloc.h" +#include "unicode/utypes.h" +#include "vm/GlobalObject.h" +#include "vm/JSAtom.h" +#include "vm/JSContext.h" +#include "vm/JSObject.h" +#include "vm/PlainObject.h" // js::PlainObject 
+#include "vm/StringType.h" + +#include "vm/JSObject-inl.h" +#include "vm/NativeObject-inl.h" + +using namespace js; + +using mozilla::Range; +using mozilla::RangedPtr; + +using JS::AutoStableStringChars; + +using js::intl::CallICU; +using js::intl::DateTimeFormatOptions; +using js::intl::IcuLocale; + +/******************** Intl ********************/ + +bool js::intl_GetCalendarInfo(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 1); + + UniqueChars locale = intl::EncodeLocale(cx, args[0].toString()); + if (!locale) { + return false; + } + + UErrorCode status = U_ZERO_ERROR; + const UChar* uTimeZone = nullptr; + int32_t uTimeZoneLength = 0; + UCalendar* cal = ucal_open(uTimeZone, uTimeZoneLength, locale.get(), + UCAL_DEFAULT, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + ScopedICUObject<UCalendar, ucal_close> toClose(cal); + + RootedObject info(cx, NewBuiltinClassInstance<PlainObject>(cx)); + if (!info) { + return false; + } + + RootedValue v(cx); + int32_t firstDayOfWeek = ucal_getAttribute(cal, UCAL_FIRST_DAY_OF_WEEK); + v.setInt32(firstDayOfWeek); + + if (!DefineDataProperty(cx, info, cx->names().firstDayOfWeek, v)) { + return false; + } + + int32_t minDays = ucal_getAttribute(cal, UCAL_MINIMAL_DAYS_IN_FIRST_WEEK); + v.setInt32(minDays); + if (!DefineDataProperty(cx, info, cx->names().minDays, v)) { + return false; + } + + UCalendarWeekdayType prevDayType = + ucal_getDayOfWeekType(cal, UCAL_SATURDAY, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + + RootedValue weekendStart(cx), weekendEnd(cx); + + for (int i = UCAL_SUNDAY; i <= UCAL_SATURDAY; i++) { + UCalendarDaysOfWeek dayOfWeek = static_cast<UCalendarDaysOfWeek>(i); + UCalendarWeekdayType type = ucal_getDayOfWeekType(cal, dayOfWeek, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + + if (prevDayType != type) { 
+ switch (type) { + case UCAL_WEEKDAY: + // If the first Weekday after Weekend is Sunday (1), + // then the last Weekend day is Saturday (7). + // Otherwise we'll just take the previous days number. + weekendEnd.setInt32(i == 1 ? 7 : i - 1); + break; + case UCAL_WEEKEND: + weekendStart.setInt32(i); + break; + case UCAL_WEEKEND_ONSET: + case UCAL_WEEKEND_CEASE: + // At the time this code was added, ICU apparently never behaves this + // way, so just throw, so that users will report a bug and we can + // decide what to do. + intl::ReportInternalError(cx); + return false; + default: + break; + } + } + + prevDayType = type; + } + + MOZ_ASSERT(weekendStart.isInt32()); + MOZ_ASSERT(weekendEnd.isInt32()); + + if (!DefineDataProperty(cx, info, cx->names().weekendStart, weekendStart)) { + return false; + } + + if (!DefineDataProperty(cx, info, cx->names().weekendEnd, weekendEnd)) { + return false; + } + + args.rval().setObject(*info); + return true; +} + +static void ReportBadKey(JSContext* cx, HandleString key) { + if (UniqueChars chars = QuoteString(cx, key, '"')) { + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_INVALID_KEY, + chars.get()); + } +} + +template <typename ConstChar> +static bool MatchPart(RangedPtr<ConstChar> iter, const RangedPtr<ConstChar> end, + const char* part, size_t partlen) { + for (size_t i = 0; i < partlen; iter++, i++) { + if (iter == end || *iter != part[i]) { + return false; + } + } + + return true; +} + +template <typename ConstChar, size_t N> +inline bool MatchPart(RangedPtr<ConstChar>* iter, + const RangedPtr<ConstChar> end, const char (&part)[N]) { + if (!MatchPart(*iter, end, part, N - 1)) { + return false; + } + + *iter += N - 1; + return true; +} + +enum class DisplayNameStyle { + Narrow, + Short, + Long, +}; + +template <typename ConstChar> +static JSString* ComputeSingleDisplayName(JSContext* cx, UDateFormat* fmt, + UDateTimePatternGenerator* dtpg, + DisplayNameStyle style, + const Range<ConstChar>& pattern, + 
HandleString patternString) { + RangedPtr<ConstChar> iter = pattern.begin(); + const RangedPtr<ConstChar> end = pattern.end(); + + auto MatchSlash = [cx, patternString, &iter, end]() { + if (MOZ_LIKELY(iter != end && *iter == '/')) { + iter++; + return true; + } + + ReportBadKey(cx, patternString); + return false; + }; + + if (!MatchPart(&iter, end, "dates")) { + ReportBadKey(cx, patternString); + return nullptr; + } + + if (!MatchSlash()) { + return nullptr; + } + + if (MatchPart(&iter, end, "fields")) { + if (!MatchSlash()) { + return nullptr; + } + + UDateTimePatternField fieldType; + + if (MatchPart(&iter, end, "year")) { + fieldType = UDATPG_YEAR_FIELD; + } else if (MatchPart(&iter, end, "month")) { + fieldType = UDATPG_MONTH_FIELD; + } else if (MatchPart(&iter, end, "week")) { + fieldType = UDATPG_WEEK_OF_YEAR_FIELD; + } else if (MatchPart(&iter, end, "day")) { + fieldType = UDATPG_DAY_FIELD; + } else { + ReportBadKey(cx, patternString); + return nullptr; + } + + // This part must be the final part with no trailing data. 
+ if (iter != end) { + ReportBadKey(cx, patternString); + return nullptr; + } + + int32_t resultSize; + const UChar* value = udatpg_getAppendItemName(dtpg, fieldType, &resultSize); + MOZ_ASSERT(resultSize >= 0); + + return NewStringCopyN<CanGC>(cx, value, size_t(resultSize)); + } + + if (MatchPart(&iter, end, "gregorian")) { + if (!MatchSlash()) { + return nullptr; + } + + UDateFormatSymbolType symbolType; + int32_t index; + + if (MatchPart(&iter, end, "months")) { + if (!MatchSlash()) { + return nullptr; + } + + switch (style) { + case DisplayNameStyle::Narrow: + symbolType = UDAT_STANDALONE_NARROW_MONTHS; + break; + + case DisplayNameStyle::Short: + symbolType = UDAT_STANDALONE_SHORT_MONTHS; + break; + + case DisplayNameStyle::Long: + symbolType = UDAT_STANDALONE_MONTHS; + break; + } + + if (MatchPart(&iter, end, "january")) { + index = UCAL_JANUARY; + } else if (MatchPart(&iter, end, "february")) { + index = UCAL_FEBRUARY; + } else if (MatchPart(&iter, end, "march")) { + index = UCAL_MARCH; + } else if (MatchPart(&iter, end, "april")) { + index = UCAL_APRIL; + } else if (MatchPart(&iter, end, "may")) { + index = UCAL_MAY; + } else if (MatchPart(&iter, end, "june")) { + index = UCAL_JUNE; + } else if (MatchPart(&iter, end, "july")) { + index = UCAL_JULY; + } else if (MatchPart(&iter, end, "august")) { + index = UCAL_AUGUST; + } else if (MatchPart(&iter, end, "september")) { + index = UCAL_SEPTEMBER; + } else if (MatchPart(&iter, end, "october")) { + index = UCAL_OCTOBER; + } else if (MatchPart(&iter, end, "november")) { + index = UCAL_NOVEMBER; + } else if (MatchPart(&iter, end, "december")) { + index = UCAL_DECEMBER; + } else { + ReportBadKey(cx, patternString); + return nullptr; + } + } else if (MatchPart(&iter, end, "weekdays")) { + if (!MatchSlash()) { + return nullptr; + } + + switch (style) { + case DisplayNameStyle::Narrow: + symbolType = UDAT_STANDALONE_NARROW_WEEKDAYS; + break; + + case DisplayNameStyle::Short: + symbolType = 
UDAT_STANDALONE_SHORT_WEEKDAYS; + break; + + case DisplayNameStyle::Long: + symbolType = UDAT_STANDALONE_WEEKDAYS; + break; + } + + if (MatchPart(&iter, end, "monday")) { + index = UCAL_MONDAY; + } else if (MatchPart(&iter, end, "tuesday")) { + index = UCAL_TUESDAY; + } else if (MatchPart(&iter, end, "wednesday")) { + index = UCAL_WEDNESDAY; + } else if (MatchPart(&iter, end, "thursday")) { + index = UCAL_THURSDAY; + } else if (MatchPart(&iter, end, "friday")) { + index = UCAL_FRIDAY; + } else if (MatchPart(&iter, end, "saturday")) { + index = UCAL_SATURDAY; + } else if (MatchPart(&iter, end, "sunday")) { + index = UCAL_SUNDAY; + } else { + ReportBadKey(cx, patternString); + return nullptr; + } + } else if (MatchPart(&iter, end, "dayperiods")) { + if (!MatchSlash()) { + return nullptr; + } + + symbolType = UDAT_AM_PMS; + + if (MatchPart(&iter, end, "am")) { + index = UCAL_AM; + } else if (MatchPart(&iter, end, "pm")) { + index = UCAL_PM; + } else { + ReportBadKey(cx, patternString); + return nullptr; + } + } else { + ReportBadKey(cx, patternString); + return nullptr; + } + + // This part must be the final part with no trailing data. + if (iter != end) { + ReportBadKey(cx, patternString); + return nullptr; + } + + return CallICU(cx, [fmt, symbolType, index](UChar* chars, int32_t size, + UErrorCode* status) { + return udat_getSymbols(fmt, symbolType, index, chars, size, status); + }); + } + + ReportBadKey(cx, patternString); + return nullptr; +} + +bool js::intl_ComputeDisplayNames(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 3); + + // 1. Assert: locale is a string. + UniqueChars locale = intl::EncodeLocale(cx, args[0].toString()); + if (!locale) { + return false; + } + + // 2. Assert: style is a string. 
+ DisplayNameStyle dnStyle; + { + JSLinearString* style = args[1].toString()->ensureLinear(cx); + if (!style) { + return false; + } + + if (StringEqualsLiteral(style, "narrow")) { + dnStyle = DisplayNameStyle::Narrow; + } else if (StringEqualsLiteral(style, "short")) { + dnStyle = DisplayNameStyle::Short; + } else { + MOZ_ASSERT(StringEqualsLiteral(style, "long")); + dnStyle = DisplayNameStyle::Long; + } + } + + // 3. Assert: keys is an Array. + RootedArrayObject keys(cx, &args[2].toObject().as<ArrayObject>()); + if (!keys) { + return false; + } + + // 4. Let result be ArrayCreate(0). + RootedArrayObject result(cx, NewDenseFullyAllocatedArray(cx, keys->length())); + if (!result) { + return false; + } + result->ensureDenseInitializedLength(0, keys->length()); + + UErrorCode status = U_ZERO_ERROR; + + UDateFormat* fmt = + udat_open(UDAT_DEFAULT, UDAT_DEFAULT, IcuLocale(locale.get()), nullptr, 0, + nullptr, 0, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + ScopedICUObject<UDateFormat, udat_close> datToClose(fmt); + + // UDateTimePatternGenerator will be needed for translations of date and + // time fields like "month", "week", "day" etc. + UDateTimePatternGenerator* dtpg = + udatpg_open(IcuLocale(locale.get()), &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + ScopedICUObject<UDateTimePatternGenerator, udatpg_close> datPgToClose(dtpg); + + // 5. For each element of keys, + RootedString keyValStr(cx); + RootedValue v(cx); + for (uint32_t i = 0; i < keys->length(); i++) { + if (!GetElement(cx, keys, keys, i, &v)) { + return false; + } + + keyValStr = v.toString(); + + AutoStableStringChars stablePatternChars(cx); + if (!stablePatternChars.init(cx, keyValStr)) { + return false; + } + + // 5.a. Perform an implementation dependent algorithm to map a key to a + // corresponding display name. + JSString* displayName = + stablePatternChars.isLatin1() + ? 
// 9.2.2 BestAvailableLocale ( availableLocales, locale )
//
// Repeatedly shortens |locale| from the right, one subtag at a time, until a
// candidate is found that is either reported as supported for |kind| by the
// runtime's SharedIntlData, or (when |defaultLocale| is non-null) is equal to
// |defaultLocale| or is a prefix of it that ends exactly at a '-' separator.
//
// Returns the matching candidate string, nullptr when no candidate is left
// (the current candidate contains no '-' any more), or an error result when
// the support lookup or the creation of the shortened string fails.
static JS::Result<JSString*> BestAvailableLocale(
    JSContext* cx, SupportedLocaleKind kind, HandleLinearString locale,
    HandleLinearString defaultLocale) {
  // In the spec, [[availableLocales]] is formally a list of all available
  // locales. But in our implementation, it's an *incomplete* list, not
  // necessarily including the default locale (and all locales implied by it,
  // e.g. "de" implied by "de-CH"), if that locale isn't in every
  // [[availableLocales]] list (because that locale is supported through
  // fallback, e.g. "de-CH" supported through "de").
  //
  // If we're considering the default locale, augment the spec loop with
  // additional checks to also test whether the current prefix is a prefix of
  // the default locale.

  intl::SharedIntlData& sharedIntlData = cx->runtime()->sharedIntlData.ref();

  // Returns the index of the last '-' in |chars[0..length)|, or -1 when the
  // range contains no '-'.
  auto findLast = [](const auto* chars, size_t length) {
    auto rbegin = std::make_reverse_iterator(chars + length);
    auto rend = std::make_reverse_iterator(chars);
    auto p = std::find(rbegin, rend, '-');

    // |dist(chars, p.base())| is equal to |dist(p, rend)|, pick whichever you
    // find easier to reason about when using reverse iterators.
    ptrdiff_t r = std::distance(chars, p.base());
    MOZ_ASSERT(r == std::distance(p, rend));

    // But always subtract one to convert from the reverse iterator result to
    // the corresponding forward iterator value, because reverse iterators
    // point to one element past the forward iterator value.
    return r - 1;
  };

  // Step 1.
  RootedLinearString candidate(cx, locale);

  // Step 2.
  while (true) {
    // Step 2.a.
    bool supported = false;
    if (!sharedIntlData.isSupportedLocale(cx, kind, candidate, &supported)) {
      return cx->alreadyReportedError();
    }
    if (supported) {
      return candidate.get();
    }

    // Extra (non-spec) check: accept the candidate if it is the default locale
    // itself or one of its '-'-delimited prefixes.
    if (defaultLocale && candidate->length() <= defaultLocale->length()) {
      if (EqualStrings(candidate, defaultLocale)) {
        return candidate.get();
      }

      if (candidate->length() < defaultLocale->length() &&
          HasSubstringAt(defaultLocale, candidate, 0) &&
          defaultLocale->latin1OrTwoByteChar(candidate->length()) == '-') {
        return candidate.get();
      }
    }

    // Step 2.b.
    // The raw character pointers are only used inside the |nogc| scopes.
    ptrdiff_t pos;
    if (candidate->hasLatin1Chars()) {
      JS::AutoCheckCannotGC nogc;
      pos = findLast(candidate->latin1Chars(nogc), candidate->length());
    } else {
      JS::AutoCheckCannotGC nogc;
      pos = findLast(candidate->twoByteChars(nogc), candidate->length());
    }

    // No '-' left: there is nothing more to strip, so there is no match.
    if (pos < 0) {
      return nullptr;
    }

    // Step 2.c.
    // If another '-' sits two characters before |pos|, the subtag right before
    // |pos| is a single character; drop that one-character subtag as well.
    size_t length = size_t(pos);
    if (length >= 2 && candidate->latin1OrTwoByteChar(length - 2) == '-') {
      length -= 2;
    }

    // Step 2.d.
    candidate = NewDependentString(cx, candidate, 0, length);
    if (!candidate) {
      return cx->alreadyReportedError();
    }
  }
}
tagStr = tag.toString(cx); + if (!tagStr) { + return false; + } + + bool canonical; + if (!EqualStrings(cx, locale, tagStr, &canonical)) { + return false; + } + MOZ_ASSERT(canonical, "locale is a canonicalized language tag"); + } +#endif + + MOZ_ASSERT(args[2].isNull() || args[2].isString()); + + RootedLinearString defaultLocale(cx); + if (args[2].isString()) { + defaultLocale = args[2].toString()->ensureLinear(cx); + if (!defaultLocale) { + return false; + } + } + + JSString* result; + JS_TRY_VAR_OR_RETURN_FALSE( + cx, result, BestAvailableLocale(cx, kind, locale, defaultLocale)); + + if (result) { + args.rval().setString(result); + } else { + args.rval().setUndefined(); + } + return true; +} + +bool js::intl_supportedLocaleOrFallback(JSContext* cx, unsigned argc, + Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 1); + + RootedLinearString locale(cx, args[0].toString()->ensureLinear(cx)); + if (!locale) { + return false; + } + + intl::LanguageTag tag(cx); + bool ok; + JS_TRY_VAR_OR_RETURN_FALSE( + cx, ok, intl::LanguageTagParser::tryParse(cx, locale, tag)); + + RootedLinearString candidate(cx); + if (!ok) { + candidate = NewStringCopyZ<CanGC>(cx, intl::LastDitchLocale()); + if (!candidate) { + return false; + } + } else { + if (!tag.canonicalize(cx)) { + return false; + } + + // The default locale must be in [[AvailableLocales]], and that list must + // not contain any locales with Unicode extension sequences, so remove any + // present in the candidate. 
+ tag.clearUnicodeExtension(); + + JSString* canonical = tag.toString(cx); + if (!canonical) { + return false; + } + + candidate = canonical->ensureLinear(cx); + if (!candidate) { + return false; + } + + for (const auto& mapping : js::intl::oldStyleLanguageTagMappings) { + const char* oldStyle = mapping.oldStyle; + const char* modernStyle = mapping.modernStyle; + + if (StringEqualsAscii(candidate, oldStyle)) { + candidate = NewStringCopyZ<CanGC>(cx, modernStyle); + if (!candidate) { + return false; + } + break; + } + } + } + + // 9.1 Internal slots of Service Constructors + // + // - [[AvailableLocales]] is a List [...]. The list must include the value + // returned by the DefaultLocale abstract operation (6.2.4), [...]. + // + // That implies we must ignore any candidate which isn't supported by all Intl + // service constructors. + // + // Note: We don't test the supported locales of either Intl.ListFormat, + // Intl.PluralRules, Intl.RelativeTimeFormat, because ICU doesn't provide the + // necessary API to return actual set of supported locales for these + // constructors. Instead it returns the complete set of available locales for + // ULocale, which is a superset of the locales supported by Collator, + // NumberFormat, and DateTimeFormat. 
+ bool isSupported = true; + for (auto kind : + {SupportedLocaleKind::Collator, SupportedLocaleKind::DateTimeFormat, + SupportedLocaleKind::NumberFormat}) { + JSString* supported; + JS_TRY_VAR_OR_RETURN_FALSE( + cx, supported, BestAvailableLocale(cx, kind, candidate, nullptr)); + + if (!supported) { + isSupported = false; + break; + } + } + + if (!isSupported) { + candidate = NewStringCopyZ<CanGC>(cx, intl::LastDitchLocale()); + if (!candidate) { + return false; + } + } + + args.rval().setString(candidate); + return true; +} + +static bool intl_toSource(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + args.rval().setString(cx->names().Intl); + return true; +} + +static const JSFunctionSpec intl_static_methods[] = { + JS_FN(js_toSource_str, intl_toSource, 0, 0), + JS_SELF_HOSTED_FN("getCanonicalLocales", "Intl_getCanonicalLocales", 1, 0), + JS_FS_END}; + +static const JSPropertySpec intl_static_properties[] = { + JS_STRING_SYM_PS(toStringTag, "Intl", JSPROP_READONLY), JS_PS_END}; + +static JSObject* CreateIntlObject(JSContext* cx, JSProtoKey key) { + Handle<GlobalObject*> global = cx->global(); + RootedObject proto(cx, GlobalObject::getOrCreateObjectPrototype(cx, global)); + if (!proto) { + return nullptr; + } + + // The |Intl| object is just a plain object with some "static" function + // properties and some constructor properties. + return NewTenuredObjectWithGivenProto(cx, &IntlClass, proto); +} + +/** + * Initializes the Intl Object and its standard built-in properties. + * Spec: ECMAScript Internationalization API Specification, 8.0, 8.1 + */ +static bool IntlClassFinish(JSContext* cx, HandleObject intl, + HandleObject proto) { + // Add the constructor properties. 
+ RootedId ctorId(cx); + RootedValue ctorValue(cx); + for (const auto& protoKey : + {JSProto_Collator, JSProto_DateTimeFormat, JSProto_DisplayNames, + JSProto_ListFormat, JSProto_Locale, JSProto_NumberFormat, + JSProto_PluralRules, JSProto_RelativeTimeFormat}) { + JSObject* ctor = GlobalObject::getOrCreateConstructor(cx, protoKey); + if (!ctor) { + return false; + } + + ctorId = NameToId(ClassName(protoKey, cx)); + ctorValue.setObject(*ctor); + if (!DefineDataProperty(cx, intl, ctorId, ctorValue, 0)) { + return false; + } + } + + return true; +} + +static const ClassSpec IntlClassSpec = { + CreateIntlObject, nullptr, intl_static_methods, intl_static_properties, + nullptr, nullptr, IntlClassFinish}; + +const JSClass js::IntlClass = {"Intl", JSCLASS_HAS_CACHED_PROTO(JSProto_Intl), + JS_NULL_CLASS_OPS, &IntlClassSpec}; diff --git a/js/src/builtin/intl/IntlObject.h b/js/src/builtin/intl/IntlObject.h new file mode 100644 index 0000000000..043e48199c --- /dev/null +++ b/js/src/builtin/intl/IntlObject.h @@ -0,0 +1,133 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef builtin_intl_IntlObject_h +#define builtin_intl_IntlObject_h + +#include "mozilla/Attributes.h" + +#include "js/RootingAPI.h" +#include "js/TypeDecls.h" + +namespace js { + +extern const JSClass IntlClass; + +/** + * Returns a plain object with calendar information for a single valid locale + * (callers must perform this validation). The object will have these + * properties: + * + * firstDayOfWeek + * an integer in the range 1=Sunday to 7=Saturday indicating the day + * considered the first day of the week in calendars, e.g. 
/**
 * Returns a plain object with locale information for a single valid locale
 * (callers must perform this validation). The object will have these
 * properties:
 *
 *   direction
 *     a string with a value "ltr" for left-to-right locale, and "rtl" for
 *     right-to-left locale.
 *   locale
 *     a BCP47 compliant locale string for the resolved locale.
 */
extern MOZ_MUST_USE bool intl_GetLocaleInfo(JSContext* cx, unsigned argc,
                                            JS::Value* vp);
/**
 * Compares a BCP 47 language tag against the locales in availableLocales and
 * returns the best available match -- or |undefined| if no match was found.
 * Uses the fallback mechanism of RFC 4647, section 3.4.
 *
 * The set of available locales consulted doesn't necessarily include the
 * default locale or any generalized forms of it (e.g. "de" is a more-general
 * form of "de-CH"). If you want to be sure to consider the default locale and
 * its generalized forms (you usually will), pass the default locale as the
 * value of |defaultOrNull|; otherwise pass null.
 *
 * Spec: ECMAScript Internationalization API Specification, 9.2.2.
 * Spec: RFC 4647, section 3.4.
 *
 * Usage: result = intl_BestAvailableLocale("Collator", locale, defaultOrNull)
 */
extern MOZ_MUST_USE bool intl_BestAvailableLocale(JSContext* cx, unsigned argc,
                                                  JS::Value* vp);
/**
 * 8.2.1 Intl.getCanonicalLocales ( locales )
 *
 * ES2017 Intl draft rev 947aa9a0c853422824a0c9510d8f09be3eb416b9
 */
function Intl_getCanonicalLocales(locales) {
    // Steps 1-2: canonicalize the requested locales and hand the list back.
    const canonicalLocales = CanonicalizeLocaleList(locales);
    return canonicalLocales;
}
/**
 * This function is a custom function in the style of the standard Intl.*
 * functions, that isn't part of any spec or proposal yet.
 * We want to use it internally to retrieve translated values from CLDR in
 * order to ensure they're aligned with what Intl API returns.
 *
 * This API may one day be a foundation for an ECMA402 API spec proposal.
 *
 * The function takes two arguments - locales which is a list of locale strings
 * and options which is an object with two optional properties:
 *
 *   keys:
 *     an Array of string values that are paths to individual terms
 *
 *   style:
 *     a String with a value "long", "short" or "narrow"
 *
 * It returns an object with properties:
 *
 *   locale:
 *     a negotiated locale string
 *
 *   style:
 *     negotiated style
 *
 *   values:
 *     A key-value pair list of requested keys and corresponding
 *     translated values
 *
 */
function Intl_getDisplayNames(locales, options) {
    // 1. Let requestedLocales be ? CanonicalizeLocaleList(locales).
    const requestedLocales = CanonicalizeLocaleList(locales);

    // 2. If options is undefined, then
    if (options === undefined)
        // a. Let options be ObjectCreate(null).
        options = std_Object_create(null);
    // 3. Else,
    else
        // a. Let options be ? ToObject(options).
        options = ToObject(options);

    const DateTimeFormat = dateTimeFormatInternalProperties;

    // 4. Let localeData be %DateTimeFormat%.[[localeData]].
    const localeData = DateTimeFormat.localeData;

    // 5. Let localeOpt be a new Record.
    const localeOpt = new Record();

    // 6. Set localeOpt.[[localeMatcher]] to "best fit".
    localeOpt.localeMatcher = "best fit";

    // 7. Let r be ResolveLocale(%DateTimeFormat%.[[availableLocales]], requestedLocales, localeOpt,
    //    %DateTimeFormat%.[[relevantExtensionKeys]], localeData).
    const r = ResolveLocale("DateTimeFormat",
                            requestedLocales,
                            localeOpt,
                            DateTimeFormat.relevantExtensionKeys,
                            localeData);

    // 8. Let style be ? GetOption(options, "style", "string", « "long", "short", "narrow" », "long").
    const style = GetOption(options, "style", "string", ["long", "short", "narrow"], "long");

    // 9. Let keys be ? Get(options, "keys").
    let keys = options.keys;

    // 10. If keys is undefined,
    if (keys === undefined) {
        // a. Let keys be ArrayCreate(0).
        keys = [];
    } else if (!IsObject(keys)) {
        // 11. Else,
        //   a. If Type(keys) is not Object, throw a TypeError exception.
        ThrowTypeError(JSMSG_INVALID_KEYS_TYPE);
    }

    // 12. Let processedKeys be ArrayCreate(0).
    // (This really should be a List, but we use an Array here in order that
    // |intl_ComputeDisplayNames| may infallibly access the list's length via
    // |ArrayObject::length|.)
    let processedKeys = [];

    // 13. Let len be ? ToLength(? Get(keys, "length")).
    let len = ToLength(keys.length);

    // 14. Let i be 0.
    // 15. Repeat, while i < len
    for (let i = 0; i < len; i++) {
        // a. Let processedKey be ? ToString(? Get(keys, i)).
        // b. Perform ? CreateDataPropertyOrThrow(processedKeys, i, processedKey).
        _DefineDataProperty(processedKeys, i, ToString(keys[i]));
    }

    // 16. Let names be ? ComputeDisplayNames(r.[[locale]], style, processedKeys).
    const names = intl_ComputeDisplayNames(r.locale, style, processedKeys);

    // 17. Let values be ObjectCreate(%ObjectPrototype%).
    const values = {};

    // 18. Set i to 0.
    // 19. Repeat, while i < len
    for (let i = 0; i < len; i++) {
        // a. Let key be ? Get(processedKeys, i).
        const key = processedKeys[i];
        // b. Let name be ? Get(names, i).
        const name = names[i];
        // c. Assert: Type(name) is string.
        assert(typeof name === "string", "unexpected non-string value");
        // d. Assert: the length of name is greater than zero.
        assert(name.length > 0, "empty string value");
        // e. Perform ? DefinePropertyOrThrow(values, key, name).
        _DefineDataProperty(values, key, name);
    }

    // 20. Let result be ObjectCreate(%ObjectPrototype%).
    // 21. Perform ! DefinePropertyOrThrow(result, "locale", r.[[locale]]).
    // 22. Perform ! DefinePropertyOrThrow(result, "style", style).
    // 23. Perform ! DefinePropertyOrThrow(result, "values", values).
    const result = { locale: r.locale, style, values };

    // 24. Return result.
    return result;
}
+ const DateTimeFormat = dateTimeFormatInternalProperties; + const localeData = DateTimeFormat.localeData; + + const localeOpt = new Record(); + localeOpt.localeMatcher = "best fit"; + + const r = ResolveLocale("DateTimeFormat", + requestedLocales, + localeOpt, + DateTimeFormat.relevantExtensionKeys, + localeData); + + return intl_GetLocaleInfo(r.locale); +} diff --git a/js/src/builtin/intl/LanguageTag.cpp b/js/src/builtin/intl/LanguageTag.cpp new file mode 100644 index 0000000000..ae771ecfab --- /dev/null +++ b/js/src/builtin/intl/LanguageTag.cpp @@ -0,0 +1,1742 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "builtin/intl/LanguageTag.h" + +#include "mozilla/Assertions.h" +#include "mozilla/DebugOnly.h" +#include "mozilla/MathAlgorithms.h" +#include "mozilla/Span.h" +#include "mozilla/TextUtils.h" +#include "mozilla/Variant.h" + +#include <algorithm> +#include <iterator> +#include <stddef.h> +#include <stdint.h> +#include <string> +#include <string.h> +#include <type_traits> +#include <utility> + +#include "jsapi.h" +#include "jsfriendapi.h" + +#include "builtin/intl/CommonFunctions.h" +#include "ds/Sort.h" +#include "gc/Tracer.h" +#include "js/friend/ErrorMessages.h" // js::GetErrorMessage, JSMSG_* +#include "js/Result.h" +#include "js/TracingAPI.h" +#include "js/Utility.h" +#include "js/Vector.h" +#include "unicode/uloc.h" +#include "unicode/utypes.h" +#include "util/StringBuffer.h" +#include "util/Text.h" +#include "vm/JSContext.h" +#include "vm/Printer.h" +#include "vm/StringType.h" + +namespace js { +namespace intl { + +using namespace js::intl::LanguageTagLimits; + +template <typename CharT> +bool IsStructurallyValidLanguageTag(mozilla::Span<const CharT> language) { + // 
Tell the analysis the |std::all_of| function can't GC. + JS::AutoSuppressGCAnalysis nogc; + + // unicode_language_subtag = alpha{2,3} | alpha{5,8}; + size_t length = language.size(); + const CharT* str = language.data(); + return ((2 <= length && length <= 3) || (5 <= length && length <= 8)) && + std::all_of(str, str + length, mozilla::IsAsciiAlpha<CharT>); +} + +template bool IsStructurallyValidLanguageTag( + mozilla::Span<const char> language); +template bool IsStructurallyValidLanguageTag( + mozilla::Span<const Latin1Char> language); +template bool IsStructurallyValidLanguageTag( + mozilla::Span<const char16_t> language); + +template <typename CharT> +bool IsStructurallyValidScriptTag(mozilla::Span<const CharT> script) { + // Tell the analysis the |std::all_of| function can't GC. + JS::AutoSuppressGCAnalysis nogc; + + // unicode_script_subtag = alpha{4} ; + size_t length = script.size(); + const CharT* str = script.data(); + return length == 4 && + std::all_of(str, str + length, mozilla::IsAsciiAlpha<CharT>); +} + +template bool IsStructurallyValidScriptTag(mozilla::Span<const char> script); +template bool IsStructurallyValidScriptTag( + mozilla::Span<const Latin1Char> script); +template bool IsStructurallyValidScriptTag( + mozilla::Span<const char16_t> script); + +template <typename CharT> +bool IsStructurallyValidRegionTag(mozilla::Span<const CharT> region) { + // Tell the analysis the |std::all_of| function can't GC. 
+ JS::AutoSuppressGCAnalysis nogc; + + // unicode_region_subtag = (alpha{2} | digit{3}) ; + size_t length = region.size(); + const CharT* str = region.data(); + return (length == 2 && + std::all_of(str, str + length, mozilla::IsAsciiAlpha<CharT>)) || + (length == 3 && + std::all_of(str, str + length, mozilla::IsAsciiDigit<CharT>)); +} + +template bool IsStructurallyValidRegionTag(mozilla::Span<const char> region); +template bool IsStructurallyValidRegionTag( + mozilla::Span<const Latin1Char> region); +template bool IsStructurallyValidRegionTag( + mozilla::Span<const char16_t> region); + +#ifdef DEBUG +bool IsStructurallyValidVariantTag(mozilla::Span<const char> variant) { + // unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3}) ; + size_t length = variant.size(); + const char* str = variant.data(); + return ((5 <= length && length <= 8) || + (length == 4 && mozilla::IsAsciiDigit(str[0]))) && + std::all_of(str, str + length, mozilla::IsAsciiAlphanumeric<char>); +} + +bool IsStructurallyValidUnicodeExtensionTag( + mozilla::Span<const char> extension) { + return LanguageTagParser::canParseUnicodeExtension(extension); +} + +static bool IsStructurallyValidExtensionTag( + mozilla::Span<const char> extension) { + // other_extensions = sep [alphanum-[tTuUxX]] (sep alphanum{2,8})+ ; + // NB: Allow any extension, including Unicode and Transform here, because + // this function is only used for an assertion. + + size_t length = extension.size(); + const char* str = extension.data(); + const char* const end = extension.data() + length; + if (length <= 2) { + return false; + } + if (!mozilla::IsAsciiAlphanumeric(str[0]) || str[0] == 'x' || str[0] == 'X') { + return false; + } + str++; + if (*str++ != '-') { + return false; + } + while (true) { + const char* sep = + reinterpret_cast<const char*>(memchr(str, '-', end - str)); + size_t len = (sep ? 
sep : end) - str; + if (len < 2 || len > 8 || + !std::all_of(str, str + len, mozilla::IsAsciiAlphanumeric<char>)) { + return false; + } + if (!sep) { + return true; + } + str = sep + 1; + } +} + +bool IsStructurallyValidPrivateUseTag(mozilla::Span<const char> privateUse) { + // pu_extensions = sep [xX] (sep alphanum{1,8})+ ; + + size_t length = privateUse.size(); + const char* str = privateUse.data(); + const char* const end = privateUse.data() + length; + if (length <= 2) { + return false; + } + if (str[0] != 'x' && str[0] != 'X') { + return false; + } + str++; + if (*str++ != '-') { + return false; + } + while (true) { + const char* sep = + reinterpret_cast<const char*>(memchr(str, '-', end - str)); + size_t len = (sep ? sep : end) - str; + if (len == 0 || len > 8 || + !std::all_of(str, str + len, mozilla::IsAsciiAlphanumeric<char>)) { + return false; + } + if (!sep) { + return true; + } + str = sep + 1; + } +} +#endif + +ptrdiff_t LanguageTag::unicodeExtensionIndex() const { + // The extension subtags aren't necessarily sorted, so we can't use binary + // search here. + auto p = std::find_if( + extensions().begin(), extensions().end(), + [](const auto& ext) { return ext[0] == 'u' || ext[0] == 'U'; }); + if (p != extensions().end()) { + return std::distance(extensions().begin(), p); + } + return -1; +} + +const char* LanguageTag::unicodeExtension() const { + ptrdiff_t index = unicodeExtensionIndex(); + if (index >= 0) { + return extensions()[index].get(); + } + return nullptr; +} + +bool LanguageTag::setUnicodeExtension(UniqueChars extension) { + MOZ_ASSERT(IsStructurallyValidUnicodeExtensionTag( + mozilla::MakeStringSpan(extension.get()))); + + // Replace the existing Unicode extension subtag or append a new one. 
+ ptrdiff_t index = unicodeExtensionIndex(); + if (index >= 0) { + extensions_[index] = std::move(extension); + return true; + } + return extensions_.append(std::move(extension)); +} + +void LanguageTag::clearUnicodeExtension() { + ptrdiff_t index = unicodeExtensionIndex(); + if (index >= 0) { + extensions_.erase(extensions_.begin() + index); + } +} + +template <size_t InitialCapacity> +static bool SortAlphabetically(JSContext* cx, + Vector<UniqueChars, InitialCapacity>& subtags) { + size_t length = subtags.length(); + + // Zero or one element lists are already sorted. + if (length < 2) { + return true; + } + + // Handle two element lists inline. + if (length == 2) { + if (strcmp(subtags[0].get(), subtags[1].get()) > 0) { + subtags[0].swap(subtags[1]); + } + return true; + } + + Vector<char*, 8> scratch(cx); + if (!scratch.resizeUninitialized(length * 2)) { + return false; + } + for (size_t i = 0; i < length; i++) { + scratch[i] = subtags[i].release(); + } + + MOZ_ALWAYS_TRUE( + MergeSort(scratch.begin(), length, scratch.begin() + length, + [](const char* a, const char* b, bool* lessOrEqualp) { + *lessOrEqualp = strcmp(a, b) <= 0; + return true; + })); + + for (size_t i = 0; i < length; i++) { + subtags[i] = UniqueChars(scratch[i]); + } + return true; +} + +bool LanguageTag::canonicalizeBaseName(JSContext* cx) { + // Per 6.2.3 CanonicalizeUnicodeLocaleId, the very first step is to + // canonicalize the syntax by normalizing the case and ordering all subtags. + // The canonical syntax form is specified in UTS 35, 3.2.1. + + // Language codes need to be in lower case. "JA" -> "ja" + language_.toLowerCase(); + MOZ_ASSERT(IsStructurallyValidLanguageTag(language().span())); + + // The first character of a script code needs to be capitalized. + // "hans" -> "Hans" + script_.toTitleCase(); + MOZ_ASSERT(script().missing() || + IsStructurallyValidScriptTag(script().span())); + + // Region codes need to be in upper case. 
"bu" -> "BU" + region_.toUpperCase(); + MOZ_ASSERT(region().missing() || + IsStructurallyValidRegionTag(region().span())); + + // The canonical case for variant subtags is lowercase. + for (UniqueChars& variant : variants_) { + char* variantChars = variant.get(); + size_t variantLength = strlen(variantChars); + AsciiToLowerCase(variantChars, variantLength, variantChars); + + MOZ_ASSERT(IsStructurallyValidVariantTag({variantChars, variantLength})); + } + + // Extensions and privateuse subtags are case normalized in the + // |canonicalizeExtensions| method. + + // The second step in UTS 35, 3.2.1, is to order all subtags. + + if (variants_.length() > 1) { + // 1. Any variants are in alphabetical order. + if (!SortAlphabetically(cx, variants_)) { + return false; + } + + // Reject the Locale identifier if a duplicate variant was found, e.g. + // "en-variant-Variant". + const UniqueChars* duplicate = std::adjacent_find( + variants().begin(), variants().end(), [](const auto& a, const auto& b) { + return strcmp(a.get(), b.get()) == 0; + }); + if (duplicate != variants().end()) { + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, + JSMSG_DUPLICATE_VARIANT_SUBTAG, + duplicate->get()); + return false; + } + } + + // 2. Any extensions are in alphabetical order by their singleton. + // 3. All attributes are sorted in alphabetical order. + // 4. All keywords and tfields are sorted by alphabetical order of their keys, + // within their respective extensions. + // 5. Any type or tfield value "true" is removed. + // - A subsequent call to canonicalizeExtensions() will perform these steps. + + // 6.2.3 CanonicalizeUnicodeLocaleId, step 2 transforms the locale identifier + // into its canonical form per UTS 3.2.1. + + // 1. Use the bcp47 data to replace keys, types, tfields, and tvalues by their + // canonical forms. + // - A subsequent call to canonicalizeExtensions() will perform this step. + + // 2. Replace aliases in the unicode_language_id and tlang (if any). 
+ // - tlang is handled in canonicalizeExtensions(). + + // Replace deprecated language, region, and variant subtags with their + // preferred mappings. + + if (!updateGrandfatheredMappings(cx)) { + return false; + } + + // Replace deprecated language subtags with their preferred values. + if (!languageMapping(language_) && complexLanguageMapping(language_)) { + performComplexLanguageMappings(); + } + + // No script replacements are currently present. + + // Replace deprecated region subtags with their preferred values. + if (region().present()) { + if (!regionMapping(region_) && complexRegionMapping(region_)) { + performComplexRegionMappings(); + } + } + + // Replace deprecated variant subtags with their preferred values. + if (!performVariantMappings(cx)) { + return false; + } + + // No extension replacements are currently present. + // Private use sequences are left as is. + + // 3. Replace aliases in special key values. + // - A subsequent call to canonicalizeExtensions() will perform this step. + + return true; +} + +#ifdef DEBUG +template <typename CharT> +static bool IsAsciiLowercaseAlphanumericOrDash( + mozilla::Span<const CharT> span) { + const CharT* ptr = span.data(); + size_t length = span.size(); + return std::all_of(ptr, ptr + length, [](auto c) { + return mozilla::IsAsciiLowercaseAlpha(c) || mozilla::IsAsciiDigit(c) || + c == '-'; + }); +} +#endif + +bool LanguageTag::canonicalizeExtensions(JSContext* cx) { + // The canonical case for all extension subtags is lowercase. + for (UniqueChars& extension : extensions_) { + char* extensionChars = extension.get(); + size_t extensionLength = strlen(extensionChars); + AsciiToLowerCase(extensionChars, extensionLength, extensionChars); + + MOZ_ASSERT( + IsStructurallyValidExtensionTag({extensionChars, extensionLength})); + } + + // Any extensions are in alphabetical order by their singleton. 
+ // "u-ca-chinese-t-zh-latn" -> "t-zh-latn-u-ca-chinese" + if (!SortAlphabetically(cx, extensions_)) { + return false; + } + + for (UniqueChars& extension : extensions_) { + if (extension[0] == 'u') { + if (!canonicalizeUnicodeExtension(cx, extension)) { + return false; + } + } else if (extension[0] == 't') { + if (!canonicalizeTransformExtension(cx, extension)) { + return false; + } + } + + MOZ_ASSERT(IsAsciiLowercaseAlphanumericOrDash( + mozilla::MakeStringSpan(extension.get()))); + } + + // The canonical case for privateuse subtags is lowercase. + if (char* privateuse = privateuse_.get()) { + size_t privateuseLength = strlen(privateuse); + AsciiToLowerCase(privateuse, privateuseLength, privateuse); + + MOZ_ASSERT( + IsStructurallyValidPrivateUseTag({privateuse, privateuseLength})); + } + return true; +} + +/** + * CanonicalizeUnicodeExtension( attributes, keywords ) + * + * Canonical syntax per + * <https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers>: + * + * - All attributes and keywords are in lowercase. + * - Note: The parser already converted keywords to lowercase. + * - All attributes are sorted in alphabetical order. + * - All keywords are sorted by alphabetical order of their keys. + * - Any type value "true" is removed. + * + * Canonical form: + * - All keys and types use the canonical form (from the name attribute; + * see Section 3.6.4 U Extension Data Files). 
+ */ +bool LanguageTag::canonicalizeUnicodeExtension( + JSContext* cx, JS::UniqueChars& unicodeExtension) { + const char* const extension = unicodeExtension.get(); + MOZ_ASSERT(extension[0] == 'u'); + MOZ_ASSERT(extension[1] == '-'); + MOZ_ASSERT( + IsStructurallyValidExtensionTag(mozilla::MakeStringSpan(extension))); + + size_t length = strlen(extension); + + LanguageTagParser::AttributesVector attributes(cx); + LanguageTagParser::KeywordsVector keywords(cx); + + using Attribute = LanguageTagParser::AttributesVector::ElementType; + using Keyword = LanguageTagParser::KeywordsVector::ElementType; + + mozilla::DebugOnly<bool> ok; + JS_TRY_VAR_OR_RETURN_FALSE( + cx, ok, + LanguageTagParser::parseUnicodeExtension( + cx, mozilla::Span(extension, length), attributes, keywords)); + MOZ_ASSERT(ok, "unexpected invalid Unicode extension subtag"); + + auto attributesLessOrEqual = [extension](const Attribute& a, + const Attribute& b) { + const char* astr = a.begin(extension); + const char* bstr = b.begin(extension); + size_t alen = a.length(); + size_t blen = b.length(); + + if (int r = + std::char_traits<char>::compare(astr, bstr, std::min(alen, blen))) { + return r < 0; + } + return alen <= blen; + }; + + // All attributes are sorted in alphabetical order. 
+ size_t attributesLength = attributes.length(); + if (attributesLength > 1) { + if (!attributes.growByUninitialized(attributesLength)) { + return false; + } + + MOZ_ALWAYS_TRUE( + MergeSort(attributes.begin(), attributesLength, + attributes.begin() + attributesLength, + [&](const auto& a, const auto& b, bool* lessOrEqualp) { + *lessOrEqualp = attributesLessOrEqual(a, b); + return true; + })); + + attributes.shrinkBy(attributesLength); + } + + auto keywordsLessOrEqual = [extension](const Keyword& a, const Keyword& b) { + const char* astr = a.begin(extension); + const char* bstr = b.begin(extension); + MOZ_ASSERT(a.length() >= UnicodeKeyLength); + MOZ_ASSERT(b.length() >= UnicodeKeyLength); + + return std::char_traits<char>::compare(astr, bstr, UnicodeKeyLength) <= 0; + }; + + // All keywords are sorted by alphabetical order of keys. + size_t keywordsLength = keywords.length(); + if (keywordsLength > 1) { + if (!keywords.growByUninitialized(keywordsLength)) { + return false; + } + + // Using merge sort, being a stable sort algorithm, guarantees that two + // keywords using the same key are never reordered. That means for example + // when we have the input "u-nu-thai-kf-false-nu-latn", we are guaranteed to + // get the result "u-kf-false-nu-thai-nu-latn", i.e. "nu-thai" still occurs + // before "nu-latn". + // This is required so that deduplication below preserves the first keyword + // for a given key and discards the rest. + MOZ_ALWAYS_TRUE(MergeSort( + keywords.begin(), keywordsLength, keywords.begin() + keywordsLength, + [&](const auto& a, const auto& b, bool* lessOrEqualp) { + *lessOrEqualp = keywordsLessOrEqual(a, b); + return true; + })); + + keywords.shrinkBy(keywordsLength); + } + + Vector<char, 32> sb(cx); + if (!sb.append('u')) { + return false; + } + + // Append all Unicode extension attributes. + for (size_t i = 0; i < attributes.length(); i++) { + const auto& attribute = attributes[i]; + + // Skip duplicate attributes. 
+ if (i > 0) { + const auto& lastAttribute = attributes[i - 1]; + if (attribute.length() == lastAttribute.length() && + std::char_traits<char>::compare(attribute.begin(extension), + lastAttribute.begin(extension), + attribute.length()) == 0) { + continue; + } + MOZ_ASSERT(!attributesLessOrEqual(attribute, lastAttribute)); + } + + if (!sb.append('-')) { + return false; + } + if (!sb.append(attribute.begin(extension), attribute.length())) { + return false; + } + } + + static constexpr size_t UnicodeKeyWithSepLength = UnicodeKeyLength + 1; + + using StringSpan = mozilla::Span<const char>; + + static auto isTrue = [](StringSpan type) { + constexpr char True[] = "true"; + const size_t TrueLength = strlen(True); + return type.size() == TrueLength && + std::char_traits<char>::compare(type.data(), True, TrueLength) == 0; + }; + + auto appendKey = [&sb, extension](const Keyword& keyword) { + MOZ_ASSERT(keyword.length() == UnicodeKeyLength); + return sb.append(keyword.begin(extension), UnicodeKeyLength); + }; + + auto appendKeyword = [&sb, extension](const Keyword& keyword, + StringSpan type) { + MOZ_ASSERT(keyword.length() > UnicodeKeyLength); + + // Elide the Unicode extension type "true". + if (isTrue(type)) { + return sb.append(keyword.begin(extension), UnicodeKeyLength); + } + // Otherwise append the complete Unicode extension keyword. + return sb.append(keyword.begin(extension), keyword.length()); + }; + + auto appendReplacement = [&sb, extension](const Keyword& keyword, + StringSpan replacement) { + MOZ_ASSERT(keyword.length() > UnicodeKeyLength); + + // Elide the type "true" if present in the replacement. + if (isTrue(replacement)) { + return sb.append(keyword.begin(extension), UnicodeKeyLength); + } + // Otherwise append the Unicode key (including the separator) and the + // replaced type. + return sb.append(keyword.begin(extension), UnicodeKeyWithSepLength) && + sb.append(replacement.data(), replacement.size()); + }; + + // Append all Unicode extension keywords. 
+ for (size_t i = 0; i < keywords.length(); i++) { + const auto& keyword = keywords[i]; + + // Skip duplicate keywords. + if (i > 0) { + const auto& lastKeyword = keywords[i - 1]; + if (std::char_traits<char>::compare(keyword.begin(extension), + lastKeyword.begin(extension), + UnicodeKeyLength) == 0) { + continue; + } + MOZ_ASSERT(!keywordsLessOrEqual(keyword, lastKeyword)); + } + + if (!sb.append('-')) { + return false; + } + + if (keyword.length() == UnicodeKeyLength) { + // Keyword without type value. + if (!appendKey(keyword)) { + return false; + } + } else { + StringSpan key(keyword.begin(extension), UnicodeKeyLength); + StringSpan type(keyword.begin(extension) + UnicodeKeyWithSepLength, + keyword.length() - UnicodeKeyWithSepLength); + + // Search if there's a replacement for the current Unicode keyword. + if (const char* replacement = replaceUnicodeExtensionType(key, type)) { + if (!appendReplacement(keyword, mozilla::MakeStringSpan(replacement))) { + return false; + } + } else { + if (!appendKeyword(keyword, type)) { + return false; + } + } + } + } + + // We can keep the previous extension when canonicalization didn't modify it. + if (sb.length() != length || + std::char_traits<char>::compare(sb.begin(), extension, length) != 0) { + // Null-terminate the new string and replace the previous extension. 
+ if (!sb.append('\0')) { + return false; + } + UniqueChars canonical(sb.extractOrCopyRawBuffer()); + if (!canonical) { + return false; + } + unicodeExtension = std::move(canonical); + } + + return true; +} + +template <class Buffer> +static bool LanguageTagToString(JSContext* cx, const LanguageTag& tag, + Buffer& sb) { + auto appendSubtag = [&sb](const auto& subtag) { + auto span = subtag.span(); + MOZ_ASSERT(!span.empty()); + return sb.append(span.data(), span.size()); + }; + + auto appendSubtagZ = [&sb](const char* subtag) { + MOZ_ASSERT(strlen(subtag) > 0); + return sb.append(subtag, strlen(subtag)); + }; + + auto appendSubtagsZ = [&sb, &appendSubtagZ](const auto& subtags) { + for (const auto& subtag : subtags) { + if (!sb.append('-') || !appendSubtagZ(subtag.get())) { + return false; + } + } + return true; + }; + + // Append the language subtag. + if (!appendSubtag(tag.language())) { + return false; + } + + // Append the script subtag if present. + if (tag.script().present()) { + if (!sb.append('-') || !appendSubtag(tag.script())) { + return false; + } + } + + // Append the region subtag if present. + if (tag.region().present()) { + if (!sb.append('-') || !appendSubtag(tag.region())) { + return false; + } + } + + // Append the variant subtags if present. + if (!appendSubtagsZ(tag.variants())) { + return false; + } + + // Append the extensions subtags if present. + if (!appendSubtagsZ(tag.extensions())) { + return false; + } + + // Append the private-use subtag if present. + if (tag.privateuse()) { + if (!sb.append('-') || !appendSubtagZ(tag.privateuse())) { + return false; + } + } + + return true; +} + +/** + * CanonicalizeTransformExtension + * + * Canonical form per <https://unicode.org/reports/tr35/#BCP47_T_Extension>: + * + * - These subtags are all in lowercase (that is the canonical casing for these + * subtags), [...]. 
+ * + * And per + * <https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers>: + * + * - All keywords and tfields are sorted by alphabetical order of their keys, + * within their respective extensions. + */ +bool LanguageTag::canonicalizeTransformExtension( + JSContext* cx, JS::UniqueChars& transformExtension) { + const char* const extension = transformExtension.get(); + MOZ_ASSERT(extension[0] == 't'); + MOZ_ASSERT(extension[1] == '-'); + MOZ_ASSERT( + IsStructurallyValidExtensionTag(mozilla::MakeStringSpan(extension))); + + size_t length = strlen(extension); + + LanguageTag tag(cx); + LanguageTagParser::TFieldVector fields(cx); + + using TField = LanguageTagParser::TFieldVector::ElementType; + + mozilla::DebugOnly<bool> ok; + JS_TRY_VAR_OR_RETURN_FALSE( + cx, ok, + LanguageTagParser::parseTransformExtension( + cx, mozilla::Span(extension, length), tag, fields)); + MOZ_ASSERT(ok, "unexpected invalid transform extension subtag"); + + auto tfieldLessOrEqual = [extension](const TField& a, const TField& b) { + MOZ_ASSERT(a.length() > TransformKeyLength); + MOZ_ASSERT(b.length() > TransformKeyLength); + const char* astr = a.begin(extension); + const char* bstr = b.begin(extension); + return std::char_traits<char>::compare(astr, bstr, TransformKeyLength) <= 0; + }; + + // All tfields are sorted by alphabetical order of their keys. + if (size_t fieldsLength = fields.length(); fieldsLength > 1) { + if (!fields.growByUninitialized(fieldsLength)) { + return false; + } + + MOZ_ALWAYS_TRUE( + MergeSort(fields.begin(), fieldsLength, fields.begin() + fieldsLength, + [&](const auto& a, const auto& b, bool* lessOrEqualp) { + *lessOrEqualp = tfieldLessOrEqual(a, b); + return true; + })); + + fields.shrinkBy(fieldsLength); + } + + Vector<char, 32> sb(cx); + if (!sb.append('t')) { + return false; + } + + // Append the language subtag if present. + // + // Replace aliases in tlang per + // <https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers>. 
+ if (tag.language().present()) { + if (!sb.append('-')) { + return false; + } + + if (!tag.canonicalizeBaseName(cx)) { + return false; + } + + // The canonical case for Transform extensions is lowercase per + // <https://unicode.org/reports/tr35/#BCP47_T_Extension>. Convert the two + // subtags which don't use lowercase for their canonical syntax. + tag.script_.toLowerCase(); + tag.region_.toLowerCase(); + + if (!LanguageTagToString(cx, tag, sb)) { + return false; + } + } + + static constexpr size_t TransformKeyWithSepLength = TransformKeyLength + 1; + + using StringSpan = mozilla::Span<const char>; + + // Append all fields. + // + // UTS 35, 3.2.1 specifies: + // - Any type or tfield value "true" is removed. + // + // But the `tvalue` subtag is mandatory in `tfield: tkey tvalue`, so ignore + // this apparently invalid part of the UTS 35 specification and simply + // append all `tfield` subtags. + for (const auto& field : fields) { + if (!sb.append('-')) { + return false; + } + + StringSpan key(field.begin(extension), TransformKeyLength); + StringSpan value(field.begin(extension) + TransformKeyWithSepLength, + field.length() - TransformKeyWithSepLength); + + // Search if there's a replacement for the current transform keyword. + if (const char* replacement = replaceTransformExtensionType(key, value)) { + if (!sb.append(field.begin(extension), TransformKeyWithSepLength)) { + return false; + } + if (!sb.append(replacement, strlen(replacement))) { + return false; + } + } else { + if (!sb.append(field.begin(extension), field.length())) { + return false; + } + } + } + + // We can keep the previous extension when canonicalization didn't modify it. + if (sb.length() != length || + std::char_traits<char>::compare(sb.begin(), extension, length) != 0) { + // Null-terminate the new string and replace the previous extension. 
+ if (!sb.append('\0')) { + return false; + } + UniqueChars canonical(sb.extractOrCopyRawBuffer()); + if (!canonical) { + return false; + } + transformExtension = std::move(canonical); + } + + return true; +} + +JSString* LanguageTag::toString(JSContext* cx) const { + JSStringBuilder sb(cx); + if (!LanguageTagToString(cx, *this, sb)) { + return nullptr; + } + + return sb.finishString(); +} + +UniqueChars LanguageTag::toStringZ(JSContext* cx) const { + Vector<char, 16> sb(cx); + if (!LanguageTagToString(cx, *this, sb)) { + return nullptr; + } + if (!sb.append('\0')) { + return nullptr; + } + + return UniqueChars(sb.extractOrCopyRawBuffer()); +} + +// Zero-terminated ICU Locale ID. +using LocaleId = + js::Vector<char, LanguageLength + 1 + ScriptLength + 1 + RegionLength + 1>; + +enum class LikelySubtags : bool { Add, Remove }; + +// Return true iff the language tag is already maximized resp. minimized. +static bool HasLikelySubtags(LikelySubtags likelySubtags, + const LanguageTag& tag) { + // The language tag is already maximized if the language, script, and region + // subtags are present and no placeholder subtags ("und", "Zzzz", "ZZ") are + // used. + if (likelySubtags == LikelySubtags::Add) { + return !tag.language().equalTo("und") && + (tag.script().present() && !tag.script().equalTo("Zzzz")) && + (tag.region().present() && !tag.region().equalTo("ZZ")); + } + + // The language tag is already minimized if it only contains a language + // subtag whose value is not the placeholder value "und". + return !tag.language().equalTo("und") && tag.script().missing() && + tag.region().missing(); +} + +// Create an ICU locale ID from the given language tag. 
+static bool CreateLocaleForLikelySubtags(const LanguageTag& tag, + LocaleId& locale) { + MOZ_ASSERT(locale.length() == 0); + + auto appendSubtag = [&locale](const auto& subtag) { + auto span = subtag.span(); + MOZ_ASSERT(!span.empty()); + return locale.append(span.data(), span.size()); + }; + + // Append the language subtag. + if (!appendSubtag(tag.language())) { + return false; + } + + // Append the script subtag if present. + if (tag.script().present()) { + if (!locale.append('_') || !appendSubtag(tag.script())) { + return false; + } + } + + // Append the region subtag if present. + if (tag.region().present()) { + if (!locale.append('_') || !appendSubtag(tag.region())) { + return false; + } + } + + // Zero-terminated for use with ICU. + return locale.append('\0'); +} + +// Assign the language, script, and region subtags from an ICU locale ID. +// +// ICU provides |uloc_getLanguage|, |uloc_getScript|, and |uloc_getCountry| to +// retrieve these subtags, but unfortunately these functions are rather slow, so +// we use our own implementation. +static bool AssignFromLocaleId(JSContext* cx, LocaleId& localeId, + LanguageTag& tag) { + MOZ_ASSERT(localeId.back() == '\0', + "Locale ID should be zero-terminated for ICU"); + + // Replace the ICU locale ID separator. + std::replace(localeId.begin(), localeId.end(), '_', '-'); + + // ICU replaces "und" with the empty string, which means "und" becomes "" and + // "und-Latn" becomes "-Latn". Handle this case separately. + if (localeId[0] == '\0' || localeId[0] == '-') { + static constexpr char und[] = "und"; + size_t length = strlen(und); + + // Insert "und" in front of the locale ID. 
+ if (!localeId.growBy(length)) { + return false; + } + memmove(localeId.begin() + length, localeId.begin(), localeId.length()); + memmove(localeId.begin(), und, length); + } + + mozilla::Span<const char> localeSpan(localeId.begin(), localeId.length() - 1); + + // Retrieve the language, script, and region subtags from the locale ID, but + // ignore any other subtags. + LanguageTag localeTag(cx); + if (!LanguageTagParser::parseBaseName(cx, localeSpan, localeTag)) { + return false; + } + + tag.setLanguage(localeTag.language()); + tag.setScript(localeTag.script()); + tag.setRegion(localeTag.region()); + + return true; +} + +template <decltype(uloc_addLikelySubtags) likelySubtagsFn> +static bool CallLikelySubtags(JSContext* cx, const LocaleId& localeId, + LocaleId& result) { + // Locale ID must be zero-terminated before passing it to ICU. + MOZ_ASSERT(localeId.back() == '\0'); + MOZ_ASSERT(result.length() == 0); + + // Ensure there's enough room for the result. + MOZ_ALWAYS_TRUE(result.resize(LocaleId::InlineLength)); + + int32_t length = intl::CallICU( + cx, + [&localeId](char* chars, int32_t size, UErrorCode* status) { + return likelySubtagsFn(localeId.begin(), chars, size, status); + }, + result); + if (length < 0) { + return false; + } + + MOZ_ASSERT( + size_t(length) <= LocaleId::InlineLength, + "Unexpected extra subtags were added by ICU. If this assertion ever " + "fails, simply remove it and move on like nothing ever happended."); + + // Resize the vector to the actual string length. + result.shrinkTo(length); + + // Zero-terminated for use with ICU. + return result.append('\0'); +} + +// The canonical way to compute the Unicode BCP 47 locale identifier with likely +// subtags is as follows: +// +// 1. Call uloc_forLanguageTag() to transform the locale identifer into an ICU +// locale ID. +// 2. Call uloc_addLikelySubtags() to add the likely subtags to the locale ID. +// 3. 
Call uloc_toLanguageTag() to transform the resulting locale ID back into +// a Unicode BCP 47 locale identifier. +// +// Since uloc_forLanguageTag() and uloc_toLanguageTag() are both kind of slow +// and we know, by construction, that the input Unicode BCP 47 locale identifier +// only contains valid language, script, and region subtags, we can avoid both +// calls if we implement them ourselves, see CreateLocaleForLikelySubtags() and +// AssignFromLocaleId(). (Where "slow" means about 50% of the execution time of +// |Intl.Locale.prototype.maximize|.) +static bool LikelySubtags(JSContext* cx, LikelySubtags likelySubtags, + LanguageTag& tag) { + // Return early if the input is already maximized/minimized. + if (HasLikelySubtags(likelySubtags, tag)) { + return true; + } + + // Create the locale ID for the input argument. + LocaleId locale(cx); + if (!CreateLocaleForLikelySubtags(tag, locale)) { + return false; + } + + // UTS #35 requires that locale ID is maximized before its likely subtags are + // removed, so we need to call uloc_addLikelySubtags() for both cases. + // See <https://ssl.icu-project.org/trac/ticket/10220> and + // <https://ssl.icu-project.org/trac/ticket/12345>. + + LocaleId localeLikelySubtags(cx); + + // Add likely subtags to the locale ID. When minimizing we can skip adding the + // likely subtags for already maximized tags. (When maximizing we've already + // verified above that the tag is missing likely subtags.) + bool addLikelySubtags = likelySubtags == LikelySubtags::Add || + !HasLikelySubtags(LikelySubtags::Add, tag); + + if (addLikelySubtags) { + if (!CallLikelySubtags<uloc_addLikelySubtags>(cx, locale, + localeLikelySubtags)) { + return false; + } + } + + // Now that we've succesfully maximized the locale, we can minimize it. + if (likelySubtags == LikelySubtags::Remove) { + if (addLikelySubtags) { + // Copy the maximized subtags back into |locale|. 
+ locale = std::move(localeLikelySubtags); + localeLikelySubtags = LocaleId(cx); + } + + // Remove likely subtags from the locale ID. + if (!CallLikelySubtags<uloc_minimizeSubtags>(cx, locale, + localeLikelySubtags)) { + return false; + } + } + + // Assign the language, script, and region subtags from the locale ID. + if (!AssignFromLocaleId(cx, localeLikelySubtags, tag)) { + return false; + } + + // Update mappings in case ICU returned a non-canonical locale. + return tag.canonicalizeBaseName(cx); +} + +bool LanguageTag::addLikelySubtags(JSContext* cx) { + return LikelySubtags(cx, LikelySubtags::Add, *this); +} + +bool LanguageTag::removeLikelySubtags(JSContext* cx) { + return LikelySubtags(cx, LikelySubtags::Remove, *this); +} + +LanguageTagParser::Token LanguageTagParser::nextToken() { + MOZ_ASSERT(index_ <= length_ + 1, "called after 'None' token was read"); + + TokenKind kind = TokenKind::None; + size_t tokenLength = 0; + for (size_t i = index_; i < length_; i++) { + // UTS 35, section 3.1. + // alpha = [A-Z a-z] ; + // digit = [0-9] ; + char16_t c = charAtUnchecked(i); + if (mozilla::IsAsciiAlpha(c)) { + kind |= TokenKind::Alpha; + } else if (mozilla::IsAsciiDigit(c)) { + kind |= TokenKind::Digit; + } else if (c == '-' && i > index_ && i + 1 < length_) { + break; + } else { + return {TokenKind::Error, 0, 0}; + } + tokenLength += 1; + } + + Token token{kind, index_, tokenLength}; + index_ += tokenLength + 1; + return token; +} + +UniqueChars LanguageTagParser::chars(JSContext* cx, size_t index, + size_t length) const { + // Add +1 to null-terminate the string. + auto chars = cx->make_pod_array<char>(length + 1); + if (chars) { + char* dest = chars.get(); + if (locale_.is<const JS::Latin1Char*>()) { + std::copy_n(locale_.as<const JS::Latin1Char*>() + index, length, dest); + } else { + std::copy_n(locale_.as<const char16_t*>() + index, length, dest); + } + dest[length] = '\0'; + } + return chars; +} + +// Parse the `unicode_language_id` production. 
+// +// unicode_language_id = unicode_language_subtag +// (sep unicode_script_subtag)? +// (sep unicode_region_subtag)? +// (sep unicode_variant_subtag)* ; +// +// sep = "-" +// +// Note: Unicode CLDR locale identifier backward compatibility extensions +// removed from `unicode_language_id`. +// +// |tok| is the current token from |ts|. +// +// All subtags will be added unaltered to |tag|, without canonicalizing their +// case or, in the case of variant subtags, detecting and rejecting duplicate +// variants. Users must subsequently |canonicalizeBaseName| to perform these +// actions. +// +// Do not use this function directly: use |parseBaseName| or +// |parseTlangFromTransformExtension| instead. +JS::Result<bool> LanguageTagParser::internalParseBaseName(JSContext* cx, + LanguageTagParser& ts, + LanguageTag& tag, + Token& tok) { + if (ts.isLanguage(tok)) { + ts.copyChars(tok, tag.language_); + + tok = ts.nextToken(); + } else { + // The language subtag is mandatory. + return false; + } + + if (ts.isScript(tok)) { + ts.copyChars(tok, tag.script_); + + tok = ts.nextToken(); + } + + if (ts.isRegion(tok)) { + ts.copyChars(tok, tag.region_); + + tok = ts.nextToken(); + } + + auto& variants = tag.variants_; + MOZ_ASSERT(variants.length() == 0); + while (ts.isVariant(tok)) { + auto variant = ts.chars(cx, tok); + if (!variant) { + return cx->alreadyReportedOOM(); + } + if (!variants.append(std::move(variant))) { + return cx->alreadyReportedOOM(); + } + + tok = ts.nextToken(); + } + + return true; +} + +static mozilla::Variant<const Latin1Char*, const char16_t*> StringChars( + const char* locale) { + return mozilla::AsVariant(reinterpret_cast<const JS::Latin1Char*>(locale)); +} + +static mozilla::Variant<const Latin1Char*, const char16_t*> StringChars( + JSLinearString* linear, JS::AutoCheckCannotGC& nogc) { + if (linear->hasLatin1Chars()) { + return mozilla::AsVariant(linear->latin1Chars(nogc)); + } + return mozilla::AsVariant(linear->twoByteChars(nogc)); +} + 
+JS::Result<bool> LanguageTagParser::tryParse(JSContext* cx, + JSLinearString* locale, + LanguageTag& tag) { + JS::AutoCheckCannotGC nogc; + LocaleChars localeChars = StringChars(locale, nogc); + return tryParse(cx, localeChars, locale->length(), tag); +} + +JS::Result<bool> LanguageTagParser::tryParse(JSContext* cx, + mozilla::Span<const char> locale, + LanguageTag& tag) { + LocaleChars localeChars = StringChars(locale.data()); + return tryParse(cx, localeChars, locale.size(), tag); +} + +JS::Result<bool> LanguageTagParser::tryParse(JSContext* cx, + LocaleChars& localeChars, + size_t localeLength, + LanguageTag& tag) { + // unicode_locale_id = unicode_language_id + // extensions* + // pu_extensions? ; + + LanguageTagParser ts(localeChars, localeLength); + Token tok = ts.nextToken(); + + bool ok; + MOZ_TRY_VAR(ok, parseBaseName(cx, ts, tag, tok)); + if (!ok) { + return false; + } + + // extensions = unicode_locale_extensions + // | transformed_extensions + // | other_extensions ; + + // Bit set of seen singletons. + uint64_t seenSingletons = 0; + + auto& extensions = tag.extensions_; + while (ts.isExtensionStart(tok)) { + char singleton = ts.singletonKey(tok); + + // Reject the input if a duplicate singleton was found. + uint64_t hash = 1ULL << (mozilla::AsciiAlphanumericToNumber(singleton) + 1); + if (seenSingletons & hash) { + return false; + } + seenSingletons |= hash; + + Token start = tok; + tok = ts.nextToken(); + + // We'll check for missing non-singleton subtags after this block by + // comparing |startValue| with the then-current position. + size_t startValue = tok.index(); + + if (singleton == 'u') { + while (ts.isUnicodeExtensionPart(tok)) { + tok = ts.nextToken(); + } + } else if (singleton == 't') { + // transformed_extensions = sep [tT] + // ((sep tlang (sep tfield)*) + // | (sep tfield)+) ; + + // tlang = unicode_language_subtag + // (sep unicode_script_subtag)? + // (sep unicode_region_subtag)? 
+ // (sep unicode_variant_subtag)* ; + if (ts.isLanguage(tok)) { + tok = ts.nextToken(); + + if (ts.isScript(tok)) { + tok = ts.nextToken(); + } + + if (ts.isRegion(tok)) { + tok = ts.nextToken(); + } + + while (ts.isVariant(tok)) { + tok = ts.nextToken(); + } + } + + // tfield = tkey tvalue; + while (ts.isTransformExtensionKey(tok)) { + tok = ts.nextToken(); + + size_t startTValue = tok.index(); + while (ts.isTransformExtensionPart(tok)) { + tok = ts.nextToken(); + } + + // `tfield` requires at least one `tvalue`. + if (tok.index() <= startTValue) { + return false; + } + } + } else { + while (ts.isOtherExtensionPart(tok)) { + tok = ts.nextToken(); + } + } + + // Singletons must be followed by a non-singleton subtag, "en-a-b" is not + // allowed. + if (tok.index() <= startValue) { + return false; + } + + UniqueChars extension = ts.extension(cx, start, tok); + if (!extension) { + return cx->alreadyReportedOOM(); + } + if (!extensions.append(std::move(extension))) { + return cx->alreadyReportedOOM(); + } + } + + // Trailing `pu_extension` component of the `unicode_locale_id` production. + if (ts.isPrivateUseStart(tok)) { + Token start = tok; + tok = ts.nextToken(); + + size_t startValue = tok.index(); + while (ts.isPrivateUsePart(tok)) { + tok = ts.nextToken(); + } + + // There must be at least one subtag after the "-x-". + if (tok.index() <= startValue) { + return false; + } + + UniqueChars privateUse = ts.extension(cx, start, tok); + if (!privateUse) { + return cx->alreadyReportedOOM(); + } + tag.privateuse_ = std::move(privateUse); + } + + // Return true if the complete input was successfully parsed. 
+ return tok.isNone(); +} + +bool LanguageTagParser::parse(JSContext* cx, JSLinearString* locale, + LanguageTag& tag) { + bool ok; + JS_TRY_VAR_OR_RETURN_FALSE(cx, ok, tryParse(cx, locale, tag)); + if (ok) { + return true; + } + if (UniqueChars localeChars = QuoteString(cx, locale, '"')) { + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, + JSMSG_INVALID_LANGUAGE_TAG, localeChars.get()); + } + return false; +} + +bool LanguageTagParser::parse(JSContext* cx, mozilla::Span<const char> locale, + LanguageTag& tag) { + bool ok; + JS_TRY_VAR_OR_RETURN_FALSE(cx, ok, tryParse(cx, locale, tag)); + if (ok) { + return true; + } + if (UniqueChars localeChars = + DuplicateString(cx, locale.data(), locale.size())) { + JS_ReportErrorNumberUTF8(cx, GetErrorMessage, nullptr, + JSMSG_INVALID_LANGUAGE_TAG, localeChars.get()); + } + return false; +} + +bool LanguageTagParser::parseBaseName(JSContext* cx, + mozilla::Span<const char> locale, + LanguageTag& tag) { + LocaleChars localeChars = StringChars(locale.data()); + LanguageTagParser ts(localeChars, locale.size()); + Token tok = ts.nextToken(); + + // Parse only the base-name part and ignore any trailing characters. + bool ok; + JS_TRY_VAR_OR_RETURN_FALSE(cx, ok, parseBaseName(cx, ts, tag, tok)); + if (ok) { + return true; + } + if (UniqueChars localeChars = + DuplicateString(cx, locale.data(), locale.size())) { + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, + JSMSG_INVALID_LANGUAGE_TAG, localeChars.get()); + } + return false; +} + +JS::Result<bool> LanguageTagParser::tryParseBaseName(JSContext* cx, + JSLinearString* locale, + LanguageTag& tag) { + JS::AutoCheckCannotGC nogc; + LocaleChars localeChars = StringChars(locale, nogc); + LanguageTagParser ts(localeChars, locale->length()); + Token tok = ts.nextToken(); + + // Return true if the complete input was successfully parsed. 
+ bool ok; + MOZ_TRY_VAR(ok, parseBaseName(cx, ts, tag, tok)); + return ok && tok.isNone(); +} + +// Parse |extension|, which must be a valid `transformed_extensions` subtag, and +// fill |tag| and |fields| from the `tlang` and `tfield` components. +JS::Result<bool> LanguageTagParser::parseTransformExtension( + JSContext* cx, mozilla::Span<const char> extension, LanguageTag& tag, + TFieldVector& fields) { + LocaleChars extensionChars = StringChars(extension.data()); + LanguageTagParser ts(extensionChars, extension.size()); + Token tok = ts.nextToken(); + + // The input has to begin with the 't' singleton. + if (!ts.isExtensionStart(tok) || ts.singletonKey(tok) != 't') { + return false; + } + + tok = ts.nextToken(); + + // A bare singleton with nothing after it is not a transform extension. + if (tok.isNone()) { + return false; + } + + if (ts.isLanguage(tok)) { + // We're parsing a possible `tlang` in a known-valid transform extension, so + // use the special-purpose function that takes advantage of this to compute + // lowercased |tag| contents in an optimal manner. + MOZ_TRY(parseTlangInTransformExtension(cx, ts, tag, tok)); + + // After `tlang` we must have a `tfield` and its `tkey`, or we're at the end + // of the transform extension. + MOZ_ASSERT(ts.isTransformExtensionKey(tok) || tok.isNone()); + } else { + // If there's no `tlang` subtag, at least one `tfield` must be present. + MOZ_ASSERT(ts.isTransformExtensionKey(tok)); + } + + // Trailing `tfield` subtags. (Any other trailing subtags are an error, + // because we're guaranteed to only see a valid transform extension here.) + while (ts.isTransformExtensionKey(tok)) { + size_t begin = tok.index(); + tok = ts.nextToken(); + + size_t startTValue = tok.index(); + while (ts.isTransformExtensionPart(tok)) { + tok = ts.nextToken(); + } + + // `tfield` requires at least one `tvalue`. + if (tok.index() <= startTValue) { + return false; + } + + // Record the field as a (begin, length) range into |extension|. The -1 + // presumably excludes the separator in front of |tok| (TODO confirm against + // nextToken()). + size_t length = tok.index() - 1 - begin; + if (!fields.emplaceBack(begin, length)) { + return cx->alreadyReportedOOM(); + } + } + + // Return true if the complete input was successfully parsed. 
+ return tok.isNone(); +} + +// Parse |extension|, which must be a valid `unicode_locale_extensions` subtag, +// and fill |attributes| and |keywords| from the `attribute` and `keyword` +// components. +JS::Result<bool> LanguageTagParser::parseUnicodeExtension( + JSContext* cx, mozilla::Span<const char> extension, + AttributesVector& attributes, KeywordsVector& keywords) { + LocaleChars extensionChars = StringChars(extension.data()); + LanguageTagParser ts(extensionChars, extension.size()); + Token tok = ts.nextToken(); + + // unicode_locale_extensions = sep [uU] ((sep keyword)+ | + // (sep attribute)+ (sep keyword)*) ; + + if (!ts.isExtensionStart(tok) || ts.singletonKey(tok) != 'u') { + return false; + } + + tok = ts.nextToken(); + + if (tok.isNone()) { + return false; + } + + while (ts.isUnicodeExtensionAttribute(tok)) { + if (!attributes.emplaceBack(tok.index(), tok.length())) { + return cx->alreadyReportedOOM(); + } + + tok = ts.nextToken(); + } + + // keyword = key (sep type)? ; + while (ts.isUnicodeExtensionKey(tok)) { + size_t begin = tok.index(); + tok = ts.nextToken(); + + while (ts.isUnicodeExtensionType(tok)) { + tok = ts.nextToken(); + } + + if (tok.isError()) { + return false; + } + + size_t length = tok.index() - 1 - begin; + if (!keywords.emplaceBack(begin, length)) { + return cx->alreadyReportedOOM(); + } + } + + // Return true if the complete input was successfully parsed. 
+ return tok.isNone(); +} + +bool LanguageTagParser::canParseUnicodeExtension( + mozilla::Span<const char> extension) { + LocaleChars extensionChars = StringChars(extension.data()); + LanguageTagParser ts(extensionChars, extension.size()); + Token tok = ts.nextToken(); + + // unicode_locale_extensions = sep [uU] ((sep keyword)+ | + // (sep attribute)+ (sep keyword)*) ; + + if (!ts.isExtensionStart(tok) || ts.singletonKey(tok) != 'u') { + return false; + } + + tok = ts.nextToken(); + + if (tok.isNone()) { + return false; + } + + while (ts.isUnicodeExtensionAttribute(tok)) { + tok = ts.nextToken(); + } + + // keyword = key (sep type)? ; + while (ts.isUnicodeExtensionKey(tok)) { + tok = ts.nextToken(); + + while (ts.isUnicodeExtensionType(tok)) { + tok = ts.nextToken(); + } + + if (tok.isError()) { + return false; + } + } + + // Return true if the complete input was successfully parsed. + return tok.isNone(); +} + +bool LanguageTagParser::canParseUnicodeExtensionType( + JSLinearString* unicodeType) { + MOZ_ASSERT(unicodeType->length() > 0, "caller must exclude empty strings"); + + JS::AutoCheckCannotGC nogc; + LocaleChars unicodeTypeChars = StringChars(unicodeType, nogc); + + LanguageTagParser ts(unicodeTypeChars, unicodeType->length()); + Token tok = ts.nextToken(); + + while (ts.isUnicodeExtensionType(tok)) { + tok = ts.nextToken(); + } + + // Return true if the complete input was successfully parsed. 
+ return tok.isNone(); +} + +bool ParseStandaloneLanguageTag(HandleLinearString str, + LanguageSubtag& result) { + JS::AutoCheckCannotGC nogc; + if (str->hasLatin1Chars()) { + if (!IsStructurallyValidLanguageTag<Latin1Char>(str->latin1Range(nogc))) { + return false; + } + result.set<Latin1Char>(str->latin1Range(nogc)); + } else { + if (!IsStructurallyValidLanguageTag<char16_t>(str->twoByteRange(nogc))) { + return false; + } + result.set<char16_t>(str->twoByteRange(nogc)); + } + return true; +} + +bool ParseStandaloneScriptTag(HandleLinearString str, ScriptSubtag& result) { + JS::AutoCheckCannotGC nogc; + if (str->hasLatin1Chars()) { + if (!IsStructurallyValidScriptTag<Latin1Char>(str->latin1Range(nogc))) { + return false; + } + result.set<Latin1Char>(str->latin1Range(nogc)); + } else { + if (!IsStructurallyValidScriptTag<char16_t>(str->twoByteRange(nogc))) { + return false; + } + result.set<char16_t>(str->twoByteRange(nogc)); + } + return true; +} + +bool ParseStandaloneRegionTag(HandleLinearString str, RegionSubtag& result) { + JS::AutoCheckCannotGC nogc; + if (str->hasLatin1Chars()) { + if (!IsStructurallyValidRegionTag<Latin1Char>(str->latin1Range(nogc))) { + return false; + } + result.set<Latin1Char>(str->latin1Range(nogc)); + } else { + if (!IsStructurallyValidRegionTag<char16_t>(str->twoByteRange(nogc))) { + return false; + } + result.set<char16_t>(str->twoByteRange(nogc)); + } + return true; +} + +template <typename CharT> +static bool IsAsciiLowercaseAlpha(mozilla::Span<const CharT> span) { + // Tell the analysis the |std::all_of| function can't GC. 
+ JS::AutoSuppressGCAnalysis nogc; + + const CharT* ptr = span.data(); + size_t length = span.size(); + return std::all_of(ptr, ptr + length, mozilla::IsAsciiLowercaseAlpha<CharT>); +} + +static bool IsAsciiLowercaseAlpha(JSLinearString* str) { + JS::AutoCheckCannotGC nogc; + if (str->hasLatin1Chars()) { + return IsAsciiLowercaseAlpha<Latin1Char>(str->latin1Range(nogc)); + } + return IsAsciiLowercaseAlpha<char16_t>(str->twoByteRange(nogc)); +} + +template <typename CharT> +static bool IsAsciiAlpha(mozilla::Span<const CharT> span) { + // Tell the analysis the |std::all_of| function can't GC. + JS::AutoSuppressGCAnalysis nogc; + + const CharT* ptr = span.data(); + size_t length = span.size(); + return std::all_of(ptr, ptr + length, mozilla::IsAsciiAlpha<CharT>); +} + +// Return true if every character of |str| is an ASCII letter, dispatching on +// the string's character width. +static bool IsAsciiAlpha(JSLinearString* str) { + JS::AutoCheckCannotGC nogc; + if (str->hasLatin1Chars()) { + return IsAsciiAlpha<Latin1Char>(str->latin1Range(nogc)); + } + return IsAsciiAlpha<char16_t>(str->twoByteRange(nogc)); +} + +JS::Result<JSString*> ParseStandaloneISO639LanguageTag(JSContext* cx, + HandleLinearString str) { + // ISO-639 language codes contain either two or three characters. + size_t length = str->length(); + if (length != 2 && length != 3) { + return nullptr; + } + + // We can directly return the input below if it's in the correct case. + bool isLowerCase = IsAsciiLowercaseAlpha(str); + if (!isLowerCase) { + // Must be an ASCII alpha string. + if (!IsAsciiAlpha(str)) { + return nullptr; + } + } + + // Copy the two or three validated ASCII letters into an inline subtag. + LanguageSubtag languageTag; + if (str->hasLatin1Chars()) { + JS::AutoCheckCannotGC nogc; + languageTag.set<Latin1Char>(str->latin1Range(nogc)); + } else { + JS::AutoCheckCannotGC nogc; + languageTag.set<char16_t>(str->twoByteRange(nogc)); + } + + if (!isLowerCase) { + // The language subtag is canonicalized to lower case. + languageTag.toLowerCase(); + } + + // Reject the input if the canonical tag contains more than just a single + // language subtag. 
+ if (LanguageTag::complexLanguageMapping(languageTag)) { + return nullptr; + } + + // Take care to replace deprecated subtags with their preferred values. + JSString* result; + if (LanguageTag::languageMapping(languageTag) || !isLowerCase) { + auto span = languageTag.span(); + result = NewStringCopyN<CanGC>(cx, span.data(), span.size()); + } else { + result = str; + } + if (!result) { + return cx->alreadyReportedOOM(); + } + return result; +} + +void js::intl::UnicodeExtensionKeyword::trace(JSTracer* trc) { + TraceRoot(trc, &type_, "UnicodeExtensionKeyword::type"); +} + +} // namespace intl +} // namespace js diff --git a/js/src/builtin/intl/LanguageTag.h b/js/src/builtin/intl/LanguageTag.h new file mode 100644 index 0000000000..405a76f544 --- /dev/null +++ b/js/src/builtin/intl/LanguageTag.h @@ -0,0 +1,768 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Structured representation of Unicode locale IDs used with Intl functions. */ + +#ifndef builtin_intl_LanguageTag_h +#define builtin_intl_LanguageTag_h + +#include "mozilla/Assertions.h" +#include "mozilla/Span.h" +#include "mozilla/TextUtils.h" +#include "mozilla/TypedEnumBits.h" +#include "mozilla/Variant.h" + +#include <algorithm> +#include <stddef.h> +#include <stdint.h> +#include <string.h> +#include <utility> + +#include "js/AllocPolicy.h" +#include "js/GCAPI.h" +#include "js/Result.h" +#include "js/RootingAPI.h" +#include "js/Utility.h" +#include "js/Vector.h" + +struct JS_PUBLIC_API JSContext; +class JSLinearString; +class JS_PUBLIC_API JSString; +class JS_PUBLIC_API JSTracer; + +namespace js { + +namespace intl { + +/** + * Return true if |language| is a valid language subtag. 
+ */ +template <typename CharT> +bool IsStructurallyValidLanguageTag(mozilla::Span<const CharT> language); + +/** + * Return true if |script| is a valid script subtag. + */ +template <typename CharT> +bool IsStructurallyValidScriptTag(mozilla::Span<const CharT> script); + +/** + * Return true if |region| is a valid region subtag. + */ +template <typename CharT> +bool IsStructurallyValidRegionTag(mozilla::Span<const CharT> region); + +#ifdef DEBUG +/** + * Return true if |variant| is a valid variant subtag. + */ +bool IsStructurallyValidVariantTag(mozilla::Span<const char> variant); + +/** + * Return true if |extension| is a valid Unicode extension subtag. + */ +bool IsStructurallyValidUnicodeExtensionTag( + mozilla::Span<const char> extension); + +/** + * Return true if |privateUse| is a valid private-use subtag. + */ +bool IsStructurallyValidPrivateUseTag(mozilla::Span<const char> privateUse); + +#endif + +template <typename CharT> +char AsciiToLowerCase(CharT c) { + MOZ_ASSERT(mozilla::IsAscii(c)); + return mozilla::IsAsciiUppercaseAlpha(c) ? (c + 0x20) : c; +} + +template <typename CharT> +char AsciiToUpperCase(CharT c) { + MOZ_ASSERT(mozilla::IsAscii(c)); + return mozilla::IsAsciiLowercaseAlpha(c) ? (c - 0x20) : c; +} + +template <typename CharT> +void AsciiToLowerCase(CharT* chars, size_t length, char* dest) { + // Tell the analysis the |std::transform| function can't GC. + JS::AutoSuppressGCAnalysis nogc; + + char (&fn)(CharT) = AsciiToLowerCase; + std::transform(chars, chars + length, dest, fn); +} + +template <typename CharT> +void AsciiToUpperCase(CharT* chars, size_t length, char* dest) { + // Tell the analysis the |std::transform| function can't GC. 
+ JS::AutoSuppressGCAnalysis nogc; + + char (&fn)(CharT) = AsciiToUpperCase; + std::transform(chars, chars + length, dest, fn); +} + +template <typename CharT> +void AsciiToTitleCase(CharT* chars, size_t length, char* dest) { + if (length > 0) { + AsciiToUpperCase(chars, 1, dest); + AsciiToLowerCase(chars + 1, length - 1, dest + 1); + } +} + +// Constants for language subtag lengths. +namespace LanguageTagLimits { + +// unicode_language_subtag = alpha{2,3} | alpha{5,8} ; +static constexpr size_t LanguageLength = 8; + +// unicode_script_subtag = alpha{4} ; +static constexpr size_t ScriptLength = 4; + +// unicode_region_subtag = (alpha{2} | digit{3}) ; +static constexpr size_t RegionLength = 3; +static constexpr size_t AlphaRegionLength = 2; +static constexpr size_t DigitRegionLength = 3; + +// key = alphanum alpha ; +static constexpr size_t UnicodeKeyLength = 2; + +// tkey = alpha digit ; +static constexpr size_t TransformKeyLength = 2; + +} // namespace LanguageTagLimits + +// Fixed size language subtag which is stored inline in LanguageTag. 
+template <size_t Length> +class LanguageTagSubtag final { + uint8_t length_ = 0; + char chars_[Length] = {}; // zero initialize + + public: + LanguageTagSubtag() = default; + + // Not copyable; contents are transferred explicitly through set(). + LanguageTagSubtag(const LanguageTagSubtag&) = delete; + LanguageTagSubtag& operator=(const LanguageTagSubtag&) = delete; + + // Number of stored characters. Zero means the subtag is absent. + size_t length() const { return length_; } + bool missing() const { return length_ == 0; } + bool present() const { return length_ > 0; } + + // View of the stored characters. There is no room for a terminating NUL + // when all |Length| characters are in use, so don't treat it as a C-string. + mozilla::Span<const char> span() const { return {chars_, length_}; } + + // Replace the contents with |str|, which must not be longer than |Length|. + // Characters are copied element-wise into the |char| buffer. + template <typename CharT> + void set(mozilla::Span<const CharT> str) { + MOZ_ASSERT(str.size() <= Length); + std::copy_n(str.data(), str.size(), chars_); + length_ = str.size(); + } + + // The toXYZCase() methods are using |Length| instead of |length()|, because + // current compilers (tested GCC and Clang) can't infer the maximum string + // length - even when using hints like |std::min| - and instead are emitting + // SIMD optimized code. Using a fixed sized length avoids emitting the SIMD + // code. (Emitting SIMD code doesn't make sense here, because the SIMD code + // only kicks in for long strings.) A fixed length will additionally ensure + // the compiler unrolls the loop in the case conversion code. + + void toLowerCase() { AsciiToLowerCase(chars_, Length, chars_); } + + void toUpperCase() { AsciiToUpperCase(chars_, Length, chars_); } + + void toTitleCase() { AsciiToTitleCase(chars_, Length, chars_); } + + // Compare against a string literal. |N| includes the literal's terminating + // NUL, hence the |N - 1| below. + template <size_t N> + bool equalTo(const char (&str)[N]) const { + static_assert(N - 1 <= Length, + "subtag literals must not exceed the maximum subtag length"); + + return length_ == N - 1 && memcmp(chars_, str, N - 1) == 0; + } +}; + +using LanguageSubtag = LanguageTagSubtag<LanguageTagLimits::LanguageLength>; +using ScriptSubtag = LanguageTagSubtag<LanguageTagLimits::ScriptLength>; +using RegionSubtag = LanguageTagSubtag<LanguageTagLimits::RegionLength>; + +/** + * Object representing a language tag. 
+ * + * All subtags are already in canonicalized case. + */ +class MOZ_STACK_CLASS LanguageTag final { + LanguageSubtag language_ = {}; + ScriptSubtag script_ = {}; + RegionSubtag region_ = {}; + + using VariantsVector = Vector<JS::UniqueChars, 2>; + using ExtensionsVector = Vector<JS::UniqueChars, 2>; + + VariantsVector variants_; + ExtensionsVector extensions_; + JS::UniqueChars privateuse_ = nullptr; + + friend class LanguageTagParser; + + bool canonicalizeUnicodeExtension(JSContext* cx, + JS::UniqueChars& unicodeExtension); + + bool canonicalizeTransformExtension(JSContext* cx, + JS::UniqueChars& transformExtension); + + public: + static bool languageMapping(LanguageSubtag& language); + static bool complexLanguageMapping(const LanguageSubtag& language); + + private: + static bool regionMapping(RegionSubtag& region); + static bool complexRegionMapping(const RegionSubtag& region); + + void performComplexLanguageMappings(); + void performComplexRegionMappings(); + MOZ_MUST_USE bool performVariantMappings(JSContext* cx); + + MOZ_MUST_USE bool updateGrandfatheredMappings(JSContext* cx); + + static const char* replaceTransformExtensionType( + mozilla::Span<const char> key, mozilla::Span<const char> type); + + public: + /** + * Given a Unicode key and type, return the null-terminated preferred + * replacement for that type if there is one, or null if there is none, e.g. + * in effect + * |replaceUnicodeExtensionType("ca", "islamicc") == "islamic-civil"| + * and + * |replaceUnicodeExtensionType("ca", "islamic-civil") == nullptr|. 
+ */ + static const char* replaceUnicodeExtensionType( + mozilla::Span<const char> key, mozilla::Span<const char> type); + + public: + explicit LanguageTag(JSContext* cx) : variants_(cx), extensions_(cx) {} + + LanguageTag(const LanguageTag&) = delete; + LanguageTag& operator=(const LanguageTag&) = delete; + + const LanguageSubtag& language() const { return language_; } + const ScriptSubtag& script() const { return script_; } + const RegionSubtag& region() const { return region_; } + const auto& variants() const { return variants_; } + const auto& extensions() const { return extensions_; } + const char* privateuse() const { return privateuse_.get(); } + + /** + * Return the Unicode extension subtag or nullptr if not present. + */ + const char* unicodeExtension() const; + + private: + ptrdiff_t unicodeExtensionIndex() const; + + public: + /** + * Set the language subtag. The input must be a valid language subtag. + */ + template <size_t N> + void setLanguage(const char (&language)[N]) { + mozilla::Span<const char> span(language, N - 1); + MOZ_ASSERT(IsStructurallyValidLanguageTag(span)); + language_.set(span); + } + + /** + * Set the language subtag. The input must be a valid language subtag. + */ + void setLanguage(const LanguageSubtag& language) { + MOZ_ASSERT(IsStructurallyValidLanguageTag(language.span())); + language_.set(language.span()); + } + + /** + * Set the script subtag. The input must be a valid script subtag. + */ + template <size_t N> + void setScript(const char (&script)[N]) { + mozilla::Span<const char> span(script, N - 1); + MOZ_ASSERT(IsStructurallyValidScriptTag(span)); + script_.set(span); + } + + /** + * Set the script subtag. The input must be a valid script subtag or the empty + * string. + */ + void setScript(const ScriptSubtag& script) { + MOZ_ASSERT(script.missing() || IsStructurallyValidScriptTag(script.span())); + script_.set(script.span()); + } + + /** + * Set the region subtag. The input must be a valid region subtag. 
+ */ + template <size_t N> + void setRegion(const char (&region)[N]) { + mozilla::Span<const char> span(region, N - 1); + MOZ_ASSERT(IsStructurallyValidRegionTag(span)); + region_.set(span); + } + + /** + * Set the region subtag. The input must be a valid region subtag or the empty + * string. + */ + void setRegion(const RegionSubtag& region) { + MOZ_ASSERT(region.missing() || IsStructurallyValidRegionTag(region.span())); + region_.set(region.span()); + } + + /** + * Removes all variant subtags. + */ + void clearVariants() { variants_.clearAndFree(); } + + /** + * Set the Unicode extension subtag. The input must be a valid Unicode + * extension subtag. + */ + bool setUnicodeExtension(JS::UniqueChars extension); + + /** + * Remove any Unicode extension subtag if present. + */ + void clearUnicodeExtension(); + + /** + * Set the private-use subtag. The input must be a valid private-use subtag + * or nullptr. + */ + void setPrivateuse(JS::UniqueChars privateuse) { + MOZ_ASSERT(!privateuse || + IsStructurallyValidPrivateUseTag( + {privateuse.get(), strlen(privateuse.get())})); + privateuse_ = std::move(privateuse); + } + + /** Canonicalize the base-name (language, script, region, variant) subtags. */ + bool canonicalizeBaseName(JSContext* cx); + + /** + * Canonicalize all extension subtags. + */ + bool canonicalizeExtensions(JSContext* cx); + + /** + * Canonicalizes the given structurally valid Unicode BCP 47 locale + * identifier, including regularized case of subtags. For example, the + * language tag Zh-haNS-bu-variant2-Variant1-u-ca-chinese-t-Zh-laTN-x-PRIVATE, + * where + * + * Zh ; 2*3ALPHA + * -haNS ; ["-" script] + * -bu ; ["-" region] + * -variant2 ; *("-" variant) + * -Variant1 + * -u-ca-chinese ; *("-" extension) + * -t-Zh-laTN + * -x-PRIVATE ; ["-" privateuse] + * + * becomes zh-Hans-MM-variant1-variant2-t-zh-latn-u-ca-chinese-x-private + * + * Spec: ECMAScript Internationalization API Specification, 6.2.3. 
+ */ + bool canonicalize(JSContext* cx) { + return canonicalizeBaseName(cx) && canonicalizeExtensions(cx); + } + + /** + * Return the string representation of this language tag. + */ + JSString* toString(JSContext* cx) const; + + /** + * Return the string representation of this language tag as a null-terminated + * C-string. + */ + JS::UniqueChars toStringZ(JSContext* cx) const; + + /** + * Add likely-subtags to the language tag. + * + * Spec: <https://www.unicode.org/reports/tr35/#Likely_Subtags> + */ + bool addLikelySubtags(JSContext* cx); + + /** + * Remove likely-subtags from the language tag. + * + * Spec: <https://www.unicode.org/reports/tr35/#Likely_Subtags> + */ + bool removeLikelySubtags(JSContext* cx); +}; + +/** + * Parser for Unicode BCP 47 locale identifiers. + * + * <https://unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers> + */ +class MOZ_STACK_CLASS LanguageTagParser final { + public: + // Exposed as |public| for |MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS|. + enum class TokenKind : uint8_t { + None = 0b000, + Alpha = 0b001, + Digit = 0b010, + AlphaDigit = 0b011, + Error = 0b100 + }; + + private: + class Token final { + size_t index_; + size_t length_; + TokenKind kind_; + + public: + Token(TokenKind kind, size_t index, size_t length) + : index_(index), length_(length), kind_(kind) {} + + TokenKind kind() const { return kind_; } + size_t index() const { return index_; } + size_t length() const { return length_; } + + bool isError() const { return kind_ == TokenKind::Error; } + bool isNone() const { return kind_ == TokenKind::None; } + bool isAlpha() const { return kind_ == TokenKind::Alpha; } + bool isDigit() const { return kind_ == TokenKind::Digit; } + bool isAlphaDigit() const { return kind_ == TokenKind::AlphaDigit; } + }; + + using LocaleChars = mozilla::Variant<const JS::Latin1Char*, const char16_t*>; + + const LocaleChars& locale_; + size_t length_; + size_t index_ = 0; + + LanguageTagParser(const LocaleChars& locale, size_t 
length) + : locale_(locale), length_(length) {} + + char16_t charAtUnchecked(size_t index) const { + if (locale_.is<const JS::Latin1Char*>()) { + return locale_.as<const JS::Latin1Char*>()[index]; + } + return locale_.as<const char16_t*>()[index]; + } + + char charAt(size_t index) const { + char16_t c = charAtUnchecked(index); + MOZ_ASSERT(mozilla::IsAscii(c)); + return c; + } + + // Copy the token characters into |subtag|. + template <size_t N> + void copyChars(const Token& tok, LanguageTagSubtag<N>& subtag) const { + size_t index = tok.index(); + size_t length = tok.length(); + if (locale_.is<const JS::Latin1Char*>()) { + using T = const JS::Latin1Char; + subtag.set(mozilla::Span(locale_.as<T*>() + index, length)); + } else { + using T = const char16_t; + subtag.set(mozilla::Span(locale_.as<T*>() + index, length)); + } + } + + // Create a string copy of |length| characters starting at |index|. + JS::UniqueChars chars(JSContext* cx, size_t index, size_t length) const; + + // Create a string copy of the token characters. + JS::UniqueChars chars(JSContext* cx, const Token& tok) const { + return chars(cx, tok.index(), tok.length()); + } + + JS::UniqueChars extension(JSContext* cx, const Token& start, + const Token& end) const { + MOZ_ASSERT(start.index() < end.index()); + + size_t length = end.index() - 1 - start.index(); + return chars(cx, start.index(), length); + } + + Token nextToken(); + + // unicode_language_subtag = alpha{2,3} | alpha{5,8} ; + // + // Four character language subtags are not allowed in Unicode BCP 47 locale + // identifiers. Also see the comparison to Unicode CLDR locale identifiers in + // <https://unicode.org/reports/tr35/#BCP_47_Conformance>. 
+ bool isLanguage(const Token& tok) const { + return tok.isAlpha() && ((2 <= tok.length() && tok.length() <= 3) || + (5 <= tok.length() && tok.length() <= 8)); + } + + // unicode_script_subtag = alpha{4} ; + bool isScript(const Token& tok) const { + return tok.isAlpha() && tok.length() == 4; + } + + // unicode_region_subtag = (alpha{2} | digit{3}) ; + bool isRegion(const Token& tok) const { + return (tok.isAlpha() && tok.length() == 2) || + (tok.isDigit() && tok.length() == 3); + } + + // unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3}) ; + bool isVariant(const Token& tok) const { + return (5 <= tok.length() && tok.length() <= 8) || + (tok.length() == 4 && mozilla::IsAsciiDigit(charAt(tok.index()))); + } + + // Returns the code unit of the first character at the given singleton token. + // Always returns the lower case form of an alphabetical character. + char singletonKey(const Token& tok) const { + MOZ_ASSERT(tok.length() == 1); + return AsciiToLowerCase(charAt(tok.index())); + } + + // extensions = unicode_locale_extensions | + // transformed_extensions | + // other_extensions ; + // + // unicode_locale_extensions = sep [uU] ((sep keyword)+ | + // (sep attribute)+ (sep keyword)*) ; + // + // transformed_extensions = sep [tT] ((sep tlang (sep tfield)*) | + // (sep tfield)+) ; + // + // other_extensions = sep [alphanum-[tTuUxX]] (sep alphanum{2,8})+ ; + bool isExtensionStart(const Token& tok) const { + return tok.length() == 1 && singletonKey(tok) != 'x'; + } + + // other_extensions = sep [alphanum-[tTuUxX]] (sep alphanum{2,8})+ ; + bool isOtherExtensionPart(const Token& tok) const { + return 2 <= tok.length() && tok.length() <= 8; + } + + // unicode_locale_extensions = sep [uU] ((sep keyword)+ | + // (sep attribute)+ (sep keyword)*) ; + // keyword = key (sep type)? 
; + bool isUnicodeExtensionPart(const Token& tok) const { + return isUnicodeExtensionKey(tok) || isUnicodeExtensionType(tok) || + isUnicodeExtensionAttribute(tok); + } + + // attribute = alphanum{3,8} ; + bool isUnicodeExtensionAttribute(const Token& tok) const { + return 3 <= tok.length() && tok.length() <= 8; + } + + // key = alphanum alpha ; + bool isUnicodeExtensionKey(const Token& tok) const { + return tok.length() == 2 && mozilla::IsAsciiAlpha(charAt(tok.index() + 1)); + } + + // type = alphanum{3,8} (sep alphanum{3,8})* ; + bool isUnicodeExtensionType(const Token& tok) const { + return 3 <= tok.length() && tok.length() <= 8; + } + + // tkey = alpha digit ; + bool isTransformExtensionKey(const Token& tok) const { + return tok.length() == 2 && mozilla::IsAsciiAlpha(charAt(tok.index())) && + mozilla::IsAsciiDigit(charAt(tok.index() + 1)); + } + + // tvalue = (sep alphanum{3,8})+ ; + bool isTransformExtensionPart(const Token& tok) const { + return 3 <= tok.length() && tok.length() <= 8; + } + + // pu_extensions = sep [xX] (sep alphanum{1,8})+ ; + bool isPrivateUseStart(const Token& tok) const { + return tok.length() == 1 && singletonKey(tok) == 'x'; + } + + // pu_extensions = sep [xX] (sep alphanum{1,8})+ ; + bool isPrivateUsePart(const Token& tok) const { + return 1 <= tok.length() && tok.length() <= 8; + } + + // Helper function for use in |parseBaseName| and + // |parseTlangInTransformExtension|. Do not use this directly! + static JS::Result<bool> internalParseBaseName(JSContext* cx, + LanguageTagParser& ts, + LanguageTag& tag, Token& tok); + + // Parse the `unicode_language_id` production, i.e. the + // language/script/region/variants portion of a language tag, into |tag|. + // |tok| must be the current token. + static JS::Result<bool> parseBaseName(JSContext* cx, LanguageTagParser& ts, + LanguageTag& tag, Token& tok) { + return internalParseBaseName(cx, ts, tag, tok); + } + + // Parse the `tlang` production within a parsed 't' transform extension. 
+ // The precise requirements for "previously parsed" are: + // + // * the input begins from current token |tok| with a valid `tlang` + // * the `tlang` is wholly lowercase (*not* canonical case) + // * variant subtags in the `tlang` may contain duplicates and be + // unordered + // + // Return an error on internal failure. Otherwise, return a success value. If + // there was no `tlang`, then |tag.language().missing()|. But if there was a + // `tlang`, then |tag| is filled with subtags exactly as they appeared in the + // parse input. + static JS::Result<JS::Ok> parseTlangInTransformExtension( + JSContext* cx, LanguageTagParser& ts, LanguageTag& tag, Token& tok) { + MOZ_ASSERT(ts.isLanguage(tok)); + return internalParseBaseName(cx, ts, tag, tok).map([](bool parsed) { + MOZ_ASSERT(parsed); + return JS::Ok(); + }); + } + + friend class LanguageTag; + + class Range final { + size_t begin_; + size_t length_; + + public: + Range(size_t begin, size_t length) : begin_(begin), length_(length) {} + + template <typename T> + T* begin(T* ptr) const { + return ptr + begin_; + } + + size_t length() const { return length_; } + }; + + using TFieldVector = js::Vector<Range, 8>; + using AttributesVector = js::Vector<Range, 8>; + using KeywordsVector = js::Vector<Range, 8>; + + // Parse |extension|, which must be a validated, fully lowercase + // `transformed_extensions` subtag, and fill |tag| and |fields| from the + // `tlang` and `tfield` components. Data in |tag| is lowercase, consistent + // with |extension|. + static JS::Result<bool> parseTransformExtension( + JSContext* cx, mozilla::Span<const char> extension, LanguageTag& tag, + TFieldVector& fields); + + // Parse |extension|, which must be a validated, fully lowercase + // `unicode_locale_extensions` subtag, and fill |attributes| and |keywords| + // from the `attribute` and `keyword` components. 
+ static JS::Result<bool> parseUnicodeExtension( + JSContext* cx, mozilla::Span<const char> extension, + AttributesVector& attributes, KeywordsVector& keywords); + + static JS::Result<bool> tryParse(JSContext* cx, LocaleChars& localeChars, + size_t localeLength, LanguageTag& tag); + + public: + // Parse the input string as a language tag. Reports an error to the context + // if the input can't be parsed completely. + static bool parse(JSContext* cx, JSLinearString* locale, LanguageTag& tag); + + // Parse the input string as a language tag. Reports an error to the context + // if the input can't be parsed completely. + static bool parse(JSContext* cx, mozilla::Span<const char> locale, + LanguageTag& tag); + + // Parse the input string as a language tag. Returns Ok(true) if the input + // could be completely parsed, Ok(false) if the input couldn't be parsed, + // or Err() in case of internal error. + static JS::Result<bool> tryParse(JSContext* cx, JSLinearString* locale, + LanguageTag& tag); + + // Parse the input string as a language tag. Returns Ok(true) if the input + // could be completely parsed, Ok(false) if the input couldn't be parsed, + // or Err() in case of internal error. + static JS::Result<bool> tryParse(JSContext* cx, + mozilla::Span<const char> locale, + LanguageTag& tag); + + // Parse the input string as the base-name parts (language, script, region, + // variants) of a language tag. Ignores any trailing characters. + static bool parseBaseName(JSContext* cx, mozilla::Span<const char> locale, + LanguageTag& tag); + + // Parse the input string as the base-name parts (language, script, region, + // variants) of a language tag. Returns Ok(true) if the input could be + // completely parsed, Ok(false) if the input couldn't be parsed, or Err() in + // case of internal error. + static JS::Result<bool> tryParseBaseName(JSContext* cx, + JSLinearString* locale, + LanguageTag& tag); + + // Return true iff |extension| can be parsed as a Unicode extension subtag. 
+ static bool canParseUnicodeExtension(mozilla::Span<const char> extension); + + // Return true iff |unicodeType| can be parsed as a Unicode extension type. + static bool canParseUnicodeExtensionType(JSLinearString* unicodeType); +}; + +MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(LanguageTagParser::TokenKind) + +/** + * Parse a string as a standalone |language| tag. If |str| is a standalone + * language tag, store it in |result| and return true. Otherwise return false. + */ +MOZ_MUST_USE bool ParseStandaloneLanguageTag(JS::Handle<JSLinearString*> str, + LanguageSubtag& result); + +/** + * Parse a string as a standalone |script| tag. If |str| is a standalone script + * tag, store it in |result| and return true. Otherwise return false. + */ +MOZ_MUST_USE bool ParseStandaloneScriptTag(JS::Handle<JSLinearString*> str, + ScriptSubtag& result); + +/** + * Parse a string as a standalone |region| tag. If |str| is a standalone region + * tag, store it in |result| and return true. Otherwise return false. + */ +MOZ_MUST_USE bool ParseStandaloneRegionTag(JS::Handle<JSLinearString*> str, + RegionSubtag& result); + +/** + * Parse a string as an ISO-639 language code. Return |nullptr| in the result if + * the input could not be parsed or the canonical form of the resulting language + * tag contains more than a single language subtag. 
+ */ +JS::Result<JSString*> ParseStandaloneISO639LanguageTag( + JSContext* cx, JS::Handle<JSLinearString*> str); + +class UnicodeExtensionKeyword final { + char key_[LanguageTagLimits::UnicodeKeyLength]; + JSLinearString* type_; + + public: + using UnicodeKey = const char (&)[LanguageTagLimits::UnicodeKeyLength + 1]; + using UnicodeKeySpan = + mozilla::Span<const char, LanguageTagLimits::UnicodeKeyLength>; + + UnicodeExtensionKeyword(UnicodeKey key, JSLinearString* type) + : key_{key[0], key[1]}, type_(type) {} + + UnicodeKeySpan key() const { return {key_, sizeof(key_)}; } + JSLinearString* type() const { return type_; } + + void trace(JSTracer* trc); +}; + +extern MOZ_MUST_USE bool ApplyUnicodeExtensionToTag( + JSContext* cx, LanguageTag& tag, + JS::HandleVector<UnicodeExtensionKeyword> keywords); + +} // namespace intl + +} // namespace js + +#endif /* builtin_intl_LanguageTag_h */ diff --git a/js/src/builtin/intl/LanguageTagGenerated.cpp b/js/src/builtin/intl/LanguageTagGenerated.cpp new file mode 100644 index 0000000000..b018889d65 --- /dev/null +++ b/js/src/builtin/intl/LanguageTagGenerated.cpp @@ -0,0 +1,959 @@ +// Generated by make_intl_data.py. DO NOT EDIT. 
+// Version: CLDR-37 +// URL: https://unicode.org/Public/cldr/37/core.zip + +#include "mozilla/Assertions.h" +#include "mozilla/Span.h" +#include "mozilla/TextUtils.h" + +#include <algorithm> +#include <cstdint> +#include <cstring> +#include <iterator> +#include <string> +#include <type_traits> + +#include "builtin/intl/LanguageTag.h" +#include "util/Text.h" +#include "vm/JSContext.h" + +using namespace js::intl::LanguageTagLimits; + +template <size_t Length, size_t TagLength, size_t SubtagLength> +static inline bool HasReplacement( + const char (&subtags)[Length][TagLength], + const js::intl::LanguageTagSubtag<SubtagLength>& subtag) { + MOZ_ASSERT(subtag.length() == TagLength - 1, + "subtag must have the same length as the list of subtags"); + + const char* ptr = subtag.span().data(); + return std::binary_search(std::begin(subtags), std::end(subtags), ptr, + [](const char* a, const char* b) { + return memcmp(a, b, TagLength - 1) < 0; + }); +} + +template <size_t Length, size_t TagLength, size_t SubtagLength> +static inline const char* SearchReplacement( + const char (&subtags)[Length][TagLength], + const char* (&aliases)[Length], + const js::intl::LanguageTagSubtag<SubtagLength>& subtag) { + MOZ_ASSERT(subtag.length() == TagLength - 1, + "subtag must have the same length as the list of subtags"); + + const char* ptr = subtag.span().data(); + auto p = std::lower_bound(std::begin(subtags), std::end(subtags), ptr, + [](const char* a, const char* b) { + return memcmp(a, b, TagLength - 1) < 0; + }); + if (p != std::end(subtags) && memcmp(*p, ptr, TagLength - 1) == 0) { + return aliases[std::distance(std::begin(subtags), p)]; + } + return nullptr; +} + +#ifdef DEBUG +static bool IsAsciiLowercaseAlphanumeric(char c) { + return mozilla::IsAsciiLowercaseAlpha(c) || mozilla::IsAsciiDigit(c); +} + +static bool IsAsciiLowercaseAlphanumericOrDash(char c) { + return IsAsciiLowercaseAlphanumeric(c) || c == '-'; +} + +static bool IsCanonicallyCasedLanguageTag(mozilla::Span<const 
char> span) { + // Tell the analysis the |std::all_of| function can't GC. + JS::AutoSuppressGCAnalysis nogc; + + return std::all_of(span.begin(), span.end(), mozilla::IsAsciiLowercaseAlpha<char>); +} + +static bool IsCanonicallyCasedRegionTag(mozilla::Span<const char> span) { + // Tell the analysis the |std::all_of| function can't GC. + JS::AutoSuppressGCAnalysis nogc; + + return std::all_of(span.begin(), span.end(), mozilla::IsAsciiUppercaseAlpha<char>) || + std::all_of(span.begin(), span.end(), mozilla::IsAsciiDigit<char>); +} + +static bool IsCanonicallyCasedVariantTag(mozilla::Span<const char> span) { + // Tell the analysis the |std::all_of| function can't GC. + JS::AutoSuppressGCAnalysis nogc; + + return std::all_of(span.begin(), span.end(), IsAsciiLowercaseAlphanumeric); +} + +static bool IsCanonicallyCasedUnicodeKey(mozilla::Span<const char> key) { + return std::all_of(key.begin(), key.end(), IsAsciiLowercaseAlphanumeric); +} + +static bool IsCanonicallyCasedUnicodeType(mozilla::Span<const char> type) { + return std::all_of(type.begin(), type.end(), IsAsciiLowercaseAlphanumericOrDash); +} + +static bool IsCanonicallyCasedTransformKey(mozilla::Span<const char> key) { + return std::all_of(key.begin(), key.end(), IsAsciiLowercaseAlphanumeric); +} + +static bool IsCanonicallyCasedTransformType(mozilla::Span<const char> type) { + return std::all_of(type.begin(), type.end(), IsAsciiLowercaseAlphanumericOrDash); +} +#endif + +// Mappings from language subtags to preferred values. +// Derived from CLDR Supplemental Data, version 37. 
+// https://unicode.org/Public/cldr/37/core.zip +bool js::intl::LanguageTag::languageMapping(LanguageSubtag& language) { + MOZ_ASSERT(IsStructurallyValidLanguageTag(language.span())); + MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language.span())); + + if (language.length() == 2) { + static const char languages[9][3] = { + "bh", "in", "iw", "ji", "jw", "mo", "no", "tl", "tw", + }; + static const char* aliases[9] = { + "bho", "id", "he", "yi", "jv", "ro", "nb", "fil", "ak", + }; + + if (const char* replacement = SearchReplacement(languages, aliases, language)) { + language.set(mozilla::MakeStringSpan(replacement)); + return true; + } + return false; + } + + if (language.length() == 3) { + static const char languages[345][4] = { + "aam", "aar", "abk", "adp", "afr", "aju", "aka", "alb", "als", "amh", + "ara", "arb", "arg", "arm", "asd", "asm", "aue", "ava", "ave", "aym", + "ayr", "ayx", "aze", "azj", "bak", "bam", "baq", "bcc", "bcl", "bel", + "ben", "bgm", "bih", "bis", "bjd", "bod", "bos", "bre", "bul", "bur", + "bxk", "bxr", "cat", "ccq", "ces", "cha", "che", "chi", "chu", "chv", + "cjr", "cka", "cld", "cmk", "cmn", "cor", "cos", "coy", "cqu", "cre", + "cwd", "cym", "cze", "dan", "deu", "dgo", "dhd", "dik", "diq", "dit", + "div", "drh", "dut", "dzo", "ekk", "ell", "emk", "eng", "epo", "esk", + "est", "eus", "ewe", "fao", "fas", "fat", "fij", "fin", "fra", "fre", + "fry", "fuc", "ful", "gav", "gaz", "gbo", "geo", "ger", "gfx", "ggn", + "gla", "gle", "glg", "glv", "gno", "gre", "grn", "gti", "gug", "guj", + "guv", "gya", "hat", "hau", "hdn", "hea", "heb", "her", "him", "hin", + "hmo", "hrr", "hrv", "hun", "hye", "ibi", "ibo", "ice", "ido", "iii", + "ike", "iku", "ile", "ilw", "ina", "ind", "ipk", "isl", "ita", "jav", + "jeg", "jpn", "kal", "kan", "kas", "kat", "kau", "kaz", "kgc", "kgh", + "khk", "khm", "kik", "kin", "kir", "kmr", "knc", "kng", "knn", "koj", + "kom", "kon", "kor", "kpv", "krm", "ktr", "kua", "kur", "kvs", "kwq", + "kxe", "kzj", "kzt", "lao", "lat", 
"lav", "lbk", "lii", "lim", "lin", + "lit", "llo", "lmm", "ltz", "lub", "lug", "lvs", "mac", "mah", "mal", + "mao", "mar", "may", "meg", "mhr", "mkd", "mlg", "mlt", "mnk", "mol", + "mon", "mri", "msa", "mst", "mup", "mwj", "mya", "myd", "myt", "nad", + "nau", "nav", "nbl", "ncp", "nde", "ndo", "nep", "nld", "nno", "nns", + "nnx", "nob", "nor", "npi", "nts", "nya", "oci", "ojg", "oji", "ori", + "orm", "ory", "oss", "oun", "pan", "pbu", "pcr", "per", "pes", "pli", + "plt", "pmc", "pmu", "pnb", "pol", "por", "ppa", "ppr", "pry", "pus", + "puz", "que", "quz", "rmy", "roh", "ron", "rum", "run", "rus", "sag", + "san", "sca", "scc", "scr", "sin", "skk", "slk", "slo", "slv", "sme", + "smo", "sna", "snd", "som", "sot", "spa", "spy", "sqi", "src", "srd", + "srp", "ssw", "sun", "swa", "swe", "swh", "tah", "tam", "tat", "tdu", + "tel", "tgk", "tgl", "tha", "thc", "thx", "tib", "tie", "tir", "tkk", + "tlw", "tmp", "tne", "ton", "tsf", "tsn", "tso", "ttq", "tuk", "tur", + "twi", "uig", "ukr", "umu", "uok", "urd", "uzb", "uzn", "ven", "vie", + "vol", "wel", "wln", "wol", "xba", "xho", "xia", "xkh", "xpe", "xsj", + "xsl", "ybd", "ydd", "yid", "yma", "ymt", "yor", "yos", "yuu", "zai", + "zha", "zho", "zsm", "zul", "zyb", + }; + static const char* aliases[345] = { + "aas", "aa", "ab", "dz", "af", "jrb", "ak", "sq", "sq", "am", + "ar", "ar", "an", "hy", "snz", "as", "ktz", "av", "ae", "ay", + "ay", "nun", "az", "az", "ba", "bm", "eu", "bal", "bik", "be", + "bn", "bcg", "bho", "bi", "drl", "bo", "bs", "br", "bg", "my", + "luy", "bua", "ca", "rki", "cs", "ch", "ce", "zh", "cu", "cv", + "mom", "cmr", "syr", "xch", "zh", "kw", "co", "pij", "quh", "cr", + "cr", "cy", "cs", "da", "de", "doi", "mwr", "din", "zza", "dif", + "dv", "mn", "nl", "dz", "et", "el", "man", "en", "eo", "ik", + "et", "eu", "ee", "fo", "fa", "ak", "fj", "fi", "fr", "fr", + "fy", "ff", "ff", "dev", "om", "grb", "ka", "de", "vaj", "gvr", + "gd", "ga", "gl", "gv", "gon", "el", "gn", "nyc", "gn", "gu", + "duz", "gba", 
"ht", "ha", "hai", "hmn", "he", "hz", "srx", "hi", + "ho", "jal", "hr", "hu", "hy", "opa", "ig", "is", "io", "ii", + "iu", "iu", "ie", "gal", "ia", "id", "ik", "is", "it", "jv", + "oyb", "ja", "kl", "kn", "ks", "ka", "kr", "kk", "tdf", "kml", + "mn", "km", "ki", "rw", "ky", "ku", "kr", "kg", "kok", "kwv", + "kv", "kg", "ko", "kv", "bmf", "dtp", "kj", "ku", "gdj", "yam", + "tvd", "dtp", "dtp", "lo", "la", "lv", "bnc", "raq", "li", "ln", + "lt", "ngt", "rmx", "lb", "lu", "lg", "lv", "mk", "mh", "ml", + "mi", "mr", "ms", "cir", "chm", "mk", "mg", "mt", "man", "ro", + "mn", "mi", "ms", "mry", "raj", "vaj", "my", "aog", "mry", "xny", + "na", "nv", "nr", "kdz", "nd", "ng", "ne", "nl", "nn", "nbr", + "ngv", "nb", "nb", "ne", "pij", "ny", "oc", "oj", "oj", "or", + "om", "or", "os", "vaj", "pa", "ps", "adx", "fa", "fa", "pi", + "mg", "huw", "phr", "lah", "pl", "pt", "bfy", "lcq", "prt", "ps", + "pub", "qu", "qu", "rom", "rm", "ro", "ro", "rn", "ru", "sg", + "sa", "hle", "sr", "hr", "si", "oyb", "sk", "sk", "sl", "se", + "sm", "sn", "sd", "so", "st", "es", "kln", "sq", "sc", "sc", + "sr", "ss", "su", "sw", "sv", "sw", "ty", "ta", "tt", "dtp", + "te", "tg", "fil", "th", "tpo", "oyb", "bo", "ras", "ti", "twm", + "weo", "tyj", "kak", "to", "taj", "tn", "ts", "tmh", "tk", "tr", + "ak", "ug", "uk", "del", "ema", "ur", "uz", "uz", "ve", "vi", + "vo", "cy", "wa", "wo", "cax", "xh", "acn", "waw", "kpe", "suj", + "den", "rki", "yi", "yi", "lrr", "mtm", "yo", "zom", "yug", "zap", + "za", "zh", "ms", "zu", "za", + }; + + if (const char* replacement = SearchReplacement(languages, aliases, language)) { + language.set(mozilla::MakeStringSpan(replacement)); + return true; + } + return false; + } + + return false; +} + +// Language subtags with complex mappings. +// Derived from CLDR Supplemental Data, version 37. 
+// https://unicode.org/Public/cldr/37/core.zip +bool js::intl::LanguageTag::complexLanguageMapping(const LanguageSubtag& language) { + MOZ_ASSERT(IsStructurallyValidLanguageTag(language.span())); + MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language.span())); + + if (language.length() == 2) { + return language.equalTo("sh"); + } + + if (language.length() == 3) { + static const char languages[6][4] = { + "cnr", "drw", "hbs", "prs", "swc", "tnf", + }; + + return HasReplacement(languages, language); + } + + return false; +} + +// Mappings from region subtags to preferred values. +// Derived from CLDR Supplemental Data, version 37. +// https://unicode.org/Public/cldr/37/core.zip +bool js::intl::LanguageTag::regionMapping(RegionSubtag& region) { + MOZ_ASSERT(IsStructurallyValidRegionTag(region.span())); + MOZ_ASSERT(IsCanonicallyCasedRegionTag(region.span())); + + if (region.length() == 2) { + static const char regions[23][3] = { + "BU", "CS", "CT", "DD", "DY", "FQ", "FX", "HV", "JT", "MI", + "NH", "NQ", "PU", "PZ", "QU", "RH", "TP", "UK", "VD", "WK", + "YD", "YU", "ZR", + }; + static const char* aliases[23] = { + "MM", "RS", "KI", "DE", "BJ", "AQ", "FR", "BF", "UM", "UM", + "VU", "AQ", "UM", "PA", "EU", "ZW", "TL", "GB", "VN", "UM", + "YE", "RS", "CD", + }; + + if (const char* replacement = SearchReplacement(regions, aliases, region)) { + region.set(mozilla::MakeStringSpan(replacement)); + return true; + } + return false; + } + + { + static const char regions[300][4] = { + "004", "008", "010", "012", "016", "020", "024", "028", "031", "032", + "036", "040", "044", "048", "050", "051", "052", "056", "060", "062", + "064", "068", "070", "072", "074", "076", "084", "086", "090", "092", + "096", "100", "104", "108", "112", "116", "120", "124", "132", "136", + "140", "144", "148", "152", "156", "158", "162", "166", "170", "174", + "175", "178", "180", "184", "188", "191", "192", "196", "203", "204", + "208", "212", "214", "218", "222", "226", "230", "231", "232", "233", + 
"234", "238", "239", "242", "246", "248", "249", "250", "254", "258", + "260", "262", "266", "268", "270", "275", "276", "278", "280", "288", + "292", "296", "300", "304", "308", "312", "316", "320", "324", "328", + "332", "334", "336", "340", "344", "348", "352", "356", "360", "364", + "368", "372", "376", "380", "384", "388", "392", "398", "400", "404", + "408", "410", "414", "417", "418", "422", "426", "428", "430", "434", + "438", "440", "442", "446", "450", "454", "458", "462", "466", "470", + "474", "478", "480", "484", "492", "496", "498", "499", "500", "504", + "508", "512", "516", "520", "524", "528", "531", "533", "534", "535", + "540", "548", "554", "558", "562", "566", "570", "574", "578", "580", + "581", "583", "584", "585", "586", "591", "598", "600", "604", "608", + "612", "616", "620", "624", "626", "630", "634", "638", "642", "643", + "646", "652", "654", "659", "660", "662", "663", "666", "670", "674", + "678", "682", "686", "688", "690", "694", "702", "703", "704", "705", + "706", "710", "716", "720", "724", "728", "729", "732", "736", "740", + "744", "748", "752", "756", "760", "762", "764", "768", "772", "776", + "780", "784", "788", "792", "795", "796", "798", "800", "804", "807", + "818", "826", "830", "831", "832", "833", "834", "840", "850", "854", + "858", "860", "862", "876", "882", "886", "887", "891", "894", "958", + "959", "960", "962", "963", "964", "965", "966", "967", "968", "969", + "970", "971", "972", "973", "974", "975", "976", "977", "978", "979", + "980", "981", "982", "983", "984", "985", "986", "987", "988", "989", + "990", "991", "992", "993", "994", "995", "996", "997", "998", "999", + }; + static const char* aliases[300] = { + "AF", "AL", "AQ", "DZ", "AS", "AD", "AO", "AG", "AZ", "AR", + "AU", "AT", "BS", "BH", "BD", "AM", "BB", "BE", "BM", "034", + "BT", "BO", "BA", "BW", "BV", "BR", "BZ", "IO", "SB", "VG", + "BN", "BG", "MM", "BI", "BY", "KH", "CM", "CA", "CV", "KY", + "CF", "LK", "TD", "CL", "CN", "TW", "CX", "CC", 
"CO", "KM", + "YT", "CG", "CD", "CK", "CR", "HR", "CU", "CY", "CZ", "BJ", + "DK", "DM", "DO", "EC", "SV", "GQ", "ET", "ET", "ER", "EE", + "FO", "FK", "GS", "FJ", "FI", "AX", "FR", "FR", "GF", "PF", + "TF", "DJ", "GA", "GE", "GM", "PS", "DE", "DE", "DE", "GH", + "GI", "KI", "GR", "GL", "GD", "GP", "GU", "GT", "GN", "GY", + "HT", "HM", "VA", "HN", "HK", "HU", "IS", "IN", "ID", "IR", + "IQ", "IE", "IL", "IT", "CI", "JM", "JP", "KZ", "JO", "KE", + "KP", "KR", "KW", "KG", "LA", "LB", "LS", "LV", "LR", "LY", + "LI", "LT", "LU", "MO", "MG", "MW", "MY", "MV", "ML", "MT", + "MQ", "MR", "MU", "MX", "MC", "MN", "MD", "ME", "MS", "MA", + "MZ", "OM", "NA", "NR", "NP", "NL", "CW", "AW", "SX", "BQ", + "NC", "VU", "NZ", "NI", "NE", "NG", "NU", "NF", "NO", "MP", + "UM", "FM", "MH", "PW", "PK", "PA", "PG", "PY", "PE", "PH", + "PN", "PL", "PT", "GW", "TL", "PR", "QA", "RE", "RO", "RU", + "RW", "BL", "SH", "KN", "AI", "LC", "MF", "PM", "VC", "SM", + "ST", "SA", "SN", "RS", "SC", "SL", "SG", "SK", "VN", "SI", + "SO", "ZA", "ZW", "YE", "ES", "SS", "SD", "EH", "SD", "SR", + "SJ", "SZ", "SE", "CH", "SY", "TJ", "TH", "TG", "TK", "TO", + "TT", "AE", "TN", "TR", "TM", "TC", "TV", "UG", "UA", "MK", + "EG", "GB", "JE", "GG", "JE", "IM", "TZ", "US", "VI", "BF", + "UY", "UZ", "VE", "WF", "WS", "YE", "YE", "RS", "ZM", "AA", + "QM", "QN", "QP", "QQ", "QR", "QS", "QT", "EU", "QV", "QW", + "QX", "QY", "QZ", "XA", "XB", "XC", "XD", "XE", "XF", "XG", + "XH", "XI", "XJ", "XK", "XL", "XM", "XN", "XO", "XP", "XQ", + "XR", "XS", "XT", "XU", "XV", "XW", "XX", "XY", "XZ", "ZZ", + }; + + if (const char* replacement = SearchReplacement(regions, aliases, region)) { + region.set(mozilla::MakeStringSpan(replacement)); + return true; + } + return false; + } +} + +// Region subtags with complex mappings. +// Derived from CLDR Supplemental Data, version 37. 
+// https://unicode.org/Public/cldr/37/core.zip +bool js::intl::LanguageTag::complexRegionMapping(const RegionSubtag& region) { + MOZ_ASSERT(IsStructurallyValidRegionTag(region.span())); + MOZ_ASSERT(IsCanonicallyCasedRegionTag(region.span())); + + if (region.length() == 2) { + return region.equalTo("AN") || + region.equalTo("NT") || + region.equalTo("PC") || + region.equalTo("SU"); + } + + { + static const char regions[8][4] = { + "172", "200", "530", "532", "536", "582", "810", "890", + }; + + return HasReplacement(regions, region); + } +} + +// Language subtags with complex mappings. +// Derived from CLDR Supplemental Data, version 37. +// https://unicode.org/Public/cldr/37/core.zip +void js::intl::LanguageTag::performComplexLanguageMappings() { + MOZ_ASSERT(IsStructurallyValidLanguageTag(language().span())); + MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language().span())); + + if (language().equalTo("cnr")) { + setLanguage("sr"); + if (region().missing()) { + setRegion("ME"); + } + } + else if (language().equalTo("drw") || + language().equalTo("prs") || + language().equalTo("tnf")) { + setLanguage("fa"); + if (region().missing()) { + setRegion("AF"); + } + } + else if (language().equalTo("hbs") || + language().equalTo("sh")) { + setLanguage("sr"); + if (script().missing()) { + setScript("Latn"); + } + } + else if (language().equalTo("swc")) { + setLanguage("sw"); + if (region().missing()) { + setRegion("CD"); + } + } +} + +// Region subtags with complex mappings. +// Derived from CLDR Supplemental Data, version 37. 
+// https://unicode.org/Public/cldr/37/core.zip +void js::intl::LanguageTag::performComplexRegionMappings() { + MOZ_ASSERT(IsStructurallyValidLanguageTag(language().span())); + MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language().span())); + MOZ_ASSERT(IsStructurallyValidRegionTag(region().span())); + MOZ_ASSERT(IsCanonicallyCasedRegionTag(region().span())); + + if (region().equalTo("172")) { + if (language().equalTo("hy") || + (language().equalTo("und") && script().equalTo("Armn"))) { + setRegion("AM"); + } + else if (language().equalTo("az") || + language().equalTo("tkr") || + language().equalTo("tly") || + language().equalTo("ttt")) { + setRegion("AZ"); + } + else if (language().equalTo("be")) { + setRegion("BY"); + } + else if (language().equalTo("ab") || + language().equalTo("ka") || + (language().equalTo("ku") && script().equalTo("Yezi")) || + language().equalTo("os") || + (language().equalTo("und") && script().equalTo("Geor")) || + (language().equalTo("und") && script().equalTo("Yezi")) || + language().equalTo("xmf")) { + setRegion("GE"); + } + else if (language().equalTo("ky")) { + setRegion("KG"); + } + else if (language().equalTo("kk") || + (language().equalTo("ug") && script().equalTo("Cyrl"))) { + setRegion("KZ"); + } + else if (language().equalTo("gag")) { + setRegion("MD"); + } + else if (language().equalTo("tg")) { + setRegion("TJ"); + } + else if (language().equalTo("tk")) { + setRegion("TM"); + } + else if (language().equalTo("crh") || + language().equalTo("got") || + language().equalTo("ji") || + language().equalTo("rue") || + language().equalTo("uk") || + (language().equalTo("und") && script().equalTo("Goth"))) { + setRegion("UA"); + } + else if (language().equalTo("kaa") || + language().equalTo("sog") || + (language().equalTo("und") && script().equalTo("Chrs")) || + (language().equalTo("und") && script().equalTo("Sogd")) || + (language().equalTo("und") && script().equalTo("Sogo")) || + language().equalTo("uz") || + language().equalTo("xco")) { + 
setRegion("UZ"); + } + else { + setRegion("RU"); + } + } + else if (region().equalTo("200")) { + if (language().equalTo("sk")) { + setRegion("SK"); + } + else { + setRegion("CZ"); + } + } + else if (region().equalTo("530") || + region().equalTo("532") || + region().equalTo("AN")) { + if (language().equalTo("vic")) { + setRegion("SX"); + } + else { + setRegion("CW"); + } + } + else if (region().equalTo("536") || + region().equalTo("NT")) { + if (language().equalTo("akk") || + language().equalTo("ckb") || + (language().equalTo("ku") && script().equalTo("Arab")) || + language().equalTo("mis") || + language().equalTo("syr") || + (language().equalTo("und") && script().equalTo("Hatr")) || + (language().equalTo("und") && script().equalTo("Syrc")) || + (language().equalTo("und") && script().equalTo("Xsux"))) { + setRegion("IQ"); + } + else { + setRegion("SA"); + } + } + else if (region().equalTo("582") || + region().equalTo("PC")) { + if (language().equalTo("mh")) { + setRegion("MH"); + } + else if (language().equalTo("pau")) { + setRegion("PW"); + } + else { + setRegion("FM"); + } + } + else if (region().equalTo("810") || + region().equalTo("SU")) { + if (language().equalTo("hy") || + (language().equalTo("und") && script().equalTo("Armn"))) { + setRegion("AM"); + } + else if (language().equalTo("az") || + language().equalTo("tkr") || + language().equalTo("tly") || + language().equalTo("ttt")) { + setRegion("AZ"); + } + else if (language().equalTo("be")) { + setRegion("BY"); + } + else if (language().equalTo("et") || + language().equalTo("vro")) { + setRegion("EE"); + } + else if (language().equalTo("ab") || + language().equalTo("ka") || + (language().equalTo("ku") && script().equalTo("Yezi")) || + language().equalTo("os") || + (language().equalTo("und") && script().equalTo("Geor")) || + (language().equalTo("und") && script().equalTo("Yezi")) || + language().equalTo("xmf")) { + setRegion("GE"); + } + else if (language().equalTo("ky")) { + setRegion("KG"); + } + else if 
(language().equalTo("kk") || + (language().equalTo("ug") && script().equalTo("Cyrl"))) { + setRegion("KZ"); + } + else if (language().equalTo("lt") || + language().equalTo("sgs")) { + setRegion("LT"); + } + else if (language().equalTo("ltg") || + language().equalTo("lv")) { + setRegion("LV"); + } + else if (language().equalTo("gag")) { + setRegion("MD"); + } + else if (language().equalTo("tg")) { + setRegion("TJ"); + } + else if (language().equalTo("tk")) { + setRegion("TM"); + } + else if (language().equalTo("crh") || + language().equalTo("got") || + language().equalTo("ji") || + language().equalTo("rue") || + language().equalTo("uk") || + (language().equalTo("und") && script().equalTo("Goth"))) { + setRegion("UA"); + } + else if (language().equalTo("kaa") || + language().equalTo("sog") || + (language().equalTo("und") && script().equalTo("Chrs")) || + (language().equalTo("und") && script().equalTo("Sogd")) || + (language().equalTo("und") && script().equalTo("Sogo")) || + language().equalTo("uz") || + language().equalTo("xco")) { + setRegion("UZ"); + } + else { + setRegion("RU"); + } + } + else if (region().equalTo("890")) { + if (language().equalTo("bs")) { + setRegion("BA"); + } + else if (language().equalTo("hr")) { + setRegion("HR"); + } + else if (language().equalTo("mk")) { + setRegion("MK"); + } + else if (language().equalTo("sl")) { + setRegion("SI"); + } + else { + setRegion("RS"); + } + } +} + +static const char* ToCharPointer(const char* str) { + return str; +} + +static const char* ToCharPointer(const js::UniqueChars& str) { + return str.get(); +} + +template <typename T, typename U = T> +static bool IsLessThan(const T& a, const U& b) { + return strcmp(ToCharPointer(a), ToCharPointer(b)) < 0; +} + +// Mappings from variant subtags to preferred values. +// Derived from CLDR Supplemental Data, version 37. 
+// https://unicode.org/Public/cldr/37/core.zip +bool js::intl::LanguageTag::performVariantMappings(JSContext* cx) { + // The variant subtags need to be sorted for binary search. + MOZ_ASSERT(std::is_sorted(variants_.begin(), variants_.end(), + IsLessThan<decltype(variants_)::ElementType>)); + + auto insertVariantSortedIfNotPresent = [&](const char* variant) { + auto* p = std::lower_bound(variants_.begin(), variants_.end(), variant, + IsLessThan<decltype(variants_)::ElementType, + decltype(variant)>); + + // Don't insert the replacement when already present. + if (p != variants_.end() && strcmp(p->get(), variant) == 0) { + return true; + } + + // Insert the preferred variant in sort order. + auto preferred = DuplicateString(cx, variant); + if (!preferred) { + return false; + } + return !!variants_.insert(p, std::move(preferred)); + }; + + for (size_t i = 0; i < variants_.length(); ) { + auto& variant = variants_[i]; + MOZ_ASSERT(IsCanonicallyCasedVariantTag(mozilla::MakeStringSpan(variant.get()))); + + if (strcmp(variant.get(), "aaland") == 0) { + variants_.erase(variants_.begin() + i); + setRegion("AX"); + } + else if (strcmp(variant.get(), "arevela") == 0) { + variants_.erase(variants_.begin() + i); + setLanguage("hy"); + } + else if (strcmp(variant.get(), "arevmda") == 0) { + variants_.erase(variants_.begin() + i); + setLanguage("hyw"); + } + else if (strcmp(variant.get(), "heploc") == 0) { + variants_.erase(variants_.begin() + i); + if (!insertVariantSortedIfNotPresent("alalc97")) { + return false; + } + } + else if (strcmp(variant.get(), "polytoni") == 0) { + variants_.erase(variants_.begin() + i); + if (!insertVariantSortedIfNotPresent("polyton")) { + return false; + } + } + else { + i++; + } + } + return true; +} + +// Canonicalize grandfathered locale identifiers. +// Derived from CLDR Supplemental Data, version 37. 
+// https://unicode.org/Public/cldr/37/core.zip +bool js::intl::LanguageTag::updateGrandfatheredMappings(JSContext* cx) { + // We're mapping regular grandfathered tags to non-grandfathered form here. + // Other tags remain unchanged. + // + // regular = "art-lojban" + // / "cel-gaulish" + // / "no-bok" + // / "no-nyn" + // / "zh-guoyu" + // / "zh-hakka" + // / "zh-min" + // / "zh-min-nan" + // / "zh-xiang" + // + // Therefore we can quickly exclude most tags by checking every + // |unicode_locale_id| subcomponent for characteristics not shared by any of + // the regular grandfathered (RG) tags: + // + // * Real-world |unicode_language_subtag|s are all two or three letters, + // so don't waste time running a useless |language.length > 3| fast-path. + // * No RG tag has a "script"-looking component. + // * No RG tag has a "region"-looking component. + // * The RG tags that match |unicode_locale_id| (art-lojban, cel-gaulish, + // zh-guoyu, zh-hakka, zh-xiang) have exactly one "variant". (no-bok, + // no-nyn, zh-min, and zh-min-nan require BCP47's extlang subtag + // that |unicode_locale_id| doesn't support.) + // * No RG tag contains |extensions| or |pu_extensions|. 
+ if (script().present() || + region().present() || + variants().length() != 1 || + extensions().length() != 0 || + privateuse()) { + return true; + } + + MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language().span())); + MOZ_ASSERT(IsCanonicallyCasedVariantTag(mozilla::MakeStringSpan(variants()[0].get()))); + + auto variantEqualTo = [this](const char* variant) { + return strcmp(variants()[0].get(), variant) == 0; + }; + + // art-lojban -> jbo + if (language().equalTo("art") && variantEqualTo("lojban")) { + setLanguage("jbo"); + clearVariants(); + return true; + } + + // cel-gaulish -> xtg-x-cel-gaulish + else if (language().equalTo("cel") && variantEqualTo("gaulish")) { + setLanguage("xtg"); + clearVariants(); + + auto privateuse = DuplicateString(cx, "x-cel-gaulish"); + if (!privateuse) { + return false; + } + setPrivateuse(std::move(privateuse)); + return true; + } + + // zh-guoyu -> zh + else if (language().equalTo("zh") && variantEqualTo("guoyu")) { + setLanguage("zh"); + clearVariants(); + return true; + } + + // zh-hakka -> hak + else if (language().equalTo("zh") && variantEqualTo("hakka")) { + setLanguage("hak"); + clearVariants(); + return true; + } + + // zh-xiang -> hsn + else if (language().equalTo("zh") && variantEqualTo("xiang")) { + setLanguage("hsn"); + clearVariants(); + return true; + } + + return true; +} + +template <size_t Length> +static inline bool IsUnicodeKey( + mozilla::Span<const char> key, const char (&str)[Length]) { + static_assert(Length == UnicodeKeyLength + 1, + "Unicode extension key is two characters long"); + return memcmp(key.data(), str, Length - 1) == 0; +} + +template <size_t Length> +static inline bool IsUnicodeType( + mozilla::Span<const char> type, const char (&str)[Length]) { + static_assert(Length > UnicodeKeyLength + 1, + "Unicode extension type contains more than two characters"); + return type.size() == (Length - 1) && + memcmp(type.data(), str, Length - 1) == 0; +} + +static int32_t CompareUnicodeType(const char* a, 
mozilla::Span<const char> b) { + MOZ_ASSERT(!std::char_traits<char>::find(b.data(), b.size(), '\0'), + "unexpected null-character in string"); + + using UnsignedChar = unsigned char; + for (size_t i = 0; i < b.size(); i++) { + // |a| is zero-terminated and |b| doesn't contain a null-terminator. So if + // we've reached the end of |a|, the below if-statement will always be true. + // That ensures we don't read past the end of |a|. + if (int32_t r = UnsignedChar(a[i]) - UnsignedChar(b[i])) { + return r; + } + } + + // Return zero if both strings are equal or a negative number if |b| is a + // prefix of |a|. + return -int32_t(UnsignedChar(a[b.size()])); +} + +template <size_t Length> +static inline const char* SearchUnicodeReplacement( + const char* (&types)[Length], const char* (&aliases)[Length], + mozilla::Span<const char> type) { + + auto p = std::lower_bound(std::begin(types), std::end(types), type, + [](const auto& a, const auto& b) { + return CompareUnicodeType(a, b) < 0; + }); + if (p != std::end(types) && CompareUnicodeType(*p, type) == 0) { + return aliases[std::distance(std::begin(types), p)]; + } + return nullptr; +} + +/** + * Mapping from deprecated BCP 47 Unicode extension types to their preferred + * values. 
+ * + * Spec: https://www.unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files + * Spec: https://www.unicode.org/reports/tr35/#t_Extension + */ +const char* js::intl::LanguageTag::replaceUnicodeExtensionType( + mozilla::Span<const char> key, mozilla::Span<const char> type) { + MOZ_ASSERT(key.size() == UnicodeKeyLength); + MOZ_ASSERT(IsCanonicallyCasedUnicodeKey(key)); + + MOZ_ASSERT(type.size() > UnicodeKeyLength); + MOZ_ASSERT(IsCanonicallyCasedUnicodeType(type)); + + if (IsUnicodeKey(key, "ca")) { + if (IsUnicodeType(type, "ethiopic-amete-alem")) { + return "ethioaa"; + } + if (IsUnicodeType(type, "islamicc")) { + return "islamic-civil"; + } + } + else if (IsUnicodeKey(key, "kb") || + IsUnicodeKey(key, "kc") || + IsUnicodeKey(key, "kh") || + IsUnicodeKey(key, "kk") || + IsUnicodeKey(key, "kn")) { + if (IsUnicodeType(type, "yes")) { + return "true"; + } + } + else if (IsUnicodeKey(key, "ks")) { + if (IsUnicodeType(type, "primary")) { + return "level1"; + } + if (IsUnicodeType(type, "tertiary")) { + return "level3"; + } + } + else if (IsUnicodeKey(key, "ms")) { + if (IsUnicodeType(type, "imperial")) { + return "uksystem"; + } + } + else if (IsUnicodeKey(key, "rg") || + IsUnicodeKey(key, "sd")) { + static const char* types[117] = { + "cn11", "cn12", "cn13", "cn14", "cn15", "cn21", "cn22", "cn23", + "cn31", "cn32", "cn33", "cn34", "cn35", "cn36", "cn37", "cn41", + "cn42", "cn43", "cn44", "cn45", "cn46", "cn50", "cn51", "cn52", + "cn53", "cn54", "cn61", "cn62", "cn63", "cn64", "cn65", "cz10a", + "cz10b", "cz10c", "cz10d", "cz10e", "cz10f", "cz611", "cz612", "cz613", + "cz614", "cz615", "cz621", "cz622", "cz623", "cz624", "cz626", "cz627", + "czjc", "czjm", "czka", "czkr", "czli", "czmo", "czol", "czpa", + "czpl", "czpr", "czst", "czus", "czvy", "czzl", "fra", "frb", + "frc", "frd", "fre", "frf", "frg", "frh", "fri", "frj", + "frk", "frl", "frm", "frn", "fro", "frp", "frq", "frr", + "frs", "frt", "fru", "frv", "laxn", "lud", "lug", "lul", + "mrnkc", "no23", 
"nzn", "nzs", "omba", "omsh", "plds", "plkp", + "pllb", "plld", "pllu", "plma", "plmz", "plop", "plpd", "plpk", + "plpm", "plsk", "plsl", "plwn", "plwp", "plzp", "tteto", "ttrcm", + "ttwto", "twkhq", "twtnq", "twtpq", "twtxq", + }; + static const char* aliases[117] = { + "cnbj", "cntj", "cnhe", "cnsx", "cnmn", "cnln", "cnjl", "cnhl", + "cnsh", "cnjs", "cnzj", "cnah", "cnfj", "cnjx", "cnsd", "cnha", + "cnhb", "cnhn", "cngd", "cngx", "cnhi", "cncq", "cnsc", "cngz", + "cnyn", "cnxz", "cnsn", "cngs", "cnqh", "cnnx", "cnxj", "cz110", + "cz111", "cz112", "cz113", "cz114", "cz115", "cz663", "cz632", "cz633", + "cz634", "cz635", "cz641", "cz642", "cz643", "cz644", "cz646", "cz647", + "cz31", "cz64", "cz41", "cz52", "cz51", "cz80", "cz71", "cz53", + "cz32", "cz10", "cz20", "cz42", "cz63", "cz72", "frges", "frnaq", + "frara", "frbfc", "frbre", "frcvl", "frges", "frcor", "frbfc", "fridf", + "frocc", "frnaq", "frges", "frocc", "frhdf", "frnor", "frnor", "frpdl", + "frhdf", "frnaq", "frpac", "frara", "laxs", "lucl", "luec", "luca", + "mr13", "no50", "nzauk", "nzcan", "ombj", "omsj", "pl02", "pl04", + "pl08", "pl10", "pl06", "pl12", "pl14", "pl16", "pl20", "pl18", + "pl22", "pl26", "pl24", "pl28", "pl30", "pl32", "tttob", "ttmrc", + "tttob", "twkhh", "twtnn", "twnwt", "twtxg", + }; + return SearchUnicodeReplacement(types, aliases, type); + } + else if (IsUnicodeKey(key, "tz")) { + static const char* types[28] = { + "aqams", "cnckg", "cnhrb", "cnkhg", "cuba", "egypt", + "eire", "est", "gmt0", "hongkong", "hst", "iceland", + "iran", "israel", "jamaica", "japan", "libya", "mst", + "navajo", "poland", "portugal", "prc", "roc", "rok", + "turkey", "uct", "usnavajo", "zulu", + }; + static const char* aliases[28] = { + "nzakl", "cnsha", "cnsha", "cnurc", "cuhav", "egcai", + "iedub", "utcw05", "gmt", "hkhkg", "utcw10", "isrey", + "irthr", "jeruslm", "jmkin", "jptyo", "lytip", "utcw07", + "usden", "plwaw", "ptlis", "cnsha", "twtpe", "krsel", + "trist", "utc", "usden", "utc", + }; + return 
SearchUnicodeReplacement(types, aliases, type); + } + return nullptr; +} + +template <size_t Length> +static inline bool IsTransformKey( + mozilla::Span<const char> key, const char (&str)[Length]) { + static_assert(Length == TransformKeyLength + 1, + "Transform extension key is two characters long"); + return memcmp(key.data(), str, Length - 1) == 0; +} + +template <size_t Length> +static inline bool IsTransformType( + mozilla::Span<const char> type, const char (&str)[Length]) { + static_assert(Length > TransformKeyLength + 1, + "Transform extension type contains more than two characters"); + return type.size() == (Length - 1) && + memcmp(type.data(), str, Length - 1) == 0; +} + +/** + * Mapping from deprecated BCP 47 Transform extension types to their preferred + * values. + * + * Spec: https://www.unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files + * Spec: https://www.unicode.org/reports/tr35/#t_Extension + */ +const char* js::intl::LanguageTag::replaceTransformExtensionType( + mozilla::Span<const char> key, mozilla::Span<const char> type) { + MOZ_ASSERT(key.size() == TransformKeyLength); + MOZ_ASSERT(IsCanonicallyCasedTransformKey(key)); + + MOZ_ASSERT(type.size() > TransformKeyLength); + MOZ_ASSERT(IsCanonicallyCasedTransformType(type)); + + if (IsTransformKey(key, "d0")) { + if (IsTransformType(type, "name")) { + return "charname"; + } + } + else if (IsTransformKey(key, "m0")) { + if (IsTransformType(type, "names")) { + return "prprname"; + } + } + return nullptr; +} diff --git a/js/src/builtin/intl/ListFormat.cpp b/js/src/builtin/intl/ListFormat.cpp new file mode 100644 index 0000000000..57bc5ba7c0 --- /dev/null +++ b/js/src/builtin/intl/ListFormat.cpp @@ -0,0 +1,554 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "builtin/intl/ListFormat.h" + +#include "mozilla/Assertions.h" +#include "mozilla/CheckedInt.h" +#include "mozilla/PodOperations.h" +#include "mozilla/Unused.h" + +#include <stddef.h> +#include <stdint.h> + +#include "builtin/Array.h" +#include "builtin/intl/CommonFunctions.h" +#include "builtin/intl/ScopedICUObject.h" +#include "gc/FreeOp.h" +#include "js/Utility.h" +#include "js/Vector.h" +#include "unicode/uformattedvalue.h" +#include "unicode/ulistformatter.h" +#include "unicode/utypes.h" +#include "vm/JSContext.h" +#include "vm/PlainObject.h" // js::PlainObject +#include "vm/Runtime.h" // js::ReportAllocationOverflow +#include "vm/SelfHosting.h" +#include "vm/Stack.h" +#include "vm/StringType.h" + +#include "vm/JSObject-inl.h" +#include "vm/NativeObject-inl.h" +#include "vm/ObjectOperations-inl.h" + +using namespace js; + +using mozilla::CheckedInt; + +using js::intl::CallICU; +using js::intl::IcuLocale; + +const JSClassOps ListFormatObject::classOps_ = { + nullptr, // addProperty + nullptr, // delProperty + nullptr, // enumerate + nullptr, // newEnumerate + nullptr, // resolve + nullptr, // mayResolve + ListFormatObject::finalize, // finalize + nullptr, // call + nullptr, // hasInstance + nullptr, // construct + nullptr, // trace +}; +const JSClass ListFormatObject::class_ = { + "Intl.ListFormat", + JSCLASS_HAS_RESERVED_SLOTS(ListFormatObject::SLOT_COUNT) | + JSCLASS_HAS_CACHED_PROTO(JSProto_ListFormat) | + JSCLASS_FOREGROUND_FINALIZE, + &ListFormatObject::classOps_, &ListFormatObject::classSpec_}; + +const JSClass& ListFormatObject::protoClass_ = PlainObject::class_; + +static bool listFormat_toSource(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + args.rval().setString(cx->names().ListFormat); + return true; +} + +static const JSFunctionSpec listFormat_static_methods[] = { + 
JS_SELF_HOSTED_FN("supportedLocalesOf", + "Intl_ListFormat_supportedLocalesOf", 1, 0), + JS_FS_END}; + +static const JSFunctionSpec listFormat_methods[] = { + JS_SELF_HOSTED_FN("resolvedOptions", "Intl_ListFormat_resolvedOptions", 0, + 0), + JS_SELF_HOSTED_FN("format", "Intl_ListFormat_format", 1, 0), + JS_SELF_HOSTED_FN("formatToParts", "Intl_ListFormat_formatToParts", 1, 0), + JS_FN(js_toSource_str, listFormat_toSource, 0, 0), JS_FS_END}; + +static const JSPropertySpec listFormat_properties[] = { + JS_STRING_SYM_PS(toStringTag, "Intl.ListFormat", JSPROP_READONLY), + JS_PS_END}; + +static bool ListFormat(JSContext* cx, unsigned argc, Value* vp); + +const ClassSpec ListFormatObject::classSpec_ = { + GenericCreateConstructor<ListFormat, 0, gc::AllocKind::FUNCTION>, + GenericCreatePrototype<ListFormatObject>, + listFormat_static_methods, + nullptr, + listFormat_methods, + listFormat_properties, + nullptr, + ClassSpec::DontDefineConstructor}; + +enum class ListFormatOptions { + SupportsTypeAndStyle, + NoTypeAndStyle, +}; + +/** + * Initialize a new Intl.ListFormat object using the named self-hosted function. + */ +static bool InitializeListFormatObject(JSContext* cx, HandleObject obj, + HandlePropertyName initializer, + HandleValue locales, HandleValue options, + ListFormatOptions lfoptions) { + FixedInvokeArgs<4> args(cx); + + args[0].setObject(*obj); + args[1].set(locales); + args[2].set(options); + args[3].setBoolean(lfoptions == ListFormatOptions::SupportsTypeAndStyle); + + RootedValue ignored(cx); + if (!CallSelfHostedFunction(cx, initializer, JS::NullHandleValue, args, + &ignored)) { + return false; + } + + MOZ_ASSERT(ignored.isUndefined(), + "Unexpected return value from non-legacy Intl object initializer"); + return true; +} + +/** + * Intl.ListFormat([ locales [, options]]) + */ +static bool ListFormat(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + + // Step 1. 
+ if (!ThrowIfNotConstructing(cx, args, "Intl.ListFormat")) { + return false; + } + + // Step 2 (Inlined 9.1.14, OrdinaryCreateFromConstructor). + RootedObject proto(cx); + if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_ListFormat, + &proto)) { + return false; + } + + Rooted<ListFormatObject*> listFormat( + cx, NewObjectWithClassProto<ListFormatObject>(cx, proto)); + if (!listFormat) { + return false; + } + + HandleValue locales = args.get(0); + HandleValue options = args.get(1); + + constexpr ListFormatOptions lfoptions = +#ifndef U_HIDE_DRAFT_API + ListFormatOptions::SupportsTypeAndStyle +#else + ListFormatOptions::NoTypeAndStyle +#endif + ; + + // Step 3. + if (!InitializeListFormatObject(cx, listFormat, + cx->names().InitializeListFormat, locales, + options, lfoptions)) { + return false; + } + + args.rval().setObject(*listFormat); + return true; +} + +void js::ListFormatObject::finalize(JSFreeOp* fop, JSObject* obj) { + MOZ_ASSERT(fop->onMainThread()); + + if (UListFormatter* lf = obj->as<ListFormatObject>().getListFormatter()) { + intl::RemoveICUCellMemory(fop, obj, ListFormatObject::EstimatedMemoryUse); + + ulistfmt_close(lf); + } +} + +/** + * Returns a new UListFormatter with the locale and list formatting options + * of the given ListFormat. 
/**
 * Returns a new UListFormatter with the locale and list formatting options
 * of the given ListFormat.
 *
 * Reads the |locale|, |type| and |style| properties from the object returned
 * by intl::GetInternalsObject and opens an ICU list formatter for them.
 *
 * Returns nullptr when the internals object, a property read, the locale
 * encoding or a string linearization fails, and also when ICU reports a
 * failure status (in which case intl::ReportInternalError is called).
 */
static UListFormatter* NewUListFormatter(JSContext* cx,
                                         Handle<ListFormatObject*> listFormat) {
  RootedObject internals(cx, intl::GetInternalsObject(cx, listFormat));
  if (!internals) {
    return nullptr;
  }

  // Scratch slot reused for each property read below.
  RootedValue value(cx);

  if (!GetProperty(cx, internals, internals, cx->names().locale, &value)) {
    return nullptr;
  }
  UniqueChars locale = intl::EncodeLocale(cx, value.toString());
  if (!locale) {
    return nullptr;
  }

  enum class ListFormatType { Conjunction, Disjunction, Unit };

  // Translate the "type" string into the local enum. Anything other than
  // "conjunction" or "disjunction" is asserted to be "unit".
  ListFormatType type;
  if (!GetProperty(cx, internals, internals, cx->names().type, &value)) {
    return nullptr;
  }
  {
    JSLinearString* strType = value.toString()->ensureLinear(cx);
    if (!strType) {
      return nullptr;
    }

    if (StringEqualsLiteral(strType, "conjunction")) {
      type = ListFormatType::Conjunction;
    } else if (StringEqualsLiteral(strType, "disjunction")) {
      type = ListFormatType::Disjunction;
    } else {
      MOZ_ASSERT(StringEqualsLiteral(strType, "unit"));
      type = ListFormatType::Unit;
    }
  }

  enum class ListFormatStyle { Long, Short, Narrow };

  // Translate the "style" string the same way. Anything other than "long" or
  // "short" is asserted to be "narrow".
  ListFormatStyle style;
  if (!GetProperty(cx, internals, internals, cx->names().style, &value)) {
    return nullptr;
  }
  {
    JSLinearString* strStyle = value.toString()->ensureLinear(cx);
    if (!strStyle) {
      return nullptr;
    }

    if (StringEqualsLiteral(strStyle, "long")) {
      style = ListFormatStyle::Long;
    } else if (StringEqualsLiteral(strStyle, "short")) {
      style = ListFormatStyle::Short;
    } else {
      MOZ_ASSERT(StringEqualsLiteral(strStyle, "narrow"));
      style = ListFormatStyle::Narrow;
    }
  }

  UErrorCode status = U_ZERO_ERROR;
  UListFormatter* lf;

#ifndef U_HIDE_DRAFT_API
  // ICU draft API is visible: map both enums onto their ICU counterparts and
  // open a formatter for that exact type/width combination.
  UListFormatterType utype;
  switch (type) {
    case ListFormatType::Conjunction:
      utype = ULISTFMT_TYPE_AND;
      break;
    case ListFormatType::Disjunction:
      utype = ULISTFMT_TYPE_OR;
      break;
    case ListFormatType::Unit:
      utype = ULISTFMT_TYPE_UNITS;
      break;
  }

  UListFormatterWidth uwidth;
  switch (style) {
    case ListFormatStyle::Long:
      uwidth = ULISTFMT_WIDTH_WIDE;
      break;
    case ListFormatStyle::Short:
      uwidth = ULISTFMT_WIDTH_SHORT;
      break;
    case ListFormatStyle::Narrow:
      uwidth = ULISTFMT_WIDTH_NARROW;
      break;
  }

  lf = ulistfmt_openForType(IcuLocale(locale.get()), utype, uwidth, &status);
#else
  // Without the draft API only the conjunction/long combination is expected
  // here; |type| and |style| are otherwise unused in this configuration.
  MOZ_ASSERT(type == ListFormatType::Conjunction);
  MOZ_ASSERT(style == ListFormatStyle::Long);

  mozilla::Unused << type;
  mozilla::Unused << style;

  lf = ulistfmt_open(IcuLocale(locale.get()), &status);
#endif

  if (U_FAILURE(status)) {
    intl::ReportInternalError(cx);
    return nullptr;
  }
  return lf;
}
stringLengths.length()); + MOZ_ASSERT(strings.length() <= INT32_MAX); + + UErrorCode status = U_ZERO_ERROR; + UFormattedList* formatted = ulistfmt_openResult(&status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + ScopedICUObject<UFormattedList, ulistfmt_closeResult> toClose(formatted); + + ulistfmt_formatStringsToResult( + lf, reinterpret_cast<char16_t* const*>(strings.begin()), + stringLengths.begin(), int32_t(strings.length()), formatted, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + + const UFormattedValue* formattedValue = + ulistfmt_resultAsValue(formatted, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + + RootedString overallResult(cx, + intl::FormattedValueToString(cx, formattedValue)); + if (!overallResult) { + return false; + } + + RootedArrayObject partsArray(cx, NewDenseEmptyArray(cx)); + if (!partsArray) { + return false; + } + + using FieldType = js::ImmutablePropertyNamePtr JSAtomState::*; + + size_t lastEndIndex = 0; + RootedObject singlePart(cx); + RootedValue val(cx); + + auto AppendPart = [&](FieldType type, size_t beginIndex, size_t endIndex) { + singlePart = NewBuiltinClassInstance<PlainObject>(cx); + if (!singlePart) { + return false; + } + + val = StringValue(cx->names().*type); + if (!DefineDataProperty(cx, singlePart, cx->names().type, val)) { + return false; + } + + JSLinearString* partSubstr = NewDependentString( + cx, overallResult, beginIndex, endIndex - beginIndex); + if (!partSubstr) { + return false; + } + + val = StringValue(partSubstr); + if (!DefineDataProperty(cx, singlePart, cx->names().value, val)) { + return false; + } + + if (!NewbornArrayPush(cx, partsArray, ObjectValue(*singlePart))) { + return false; + } + + lastEndIndex = endIndex; + return true; + }; + + UConstrainedFieldPosition* fpos = ucfpos_open(&status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + 
ScopedICUObject<UConstrainedFieldPosition, ucfpos_close> toCloseFpos(fpos); + + // We're only interested in ULISTFMT_ELEMENT_FIELD fields. + ucfpos_constrainField(fpos, UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD, + &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + + while (true) { + bool hasMore = ufmtval_nextPosition(formattedValue, fpos, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + if (!hasMore) { + break; + } + + int32_t beginIndexInt, endIndexInt; + ucfpos_getIndexes(fpos, &beginIndexInt, &endIndexInt, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + + MOZ_ASSERT(beginIndexInt >= 0); + MOZ_ASSERT(endIndexInt >= 0); + MOZ_ASSERT(beginIndexInt <= endIndexInt, + "field iterator returning invalid range"); + + size_t beginIndex = size_t(beginIndexInt); + size_t endIndex = size_t(endIndexInt); + + // Indices are guaranteed to be returned in order (from left to right). + MOZ_ASSERT(lastEndIndex <= beginIndex, + "field iteration didn't return fields in order start to " + "finish as expected"); + + if (lastEndIndex < beginIndex) { + if (!AppendPart(&JSAtomState::literal, lastEndIndex, beginIndex)) { + return false; + } + } + + if (!AppendPart(&JSAtomState::element, beginIndex, endIndex)) { + return false; + } + } + + // Append any final literal. + if (lastEndIndex < overallResult->length()) { + if (!AppendPart(&JSAtomState::literal, lastEndIndex, + overallResult->length())) { + return false; + } + } + + result.setObject(*partsArray); + return true; +} + +bool js::intl_FormatList(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 3); + + Rooted<ListFormatObject*> listFormat( + cx, &args[0].toObject().as<ListFormatObject>()); + + bool formatToParts = args[2].toBoolean(); + + // Obtain a cached UListFormatter object. 
+ UListFormatter* lf = listFormat->getListFormatter(); + if (!lf) { + lf = NewUListFormatter(cx, listFormat); + if (!lf) { + return false; + } + listFormat->setListFormatter(lf); + + intl::AddICUCellMemory(listFormat, ListFormatObject::EstimatedMemoryUse); + } + + // Collect all strings and their lengths. + ListFormatStringVector strings(cx); + ListFormatStringLengthVector stringLengths(cx); + + // Keep a conservative running count of overall length. + CheckedInt<int32_t> stringLengthTotal(0); + + RootedArrayObject list(cx, &args[1].toObject().as<ArrayObject>()); + RootedValue value(cx); + uint32_t listLen = list->length(); + for (uint32_t i = 0; i < listLen; i++) { + if (!GetElement(cx, list, list, i, &value)) { + return false; + } + + JSLinearString* linear = value.toString()->ensureLinear(cx); + if (!linear) { + return false; + } + + size_t linearLength = linear->length(); + if (!stringLengths.append(linearLength)) { + return false; + } + stringLengthTotal += linearLength; + + UniqueTwoByteChars chars = cx->make_pod_array<char16_t>(linearLength); + if (!chars) { + return false; + } + CopyChars(chars.get(), *linear); + + if (!strings.append(std::move(chars))) { + return false; + } + } + + // Add space for N unrealistically large conjunctions. + constexpr int32_t MaxConjunctionLen = 100; + stringLengthTotal += CheckedInt<int32_t>(listLen) * MaxConjunctionLen; + + // If the overestimate exceeds ICU length limits, don't try to format. + if (!stringLengthTotal.isValid()) { + ReportAllocationOverflow(cx); + return false; + } + + // Use the UListFormatter to actually format the strings. 
+ if (formatToParts) { + return FormatListToParts(cx, lf, strings, stringLengths, args.rval()); + } + return FormatList(cx, lf, strings, stringLengths, args.rval()); +} diff --git a/js/src/builtin/intl/ListFormat.h b/js/src/builtin/intl/ListFormat.h new file mode 100644 index 0000000000..bf4d99ec56 --- /dev/null +++ b/js/src/builtin/intl/ListFormat.h @@ -0,0 +1,70 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef builtin_intl_ListFormat_h +#define builtin_intl_ListFormat_h + +#include "mozilla/Attributes.h" + +#include <stdint.h> + +#include "builtin/SelfHostingDefines.h" +#include "js/Class.h" +#include "js/RootingAPI.h" +#include "vm/NativeObject.h" + +class JSFreeOp; +struct UListFormatter; + +namespace js { + +class ListFormatObject : public NativeObject { + public: + static const JSClass class_; + static const JSClass& protoClass_; + + static constexpr uint32_t INTERNALS_SLOT = 0; + static constexpr uint32_t ULIST_FORMATTER_SLOT = 1; + static constexpr uint32_t SLOT_COUNT = 2; + + static_assert(INTERNALS_SLOT == INTL_INTERNALS_OBJECT_SLOT, + "INTERNALS_SLOT must match self-hosting define for internals " + "object slot"); + + // Estimated memory use for UListFormatter (see IcuMemoryUsage). 
/**
 * Intl.ListFormat ( [ locales [ , options ] ] )
 *
 * Build the internal properties object (locale, type, style) from the lazy
 * data recorded by InitializeListFormat.
 */
function resolveListFormatInternals(lazyListFormatData) {
    assert(IsObject(lazyListFormatData), "lazy data not an object?");

    var props = std_Object_create(null);

    var listFormatProperties = listFormatInternalProperties;

    // Compute effective locale.

    // Step 9.
    var localeData = listFormatProperties.localeData;

    // Step 10.
    var resolved = ResolveLocale("ListFormat",
                                 lazyListFormatData.requestedLocales,
                                 lazyListFormatData.opt,
                                 listFormatProperties.relevantExtensionKeys,
                                 localeData);

    // Step 11.
    props.locale = resolved.locale;

    // Step 13.
    props.type = lazyListFormatData.type;

    // Step 15.
    props.style = lazyListFormatData.style;

    // Steps 16-23 (not applicable in our implementation).

    // Associating |props| with the right object (via |setInternalProperties|)
    // is left to the caller.
    return props;
}
/**
 * Intl.ListFormat ( [ locales [ , options ] ] )
 *
 * Initializes |listFormat| as a ListFormat. Only the work that has to happen
 * at construction time (reading and validating options) is done here; the
 * result is stored as lazy data and resolved later by
 * |resolveListFormatInternals|.
 */
function InitializeListFormat(listFormat, locales, options, supportsTypeAndStyle) {
    assert(IsObject(listFormat), "InitializeListFormat called with non-object");
    assert(GuardToListFormat(listFormat) !== null, "InitializeListFormat called with non-ListFormat");

    // Lazy ListFormat data has the following structure:
    //
    //   {
    //     requestedLocales: List of locales,
    //     type: "conjunction" / "disjunction" / "unit",
    //     style: "long" / "short" / "narrow",
    //
    //     opt: // opt object computed in InitializeListFormat
    //       {
    //         localeMatcher: "lookup" / "best fit",
    //       }
    //   }
    //
    // The lazy data is installed only as the last step, so an installed lazy
    // data object always carries all of these properties.
    var lazyData = std_Object_create(null);

    // Step 3.
    lazyData.requestedLocales = CanonicalizeLocaleList(locales);

    // Steps 4-5.
    options = options === undefined ? std_Object_create(null) : ToObject(options);

    // Step 6.
    var opt = new Record();
    lazyData.opt = opt;

    // Steps 7-8.
    opt.localeMatcher = GetOption(options, "localeMatcher", "string",
                                  ["lookup", "best fit"], "best fit");

    // Compute formatting options.

    // Supporting all "type" and "style" options requires draft APIs in ICU 67,
    // which may not be available when compiling against a system ICU.

    // Steps 12-13.
    var allowedTypes = supportsTypeAndStyle
                       ? ["conjunction", "disjunction", "unit"]
                       : ["conjunction"];
    lazyData.type = GetOption(options, "type", "string", allowedTypes, "conjunction");

    // Steps 14-15.
    var allowedStyles = supportsTypeAndStyle
                        ? ["long", "short", "narrow"]
                        : ["long"];
    lazyData.style = GetOption(options, "style", "string", allowedStyles, "long");

    // Everything that must be done eagerly is done: install the lazy data.
    initializeIntlObject(listFormat, "ListFormat", lazyData);
}
/**
 * Returns the resolved options (locale, type, style) for a ListFormat object.
 */
function Intl_ListFormat_resolvedOptions() {
    // Step 1.
    var listFormat = this;

    // Steps 2-3: anything that is not an unwrapped ListFormat is forwarded to
    // CallListFormatMethodIfWrapped.
    if (!IsObject(listFormat) || (listFormat = GuardToListFormat(listFormat)) === null) {
        return callFunction(CallListFormatMethodIfWrapped, this,
                            "Intl_ListFormat_resolvedOptions");
    }

    var resolved = getListFormatInternals(listFormat);

    // Steps 4-6.
    return {
        locale: resolved.locale,
        type: resolved.type,
        style: resolved.style,
    };
}
*/ + +/* Intl.Locale implementation. */ + +#include "builtin/intl/Locale.h" + +#include "mozilla/ArrayUtils.h" +#include "mozilla/Assertions.h" +#include "mozilla/Casting.h" +#include "mozilla/Maybe.h" +#include "mozilla/Span.h" +#include "mozilla/TextUtils.h" + +#include <algorithm> +#include <iterator> +#include <string> +#include <string.h> +#include <utility> + +#include "jsapi.h" +#include "jsfriendapi.h" + +#include "builtin/Boolean.h" +#include "builtin/intl/CommonFunctions.h" +#include "builtin/intl/LanguageTag.h" +#include "builtin/String.h" +#include "gc/Rooting.h" +#include "js/Conversions.h" +#include "js/friend/ErrorMessages.h" // js::GetErrorMessage, JSMSG_* +#include "js/TypeDecls.h" +#include "js/Wrapper.h" +#include "util/StringBuffer.h" +#include "vm/GlobalObject.h" +#include "vm/JSContext.h" +#include "vm/PlainObject.h" // js::PlainObject +#include "vm/Printer.h" +#include "vm/StringType.h" + +#include "vm/JSObject-inl.h" +#include "vm/NativeObject-inl.h" + +using namespace js; +using namespace js::intl::LanguageTagLimits; + +using intl::LanguageTag; +using intl::LanguageTagParser; + +const JSClass LocaleObject::class_ = { + "Intl.Locale", + JSCLASS_HAS_RESERVED_SLOTS(LocaleObject::SLOT_COUNT) | + JSCLASS_HAS_CACHED_PROTO(JSProto_Locale), + JS_NULL_CLASS_OPS, &LocaleObject::classSpec_}; + +const JSClass& LocaleObject::protoClass_ = PlainObject::class_; + +static inline bool IsLocale(HandleValue v) { + return v.isObject() && v.toObject().is<LocaleObject>(); +} + +// Return the length of the base-name subtags. 
+static size_t BaseNameLength(const LanguageTag& tag) { + size_t baseNameLength = tag.language().length(); + if (tag.script().present()) { + baseNameLength += 1 + tag.script().length(); + } + if (tag.region().present()) { + baseNameLength += 1 + tag.region().length(); + } + for (const auto& variant : tag.variants()) { + baseNameLength += 1 + strlen(variant.get()); + } + return baseNameLength; +} + +struct IndexAndLength { + size_t index; + size_t length; + + IndexAndLength(size_t index, size_t length) : index(index), length(length){}; + + template <typename T> + mozilla::Span<const T> spanOf(const T* ptr) const { + return {ptr + index, length}; + } +}; + +// Compute the Unicode extension's index and length in the extension subtag. +static mozilla::Maybe<IndexAndLength> UnicodeExtensionPosition( + const LanguageTag& tag) { + size_t index = 0; + for (const auto& extension : tag.extensions()) { + MOZ_ASSERT(!mozilla::IsAsciiUppercaseAlpha(extension[0]), + "extensions are case normalized to lowercase"); + + size_t extensionLength = strlen(extension.get()); + if (extension[0] == 'u') { + return mozilla::Some(IndexAndLength{index, extensionLength}); + } + + // Add +1 to skip over the preceding separator. 
+ index += 1 + extensionLength; + } + return mozilla::Nothing(); +} + +/** + * Create a LocaleObject with prototype |prototype| for |tag|. The complete tag + * string is stored in LANGUAGE_TAG_SLOT; the base name and, when the tag has + * one, the Unicode extension are stored as dependent strings of that tag + * string. UNICODE_EXTENSION_SLOT holds undefined when no Unicode extension is + * present. Returns nullptr on failure. + */ +static LocaleObject* CreateLocaleObject(JSContext* cx, HandleObject prototype, + const LanguageTag& tag) { + RootedString tagStr(cx, tag.toString(cx)); + if (!tagStr) { + return nullptr; + } + + size_t baseNameLength = BaseNameLength(tag); + + RootedString baseName(cx, NewDependentString(cx, tagStr, 0, baseNameLength)); + if (!baseName) { + return nullptr; + } + + RootedValue unicodeExtension(cx, UndefinedValue()); + if (auto result = UnicodeExtensionPosition(tag)) { + JSString* str = NewDependentString( + cx, tagStr, baseNameLength + 1 + result->index, result->length); + if (!str) { + return nullptr; + } + + unicodeExtension.setString(str); + } + + auto* locale = NewObjectWithClassProto<LocaleObject>(cx, prototype); + if (!locale) { + return nullptr; + } + + locale->setFixedSlot(LocaleObject::LANGUAGE_TAG_SLOT, StringValue(tagStr)); + locale->setFixedSlot(LocaleObject::BASENAME_SLOT, StringValue(baseName)); + locale->setFixedSlot(LocaleObject::UNICODE_EXTENSION_SLOT, unicodeExtension); + + return locale; +} + +/** + * Return true when |linear| is non-empty and is accepted by + * LanguageTagParser::canParseUnicodeExtensionType. + */ +static inline bool IsValidUnicodeExtensionValue(JSLinearString* linear) { + return linear->length() > 0 && + LanguageTagParser::canParseUnicodeExtensionType(linear); +} + +/** Iterate through (sep keyword) in a valid, lowercased Unicode extension. */ +template <typename CharT> +class SepKeywordIterator { + const CharT* iter_; + const CharT* const end_; + + public: + SepKeywordIterator(const CharT* unicodeExtensionBegin, + const CharT* unicodeExtensionEnd) + : iter_(unicodeExtensionBegin), end_(unicodeExtensionEnd) {} + + /** + * Return (sep keyword) in the Unicode locale extension from begin to end. + * The first call after all (sep keyword) are consumed returns |nullptr|; no + * further calls are allowed. 
+ */ + const CharT* next() { + MOZ_ASSERT(iter_ != nullptr, + "can't call next() once it's returned nullptr"); + + constexpr size_t SepKeyLength = 1 + UnicodeKeyLength; // "-co"/"-nu"/etc. + + MOZ_ASSERT(iter_ + SepKeyLength <= end_, + "overall Unicode locale extension or non-leading subtags must " + "be at least key-sized"); + + MOZ_ASSERT((iter_[0] == 'u' && iter_[1] == '-') || iter_[0] == '-'); + + while (true) { + // Skip past '-' so |std::char_traits::find| makes progress. Skipping + // 'u' is harmless -- skip or not, |find| returns the first '-'. + iter_++; + + // Find the next separator. + iter_ = std::char_traits<CharT>::find( + iter_, mozilla::PointerRangeSize(iter_, end_), CharT('-')); + if (!iter_) { + return nullptr; + } + + MOZ_ASSERT(iter_ + SepKeyLength <= end_, + "non-leading subtags in a Unicode locale extension are all " + "at least as long as a key"); + + if (iter_ + SepKeyLength == end_ || // key is terminal subtag + iter_[SepKeyLength] == '-') { // key is followed by more subtags + break; + } + } + + MOZ_ASSERT(iter_[0] == '-'); + MOZ_ASSERT(mozilla::IsAsciiLowercaseAlpha(iter_[1]) || + mozilla::IsAsciiDigit(iter_[1])); + MOZ_ASSERT(mozilla::IsAsciiLowercaseAlpha(iter_[2])); + MOZ_ASSERT_IF(iter_ + SepKeyLength < end_, iter_[SepKeyLength] == '-'); + return iter_; + } +}; + +/** + * 9.2.10 GetOption ( options, property, type, values, fallback ) + * + * If the requested property is present and not-undefined, set the result string + * to |ToString(value)|. Otherwise set the result string to nullptr. + */ +static bool GetStringOption(JSContext* cx, HandleObject options, + HandlePropertyName name, + MutableHandle<JSLinearString*> string) { + // Step 1. + RootedValue option(cx); + if (!GetProperty(cx, options, options, name, &option)) { + return false; + } + + // Step 2. + JSLinearString* linear = nullptr; + if (!option.isUndefined()) { + // Steps 2.a-b, 2.d (not applicable). + + // Steps 2.c, 2.e. 
+ JSString* str = ToString(cx, option); + if (!str) { + return false; + } + linear = str->ensureLinear(cx); + if (!linear) { + return false; + } + } + + // Step 3. + string.set(linear); + return true; +} + +/** + * 9.2.10 GetOption ( options, property, type, values, fallback ) + * + * If the requested property is present and not-undefined, set the result string + * to |ToString(ToBoolean(value))|. Otherwise set the result string to nullptr. + */ +static bool GetBooleanOption(JSContext* cx, HandleObject options, + HandlePropertyName name, + MutableHandle<JSLinearString*> string) { + // Step 1. + RootedValue option(cx); + if (!GetProperty(cx, options, options, name, &option)) { + return false; + } + + // Step 2. + JSLinearString* linear = nullptr; + if (!option.isUndefined()) { + // Steps 2.a, 2.c-d (not applicable). + + // Steps 2.b, 2.e. + linear = BooleanToString(cx, ToBoolean(option)); + } + + // Step 3. + string.set(linear); + return true; +} + +/** + * ApplyOptionsToTag ( tag, options ) + */ +static bool ApplyOptionsToTag(JSContext* cx, LanguageTag& tag, + HandleObject options) { + // Steps 1-2 (Already performed in caller). + + RootedLinearString option(cx); + + // Step 3. + if (!GetStringOption(cx, options, cx->names().language, &option)) { + return false; + } + + // Step 4. + intl::LanguageSubtag language; + if (option && !intl::ParseStandaloneLanguageTag(option, language)) { + if (UniqueChars str = QuoteString(cx, option, '"')) { + JS_ReportErrorNumberASCII(cx, js::GetErrorMessage, nullptr, + JSMSG_INVALID_OPTION_VALUE, "language", + str.get()); + } + return false; + } + + // Step 5. + if (!GetStringOption(cx, options, cx->names().script, &option)) { + return false; + } + + // Step 6. 
+ intl::ScriptSubtag script; + if (option && !intl::ParseStandaloneScriptTag(option, script)) { + if (UniqueChars str = QuoteString(cx, option, '"')) { + JS_ReportErrorNumberASCII(cx, js::GetErrorMessage, nullptr, + JSMSG_INVALID_OPTION_VALUE, "script", + str.get()); + } + return false; + } + + // Step 7. + if (!GetStringOption(cx, options, cx->names().region, &option)) { + return false; + } + + // Step 8. + intl::RegionSubtag region; + if (option && !intl::ParseStandaloneRegionTag(option, region)) { + if (UniqueChars str = QuoteString(cx, option, '"')) { + JS_ReportErrorNumberASCII(cx, js::GetErrorMessage, nullptr, + JSMSG_INVALID_OPTION_VALUE, "region", + str.get()); + } + return false; + } + + // Step 9 (Already performed in caller). + + // Skip steps 10-13 when no subtags were modified. + if (language.present() || script.present() || region.present()) { + // Step 10. + if (language.present()) { + tag.setLanguage(language); + } + + // Step 11. + if (script.present()) { + tag.setScript(script); + } + + // Step 12. + if (region.present()) { + tag.setRegion(region); + } + + // Step 13. + // Optimized to only canonicalize the base-name subtags. All other + // canonicalization steps will happen later. + if (!tag.canonicalizeBaseName(cx)) { + // Canonicalization failed; propagate the failure to the caller instead + // of reporting success. + return false; + } + } + + return true; +} + +/** + * ApplyUnicodeExtensionToTag( tag, options, relevantExtensionKeys ) + */ +bool js::intl::ApplyUnicodeExtensionToTag( + JSContext* cx, LanguageTag& tag, + JS::HandleVector<intl::UnicodeExtensionKeyword> keywords) { + // If no Unicode extensions were present in the options object, we can skip + // everything below and directly return. + if (keywords.length() == 0) { + return true; + } + + Vector<char, 32> newExtension(cx); + if (!newExtension.append('u')) { + return false; + } + + // Check if there's an existing Unicode extension subtag. 
+ + const char* unicodeExtensionEnd = nullptr; + const char* unicodeExtensionKeywords = nullptr; + if (const char* unicodeExtension = tag.unicodeExtension()) { + unicodeExtensionEnd = unicodeExtension + strlen(unicodeExtension); + + SepKeywordIterator<char> iter(unicodeExtension, unicodeExtensionEnd); + + // Find the start of the first keyword. + unicodeExtensionKeywords = iter.next(); + + // Copy any attributes present before the first keyword. + const char* attributesEnd = unicodeExtensionKeywords + ? unicodeExtensionKeywords + : unicodeExtensionEnd; + if (!newExtension.append(unicodeExtension + 1, attributesEnd)) { + return false; + } + } + + // Append the new keywords before any existing keywords. That way any previous + // keyword with the same key is detected as a duplicate when canonicalizing + // the Unicode extension subtag and gets discarded. + + for (const auto& keyword : keywords) { + UnicodeExtensionKeyword::UnicodeKeySpan key = keyword.key(); + if (!newExtension.append('-')) { + return false; + } + if (!newExtension.append(key.data(), key.size())) { + return false; + } + if (!newExtension.append('-')) { + return false; + } + + JS::AutoCheckCannotGC nogc; + JSLinearString* type = keyword.type(); + if (type->hasLatin1Chars()) { + if (!newExtension.append(type->latin1Chars(nogc), type->length())) { + return false; + } + } else { + if (!newExtension.append(type->twoByteChars(nogc), type->length())) { + return false; + } + } + } + + // Append the remaining keywords from the previous Unicode extension subtag. + if (unicodeExtensionKeywords) { + if (!newExtension.append(unicodeExtensionKeywords, unicodeExtensionEnd)) { + return false; + } + } + + // Null-terminate the new Unicode extension string. + if (!newExtension.append('\0')) { + return false; + } + + // Insert the new Unicode extension string into the language tag. 
+ UniqueChars newExtensionChars(newExtension.extractOrCopyRawBuffer()); + if (!newExtensionChars) { + return false; + } + return tag.setUnicodeExtension(std::move(newExtensionChars)); +} + +/** + * Return the language tag string of |obj| when it is a LocaleObject, unwrapping + * |obj| first if it isn't one directly. Returns nullptr (not an error) when the + * unwrapped object is not a LocaleObject. Returns an error after reporting + * access-denied when unwrapping fails, or when wrapping the tag string into + * |cx|'s compartment fails. + */ +static JS::Result<JSString*> LanguageTagFromMaybeWrappedLocale(JSContext* cx, + JSObject* obj) { + if (obj->is<LocaleObject>()) { + return obj->as<LocaleObject>().languageTag(); + } + + JSObject* unwrapped = CheckedUnwrapStatic(obj); + if (!unwrapped) { + ReportAccessDenied(cx); + return cx->alreadyReportedError(); + } + + if (!unwrapped->is<LocaleObject>()) { + return nullptr; + } + + RootedString tagStr(cx, unwrapped->as<LocaleObject>().languageTag()); + if (!cx->compartment()->wrap(cx, &tagStr)) { + return cx->alreadyReportedError(); + } + return tagStr.get(); +} + +/** + * Intl.Locale( tag[, options] ) + */ +static bool Locale(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + + // Step 1. + if (!ThrowIfNotConstructing(cx, args, "Intl.Locale")) { + return false; + } + + // Steps 2-6 (Inlined 9.1.14, OrdinaryCreateFromConstructor). + RootedObject proto(cx); + if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_Locale, &proto)) { + return false; + } + + // Steps 7-9. + HandleValue tagValue = args.get(0); + JSString* tagStr; + if (tagValue.isObject()) { + JS_TRY_VAR_OR_RETURN_FALSE( + cx, tagStr, + LanguageTagFromMaybeWrappedLocale(cx, &tagValue.toObject())); + if (!tagStr) { + tagStr = ToString(cx, tagValue); + if (!tagStr) { + return false; + } + } + } else if (tagValue.isString()) { + tagStr = tagValue.toString(); + } else { + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, + JSMSG_INVALID_LOCALES_ELEMENT); + return false; + } + + RootedLinearString tagLinearStr(cx, tagStr->ensureLinear(cx)); + if (!tagLinearStr) { + return false; + } + + // Steps 10-11. 
+ RootedObject options(cx); + if (args.hasDefined(1)) { + options = ToObject(cx, args[1]); + if (!options) { + return false; + } + } + + // ApplyOptionsToTag, steps 2 and 9. + LanguageTag tag(cx); + if (!LanguageTagParser::parse(cx, tagLinearStr, tag)) { + return false; + } + + if (!tag.canonicalizeBaseName(cx)) { + return false; + } + + if (options) { + // Step 12. + if (!ApplyOptionsToTag(cx, tag, options)) { + return false; + } + + // Step 13. + JS::RootedVector<intl::UnicodeExtensionKeyword> keywords(cx); + + // Step 14. + RootedLinearString calendar(cx); + if (!GetStringOption(cx, options, cx->names().calendar, &calendar)) { + return false; + } + + // Steps 15-16. + if (calendar) { + if (!IsValidUnicodeExtensionValue(calendar)) { + if (UniqueChars str = QuoteString(cx, calendar, '"')) { + JS_ReportErrorNumberASCII(cx, js::GetErrorMessage, nullptr, + JSMSG_INVALID_OPTION_VALUE, "calendar", + str.get()); + } + return false; + } + + if (!keywords.emplaceBack("ca", calendar)) { + return false; + } + } + + // Step 17. + RootedLinearString collation(cx); + if (!GetStringOption(cx, options, cx->names().collation, &collation)) { + return false; + } + + // Steps 18-19. + if (collation) { + if (!IsValidUnicodeExtensionValue(collation)) { + if (UniqueChars str = QuoteString(cx, collation, '"')) { + JS_ReportErrorNumberASCII(cx, js::GetErrorMessage, nullptr, + JSMSG_INVALID_OPTION_VALUE, "collation", + str.get()); + } + return false; + } + + if (!keywords.emplaceBack("co", collation)) { + return false; + } + } + + // Step 20 (without validation). + RootedLinearString hourCycle(cx); + if (!GetStringOption(cx, options, cx->names().hourCycle, &hourCycle)) { + return false; + } + + // Steps 20-21. 
+ if (hourCycle) { + if (!StringEqualsLiteral(hourCycle, "h11") && + !StringEqualsLiteral(hourCycle, "h12") && + !StringEqualsLiteral(hourCycle, "h23") && + !StringEqualsLiteral(hourCycle, "h24")) { + if (UniqueChars str = QuoteString(cx, hourCycle, '"')) { + JS_ReportErrorNumberASCII(cx, js::GetErrorMessage, nullptr, + JSMSG_INVALID_OPTION_VALUE, "hourCycle", + str.get()); + } + return false; + } + + if (!keywords.emplaceBack("hc", hourCycle)) { + return false; + } + } + + // Step 22 (without validation). + RootedLinearString caseFirst(cx); + if (!GetStringOption(cx, options, cx->names().caseFirst, &caseFirst)) { + return false; + } + + // Steps 22-23. + if (caseFirst) { + if (!StringEqualsLiteral(caseFirst, "upper") && + !StringEqualsLiteral(caseFirst, "lower") && + !StringEqualsLiteral(caseFirst, "false")) { + if (UniqueChars str = QuoteString(cx, caseFirst, '"')) { + JS_ReportErrorNumberASCII(cx, js::GetErrorMessage, nullptr, + JSMSG_INVALID_OPTION_VALUE, "caseFirst", + str.get()); + } + return false; + } + + if (!keywords.emplaceBack("kf", caseFirst)) { + return false; + } + } + + // Steps 24-25. + RootedLinearString numeric(cx); + if (!GetBooleanOption(cx, options, cx->names().numeric, &numeric)) { + return false; + } + + // Step 26. + if (numeric) { + if (!keywords.emplaceBack("kn", numeric)) { + return false; + } + } + + // Step 27. + RootedLinearString numberingSystem(cx); + if (!GetStringOption(cx, options, cx->names().numberingSystem, + &numberingSystem)) { + return false; + } + + // Steps 28-29. + if (numberingSystem) { + if (!IsValidUnicodeExtensionValue(numberingSystem)) { + if (UniqueChars str = QuoteString(cx, numberingSystem, '"')) { + JS_ReportErrorNumberASCII(cx, js::GetErrorMessage, nullptr, + JSMSG_INVALID_OPTION_VALUE, + "numberingSystem", str.get()); + } + return false; + } + + if (!keywords.emplaceBack("nu", numberingSystem)) { + return false; + } + } + + // Step 30. 
+ if (!ApplyUnicodeExtensionToTag(cx, tag, keywords)) { + return false; + } + } + + // ApplyOptionsToTag, steps 9 and 13. + // ApplyUnicodeExtensionToTag, step 9. + if (!tag.canonicalizeExtensions(cx)) { + return false; + } + + // Steps 6, 31-37. + JSObject* obj = CreateLocaleObject(cx, proto, tag); + if (!obj) { + return false; + } + + // Step 38. + args.rval().setObject(*obj); + return true; +} + +using UnicodeKey = const char (&)[UnicodeKeyLength + 1]; + +// Returns the tuple [index, length] of the `type` in the `keyword` in Unicode +// locale extension |extension| that has |key| as its `key`. If `keyword` lacks +// a type, the returned |index| will be where `type` would have been, and +// |length| will be set to zero. +template <typename CharT> +static mozilla::Maybe<IndexAndLength> FindUnicodeExtensionType( + const CharT* extension, size_t length, UnicodeKey key) { + MOZ_ASSERT(extension[0] == 'u'); + MOZ_ASSERT(extension[1] == '-'); + + const CharT* end = extension + length; + + SepKeywordIterator<CharT> iter(extension, end); + + // Search all keywords until a match was found. + const CharT* beginKey; + while (true) { + beginKey = iter.next(); + if (!beginKey) { + return mozilla::Nothing(); + } + + // Add +1 to skip over the separator preceding the keyword. + MOZ_ASSERT(beginKey[0] == '-'); + beginKey++; + + // Exit the loop on the first match. + if (std::equal(beginKey, beginKey + UnicodeKeyLength, key)) { + break; + } + } + + // Skip over the key. + const CharT* beginType = beginKey + UnicodeKeyLength; + + // Find the start of the next keyword. + const CharT* endType = iter.next(); + + // No further keyword present, the current keyword ends the Unicode extension. + if (!endType) { + endType = end; + } + + // If the keyword has a type, skip over the separator preceding the type. 
+ if (beginType != endType) { + MOZ_ASSERT(beginType[0] == '-'); + beginType++; + } + return mozilla::Some(IndexAndLength{size_t(beginType - extension), + size_t(endType - beginType)}); +} + +static inline auto FindUnicodeExtensionType(JSLinearString* unicodeExtension, + UnicodeKey key) { + JS::AutoCheckCannotGC nogc; + return unicodeExtension->hasLatin1Chars() + ? FindUnicodeExtensionType(unicodeExtension->latin1Chars(nogc), + unicodeExtension->length(), key) + : FindUnicodeExtensionType(unicodeExtension->twoByteChars(nogc), + unicodeExtension->length(), key); +} + +// Return the sequence of types for the Unicode extension keyword specified by +// key or undefined when the keyword isn't present. +static bool GetUnicodeExtension(JSContext* cx, LocaleObject* locale, + UnicodeKey key, MutableHandleValue value) { + // Return undefined when no Unicode extension subtag is present. + const Value& unicodeExtensionValue = locale->unicodeExtension(); + if (unicodeExtensionValue.isUndefined()) { + value.setUndefined(); + return true; + } + + JSLinearString* unicodeExtension = + unicodeExtensionValue.toString()->ensureLinear(cx); + if (!unicodeExtension) { + return false; + } + + // Find the type of the requested key in the Unicode extension subtag. + auto result = FindUnicodeExtensionType(unicodeExtension, key); + + // Return undefined if the requested key isn't present in the extension. + if (!result) { + value.setUndefined(); + return true; + } + + size_t index = result->index; + size_t length = result->length; + + // Otherwise return the type value of the found keyword. + JSString* str = NewDependentString(cx, unicodeExtension, index, length); + if (!str) { + return false; + } + value.setString(str); + return true; +} + +// Positions of the base-name subtags. The language position is always set; the +// script and region positions are optional. +struct BaseNamePartsResult { + IndexAndLength language; + mozilla::Maybe<IndexAndLength> script; + mozilla::Maybe<IndexAndLength> region; +}; + +// Returns the index and length of the language subtag and, when present, of +// the script and region subtags in |baseName|. 
+template <typename CharT> +static BaseNamePartsResult BaseNameParts(const CharT* baseName, size_t length) { + size_t languageLength; + size_t scriptIndex = 0; + size_t regionIndex = 0; + size_t regionLength = 0; + + // Search the first separator to find the end of the language subtag. + if (const CharT* sep = std::char_traits<CharT>::find(baseName, length, '-')) { + languageLength = sep - baseName; + + // Add +1 to skip over the separator character. + size_t nextSubtag = languageLength + 1; + + // Script subtags are always four characters long, but take care for a four + // character long variant subtag. These start with a digit. + if ((nextSubtag + ScriptLength == length || + (nextSubtag + ScriptLength < length && + baseName[nextSubtag + ScriptLength] == '-')) && + mozilla::IsAsciiAlpha(baseName[nextSubtag])) { + scriptIndex = nextSubtag; + nextSubtag = scriptIndex + ScriptLength + 1; + } + + // Region subtags can be either two or three characters long. + if (nextSubtag < length) { + for (size_t rlen : {AlphaRegionLength, DigitRegionLength}) { + MOZ_ASSERT(nextSubtag + rlen <= length); + if (nextSubtag + rlen == length || baseName[nextSubtag + rlen] == '-') { + regionIndex = nextSubtag; + regionLength = rlen; + break; + } + } + } + } else { + // No separator found, the base-name consists of just a language subtag. 
+ languageLength = length; + } + + IndexAndLength language{0, languageLength}; + MOZ_ASSERT(intl::IsStructurallyValidLanguageTag(language.spanOf(baseName))); + + mozilla::Maybe<IndexAndLength> script{}; + if (scriptIndex) { + script.emplace(scriptIndex, ScriptLength); + MOZ_ASSERT(intl::IsStructurallyValidScriptTag(script->spanOf(baseName))); + } + + mozilla::Maybe<IndexAndLength> region{}; + if (regionIndex) { + region.emplace(regionIndex, regionLength); + MOZ_ASSERT(intl::IsStructurallyValidRegionTag(region->spanOf(baseName))); + } + + return {language, script, region}; +} + +static inline auto BaseNameParts(JSLinearString* baseName) { + JS::AutoCheckCannotGC nogc; + return baseName->hasLatin1Chars() + ? BaseNameParts(baseName->latin1Chars(nogc), baseName->length()) + : BaseNameParts(baseName->twoByteChars(nogc), baseName->length()); +} + +// Intl.Locale.prototype.maximize () +static bool Locale_maximize(JSContext* cx, const CallArgs& args) { + MOZ_ASSERT(IsLocale(args.thisv())); + + // Step 3. + auto* locale = &args.thisv().toObject().as<LocaleObject>(); + RootedLinearString tagStr(cx, locale->languageTag()->ensureLinear(cx)); + if (!tagStr) { + return false; + } + + LanguageTag tag(cx); + if (!LanguageTagParser::parse(cx, tagStr, tag)) { + return false; + } + + if (!tag.addLikelySubtags(cx)) { + return false; + } + + // Step 4. + auto* result = CreateLocaleObject(cx, nullptr, tag); + if (!result) { + return false; + } + args.rval().setObject(*result); + return true; +} + +// Intl.Locale.prototype.maximize () +static bool Locale_maximize(JSContext* cx, unsigned argc, Value* vp) { + // Steps 1-2. + CallArgs args = CallArgsFromVp(argc, vp); + return CallNonGenericMethod<IsLocale, Locale_maximize>(cx, args); +} + +// Intl.Locale.prototype.minimize () +static bool Locale_minimize(JSContext* cx, const CallArgs& args) { + MOZ_ASSERT(IsLocale(args.thisv())); + + // Step 3. 
+ auto* locale = &args.thisv().toObject().as<LocaleObject>(); + RootedLinearString tagStr(cx, locale->languageTag()->ensureLinear(cx)); + if (!tagStr) { + return false; + } + + LanguageTag tag(cx); + if (!LanguageTagParser::parse(cx, tagStr, tag)) { + return false; + } + + if (!tag.removeLikelySubtags(cx)) { + return false; + } + + // Step 4. + auto* result = CreateLocaleObject(cx, nullptr, tag); + if (!result) { + return false; + } + args.rval().setObject(*result); + return true; +} + +// Intl.Locale.prototype.minimize () +static bool Locale_minimize(JSContext* cx, unsigned argc, Value* vp) { + // Steps 1-2. + CallArgs args = CallArgsFromVp(argc, vp); + return CallNonGenericMethod<IsLocale, Locale_minimize>(cx, args); +} + +// Intl.Locale.prototype.toString () +static bool Locale_toString(JSContext* cx, const CallArgs& args) { + MOZ_ASSERT(IsLocale(args.thisv())); + + // Step 3. + auto* locale = &args.thisv().toObject().as<LocaleObject>(); + args.rval().setString(locale->languageTag()); + return true; +} + +// Intl.Locale.prototype.toString () +static bool Locale_toString(JSContext* cx, unsigned argc, Value* vp) { + // Steps 1-2. + CallArgs args = CallArgsFromVp(argc, vp); + return CallNonGenericMethod<IsLocale, Locale_toString>(cx, args); +} + +// get Intl.Locale.prototype.baseName +static bool Locale_baseName(JSContext* cx, const CallArgs& args) { + MOZ_ASSERT(IsLocale(args.thisv())); + + // Steps 3-4. + auto* locale = &args.thisv().toObject().as<LocaleObject>(); + args.rval().setString(locale->baseName()); + return true; +} + +// get Intl.Locale.prototype.baseName +static bool Locale_baseName(JSContext* cx, unsigned argc, Value* vp) { + // Steps 1-2. + CallArgs args = CallArgsFromVp(argc, vp); + return CallNonGenericMethod<IsLocale, Locale_baseName>(cx, args); +} + +// get Intl.Locale.prototype.calendar +static bool Locale_calendar(JSContext* cx, const CallArgs& args) { + MOZ_ASSERT(IsLocale(args.thisv())); + + // Step 3. 
+ auto* locale = &args.thisv().toObject().as<LocaleObject>(); + return GetUnicodeExtension(cx, locale, "ca", args.rval()); +} + +// get Intl.Locale.prototype.calendar +static bool Locale_calendar(JSContext* cx, unsigned argc, Value* vp) { + // Steps 1-2. + CallArgs args = CallArgsFromVp(argc, vp); + return CallNonGenericMethod<IsLocale, Locale_calendar>(cx, args); +} + +// get Intl.Locale.prototype.caseFirst +static bool Locale_caseFirst(JSContext* cx, const CallArgs& args) { + MOZ_ASSERT(IsLocale(args.thisv())); + + // Step 3. + auto* locale = &args.thisv().toObject().as<LocaleObject>(); + return GetUnicodeExtension(cx, locale, "kf", args.rval()); +} + +// get Intl.Locale.prototype.caseFirst +static bool Locale_caseFirst(JSContext* cx, unsigned argc, Value* vp) { + // Steps 1-2. + CallArgs args = CallArgsFromVp(argc, vp); + return CallNonGenericMethod<IsLocale, Locale_caseFirst>(cx, args); +} + +// get Intl.Locale.prototype.collation +static bool Locale_collation(JSContext* cx, const CallArgs& args) { + MOZ_ASSERT(IsLocale(args.thisv())); + + // Step 3. + auto* locale = &args.thisv().toObject().as<LocaleObject>(); + return GetUnicodeExtension(cx, locale, "co", args.rval()); +} + +// get Intl.Locale.prototype.collation +static bool Locale_collation(JSContext* cx, unsigned argc, Value* vp) { + // Steps 1-2. + CallArgs args = CallArgsFromVp(argc, vp); + return CallNonGenericMethod<IsLocale, Locale_collation>(cx, args); +} + +// get Intl.Locale.prototype.hourCycle +static bool Locale_hourCycle(JSContext* cx, const CallArgs& args) { + MOZ_ASSERT(IsLocale(args.thisv())); + + // Step 3. + auto* locale = &args.thisv().toObject().as<LocaleObject>(); + return GetUnicodeExtension(cx, locale, "hc", args.rval()); +} + +// get Intl.Locale.prototype.hourCycle +static bool Locale_hourCycle(JSContext* cx, unsigned argc, Value* vp) { + // Steps 1-2. 
+ CallArgs args = CallArgsFromVp(argc, vp); + return CallNonGenericMethod<IsLocale, Locale_hourCycle>(cx, args); +} + +// get Intl.Locale.prototype.numeric +static bool Locale_numeric(JSContext* cx, const CallArgs& args) { + MOZ_ASSERT(IsLocale(args.thisv())); + + // Step 3. + auto* locale = &args.thisv().toObject().as<LocaleObject>(); + RootedValue value(cx); + if (!GetUnicodeExtension(cx, locale, "kn", &value)) { + return false; + } + + // Compare against the empty string per Intl.Locale, step 36.a. The Unicode + // extension is already canonicalized, so we don't need to compare against + // "true" at this point. + MOZ_ASSERT(value.isUndefined() || value.isString()); + MOZ_ASSERT_IF(value.isString(), + !StringEqualsLiteral(&value.toString()->asLinear(), "true")); + + args.rval().setBoolean(value.isString() && value.toString()->empty()); + return true; +} + +// get Intl.Locale.prototype.numeric +static bool Locale_numeric(JSContext* cx, unsigned argc, Value* vp) { + // Steps 1-2. + CallArgs args = CallArgsFromVp(argc, vp); + return CallNonGenericMethod<IsLocale, Locale_numeric>(cx, args); +} + +// get Intl.Locale.prototype.numberingSystem +static bool Intl_Locale_numberingSystem(JSContext* cx, const CallArgs& args) { + MOZ_ASSERT(IsLocale(args.thisv())); + + // Step 3. + auto* locale = &args.thisv().toObject().as<LocaleObject>(); + return GetUnicodeExtension(cx, locale, "nu", args.rval()); +} + +// get Intl.Locale.prototype.numberingSystem +static bool Locale_numberingSystem(JSContext* cx, unsigned argc, Value* vp) { + // Steps 1-2. + CallArgs args = CallArgsFromVp(argc, vp); + return CallNonGenericMethod<IsLocale, Intl_Locale_numberingSystem>(cx, args); +} + +// get Intl.Locale.prototype.language +static bool Locale_language(JSContext* cx, const CallArgs& args) { + MOZ_ASSERT(IsLocale(args.thisv())); + + // Step 3. 
+ auto* locale = &args.thisv().toObject().as<LocaleObject>(); + JSLinearString* baseName = locale->baseName()->ensureLinear(cx); + if (!baseName) { + return false; + } + + // Step 4 (Unnecessary assertion). + + auto language = BaseNameParts(baseName).language; + + size_t index = language.index; + size_t length = language.length; + + // Step 5. + JSString* str = NewDependentString(cx, baseName, index, length); + if (!str) { + return false; + } + + args.rval().setString(str); + return true; +} + +// get Intl.Locale.prototype.language +static bool Locale_language(JSContext* cx, unsigned argc, Value* vp) { + // Steps 1-2. + CallArgs args = CallArgsFromVp(argc, vp); + return CallNonGenericMethod<IsLocale, Locale_language>(cx, args); +} + +// get Intl.Locale.prototype.script +static bool Locale_script(JSContext* cx, const CallArgs& args) { + MOZ_ASSERT(IsLocale(args.thisv())); + + // Step 3. + auto* locale = &args.thisv().toObject().as<LocaleObject>(); + JSLinearString* baseName = locale->baseName()->ensureLinear(cx); + if (!baseName) { + return false; + } + + // Step 4 (Unnecessary assertion). + + auto script = BaseNameParts(baseName).script; + + // Step 5. + if (!script) { + args.rval().setUndefined(); + return true; + } + + size_t index = script->index; + size_t length = script->length; + + // Step 6. + JSString* str = NewDependentString(cx, baseName, index, length); + if (!str) { + return false; + } + + args.rval().setString(str); + return true; +} + +// get Intl.Locale.prototype.script +static bool Locale_script(JSContext* cx, unsigned argc, Value* vp) { + // Steps 1-2. + CallArgs args = CallArgsFromVp(argc, vp); + return CallNonGenericMethod<IsLocale, Locale_script>(cx, args); +} + +// get Intl.Locale.prototype.region +static bool Locale_region(JSContext* cx, const CallArgs& args) { + MOZ_ASSERT(IsLocale(args.thisv())); + + // Step 3. 
+ auto* locale = &args.thisv().toObject().as<LocaleObject>(); + JSLinearString* baseName = locale->baseName()->ensureLinear(cx); + if (!baseName) { + return false; + } + + // Step 4 (Unnecessary assertion). + + auto region = BaseNameParts(baseName).region; + + // Step 5. + if (!region) { + args.rval().setUndefined(); + return true; + } + + size_t index = region->index; + size_t length = region->length; + + // Step 6. + JSString* str = NewDependentString(cx, baseName, index, length); + if (!str) { + return false; + } + + args.rval().setString(str); + return true; +} + +// get Intl.Locale.prototype.region +static bool Locale_region(JSContext* cx, unsigned argc, Value* vp) { + // Steps 1-2. + CallArgs args = CallArgsFromVp(argc, vp); + return CallNonGenericMethod<IsLocale, Locale_region>(cx, args); +} + +static bool Locale_toSource(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + args.rval().setString(cx->names().Locale); + return true; +} + +static const JSFunctionSpec locale_methods[] = { + JS_FN("maximize", Locale_maximize, 0, 0), + JS_FN("minimize", Locale_minimize, 0, 0), + JS_FN(js_toString_str, Locale_toString, 0, 0), + JS_FN(js_toSource_str, Locale_toSource, 0, 0), JS_FS_END}; + +static const JSPropertySpec locale_properties[] = { + JS_PSG("baseName", Locale_baseName, 0), + JS_PSG("calendar", Locale_calendar, 0), + JS_PSG("caseFirst", Locale_caseFirst, 0), + JS_PSG("collation", Locale_collation, 0), + JS_PSG("hourCycle", Locale_hourCycle, 0), + JS_PSG("numeric", Locale_numeric, 0), + JS_PSG("numberingSystem", Locale_numberingSystem, 0), + JS_PSG("language", Locale_language, 0), + JS_PSG("script", Locale_script, 0), + JS_PSG("region", Locale_region, 0), + JS_STRING_SYM_PS(toStringTag, "Intl.Locale", JSPROP_READONLY), + JS_PS_END}; + +const ClassSpec LocaleObject::classSpec_ = { + GenericCreateConstructor<Locale, 1, gc::AllocKind::FUNCTION>, + GenericCreatePrototype<LocaleObject>, + nullptr, + nullptr, + locale_methods, + 
locale_properties, + nullptr, + ClassSpec::DontDefineConstructor}; + +bool js::intl_ValidateAndCanonicalizeLanguageTag(JSContext* cx, unsigned argc, + Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 2); + + HandleValue tagValue = args[0]; + bool applyToString = args[1].toBoolean(); + + if (tagValue.isObject()) { + JSString* tagStr; + JS_TRY_VAR_OR_RETURN_FALSE( + cx, tagStr, + LanguageTagFromMaybeWrappedLocale(cx, &tagValue.toObject())); + if (tagStr) { + args.rval().setString(tagStr); + return true; + } + } + + if (!applyToString && !tagValue.isString()) { + args.rval().setNull(); + return true; + } + + JSString* tagStr = ToString(cx, tagValue); + if (!tagStr) { + return false; + } + + RootedLinearString tagLinearStr(cx, tagStr->ensureLinear(cx)); + if (!tagLinearStr) { + return false; + } + + // Handle the common case (a standalone language) first. + // Only the following Unicode BCP 47 locale identifier subset is accepted: + // unicode_locale_id = unicode_language_id + // unicode_language_id = unicode_language_subtag + // unicode_language_subtag = alpha{2,3} + JSString* language; + JS_TRY_VAR_OR_RETURN_FALSE( + cx, language, intl::ParseStandaloneISO639LanguageTag(cx, tagLinearStr)); + if (language) { + args.rval().setString(language); + return true; + } + + LanguageTag tag(cx); + if (!LanguageTagParser::parse(cx, tagLinearStr, tag)) { + return false; + } + + if (!tag.canonicalize(cx)) { + return false; + } + + JSString* resultStr = tag.toString(cx); + if (!resultStr) { + return false; + } + args.rval().setString(resultStr); + return true; +} + +bool js::intl_TryValidateAndCanonicalizeLanguageTag(JSContext* cx, + unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 1); + + RootedLinearString linear(cx, args[0].toString()->ensureLinear(cx)); + if (!linear) { + return false; + } + + LanguageTag tag(cx); + bool ok; + JS_TRY_VAR_OR_RETURN_FALSE(cx, ok, + 
LanguageTagParser::tryParse(cx, linear, tag)); + + // The caller handles invalid inputs. + if (!ok) { + args.rval().setNull(); + return true; + } + + if (!tag.canonicalize(cx)) { + return false; + } + + JSString* resultStr = tag.toString(cx); + if (!resultStr) { + return false; + } + args.rval().setString(resultStr); + return true; +} + +bool js::intl_ValidateAndCanonicalizeUnicodeExtensionType(JSContext* cx, + unsigned argc, + Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 3); + + HandleValue typeArg = args[0]; + MOZ_ASSERT(typeArg.isString(), "type must be a string"); + + HandleValue optionArg = args[1]; + MOZ_ASSERT(optionArg.isString(), "option name must be a string"); + + HandleValue keyArg = args[2]; + MOZ_ASSERT(keyArg.isString(), "key must be a string"); + + RootedLinearString unicodeType(cx, typeArg.toString()->ensureLinear(cx)); + if (!unicodeType) { + return false; + } + + if (!IsValidUnicodeExtensionValue(unicodeType)) { + UniqueChars optionChars = EncodeAscii(cx, optionArg.toString()); + if (!optionChars) { + return false; + } + + UniqueChars unicodeTypeChars = QuoteString(cx, unicodeType, '"'); + if (!unicodeTypeChars) { + return false; + } + + JS_ReportErrorNumberASCII(cx, js::GetErrorMessage, nullptr, + JSMSG_INVALID_OPTION_VALUE, optionChars.get(), + unicodeTypeChars.get()); + return false; + } + + char unicodeKey[UnicodeKeyLength]; + { + JSLinearString* str = keyArg.toString()->ensureLinear(cx); + if (!str) { + return false; + } + MOZ_ASSERT(str->length() == UnicodeKeyLength); + + for (size_t i = 0; i < UnicodeKeyLength; i++) { + char16_t ch = str->latin1OrTwoByteChar(i); + MOZ_ASSERT(mozilla::IsAscii(ch)); + unicodeKey[i] = char(ch); + } + } + + UniqueChars unicodeTypeChars = EncodeAscii(cx, unicodeType); + if (!unicodeTypeChars) { + return false; + } + + size_t unicodeTypeLength = unicodeType->length(); + MOZ_ASSERT(strlen(unicodeTypeChars.get()) == unicodeTypeLength); + + // Convert into canonical case 
before searching for replacements. + intl::AsciiToLowerCase(unicodeTypeChars.get(), unicodeTypeLength, + unicodeTypeChars.get()); + + auto key = mozilla::Span(unicodeKey, UnicodeKeyLength); + auto type = mozilla::Span(unicodeTypeChars.get(), unicodeTypeLength); + + // Search if there's a replacement for the current Unicode keyword. + JSString* result; + if (const char* replacement = + LanguageTag::replaceUnicodeExtensionType(key, type)) { + result = NewStringCopyZ<CanGC>(cx, replacement); + } else { + result = StringToLowerCase(cx, unicodeType); + } + if (!result) { + return false; + } + + args.rval().setString(result); + return true; +} diff --git a/js/src/builtin/intl/Locale.h b/js/src/builtin/intl/Locale.h new file mode 100644 index 0000000000..f8a2182e19 --- /dev/null +++ b/js/src/builtin/intl/Locale.h @@ -0,0 +1,62 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef builtin_intl_Locale_h +#define builtin_intl_Locale_h + +#include <stdint.h> + +#include "builtin/SelfHostingDefines.h" +#include "js/Class.h" +#include "vm/NativeObject.h" + +namespace js { + +class LocaleObject : public NativeObject { + public: + static const JSClass class_; + static const JSClass& protoClass_; + + static constexpr uint32_t LANGUAGE_TAG_SLOT = 0; + static constexpr uint32_t BASENAME_SLOT = 1; + static constexpr uint32_t UNICODE_EXTENSION_SLOT = 2; + static constexpr uint32_t SLOT_COUNT = 3; + + /** + * Returns the complete language tag, including any extensions and privateuse + * subtags. + */ + JSString* languageTag() const { + return getFixedSlot(LANGUAGE_TAG_SLOT).toString(); + } + + /** + * Returns the basename subtags, i.e. excluding any extensions and privateuse + * subtags. 
+ */ + JSString* baseName() const { return getFixedSlot(BASENAME_SLOT).toString(); } + + const Value& unicodeExtension() const { + return getFixedSlot(UNICODE_EXTENSION_SLOT); + } + + private: + static const ClassSpec classSpec_; +}; + +extern MOZ_MUST_USE bool intl_ValidateAndCanonicalizeLanguageTag(JSContext* cx, + unsigned argc, + Value* vp); + +extern MOZ_MUST_USE bool intl_TryValidateAndCanonicalizeLanguageTag( + JSContext* cx, unsigned argc, Value* vp); + +extern MOZ_MUST_USE bool intl_ValidateAndCanonicalizeUnicodeExtensionType( + JSContext* cx, unsigned argc, Value* vp); + +} // namespace js + +#endif /* builtin_intl_Locale_h */ diff --git a/js/src/builtin/intl/MeasureUnitGenerated.h b/js/src/builtin/intl/MeasureUnitGenerated.h new file mode 100644 index 0000000000..2f89a09236 --- /dev/null +++ b/js/src/builtin/intl/MeasureUnitGenerated.h @@ -0,0 +1,59 @@ +// Generated by make_intl_data.py. DO NOT EDIT. + +struct MeasureUnit { + const char* const type; + const char* const name; +}; + +/** + * The list of currently supported simple unit identifiers. + * + * The list must be kept in alphabetical order of |name|. 
+ */ +inline constexpr MeasureUnit simpleMeasureUnits[] = { + // clang-format off + {"area", "acre"}, + {"digital", "bit"}, + {"digital", "byte"}, + {"temperature", "celsius"}, + {"length", "centimeter"}, + {"duration", "day"}, + {"angle", "degree"}, + {"temperature", "fahrenheit"}, + {"volume", "fluid-ounce"}, + {"length", "foot"}, + {"volume", "gallon"}, + {"digital", "gigabit"}, + {"digital", "gigabyte"}, + {"mass", "gram"}, + {"area", "hectare"}, + {"duration", "hour"}, + {"length", "inch"}, + {"digital", "kilobit"}, + {"digital", "kilobyte"}, + {"mass", "kilogram"}, + {"length", "kilometer"}, + {"volume", "liter"}, + {"digital", "megabit"}, + {"digital", "megabyte"}, + {"length", "meter"}, + {"length", "mile"}, + {"length", "mile-scandinavian"}, + {"volume", "milliliter"}, + {"length", "millimeter"}, + {"duration", "millisecond"}, + {"duration", "minute"}, + {"duration", "month"}, + {"mass", "ounce"}, + {"concentr", "percent"}, + {"digital", "petabyte"}, + {"mass", "pound"}, + {"duration", "second"}, + {"mass", "stone"}, + {"digital", "terabit"}, + {"digital", "terabyte"}, + {"duration", "week"}, + {"length", "yard"}, + {"duration", "year"}, + // clang-format on +}; diff --git a/js/src/builtin/intl/NumberFormat.cpp b/js/src/builtin/intl/NumberFormat.cpp new file mode 100644 index 0000000000..dd01c9cfc0 --- /dev/null +++ b/js/src/builtin/intl/NumberFormat.cpp @@ -0,0 +1,1467 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Intl.NumberFormat implementation. 
*/ + +#include "builtin/intl/NumberFormat.h" + +#include "mozilla/Assertions.h" +#include "mozilla/Casting.h" +#include "mozilla/FloatingPoint.h" +#include "mozilla/UniquePtr.h" + +#include <algorithm> +#include <cstring> +#include <iterator> +#include <stddef.h> +#include <stdint.h> +#include <string> +#include <type_traits> + +#include "builtin/Array.h" +#include "builtin/intl/CommonFunctions.h" +#include "builtin/intl/LanguageTag.h" +#include "builtin/intl/MeasureUnitGenerated.h" +#include "builtin/intl/RelativeTimeFormat.h" +#include "builtin/intl/ScopedICUObject.h" +#include "ds/Sort.h" +#include "gc/FreeOp.h" +#include "js/CharacterEncoding.h" +#include "js/PropertySpec.h" +#include "js/RootingAPI.h" +#include "js/TypeDecls.h" +#include "js/Vector.h" +#include "unicode/udata.h" +#include "unicode/ufieldpositer.h" +#include "unicode/uformattedvalue.h" +#include "unicode/unum.h" +#include "unicode/unumberformatter.h" +#include "unicode/unumsys.h" +#include "unicode/ures.h" +#include "unicode/utypes.h" +#include "vm/BigIntType.h" +#include "vm/GlobalObject.h" +#include "vm/JSContext.h" +#include "vm/PlainObject.h" // js::PlainObject +#include "vm/SelfHosting.h" +#include "vm/Stack.h" +#include "vm/StringType.h" + +#include "vm/JSObject-inl.h" + +using namespace js; + +using mozilla::AssertedCast; +using mozilla::IsFinite; +using mozilla::IsNaN; +using mozilla::IsNegative; +using mozilla::SpecificNaN; + +using js::intl::CallICU; +using js::intl::DateTimeFormatOptions; +using js::intl::FieldType; +using js::intl::IcuLocale; + +const JSClassOps NumberFormatObject::classOps_ = { + nullptr, // addProperty + nullptr, // delProperty + nullptr, // enumerate + nullptr, // newEnumerate + nullptr, // resolve + nullptr, // mayResolve + NumberFormatObject::finalize, // finalize + nullptr, // call + nullptr, // hasInstance + nullptr, // construct + nullptr, // trace +}; + +const JSClass NumberFormatObject::class_ = { + "Intl.NumberFormat", + 
JSCLASS_HAS_RESERVED_SLOTS(NumberFormatObject::SLOT_COUNT) | + JSCLASS_HAS_CACHED_PROTO(JSProto_NumberFormat) | + JSCLASS_FOREGROUND_FINALIZE, + &NumberFormatObject::classOps_, &NumberFormatObject::classSpec_}; + +const JSClass& NumberFormatObject::protoClass_ = PlainObject::class_; + +static bool numberFormat_toSource(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + args.rval().setString(cx->names().NumberFormat); + return true; +} + +static const JSFunctionSpec numberFormat_static_methods[] = { + JS_SELF_HOSTED_FN("supportedLocalesOf", + "Intl_NumberFormat_supportedLocalesOf", 1, 0), + JS_FS_END}; + +static const JSFunctionSpec numberFormat_methods[] = { + JS_SELF_HOSTED_FN("resolvedOptions", "Intl_NumberFormat_resolvedOptions", 0, + 0), + JS_SELF_HOSTED_FN("formatToParts", "Intl_NumberFormat_formatToParts", 1, 0), + JS_FN(js_toSource_str, numberFormat_toSource, 0, 0), JS_FS_END}; + +static const JSPropertySpec numberFormat_properties[] = { + JS_SELF_HOSTED_GET("format", "$Intl_NumberFormat_format_get", 0), + JS_STRING_SYM_PS(toStringTag, "Intl.NumberFormat", JSPROP_READONLY), + JS_PS_END}; + +static bool NumberFormat(JSContext* cx, unsigned argc, Value* vp); + +const ClassSpec NumberFormatObject::classSpec_ = { + GenericCreateConstructor<NumberFormat, 0, gc::AllocKind::FUNCTION>, + GenericCreatePrototype<NumberFormatObject>, + numberFormat_static_methods, + nullptr, + numberFormat_methods, + numberFormat_properties, + nullptr, + ClassSpec::DontDefineConstructor}; + +/** + * 11.2.1 Intl.NumberFormat([ locales [, options]]) + * + * ES2017 Intl draft rev 94045d234762ad107a3d09bb6f7381a65f1a2f9b + */ +static bool NumberFormat(JSContext* cx, const CallArgs& args, bool construct) { + // Step 1 (Handled by OrdinaryCreateFromConstructor fallback code). + + // Step 2 (Inlined 9.1.14, OrdinaryCreateFromConstructor). 
+ RootedObject proto(cx); + if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_NumberFormat, + &proto)) { + return false; + } + + Rooted<NumberFormatObject*> numberFormat(cx); + numberFormat = NewObjectWithClassProto<NumberFormatObject>(cx, proto); + if (!numberFormat) { + return false; + } + + RootedValue thisValue(cx, + construct ? ObjectValue(*numberFormat) : args.thisv()); + HandleValue locales = args.get(0); + HandleValue options = args.get(1); + + // Step 3. + return intl::LegacyInitializeObject( + cx, numberFormat, cx->names().InitializeNumberFormat, thisValue, locales, + options, DateTimeFormatOptions::Standard, args.rval()); +} + +static bool NumberFormat(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + return NumberFormat(cx, args, args.isConstructing()); +} + +bool js::intl_NumberFormat(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 2); + MOZ_ASSERT(!args.isConstructing()); + // intl_NumberFormat is an intrinsic for self-hosted JavaScript, so it + // cannot be used with "new", but it still has to be treated as a + // constructor. + return NumberFormat(cx, args, true); +} + +void js::NumberFormatObject::finalize(JSFreeOp* fop, JSObject* obj) { + MOZ_ASSERT(fop->onMainThread()); + + auto* numberFormat = &obj->as<NumberFormatObject>(); + UNumberFormatter* nf = numberFormat->getNumberFormatter(); + UFormattedNumber* formatted = numberFormat->getFormattedNumber(); + + if (nf) { + intl::RemoveICUCellMemory(fop, obj, NumberFormatObject::EstimatedMemoryUse); + + unumf_close(nf); + } + if (formatted) { + // UFormattedNumber memory tracked as part of UNumberFormatter. 
+ + unumf_closeResult(formatted); + } +} + +bool js::intl_numberingSystem(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 1); + MOZ_ASSERT(args[0].isString()); + + UniqueChars locale = intl::EncodeLocale(cx, args[0].toString()); + if (!locale) { + return false; + } + + UErrorCode status = U_ZERO_ERROR; + UNumberingSystem* numbers = unumsys_open(IcuLocale(locale.get()), &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + + ScopedICUObject<UNumberingSystem, unumsys_close> toClose(numbers); + + const char* name = unumsys_getName(numbers); + if (!name) { + intl::ReportInternalError(cx); + return false; + } + + JSString* jsname = NewStringCopyZ<CanGC>(cx, name); + if (!jsname) { + return false; + } + + args.rval().setString(jsname); + return true; +} + +#if DEBUG || MOZ_SYSTEM_ICU +class UResourceBundleDeleter { + public: + void operator()(UResourceBundle* aPtr) { ures_close(aPtr); } +}; + +using UniqueUResourceBundle = + mozilla::UniquePtr<UResourceBundle, UResourceBundleDeleter>; + +bool js::intl_availableMeasurementUnits(JSContext* cx, unsigned argc, + Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 0); + + RootedObject measurementUnits( + cx, NewObjectWithGivenProto<PlainObject>(cx, nullptr)); + if (!measurementUnits) { + return false; + } + + // Look up the available measurement units in the resource bundle of the root + // locale. 
+ + static const char packageName[] = + U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "unit"; + static const char rootLocale[] = ""; + + UErrorCode status = U_ZERO_ERROR; + UResourceBundle* rawRes = ures_open(packageName, rootLocale, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + UniqueUResourceBundle res(rawRes); + + UResourceBundle* rawUnits = + ures_getByKey(res.get(), "units", nullptr, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + UniqueUResourceBundle units(rawUnits); + + RootedAtom unitAtom(cx); + + int32_t unitsSize = ures_getSize(units.get()); + for (int32_t i = 0; i < unitsSize; i++) { + UResourceBundle* rawType = + ures_getByIndex(units.get(), i, nullptr, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + UniqueUResourceBundle type(rawType); + + int32_t typeSize = ures_getSize(type.get()); + for (int32_t j = 0; j < typeSize; j++) { + UResourceBundle* rawSubtype = + ures_getByIndex(type.get(), j, nullptr, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + UniqueUResourceBundle subtype(rawSubtype); + + const char* unitIdentifier = ures_getKey(subtype.get()); + + unitAtom = Atomize(cx, unitIdentifier, strlen(unitIdentifier)); + if (!unitAtom) { + return false; + } + if (!DefineDataProperty(cx, measurementUnits, unitAtom->asPropertyName(), + TrueHandleValue)) { + return false; + } + } + } + + args.rval().setObject(*measurementUnits); + return true; +} +#endif + +bool js::intl::NumberFormatterSkeleton::currency(JSLinearString* currency) { + MOZ_ASSERT(currency->length() == 3, + "IsWellFormedCurrencyCode permits only length-3 strings"); + + char16_t currencyChars[] = {currency->latin1OrTwoByteChar(0), + currency->latin1OrTwoByteChar(1), + currency->latin1OrTwoByteChar(2), '\0'}; + return append(u"currency/") && append(currencyChars) && append(' '); +} + +bool 
js::intl::NumberFormatterSkeleton::currencyDisplay( + CurrencyDisplay display) { + switch (display) { + case CurrencyDisplay::Code: + return appendToken(u"unit-width-iso-code"); + case CurrencyDisplay::Name: + return appendToken(u"unit-width-full-name"); + case CurrencyDisplay::Symbol: + // Default, no additional tokens needed. + return true; + case CurrencyDisplay::NarrowSymbol: + return appendToken(u"unit-width-narrow"); + } + MOZ_CRASH("unexpected currency display type"); +} + +static const MeasureUnit& FindSimpleMeasureUnit(const char* name) { + auto measureUnit = std::lower_bound( + std::begin(simpleMeasureUnits), std::end(simpleMeasureUnits), name, + [](const auto& measureUnit, const char* name) { + return strcmp(measureUnit.name, name) < 0; + }); + MOZ_ASSERT(measureUnit != std::end(simpleMeasureUnits), + "unexpected unit identifier: unit not found"); + MOZ_ASSERT(strcmp(measureUnit->name, name) == 0, + "unexpected unit identifier: wrong unit found"); + return *measureUnit; +} + +static constexpr size_t MaxUnitLength() { + size_t length = 0; + for (const auto& unit : simpleMeasureUnits) { + length = std::max(length, std::char_traits<char>::length(unit.name)); + } + return length * 2 + std::char_traits<char>::length("-per-"); +} + +bool js::intl::NumberFormatterSkeleton::unit(JSLinearString* unit) { + MOZ_RELEASE_ASSERT(unit->length() <= MaxUnitLength()); + + char unitChars[MaxUnitLength() + 1] = {}; + CopyChars(reinterpret_cast<Latin1Char*>(unitChars), *unit); + + auto appendUnit = [this](const MeasureUnit& unit) { + return append(unit.type, strlen(unit.type)) && append('-') && + append(unit.name, strlen(unit.name)); + }; + + // |unit| can be a compound unit identifier, separated by "-per-". + + static constexpr char separator[] = "-per-"; + if (char* p = strstr(unitChars, separator)) { + // Split into two strings. 
+ p[0] = '\0'; + + auto& numerator = FindSimpleMeasureUnit(unitChars); + if (!append(u"measure-unit/") || !appendUnit(numerator) || !append(' ')) { + return false; + } + + auto& denominator = FindSimpleMeasureUnit(p + strlen(separator)); + if (!append(u"per-measure-unit/") || !appendUnit(denominator) || + !append(' ')) { + return false; + } + } else { + auto& simple = FindSimpleMeasureUnit(unitChars); + if (!append(u"measure-unit/") || !appendUnit(simple) || !append(' ')) { + return false; + } + } + return true; +} + +bool js::intl::NumberFormatterSkeleton::unitDisplay(UnitDisplay display) { + switch (display) { + case UnitDisplay::Short: + return appendToken(u"unit-width-short"); + case UnitDisplay::Narrow: + return appendToken(u"unit-width-narrow"); + case UnitDisplay::Long: + return appendToken(u"unit-width-full-name"); + } + MOZ_CRASH("unexpected unit display type"); +} + +bool js::intl::NumberFormatterSkeleton::percent() { + return appendToken(u"percent scale/100"); +} + +bool js::intl::NumberFormatterSkeleton::fractionDigits(uint32_t min, + uint32_t max) { + // Note: |min| can be zero here. + MOZ_ASSERT(min <= max); + return append('.') && appendN('0', min) && appendN('#', max - min) && + append(' '); +} + +bool js::intl::NumberFormatterSkeleton::integerWidth(uint32_t min) { + MOZ_ASSERT(min > 0); + return append(u"integer-width/+") && appendN('0', min) && append(' '); +} + +bool js::intl::NumberFormatterSkeleton::significantDigits(uint32_t min, + uint32_t max) { + MOZ_ASSERT(min > 0); + MOZ_ASSERT(min <= max); + return appendN('@', min) && appendN('#', max - min) && append(' '); +} + +bool js::intl::NumberFormatterSkeleton::useGrouping(bool on) { + return on || appendToken(u"group-off"); +} + +bool js::intl::NumberFormatterSkeleton::notation(Notation style) { + switch (style) { + case Notation::Standard: + // Default, no additional tokens needed. 
+ return true; + case Notation::Scientific: + return appendToken(u"scientific"); + case Notation::Engineering: + return appendToken(u"engineering"); + case Notation::CompactShort: + return appendToken(u"compact-short"); + case Notation::CompactLong: + return appendToken(u"compact-long"); + } + MOZ_CRASH("unexpected notation style"); +} + +bool js::intl::NumberFormatterSkeleton::signDisplay(SignDisplay display) { + switch (display) { + case SignDisplay::Auto: + // Default, no additional tokens needed. + return true; + case SignDisplay::Always: + return appendToken(u"sign-always"); + case SignDisplay::Never: + return appendToken(u"sign-never"); + case SignDisplay::ExceptZero: + return appendToken(u"sign-except-zero"); + case SignDisplay::Accounting: + return appendToken(u"sign-accounting"); + case SignDisplay::AccountingAlways: + return appendToken(u"sign-accounting-always"); + case SignDisplay::AccountingExceptZero: + return appendToken(u"sign-accounting-except-zero"); + } + MOZ_CRASH("unexpected sign display type"); +} + +bool js::intl::NumberFormatterSkeleton::roundingModeHalfUp() { + return appendToken(u"rounding-mode-half-up"); +} + +UNumberFormatter* js::intl::NumberFormatterSkeleton::toFormatter( + JSContext* cx, const char* locale) { + UErrorCode status = U_ZERO_ERROR; + UNumberFormatter* nf = unumf_openForSkeletonAndLocale( + vector_.begin(), vector_.length(), locale, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return nullptr; + } + return nf; +} + +/** + * Returns a new UNumberFormatter with the locale and number formatting options + * of the given NumberFormat. 
+ */ +static UNumberFormatter* NewUNumberFormatter( + JSContext* cx, Handle<NumberFormatObject*> numberFormat) { + RootedValue value(cx); + + RootedObject internals(cx, intl::GetInternalsObject(cx, numberFormat)); + if (!internals) { + return nullptr; + } + + if (!GetProperty(cx, internals, internals, cx->names().locale, &value)) { + return nullptr; + } + + // ICU expects numberingSystem as a Unicode locale extension on the locale. + + intl::LanguageTag tag(cx); + { + JSLinearString* locale = value.toString()->ensureLinear(cx); + if (!locale) { + return nullptr; + } + + if (!intl::LanguageTagParser::parse(cx, locale, tag)) { + return nullptr; + } + } + + JS::RootedVector<intl::UnicodeExtensionKeyword> keywords(cx); + + if (!GetProperty(cx, internals, internals, cx->names().numberingSystem, + &value)) { + return nullptr; + } + + { + JSLinearString* numberingSystem = value.toString()->ensureLinear(cx); + if (!numberingSystem) { + return nullptr; + } + + if (!keywords.emplaceBack("nu", numberingSystem)) { + return nullptr; + } + } + + // |ApplyUnicodeExtensionToTag| applies the new keywords to the front of + // the Unicode extension subtag. We're then relying on ICU to follow RFC + // 6067, which states that any trailing keywords using the same key + // should be ignored. 
+ if (!intl::ApplyUnicodeExtensionToTag(cx, tag, keywords)) { + return nullptr; + } + + UniqueChars locale = tag.toStringZ(cx); + if (!locale) { + return nullptr; + } + + intl::NumberFormatterSkeleton skeleton(cx); + + if (!GetProperty(cx, internals, internals, cx->names().style, &value)) { + return nullptr; + } + + bool accountingSign = false; + { + JSLinearString* style = value.toString()->ensureLinear(cx); + if (!style) { + return nullptr; + } + + if (StringEqualsLiteral(style, "currency")) { + if (!GetProperty(cx, internals, internals, cx->names().currency, + &value)) { + return nullptr; + } + JSLinearString* currency = value.toString()->ensureLinear(cx); + if (!currency) { + return nullptr; + } + + if (!skeleton.currency(currency)) { + return nullptr; + } + + if (!GetProperty(cx, internals, internals, cx->names().currencyDisplay, + &value)) { + return nullptr; + } + JSLinearString* currencyDisplay = value.toString()->ensureLinear(cx); + if (!currencyDisplay) { + return nullptr; + } + + using CurrencyDisplay = intl::NumberFormatterSkeleton::CurrencyDisplay; + + CurrencyDisplay display; + if (StringEqualsLiteral(currencyDisplay, "code")) { + display = CurrencyDisplay::Code; + } else if (StringEqualsLiteral(currencyDisplay, "symbol")) { + display = CurrencyDisplay::Symbol; + } else if (StringEqualsLiteral(currencyDisplay, "narrowSymbol")) { + display = CurrencyDisplay::NarrowSymbol; + } else { + MOZ_ASSERT(StringEqualsLiteral(currencyDisplay, "name")); + display = CurrencyDisplay::Name; + } + + if (!skeleton.currencyDisplay(display)) { + return nullptr; + } + + if (!GetProperty(cx, internals, internals, cx->names().currencySign, + &value)) { + return nullptr; + } + JSLinearString* currencySign = value.toString()->ensureLinear(cx); + if (!currencySign) { + return nullptr; + } + + if (StringEqualsLiteral(currencySign, "accounting")) { + accountingSign = true; + } else { + MOZ_ASSERT(StringEqualsLiteral(currencySign, "standard")); + } + } else if 
(StringEqualsLiteral(style, "percent")) { + if (!skeleton.percent()) { + return nullptr; + } + } else if (StringEqualsLiteral(style, "unit")) { + if (!GetProperty(cx, internals, internals, cx->names().unit, &value)) { + return nullptr; + } + JSLinearString* unit = value.toString()->ensureLinear(cx); + if (!unit) { + return nullptr; + } + + if (!skeleton.unit(unit)) { + return nullptr; + } + + if (!GetProperty(cx, internals, internals, cx->names().unitDisplay, + &value)) { + return nullptr; + } + JSLinearString* unitDisplay = value.toString()->ensureLinear(cx); + if (!unitDisplay) { + return nullptr; + } + + using UnitDisplay = intl::NumberFormatterSkeleton::UnitDisplay; + + UnitDisplay display; + if (StringEqualsLiteral(unitDisplay, "short")) { + display = UnitDisplay::Short; + } else if (StringEqualsLiteral(unitDisplay, "narrow")) { + display = UnitDisplay::Narrow; + } else { + MOZ_ASSERT(StringEqualsLiteral(unitDisplay, "long")); + display = UnitDisplay::Long; + } + + if (!skeleton.unitDisplay(display)) { + return nullptr; + } + } else { + MOZ_ASSERT(StringEqualsLiteral(style, "decimal")); + } + } + + bool hasMinimumSignificantDigits; + if (!HasProperty(cx, internals, cx->names().minimumSignificantDigits, + &hasMinimumSignificantDigits)) { + return nullptr; + } + + if (hasMinimumSignificantDigits) { + if (!GetProperty(cx, internals, internals, + cx->names().minimumSignificantDigits, &value)) { + return nullptr; + } + uint32_t minimumSignificantDigits = AssertedCast<uint32_t>(value.toInt32()); + + if (!GetProperty(cx, internals, internals, + cx->names().maximumSignificantDigits, &value)) { + return nullptr; + } + uint32_t maximumSignificantDigits = AssertedCast<uint32_t>(value.toInt32()); + + if (!skeleton.significantDigits(minimumSignificantDigits, + maximumSignificantDigits)) { + return nullptr; + } + } + + bool hasMinimumFractionDigits; + if (!HasProperty(cx, internals, cx->names().minimumFractionDigits, + &hasMinimumFractionDigits)) { + return nullptr; + } + + 
if (hasMinimumFractionDigits) { + if (!GetProperty(cx, internals, internals, + cx->names().minimumFractionDigits, &value)) { + return nullptr; + } + uint32_t minimumFractionDigits = AssertedCast<uint32_t>(value.toInt32()); + + if (!GetProperty(cx, internals, internals, + cx->names().maximumFractionDigits, &value)) { + return nullptr; + } + uint32_t maximumFractionDigits = AssertedCast<uint32_t>(value.toInt32()); + + if (!skeleton.fractionDigits(minimumFractionDigits, + maximumFractionDigits)) { + return nullptr; + } + } + + if (!GetProperty(cx, internals, internals, cx->names().minimumIntegerDigits, + &value)) { + return nullptr; + } + uint32_t minimumIntegerDigits = AssertedCast<uint32_t>(value.toInt32()); + + if (!skeleton.integerWidth(minimumIntegerDigits)) { + return nullptr; + } + + if (!GetProperty(cx, internals, internals, cx->names().useGrouping, &value)) { + return nullptr; + } + if (!skeleton.useGrouping(value.toBoolean())) { + return nullptr; + } + + if (!GetProperty(cx, internals, internals, cx->names().notation, &value)) { + return nullptr; + } + + { + JSLinearString* notation = value.toString()->ensureLinear(cx); + if (!notation) { + return nullptr; + } + + using Notation = intl::NumberFormatterSkeleton::Notation; + + Notation style; + if (StringEqualsLiteral(notation, "standard")) { + style = Notation::Standard; + } else if (StringEqualsLiteral(notation, "scientific")) { + style = Notation::Scientific; + } else if (StringEqualsLiteral(notation, "engineering")) { + style = Notation::Engineering; + } else { + MOZ_ASSERT(StringEqualsLiteral(notation, "compact")); + + if (!GetProperty(cx, internals, internals, cx->names().compactDisplay, + &value)) { + return nullptr; + } + + JSLinearString* compactDisplay = value.toString()->ensureLinear(cx); + if (!compactDisplay) { + return nullptr; + } + + if (StringEqualsLiteral(compactDisplay, "short")) { + style = Notation::CompactShort; + } else { + MOZ_ASSERT(StringEqualsLiteral(compactDisplay, "long")); + style 
= Notation::CompactLong; + } + } + + if (!skeleton.notation(style)) { + return nullptr; + } + } + + if (!GetProperty(cx, internals, internals, cx->names().signDisplay, &value)) { + return nullptr; + } + + { + JSLinearString* signDisplay = value.toString()->ensureLinear(cx); + if (!signDisplay) { + return nullptr; + } + + using SignDisplay = intl::NumberFormatterSkeleton::SignDisplay; + + SignDisplay display; + if (StringEqualsLiteral(signDisplay, "auto")) { + if (accountingSign) { + display = SignDisplay::Accounting; + } else { + display = SignDisplay::Auto; + } + } else if (StringEqualsLiteral(signDisplay, "never")) { + display = SignDisplay::Never; + } else if (StringEqualsLiteral(signDisplay, "always")) { + if (accountingSign) { + display = SignDisplay::AccountingAlways; + } else { + display = SignDisplay::Always; + } + } else { + MOZ_ASSERT(StringEqualsLiteral(signDisplay, "exceptZero")); + if (accountingSign) { + display = SignDisplay::AccountingExceptZero; + } else { + display = SignDisplay::ExceptZero; + } + } + + if (!skeleton.signDisplay(display)) { + return nullptr; + } + } + + if (!skeleton.roundingModeHalfUp()) { + return nullptr; + } + + return skeleton.toFormatter(cx, locale.get()); +} + +static UFormattedNumber* NewUFormattedNumber(JSContext* cx) { + UErrorCode status = U_ZERO_ERROR; + UFormattedNumber* formatted = unumf_openResult(&status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return nullptr; + } + return formatted; +} + +static const UFormattedValue* PartitionNumberPattern( + JSContext* cx, const UNumberFormatter* nf, UFormattedNumber* formatted, + HandleValue x) { + UErrorCode status = U_ZERO_ERROR; + if (x.isNumber()) { + double num = x.toNumber(); + + // ICU incorrectly formats NaN values with the sign bit set, as if they + // were negative. Replace all NaNs with a single pattern with sign bit + // unset ("positive", that is) until ICU is fixed. 
+ if (MOZ_UNLIKELY(IsNaN(num))) { + num = SpecificNaN<double>(0, 1); + } + + unumf_formatDouble(nf, num, formatted, &status); + } else { + RootedBigInt bi(cx, x.toBigInt()); + + int64_t num; + if (BigInt::isInt64(bi, &num)) { + unumf_formatInt(nf, num, formatted, &status); + } else { + JSLinearString* str = BigInt::toString<CanGC>(cx, bi, 10); + if (!str) { + return nullptr; + } + MOZ_ASSERT(str->hasLatin1Chars()); + + // Tell the analysis the |unumf_formatDecimal| function can't GC. + JS::AutoSuppressGCAnalysis nogc; + + const char* chars = reinterpret_cast<const char*>(str->latin1Chars(nogc)); + unumf_formatDecimal(nf, chars, str->length(), formatted, &status); + } + } + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return nullptr; + } + + const UFormattedValue* formattedValue = + unumf_resultAsValue(formatted, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return nullptr; + } + return formattedValue; +} + +static JSString* FormattedNumberToString( + JSContext* cx, const UFormattedValue* formattedValue) { + UErrorCode status = U_ZERO_ERROR; + int32_t strLength; + const char16_t* str = ufmtval_getString(formattedValue, &strLength, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return nullptr; + } + + return NewStringCopyN<CanGC>(cx, str, AssertedCast<uint32_t>(strLength)); +} + +static bool FormatNumeric(JSContext* cx, const UNumberFormatter* nf, + UFormattedNumber* formatted, HandleValue x, + MutableHandleValue result) { + const UFormattedValue* formattedValue = + PartitionNumberPattern(cx, nf, formatted, x); + if (!formattedValue) { + return false; + } + + JSString* str = FormattedNumberToString(cx, formattedValue); + if (!str) { + return false; + } + + result.setString(str); + return true; +} + +enum class FormattingType { ForUnit, NotForUnit }; + +static FieldType GetFieldTypeForNumberField(UNumberFormatFields fieldName, + HandleValue x, + FormattingType formattingType) { + // See 
intl/icu/source/i18n/unicode/unum.h for a detailed field list. This + // list is deliberately exhaustive: cases might have to be added/removed if + // this code is compiled with a different ICU with more UNumberFormatFields + // enum initializers. Please guard such cases with appropriate ICU + // version-testing #ifdefs, should cross-version divergence occur. + switch (fieldName) { + case UNUM_INTEGER_FIELD: + if (x.isNumber()) { + double d = x.toNumber(); + if (IsNaN(d)) { + return &JSAtomState::nan; + } + if (!IsFinite(d)) { + return &JSAtomState::infinity; + } + } + return &JSAtomState::integer; + + case UNUM_GROUPING_SEPARATOR_FIELD: + return &JSAtomState::group; + + case UNUM_DECIMAL_SEPARATOR_FIELD: + return &JSAtomState::decimal; + + case UNUM_FRACTION_FIELD: + return &JSAtomState::fraction; + + case UNUM_SIGN_FIELD: { + // We coerce all NaNs to one with the sign bit unset, so all NaNs are + // positive in our implementation. + bool isNegative = x.isNumber() + ? !IsNaN(x.toNumber()) && IsNegative(x.toNumber()) + : x.toBigInt()->isNegative(); + return isNegative ? &JSAtomState::minusSign : &JSAtomState::plusSign; + } + + case UNUM_PERCENT_FIELD: + // Percent fields are returned as "unit" elements when the number + // formatter's style is "unit". 
+ if (formattingType == FormattingType::ForUnit) { + return &JSAtomState::unit; + } + return &JSAtomState::percentSign; + + case UNUM_CURRENCY_FIELD: + return &JSAtomState::currency; + + case UNUM_PERMILL_FIELD: + MOZ_ASSERT_UNREACHABLE( + "unexpected permill field found, even though " + "we don't use any user-defined patterns that " + "would require a permill field"); + break; + + case UNUM_EXPONENT_SYMBOL_FIELD: + return &JSAtomState::exponentSeparator; + + case UNUM_EXPONENT_SIGN_FIELD: + return &JSAtomState::exponentMinusSign; + + case UNUM_EXPONENT_FIELD: + return &JSAtomState::exponentInteger; + + case UNUM_MEASURE_UNIT_FIELD: + return &JSAtomState::unit; + + case UNUM_COMPACT_FIELD: + return &JSAtomState::compact; + +#ifndef U_HIDE_DEPRECATED_API + case UNUM_FIELD_COUNT: + MOZ_ASSERT_UNREACHABLE( + "format field sentinel value returned by iterator!"); + break; +#endif + } + + MOZ_ASSERT_UNREACHABLE( + "unenumerated, undocumented format field returned by iterator"); + return nullptr; +} + +struct Field { + uint32_t begin; + uint32_t end; + FieldType type; + + // Needed for vector-resizing scratch space. 
+ Field() = default; + + Field(uint32_t begin, uint32_t end, FieldType type) + : begin(begin), end(end), type(type) {} +}; + +class NumberFormatFields { + using FieldsVector = Vector<Field, 16>; + + FieldsVector fields_; + + public: + explicit NumberFormatFields(JSContext* cx) : fields_(cx) {} + + MOZ_MUST_USE bool append(FieldType type, int32_t begin, int32_t end); + + MOZ_MUST_USE ArrayObject* toArray(JSContext* cx, + JS::HandleString overallResult, + FieldType unitType); +}; + +bool NumberFormatFields::append(FieldType type, int32_t begin, int32_t end) { + MOZ_ASSERT(begin >= 0); + MOZ_ASSERT(end >= 0); + MOZ_ASSERT(begin < end, "erm, aren't fields always non-empty?"); + + return fields_.emplaceBack(uint32_t(begin), uint32_t(end), type); +} + +ArrayObject* NumberFormatFields::toArray(JSContext* cx, + HandleString overallResult, + FieldType unitType) { + // Merge sort the fields vector. Expand the vector to have scratch space for + // performing the sort. + size_t fieldsLen = fields_.length(); + if (!fields_.growByUninitialized(fieldsLen)) { + return nullptr; + } + + MOZ_ALWAYS_TRUE(MergeSort( + fields_.begin(), fieldsLen, fields_.begin() + fieldsLen, + [](const Field& left, const Field& right, bool* lessOrEqual) { + // Sort first by begin index, then to place + // enclosing fields before nested fields. + *lessOrEqual = left.begin < right.begin || + (left.begin == right.begin && left.end > right.end); + return true; + })); + + // Delete the elements in the scratch space. + fields_.shrinkBy(fieldsLen); + + // Then iterate over the sorted field list to generate a sequence of parts + // (what ECMA-402 actually exposes). A part is a maximal character sequence + // entirely within no field or a single most-nested field. + // + // Diagrams may be helpful to illustrate how fields map to parts. Consider + // formatting -19,766,580,028,249.41, the US national surplus (negative + // because it's actually a debt) on October 18, 2016. 
+ // + // var options = + // { style: "currency", currency: "USD", currencyDisplay: "name" }; + // var usdFormatter = new Intl.NumberFormat("en-US", options); + // usdFormatter.format(-19766580028249.41); + // + // The formatted result is "-19,766,580,028,249.41 US dollars". ICU + // identifies these fields in the string: + // + // UNUM_GROUPING_SEPARATOR_FIELD + // | + // UNUM_SIGN_FIELD | UNUM_DECIMAL_SEPARATOR_FIELD + // | __________/| | + // | / | | | | + // "-19,766,580,028,249.41 US dollars" + // \________________/ |/ \_______/ + // | | | + // UNUM_INTEGER_FIELD | UNUM_CURRENCY_FIELD + // | + // UNUM_FRACTION_FIELD + // + // These fields map to parts as follows: + // + // integer decimal + // _____|________ | + // / /| |\ |\ |\ | literal + // /| / | | \ | \ | \| | + // "-19,766,580,028,249.41 US dollars" + // | \___|___|___/ |/ \________/ + // | | | | + // | group | currency + // | | + // minusSign fraction + // + // The sign is a part. Each comma is a part, splitting the integer field + // into parts for trillions/billions/&c. digits. The decimal point is a + // part. Cents are a part. The space between cents and currency is a part + // (outside any field). Last, the currency field is a part. + // + // Because parts fully partition the formatted string, we only track the + // end of each part -- the beginning is implicitly the last part's end. + struct Part { + uint32_t end; + FieldType type; + }; + + class PartGenerator { + // The fields in order from start to end, then least to most nested. + const FieldsVector& fields; + + // Index of the current field, in |fields|, being considered to + // determine part boundaries. |lastEnd <= fields[index].begin| is an + // invariant. + size_t index; + + // The end index of the last part produced, always less than or equal + // to |limit|, strictly increasing. + uint32_t lastEnd; + + // The length of the overall formatted string. 
+ const uint32_t limit; + + Vector<size_t, 4> enclosingFields; + + void popEnclosingFieldsEndingAt(uint32_t end) { + MOZ_ASSERT_IF(enclosingFields.length() > 0, + fields[enclosingFields.back()].end >= end); + + while (enclosingFields.length() > 0 && + fields[enclosingFields.back()].end == end) { + enclosingFields.popBack(); + } + } + + bool nextPartInternal(Part* part) { + size_t len = fields.length(); + MOZ_ASSERT(index <= len); + + // If we're out of fields, all that remains are part(s) consisting + // of trailing portions of enclosing fields, and maybe a final + // literal part. + if (index == len) { + if (enclosingFields.length() > 0) { + const auto& enclosing = fields[enclosingFields.popCopy()]; + part->end = enclosing.end; + part->type = enclosing.type; + + // If additional enclosing fields end where this part ends, + // pop them as well. + popEnclosingFieldsEndingAt(part->end); + } else { + part->end = limit; + part->type = &JSAtomState::literal; + } + + return true; + } + + // Otherwise we still have a field to process. + const Field* current = &fields[index]; + MOZ_ASSERT(lastEnd <= current->begin); + MOZ_ASSERT(current->begin < current->end); + + // But first, deal with inter-field space. + if (lastEnd < current->begin) { + if (enclosingFields.length() > 0) { + // Space between fields, within an enclosing field, is part + // of that enclosing field, until the start of the current + // field or the end of the enclosing field, whichever is + // earlier. + const auto& enclosing = fields[enclosingFields.back()]; + part->end = std::min(enclosing.end, current->begin); + part->type = enclosing.type; + popEnclosingFieldsEndingAt(part->end); + } else { + // If there's no enclosing field, the space is a literal. + part->end = current->begin; + part->type = &JSAtomState::literal; + } + + return true; + } + + // Otherwise, the part spans a prefix of the current field. Find + // the most-nested field containing that prefix. 
+ const Field* next; + do { + current = &fields[index]; + + // If the current field is last, the part extends to its end. + if (++index == len) { + part->end = current->end; + part->type = current->type; + return true; + } + + next = &fields[index]; + MOZ_ASSERT(current->begin <= next->begin); + MOZ_ASSERT(current->begin < next->end); + + // If the next field nests within the current field, push an + // enclosing field. (If there are no nested fields, don't + // bother pushing a field that'd be immediately popped.) + if (current->end > next->begin) { + if (!enclosingFields.append(index - 1)) { + return false; + } + } + + // Do so until the next field begins after this one. + } while (current->begin == next->begin); + + part->type = current->type; + + if (current->end <= next->begin) { + // The next field begins after the current field ends. Therefore + // the current part ends at the end of the current field. + part->end = current->end; + popEnclosingFieldsEndingAt(part->end); + } else { + // The current field encloses the next one. The current part + // ends where the next field/part will start. + part->end = next->begin; + } + + return true; + } + + public: + PartGenerator(JSContext* cx, const FieldsVector& vec, uint32_t limit) + : fields(vec), + index(0), + lastEnd(0), + limit(limit), + enclosingFields(cx) {} + + bool nextPart(bool* hasPart, Part* part) { + // There are no parts left if we've partitioned the entire string. + if (lastEnd == limit) { + MOZ_ASSERT(enclosingFields.length() == 0); + *hasPart = false; + return true; + } + + if (!nextPartInternal(part)) { + return false; + } + + *hasPart = true; + lastEnd = part->end; + return true; + } + }; + + // Finally, generate the result array. 
+ size_t lastEndIndex = 0; + RootedObject singlePart(cx); + RootedValue propVal(cx); + + RootedArrayObject partsArray(cx, NewDenseEmptyArray(cx)); + if (!partsArray) { + return nullptr; + } + + PartGenerator gen(cx, fields_, overallResult->length()); + do { + bool hasPart; + Part part; + if (!gen.nextPart(&hasPart, &part)) { + return nullptr; + } + + if (!hasPart) { + break; + } + + FieldType type = part.type; + size_t endIndex = part.end; + + MOZ_ASSERT(lastEndIndex < endIndex); + + singlePart = NewBuiltinClassInstance<PlainObject>(cx); + if (!singlePart) { + return nullptr; + } + + propVal.setString(cx->names().*type); + if (!DefineDataProperty(cx, singlePart, cx->names().type, propVal)) { + return nullptr; + } + + JSLinearString* partSubstr = NewDependentString( + cx, overallResult, lastEndIndex, endIndex - lastEndIndex); + if (!partSubstr) { + return nullptr; + } + + propVal.setString(partSubstr); + if (!DefineDataProperty(cx, singlePart, cx->names().value, propVal)) { + return nullptr; + } + + if (unitType != nullptr && type != &JSAtomState::literal) { + propVal.setString(cx->names().*unitType); + if (!DefineDataProperty(cx, singlePart, cx->names().unit, propVal)) { + return nullptr; + } + } + + if (!NewbornArrayPush(cx, partsArray, ObjectValue(*singlePart))) { + return nullptr; + } + + lastEndIndex = endIndex; + } while (true); + + MOZ_ASSERT(lastEndIndex == overallResult->length(), + "result array must partition the entire string"); + + return partsArray; +} + +static bool FormattedNumberToParts(JSContext* cx, + const UFormattedValue* formattedValue, + HandleValue number, + FieldType relativeTimeUnit, + FormattingType formattingType, + MutableHandleValue result) { + MOZ_ASSERT(number.isNumeric()); + + RootedString overallResult(cx, FormattedNumberToString(cx, formattedValue)); + if (!overallResult) { + return false; + } + + UErrorCode status = U_ZERO_ERROR; + UConstrainedFieldPosition* fpos = ucfpos_open(&status); + if (U_FAILURE(status)) { + 
intl::ReportInternalError(cx); + return false; + } + ScopedICUObject<UConstrainedFieldPosition, ucfpos_close> toCloseFpos(fpos); + + // We're only interested in UFIELD_CATEGORY_NUMBER fields. + ucfpos_constrainCategory(fpos, UFIELD_CATEGORY_NUMBER, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + + // Vacuum up fields in the overall formatted string. + + NumberFormatFields fields(cx); + + while (true) { + bool hasMore = ufmtval_nextPosition(formattedValue, fpos, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + if (!hasMore) { + break; + } + + int32_t field = ucfpos_getField(fpos, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + + int32_t beginIndex, endIndex; + ucfpos_getIndexes(fpos, &beginIndex, &endIndex, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + + FieldType type = GetFieldTypeForNumberField(UNumberFormatFields(field), + number, formattingType); + + if (!fields.append(type, beginIndex, endIndex)) { + return false; + } + } + + ArrayObject* array = fields.toArray(cx, overallResult, relativeTimeUnit); + if (!array) { + return false; + } + + result.setObject(*array); + return true; +} + +bool js::intl::FormattedRelativeTimeToParts( + JSContext* cx, const UFormattedValue* formattedValue, double timeValue, + FieldType relativeTimeUnit, MutableHandleValue result) { + Value tval = DoubleValue(timeValue); + return FormattedNumberToParts( + cx, formattedValue, HandleValue::fromMarkedLocation(&tval), + relativeTimeUnit, FormattingType::NotForUnit, result); +} + +static bool FormatNumericToParts(JSContext* cx, const UNumberFormatter* nf, + UFormattedNumber* formatted, HandleValue x, + FormattingType formattingType, + MutableHandleValue result) { + const UFormattedValue* formattedValue = + PartitionNumberPattern(cx, nf, formatted, x); + if (!formattedValue) { + return false; + } + + return 
FormattedNumberToParts(cx, formattedValue, x, nullptr, formattingType, + result); +} + +bool js::intl_FormatNumber(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 4); + MOZ_ASSERT(args[0].isObject()); + MOZ_ASSERT(args[1].isNumeric()); + MOZ_ASSERT(args[2].isBoolean()); + MOZ_ASSERT(args[3].isBoolean()); + + Rooted<NumberFormatObject*> numberFormat( + cx, &args[0].toObject().as<NumberFormatObject>()); + + // Obtain a cached UNumberFormatter object. + UNumberFormatter* nf = numberFormat->getNumberFormatter(); + if (!nf) { + nf = NewUNumberFormatter(cx, numberFormat); + if (!nf) { + return false; + } + numberFormat->setNumberFormatter(nf); + + intl::AddICUCellMemory(numberFormat, + NumberFormatObject::EstimatedMemoryUse); + } + + // Obtain a cached UFormattedNumber object. + UFormattedNumber* formatted = numberFormat->getFormattedNumber(); + if (!formatted) { + formatted = NewUFormattedNumber(cx); + if (!formatted) { + return false; + } + numberFormat->setFormattedNumber(formatted); + + // UFormattedNumber memory tracked as part of UNumberFormatter. + } + + // Use the UNumberFormatter to actually format the number. + if (args[2].toBoolean()) { + FormattingType formattingType = args[3].toBoolean() + ? FormattingType::ForUnit + : FormattingType::NotForUnit; + return FormatNumericToParts(cx, nf, formatted, args[1], formattingType, + args.rval()); + } + + return FormatNumeric(cx, nf, formatted, args[1], args.rval()); +} diff --git a/js/src/builtin/intl/NumberFormat.h b/js/src/builtin/intl/NumberFormat.h new file mode 100644 index 0000000000..0ad24da577 --- /dev/null +++ b/js/src/builtin/intl/NumberFormat.h @@ -0,0 +1,278 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef builtin_intl_NumberFormat_h +#define builtin_intl_NumberFormat_h + +#include "mozilla/Attributes.h" + +#include <stdint.h> + +#include "builtin/SelfHostingDefines.h" +#include "js/Class.h" +#include "vm/NativeObject.h" + +struct UFormattedNumber; +struct UNumberFormatter; + +namespace js { + +class NumberFormatObject : public NativeObject { + public: + static const JSClass class_; + static const JSClass& protoClass_; + + static constexpr uint32_t INTERNALS_SLOT = 0; + static constexpr uint32_t UNUMBER_FORMATTER_SLOT = 1; + static constexpr uint32_t UFORMATTED_NUMBER_SLOT = 2; + static constexpr uint32_t SLOT_COUNT = 3; + + static_assert(INTERNALS_SLOT == INTL_INTERNALS_OBJECT_SLOT, + "INTERNALS_SLOT must match self-hosting define for internals " + "object slot"); + + // Estimated memory use for UNumberFormatter and UFormattedNumber + // (see IcuMemoryUsage). + static constexpr size_t EstimatedMemoryUse = 750; + + UNumberFormatter* getNumberFormatter() const { + const auto& slot = getFixedSlot(UNUMBER_FORMATTER_SLOT); + if (slot.isUndefined()) { + return nullptr; + } + return static_cast<UNumberFormatter*>(slot.toPrivate()); + } + + void setNumberFormatter(UNumberFormatter* formatter) { + setFixedSlot(UNUMBER_FORMATTER_SLOT, PrivateValue(formatter)); + } + + UFormattedNumber* getFormattedNumber() const { + const auto& slot = getFixedSlot(UFORMATTED_NUMBER_SLOT); + if (slot.isUndefined()) { + return nullptr; + } + return static_cast<UFormattedNumber*>(slot.toPrivate()); + } + + void setFormattedNumber(UFormattedNumber* formatted) { + setFixedSlot(UFORMATTED_NUMBER_SLOT, PrivateValue(formatted)); + } + + private: + static const JSClassOps classOps_; + static const ClassSpec classSpec_; + + static void finalize(JSFreeOp* fop, JSObject* obj); +}; + +/** + * Returns a new instance of the standard built-in NumberFormat constructor. 
+ * Self-hosted code cannot cache this constructor (as it does for others in + * Utilities.js) because it is initialized after self-hosted code is compiled. + * + * Usage: numberFormat = intl_NumberFormat(locales, options) + */ +extern MOZ_MUST_USE bool intl_NumberFormat(JSContext* cx, unsigned argc, + Value* vp); + +/** + * Returns the numbering system type identifier per Unicode + * Technical Standard 35, Unicode Locale Data Markup Language, for the + * default numbering system for the given locale. + * + * Usage: defaultNumberingSystem = intl_numberingSystem(locale) + */ +extern MOZ_MUST_USE bool intl_numberingSystem(JSContext* cx, unsigned argc, + Value* vp); + +/** + * Returns a string representing the number x according to the effective + * locale and the formatting options of the given NumberFormat. + * + * Spec: ECMAScript Internationalization API Specification, 11.3.2. + * + * Usage: formatted = intl_FormatNumber(numberFormat, x, formatToParts, + * unitStyle) + */ +extern MOZ_MUST_USE bool intl_FormatNumber(JSContext* cx, unsigned argc, + Value* vp); + +#if DEBUG || MOZ_SYSTEM_ICU +/** + * Returns an object with all available measurement units. + * + * Usage: units = intl_availableMeasurementUnits() + */ +extern MOZ_MUST_USE bool intl_availableMeasurementUnits(JSContext* cx, + unsigned argc, + Value* vp); +#endif + +namespace intl { + +/** + * Class to create a number formatter skeleton. 
+ * + * The skeleton syntax is documented at: + * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md + */ +class MOZ_STACK_CLASS NumberFormatterSkeleton final { + static constexpr size_t DefaultVectorSize = 128; + using SkeletonVector = Vector<char16_t, DefaultVectorSize>; + + SkeletonVector vector_; + + bool append(char16_t c) { return vector_.append(c); } + + bool appendN(char16_t c, size_t times) { return vector_.appendN(c, times); } + + template <size_t N> + bool append(const char16_t (&chars)[N]) { + static_assert(N > 0, + "should only be used with string literals or properly " + "null-terminated arrays"); + MOZ_ASSERT(chars[N - 1] == '\0', + "should only be used with string literals or properly " + "null-terminated arrays"); + return vector_.append(chars, N - 1); // Without trailing \0. + } + + template <size_t N> + bool appendToken(const char16_t (&token)[N]) { + return append(token) && append(' '); + } + + bool append(const char* chars, size_t length) { + return vector_.append(chars, length); + } + + public: + explicit NumberFormatterSkeleton(JSContext* cx) : vector_(cx) {} + + /** + * Return a new UNumberFormatter based on this skeleton. + */ + UNumberFormatter* toFormatter(JSContext* cx, const char* locale); + + /** + * Set this skeleton to display a currency amount. |currency| must be a + * three-letter currency code. + * + * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#unit + */ + MOZ_MUST_USE bool currency(JSLinearString* currency); + + enum class CurrencyDisplay { Code, Name, Symbol, NarrowSymbol }; + + /** + * Set the currency display style for this skeleton. + * + * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#unit-width + */ + MOZ_MUST_USE bool currencyDisplay(CurrencyDisplay display); + + /** + * Set this skeleton to display a unit amount. |unit| must be a well-formed + * unit identifier. 
+ * + * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#unit + * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#per-unit + */ + MOZ_MUST_USE bool unit(JSLinearString* unit); + + enum class UnitDisplay { Short, Narrow, Long }; + + /** + * Set the unit display style for this skeleton. + * + * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#unit-width + */ + MOZ_MUST_USE bool unitDisplay(UnitDisplay display); + + /** + * Set this skeleton to display a percent number. + * + * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#unit + * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#scale + */ + MOZ_MUST_USE bool percent(); + + /** + * Set the fraction digits settings for this skeleton. |min| can be zero, + * |max| must be larger-or-equal to |min|. + * + * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#fraction-precision + */ + MOZ_MUST_USE bool fractionDigits(uint32_t min, uint32_t max); + + /** + * Set the integer-width settings for this skeleton. |min| must be a non-zero + * number. + * + * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#integer-width + */ + MOZ_MUST_USE bool integerWidth(uint32_t min); + + /** + * Set the significant digits settings for this skeleton. |min| must be a + * non-zero number, |max| must be larger-or-equal to |min|. + * + * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#significant-digits-precision + */ + MOZ_MUST_USE bool significantDigits(uint32_t min, uint32_t max); + + /** + * Enable or disable grouping for this skeleton. 
+ * + * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#grouping + */ + MOZ_MUST_USE bool useGrouping(bool on); + + enum class Notation { + Standard, + Scientific, + Engineering, + CompactShort, + CompactLong + }; + + /** + * Set the notation style for this skeleton. + * + * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#notation + */ + MOZ_MUST_USE bool notation(Notation style); + + enum class SignDisplay { + Auto, + Never, + Always, + ExceptZero, + Accounting, + AccountingAlways, + AccountingExceptZero + }; + + /** + * Set the sign-display for this skeleton. + * + * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#sign-display + */ + MOZ_MUST_USE bool signDisplay(SignDisplay display); + + /** + * Set the rounding mode to 'half-up' for this skeleton. + * + * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#rounding-mode + */ + MOZ_MUST_USE bool roundingModeHalfUp(); +}; + +} // namespace intl +} // namespace js + +#endif /* builtin_intl_NumberFormat_h */ diff --git a/js/src/builtin/intl/NumberFormat.js b/js/src/builtin/intl/NumberFormat.js new file mode 100644 index 0000000000..83e306123c --- /dev/null +++ b/js/src/builtin/intl/NumberFormat.js @@ -0,0 +1,768 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Portions Copyright Norbert Lindenberg 2011-2012. */ + +#include "NumberingSystemsGenerated.h" + +/** + * NumberFormat internal properties. + * + * Spec: ECMAScript Internationalization API Specification, 9.1 and 11.3.3. 
+ */ +var numberFormatInternalProperties = { + localeData: numberFormatLocaleData, + relevantExtensionKeys: ["nu"], +}; + +/** + * Compute an internal properties object from |lazyNumberFormatData|. + */ +function resolveNumberFormatInternals(lazyNumberFormatData) { + assert(IsObject(lazyNumberFormatData), "lazy data not an object?"); + + var internalProps = std_Object_create(null); + + var NumberFormat = numberFormatInternalProperties; + + // Compute effective locale. + + // Step 7. + var localeData = NumberFormat.localeData; + + // Step 8. + var r = ResolveLocale("NumberFormat", + lazyNumberFormatData.requestedLocales, + lazyNumberFormatData.opt, + NumberFormat.relevantExtensionKeys, + localeData); + + // Steps 9-10. (Step 11 is not relevant to our implementation.) + internalProps.locale = r.locale; + internalProps.numberingSystem = r.nu; + + // Compute formatting options. + // Step 13. + var style = lazyNumberFormatData.style; + internalProps.style = style; + + // Steps 17, 19. + if (style === "currency") { + internalProps.currency = lazyNumberFormatData.currency; + internalProps.currencyDisplay = lazyNumberFormatData.currencyDisplay; + internalProps.currencySign = lazyNumberFormatData.currencySign; + } + + // Intl.NumberFormat Unified API Proposal + if (style === "unit") { + internalProps.unit = lazyNumberFormatData.unit; + internalProps.unitDisplay = lazyNumberFormatData.unitDisplay; + } + + // Intl.NumberFormat Unified API Proposal + var notation = lazyNumberFormatData.notation; + internalProps.notation = notation; + + // Step 22. + internalProps.minimumIntegerDigits = lazyNumberFormatData.minimumIntegerDigits; + + if ("minimumFractionDigits" in lazyNumberFormatData) { + // Note: Intl.NumberFormat.prototype.resolvedOptions() exposes the + // actual presence (versus undefined-ness) of these properties. 
+ assert("maximumFractionDigits" in lazyNumberFormatData, "min/max frac digits mismatch"); + internalProps.minimumFractionDigits = lazyNumberFormatData.minimumFractionDigits; + internalProps.maximumFractionDigits = lazyNumberFormatData.maximumFractionDigits; + } + + if ("minimumSignificantDigits" in lazyNumberFormatData) { + // Note: Intl.NumberFormat.prototype.resolvedOptions() exposes the + // actual presence (versus undefined-ness) of these properties. + assert("maximumSignificantDigits" in lazyNumberFormatData, "min/max sig digits mismatch"); + internalProps.minimumSignificantDigits = lazyNumberFormatData.minimumSignificantDigits; + internalProps.maximumSignificantDigits = lazyNumberFormatData.maximumSignificantDigits; + } + + // Intl.NumberFormat Unified API Proposal + if (notation === "compact") + internalProps.compactDisplay = lazyNumberFormatData.compactDisplay; + + // Step 24. + internalProps.useGrouping = lazyNumberFormatData.useGrouping; + + // Intl.NumberFormat Unified API Proposal + internalProps.signDisplay = lazyNumberFormatData.signDisplay; + + // The caller is responsible for associating |internalProps| with the right + // object using |setInternalProperties|. + return internalProps; +} + +/** + * Returns an object containing the NumberFormat internal properties of |obj|. + */ +function getNumberFormatInternals(obj) { + assert(IsObject(obj), "getNumberFormatInternals called with non-object"); + assert(GuardToNumberFormat(obj) !== null, "getNumberFormatInternals called with non-NumberFormat"); + + var internals = getIntlObjectInternals(obj); + assert(internals.type === "NumberFormat", "bad type escaped getIntlObjectInternals"); + + // If internal properties have already been computed, use them. + var internalProps = maybeInternalProperties(internals); + if (internalProps) + return internalProps; + + // Otherwise it's time to fully create them. 
+ internalProps = resolveNumberFormatInternals(internals.lazyData); + setInternalProperties(internals, internalProps); + return internalProps; +} + +/** + * 11.1.11 UnwrapNumberFormat( nf ) + */ +function UnwrapNumberFormat(nf) { + // Steps 2 and 4 (error handling moved to caller). + if (IsObject(nf) && + GuardToNumberFormat(nf) === null && + !IsWrappedNumberFormat(nf) && + nf instanceof GetBuiltinConstructor("NumberFormat")) + { + nf = nf[intlFallbackSymbol()]; + } + return nf; +} + +/** + * Applies digit options used for number formatting onto the intl object. + * + * Spec: ECMAScript Internationalization API Specification, 11.1.1. + */ +function SetNumberFormatDigitOptions(lazyData, options, mnfdDefault, mxfdDefault, notation) { + // We skip step 1 because we set the properties on a lazyData object. + + // Steps 2-4. + assert(IsObject(options), "SetNumberFormatDigitOptions"); + assert(typeof mnfdDefault === "number", "SetNumberFormatDigitOptions"); + assert(typeof mxfdDefault === "number", "SetNumberFormatDigitOptions"); + assert(mnfdDefault <= mxfdDefault, "SetNumberFormatDigitOptions"); + assert(typeof notation === "string", "SetNumberFormatDigitOptions"); + + // Steps 5-9. + const mnid = GetNumberOption(options, "minimumIntegerDigits", 1, 21, 1); + let mnfd = options.minimumFractionDigits; + let mxfd = options.maximumFractionDigits; + let mnsd = options.minimumSignificantDigits; + let mxsd = options.maximumSignificantDigits; + + // Step 10. + lazyData.minimumIntegerDigits = mnid; + + // Step 11. + if (mnsd !== undefined || mxsd !== undefined) { + // Step 11.a (Omitted). + + // Step 11.b. + mnsd = DefaultNumberOption(mnsd, 1, 21, 1); + + // Step 11.c. + mxsd = DefaultNumberOption(mxsd, mnsd, 21, 21); + + // Step 11.d. + lazyData.minimumSignificantDigits = mnsd; + + // Step 11.e. + lazyData.maximumSignificantDigits = mxsd; + } + + // Step 12. + else if (mnfd !== undefined || mxfd !== undefined) { + // Step 12.a (Omitted). + + // Step 12.b. 
+ mnfd = DefaultNumberOption(mnfd, 0, 20, undefined); + + // Step 12.c. + mxfd = DefaultNumberOption(mxfd, 0, 20, undefined); + + // Steps 12.d-e. + // Inlined DefaultNumberOption, only the fallback case applies here. + if (mnfd === undefined) { + assert(mxfd !== undefined, "mxfd isn't undefined when mnfd is undefined"); + mnfd = std_Math_min(mnfdDefault, mxfd); + } + + // Step 12.f. + // Inlined DefaultNumberOption, only the fallback case applies here. + else if (mxfd === undefined) { + mxfd = std_Math_max(mxfdDefault, mnfd); + } + + // Step 12.g. + else if (mnfd > mxfd) { + ThrowRangeError(JSMSG_INVALID_DIGITS_VALUE, mxfd); + } + + // Step 12.h. + lazyData.minimumFractionDigits = mnfd; + + // Step 12.i. + lazyData.maximumFractionDigits = mxfd; + } + + // Step 13. + else if (notation === "compact") { + // Step 13.a (Omitted). + } + + // Step 14. + else { + // Step 14.a (Omitted). + + // Step 14.b. + lazyData.minimumFractionDigits = mnfdDefault; + + // Step 14.c. + lazyData.maximumFractionDigits = mxfdDefault; + } +} + +/** + * Convert s to upper case, but limited to characters a-z. + * + * Spec: ECMAScript Internationalization API Specification, 6.1. + */ +function toASCIIUpperCase(s) { + assert(typeof s === "string", "toASCIIUpperCase"); + + // String.prototype.toUpperCase may map non-ASCII characters into ASCII, + // so go character by character (actually code unit by code unit, but + // since we only care about ASCII characters here, that's OK). + var result = ""; + for (var i = 0; i < s.length; i++) { + var c = callFunction(std_String_charCodeAt, s, i); + result += (0x61 <= c && c <= 0x7A) + ? callFunction(std_String_fromCharCode, null, c & ~0x20) + : s[i]; + } + return result; +} + +/** + * Verifies that the given string is a well-formed ISO 4217 currency code. + * + * Spec: ECMAScript Internationalization API Specification, 6.3.1. 
/**
 * Checks whether |unitIdentifier| is a well-formed core unit identifier as
 * defined in UTS #35, Part 2, Section 6. Beyond the UTS #35 syntax, the
 * identifier must either be a sanctioned simple unit, or a compound unit of
 * the form "<numerator>-per-<denominator>" whose two parts are both
 * sanctioned simple units.
 *
 * Intl.NumberFormat Unified API Proposal
 */
function IsWellFormedUnitIdentifier(unitIdentifier) {
    assert(typeof unitIdentifier === "string", "unitIdentifier is a string value");

    // Step 1: a sanctioned simple unit is well-formed as is.
    if (IsSanctionedSimpleUnitIdentifier(unitIdentifier))
        return true;

    // Step 2: otherwise it has to be a compound unit; split at the first
    // occurrence of the separator.
    var separator = "-per-";
    var separatorIndex = callFunction(std_String_indexOf, unitIdentifier, separator);
    if (separatorIndex < 0)
        return false;

    // Steps 3-4: everything before the separator is the numerator.
    var numerator = Substring(unitIdentifier, 0, separatorIndex);
    if (!IsSanctionedSimpleUnitIdentifier(numerator))
        return false;

    // Steps 5-6: everything after the separator is the denominator.
    var denominatorStart = separatorIndex + separator.length;
    var denominator = Substring(unitIdentifier, denominatorStart,
                                unitIdentifier.length - denominatorStart);
    return IsSanctionedSimpleUnitIdentifier(denominator);
}
/**
 * Verifies that the given string is a sanctioned simple core unit identifier.
 *
 * Intl.NumberFormat Unified API Proposal
 *
 * Also see: https://unicode.org/reports/tr35/tr35-general.html#Unit_Elements
 *
 * @param unitIdentifier
 *        The candidate unit identifier; must be a string.
 * @return true if |unitIdentifier| is an own property of
 *         |sanctionedSimpleUnitIdentifiers|. When built with MOZ_SYSTEM_ICU,
 *         the unit must additionally be reported by
 *         |intl_availableMeasurementUnits()|.
 */
function IsSanctionedSimpleUnitIdentifier(unitIdentifier) {
    assert(typeof unitIdentifier === "string", "unitIdentifier is a string value");

    // Membership test against the table of sanctioned identifiers.
    var isSanctioned = hasOwn(unitIdentifier, sanctionedSimpleUnitIdentifiers);

#if DEBUG || MOZ_SYSTEM_ICU
    if (isSanctioned) {
        // Fetch the set of available units once and cache it in
        // |availableMeasurementUnits.value| for later calls.
        if (availableMeasurementUnits.value === null)
            availableMeasurementUnits.value = intl_availableMeasurementUnits();

        var isSupported = hasOwn(unitIdentifier, availableMeasurementUnits.value);

#if MOZ_SYSTEM_ICU
        // A system ICU may support fewer measurement units, so we need to make
        // sure the unit is actually supported.
        isSanctioned = isSupported;
#else
        // Otherwise just assert that the sanctioned unit is also supported.
        assert(isSupported,
`"${unitIdentifier}" is sanctioned but not supported. Did you forget to update
intl/icu/data_filter.json to include the unit (and any implicit compound units)?
For example "speed/kilometer-per-hour" is implied by "length/kilometer" and
"duration/hour" and must therefore also be present.`);
#endif
    }
#endif

    return isSanctioned;
}
/**
 * Initializes an object as a NumberFormat.
 *
 * This method is complicated a moderate bit by its implementing initialization
 * as a *lazy* concept.  Everything that must happen now, does -- but we defer
 * all the work we can until the object is actually used as a NumberFormat.
 * This later work occurs in |resolveNumberFormatInternals|; steps not noted
 * here occur there.
 *
 * Spec: ECMAScript Internationalization API Specification, 11.1.2.
 *
 * @param numberFormat
 *        The NumberFormat object to initialize (asserted below).
 * @param thisValue
 *        The |this| value of the constructor call; only consulted for the
 *        11.2.1 fallback-symbol handling at the end of this function.
 * @param locales
 *        The requested locales, canonicalized via CanonicalizeLocaleList.
 * @param options
 *        Either undefined or a value that is converted with ToObject.
 * @return |thisValue| when the fallback symbol was installed on it, otherwise
 *         |numberFormat|.
 * @throws RangeError if "currency" or "unit" is present but malformed (other
 *         option helpers called here may throw as well).
 * @throws TypeError if style is "currency" or "unit" and the matching
 *         "currency"/"unit" option is missing.
 */
function InitializeNumberFormat(numberFormat, thisValue, locales, options) {
    assert(IsObject(numberFormat), "InitializeNumberFormat called with non-object");
    assert(GuardToNumberFormat(numberFormat) !== null, "InitializeNumberFormat called with non-NumberFormat");

    // Lazy NumberFormat data has the following structure:
    //
    //   {
    //     requestedLocales: List of locales,
    //     style: "decimal" / "percent" / "currency" / "unit",
    //
    //     // fields present only if style === "currency":
    //     currency: a well-formed currency code (IsWellFormedCurrencyCode),
    //     currencyDisplay: "code" / "symbol" / "narrowSymbol" / "name",
    //     currencySign: "standard" / "accounting",
    //
    //     // fields present only if style === "unit":
    //     unit: a well-formed unit identifier (IsWellFormedUnitIdentifier),
    //     unitDisplay: "short" / "narrow" / "long",
    //
    //     opt: // opt object computed in InitializeNumberFormat
    //       {
    //         localeMatcher: "lookup" / "best fit",
    //
    //         nu: string matching a Unicode extension type, // optional
    //       }
    //
    //     minimumIntegerDigits: integer ∈ [1, 21],
    //
    //     // optional, mutually exclusive with the significant-digits option
    //     minimumFractionDigits: integer ∈ [0, 20],
    //     maximumFractionDigits: integer ∈ [0, 20],
    //
    //     // optional, mutually exclusive with the fraction-digits option
    //     minimumSignificantDigits: integer ∈ [1, 21],
    //     maximumSignificantDigits: integer ∈ [1, 21],
    //
    //     useGrouping: true / false,
    //
    //     notation: "standard" / "scientific" / "engineering" / "compact",
    //
    //     // optional, if notation is "compact"
    //     compactDisplay: "short" / "long",
    //
    //     signDisplay: "auto" / "never" / "always" / "exceptZero",
    //   }
    //
    // Note that lazy data is only installed as a final step of initialization,
    // so every NumberFormat lazy data object has *all* these properties, never a
    // subset of them.
    var lazyNumberFormatData = std_Object_create(null);

    // Step 1.
    var requestedLocales = CanonicalizeLocaleList(locales);
    lazyNumberFormatData.requestedLocales = requestedLocales;

    // Steps 2-3.
    //
    // If we ever need more speed here at startup, we should try to detect the
    // case where |options === undefined| and then directly use the default
    // value for each option.  For now, just keep it simple.
    if (options === undefined)
        options = std_Object_create(null);
    else
        options = ToObject(options);

    // Compute options that impact interpretation of locale.
    // Step 4.
    var opt = new Record();
    lazyNumberFormatData.opt = opt;

    // Steps 5-6.
    var matcher = GetOption(options, "localeMatcher", "string", ["lookup", "best fit"], "best fit");
    opt.localeMatcher = matcher;

    // The numbering system is optional; when present it is validated and
    // canonicalized before being stored as the "nu" extension value.
    var numberingSystem = GetOption(options, "numberingSystem", "string", undefined, undefined);

    if (numberingSystem !== undefined) {
        numberingSystem = intl_ValidateAndCanonicalizeUnicodeExtensionType(numberingSystem,
                                                                           "numberingSystem",
                                                                           "nu");
    }

    opt.nu = numberingSystem;

    // Compute formatting options.
    // Step 12.
    var style = GetOption(options, "style", "string", ["decimal", "percent", "currency", "unit"],
                          "decimal");
    lazyNumberFormatData.style = style;

    // Steps 14-17.
    var currency = GetOption(options, "currency", "string", undefined, undefined);

    // Per the Intl.NumberFormat Unified API Proposal, this check should only
    // happen for |style === "currency"|, which seems inconsistent, given that
    // we normally validate all options when present, even the ones which are
    // unused.
    // TODO: File issue at <https://github.com/tc39/proposal-unified-intl-numberformat>.
    if (currency !== undefined && !IsWellFormedCurrencyCode(currency))
        ThrowRangeError(JSMSG_INVALID_CURRENCY_CODE, currency);

    // |cDigits| is only assigned (and later only read) for currency formatters.
    var cDigits;
    if (style === "currency") {
        if (currency === undefined)
            ThrowTypeError(JSMSG_UNDEFINED_CURRENCY);

        // Steps 19.a-c.
        currency = toASCIIUpperCase(currency);
        lazyNumberFormatData.currency = currency;
        cDigits = CurrencyDigits(currency);
    }

    // Step 18.
    // The option is always read and validated, but only stored for currency
    // formatters.
    var currencyDisplay = GetOption(options, "currencyDisplay", "string",
                                    ["code", "symbol", "narrowSymbol", "name"], "symbol");
    if (style === "currency")
        lazyNumberFormatData.currencyDisplay = currencyDisplay;

    // Intl.NumberFormat Unified API Proposal
    var currencySign = GetOption(options, "currencySign", "string", ["standard", "accounting"],
                                 "standard");
    if (style === "currency")
        lazyNumberFormatData.currencySign = currencySign;

    // Intl.NumberFormat Unified API Proposal
    var unit = GetOption(options, "unit", "string", undefined, undefined);

    // Aligned with |currency| check from above, see note about spec issue there.
    if (unit !== undefined && !IsWellFormedUnitIdentifier(unit))
        ThrowRangeError(JSMSG_INVALID_UNIT_IDENTIFIER, unit);

    var unitDisplay = GetOption(options, "unitDisplay", "string",
                                ["short", "narrow", "long"], "short");

    // |unit| and |unitDisplay| are only stored for unit formatters.
    if (style === "unit") {
        if (unit === undefined)
            ThrowTypeError(JSMSG_UNDEFINED_UNIT);

        lazyNumberFormatData.unit = unit;
        lazyNumberFormatData.unitDisplay = unitDisplay;
    }

    // Steps 20-21.
    // Default fraction digits: the currency's digit count for currency
    // formatters, otherwise 0 minimum and 0 (percent) or 3 maximum.
    var mnfdDefault, mxfdDefault;
    if (style === "currency") {
        mnfdDefault = cDigits;
        mxfdDefault = cDigits;
    } else {
        mnfdDefault = 0;
        mxfdDefault = style === "percent" ? 0 : 3;
    }

    // Intl.NumberFormat Unified API Proposal
    var notation = GetOption(options, "notation", "string",
                             ["standard", "scientific", "engineering", "compact"], "standard");
    lazyNumberFormatData.notation = notation;

    // Step 22.
    SetNumberFormatDigitOptions(lazyNumberFormatData, options, mnfdDefault, mxfdDefault, notation);

    // Intl.NumberFormat Unified API Proposal
    // Always read and validated, but only stored for compact notation.
    var compactDisplay = GetOption(options, "compactDisplay", "string",
                                   ["short", "long"], "short");
    if (notation === "compact")
        lazyNumberFormatData.compactDisplay = compactDisplay;

    // Step 23.
    var useGrouping = GetOption(options, "useGrouping", "boolean", undefined, true);
    lazyNumberFormatData.useGrouping = useGrouping;

    // Intl.NumberFormat Unified API Proposal
    var signDisplay = GetOption(options, "signDisplay", "string",
                                ["auto", "never", "always", "exceptZero"], "auto");
    lazyNumberFormatData.signDisplay = signDisplay;

    // Step 31.
    //
    // We've done everything that must be done now: mark the lazy data as fully
    // computed and install it.
    initializeIntlObject(numberFormat, "NumberFormat", lazyNumberFormatData);

    // 11.2.1, steps 4-5.
    // TODO: spec issue - The current spec doesn't have the IsObject check,
    // which means |Intl.NumberFormat.call(null)| is supposed to throw here.
    if (numberFormat !== thisValue && IsObject(thisValue) &&
        thisValue instanceof GetBuiltinConstructor("NumberFormat"))
    {
        // Attach the real NumberFormat to |thisValue| under the fallback
        // symbol; |UnwrapNumberFormat| reads it back from there.
        _DefineDataProperty(thisValue, intlFallbackSymbol(), numberFormat,
                            ATTR_NONENUMERABLE | ATTR_NONCONFIGURABLE | ATTR_NONWRITABLE);

        return thisValue;
    }

    // 11.2.1, step 6.
    return numberFormat;
}
/**
 * Create function to be cached and returned by Intl.NumberFormat.prototype.format.
 *
 * Spec: ECMAScript Internationalization API Specification, 11.1.4.
 *
 * @param nf
 *        The NumberFormat object the returned function formats with.
 * @return A one-argument function that converts its argument with ToNumeric
 *         and returns the result of |intl_FormatNumber| for |nf|.
 */
function createNumberFormatFormat(nf) {
    // This function is not inlined in $Intl_NumberFormat_format_get to avoid
    // creating a call-object on each call to $Intl_NumberFormat_format_get.
    return function(value) {
        // Step 1 (implicit).

        // Step 2.
        // The assertion messages name this function, so that a failure is not
        // misattributed to InitializeNumberFormat.
        assert(IsObject(nf), "createNumberFormatFormat called with non-object");
        assert(GuardToNumberFormat(nf) !== null, "createNumberFormatFormat called with non-NumberFormat");

        // Steps 3-4.
        var x = ToNumeric(value);

        // Step 5.
        return intl_FormatNumber(nf, x, /* formatToParts = */ false, /* unitStyle = */ false);
    };
}
+ var thisArg = UnwrapNumberFormat(this); + var nf = thisArg; + if (!IsObject(nf) || (nf = GuardToNumberFormat(nf)) === null) { + return callFunction(CallNumberFormatMethodIfWrapped, thisArg, + "Intl_NumberFormat_resolvedOptions"); + } + + var internals = getNumberFormatInternals(nf); + + // Steps 4-5. + var result = { + locale: internals.locale, + numberingSystem: internals.numberingSystem, + style: internals.style, + }; + + // currency, currencyDisplay, and currencySign are only present for currency + // formatters. + assert(hasOwn("currency", internals) === (internals.style === "currency"), + "currency is present iff style is 'currency'"); + assert(hasOwn("currencyDisplay", internals) === (internals.style === "currency"), + "currencyDisplay is present iff style is 'currency'"); + assert(hasOwn("currencySign", internals) === (internals.style === "currency"), + "currencySign is present iff style is 'currency'"); + + if (hasOwn("currency", internals)) { + _DefineDataProperty(result, "currency", internals.currency); + _DefineDataProperty(result, "currencyDisplay", internals.currencyDisplay); + _DefineDataProperty(result, "currencySign", internals.currencySign); + } + + // unit and unitDisplay are only present for unit formatters. + assert(hasOwn("unit", internals) === (internals.style === "unit"), + "unit is present iff style is 'unit'"); + assert(hasOwn("unitDisplay", internals) === (internals.style === "unit"), + "unitDisplay is present iff style is 'unit'"); + + if (hasOwn("unit", internals)) { + _DefineDataProperty(result, "unit", internals.unit); + _DefineDataProperty(result, "unitDisplay", internals.unitDisplay); + } + + _DefineDataProperty(result, "minimumIntegerDigits", internals.minimumIntegerDigits); + + // Min/Max fraction digits are either both present or not present at all. 
+ assert(hasOwn("minimumFractionDigits", internals) === + hasOwn("maximumFractionDigits", internals), + "minimumFractionDigits is present iff maximumFractionDigits is present"); + + if (hasOwn("minimumFractionDigits", internals)) { + _DefineDataProperty(result, "minimumFractionDigits", internals.minimumFractionDigits); + _DefineDataProperty(result, "maximumFractionDigits", internals.maximumFractionDigits); + } + + // Min/Max significant digits are either both present or not present at all. + assert(hasOwn("minimumSignificantDigits", internals) === + hasOwn("maximumSignificantDigits", internals), + "minimumSignificantDigits is present iff maximumSignificantDigits is present"); + + if (hasOwn("minimumSignificantDigits", internals)) { + _DefineDataProperty(result, "minimumSignificantDigits", + internals.minimumSignificantDigits); + _DefineDataProperty(result, "maximumSignificantDigits", + internals.maximumSignificantDigits); + } + + _DefineDataProperty(result, "useGrouping", internals.useGrouping); + + var notation = internals.notation; + _DefineDataProperty(result, "notation", notation); + + if (notation === "compact") + _DefineDataProperty(result, "compactDisplay", internals.compactDisplay); + + _DefineDataProperty(result, "signDisplay", internals.signDisplay); + + // Step 6. + return result; +} diff --git a/js/src/builtin/intl/NumberingSystems.yaml b/js/src/builtin/intl/NumberingSystems.yaml new file mode 100644 index 0000000000..61d59304a9 --- /dev/null +++ b/js/src/builtin/intl/NumberingSystems.yaml @@ -0,0 +1,79 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +# 12.1.7 PartitionNotationSubPattern ( numberFormat, x, n, exponent ) +# +# Numbering systems with simple digit mappings +# +# https://tc39.es/ecma402/#table-numbering-system-digits + +# Run |make_intl_data numbering| to regenerate all files which reference this list +# of numbering systems. + +- adlm +- ahom +- arab +- arabext +- bali +- beng +- bhks +- brah +- cakm +- cham +- deva +- diak +- fullwide +- gong +- gonm +- gujr +- guru +- hanidec +- hmng +- hmnp +- java +- kali +- khmr +- knda +- lana +- lanatham +- laoo +- latn +- lepc +- limb +- mathbold +- mathdbl +- mathmono +- mathsanb +- mathsans +- mlym +- modi +- mong +- mroo +- mtei +- mymr +- mymrshan +- mymrtlng +- newa +- nkoo +- olck +- orya +- osma +- rohg +- saur +- segment +- shrd +- sind +- sinh +- sora +- sund +- takr +- talu +- tamldec +- telu +- thai +- tibt +- tirh +- vaii +- wara +- wcho diff --git a/js/src/builtin/intl/NumberingSystemsGenerated.h b/js/src/builtin/intl/NumberingSystemsGenerated.h new file mode 100644 index 0000000000..fa970ab348 --- /dev/null +++ b/js/src/builtin/intl/NumberingSystemsGenerated.h @@ -0,0 +1,80 @@ +// Generated by make_intl_data.py. DO NOT EDIT. + +/** + * The list of numbering systems with simple digit mappings. 
+ */ + +#ifndef builtin_intl_NumberingSystemsGenerated_h +#define builtin_intl_NumberingSystemsGenerated_h + +// clang-format off +#define NUMBERING_SYSTEMS_WITH_SIMPLE_DIGIT_MAPPINGS \ + "adlm", \ + "ahom", \ + "arab", \ + "arabext", \ + "bali", \ + "beng", \ + "bhks", \ + "brah", \ + "cakm", \ + "cham", \ + "deva", \ + "diak", \ + "fullwide", \ + "gong", \ + "gonm", \ + "gujr", \ + "guru", \ + "hanidec", \ + "hmng", \ + "hmnp", \ + "java", \ + "kali", \ + "khmr", \ + "knda", \ + "lana", \ + "lanatham", \ + "laoo", \ + "latn", \ + "lepc", \ + "limb", \ + "mathbold", \ + "mathdbl", \ + "mathmono", \ + "mathsanb", \ + "mathsans", \ + "mlym", \ + "modi", \ + "mong", \ + "mroo", \ + "mtei", \ + "mymr", \ + "mymrshan", \ + "mymrtlng", \ + "newa", \ + "nkoo", \ + "olck", \ + "orya", \ + "osma", \ + "rohg", \ + "saur", \ + "segment", \ + "shrd", \ + "sind", \ + "sinh", \ + "sora", \ + "sund", \ + "takr", \ + "talu", \ + "tamldec", \ + "telu", \ + "thai", \ + "tibt", \ + "tirh", \ + "vaii", \ + "wara", \ + "wcho" +// clang-format on + +#endif // builtin_intl_NumberingSystemsGenerated_h diff --git a/js/src/builtin/intl/PluralRules.cpp b/js/src/builtin/intl/PluralRules.cpp new file mode 100644 index 0000000000..757f6a2beb --- /dev/null +++ b/js/src/builtin/intl/PluralRules.cpp @@ -0,0 +1,435 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Implementation of the Intl.PluralRules proposal. 
*/ + +#include "builtin/intl/PluralRules.h" + +#include "mozilla/Assertions.h" +#include "mozilla/Casting.h" + +#include "builtin/Array.h" +#include "builtin/intl/CommonFunctions.h" +#include "builtin/intl/NumberFormat.h" +#include "builtin/intl/ScopedICUObject.h" +#include "gc/FreeOp.h" +#include "js/CharacterEncoding.h" +#include "js/PropertySpec.h" +#include "unicode/uenum.h" +#include "unicode/uloc.h" +#include "unicode/unumberformatter.h" +#include "unicode/upluralrules.h" +#include "unicode/utypes.h" +#include "vm/GlobalObject.h" +#include "vm/JSContext.h" +#include "vm/PlainObject.h" // js::PlainObject +#include "vm/StringType.h" + +#include "vm/JSObject-inl.h" +#include "vm/NativeObject-inl.h" + +using namespace js; + +using mozilla::AssertedCast; + +using js::intl::CallICU; +using js::intl::IcuLocale; + +const JSClassOps PluralRulesObject::classOps_ = { + nullptr, // addProperty + nullptr, // delProperty + nullptr, // enumerate + nullptr, // newEnumerate + nullptr, // resolve + nullptr, // mayResolve + PluralRulesObject::finalize, // finalize + nullptr, // call + nullptr, // hasInstance + nullptr, // construct + nullptr, // trace +}; + +const JSClass PluralRulesObject::class_ = { + "Intl.PluralRules", + JSCLASS_HAS_RESERVED_SLOTS(PluralRulesObject::SLOT_COUNT) | + JSCLASS_HAS_CACHED_PROTO(JSProto_PluralRules) | + JSCLASS_FOREGROUND_FINALIZE, + &PluralRulesObject::classOps_, &PluralRulesObject::classSpec_}; + +const JSClass& PluralRulesObject::protoClass_ = PlainObject::class_; + +static bool pluralRules_toSource(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + args.rval().setString(cx->names().PluralRules); + return true; +} + +static const JSFunctionSpec pluralRules_static_methods[] = { + JS_SELF_HOSTED_FN("supportedLocalesOf", + "Intl_PluralRules_supportedLocalesOf", 1, 0), + JS_FS_END}; + +static const JSFunctionSpec pluralRules_methods[] = { + JS_SELF_HOSTED_FN("resolvedOptions", 
"Intl_PluralRules_resolvedOptions", 0, + 0), + JS_SELF_HOSTED_FN("select", "Intl_PluralRules_select", 1, 0), + JS_FN(js_toSource_str, pluralRules_toSource, 0, 0), JS_FS_END}; + +static const JSPropertySpec pluralRules_properties[] = { + JS_STRING_SYM_PS(toStringTag, "Intl.PluralRules", JSPROP_READONLY), + JS_PS_END}; + +static bool PluralRules(JSContext* cx, unsigned argc, Value* vp); + +const ClassSpec PluralRulesObject::classSpec_ = { + GenericCreateConstructor<PluralRules, 0, gc::AllocKind::FUNCTION>, + GenericCreatePrototype<PluralRulesObject>, + pluralRules_static_methods, + nullptr, + pluralRules_methods, + pluralRules_properties, + nullptr, + ClassSpec::DontDefineConstructor}; + +/** + * PluralRules constructor. + * Spec: ECMAScript 402 API, PluralRules, 13.2.1 + */ +static bool PluralRules(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + + // Step 1. + if (!ThrowIfNotConstructing(cx, args, "Intl.PluralRules")) { + return false; + } + + // Step 2 (Inlined 9.1.14, OrdinaryCreateFromConstructor). + RootedObject proto(cx); + if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_PluralRules, + &proto)) { + return false; + } + + Rooted<PluralRulesObject*> pluralRules(cx); + pluralRules = NewObjectWithClassProto<PluralRulesObject>(cx, proto); + if (!pluralRules) { + return false; + } + + HandleValue locales = args.get(0); + HandleValue options = args.get(1); + + // Step 3. 
+ if (!intl::InitializeObject(cx, pluralRules, + cx->names().InitializePluralRules, locales, + options)) { + return false; + } + + args.rval().setObject(*pluralRules); + return true; +} + +void js::PluralRulesObject::finalize(JSFreeOp* fop, JSObject* obj) { + MOZ_ASSERT(fop->onMainThread()); + + auto* pluralRules = &obj->as<PluralRulesObject>(); + UPluralRules* pr = pluralRules->getPluralRules(); + UNumberFormatter* nf = pluralRules->getNumberFormatter(); + UFormattedNumber* formatted = pluralRules->getFormattedNumber(); + + if (pr) { + intl::RemoveICUCellMemory( + fop, obj, PluralRulesObject::UPluralRulesEstimatedMemoryUse); + } + if (nf) { + intl::RemoveICUCellMemory( + fop, obj, PluralRulesObject::UNumberFormatterEstimatedMemoryUse); + + // UFormattedNumber memory tracked as part of UNumberFormatter. + } + + if (pr) { + uplrules_close(pr); + } + if (nf) { + unumf_close(nf); + } + if (formatted) { + unumf_closeResult(formatted); + } +} + +/** + * This creates a new UNumberFormatter with calculated digit formatting + * properties for PluralRules. + * + * This is similar to NewUNumberFormatter but doesn't allow for currency or + * percent types. 
+ */ +static UNumberFormatter* NewUNumberFormatterForPluralRules( + JSContext* cx, Handle<PluralRulesObject*> pluralRules) { + RootedObject internals(cx, intl::GetInternalsObject(cx, pluralRules)); + if (!internals) { + return nullptr; + } + + RootedValue value(cx); + + if (!GetProperty(cx, internals, internals, cx->names().locale, &value)) { + return nullptr; + } + UniqueChars locale = intl::EncodeLocale(cx, value.toString()); + if (!locale) { + return nullptr; + } + + intl::NumberFormatterSkeleton skeleton(cx); + + bool hasMinimumSignificantDigits; + if (!HasProperty(cx, internals, cx->names().minimumSignificantDigits, + &hasMinimumSignificantDigits)) { + return nullptr; + } + + if (hasMinimumSignificantDigits) { + if (!GetProperty(cx, internals, internals, + cx->names().minimumSignificantDigits, &value)) { + return nullptr; + } + uint32_t minimumSignificantDigits = AssertedCast<uint32_t>(value.toInt32()); + + if (!GetProperty(cx, internals, internals, + cx->names().maximumSignificantDigits, &value)) { + return nullptr; + } + uint32_t maximumSignificantDigits = AssertedCast<uint32_t>(value.toInt32()); + + if (!skeleton.significantDigits(minimumSignificantDigits, + maximumSignificantDigits)) { + return nullptr; + } + } else { + if (!GetProperty(cx, internals, internals, + cx->names().minimumFractionDigits, &value)) { + return nullptr; + } + uint32_t minimumFractionDigits = AssertedCast<uint32_t>(value.toInt32()); + + if (!GetProperty(cx, internals, internals, + cx->names().maximumFractionDigits, &value)) { + return nullptr; + } + uint32_t maximumFractionDigits = AssertedCast<uint32_t>(value.toInt32()); + + if (!skeleton.fractionDigits(minimumFractionDigits, + maximumFractionDigits)) { + return nullptr; + } + } + + if (!GetProperty(cx, internals, internals, cx->names().minimumIntegerDigits, + &value)) { + return nullptr; + } + uint32_t minimumIntegerDigits = AssertedCast<uint32_t>(value.toInt32()); + + if (!skeleton.integerWidth(minimumIntegerDigits)) { + return 
nullptr; + } + + if (!skeleton.roundingModeHalfUp()) { + return nullptr; + } + + return skeleton.toFormatter(cx, locale.get()); +} + +static UFormattedNumber* NewUFormattedNumberForPluralRules(JSContext* cx) { + UErrorCode status = U_ZERO_ERROR; + UFormattedNumber* formatted = unumf_openResult(&status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return nullptr; + } + return formatted; +} + +/** + * Returns a new UPluralRules with the locale and type options of the given + * PluralRules. + */ +static UPluralRules* NewUPluralRules(JSContext* cx, + Handle<PluralRulesObject*> pluralRules) { + RootedObject internals(cx, intl::GetInternalsObject(cx, pluralRules)); + if (!internals) { + return nullptr; + } + + RootedValue value(cx); + + if (!GetProperty(cx, internals, internals, cx->names().locale, &value)) { + return nullptr; + } + UniqueChars locale = intl::EncodeLocale(cx, value.toString()); + if (!locale) { + return nullptr; + } + + if (!GetProperty(cx, internals, internals, cx->names().type, &value)) { + return nullptr; + } + + UPluralType category; + { + JSLinearString* type = value.toString()->ensureLinear(cx); + if (!type) { + return nullptr; + } + + if (StringEqualsLiteral(type, "cardinal")) { + category = UPLURAL_TYPE_CARDINAL; + } else { + MOZ_ASSERT(StringEqualsLiteral(type, "ordinal")); + category = UPLURAL_TYPE_ORDINAL; + } + } + + UErrorCode status = U_ZERO_ERROR; + UPluralRules* pr = + uplrules_openForType(IcuLocale(locale.get()), category, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return nullptr; + } + return pr; +} + +bool js::intl_SelectPluralRule(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 2); + + Rooted<PluralRulesObject*> pluralRules( + cx, &args[0].toObject().as<PluralRulesObject>()); + + double x = args[1].toNumber(); + + // Obtain a cached UPluralRules object. 
+ UPluralRules* pr = pluralRules->getPluralRules(); + if (!pr) { + pr = NewUPluralRules(cx, pluralRules); + if (!pr) { + return false; + } + pluralRules->setPluralRules(pr); + + intl::AddICUCellMemory(pluralRules, + PluralRulesObject::UPluralRulesEstimatedMemoryUse); + } + + // Obtain a cached UNumberFormat object. + UNumberFormatter* nf = pluralRules->getNumberFormatter(); + if (!nf) { + nf = NewUNumberFormatterForPluralRules(cx, pluralRules); + if (!nf) { + return false; + } + pluralRules->setNumberFormatter(nf); + + intl::AddICUCellMemory( + pluralRules, PluralRulesObject::UNumberFormatterEstimatedMemoryUse); + } + + // Obtain a cached UFormattedNumber object. + UFormattedNumber* formatted = pluralRules->getFormattedNumber(); + if (!formatted) { + formatted = NewUFormattedNumberForPluralRules(cx); + if (!formatted) { + return false; + } + pluralRules->setFormattedNumber(formatted); + + // UFormattedNumber memory tracked as part of UNumberFormatter. + } + + UErrorCode status = U_ZERO_ERROR; + unumf_formatDouble(nf, x, formatted, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + + JSString* str = CallICU( + cx, [pr, formatted](UChar* chars, int32_t size, UErrorCode* status) { + return uplrules_selectFormatted(pr, formatted, chars, size, status); + }); + if (!str) { + return false; + } + + args.rval().setString(str); + return true; +} + +bool js::intl_GetPluralCategories(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 1); + + Rooted<PluralRulesObject*> pluralRules( + cx, &args[0].toObject().as<PluralRulesObject>()); + + // Obtain a cached UPluralRules object. 
+ UPluralRules* pr = pluralRules->getPluralRules(); + if (!pr) { + pr = NewUPluralRules(cx, pluralRules); + if (!pr) { + return false; + } + pluralRules->setPluralRules(pr); + + intl::AddICUCellMemory(pluralRules, + PluralRulesObject::UPluralRulesEstimatedMemoryUse); + } + + UErrorCode status = U_ZERO_ERROR; + UEnumeration* ue = uplrules_getKeywords(pr, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + ScopedICUObject<UEnumeration, uenum_close> closeEnum(ue); + + RootedObject res(cx, NewDenseEmptyArray(cx)); + if (!res) { + return false; + } + + do { + int32_t catSize; + const char* cat = uenum_next(ue, &catSize, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + + if (!cat) { + break; + } + + MOZ_ASSERT(catSize >= 0); + JSString* str = NewStringCopyN<CanGC>(cx, cat, catSize); + if (!str) { + return false; + } + + if (!NewbornArrayPush(cx, res, StringValue(str))) { + return false; + } + } while (true); + + args.rval().setObject(*res); + return true; +} diff --git a/js/src/builtin/intl/PluralRules.h b/js/src/builtin/intl/PluralRules.h new file mode 100644 index 0000000000..e9ad2ea4c8 --- /dev/null +++ b/js/src/builtin/intl/PluralRules.h @@ -0,0 +1,115 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef builtin_intl_PluralRules_h +#define builtin_intl_PluralRules_h + +#include "mozilla/Attributes.h" + +#include "builtin/SelfHostingDefines.h" +#include "js/Class.h" +#include "js/RootingAPI.h" +#include "vm/NativeObject.h" + +struct UFormattedNumber; +struct UNumberFormatter; +struct UPluralRules; + +namespace js { + +class PluralRulesObject : public NativeObject { + public: + static const JSClass class_; + static const JSClass& protoClass_; + + static constexpr uint32_t INTERNALS_SLOT = 0; + static constexpr uint32_t UPLURAL_RULES_SLOT = 1; + static constexpr uint32_t UNUMBER_FORMATTER_SLOT = 2; + static constexpr uint32_t UFORMATTED_NUMBER_SLOT = 3; + static constexpr uint32_t SLOT_COUNT = 4; + + static_assert(INTERNALS_SLOT == INTL_INTERNALS_OBJECT_SLOT, + "INTERNALS_SLOT must match self-hosting define for internals " + "object slot"); + + // Estimated memory use for UNumberFormatter and UFormattedNumber + // (see IcuMemoryUsage). + static constexpr size_t UNumberFormatterEstimatedMemoryUse = 750; + + // Estimated memory use for UPluralRules (see IcuMemoryUsage). 
+ static constexpr size_t UPluralRulesEstimatedMemoryUse = 2976; + + UPluralRules* getPluralRules() const { + const auto& slot = getFixedSlot(UPLURAL_RULES_SLOT); + if (slot.isUndefined()) { + return nullptr; + } + return static_cast<UPluralRules*>(slot.toPrivate()); + } + + void setPluralRules(UPluralRules* pluralRules) { + setFixedSlot(UPLURAL_RULES_SLOT, PrivateValue(pluralRules)); + } + + UNumberFormatter* getNumberFormatter() const { + const auto& slot = getFixedSlot(UNUMBER_FORMATTER_SLOT); + if (slot.isUndefined()) { + return nullptr; + } + return static_cast<UNumberFormatter*>(slot.toPrivate()); + } + + void setNumberFormatter(UNumberFormatter* formatter) { + setFixedSlot(UNUMBER_FORMATTER_SLOT, PrivateValue(formatter)); + } + + UFormattedNumber* getFormattedNumber() const { + const auto& slot = getFixedSlot(UFORMATTED_NUMBER_SLOT); + if (slot.isUndefined()) { + return nullptr; + } + return static_cast<UFormattedNumber*>(slot.toPrivate()); + } + + void setFormattedNumber(UFormattedNumber* formatted) { + setFixedSlot(UFORMATTED_NUMBER_SLOT, PrivateValue(formatted)); + } + + private: + static const JSClassOps classOps_; + static const ClassSpec classSpec_; + + static void finalize(JSFreeOp* fop, JSObject* obj); +}; + +/** + * Returns a plural rule for the number x according to the effective + * locale and the formatting options of the given PluralRules. + * + * A plural rule is a grammatical category that expresses count distinctions + * (such as "one", "two", "few" etc.). + * + * Usage: rule = intl_SelectPluralRule(pluralRules, x) + */ +extern MOZ_MUST_USE bool intl_SelectPluralRule(JSContext* cx, unsigned argc, + JS::Value* vp); + +/** + * Returns an array of plural rules categories for a given pluralRules object. 
+ * + * Usage: categories = intl_GetPluralCategories(pluralRules) + * + * Example: + * + * pluralRules = new Intl.PluralRules('pl', {type: 'cardinal'}); + * intl_GetPluralCategories(pluralRules); // ['one', 'few', 'many', 'other'] + */ +extern MOZ_MUST_USE bool intl_GetPluralCategories(JSContext* cx, unsigned argc, + JS::Value* vp); + +} // namespace js + +#endif /* builtin_intl_PluralRules_h */ diff --git a/js/src/builtin/intl/PluralRules.js b/js/src/builtin/intl/PluralRules.js new file mode 100644 index 0000000000..1672fef507 --- /dev/null +++ b/js/src/builtin/intl/PluralRules.js @@ -0,0 +1,271 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * PluralRules internal properties. + * + * Spec: ECMAScript 402 API, PluralRules, 13.3.3. + */ +var pluralRulesInternalProperties = { + localeData: pluralRulesLocaleData, + relevantExtensionKeys: [], +}; + +function pluralRulesLocaleData() { + // PluralRules don't support any extension keys. + return {}; +} + +/** + * Compute an internal properties object from |lazyPluralRulesData|. + */ +function resolvePluralRulesInternals(lazyPluralRulesData) { + assert(IsObject(lazyPluralRulesData), "lazy data not an object?"); + + var internalProps = std_Object_create(null); + + var PluralRules = pluralRulesInternalProperties; + + // Compute effective locale. + + // Step 10. + var localeData = PluralRules.localeData; + + // Step 11. + const r = ResolveLocale("PluralRules", + lazyPluralRulesData.requestedLocales, + lazyPluralRulesData.opt, + PluralRules.relevantExtensionKeys, + localeData); + + // Step 12. + internalProps.locale = r.locale; + + // Step 8. + internalProps.type = lazyPluralRulesData.type; + + // Step 9. 
+ internalProps.minimumIntegerDigits = lazyPluralRulesData.minimumIntegerDigits; + + if ("minimumFractionDigits" in lazyPluralRulesData) { + assert("maximumFractionDigits" in lazyPluralRulesData, "min/max frac digits mismatch"); + internalProps.minimumFractionDigits = lazyPluralRulesData.minimumFractionDigits; + internalProps.maximumFractionDigits = lazyPluralRulesData.maximumFractionDigits; + } + + if ("minimumSignificantDigits" in lazyPluralRulesData) { + assert("maximumSignificantDigits" in lazyPluralRulesData, "min/max sig digits mismatch"); + internalProps.minimumSignificantDigits = lazyPluralRulesData.minimumSignificantDigits; + internalProps.maximumSignificantDigits = lazyPluralRulesData.maximumSignificantDigits; + } + + // Step 13 (lazily computed on first access). + internalProps.pluralCategories = null; + + return internalProps; +} + +/** + * Returns an object containing the PluralRules internal properties of |obj|. + */ +function getPluralRulesInternals(obj) { + assert(IsObject(obj), "getPluralRulesInternals called with non-object"); + assert(GuardToPluralRules(obj) !== null, "getPluralRulesInternals called with non-PluralRules"); + + var internals = getIntlObjectInternals(obj); + assert(internals.type === "PluralRules", "bad type escaped getIntlObjectInternals"); + + var internalProps = maybeInternalProperties(internals); + if (internalProps) + return internalProps; + + internalProps = resolvePluralRulesInternals(internals.lazyData); + setInternalProperties(internals, internalProps); + return internalProps; +} + +/** + * Initializes an object as a PluralRules. + * + * This method is complicated a moderate bit by its implementing initialization + * as a *lazy* concept. Everything that must happen now, does -- but we defer + * all the work we can until the object is actually used as a PluralRules. + * This later work occurs in |resolvePluralRulesInternals|; steps not noted + * here occur there. + * + * Spec: ECMAScript 402 API, PluralRules, 13.1.1. 
+ */ +function InitializePluralRules(pluralRules, locales, options) { + assert(IsObject(pluralRules), "InitializePluralRules called with non-object"); + assert(GuardToPluralRules(pluralRules) !== null, "InitializePluralRules called with non-PluralRules"); + + // Lazy PluralRules data has the following structure: + // + // { + // requestedLocales: List of locales, + // type: "cardinal" / "ordinal", + // + // opt: // opt object computed in InitializePluralRules + // { + // localeMatcher: "lookup" / "best fit", + // } + // + // minimumIntegerDigits: integer ∈ [1, 21], + // + // // optional, mutually exclusive with the significant-digits option + // minimumFractionDigits: integer ∈ [0, 20], + // maximumFractionDigits: integer ∈ [0, 20], + // + // // optional, mutually exclusive with the fraction-digits option + // minimumSignificantDigits: integer ∈ [1, 21], + // maximumSignificantDigits: integer ∈ [1, 21], + // } + // + // Note that lazy data is only installed as a final step of initialization, + // so every PluralRules lazy data object has *all* these properties, never a + // subset of them. + const lazyPluralRulesData = std_Object_create(null); + + // Step 1. + let requestedLocales = CanonicalizeLocaleList(locales); + lazyPluralRulesData.requestedLocales = requestedLocales; + + // Steps 2-3. + if (options === undefined) + options = std_Object_create(null); + else + options = ToObject(options); + + // Step 4. + let opt = new Record(); + lazyPluralRulesData.opt = opt; + + // Steps 5-6. + let matcher = GetOption(options, "localeMatcher", "string", ["lookup", "best fit"], "best fit"); + opt.localeMatcher = matcher; + + // Step 7. + const type = GetOption(options, "type", "string", ["cardinal", "ordinal"], "cardinal"); + lazyPluralRulesData.type = type; + + // Step 9. + SetNumberFormatDigitOptions(lazyPluralRulesData, options, 0, 3, "standard"); + + // Step 15. 
+ // + // We've done everything that must be done now: mark the lazy data as fully + // computed and install it. + initializeIntlObject(pluralRules, "PluralRules", lazyPluralRulesData); +} + +/** + * Returns the subset of the given locale list for which this locale list has a + * matching (possibly fallback) locale. Locales appear in the same order in the + * returned list as in the input list. + * + * Spec: ECMAScript 402 API, PluralRules, 13.3.2. + */ +function Intl_PluralRules_supportedLocalesOf(locales /*, options*/) { + var options = arguments.length > 1 ? arguments[1] : undefined; + + // Step 1. + var availableLocales = "PluralRules"; + + // Step 2. + let requestedLocales = CanonicalizeLocaleList(locales); + + // Step 3. + return SupportedLocales(availableLocales, requestedLocales, options); +} + +/** + * Returns a String value representing the plural category matching + * the number passed as value according to the + * effective locale and the formatting options of this PluralRules. + * + * Spec: ECMAScript 402 API, PluralRules, 13.4.3. + */ +function Intl_PluralRules_select(value) { + // Step 1. + let pluralRules = this; + + // Steps 2-3. + if (!IsObject(pluralRules) || (pluralRules = GuardToPluralRules(pluralRules)) === null) { + return callFunction(CallPluralRulesMethodIfWrapped, this, value, + "Intl_PluralRules_select"); + } + + // Step 4. + let n = ToNumber(value); + + // Ensure the PluralRules internals are resolved. + getPluralRulesInternals(pluralRules); + + // Step 5. + return intl_SelectPluralRule(pluralRules, n); +} + +/** + * Returns the resolved options for a PluralRules object. + * + * Spec: ECMAScript 402 API, PluralRules, 13.4.4. + */ +function Intl_PluralRules_resolvedOptions() { + // Step 1. + var pluralRules = this; + + // Steps 2-3. 
+ if (!IsObject(pluralRules) || (pluralRules = GuardToPluralRules(pluralRules)) === null) { + return callFunction(CallPluralRulesMethodIfWrapped, this, + "Intl_PluralRules_resolvedOptions"); + } + + var internals = getPluralRulesInternals(pluralRules); + + // Steps 4-5. + var result = { + locale: internals.locale, + type: internals.type, + minimumIntegerDigits: internals.minimumIntegerDigits, + }; + + // Min/Max fraction digits are either both present or not present at all. + assert(hasOwn("minimumFractionDigits", internals) === + hasOwn("maximumFractionDigits", internals), + "minimumFractionDigits is present iff maximumFractionDigits is present"); + + if (hasOwn("minimumFractionDigits", internals)) { + _DefineDataProperty(result, "minimumFractionDigits", internals.minimumFractionDigits); + _DefineDataProperty(result, "maximumFractionDigits", internals.maximumFractionDigits); + } + + // Min/Max significant digits are either both present or not present at all. + assert(hasOwn("minimumSignificantDigits", internals) === + hasOwn("maximumSignificantDigits", internals), + "minimumSignificantDigits is present iff maximumSignificantDigits is present"); + + if (hasOwn("minimumSignificantDigits", internals)) { + _DefineDataProperty(result, "minimumSignificantDigits", + internals.minimumSignificantDigits); + _DefineDataProperty(result, "maximumSignificantDigits", + internals.maximumSignificantDigits); + } + + // Step 6. + var internalsPluralCategories = internals.pluralCategories; + if (internalsPluralCategories === null) { + internalsPluralCategories = intl_GetPluralCategories(pluralRules); + internals.pluralCategories = internalsPluralCategories; + } + + var pluralCategories = []; + for (var i = 0; i < internalsPluralCategories.length; i++) + _DefineDataProperty(pluralCategories, i, internalsPluralCategories[i]); + + // Step 7. + _DefineDataProperty(result, "pluralCategories", pluralCategories); + + // Step 8. 
+ return result; +} diff --git a/js/src/builtin/intl/RelativeTimeFormat.cpp b/js/src/builtin/intl/RelativeTimeFormat.cpp new file mode 100644 index 0000000000..fd83cd34e0 --- /dev/null +++ b/js/src/builtin/intl/RelativeTimeFormat.cpp @@ -0,0 +1,468 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Implementation of the Intl.RelativeTimeFormat proposal. */ + +#include "builtin/intl/RelativeTimeFormat.h" + +#include "mozilla/Assertions.h" +#include "mozilla/FloatingPoint.h" + +#include "builtin/intl/CommonFunctions.h" +#include "builtin/intl/LanguageTag.h" +#include "builtin/intl/ScopedICUObject.h" +#include "gc/FreeOp.h" +#include "js/CharacterEncoding.h" +#include "js/friend/ErrorMessages.h" // js::GetErrorMessage, JSMSG_* +#include "js/PropertySpec.h" +#include "unicode/udisplaycontext.h" +#include "unicode/uloc.h" +#include "unicode/unum.h" +#include "unicode/ureldatefmt.h" +#include "unicode/utypes.h" +#include "vm/GlobalObject.h" +#include "vm/JSContext.h" +#include "vm/PlainObject.h" // js::PlainObject +#include "vm/Printer.h" +#include "vm/StringType.h" + +#include "vm/NativeObject-inl.h" + +using namespace js; + +using js::intl::CallICU; +using js::intl::IcuLocale; + +/**************** RelativeTimeFormat *****************/ + +const JSClassOps RelativeTimeFormatObject::classOps_ = { + nullptr, // addProperty + nullptr, // delProperty + nullptr, // enumerate + nullptr, // newEnumerate + nullptr, // resolve + nullptr, // mayResolve + RelativeTimeFormatObject::finalize, // finalize + nullptr, // call + nullptr, // hasInstance + nullptr, // construct + nullptr, // trace +}; + +const JSClass RelativeTimeFormatObject::class_ = { + "Intl.RelativeTimeFormat", + 
JSCLASS_HAS_RESERVED_SLOTS(RelativeTimeFormatObject::SLOT_COUNT) | + JSCLASS_HAS_CACHED_PROTO(JSProto_RelativeTimeFormat) | + JSCLASS_FOREGROUND_FINALIZE, + &RelativeTimeFormatObject::classOps_, + &RelativeTimeFormatObject::classSpec_}; + +const JSClass& RelativeTimeFormatObject::protoClass_ = PlainObject::class_; + +static bool relativeTimeFormat_toSource(JSContext* cx, unsigned argc, + Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + args.rval().setString(cx->names().RelativeTimeFormat); + return true; +} + +static const JSFunctionSpec relativeTimeFormat_static_methods[] = { + JS_SELF_HOSTED_FN("supportedLocalesOf", + "Intl_RelativeTimeFormat_supportedLocalesOf", 1, 0), + JS_FS_END}; + +static const JSFunctionSpec relativeTimeFormat_methods[] = { + JS_SELF_HOSTED_FN("resolvedOptions", + "Intl_RelativeTimeFormat_resolvedOptions", 0, 0), + JS_SELF_HOSTED_FN("format", "Intl_RelativeTimeFormat_format", 2, 0), + JS_SELF_HOSTED_FN("formatToParts", "Intl_RelativeTimeFormat_formatToParts", + 2, 0), + JS_FN(js_toSource_str, relativeTimeFormat_toSource, 0, 0), JS_FS_END}; + +static const JSPropertySpec relativeTimeFormat_properties[] = { + JS_STRING_SYM_PS(toStringTag, "Intl.RelativeTimeFormat", JSPROP_READONLY), + JS_PS_END}; + +static bool RelativeTimeFormat(JSContext* cx, unsigned argc, Value* vp); + +const ClassSpec RelativeTimeFormatObject::classSpec_ = { + GenericCreateConstructor<RelativeTimeFormat, 0, gc::AllocKind::FUNCTION>, + GenericCreatePrototype<RelativeTimeFormatObject>, + relativeTimeFormat_static_methods, + nullptr, + relativeTimeFormat_methods, + relativeTimeFormat_properties, + nullptr, + ClassSpec::DontDefineConstructor}; + +/** + * RelativeTimeFormat constructor. + * Spec: ECMAScript 402 API, RelativeTimeFormat, 1.1 + */ +static bool RelativeTimeFormat(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + + // Step 1. 
+ if (!ThrowIfNotConstructing(cx, args, "Intl.RelativeTimeFormat")) { + return false; + } + + // Step 2 (Inlined 9.1.14, OrdinaryCreateFromConstructor). + RootedObject proto(cx); + if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_RelativeTimeFormat, + &proto)) { + return false; + } + + Rooted<RelativeTimeFormatObject*> relativeTimeFormat(cx); + relativeTimeFormat = + NewObjectWithClassProto<RelativeTimeFormatObject>(cx, proto); + if (!relativeTimeFormat) { + return false; + } + + HandleValue locales = args.get(0); + HandleValue options = args.get(1); + + // Step 3. + if (!intl::InitializeObject(cx, relativeTimeFormat, + cx->names().InitializeRelativeTimeFormat, locales, + options)) { + return false; + } + + args.rval().setObject(*relativeTimeFormat); + return true; +} + +void js::RelativeTimeFormatObject::finalize(JSFreeOp* fop, JSObject* obj) { + MOZ_ASSERT(fop->onMainThread()); + + if (URelativeDateTimeFormatter* rtf = + obj->as<RelativeTimeFormatObject>().getRelativeDateTimeFormatter()) { + intl::RemoveICUCellMemory(fop, obj, + RelativeTimeFormatObject::EstimatedMemoryUse); + + ureldatefmt_close(rtf); + } +} + +/** + * Returns a new URelativeDateTimeFormatter with the locale and options of the + * given RelativeTimeFormatObject. + */ +static URelativeDateTimeFormatter* NewURelativeDateTimeFormatter( + JSContext* cx, Handle<RelativeTimeFormatObject*> relativeTimeFormat) { + RootedObject internals(cx, intl::GetInternalsObject(cx, relativeTimeFormat)); + if (!internals) { + return nullptr; + } + + RootedValue value(cx); + + if (!GetProperty(cx, internals, internals, cx->names().locale, &value)) { + return nullptr; + } + + // ICU expects numberingSystem as a Unicode locale extensions on locale. 
+ + intl::LanguageTag tag(cx); + { + JSLinearString* locale = value.toString()->ensureLinear(cx); + if (!locale) { + return nullptr; + } + + if (!intl::LanguageTagParser::parse(cx, locale, tag)) { + return nullptr; + } + } + + JS::RootedVector<intl::UnicodeExtensionKeyword> keywords(cx); + + if (!GetProperty(cx, internals, internals, cx->names().numberingSystem, + &value)) { + return nullptr; + } + + { + JSLinearString* numberingSystem = value.toString()->ensureLinear(cx); + if (!numberingSystem) { + return nullptr; + } + + if (!keywords.emplaceBack("nu", numberingSystem)) { + return nullptr; + } + } + + // |ApplyUnicodeExtensionToTag| applies the new keywords to the front of the + // Unicode extension subtag. We're then relying on ICU to follow RFC 6067, + // which states that any trailing keywords using the same key should be + // ignored. + if (!intl::ApplyUnicodeExtensionToTag(cx, tag, keywords)) { + return nullptr; + } + + UniqueChars locale = tag.toStringZ(cx); + if (!locale) { + return nullptr; + } + + if (!GetProperty(cx, internals, internals, cx->names().style, &value)) { + return nullptr; + } + + UDateRelativeDateTimeFormatterStyle relDateTimeStyle; + { + JSLinearString* style = value.toString()->ensureLinear(cx); + if (!style) { + return nullptr; + } + + if (StringEqualsLiteral(style, "short")) { + relDateTimeStyle = UDAT_STYLE_SHORT; + } else if (StringEqualsLiteral(style, "narrow")) { + relDateTimeStyle = UDAT_STYLE_NARROW; + } else { + MOZ_ASSERT(StringEqualsLiteral(style, "long")); + relDateTimeStyle = UDAT_STYLE_LONG; + } + } + + UErrorCode status = U_ZERO_ERROR; + UNumberFormat* nf = unum_open(UNUM_DECIMAL, nullptr, 0, + IcuLocale(locale.get()), nullptr, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return nullptr; + } + ScopedICUObject<UNumberFormat, unum_close> toClose(nf); + + // Use the default values as if a new Intl.NumberFormat had been constructed. 
+ unum_setAttribute(nf, UNUM_MIN_INTEGER_DIGITS, 1); + unum_setAttribute(nf, UNUM_MIN_FRACTION_DIGITS, 0); + unum_setAttribute(nf, UNUM_MAX_FRACTION_DIGITS, 3); + unum_setAttribute(nf, UNUM_GROUPING_USED, true); + + // The undocumented magic value -2 is needed to request locale-specific data. + // See |icu::number::impl::Grouper::{fGrouping1, fGrouping2, fMinGrouping}|. + // + // Future ICU versions (> ICU 67) will expose it as a proper constant: + // https://unicode-org.atlassian.net/browse/ICU-21109 + // https://github.com/unicode-org/icu/pull/1152 + constexpr int32_t useLocaleData = -2; + + unum_setAttribute(nf, UNUM_GROUPING_SIZE, useLocaleData); + unum_setAttribute(nf, UNUM_SECONDARY_GROUPING_SIZE, useLocaleData); + unum_setAttribute(nf, UNUM_MINIMUM_GROUPING_DIGITS, useLocaleData); + + URelativeDateTimeFormatter* rtf = + ureldatefmt_open(IcuLocale(locale.get()), nf, relDateTimeStyle, + UDISPCTX_CAPITALIZATION_FOR_STANDALONE, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return nullptr; + } + + // Ownership was transferred to the URelativeDateTimeFormatter. + toClose.forget(); + return rtf; +} + +enum class RelativeTimeNumeric { + /** + * Only strings with numeric components like `1 day ago`. + */ + Always, + /** + * Natural-language strings like `yesterday` when possible, + * otherwise strings with numeric components as in `7 months ago`. + */ + Auto, +}; + +static bool intl_FormatRelativeTime(JSContext* cx, + URelativeDateTimeFormatter* rtf, double t, + URelativeDateTimeUnit unit, + RelativeTimeNumeric numeric, + MutableHandleValue result) { + JSString* str = CallICU( + cx, + [rtf, t, unit, numeric](UChar* chars, int32_t size, UErrorCode* status) { + auto fmt = numeric == RelativeTimeNumeric::Auto + ? 
ureldatefmt_format + : ureldatefmt_formatNumeric; + return fmt(rtf, t, unit, chars, size, status); + }); + if (!str) { + return false; + } + + result.setString(str); + return true; +} + +static bool intl_FormatToPartsRelativeTime(JSContext* cx, + URelativeDateTimeFormatter* rtf, + double t, URelativeDateTimeUnit unit, + RelativeTimeNumeric numeric, + MutableHandleValue result) { + UErrorCode status = U_ZERO_ERROR; + UFormattedRelativeDateTime* formatted = ureldatefmt_openResult(&status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + ScopedICUObject<UFormattedRelativeDateTime, ureldatefmt_closeResult> toClose( + formatted); + + if (numeric == RelativeTimeNumeric::Auto) { + ureldatefmt_formatToResult(rtf, t, unit, formatted, &status); + } else { + ureldatefmt_formatNumericToResult(rtf, t, unit, formatted, &status); + } + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + + const UFormattedValue* formattedValue = + ureldatefmt_resultAsValue(formatted, &status); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return false; + } + + intl::FieldType unitType; + switch (unit) { + case UDAT_REL_UNIT_SECOND: + unitType = &JSAtomState::second; + break; + case UDAT_REL_UNIT_MINUTE: + unitType = &JSAtomState::minute; + break; + case UDAT_REL_UNIT_HOUR: + unitType = &JSAtomState::hour; + break; + case UDAT_REL_UNIT_DAY: + unitType = &JSAtomState::day; + break; + case UDAT_REL_UNIT_WEEK: + unitType = &JSAtomState::week; + break; + case UDAT_REL_UNIT_MONTH: + unitType = &JSAtomState::month; + break; + case UDAT_REL_UNIT_QUARTER: + unitType = &JSAtomState::quarter; + break; + case UDAT_REL_UNIT_YEAR: + unitType = &JSAtomState::year; + break; + default: + MOZ_CRASH("unexpected relative time unit"); + } + + return intl::FormattedRelativeTimeToParts(cx, formattedValue, t, unitType, + result); +} + +bool js::intl_FormatRelativeTime(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = 
CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 5); + + Rooted<RelativeTimeFormatObject*> relativeTimeFormat(cx); + relativeTimeFormat = &args[0].toObject().as<RelativeTimeFormatObject>(); + + bool formatToParts = args[4].toBoolean(); + + // PartitionRelativeTimePattern, step 4. + double t = args[1].toNumber(); + if (!mozilla::IsFinite(t)) { + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, + JSMSG_DATE_NOT_FINITE, "RelativeTimeFormat", + formatToParts ? "formatToParts" : "format"); + return false; + } + + // Obtain a cached URelativeDateTimeFormatter object. + URelativeDateTimeFormatter* rtf = + relativeTimeFormat->getRelativeDateTimeFormatter(); + if (!rtf) { + rtf = NewURelativeDateTimeFormatter(cx, relativeTimeFormat); + if (!rtf) { + return false; + } + relativeTimeFormat->setRelativeDateTimeFormatter(rtf); + + intl::AddICUCellMemory(relativeTimeFormat, + RelativeTimeFormatObject::EstimatedMemoryUse); + } + + URelativeDateTimeUnit relDateTimeUnit; + { + JSLinearString* unit = args[2].toString()->ensureLinear(cx); + if (!unit) { + return false; + } + + // PartitionRelativeTimePattern, step 5. 
+ if (StringEqualsLiteral(unit, "second") || + StringEqualsLiteral(unit, "seconds")) { + relDateTimeUnit = UDAT_REL_UNIT_SECOND; + } else if (StringEqualsLiteral(unit, "minute") || + StringEqualsLiteral(unit, "minutes")) { + relDateTimeUnit = UDAT_REL_UNIT_MINUTE; + } else if (StringEqualsLiteral(unit, "hour") || + StringEqualsLiteral(unit, "hours")) { + relDateTimeUnit = UDAT_REL_UNIT_HOUR; + } else if (StringEqualsLiteral(unit, "day") || + StringEqualsLiteral(unit, "days")) { + relDateTimeUnit = UDAT_REL_UNIT_DAY; + } else if (StringEqualsLiteral(unit, "week") || + StringEqualsLiteral(unit, "weeks")) { + relDateTimeUnit = UDAT_REL_UNIT_WEEK; + } else if (StringEqualsLiteral(unit, "month") || + StringEqualsLiteral(unit, "months")) { + relDateTimeUnit = UDAT_REL_UNIT_MONTH; + } else if (StringEqualsLiteral(unit, "quarter") || + StringEqualsLiteral(unit, "quarters")) { + relDateTimeUnit = UDAT_REL_UNIT_QUARTER; + } else if (StringEqualsLiteral(unit, "year") || + StringEqualsLiteral(unit, "years")) { + relDateTimeUnit = UDAT_REL_UNIT_YEAR; + } else { + if (auto unitChars = QuoteString(cx, unit, '"')) { + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, + JSMSG_INVALID_OPTION_VALUE, "unit", + unitChars.get()); + } + return false; + } + } + + RelativeTimeNumeric relDateTimeNumeric; + { + JSLinearString* numeric = args[3].toString()->ensureLinear(cx); + if (!numeric) { + return false; + } + + if (StringEqualsLiteral(numeric, "auto")) { + relDateTimeNumeric = RelativeTimeNumeric::Auto; + } else { + MOZ_ASSERT(StringEqualsLiteral(numeric, "always")); + relDateTimeNumeric = RelativeTimeNumeric::Always; + } + } + + return formatToParts + ? 
intl_FormatToPartsRelativeTime(cx, rtf, t, relDateTimeUnit, + relDateTimeNumeric, args.rval()) + : intl_FormatRelativeTime(cx, rtf, t, relDateTimeUnit, + relDateTimeNumeric, args.rval()); +} diff --git a/js/src/builtin/intl/RelativeTimeFormat.h b/js/src/builtin/intl/RelativeTimeFormat.h new file mode 100644 index 0000000000..699d901eb7 --- /dev/null +++ b/js/src/builtin/intl/RelativeTimeFormat.h @@ -0,0 +1,86 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef builtin_intl_RelativeTimeFormat_h +#define builtin_intl_RelativeTimeFormat_h + +#include "mozilla/Attributes.h" + +#include <stdint.h> + +#include "builtin/SelfHostingDefines.h" +#include "gc/Barrier.h" +#include "js/Class.h" +#include "vm/NativeObject.h" +#include "vm/Runtime.h" + +struct UFormattedValue; +struct URelativeDateTimeFormatter; + +namespace js { + +class RelativeTimeFormatObject : public NativeObject { + public: + static const JSClass class_; + static const JSClass& protoClass_; + + static constexpr uint32_t INTERNALS_SLOT = 0; + static constexpr uint32_t URELATIVE_TIME_FORMAT_SLOT = 1; + static constexpr uint32_t SLOT_COUNT = 2; + + static_assert(INTERNALS_SLOT == INTL_INTERNALS_OBJECT_SLOT, + "INTERNALS_SLOT must match self-hosting define for internals " + "object slot"); + + // Estimated memory use for URelativeDateTimeFormatter (see IcuMemoryUsage). 
+ static constexpr size_t EstimatedMemoryUse = 278; + + URelativeDateTimeFormatter* getRelativeDateTimeFormatter() const { + const auto& slot = getFixedSlot(URELATIVE_TIME_FORMAT_SLOT); + if (slot.isUndefined()) { + return nullptr; + } + return static_cast<URelativeDateTimeFormatter*>(slot.toPrivate()); + } + + void setRelativeDateTimeFormatter(URelativeDateTimeFormatter* rtf) { + setFixedSlot(URELATIVE_TIME_FORMAT_SLOT, PrivateValue(rtf)); + } + + private: + static const JSClassOps classOps_; + static const ClassSpec classSpec_; + + static void finalize(JSFreeOp* fop, JSObject* obj); +}; + +/** + * Returns a relative time as a string formatted according to the effective + * locale and the formatting options of the given RelativeTimeFormat. + * + * |t| should be a number representing a number to be formatted. + * |unit| should be "second", "minute", "hour", "day", "week", "month", + * "quarter", or "year". + * |numeric| should be "always" or "auto". + * + * Usage: formatted = intl_FormatRelativeTime(relativeTimeFormat, t, + * unit, numeric, formatToParts) + */ +extern MOZ_MUST_USE bool intl_FormatRelativeTime(JSContext* cx, unsigned argc, + JS::Value* vp); + +namespace intl { + +using FieldType = js::ImmutablePropertyNamePtr JSAtomState::*; + +MOZ_MUST_USE bool FormattedRelativeTimeToParts( + JSContext* cx, const UFormattedValue* formattedValue, double timeValue, + FieldType relativeTimeUnit, MutableHandleValue result); + +} // namespace intl +} // namespace js + +#endif /* builtin_intl_RelativeTimeFormat_h */ diff --git a/js/src/builtin/intl/RelativeTimeFormat.js b/js/src/builtin/intl/RelativeTimeFormat.js new file mode 100644 index 0000000000..b62400b622 --- /dev/null +++ b/js/src/builtin/intl/RelativeTimeFormat.js @@ -0,0 +1,268 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +/** + * RelativeTimeFormat internal properties. + * + * Spec: ECMAScript 402 API, RelativeTimeFormat, 1.3.3. + */ +var relativeTimeFormatInternalProperties = { + localeData: relativeTimeFormatLocaleData, + relevantExtensionKeys: ["nu"], +}; + +function relativeTimeFormatLocaleData() { + return { + nu: getNumberingSystems, + default: { + nu: intl_numberingSystem, + }, + }; +} + +/** + * Compute an internal properties object from |lazyRelativeTimeFormatData|. + */ +function resolveRelativeTimeFormatInternals(lazyRelativeTimeFormatData) { + assert(IsObject(lazyRelativeTimeFormatData), "lazy data not an object?"); + + var internalProps = std_Object_create(null); + + var RelativeTimeFormat = relativeTimeFormatInternalProperties; + + // Steps 10-11. + const r = ResolveLocale("RelativeTimeFormat", + lazyRelativeTimeFormatData.requestedLocales, + lazyRelativeTimeFormatData.opt, + RelativeTimeFormat.relevantExtensionKeys, + RelativeTimeFormat.localeData); + + // Steps 12-13. + internalProps.locale = r.locale; + + // Step 14. + internalProps.numberingSystem = r.nu; + + // Step 15 (Not relevant in our implementation). + + // Step 17. + internalProps.style = lazyRelativeTimeFormatData.style; + + // Step 19. + internalProps.numeric = lazyRelativeTimeFormatData.numeric; + + // Steps 20-24 (Not relevant in our implementation). + + return internalProps; +} + +/** + * Returns an object containing the RelativeTimeFormat internal properties of |obj|. 
+ */ +function getRelativeTimeFormatInternals(obj) { + assert(IsObject(obj), "getRelativeTimeFormatInternals called with non-object"); + assert(GuardToRelativeTimeFormat(obj) !== null, "getRelativeTimeFormatInternals called with non-RelativeTimeFormat"); + + var internals = getIntlObjectInternals(obj); + assert(internals.type === "RelativeTimeFormat", "bad type escaped getIntlObjectInternals"); + + var internalProps = maybeInternalProperties(internals); + if (internalProps) + return internalProps; + + internalProps = resolveRelativeTimeFormatInternals(internals.lazyData); + setInternalProperties(internals, internalProps); + return internalProps; +} + +/** + * Initializes an object as a RelativeTimeFormat. + * + * This method is complicated a moderate bit by its implementing initialization + * as a *lazy* concept. Everything that must happen now, does -- but we defer + * all the work we can until the object is actually used as a RelativeTimeFormat. + * This later work occurs in |resolveRelativeTimeFormatInternals|; steps not noted + * here occur there. + * + * Spec: ECMAScript 402 API, RelativeTimeFormat, 1.1.1. + */ +function InitializeRelativeTimeFormat(relativeTimeFormat, locales, options) { + assert(IsObject(relativeTimeFormat), + "InitializeRelativeimeFormat called with non-object"); + assert(GuardToRelativeTimeFormat(relativeTimeFormat) !== null, + "InitializeRelativeTimeFormat called with non-RelativeTimeFormat"); + + // Lazy RelativeTimeFormat data has the following structure: + // + // { + // requestedLocales: List of locales, + // style: "long" / "short" / "narrow", + // numeric: "always" / "auto", + // + // opt: // opt object computed in InitializeRelativeTimeFormat + // { + // localeMatcher: "lookup" / "best fit", + // } + // } + // + // Note that lazy data is only installed as a final step of initialization, + // so every RelativeTimeFormat lazy data object has *all* these properties, never a + // subset of them. 
+ const lazyRelativeTimeFormatData = std_Object_create(null); + + // Step 1. + let requestedLocales = CanonicalizeLocaleList(locales); + lazyRelativeTimeFormatData.requestedLocales = requestedLocales; + + // Steps 2-3. + if (options === undefined) + options = std_Object_create(null); + else + options = ToObject(options); + + // Step 4. + let opt = new Record(); + + // Steps 5-6. + let matcher = GetOption(options, "localeMatcher", "string", ["lookup", "best fit"], "best fit"); + opt.localeMatcher = matcher; + + // Steps 7-9. + let numberingSystem = GetOption(options, "numberingSystem", "string", undefined, undefined); + if (numberingSystem !== undefined) { + numberingSystem = intl_ValidateAndCanonicalizeUnicodeExtensionType(numberingSystem, + "numberingSystem", + "nu"); + } + opt.nu = numberingSystem; + + lazyRelativeTimeFormatData.opt = opt; + + // Steps 16-17. + const style = GetOption(options, "style", "string", ["long", "short", "narrow"], "long"); + lazyRelativeTimeFormatData.style = style; + + // Steps 18-19. + const numeric = GetOption(options, "numeric", "string", ["always", "auto"], "always"); + lazyRelativeTimeFormatData.numeric = numeric; + + initializeIntlObject(relativeTimeFormat, "RelativeTimeFormat", lazyRelativeTimeFormatData); +} + +/** + * Returns the subset of the given locale list for which this locale list has a + * matching (possibly fallback) locale. Locales appear in the same order in the + * returned list as in the input list. + * + * Spec: ECMAScript 402 API, RelativeTimeFormat, 1.3.2. + */ +function Intl_RelativeTimeFormat_supportedLocalesOf(locales /*, options*/) { + var options = arguments.length > 1 ? arguments[1] : undefined; + + // Step 1. + var availableLocales = "RelativeTimeFormat"; + + // Step 2. + let requestedLocales = CanonicalizeLocaleList(locales); + + // Step 3. 
+ return SupportedLocales(availableLocales, requestedLocales, options); +} + +/** + * Returns a String value representing the written form of a relative date + * formatted according to the effective locale and the formatting options + * of this RelativeTimeFormat object. + * + * Spec: ECMAScript 402 API, RelativeTimeFormat, 1.4.3. + */ +function Intl_RelativeTimeFormat_format(value, unit) { + // Step 1. + let relativeTimeFormat = this; + + // Step 2. + if (!IsObject(relativeTimeFormat) || + (relativeTimeFormat = GuardToRelativeTimeFormat(relativeTimeFormat)) === null) + { + return callFunction(CallRelativeTimeFormatMethodIfWrapped, this, value, unit, + "Intl_RelativeTimeFormat_format"); + } + + // Step 3. + let t = ToNumber(value); + + // Step 4. + let u = ToString(unit); + + // Ensure the RelativeTimeFormat internals are resolved. + var internals = getRelativeTimeFormatInternals(relativeTimeFormat); + + // Step 5. The trailing |false| presumably selects plain-string output + // (formatToParts passes |true|) -- TODO confirm against intl_FormatRelativeTime. + return intl_FormatRelativeTime(relativeTimeFormat, t, u, internals.numeric, + false); +} + +/** + * Returns an Array composed of the components of a relative date formatted + * according to the effective locale and the formatting options of this + * RelativeTimeFormat object. + * + * Spec: ECMAScript 402 API, RelativeTimeFormat, 1.4.4. + */ +function Intl_RelativeTimeFormat_formatToParts(value, unit) { + // Step 1. + let relativeTimeFormat = this; + + // Step 2. + if (!IsObject(relativeTimeFormat) || + (relativeTimeFormat = GuardToRelativeTimeFormat(relativeTimeFormat)) === null) + { + return callFunction(CallRelativeTimeFormatMethodIfWrapped, this, value, unit, + "Intl_RelativeTimeFormat_formatToParts"); + } + + // Step 3. + let t = ToNumber(value); + + // Step 4. + let u = ToString(unit); + + // Ensure the RelativeTimeFormat internals are resolved. + var internals = getRelativeTimeFormatInternals(relativeTimeFormat); + + // Step 5. 
+ return intl_FormatRelativeTime(relativeTimeFormat, t, u, internals.numeric, + true); +} + +/** + * Returns the resolved options for a RelativeTimeFormat object. + * + * Spec: ECMAScript 402 API, RelativeTimeFormat, 1.4.5. + */ +function Intl_RelativeTimeFormat_resolvedOptions() { + // Step 1. + var relativeTimeFormat = this; + + // Steps 2-3. + if (!IsObject(relativeTimeFormat) || + (relativeTimeFormat = GuardToRelativeTimeFormat(relativeTimeFormat)) === null) + { + return callFunction(CallRelativeTimeFormatMethodIfWrapped, this, + "Intl_RelativeTimeFormat_resolvedOptions"); + } + + var internals = getRelativeTimeFormatInternals(relativeTimeFormat); + + // Steps 4-5. + var result = { + locale: internals.locale, + style: internals.style, + numeric: internals.numeric, + numberingSystem: internals.numberingSystem, + }; + + // Step 6. + return result; +} diff --git a/js/src/builtin/intl/SanctionedSimpleUnitIdentifiers.yaml b/js/src/builtin/intl/SanctionedSimpleUnitIdentifiers.yaml new file mode 100644 index 0000000000..509a25758f --- /dev/null +++ b/js/src/builtin/intl/SanctionedSimpleUnitIdentifiers.yaml @@ -0,0 +1,56 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# 6.5.2 IsSanctionedSimpleUnitIdentifier ( unitIdentifier ) +# +# Simple units sanctioned for use in ECMAScript +# +# https://tc39.es/ecma402/#table-sanctioned-simple-unit-identifiers + +# Run |make_intl_data units| to regenerate all files which reference this list +# of sanctioned unit identifiers. 
+ +- acre +- bit +- byte +- celsius +- centimeter +- day +- degree +- fahrenheit +- fluid-ounce +- foot +- gallon +- gigabit +- gigabyte +- gram +- hectare +- hour +- inch +- kilobit +- kilobyte +- kilogram +- kilometer +- liter +- megabit +- megabyte +- meter +- mile +- mile-scandinavian +- milliliter +- millimeter +- millisecond +- minute +- month +- ounce +- percent +- petabyte +- pound +- second +- stone +- terabit +- terabyte +- week +- yard +- year diff --git a/js/src/builtin/intl/SanctionedSimpleUnitIdentifiersGenerated.js b/js/src/builtin/intl/SanctionedSimpleUnitIdentifiersGenerated.js new file mode 100644 index 0000000000..cb2703760e --- /dev/null +++ b/js/src/builtin/intl/SanctionedSimpleUnitIdentifiersGenerated.js @@ -0,0 +1,52 @@ +// Generated by make_intl_data.py. DO NOT EDIT. + +/** + * The list of currently supported simple unit identifiers. + * + * Intl.NumberFormat Unified API Proposal + */ +var sanctionedSimpleUnitIdentifiers = { + "acre": true, + "bit": true, + "byte": true, + "celsius": true, + "centimeter": true, + "day": true, + "degree": true, + "fahrenheit": true, + "fluid-ounce": true, + "foot": true, + "gallon": true, + "gigabit": true, + "gigabyte": true, + "gram": true, + "hectare": true, + "hour": true, + "inch": true, + "kilobit": true, + "kilobyte": true, + "kilogram": true, + "kilometer": true, + "liter": true, + "megabit": true, + "megabyte": true, + "meter": true, + "mile": true, + "mile-scandinavian": true, + "milliliter": true, + "millimeter": true, + "millisecond": true, + "minute": true, + "month": true, + "ounce": true, + "percent": true, + "petabyte": true, + "pound": true, + "second": true, + "stone": true, + "terabit": true, + "terabyte": true, + "week": true, + "yard": true, + "year": true +}; diff --git a/js/src/builtin/intl/ScopedICUObject.h b/js/src/builtin/intl/ScopedICUObject.h new file mode 100644 index 0000000000..557a7e4bd6 --- /dev/null +++ b/js/src/builtin/intl/ScopedICUObject.h @@ -0,0 +1,43 @@ +/* -*- Mode: 
C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef builtin_intl_ScopedICUObject_h +#define builtin_intl_ScopedICUObject_h + +/* + * A simple RAII class to ensure ICU objects are automatically deallocated at + * scope end. Unfortunately, ICU's C++ API is uniformly unstable, so we can't + * use its smart pointers for this. + * + * The destructor calls |Delete| on the held pointer unless it is null; + * |forget()| nulls the pointer so the destructor does nothing. + * + * NOTE(review): the class declares a destructor but no copy operations, so an + * implicitly generated copy would hold the same pointer and |Delete| would run + * twice. Instances must not be copied. + */ + +namespace js { + +template <typename T, void(Delete)(T*)> +class ScopedICUObject { + T* ptr_; + + public: + explicit ScopedICUObject(T* ptr) : ptr_(ptr) {} + + ~ScopedICUObject() { + if (ptr_) { + Delete(ptr_); + } + } + + // In cases where an object should be deleted on abnormal exits, + // but returned to the caller if everything goes well, call forget() + // to transfer the object just before returning. + T* forget() { + T* tmp = ptr_; + ptr_ = nullptr; + return tmp; + } +}; + +} // namespace js + +#endif /* builtin_intl_ScopedICUObject_h */ diff --git a/js/src/builtin/intl/SharedIntlData.cpp b/js/src/builtin/intl/SharedIntlData.cpp new file mode 100644 index 0000000000..410dcfde4e --- /dev/null +++ b/js/src/builtin/intl/SharedIntlData.cpp @@ -0,0 +1,670 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Runtime-wide Intl data shared across compartments. 
*/ + +#include "builtin/intl/SharedIntlData.h" + +#include "mozilla/Assertions.h" +#include "mozilla/HashFunctions.h" +#include "mozilla/TextUtils.h" + +#include <algorithm> +#include <stdint.h> +#include <utility> + +#include "builtin/intl/CommonFunctions.h" +#include "builtin/intl/ScopedICUObject.h" +#include "builtin/intl/TimeZoneDataGenerated.h" +#include "builtin/String.h" +#include "js/Utility.h" +#include "js/Vector.h" +#include "unicode/ucal.h" +#include "unicode/ucol.h" +#include "unicode/udat.h" +#include "unicode/udatpg.h" +#include "unicode/uenum.h" +#include "unicode/uloc.h" +#include "unicode/unum.h" +#include "unicode/utypes.h" +#include "vm/JSAtom.h" +#include "vm/StringType.h" + +using js::HashNumber; +using js::intl::StringsAreEqual; + /** + * Returns |c| converted to upper case when it is an ASCII lowercase letter + * (by subtracting 0x20); any other value is returned unchanged. + */ +template <typename Char> +static constexpr Char ToUpperASCII(Char c) { + return mozilla::IsAsciiLowercaseAlpha(c) ? (c - 0x20) : c; +} + +static_assert(ToUpperASCII('a') == 'A', "verifying 'a' uppercases correctly"); +static_assert(ToUpperASCII('m') == 'M', "verifying 'm' uppercases correctly"); +static_assert(ToUpperASCII('z') == 'Z', "verifying 'z' uppercases correctly"); +static_assert(ToUpperASCII(u'a') == u'A', + "verifying u'a' uppercases correctly"); +static_assert(ToUpperASCII(u'k') == u'K', + "verifying u'k' uppercases correctly"); +static_assert(ToUpperASCII(u'z') == u'Z', + "verifying u'z' uppercases correctly"); + /** + * Hashes the first |length| characters of |s| after mapping each one through + * ToUpperASCII, so strings differing only in ASCII letter case hash alike. + */ +template <typename Char> +static HashNumber HashStringIgnoreCaseASCII(const Char* s, size_t length) { + uint32_t hash = 0; + for (size_t i = 0; i < length; i++) { + hash = mozilla::AddToHash(hash, ToUpperASCII(s[i])); + } + return hash; +} + /** + * Builds a time zone lookup key whose hash ignores ASCII letter case, hashing + * the Latin-1 or the two-byte characters depending on |isLatin1|. + */ +js::intl::SharedIntlData::TimeZoneHasher::Lookup::Lookup( + JSLinearString* timeZone) + : js::intl::SharedIntlData::LinearStringLookup(timeZone) { + if (isLatin1) { + hash = HashStringIgnoreCaseASCII(latin1Chars, length); + } else { + hash = HashStringIgnoreCaseASCII(twoByteChars, length); + } +} + /** + * Compares |len| characters of two strings for equality, ignoring ASCII + * letter case. + */ +template <typename Char1, typename Char2> +static bool 
EqualCharsIgnoreCaseASCII(const Char1* s1, const Char2* s2, + size_t len) { + for (const Char1* s1end = s1 + len; s1 < s1end; s1++, s2++) { + if (ToUpperASCII(*s1) != ToUpperASCII(*s2)) { + return false; + } + } + return true; +} + +bool js::intl::SharedIntlData::TimeZoneHasher::match(TimeZoneName key, + const Lookup& lookup) { + if (key->length() != lookup.length) { + return false; + } + + // Compare time zone names ignoring ASCII case differences. + if (key->hasLatin1Chars()) { + const Latin1Char* keyChars = key->latin1Chars(lookup.nogc); + if (lookup.isLatin1) { + return EqualCharsIgnoreCaseASCII(keyChars, lookup.latin1Chars, + lookup.length); + } + return EqualCharsIgnoreCaseASCII(keyChars, lookup.twoByteChars, + lookup.length); + } + + const char16_t* keyChars = key->twoByteChars(lookup.nogc); + if (lookup.isLatin1) { + return EqualCharsIgnoreCaseASCII(lookup.latin1Chars, keyChars, + lookup.length); + } + return EqualCharsIgnoreCaseASCII(keyChars, lookup.twoByteChars, + lookup.length); +} + +static bool IsLegacyICUTimeZone(const char* timeZone) { + for (const auto& legacyTimeZone : js::timezone::legacyICUTimeZones) { + if (StringsAreEqual(timeZone, legacyTimeZone)) { + return true; + } + } + return false; +} + +bool js::intl::SharedIntlData::ensureTimeZones(JSContext* cx) { + if (timeZoneDataInitialized) { + return true; + } + + // If ensureTimeZones() was called previously, but didn't complete due to + // OOM, clear all sets/maps and start from scratch. 
+ availableTimeZones.clearAndCompact(); + + UErrorCode status = U_ZERO_ERROR; + UEnumeration* values = ucal_openTimeZones(&status); + if (U_FAILURE(status)) { + ReportInternalError(cx); + return false; + } + ScopedICUObject<UEnumeration, uenum_close> toClose(values); + + RootedAtom timeZone(cx); + while (true) { + int32_t size; + const char* rawTimeZone = uenum_next(values, &size, &status); + if (U_FAILURE(status)) { + ReportInternalError(cx); + return false; + } + + if (rawTimeZone == nullptr) { + break; + } + + // Skip legacy ICU time zone names. + if (IsLegacyICUTimeZone(rawTimeZone)) { + continue; + } + + MOZ_ASSERT(size >= 0); + timeZone = Atomize(cx, rawTimeZone, size_t(size)); + if (!timeZone) { + return false; + } + + TimeZoneHasher::Lookup lookup(timeZone); + TimeZoneSet::AddPtr p = availableTimeZones.lookupForAdd(lookup); + + // ICU shouldn't report any duplicate time zone names, but if it does, + // just ignore the duplicate name. + if (!p && !availableTimeZones.add(p, timeZone)) { + ReportOutOfMemory(cx); + return false; + } + } + + ianaZonesTreatedAsLinksByICU.clearAndCompact(); + + for (const char* rawTimeZone : timezone::ianaZonesTreatedAsLinksByICU) { + MOZ_ASSERT(rawTimeZone != nullptr); + timeZone = Atomize(cx, rawTimeZone, strlen(rawTimeZone)); + if (!timeZone) { + return false; + } + + TimeZoneHasher::Lookup lookup(timeZone); + TimeZoneSet::AddPtr p = ianaZonesTreatedAsLinksByICU.lookupForAdd(lookup); + MOZ_ASSERT(!p, "Duplicate entry in timezone::ianaZonesTreatedAsLinksByICU"); + + if (!ianaZonesTreatedAsLinksByICU.add(p, timeZone)) { + ReportOutOfMemory(cx); + return false; + } + } + + ianaLinksCanonicalizedDifferentlyByICU.clearAndCompact(); + + RootedAtom linkName(cx); + RootedAtom& target = timeZone; + for (const auto& linkAndTarget : + timezone::ianaLinksCanonicalizedDifferentlyByICU) { + const char* rawLinkName = linkAndTarget.link; + const char* rawTarget = linkAndTarget.target; + + MOZ_ASSERT(rawLinkName != nullptr); + linkName = 
Atomize(cx, rawLinkName, strlen(rawLinkName)); + if (!linkName) { + return false; + } + + MOZ_ASSERT(rawTarget != nullptr); + target = Atomize(cx, rawTarget, strlen(rawTarget)); + if (!target) { + return false; + } + + TimeZoneHasher::Lookup lookup(linkName); + TimeZoneMap::AddPtr p = + ianaLinksCanonicalizedDifferentlyByICU.lookupForAdd(lookup); + MOZ_ASSERT( + !p, + "Duplicate entry in timezone::ianaLinksCanonicalizedDifferentlyByICU"); + + if (!ianaLinksCanonicalizedDifferentlyByICU.add(p, linkName, target)) { + ReportOutOfMemory(cx); + return false; + } + } + + MOZ_ASSERT(!timeZoneDataInitialized, + "ensureTimeZones is neither reentrant nor thread-safe"); + timeZoneDataInitialized = true; + + return true; +} + +bool js::intl::SharedIntlData::validateTimeZoneName(JSContext* cx, + HandleString timeZone, + MutableHandleAtom result) { + if (!ensureTimeZones(cx)) { + return false; + } + + RootedLinearString timeZoneLinear(cx, timeZone->ensureLinear(cx)); + if (!timeZoneLinear) { + return false; + } + + TimeZoneHasher::Lookup lookup(timeZoneLinear); + if (TimeZoneSet::Ptr p = availableTimeZones.lookup(lookup)) { + result.set(*p); + } + + return true; +} + +bool js::intl::SharedIntlData::tryCanonicalizeTimeZoneConsistentWithIANA( + JSContext* cx, HandleString timeZone, MutableHandleAtom result) { + if (!ensureTimeZones(cx)) { + return false; + } + + RootedLinearString timeZoneLinear(cx, timeZone->ensureLinear(cx)); + if (!timeZoneLinear) { + return false; + } + + TimeZoneHasher::Lookup lookup(timeZoneLinear); + MOZ_ASSERT(availableTimeZones.has(lookup), "Invalid time zone name"); + + if (TimeZoneMap::Ptr p = + ianaLinksCanonicalizedDifferentlyByICU.lookup(lookup)) { + // The effectively supported time zones aren't known at compile time, + // when + // 1. SpiderMonkey was compiled with "--with-system-icu". + // 2. ICU's dynamic time zone data loading feature was used. 
+ // (ICU supports loading time zone files at runtime through the + // ICU_TIMEZONE_FILES_DIR environment variable.) + // Ensure ICU supports the new target zone before applying the update. + TimeZoneName targetTimeZone = p->value(); + TimeZoneHasher::Lookup targetLookup(targetTimeZone); + if (availableTimeZones.has(targetLookup)) { + result.set(targetTimeZone); + } + } else if (TimeZoneSet::Ptr p = ianaZonesTreatedAsLinksByICU.lookup(lookup)) { + result.set(*p); + } + + return true; +} + +js::intl::SharedIntlData::LocaleHasher::Lookup::Lookup(JSLinearString* locale) + : js::intl::SharedIntlData::LinearStringLookup(locale) { + if (isLatin1) { + hash = mozilla::HashString(latin1Chars, length); + } else { + hash = mozilla::HashString(twoByteChars, length); + } +} + +js::intl::SharedIntlData::LocaleHasher::Lookup::Lookup(const char* chars, + size_t length) + : js::intl::SharedIntlData::LinearStringLookup(chars, length) { + hash = mozilla::HashString(latin1Chars, length); +} + +bool js::intl::SharedIntlData::LocaleHasher::match(Locale key, + const Lookup& lookup) { + if (key->length() != lookup.length) { + return false; + } + + if (key->hasLatin1Chars()) { + const Latin1Char* keyChars = key->latin1Chars(lookup.nogc); + if (lookup.isLatin1) { + return EqualChars(keyChars, lookup.latin1Chars, lookup.length); + } + return EqualChars(keyChars, lookup.twoByteChars, lookup.length); + } + + const char16_t* keyChars = key->twoByteChars(lookup.nogc); + if (lookup.isLatin1) { + return EqualChars(lookup.latin1Chars, keyChars, lookup.length); + } + return EqualChars(keyChars, lookup.twoByteChars, lookup.length); +} + +bool js::intl::SharedIntlData::getAvailableLocales( + JSContext* cx, LocaleSet& locales, CountAvailable countAvailable, + GetAvailable getAvailable) { + auto addLocale = [cx, &locales](const char* locale, size_t length) { + JSAtom* atom = Atomize(cx, locale, length); + if (!atom) { + return false; + } + + LocaleHasher::Lookup lookup(atom); + LocaleSet::AddPtr p = 
locales.lookupForAdd(lookup); + + // ICU shouldn't report any duplicate locales, but if it does, just + // ignore the duplicated locale. + if (!p && !locales.add(p, atom)) { + ReportOutOfMemory(cx); + return false; + } + + return true; + }; + + js::Vector<char, 16> lang(cx); + + int32_t count = countAvailable(); + for (int32_t i = 0; i < count; i++) { + const char* locale = getAvailable(i); + size_t length = strlen(locale); + + lang.clear(); + if (!lang.append(locale, length)) { + return false; + } + + std::replace(lang.begin(), lang.end(), '_', '-'); + + if (!addLocale(lang.begin(), length)) { + return false; + } + } + + // Add old-style language tags without script code for locales that in current + // usage would include a script subtag. Also add an entry for the last-ditch + // locale, in case ICU doesn't directly support it (but does support it + // through fallback, e.g. supporting "en-GB" indirectly using "en" support). + + // Certain old-style language tags lack a script code, but in current usage + // they *would* include a script code. Map these over to modern forms. + for (const auto& mapping : js::intl::oldStyleLanguageTagMappings) { + const char* oldStyle = mapping.oldStyle; + const char* modernStyle = mapping.modernStyle; + + LocaleHasher::Lookup lookup(modernStyle, strlen(modernStyle)); + if (locales.has(lookup)) { + if (!addLocale(oldStyle, strlen(oldStyle))) { + return false; + } + } + } + + // Also forcibly provide the last-ditch locale. 
+ { + const char* lastDitch = intl::LastDitchLocale(); + MOZ_ASSERT(strcmp(lastDitch, "en-GB") == 0); + +#ifdef DEBUG + static constexpr char lastDitchParent[] = "en"; + + LocaleHasher::Lookup lookup(lastDitchParent, strlen(lastDitchParent)); + MOZ_ASSERT(locales.has(lookup), + "shouldn't be a need to add every locale implied by the " + "last-ditch locale, merely just the last-ditch locale"); +#endif + + if (!addLocale(lastDitch, strlen(lastDitch))) { + return false; + } + } + + return true; +} + +#ifdef DEBUG +/** + * Debug-only check that two ICU "available locales" enumerations report the + * same number of locales and, index by index, the same locale names. + * + * Names are compared by content rather than by pointer, so two APIs that + * return equal strings from different storage are still treated as the same. + */ +template <typename CountAvailable, typename GetAvailable> +static bool IsSameAvailableLocales(CountAvailable countAvailable1, + GetAvailable getAvailable1, + CountAvailable countAvailable2, + GetAvailable getAvailable2) { + int32_t count = countAvailable1(); + if (count != countAvailable2()) { + return false; + } + for (int32_t i = 0; i < count; i++) { + // Compare the C strings themselves; pointer identity is not guaranteed. + if (!StringsAreEqual(getAvailable1(i), getAvailable2(i))) { + return false; + } + } + return true; +} +#endif + +bool js::intl::SharedIntlData::ensureSupportedLocales(JSContext* cx) { + if (supportedLocalesInitialized) { + return true; + } + + // If ensureSupportedLocales() was called previously, but didn't complete due + // to OOM, clear all data and start from scratch. 
+ supportedLocales.clearAndCompact(); + collatorSupportedLocales.clearAndCompact(); + + if (!getAvailableLocales(cx, supportedLocales, uloc_countAvailable, + uloc_getAvailable)) { + return false; + } + if (!getAvailableLocales(cx, collatorSupportedLocales, ucol_countAvailable, + ucol_getAvailable)) { + return false; + } + + MOZ_ASSERT(IsSameAvailableLocales(uloc_countAvailable, uloc_getAvailable, + udat_countAvailable, udat_getAvailable)); + + MOZ_ASSERT(IsSameAvailableLocales(uloc_countAvailable, uloc_getAvailable, + unum_countAvailable, unum_getAvailable)); + + MOZ_ASSERT(!supportedLocalesInitialized, + "ensureSupportedLocales is neither reentrant nor thread-safe"); + supportedLocalesInitialized = true; + + return true; +} + +bool js::intl::SharedIntlData::isSupportedLocale(JSContext* cx, + SupportedLocaleKind kind, + HandleString locale, + bool* supported) { + if (!ensureSupportedLocales(cx)) { + return false; + } + + RootedLinearString localeLinear(cx, locale->ensureLinear(cx)); + if (!localeLinear) { + return false; + } + + LocaleHasher::Lookup lookup(localeLinear); + + switch (kind) { + case SupportedLocaleKind::Collator: + *supported = collatorSupportedLocales.has(lookup); + return true; + case SupportedLocaleKind::DateTimeFormat: + case SupportedLocaleKind::DisplayNames: + case SupportedLocaleKind::ListFormat: + case SupportedLocaleKind::NumberFormat: + case SupportedLocaleKind::PluralRules: + case SupportedLocaleKind::RelativeTimeFormat: + *supported = supportedLocales.has(lookup); + return true; + } + MOZ_CRASH("Invalid Intl constructor"); +} + +#if DEBUG || MOZ_SYSTEM_ICU +bool js::intl::SharedIntlData::ensureUpperCaseFirstLocales(JSContext* cx) { + if (upperCaseFirstInitialized) { + return true; + } + + // If ensureUpperCaseFirstLocales() was called previously, but didn't + // complete due to OOM, clear all data and start from scratch. 
+ upperCaseFirstLocales.clearAndCompact(); + + UErrorCode status = U_ZERO_ERROR; + UEnumeration* available = ucol_openAvailableLocales(&status); + if (U_FAILURE(status)) { + ReportInternalError(cx); + return false; + } + ScopedICUObject<UEnumeration, uenum_close> toClose(available); + + RootedAtom locale(cx); + while (true) { + int32_t size; + const char* rawLocale = uenum_next(available, &size, &status); + if (U_FAILURE(status)) { + ReportInternalError(cx); + return false; + } + + if (rawLocale == nullptr) { + break; + } + + UCollator* collator = ucol_open(rawLocale, &status); + if (U_FAILURE(status)) { + ReportInternalError(cx); + return false; + } + ScopedICUObject<UCollator, ucol_close> toCloseCollator(collator); + + UColAttributeValue caseFirst = + ucol_getAttribute(collator, UCOL_CASE_FIRST, &status); + if (U_FAILURE(status)) { + ReportInternalError(cx); + return false; + } + + if (caseFirst != UCOL_UPPER_FIRST) { + continue; + } + + MOZ_ASSERT(size >= 0); + locale = Atomize(cx, rawLocale, size_t(size)); + if (!locale) { + return false; + } + + LocaleHasher::Lookup lookup(locale); + LocaleSet::AddPtr p = upperCaseFirstLocales.lookupForAdd(lookup); + + // ICU shouldn't report any duplicate locales, but if it does, just + // ignore the duplicated locale. 
+ if (!p && !upperCaseFirstLocales.add(p, locale)) { + ReportOutOfMemory(cx); + return false; + } + } + + MOZ_ASSERT( + !upperCaseFirstInitialized, + "ensureUpperCaseFirstLocales is neither reentrant nor thread-safe"); + upperCaseFirstInitialized = true; + + return true; +} +#endif // DEBUG || MOZ_SYSTEM_ICU + +bool js::intl::SharedIntlData::isUpperCaseFirst(JSContext* cx, + HandleString locale, + bool* isUpperFirst) { +#if DEBUG || MOZ_SYSTEM_ICU + if (!ensureUpperCaseFirstLocales(cx)) { + return false; + } +#endif + + RootedLinearString localeLinear(cx, locale->ensureLinear(cx)); + if (!localeLinear) { + return false; + } + +#if !MOZ_SYSTEM_ICU + // "da" (Danish) and "mt" (Maltese) are the only two supported locales using + // upper-case first. CLDR also lists "cu" (Church Slavic) as an upper-case + // first locale, but since it's not supported in ICU, we don't care about it + // here. + bool isDefaultUpperCaseFirstLocale = + js::StringEqualsLiteral(localeLinear, "da") || + js::StringEqualsLiteral(localeLinear, "mt"); +#endif + +#if DEBUG || MOZ_SYSTEM_ICU + LocaleHasher::Lookup lookup(localeLinear); + *isUpperFirst = upperCaseFirstLocales.has(lookup); +#else + *isUpperFirst = isDefaultUpperCaseFirstLocale; +#endif + +#if !MOZ_SYSTEM_ICU + MOZ_ASSERT(*isUpperFirst == isDefaultUpperCaseFirstLocale, + "upper-case first locales don't match hard-coded list"); +#endif + + return true; +} + +void js::intl::DateTimePatternGeneratorDeleter::operator()( + UDateTimePatternGenerator* ptr) { + udatpg_close(ptr); +} + +UDateTimePatternGenerator* +js::intl::SharedIntlData::getDateTimePatternGenerator(JSContext* cx, + const char* locale) { + // Return the cached instance if the requested locale matches the locale + // of the cached generator. 
+ if (dateTimePatternGeneratorLocale && + StringsAreEqual(dateTimePatternGeneratorLocale.get(), locale)) { + return dateTimePatternGenerator.get(); + } + + UErrorCode status = U_ZERO_ERROR; + UniqueUDateTimePatternGenerator gen(udatpg_open(IcuLocale(locale), &status)); + if (U_FAILURE(status)) { + intl::ReportInternalError(cx); + return nullptr; + } + + JS::UniqueChars localeCopy = js::DuplicateString(cx, locale); + if (!localeCopy) { + return nullptr; + } + + dateTimePatternGenerator = std::move(gen); + dateTimePatternGeneratorLocale = std::move(localeCopy); + + return dateTimePatternGenerator.get(); +} + +void js::intl::SharedIntlData::destroyInstance() { + availableTimeZones.clearAndCompact(); + ianaZonesTreatedAsLinksByICU.clearAndCompact(); + ianaLinksCanonicalizedDifferentlyByICU.clearAndCompact(); + supportedLocales.clearAndCompact(); + collatorSupportedLocales.clearAndCompact(); +#if DEBUG || MOZ_SYSTEM_ICU + upperCaseFirstLocales.clearAndCompact(); +#endif +} + +void js::intl::SharedIntlData::trace(JSTracer* trc) { + // Atoms are always tenured. 
+ if (!JS::RuntimeHeapIsMinorCollecting()) { + availableTimeZones.trace(trc); + ianaZonesTreatedAsLinksByICU.trace(trc); + ianaLinksCanonicalizedDifferentlyByICU.trace(trc); + supportedLocales.trace(trc); + collatorSupportedLocales.trace(trc); +#if DEBUG || MOZ_SYSTEM_ICU + upperCaseFirstLocales.trace(trc); +#endif + } +} + +size_t js::intl::SharedIntlData::sizeOfExcludingThis( + mozilla::MallocSizeOf mallocSizeOf) const { + return availableTimeZones.shallowSizeOfExcludingThis(mallocSizeOf) + + ianaZonesTreatedAsLinksByICU.shallowSizeOfExcludingThis(mallocSizeOf) + + ianaLinksCanonicalizedDifferentlyByICU.shallowSizeOfExcludingThis( + mallocSizeOf) + + supportedLocales.shallowSizeOfExcludingThis(mallocSizeOf) + + collatorSupportedLocales.shallowSizeOfExcludingThis(mallocSizeOf) + +#if DEBUG || MOZ_SYSTEM_ICU + upperCaseFirstLocales.shallowSizeOfExcludingThis(mallocSizeOf) + +#endif + mallocSizeOf(dateTimePatternGeneratorLocale.get()); +} diff --git a/js/src/builtin/intl/SharedIntlData.h b/js/src/builtin/intl/SharedIntlData.h new file mode 100644 index 0000000000..e2ba19e314 --- /dev/null +++ b/js/src/builtin/intl/SharedIntlData.h @@ -0,0 +1,313 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef builtin_intl_SharedIntlData_h +#define builtin_intl_SharedIntlData_h + +#include "mozilla/MemoryReporting.h" +#include "mozilla/UniquePtr.h" + +#include <stddef.h> + +#include "js/AllocPolicy.h" +#include "js/CharacterEncoding.h" +#include "js/GCAPI.h" +#include "js/GCHashTable.h" +#include "js/RootingAPI.h" +#include "js/Utility.h" +#include "vm/StringType.h" + +using UDateTimePatternGenerator = void*; + +namespace js { + +namespace intl { + +class DateTimePatternGeneratorDeleter { + public: + void operator()(UDateTimePatternGenerator* ptr); +}; + +/** + * Stores Intl data which can be shared across compartments (but not contexts). + * + * Used for data which is expensive when computed repeatedly or is not + * available through ICU. + */ +class SharedIntlData { + struct LinearStringLookup { + union { + const JS::Latin1Char* latin1Chars; + const char16_t* twoByteChars; + }; + bool isLatin1; + size_t length; + JS::AutoCheckCannotGC nogc; + HashNumber hash = 0; + + explicit LinearStringLookup(JSLinearString* string) + : isLatin1(string->hasLatin1Chars()), length(string->length()) { + if (isLatin1) { + latin1Chars = string->latin1Chars(nogc); + } else { + twoByteChars = string->twoByteChars(nogc); + } + } + + LinearStringLookup(const char* chars, size_t length) + : isLatin1(true), length(length) { + latin1Chars = reinterpret_cast<const JS::Latin1Char*>(chars); + } + }; + + private: + /** + * Information tracking the set of the supported time zone names, derived + * from the IANA time zone database <https://www.iana.org/time-zones>. + * + * There are two kinds of IANA time zone names: Zone and Link (denoted as + * such in database source files). Zone names are the canonical, preferred + * name for a time zone, e.g. Asia/Kolkata. Link names simply refer to + * target Zone names for their meaning, e.g. Asia/Calcutta targets + * Asia/Kolkata. 
That a name is a Link doesn't *necessarily* reflect a + * sense of deprecation: some Link names also exist partly for convenience, + * e.g. UTC and GMT as Link names targeting the Zone name Etc/UTC. + * + * Two data sources determine the time zone names we support: those ICU + * supports and IANA's zone information. + * + * Unfortunately the names ICU and IANA support, and their Link + * relationships from name to target, aren't identical, so we can't simply + * implicitly trust ICU's name handling. We must perform various + * preprocessing of user-provided zone names and post-processing of + * ICU-provided zone names to implement ECMA-402's IANA-consistent behavior. + * + * Also see <https://ssl.icu-project.org/trac/ticket/12044> and + * <http://unicode.org/cldr/trac/ticket/9892>. + */ + + using TimeZoneName = JSAtom*; + + struct TimeZoneHasher { + struct Lookup : LinearStringLookup { + explicit Lookup(JSLinearString* timeZone); + }; + + static js::HashNumber hash(const Lookup& lookup) { return lookup.hash; } + static bool match(TimeZoneName key, const Lookup& lookup); + }; + + using TimeZoneSet = + GCHashSet<TimeZoneName, TimeZoneHasher, SystemAllocPolicy>; + using TimeZoneMap = + GCHashMap<TimeZoneName, TimeZoneName, TimeZoneHasher, SystemAllocPolicy>; + + /** + * As a threshold matter, available time zones are those time zones ICU + * supports, via ucal_openTimeZones. But ICU supports additional non-IANA + * time zones described in intl/icu/source/tools/tzcode/icuzones (listed in + * IntlTimeZoneData.cpp's |legacyICUTimeZones|) for its own backwards + * compatibility purposes. This set consists of ICU's supported time zones, + * minus all backwards-compatibility time zones. + */ + TimeZoneSet availableTimeZones; + + /** + * IANA treats some time zone names as Zones, that ICU instead treats as + * Links. 
For example, IANA considers "America/Indiana/Indianapolis" to be + * a Zone and "America/Fort_Wayne" a Link that targets it, but ICU + * considers the former a Link that targets "America/Indianapolis" (which + * IANA treats as a Link). + * + * ECMA-402 requires that we respect IANA data, so if we're asked to + * canonicalize a time zone name in this set, we must *not* return ICU's + * canonicalization. + */ + TimeZoneSet ianaZonesTreatedAsLinksByICU; + + /** + * IANA treats some time zone names as Links to one target, that ICU + * instead treats as either Zones, or Links to different targets. An + * example of the former is "Asia/Calcutta", which IANA assigns the target + * "Asia/Kolkata" but ICU considers its own Zone. An example of the latter + * is "America/Virgin", which IANA assigns the target + * "America/Port_of_Spain" but ICU assigns the target "America/St_Thomas". + * + * ECMA-402 requires that we respect IANA data, so if we're asked to + * canonicalize a time zone name that's a key in this map, we *must* return + * the corresponding value and *must not* return ICU's canonicalization. + */ + TimeZoneMap ianaLinksCanonicalizedDifferentlyByICU; + + bool timeZoneDataInitialized = false; + + /** + * Precomputes the available time zone names, because it's too expensive to + * call ucal_openTimeZones() repeatedly. + */ + bool ensureTimeZones(JSContext* cx); + + public: + /** + * Returns the validated time zone name in |result|. If the input time zone + * isn't a valid IANA time zone name, |result| remains unchanged. + */ + bool validateTimeZoneName(JSContext* cx, JS::Handle<JSString*> timeZone, + JS::MutableHandle<JSAtom*> result); + + /** + * Returns the canonical time zone name in |result|. If no canonical name + * was found, |result| remains unchanged. + * + * This method only handles time zones which are canonicalized differently + * by ICU when compared to IANA. 
+ */ + bool tryCanonicalizeTimeZoneConsistentWithIANA( + JSContext* cx, JS::Handle<JSString*> timeZone, + JS::MutableHandle<JSAtom*> result); + + private: + using Locale = JSAtom*; + + struct LocaleHasher { + struct Lookup : LinearStringLookup { + explicit Lookup(JSLinearString* locale); + Lookup(const char* chars, size_t length); + }; + + static js::HashNumber hash(const Lookup& lookup) { return lookup.hash; } + static bool match(Locale key, const Lookup& lookup); + }; + + using LocaleSet = GCHashSet<Locale, LocaleHasher, SystemAllocPolicy>; + + // Set of supported locales for all Intl service constructors except Collator, + // which uses its own set. + // + // UDateFormat: + // udat_[count,get]Available() return the same results as their + // uloc_[count,get]Available() counterparts. + // + // UNumberFormatter: + // unum_[count,get]Available() return the same results as their + // uloc_[count,get]Available() counterparts. + // + // UListFormatter, UPluralRules, and URelativeDateTimeFormatter: + // We're going to use ULocale availableLocales as per ICU recommendation: + // https://unicode-org.atlassian.net/browse/ICU-12756 + LocaleSet supportedLocales; + + // ucol_[count,get]Available() return different results compared to + // uloc_[count,get]Available(), we can't use |supportedLocales| here. + LocaleSet collatorSupportedLocales; + + bool supportedLocalesInitialized = false; + + // CountAvailable and GetAvailable describe the signatures used for ICU API + // to determine available locales for various functionality. + using CountAvailable = int32_t (*)(); + using GetAvailable = const char* (*)(int32_t localeIndex); + + static bool getAvailableLocales(JSContext* cx, LocaleSet& locales, + CountAvailable countAvailable, + GetAvailable getAvailable); + + /** + * Precomputes the available locales sets. 
+ */ + bool ensureSupportedLocales(JSContext* cx); + + public: + enum class SupportedLocaleKind { + Collator, + DateTimeFormat, + DisplayNames, + ListFormat, + NumberFormat, + PluralRules, + RelativeTimeFormat + }; + + /** + * Sets |supported| to true if |locale| is supported by the requested Intl + * service constructor. Otherwise sets |supported| to false. + */ + MOZ_MUST_USE bool isSupportedLocale(JSContext* cx, SupportedLocaleKind kind, + JS::Handle<JSString*> locale, + bool* supported); + + private: + /** + * The case first parameter (BCP47 key "kf") allows switching the order of + * upper- and lower-case characters. ICU doesn't directly provide an API + * to query the default case first value of a given locale, but instead + * requires instantiating a collator object and then querying the case first + * attribute (UCOL_CASE_FIRST). + * To avoid instantiating an additional collator object whenever we need + * to retrieve the default case first value of a specific locale, we + * compute the default case first value for every supported locale only + * once and then keep a list of all locales which don't use the default + * case first setting. + * There is almost no difference between lower-case first and when case + * first is disabled (UCOL_LOWER_FIRST resp. UCOL_OFF), so we only need to + * track locales which use upper-case first as their default setting. + * + * Instantiating collator objects for each available locale is slow + * (bug 1527879), therefore we're hardcoding the two locales using upper-case + * first ("da" (Danish) and "mt" (Maltese)) and only assert in debug-mode + * these two locales match the upper-case first locales returned by ICU. A + * system-ICU may support a different set of locales, therefore we're always + * calling into ICU to find the upper-case first locales in that case. 
+ */ + +#if DEBUG || MOZ_SYSTEM_ICU + LocaleSet upperCaseFirstLocales; + + bool upperCaseFirstInitialized = false; + + /** + * Precomputes the available locales which use upper-case first sorting. + */ + bool ensureUpperCaseFirstLocales(JSContext* cx); +#endif + + public: + /** + * Sets |isUpperFirst| to true if |locale| sorts upper-case characters + * before lower-case characters. + */ + bool isUpperCaseFirst(JSContext* cx, JS::Handle<JSString*> locale, + bool* isUpperFirst); + + private: + using UniqueUDateTimePatternGenerator = + mozilla::UniquePtr<UDateTimePatternGenerator, + DateTimePatternGeneratorDeleter>; + + UniqueUDateTimePatternGenerator dateTimePatternGenerator; + JS::UniqueChars dateTimePatternGeneratorLocale; + + public: + /** + * Wrapper around |udatpg_open| to return a possibly cached generator + * instance. The returned pointer must not be closed via |udatpg_close|. + */ + UDateTimePatternGenerator* getDateTimePatternGenerator(JSContext* cx, + const char* locale); + + public: + void destroyInstance(); + + void trace(JSTracer* trc); + + size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) const; +}; + +} // namespace intl + +} // namespace js + +#endif /* builtin_intl_SharedIntlData_h */ diff --git a/js/src/builtin/intl/TimeZoneDataGenerated.h b/js/src/builtin/intl/TimeZoneDataGenerated.h new file mode 100644 index 0000000000..ba2646efb5 --- /dev/null +++ b/js/src/builtin/intl/TimeZoneDataGenerated.h @@ -0,0 +1,140 @@ +// Generated by make_intl_data.py. DO NOT EDIT. 
+// tzdata version = 2021a + +#ifndef builtin_intl_TimeZoneDataGenerated_h +#define builtin_intl_TimeZoneDataGenerated_h + +namespace js { +namespace timezone { + +// Format: +// "ZoneName" // ICU-Name [time zone file] +const char* const ianaZonesTreatedAsLinksByICU[] = { + "Africa/Asmara", // Africa/Asmera [backzone] + "Africa/Timbuktu", // Africa/Bamako [backzone] + "America/Argentina/Buenos_Aires", // America/Buenos_Aires [southamerica] + "America/Argentina/Catamarca", // America/Catamarca [southamerica] + "America/Argentina/ComodRivadavia", // America/Catamarca [backzone] + "America/Argentina/Cordoba", // America/Cordoba [southamerica] + "America/Argentina/Jujuy", // America/Jujuy [southamerica] + "America/Argentina/Mendoza", // America/Mendoza [southamerica] + "America/Atikokan", // America/Coral_Harbour [northamerica] + "America/Ensenada", // America/Tijuana [backzone] + "America/Indiana/Indianapolis", // America/Indianapolis [northamerica] + "America/Kentucky/Louisville", // America/Louisville [northamerica] + "America/Nuuk", // America/Godthab [europe] + "America/Rosario", // America/Cordoba [backzone] + "Asia/Chongqing", // Asia/Shanghai [backzone] + "Asia/Harbin", // Asia/Shanghai [backzone] + "Asia/Ho_Chi_Minh", // Asia/Saigon [asia] + "Asia/Kashgar", // Asia/Urumqi [backzone] + "Asia/Kathmandu", // Asia/Katmandu [asia] + "Asia/Kolkata", // Asia/Calcutta [asia] + "Asia/Tel_Aviv", // Asia/Jerusalem [backzone] + "Asia/Yangon", // Asia/Rangoon [asia] + "Atlantic/Faroe", // Atlantic/Faeroe [europe] + "Atlantic/Jan_Mayen", // Arctic/Longyearbyen [backzone] + "EST", // Etc/GMT+5 [northamerica] + "Europe/Belfast", // Europe/London [backzone] + "Europe/Tiraspol", // Europe/Chisinau [backzone] + "HST", // Etc/GMT+10 [northamerica] + "MST", // Etc/GMT+7 [northamerica] + "Pacific/Chuuk", // Pacific/Truk [australasia] + "Pacific/Pohnpei", // Pacific/Ponape [australasia] +}; + +// Format: +// "LinkName", "Target" // ICU-Target [time zone file] +struct LinkAndTarget 
+{ + const char* const link; + const char* const target; +}; + +const LinkAndTarget ianaLinksCanonicalizedDifferentlyByICU[] = { + { "Africa/Asmera", "Africa/Asmara" }, // Africa/Asmera [backward] + { "America/Buenos_Aires", "America/Argentina/Buenos_Aires" }, // America/Buenos_Aires [backward] + { "America/Catamarca", "America/Argentina/Catamarca" }, // America/Catamarca [backward] + { "America/Cordoba", "America/Argentina/Cordoba" }, // America/Cordoba [backward] + { "America/Fort_Wayne", "America/Indiana/Indianapolis" }, // America/Indianapolis [backward] + { "America/Godthab", "America/Nuuk" }, // America/Godthab [backward] + { "America/Indianapolis", "America/Indiana/Indianapolis" }, // America/Indianapolis [backward] + { "America/Jujuy", "America/Argentina/Jujuy" }, // America/Jujuy [backward] + { "America/Kralendijk", "America/Curacao" }, // America/Kralendijk [southamerica] + { "America/Louisville", "America/Kentucky/Louisville" }, // America/Louisville [backward] + { "America/Lower_Princes", "America/Curacao" }, // America/Lower_Princes [southamerica] + { "America/Marigot", "America/Port_of_Spain" }, // America/Marigot [southamerica] + { "America/Mendoza", "America/Argentina/Mendoza" }, // America/Mendoza [backward] + { "America/Santa_Isabel", "America/Tijuana" }, // America/Santa_Isabel [backward] + { "America/St_Barthelemy", "America/Port_of_Spain" }, // America/St_Barthelemy [southamerica] + { "America/Virgin", "America/Port_of_Spain" }, // America/St_Thomas [backward] + { "Antarctica/South_Pole", "Antarctica/McMurdo" }, // Pacific/Auckland [backward] + { "Arctic/Longyearbyen", "Europe/Oslo" }, // Arctic/Longyearbyen [europe] + { "Asia/Calcutta", "Asia/Kolkata" }, // Asia/Calcutta [backward] + { "Asia/Chungking", "Asia/Chongqing" }, // Asia/Shanghai [backward] + { "Asia/Katmandu", "Asia/Kathmandu" }, // Asia/Katmandu [backward] + { "Asia/Rangoon", "Asia/Yangon" }, // Asia/Rangoon [backward] + { "Asia/Saigon", "Asia/Ho_Chi_Minh" }, // Asia/Saigon 
[backward] + { "Atlantic/Faeroe", "Atlantic/Faroe" }, // Atlantic/Faeroe [backward] + { "Europe/Bratislava", "Europe/Prague" }, // Europe/Bratislava [europe] + { "Europe/Busingen", "Europe/Zurich" }, // Europe/Busingen [europe] + { "Europe/Mariehamn", "Europe/Helsinki" }, // Europe/Mariehamn [europe] + { "Europe/Podgorica", "Europe/Belgrade" }, // Europe/Podgorica [europe] + { "Europe/San_Marino", "Europe/Rome" }, // Europe/San_Marino [europe] + { "Europe/Vatican", "Europe/Rome" }, // Europe/Vatican [europe] + { "Pacific/Ponape", "Pacific/Pohnpei" }, // Pacific/Ponape [backward] + { "Pacific/Truk", "Pacific/Chuuk" }, // Pacific/Truk [backward] + { "Pacific/Yap", "Pacific/Chuuk" }, // Pacific/Truk [backward] + { "US/East-Indiana", "America/Indiana/Indianapolis" }, // America/Indianapolis [backward] +}; + +// Legacy ICU time zones, these are not valid IANA time zone names. We also +// disallow the old and deprecated System V time zones. +// https://ssl.icu-project.org/repos/icu/trunk/icu4c/source/tools/tzcode/icuzones +const char* const legacyICUTimeZones[] = { + "ACT", + "AET", + "AGT", + "ART", + "AST", + "BET", + "BST", + "CAT", + "CNT", + "CST", + "CTT", + "Canada/East-Saskatchewan", + "EAT", + "ECT", + "IET", + "IST", + "JST", + "MIT", + "NET", + "NST", + "PLT", + "PNT", + "PRT", + "PST", + "SST", + "US/Pacific-New", + "VST", + "SystemV/AST4", + "SystemV/AST4ADT", + "SystemV/CST6", + "SystemV/CST6CDT", + "SystemV/EST5", + "SystemV/EST5EDT", + "SystemV/HST10", + "SystemV/MST7", + "SystemV/MST7MDT", + "SystemV/PST8", + "SystemV/PST8PDT", + "SystemV/YST9", + "SystemV/YST9YDT", +}; + +} // namespace timezone +} // namespace js + +#endif /* builtin_intl_TimeZoneDataGenerated_h */ diff --git a/js/src/builtin/intl/make_intl_data.py b/js/src/builtin/intl/make_intl_data.py new file mode 100755 index 0000000000..802902336e --- /dev/null +++ b/js/src/builtin/intl/make_intl_data.py @@ -0,0 +1,3731 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# This Source Code 
Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +""" Usage: + make_intl_data.py langtags [cldr_core.zip] + make_intl_data.py tzdata + make_intl_data.py currency + make_intl_data.py units + make_intl_data.py numbering + + + Target "langtags": + This script extracts information about 1) mappings between deprecated and + current Unicode BCP 47 locale identifiers, and 2) deprecated and current + BCP 47 Unicode extension value from CLDR, and converts it to C++ mapping + code in LanguageTagGenerated.cpp. The code is used in LanguageTag.cpp. + + + Target "tzdata": + This script computes which time zone informations are not up-to-date in ICU + and provides the necessary mappings to workaround this problem. + https://ssl.icu-project.org/trac/ticket/12044 + + + Target "currency": + Generates the mapping from currency codes to decimal digits used for them. + + + Target "units": + Generate source and test files using the list of so-called "sanctioned unit + identifiers" and verifies that the ICU data filter includes these units. + + + Target "numbering": + Generate source and test files using the list of numbering systems with + simple digit mappings and verifies that it's in sync with ICU/CLDR. 
+""" + +from __future__ import print_function +import os +import re +import io +import json +import sys +import tarfile +import tempfile +import yaml +from contextlib import closing +from functools import partial, total_ordering +from itertools import chain, groupby, tee +from operator import attrgetter, itemgetter +from zipfile import ZipFile + +if sys.version_info.major == 2: + from itertools import ( + ifilter as filter, + ifilterfalse as filterfalse, + imap as map, + izip_longest as zip_longest, + ) + from urllib2 import urlopen, Request as UrlRequest + from urlparse import urlsplit +else: + from itertools import filterfalse, zip_longest + from urllib.request import urlopen, Request as UrlRequest + from urllib.parse import urlsplit + + +# From https://docs.python.org/3/library/itertools.html +def grouper(iterable, n, fillvalue=None): + "Collect data into fixed-length chunks or blocks" + # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx" + args = [iter(iterable)] * n + return zip_longest(*args, fillvalue=fillvalue) + + +def writeMappingHeader(println, description, source, url): + if type(description) is not list: + description = [description] + for desc in description: + println("// {0}".format(desc)) + println("// Derived from {0}.".format(source)) + println("// {0}".format(url)) + + +def writeMappingsVar(println, mapping, name, description, source, url): + """Writes a variable definition with a mapping table. + + Writes the contents of dictionary |mapping| through the |println| + function with the given variable name and a comment with description, + fileDate, and URL. 
+ """ + println("") + writeMappingHeader(println, description, source, url) + println("var {0} = {{".format(name)) + for (key, value) in sorted(mapping.items(), key=itemgetter(0)): + println(' "{0}": "{1}",'.format(key, value)) + println("};") + + +def writeMappingsBinarySearch( + println, + fn_name, + type_name, + name, + validate_fn, + validate_case_fn, + mappings, + tag_maxlength, + description, + source, + url, +): + """Emit code to perform a binary search on language tag subtags. + + Uses the contents of |mapping|, which can either be a dictionary or set, + to emit a mapping function to find subtag replacements. + """ + println("") + writeMappingHeader(println, description, source, url) + println( + """ +bool js::intl::LanguageTag::{0}({1} {2}) {{ + MOZ_ASSERT({3}({2}.span())); + MOZ_ASSERT({4}({2}.span())); +""".format( + fn_name, type_name, name, validate_fn, validate_case_fn + ).strip() + ) + + def write_array(subtags, name, length, fixed): + if fixed: + println( + " static const char {}[{}][{}] = {{".format( + name, len(subtags), length + 1 + ) + ) + else: + println(" static const char* {}[{}] = {{".format(name, len(subtags))) + + # Group in pairs of ten to not exceed the 80 line column limit. + for entries in grouper(subtags, 10): + entries = ( + '"{}"'.format(tag).rjust(length + 2) + for tag in entries + if tag is not None + ) + println(" {},".format(", ".join(entries))) + + println(" };") + + trailing_return = True + + # Sort the subtags by length. That enables using an optimized comparator + # for the binary search, which only performs a single |memcmp| for multiple + # of two subtag lengths. + mappings_keys = mappings.keys() if type(mappings) == dict else mappings + for (length, subtags) in groupby(sorted(mappings_keys, key=len), len): + # Omit the length check if the current length is the maximum length. 
+ if length != tag_maxlength: + println( + """ + if ({}.length() == {}) {{ +""".format( + name, length + ).rstrip( + "\n" + ) + ) + else: + trailing_return = False + println( + """ + { +""".rstrip( + "\n" + ) + ) + + # The subtags need to be sorted for binary search to work. + subtags = sorted(subtags) + + def equals(subtag): + return """{}.equalTo("{}")""".format(name, subtag) + + # Don't emit a binary search for short lists. + if len(subtags) == 1: + if type(mappings) == dict: + println( + """ + if ({}) {{ + {}.set("{}"); + return true; + }} + return false; +""".format( + equals(subtags[0]), name, mappings[subtags[0]] + ).strip( + "\n" + ) + ) + else: + println( + """ + return {}; +""".format( + equals(subtags[0]) + ).strip( + "\n" + ) + ) + elif len(subtags) <= 4: + if type(mappings) == dict: + for subtag in subtags: + println( + """ + if ({}) {{ + {}.set("{}"); + return true; + }} +""".format( + equals(subtag), name, mappings[subtag] + ).strip( + "\n" + ) + ) + + println( + """ + return false; +""".strip( + "\n" + ) + ) + else: + cond = (equals(subtag) for subtag in subtags) + cond = (" ||\n" + " " * (4 + len("return "))).join(cond) + println( + """ + return {}; +""".format( + cond + ).strip( + "\n" + ) + ) + else: + write_array(subtags, name + "s", length, True) + + if type(mappings) == dict: + write_array([mappings[k] for k in subtags], "aliases", length, False) + + println( + """ + if (const char* replacement = SearchReplacement({0}s, aliases, {0})) {{ + {0}.set(mozilla::MakeStringSpan(replacement)); + return true; + }} + return false; +""".format( + name + ).rstrip() + ) + else: + println( + """ + return HasReplacement({0}s, {0}); +""".format( + name + ).rstrip() + ) + + println( + """ + } +""".strip( + "\n" + ) + ) + + if trailing_return: + println( + """ + return false;""" + ) + + println( + """ +}""".lstrip( + "\n" + ) + ) + + +def writeComplexLanguageTagMappings( + println, complex_language_mappings, description, source, url +): + println("") + 
writeMappingHeader(println, description, source, url) + println( + """ +void js::intl::LanguageTag::performComplexLanguageMappings() { + MOZ_ASSERT(IsStructurallyValidLanguageTag(language().span())); + MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language().span())); +""".lstrip() + ) + + # Merge duplicate language entries. + language_aliases = {} + for (deprecated_language, (language, script, region)) in sorted( + complex_language_mappings.items(), key=itemgetter(0) + ): + key = (language, script, region) + if key not in language_aliases: + language_aliases[key] = [] + else: + language_aliases[key].append(deprecated_language) + + first_language = True + for (deprecated_language, (language, script, region)) in sorted( + complex_language_mappings.items(), key=itemgetter(0) + ): + key = (language, script, region) + if deprecated_language in language_aliases[key]: + continue + + if_kind = "if" if first_language else "else if" + first_language = False + + cond = ( + 'language().equalTo("{}")'.format(lang) + for lang in [deprecated_language] + language_aliases[key] + ) + cond = (" ||\n" + " " * (2 + len(if_kind) + 2)).join(cond) + + println( + """ + {} ({}) {{""".format( + if_kind, cond + ).strip( + "\n" + ) + ) + + println( + """ + setLanguage("{}");""".format( + language + ).strip( + "\n" + ) + ) + + if script is not None: + println( + """ + if (script().missing()) {{ + setScript("{}"); + }}""".format( + script + ).strip( + "\n" + ) + ) + if region is not None: + println( + """ + if (region().missing()) {{ + setRegion("{}"); + }}""".format( + region + ).strip( + "\n" + ) + ) + println( + """ + }""".strip( + "\n" + ) + ) + + println( + """ +} +""".strip( + "\n" + ) + ) + + +def writeComplexRegionTagMappings( + println, complex_region_mappings, description, source, url +): + println("") + writeMappingHeader(println, description, source, url) + println( + """ +void js::intl::LanguageTag::performComplexRegionMappings() { + 
MOZ_ASSERT(IsStructurallyValidLanguageTag(language().span())); + MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language().span())); + MOZ_ASSERT(IsStructurallyValidRegionTag(region().span())); + MOZ_ASSERT(IsCanonicallyCasedRegionTag(region().span())); +""".lstrip() + ) + + # |non_default_replacements| is a list and hence not hashable. Convert it + # to a string to get a proper hashable value. + def hash_key(default, non_default_replacements): + return (default, str(sorted(str(v) for v in non_default_replacements))) + + # Merge duplicate region entries. + region_aliases = {} + for (deprecated_region, (default, non_default_replacements)) in sorted( + complex_region_mappings.items(), key=itemgetter(0) + ): + key = hash_key(default, non_default_replacements) + if key not in region_aliases: + region_aliases[key] = [] + else: + region_aliases[key].append(deprecated_region) + + first_region = True + for (deprecated_region, (default, non_default_replacements)) in sorted( + complex_region_mappings.items(), key=itemgetter(0) + ): + key = hash_key(default, non_default_replacements) + if deprecated_region in region_aliases[key]: + continue + + if_kind = "if" if first_region else "else if" + first_region = False + + cond = ( + 'region().equalTo("{}")'.format(region) + for region in [deprecated_region] + region_aliases[key] + ) + cond = (" ||\n" + " " * (2 + len(if_kind) + 2)).join(cond) + + println( + """ + {} ({}) {{""".format( + if_kind, cond + ).strip( + "\n" + ) + ) + + replacement_regions = sorted( + {region for (_, _, region) in non_default_replacements} + ) + + first_case = True + for replacement_region in replacement_regions: + replacement_language_script = sorted( + (language, script) + for (language, script, region) in (non_default_replacements) + if region == replacement_region + ) + + if_kind = "if" if first_case else "else if" + first_case = False + + def compare_tags(language, script): + if script is None: + return 'language().equalTo("{}")'.format(language) + return 
'(language().equalTo("{}") && script().equalTo("{}"))'.format( + language, script + ) + + cond = ( + compare_tags(language, script) + for (language, script) in replacement_language_script + ) + cond = (" ||\n" + " " * (4 + len(if_kind) + 2)).join(cond) + + println( + """ + {} ({}) {{ + setRegion("{}"); + }}""".format( + if_kind, cond, replacement_region + ) + .rstrip() + .strip("\n") + ) + + println( + """ + else {{ + setRegion("{}"); + }} + }}""".format( + default + ) + .rstrip() + .strip("\n") + ) + + println( + """ +} +""".strip( + "\n" + ) + ) + + +def writeVariantTagMappings(println, variant_mappings, description, source, url): + """ Writes a function definition that maps variant subtags. """ + println( + """ +static const char* ToCharPointer(const char* str) { + return str; +} + +static const char* ToCharPointer(const js::UniqueChars& str) { + return str.get(); +} + +template <typename T, typename U = T> +static bool IsLessThan(const T& a, const U& b) { + return strcmp(ToCharPointer(a), ToCharPointer(b)) < 0; +} +""" + ) + writeMappingHeader(println, description, source, url) + println( + """ +bool js::intl::LanguageTag::performVariantMappings(JSContext* cx) { + // The variant subtags need to be sorted for binary search. + MOZ_ASSERT(std::is_sorted(variants_.begin(), variants_.end(), + IsLessThan<decltype(variants_)::ElementType>)); + + auto insertVariantSortedIfNotPresent = [&](const char* variant) { + auto* p = std::lower_bound(variants_.begin(), variants_.end(), variant, + IsLessThan<decltype(variants_)::ElementType, + decltype(variant)>); + + // Don't insert the replacement when already present. + if (p != variants_.end() && strcmp(p->get(), variant) == 0) { + return true; + } + + // Insert the preferred variant in sort order. 
+ auto preferred = DuplicateString(cx, variant); + if (!preferred) { + return false; + } + return !!variants_.insert(p, std::move(preferred)); + }; + + for (size_t i = 0; i < variants_.length(); ) { + auto& variant = variants_[i]; + MOZ_ASSERT(IsCanonicallyCasedVariantTag(mozilla::MakeStringSpan(variant.get()))); +""".lstrip() + ) + + first_variant = True + + for (deprecated_variant, (type, replacement)) in sorted( + variant_mappings.items(), key=itemgetter(0) + ): + if_kind = "if" if first_variant else "else if" + first_variant = False + + println( + """ + {} (strcmp(variant.get(), "{}") == 0) {{ + variants_.erase(variants_.begin() + i); +""".format( + if_kind, deprecated_variant + ).strip( + "\n" + ) + ) + + if type == "language": + println( + """ + setLanguage("{}"); +""".format( + replacement + ).strip( + "\n" + ) + ) + elif type == "region": + println( + """ + setRegion("{}"); +""".format( + replacement + ).strip( + "\n" + ) + ) + else: + assert type == "variant" + println( + """ + if (!insertVariantSortedIfNotPresent("{}")) {{ + return false; + }} +""".format( + replacement + ).strip( + "\n" + ) + ) + + println( + """ + } +""".strip( + "\n" + ) + ) + + println( + """ + else { + i++; + } + } + return true; +} +""".strip( + "\n" + ) + ) + + +def writeGrandfatheredMappingsFunction( + println, grandfathered_mappings, description, source, url +): + """ Writes a function definition that maps grandfathered language tags. """ + println("") + writeMappingHeader(println, description, source, url) + println( + """\ +bool js::intl::LanguageTag::updateGrandfatheredMappings(JSContext* cx) { + // We're mapping regular grandfathered tags to non-grandfathered form here. + // Other tags remain unchanged. 
+ // + // regular = "art-lojban" + // / "cel-gaulish" + // / "no-bok" + // / "no-nyn" + // / "zh-guoyu" + // / "zh-hakka" + // / "zh-min" + // / "zh-min-nan" + // / "zh-xiang" + // + // Therefore we can quickly exclude most tags by checking every + // |unicode_locale_id| subcomponent for characteristics not shared by any of + // the regular grandfathered (RG) tags: + // + // * Real-world |unicode_language_subtag|s are all two or three letters, + // so don't waste time running a useless |language.length > 3| fast-path. + // * No RG tag has a "script"-looking component. + // * No RG tag has a "region"-looking component. + // * The RG tags that match |unicode_locale_id| (art-lojban, cel-gaulish, + // zh-guoyu, zh-hakka, zh-xiang) have exactly one "variant". (no-bok, + // no-nyn, zh-min, and zh-min-nan require BCP47's extlang subtag + // that |unicode_locale_id| doesn't support.) + // * No RG tag contains |extensions| or |pu_extensions|. + if (script().present() || + region().present() || + variants().length() != 1 || + extensions().length() != 0 || + privateuse()) { + return true; + } + + MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language().span())); + MOZ_ASSERT(IsCanonicallyCasedVariantTag(mozilla::MakeStringSpan(variants()[0].get()))); + + auto variantEqualTo = [this](const char* variant) { + return strcmp(variants()[0].get(), variant) == 0; + };""" + ) + + # From Unicode BCP 47 locale identifier <https://unicode.org/reports/tr35/>. + # + # Doesn't allow any 'extensions' subtags. + re_unicode_locale_id = re.compile( + r""" + ^ + # unicode_language_id = unicode_language_subtag + # unicode_language_subtag = alpha{2,3} | alpha{5,8} + (?P<language>[a-z]{2,3}|[a-z]{5,8}) + + # (sep unicode_script_subtag)? + # unicode_script_subtag = alpha{4} + (?:-(?P<script>[a-z]{4}))? + + # (sep unicode_region_subtag)? + # unicode_region_subtag = (alpha{2} | digit{3}) + (?:-(?P<region>([a-z]{2}|[0-9]{3})))? 
+ + # (sep unicode_variant_subtag)* + # unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3}) + (?P<variants>(-([a-z0-9]{5,8}|[0-9][a-z0-9]{3}))+)? + + # pu_extensions? + # pu_extensions = sep [xX] (sep alphanum{1,8})+ + (?:-(?P<privateuse>x(-[a-z0-9]{1,8})+))? + $ + """, + re.IGNORECASE | re.VERBOSE, + ) + + is_first = True + + for (tag, modern) in sorted(grandfathered_mappings.items(), key=itemgetter(0)): + tag_match = re_unicode_locale_id.match(tag) + assert tag_match is not None + + tag_language = tag_match.group("language") + assert ( + tag_match.group("script") is None + ), "{} does not contain a script subtag".format(tag) + assert ( + tag_match.group("region") is None + ), "{} does not contain a region subtag".format(tag) + tag_variants = tag_match.group("variants") + assert tag_variants is not None, "{} contains a variant subtag".format(tag) + assert ( + tag_match.group("privateuse") is None + ), "{} does not contain a privateuse subtag".format(tag) + + tag_variant = tag_variants[1:] + assert "-" not in tag_variant, "{} contains only a single variant".format(tag) + + modern_match = re_unicode_locale_id.match(modern) + assert modern_match is not None + + modern_language = modern_match.group("language") + modern_script = modern_match.group("script") + modern_region = modern_match.group("region") + modern_variants = modern_match.group("variants") + modern_privateuse = modern_match.group("privateuse") + + println( + """ + // {} -> {} +""".format( + tag, modern + ).rstrip() + ) + + println( + """ + {}if (language().equalTo("{}") && variantEqualTo("{}")) {{ + """.format( + "" if is_first else "else ", tag_language, tag_variant + ) + .rstrip() + .strip("\n") + ) + + is_first = False + + println( + """ + setLanguage("{}"); + """.format( + modern_language + ) + .rstrip() + .strip("\n") + ) + + if modern_script is not None: + println( + """ + setScript("{}"); + """.format( + modern_script + ) + .rstrip() + .strip("\n") + ) + + if modern_region is not None: + 
println( + """ + setRegion("{}"); + """.format( + modern_region + ) + .rstrip() + .strip("\n") + ) + + assert ( + modern_variants is None + ), "all regular grandfathered tags' modern forms do not contain variant subtags" + + println( + """ + clearVariants(); + """.rstrip().strip( + "\n" + ) + ) + + if modern_privateuse is not None: + println( + """ + auto privateuse = DuplicateString(cx, "{}"); + if (!privateuse) {{ + return false; + }} + setPrivateuse(std::move(privateuse)); + """.format( + modern_privateuse + ) + .rstrip() + .rstrip("\n") + ) + + println( + """ + return true; + }""".rstrip().strip( + "\n" + ) + ) + + println( + """ + return true; +}""" + ) + + +def readSupplementalData(core_file): + """Reads CLDR Supplemental Data and extracts information for Intl.js. + + Information extracted: + - grandfatheredMappings: mappings from grandfathered tags to preferred + complete language tags + - languageMappings: mappings from language subtags to preferred subtags + - complexLanguageMappings: mappings from language subtags with complex rules + - regionMappings: mappings from region subtags to preferred subtags + - complexRegionMappings: mappings from region subtags with complex rules + - variantMappings: mappings from variant subtags to preferred subtags + - likelySubtags: likely subtags used for generating test data only + Returns these mappings as dictionaries. + """ + import xml.etree.ElementTree as ET + + # From Unicode BCP 47 locale identifier <https://unicode.org/reports/tr35/>. + re_unicode_language_id = re.compile( + r""" + ^ + # unicode_language_id = unicode_language_subtag + # unicode_language_subtag = alpha{2,3} | alpha{5,8} + (?P<language>[a-z]{2,3}|[a-z]{5,8}) + + # (sep unicode_script_subtag)? + # unicode_script_subtag = alpha{4} + (?:-(?P<script>[a-z]{4}))? + + # (sep unicode_region_subtag)? + # unicode_region_subtag = (alpha{2} | digit{3}) + (?:-(?P<region>([a-z]{2}|[0-9]{3})))? 
+ + # (sep unicode_variant_subtag)* + # unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3}) + (?P<variants>(-([a-z0-9]{5,8}|[0-9][a-z0-9]{3}))+)? + $ + """, + re.IGNORECASE | re.VERBOSE, + ) + + re_unicode_language_subtag = re.compile( + r""" + ^ + # unicode_language_subtag = alpha{2,3} | alpha{5,8} + ([a-z]{2,3}|[a-z]{5,8}) + $ + """, + re.IGNORECASE | re.VERBOSE, + ) + + re_unicode_region_subtag = re.compile( + r""" + ^ + # unicode_region_subtag = (alpha{2} | digit{3}) + ([a-z]{2}|[0-9]{3}) + $ + """, + re.IGNORECASE | re.VERBOSE, + ) + + re_unicode_variant_subtag = re.compile( + r""" + ^ + # unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3}) + ([a-z0-9]{5,8}|(?:[0-9][a-z0-9]{3})) + $ + """, + re.IGNORECASE | re.VERBOSE, + ) + + # The fixed list of BCP 47 grandfathered language tags. + grandfathered_tags = ( + "art-lojban", + "cel-gaulish", + "en-GB-oed", + "i-ami", + "i-bnn", + "i-default", + "i-enochian", + "i-hak", + "i-klingon", + "i-lux", + "i-mingo", + "i-navajo", + "i-pwn", + "i-tao", + "i-tay", + "i-tsu", + "no-bok", + "no-nyn", + "sgn-BE-FR", + "sgn-BE-NL", + "sgn-CH-DE", + "zh-guoyu", + "zh-hakka", + "zh-min", + "zh-min-nan", + "zh-xiang", + ) + + # The list of grandfathered tags which are valid Unicode BCP 47 locale identifiers. + unicode_bcp47_grandfathered_tags = { + tag for tag in grandfathered_tags if re_unicode_language_id.match(tag) + } + + # Dictionary of simple language subtag mappings, e.g. "in" -> "id". + language_mappings = {} + + # Dictionary of complex language subtag mappings, modifying more than one + # subtag, e.g. "sh" -> ("sr", "Latn", None) and "cnr" -> ("sr", None, "ME"). + complex_language_mappings = {} + + # Dictionary of simple region subtag mappings, e.g. "DD" -> "DE". + region_mappings = {} + + # Dictionary of complex region subtag mappings, containing more than one + # replacement, e.g. "SU" -> ("RU", ["AM", "AZ", "BY", ...]). 
+ complex_region_mappings = {} + + # Dictionary of aliased variant subtags to a tuple of preferred replacement + # type and replacement, e.g. "arevela" -> ("language", "hy") or + # "aaland" -> ("region", "AX") or "heploc" -> ("variant", "alalc97"). + variant_mappings = {} + + # Dictionary of grandfathered mappings to preferred values. + grandfathered_mappings = {} + + # CLDR uses "_" as the separator for some elements. Replace it with "-". + def bcp47_id(cldr_id): + return cldr_id.replace("_", "-") + + # CLDR uses the canonical case for most entries, but there are some + # exceptions, like: + # <languageAlias type="drw" replacement="fa_af" reason="deprecated"/> + # Therefore canonicalize all tags to be on the safe side. + def bcp47_canonical(language, script, region): + # Canonical case for language subtags is lower case. + # Canonical case for script subtags is title case. + # Canonical case for region subtags is upper case. + return ( + language.lower() if language else None, + script.title() if script else None, + region.upper() if region else None, + ) + + tree = ET.parse(core_file.open("common/supplemental/supplementalMetadata.xml")) + + for language_alias in tree.iterfind(".//languageAlias"): + type = bcp47_id(language_alias.get("type")) + replacement = bcp47_id(language_alias.get("replacement")) + + # Handle grandfathered mappings first. + if type in unicode_bcp47_grandfathered_tags: + grandfathered_mappings[type] = replacement + continue + + # We're only interested in language subtag matches, so ignore any + # entries which have additional subtags. + if re_unicode_language_subtag.match(type) is None: + continue + + assert type.islower() + + if re_unicode_language_subtag.match(replacement) is not None: + # Canonical case for language subtags is lower-case. 
+ language_mappings[type] = replacement.lower() + else: + replacement_match = re_unicode_language_id.match(replacement) + assert ( + replacement_match is not None + ), "{} invalid Unicode BCP 47 locale identifier".format(replacement) + assert ( + replacement_match.group("variants") is None + ), "{}: unexpected variant subtags in {}".format(type, replacement) + + complex_language_mappings[type] = bcp47_canonical( + replacement_match.group("language"), + replacement_match.group("script"), + replacement_match.group("region"), + ) + + for territory_alias in tree.iterfind(".//territoryAlias"): + type = territory_alias.get("type") + replacement = territory_alias.get("replacement") + + # We're only interested in region subtag matches, so ignore any entries + # which contain legacy formats, e.g. three letter region codes. + if re_unicode_region_subtag.match(type) is None: + continue + + assert type.isupper() or type.isdigit() + + if re_unicode_region_subtag.match(replacement) is not None: + # Canonical case for region subtags is upper-case. + region_mappings[type] = replacement.upper() + else: + # Canonical case for region subtags is upper-case. + replacements = [r.upper() for r in replacement.split(" ")] + assert all( + re_unicode_region_subtag.match(loc) is not None for loc in replacements + ), "{} invalid region subtags".format(replacement) + complex_region_mappings[type] = replacements + + for variant_alias in tree.iterfind(".//variantAlias"): + type = variant_alias.get("type") + replacement = variant_alias.get("replacement") + + assert ( + re_unicode_variant_subtag.match(type) is not None + ), "{} invalid variant subtag".format(type) + + # Normalize the case, because some variants are in upper case. + type = type.lower() + + # The replacement can be a language, a region, or a variant subtag. + # Language and region subtags are case normalized, variant subtags can + # be in any case. 
+ + if ( + re_unicode_language_subtag.match(replacement) is not None + and replacement.islower() + ): + variant_mappings[type] = ("language", replacement) + + elif re_unicode_region_subtag.match(replacement) is not None: + assert ( + replacement.isupper() or replacement.isdigit() + ), "{} invalid variant subtag replacement".format(replacement) + variant_mappings[type] = ("region", replacement) + + else: + assert ( + re_unicode_variant_subtag.match(replacement) is not None + ), "{} invalid variant subtag replacement".format(replacement) + variant_mappings[type] = ("variant", replacement.lower()) + + tree = ET.parse(core_file.open("common/supplemental/likelySubtags.xml")) + + likely_subtags = {} + + for likely_subtag in tree.iterfind(".//likelySubtag"): + from_tag = bcp47_id(likely_subtag.get("from")) + from_match = re_unicode_language_id.match(from_tag) + assert ( + from_match is not None + ), "{} invalid Unicode BCP 47 locale identifier".format(from_tag) + assert ( + from_match.group("variants") is None + ), "unexpected variant subtags in {}".format(from_tag) + + to_tag = bcp47_id(likely_subtag.get("to")) + to_match = re_unicode_language_id.match(to_tag) + assert ( + to_match is not None + ), "{} invalid Unicode BCP 47 locale identifier".format(to_tag) + assert ( + to_match.group("variants") is None + ), "unexpected variant subtags in {}".format(to_tag) + + from_canonical = bcp47_canonical( + from_match.group("language"), + from_match.group("script"), + from_match.group("region"), + ) + + to_canonical = bcp47_canonical( + to_match.group("language"), + to_match.group("script"), + to_match.group("region"), + ) + + likely_subtags[from_canonical] = to_canonical + + complex_region_mappings_final = {} + + for (deprecated_region, replacements) in complex_region_mappings.items(): + # Find all likely subtag entries which don't already contain a region + # subtag and whose target region is in the list of replacement regions. 
+ region_likely_subtags = [ + (from_language, from_script, to_region) + for ( + (from_language, from_script, from_region), + (_, _, to_region), + ) in likely_subtags.items() + if from_region is None and to_region in replacements + ] + + # The first replacement entry is the default region. + default = replacements[0] + + # Find all likely subtag entries whose region matches the default region. + default_replacements = { + (language, script) + for (language, script, region) in region_likely_subtags + if region == default + } + + # And finally find those entries which don't use the default region. + # These are the entries we're actually interested in, because those need + # to be handled specially when selecting the correct preferred region. + non_default_replacements = [ + (language, script, region) + for (language, script, region) in region_likely_subtags + if (language, script) not in default_replacements + ] + + # If there are no non-default replacements, we can handle the region as + # part of the simple region mapping. + if non_default_replacements: + complex_region_mappings_final[deprecated_region] = ( + default, + non_default_replacements, + ) + else: + region_mappings[deprecated_region] = default + + return { + "grandfatheredMappings": grandfathered_mappings, + "languageMappings": language_mappings, + "complexLanguageMappings": complex_language_mappings, + "regionMappings": region_mappings, + "complexRegionMappings": complex_region_mappings_final, + "variantMappings": variant_mappings, + "likelySubtags": likely_subtags, + } + + +def readUnicodeExtensions(core_file): + import xml.etree.ElementTree as ET + + # Match all xml-files in the BCP 47 directory. 
+ bcpFileRE = re.compile(r"^common/bcp47/.+\.xml$") + + # https://www.unicode.org/reports/tr35/#Unicode_locale_identifier + # + # type = alphanum{3,8} (sep alphanum{3,8})* ; + typeRE = re.compile(r"^[a-z0-9]{3,8}(-[a-z0-9]{3,8})*$") + + # Mapping from Unicode extension types to dict of deprecated to + # preferred values. + mapping = { + # Unicode BCP 47 U Extension + "u": {}, + # Unicode BCP 47 T Extension + "t": {}, + } + + def readBCP47File(file): + tree = ET.parse(file) + for keyword in tree.iterfind(".//keyword/key"): + extension = keyword.get("extension", "u") + assert ( + extension == "u" or extension == "t" + ), "unknown extension type: {}".format(extension) + + extension_name = keyword.get("name") + + for type in keyword.iterfind("type"): + # <https://unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files>: + # + # The key or type name used by Unicode locale extension with 'u' extension + # syntax or the 't' extensions syntax. When alias below is absent, this name + # can be also used with the old style "@key=type" syntax. + name = type.get("name") + + # Ignore the special name: + # - <https://unicode.org/reports/tr35/#CODEPOINTS> + # - <https://unicode.org/reports/tr35/#REORDER_CODE> + # - <https://unicode.org/reports/tr35/#RG_KEY_VALUE> + # - <https://unicode.org/reports/tr35/#SUBDIVISION_CODE> + # - <https://unicode.org/reports/tr35/#PRIVATE_USE> + if name in ( + "CODEPOINTS", + "REORDER_CODE", + "RG_KEY_VALUE", + "SUBDIVISION_CODE", + "PRIVATE_USE", + ): + continue + + # All other names should match the 'type' production. + assert ( + typeRE.match(name) is not None + ), "{} matches the 'type' production".format(name) + + # <https://unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files>: + # + # The preferred value of the deprecated key, type or attribute element. + # When a key, type or attribute element is deprecated, this attribute is + # used for specifying a new canonical form if available. 
+ preferred = type.get("preferred") + + # <https://unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files>: + # + # The BCP 47 form is the canonical form, and recommended. Other aliases are + # included only for backwards compatibility. + alias = type.get("alias") + + # <https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers> + # + # Use the bcp47 data to replace keys, types, tfields, and tvalues by their + # canonical forms. See Section 3.6.4 U Extension Data Files) and Section + # 3.7.1 T Extension Data Files. The aliases are in the alias attribute + # value, while the canonical is in the name attribute value. + + # 'preferred' contains the new preferred name, 'alias' the compatibility + # name, but then there's this entry where 'preferred' and 'alias' are the + # same. So which one to choose? Assume 'preferred' is the actual canonical + # name. + # + # <type name="islamicc" + # description="Civil (algorithmic) Arabic calendar" + # deprecated="true" + # preferred="islamic-civil" + # alias="islamic-civil"/> + + if preferred is not None: + assert typeRE.match(preferred), preferred + mapping[extension].setdefault(extension_name, {})[name] = preferred + + if alias is not None: + for alias_name in alias.lower().split(" "): + # Ignore alias entries which don't match the 'type' production. + if typeRE.match(alias_name) is None: + continue + + # See comment above when 'alias' and 'preferred' are both present. + if ( + preferred is not None + and name in mapping[extension][extension_name] + ): + continue + + # Skip over entries where 'name' and 'alias' are equal. + # + # <type name="pst8pdt" + # description="POSIX style time zone for US Pacific Time" + # alias="PST8PDT" + # since="1.8"/> + if name == alias_name: + continue + + mapping[extension].setdefault(extension_name, {})[ + alias_name + ] = name + + def readSupplementalMetadata(file): + # Find subdivision and region replacements. 
+ # + # <https://www.unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers> + # + # Replace aliases in special key values: + # - If there is an 'sd' or 'rg' key, replace any subdivision alias + # in its value in the same way, using subdivisionAlias data. + tree = ET.parse(file) + for alias in tree.iterfind(".//subdivisionAlias"): + type = alias.get("type") + assert ( + typeRE.match(type) is not None + ), "{} matches the 'type' production".format(type) + + # Take the first replacement when multiple ones are present. + replacement = alias.get("replacement").split(" ")[0].lower() + + # Skip over invalid replacements. + # + # <subdivisionAlias type="fi01" replacement="AX" reason="overlong"/> + # + # It's not entirely clear to me if CLDR actually wants to use + # "axzzzz" as the replacement for this case. + if typeRE.match(replacement) is None: + continue + + # 'subdivisionAlias' applies to 'rg' and 'sd' keys. + mapping["u"].setdefault("rg", {})[type] = replacement + mapping["u"].setdefault("sd", {})[type] = replacement + + for name in core_file.namelist(): + if bcpFileRE.match(name): + readBCP47File(core_file.open(name)) + + readSupplementalMetadata( + core_file.open("common/supplemental/supplementalMetadata.xml") + ) + + return { + "unicodeMappings": mapping["u"], + "transformMappings": mapping["t"], + } + + +def writeCLDRLanguageTagData(println, data, url): + """ Writes the language tag data to the Intl data file. 
""" + + println(generatedFileWarning) + println("// Version: CLDR-{}".format(data["version"])) + println("// URL: {}".format(url)) + + println( + """ +#include "mozilla/Assertions.h" +#include "mozilla/Span.h" +#include "mozilla/TextUtils.h" + +#include <algorithm> +#include <cstdint> +#include <cstring> +#include <iterator> +#include <string> +#include <type_traits> + +#include "builtin/intl/LanguageTag.h" +#include "util/Text.h" +#include "vm/JSContext.h" + +using namespace js::intl::LanguageTagLimits; + +template <size_t Length, size_t TagLength, size_t SubtagLength> +static inline bool HasReplacement( + const char (&subtags)[Length][TagLength], + const js::intl::LanguageTagSubtag<SubtagLength>& subtag) { + MOZ_ASSERT(subtag.length() == TagLength - 1, + "subtag must have the same length as the list of subtags"); + + const char* ptr = subtag.span().data(); + return std::binary_search(std::begin(subtags), std::end(subtags), ptr, + [](const char* a, const char* b) { + return memcmp(a, b, TagLength - 1) < 0; + }); +} + +template <size_t Length, size_t TagLength, size_t SubtagLength> +static inline const char* SearchReplacement( + const char (&subtags)[Length][TagLength], + const char* (&aliases)[Length], + const js::intl::LanguageTagSubtag<SubtagLength>& subtag) { + MOZ_ASSERT(subtag.length() == TagLength - 1, + "subtag must have the same length as the list of subtags"); + + const char* ptr = subtag.span().data(); + auto p = std::lower_bound(std::begin(subtags), std::end(subtags), ptr, + [](const char* a, const char* b) { + return memcmp(a, b, TagLength - 1) < 0; + }); + if (p != std::end(subtags) && memcmp(*p, ptr, TagLength - 1) == 0) { + return aliases[std::distance(std::begin(subtags), p)]; + } + return nullptr; +} + +#ifdef DEBUG +static bool IsAsciiLowercaseAlphanumeric(char c) { + return mozilla::IsAsciiLowercaseAlpha(c) || mozilla::IsAsciiDigit(c); +} + +static bool IsAsciiLowercaseAlphanumericOrDash(char c) { + return IsAsciiLowercaseAlphanumeric(c) || c == 
'-'; +} + +static bool IsCanonicallyCasedLanguageTag(mozilla::Span<const char> span) { + // Tell the analysis the |std::all_of| function can't GC. + JS::AutoSuppressGCAnalysis nogc; + + return std::all_of(span.begin(), span.end(), mozilla::IsAsciiLowercaseAlpha<char>); +} + +static bool IsCanonicallyCasedRegionTag(mozilla::Span<const char> span) { + // Tell the analysis the |std::all_of| function can't GC. + JS::AutoSuppressGCAnalysis nogc; + + return std::all_of(span.begin(), span.end(), mozilla::IsAsciiUppercaseAlpha<char>) || + std::all_of(span.begin(), span.end(), mozilla::IsAsciiDigit<char>); +} + +static bool IsCanonicallyCasedVariantTag(mozilla::Span<const char> span) { + // Tell the analysis the |std::all_of| function can't GC. + JS::AutoSuppressGCAnalysis nogc; + + return std::all_of(span.begin(), span.end(), IsAsciiLowercaseAlphanumeric); +} + +static bool IsCanonicallyCasedUnicodeKey(mozilla::Span<const char> key) { + return std::all_of(key.begin(), key.end(), IsAsciiLowercaseAlphanumeric); +} + +static bool IsCanonicallyCasedUnicodeType(mozilla::Span<const char> type) { + return std::all_of(type.begin(), type.end(), IsAsciiLowercaseAlphanumericOrDash); +} + +static bool IsCanonicallyCasedTransformKey(mozilla::Span<const char> key) { + return std::all_of(key.begin(), key.end(), IsAsciiLowercaseAlphanumeric); +} + +static bool IsCanonicallyCasedTransformType(mozilla::Span<const char> type) { + return std::all_of(type.begin(), type.end(), IsAsciiLowercaseAlphanumericOrDash); +} +#endif +""".rstrip() + ) + + source = "CLDR Supplemental Data, version {}".format(data["version"]) + grandfathered_mappings = data["grandfatheredMappings"] + language_mappings = data["languageMappings"] + complex_language_mappings = data["complexLanguageMappings"] + region_mappings = data["regionMappings"] + complex_region_mappings = data["complexRegionMappings"] + variant_mappings = data["variantMappings"] + unicode_mappings = data["unicodeMappings"] + transform_mappings = 
data["transformMappings"] + + # unicode_language_subtag = alpha{2,3} | alpha{5,8} ; + language_maxlength = 8 + + # unicode_region_subtag = (alpha{2} | digit{3}) ; + region_maxlength = 3 + + writeMappingsBinarySearch( + println, + "languageMapping", + "LanguageSubtag&", + "language", + "IsStructurallyValidLanguageTag", + "IsCanonicallyCasedLanguageTag", + language_mappings, + language_maxlength, + "Mappings from language subtags to preferred values.", + source, + url, + ) + writeMappingsBinarySearch( + println, + "complexLanguageMapping", + "const LanguageSubtag&", + "language", + "IsStructurallyValidLanguageTag", + "IsCanonicallyCasedLanguageTag", + complex_language_mappings.keys(), + language_maxlength, + "Language subtags with complex mappings.", + source, + url, + ) + writeMappingsBinarySearch( + println, + "regionMapping", + "RegionSubtag&", + "region", + "IsStructurallyValidRegionTag", + "IsCanonicallyCasedRegionTag", + region_mappings, + region_maxlength, + "Mappings from region subtags to preferred values.", + source, + url, + ) + writeMappingsBinarySearch( + println, + "complexRegionMapping", + "const RegionSubtag&", + "region", + "IsStructurallyValidRegionTag", + "IsCanonicallyCasedRegionTag", + complex_region_mappings.keys(), + region_maxlength, + "Region subtags with complex mappings.", + source, + url, + ) + + writeComplexLanguageTagMappings( + println, + complex_language_mappings, + "Language subtags with complex mappings.", + source, + url, + ) + writeComplexRegionTagMappings( + println, + complex_region_mappings, + "Region subtags with complex mappings.", + source, + url, + ) + + writeVariantTagMappings( + println, + variant_mappings, + "Mappings from variant subtags to preferred values.", + source, + url, + ) + + writeGrandfatheredMappingsFunction( + println, + grandfathered_mappings, + "Canonicalize grandfathered locale identifiers.", + source, + url, + ) + + writeUnicodeExtensionsMappings(println, unicode_mappings, "Unicode") + 
writeUnicodeExtensionsMappings(println, transform_mappings, "Transform") + + +def writeCLDRLanguageTagLikelySubtagsTest(println, data, url): + """ Writes the likely-subtags test file. """ + + println(generatedFileWarning) + + source = "CLDR Supplemental Data, version {}".format(data["version"]) + language_mappings = data["languageMappings"] + complex_language_mappings = data["complexLanguageMappings"] + region_mappings = data["regionMappings"] + complex_region_mappings = data["complexRegionMappings"] + likely_subtags = data["likelySubtags"] + + def bcp47(tag): + (language, script, region) = tag + return "{}{}{}".format( + language, "-" + script if script else "", "-" + region if region else "" + ) + + def canonical(tag): + (language, script, region) = tag + + # Map deprecated language subtags. + if language in language_mappings: + language = language_mappings[language] + elif language in complex_language_mappings: + (language2, script2, region2) = complex_language_mappings[language] + (language, script, region) = ( + language2, + script if script else script2, + region if region else region2, + ) + + # Map deprecated region subtags. + if region in region_mappings: + region = region_mappings[region] + else: + # Assume no complex region mappings are needed for now. + assert ( + region not in complex_region_mappings + ), "unexpected region with complex mappings: {}".format(region) + + return (language, script, region) + + # https://unicode.org/reports/tr35/#Likely_Subtags + + def addLikelySubtags(tag): + # Step 1: Canonicalize. + (language, script, region) = canonical(tag) + if script == "Zzzz": + script = None + if region == "ZZ": + region = None + + # Step 2: Lookup. 
+ searches = ( + (language, script, region), + (language, None, region), + (language, script, None), + (language, None, None), + ("und", script, None), + ) + search = next(search for search in searches if search in likely_subtags) + + (language_s, script_s, region_s) = search + (language_m, script_m, region_m) = likely_subtags[search] + + # Step 3: Return. + return ( + language if language != language_s else language_m, + script if script != script_s else script_m, + region if region != region_s else region_m, + ) + + # https://unicode.org/reports/tr35/#Likely_Subtags + def removeLikelySubtags(tag): + # Step 1: Add likely subtags. + max = addLikelySubtags(tag) + + # Step 2: Remove variants (doesn't apply here). + + # Step 3: Find a match. + (language, script, region) = max + for trial in ( + (language, None, None), + (language, None, region), + (language, script, None), + ): + if addLikelySubtags(trial) == max: + return trial + + # Step 4: Return maximized if no match found. + return max + + def likely_canonical(from_tag, to_tag): + # Canonicalize the input tag. + from_tag = canonical(from_tag) + + # Update the expected result if necessary. + if from_tag in likely_subtags: + to_tag = likely_subtags[from_tag] + + # Canonicalize the expected output. + to_canonical = canonical(to_tag) + + # Sanity check: This should match the result of |addLikelySubtags|. + assert to_canonical == addLikelySubtags(from_tag) + + return to_canonical + + # |likely_subtags| contains non-canonicalized tags, so canonicalize it first. + likely_subtags_canonical = { + k: likely_canonical(k, v) for (k, v) in likely_subtags.items() + } + + # Add test data for |Intl.Locale.prototype.maximize()|. + writeMappingsVar( + println, + {bcp47(k): bcp47(v) for (k, v) in likely_subtags_canonical.items()}, + "maxLikelySubtags", + "Extracted from likelySubtags.xml.", + source, + url, + ) + + # Use the maximalized tags as the input for the remove likely-subtags test. 
+ minimized = { + tag: removeLikelySubtags(tag) for tag in likely_subtags_canonical.values() + } + + # Add test data for |Intl.Locale.prototype.minimize()|. + writeMappingsVar( + println, + {bcp47(k): bcp47(v) for (k, v) in minimized.items()}, + "minLikelySubtags", + "Extracted from likelySubtags.xml.", + source, + url, + ) + + println( + """ +for (let [tag, maximal] of Object.entries(maxLikelySubtags)) { + assertEq(new Intl.Locale(tag).maximize().toString(), maximal); +}""" + ) + + println( + """ +for (let [tag, minimal] of Object.entries(minLikelySubtags)) { + assertEq(new Intl.Locale(tag).minimize().toString(), minimal); +}""" + ) + + println( + """ +if (typeof reportCompare === "function") + reportCompare(0, 0);""" + ) + + +def readCLDRVersionFromICU(): + icuDir = os.path.join(topsrcdir, "intl/icu/source") + if not os.path.isdir(icuDir): + raise RuntimeError("not a directory: {}".format(icuDir)) + + reVersion = re.compile(r'\s*cldrVersion\{"(\d+(?:\.\d+)?)"\}') + + for line in flines(os.path.join(icuDir, "data/misc/supplementalData.txt")): + m = reVersion.match(line) + if m: + version = m.group(1) + break + + if version is None: + raise RuntimeError("can't resolve CLDR version") + + return version + + +def updateCLDRLangTags(args): + """ Update the LanguageTagGenerated.cpp file. """ + version = args.version + url = args.url + out = args.out + filename = args.file + + # Determine current CLDR version from ICU. 
+ if version is None: + version = readCLDRVersionFromICU() + + url = url.replace("<VERSION>", version) + + print("Arguments:") + print("\tCLDR version: %s" % version) + print("\tDownload url: %s" % url) + if filename is not None: + print("\tLocal CLDR core.zip file: %s" % filename) + print("\tOutput file: %s" % out) + print("") + + data = { + "version": version, + } + + def readFiles(cldr_file): + with ZipFile(cldr_file) as zip_file: + data.update(readSupplementalData(zip_file)) + data.update(readUnicodeExtensions(zip_file)) + + print("Processing CLDR data...") + if filename is not None: + print("Always make sure you have the newest CLDR core.zip!") + with open(filename, "rb") as cldr_file: + readFiles(cldr_file) + else: + print("Downloading CLDR core.zip...") + with closing(urlopen(url)) as cldr_file: + cldr_data = io.BytesIO(cldr_file.read()) + readFiles(cldr_data) + + print("Writing Intl data...") + with io.open(out, mode="w", encoding="utf-8", newline="") as f: + println = partial(print, file=f) + + writeCLDRLanguageTagData(println, data, url) + + print("Writing Intl test data...") + js_src_builtin_intl_dir = os.path.dirname(os.path.abspath(__file__)) + test_file = os.path.join( + js_src_builtin_intl_dir, + "../../tests/non262/Intl/Locale/likely-subtags-generated.js", + ) + with io.open(test_file, mode="w", encoding="utf-8", newline="") as f: + println = partial(print, file=f) + + println("// |reftest| skip-if(!this.hasOwnProperty('Intl'))") + writeCLDRLanguageTagLikelySubtagsTest(println, data, url) + + +def flines(filepath, encoding="utf-8"): + """ Open filepath and iterate over its content. """ + with io.open(filepath, mode="r", encoding=encoding) as f: + for line in f: + yield line + + +@total_ordering +class Zone(object): + """ Time zone with optional file name. 
""" + + def __init__(self, name, filename=""): + self.name = name + self.filename = filename + + def __eq__(self, other): + return hasattr(other, "name") and self.name == other.name + + def __lt__(self, other): + return self.name < other.name + + def __hash__(self): + return hash(self.name) + + def __str__(self): + return self.name + + def __repr__(self): + return self.name + + +class TzDataDir(object): + """ tzdata source from a directory. """ + + def __init__(self, obj): + self.name = partial(os.path.basename, obj) + self.resolve = partial(os.path.join, obj) + self.basename = os.path.basename + self.isfile = os.path.isfile + self.listdir = partial(os.listdir, obj) + self.readlines = flines + + +class TzDataFile(object): + """ tzdata source from a file (tar or gzipped). """ + + def __init__(self, obj): + self.name = lambda: os.path.splitext( + os.path.splitext(os.path.basename(obj))[0] + )[0] + self.resolve = obj.getmember + self.basename = attrgetter("name") + self.isfile = tarfile.TarInfo.isfile + self.listdir = obj.getnames + self.readlines = partial(self._tarlines, obj) + + def _tarlines(self, tar, m): + with closing(tar.extractfile(m)) as f: + for line in f: + yield line.decode("utf-8") + + +def validateTimeZones(zones, links): + """ Validate the zone and link entries. 
""" + linkZones = set(links.keys()) + intersect = linkZones.intersection(zones) + if intersect: + raise RuntimeError("Links also present in zones: %s" % intersect) + + zoneNames = {z.name for z in zones} + linkTargets = set(links.values()) + if not linkTargets.issubset(zoneNames): + raise RuntimeError( + "Link targets not found: %s" % linkTargets.difference(zoneNames) + ) + + +def partition(iterable, *predicates): + def innerPartition(pred, it): + it1, it2 = tee(it) + return (filter(pred, it1), filterfalse(pred, it2)) + + if len(predicates) == 0: + return iterable + (left, right) = innerPartition(predicates[0], iterable) + if len(predicates) == 1: + return (left, right) + return tuple([left] + list(partition(right, *predicates[1:]))) + + +def listIANAFiles(tzdataDir): + def isTzFile(d, m, f): + return m(f) and d.isfile(d.resolve(f)) + + return filter( + partial(isTzFile, tzdataDir, re.compile("^[a-z0-9]+$").match), + tzdataDir.listdir(), + ) + + +def readIANAFiles(tzdataDir, files): + """ Read all IANA time zone files from the given iterable. """ + nameSyntax = "[\w/+\-]+" + pZone = re.compile(r"Zone\s+(?P<name>%s)\s+.*" % nameSyntax) + pLink = re.compile( + r"Link\s+(?P<target>%s)\s+(?P<name>%s)(?:\s+#.*)?" % (nameSyntax, nameSyntax) + ) + + def createZone(line, fname): + match = pZone.match(line) + name = match.group("name") + return Zone(name, fname) + + def createLink(line, fname): + match = pLink.match(line) + (name, target) = match.group("name", "target") + return (Zone(name, fname), target) + + zones = set() + links = dict() + for filename in files: + filepath = tzdataDir.resolve(filename) + for line in tzdataDir.readlines(filepath): + if line.startswith("Zone"): + zones.add(createZone(line, filename)) + if line.startswith("Link"): + (link, target) = createLink(line, filename) + links[link] = target + + return (zones, links) + + +def readIANATimeZones(tzdataDir, ignoreBackzone, ignoreFactory): + """ Read the IANA time zone information from `tzdataDir`. 
def readICUResourceFile(filename):
    """Read an ICU resource file.

    Yields (<table-name>, <value>) for each table when the table is closed.
    <table-name> is the "|"-joined path of all currently open tables. <value>
    is None for a table without direct values, the single value if there is
    exactly one, and otherwise the list of all values. Values are integers or
    strings (with the surrounding quotes removed).

    Raises RuntimeError if a line can't be parsed.
    """

    numberValue = r"-?\d+"
    stringValue = r'".+?"'

    def asVector(val):
        return r"%s(?:\s*,\s*%s)*" % (val, val)

    numberVector = asVector(numberValue)
    stringVector = asVector(stringValue)

    reNumberVector = re.compile(numberVector)
    reStringVector = re.compile(stringVector)
    reNumberValue = re.compile(numberValue)
    reStringValue = re.compile(stringValue)

    def parseValue(value):
        m = reNumberVector.match(value)
        if m:
            return [int(v) for v in reNumberValue.findall(value)]
        m = reStringVector.match(value)
        if m:
            return [v[1:-1] for v in reStringValue.findall(value)]
        raise RuntimeError("unknown value type: %s" % value)

    def extractValue(values):
        if len(values) == 0:
            return None
        if len(values) == 1:
            return values[0]
        return values

    # Named differently from the loop variable below, so the helper isn't
    # shadowed while the file is processed.
    def linePattern(*args):
        maybeMultiComments = r"(?:/\*[^*]*\*/)*"
        maybeSingleComment = r"(?://.*)?"
        lineStart = r"^%s" % maybeMultiComments
        # Raw string: "\s" is a regular expression escape.
        lineEnd = r"%s\s*%s$" % (maybeMultiComments, maybeSingleComment)
        return re.compile(r"\s*".join(chain([lineStart], args, [lineEnd])))

    tableName = r'(?P<quote>"?)(?P<name>.+?)(?P=quote)'
    tableValue = r"(?P<value>%s|%s)" % (numberVector, stringVector)

    reStartTable = linePattern(tableName, r"\{")
    reEndTable = linePattern(r"\}")
    reSingleValue = linePattern(r",?", tableValue, r",?")
    reCompactTable = linePattern(tableName, r"\{", tableValue, r"\}")
    reEmptyLine = linePattern()

    # Stack of the names of all currently open tables.
    tables = []

    def currentTable():
        return "|".join(tables)

    # Values collected for the innermost open table.
    values = []
    for line in flines(filename, "utf-8-sig"):
        line = line.strip()
        if line == "":
            continue

        # Lines which only consist of comments.
        m = reEmptyLine.match(line)
        if m:
            continue

        m = reStartTable.match(line)
        if m:
            assert len(values) == 0
            tables.append(m.group("name"))
            continue

        m = reEndTable.match(line)
        if m:
            yield (currentTable(), extractValue(values))
            tables.pop()
            values = []
            continue

        # Table which is opened and closed on the same line.
        m = reCompactTable.match(line)
        if m:
            assert len(values) == 0
            tables.append(m.group("name"))
            yield (currentTable(), extractValue(parseValue(m.group("value"))))
            tables.pop()
            continue

        # Plain values are only valid within a table.
        m = reSingleValue.match(line)
        if m and tables:
            values.extend(parseValue(m.group("value")))
            continue

        raise RuntimeError("unknown entry: %s" % line)
+ """ + typeMapTimeZoneKey = "timezoneTypes:table(nofallback)|typeMap|timezone|" + typeAliasTimeZoneKey = "timezoneTypes:table(nofallback)|typeAlias|timezone|" + + def toTimeZone(name): + return Zone(name.replace(":", "/")) + + zones = set() + links = dict() + + for name, value in readICUResourceFile(os.path.join(icuTzDir, "timezoneTypes.txt")): + if name.startswith(typeMapTimeZoneKey): + zones.add(toTimeZone(name[len(typeMapTimeZoneKey) :])) + if name.startswith(typeAliasTimeZoneKey): + links[toTimeZone(name[len(typeAliasTimeZoneKey) :])] = value + + validateTimeZones(zones, links) + + return (zones, links) + + +def readICUTimeZonesFromZoneInfo(icuTzDir): + """Read the ICU time zone information from `icuTzDir`/zoneinfo64.txt + and returns the tuple (zones, links). + """ + zoneKey = "zoneinfo64:table(nofallback)|Zones:array|:table" + linkKey = "zoneinfo64:table(nofallback)|Zones:array|:int" + namesKey = "zoneinfo64:table(nofallback)|Names" + + tzId = 0 + tzLinks = dict() + tzNames = [] + + for name, value in readICUResourceFile(os.path.join(icuTzDir, "zoneinfo64.txt")): + if name == zoneKey: + tzId += 1 + elif name == linkKey: + tzLinks[tzId] = int(value) + tzId += 1 + elif name == namesKey: + tzNames.extend(value) + + links = {Zone(tzNames[zone]): tzNames[target] for (zone, target) in tzLinks.items()} + zones = {Zone(v) for v in tzNames if Zone(v) not in links} + + validateTimeZones(zones, links) + + return (zones, links) + + +def readICUTimeZones(icuDir, icuTzDir, ignoreFactory): + # zoneinfo64.txt contains the supported time zones by ICU. This data is + # generated from tzdata files, it doesn't include "backzone" in stock ICU. + (zoneinfoZones, zoneinfoLinks) = readICUTimeZonesFromZoneInfo(icuTzDir) + + # timezoneTypes.txt contains the canonicalization information for ICU. This + # data is generated from CLDR files. It includes data about time zones from + # tzdata's "backzone" file. 
+ (typesZones, typesLinks) = readICUTimeZonesFromTimezoneTypes(icuTzDir) + + # Remove the placeholder time zone "Factory". + # See also <https://github.com/eggert/tz/blob/master/factory>. + if ignoreFactory: + zoneinfoZones.remove(Zone("Factory")) + + # Remove the ICU placeholder time zone "Etc/Unknown". + # See also <https://unicode.org/reports/tr35/#Time_Zone_Identifiers>. + for zones in (zoneinfoZones, typesZones): + zones.remove(Zone("Etc/Unknown")) + + # Remove any outdated ICU links. + for links in (zoneinfoLinks, typesLinks): + for zone in otherICULegacyLinks().keys(): + if zone not in links: + raise KeyError(f"Can't remove non-existent link from '{zone}'") + del links[zone] + + # Information in zoneinfo64 should be a superset of timezoneTypes. + def inZoneInfo64(zone): + return zone in zoneinfoZones or zone in zoneinfoLinks + + notFoundInZoneInfo64 = [zone for zone in typesZones if not inZoneInfo64(zone)] + if notFoundInZoneInfo64: + raise RuntimeError( + "Missing time zones in zoneinfo64.txt: %s" % notFoundInZoneInfo64 + ) + + notFoundInZoneInfo64 = [ + zone for zone in typesLinks.keys() if not inZoneInfo64(zone) + ] + if notFoundInZoneInfo64: + raise RuntimeError( + "Missing time zones in zoneinfo64.txt: %s" % notFoundInZoneInfo64 + ) + + # zoneinfo64.txt only defines the supported time zones by ICU, the canonicalization + # rules are defined through timezoneTypes.txt. Merge both to get the actual zones + # and links used by ICU. + icuZones = set( + chain( + (zone for zone in zoneinfoZones if zone not in typesLinks), + (zone for zone in typesZones), + ) + ) + icuLinks = dict( + chain( + ( + (zone, target) + for (zone, target) in zoneinfoLinks.items() + if zone not in typesZones + ), + ((zone, target) for (zone, target) in typesLinks.items()), + ) + ) + + return (icuZones, icuLinks) + + +def readICULegacyZones(icuDir): + """Read the ICU legacy time zones from `icuTzDir`/tools/tzcode/icuzones + and returns the tuple (zones, links). 
+ """ + tzdir = TzDataDir(os.path.join(icuDir, "tools/tzcode")) + + # Per spec we must recognize only IANA time zones and links, but ICU + # recognizes various legacy, non-IANA time zones and links. Compute these + # non-IANA time zones and links. + + # Most legacy, non-IANA time zones and links are in the icuzones file. + (zones, links) = readIANAFiles(tzdir, ["icuzones"]) + + # Remove the ICU placeholder time zone "Etc/Unknown". + # See also <https://unicode.org/reports/tr35/#Time_Zone_Identifiers>. + zones.remove(Zone("Etc/Unknown")) + + # A handful of non-IANA zones/links are not in icuzones and must be added + # manually so that we won't invoke ICU with them. + for (zone, target) in otherICULegacyLinks().items(): + if zone in links: + if links[zone] != target: + raise KeyError( + f"Can't overwrite link '{zone} -> {links[zone]}' with '{target}'" + ) + else: + print( + f"Info: Link '{zone} -> {target}' can be removed from otherICULegacyLinks()" + ) + links[zone] = target + + return (zones, links) + + +def otherICULegacyLinks(): + """The file `icuTzDir`/tools/tzcode/icuzones contains all ICU legacy time + zones with the exception of time zones which are removed by IANA after an + ICU release. + + For example ICU 67 uses tzdata2018i, but tzdata2020b removed the link from + "US/Pacific-New" to "America/Los_Angeles". ICU standalone tzdata updates + don't include modified icuzones files, so we must manually record any IANA + modifications here. + + After an ICU update, we can remove any no longer needed entries from this + function by checking if the relevant entries are now included in icuzones. + """ + + return { + # tzdata2020b removed the link US/Pacific-New -> America/Los_Angeles. + Zone("US/Pacific-New"): "America/Los_Angeles", + } + + +def icuTzDataVersion(icuTzDir): + """ Read the ICU time zone version from `icuTzDir`/zoneinfo64.txt. 
""" + + def searchInFile(pattern, f): + p = re.compile(pattern) + for line in flines(f, "utf-8-sig"): + m = p.search(line) + if m: + return m.group(1) + return None + + zoneinfo = os.path.join(icuTzDir, "zoneinfo64.txt") + if not os.path.isfile(zoneinfo): + raise RuntimeError("file not found: %s" % zoneinfo) + version = searchInFile("^//\s+tz version:\s+([0-9]{4}[a-z])$", zoneinfo) + if version is None: + raise RuntimeError( + "%s does not contain a valid tzdata version string" % zoneinfo + ) + return version + + +def findIncorrectICUZones(ianaZones, ianaLinks, icuZones, icuLinks, ignoreBackzone): + """ Find incorrect ICU zone entries. """ + + def isIANATimeZone(zone): + return zone in ianaZones or zone in ianaLinks + + def isICUTimeZone(zone): + return zone in icuZones or zone in icuLinks + + def isICULink(zone): + return zone in icuLinks + + # All IANA zones should be present in ICU. + missingTimeZones = [zone for zone in ianaZones if not isICUTimeZone(zone)] + # Normally zones in backzone are also present as links in one of the other + # time zone files. The only exception to this rule is the Asia/Hanoi time + # zone, this zone is only present in the backzone file. + expectedMissing = [] if ignoreBackzone else [Zone("Asia/Hanoi")] + if missingTimeZones != expectedMissing: + raise RuntimeError( + "Not all zones are present in ICU, did you forget " + "to run intl/update-tzdata.sh? %s" % missingTimeZones + ) + + # Zones which are only present in ICU? + additionalTimeZones = [zone for zone in icuZones if not isIANATimeZone(zone)] + if additionalTimeZones: + raise RuntimeError( + "Additional zones present in ICU, did you forget " + "to run intl/update-tzdata.sh? %s" % additionalTimeZones + ) + + # Zones which are marked as links in ICU. + result = ((zone, icuLinks[zone]) for zone in ianaZones if isICULink(zone)) + + # Remove unnecessary UTC mappings. 
def findIncorrectICULinks(ianaZones, ianaLinks, icuZones, icuLinks):
    """Find incorrect ICU link entries.

    Returns a list of (link, ianaTarget, icuTarget) tuples, sorted by link,
    for every IANA link which ICU either resolves to a different target or
    treats as a zone. In the latter case the ICU target is the link's own
    name. Entries where both targets are UTC names are omitted.

    Raises RuntimeError if an IANA link is unknown to ICU or if ICU has a link
    which is unknown to IANA.
    """
    utcNames = ("Etc/UTC", "Etc/UCT", "Etc/GMT")

    # Every IANA link has to be known to ICU, either as a zone or as a link.
    unknownToICU = []
    for link in ianaLinks.keys():
        if link not in icuZones and link not in icuLinks:
            unknownToICU.append(link)
    if unknownToICU:
        raise RuntimeError(
            "Not all zones are present in ICU, did you forget "
            "to run intl/update-tzdata.sh? %s" % unknownToICU
        )

    # Likewise every ICU link has to be a known IANA zone or link.
    unknownToIANA = []
    for link in icuLinks.keys():
        if link not in ianaZones and link not in ianaLinks:
            unknownToIANA.append(link)
    if unknownToIANA:
        raise RuntimeError(
            "Additional links present in ICU, did you forget "
            "to run intl/update-tzdata.sh? %s" % unknownToIANA
        )

    # IANA links which have a different target in ICU.
    differentTarget = []
    for (link, ianaTarget) in ianaLinks.items():
        if link in icuLinks and ianaTarget != icuLinks[link]:
            differentTarget.append((link, ianaTarget, icuLinks[link]))

    # IANA links which are zones in ICU.
    zonesInICU = []
    for (link, ianaTarget) in ianaLinks.items():
        if link in icuZones:
            zonesInICU.append((link, ianaTarget, link.name))

    # Remove unnecessary UTC mappings, i.e. drop an entry when both targets
    # are UTC names.
    mismatches = []
    for entry in chain(differentTarget, zonesInICU):
        (_, ianaTarget, icuTarget) = entry
        if ianaTarget in utcNames and icuTarget in utcNames:
            continue
        mismatches.append(entry)

    return sorted(mismatches, key=itemgetter(0))
+tzdataVersionComment = "// tzdata version = {0}" + + +def processTimeZones( + tzdataDir, icuDir, icuTzDir, version, ignoreBackzone, ignoreFactory, out +): + """ Read the time zone info and create a new time zone cpp file. """ + print("Processing tzdata mapping...") + (ianaZones, ianaLinks) = readIANATimeZones(tzdataDir, ignoreBackzone, ignoreFactory) + (icuZones, icuLinks) = readICUTimeZones(icuDir, icuTzDir, ignoreFactory) + (legacyZones, legacyLinks) = readICULegacyZones(icuDir) + + # Remove all legacy ICU time zones. + icuZones = {zone for zone in icuZones if zone not in legacyZones} + icuLinks = { + zone: target for (zone, target) in icuLinks.items() if zone not in legacyLinks + } + + incorrectZones = findIncorrectICUZones( + ianaZones, ianaLinks, icuZones, icuLinks, ignoreBackzone + ) + if not incorrectZones: + print("<<< No incorrect ICU time zones found, please update Intl.js! >>>") + print("<<< Maybe https://ssl.icu-project.org/trac/ticket/12044 was fixed? >>>") + + incorrectLinks = findIncorrectICULinks(ianaZones, ianaLinks, icuZones, icuLinks) + if not incorrectLinks: + print("<<< No incorrect ICU time zone links found, please update Intl.js! >>>") + print("<<< Maybe https://ssl.icu-project.org/trac/ticket/12044 was fixed? 
>>>") + + print("Writing Intl tzdata file...") + with io.open(out, mode="w", encoding="utf-8", newline="") as f: + println = partial(print, file=f) + + println(generatedFileWarning) + println(tzdataVersionComment.format(version)) + println("") + + println("#ifndef builtin_intl_TimeZoneDataGenerated_h") + println("#define builtin_intl_TimeZoneDataGenerated_h") + println("") + + println("namespace js {") + println("namespace timezone {") + println("") + + println("// Format:") + println('// "ZoneName" // ICU-Name [time zone file]') + println("const char* const ianaZonesTreatedAsLinksByICU[] = {") + for (zone, icuZone) in incorrectZones: + println(' "%s", // %s [%s]' % (zone, icuZone, zone.filename)) + println("};") + println("") + + println("// Format:") + println('// "LinkName", "Target" // ICU-Target [time zone file]') + println("struct LinkAndTarget") + println("{") + println(" const char* const link;") + println(" const char* const target;") + println("};") + println("") + println("const LinkAndTarget ianaLinksCanonicalizedDifferentlyByICU[] = {") + for (zone, target, icuTarget) in incorrectLinks: + println( + ' { "%s", "%s" }, // %s [%s]' + % (zone, target, icuTarget, zone.filename) + ) + println("};") + println("") + + println( + "// Legacy ICU time zones, these are not valid IANA time zone names. 
We also" + ) + println("// disallow the old and deprecated System V time zones.") + println( + "// https://ssl.icu-project.org/repos/icu/trunk/icu4c/source/tools/tzcode/icuzones" + ) # NOQA: E501 + println("const char* const legacyICUTimeZones[] = {") + for zone in chain(sorted(legacyLinks.keys()), sorted(legacyZones)): + println(' "%s",' % zone) + println("};") + println("") + + println("} // namespace timezone") + println("} // namespace js") + println("") + println("#endif /* builtin_intl_TimeZoneDataGenerated_h */") + + +def updateBackzoneLinks(tzdataDir, links): + def withZone(fn): + return lambda zone_target: fn(zone_target[0]) + + (backzoneZones, backzoneLinks) = readIANAFiles(tzdataDir, ["backzone"]) + (stableZones, updatedLinks, updatedZones) = partition( + links.items(), + # Link not changed in backzone. + withZone(lambda zone: zone not in backzoneLinks and zone not in backzoneZones), + # Link has a new target. + withZone(lambda zone: zone in backzoneLinks), + ) + # Keep stable zones and links with updated target. 
+ return dict( + chain( + stableZones, + map(withZone(lambda zone: (zone, backzoneLinks[zone])), updatedLinks), + ) + ) + + +def generateTzDataLinkTestContent(testDir, version, fileName, description, links): + with io.open( + os.path.join(testDir, fileName), mode="w", encoding="utf-8", newline="" + ) as f: + println = partial(print, file=f) + + println('// |reftest| skip-if(!this.hasOwnProperty("Intl"))') + println("") + println(generatedFileWarning) + println(tzdataVersionComment.format(version)) + println( + """ +const tzMapper = [ + x => x, + x => x.toUpperCase(), + x => x.toLowerCase(), +]; +""" + ) + + println(description) + println("const links = {") + for (zone, target) in sorted(links, key=itemgetter(0)): + println(' "%s": "%s",' % (zone, target)) + println("};") + + println( + """ +for (let [linkName, target] of Object.entries(links)) { + if (target === "Etc/UTC" || target === "Etc/GMT") + target = "UTC"; + + for (let map of tzMapper) { + let dtf = new Intl.DateTimeFormat(undefined, {timeZone: map(linkName)}); + let resolvedTimeZone = dtf.resolvedOptions().timeZone; + assertEq(resolvedTimeZone, target, `${linkName} -> ${target}`); + } +} +""" + ) + println( + """ +if (typeof reportCompare === "function") + reportCompare(0, 0, "ok"); +""" + ) + + +def generateTzDataTestBackwardLinks(tzdataDir, version, ignoreBackzone, testDir): + (zones, links) = readIANAFiles(tzdataDir, ["backward"]) + assert len(zones) == 0 + + if not ignoreBackzone: + links = updateBackzoneLinks(tzdataDir, links) + + generateTzDataLinkTestContent( + testDir, + version, + "timeZone_backward_links.js", + "// Link names derived from IANA Time Zone Database, backward file.", + links.items(), + ) + + +def generateTzDataTestNotBackwardLinks(tzdataDir, version, ignoreBackzone, testDir): + tzfiles = filterfalse( + {"backward", "backzone"}.__contains__, listIANAFiles(tzdataDir) + ) + (zones, links) = readIANAFiles(tzdataDir, tzfiles) + + if not ignoreBackzone: + links = 
updateBackzoneLinks(tzdataDir, links) + + generateTzDataLinkTestContent( + testDir, + version, + "timeZone_notbackward_links.js", + "// Link names derived from IANA Time Zone Database, excluding backward file.", + links.items(), + ) + + +def generateTzDataTestBackzone(tzdataDir, version, ignoreBackzone, testDir): + backzoneFiles = {"backzone"} + (bkfiles, tzfiles) = partition(listIANAFiles(tzdataDir), backzoneFiles.__contains__) + + # Read zone and link infos. + (zones, links) = readIANAFiles(tzdataDir, tzfiles) + (backzones, backlinks) = readIANAFiles(tzdataDir, bkfiles) + + if not ignoreBackzone: + comment = """\ +// This file was generated with historical, pre-1970 backzone information +// respected. Therefore, every zone key listed below is its own Zone, not +// a Link to a modern-day target as IANA ignoring backzones would say. + +""" + else: + comment = """\ +// This file was generated while ignoring historical, pre-1970 backzone +// information. Therefore, every zone key listed below is part of a Link +// whose target is the corresponding value. + +""" + + generateTzDataLinkTestContent( + testDir, + version, + "timeZone_backzone.js", + comment + "// Backzone zones derived from IANA Time Zone Database.", + ( + (zone, zone if not ignoreBackzone else links[zone]) + for zone in backzones + if zone in links + ), + ) + + +def generateTzDataTestBackzoneLinks(tzdataDir, version, ignoreBackzone, testDir): + backzoneFiles = {"backzone"} + (bkfiles, tzfiles) = partition(listIANAFiles(tzdataDir), backzoneFiles.__contains__) + + # Read zone and link infos. + (zones, links) = readIANAFiles(tzdataDir, tzfiles) + (backzones, backlinks) = readIANAFiles(tzdataDir, bkfiles) + + if not ignoreBackzone: + comment = """\ +// This file was generated with historical, pre-1970 backzone information +// respected. Therefore, every zone key listed below points to a target +// in the backzone file and not to its modern-day target as IANA ignoring +// backzones would say. 
+ +""" + else: + comment = """\ +// This file was generated while ignoring historical, pre-1970 backzone +// information. Therefore, every zone key listed below is part of a Link +// whose target is the corresponding value ignoring any backzone entries. + +""" + + generateTzDataLinkTestContent( + testDir, + version, + "timeZone_backzone_links.js", + comment + "// Backzone links derived from IANA Time Zone Database.", + ( + (zone, target if not ignoreBackzone else links[zone]) + for (zone, target) in backlinks.items() + ), + ) + + +def generateTzDataTestVersion(tzdataDir, version, testDir): + fileName = "timeZone_version.js" + + with io.open( + os.path.join(testDir, fileName), mode="w", encoding="utf-8", newline="" + ) as f: + println = partial(print, file=f) + + println('// |reftest| skip-if(!this.hasOwnProperty("Intl"))') + println("") + println(generatedFileWarning) + println(tzdataVersionComment.format(version)) + println("""const tzdata = "{0}";""".format(version)) + + println( + """ +if (typeof getICUOptions === "undefined") { + var getICUOptions = SpecialPowers.Cu.getJSTestingFunctions().getICUOptions; +} + +var options = getICUOptions(); + +assertEq(options.tzdata, tzdata); + +if (typeof reportCompare === "function") + reportCompare(0, 0, "ok"); +""" + ) + + +def generateTzDataTests(tzdataDir, version, ignoreBackzone, testDir): + generateTzDataTestBackwardLinks(tzdataDir, version, ignoreBackzone, testDir) + generateTzDataTestNotBackwardLinks(tzdataDir, version, ignoreBackzone, testDir) + generateTzDataTestBackzone(tzdataDir, version, ignoreBackzone, testDir) + generateTzDataTestBackzoneLinks(tzdataDir, version, ignoreBackzone, testDir) + generateTzDataTestVersion(tzdataDir, version, testDir) + + +def updateTzdata(topsrcdir, args): + """ Update the time zone cpp file. 
""" + + icuDir = os.path.join(topsrcdir, "intl/icu/source") + if not os.path.isdir(icuDir): + raise RuntimeError("not a directory: %s" % icuDir) + + icuTzDir = os.path.join(topsrcdir, "intl/tzdata/source") + if not os.path.isdir(icuTzDir): + raise RuntimeError("not a directory: %s" % icuTzDir) + + dateTimeFormatTestDir = os.path.join( + topsrcdir, "js/src/tests/non262/Intl/DateTimeFormat" + ) + if not os.path.isdir(dateTimeFormatTestDir): + raise RuntimeError("not a directory: %s" % dateTimeFormatTestDir) + + tzDir = args.tz + if tzDir is not None and not (os.path.isdir(tzDir) or os.path.isfile(tzDir)): + raise RuntimeError("not a directory or file: %s" % tzDir) + ignoreBackzone = args.ignore_backzone + # TODO: Accept or ignore the placeholder time zone "Factory"? + ignoreFactory = False + out = args.out + + version = icuTzDataVersion(icuTzDir) + url = ( + "https://www.iana.org/time-zones/repository/releases/tzdata%s.tar.gz" % version + ) + + print("Arguments:") + print("\ttzdata version: %s" % version) + print("\ttzdata URL: %s" % url) + print("\ttzdata directory|file: %s" % tzDir) + print("\tICU directory: %s" % icuDir) + print("\tICU timezone directory: %s" % icuTzDir) + print("\tIgnore backzone file: %s" % ignoreBackzone) + print("\tOutput file: %s" % out) + print("") + + def updateFrom(f): + if os.path.isfile(f) and tarfile.is_tarfile(f): + with tarfile.open(f, "r:*") as tar: + processTimeZones( + TzDataFile(tar), + icuDir, + icuTzDir, + version, + ignoreBackzone, + ignoreFactory, + out, + ) + generateTzDataTests( + TzDataFile(tar), version, ignoreBackzone, dateTimeFormatTestDir + ) + elif os.path.isdir(f): + processTimeZones( + TzDataDir(f), + icuDir, + icuTzDir, + version, + ignoreBackzone, + ignoreFactory, + out, + ) + generateTzDataTests( + TzDataDir(f), version, ignoreBackzone, dateTimeFormatTestDir + ) + else: + raise RuntimeError("unknown format") + + if tzDir is None: + print("Downloading tzdata file...") + with closing(urlopen(url)) as tzfile: + fname 
def readCurrencyFile(tree):
    """Yield the currencies whose number of minor units differs from two.

    `tree` is searched for all "CcyNtry" elements. For each such element with
    a currency code and an integral minor units value other than 2, the tuple
    (currency, minorUnits, currencyName, countryName) is yielded.
    """
    currencyCodePattern = re.compile(r"^[A-Z]{3}$")
    integerPattern = re.compile(r"^\d+$")

    for entry in tree.iterfind(".//CcyNtry"):
        # Entries without currency information are of no interest.
        code = entry.findtext("Ccy")
        if code is None:
            continue
        assert currencyCodePattern.match(code)

        rawMinorUnits = entry.findtext("CcyMnrUnts")
        assert rawMinorUnits is not None

        # Ignore entries whose minor units aren't a number.
        if not integerPattern.match(rawMinorUnits):
            continue

        # Two minor units is the default, those entries aren't recorded.
        minorUnits = int(rawMinorUnits)
        if minorUnits == 2:
            continue

        yield (code, minorUnits, entry.findtext("CcyNm"), entry.findtext("CtryNm"))
""" + import xml.etree.ElementTree as ET + from random import randint + + url = args.url + out = args.out + filename = args.file + + print("Arguments:") + print("\tDownload url: %s" % url) + print("\tLocal currency file: %s" % filename) + print("\tOutput file: %s" % out) + print("") + + def updateFrom(currencyFile): + print("Processing currency code list file...") + tree = ET.parse(currencyFile) + published = tree.getroot().attrib["Pblshd"] + currencies = readCurrencyFile(tree) + + print("Writing CurrencyData file...") + writeCurrencyFile(published, currencies, out) + + if filename is not None: + print("Always make sure you have the newest currency code list file!") + updateFrom(filename) + else: + print("Downloading currency & funds code list...") + request = UrlRequest(url) + request.add_header( + "User-agent", + "Mozilla/5.0 (Mobile; rv:{0}.0) Gecko/{0}.0 Firefox/{0}.0".format( + randint(1, 999) + ), + ) + with closing(urlopen(request)) as currencyFile: + fname = urlsplit(currencyFile.geturl()).path.split("/")[-1] + with tempfile.NamedTemporaryFile(suffix=fname) as currencyTmpFile: + print("File stored in %s" % currencyTmpFile.name) + currencyTmpFile.write(currencyFile.read()) + currencyTmpFile.flush() + updateFrom(currencyTmpFile.name) + + +def writeUnicodeExtensionsMappings(println, mapping, extension): + println( + """ +template <size_t Length> +static inline bool Is{0}Key( + mozilla::Span<const char> key, const char (&str)[Length]) {{ + static_assert(Length == {0}KeyLength + 1, + "{0} extension key is two characters long"); + return memcmp(key.data(), str, Length - 1) == 0; +}} + +template <size_t Length> +static inline bool Is{0}Type( + mozilla::Span<const char> type, const char (&str)[Length]) {{ + static_assert(Length > {0}KeyLength + 1, + "{0} extension type contains more than two characters"); + return type.size() == (Length - 1) && + memcmp(type.data(), str, Length - 1) == 0; +}} +""".format( + extension + ).rstrip( + "\n" + ) + ) + + 
linear_search_max_length = 4 + + needs_binary_search = any( + len(replacements.items()) > linear_search_max_length + for replacements in mapping.values() + ) + + if needs_binary_search: + println( + """ +static int32_t Compare{0}Type(const char* a, mozilla::Span<const char> b) {{ + MOZ_ASSERT(!std::char_traits<char>::find(b.data(), b.size(), '\\0'), + "unexpected null-character in string"); + + using UnsignedChar = unsigned char; + for (size_t i = 0; i < b.size(); i++) {{ + // |a| is zero-terminated and |b| doesn't contain a null-terminator. So if + // we've reached the end of |a|, the below if-statement will always be true. + // That ensures we don't read past the end of |a|. + if (int32_t r = UnsignedChar(a[i]) - UnsignedChar(b[i])) {{ + return r; + }} + }} + + // Return zero if both strings are equal or a negative number if |b| is a + // prefix of |a|. + return -int32_t(UnsignedChar(a[b.size()])); +}} + +template <size_t Length> +static inline const char* Search{0}Replacement( + const char* (&types)[Length], const char* (&aliases)[Length], + mozilla::Span<const char> type) {{ + + auto p = std::lower_bound(std::begin(types), std::end(types), type, + [](const auto& a, const auto& b) {{ + return Compare{0}Type(a, b) < 0; + }}); + if (p != std::end(types) && Compare{0}Type(*p, type) == 0) {{ + return aliases[std::distance(std::begin(types), p)]; + }} + return nullptr; +}} +""".format( + extension + ).rstrip( + "\n" + ) + ) + + println( + """ +/** + * Mapping from deprecated BCP 47 {0} extension types to their preferred + * values. 
+ * + * Spec: https://www.unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files + * Spec: https://www.unicode.org/reports/tr35/#t_Extension + */ +const char* js::intl::LanguageTag::replace{0}ExtensionType( + mozilla::Span<const char> key, mozilla::Span<const char> type) {{ + MOZ_ASSERT(key.size() == {0}KeyLength); + MOZ_ASSERT(IsCanonicallyCased{0}Key(key)); + + MOZ_ASSERT(type.size() > {0}KeyLength); + MOZ_ASSERT(IsCanonicallyCased{0}Type(type)); +""".format( + extension + ) + ) + + def to_hash_key(replacements): + return str(sorted(replacements.items())) + + def write_array(subtags, name, length): + max_entries = (80 - len(" ")) // (length + len('"", ')) + + println(" static const char* {}[{}] = {{".format(name, len(subtags))) + + for entries in grouper(subtags, max_entries): + entries = ( + '"{}"'.format(tag).rjust(length + 2) + for tag in entries + if tag is not None + ) + println(" {},".format(", ".join(entries))) + + println(" };") + + # Merge duplicate keys. + key_aliases = {} + for (key, replacements) in sorted(mapping.items(), key=itemgetter(0)): + hash_key = to_hash_key(replacements) + if hash_key not in key_aliases: + key_aliases[hash_key] = [] + else: + key_aliases[hash_key].append(key) + + first_key = True + for (key, replacements) in sorted(mapping.items(), key=itemgetter(0)): + hash_key = to_hash_key(replacements) + if key in key_aliases[hash_key]: + continue + + cond = ( + 'Is{}Key(key, "{}")'.format(extension, k) + for k in [key] + key_aliases[hash_key] + ) + + if_kind = "if" if first_key else "else if" + cond = (" ||\n" + " " * (2 + len(if_kind) + 2)).join(cond) + println( + """ + {} ({}) {{""".format( + if_kind, cond + ).strip( + "\n" + ) + ) + first_key = False + + replacements = sorted(replacements.items(), key=itemgetter(0)) + + if len(replacements) > linear_search_max_length: + types = [t for (t, _) in replacements] + preferred = [r for (_, r) in replacements] + max_len = max(len(k) for k in types + preferred) + + write_array(types, 
def readICUUnitResourceFile(filepath):
    """Return a set of unit descriptor pairs where the first entry denotes the unit type and the
    second entry the unit name.

    Example:

    root{
        units{
            compound{
            }
            coordinate{
            }
            length{
                meter{
                }
            }
        }
        unitsNarrow:alias{"/LOCALE/unitsShort"}
        unitsShort{
            duration{
                day{
                }
                day-person:alias{"/LOCALE/unitsShort/duration/day"}
            }
            length{
                meter{
                }
            }
        }
    }

    Returns {("length", "meter"), ("duration", "day"), ("duration", "day-person")}

    Raises Exception if a line is neither a comment, the start or end of a
    table, nor a table entry.
    """

    start_table_re = re.compile(r"^([\w\-%:\"]+)\{$")
    end_table_re = re.compile(r"^\}$")
    table_entry_re = re.compile(r"^([\w\-%:\"]+)\{\"(.*?)\"\}$")

    # The current resource table.
    table = {}

    # List of parent tables when parsing.
    parents = []

    # Track multi-line comments state.
    in_multiline_comment = False

    for line in flines(filepath, "utf-8-sig"):
        # Remove leading and trailing whitespace.
        line = line.strip()

        # Skip over comments.
        if in_multiline_comment:
            if line.endswith("*/"):
                in_multiline_comment = False
            continue

        if line.startswith("//"):
            continue

        if line.startswith("/*"):
            # A block comment can be opened and closed on the same line. Only
            # keep skipping lines when the closing "*/" is still pending. The
            # length check ensures "/*/" isn't treated as a closed comment.
            in_multiline_comment = not (len(line) >= 4 and line.endswith("*/"))
            continue

        # Try to match the start of a table, e.g. `length{` or `meter{`.
        match = start_table_re.match(line)
        if match:
            parents.append(table)
            table_name = match.group(1)
            new_table = {}
            table[table_name] = new_table
            table = new_table
            continue

        # Try to match the end of a table.
        match = end_table_re.match(line)
        if match:
            table = parents.pop()
            continue

        # Try to match a table entry, e.g. `dnam{"meter"}`.
        match = table_entry_re.match(line)
        if match:
            entry_key = match.group(1)
            entry_value = match.group(2)
            table[entry_key] = entry_value
            continue

        raise Exception("unexpected line: '{}' in {}".format(line, filepath))

    assert len(parents) == 0, "Not all tables closed"
    assert len(table) == 1, "More than one root table"

    # Remove the top-level language identifier table.
    (_, unit_table) = table.popitem()

    # Add all units for the three display formats "units", "unitsNarrow", and "unitsShort".
    # But exclude the pseudo-units "compound" and "coordinate".
    return {
        (unit_type, unit_name if not unit_name.endswith(":alias") else unit_name[:-6])
        for unit_display in ("units", "unitsNarrow", "unitsShort")
        if unit_display in unit_table
        for (unit_type, unit_names) in unit_table[unit_display].items()
        if unit_type != "compound" and unit_type != "coordinate"
        for unit_name in unit_names.keys()
    }
+ """ + + def find_match(unit): + unit_match = [ + (unit_type, unit_name) + for (unit_type, unit_name) in all_units + if unit_name == unit + ] + if unit_match: + assert len(unit_match) == 1 + return unit_match[0] + return None + + def compound_unit_identifiers(): + for numerator in sanctioned_units: + for denominator in sanctioned_units: + yield "{}-per-{}".format(numerator, denominator) + + supported_simple_units = {find_match(unit) for unit in sanctioned_units} + assert None not in supported_simple_units + + supported_compound_units = { + unit_match + for unit_match in (find_match(unit) for unit in compound_unit_identifiers()) + if unit_match + } + + return supported_simple_units | supported_compound_units + + +def readICUDataFilterForUnits(data_filter_file): + with io.open(data_filter_file, mode="r", encoding="utf-8") as f: + data_filter = json.load(f) + + # Find the rule set for the "unit_tree". + unit_tree_rules = [ + entry["rules"] + for entry in data_filter["resourceFilters"] + if entry["categories"] == ["unit_tree"] + ] + assert len(unit_tree_rules) == 1 + + # Compute the list of included units from that rule set. The regular expression must match + # "+/*/length/meter" and mustn't match either "-/*" or "+/*/compound". 
+ included_unit_re = re.compile(r"^\+/\*/(.+?)/(.+)$") + filtered_units = (included_unit_re.match(unit) for unit in unit_tree_rules[0]) + + return {(unit.group(1), unit.group(2)) for unit in filtered_units if unit} + + +def writeSanctionedSimpleUnitIdentifiersFiles(all_units, sanctioned_units): + js_src_builtin_intl_dir = os.path.dirname(os.path.abspath(__file__)) + + def find_unit_type(unit): + result = [ + unit_type for (unit_type, unit_name) in all_units if unit_name == unit + ] + assert result and len(result) == 1 + return result[0] + + sanctioned_js_file = os.path.join( + js_src_builtin_intl_dir, "SanctionedSimpleUnitIdentifiersGenerated.js" + ) + with io.open(sanctioned_js_file, mode="w", encoding="utf-8", newline="") as f: + println = partial(print, file=f) + + sanctioned_units_object = json.dumps( + {unit: True for unit in sorted(sanctioned_units)}, + sort_keys=True, + indent=4, + separators=(",", ": "), + ) + + println(generatedFileWarning) + + println( + """ +/** + * The list of currently supported simple unit identifiers. + * + * Intl.NumberFormat Unified API Proposal + */""" + ) + + println( + "var sanctionedSimpleUnitIdentifiers = {};".format(sanctioned_units_object) + ) + + sanctioned_cpp_file = os.path.join( + js_src_builtin_intl_dir, "MeasureUnitGenerated.h" + ) + with io.open(sanctioned_cpp_file, mode="w", encoding="utf-8", newline="") as f: + println = partial(print, file=f) + + println(generatedFileWarning) + + println( + """ +struct MeasureUnit { + const char* const type; + const char* const name; +}; + +/** + * The list of currently supported simple unit identifiers. + * + * The list must be kept in alphabetical order of |name|. 
+ */ +inline constexpr MeasureUnit simpleMeasureUnits[] = { + // clang-format off""" + ) + + for unit_name in sorted(sanctioned_units): + println(' {{"{}", "{}"}},'.format(find_unit_type(unit_name), unit_name)) + + println( + """ + // clang-format on +};""".lstrip( + "\n" + ) + ) + + writeUnitTestFiles(all_units, sanctioned_units) + + +def writeUnitTestFiles(all_units, sanctioned_units): + """ Generate test files for unit number formatters. """ + + js_src_builtin_intl_dir = os.path.dirname(os.path.abspath(__file__)) + test_dir = os.path.join( + js_src_builtin_intl_dir, "../../tests/non262/Intl/NumberFormat" + ) + + def write_test(file_name, test_content, indent=4): + file_path = os.path.join(test_dir, file_name) + with io.open(file_path, mode="w", encoding="utf-8", newline="") as f: + println = partial(print, file=f) + + println('// |reftest| skip-if(!this.hasOwnProperty("Intl"))') + println("") + println(generatedFileWarning) + println("") + + sanctioned_units_array = json.dumps( + [unit for unit in sorted(sanctioned_units)], + indent=indent, + separators=(",", ": "), + ) + + println( + "const sanctionedSimpleUnitIdentifiers = {};".format( + sanctioned_units_array + ) + ) + + println(test_content) + + println( + """ +if (typeof reportCompare === "function") +{}reportCompare(true, true);""".format( + " " * indent + ) + ) + + write_test( + "unit-compound-combinations.js", + """ +// Test all simple unit identifier combinations are allowed. 
+ +for (const numerator of sanctionedSimpleUnitIdentifiers) { + for (const denominator of sanctionedSimpleUnitIdentifiers) { + const unit = `${numerator}-per-${denominator}`; + const nf = new Intl.NumberFormat("en", {style: "unit", unit}); + + assertEq(nf.format(1), nf.formatToParts(1).map(p => p.value).join("")); + } +}""", + ) + + all_units_array = json.dumps( + ["-".join(unit) for unit in sorted(all_units)], indent=4, separators=(",", ": ") + ) + + write_test( + "unit-well-formed.js", + """ +const allUnits = {}; +""".format( + all_units_array + ) + + """ +// Test only sanctioned unit identifiers are allowed. + +for (const typeAndUnit of allUnits) { + const [_, type, unit] = typeAndUnit.match(/(\w+)-(.+)/); + + let allowed; + if (unit.includes("-per-")) { + const [numerator, denominator] = unit.split("-per-"); + allowed = sanctionedSimpleUnitIdentifiers.includes(numerator) && + sanctionedSimpleUnitIdentifiers.includes(denominator); + } else { + allowed = sanctionedSimpleUnitIdentifiers.includes(unit); + } + + if (allowed) { + const nf = new Intl.NumberFormat("en", {style: "unit", unit}); + assertEq(nf.format(1), nf.formatToParts(1).map(p => p.value).join("")); + } else { + assertThrowsInstanceOf(() => new Intl.NumberFormat("en", {style: "unit", unit}), + RangeError, `Missing error for "${typeAndUnit}"`); + } +}""", + ) + + write_test( + "unit-formatToParts-has-unit-field.js", + """ +// Test only English and Chinese to keep the overall runtime reasonable. +// +// Chinese is included because it contains more than one "unit" element for +// certain unit combinations. +const locales = ["en", "zh"]; + +// Plural rules for English only differentiate between "one" and "other". Plural +// rules for Chinese only use "other". That means we only need to test two values +// per unit. +const values = [0, 1]; + +// Ensure unit formatters contain at least one "unit" element. 
+ +for (const locale of locales) { + for (const unit of sanctionedSimpleUnitIdentifiers) { + const nf = new Intl.NumberFormat(locale, {style: "unit", unit}); + + for (const value of values) { + assertEq(nf.formatToParts(value).some(e => e.type === "unit"), true, + `locale=${locale}, unit=${unit}`); + } + } + + for (const numerator of sanctionedSimpleUnitIdentifiers) { + for (const denominator of sanctionedSimpleUnitIdentifiers) { + const unit = `${numerator}-per-${denominator}`; + const nf = new Intl.NumberFormat(locale, {style: "unit", unit}); + + for (const value of values) { + assertEq(nf.formatToParts(value).some(e => e.type === "unit"), true, + `locale=${locale}, unit=${unit}`); + } + } + } +}""", + indent=2, + ) + + +def updateUnits(topsrcdir, args): + icu_path = os.path.join(topsrcdir, "intl", "icu") + icu_unit_path = os.path.join(icu_path, "source", "data", "unit") + + with io.open( + "SanctionedSimpleUnitIdentifiers.yaml", mode="r", encoding="utf-8" + ) as f: + sanctioned_units = yaml.safe_load(f) + + # Read all possible ICU unit identifiers from the "unit/root.txt" resource. + unit_root_file = os.path.join(icu_unit_path, "root.txt") + all_units = readICUUnitResourceFile(unit_root_file) + + # Compute the set of effectively supported ICU unit identifiers. + supported_units = computeSupportedUnits(all_units, sanctioned_units) + + # Read the list of units we're including into the ICU data file. + data_filter_file = os.path.join(icu_path, "data_filter.json") + filtered_units = readICUDataFilterForUnits(data_filter_file) + + # Both sets must match to avoid resource loading errors at runtime. 
+ if supported_units != filtered_units: + + def units_to_string(units): + return ", ".join("/".join(u) for u in units) + + missing = supported_units - filtered_units + if missing: + raise RuntimeError("Missing units: {}".format(units_to_string(missing))) + + # Not exactly an error, but we currently don't have a use case where we need to support + # more units than required by ECMA-402. + extra = filtered_units - supported_units + if extra: + raise RuntimeError("Unnecessary units: {}".format(units_to_string(extra))) + + writeSanctionedSimpleUnitIdentifiersFiles(all_units, sanctioned_units) + + +def readICUNumberingSystemsResourceFile(filepath): + """Returns a dictionary of numbering systems where the key denotes the numbering system name + and the value a dictionary with additional numbering system data. + + Example: + + numberingSystems:table(nofallback){ + numberingSystems{ + latn{ + algorithmic:int{0} + desc{"0123456789"} + radix:int{10} + } + roman{ + algorithmic:int{1} + desc{"%roman-upper"} + radix:int{10} + } + } + } + + Returns {"latn": {"digits": "0123456789", "algorithmic": False}, + "roman": {"algorithmic": True}} + """ + + start_table_re = re.compile(r"^(\w+)(?:\:[\w\(\)]+)?\{$") + end_table_re = re.compile(r"^\}$") + table_entry_re = re.compile(r"^(\w+)(?:\:[\w\(\)]+)?\{(?:(?:\"(.*?)\")|(\d+))\}$") + + # The current resource table. + table = {} + + # List of parent tables when parsing. + parents = [] + + # Track multi-line comments state. + in_multiline_comment = False + + for line in flines(filepath, "utf-8-sig"): + # Remove leading and trailing whitespace. + line = line.strip() + + # Skip over comments. + if in_multiline_comment: + if line.endswith("*/"): + in_multiline_comment = False + continue + + if line.startswith("//"): + continue + + if line.startswith("/*"): + in_multiline_comment = True + continue + + # Try to match the start of a table, e.g. `latn{`. 
+ match = start_table_re.match(line) + if match: + parents.append(table) + table_name = match.group(1) + new_table = {} + table[table_name] = new_table + table = new_table + continue + + # Try to match the end of a table. + match = end_table_re.match(line) + if match: + table = parents.pop() + continue + + # Try to match a table entry, e.g. `desc{"0123456789"}`. + match = table_entry_re.match(line) + if match: + entry_key = match.group(1) + entry_value = ( + match.group(2) if match.group(2) is not None else int(match.group(3)) + ) + table[entry_key] = entry_value + continue + + raise Exception("unexpected line: '{}' in {}".format(line, filepath)) + + assert len(parents) == 0, "Not all tables closed" + assert len(table) == 1, "More than one root table" + + # Remove the two top-level "numberingSystems" tables. + (_, numbering_systems) = table.popitem() + (_, numbering_systems) = numbering_systems.popitem() + + # Assert all numbering systems use base 10. + assert all(ns["radix"] == 10 for ns in numbering_systems.values()) + + # Return the numbering systems. + return { + key: {"digits": value["desc"], "algorithmic": False} + if not bool(value["algorithmic"]) + else {"algorithmic": True} + for (key, value) in numbering_systems.items() + } + + +def writeNumberingSystemFiles(numbering_systems): + js_src_builtin_intl_dir = os.path.dirname(os.path.abspath(__file__)) + + numbering_systems_js_file = os.path.join( + js_src_builtin_intl_dir, "NumberingSystemsGenerated.h" + ) + with io.open( + numbering_systems_js_file, mode="w", encoding="utf-8", newline="" + ) as f: + println = partial(print, file=f) + + println(generatedFileWarning) + + println( + """ +/** + * The list of numbering systems with simple digit mappings. 
+ */ + +#ifndef builtin_intl_NumberingSystemsGenerated_h +#define builtin_intl_NumberingSystemsGenerated_h +""" + ) + + simple_numbering_systems = sorted( + name + for (name, value) in numbering_systems.items() + if not value["algorithmic"] + ) + + println("// clang-format off") + println("#define NUMBERING_SYSTEMS_WITH_SIMPLE_DIGIT_MAPPINGS \\") + println( + "{}".format( + ", \\\n".join( + ' "{}"'.format(name) for name in simple_numbering_systems + ) + ) + ) + println("// clang-format on") + println("") + + println("#endif // builtin_intl_NumberingSystemsGenerated_h") + + js_src_builtin_intl_dir = os.path.dirname(os.path.abspath(__file__)) + test_dir = os.path.join(js_src_builtin_intl_dir, "../../tests/non262/Intl") + + intl_shell_js_file = os.path.join(test_dir, "shell.js") + + with io.open(intl_shell_js_file, mode="w", encoding="utf-8", newline="") as f: + println = partial(print, file=f) + + println(generatedFileWarning) + + println( + """ +// source: CLDR file common/bcp47/number.xml; version CLDR {}. +// https://github.com/unicode-org/cldr/blob/master/common/bcp47/number.xml +// https://github.com/unicode-org/cldr/blob/master/common/supplemental/numberingSystems.xml +""".format( + readCLDRVersionFromICU() + ).rstrip() + ) + + numbering_systems_object = json.dumps( + numbering_systems, + indent=2, + separators=(",", ": "), + sort_keys=True, + ensure_ascii=False, + ) + println("const numberingSystems = {};".format(numbering_systems_object)) + + +def updateNumberingSystems(topsrcdir, args): + icu_path = os.path.join(topsrcdir, "intl", "icu") + icu_misc_path = os.path.join(icu_path, "source", "data", "misc") + + with io.open("NumberingSystems.yaml", mode="r", encoding="utf-8") as f: + numbering_systems = yaml.safe_load(f) + + # Read all possible ICU unit identifiers from the "misc/numberingSystems.txt" resource. 
+ misc_ns_file = os.path.join(icu_misc_path, "numberingSystems.txt") + all_numbering_systems = readICUNumberingSystemsResourceFile(misc_ns_file) + + all_numbering_systems_simple_digits = { + name + for (name, value) in all_numbering_systems.items() + if not value["algorithmic"] + } + + # Assert ICU includes support for all required numbering systems. If this assertion fails, + # something is broken in ICU. + assert all_numbering_systems_simple_digits.issuperset( + numbering_systems + ), "{}".format(numbering_systems.difference(all_numbering_systems_simple_digits)) + + # Assert the spec requires support for all numbering systems with simple digit mappings. If + # this assertion fails, file a PR at <https://github.com/tc39/ecma402> to include any new + # numbering systems. + assert all_numbering_systems_simple_digits.issubset(numbering_systems), "{}".format( + all_numbering_systems_simple_digits.difference(numbering_systems) + ) + + writeNumberingSystemFiles(all_numbering_systems) + + +if __name__ == "__main__": + import argparse + + # This script must reside in js/src/builtin/intl to work correctly. 
+ (thisDir, thisFile) = os.path.split(os.path.abspath(sys.argv[0])) + dirPaths = os.path.normpath(thisDir).split(os.sep) + if "/".join(dirPaths[-4:]) != "js/src/builtin/intl": + raise RuntimeError("%s must reside in js/src/builtin/intl" % sys.argv[0]) + topsrcdir = "/".join(dirPaths[:-4]) + + def EnsureHttps(v): + if not v.startswith("https:"): + raise argparse.ArgumentTypeError("URL protocol must be https: " % v) + return v + + parser = argparse.ArgumentParser(description="Update intl data.") + subparsers = parser.add_subparsers(help="Select update mode") + + parser_cldr_tags = subparsers.add_parser( + "langtags", help="Update CLDR language tags data" + ) + parser_cldr_tags.add_argument( + "--version", metavar="VERSION", help="CLDR version number" + ) + parser_cldr_tags.add_argument( + "--url", + metavar="URL", + default="https://unicode.org/Public/cldr/<VERSION>/core.zip", + type=EnsureHttps, + help="Download url CLDR data (default: %(default)s)", + ) + parser_cldr_tags.add_argument( + "--out", + default="LanguageTagGenerated.cpp", + help="Output file (default: %(default)s)", + ) + parser_cldr_tags.add_argument( + "file", nargs="?", help="Local cldr-core.zip file, if omitted uses <URL>" + ) + parser_cldr_tags.set_defaults(func=updateCLDRLangTags) + + parser_tz = subparsers.add_parser("tzdata", help="Update tzdata") + parser_tz.add_argument( + "--tz", + help="Local tzdata directory or file, if omitted downloads tzdata " + "distribution from https://www.iana.org/time-zones/", + ) + # ICU doesn't include the backzone file by default, but we still like to + # use the backzone time zone names to avoid user confusion. This does lead + # to formatting "historic" dates (pre-1970 era) with the wrong time zone, + # but that's probably acceptable for now. + parser_tz.add_argument( + "--ignore-backzone", + action="store_true", + help="Ignore tzdata's 'backzone' file. 
Can be enabled to generate more " + "accurate time zone canonicalization reflecting the actual time " + "zones as used by ICU.", + ) + parser_tz.add_argument( + "--out", + default="TimeZoneDataGenerated.h", + help="Output file (default: %(default)s)", + ) + parser_tz.set_defaults(func=partial(updateTzdata, topsrcdir)) + + parser_currency = subparsers.add_parser( + "currency", help="Update currency digits mapping" + ) + parser_currency.add_argument( + "--url", + metavar="URL", + default="https://www.currency-iso.org/dam/downloads/lists/list_one.xml", # NOQA: E501 + type=EnsureHttps, + help="Download url for the currency & funds code list (default: " + "%(default)s)", + ) + parser_currency.add_argument( + "--out", + default="CurrencyDataGenerated.js", + help="Output file (default: %(default)s)", + ) + parser_currency.add_argument( + "file", nargs="?", help="Local currency code list file, if omitted uses <URL>" + ) + parser_currency.set_defaults(func=partial(updateCurrency, topsrcdir)) + + parser_units = subparsers.add_parser( + "units", help="Update sanctioned unit identifiers mapping" + ) + parser_units.set_defaults(func=partial(updateUnits, topsrcdir)) + + parser_numbering_systems = subparsers.add_parser( + "numbering", help="Update numbering systems with simple " "digit mappings" + ) + parser_numbering_systems.set_defaults( + func=partial(updateNumberingSystems, topsrcdir) + ) + + args = parser.parse_args() + args.func(args) |