diff options
Diffstat (limited to 'js/src/builtin/intl')
46 files changed, 22827 insertions, 0 deletions
diff --git a/js/src/builtin/intl/Collator.cpp b/js/src/builtin/intl/Collator.cpp new file mode 100644 index 0000000000..924cfdbdaa --- /dev/null +++ b/js/src/builtin/intl/Collator.cpp @@ -0,0 +1,488 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Intl.Collator implementation. */ + +#include "builtin/intl/Collator.h" + +#include "mozilla/Assertions.h" +#include "mozilla/intl/Collator.h" +#include "mozilla/intl/Locale.h" +#include "mozilla/Span.h" + +#include "builtin/Array.h" +#include "builtin/intl/CommonFunctions.h" +#include "builtin/intl/FormatBuffer.h" +#include "builtin/intl/LanguageTag.h" +#include "builtin/intl/SharedIntlData.h" +#include "gc/GCContext.h" +#include "js/PropertySpec.h" +#include "js/StableStringChars.h" +#include "js/TypeDecls.h" +#include "vm/GlobalObject.h" +#include "vm/JSContext.h" +#include "vm/PlainObject.h" // js::PlainObject +#include "vm/Runtime.h" +#include "vm/StringType.h" + +#include "vm/GeckoProfiler-inl.h" +#include "vm/JSObject-inl.h" + +using namespace js; + +using JS::AutoStableStringChars; + +using js::intl::ReportInternalError; +using js::intl::SharedIntlData; + +const JSClassOps CollatorObject::classOps_ = { + nullptr, // addProperty + nullptr, // delProperty + nullptr, // enumerate + nullptr, // newEnumerate + nullptr, // resolve + nullptr, // mayResolve + CollatorObject::finalize, // finalize + nullptr, // call + nullptr, // construct + nullptr, // trace +}; + +const JSClass CollatorObject::class_ = { + "Intl.Collator", + JSCLASS_HAS_RESERVED_SLOTS(CollatorObject::SLOT_COUNT) | + JSCLASS_HAS_CACHED_PROTO(JSProto_Collator) | + JSCLASS_FOREGROUND_FINALIZE, + &CollatorObject::classOps_, &CollatorObject::classSpec_}; + +const JSClass& 
CollatorObject::protoClass_ = PlainObject::class_; + +static bool collator_toSource(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + args.rval().setString(cx->names().Collator); + return true; +} + +static const JSFunctionSpec collator_static_methods[] = { + JS_SELF_HOSTED_FN("supportedLocalesOf", "Intl_Collator_supportedLocalesOf", + 1, 0), + JS_FS_END}; + +static const JSFunctionSpec collator_methods[] = { + JS_SELF_HOSTED_FN("resolvedOptions", "Intl_Collator_resolvedOptions", 0, 0), + JS_FN("toSource", collator_toSource, 0, 0), JS_FS_END}; + +static const JSPropertySpec collator_properties[] = { + JS_SELF_HOSTED_GET("compare", "$Intl_Collator_compare_get", 0), + JS_STRING_SYM_PS(toStringTag, "Intl.Collator", JSPROP_READONLY), JS_PS_END}; + +static bool Collator(JSContext* cx, unsigned argc, Value* vp); + +const ClassSpec CollatorObject::classSpec_ = { + GenericCreateConstructor<Collator, 0, gc::AllocKind::FUNCTION>, + GenericCreatePrototype<CollatorObject>, + collator_static_methods, + nullptr, + collator_methods, + collator_properties, + nullptr, + ClassSpec::DontDefineConstructor}; + +/** + * 10.1.2 Intl.Collator([ locales [, options]]) + * + * ES2017 Intl draft rev 94045d234762ad107a3d09bb6f7381a65f1a2f9b + */ +static bool Collator(JSContext* cx, const CallArgs& args) { + AutoJSConstructorProfilerEntry pseudoFrame(cx, "Intl.Collator"); + + // Step 1 (Handled by OrdinaryCreateFromConstructor fallback code). + + // Steps 2-5 (Inlined 9.1.14, OrdinaryCreateFromConstructor). + RootedObject proto(cx); + if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_Collator, &proto)) { + return false; + } + + Rooted<CollatorObject*> collator( + cx, NewObjectWithClassProto<CollatorObject>(cx, proto)); + if (!collator) { + return false; + } + + HandleValue locales = args.get(0); + HandleValue options = args.get(1); + + // Step 6. 
+ if (!intl::InitializeObject(cx, collator, cx->names().InitializeCollator, + locales, options)) { + return false; + } + + args.rval().setObject(*collator); + return true; +} + +static bool Collator(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + return Collator(cx, args); +} + +bool js::intl_Collator(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 2); + MOZ_ASSERT(!args.isConstructing()); + + return Collator(cx, args); +} + +void js::CollatorObject::finalize(JS::GCContext* gcx, JSObject* obj) { + MOZ_ASSERT(gcx->onMainThread()); + + if (mozilla::intl::Collator* coll = obj->as<CollatorObject>().getCollator()) { + intl::RemoveICUCellMemory(gcx, obj, CollatorObject::EstimatedMemoryUse); + delete coll; + } +} + +bool js::intl_availableCollations(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 1); + MOZ_ASSERT(args[0].isString()); + + UniqueChars locale = intl::EncodeLocale(cx, args[0].toString()); + if (!locale) { + return false; + } + auto keywords = + mozilla::intl::Collator::GetBcp47KeywordValuesForLocale(locale.get()); + if (keywords.isErr()) { + ReportInternalError(cx, keywords.unwrapErr()); + return false; + } + + RootedObject collations(cx, NewDenseEmptyArray(cx)); + if (!collations) { + return false; + } + + // The first element of the collations array must be |null| per + // ES2017 Intl, 10.2.3 Internal Slots. + if (!NewbornArrayPush(cx, collations, NullValue())) { + return false; + } + + for (auto result : keywords.unwrap()) { + if (result.isErr()) { + ReportInternalError(cx); + return false; + } + mozilla::Span<const char> collation = result.unwrap(); + + // Per ECMA-402, 10.2.3, we don't include standard and search: + // "The values 'standard' and 'search' must not be used as elements in + // any [[sortLocaleData]][locale].co and [[searchLocaleData]][locale].co + // array." 
+ static constexpr auto standard = mozilla::MakeStringSpan("standard"); + static constexpr auto search = mozilla::MakeStringSpan("search"); + if (collation == standard || collation == search) { + continue; + } + + JSString* jscollation = NewStringCopy<CanGC>(cx, collation); + if (!jscollation) { + return false; + } + if (!NewbornArrayPush(cx, collations, StringValue(jscollation))) { + return false; + } + } + + args.rval().setObject(*collations); + return true; +} + +/** + * Returns a new mozilla::intl::Collator with the locale and collation options + * of the given Collator. + */ +static mozilla::intl::Collator* NewIntlCollator( + JSContext* cx, Handle<CollatorObject*> collator) { + RootedValue value(cx); + + RootedObject internals(cx, intl::GetInternalsObject(cx, collator)); + if (!internals) { + return nullptr; + } + + if (!GetProperty(cx, internals, internals, cx->names().locale, &value)) { + return nullptr; + } + + mozilla::intl::Locale tag; + { + Rooted<JSLinearString*> locale(cx, value.toString()->ensureLinear(cx)); + if (!locale) { + return nullptr; + } + + if (!intl::ParseLocale(cx, locale, tag)) { + return nullptr; + } + } + + using mozilla::intl::Collator; + + Collator::Options options{}; + + if (!GetProperty(cx, internals, internals, cx->names().usage, &value)) { + return nullptr; + } + + enum class Usage { Search, Sort }; + + Usage usage; + { + JSLinearString* str = value.toString()->ensureLinear(cx); + if (!str) { + return nullptr; + } + + if (StringEqualsLiteral(str, "search")) { + usage = Usage::Search; + } else { + MOZ_ASSERT(StringEqualsLiteral(str, "sort")); + usage = Usage::Sort; + } + } + + JS::RootedVector<intl::UnicodeExtensionKeyword> keywords(cx); + + // ICU expects collation as Unicode locale extensions on locale. 
+ if (usage == Usage::Search) { + if (!keywords.emplaceBack("co", cx->names().search)) { + return nullptr; + } + + // Search collations can't select a different collation, so the collation + // property is guaranteed to be "default". +#ifdef DEBUG + if (!GetProperty(cx, internals, internals, cx->names().collation, &value)) { + return nullptr; + } + + JSLinearString* collation = value.toString()->ensureLinear(cx); + if (!collation) { + return nullptr; + } + + MOZ_ASSERT(StringEqualsLiteral(collation, "default")); +#endif + } else { + if (!GetProperty(cx, internals, internals, cx->names().collation, &value)) { + return nullptr; + } + + JSLinearString* collation = value.toString()->ensureLinear(cx); + if (!collation) { + return nullptr; + } + + // Set collation as a Unicode locale extension when it was specified. + if (!StringEqualsLiteral(collation, "default")) { + if (!keywords.emplaceBack("co", collation)) { + return nullptr; + } + } + } + + // |ApplyUnicodeExtensionToTag| applies the new keywords to the front of the + // Unicode extension subtag. We're then relying on ICU to follow RFC 6067, + // which states that any trailing keywords using the same key should be + // ignored. 
+ if (!intl::ApplyUnicodeExtensionToTag(cx, tag, keywords)) { + return nullptr; + } + + intl::FormatBuffer<char> buffer(cx); + if (auto result = tag.ToString(buffer); result.isErr()) { + intl::ReportInternalError(cx, result.unwrapErr()); + return nullptr; + } + + UniqueChars locale = buffer.extractStringZ(); + if (!locale) { + return nullptr; + } + + if (!GetProperty(cx, internals, internals, cx->names().sensitivity, &value)) { + return nullptr; + } + + { + JSLinearString* sensitivity = value.toString()->ensureLinear(cx); + if (!sensitivity) { + return nullptr; + } + if (StringEqualsLiteral(sensitivity, "base")) { + options.sensitivity = Collator::Sensitivity::Base; + } else if (StringEqualsLiteral(sensitivity, "accent")) { + options.sensitivity = Collator::Sensitivity::Accent; + } else if (StringEqualsLiteral(sensitivity, "case")) { + options.sensitivity = Collator::Sensitivity::Case; + } else { + MOZ_ASSERT(StringEqualsLiteral(sensitivity, "variant")); + options.sensitivity = Collator::Sensitivity::Variant; + } + } + + if (!GetProperty(cx, internals, internals, cx->names().ignorePunctuation, + &value)) { + return nullptr; + } + options.ignorePunctuation = value.toBoolean(); + + if (!GetProperty(cx, internals, internals, cx->names().numeric, &value)) { + return nullptr; + } + if (!value.isUndefined()) { + options.numeric = value.toBoolean(); + } + + if (!GetProperty(cx, internals, internals, cx->names().caseFirst, &value)) { + return nullptr; + } + if (!value.isUndefined()) { + JSLinearString* caseFirst = value.toString()->ensureLinear(cx); + if (!caseFirst) { + return nullptr; + } + if (StringEqualsLiteral(caseFirst, "upper")) { + options.caseFirst = Collator::CaseFirst::Upper; + } else if (StringEqualsLiteral(caseFirst, "lower")) { + options.caseFirst = Collator::CaseFirst::Lower; + } else { + MOZ_ASSERT(StringEqualsLiteral(caseFirst, "false")); + options.caseFirst = Collator::CaseFirst::False; + } + } + + auto collResult = Collator::TryCreate(locale.get()); + 
if (collResult.isErr()) { + ReportInternalError(cx, collResult.unwrapErr()); + return nullptr; + } + auto coll = collResult.unwrap(); + + auto optResult = coll->SetOptions(options); + if (optResult.isErr()) { + ReportInternalError(cx, optResult.unwrapErr()); + return nullptr; + } + + return coll.release(); +} + +static mozilla::intl::Collator* GetOrCreateCollator( + JSContext* cx, Handle<CollatorObject*> collator) { + // Obtain a cached mozilla::intl::Collator object. + mozilla::intl::Collator* coll = collator->getCollator(); + if (coll) { + return coll; + } + + coll = NewIntlCollator(cx, collator); + if (!coll) { + return nullptr; + } + collator->setCollator(coll); + + intl::AddICUCellMemory(collator, CollatorObject::EstimatedMemoryUse); + return coll; +} + +static bool intl_CompareStrings(JSContext* cx, mozilla::intl::Collator* coll, + HandleString str1, HandleString str2, + MutableHandleValue result) { + MOZ_ASSERT(str1); + MOZ_ASSERT(str2); + + if (str1 == str2) { + result.setInt32(0); + return true; + } + + AutoStableStringChars stableChars1(cx); + if (!stableChars1.initTwoByte(cx, str1)) { + return false; + } + + AutoStableStringChars stableChars2(cx); + if (!stableChars2.initTwoByte(cx, str2)) { + return false; + } + + mozilla::Range<const char16_t> chars1 = stableChars1.twoByteRange(); + mozilla::Range<const char16_t> chars2 = stableChars2.twoByteRange(); + + result.setInt32(coll->CompareStrings(chars1, chars2)); + return true; +} + +bool js::intl_CompareStrings(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 3); + MOZ_ASSERT(args[0].isObject()); + MOZ_ASSERT(args[1].isString()); + MOZ_ASSERT(args[2].isString()); + + Rooted<CollatorObject*> collator(cx, + &args[0].toObject().as<CollatorObject>()); + + mozilla::intl::Collator* coll = GetOrCreateCollator(cx, collator); + if (!coll) { + return false; + } + + // Use the UCollator to actually compare the strings. 
+ RootedString str1(cx, args[1].toString()); + RootedString str2(cx, args[2].toString()); + return intl_CompareStrings(cx, coll, str1, str2, args.rval()); +} + +bool js::intl_isUpperCaseFirst(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 1); + MOZ_ASSERT(args[0].isString()); + + SharedIntlData& sharedIntlData = cx->runtime()->sharedIntlData.ref(); + + RootedString locale(cx, args[0].toString()); + bool isUpperFirst; + if (!sharedIntlData.isUpperCaseFirst(cx, locale, &isUpperFirst)) { + return false; + } + + args.rval().setBoolean(isUpperFirst); + return true; +} + +bool js::intl_isIgnorePunctuation(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 1); + MOZ_ASSERT(args[0].isString()); + + SharedIntlData& sharedIntlData = cx->runtime()->sharedIntlData.ref(); + + RootedString locale(cx, args[0].toString()); + bool isIgnorePunctuation; + if (!sharedIntlData.isIgnorePunctuation(cx, locale, &isIgnorePunctuation)) { + return false; + } + + args.rval().setBoolean(isIgnorePunctuation); + return true; +} diff --git a/js/src/builtin/intl/Collator.h b/js/src/builtin/intl/Collator.h new file mode 100644 index 0000000000..3cdf461911 --- /dev/null +++ b/js/src/builtin/intl/Collator.h @@ -0,0 +1,110 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef builtin_intl_Collator_h +#define builtin_intl_Collator_h + +#include <stdint.h> + +#include "builtin/SelfHostingDefines.h" +#include "js/Class.h" +#include "vm/NativeObject.h" + +namespace mozilla::intl { +class Collator; +} + +namespace js { + +/******************** Collator ********************/ + +class CollatorObject : public NativeObject { + public: + static const JSClass class_; + static const JSClass& protoClass_; + + static constexpr uint32_t INTERNALS_SLOT = 0; + static constexpr uint32_t INTL_COLLATOR_SLOT = 1; + static constexpr uint32_t SLOT_COUNT = 2; + + static_assert(INTERNALS_SLOT == INTL_INTERNALS_OBJECT_SLOT, + "INTERNALS_SLOT must match self-hosting define for internals " + "object slot"); + + // Estimated memory use for UCollator (see IcuMemoryUsage). + static constexpr size_t EstimatedMemoryUse = 1128; + + mozilla::intl::Collator* getCollator() const { + const auto& slot = getFixedSlot(INTL_COLLATOR_SLOT); + if (slot.isUndefined()) { + return nullptr; + } + return static_cast<mozilla::intl::Collator*>(slot.toPrivate()); + } + + void setCollator(mozilla::intl::Collator* collator) { + setFixedSlot(INTL_COLLATOR_SLOT, PrivateValue(collator)); + } + + private: + static const JSClassOps classOps_; + static const ClassSpec classSpec_; + + static void finalize(JS::GCContext* gcx, JSObject* obj); +}; + +/** + * Returns a new instance of the standard built-in Collator constructor. + * + * Usage: collator = intl_Collator(locales, options) + */ +[[nodiscard]] extern bool intl_Collator(JSContext* cx, unsigned argc, + JS::Value* vp); + +/** + * Returns an array with the collation type identifiers per Unicode + * Technical Standard 35, Unicode Locale Data Markup Language, for the + * collations supported for the given locale. "standard" and "search" are + * excluded. 
+ * + * Usage: collations = intl_availableCollations(locale) + */ +[[nodiscard]] extern bool intl_availableCollations(JSContext* cx, unsigned argc, + JS::Value* vp); + +/** + * Compares x and y (which must be String values), and returns a number less + * than 0 if x < y, 0 if x = y, or a number greater than 0 if x > y according + * to the sort order for the locale and collation options of the given + * Collator. + * + * Spec: ECMAScript Internationalization API Specification, 10.3.2. + * + * Usage: result = intl_CompareStrings(collator, x, y) + */ +[[nodiscard]] extern bool intl_CompareStrings(JSContext* cx, unsigned argc, + JS::Value* vp); + +/** + * Returns true if the given locale sorts upper-case before lower-case + * characters. + * + * Usage: result = intl_isUpperCaseFirst(locale) + */ +[[nodiscard]] extern bool intl_isUpperCaseFirst(JSContext* cx, unsigned argc, + JS::Value* vp); + +/** + * Returns true if the given locale ignores punctuation by default. + * + * Usage: result = intl_isIgnorePunctuation(locale) + */ +[[nodiscard]] extern bool intl_isIgnorePunctuation(JSContext* cx, unsigned argc, + JS::Value* vp); + +} // namespace js + +#endif /* builtin_intl_Collator_h */ diff --git a/js/src/builtin/intl/Collator.js b/js/src/builtin/intl/Collator.js new file mode 100644 index 0000000000..4780781e95 --- /dev/null +++ b/js/src/builtin/intl/Collator.js @@ -0,0 +1,473 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Portions Copyright Norbert Lindenberg 2011-2012. */ + +/** + * Compute an internal properties object from |lazyCollatorData|. + */ +function resolveCollatorInternals(lazyCollatorData) { + assert(IsObject(lazyCollatorData), "lazy data not an object?"); + + var internalProps = std_Object_create(null); + + var Collator = collatorInternalProperties; + + // Step 5. 
+ internalProps.usage = lazyCollatorData.usage; + + // Steps 6-7. + var collatorIsSorting = lazyCollatorData.usage === "sort"; + var localeData = collatorIsSorting + ? Collator.sortLocaleData + : Collator.searchLocaleData; + + // Compute effective locale. + // Step 16. + var relevantExtensionKeys = Collator.relevantExtensionKeys; + + // Step 17. + var r = ResolveLocale( + "Collator", + lazyCollatorData.requestedLocales, + lazyCollatorData.opt, + relevantExtensionKeys, + localeData + ); + + // Step 18. + internalProps.locale = r.locale; + + // Step 19. + var collation = r.co; + + // Step 20. + if (collation === null) { + collation = "default"; + } + + // Step 21. + internalProps.collation = collation; + + // Step 22. + internalProps.numeric = r.kn === "true"; + + // Step 23. + internalProps.caseFirst = r.kf; + + // Compute remaining collation options. + // Step 25. + var s = lazyCollatorData.rawSensitivity; + if (s === undefined) { + // In theory the default sensitivity for the "search" collator is + // locale dependent; in reality the CLDR/ICU default strength is + // always tertiary. Therefore use "variant" as the default value for + // both collation modes. + s = "variant"; + } + + // Step 26. + internalProps.sensitivity = s; + + // Step 28. + var ignorePunctuation = lazyCollatorData.ignorePunctuation; + if (ignorePunctuation === undefined) { + var actualLocale = collatorActualLocale(r.dataLocale); + ignorePunctuation = intl_isIgnorePunctuation(actualLocale); + } + internalProps.ignorePunctuation = ignorePunctuation; + + // The caller is responsible for associating |internalProps| with the right + // object using |setInternalProperties|. + return internalProps; +} + +/** + * Returns an object containing the Collator internal properties of |obj|. 
+ */ +function getCollatorInternals(obj) { + assert(IsObject(obj), "getCollatorInternals called with non-object"); + assert( + intl_GuardToCollator(obj) !== null, + "getCollatorInternals called with non-Collator" + ); + + var internals = getIntlObjectInternals(obj); + assert( + internals.type === "Collator", + "bad type escaped getIntlObjectInternals" + ); + + // If internal properties have already been computed, use them. + var internalProps = maybeInternalProperties(internals); + if (internalProps) { + return internalProps; + } + + // Otherwise it's time to fully create them. + internalProps = resolveCollatorInternals(internals.lazyData); + setInternalProperties(internals, internalProps); + return internalProps; +} + +/** + * Initializes an object as a Collator. + * + * This method is complicated a moderate bit by its implementing initialization + * as a *lazy* concept. Everything that must happen now, does -- but we defer + * all the work we can until the object is actually used as a Collator. This + * later work occurs in |resolveCollatorInternals|; steps not noted here occur + * there. + * + * Spec: ECMAScript Internationalization API Specification, 10.1.1. 
+ */ +function InitializeCollator(collator, locales, options) { + assert(IsObject(collator), "InitializeCollator called with non-object"); + assert( + intl_GuardToCollator(collator) !== null, + "InitializeCollator called with non-Collator" + ); + + // Lazy Collator data has the following structure: + // + // { + // requestedLocales: List of locales, + // usage: "sort" / "search", + // opt: // opt object computed in InitializeCollator + // { + // localeMatcher: "lookup" / "best fit", + // co: string matching a Unicode extension type / undefined + // kn: true / false / undefined, + // kf: "upper" / "lower" / "false" / undefined + // } + // rawSensitivity: "base" / "accent" / "case" / "variant" / undefined, + // ignorePunctuation: true / false / undefined + // } + // + // Note that lazy data is only installed as a final step of initialization, + // so every Collator lazy data object has *all* these properties, never a + // subset of them. + var lazyCollatorData = std_Object_create(null); + + // Step 1. + var requestedLocales = CanonicalizeLocaleList(locales); + lazyCollatorData.requestedLocales = requestedLocales; + + // Steps 2-3. + // + // If we ever need more speed here at startup, we should try to detect the + // case where |options === undefined| and then directly use the default + // value for each option. For now, just keep it simple. + if (options === undefined) { + options = std_Object_create(null); + } else { + options = ToObject(options); + } + + // Compute options that impact interpretation of locale. + // Step 4. + var u = GetOption(options, "usage", "string", ["sort", "search"], "sort"); + lazyCollatorData.usage = u; + + // Step 8. + var opt = new_Record(); + lazyCollatorData.opt = opt; + + // Steps 9-10. 
+ var matcher = GetOption( + options, + "localeMatcher", + "string", + ["lookup", "best fit"], + "best fit" + ); + opt.localeMatcher = matcher; + + // https://github.com/tc39/ecma402/pull/459 + var collation = GetOption( + options, + "collation", + "string", + undefined, + undefined + ); + if (collation !== undefined) { + collation = intl_ValidateAndCanonicalizeUnicodeExtensionType( + collation, + "collation", + "co" + ); + } + opt.co = collation; + + // Steps 11-13. + var numericValue = GetOption( + options, + "numeric", + "boolean", + undefined, + undefined + ); + if (numericValue !== undefined) { + numericValue = numericValue ? "true" : "false"; + } + opt.kn = numericValue; + + // Steps 14-15. + var caseFirstValue = GetOption( + options, + "caseFirst", + "string", + ["upper", "lower", "false"], + undefined + ); + opt.kf = caseFirstValue; + + // Compute remaining collation options. + // Step 24. + var s = GetOption( + options, + "sensitivity", + "string", + ["base", "accent", "case", "variant"], + undefined + ); + lazyCollatorData.rawSensitivity = s; + + // Step 27. + var ip = GetOption(options, "ignorePunctuation", "boolean", undefined, undefined); + lazyCollatorData.ignorePunctuation = ip; + + // Step 29. + // + // We've done everything that must be done now: mark the lazy data as fully + // computed and install it. + initializeIntlObject(collator, "Collator", lazyCollatorData); +} + +/** + * Returns the subset of the given locale list for which this locale list has a + * matching (possibly fallback) locale. Locales appear in the same order in the + * returned list as in the input list. + * + * Spec: ECMAScript Internationalization API Specification, 10.2.2. + */ +function Intl_Collator_supportedLocalesOf(locales /*, options*/) { + var options = ArgumentsLength() > 1 ? GetArgument(1) : undefined; + + // Step 1. + var availableLocales = "Collator"; + + // Step 2. + var requestedLocales = CanonicalizeLocaleList(locales); + + // Step 3. 
+ return SupportedLocales(availableLocales, requestedLocales, options); +} + +/** + * Collator internal properties. + * + * Spec: ECMAScript Internationalization API Specification, 9.1 and 10.2.3. + */ +var collatorInternalProperties = { + sortLocaleData: collatorSortLocaleData, + searchLocaleData: collatorSearchLocaleData, + relevantExtensionKeys: ["co", "kf", "kn"], +}; + +/** + * Returns the actual locale used when a collator for |locale| is constructed. + */ +function collatorActualLocale(locale) { + assert(typeof locale === "string", "locale should be string"); + + // If |locale| is the default locale (e.g. da-DK), but only supported + // through a fallback (da), we need to get the actual locale before we + // can call intl_isUpperCaseFirst. Also see intl_BestAvailableLocale. + return BestAvailableLocaleIgnoringDefault("Collator", locale); +} + +/** + * Returns the default caseFirst values for the given locale. The first + * element in the returned array denotes the default value per ES2017 Intl, + * 9.1 Internal slots of Service Constructors. + */ +function collatorSortCaseFirst(locale) { + var actualLocale = collatorActualLocale(locale); + if (intl_isUpperCaseFirst(actualLocale)) { + return ["upper", "false", "lower"]; + } + + // Default caseFirst values for all other languages. + return ["false", "lower", "upper"]; +} + +/** + * Returns the default caseFirst value for the given locale. + */ +function collatorSortCaseFirstDefault(locale) { + var actualLocale = collatorActualLocale(locale); + if (intl_isUpperCaseFirst(actualLocale)) { + return "upper"; + } + + // Default caseFirst value for all other languages. 
+ return "false"; +} + +function collatorSortLocaleData() { + /* eslint-disable object-shorthand */ + return { + co: intl_availableCollations, + kn: function() { + return ["false", "true"]; + }, + kf: collatorSortCaseFirst, + default: { + co: function() { + // The first element of the collations array must be |null| + // per ES2017 Intl, 10.2.3 Internal Slots. + return null; + }, + kn: function() { + return "false"; + }, + kf: collatorSortCaseFirstDefault, + }, + }; + /* eslint-enable object-shorthand */ +} + +function collatorSearchLocaleData() { + /* eslint-disable object-shorthand */ + return { + co: function() { + return [null]; + }, + kn: function() { + return ["false", "true"]; + }, + kf: function() { + return ["false", "lower", "upper"]; + }, + default: { + co: function() { + return null; + }, + kn: function() { + return "false"; + }, + kf: function() { + return "false"; + }, + }, + }; + /* eslint-enable object-shorthand */ +} + +/** + * Create function to be cached and returned by Intl.Collator.prototype.compare. + * + * Spec: ECMAScript Internationalization API Specification, 10.3.3.1. + */ +function createCollatorCompare(collator) { + // This function is not inlined in $Intl_Collator_compare_get to avoid + // creating a call-object on each call to $Intl_Collator_compare_get. + return function(x, y) { + // Step 1 (implicit). + + // Step 2. + assert(IsObject(collator), "collatorCompareToBind called with non-object"); + assert( + intl_GuardToCollator(collator) !== null, + "collatorCompareToBind called with non-Collator" + ); + + // Steps 3-6 + var X = ToString(x); + var Y = ToString(y); + + // Step 7. 
+ return intl_CompareStrings(collator, X, Y); + }; +} + +/** + * Returns a function bound to this Collator that compares x (converted to a + * String value) and y (converted to a String value), + * and returns a number less than 0 if x < y, 0 if x = y, or a number greater + * than 0 if x > y according to the sort order for the locale and collation + * options of this Collator object. + * + * Spec: ECMAScript Internationalization API Specification, 10.3.3. + */ +// Uncloned functions with `$` prefix are allocated as extended function +// to store the original name in `SetCanonicalName`. +function $Intl_Collator_compare_get() { + // Step 1. + var collator = this; + + // Steps 2-3. + if ( + !IsObject(collator) || + (collator = intl_GuardToCollator(collator)) === null + ) { + return callFunction( + intl_CallCollatorMethodIfWrapped, + this, + "$Intl_Collator_compare_get" + ); + } + + var internals = getCollatorInternals(collator); + + // Step 4. + if (internals.boundCompare === undefined) { + // Steps 4.a-c. + internals.boundCompare = createCollatorCompare(collator); + } + + // Step 5. + return internals.boundCompare; +} +SetCanonicalName($Intl_Collator_compare_get, "get compare"); + +/** + * Returns the resolved options for a Collator object. + * + * Spec: ECMAScript Internationalization API Specification, 10.3.4. + */ +function Intl_Collator_resolvedOptions() { + // Step 1. + var collator = this; + + // Steps 2-3. + if ( + !IsObject(collator) || + (collator = intl_GuardToCollator(collator)) === null + ) { + return callFunction( + intl_CallCollatorMethodIfWrapped, + this, + "Intl_Collator_resolvedOptions" + ); + } + + var internals = getCollatorInternals(collator); + + // Steps 4-5. + var result = { + locale: internals.locale, + usage: internals.usage, + sensitivity: internals.sensitivity, + ignorePunctuation: internals.ignorePunctuation, + collation: internals.collation, + numeric: internals.numeric, + caseFirst: internals.caseFirst, + }; + + // Step 6. 
+ return result; +} diff --git a/js/src/builtin/intl/CommonFunctions.cpp b/js/src/builtin/intl/CommonFunctions.cpp new file mode 100644 index 0000000000..5c386c4b7e --- /dev/null +++ b/js/src/builtin/intl/CommonFunctions.cpp @@ -0,0 +1,173 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Operations used to implement multiple Intl.* classes. */ + +#include "builtin/intl/CommonFunctions.h" + +#include "mozilla/Assertions.h" +#include "mozilla/intl/ICUError.h" +#include "mozilla/TextUtils.h" + +#include <algorithm> + +#include "gc/GCEnum.h" +#include "gc/ZoneAllocator.h" +#include "js/friend/ErrorMessages.h" // js::GetErrorMessage, JSMSG_INTERNAL_INTL_ERROR +#include "js/Value.h" +#include "vm/JSAtomState.h" +#include "vm/JSContext.h" +#include "vm/JSObject.h" +#include "vm/SelfHosting.h" +#include "vm/Stack.h" +#include "vm/StringType.h" + +#include "gc/GCContext-inl.h" + +bool js::intl::InitializeObject(JSContext* cx, JS::Handle<JSObject*> obj, + JS::Handle<PropertyName*> initializer, + JS::Handle<JS::Value> locales, + JS::Handle<JS::Value> options) { + FixedInvokeArgs<3> args(cx); + + args[0].setObject(*obj); + args[1].set(locales); + args[2].set(options); + + RootedValue ignored(cx); + if (!CallSelfHostedFunction(cx, initializer, JS::NullHandleValue, args, + &ignored)) { + return false; + } + + MOZ_ASSERT(ignored.isUndefined(), + "Unexpected return value from Intl object initializer"); + return true; +} + +bool js::intl::InitializeDateTimeFormatObject( + JSContext* cx, JS::Handle<JSObject*> obj, JS::Handle<JS::Value> thisValue, + JS::Handle<JS::Value> locales, JS::Handle<JS::Value> options, + JS::Handle<JSString*> required, JS::Handle<JSString*> defaults, + DateTimeFormatOptions 
dtfOptions, JS::MutableHandle<JS::Value> result) { + Handle<PropertyName*> initializer = cx->names().InitializeDateTimeFormat; + + FixedInvokeArgs<7> args(cx); + + args[0].setObject(*obj); + args[1].set(thisValue); + args[2].set(locales); + args[3].set(options); + args[4].setString(required); + args[5].setString(defaults); + args[6].setBoolean(dtfOptions == DateTimeFormatOptions::EnableMozExtensions); + + if (!CallSelfHostedFunction(cx, initializer, NullHandleValue, args, result)) { + return false; + } + + MOZ_ASSERT(result.isObject(), + "Intl.DateTimeFormat initializer must return an object"); + return true; +} + +bool js::intl::InitializeNumberFormatObject( + JSContext* cx, JS::Handle<JSObject*> obj, JS::Handle<JS::Value> thisValue, + JS::Handle<JS::Value> locales, JS::Handle<JS::Value> options, + JS::MutableHandle<JS::Value> result) { + Handle<PropertyName*> initializer = cx->names().InitializeNumberFormat; + + FixedInvokeArgs<4> args(cx); + + args[0].setObject(*obj); + args[1].set(thisValue); + args[2].set(locales); + args[3].set(options); + + if (!CallSelfHostedFunction(cx, initializer, NullHandleValue, args, result)) { + return false; + } + + MOZ_ASSERT(result.isObject(), + "Intl.NumberFormat initializer must return an object"); + return true; +} + +JSObject* js::intl::GetInternalsObject(JSContext* cx, + JS::Handle<JSObject*> obj) { + FixedInvokeArgs<1> args(cx); + + args[0].setObject(*obj); + + RootedValue v(cx); + if (!js::CallSelfHostedFunction(cx, cx->names().getInternals, NullHandleValue, + args, &v)) { + return nullptr; + } + + return &v.toObject(); +} + +void js::intl::ReportInternalError(JSContext* cx) { + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, + JSMSG_INTERNAL_INTL_ERROR); +} + +void js::intl::ReportInternalError(JSContext* cx, + mozilla::intl::ICUError error) { + switch (error) { + case mozilla::intl::ICUError::OutOfMemory: + ReportOutOfMemory(cx); + return; + case mozilla::intl::ICUError::InternalError: + ReportInternalError(cx); + 
return; + case mozilla::intl::ICUError::OverflowError: + ReportAllocationOverflow(cx); + return; + } + MOZ_CRASH("Unexpected ICU error"); +} + +const js::intl::OldStyleLanguageTagMapping + js::intl::oldStyleLanguageTagMappings[] = { + {"pa-PK", "pa-Arab-PK"}, {"zh-CN", "zh-Hans-CN"}, + {"zh-HK", "zh-Hant-HK"}, {"zh-SG", "zh-Hans-SG"}, + {"zh-TW", "zh-Hant-TW"}, +}; + +js::UniqueChars js::intl::EncodeLocale(JSContext* cx, JSString* locale) { + MOZ_ASSERT(locale->length() > 0); + + js::UniqueChars chars = EncodeAscii(cx, locale); + +#ifdef DEBUG + // Ensure the returned value contains only valid BCP 47 characters. + // (Lambdas can't be placed inside MOZ_ASSERT, so move the checks in an + // #ifdef block.) + if (chars) { + auto alnumOrDash = [](char c) { + return mozilla::IsAsciiAlphanumeric(c) || c == '-'; + }; + MOZ_ASSERT(mozilla::IsAsciiAlpha(chars[0])); + MOZ_ASSERT( + std::all_of(chars.get(), chars.get() + locale->length(), alnumOrDash)); + } +#endif + + return chars; +} + +void js::intl::AddICUCellMemory(JSObject* obj, size_t nbytes) { + // Account the (estimated) number of bytes allocated by an ICU object against + // the JSObject's zone. + AddCellMemory(obj, nbytes, MemoryUse::ICUObject); +} + +void js::intl::RemoveICUCellMemory(JS::GCContext* gcx, JSObject* obj, + size_t nbytes) { + gcx->removeCellMemory(obj, nbytes, MemoryUse::ICUObject); +} diff --git a/js/src/builtin/intl/CommonFunctions.h b/js/src/builtin/intl/CommonFunctions.h new file mode 100644 index 0000000000..cf7fd4038d --- /dev/null +++ b/js/src/builtin/intl/CommonFunctions.h @@ -0,0 +1,109 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef builtin_intl_CommonFunctions_h +#define builtin_intl_CommonFunctions_h + +#include <stddef.h> +#include <stdint.h> + +#include "js/RootingAPI.h" +#include "js/Utility.h" + +namespace mozilla::intl { +enum class ICUError : uint8_t; +} + +namespace js { + +class PropertyName; + +namespace intl { + +/** + * Initialize a new Intl.* object using the named self-hosted function. + */ +extern bool InitializeObject(JSContext* cx, JS::Handle<JSObject*> obj, + JS::Handle<PropertyName*> initializer, + JS::Handle<JS::Value> locales, + JS::Handle<JS::Value> options); + +enum class DateTimeFormatOptions { + Standard, + EnableMozExtensions, +}; + +/** + * Initialize an existing object as an Intl.DateTimeFormat object. + */ +extern bool InitializeDateTimeFormatObject( + JSContext* cx, JS::Handle<JSObject*> obj, JS::Handle<JS::Value> thisValue, + JS::Handle<JS::Value> locales, JS::Handle<JS::Value> options, + JS::Handle<JSString*> required, JS::Handle<JSString*> defaults, + DateTimeFormatOptions dtfOptions, JS::MutableHandle<JS::Value> result); + +/** + * Initialize an existing object as an Intl.NumberFormat object. + */ +extern bool InitializeNumberFormatObject(JSContext* cx, + JS::Handle<JSObject*> obj, + JS::Handle<JS::Value> thisValue, + JS::Handle<JS::Value> locales, + JS::Handle<JS::Value> options, + JS::MutableHandle<JS::Value> result); + +/** + * Returns the object holding the internal properties for obj. + */ +extern JSObject* GetInternalsObject(JSContext* cx, JS::Handle<JSObject*> obj); + +/** Report an Intl internal error not directly tied to a spec step. */ +extern void ReportInternalError(JSContext* cx); + +/** Report an Intl internal error not directly tied to a spec step. */ +extern void ReportInternalError(JSContext* cx, mozilla::intl::ICUError error); + +/** + * The last-ditch locale is used if none of the available locales satisfies a + * request. 
"en-GB" is used based on the assumptions that English is the most + * common second language, that both en-GB and en-US are normally available in + * an implementation, and that en-GB is more representative of the English used + * in other locales. + */ +static inline const char* LastDitchLocale() { return "en-GB"; } + +/** + * Certain old, commonly-used language tags that lack a script, are expected to + * nonetheless imply one. This object maps these old-style tags to modern + * equivalents. + */ +struct OldStyleLanguageTagMapping { + const char* const oldStyle; + const char* const modernStyle; + + // Provide a constructor to catch missing initializers in the mappings array. + constexpr OldStyleLanguageTagMapping(const char* oldStyle, + const char* modernStyle) + : oldStyle(oldStyle), modernStyle(modernStyle) {} +}; + +extern const OldStyleLanguageTagMapping oldStyleLanguageTagMappings[5]; + +extern JS::UniqueChars EncodeLocale(JSContext* cx, JSString* locale); + +// The inline capacity we use for a Vector<char16_t>. Use this to ensure that +// our uses of ICU string functions, below and elsewhere, will try to fill the +// buffer's entire inline capacity before growing it and heap-allocating. +constexpr size_t INITIAL_CHAR_BUFFER_SIZE = 32; + +void AddICUCellMemory(JSObject* obj, size_t nbytes); + +void RemoveICUCellMemory(JS::GCContext* gcx, JSObject* obj, size_t nbytes); +} // namespace intl + +} // namespace js + +#endif /* builtin_intl_CommonFunctions_h */ diff --git a/js/src/builtin/intl/CommonFunctions.js b/js/src/builtin/intl/CommonFunctions.js new file mode 100644 index 0000000000..f45b934fa4 --- /dev/null +++ b/js/src/builtin/intl/CommonFunctions.js @@ -0,0 +1,1000 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Portions Copyright Norbert Lindenberg 2011-2012. 
*/ + +#ifdef DEBUG +#define JS_CONCAT2(x, y) x##y +#define JS_CONCAT(x, y) JS_CONCAT2(x, y) +#define assertIsValidAndCanonicalLanguageTag(locale, desc) \ + do { \ + var JS_CONCAT(canonical, __LINE__) = intl_TryValidateAndCanonicalizeLanguageTag(locale); \ + assert(JS_CONCAT(canonical, __LINE__) !== null, \ + `${desc} is a structurally valid language tag`); \ + assert(JS_CONCAT(canonical, __LINE__) === locale, \ + `${desc} is a canonicalized language tag`); \ + } while (false) +#else +#define assertIsValidAndCanonicalLanguageTag(locale, desc) ; // Elided assertion. +#endif + +/** + * Returns the start index of a "Unicode locale extension sequence", which the + * specification defines as: "any substring of a language tag that starts with + * a separator '-' and the singleton 'u' and includes the maximum sequence of + * following non-singleton subtags and their preceding '-' separators." + * + * Alternatively, this may be defined as: the components of a language tag that + * match the `unicode_locale_extensions` production in UTS 35. + * + * Spec: ECMAScript Internationalization API Specification, 6.2.1. + */ +function startOfUnicodeExtensions(locale) { + assert(typeof locale === "string", "locale is a string"); + + // Search for "-u-" marking the start of a Unicode extension sequence. + var start = callFunction(std_String_indexOf, locale, "-u-"); + if (start < 0) { + return -1; + } + + // And search for "-x-" marking the start of any privateuse component to + // handle the case when "-u-" was only found within a privateuse subtag. + var privateExt = callFunction(std_String_indexOf, locale, "-x-"); + if (privateExt >= 0 && privateExt < start) { + return -1; + } + + return start; +} + +/** + * Returns the end index of a Unicode locale extension sequence. 
+ */ +function endOfUnicodeExtensions(locale, start) { + assert(typeof locale === "string", "locale is a string"); + assert(0 <= start && start < locale.length, "start is an index into locale"); + assert( + Substring(locale, start, 3) === "-u-", + "start points to Unicode extension sequence" + ); + + // Search for the start of the next singleton or privateuse subtag. + // + // Begin searching after the smallest possible Unicode locale extension + // sequence, namely |"-u-" 2alphanum|. End searching once the remaining + // characters can't fit the smallest possible singleton or privateuse + // subtag, namely |"-x-" alphanum|. Note the reduced end-limit means + // indexing inside the loop is always in-range. + for (var i = start + 5, end = locale.length - 4; i <= end; i++) { + if (locale[i] !== "-") { + continue; + } + if (locale[i + 2] === "-") { + return i; + } + + // Skip over (i + 1) and (i + 2) because we've just verified they + // aren't "-", so the next possible delimiter can only be at (i + 3). + i += 2; + } + + // If no singleton or privateuse subtag was found, the Unicode extension + // sequence extends until the end of the string. + return locale.length; +} + +/** + * Removes Unicode locale extension sequences from the given language tag. 
+ */ +function removeUnicodeExtensions(locale) { + assertIsValidAndCanonicalLanguageTag( + locale, + "locale with possible Unicode extension" + ); + + var start = startOfUnicodeExtensions(locale); + if (start < 0) { + return locale; + } + + var end = endOfUnicodeExtensions(locale, start); + + var left = Substring(locale, 0, start); + var right = Substring(locale, end, locale.length - end); + var combined = left + right; + + assertIsValidAndCanonicalLanguageTag(combined, "the recombined locale"); + assert( + startOfUnicodeExtensions(combined) < 0, + "recombination failed to remove all Unicode locale extension sequences" + ); + + return combined; +} + +/** + * Returns Unicode locale extension sequences from the given language tag. + */ +function getUnicodeExtensions(locale) { + assertIsValidAndCanonicalLanguageTag(locale, "locale with Unicode extension"); + + var start = startOfUnicodeExtensions(locale); + assert(start >= 0, "start of Unicode extension sequence not found"); + var end = endOfUnicodeExtensions(locale, start); + + return Substring(locale, start, end - start); +} + +/** + * Returns true if the input contains only ASCII alphabetical characters. + */ +function IsASCIIAlphaString(s) { + assert(typeof s === "string", "IsASCIIAlphaString"); + + for (var i = 0; i < s.length; i++) { + var c = callFunction(std_String_charCodeAt, s, i); + if (!((0x41 <= c && c <= 0x5a) || (0x61 <= c && c <= 0x7a))) { + return false; + } + } + return true; +} + +var localeCache = { + runtimeDefaultLocale: undefined, + defaultLocale: undefined, +}; + +/** + * Returns the BCP 47 language tag for the host environment's current locale. + * + * Spec: ECMAScript Internationalization API Specification, 6.2.4. + */ +function DefaultLocale() { + if (intl_IsRuntimeDefaultLocale(localeCache.runtimeDefaultLocale)) { + return localeCache.defaultLocale; + } + + // If we didn't have a cache hit, compute the candidate default locale. 
+ var runtimeDefaultLocale = intl_RuntimeDefaultLocale(); + var locale = intl_supportedLocaleOrFallback(runtimeDefaultLocale); + + assertIsValidAndCanonicalLanguageTag(locale, "the computed default locale"); + assert( + startOfUnicodeExtensions(locale) < 0, + "the computed default locale must not contain a Unicode extension sequence" + ); + + // Cache the computed locale until the runtime default locale changes. + localeCache.defaultLocale = locale; + localeCache.runtimeDefaultLocale = runtimeDefaultLocale; + + return locale; +} + +/** + * Canonicalizes a locale list. + * + * Spec: ECMAScript Internationalization API Specification, 9.2.1. + */ +function CanonicalizeLocaleList(locales) { + // Step 1. + if (locales === undefined) { + return []; + } + + // Step 3 (and the remaining steps). + var tag = intl_ValidateAndCanonicalizeLanguageTag(locales, false); + if (tag !== null) { + assert( + typeof tag === "string", + "intl_ValidateAndCanonicalizeLanguageTag returns a string value" + ); + return [tag]; + } + + // Step 2. + var seen = []; + + // Step 4. + var O = ToObject(locales); + + // Step 5. + var len = ToLength(O.length); + + // Step 6. + var k = 0; + + // Step 7. + while (k < len) { + // Steps 7.a-c. + if (k in O) { + // Step 7.c.i. + var kValue = O[k]; + + // Step 7.c.ii. + if (!(typeof kValue === "string" || IsObject(kValue))) { + ThrowTypeError(JSMSG_INVALID_LOCALES_ELEMENT); + } + + // Steps 7.c.iii-iv. + var tag = intl_ValidateAndCanonicalizeLanguageTag(kValue, true); + assert( + typeof tag === "string", + "ValidateAndCanonicalizeLanguageTag returns a string value" + ); + + // Step 7.c.v. + if (callFunction(std_Array_indexOf, seen, tag) === -1) { + DefineDataProperty(seen, seen.length, tag); + } + } + + // Step 7.d. + k++; + } + + // Step 8. + return seen; +} + +/** + * Compares a BCP 47 language tag against the locales in availableLocales + * and returns the best available match. Uses the fallback + * mechanism of RFC 4647, section 3.4. 
+ * + * Spec: ECMAScript Internationalization API Specification, 9.2.2. + * Spec: RFC 4647, section 3.4. + */ +function BestAvailableLocale(availableLocales, locale) { + return intl_BestAvailableLocale(availableLocales, locale, DefaultLocale()); +} + +/** + * Identical to BestAvailableLocale, but does not consider the default locale + * during computation. + */ +function BestAvailableLocaleIgnoringDefault(availableLocales, locale) { + return intl_BestAvailableLocale(availableLocales, locale, null); +} + +/** + * Compares a BCP 47 language priority list against the set of locales in + * availableLocales and determines the best available language to meet the + * request. Options specified through Unicode extension subsequences are + * ignored in the lookup, but information about such subsequences is returned + * separately. + * + * This variant is based on the Lookup algorithm of RFC 4647 section 3.4. + * + * Spec: ECMAScript Internationalization API Specification, 9.2.3. + * Spec: RFC 4647, section 3.4. + */ +function LookupMatcher(availableLocales, requestedLocales) { + // Step 1. + var result = new_Record(); + + // Step 2. + for (var i = 0; i < requestedLocales.length; i++) { + var locale = requestedLocales[i]; + + // Step 2.a. + var noExtensionsLocale = removeUnicodeExtensions(locale); + + // Step 2.b. + var availableLocale = BestAvailableLocale( + availableLocales, + noExtensionsLocale + ); + + // Step 2.c. + if (availableLocale !== undefined) { + // Step 2.c.i. + result.locale = availableLocale; + + // Step 2.c.ii. + if (locale !== noExtensionsLocale) { + result.extension = getUnicodeExtensions(locale); + } + + // Step 2.c.iii. + return result; + } + } + + // Steps 3-4. + result.locale = DefaultLocale(); + + // Step 5. + return result; +} + +/** + * Compares a BCP 47 language priority list against the set of locales in + * availableLocales and determines the best available language to meet the + * request. 
Options specified through Unicode extension subsequences are + * ignored in the lookup, but information about such subsequences is returned + * separately. + * + * Spec: ECMAScript Internationalization API Specification, 9.2.4. + */ +function BestFitMatcher(availableLocales, requestedLocales) { + // this implementation doesn't have anything better + return LookupMatcher(availableLocales, requestedLocales); +} + +/** + * Returns the Unicode extension value subtags for the requested key subtag. + * + * Spec: ECMAScript Internationalization API Specification, 9.2.5. + */ +function UnicodeExtensionValue(extension, key) { + assert(typeof extension === "string", "extension is a string value"); + assert( + callFunction(std_String_startsWith, extension, "-u-") && + getUnicodeExtensions("und" + extension) === extension, + "extension is a Unicode extension subtag" + ); + assert(typeof key === "string", "key is a string value"); + + // Step 1. + assert(key.length === 2, "key is a Unicode extension key subtag"); + + // Step 2. + var size = extension.length; + + // Step 3. + var searchValue = "-" + key + "-"; + + // Step 4. + var pos = callFunction(std_String_indexOf, extension, searchValue); + + // Step 5. + if (pos !== -1) { + // Step 5.a. + var start = pos + 4; + + // Step 5.b. + var end = start; + + // Step 5.c. + var k = start; + + // Steps 5.d-e. + while (true) { + // Step 5.e.i. + var e = callFunction(std_String_indexOf, extension, "-", k); + + // Step 5.e.ii. + var len = e === -1 ? size - k : e - k; + + // Step 5.e.iii. + if (len === 2) { + break; + } + + // Step 5.e.iv. + if (e === -1) { + end = size; + break; + } + + // Step 5.e.v. + end = e; + k = e + 1; + } + + // Step 5.f. + return callFunction(String_substring, extension, start, end); + } + + // Step 6. + searchValue = "-" + key; + + // Steps 7-8. + if (callFunction(std_String_endsWith, extension, searchValue)) { + return ""; + } + + // Step 9 (implicit). 
+} + +/** + * Compares a BCP 47 language priority list against availableLocales and + * determines the best available language to meet the request. Options specified + * through Unicode extension subsequences are negotiated separately, taking the + * caller's relevant extensions and locale data as well as client-provided + * options into consideration. + * + * Spec: ECMAScript Internationalization API Specification, 9.2.6. + */ +function ResolveLocale( + availableLocales, + requestedLocales, + options, + relevantExtensionKeys, + localeData +) { + // Steps 1-3. + var matcher = options.localeMatcher; + var r = + matcher === "lookup" + ? LookupMatcher(availableLocales, requestedLocales) + : BestFitMatcher(availableLocales, requestedLocales); + + // Step 4. + var foundLocale = r.locale; + var extension = r.extension; + + // Step 5. + var result = new_Record(); + + // Step 6. + result.dataLocale = foundLocale; + + // Step 7. + var supportedExtension = "-u"; + + // In this implementation, localeData is a function, not an object. + var localeDataProvider = localeData(); + + // Step 8. + for (var i = 0; i < relevantExtensionKeys.length; i++) { + var key = relevantExtensionKeys[i]; + + // Steps 8.a-h (The locale data is only computed when needed). + var keyLocaleData = undefined; + var value = undefined; + + // Locale tag may override. + + // Step 8.g. + var supportedExtensionAddition = ""; + + // Step 8.h. + if (extension !== undefined) { + // Step 8.h.i. + var requestedValue = UnicodeExtensionValue(extension, key); + + // Step 8.h.ii. + if (requestedValue !== undefined) { + // Steps 8.a-d. + keyLocaleData = callFunction( + localeDataProvider[key], + null, + foundLocale + ); + + // Step 8.h.ii.1. + if (requestedValue !== "") { + // Step 8.h.ii.1.a. + if ( + callFunction(std_Array_indexOf, keyLocaleData, requestedValue) !== + -1 + ) { + value = requestedValue; + supportedExtensionAddition = "-" + key + "-" + value; + } + } else { + // Step 8.h.ii.2. 
+ + // According to the LDML spec, if there's no type value, + // and true is an allowed value, it's used. + if (callFunction(std_Array_indexOf, keyLocaleData, "true") !== -1) { + value = "true"; + supportedExtensionAddition = "-" + key; + } + } + } + } + + // Options override all. + + // Step 8.i.i. + var optionsValue = options[key]; + + // Step 8.i.ii. + assert( + typeof optionsValue === "string" || + optionsValue === undefined || + optionsValue === null, + "unexpected type for options value" + ); + + // Steps 8.i, 8.i.iii.1. + if (optionsValue !== undefined && optionsValue !== value) { + // Steps 8.a-d. + if (keyLocaleData === undefined) { + keyLocaleData = callFunction( + localeDataProvider[key], + null, + foundLocale + ); + } + + // Step 8.i.iii. + if (callFunction(std_Array_indexOf, keyLocaleData, optionsValue) !== -1) { + value = optionsValue; + supportedExtensionAddition = ""; + } + } + + // Locale data provides default value. + if (value === undefined) { + // Steps 8.a-f. + value = + keyLocaleData === undefined + ? callFunction(localeDataProvider.default[key], null, foundLocale) + : keyLocaleData[0]; + } + + // Step 8.j. + assert( + typeof value === "string" || value === null, + "unexpected locale data value" + ); + result[key] = value; + + // Step 8.k. + supportedExtension += supportedExtensionAddition; + } + + // Step 9. + if (supportedExtension.length > 2) { + foundLocale = addUnicodeExtension(foundLocale, supportedExtension); + } + + // Step 10. + result.locale = foundLocale; + + // Step 11. + return result; +} + +/** + * Adds a Unicode extension subtag to a locale. + * + * Spec: ECMAScript Internationalization API Specification, 9.2.6. 
+ */ +function addUnicodeExtension(locale, extension) { + assert(typeof locale === "string", "locale is a string value"); + assert( + !callFunction(std_String_startsWith, locale, "x-"), + "unexpected privateuse-only locale" + ); + assert( + startOfUnicodeExtensions(locale) < 0, + "Unicode extension subtag already present in locale" + ); + + assert(typeof extension === "string", "extension is a string value"); + assert( + callFunction(std_String_startsWith, extension, "-u-") && + getUnicodeExtensions("und" + extension) === extension, + "extension is a Unicode extension subtag" + ); + + // Step 9.a. + var privateIndex = callFunction(std_String_indexOf, locale, "-x-"); + + // Steps 9.b-c. + if (privateIndex === -1) { + locale += extension; + } else { + var preExtension = callFunction(String_substring, locale, 0, privateIndex); + var postExtension = callFunction(String_substring, locale, privateIndex); + locale = preExtension + extension + postExtension; + } + + // Steps 9.d-e (Step 9.e is not required in this implementation, because we don't canonicalize + // Unicode extension subtags). + assertIsValidAndCanonicalLanguageTag(locale, "locale after concatenation"); + + return locale; +} + +/** + * Returns the subset of requestedLocales for which availableLocales has a + * matching (possibly fallback) locale. Locales appear in the same order in the + * returned list as in the input list. + * + * Spec: ECMAScript Internationalization API Specification, 9.2.7. + */ +function LookupSupportedLocales(availableLocales, requestedLocales) { + // Step 1. + var subset = []; + + // Step 2. + for (var i = 0; i < requestedLocales.length; i++) { + var locale = requestedLocales[i]; + + // Step 2.a. + var noExtensionsLocale = removeUnicodeExtensions(locale); + + // Step 2.b. + var availableLocale = BestAvailableLocale( + availableLocales, + noExtensionsLocale + ); + + // Step 2.c. 
+ if (availableLocale !== undefined) { + DefineDataProperty(subset, subset.length, locale); + } + } + + // Step 3. + return subset; +} + +/** + * Returns the subset of requestedLocales for which availableLocales has a + * matching (possibly fallback) locale. Locales appear in the same order in the + * returned list as in the input list. + * + * Spec: ECMAScript Internationalization API Specification, 9.2.8. + */ +function BestFitSupportedLocales(availableLocales, requestedLocales) { + // don't have anything better + return LookupSupportedLocales(availableLocales, requestedLocales); +} + +/** + * Returns the subset of requestedLocales for which availableLocales has a + * matching (possibly fallback) locale. Locales appear in the same order in the + * returned list as in the input list. + * + * Spec: ECMAScript Internationalization API Specification, 9.2.9. + */ +function SupportedLocales(availableLocales, requestedLocales, options) { + // Step 1. + var matcher; + if (options !== undefined) { + // Step 1.a. + options = ToObject(options); + + // Step 1.b + matcher = options.localeMatcher; + if (matcher !== undefined) { + matcher = ToString(matcher); + if (matcher !== "lookup" && matcher !== "best fit") { + ThrowRangeError(JSMSG_INVALID_LOCALE_MATCHER, matcher); + } + } + } + + // Steps 2-5. + return matcher === undefined || matcher === "best fit" + ? BestFitSupportedLocales(availableLocales, requestedLocales) + : LookupSupportedLocales(availableLocales, requestedLocales); +} + +/** + * Extracts a property value from the provided options object, converts it to + * the required type, checks whether it is one of a list of allowed values, + * and fills in a fallback value if necessary. + * + * Spec: ECMAScript Internationalization API Specification, 9.2.10. + */ +function GetOption(options, property, type, values, fallback) { + // Step 1. + var value = options[property]; + + // Step 2. + if (value !== undefined) { + // Steps 2.a-c. 
+ if (type === "boolean") { + value = ToBoolean(value); + } else if (type === "string") { + value = ToString(value); + } else { + assert(false, "GetOption"); + } + + // Step 2.d. + if ( + values !== undefined && + callFunction(std_Array_indexOf, values, value) === -1 + ) { + ThrowRangeError(JSMSG_INVALID_OPTION_VALUE, property, `"${value}"`); + } + + // Step 2.e. + return value; + } + + // Step 3. + return fallback; +} + +/** + * Extracts a property value from the provided options object, converts it to + * a boolean or string, checks whether it is one of a list of allowed values, + * and fills in a fallback value if necessary. + */ +function GetStringOrBooleanOption( + options, + property, + stringValues, + fallback +) { + assert(IsObject(stringValues), "GetStringOrBooleanOption"); + + // Step 1. + var value = options[property]; + + // Step 2. + if (value === undefined) { + return fallback; + } + + // Step 3. + if (value === true) { + return true; + } + + // Steps 4-5. + if (!value) { + return false; + } + + // Step 6. + value = ToString(value); + + // Step 7. + if (callFunction(std_Array_indexOf, stringValues, value) === -1) { + ThrowRangeError(JSMSG_INVALID_OPTION_VALUE, property, `"${value}"`); + } + + // Step 8. + return value; +} + +/** + * The abstract operation DefaultNumberOption converts value to a Number value, + * checks whether it is in the allowed range, and fills in a fallback value if + * necessary. + * + * Spec: ECMAScript Internationalization API Specification, 9.2.11. 
+ */ +function DefaultNumberOption(value, minimum, maximum, fallback) { + assert( + typeof minimum === "number" && (minimum | 0) === minimum, + "DefaultNumberOption" + ); + assert( + typeof maximum === "number" && (maximum | 0) === maximum, + "DefaultNumberOption" + ); + assert( + fallback === undefined || + (typeof fallback === "number" && (fallback | 0) === fallback), + "DefaultNumberOption" + ); + assert( + fallback === undefined || (minimum <= fallback && fallback <= maximum), + "DefaultNumberOption" + ); + + // Step 1. + if (value === undefined) { + return fallback; + } + + // Step 2. + value = ToNumber(value); + + // Step 3. + if (Number_isNaN(value) || value < minimum || value > maximum) { + ThrowRangeError(JSMSG_INVALID_DIGITS_VALUE, value); + } + + // Step 4. + // Apply bitwise-or to convert -0 to +0 per ES2017, 5.2 and to ensure the + // result is an int32 value. + return std_Math_floor(value) | 0; +} + +/** + * Extracts a property value from the provided options object, converts it to a + * Number value, checks whether it is in the allowed range, and fills in a + * fallback value if necessary. + * + * Spec: ECMAScript Internationalization API Specification, 9.2.12. + */ +function GetNumberOption(options, property, minimum, maximum, fallback) { + // Steps 1-2. + return DefaultNumberOption(options[property], minimum, maximum, fallback); +} + +// Symbols in the self-hosting compartment can't be cloned, use a separate +// object to hold the actual symbol value. +// TODO: Can we add support to clone symbols? +var intlFallbackSymbolHolder = { value: undefined }; + +/** + * The [[FallbackSymbol]] symbol of the %Intl% intrinsic object. + * + * This symbol is used to implement the legacy constructor semantics for + * Intl.DateTimeFormat and Intl.NumberFormat. 
+ */ +function intlFallbackSymbol() { + var fallbackSymbol = intlFallbackSymbolHolder.value; + if (!fallbackSymbol) { + var Symbol = GetBuiltinConstructor("Symbol"); + fallbackSymbol = Symbol("IntlLegacyConstructedSymbol"); + intlFallbackSymbolHolder.value = fallbackSymbol; + } + return fallbackSymbol; +} + +/** + * Initializes the INTL_INTERNALS_OBJECT_SLOT of the given object. + */ +function initializeIntlObject(obj, type, lazyData) { + assert(IsObject(obj), "Non-object passed to initializeIntlObject"); + assert( + (type === "Collator" && intl_GuardToCollator(obj) !== null) || + (type === "DateTimeFormat" && intl_GuardToDateTimeFormat(obj) !== null) || + (type === "DisplayNames" && intl_GuardToDisplayNames(obj) !== null) || + (type === "ListFormat" && intl_GuardToListFormat(obj) !== null) || + (type === "NumberFormat" && intl_GuardToNumberFormat(obj) !== null) || + (type === "PluralRules" && intl_GuardToPluralRules(obj) !== null) || + (type === "RelativeTimeFormat" && + intl_GuardToRelativeTimeFormat(obj) !== null) || + (type === "Segmenter" && intl_GuardToSegmenter(obj) !== null), + "type must match the object's class" + ); + assert(IsObject(lazyData), "non-object lazy data"); + + // The meaning of an internals object for an object |obj| is as follows. + // + // The .type property indicates the type of Intl object that |obj| is. It + // must be one of: + // - Collator + // - DateTimeFormat + // - DisplayNames + // - ListFormat + // - NumberFormat + // - PluralRules + // - RelativeTimeFormat + // - Segmenter + // + // The .lazyData property stores information needed to compute -- without + // observable side effects -- the actual internal Intl properties of + // |obj|. If it is non-null, then the actual internal properties haven't + // been computed, and .lazyData must be processed by + // |setInternalProperties| before internal Intl property values are + // available. 
If it is null, then the .internalProps property contains an + // object whose properties are the internal Intl properties of |obj|. + + var internals = std_Object_create(null); + internals.type = type; + internals.lazyData = lazyData; + internals.internalProps = null; + + assert( + UnsafeGetReservedSlot(obj, INTL_INTERNALS_OBJECT_SLOT) === undefined, + "Internal slot already initialized?" + ); + UnsafeSetReservedSlot(obj, INTL_INTERNALS_OBJECT_SLOT, internals); +} + +/** + * Set the internal properties object for an |internals| object previously + * associated with lazy data. + */ +function setInternalProperties(internals, internalProps) { + assert(IsObject(internals.lazyData), "lazy data must exist already"); + assert(IsObject(internalProps), "internalProps argument should be an object"); + + // Set in reverse order so that the .lazyData nulling is a barrier. + internals.internalProps = internalProps; + internals.lazyData = null; +} + +/** + * Get the existing internal properties out of a non-newborn |internals|, or + * null if none have been computed. + */ +function maybeInternalProperties(internals) { + assert(IsObject(internals), "non-object passed to maybeInternalProperties"); + var lazyData = internals.lazyData; + if (lazyData) { + return null; + } + assert( + IsObject(internals.internalProps), + "missing lazy data and computed internals" + ); + return internals.internalProps; +} + +/** + * Return |obj|'s internals object (*not* the object holding its internal + * properties!), with structure specified above. + * + * Spec: ECMAScript Internationalization API Specification, 10.3. + * Spec: ECMAScript Internationalization API Specification, 11.3. + * Spec: ECMAScript Internationalization API Specification, 12.3. 
+ */ +function getIntlObjectInternals(obj) { + assert(IsObject(obj), "getIntlObjectInternals called with non-Object"); + assert( + intl_GuardToCollator(obj) !== null || + intl_GuardToDateTimeFormat(obj) !== null || + intl_GuardToDisplayNames(obj) !== null || + intl_GuardToListFormat(obj) !== null || + intl_GuardToNumberFormat(obj) !== null || + intl_GuardToPluralRules(obj) !== null || + intl_GuardToRelativeTimeFormat(obj) !== null || + intl_GuardToSegmenter(obj) !== null, + "getIntlObjectInternals called with non-Intl object" + ); + + var internals = UnsafeGetReservedSlot(obj, INTL_INTERNALS_OBJECT_SLOT); + + assert(IsObject(internals), "internals not an object"); + assert(hasOwn("type", internals), "missing type"); + assert( + (internals.type === "Collator" && intl_GuardToCollator(obj) !== null) || + (internals.type === "DateTimeFormat" && + intl_GuardToDateTimeFormat(obj) !== null) || + (internals.type === "DisplayNames" && + intl_GuardToDisplayNames(obj) !== null) || + (internals.type === "ListFormat" && + intl_GuardToListFormat(obj) !== null) || + (internals.type === "NumberFormat" && + intl_GuardToNumberFormat(obj) !== null) || + (internals.type === "PluralRules" && + intl_GuardToPluralRules(obj) !== null) || + (internals.type === "RelativeTimeFormat" && + intl_GuardToRelativeTimeFormat(obj) !== null) || + (internals.type === "Segmenter" && + intl_GuardToSegmenter(obj) !== null), + "type must match the object's class" + ); + assert(hasOwn("lazyData", internals), "missing lazyData"); + assert(hasOwn("internalProps", internals), "missing internalProps"); + + return internals; +} + +/** + * Get the internal properties of known-Intl object |obj|. For use only by + * C++ code that knows what it's doing! + */ +function getInternals(obj) { + var internals = getIntlObjectInternals(obj); + + // If internal properties have already been computed, use them. 
+ var internalProps = maybeInternalProperties(internals); + if (internalProps) { + return internalProps; + } + + // Otherwise it's time to fully create them. + var type = internals.type; + if (type === "Collator") { + internalProps = resolveCollatorInternals(internals.lazyData); + } else if (type === "DateTimeFormat") { + internalProps = resolveDateTimeFormatInternals(internals.lazyData); + } else if (type === "DisplayNames") { + internalProps = resolveDisplayNamesInternals(internals.lazyData); + } else if (type === "ListFormat") { + internalProps = resolveListFormatInternals(internals.lazyData); + } else if (type === "NumberFormat") { + internalProps = resolveNumberFormatInternals(internals.lazyData); + } else if (type === "PluralRules") { + internalProps = resolvePluralRulesInternals(internals.lazyData); + } else if (type === "RelativeTimeFormat") { + internalProps = resolveRelativeTimeFormatInternals(internals.lazyData); + } else { + assert(type === "Segmenter", "unexpected Intl type"); + internalProps = resolveSegmenterInternals(internals.lazyData); + } + setInternalProperties(internals, internalProps); + return internalProps; +} diff --git a/js/src/builtin/intl/CurrencyDataGenerated.js b/js/src/builtin/intl/CurrencyDataGenerated.js new file mode 100644 index 0000000000..dcde004956 --- /dev/null +++ b/js/src/builtin/intl/CurrencyDataGenerated.js @@ -0,0 +1,78 @@ +// Generated by make_intl_data.py. DO NOT EDIT. +// Version: 2023-01-01 + +/** + * Mapping from currency codes to the number of decimal digits used for them. + * Default is 2 digits. + * + * Spec: ISO 4217 Currency and Funds Code List. 
 * http://www.currency-iso.org/en/home/tables/table-a1.html
 *
 * Only the currency codes listed in this table have an explicit digit count.
 */
var currencyDigits = {
  // Bahraini Dinar (BAHRAIN)
  BHD: 3,
  // Burundi Franc (BURUNDI)
  BIF: 0,
  // Unidad de Fomento (CHILE)
  CLF: 4,
  // Chilean Peso (CHILE)
  CLP: 0,
  // Djibouti Franc (DJIBOUTI)
  DJF: 0,
  // Guinean Franc (GUINEA)
  GNF: 0,
  // Iraqi Dinar (IRAQ)
  IQD: 3,
  // Iceland Krona (ICELAND)
  ISK: 0,
  // Jordanian Dinar (JORDAN)
  JOD: 3,
  // Yen (JAPAN)
  JPY: 0,
  // Comorian Franc (COMOROS (THE))
  KMF: 0,
  // Won (KOREA (THE REPUBLIC OF))
  KRW: 0,
  // Kuwaiti Dinar (KUWAIT)
  KWD: 3,
  // Libyan Dinar (LIBYA)
  LYD: 3,
  // Rial Omani (OMAN)
  OMR: 3,
  // Guarani (PARAGUAY)
  PYG: 0,
  // Rwanda Franc (RWANDA)
  RWF: 0,
  // Tunisian Dinar (TUNISIA)
  TND: 3,
  // Uganda Shilling (UGANDA)
  UGX: 0,
  // Uruguay Peso en Unidades Indexadas (UI) (URUGUAY)
  UYI: 0,
  // Unidad Previsional (URUGUAY)
  UYW: 4,
  // Dong (VIET NAM)
  VND: 0,
  // Vatu (VANUATU)
  VUV: 0,
  // CFA Franc BEAC (CAMEROON)
  // CFA Franc BEAC (CENTRAL AFRICAN REPUBLIC (THE))
  // CFA Franc BEAC (CHAD)
  // CFA Franc BEAC (CONGO (THE))
  // CFA Franc BEAC (EQUATORIAL GUINEA)
  // CFA Franc BEAC (GABON)
  XAF: 0,
  // CFA Franc BCEAO (BENIN)
  // CFA Franc BCEAO (BURKINA FASO)
  // CFA Franc BCEAO (CÔTE D'IVOIRE)
  // CFA Franc BCEAO (GUINEA-BISSAU)
  // CFA Franc BCEAO (MALI)
  // CFA Franc BCEAO (NIGER (THE))
  // CFA Franc BCEAO (SENEGAL)
  // CFA Franc BCEAO (TOGO)
  XOF: 0,
  // CFP Franc (FRENCH POLYNESIA)
  // CFP Franc (NEW CALEDONIA)
  // CFP Franc (WALLIS AND FUTUNA)
  XPF: 0,
};
diff --git a/js/src/builtin/intl/DateTimeFormat.cpp b/js/src/builtin/intl/DateTimeFormat.cpp
new file mode 100644
index 0000000000..00af67ddfd
--- /dev/null
+++ b/js/src/builtin/intl/DateTimeFormat.cpp
@@ -0,0 +1,1678 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla
Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/* Intl.DateTimeFormat implementation. */

#include "builtin/intl/DateTimeFormat.h"

#include "mozilla/Assertions.h"
#include "mozilla/intl/Calendar.h"
#include "mozilla/intl/DateIntervalFormat.h"
#include "mozilla/intl/DateTimeFormat.h"
#include "mozilla/intl/DateTimePart.h"
#include "mozilla/intl/Locale.h"
#include "mozilla/intl/TimeZone.h"
#include "mozilla/Range.h"
#include "mozilla/Span.h"

#include "builtin/Array.h"
#include "builtin/intl/CommonFunctions.h"
#include "builtin/intl/FormatBuffer.h"
#include "builtin/intl/LanguageTag.h"
#include "builtin/intl/SharedIntlData.h"
#include "gc/GCContext.h"
#include "js/Date.h"
#include "js/experimental/Intl.h"     // JS::AddMozDateTimeFormatConstructor
#include "js/friend/ErrorMessages.h"  // js::GetErrorMessage, JSMSG_*
#include "js/GCAPI.h"
#include "js/PropertyAndElement.h"  // JS_DefineFunctions, JS_DefineProperties
#include "js/PropertySpec.h"
#include "js/StableStringChars.h"
#include "vm/DateTime.h"
#include "vm/GlobalObject.h"
#include "vm/JSContext.h"
#include "vm/PlainObject.h"  // js::PlainObject
#include "vm/Runtime.h"

#include "vm/GeckoProfiler-inl.h"
#include "vm/JSObject-inl.h"
#include "vm/NativeObject-inl.h"

using namespace js;

using JS::AutoStableStringChars;
using JS::ClippedTime;
using JS::TimeClip;

using js::intl::DateTimeFormatOptions;
using js::intl::FormatBuffer;
using js::intl::INITIAL_CHAR_BUFFER_SIZE;
using js::intl::SharedIntlData;

// Class hooks for DateTimeFormatObject. Only the finalize hook is set; it
// releases the native formatter objects owned by the instance.
const JSClassOps DateTimeFormatObject::classOps_ = {
    nullptr,                         // addProperty
    nullptr,                         // delProperty
    nullptr,                         // enumerate
    nullptr,                         // newEnumerate
    nullptr,                         // resolve
    nullptr,                         // mayResolve
    DateTimeFormatObject::finalize,  // finalize
    nullptr,                         // call
    nullptr,                         // construct
    nullptr,                         // trace
};

// JSClass for Intl.DateTimeFormat instances: SLOT_COUNT reserved slots, a
// cached prototype keyed by JSProto_DateTimeFormat, and foreground
// finalization.
const JSClass DateTimeFormatObject::class_ = {
    "Intl.DateTimeFormat",
    JSCLASS_HAS_RESERVED_SLOTS(DateTimeFormatObject::SLOT_COUNT) |
        JSCLASS_HAS_CACHED_PROTO(JSProto_DateTimeFormat) |
        JSCLASS_FOREGROUND_FINALIZE,
    &DateTimeFormatObject::classOps_, &DateTimeFormatObject::classSpec_};

// The prototype object is a plain object.
const JSClass& DateTimeFormatObject::protoClass_ = PlainObject::class_;

// Native backing the "toSource" method: returns the "DateTimeFormat" name
// string regardless of the receiver or arguments.
static bool dateTimeFormat_toSource(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);
  args.rval().setString(cx->names().DateTimeFormat);
  return true;
}

// Functions defined on the constructor.
static const JSFunctionSpec dateTimeFormat_static_methods[] = {
    JS_SELF_HOSTED_FN("supportedLocalesOf",
                      "Intl_DateTimeFormat_supportedLocalesOf", 1, 0),
    JS_FS_END};

// Functions defined on the prototype. All but toSource are self-hosted.
static const JSFunctionSpec dateTimeFormat_methods[] = {
    JS_SELF_HOSTED_FN("resolvedOptions", "Intl_DateTimeFormat_resolvedOptions",
                      0, 0),
    JS_SELF_HOSTED_FN("formatToParts", "Intl_DateTimeFormat_formatToParts", 1,
                      0),
    JS_SELF_HOSTED_FN("formatRange", "Intl_DateTimeFormat_formatRange", 2, 0),
    JS_SELF_HOSTED_FN("formatRangeToParts",
                      "Intl_DateTimeFormat_formatRangeToParts", 2, 0),
    JS_FN("toSource", dateTimeFormat_toSource, 0, 0),
    JS_FS_END};

// Properties defined on the prototype: the self-hosted "format" getter and
// the read-only @@toStringTag string.
static const JSPropertySpec dateTimeFormat_properties[] = {
    JS_SELF_HOSTED_GET("format", "$Intl_DateTimeFormat_format_get", 0),
    JS_STRING_SYM_PS(toStringTag, "Intl.DateTimeFormat", JSPROP_READONLY),
    JS_PS_END};

// Forward declaration of the native constructor entry point; it is needed by
// classSpec_ below and defined further down.
static bool DateTimeFormat(JSContext* cx, unsigned argc, Value* vp);

// Ties together the constructor, the prototype, and the method/property
// tables declared above.
const ClassSpec DateTimeFormatObject::classSpec_ = {
    GenericCreateConstructor<DateTimeFormat, 0, gc::AllocKind::FUNCTION>,
    GenericCreatePrototype<DateTimeFormatObject>,
    dateTimeFormat_static_methods,
    nullptr,
    dateTimeFormat_methods,
    dateTimeFormat_properties,
    nullptr,
    ClassSpec::DontDefineConstructor};

/**
 * 12.2.1 Intl.DateTimeFormat([ locales [, options]])
 *
 * ES2017 Intl draft rev 94045d234762ad107a3d09bb6f7381a65f1a2f9b
 *
 * Shared implementation behind all DateTimeFormat entry points in this file.
 * |construct| selects whether the freshly allocated object or |args.thisv()|
 * is passed on as the this-value; |required|, |defaults| and |dtfOptions| are
 * forwarded unchanged to intl::InitializeDateTimeFormatObject.
 */
static bool DateTimeFormat(JSContext* cx, const CallArgs& args, bool construct,
                           HandleString required, HandleString defaults,
                           DateTimeFormatOptions dtfOptions) {
  AutoJSConstructorProfilerEntry pseudoFrame(cx, "Intl.DateTimeFormat");

  // Step 1 (Handled by OrdinaryCreateFromConstructor fallback code).

  // Step 2 (Inlined 9.1.14, OrdinaryCreateFromConstructor).
  // Non-standard (mozIntl) formatters use JSProto_Null as the proto key.
  JSProtoKey protoKey = dtfOptions == DateTimeFormatOptions::Standard
                            ? JSProto_DateTimeFormat
                            : JSProto_Null;
  RootedObject proto(cx);
  if (!GetPrototypeFromBuiltinConstructor(cx, args, protoKey, &proto)) {
    return false;
  }

  Rooted<DateTimeFormatObject*> dateTimeFormat(cx);
  dateTimeFormat = NewObjectWithClassProto<DateTimeFormatObject>(cx, proto);
  if (!dateTimeFormat) {
    return false;
  }

  RootedValue thisValue(
      cx, construct ? ObjectValue(*dateTimeFormat) : args.thisv());
  HandleValue locales = args.get(0);
  HandleValue options = args.get(1);

  // Step 3.
  return intl::InitializeDateTimeFormatObject(
      cx, dateTimeFormat, thisValue, locales, options, required, defaults,
      dtfOptions, args.rval());
}

// Native entry point for Intl.DateTimeFormat: uses required = "any",
// defaults = "date", and the standard option set. May be invoked with or
// without |new|.
static bool DateTimeFormat(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);

  Handle<PropertyName*> required = cx->names().any;
  Handle<PropertyName*> defaults = cx->names().date;
  return DateTimeFormat(cx, args, args.isConstructing(), required, defaults,
                        DateTimeFormatOptions::Standard);
}

// Native entry point for mozIntl.DateTimeFormat: construct-only, with the
// Mozilla extensions enabled.
static bool MozDateTimeFormat(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);

  // Don't allow to call mozIntl.DateTimeFormat as a function. That way we
  // don't need to worry how to handle the legacy initialization semantics
  // when applied on mozIntl.DateTimeFormat.
  if (!ThrowIfNotConstructing(cx, args, "mozIntl.DateTimeFormat")) {
    return false;
  }

  Handle<PropertyName*> required = cx->names().any;
  Handle<PropertyName*> defaults = cx->names().date;
  return DateTimeFormat(cx, args, true, required, defaults,
                        DateTimeFormatOptions::EnableMozExtensions);
}

// Self-hosting intrinsic. Expects exactly four arguments; arguments 2 and 3
// must be strings and supply |required| and |defaults| (arguments 0 and 1 are
// read as locales/options by the shared implementation).
bool js::intl_CreateDateTimeFormat(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);
  MOZ_ASSERT(args.length() == 4);
  MOZ_ASSERT(!args.isConstructing());

  RootedString required(cx, args[2].toString());
  RootedString defaults(cx, args[3].toString());

  // intl_CreateDateTimeFormat is an intrinsic for self-hosted JavaScript, so it
  // cannot be used with "new", but it still has to be treated as a constructor.
  return DateTimeFormat(cx, args, true, required, defaults,
                        DateTimeFormatOptions::Standard);
}

// Finalizer: for each native formatter that exists (date format and date
// interval format), removes its estimated memory use from the cell's
// accounting and deletes it.
void js::DateTimeFormatObject::finalize(JS::GCContext* gcx, JSObject* obj) {
  MOZ_ASSERT(gcx->onMainThread());

  auto* dateTimeFormat = &obj->as<DateTimeFormatObject>();
  mozilla::intl::DateTimeFormat* df = dateTimeFormat->getDateFormat();
  mozilla::intl::DateIntervalFormat* dif =
      dateTimeFormat->getDateIntervalFormat();

  if (df) {
    intl::RemoveICUCellMemory(
        gcx, obj, DateTimeFormatObject::UDateFormatEstimatedMemoryUse);

    delete df;
  }

  if (dif) {
    intl::RemoveICUCellMemory(
        gcx, obj, DateTimeFormatObject::UDateIntervalFormatEstimatedMemoryUse);

    delete dif;
  }
}

// Creates the MozDateTimeFormat constructor with a blank plain-object
// prototype, installs the same static methods, methods and properties as the
// standard class, and defines the constructor on |intl| under the name
// "DateTimeFormat". Returns false on failure.
bool JS::AddMozDateTimeFormatConstructor(JSContext* cx,
                                         JS::Handle<JSObject*> intl) {
  RootedObject ctor(
      cx, GlobalObject::createConstructor(cx, MozDateTimeFormat,
                                          cx->names().DateTimeFormat, 0));
  if (!ctor) {
    return false;
  }

  RootedObject proto(
      cx, GlobalObject::createBlankPrototype<PlainObject>(cx, cx->global()));
  if (!proto) {
    return false;
  }

  if (!LinkConstructorAndPrototype(cx, ctor, proto)) {
    return false;
  }

  // 12.3.2
  if (!JS_DefineFunctions(cx, ctor, dateTimeFormat_static_methods)) {
    return false;
  }

  // 12.4.4 and 12.4.5
  if (!JS_DefineFunctions(cx, proto, dateTimeFormat_methods)) {
    return false;
  }

  // 12.4.2 and 12.4.3
  if (!JS_DefineProperties(cx, proto, dateTimeFormat_properties)) {
    return false;
  }

  RootedValue ctorValue(cx, ObjectValue(*ctor));
  return DefineDataProperty(cx, intl, cx->names().DateTimeFormat, ctorValue, 0);
}

// Stores in |rval| a new string holding the BCP 47 type of the calendar that
// mozilla::intl::Calendar creates for |locale|. Reports an internal error and
// returns false if the calendar or its type cannot be obtained.
static bool DefaultCalendar(JSContext* cx, const UniqueChars& locale,
                            MutableHandleValue rval) {
  auto calendar = mozilla::intl::Calendar::TryCreate(locale.get());
  if (calendar.isErr()) {
    intl::ReportInternalError(cx, calendar.unwrapErr());
    return false;
  }

  auto type = calendar.unwrap()->GetBcp47Type();
  if (type.isErr()) {
    intl::ReportInternalError(cx, type.unwrapErr());
    return false;
  }

  JSString* str = NewStringCopy<CanGC>(cx, type.unwrap());
  if (!str) {
    return false;
  }

  rval.setString(str);
  return true;
}

// Intrinsic taking one string argument (a locale). Returns a new array whose
// first element is the locale's default calendar, followed by the BCP 47
// calendar keyword values reported for that locale.
bool js::intl_availableCalendars(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);
  MOZ_ASSERT(args.length() == 1);
  MOZ_ASSERT(args[0].isString());

  UniqueChars locale = intl::EncodeLocale(cx, args[0].toString());
  if (!locale) {
    return false;
  }

  RootedObject calendars(cx, NewDenseEmptyArray(cx));
  if (!calendars) {
    return false;
  }

  // We need the default calendar for the locale as the first result.
  RootedValue defaultCalendar(cx);
  if (!DefaultCalendar(cx, locale, &defaultCalendar)) {
    return false;
  }

  if (!NewbornArrayPush(cx, calendars, defaultCalendar)) {
    return false;
  }

  // Now get the calendars that "would make a difference", i.e., not the
  // default.
  auto keywords =
      mozilla::intl::Calendar::GetBcp47KeywordValuesForLocale(locale.get());
  if (keywords.isErr()) {
    intl::ReportInternalError(cx, keywords.unwrapErr());
    return false;
  }

  for (auto keyword : keywords.unwrap()) {
    if (keyword.isErr()) {
      intl::ReportInternalError(cx);
      return false;
    }

    JSString* jscalendar = NewStringCopy<CanGC>(cx, keyword.unwrap());
    if (!jscalendar) {
      return false;
    }
    if (!NewbornArrayPush(cx, calendars, StringValue(jscalendar))) {
      return false;
    }
  }

  args.rval().setObject(*calendars);
  return true;
}

// Intrinsic taking one string argument (a locale). Returns the default
// calendar for that locale as computed by DefaultCalendar.
bool js::intl_defaultCalendar(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);
  MOZ_ASSERT(args.length() == 1);
  MOZ_ASSERT(args[0].isString());

  UniqueChars locale = intl::EncodeLocale(cx, args[0].toString());
  if (!locale) {
    return false;
  }

  return DefaultCalendar(cx, locale, args.rval());
}

// Intrinsic taking one string argument (a time zone name). Returns the atom
// produced by SharedIntlData::validateTimeZoneName, or null when validation
// yields no atom.
bool js::intl_IsValidTimeZoneName(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);
  MOZ_ASSERT(args.length() == 1);
  MOZ_ASSERT(args[0].isString());

  SharedIntlData& sharedIntlData = cx->runtime()->sharedIntlData.ref();

  RootedString timeZone(cx, args[0].toString());
  Rooted<JSAtom*> validatedTimeZone(cx);
  if (!sharedIntlData.validateTimeZoneName(cx, timeZone, &validatedTimeZone)) {
    return false;
  }

  if (validatedTimeZone) {
    cx->markAtom(validatedTimeZone);
    args.rval().setString(validatedTimeZone);
  } else {
    args.rval().setNull();
  }

  return true;
}

// Intrinsic taking one string argument (a time zone name). Returns its
// canonical form: first via the shared IANA-consistent table, and otherwise
// via mozilla::intl::TimeZone::GetCanonicalTimeZoneID.
bool js::intl_canonicalizeTimeZone(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);
  MOZ_ASSERT(args.length() == 1);
  MOZ_ASSERT(args[0].isString());

  SharedIntlData& sharedIntlData = cx->runtime()->sharedIntlData.ref();

  // Some time zone names are canonicalized differently by ICU -- handle
  // those first:
  RootedString timeZone(cx, args[0].toString());
  Rooted<JSAtom*> ianaTimeZone(cx);
  if (!sharedIntlData.tryCanonicalizeTimeZoneConsistentWithIANA(
          cx, timeZone, &ianaTimeZone)) {
    return false;
  }

  if (ianaTimeZone) {
    cx->markAtom(ianaTimeZone);
    args.rval().setString(ianaTimeZone);
    return true;
  }

  AutoStableStringChars stableChars(cx);
  if (!stableChars.initTwoByte(cx, timeZone)) {
    return false;
  }

  FormatBuffer<char16_t, intl::INITIAL_CHAR_BUFFER_SIZE> canonicalTimeZone(cx);
  auto result = mozilla::intl::TimeZone::GetCanonicalTimeZoneID(
      stableChars.twoByteRange(), canonicalTimeZone);
  if (result.isErr()) {
    intl::ReportInternalError(cx, result.unwrapErr());
    return false;
  }

  JSString* str = canonicalTimeZone.toString(cx);
  if (!str) {
    return false;
  }

  args.rval().setString(str);
  return true;
}

// Intrinsic taking no arguments. Returns, as a string, the time zone id that
// DateTimeInfo::timeZoneId reports for the current realm's force-UTC setting.
bool js::intl_defaultTimeZone(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);
  MOZ_ASSERT(args.length() == 0);

  FormatBuffer<char16_t, intl::INITIAL_CHAR_BUFFER_SIZE> timeZone(cx);
  auto result =
      DateTimeInfo::timeZoneId(DateTimeInfo::forceUTC(cx->realm()), timeZone);
  if (result.isErr()) {
    intl::ReportInternalError(cx, result.unwrapErr());
    return false;
  }

  JSString* str = timeZone.toString(cx);
  if (!str) {
    return false;
  }

  args.rval().setString(str);
  return true;
}

// Intrinsic taking no arguments. Returns, as an int32, the raw offset in
// milliseconds that DateTimeInfo::getRawOffsetMs reports for the current
// realm's force-UTC setting.
bool js::intl_defaultTimeZoneOffset(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);
  MOZ_ASSERT(args.length() == 0);

  auto offset =
      DateTimeInfo::getRawOffsetMs(DateTimeInfo::forceUTC(cx->realm()));
  if (offset.isErr()) {
    intl::ReportInternalError(cx, offset.unwrapErr());
    return false;
  }

  args.rval().setInt32(offset.unwrap());
  return true;
}

// Intrinsic taking one argument, a string or |undefined|. Returns true iff
// the argument is a string that equals, character for character, the time
// zone id currently reported by DateTimeInfo::timeZoneId.
bool js::intl_isDefaultTimeZone(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);
  MOZ_ASSERT(args.length() == 1);
  MOZ_ASSERT(args[0].isString() || args[0].isUndefined());

  // |undefined| is the default value when the Intl runtime caches haven't
  // yet been initialized. Handle it the same way as a cache miss.
  if (args[0].isUndefined()) {
    args.rval().setBoolean(false);
    return true;
  }

  FormatBuffer<char16_t, intl::INITIAL_CHAR_BUFFER_SIZE> chars(cx);
  auto result =
      DateTimeInfo::timeZoneId(DateTimeInfo::forceUTC(cx->realm()), chars);
  if (result.isErr()) {
    intl::ReportInternalError(cx, result.unwrapErr());
    return false;
  }

  JSLinearString* str = args[0].toString()->ensureLinear(cx);
  if (!str) {
    return false;
  }

  // Compare lengths first; only equal-length strings need a character
  // comparison, which is dispatched on the string's character width.
  bool equals;
  if (str->length() == chars.length()) {
    JS::AutoCheckCannotGC nogc;
    equals =
        str->hasLatin1Chars()
            ? EqualChars(str->latin1Chars(nogc), chars.data(), str->length())
            : EqualChars(str->twoByteChars(nogc), chars.data(), str->length());
  } else {
    equals = false;
  }

  args.rval().setBoolean(equals);
  return true;
}

enum class HourCycle {
  // 12 hour cycle, from 0 to 11.
  H11,

  // 12 hour cycle, from 1 to 12.
  H12,

  // 24 hour cycle, from 0 to 23.
  H23,

  // 24 hour cycle, from 1 to 24.
  H24
};

// Builds the locale string for the formatter described by |internals|: parses
// the "locale" property, then applies the "calendar" ("ca") and
// "numberingSystem" ("nu") properties -- and |hourCycle| ("hc") when given --
// as Unicode extension keywords. Returns nullptr on failure.
static UniqueChars DateTimeFormatLocale(
    JSContext* cx, HandleObject internals,
    mozilla::Maybe<mozilla::intl::DateTimeFormat::HourCycle> hourCycle =
        mozilla::Nothing()) {
  RootedValue value(cx);
  if (!GetProperty(cx, internals, internals, cx->names().locale, &value)) {
    return nullptr;
  }

  // ICU expects calendar, numberingSystem, and hourCycle as Unicode locale
  // extensions on locale.

  mozilla::intl::Locale tag;
  {
    Rooted<JSLinearString*> locale(cx, value.toString()->ensureLinear(cx));
    if (!locale) {
      return nullptr;
    }

    if (!intl::ParseLocale(cx, locale, tag)) {
      return nullptr;
    }
  }

  JS::RootedVector<intl::UnicodeExtensionKeyword> keywords(cx);

  if (!GetProperty(cx, internals, internals, cx->names().calendar, &value)) {
    return nullptr;
  }

  {
    JSLinearString* calendar = value.toString()->ensureLinear(cx);
    if (!calendar) {
      return nullptr;
    }

    if (!keywords.emplaceBack("ca", calendar)) {
      return nullptr;
    }
  }

  if (!GetProperty(cx, internals, internals, cx->names().numberingSystem,
                   &value)) {
    return nullptr;
  }

  {
    JSLinearString* numberingSystem = value.toString()->ensureLinear(cx);
    if (!numberingSystem) {
      return nullptr;
    }

    if (!keywords.emplaceBack("nu", numberingSystem)) {
      return nullptr;
    }
  }

  if (hourCycle) {
    JSAtom* hourCycleStr;
    switch (*hourCycle) {
      case mozilla::intl::DateTimeFormat::HourCycle::H11:
        hourCycleStr = cx->names().h11;
        break;
      case mozilla::intl::DateTimeFormat::HourCycle::H12:
        hourCycleStr = cx->names().h12;
        break;
      case mozilla::intl::DateTimeFormat::HourCycle::H23:
        hourCycleStr = cx->names().h23;
        break;
      case mozilla::intl::DateTimeFormat::HourCycle::H24:
        hourCycleStr = cx->names().h24;
        break;
    }

    if (!keywords.emplaceBack("hc", hourCycleStr)) {
      return nullptr;
    }
  }

  // |ApplyUnicodeExtensionToTag| applies the new keywords to the front of
  // the Unicode extension subtag. We're then relying on ICU to follow RFC
  // 6067, which states that any trailing keywords using the same key
  // should be ignored.
  if (!intl::ApplyUnicodeExtensionToTag(cx, tag, keywords)) {
    return nullptr;
  }

  FormatBuffer<char> buffer(cx);
  if (auto result = tag.ToString(buffer); result.isErr()) {
    intl::ReportInternalError(cx, result.unwrapErr());
    return nullptr;
  }
  return buffer.extractStringZ();
}

// Reads |property| from |internals| and, when it is a string, stores the
// matching Text value ("narrow", "short", otherwise asserted to be "long") in
// |*text|. A non-string value is asserted to be undefined and leaves |*text|
// untouched. Returns false only if a property read or linearization fails.
static bool AssignTextComponent(
    JSContext* cx, HandleObject internals, Handle<PropertyName*> property,
    mozilla::Maybe<mozilla::intl::DateTimeFormat::Text>* text) {
  RootedValue value(cx);
  if (!GetProperty(cx, internals, internals, property, &value)) {
    return false;
  }

  if (value.isString()) {
    JSLinearString* string = value.toString()->ensureLinear(cx);
    if (!string) {
      return false;
    }
    if (StringEqualsLiteral(string, "narrow")) {
      *text = mozilla::Some(mozilla::intl::DateTimeFormat::Text::Narrow);
    } else if (StringEqualsLiteral(string, "short")) {
      *text = mozilla::Some(mozilla::intl::DateTimeFormat::Text::Short);
    } else {
      MOZ_ASSERT(StringEqualsLiteral(string, "long"));
      *text = mozilla::Some(mozilla::intl::DateTimeFormat::Text::Long);
    }
  } else {
    MOZ_ASSERT(value.isUndefined());
  }

  return true;
}

// Same contract as AssignTextComponent, for Numeric values: "numeric",
// otherwise asserted to be "2-digit".
static bool AssignNumericComponent(
    JSContext* cx, HandleObject internals, Handle<PropertyName*> property,
    mozilla::Maybe<mozilla::intl::DateTimeFormat::Numeric>* numeric) {
  RootedValue value(cx);
  if (!GetProperty(cx, internals, internals, property, &value)) {
    return false;
  }

  if (value.isString()) {
    JSLinearString* string = value.toString()->ensureLinear(cx);
    if (!string) {
      return false;
    }
    if (StringEqualsLiteral(string, "numeric")) {
      *numeric = mozilla::Some(mozilla::intl::DateTimeFormat::Numeric::Numeric);
    } else {
      MOZ_ASSERT(StringEqualsLiteral(string, "2-digit"));
      *numeric =
          mozilla::Some(mozilla::intl::DateTimeFormat::Numeric::TwoDigit);
    }
  } else {
    MOZ_ASSERT(value.isUndefined());
  }

  return true;
}

// Same contract as AssignTextComponent, for Month values: "numeric",
// "2-digit", "long", "short", otherwise asserted to be "narrow".
static bool AssignMonthComponent(
    JSContext* cx, HandleObject internals, Handle<PropertyName*> property,
    mozilla::Maybe<mozilla::intl::DateTimeFormat::Month>* month) {
  RootedValue value(cx);
  if (!GetProperty(cx, internals, internals, property, &value)) {
    return false;
  }

  if (value.isString()) {
    JSLinearString* string = value.toString()->ensureLinear(cx);
    if (!string) {
      return false;
    }
    if (StringEqualsLiteral(string, "numeric")) {
      *month = mozilla::Some(mozilla::intl::DateTimeFormat::Month::Numeric);
    } else if (StringEqualsLiteral(string, "2-digit")) {
      *month = mozilla::Some(mozilla::intl::DateTimeFormat::Month::TwoDigit);
    } else if (StringEqualsLiteral(string, "long")) {
      *month = mozilla::Some(mozilla::intl::DateTimeFormat::Month::Long);
    } else if (StringEqualsLiteral(string, "short")) {
      *month = mozilla::Some(mozilla::intl::DateTimeFormat::Month::Short);
    } else {
      MOZ_ASSERT(StringEqualsLiteral(string, "narrow"));
      *month = mozilla::Some(mozilla::intl::DateTimeFormat::Month::Narrow);
    }
  } else {
    MOZ_ASSERT(value.isUndefined());
  }

  return true;
}

// Same contract as AssignTextComponent, for TimeZoneName values: "long",
// "short", "shortOffset", "longOffset", "shortGeneric", otherwise asserted to
// be "longGeneric".
static bool AssignTimeZoneNameComponent(
    JSContext* cx, HandleObject internals, Handle<PropertyName*> property,
    mozilla::Maybe<mozilla::intl::DateTimeFormat::TimeZoneName>* tzName) {
  RootedValue value(cx);
  if (!GetProperty(cx, internals, internals, property, &value)) {
    return false;
  }

  if (value.isString()) {
    JSLinearString* string = value.toString()->ensureLinear(cx);
    if (!string) {
      return false;
    }
    if (StringEqualsLiteral(string, "long")) {
      *tzName =
          mozilla::Some(mozilla::intl::DateTimeFormat::TimeZoneName::Long);
    } else if (StringEqualsLiteral(string, "short")) {
      *tzName =
          mozilla::Some(mozilla::intl::DateTimeFormat::TimeZoneName::Short);
    } else if (StringEqualsLiteral(string, "shortOffset")) {
      *tzName = mozilla::Some(
          mozilla::intl::DateTimeFormat::TimeZoneName::ShortOffset);
    } else if (StringEqualsLiteral(string, "longOffset")) {
      *tzName = mozilla::Some(
          mozilla::intl::DateTimeFormat::TimeZoneName::LongOffset);
    } else if (StringEqualsLiteral(string, "shortGeneric")) {
      *tzName = mozilla::Some(
          mozilla::intl::DateTimeFormat::TimeZoneName::ShortGeneric);
    } else {
      MOZ_ASSERT(StringEqualsLiteral(string, "longGeneric"));
      *tzName = mozilla::Some(
          mozilla::intl::DateTimeFormat::TimeZoneName::LongGeneric);
    }
  } else {
    MOZ_ASSERT(value.isUndefined());
  }

  return true;
}

// Same contract as AssignTextComponent, for HourCycle values: "h11", "h12",
// "h23", otherwise asserted to be "h24".
static bool AssignHourCycleComponent(
    JSContext* cx, HandleObject internals, Handle<PropertyName*> property,
    mozilla::Maybe<mozilla::intl::DateTimeFormat::HourCycle>* hourCycle) {
  RootedValue value(cx);
  if (!GetProperty(cx, internals, internals, property, &value)) {
    return false;
  }

  if (value.isString()) {
    JSLinearString* string = value.toString()->ensureLinear(cx);
    if (!string) {
      return false;
    }
    if (StringEqualsLiteral(string, "h11")) {
      *hourCycle = mozilla::Some(mozilla::intl::DateTimeFormat::HourCycle::H11);
    } else if (StringEqualsLiteral(string, "h12")) {
      *hourCycle = mozilla::Some(mozilla::intl::DateTimeFormat::HourCycle::H12);
    } else if (StringEqualsLiteral(string, "h23")) {
      *hourCycle = mozilla::Some(mozilla::intl::DateTimeFormat::HourCycle::H23);
    } else {
      MOZ_ASSERT(StringEqualsLiteral(string, "h24"));
      *hourCycle = mozilla::Some(mozilla::intl::DateTimeFormat::HourCycle::H24);
    }
  } else {
    MOZ_ASSERT(value.isUndefined());
  }

  return true;
}

// Reads the "hour12" property from |internals| and, when it is a boolean,
// stores it in |*hour12|. Any other value is asserted to be undefined and
// leaves |*hour12| untouched. Returns false only if the property read fails.
static bool AssignHour12Component(JSContext* cx, HandleObject internals,
                                  mozilla::Maybe<bool>* hour12) {
  RootedValue value(cx);
  if (!GetProperty(cx, internals, internals, cx->names().hour12, &value)) {
    return false;
  }
  if (value.isBoolean()) {
    *hour12 = mozilla::Some(value.toBoolean());
  } else {
    MOZ_ASSERT(value.isUndefined());
  }

  return true;
}

// Same contract as AssignTextComponent, for Style values: "full", "long",
// "medium", otherwise asserted to be "short".
static bool AssignDateTimeLength(
    JSContext* cx, HandleObject internals, Handle<PropertyName*> property,
    mozilla::Maybe<mozilla::intl::DateTimeFormat::Style>* style) {
  RootedValue value(cx);
  if (!GetProperty(cx, internals, internals, property, &value)) {
    return false;
  }

  if (value.isString()) {
    JSLinearString* string = value.toString()->ensureLinear(cx);
    if (!string) {
      return false;
    }
    if (StringEqualsLiteral(string, "full")) {
      *style = mozilla::Some(mozilla::intl::DateTimeFormat::Style::Full);
    } else if (StringEqualsLiteral(string, "long")) {
      *style = mozilla::Some(mozilla::intl::DateTimeFormat::Style::Long);
    } else if (StringEqualsLiteral(string, "medium")) {
      *style = mozilla::Some(mozilla::intl::DateTimeFormat::Style::Medium);
    } else {
      MOZ_ASSERT(StringEqualsLiteral(string, "short"));
      *style = mozilla::Some(mozilla::intl::DateTimeFormat::Style::Short);
    }
  } else {
    MOZ_ASSERT(value.isUndefined());
  }

  return true;
}

// Fixed-size holder for a custom time zone id of the form "GMT±hh:mm", built
// from a normalized "±hh:mm" offset string. Instances are created only through
// |from| (the default constructor is private) and convert to a span over the
// stored characters.
class TimeZoneOffsetString {
  static constexpr std::u16string_view GMT = u"GMT";

  // Time zone offset string format is "±hh:mm".
  static constexpr size_t offsetLength = 6;

  // ICU custom time zones are in the format "GMT±hh:mm".
  char16_t timeZone_[GMT.size() + offsetLength] = {};

  TimeZoneOffsetString() = default;

 public:
  TimeZoneOffsetString(const TimeZoneOffsetString& other) { *this = other; }

  TimeZoneOffsetString& operator=(const TimeZoneOffsetString& other) {
    std::copy_n(other.timeZone_, std::size(timeZone_), timeZone_);
    return *this;
  }

  // The span covers the whole array; the buffer carries no null terminator.
  operator mozilla::Span<const char16_t>() const {
    return mozilla::Span(timeZone_);
  }

  /**
   * |timeZone| is either a canonical IANA time zone identifier or a normalized
   * time zone offset string.
   *
   * Returns Nothing when |timeZone| does not start with "+" or "-", and
   * otherwise the "GMT"-prefixed copy of the offset string.
   */
  static mozilla::Maybe<TimeZoneOffsetString> from(
      const JSLinearString* timeZone) {
    MOZ_RELEASE_ASSERT(!timeZone->empty(), "time zone is a non-empty string");

    // If the time zone string starts with either "+" or "-", it is a normalized
    // time zone offset string, because (canonical) IANA time zone identifiers
    // can't start with "+" or "-".
    char16_t timeZoneSign = timeZone->latin1OrTwoByteChar(0);
    MOZ_ASSERT(timeZoneSign != 0x2212,
               "Minus sign is normalized to Ascii minus");
    if (timeZoneSign != '+' && timeZoneSign != '-') {
      return mozilla::Nothing();
    }

    // Release assert because we don't want CopyChars to write out-of-bounds.
    MOZ_RELEASE_ASSERT(timeZone->length() == offsetLength);

    // Self-hosted code has normalized offset strings to the format "±hh:mm".
    MOZ_ASSERT(mozilla::IsAsciiDigit(timeZone->latin1OrTwoByteChar(1)));
    MOZ_ASSERT(mozilla::IsAsciiDigit(timeZone->latin1OrTwoByteChar(2)));
    MOZ_ASSERT(timeZone->latin1OrTwoByteChar(3) == ':');
    MOZ_ASSERT(mozilla::IsAsciiDigit(timeZone->latin1OrTwoByteChar(4)));
    MOZ_ASSERT(mozilla::IsAsciiDigit(timeZone->latin1OrTwoByteChar(5)));

    // Self-hosted code has verified the offset is at most ±23:59.
#ifdef DEBUG
    // Debug-only helper: parses the two ASCII digits starting at |offset|.
    auto twoDigit = [&](size_t offset) {
      auto c1 = timeZone->latin1OrTwoByteChar(offset);
      auto c2 = timeZone->latin1OrTwoByteChar(offset + 1);
      return mozilla::AsciiAlphanumericToNumber(c1) * 10 +
             mozilla::AsciiAlphanumericToNumber(c2);
    };

    int32_t hours = twoDigit(1);
    MOZ_ASSERT(0 <= hours && hours <= 23);

    int32_t minutes = twoDigit(4);
    MOZ_ASSERT(0 <= minutes && minutes <= 59);
#endif

    TimeZoneOffsetString result{};

    // Copy the string "GMT" followed by the offset string.
    size_t copied = GMT.copy(result.timeZone_, GMT.size());
    CopyChars(result.timeZone_ + copied, *timeZone);

    return mozilla::Some(result);
  }
};

/**
 * Returns a new mozilla::intl::DateTimeFormat with the locale and date-time
 * formatting options of the given DateTimeFormat.
+ */ +static mozilla::intl::DateTimeFormat* NewDateTimeFormat( + JSContext* cx, Handle<DateTimeFormatObject*> dateTimeFormat) { + RootedValue value(cx); + + RootedObject internals(cx, intl::GetInternalsObject(cx, dateTimeFormat)); + if (!internals) { + return nullptr; + } + + UniqueChars locale = DateTimeFormatLocale(cx, internals); + if (!locale) { + return nullptr; + } + + if (!GetProperty(cx, internals, internals, cx->names().timeZone, &value)) { + return nullptr; + } + + Rooted<JSLinearString*> timeZoneString(cx, + value.toString()->ensureLinear(cx)); + if (!timeZoneString) { + return nullptr; + } + + AutoStableStringChars timeZone(cx); + mozilla::Span<const char16_t> timeZoneChars{}; + + auto timeZoneOffset = TimeZoneOffsetString::from(timeZoneString); + if (timeZoneOffset) { + timeZoneChars = *timeZoneOffset; + } else { + if (!timeZone.initTwoByte(cx, timeZoneString)) { + return nullptr; + } + timeZoneChars = timeZone.twoByteRange(); + } + + if (!GetProperty(cx, internals, internals, cx->names().pattern, &value)) { + return nullptr; + } + bool hasPattern = value.isString(); + + if (!GetProperty(cx, internals, internals, cx->names().timeStyle, &value)) { + return nullptr; + } + bool hasStyle = value.isString(); + if (!hasStyle) { + if (!GetProperty(cx, internals, internals, cx->names().dateStyle, &value)) { + return nullptr; + } + hasStyle = value.isString(); + } + + mozilla::UniquePtr<mozilla::intl::DateTimeFormat> df = nullptr; + if (hasPattern) { + // This is a DateTimeFormat defined by a pattern option. This is internal + // to Mozilla, and not part of the ECMA-402 API. 
+ if (!GetProperty(cx, internals, internals, cx->names().pattern, &value)) { + return nullptr; + } + + AutoStableStringChars pattern(cx); + if (!pattern.initTwoByte(cx, value.toString())) { + return nullptr; + } + + auto dfResult = mozilla::intl::DateTimeFormat::TryCreateFromPattern( + mozilla::MakeStringSpan(locale.get()), pattern.twoByteRange(), + mozilla::Some(timeZoneChars)); + if (dfResult.isErr()) { + intl::ReportInternalError(cx, dfResult.unwrapErr()); + return nullptr; + } + + df = dfResult.unwrap(); + } else if (hasStyle) { + // This is a DateTimeFormat defined by a time style or date style. + mozilla::intl::DateTimeFormat::StyleBag style; + if (!AssignDateTimeLength(cx, internals, cx->names().timeStyle, + &style.time)) { + return nullptr; + } + if (!AssignDateTimeLength(cx, internals, cx->names().dateStyle, + &style.date)) { + return nullptr; + } + if (!AssignHourCycleComponent(cx, internals, cx->names().hourCycle, + &style.hourCycle)) { + return nullptr; + } + + if (!AssignHour12Component(cx, internals, &style.hour12)) { + return nullptr; + } + + SharedIntlData& sharedIntlData = cx->runtime()->sharedIntlData.ref(); + + mozilla::intl::DateTimePatternGenerator* gen = + sharedIntlData.getDateTimePatternGenerator(cx, locale.get()); + if (!gen) { + return nullptr; + } + auto dfResult = mozilla::intl::DateTimeFormat::TryCreateFromStyle( + mozilla::MakeStringSpan(locale.get()), style, gen, + mozilla::Some(timeZoneChars)); + if (dfResult.isErr()) { + intl::ReportInternalError(cx, dfResult.unwrapErr()); + return nullptr; + } + df = dfResult.unwrap(); + } else { + // This is a DateTimeFormat defined by a components bag. 
+ mozilla::intl::DateTimeFormat::ComponentsBag bag; + + if (!AssignTextComponent(cx, internals, cx->names().era, &bag.era)) { + return nullptr; + } + if (!AssignNumericComponent(cx, internals, cx->names().year, &bag.year)) { + return nullptr; + } + if (!AssignMonthComponent(cx, internals, cx->names().month, &bag.month)) { + return nullptr; + } + if (!AssignNumericComponent(cx, internals, cx->names().day, &bag.day)) { + return nullptr; + } + if (!AssignTextComponent(cx, internals, cx->names().weekday, + &bag.weekday)) { + return nullptr; + } + if (!AssignNumericComponent(cx, internals, cx->names().hour, &bag.hour)) { + return nullptr; + } + if (!AssignNumericComponent(cx, internals, cx->names().minute, + &bag.minute)) { + return nullptr; + } + if (!AssignNumericComponent(cx, internals, cx->names().second, + &bag.second)) { + return nullptr; + } + if (!AssignTimeZoneNameComponent(cx, internals, cx->names().timeZoneName, + &bag.timeZoneName)) { + return nullptr; + } + if (!AssignHourCycleComponent(cx, internals, cx->names().hourCycle, + &bag.hourCycle)) { + return nullptr; + } + if (!AssignTextComponent(cx, internals, cx->names().dayPeriod, + &bag.dayPeriod)) { + return nullptr; + } + if (!AssignHour12Component(cx, internals, &bag.hour12)) { + return nullptr; + } + + if (!GetProperty(cx, internals, internals, + cx->names().fractionalSecondDigits, &value)) { + return nullptr; + } + if (value.isInt32()) { + bag.fractionalSecondDigits = mozilla::Some(value.toInt32()); + } else { + MOZ_ASSERT(value.isUndefined()); + } + + SharedIntlData& sharedIntlData = cx->runtime()->sharedIntlData.ref(); + auto* dtpg = sharedIntlData.getDateTimePatternGenerator(cx, locale.get()); + if (!dtpg) { + return nullptr; + } + + auto dfResult = mozilla::intl::DateTimeFormat::TryCreateFromComponents( + mozilla::MakeStringSpan(locale.get()), bag, dtpg, + mozilla::Some(timeZoneChars)); + if (dfResult.isErr()) { + intl::ReportInternalError(cx, dfResult.unwrapErr()); + return nullptr; + } + df = 
dfResult.unwrap(); + } + + // ECMAScript requires the Gregorian calendar to be used from the beginning + // of ECMAScript time. + df->SetStartTimeIfGregorian(StartOfTime); + + return df.release(); +} + +static mozilla::intl::DateTimeFormat* GetOrCreateDateTimeFormat( + JSContext* cx, Handle<DateTimeFormatObject*> dateTimeFormat) { + // Obtain a cached mozilla::intl::DateTimeFormat object. + mozilla::intl::DateTimeFormat* df = dateTimeFormat->getDateFormat(); + if (df) { + return df; + } + + df = NewDateTimeFormat(cx, dateTimeFormat); + if (!df) { + return nullptr; + } + dateTimeFormat->setDateFormat(df); + + intl::AddICUCellMemory(dateTimeFormat, + DateTimeFormatObject::UDateFormatEstimatedMemoryUse); + return df; +} + +template <typename T> +static bool SetResolvedProperty(JSContext* cx, HandleObject resolved, + Handle<PropertyName*> name, + mozilla::Maybe<T> intlProp) { + if (!intlProp) { + return true; + } + JSString* str = NewStringCopyZ<CanGC>( + cx, mozilla::intl::DateTimeFormat::ToString(*intlProp)); + if (!str) { + return false; + } + RootedValue value(cx, StringValue(str)); + return DefineDataProperty(cx, resolved, name, value); +} + +bool js::intl_resolveDateTimeFormatComponents(JSContext* cx, unsigned argc, + Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 3); + MOZ_ASSERT(args[0].isObject()); + MOZ_ASSERT(args[1].isObject()); + MOZ_ASSERT(args[2].isBoolean()); + + Rooted<DateTimeFormatObject*> dateTimeFormat(cx); + dateTimeFormat = &args[0].toObject().as<DateTimeFormatObject>(); + + RootedObject resolved(cx, &args[1].toObject()); + + bool includeDateTimeFields = args[2].toBoolean(); + + mozilla::intl::DateTimeFormat* df = + GetOrCreateDateTimeFormat(cx, dateTimeFormat); + if (!df) { + return false; + } + + auto result = df->ResolveComponents(); + if (result.isErr()) { + intl::ReportInternalError(cx, result.unwrapErr()); + return false; + } + + mozilla::intl::DateTimeFormat::ComponentsBag components = 
result.unwrap(); + + // Map the resolved mozilla::intl::DateTimeFormat::ComponentsBag to the + // options object as returned by DateTimeFormat.prototype.resolvedOptions. + // + // Resolved options must match the ordering as defined in: + // https://tc39.es/ecma402/#sec-intl.datetimeformat.prototype.resolvedoptions + + if (!SetResolvedProperty(cx, resolved, cx->names().hourCycle, + components.hourCycle)) { + return false; + } + + if (components.hour12) { + RootedValue value(cx, BooleanValue(*components.hour12)); + if (!DefineDataProperty(cx, resolved, cx->names().hour12, value)) { + return false; + } + } + + if (!includeDateTimeFields) { + args.rval().setUndefined(); + // Do not include date time fields. + return true; + } + + if (!SetResolvedProperty(cx, resolved, cx->names().weekday, + components.weekday)) { + return false; + } + if (!SetResolvedProperty(cx, resolved, cx->names().era, components.era)) { + return false; + } + if (!SetResolvedProperty(cx, resolved, cx->names().year, components.year)) { + return false; + } + if (!SetResolvedProperty(cx, resolved, cx->names().month, components.month)) { + return false; + } + if (!SetResolvedProperty(cx, resolved, cx->names().day, components.day)) { + return false; + } + if (!SetResolvedProperty(cx, resolved, cx->names().dayPeriod, + components.dayPeriod)) { + return false; + } + if (!SetResolvedProperty(cx, resolved, cx->names().hour, components.hour)) { + return false; + } + if (!SetResolvedProperty(cx, resolved, cx->names().minute, + components.minute)) { + return false; + } + if (!SetResolvedProperty(cx, resolved, cx->names().second, + components.second)) { + return false; + } + if (!SetResolvedProperty(cx, resolved, cx->names().timeZoneName, + components.timeZoneName)) { + return false; + } + + if (components.fractionalSecondDigits) { + RootedValue value(cx, Int32Value(*components.fractionalSecondDigits)); + if (!DefineDataProperty(cx, resolved, cx->names().fractionalSecondDigits, + value)) { + return false; + } + 
} + + args.rval().setUndefined(); + return true; +} + +static bool intl_FormatDateTime(JSContext* cx, + const mozilla::intl::DateTimeFormat* df, + ClippedTime x, MutableHandleValue result) { + MOZ_ASSERT(x.isValid()); + + FormatBuffer<char16_t, INITIAL_CHAR_BUFFER_SIZE> buffer(cx); + auto dfResult = df->TryFormat(x.toDouble(), buffer); + if (dfResult.isErr()) { + intl::ReportInternalError(cx, dfResult.unwrapErr()); + return false; + } + + JSString* str = buffer.toString(cx); + if (!str) { + return false; + } + + result.setString(str); + return true; +} + +using FieldType = js::ImmutableTenuredPtr<PropertyName*> JSAtomState::*; + +static FieldType GetFieldTypeForPartType(mozilla::intl::DateTimePartType type) { + switch (type) { + case mozilla::intl::DateTimePartType::Literal: + return &JSAtomState::literal; + case mozilla::intl::DateTimePartType::Era: + return &JSAtomState::era; + case mozilla::intl::DateTimePartType::Year: + return &JSAtomState::year; + case mozilla::intl::DateTimePartType::YearName: + return &JSAtomState::yearName; + case mozilla::intl::DateTimePartType::RelatedYear: + return &JSAtomState::relatedYear; + case mozilla::intl::DateTimePartType::Month: + return &JSAtomState::month; + case mozilla::intl::DateTimePartType::Day: + return &JSAtomState::day; + case mozilla::intl::DateTimePartType::Hour: + return &JSAtomState::hour; + case mozilla::intl::DateTimePartType::Minute: + return &JSAtomState::minute; + case mozilla::intl::DateTimePartType::Second: + return &JSAtomState::second; + case mozilla::intl::DateTimePartType::Weekday: + return &JSAtomState::weekday; + case mozilla::intl::DateTimePartType::DayPeriod: + return &JSAtomState::dayPeriod; + case mozilla::intl::DateTimePartType::TimeZoneName: + return &JSAtomState::timeZoneName; + case mozilla::intl::DateTimePartType::FractionalSecondDigits: + return &JSAtomState::fractionalSecond; + case mozilla::intl::DateTimePartType::Unknown: + return &JSAtomState::unknown; + } + + MOZ_CRASH( + "unenumerated, 
undocumented format field returned " + "by iterator"); +} + +static FieldType GetFieldTypeForPartSource( + mozilla::intl::DateTimePartSource source) { + switch (source) { + case mozilla::intl::DateTimePartSource::Shared: + return &JSAtomState::shared; + case mozilla::intl::DateTimePartSource::StartRange: + return &JSAtomState::startRange; + case mozilla::intl::DateTimePartSource::EndRange: + return &JSAtomState::endRange; + } + + MOZ_CRASH( + "unenumerated, undocumented format field returned " + "by iterator"); +} + +// A helper function to create an ArrayObject from DateTimePart objects. +// When hasNoSource is true, we don't need to create the ||Source|| property for +// the DateTimePart object. +static bool CreateDateTimePartArray( + JSContext* cx, mozilla::Span<const char16_t> formattedSpan, + bool hasNoSource, const mozilla::intl::DateTimePartVector& parts, + MutableHandleValue result) { + RootedString overallResult(cx, NewStringCopy<CanGC>(cx, formattedSpan)); + if (!overallResult) { + return false; + } + + Rooted<ArrayObject*> partsArray( + cx, NewDenseFullyAllocatedArray(cx, parts.length())); + if (!partsArray) { + return false; + } + partsArray->ensureDenseInitializedLength(0, parts.length()); + + if (overallResult->length() == 0) { + // An empty string contains no parts, so avoid extra work below. 
+ result.setObject(*partsArray); + return true; + } + + RootedObject singlePart(cx); + RootedValue val(cx); + + size_t index = 0; + size_t beginIndex = 0; + for (const mozilla::intl::DateTimePart& part : parts) { + singlePart = NewPlainObject(cx); + if (!singlePart) { + return false; + } + + FieldType type = GetFieldTypeForPartType(part.mType); + val = StringValue(cx->names().*type); + if (!DefineDataProperty(cx, singlePart, cx->names().type, val)) { + return false; + } + + MOZ_ASSERT(part.mEndIndex > beginIndex); + JSLinearString* partStr = NewDependentString(cx, overallResult, beginIndex, + part.mEndIndex - beginIndex); + if (!partStr) { + return false; + } + val = StringValue(partStr); + if (!DefineDataProperty(cx, singlePart, cx->names().value, val)) { + return false; + } + + if (!hasNoSource) { + FieldType source = GetFieldTypeForPartSource(part.mSource); + val = StringValue(cx->names().*source); + if (!DefineDataProperty(cx, singlePart, cx->names().source, val)) { + return false; + } + } + + beginIndex = part.mEndIndex; + partsArray->initDenseElement(index++, ObjectValue(*singlePart)); + } + + MOZ_ASSERT(index == parts.length()); + MOZ_ASSERT(beginIndex == formattedSpan.size()); + result.setObject(*partsArray); + return true; +} + +static bool intl_FormatToPartsDateTime(JSContext* cx, + const mozilla::intl::DateTimeFormat* df, + ClippedTime x, bool hasNoSource, + MutableHandleValue result) { + MOZ_ASSERT(x.isValid()); + + FormatBuffer<char16_t, intl::INITIAL_CHAR_BUFFER_SIZE> buffer(cx); + mozilla::intl::DateTimePartVector parts; + auto r = df->TryFormatToParts(x.toDouble(), buffer, parts); + if (r.isErr()) { + intl::ReportInternalError(cx, r.unwrapErr()); + return false; + } + + return CreateDateTimePartArray(cx, buffer, hasNoSource, parts, result); +} + +bool js::intl_FormatDateTime(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 3); + MOZ_ASSERT(args[0].isObject()); + 
MOZ_ASSERT(args[1].isNumber()); + MOZ_ASSERT(args[2].isBoolean()); + + Rooted<DateTimeFormatObject*> dateTimeFormat(cx); + dateTimeFormat = &args[0].toObject().as<DateTimeFormatObject>(); + + bool formatToParts = args[2].toBoolean(); + + ClippedTime x = TimeClip(args[1].toNumber()); + if (!x.isValid()) { + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, + JSMSG_DATE_NOT_FINITE, "DateTimeFormat", + formatToParts ? "formatToParts" : "format"); + return false; + } + + mozilla::intl::DateTimeFormat* df = + GetOrCreateDateTimeFormat(cx, dateTimeFormat); + if (!df) { + return false; + } + + // Use the DateTimeFormat to actually format the time stamp. + return formatToParts ? intl_FormatToPartsDateTime( + cx, df, x, /* hasNoSource */ true, args.rval()) + : intl_FormatDateTime(cx, df, x, args.rval()); +} + +/** + * Returns a new DateIntervalFormat with the locale and date-time formatting + * options of the given DateTimeFormat. + */ +static mozilla::intl::DateIntervalFormat* NewDateIntervalFormat( + JSContext* cx, Handle<DateTimeFormatObject*> dateTimeFormat, + mozilla::intl::DateTimeFormat& mozDtf) { + RootedValue value(cx); + RootedObject internals(cx, intl::GetInternalsObject(cx, dateTimeFormat)); + if (!internals) { + return nullptr; + } + + FormatBuffer<char16_t, intl::INITIAL_CHAR_BUFFER_SIZE> pattern(cx); + auto result = mozDtf.GetPattern(pattern); + if (result.isErr()) { + intl::ReportInternalError(cx, result.unwrapErr()); + return nullptr; + } + + // Determine the hour cycle used in the resolved pattern. 
+ mozilla::Maybe<mozilla::intl::DateTimeFormat::HourCycle> hcPattern = + mozilla::intl::DateTimeFormat::HourCycleFromPattern(pattern); + + UniqueChars locale = DateTimeFormatLocale(cx, internals, hcPattern); + if (!locale) { + return nullptr; + } + + if (!GetProperty(cx, internals, internals, cx->names().timeZone, &value)) { + return nullptr; + } + + Rooted<JSLinearString*> timeZoneString(cx, + value.toString()->ensureLinear(cx)); + if (!timeZoneString) { + return nullptr; + } + + AutoStableStringChars timeZone(cx); + mozilla::Span<const char16_t> timeZoneChars{}; + + auto timeZoneOffset = TimeZoneOffsetString::from(timeZoneString); + if (timeZoneOffset) { + timeZoneChars = *timeZoneOffset; + } else { + if (!timeZone.initTwoByte(cx, timeZoneString)) { + return nullptr; + } + timeZoneChars = timeZone.twoByteRange(); + } + + FormatBuffer<char16_t, intl::INITIAL_CHAR_BUFFER_SIZE> skeleton(cx); + auto skelResult = mozDtf.GetOriginalSkeleton(skeleton); + if (skelResult.isErr()) { + intl::ReportInternalError(cx, skelResult.unwrapErr()); + return nullptr; + } + + auto dif = mozilla::intl::DateIntervalFormat::TryCreate( + mozilla::MakeStringSpan(locale.get()), skeleton, timeZoneChars); + + if (dif.isErr()) { + js::intl::ReportInternalError(cx, dif.unwrapErr()); + return nullptr; + } + + return dif.unwrap().release(); +} + +static mozilla::intl::DateIntervalFormat* GetOrCreateDateIntervalFormat( + JSContext* cx, Handle<DateTimeFormatObject*> dateTimeFormat, + mozilla::intl::DateTimeFormat& mozDtf) { + // Obtain a cached DateIntervalFormat object. 
+ mozilla::intl::DateIntervalFormat* dif = + dateTimeFormat->getDateIntervalFormat(); + if (dif) { + return dif; + } + + dif = NewDateIntervalFormat(cx, dateTimeFormat, mozDtf); + if (!dif) { + return nullptr; + } + dateTimeFormat->setDateIntervalFormat(dif); + + intl::AddICUCellMemory( + dateTimeFormat, + DateTimeFormatObject::UDateIntervalFormatEstimatedMemoryUse); + return dif; +} + +/** + * PartitionDateTimeRangePattern ( dateTimeFormat, x, y ) + */ +static bool PartitionDateTimeRangePattern( + JSContext* cx, const mozilla::intl::DateTimeFormat* df, + const mozilla::intl::DateIntervalFormat* dif, + mozilla::intl::AutoFormattedDateInterval& formatted, ClippedTime x, + ClippedTime y, bool* equal) { + MOZ_ASSERT(x.isValid()); + MOZ_ASSERT(y.isValid()); + + // We can't access the calendar used by UDateIntervalFormat to change it to a + // proleptic Gregorian calendar. Instead we need to call a different formatter + // function which accepts UCalendar instead of UDate. + // But creating new UCalendar objects for each call is slow, so when we can + // ensure that the input dates are later than the Gregorian change date, + // directly call the formatter functions taking UDate. + + // The Gregorian change date "1582-10-15T00:00:00.000Z". + constexpr double GregorianChangeDate = -12219292800000.0; + + // Add a full day to account for time zone offsets. + constexpr double GregorianChangeDatePlusOneDay = + GregorianChangeDate + msPerDay; + + mozilla::intl::ICUResult result = Ok(); + if (x.toDouble() < GregorianChangeDatePlusOneDay || + y.toDouble() < GregorianChangeDatePlusOneDay) { + // Create calendar objects for the start and end date by cloning the date + // formatter calendar. The date formatter calendar already has the correct + // time zone set and was changed to use a proleptic Gregorian calendar. 
+ auto startCal = df->CloneCalendar(x.toDouble()); + if (startCal.isErr()) { + intl::ReportInternalError(cx, startCal.unwrapErr()); + return false; + } + + auto endCal = df->CloneCalendar(y.toDouble()); + if (endCal.isErr()) { + intl::ReportInternalError(cx, endCal.unwrapErr()); + return false; + } + + result = dif->TryFormatCalendar(*startCal.unwrap(), *endCal.unwrap(), + formatted, equal); + } else { + // The common fast path which doesn't require creating calendar objects. + result = + dif->TryFormatDateTime(x.toDouble(), y.toDouble(), formatted, equal); + } + + if (result.isErr()) { + intl::ReportInternalError(cx, result.unwrapErr()); + return false; + } + + return true; +} + +/** + * FormatDateTimeRange( dateTimeFormat, x, y ) + */ +static bool FormatDateTimeRange(JSContext* cx, + const mozilla::intl::DateTimeFormat* df, + const mozilla::intl::DateIntervalFormat* dif, + ClippedTime x, ClippedTime y, + MutableHandleValue result) { + mozilla::intl::AutoFormattedDateInterval formatted; + if (!formatted.IsValid()) { + intl::ReportInternalError(cx, formatted.GetError()); + return false; + } + + bool equal; + if (!PartitionDateTimeRangePattern(cx, df, dif, formatted, x, y, &equal)) { + return false; + } + + // PartitionDateTimeRangePattern, step 12. 
+ if (equal) { + return intl_FormatDateTime(cx, df, x, result); + } + + auto spanResult = formatted.ToSpan(); + if (spanResult.isErr()) { + intl::ReportInternalError(cx, spanResult.unwrapErr()); + return false; + } + JSString* resultStr = NewStringCopy<CanGC>(cx, spanResult.unwrap()); + if (!resultStr) { + return false; + } + + result.setString(resultStr); + return true; +} + +/** + * FormatDateTimeRangeToParts ( dateTimeFormat, x, y ) + */ +static bool FormatDateTimeRangeToParts( + JSContext* cx, const mozilla::intl::DateTimeFormat* df, + const mozilla::intl::DateIntervalFormat* dif, ClippedTime x, ClippedTime y, + MutableHandleValue result) { + mozilla::intl::AutoFormattedDateInterval formatted; + if (!formatted.IsValid()) { + intl::ReportInternalError(cx, formatted.GetError()); + return false; + } + + bool equal; + if (!PartitionDateTimeRangePattern(cx, df, dif, formatted, x, y, &equal)) { + return false; + } + + // PartitionDateTimeRangePattern, step 12. + if (equal) { + return intl_FormatToPartsDateTime(cx, df, x, /* hasNoSource */ false, + result); + } + + mozilla::intl::DateTimePartVector parts; + auto r = dif->TryFormattedToParts(formatted, parts); + if (r.isErr()) { + intl::ReportInternalError(cx, r.unwrapErr()); + return false; + } + + auto spanResult = formatted.ToSpan(); + if (spanResult.isErr()) { + intl::ReportInternalError(cx, spanResult.unwrapErr()); + return false; + } + return CreateDateTimePartArray(cx, spanResult.unwrap(), + /* hasNoSource */ false, parts, result); +} + +bool js::intl_FormatDateTimeRange(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 4); + MOZ_ASSERT(args[0].isObject()); + MOZ_ASSERT(args[1].isNumber()); + MOZ_ASSERT(args[2].isNumber()); + MOZ_ASSERT(args[3].isBoolean()); + + Rooted<DateTimeFormatObject*> dateTimeFormat(cx); + dateTimeFormat = &args[0].toObject().as<DateTimeFormatObject>(); + + bool formatToParts = args[3].toBoolean(); + + // 
PartitionDateTimeRangePattern, steps 1-2. + ClippedTime x = TimeClip(args[1].toNumber()); + if (!x.isValid()) { + JS_ReportErrorNumberASCII( + cx, GetErrorMessage, nullptr, JSMSG_DATE_NOT_FINITE, "DateTimeFormat", + formatToParts ? "formatRangeToParts" : "formatRange"); + return false; + } + + // PartitionDateTimeRangePattern, steps 3-4. + ClippedTime y = TimeClip(args[2].toNumber()); + if (!y.isValid()) { + JS_ReportErrorNumberASCII( + cx, GetErrorMessage, nullptr, JSMSG_DATE_NOT_FINITE, "DateTimeFormat", + formatToParts ? "formatRangeToParts" : "formatRange"); + return false; + } + + mozilla::intl::DateTimeFormat* df = + GetOrCreateDateTimeFormat(cx, dateTimeFormat); + if (!df) { + return false; + } + + mozilla::intl::DateIntervalFormat* dif = + GetOrCreateDateIntervalFormat(cx, dateTimeFormat, *df); + if (!dif) { + return false; + } + + // Use the DateIntervalFormat to actually format the time range. + return formatToParts + ? FormatDateTimeRangeToParts(cx, df, dif, x, y, args.rval()) + : FormatDateTimeRange(cx, df, dif, x, y, args.rval()); +} diff --git a/js/src/builtin/intl/DateTimeFormat.h b/js/src/builtin/intl/DateTimeFormat.h new file mode 100644 index 0000000000..fd8cea7305 --- /dev/null +++ b/js/src/builtin/intl/DateTimeFormat.h @@ -0,0 +1,188 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef builtin_intl_DateTimeFormat_h +#define builtin_intl_DateTimeFormat_h + +#include "builtin/SelfHostingDefines.h" +#include "js/Class.h" +#include "vm/NativeObject.h" + +namespace mozilla::intl { +class DateTimeFormat; +class DateIntervalFormat; +} // namespace mozilla::intl + +namespace js { + +class DateTimeFormatObject : public NativeObject { + public: + static const JSClass class_; + static const JSClass& protoClass_; + + static constexpr uint32_t INTERNALS_SLOT = 0; + static constexpr uint32_t DATE_FORMAT_SLOT = 1; + static constexpr uint32_t DATE_INTERVAL_FORMAT_SLOT = 2; + static constexpr uint32_t SLOT_COUNT = 3; + + static_assert(INTERNALS_SLOT == INTL_INTERNALS_OBJECT_SLOT, + "INTERNALS_SLOT must match self-hosting define for internals " + "object slot"); + + // Estimated memory use for UDateFormat (see IcuMemoryUsage). + static constexpr size_t UDateFormatEstimatedMemoryUse = 72440; + + // Estimated memory use for UDateIntervalFormat (see IcuMemoryUsage). + static constexpr size_t UDateIntervalFormatEstimatedMemoryUse = 175646; + + mozilla::intl::DateTimeFormat* getDateFormat() const { + const auto& slot = getFixedSlot(DATE_FORMAT_SLOT); + if (slot.isUndefined()) { + return nullptr; + } + return static_cast<mozilla::intl::DateTimeFormat*>(slot.toPrivate()); + } + + void setDateFormat(mozilla::intl::DateTimeFormat* dateFormat) { + setFixedSlot(DATE_FORMAT_SLOT, PrivateValue(dateFormat)); + } + + mozilla::intl::DateIntervalFormat* getDateIntervalFormat() const { + const auto& slot = getFixedSlot(DATE_INTERVAL_FORMAT_SLOT); + if (slot.isUndefined()) { + return nullptr; + } + return static_cast<mozilla::intl::DateIntervalFormat*>(slot.toPrivate()); + } + + void setDateIntervalFormat( + mozilla::intl::DateIntervalFormat* dateIntervalFormat) { + setFixedSlot(DATE_INTERVAL_FORMAT_SLOT, PrivateValue(dateIntervalFormat)); + } + + private: + static const JSClassOps classOps_; + static const ClassSpec classSpec_; + + static void finalize(JS::GCContext* 
gcx, JSObject* obj); +}; + +/** + * Returns a new instance of the standard built-in DateTimeFormat constructor. + * + * Usage: dateTimeFormat = intl_CreateDateTimeFormat(locales, options, required, + * defaults) + */ +[[nodiscard]] extern bool intl_CreateDateTimeFormat(JSContext* cx, + unsigned argc, + JS::Value* vp); + +/** + * Returns an array with the calendar type identifiers per Unicode + * Technical Standard 35, Unicode Locale Data Markup Language, for the + * supported calendars for the given locale. The default calendar is + * element 0. + * + * Usage: calendars = intl_availableCalendars(locale) + */ +[[nodiscard]] extern bool intl_availableCalendars(JSContext* cx, unsigned argc, + JS::Value* vp); + +/** + * Returns the calendar type identifier per Unicode Technical Standard 35, + * Unicode Locale Data Markup Language, for the default calendar for the given + * locale. + * + * Usage: calendar = intl_defaultCalendar(locale) + */ +[[nodiscard]] extern bool intl_defaultCalendar(JSContext* cx, unsigned argc, + JS::Value* vp); + +/** + * 6.4.1 IsValidTimeZoneName ( timeZone ) + * + * Verifies that the given string is a valid time zone name. If it is a valid + * time zone name, its IANA time zone name is returned. Otherwise returns null. + * + * ES2017 Intl draft rev 4a23f407336d382ed5e3471200c690c9b020b5f3 + * + * Usage: ianaTimeZone = intl_IsValidTimeZoneName(timeZone) + */ +[[nodiscard]] extern bool intl_IsValidTimeZoneName(JSContext* cx, unsigned argc, + JS::Value* vp); + +/** + * Return the canonicalized time zone name. Canonicalization resolves link + * names to their target time zones. + * + * Usage: ianaTimeZone = intl_canonicalizeTimeZone(timeZone) + */ +[[nodiscard]] extern bool intl_canonicalizeTimeZone(JSContext* cx, + unsigned argc, + JS::Value* vp); + +/** + * Return the default time zone name. The time zone name is not canonicalized. 
+ * + * Usage: icuDefaultTimeZone = intl_defaultTimeZone() + */ +[[nodiscard]] extern bool intl_defaultTimeZone(JSContext* cx, unsigned argc, + JS::Value* vp); + +/** + * Return the raw offset from GMT in milliseconds for the default time zone. + * + * Usage: defaultTimeZoneOffset = intl_defaultTimeZoneOffset() + */ +[[nodiscard]] extern bool intl_defaultTimeZoneOffset(JSContext* cx, + unsigned argc, + JS::Value* vp); + +/** + * Return true if the given string is the default time zone as returned by + * intl_defaultTimeZone(). Otherwise return false. + * + * Usage: isIcuDefaultTimeZone = intl_isDefaultTimeZone(icuDefaultTimeZone) + */ +[[nodiscard]] extern bool intl_isDefaultTimeZone(JSContext* cx, unsigned argc, + JS::Value* vp); + +/** + * Returns a String value representing x (which must be a Number value) + * according to the effective locale and the formatting options of the + * given DateTimeFormat. + * + * Spec: ECMAScript Internationalization API Specification, 12.3.2. + * + * Usage: formatted = intl_FormatDateTime(dateTimeFormat, x, formatToParts) + */ +[[nodiscard]] extern bool intl_FormatDateTime(JSContext* cx, unsigned argc, + JS::Value* vp); + +/** + * Returns a String value representing the range between x and y (which both + * must be Number values) according to the effective locale and the formatting + * options of the given DateTimeFormat. + * + * Spec: Intl.DateTimeFormat.prototype.formatRange proposal + * + * Usage: formatted = intl_FormatDateTimeRange(dateTimeFmt, x, y, formatToParts) + */ +[[nodiscard]] extern bool intl_FormatDateTimeRange(JSContext* cx, unsigned argc, + JS::Value* vp); + +/** + * Extracts the resolved components from a DateTimeFormat and applies them to + * the object for resolved components. 
+ * + * Usage: intl_resolveDateTimeFormatComponents(dateTimeFormat, resolved) + */ +[[nodiscard]] extern bool intl_resolveDateTimeFormatComponents(JSContext* cx, + unsigned argc, + JS::Value* vp); +} // namespace js + +#endif /* builtin_intl_DateTimeFormat_h */ diff --git a/js/src/builtin/intl/DateTimeFormat.js b/js/src/builtin/intl/DateTimeFormat.js new file mode 100644 index 0000000000..bb6d202a3a --- /dev/null +++ b/js/src/builtin/intl/DateTimeFormat.js @@ -0,0 +1,1170 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Portions Copyright Norbert Lindenberg 2011-2012. */ + +/** + * 11.1.2 CreateDateTimeFormat ( newTarget, locales, options, required, defaults ) + * + * Compute an internal properties object from |lazyDateTimeFormatData|. + */ +function resolveDateTimeFormatInternals(lazyDateTimeFormatData) { + assert(IsObject(lazyDateTimeFormatData), "lazy data not an object?"); + + // Lazy DateTimeFormat data has the following structure: + // + // { + // requestedLocales: List of locales, + // + // localeOpt: // *first* opt computed in InitializeDateTimeFormat + // { + // localeMatcher: "lookup" / "best fit", + // + // ca: string matching a Unicode extension type, // optional + // + // nu: string matching a Unicode extension type, // optional + // + // hc: "h11" / "h12" / "h23" / "h24", // optional + // } + // + // timeZone: IANA time zone name or a normalized time zone offset string, + // + // formatOptions: // *second* opt computed in InitializeDateTimeFormat + // { + // // all the properties/values listed in Table 3 + // // (weekday, era, year, month, day, &c.) 
+ // + // hour12: true / false, // optional + // } + // + // formatMatcher: "basic" / "best fit", + // + // dateStyle: "full" / "long" / "medium" / "short" / undefined, + // + // timeStyle: "full" / "long" / "medium" / "short" / undefined, + // + // patternOption: + // String representing LDML Date Format pattern or undefined + // } + // + // Note that lazy data is only installed as a final step of initialization, + // so every DateTimeFormat lazy data object has *all* these properties, + // never a subset of them. + + var internalProps = std_Object_create(null); + + var DateTimeFormat = dateTimeFormatInternalProperties; + + // Compute effective locale. + + // Step 17. + var localeData = DateTimeFormat.localeData; + + // Step 18. + var r = ResolveLocale( + "DateTimeFormat", + lazyDateTimeFormatData.requestedLocales, + lazyDateTimeFormatData.localeOpt, + DateTimeFormat.relevantExtensionKeys, + localeData + ); + + // Steps 19-22. + internalProps.locale = r.locale; + internalProps.calendar = r.ca; + internalProps.numberingSystem = r.nu; + + // Step 34. (Reordered) + var formatOptions = lazyDateTimeFormatData.formatOptions; + + // Steps 23-29. + // + // Copy the hourCycle setting, if present, to the format options. But + // only do this if no hour12 option is present, because the latter takes + // precedence over hourCycle. + if (r.hc !== null && formatOptions.hour12 === undefined) { + formatOptions.hourCycle = r.hc; + } + + // Step 33. + internalProps.timeZone = lazyDateTimeFormatData.timeZone; + + // Steps 45-50, more or less. 
+ if (lazyDateTimeFormatData.patternOption !== undefined) { + internalProps.pattern = lazyDateTimeFormatData.patternOption; + } else if ( + lazyDateTimeFormatData.dateStyle !== undefined || + lazyDateTimeFormatData.timeStyle !== undefined + ) { + internalProps.hourCycle = formatOptions.hourCycle; + internalProps.hour12 = formatOptions.hour12; + internalProps.dateStyle = lazyDateTimeFormatData.dateStyle; + internalProps.timeStyle = lazyDateTimeFormatData.timeStyle; + } else { + internalProps.hourCycle = formatOptions.hourCycle; + internalProps.hour12 = formatOptions.hour12; + internalProps.weekday = formatOptions.weekday; + internalProps.era = formatOptions.era; + internalProps.year = formatOptions.year; + internalProps.month = formatOptions.month; + internalProps.day = formatOptions.day; + internalProps.dayPeriod = formatOptions.dayPeriod; + internalProps.hour = formatOptions.hour; + internalProps.minute = formatOptions.minute; + internalProps.second = formatOptions.second; + internalProps.fractionalSecondDigits = formatOptions.fractionalSecondDigits; + internalProps.timeZoneName = formatOptions.timeZoneName; + } + + // The caller is responsible for associating |internalProps| with the right + // object using |setInternalProperties|. + return internalProps; +} + +/** + * Returns an object containing the DateTimeFormat internal properties of |obj|. + */ +function getDateTimeFormatInternals(obj) { + assert(IsObject(obj), "getDateTimeFormatInternals called with non-object"); + assert( + intl_GuardToDateTimeFormat(obj) !== null, + "getDateTimeFormatInternals called with non-DateTimeFormat" + ); + + var internals = getIntlObjectInternals(obj); + assert( + internals.type === "DateTimeFormat", + "bad type escaped getIntlObjectInternals" + ); + + // If internal properties have already been computed, use them. + var internalProps = maybeInternalProperties(internals); + if (internalProps) { + return internalProps; + } + + // Otherwise it's time to fully create them. 
+ internalProps = resolveDateTimeFormatInternals(internals.lazyData); + setInternalProperties(internals, internalProps); + return internalProps; +} + +/** + * 12.1.10 UnwrapDateTimeFormat( dtf ) + */ +function UnwrapDateTimeFormat(dtf) { + // Steps 2 and 4 (error handling moved to caller). + if ( + IsObject(dtf) && + intl_GuardToDateTimeFormat(dtf) === null && + !intl_IsWrappedDateTimeFormat(dtf) && + callFunction( + std_Object_isPrototypeOf, + GetBuiltinPrototype("DateTimeFormat"), + dtf + ) + ) { + dtf = dtf[intlFallbackSymbol()]; + } + return dtf; +} + +/** + * 6.4.2 CanonicalizeTimeZoneName ( timeZone ) + * + * Canonicalizes the given IANA time zone name. + * + * ES2017 Intl draft rev 4a23f407336d382ed5e3471200c690c9b020b5f3 + */ +function CanonicalizeTimeZoneName(timeZone) { + assert(typeof timeZone === "string", "CanonicalizeTimeZoneName"); + + // Step 1. (Not applicable, the input is already a valid IANA time zone.) + assert(timeZone !== "Etc/Unknown", "Invalid time zone"); + assert( + timeZone === intl_IsValidTimeZoneName(timeZone), + "Time zone name not normalized" + ); + + // Step 2. + var ianaTimeZone = intl_canonicalizeTimeZone(timeZone); + assert(ianaTimeZone !== "Etc/Unknown", "Invalid canonical time zone"); + assert( + ianaTimeZone === intl_IsValidTimeZoneName(ianaTimeZone), + "Unsupported canonical time zone" + ); + + // Step 3. + if (ianaTimeZone === "Etc/UTC" || ianaTimeZone === "Etc/GMT") { + ianaTimeZone = "UTC"; + } + + // Step 4. + return ianaTimeZone; +} + +var timeZoneCache = { + icuDefaultTimeZone: undefined, + defaultTimeZone: undefined, +}; + +/** + * 6.4.3 DefaultTimeZone () + * + * Returns the IANA time zone name for the host environment's current time zone. 
+ * + * ES2017 Intl draft rev 4a23f407336d382ed5e3471200c690c9b020b5f3 + */ +function DefaultTimeZone() { + if (intl_isDefaultTimeZone(timeZoneCache.icuDefaultTimeZone)) { + return timeZoneCache.defaultTimeZone; + } + + // Verify that the current ICU time zone is a valid ECMA-402 time zone. + var icuDefaultTimeZone = intl_defaultTimeZone(); + var timeZone = intl_IsValidTimeZoneName(icuDefaultTimeZone); + if (timeZone === null) { + // Before defaulting to "UTC", try to represent the default time zone + // using the Etc/GMT + offset format. This format only accepts full + // hour offsets. + var msPerHour = 60 * 60 * 1000; + var offset = intl_defaultTimeZoneOffset(); + assert( + offset === (offset | 0), + "milliseconds offset shouldn't be able to exceed int32_t range" + ); + var offsetHours = offset / msPerHour; + var offsetHoursFraction = offset % msPerHour; + if (offsetHoursFraction === 0) { + // Etc/GMT + offset uses POSIX-style signs, i.e. a positive offset + // means a location west of GMT. + timeZone = + "Etc/GMT" + (offsetHours < 0 ? "+" : "-") + std_Math_abs(offsetHours); + + // Check if the fallback is valid. + timeZone = intl_IsValidTimeZoneName(timeZone); + } + + // Fallback to "UTC" if everything else fails. + if (timeZone === null) { + timeZone = "UTC"; + } + } + + // Canonicalize the ICU time zone, e.g. change Etc/UTC to UTC. + var defaultTimeZone = CanonicalizeTimeZoneName(timeZone); + + timeZoneCache.defaultTimeZone = defaultTimeZone; + timeZoneCache.icuDefaultTimeZone = icuDefaultTimeZone; + + return defaultTimeZone; +} + +/** + * 21.4.1.33.1 IsTimeZoneOffsetString ( offsetString ) + * 21.4.1.33.2 ParseTimeZoneOffsetString ( offsetString ) + * 11.1.3 FormatOffsetTimeZoneIdentifier ( offsetMinutes ) + * + * Function to parse, validate, and normalize time zone offset strings. 
+ * + * ES2024 draft rev 10d44bfce4640894a0ed366bb769f2700cc8839a + * ES2024 Intl draft rev 2f002b2000bf8b908efb793767bcfd23620e06db + */ +function TimeZoneOffsetString(offsetString) { + assert(typeof(offsetString) === "string", "offsetString is a string"); + + // UTCOffset ::: + // TemporalSign Hour + // TemporalSign Hour HourSubcomponents[+Extended] + // TemporalSign Hour HourSubcomponents[~Extended] + // + // TemporalSign ::: + // ASCIISign + // <MINUS> + // + // With <MINUS> = U+2212 + // + // ASCIISign ::: one of + // + - + // + // Hour ::: + // 0 DecimalDigit + // 1 DecimalDigit + // 20 + // 21 + // 22 + // 23 + // + // HourSubcomponents[Extended] ::: + // TimeSeparator[?Extended] MinuteSecond + // + // TimeSeparator[Extended] ::: + // [+Extended] : + // [~Extended] [empty] + // + // MinuteSecond ::: + // 0 DecimalDigit + // 1 DecimalDigit + // 2 DecimalDigit + // 3 DecimalDigit + // 4 DecimalDigit + // 5 DecimalDigit + + // Return if there are too few or too many characters for an offset string. + if (offsetString.length < 3 || offsetString.length > 6) { + return null; + } + + // Self-hosted code only supports Latin-1 permanent atoms, so the Unicode <MINUS> + // can't be used in a string literal "\u2212". That means the first character has + // to be checked using the character code instead of performing a normal string + // comparison. Alternatively <MINUS> could be generated at runtime through + // |std_String_fromCharCode(0x2212)|, but that means allocating a string just for + // the comparison. And for consistency also check the remaining characters through + // their character code. 
+ + #define PLUS_SIGN 0x2b + #define HYPHEN_MINUS 0x2d + #define MINUS 0x2212 + #define COLON 0x3a + #define DIGIT_ZERO 0x30 + #define DIGIT_TWO 0x32 + #define DIGIT_THREE 0x33 + #define DIGIT_FIVE 0x35 + #define DIGIT_NINE 0x39 + + /* global PLUS_SIGN, HYPHEN_MINUS, MINUS, COLON */ + /* global DIGIT_ZERO, DIGIT_TWO, DIGIT_THREE, DIGIT_FIVE, DIGIT_NINE */ + + // The first character must match |TemporalSign|. + var sign = callFunction(std_String_charCodeAt, offsetString, 0); + if (sign !== PLUS_SIGN && sign !== HYPHEN_MINUS && sign !== MINUS) { + return null; + } + + // Read the next two characters for the |Hour| grammar production. + var hourTens = callFunction(std_String_charCodeAt, offsetString, 1); + var hourOnes = callFunction(std_String_charCodeAt, offsetString, 2); + + // Read the remaining characters for the optional |MinuteSecond| grammar production. + var minutesTens = DIGIT_ZERO; + var minutesOnes = DIGIT_ZERO; + if (offsetString.length > 3) { + // |TimeSeparator| is optional. + var separatorLength = offsetString[3] === ":" ? 1 : 0; + + // Return if there are too many characters for an offset string. + if (offsetString.length !== (5 + separatorLength)) { + return null; + } + + minutesTens = callFunction( + std_String_charCodeAt, + offsetString, + 3 + separatorLength, + ); + minutesOnes = callFunction( + std_String_charCodeAt, + offsetString, + 4 + separatorLength, + ); + } + + // Validate the characters match the |Hour| and |MinuteSecond| productions: + // - hours must be in the range 0..23 + // - minutes must in the range 0..59 + if ( + hourTens < DIGIT_ZERO || + hourOnes < DIGIT_ZERO || + minutesTens < DIGIT_ZERO || + minutesOnes < DIGIT_ZERO || + hourTens > DIGIT_TWO || + hourOnes > DIGIT_NINE || + minutesTens > DIGIT_FIVE || + minutesOnes > DIGIT_NINE || + (hourTens === DIGIT_TWO && hourOnes > DIGIT_THREE) + ) { + return null; + } + + // FormatOffsetTimeZoneIdentifier, steps 1-5. 
+ if ( + hourTens === DIGIT_ZERO && + hourOnes === DIGIT_ZERO && + minutesTens === DIGIT_ZERO && + minutesOnes === DIGIT_ZERO + ) { + sign = PLUS_SIGN; + } else if (sign === MINUS) { + sign = HYPHEN_MINUS; + } + + return std_String_fromCharCode( + sign, + hourTens, + hourOnes, + COLON, + minutesTens, + minutesOnes, + ); + + #undef PLUS_SIGN + #undef HYPHEN_MINUS + #undef MINUS + #undef COLON + #undef DIGIT_ZERO + #undef DIGIT_TWO + #undef DIGIT_THREE + #undef DIGIT_FIVE + #undef DIGIT_NINE +} + +/* eslint-disable complexity */ +/** + * 11.1.2 CreateDateTimeFormat ( newTarget, locales, options, required, defaults ) + * + * Initializes an object as a DateTimeFormat. + * + * This method is complicated a moderate bit by its implementing initialization + * as a *lazy* concept. Everything that must happen now, does -- but we defer + * all the work we can until the object is actually used as a DateTimeFormat. + * This later work occurs in |resolveDateTimeFormatInternals|; steps not noted + * here occur there. 
 *
 * Arguments, as used by the code below:
 * - |dateTimeFormat|: the object to initialize; asserted to be a
 *   DateTimeFormat object.
 * - |thisValue|: when it is a different object that has the builtin
 *   DateTimeFormat prototype on its prototype chain, |dateTimeFormat| is
 *   stored on it under |intlFallbackSymbol()| and |thisValue| is returned.
 * - |locales|, |options|: the locales and options values to process.
 * - |required|: "date", "time", or "any" (asserted).
 * - |defaults|: "date", "time", or "all" (asserted).
 * - |mozExtensions|: when truthy, the "pattern" option is read as well.
 *
 * Returns |thisValue| in the chaining case described above, otherwise
 * |dateTimeFormat|.
 */
function InitializeDateTimeFormat(
  dateTimeFormat,
  thisValue,
  locales,
  options,
  required,
  defaults,
  mozExtensions
) {
  assert(
    IsObject(dateTimeFormat),
    "InitializeDateTimeFormat called with non-Object"
  );
  assert(
    intl_GuardToDateTimeFormat(dateTimeFormat) !== null,
    "InitializeDateTimeFormat called with non-DateTimeFormat"
  );
  assert(
    required === "date" || required === "time" || required === "any",
    `InitializeDateTimeFormat called with invalid required value: ${required}`
  );
  assert(
    defaults === "date" || defaults === "time" || defaults === "all",
    `InitializeDateTimeFormat called with invalid defaults value: ${defaults}`
  );

  // Lazy DateTimeFormat data has the following structure:
  //
  //   {
  //     requestedLocales: List of locales,
  //
  //     localeOpt: // *first* opt computed in InitializeDateTimeFormat
  //       {
  //         localeMatcher: "lookup" / "best fit",
  //
  //         ca: string matching a Unicode extension type, // optional
  //
  //         nu: string matching a Unicode extension type, // optional
  //
  //         // |null| when the "hour12" option is present.
  //         hc: "h11" / "h12" / "h23" / "h24" / null, // optional
  //       }
  //
  //     timeZone: IANA time zone name or a normalized time zone offset string,
  //
  //     formatOptions: // *second* opt computed in InitializeDateTimeFormat
  //       {
  //         // all the properties/values listed in Table 3
  //         // (weekday, era, year, month, day, &c.)
  //
  //         hour12: true / false, // optional
  //       }
  //
  //     dateStyle: "full" / "long" / "medium" / "short" / undefined,
  //
  //     timeStyle: "full" / "long" / "medium" / "short" / undefined,
  //
  //     // Only assigned when |mozExtensions| is truthy.
  //     patternOption: string or undefined,
  //   }
  //
  // The "formatMatcher" option is read (see step 38) but not stored.
  //
  // Note that lazy data is only installed as a final step of initialization,
  // so every DateTimeFormat lazy data object has *all* these properties
  // (|patternOption| excepted, see above), never a subset of them.
  var lazyDateTimeFormatData = std_Object_create(null);

  // Step 1. (Performed in caller)

  // Step 2.
  var requestedLocales = CanonicalizeLocaleList(locales);
  lazyDateTimeFormatData.requestedLocales = requestedLocales;

  // Step 3. (Inlined call to CoerceOptionsToObject.)
  if (options === undefined) {
    options = std_Object_create(null);
  } else {
    options = ToObject(options);
  }

  // Compute options that impact interpretation of locale.
  // Step 4.
  var localeOpt = new_Record();
  lazyDateTimeFormatData.localeOpt = localeOpt;

  // Steps 5-6.
  var localeMatcher = GetOption(
    options,
    "localeMatcher",
    "string",
    ["lookup", "best fit"],
    "best fit"
  );
  localeOpt.localeMatcher = localeMatcher;

  // Step 7.
  var calendar = GetOption(options, "calendar", "string", undefined, undefined);

  // Step 8.
  if (calendar !== undefined) {
    calendar = intl_ValidateAndCanonicalizeUnicodeExtensionType(
      calendar,
      "calendar",
      "ca"
    );
  }

  // Step 9.
  localeOpt.ca = calendar;

  // Step 10.
  var numberingSystem = GetOption(
    options,
    "numberingSystem",
    "string",
    undefined,
    undefined
  );

  // Step 11.
  if (numberingSystem !== undefined) {
    numberingSystem = intl_ValidateAndCanonicalizeUnicodeExtensionType(
      numberingSystem,
      "numberingSystem",
      "nu"
    );
  }

  // Step 12.
  localeOpt.nu = numberingSystem;

  // Step 13.
  var hour12 = GetOption(options, "hour12", "boolean", undefined, undefined);

  // Step 14.
  var hourCycle = GetOption(
    options,
    "hourCycle",
    "string",
    ["h11", "h12", "h23", "h24"],
    undefined
  );

  // Step 15.
  if (hour12 !== undefined) {
    // The "hourCycle" option is ignored if "hour12" is also present.
    hourCycle = null;
  }

  // Step 16.
  localeOpt.hc = hourCycle;

  // Steps 17-29 (see resolveDateTimeFormatInternals).

  // Step 29.
  var timeZone = options.timeZone;

  // Steps 30-34.
  if (timeZone === undefined) {
    // Step 30.a.
    timeZone = DefaultTimeZone();

    // Steps 32-34. (Not applicable in our implementation.)
  } else {
    // Step 31.a.
    timeZone = ToString(timeZone);

    // Steps 32-34.
    //
    // Offset strings are tried first; only when the input is not one is it
    // looked up as a named time zone.
    var offsetString = TimeZoneOffsetString(timeZone);
    if (offsetString !== null) {
      // Steps 32.a-g. (Performed in TimeZoneOffsetString in our implementation.)
      timeZone = offsetString;
    } else {
      // Steps 33-34.
      var validTimeZone = intl_IsValidTimeZoneName(timeZone);
      if (validTimeZone !== null) {
        // Step 33.a.
        timeZone = CanonicalizeTimeZoneName(validTimeZone);
      } else {
        // Step 34.a.
        ThrowRangeError(JSMSG_INVALID_TIME_ZONE, timeZone);
      }
    }
  }

  // Step 33.
  lazyDateTimeFormatData.timeZone = timeZone;

  // Step 34.
  var formatOptions = new_Record();
  lazyDateTimeFormatData.formatOptions = formatOptions;

  // Mozilla-only extension: a raw "pattern" option, read only on request.
  if (mozExtensions) {
    var pattern = GetOption(options, "pattern", "string", undefined, undefined);
    lazyDateTimeFormatData.patternOption = pattern;
  }

  // Step 35.
  //
  // Pass hour12 on to ICU. The hour cycle option is passed through |localeOpt|.
  if (hour12 !== undefined) {
    formatOptions.hour12 = hour12;
  }

  // Step 36. (Explicit format component computed in step 43.)

  // Step 37.
  // 11.5, Table 7: Components of date and time formats.
  formatOptions.weekday = GetOption(
    options,
    "weekday",
    "string",
    ["narrow", "short", "long"],
    undefined
  );
  formatOptions.era = GetOption(
    options,
    "era",
    "string",
    ["narrow", "short", "long"],
    undefined
  );
  formatOptions.year = GetOption(
    options,
    "year",
    "string",
    ["2-digit", "numeric"],
    undefined
  );
  formatOptions.month = GetOption(
    options,
    "month",
    "string",
    ["2-digit", "numeric", "narrow", "short", "long"],
    undefined
  );
  formatOptions.day = GetOption(
    options,
    "day",
    "string",
    ["2-digit", "numeric"],
    undefined
  );
  formatOptions.dayPeriod = GetOption(
    options,
    "dayPeriod",
    "string",
    ["narrow", "short", "long"],
    undefined
  );
  formatOptions.hour = GetOption(
    options,
    "hour",
    "string",
    ["2-digit", "numeric"],
    undefined
  );
  formatOptions.minute = GetOption(
    options,
    "minute",
    "string",
    ["2-digit", "numeric"],
    undefined
  );
  formatOptions.second = GetOption(
    options,
    "second",
    "string",
    ["2-digit", "numeric"],
    undefined
  );
  formatOptions.fractionalSecondDigits = GetNumberOption(
    options,
    "fractionalSecondDigits",
    1,
    3,
    undefined
  );
  formatOptions.timeZoneName = GetOption(
    options,
    "timeZoneName",
    "string",
    [
      "short",
      "long",
      "shortOffset",
      "longOffset",
      "shortGeneric",
      "longGeneric",
    ],
    undefined
  );

  // Step 38.
  //
  // For some reason (ICU not exposing enough interface?) we drop the
  // requested format matcher on the floor after this. In any case, even if
  // doing so is justified, we have to do this work here in case it triggers
  // getters or similar. (bug 852837)
  var formatMatcher = GetOption(
    options,
    "formatMatcher",
    "string",
    ["basic", "best fit"],
    "best fit"
  );
  void formatMatcher;

  // Steps 39-40.
  var dateStyle = GetOption(
    options,
    "dateStyle",
    "string",
    ["full", "long", "medium", "short"],
    undefined
  );
  lazyDateTimeFormatData.dateStyle = dateStyle;

  // Steps 41-42.
  var timeStyle = GetOption(
    options,
    "timeStyle",
    "string",
    ["full", "long", "medium", "short"],
    undefined
  );
  lazyDateTimeFormatData.timeStyle = timeStyle;

  // Step 43.
  if (dateStyle !== undefined || timeStyle !== undefined) {
    // Name of the first explicitly requested component, if any; it is only
    // used for the error message below.
    /* eslint-disable no-nested-ternary */
    var explicitFormatComponent =
      formatOptions.weekday !== undefined
        ? "weekday"
        : formatOptions.era !== undefined
        ? "era"
        : formatOptions.year !== undefined
        ? "year"
        : formatOptions.month !== undefined
        ? "month"
        : formatOptions.day !== undefined
        ? "day"
        : formatOptions.dayPeriod !== undefined
        ? "dayPeriod"
        : formatOptions.hour !== undefined
        ? "hour"
        : formatOptions.minute !== undefined
        ? "minute"
        : formatOptions.second !== undefined
        ? "second"
        : formatOptions.fractionalSecondDigits !== undefined
        ? "fractionalSecondDigits"
        : formatOptions.timeZoneName !== undefined
        ? "timeZoneName"
        : undefined;
    /* eslint-enable no-nested-ternary */

    // Step 43.a.
    //
    // Styles and explicit components can't be combined.
    if (explicitFormatComponent !== undefined) {
      ThrowTypeError(
        JSMSG_INVALID_DATETIME_OPTION,
        explicitFormatComponent,
        dateStyle !== undefined ? "dateStyle" : "timeStyle"
      );
    }

    // Step 43.b.
    if (required === "date" && timeStyle !== undefined) {
      ThrowTypeError(
        JSMSG_INVALID_DATETIME_STYLE,
        "timeStyle",
        "toLocaleDateString"
      );
    }

    // Step 43.c.
    if (required === "time" && dateStyle !== undefined) {
      ThrowTypeError(
        JSMSG_INVALID_DATETIME_STYLE,
        "dateStyle",
        "toLocaleTimeString"
      );
    }
  } else {
    // Step 44.a.
    var needDefaults = true;

    // Step 44.b.
    if (required === "date" || required === "any") {
      needDefaults =
        formatOptions.weekday === undefined &&
        formatOptions.year === undefined &&
        formatOptions.month === undefined &&
        formatOptions.day === undefined;
    }

    // Step 44.c.
    if (required === "time" || required === "any") {
      needDefaults =
        needDefaults &&
        formatOptions.dayPeriod === undefined &&
        formatOptions.hour === undefined &&
        formatOptions.minute === undefined &&
        formatOptions.second === undefined &&
        formatOptions.fractionalSecondDigits === undefined;
    }

    // Step 44.d.
    if (needDefaults && (defaults === "date" || defaults === "all")) {
      formatOptions.year = "numeric";
      formatOptions.month = "numeric";
      formatOptions.day = "numeric";
    }

    // Step 44.e.
    if (needDefaults && (defaults === "time" || defaults === "all")) {
      formatOptions.hour = "numeric";
      formatOptions.minute = "numeric";
      formatOptions.second = "numeric";
    }

    // Steps 44.f-h provided by ICU, more or less.
  }

  // Steps 45-50. (see resolveDateTimeFormatInternals).

  // We've done everything that must be done now: mark the lazy data as fully
  // computed and install it.
  initializeIntlObject(
    dateTimeFormat,
    "DateTimeFormat",
    lazyDateTimeFormatData
  );

  // 11.1.1 Intl.DateTimeFormat, step 3. (Inlined call to ChainDateTimeFormat.)
  if (
    dateTimeFormat !== thisValue &&
    callFunction(
      std_Object_isPrototypeOf,
      GetBuiltinPrototype("DateTimeFormat"),
      thisValue
    )
  ) {
    DefineDataProperty(
      thisValue,
      intlFallbackSymbol(),
      dateTimeFormat,
      ATTR_NONENUMERABLE | ATTR_NONCONFIGURABLE | ATTR_NONWRITABLE
    );

    return thisValue;
  }

  // Step 51.
  return dateTimeFormat;
}
/* eslint-enable complexity */

/**
 * Returns the subset of the given locale list for which this locale list has a
 * matching (possibly fallback) locale. Locales appear in the same order in the
 * returned list as in the input list.
 *
 * Spec: ECMAScript Internationalization API Specification, 12.3.2.
 */
function Intl_DateTimeFormat_supportedLocalesOf(locales /*, options*/) {
  // |options| is read from the arguments rather than declared as a formal
  // parameter.
  var options = ArgumentsLength() > 1 ? GetArgument(1) : undefined;

  // Step 1.
  var availableLocales = "DateTimeFormat";

  // Step 2.
  var requestedLocales = CanonicalizeLocaleList(locales);

  // Step 3.
  return SupportedLocales(availableLocales, requestedLocales, options);
}

/**
 * DateTimeFormat internal properties.
 *
 * Spec: ECMAScript Internationalization API Specification, 9.1 and 12.3.3.
 */
var dateTimeFormatInternalProperties = {
  localeData: dateTimeFormatLocaleData,
  relevantExtensionKeys: ["ca", "hc", "nu"],
};

// Returns the locale data record for DateTimeFormat: one entry per relevant
// extension key, plus a |default| record with the same keys.
function dateTimeFormatLocaleData() {
  return {
    ca: intl_availableCalendars,
    nu: getNumberingSystems,
    hc: () => {
      return [null, "h11", "h12", "h23", "h24"];
    },
    default: {
      ca: intl_defaultCalendar,
      nu: intl_numberingSystem,
      hc: () => {
        return null;
      },
    },
  };
}

/**
 * Create function to be cached and returned by Intl.DateTimeFormat.prototype.format.
 *
 * Spec: ECMAScript Internationalization API Specification, 12.1.5.
+ */ +function createDateTimeFormatFormat(dtf) { + // This function is not inlined in $Intl_DateTimeFormat_format_get to avoid + // creating a call-object on each call to $Intl_DateTimeFormat_format_get. + return function(date) { + // Step 1 (implicit). + + // Step 2. + assert(IsObject(dtf), "dateTimeFormatFormatToBind called with non-Object"); + assert( + intl_GuardToDateTimeFormat(dtf) !== null, + "dateTimeFormatFormatToBind called with non-DateTimeFormat" + ); + + // Steps 3-4. + var x = date === undefined ? std_Date_now() : ToNumber(date); + + // Step 5. + return intl_FormatDateTime(dtf, x, /* formatToParts = */ false); + }; +} + +/** + * Returns a function bound to this DateTimeFormat that returns a String value + * representing the result of calling ToNumber(date) according to the + * effective locale and the formatting options of this DateTimeFormat. + * + * Spec: ECMAScript Internationalization API Specification, 12.4.3. + */ +// Uncloned functions with `$` prefix are allocated as extended function +// to store the original name in `SetCanonicalName`. +function $Intl_DateTimeFormat_format_get() { + // Steps 1-3. + var thisArg = UnwrapDateTimeFormat(this); + var dtf = thisArg; + if (!IsObject(dtf) || (dtf = intl_GuardToDateTimeFormat(dtf)) === null) { + return callFunction( + intl_CallDateTimeFormatMethodIfWrapped, + thisArg, + "$Intl_DateTimeFormat_format_get" + ); + } + + var internals = getDateTimeFormatInternals(dtf); + + // Step 4. + if (internals.boundFormat === undefined) { + // Steps 4.a-c. + internals.boundFormat = createDateTimeFormatFormat(dtf); + } + + // Step 5. + return internals.boundFormat; +} +SetCanonicalName($Intl_DateTimeFormat_format_get, "get format"); + +/** + * Intl.DateTimeFormat.prototype.formatToParts ( date ) + * + * Spec: ECMAScript Internationalization API Specification, 12.4.4. + */ +function Intl_DateTimeFormat_formatToParts(date) { + // Step 1. + var dtf = this; + + // Steps 2-3. 
+ if (!IsObject(dtf) || (dtf = intl_GuardToDateTimeFormat(dtf)) === null) { + return callFunction( + intl_CallDateTimeFormatMethodIfWrapped, + this, + date, + "Intl_DateTimeFormat_formatToParts" + ); + } + + // Steps 4-5. + var x = date === undefined ? std_Date_now() : ToNumber(date); + + // Ensure the DateTimeFormat internals are resolved. + getDateTimeFormatInternals(dtf); + + // Step 6. + return intl_FormatDateTime(dtf, x, /* formatToParts = */ true); +} + +/** + * Intl.DateTimeFormat.prototype.formatRange ( startDate , endDate ) + * + * Spec: Intl.DateTimeFormat.prototype.formatRange proposal + */ +function Intl_DateTimeFormat_formatRange(startDate, endDate) { + // Step 1. + var dtf = this; + + // Step 2. + if (!IsObject(dtf) || (dtf = intl_GuardToDateTimeFormat(dtf)) === null) { + return callFunction( + intl_CallDateTimeFormatMethodIfWrapped, + this, + startDate, + endDate, + "Intl_DateTimeFormat_formatRange" + ); + } + + // Step 3. + if (startDate === undefined || endDate === undefined) { + ThrowTypeError( + JSMSG_UNDEFINED_DATE, + startDate === undefined ? "start" : "end", + "formatRange" + ); + } + + // Step 4. + var x = ToNumber(startDate); + + // Step 5. + var y = ToNumber(endDate); + + // Ensure the DateTimeFormat internals are resolved. + getDateTimeFormatInternals(dtf); + + // Step 6. + return intl_FormatDateTimeRange(dtf, x, y, /* formatToParts = */ false); +} + +/** + * Intl.DateTimeFormat.prototype.formatRangeToParts ( startDate , endDate ) + * + * Spec: Intl.DateTimeFormat.prototype.formatRange proposal + */ +function Intl_DateTimeFormat_formatRangeToParts(startDate, endDate) { + // Step 1. + var dtf = this; + + // Step 2. + if (!IsObject(dtf) || (dtf = intl_GuardToDateTimeFormat(dtf)) === null) { + return callFunction( + intl_CallDateTimeFormatMethodIfWrapped, + this, + startDate, + endDate, + "Intl_DateTimeFormat_formatRangeToParts" + ); + } + + // Step 3. 
+ if (startDate === undefined || endDate === undefined) { + ThrowTypeError( + JSMSG_UNDEFINED_DATE, + startDate === undefined ? "start" : "end", + "formatRangeToParts" + ); + } + + // Step 4. + var x = ToNumber(startDate); + + // Step 5. + var y = ToNumber(endDate); + + // Ensure the DateTimeFormat internals are resolved. + getDateTimeFormatInternals(dtf); + + // Step 6. + return intl_FormatDateTimeRange(dtf, x, y, /* formatToParts = */ true); +} + +/** + * Returns the resolved options for a DateTimeFormat object. + * + * Spec: ECMAScript Internationalization API Specification, 12.4.5. + */ +function Intl_DateTimeFormat_resolvedOptions() { + // Steps 1-3. + var thisArg = UnwrapDateTimeFormat(this); + var dtf = thisArg; + if (!IsObject(dtf) || (dtf = intl_GuardToDateTimeFormat(dtf)) === null) { + return callFunction( + intl_CallDateTimeFormatMethodIfWrapped, + thisArg, + "Intl_DateTimeFormat_resolvedOptions" + ); + } + + // Ensure the internals are resolved. + var internals = getDateTimeFormatInternals(dtf); + + // Steps 4-5. + var result = { + locale: internals.locale, + calendar: internals.calendar, + numberingSystem: internals.numberingSystem, + timeZone: internals.timeZone, + }; + + if (internals.pattern !== undefined) { + // The raw pattern option is only internal to Mozilla, and not part of the + // ECMA-402 API. + DefineDataProperty(result, "pattern", internals.pattern); + } + + var hasDateStyle = internals.dateStyle !== undefined; + var hasTimeStyle = internals.timeStyle !== undefined; + + if (hasDateStyle || hasTimeStyle) { + if (hasTimeStyle) { + // timeStyle (unlike dateStyle) requires resolving the pattern to + // ensure "hourCycle" and "hour12" properties are added to |result|. 
+ intl_resolveDateTimeFormatComponents( + dtf, + result, + /* includeDateTimeFields = */ false + ); + } + if (hasDateStyle) { + DefineDataProperty(result, "dateStyle", internals.dateStyle); + } + if (hasTimeStyle) { + DefineDataProperty(result, "timeStyle", internals.timeStyle); + } + } else { + // Components bag or a (Mozilla-only) raw pattern. + intl_resolveDateTimeFormatComponents( + dtf, + result, + /* includeDateTimeFields = */ true + ); + } + + // Step 6. + return result; +} diff --git a/js/src/builtin/intl/DisplayNames.cpp b/js/src/builtin/intl/DisplayNames.cpp new file mode 100644 index 0000000000..d375be58f3 --- /dev/null +++ b/js/src/builtin/intl/DisplayNames.cpp @@ -0,0 +1,550 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Intl.DisplayNames implementation. 
*/

#include "builtin/intl/DisplayNames.h"

#include "mozilla/Assertions.h"
#include "mozilla/intl/DisplayNames.h"
#include "mozilla/PodOperations.h"
#include "mozilla/Span.h"

#include <algorithm>

#include "jsnum.h"
#include "jspubtd.h"

#include "builtin/intl/CommonFunctions.h"
#include "builtin/intl/FormatBuffer.h"
#include "gc/AllocKind.h"
#include "gc/GCContext.h"
#include "js/CallArgs.h"
#include "js/Class.h"
#include "js/experimental/Intl.h"     // JS::AddMozDisplayNamesConstructor
#include "js/friend/ErrorMessages.h"  // js::GetErrorMessage, JSMSG_*
#include "js/Printer.h"
#include "js/PropertyAndElement.h"  // JS_DefineFunctions, JS_DefineProperties
#include "js/PropertyDescriptor.h"
#include "js/PropertySpec.h"
#include "js/RootingAPI.h"
#include "js/TypeDecls.h"
#include "js/Utility.h"
#include "vm/GlobalObject.h"
#include "vm/JSContext.h"
#include "vm/JSObject.h"
#include "vm/Runtime.h"
#include "vm/SelfHosting.h"
#include "vm/Stack.h"
#include "vm/StringType.h"

#include "vm/JSObject-inl.h"
#include "vm/NativeObject-inl.h"

using namespace js;

// Class hooks for DisplayNamesObject. Only |finalize| is provided; the hooks
// after it are not listed and are therefore value-initialized.
const JSClassOps DisplayNamesObject::classOps_ = {
    nullptr, /* addProperty */
    nullptr, /* delProperty */
    nullptr, /* enumerate */
    nullptr, /* newEnumerate */
    nullptr, /* resolve */
    nullptr, /* mayResolve */
    DisplayNamesObject::finalize};

// JSClass for Intl.DisplayNames instances: reserves SLOT_COUNT slots, caches
// the JSProto_DisplayNames prototype, and requests foreground finalization.
const JSClass DisplayNamesObject::class_ = {
    "Intl.DisplayNames",
    JSCLASS_HAS_RESERVED_SLOTS(DisplayNamesObject::SLOT_COUNT) |
        JSCLASS_HAS_CACHED_PROTO(JSProto_DisplayNames) |
        JSCLASS_FOREGROUND_FINALIZE,
    &DisplayNamesObject::classOps_, &DisplayNamesObject::classSpec_};

// The prototype object uses the plain-object class.
const JSClass& DisplayNamesObject::protoClass_ = PlainObject::class_;

// Native backing the "toSource" method: sets the return value to the
// |DisplayNames| name atom, regardless of the arguments. Always succeeds.
static bool displayNames_toSource(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);
  args.rval().setString(cx->names().DisplayNames);
  return true;
}

// Functions defined on the constructor.
static const JSFunctionSpec displayNames_static_methods[] = {
    JS_SELF_HOSTED_FN("supportedLocalesOf",
                      "Intl_DisplayNames_supportedLocalesOf", 1, 0),
    JS_FS_END};

// Functions defined on the prototype. "of" and "resolvedOptions" are
// implemented in self-hosted code, "toSource" natively (above).
static const JSFunctionSpec displayNames_methods[] = {
    JS_SELF_HOSTED_FN("of", "Intl_DisplayNames_of", 1, 0),
    JS_SELF_HOSTED_FN("resolvedOptions", "Intl_DisplayNames_resolvedOptions", 0,
                      0),
    JS_FN("toSource", displayNames_toSource, 0, 0), JS_FS_END};

// Properties defined on the prototype: a read-only @@toStringTag.
static const JSPropertySpec displayNames_properties[] = {
    JS_STRING_SYM_PS(toStringTag, "Intl.DisplayNames", JSPROP_READONLY),
    JS_PS_END};

// Forward declaration; referenced by |classSpec_| below.
static bool DisplayNames(JSContext* cx, unsigned argc, Value* vp);

// Ties together the constructor, the prototype and the tables above.
// NOTE(review): the |2| passed to GenericCreateConstructor is presumably the
// constructor's length -- confirm against GenericCreateConstructor.
const ClassSpec DisplayNamesObject::classSpec_ = {
    GenericCreateConstructor<DisplayNames, 2, gc::AllocKind::FUNCTION>,
    GenericCreatePrototype<DisplayNamesObject>,
    displayNames_static_methods,
    nullptr,
    displayNames_methods,
    displayNames_properties,
    nullptr,
    ClassSpec::DontDefineConstructor};

// Selects which feature set a DisplayNames constructor call exposes.
enum class DisplayNamesOptions {
  Standard,

  // Calendar display names are no longer available with the current spec
  // proposal text, but may be re-enabled in the future. For our internal use
  // we still need to have them present, so use a feature guard for now.
  EnableMozExtensions,
};

/**
 * Initialize a new Intl.DisplayNames object using the named self-hosted
 * function.
+ */ +static bool InitializeDisplayNamesObject(JSContext* cx, HandleObject obj, + Handle<PropertyName*> initializer, + HandleValue locales, + HandleValue options, + DisplayNamesOptions dnoptions) { + FixedInvokeArgs<4> args(cx); + + args[0].setObject(*obj); + args[1].set(locales); + args[2].set(options); + args[3].setBoolean(dnoptions == DisplayNamesOptions::EnableMozExtensions); + + RootedValue ignored(cx); + if (!CallSelfHostedFunction(cx, initializer, NullHandleValue, args, + &ignored)) { + return false; + } + + MOZ_ASSERT(ignored.isUndefined(), + "Unexpected return value from non-legacy Intl object initializer"); + return true; +} + +/** + * Intl.DisplayNames ([ locales [ , options ]]) + */ +static bool DisplayNames(JSContext* cx, const CallArgs& args, + DisplayNamesOptions dnoptions) { + // Step 1. + if (!ThrowIfNotConstructing(cx, args, "Intl.DisplayNames")) { + return false; + } + + // Step 2 (Inlined 9.1.14, OrdinaryCreateFromConstructor). + RootedObject proto(cx); + if (dnoptions == DisplayNamesOptions::Standard) { + if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_DisplayNames, + &proto)) { + return false; + } + } else { + RootedObject newTarget(cx, &args.newTarget().toObject()); + if (!GetPrototypeFromConstructor(cx, newTarget, JSProto_Null, &proto)) { + return false; + } + } + + Rooted<DisplayNamesObject*> displayNames(cx); + displayNames = NewObjectWithClassProto<DisplayNamesObject>(cx, proto); + if (!displayNames) { + return false; + } + + HandleValue locales = args.get(0); + HandleValue options = args.get(1); + + // Steps 3-26. + if (!InitializeDisplayNamesObject(cx, displayNames, + cx->names().InitializeDisplayNames, locales, + options, dnoptions)) { + return false; + } + + // Step 27. 
+ args.rval().setObject(*displayNames); + return true; +} + +static bool DisplayNames(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + return DisplayNames(cx, args, DisplayNamesOptions::Standard); +} + +static bool MozDisplayNames(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + return DisplayNames(cx, args, DisplayNamesOptions::EnableMozExtensions); +} + +void js::DisplayNamesObject::finalize(JS::GCContext* gcx, JSObject* obj) { + MOZ_ASSERT(gcx->onMainThread()); + + if (mozilla::intl::DisplayNames* displayNames = + obj->as<DisplayNamesObject>().getDisplayNames()) { + intl::RemoveICUCellMemory(gcx, obj, DisplayNamesObject::EstimatedMemoryUse); + delete displayNames; + } +} + +bool JS::AddMozDisplayNamesConstructor(JSContext* cx, HandleObject intl) { + RootedObject ctor(cx, GlobalObject::createConstructor( + cx, MozDisplayNames, cx->names().DisplayNames, 2)); + if (!ctor) { + return false; + } + + RootedObject proto( + cx, GlobalObject::createBlankPrototype<PlainObject>(cx, cx->global())); + if (!proto) { + return false; + } + + if (!LinkConstructorAndPrototype(cx, ctor, proto)) { + return false; + } + + if (!JS_DefineFunctions(cx, ctor, displayNames_static_methods)) { + return false; + } + + if (!JS_DefineFunctions(cx, proto, displayNames_methods)) { + return false; + } + + if (!JS_DefineProperties(cx, proto, displayNames_properties)) { + return false; + } + + RootedValue ctorValue(cx, ObjectValue(*ctor)); + return DefineDataProperty(cx, intl, cx->names().DisplayNames, ctorValue, 0); +} + +static mozilla::intl::DisplayNames* NewDisplayNames( + JSContext* cx, const char* locale, + mozilla::intl::DisplayNames::Options& options) { + auto result = mozilla::intl::DisplayNames::TryCreate(locale, options); + if (result.isErr()) { + intl::ReportInternalError(cx, result.unwrapErr()); + return nullptr; + } + return result.unwrap().release(); +} + +static mozilla::intl::DisplayNames* 
GetOrCreateDisplayNames(
    JSContext* cx, Handle<DisplayNamesObject*> displayNames, const char* locale,
    mozilla::intl::DisplayNames::Options& options) {
  // Returns the mozilla::intl::DisplayNames stored on |displayNames|, creating
  // and storing it on first use. Returns nullptr if creation fails.

  // Obtain a cached mozilla::intl::DisplayNames object.
  mozilla::intl::DisplayNames* dn = displayNames->getDisplayNames();
  if (!dn) {
    dn = NewDisplayNames(cx, locale, options);
    if (!dn) {
      return nullptr;
    }
    displayNames->setDisplayNames(dn);

    // Account for the new allocation; the matching RemoveICUCellMemory call is
    // in DisplayNamesObject::finalize.
    intl::AddICUCellMemory(displayNames,
                           DisplayNamesObject::EstimatedMemoryUse);
  }
  return dn;
}

// Reports JSMSG_INVALID_OPTION_VALUE for a (type, option) pair that are both
// JS strings. |option| is quoted with '"'. Nothing is reported from here when
// either QuoteString call returns null.
static void ReportInvalidOptionError(JSContext* cx, HandleString type,
                                     HandleString option) {
  if (UniqueChars optionStr = QuoteString(cx, option, '"')) {
    if (UniqueChars typeStr = QuoteString(cx, type)) {
      JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
                                JSMSG_INVALID_OPTION_VALUE, typeStr.get(),
                                optionStr.get());
    }
  }
}

// Same as above, for a |type| that is already a C string.
static void ReportInvalidOptionError(JSContext* cx, const char* type,
                                     HandleString option) {
  if (UniqueChars str = QuoteString(cx, option, '"')) {
    JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
                              JSMSG_INVALID_OPTION_VALUE, type, str.get());
  }
}

// Reports an invalid numeric code using JSMSG_INVALID_DIGITS_VALUE.
// NOTE(review): |type| is not part of the reported message; only the number
// string is passed along -- confirm this is intended.
static void ReportInvalidOptionError(JSContext* cx, const char* type,
                                     double option) {
  ToCStringBuf cbuf;
  const char* str = NumberToCString(&cbuf, option);
  MOZ_ASSERT(str);
  JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
                            JSMSG_INVALID_DIGITS_VALUE, str);
}

/**
 * intl_ComputeDisplayName(displayNames, locale, calendar, style,
 *                         languageDisplay, fallback, type, code)
 *
 * Sets the return value to the computed display name, or to undefined when
 * the computed name is the empty string. Returns false after reporting an
 * error on |cx|.
 */
bool js::intl_ComputeDisplayName(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);
  MOZ_ASSERT(args.length() == 8);

  Rooted<DisplayNamesObject*> displayNames(
      cx, &args[0].toObject().as<DisplayNamesObject>());

  UniqueChars locale = intl::EncodeLocale(cx, args[1].toString());
  if (!locale) {
    return false;
  }

  Rooted<JSLinearString*> calendar(cx, args[2].toString()->ensureLinear(cx));
  if (!calendar) {
    return false;
  }

  Rooted<JSLinearString*> code(cx, args[7].toString()->ensureLinear(cx));
  if (!code) {
    return false;
  }

  // Map the "style" string onto the enum. Any value other than "long",
  // "short" and "narrow" is asserted to be "abbreviated".
  mozilla::intl::DisplayNames::Style style;
  {
    JSLinearString* styleStr = args[3].toString()->ensureLinear(cx);
    if (!styleStr) {
      return false;
    }

    if (StringEqualsLiteral(styleStr, "long")) {
      style = mozilla::intl::DisplayNames::Style::Long;
    } else if (StringEqualsLiteral(styleStr, "short")) {
      style = mozilla::intl::DisplayNames::Style::Short;
    } else if (StringEqualsLiteral(styleStr, "narrow")) {
      style = mozilla::intl::DisplayNames::Style::Narrow;
    } else {
      MOZ_ASSERT(StringEqualsLiteral(styleStr, "abbreviated"));
      style = mozilla::intl::DisplayNames::Style::Abbreviated;
    }
  }

  // Map "languageDisplay". The empty string is accepted and treated like
  // "standard".
  mozilla::intl::DisplayNames::LanguageDisplay languageDisplay;
  {
    JSLinearString* language = args[4].toString()->ensureLinear(cx);
    if (!language) {
      return false;
    }

    if (StringEqualsLiteral(language, "dialect")) {
      languageDisplay = mozilla::intl::DisplayNames::LanguageDisplay::Dialect;
    } else {
      MOZ_ASSERT(language->empty() ||
                 StringEqualsLiteral(language, "standard"));
      languageDisplay = mozilla::intl::DisplayNames::LanguageDisplay::Standard;
    }
  }

  // Map "fallback": "none" or "code".
  mozilla::intl::DisplayNames::Fallback fallback;
  {
    JSLinearString* fallbackStr = args[5].toString()->ensureLinear(cx);
    if (!fallbackStr) {
      return false;
    }

    if (StringEqualsLiteral(fallbackStr, "none")) {
      fallback = mozilla::intl::DisplayNames::Fallback::None;
    } else {
      MOZ_ASSERT(StringEqualsLiteral(fallbackStr, "code"));
      fallback = mozilla::intl::DisplayNames::Fallback::Code;
    }
  }

  Rooted<JSLinearString*> type(cx, args[6].toString()->ensureLinear(cx));
  if (!type) {
    return false;
  }

  mozilla::intl::DisplayNames::Options options{
      style,
      languageDisplay,
  };

  // If a calendar exists, set it as an option.
  // NOTE(review): the encoded calendar is not stored in |options|; it is
  // passed to the weekday/month/quarter/dayPeriod getters below.
  JS::UniqueChars calendarChars = nullptr;
  if (!calendar->empty()) {
    calendarChars = JS_EncodeStringToUTF8(cx, calendar);
    if (!calendarChars) {
      return false;
    }
  }

  mozilla::intl::DisplayNames* dn =
      GetOrCreateDisplayNames(cx, displayNames, locale.get(), options);
  if (!dn) {
    return false;
  }

  // The "code" is usually a small ASCII string, so try to avoid an allocation
  // by copying it to the stack. Unfortunately we can't pass a string span of
  // the JSString directly to the unified DisplayNames API, as the
  // intl::FormatBuffer will be written to. This writing can trigger a GC and
  // invalidate the span, creating a nogc rooting hazard.
  JS::UniqueChars utf8 = nullptr;
  unsigned char ascii[32];
  mozilla::Span<const char> codeSpan = nullptr;
  if (code->length() < 32 && code->hasLatin1Chars() && StringIsAscii(code)) {
    // Fast path: the code fits into the stack buffer.
    JS::AutoCheckCannotGC nogc;
    mozilla::PodCopy(ascii, code->latin1Chars(nogc), code->length());
    codeSpan =
        mozilla::Span(reinterpret_cast<const char*>(ascii), code->length());
  } else {
    // Slow path: heap-allocate a UTF-8 copy, owned by |utf8|.
    utf8 = JS_EncodeStringToUTF8(cx, code);
    if (!utf8) {
      return false;
    }
    codeSpan = mozilla::MakeStringSpan(utf8.get());
  }

  intl::FormatBuffer<char16_t, intl::INITIAL_CHAR_BUFFER_SIZE> buffer(cx);
  mozilla::Result<mozilla::Ok, mozilla::intl::DisplayNamesError> result =
      mozilla::Ok{};

  // Dispatch on |type|. The first five types pass the code through as a
  // string span; the remaining ones first convert |code| to a number or enum.
  if (StringEqualsLiteral(type, "language")) {
    result = dn->GetLanguage(buffer, codeSpan, fallback);
  } else if (StringEqualsLiteral(type, "script")) {
    result = dn->GetScript(buffer, codeSpan, fallback);
  } else if (StringEqualsLiteral(type, "region")) {
    result = dn->GetRegion(buffer, codeSpan, fallback);
  } else if (StringEqualsLiteral(type, "currency")) {
    result = dn->GetCurrency(buffer, codeSpan, fallback);
  } else if (StringEqualsLiteral(type, "calendar")) {
    result = dn->GetCalendar(buffer, codeSpan, fallback);
  } else if (StringEqualsLiteral(type, "weekday")) {
    // Weekday codes are the integers 1..7.
    double d = LinearStringToNumber(code);
    if (!IsInteger(d) || d < 1 || d > 7) {
      ReportInvalidOptionError(cx, "weekday", d);
      return false;
    }
    result =
        dn->GetWeekday(buffer, static_cast<mozilla::intl::Weekday>(d),
                       mozilla::MakeStringSpan(calendarChars.get()), fallback);
  } else if (StringEqualsLiteral(type, "month")) {
    // Month codes are the integers 1..13.
    double d = LinearStringToNumber(code);
    if (!IsInteger(d) || d < 1 || d > 13) {
      ReportInvalidOptionError(cx, "month", d);
      return false;
    }

    result =
        dn->GetMonth(buffer, static_cast<mozilla::intl::Month>(d),
                     mozilla::MakeStringSpan(calendarChars.get()), fallback);

  } else if (StringEqualsLiteral(type, "quarter")) {
    double d = LinearStringToNumber(code);

    // Inlined implementation of `IsValidQuarterCode ( quarter )`.
    if (!IsInteger(d) || d < 1 || d > 4) {
      ReportInvalidOptionError(cx, "quarter", d);
      return false;
    }

    result =
        dn->GetQuarter(buffer, static_cast<mozilla::intl::Quarter>(d),
                       mozilla::MakeStringSpan(calendarChars.get()), fallback);

  } else if (StringEqualsLiteral(type, "dayPeriod")) {
    // Only "am" and "pm" are accepted.
    mozilla::intl::DayPeriod dayPeriod;
    if (StringEqualsLiteral(code, "am")) {
      dayPeriod = mozilla::intl::DayPeriod::AM;
    } else if (StringEqualsLiteral(code, "pm")) {
      dayPeriod = mozilla::intl::DayPeriod::PM;
    } else {
      ReportInvalidOptionError(cx, "dayPeriod", code);
      return false;
    }
    result = dn->GetDayPeriod(buffer, dayPeriod,
                              mozilla::MakeStringSpan(calendarChars.get()),
                              fallback);

  } else {
    MOZ_ASSERT(StringEqualsLiteral(type, "dateTimeField"));
    mozilla::intl::DateTimeField field;
    if (StringEqualsLiteral(code, "era")) {
      field = mozilla::intl::DateTimeField::Era;
    } else if (StringEqualsLiteral(code, "year")) {
      field = mozilla::intl::DateTimeField::Year;
    } else if (StringEqualsLiteral(code, "quarter")) {
      field = mozilla::intl::DateTimeField::Quarter;
    } else if (StringEqualsLiteral(code, "month")) {
      field = mozilla::intl::DateTimeField::Month;
    } else if (StringEqualsLiteral(code, "weekOfYear")) {
      field = mozilla::intl::DateTimeField::WeekOfYear;
    } else if (StringEqualsLiteral(code, "weekday")) {
      field = mozilla::intl::DateTimeField::Weekday;
    } else if (StringEqualsLiteral(code, "day")) {
      field = mozilla::intl::DateTimeField::Day;
    } else if (StringEqualsLiteral(code, "dayPeriod")) {
      field = mozilla::intl::DateTimeField::DayPeriod;
    } else if (StringEqualsLiteral(code, "hour")) {
      field = mozilla::intl::DateTimeField::Hour;
    } else if (StringEqualsLiteral(code, "minute")) {
      field = mozilla::intl::DateTimeField::Minute;
    } else if (StringEqualsLiteral(code, "second")) {
      field = mozilla::intl::DateTimeField::Second;
    } else if (StringEqualsLiteral(code, "timeZoneName")) {
      field = mozilla::intl::DateTimeField::TimeZoneName;
    } else {
      ReportInvalidOptionError(cx, "dateTimeField", code);
      return false;
    }

    // The pattern generator is obtained from the runtime's shared Intl data
    // for this locale.
    intl::SharedIntlData& sharedIntlData = cx->runtime()->sharedIntlData.ref();
    mozilla::intl::DateTimePatternGenerator* dtpgen =
        sharedIntlData.getDateTimePatternGenerator(cx, locale.get());
    if (!dtpgen) {
      return false;
    }

    result = dn->GetDateTimeField(buffer, field, *dtpgen, fallback);
  }

  // Translate a DisplayNamesError into the matching JS error.
  if (result.isErr()) {
    switch (result.unwrapErr()) {
      case mozilla::intl::DisplayNamesError::InternalError:
        intl::ReportInternalError(cx);
        break;
      case mozilla::intl::DisplayNamesError::OutOfMemory:
        ReportOutOfMemory(cx);
        break;
      case mozilla::intl::DisplayNamesError::InvalidOption:
        ReportInvalidOptionError(cx, type, code);
        break;
      case mozilla::intl::DisplayNamesError::DuplicateVariantSubtag:
        JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
                                  JSMSG_DUPLICATE_VARIANT_SUBTAG);
        break;
      case mozilla::intl::DisplayNamesError::InvalidLanguageTag:
        JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
                                  JSMSG_INVALID_LANGUAGE_TAG);
        break;
    }
    return false;
  }

  JSString* str = buffer.toString(cx);
  if (!str) {
    return false;
  }

  // An empty result is surfaced to the caller as |undefined|.
  if (str->empty()) {
    args.rval().setUndefined();
  } else {
    args.rval().setString(str);
  }

  return true;
}
diff --git a/js/src/builtin/intl/DisplayNames.h b/js/src/builtin/intl/DisplayNames.h
new file mode 100644
index 0000000000..9fd6c63a62
--- /dev/null
+++ b/js/src/builtin/intl/DisplayNames.h
@@ -0,0 +1,79 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef builtin_intl_DisplayNames_h
#define builtin_intl_DisplayNames_h

#include <stddef.h>
#include <stdint.h>

#include "jstypes.h"
#include "NamespaceImports.h"

#include "builtin/SelfHostingDefines.h"
#include "js/Class.h"  // JSClass, JSClassOps, js::ClassSpec
#include "js/TypeDecls.h"
#include "js/Value.h"
#include "vm/NativeObject.h"

struct JS_PUBLIC_API JSContext;

namespace mozilla::intl {
class DisplayNames;
}

namespace js {
struct ClassSpec;

// JS object backing Intl.DisplayNames. It stores the lazily created
// mozilla::intl::DisplayNames instance in one of its reserved slots.
class DisplayNamesObject : public NativeObject {
 public:
  static const JSClass class_;
  static const JSClass& protoClass_;

  // NOTE(review): only slots 0 and 1 are named here while SLOT_COUNT is 3 --
  // confirm whether the third slot is used elsewhere.
  static constexpr uint32_t INTERNALS_SLOT = 0;
  static constexpr uint32_t LOCALE_DISPLAY_NAMES_SLOT = 1;
  static constexpr uint32_t SLOT_COUNT = 3;

  static_assert(INTERNALS_SLOT == INTL_INTERNALS_OBJECT_SLOT,
                "INTERNALS_SLOT must match self-hosting define for internals "
                "object slot");

  // Estimated memory use for ULocaleDisplayNames (see IcuMemoryUsage).
  static constexpr size_t EstimatedMemoryUse = 1238;

  // Returns the stored mozilla::intl::DisplayNames, or nullptr while the slot
  // is still undefined (nothing has been stored yet).
  mozilla::intl::DisplayNames* getDisplayNames() const {
    const auto& slot = getFixedSlot(LOCALE_DISPLAY_NAMES_SLOT);
    if (slot.isUndefined()) {
      return nullptr;
    }
    return static_cast<mozilla::intl::DisplayNames*>(slot.toPrivate());
  }

  // Stores |displayNames| in the reserved slot as a private value.
  void setDisplayNames(mozilla::intl::DisplayNames* displayNames) {
    setFixedSlot(LOCALE_DISPLAY_NAMES_SLOT, PrivateValue(displayNames));
  }

 private:
  static const JSClassOps classOps_;
  static const ClassSpec classSpec_;

  static void finalize(JS::GCContext* gcx, JSObject* obj);
};

/**
 * Return the display name for the requested code or undefined if no applicable
 * display name was found.
 *
 * Usage: result = intl_ComputeDisplayName(displayNames, locale, calendar,
 *                                         style, languageDisplay, fallback,
 *                                         type, code)
 */
[[nodiscard]] extern bool intl_ComputeDisplayName(JSContext* cx, unsigned argc,
                                                  Value* vp);

}  // namespace js

#endif /* builtin_intl_DisplayNames_h */
diff --git a/js/src/builtin/intl/DisplayNames.js b/js/src/builtin/intl/DisplayNames.js
new file mode 100644
index 0000000000..00ba2301aa
--- /dev/null
+++ b/js/src/builtin/intl/DisplayNames.js
@@ -0,0 +1,418 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/**
 * Intl.DisplayNames internal properties.
 */
function displayNamesLocaleData() {
  // Intl.DisplayNames doesn't support any extension keys.
  return {};
}
// Internal properties used for the standard constructor: no Unicode extension
// keys are relevant.
var displayNamesInternalProperties = {
  localeData: displayNamesLocaleData,
  relevantExtensionKeys: [],
};

/**
 * Locale data for the variant with Mozilla extensions, which additionally
 * supports the "ca" (calendar) extension key.
 */
function mozDisplayNamesLocaleData() {
  return {
    ca: intl_availableCalendars,
    default: {
      ca: intl_defaultCalendar,
    },
  };
}
// Internal properties used when |mozExtensions| is set.
var mozDisplayNamesInternalProperties = {
  localeData: mozDisplayNamesLocaleData,
  relevantExtensionKeys: ["ca"],
};

/**
 * Intl.DisplayNames ( [ locales [ , options ] ] )
 *
 * Compute an internal properties object from |lazyDisplayNamesData|.
 */
function resolveDisplayNamesInternals(lazyDisplayNamesData) {
  assert(IsObject(lazyDisplayNamesData), "lazy data not an object?");

  var internalProps = std_Object_create(null);

  var mozExtensions = lazyDisplayNamesData.mozExtensions;

  // Pick the property set matching the constructor variant.
  var DisplayNames = mozExtensions
    ? mozDisplayNamesInternalProperties
    : displayNamesInternalProperties;

  // Compute effective locale.

  // Step 7.
  var localeData = DisplayNames.localeData;

  // Step 10.
  var r = ResolveLocale(
    "DisplayNames",
    lazyDisplayNamesData.requestedLocales,
    lazyDisplayNamesData.opt,
    DisplayNames.relevantExtensionKeys,
    localeData
  );

  // Step 12.
  internalProps.style = lazyDisplayNamesData.style;

  // Step 14.
  var type = lazyDisplayNamesData.type;
  internalProps.type = type;

  // Step 16.
  internalProps.fallback = lazyDisplayNamesData.fallback;

  // Step 17.
  internalProps.locale = r.locale;

  // Step 25.
  if (type === "language") {
    internalProps.languageDisplay = lazyDisplayNamesData.languageDisplay;
  }

  // The resolved calendar is only recorded for the Mozilla-extended variant.
  if (mozExtensions) {
    internalProps.calendar = r.ca;
  }

  // The caller is responsible for associating |internalProps| with the right
  // object using |setInternalProperties|.
  return internalProps;
}

/**
 * Returns an object containing the DisplayNames internal properties of |obj|.
+ */ +function getDisplayNamesInternals(obj) { + assert(IsObject(obj), "getDisplayNamesInternals called with non-object"); + assert( + intl_GuardToDisplayNames(obj) !== null, + "getDisplayNamesInternals called with non-DisplayNames" + ); + + var internals = getIntlObjectInternals(obj); + assert( + internals.type === "DisplayNames", + "bad type escaped getIntlObjectInternals" + ); + + // If internal properties have already been computed, use them. + var internalProps = maybeInternalProperties(internals); + if (internalProps) { + return internalProps; + } + + // Otherwise it's time to fully create them. + internalProps = resolveDisplayNamesInternals(internals.lazyData); + setInternalProperties(internals, internalProps); + return internalProps; +} + +/** + * Intl.DisplayNames ( [ locales [ , options ] ] ) + * + * Initializes an object as a DisplayNames. + * + * This method is complicated a moderate bit by its implementing initialization + * as a *lazy* concept. Everything that must happen now, does -- but we defer + * all the work we can until the object is actually used as a DisplayNames. + * This later work occurs in |resolveDisplayNamesInternals|; steps not noted + * here occur there. 
 */
function InitializeDisplayNames(displayNames, locales, options, mozExtensions) {
  assert(
    IsObject(displayNames),
    "InitializeDisplayNames called with non-object"
  );
  assert(
    intl_GuardToDisplayNames(displayNames) !== null,
    "InitializeDisplayNames called with non-DisplayNames"
  );

  // Lazy DisplayNames data has the following structure:
  //
  //   {
  //     requestedLocales: List of locales,
  //
  //     opt: // opt object computed in InitializeDisplayNames
  //       {
  //         localeMatcher: "lookup" / "best fit",
  //
  //         ca: string matching a Unicode extension type, // optional
  //       }
  //
  //     localeMatcher: "lookup" / "best fit",
  //
  //     style: "narrow" / "short" / "abbreviated" / "long",
  //
  //     type: "language" / "region" / "script" / "currency" / "calendar" /
  //           "dateTimeField" / "weekday" / "month" / "quarter" / "dayPeriod"
  //
  //     fallback: "code" / "none",
  //
  //     // field present only if type === "language":
  //     languageDisplay: "dialect" / "standard",
  //
  //     mozExtensions: true / false,
  //   }
  //
  // Note that lazy data is only installed as a final step of initialization,
  // so every DisplayNames lazy data object has *all* these properties, never a
  // subset of them.
  //
  // NOTE(review): this function stores |localeMatcher| only on |opt|, not as a
  // top-level lazy-data property as the listing above suggests -- confirm.
  var lazyDisplayNamesData = std_Object_create(null);

  // Step 3.
  var requestedLocales = CanonicalizeLocaleList(locales);
  lazyDisplayNamesData.requestedLocales = requestedLocales;

  // Step 4.
  if (!IsObject(options)) {
    ThrowTypeError(
      JSMSG_OBJECT_REQUIRED,
      options === null ? "null" : typeof options
    );
  }

  // Step 5.
  var opt = new_Record();
  lazyDisplayNamesData.opt = opt;
  lazyDisplayNamesData.mozExtensions = mozExtensions;

  // Steps 7-8.
  var matcher = GetOption(
    options,
    "localeMatcher",
    "string",
    ["lookup", "best fit"],
    "best fit"
  );
  opt.localeMatcher = matcher;

  // The "calendar" option is only read when Mozilla extensions are enabled.
  if (mozExtensions) {
    var calendar = GetOption(
      options,
      "calendar",
      "string",
      undefined,
      undefined
    );

    if (calendar !== undefined) {
      calendar = intl_ValidateAndCanonicalizeUnicodeExtensionType(
        calendar,
        "calendar",
        "ca"
      );
    }

    opt.ca = calendar;
  }

  // Step 10.
  // The extended variant additionally accepts the "abbreviated" style.
  var style;
  if (mozExtensions) {
    style = GetOption(
      options,
      "style",
      "string",
      ["narrow", "short", "abbreviated", "long"],
      "long"
    );
  } else {
    style = GetOption(
      options,
      "style",
      "string",
      ["narrow", "short", "long"],
      "long"
    );
  }

  // Step 11.
  lazyDisplayNamesData.style = style;

  // Step 12.
  // The extended variant additionally accepts "weekday", "month", "quarter"
  // and "dayPeriod".
  var type;
  if (mozExtensions) {
    type = GetOption(
      options,
      "type",
      "string",
      [
        "language",
        "region",
        "script",
        "currency",
        "calendar",
        "dateTimeField",
        "weekday",
        "month",
        "quarter",
        "dayPeriod",
      ],
      undefined
    );
  } else {
    type = GetOption(
      options,
      "type",
      "string",
      ["language", "region", "script", "currency", "calendar", "dateTimeField"],
      undefined
    );
  }

  // Step 13.
  if (type === undefined) {
    ThrowTypeError(JSMSG_UNDEFINED_TYPE);
  }

  // Step 14.
  lazyDisplayNamesData.type = type;

  // Step 15.
  var fallback = GetOption(
    options,
    "fallback",
    "string",
    ["code", "none"],
    "code"
  );

  // Step 16.
  lazyDisplayNamesData.fallback = fallback;

  // Step 24.
  // The option is always read, but only stored for type "language" (below).
  var languageDisplay = GetOption(
    options,
    "languageDisplay",
    "string",
    ["dialect", "standard"],
    "dialect"
  );

  // Step 25.
  if (type === "language") {
    lazyDisplayNamesData.languageDisplay = languageDisplay;
  }

  // We've done everything that must be done now: mark the lazy data as fully
  // computed and install it.
  initializeIntlObject(displayNames, "DisplayNames", lazyDisplayNamesData);
}

/**
 * Returns the subset of the given locale list for which this locale list has a
 * matching (possibly fallback) locale. Locales appear in the same order in the
 * returned list as in the input list.
 */
function Intl_DisplayNames_supportedLocalesOf(locales /*, options*/) {
  var options = ArgumentsLength() > 1 ? GetArgument(1) : undefined;

  // Step 1.
  var availableLocales = "DisplayNames";

  // Step 2.
  var requestedLocales = CanonicalizeLocaleList(locales);

  // Step 3.
  return SupportedLocales(availableLocales, requestedLocales, options);
}

/**
 * Returns the display name for |code|, computed by |intl_ComputeDisplayName|
 * from the internal properties of this DisplayNames object.
 */
function Intl_DisplayNames_of(code) {
  // Step 1.
  var displayNames = this;

  // Steps 2-3.
  // If |this| is not a DisplayNames object, delegate to
  // |intl_CallDisplayNamesMethodIfWrapped|.
  if (
    !IsObject(displayNames) ||
    (displayNames = intl_GuardToDisplayNames(displayNames)) === null
  ) {
    return callFunction(
      intl_CallDisplayNamesMethodIfWrapped,
      this,
      "Intl_DisplayNames_of"
    );
  }

  code = ToString(code);

  var internals = getDisplayNamesInternals(displayNames);

  // Unpack the internals object to avoid a slow runtime to selfhosted JS call
  // in |intl_ComputeDisplayName()|.
  // |calendar| and |languageDisplay| default to the empty string when absent.
  var {
    locale,
    calendar = "",
    style,
    type,
    languageDisplay = "",
    fallback,
  } = internals;

  // Steps 5-10.
  return intl_ComputeDisplayName(
    displayNames,
    locale,
    calendar,
    style,
    languageDisplay,
    fallback,
    type,
    code
  );
}

/**
 * Returns the resolved options for a DisplayNames object.
 */
function Intl_DisplayNames_resolvedOptions() {
  // Step 1.
  var displayNames = this;

  // Steps 2-3.
  if (
    !IsObject(displayNames) ||
    (displayNames = intl_GuardToDisplayNames(displayNames)) === null
  ) {
    return callFunction(
      intl_CallDisplayNamesMethodIfWrapped,
      this,
      "Intl_DisplayNames_resolvedOptions"
    );
  }

  var internals = getDisplayNamesInternals(displayNames);

  // Steps 4-5.
  var options = {
    locale: internals.locale,
    style: internals.style,
    type: internals.type,
    fallback: internals.fallback,
  };

  // languageDisplay is only present for language display names.
  assert(
    hasOwn("languageDisplay", internals) === (internals.type === "language"),
    "languageDisplay is present iff type is 'language'"
  );

  if (hasOwn("languageDisplay", internals)) {
    DefineDataProperty(options, "languageDisplay", internals.languageDisplay);
  }

  // |calendar| is only present on the internals when they were resolved with
  // Mozilla extensions enabled.
  if (hasOwn("calendar", internals)) {
    DefineDataProperty(options, "calendar", internals.calendar);
  }

  // Step 6.
  return options;
}
diff --git a/js/src/builtin/intl/FormatBuffer.h b/js/src/builtin/intl/FormatBuffer.h
new file mode 100644
index 0000000000..42118e77d8
--- /dev/null
+++ b/js/src/builtin/intl/FormatBuffer.h
@@ -0,0 +1,154 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef builtin_intl_FormatBuffer_h
#define builtin_intl_FormatBuffer_h

#include "mozilla/Assertions.h"
#include "mozilla/Span.h"
#include "mozilla/TextUtils.h"

#include <stddef.h>
#include <stdint.h>

#include "js/AllocPolicy.h"
#include "js/CharacterEncoding.h"
#include "js/TypeDecls.h"
#include "js/UniquePtr.h"
#include "js/Vector.h"
#include "vm/StringType.h"

namespace js::intl {

/**
 * A buffer for formatting unified intl data.
 *
 * The buffer wraps a js::Vector of |CharT|. A callee writes into the reserved
 * storage and then reports the number of written elements via |written()|.
 */
template <typename CharT, size_t MinInlineCapacity = 0,
          class AllocPolicy = TempAllocPolicy>
class FormatBuffer {
 public:
  using CharType = CharT;

  // Allow move constructors, but not copy constructors, as this class owns a
  // js::Vector.
  FormatBuffer(FormatBuffer&& other) noexcept = default;
  FormatBuffer& operator=(FormatBuffer&& other) noexcept = default;

  explicit FormatBuffer(AllocPolicy aP = AllocPolicy())
      : buffer_(std::move(aP)) {
    // The initial capacity matches the requested minimum inline capacity, as
    // long as it doesn't exceed |Vector::kMaxInlineBytes / sizeof(CharT)|. If
    // this assertion should ever fail, either reduce |MinInlineCapacity| or
    // make the FormatBuffer initialization fallible.
    MOZ_ASSERT(buffer_.capacity() == MinInlineCapacity);
    if constexpr (MinInlineCapacity > 0) {
      // Ensure the full capacity is marked as reserved.
      //
      // Reserving the minimum inline capacity can never fail, even when
      // simulating OOM.
      MOZ_ALWAYS_TRUE(buffer_.reserve(MinInlineCapacity));
    }
  }

  // Implicitly convert to a Span.
  operator mozilla::Span<CharType>() { return buffer_; }
  operator mozilla::Span<const CharType>() const { return buffer_; }

  /**
   * Ensures the buffer has enough space to accommodate |size| elements.
   *
   * Returns false if the underlying vector fails to reserve the space.
   */
  [[nodiscard]] bool reserve(size_t size) {
    // Call |reserve| a second time to ensure its full capacity is marked as
    // reserved.
    return buffer_.reserve(size) && buffer_.reserve(buffer_.capacity());
  }

  /**
   * Returns the raw data inside the buffer.
   */
  CharType* data() { return buffer_.begin(); }

  /**
   * Returns the count of elements written into the buffer.
   */
  size_t length() const { return buffer_.length(); }

  /**
   * Returns the buffer's overall capacity.
   */
  size_t capacity() const { return buffer_.capacity(); }

  /**
   * Resizes the buffer to the given amount of written elements.
   *
   * |amount| must not exceed the current capacity.
   */
  void written(size_t amount) {
    MOZ_ASSERT(amount <= buffer_.capacity());
    // This sets |buffer_|'s internal size so that it matches how much was
    // written. This is necessary because the write happens across FFI
    // boundaries.
    size_t curLength = length();
    if (amount > curLength) {
      buffer_.infallibleGrowByUninitialized(amount - curLength);
    } else {
      buffer_.shrinkBy(curLength - amount);
    }
  }

  /**
   * Copies the buffer's data to a JSString.
   *
   * TODO(#1715842) - This should be more explicit on needing to handle OOM
   * errors. In this case it returns a nullptr that must be checked, but it may
   * not be obvious.
   */
  JSLinearString* toString(JSContext* cx) const {
    if constexpr (std::is_same_v<CharT, uint8_t> ||
                  std::is_same_v<CharT, unsigned char> ||
                  std::is_same_v<CharT, char>) {
      // Handle the UTF-8 encoding case.
      return NewStringCopyUTF8N(
          cx, JS::UTF8Chars(buffer_.begin(), buffer_.length()));
    } else {
      // Handle the UTF-16 encoding case.
      static_assert(std::is_same_v<CharT, char16_t>);
      return NewStringCopyN<CanGC>(cx, buffer_.begin(), buffer_.length());
    }
  }

  /**
   * Copies the buffer's data to a JSString. The buffer must contain only
   * ASCII characters.
   *
   * Only available when |CharT| is |char|.
   */
  JSLinearString* toAsciiString(JSContext* cx) const {
    static_assert(std::is_same_v<CharT, char>);

    MOZ_ASSERT(mozilla::IsAscii(buffer_));
    return NewStringCopyN<CanGC>(cx, buffer_.begin(), buffer_.length());
  }

  /**
   * Extract this buffer's content as a null-terminated string.
   *
   * Returns nullptr if appending the terminating NUL fails.
   */
  UniquePtr<CharType[], JS::FreePolicy> extractStringZ() {
    // Adding the NUL character on an already null-terminated string is likely
    // an error. If there's ever a valid use case which triggers this assertion,
    // we should change the below code to only conditionally add '\0'.
    MOZ_ASSERT_IF(!buffer_.empty(), buffer_.end()[-1] != '\0');

    if (!buffer_.append('\0')) {
      return nullptr;
    }
    return UniquePtr<CharType[], JS::FreePolicy>(
        buffer_.extractOrCopyRawBuffer());
  }

 private:
  js::Vector<CharT, MinInlineCapacity, AllocPolicy> buffer_;
};

}  // namespace js::intl

#endif /* builtin_intl_FormatBuffer_h */
diff --git a/js/src/builtin/intl/IcuMemoryUsage.java b/js/src/builtin/intl/IcuMemoryUsage.java
new file mode 100644
index 0000000000..2295e2298e
--- /dev/null
+++ b/js/src/builtin/intl/IcuMemoryUsage.java
@@ -0,0 +1,268 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.regex.*;
import java.util.stream.Collectors;

/**
 * Java program to estimate the memory usage of ICU objects (bug 1585536).
 *
 * It computes for each Intl constructor the amount of allocated memory. We're
 * currently using the maximum memory ("max" in the output) to estimate the
 * memory consumption of ICU objects.
 *
 * Insert before {@code JS_InitWithFailureDiagnostic} in "js.cpp":
 *
 * <pre>
 * <code>
 * JS_SetICUMemoryFunctions(
 *     [](const void*, size_t size) {
 *       void* ptr = malloc(size);
 *       if (ptr) {
 *         printf(" alloc: %p -> %zu\n", ptr, size);
 *       }
 *       return ptr;
 *     },
 *     [](const void*, void* p, size_t size) {
 *       void* ptr = realloc(p, size);
 *       if (p) {
 *         printf(" realloc: %p -> %p -> %zu\n", p, ptr, size);
 *       } else {
 *         printf(" alloc: %p -> %zu\n", ptr, size);
 *       }
 *       return ptr;
 *     },
 *     [](const void*, void* p) {
 *       if (p) {
 *         printf(" free: %p\n", p);
 *       }
 *       free(p);
 *     });
 * </code>
 * </pre>
 *
 * Run this script with:
 * {@code java IcuMemoryUsage.java $MOZ_JS_SHELL}.
+ */ +@SuppressWarnings("preview") +public class IcuMemoryUsage { + private enum Phase { + None, Create, Init, Destroy, Collect, Quit + } + + private static final class Memory { + private Phase phase = Phase.None; + private HashMap<Long, Map.Entry<Phase, Long>> allocations = new HashMap<>(); + private HashSet<Long> freed = new HashSet<>(); + private HashMap<Long, Map.Entry<Phase, Long>> completeAllocations = new HashMap<>(); + private int allocCount = 0; + private ArrayList<Long> allocSizes = new ArrayList<>(); + + void transition(Phase nextPhase) { + assert phase.ordinal() + 1 == nextPhase.ordinal() || (phase == Phase.Collect && nextPhase == Phase.Create); + phase = nextPhase; + + // Create a clean slate when starting a new create cycle or before termination. + if (phase == Phase.Create || phase == Phase.Quit) { + transferAllocations(); + } + + // Only measure the allocation size when creating the second object with the + // same locale. + if (phase == Phase.Collect && ++allocCount % 2 == 0) { + long size = allocations.values().stream().map(Map.Entry::getValue).reduce(0L, (a, c) -> a + c); + allocSizes.add(size); + } + } + + void transferAllocations() { + completeAllocations.putAll(allocations); + completeAllocations.keySet().removeAll(freed); + allocations.clear(); + freed.clear(); + } + + void alloc(long ptr, long size) { + allocations.put(ptr, Map.entry(phase, size)); + } + + void realloc(long oldPtr, long newPtr, long size) { + free(oldPtr); + allocations.put(newPtr, Map.entry(phase, size)); + } + + void free(long ptr) { + if (allocations.remove(ptr) == null) { + freed.add(ptr); + } + } + + LongSummaryStatistics statistics() { + return allocSizes.stream().collect(Collectors.summarizingLong(Long::valueOf)); + } + + double percentile(double p) { + var size = allocSizes.size(); + return allocSizes.stream().sorted().skip((long) ((size - 1) * p)).limit(2 - size % 2) + .mapToDouble(Long::doubleValue).average().getAsDouble(); + } + + long persistent() { + return 
completeAllocations.values().stream().map(Map.Entry::getValue).reduce(0L, (a, c) -> a + c); + } + } + + private static long parseSize(Matcher m, int group) { + return Long.parseLong(m.group(group), 10); + } + + private static long parsePointer(Matcher m, int group) { + return Long.parseLong(m.group(group), 16); + } + + private static void measure(String exec, String constructor, String description, String initializer) throws IOException { + var pb = new ProcessBuilder(exec, "--file=-", "--", constructor, initializer); + var process = pb.start(); + + try (var writer = new BufferedWriter( + new OutputStreamWriter(process.getOutputStream(), StandardCharsets.UTF_8))) { + writer.write(sourceCode); + writer.flush(); + } + + var memory = new Memory(); + + try (var reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) { + var reAlloc = Pattern.compile("\\s+alloc: 0x(\\p{XDigit}+) -> (\\p{Digit}+)"); + var reRealloc = Pattern.compile("\\s+realloc: 0x(\\p{XDigit}+) -> 0x(\\p{XDigit}+) -> (\\p{Digit}+)"); + var reFree = Pattern.compile("\\s+free: 0x(\\p{XDigit}+)"); + + String line; + while ((line = reader.readLine()) != null) { + Matcher m; + if ((m = reAlloc.matcher(line)).matches()) { + var ptr = parsePointer(m, 1); + var size = parseSize(m, 2); + memory.alloc(ptr, size); + } else if ((m = reRealloc.matcher(line)).matches()) { + var oldPtr = parsePointer(m, 1); + var newPtr = parsePointer(m, 2); + var size = parseSize(m, 3); + memory.realloc(oldPtr, newPtr, size); + } else if ((m = reFree.matcher(line)).matches()) { + var ptr = parsePointer(m, 1); + memory.free(ptr); + } else { + memory.transition(Phase.valueOf(line)); + } + } + } + + try (var errorReader = new BufferedReader(new InputStreamReader(process.getErrorStream()))) { + String line; + while ((line = errorReader.readLine()) != null) { + System.err.println(line); + } + } + + var stats = memory.statistics(); + + System.out.printf("%s%n", description); + System.out.printf(" max: %d%n", 
stats.getMax()); + System.out.printf(" min: %d%n", stats.getMin()); + System.out.printf(" avg: %.0f%n", stats.getAverage()); + System.out.printf(" 50p: %.0f%n", memory.percentile(0.50)); + System.out.printf(" 75p: %.0f%n", memory.percentile(0.75)); + System.out.printf(" 85p: %.0f%n", memory.percentile(0.85)); + System.out.printf(" 95p: %.0f%n", memory.percentile(0.95)); + System.out.printf(" 99p: %.0f%n", memory.percentile(0.99)); + System.out.printf(" mem: %d%n", memory.persistent()); + + memory.transferAllocations(); + assert memory.persistent() == 0 : String.format("Leaked %d bytes", memory.persistent()); + } + + public static void main(String[] args) throws IOException { + if (args.length == 0) { + throw new RuntimeException("The first argument must point to the SpiderMonkey shell executable"); + } + + record Entry (String constructor, String description, String initializer) { + public static Entry of(String constructor, String description, String initializer) { + return new Entry(constructor, description, initializer); + } + + public static Entry of(String constructor, String initializer) { + return new Entry(constructor, constructor, initializer); + } + } + + var objects = new ArrayList<Entry>(); + objects.add(Entry.of("Intl.Collator", "o.compare('a', 'b')")); + objects.add(Entry.of("Intl.DateTimeFormat", "DateTimeFormat (UDateFormat)", "o.format(0)")); + objects.add(Entry.of("Intl.DateTimeFormat", "DateTimeFormat (UDateFormat+UDateIntervalFormat)", + "o.formatRange(0, 24*60*60*1000)")); + objects.add(Entry.of("Intl.DisplayNames", "o.of('en')")); + objects.add(Entry.of("Intl.ListFormat", "o.format(['a', 'b'])")); + objects.add(Entry.of("Intl.NumberFormat", "o.format(0)")); + objects.add(Entry.of("Intl.NumberFormat", "NumberFormat (UNumberRangeFormatter)", + "o.formatRange(0, 1000)")); + objects.add(Entry.of("Intl.PluralRules", "o.select(0)")); + objects.add(Entry.of("Intl.RelativeTimeFormat", "o.format(0, 'hour')")); + 
objects.add(Entry.of("Temporal.TimeZone", "o.getNextTransition(new Temporal.Instant(0n))")); + + for (var entry : objects) { + measure(args[0], entry.constructor, entry.description, entry.initializer); + } + } + + private static final String sourceCode = """ +const constructorName = scriptArgs[0]; +const initializer = Function("o", scriptArgs[1]); + +const extras = {}; +addIntlExtras(extras); + +let constructor; +let inputs; +if (constructorName.startsWith("Intl.")) { + let simpleName = constructorName.substring("Intl.".length); + constructor = Intl[simpleName]; + inputs = getAvailableLocalesOf(simpleName); +} else if (constructorName === "Temporal.TimeZone") { + constructor = Temporal.TimeZone; + inputs = Intl.supportedValuesOf("timeZone"); +} else { + throw new Error("Unsupported constructor name: " + constructorName); +} + +for (let i = 0; i < inputs.length; ++i) { + // Loop twice in case the first time we create an object with a new locale + // allocates additional memory when loading the locale data. + for (let j = 0; j < 2; ++j) { + let options = undefined; + if (constructor === Intl.DisplayNames) { + options = {type: "language"}; + } + + print("Create"); + let obj = new constructor(inputs[i], options); + + print("Init"); + initializer(obj); + + print("Destroy"); + gc(); + gc(); + print("Collect"); + } +} + +print("Quit"); +quit(); +"""; +} diff --git a/js/src/builtin/intl/IntlObject.cpp b/js/src/builtin/intl/IntlObject.cpp new file mode 100644 index 0000000000..f504aca289 --- /dev/null +++ b/js/src/builtin/intl/IntlObject.cpp @@ -0,0 +1,926 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Implementation of the Intl object and its non-constructor properties. 
*/ + +#include "builtin/intl/IntlObject.h" + +#include "mozilla/Assertions.h" +#include "mozilla/intl/Calendar.h" +#include "mozilla/intl/Collator.h" +#include "mozilla/intl/Currency.h" +#include "mozilla/intl/Locale.h" +#include "mozilla/intl/MeasureUnitGenerated.h" +#include "mozilla/intl/TimeZone.h" + +#include <algorithm> +#include <array> +#include <cstring> +#include <iterator> +#include <string_view> + +#include "builtin/Array.h" +#include "builtin/intl/CommonFunctions.h" +#include "builtin/intl/FormatBuffer.h" +#include "builtin/intl/NumberingSystemsGenerated.h" +#include "builtin/intl/SharedIntlData.h" +#include "builtin/intl/StringAsciiChars.h" +#include "ds/Sort.h" +#include "js/Class.h" +#include "js/friend/ErrorMessages.h" // js::GetErrorMessage, JSMSG_* +#include "js/GCAPI.h" +#include "js/GCVector.h" +#include "js/PropertySpec.h" +#include "js/Result.h" +#include "js/StableStringChars.h" +#include "vm/GlobalObject.h" +#include "vm/JSAtomUtils.h" // ClassName +#include "vm/JSContext.h" +#include "vm/PlainObject.h" // js::PlainObject +#include "vm/StringType.h" + +#include "vm/JSObject-inl.h" +#include "vm/NativeObject-inl.h" + +using namespace js; + +/******************** Intl ********************/ + +bool js::intl_GetCalendarInfo(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 1); + + UniqueChars locale = intl::EncodeLocale(cx, args[0].toString()); + if (!locale) { + return false; + } + + auto result = mozilla::intl::Calendar::TryCreate(locale.get()); + if (result.isErr()) { + intl::ReportInternalError(cx, result.unwrapErr()); + return false; + } + auto calendar = result.unwrap(); + + RootedObject info(cx, NewPlainObject(cx)); + if (!info) { + return false; + } + + RootedValue v(cx); + + v.setInt32(static_cast<int32_t>(calendar->GetFirstDayOfWeek())); + if (!DefineDataProperty(cx, info, cx->names().firstDayOfWeek, v)) { + return false; + } + + 
v.setInt32(calendar->GetMinimalDaysInFirstWeek()); + if (!DefineDataProperty(cx, info, cx->names().minDays, v)) { + return false; + } + + Rooted<ArrayObject*> weekendArray(cx, NewDenseEmptyArray(cx)); + if (!weekendArray) { + return false; + } + + auto weekend = calendar->GetWeekend(); + if (weekend.isErr()) { + intl::ReportInternalError(cx, weekend.unwrapErr()); + return false; + } + + for (auto day : weekend.unwrap()) { + if (!NewbornArrayPush(cx, weekendArray, + Int32Value(static_cast<int32_t>(day)))) { + return false; + } + } + + v.setObject(*weekendArray); + if (!DefineDataProperty(cx, info, cx->names().weekend, v)) { + return false; + } + + args.rval().setObject(*info); + return true; +} + +static void ReportBadKey(JSContext* cx, JSString* key) { + if (UniqueChars chars = QuoteString(cx, key, '"')) { + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_INVALID_KEY, + chars.get()); + } +} + +static bool SameOrParentLocale(JSLinearString* locale, + JSLinearString* otherLocale) { + // Return true if |locale| is the same locale as |otherLocale|. + if (locale->length() == otherLocale->length()) { + return EqualStrings(locale, otherLocale); + } + + // Also return true if |locale| is the parent locale of |otherLocale|. + if (locale->length() < otherLocale->length()) { + return HasSubstringAt(otherLocale, locale, 0) && + otherLocale->latin1OrTwoByteChar(locale->length()) == '-'; + } + + return false; +} + +using SupportedLocaleKind = js::intl::SharedIntlData::SupportedLocaleKind; + +// 9.2.2 BestAvailableLocale ( availableLocales, locale ) +static JS::Result<JSLinearString*> BestAvailableLocale( + JSContext* cx, SupportedLocaleKind kind, Handle<JSLinearString*> locale, + Handle<JSLinearString*> defaultLocale) { + // In the spec, [[availableLocales]] is formally a list of all available + // locales. But in our implementation, it's an *incomplete* list, not + // necessarily including the default locale (and all locales implied by it, + // e.g. 
"de" implied by "de-CH"), if that locale isn't in every + // [[availableLocales]] list (because that locale is supported through + // fallback, e.g. "de-CH" supported through "de"). + // + // If we're considering the default locale, augment the spec loop with + // additional checks to also test whether the current prefix is a prefix of + // the default locale. + + intl::SharedIntlData& sharedIntlData = cx->runtime()->sharedIntlData.ref(); + + auto findLast = [](const auto* chars, size_t length) { + auto rbegin = std::make_reverse_iterator(chars + length); + auto rend = std::make_reverse_iterator(chars); + auto p = std::find(rbegin, rend, '-'); + + // |dist(chars, p.base())| is equal to |dist(p, rend)|, pick whichever you + // find easier to reason about when using reverse iterators. + ptrdiff_t r = std::distance(chars, p.base()); + MOZ_ASSERT(r == std::distance(p, rend)); + + // But always subtract one to convert from the reverse iterator result to + // the corresponding forward iterator value, because reverse iterators point + // to one element past the forward iterator value. + return r - 1; + }; + + // Step 1. + Rooted<JSLinearString*> candidate(cx, locale); + + // Step 2. + while (true) { + // Step 2.a. + bool supported = false; + if (!sharedIntlData.isSupportedLocale(cx, kind, candidate, &supported)) { + return cx->alreadyReportedError(); + } + if (supported) { + return candidate.get(); + } + + if (defaultLocale && SameOrParentLocale(candidate, defaultLocale)) { + return candidate.get(); + } + + // Step 2.b. + ptrdiff_t pos; + if (candidate->hasLatin1Chars()) { + JS::AutoCheckCannotGC nogc; + pos = findLast(candidate->latin1Chars(nogc), candidate->length()); + } else { + JS::AutoCheckCannotGC nogc; + pos = findLast(candidate->twoByteChars(nogc), candidate->length()); + } + + if (pos < 0) { + return nullptr; + } + + // Step 2.c.
+ size_t length = size_t(pos); + if (length >= 2 && candidate->latin1OrTwoByteChar(length - 2) == '-') { + length -= 2; + } + + // Step 2.d. + candidate = NewDependentString(cx, candidate, 0, length); + if (!candidate) { + return cx->alreadyReportedError(); + } + } +} + +// 9.2.2 BestAvailableLocale ( availableLocales, locale ) +// +// Carries an additional third argument in our implementation to provide the +// default locale. See the doc-comment in the header file. +bool js::intl_BestAvailableLocale(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 3); + + SupportedLocaleKind kind; + { + JSLinearString* typeStr = args[0].toString()->ensureLinear(cx); + if (!typeStr) { + return false; + } + + if (StringEqualsLiteral(typeStr, "Collator")) { + kind = SupportedLocaleKind::Collator; + } else if (StringEqualsLiteral(typeStr, "DateTimeFormat")) { + kind = SupportedLocaleKind::DateTimeFormat; + } else if (StringEqualsLiteral(typeStr, "DisplayNames")) { + kind = SupportedLocaleKind::DisplayNames; + } else if (StringEqualsLiteral(typeStr, "ListFormat")) { + kind = SupportedLocaleKind::ListFormat; + } else if (StringEqualsLiteral(typeStr, "NumberFormat")) { + kind = SupportedLocaleKind::NumberFormat; + } else if (StringEqualsLiteral(typeStr, "PluralRules")) { + kind = SupportedLocaleKind::PluralRules; + } else if (StringEqualsLiteral(typeStr, "RelativeTimeFormat")) { + kind = SupportedLocaleKind::RelativeTimeFormat; + } else { + MOZ_ASSERT(StringEqualsLiteral(typeStr, "Segmenter")); + kind = SupportedLocaleKind::Segmenter; + } + } + + Rooted<JSLinearString*> locale(cx, args[1].toString()->ensureLinear(cx)); + if (!locale) { + return false; + } + +#ifdef DEBUG + { + MOZ_ASSERT(StringIsAscii(locale), "language tags are ASCII-only"); + + // |locale| is a structurally valid language tag. 
+ mozilla::intl::Locale tag; + + using ParserError = mozilla::intl::LocaleParser::ParserError; + mozilla::Result<mozilla::Ok, ParserError> parse_result = Ok(); + { + intl::StringAsciiChars chars(locale); + if (!chars.init(cx)) { + return false; + } + + parse_result = mozilla::intl::LocaleParser::TryParse(chars, tag); + } + + if (parse_result.isErr()) { + MOZ_ASSERT(parse_result.unwrapErr() == ParserError::OutOfMemory, + "locale is a structurally valid language tag"); + + intl::ReportInternalError(cx); + return false; + } + + MOZ_ASSERT(!tag.GetUnicodeExtension(), + "locale must contain no Unicode extensions"); + + if (auto result = tag.Canonicalize(); result.isErr()) { + MOZ_ASSERT( + result.unwrapErr() != + mozilla::intl::Locale::CanonicalizationError::DuplicateVariant); + intl::ReportInternalError(cx); + return false; + } + + intl::FormatBuffer<char, intl::INITIAL_CHAR_BUFFER_SIZE> buffer(cx); + if (auto result = tag.ToString(buffer); result.isErr()) { + intl::ReportInternalError(cx, result.unwrapErr()); + return false; + } + + JSLinearString* tagStr = buffer.toString(cx); + if (!tagStr) { + return false; + } + + MOZ_ASSERT(EqualStrings(locale, tagStr), + "locale is a canonicalized language tag"); + } +#endif + + MOZ_ASSERT(args[2].isNull() || args[2].isString()); + + Rooted<JSLinearString*> defaultLocale(cx); + if (args[2].isString()) { + defaultLocale = args[2].toString()->ensureLinear(cx); + if (!defaultLocale) { + return false; + } + } + + JSString* result; + JS_TRY_VAR_OR_RETURN_FALSE( + cx, result, BestAvailableLocale(cx, kind, locale, defaultLocale)); + + if (result) { + args.rval().setString(result); + } else { + args.rval().setUndefined(); + } + return true; +} + +bool js::intl_supportedLocaleOrFallback(JSContext* cx, unsigned argc, + Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 1); + + Rooted<JSLinearString*> locale(cx, args[0].toString()->ensureLinear(cx)); + if (!locale) { + return false; + } + + 
mozilla::intl::Locale tag; + bool canParseLocale = false; + if (StringIsAscii(locale)) { + intl::StringAsciiChars chars(locale); + if (!chars.init(cx)) { + return false; + } + + // Tell the analysis the |tag.canonicalize()| method can't GC. + JS::AutoSuppressGCAnalysis nogc; + + canParseLocale = mozilla::intl::LocaleParser::TryParse(chars, tag).isOk() && + tag.Canonicalize().isOk(); + } + + Rooted<JSLinearString*> candidate(cx); + if (!canParseLocale) { + candidate = NewStringCopyZ<CanGC>(cx, intl::LastDitchLocale()); + if (!candidate) { + return false; + } + } else { + // The default locale must be in [[AvailableLocales]], and that list must + // not contain any locales with Unicode extension sequences, so remove any + // present in the candidate. + tag.ClearUnicodeExtension(); + + intl::FormatBuffer<char, intl::INITIAL_CHAR_BUFFER_SIZE> buffer(cx); + if (auto result = tag.ToString(buffer); result.isErr()) { + intl::ReportInternalError(cx, result.unwrapErr()); + return false; + } + + candidate = buffer.toAsciiString(cx); + if (!candidate) { + return false; + } + + // Certain old-style language tags lack a script code, but in current + // usage they *would* include a script code. Map these over to modern + // forms. + for (const auto& mapping : js::intl::oldStyleLanguageTagMappings) { + const char* oldStyle = mapping.oldStyle; + const char* modernStyle = mapping.modernStyle; + + if (StringEqualsAscii(candidate, oldStyle)) { + candidate = NewStringCopyZ<CanGC>(cx, modernStyle); + if (!candidate) { + return false; + } + break; + } + } + } + + // 9.1 Internal slots of Service Constructors + // + // - [[AvailableLocales]] is a List [...]. The list must include the value + // returned by the DefaultLocale abstract operation (6.2.4), [...]. + // + // That implies we must ignore any candidate which isn't supported by all + // Intl service constructors. 
+ + Rooted<JSLinearString*> supportedCollator(cx); + JS_TRY_VAR_OR_RETURN_FALSE( + cx, supportedCollator, + BestAvailableLocale(cx, SupportedLocaleKind::Collator, candidate, + nullptr)); + + Rooted<JSLinearString*> supportedDateTimeFormat(cx); + JS_TRY_VAR_OR_RETURN_FALSE( + cx, supportedDateTimeFormat, + BestAvailableLocale(cx, SupportedLocaleKind::DateTimeFormat, candidate, + nullptr)); + +#ifdef DEBUG + // Note: We don't test the supported locales of the remaining Intl service + // constructors, because the set of supported locales is exactly equal to + // the set of supported locales of Intl.DateTimeFormat. + for (auto kind : { + SupportedLocaleKind::DisplayNames, + SupportedLocaleKind::ListFormat, + SupportedLocaleKind::NumberFormat, + SupportedLocaleKind::PluralRules, + SupportedLocaleKind::RelativeTimeFormat, + SupportedLocaleKind::Segmenter, + }) { + JSLinearString* supported; + JS_TRY_VAR_OR_RETURN_FALSE( + cx, supported, BestAvailableLocale(cx, kind, candidate, nullptr)); + + MOZ_ASSERT(!!supported == !!supportedDateTimeFormat); + MOZ_ASSERT_IF(supported, EqualStrings(supported, supportedDateTimeFormat)); + } +#endif + + // Accept the candidate locale if it is supported by all Intl service + // constructors. + if (supportedCollator && supportedDateTimeFormat) { + // Use the actually supported locale instead of the candidate locale. For + // example when the candidate locale "en-US-posix" is supported through + // "en-US", use "en-US" as the default locale. + // + // Also prefer the supported locale with more subtags. For example when + // requesting "de-CH" and Intl.DateTimeFormat supports "de-CH", but + // Intl.Collator only "de", still return "de-CH" as the result. 
+ if (SameOrParentLocale(supportedCollator, supportedDateTimeFormat)) { + candidate = supportedDateTimeFormat; + } else { + candidate = supportedCollator; + } + } else { + candidate = NewStringCopyZ<CanGC>(cx, intl::LastDitchLocale()); + if (!candidate) { + return false; + } + } + + args.rval().setString(candidate); + return true; +} + +using StringList = GCVector<JSLinearString*>; + +/** + * Create a sorted array from a list of strings. + */ +static ArrayObject* CreateArrayFromList(JSContext* cx, + MutableHandle<StringList> list) { + // Reserve scratch space for MergeSort(). + size_t initialLength = list.length(); + if (!list.growBy(initialLength)) { + return nullptr; + } + + // Sort all strings in alphabetical order. + MOZ_ALWAYS_TRUE( + MergeSort(list.begin(), initialLength, list.begin() + initialLength, + [](const auto* a, const auto* b, bool* lessOrEqual) { + *lessOrEqual = CompareStrings(a, b) <= 0; + return true; + })); + + // Ensure we don't add duplicate entries to the array. + auto* end = std::unique( + list.begin(), list.begin() + initialLength, + [](const auto* a, const auto* b) { return EqualStrings(a, b); }); + + // std::unique leaves the elements after |end| with an unspecified value, so + // remove them first. And also delete the elements in the scratch space. + list.shrinkBy(std::distance(end, list.end())); + + // And finally copy the strings into the result array. + auto* array = NewDenseFullyAllocatedArray(cx, list.length()); + if (!array) { + return nullptr; + } + array->setDenseInitializedLength(list.length()); + + for (size_t i = 0; i < list.length(); ++i) { + array->initDenseElement(i, StringValue(list[i])); + } + + return array; +} + +/** + * Create an array from a sorted list of strings. + */ +template <size_t N> +static ArrayObject* CreateArrayFromSortedList( + JSContext* cx, const std::array<const char*, N>& list) { + // Ensure the list is sorted and doesn't contain duplicates. 
+#ifdef DEBUG + // See bug 1583449 for why the lambda can't be in the MOZ_ASSERT. + auto isLargerThanOrEqual = [](const auto& a, const auto& b) { + return std::strcmp(a, b) >= 0; + }; +#endif + MOZ_ASSERT(std::adjacent_find(std::begin(list), std::end(list), + isLargerThanOrEqual) == std::end(list)); + + size_t length = std::size(list); + + Rooted<ArrayObject*> array(cx, NewDenseFullyAllocatedArray(cx, length)); + if (!array) { + return nullptr; + } + array->ensureDenseInitializedLength(0, length); + + for (size_t i = 0; i < length; ++i) { + auto* str = NewStringCopyZ<CanGC>(cx, list[i]); + if (!str) { + return nullptr; + } + array->initDenseElement(i, StringValue(str)); + } + return array; +} + +/** + * Append the values of an intl::Enumeration to |list|, skipping the values + * listed in |unsupported|. + */ +template <const auto& unsupported, class Enumeration> +static bool EnumerationIntoList(JSContext* cx, Enumeration values, + MutableHandle<StringList> list) { + for (auto value : values) { + if (value.isErr()) { + intl::ReportInternalError(cx); + return false; + } + auto span = value.unwrap(); + + // Skip over known, unsupported values. + std::string_view sv(span.data(), span.size()); + if (std::any_of(std::begin(unsupported), std::end(unsupported), + [sv](const auto& e) { return sv == e; })) { + continue; + } + + auto* string = NewStringCopy<CanGC>(cx, span); + if (!string) { + return false; + } + if (!list.append(string)) { + return false; + } + } + + return true; +} + +/** + * Returns the list of calendar types which mustn't be returned by + * |Intl.supportedValuesOf()|. + */ +static constexpr auto UnsupportedCalendars() { + // No calendar values are currently unsupported. + return std::array<const char*, 0>{}; +} + +// Defined outside of the function to work around bugs in GCC<9. +// Also see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85589>.
+static constexpr auto UnsupportedCalendarsArray = UnsupportedCalendars(); + +/** + * AvailableCalendars ( ) + */ +static ArrayObject* AvailableCalendars(JSContext* cx) { + Rooted<StringList> list(cx, StringList(cx)); + + { + // Hazard analysis complains that the mozilla::Result destructor calls a + // GC function, which is unsound when returning an unrooted value. Work + // around this issue by restricting the lifetime of |keywords| to a + // separate block. + auto keywords = mozilla::intl::Calendar::GetBcp47KeywordValuesForLocale(""); + if (keywords.isErr()) { + intl::ReportInternalError(cx, keywords.unwrapErr()); + return nullptr; + } + + static constexpr auto& unsupported = UnsupportedCalendarsArray; + + if (!EnumerationIntoList<unsupported>(cx, keywords.unwrap(), &list)) { + return nullptr; + } + } + + return CreateArrayFromList(cx, &list); +} + +/** + * Returns the list of collation types which mustn't be returned by + * |Intl.supportedValuesOf()|. + */ +static constexpr auto UnsupportedCollations() { + return std::array{ + "search", + "standard", + }; +} + +// Defined outside of the function to workaround bugs in GCC<9. +// Also see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85589>. +static constexpr auto UnsupportedCollationsArray = UnsupportedCollations(); + +/** + * AvailableCollations ( ) + */ +static ArrayObject* AvailableCollations(JSContext* cx) { + Rooted<StringList> list(cx, StringList(cx)); + + { + // Hazard analysis complains that the mozilla::Result destructor calls a + // GC function, which is unsound when returning an unrooted value. Work + // around this issue by restricting the lifetime of |keywords| to a + // separate block. 
+ auto keywords = mozilla::intl::Collator::GetBcp47KeywordValues(); + if (keywords.isErr()) { + intl::ReportInternalError(cx, keywords.unwrapErr()); + return nullptr; + } + + static constexpr auto& unsupported = UnsupportedCollationsArray; + + if (!EnumerationIntoList<unsupported>(cx, keywords.unwrap(), &list)) { + return nullptr; + } + } + + return CreateArrayFromList(cx, &list); +} + +/** + * Returns a list of known, unsupported currencies which are returned by + * |Currency::GetISOCurrencies()|. + */ +static constexpr auto UnsupportedCurrencies() { + // "MVP" is also marked with "questionable, remove?" in ucurr.cpp, but only + // this single currency code isn't supported by |Intl.DisplayNames| and + // therefore must be excluded by |Intl.supportedValuesOf|. + return std::array{ + "LSM", // https://unicode-org.atlassian.net/browse/ICU-21687 + }; +} + +/** + * Return a list of known, missing currencies which aren't returned by + * |Currency::GetISOCurrencies()|. + */ +static constexpr auto MissingCurrencies() { + return std::array{ + "SLE", // https://unicode-org.atlassian.net/browse/ICU-21989 + "VED", // https://unicode-org.atlassian.net/browse/ICU-21989 + }; +} + +// Defined outside of the function to workaround bugs in GCC<9. +// Also see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85589>. +static constexpr auto UnsupportedCurrenciesArray = UnsupportedCurrencies(); +static constexpr auto MissingCurrenciesArray = MissingCurrencies(); + +/** + * AvailableCurrencies ( ) + */ +static ArrayObject* AvailableCurrencies(JSContext* cx) { + Rooted<StringList> list(cx, StringList(cx)); + + { + // Hazard analysis complains that the mozilla::Result destructor calls a + // GC function, which is unsound when returning an unrooted value. Work + // around this issue by restricting the lifetime of |currencies| to a + // separate block. 
+ auto currencies = mozilla::intl::Currency::GetISOCurrencies(); + if (currencies.isErr()) { + intl::ReportInternalError(cx, currencies.unwrapErr()); + return nullptr; + } + + static constexpr auto& unsupported = UnsupportedCurrenciesArray; + + if (!EnumerationIntoList<unsupported>(cx, currencies.unwrap(), &list)) { + return nullptr; + } + } + + // Add known missing values. + for (const char* value : MissingCurrenciesArray) { + auto* string = NewStringCopyZ<CanGC>(cx, value); + if (!string) { + return nullptr; + } + if (!list.append(string)) { + return nullptr; + } + } + + return CreateArrayFromList(cx, &list); +} + +/** + * AvailableNumberingSystems ( ) + */ +static ArrayObject* AvailableNumberingSystems(JSContext* cx) { + static constexpr std::array numberingSystems = { + NUMBERING_SYSTEMS_WITH_SIMPLE_DIGIT_MAPPINGS}; + + return CreateArrayFromSortedList(cx, numberingSystems); +} + +/** + * AvailableTimeZones ( ) + */ +static ArrayObject* AvailableTimeZones(JSContext* cx) { + // Unsorted list of canonical time zone names, possibly containing + // duplicates. + Rooted<StringList> timeZones(cx, StringList(cx)); + + intl::SharedIntlData& sharedIntlData = cx->runtime()->sharedIntlData.ref(); + auto iterResult = sharedIntlData.availableTimeZonesIteration(cx); + if (iterResult.isErr()) { + return nullptr; + } + auto iter = iterResult.unwrap(); + + Rooted<JSAtom*> validatedTimeZone(cx); + Rooted<JSAtom*> ianaTimeZone(cx); + for (; !iter.done(); iter.next()) { + validatedTimeZone = iter.get(); + + // Canonicalize the time zone before adding it to the result array. + + // Some time zone names are canonicalized differently by ICU -- handle + // those first. 
+ ianaTimeZone.set(nullptr); + if (!sharedIntlData.tryCanonicalizeTimeZoneConsistentWithIANA( + cx, validatedTimeZone, &ianaTimeZone)) { + return nullptr; + } + + JSLinearString* timeZone; + if (ianaTimeZone) { + cx->markAtom(ianaTimeZone); + + timeZone = ianaTimeZone; + } else { + // Call into ICU to canonicalize the time zone. + + JS::AutoStableStringChars stableChars(cx); + if (!stableChars.initTwoByte(cx, validatedTimeZone)) { + return nullptr; + } + + intl::FormatBuffer<char16_t, intl::INITIAL_CHAR_BUFFER_SIZE> + canonicalTimeZone(cx); + auto result = mozilla::intl::TimeZone::GetCanonicalTimeZoneID( + stableChars.twoByteRange(), canonicalTimeZone); + if (result.isErr()) { + intl::ReportInternalError(cx, result.unwrapErr()); + return nullptr; + } + + timeZone = canonicalTimeZone.toString(cx); + if (!timeZone) { + return nullptr; + } + + // Canonicalize both to "UTC" per CanonicalizeTimeZoneName(). + if (StringEqualsLiteral(timeZone, "Etc/UTC") || + StringEqualsLiteral(timeZone, "Etc/GMT")) { + timeZone = cx->names().UTC; + } + } + + if (!timeZones.append(timeZone)) { + return nullptr; + } + } + + return CreateArrayFromList(cx, &timeZones); +} + +template <size_t N> +constexpr auto MeasurementUnitNames( + const mozilla::intl::SimpleMeasureUnit (&units)[N]) { + std::array<const char*, N> array = {}; + for (size_t i = 0; i < N; ++i) { + array[i] = units[i].name; + } + return array; +} + +/** + * AvailableUnits ( ) + */ +static ArrayObject* AvailableUnits(JSContext* cx) { + static constexpr auto simpleMeasureUnitNames = + MeasurementUnitNames(mozilla::intl::simpleMeasureUnits); + + return CreateArrayFromSortedList(cx, simpleMeasureUnitNames); +} + +bool js::intl_SupportedValuesOf(JSContext* cx, unsigned argc, JS::Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 1); + MOZ_ASSERT(args[0].isString()); + + JSLinearString* key = args[0].toString()->ensureLinear(cx); + if (!key) { + return false; + } + + ArrayObject* list; + if 
(StringEqualsLiteral(key, "calendar")) { + list = AvailableCalendars(cx); + } else if (StringEqualsLiteral(key, "collation")) { + list = AvailableCollations(cx); + } else if (StringEqualsLiteral(key, "currency")) { + list = AvailableCurrencies(cx); + } else if (StringEqualsLiteral(key, "numberingSystem")) { + list = AvailableNumberingSystems(cx); + } else if (StringEqualsLiteral(key, "timeZone")) { + list = AvailableTimeZones(cx); + } else if (StringEqualsLiteral(key, "unit")) { + list = AvailableUnits(cx); + } else { + ReportBadKey(cx, key); + return false; + } + if (!list) { + return false; + } + + args.rval().setObject(*list); + return true; +} + +static bool intl_toSource(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + args.rval().setString(cx->names().Intl); + return true; +} + +static const JSFunctionSpec intl_static_methods[] = { + JS_FN("toSource", intl_toSource, 0, 0), + JS_SELF_HOSTED_FN("getCanonicalLocales", "Intl_getCanonicalLocales", 1, 0), + JS_SELF_HOSTED_FN("supportedValuesOf", "Intl_supportedValuesOf", 1, 0), + JS_FS_END}; + +static const JSPropertySpec intl_static_properties[] = { + JS_STRING_SYM_PS(toStringTag, "Intl", JSPROP_READONLY), JS_PS_END}; + +static JSObject* CreateIntlObject(JSContext* cx, JSProtoKey key) { + RootedObject proto(cx, &cx->global()->getObjectPrototype()); + + // The |Intl| object is just a plain object with some "static" function + // properties and some constructor properties. + return NewTenuredObjectWithGivenProto(cx, &IntlClass, proto); +} + +/** + * Initializes the Intl Object and its standard built-in properties. + * Spec: ECMAScript Internationalization API Specification, 8.0, 8.1 + */ +static bool IntlClassFinish(JSContext* cx, HandleObject intl, + HandleObject proto) { + // Add the constructor properties. 
+ RootedId ctorId(cx); + RootedValue ctorValue(cx); + for (const auto& protoKey : { + JSProto_Collator, + JSProto_DateTimeFormat, + JSProto_DisplayNames, + JSProto_ListFormat, + JSProto_Locale, + JSProto_NumberFormat, + JSProto_PluralRules, + JSProto_RelativeTimeFormat, + JSProto_Segmenter, + }) { + if (GlobalObject::skipDeselectedConstructor(cx, protoKey)) { + continue; + } + + JSObject* ctor = GlobalObject::getOrCreateConstructor(cx, protoKey); + if (!ctor) { + return false; + } + + ctorId = NameToId(ClassName(protoKey, cx)); + ctorValue.setObject(*ctor); + if (!DefineDataProperty(cx, intl, ctorId, ctorValue, 0)) { + return false; + } + } + + return true; +} + +static const ClassSpec IntlClassSpec = { + CreateIntlObject, nullptr, intl_static_methods, intl_static_properties, + nullptr, nullptr, IntlClassFinish}; + +const JSClass js::IntlClass = {"Intl", JSCLASS_HAS_CACHED_PROTO(JSProto_Intl), + JS_NULL_CLASS_OPS, &IntlClassSpec}; diff --git a/js/src/builtin/intl/IntlObject.h b/js/src/builtin/intl/IntlObject.h new file mode 100644 index 0000000000..5b79f74e92 --- /dev/null +++ b/js/src/builtin/intl/IntlObject.h @@ -0,0 +1,82 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef builtin_intl_IntlObject_h +#define builtin_intl_IntlObject_h + +#include "js/TypeDecls.h" + +namespace js { + +extern const JSClass IntlClass; + +/** + * Returns a plain object with calendar information for a single valid locale + * (callers must perform this validation). The object will have these + * properties: + * + * firstDayOfWeek + * an integer in the range 1=Monday to 7=Sunday indicating the day + * considered the first day of the week in calendars, e.g. 
7 for en-US, + * 1 for en-GB, 7 for bn-IN + * minDays + * an integer in the range of 1 to 7 indicating the minimum number + * of days required in the first week of the year, e.g. 1 for en-US, + * 4 for de + * weekend + * an array with values in the range 1=Monday to 7=Sunday indicating the + * days of the week considered as part of the weekend, e.g. [6, 7] for en-US + * and en-GB, [7] for bn-IN (note that "weekend" is *not* necessarily two + * days) + * + * NOTE: "calendar" and "locale" properties are *not* added to the object. + */ +[[nodiscard]] extern bool intl_GetCalendarInfo(JSContext* cx, unsigned argc, + JS::Value* vp); + +/** + * Compares a BCP 47 language tag against the locales in availableLocales and + * returns the best available match -- or |undefined| if no match was found. + * Uses the fallback mechanism of RFC 4647, section 3.4. + * + * The set of available locales consulted doesn't necessarily include the + * default locale or any generalized forms of it (e.g. "de" is a more-general + * form of "de-CH"). If you want to be sure to consider the default locale and + * its generalized forms (you usually will), pass the default locale as the + * value of |defaultOrNull|; otherwise pass null. + * + * Spec: ECMAScript Internationalization API Specification, 9.2.2. + * Spec: RFC 4647, section 3.4. + * + * Usage: result = intl_BestAvailableLocale("Collator", locale, defaultOrNull) + */ +[[nodiscard]] extern bool intl_BestAvailableLocale(JSContext* cx, unsigned argc, + JS::Value* vp); + +/** + * Return the supported locale for the input locale if ICU supports that locale + * (perhaps via fallback, e.g. supporting "de-CH" through "de" support implied + * by a "de-DE" locale). Otherwise uses the last-ditch locale. + * + * Usage: result = intl_supportedLocaleOrFallback(locale) + */ +[[nodiscard]] extern bool intl_supportedLocaleOrFallback(JSContext* cx, + unsigned argc, + JS::Value* vp); + +/** + * Returns the list of supported values for the given key.
Throws a RangeError + * if the key isn't one of {"calendar", "collation", "currency", + * "numberingSystem", "timeZone", "unit"}. + * + * Usage: list = intl_SupportedValuesOf(key) + */ +[[nodiscard]] extern bool intl_SupportedValuesOf(JSContext* cx, unsigned argc, + JS::Value* vp); + +} // namespace js + +#endif /* builtin_intl_IntlObject_h */ diff --git a/js/src/builtin/intl/IntlObject.js b/js/src/builtin/intl/IntlObject.js new file mode 100644 index 0000000000..c62b38282f --- /dev/null +++ b/js/src/builtin/intl/IntlObject.js @@ -0,0 +1,81 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * 8.2.1 Intl.getCanonicalLocales ( locales ) + * + * ES2017 Intl draft rev 947aa9a0c853422824a0c9510d8f09be3eb416b9 + */ +function Intl_getCanonicalLocales(locales) { + // Steps 1-2. + return CanonicalizeLocaleList(locales); +} + +/** + * Intl.supportedValuesOf ( key ) + */ +function Intl_supportedValuesOf(key) { + // Step 1. + key = ToString(key); + + // Steps 2-9. + return intl_SupportedValuesOf(key); +} + +/** + * This function is a custom function in the style of the standard Intl.* + * functions, that isn't part of any spec or proposal yet. + * + * Returns an object with the following properties: + * locale: + * The actual resolved locale. + * + * calendar: + * The default calendar of the resolved locale. + * + * firstDayOfWeek: + * The first day of the week for the resolved locale. + * + * minDays: + * The minimum number of days required in the first week of the year for + * the resolved locale. + * + * weekend: + * The days of the week considered as the weekend for the resolved locale. + * + * Days are encoded as integers in the range 1=Monday to 7=Sunday. + */ +function Intl_getCalendarInfo(locales) { + // 1. Let requestedLocales be ? CanonicalizeLocaleList(locales). 
+ var requestedLocales = CanonicalizeLocaleList(locales); + + var DateTimeFormat = dateTimeFormatInternalProperties; + + // 2. Let localeData be %DateTimeFormat%.[[localeData]]. + var localeData = DateTimeFormat.localeData; + + // 3. Let localeOpt be a new Record. + var localeOpt = new_Record(); + + // 4. Set localeOpt.[[localeMatcher]] to "best fit". + localeOpt.localeMatcher = "best fit"; + + // 5. Let r be ResolveLocale(%DateTimeFormat%.[[availableLocales]], + // requestedLocales, localeOpt, + // %DateTimeFormat%.[[relevantExtensionKeys]], localeData). + var r = ResolveLocale( + "DateTimeFormat", + requestedLocales, + localeOpt, + DateTimeFormat.relevantExtensionKeys, + localeData + ); + + // 6. Let result be GetCalendarInfo(r.[[locale]]). + var result = intl_GetCalendarInfo(r.locale); + DefineDataProperty(result, "calendar", r.ca); + DefineDataProperty(result, "locale", r.locale); + + // 7. Return result. + return result; +} diff --git a/js/src/builtin/intl/LanguageTag.cpp b/js/src/builtin/intl/LanguageTag.cpp new file mode 100644 index 0000000000..3372f5d99a --- /dev/null +++ b/js/src/builtin/intl/LanguageTag.cpp @@ -0,0 +1,193 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "builtin/intl/LanguageTag.h" + +#include "mozilla/intl/Locale.h" +#include "mozilla/Span.h" + +#include "builtin/intl/StringAsciiChars.h" +#include "gc/Tracer.h" +#include "vm/JSContext.h" + +namespace js { +namespace intl { + +[[nodiscard]] bool ParseLocale(JSContext* cx, Handle<JSLinearString*> str, + mozilla::intl::Locale& result) { + if (StringIsAscii(str)) { + intl::StringAsciiChars chars(str); + if (!chars.init(cx)) { + return false; + } + + if (mozilla::intl::LocaleParser::TryParse(chars, result).isOk()) { + return true; + } + } + + if (UniqueChars localeChars = QuoteString(cx, str, '"')) { + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, + JSMSG_INVALID_LANGUAGE_TAG, localeChars.get()); + } + return false; +} + +bool ParseStandaloneLanguageTag(Handle<JSLinearString*> str, + mozilla::intl::LanguageSubtag& result) { + // Tell the analysis the |IsStructurallyValidLanguageTag| function can't GC. + JS::AutoSuppressGCAnalysis nogc; + + if (str->hasLatin1Chars()) { + if (!mozilla::intl::IsStructurallyValidLanguageTag<Latin1Char>( + str->latin1Range(nogc))) { + return false; + } + result.Set<Latin1Char>(str->latin1Range(nogc)); + } else { + if (!mozilla::intl::IsStructurallyValidLanguageTag<char16_t>( + str->twoByteRange(nogc))) { + return false; + } + result.Set<char16_t>(str->twoByteRange(nogc)); + } + return true; +} + +bool ParseStandaloneScriptTag(Handle<JSLinearString*> str, + mozilla::intl::ScriptSubtag& result) { + // Tell the analysis the |IsStructurallyValidScriptTag| function can't GC. 
+ JS::AutoSuppressGCAnalysis nogc; + + if (str->hasLatin1Chars()) { + if (!mozilla::intl::IsStructurallyValidScriptTag<Latin1Char>( + str->latin1Range(nogc))) { + return false; + } + result.Set<Latin1Char>(str->latin1Range(nogc)); + } else { + if (!mozilla::intl::IsStructurallyValidScriptTag<char16_t>( + str->twoByteRange(nogc))) { + return false; + } + result.Set<char16_t>(str->twoByteRange(nogc)); + } + return true; +} + +bool ParseStandaloneRegionTag(Handle<JSLinearString*> str, + mozilla::intl::RegionSubtag& result) { + // Tell the analysis the |IsStructurallyValidRegionTag| function can't GC. + JS::AutoSuppressGCAnalysis nogc; + + if (str->hasLatin1Chars()) { + if (!mozilla::intl::IsStructurallyValidRegionTag<Latin1Char>( + str->latin1Range(nogc))) { + return false; + } + result.Set<Latin1Char>(str->latin1Range(nogc)); + } else { + if (!mozilla::intl::IsStructurallyValidRegionTag<char16_t>( + str->twoByteRange(nogc))) { + return false; + } + result.Set<char16_t>(str->twoByteRange(nogc)); + } + return true; +} + +template <typename CharT> +static bool IsAsciiLowercaseAlpha(mozilla::Span<const CharT> span) { + // Tell the analysis the |std::all_of| function can't GC. + JS::AutoSuppressGCAnalysis nogc; + + const CharT* ptr = span.data(); + size_t length = span.size(); + return std::all_of(ptr, ptr + length, mozilla::IsAsciiLowercaseAlpha<CharT>); +} + +static bool IsAsciiLowercaseAlpha(JSLinearString* str) { + JS::AutoCheckCannotGC nogc; + if (str->hasLatin1Chars()) { + return IsAsciiLowercaseAlpha<Latin1Char>(str->latin1Range(nogc)); + } + return IsAsciiLowercaseAlpha<char16_t>(str->twoByteRange(nogc)); +} + +template <typename CharT> +static bool IsAsciiAlpha(mozilla::Span<const CharT> span) { + // Tell the analysis the |std::all_of| function can't GC. 
+ JS::AutoSuppressGCAnalysis nogc; + + const CharT* ptr = span.data(); + size_t length = span.size(); + return std::all_of(ptr, ptr + length, mozilla::IsAsciiAlpha<CharT>); +} + +static bool IsAsciiAlpha(JSLinearString* str) { + JS::AutoCheckCannotGC nogc; + if (str->hasLatin1Chars()) { + return IsAsciiAlpha<Latin1Char>(str->latin1Range(nogc)); + } + return IsAsciiAlpha<char16_t>(str->twoByteRange(nogc)); +} + +JS::Result<JSString*> ParseStandaloneISO639LanguageTag( + JSContext* cx, Handle<JSLinearString*> str) { + // ISO-639 language codes contain either two or three characters. + size_t length = str->length(); + if (length != 2 && length != 3) { + return nullptr; + } + + // We can directly return the input below if it's in the correct case. + bool isLowerCase = IsAsciiLowercaseAlpha(str); + if (!isLowerCase) { + // Must be an ASCII alpha string. + if (!IsAsciiAlpha(str)) { + return nullptr; + } + } + + mozilla::intl::LanguageSubtag languageTag; + if (str->hasLatin1Chars()) { + JS::AutoCheckCannotGC nogc; + languageTag.Set<Latin1Char>(str->latin1Range(nogc)); + } else { + JS::AutoCheckCannotGC nogc; + languageTag.Set<char16_t>(str->twoByteRange(nogc)); + } + + if (!isLowerCase) { + // The language subtag is canonicalized to lower case. + languageTag.ToLowerCase(); + } + + // Reject the input if the canonical tag contains more than just a single + // language subtag. + if (mozilla::intl::Locale::ComplexLanguageMapping(languageTag)) { + return nullptr; + } + + // Take care to replace deprecated subtags with their preferred values. 
+ JSString* result; + if (mozilla::intl::Locale::LanguageMapping(languageTag) || !isLowerCase) { + result = NewStringCopy<CanGC>(cx, languageTag.Span()); + } else { + result = str; + } + if (!result) { + return cx->alreadyReportedOOM(); + } + return result; +} + +void js::intl::UnicodeExtensionKeyword::trace(JSTracer* trc) { + TraceRoot(trc, &type_, "UnicodeExtensionKeyword::type"); +} + +} // namespace intl +} // namespace js diff --git a/js/src/builtin/intl/LanguageTag.h b/js/src/builtin/intl/LanguageTag.h new file mode 100644 index 0000000000..e896411e19 --- /dev/null +++ b/js/src/builtin/intl/LanguageTag.h @@ -0,0 +1,91 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Structured representation of Unicode locale IDs used with Intl functions. */ + +#ifndef builtin_intl_LanguageTag_h +#define builtin_intl_LanguageTag_h + +#include "mozilla/intl/Locale.h" +#include "mozilla/Span.h" + +#include "js/Result.h" +#include "js/RootingAPI.h" + +struct JS_PUBLIC_API JSContext; +class JSLinearString; +class JS_PUBLIC_API JSString; +class JS_PUBLIC_API JSTracer; + +namespace js { + +namespace intl { + +/** + * Parse a string as a Unicode BCP 47 locale identifier. If successful, store + * it in |result| and return true. Otherwise return false. + */ +[[nodiscard]] bool ParseLocale(JSContext* cx, JS::Handle<JSLinearString*> str, + mozilla::intl::Locale& result); + +/** + * Parse a string as a standalone |language| tag. If |str| is a standalone + * language tag, store it in |result| and return true. Otherwise return false. + */ +[[nodiscard]] bool ParseStandaloneLanguageTag( + JS::Handle<JSLinearString*> str, mozilla::intl::LanguageSubtag& result); + +/** + * Parse a string as a standalone |script| tag. 
If |str| is a standalone script + * tag, store it in |result| and return true. Otherwise return false. + */ +[[nodiscard]] bool ParseStandaloneScriptTag( + JS::Handle<JSLinearString*> str, mozilla::intl::ScriptSubtag& result); + +/** + * Parse a string as a standalone |region| tag. If |str| is a standalone region + * tag, store it in |result| and return true. Otherwise return false. + */ +[[nodiscard]] bool ParseStandaloneRegionTag( + JS::Handle<JSLinearString*> str, mozilla::intl::RegionSubtag& result); + +/** + * Parse a string as an ISO-639 language code. Return |nullptr| in the result if + * the input could not be parsed or the canonical form of the resulting language + * tag contains more than a single language subtag. + */ +JS::Result<JSString*> ParseStandaloneISO639LanguageTag( + JSContext* cx, JS::Handle<JSLinearString*> str); + +class UnicodeExtensionKeyword final { + char key_[mozilla::intl::LanguageTagLimits::UnicodeKeyLength]; + JSLinearString* type_; + + public: + using UnicodeKey = + const char (&)[mozilla::intl::LanguageTagLimits::UnicodeKeyLength + 1]; + using UnicodeKeySpan = + mozilla::Span<const char, + mozilla::intl::LanguageTagLimits::UnicodeKeyLength>; + + UnicodeExtensionKeyword(UnicodeKey key, JSLinearString* type) + : key_{key[0], key[1]}, type_(type) {} + + UnicodeKeySpan key() const { return {key_, sizeof(key_)}; } + JSLinearString* type() const { return type_; } + + void trace(JSTracer* trc); +}; + +[[nodiscard]] extern bool ApplyUnicodeExtensionToTag( + JSContext* cx, mozilla::intl::Locale& tag, + JS::HandleVector<UnicodeExtensionKeyword> keywords); + +} // namespace intl + +} // namespace js + +#endif /* builtin_intl_LanguageTag_h */ diff --git a/js/src/builtin/intl/ListFormat.cpp b/js/src/builtin/intl/ListFormat.cpp new file mode 100644 index 0000000000..decd6fe7b8 --- /dev/null +++ b/js/src/builtin/intl/ListFormat.cpp @@ -0,0 +1,372 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 
sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "builtin/intl/ListFormat.h" + +#include "mozilla/Assertions.h" +#include "mozilla/intl/ListFormat.h" + +#include <stddef.h> + +#include "builtin/Array.h" +#include "builtin/intl/CommonFunctions.h" +#include "builtin/intl/FormatBuffer.h" +#include "gc/GCContext.h" +#include "js/Utility.h" +#include "js/Vector.h" +#include "vm/JSContext.h" +#include "vm/PlainObject.h" // js::PlainObject +#include "vm/StringType.h" + +#include "vm/JSObject-inl.h" +#include "vm/NativeObject-inl.h" +#include "vm/ObjectOperations-inl.h" + +using namespace js; + +const JSClassOps ListFormatObject::classOps_ = { + nullptr, // addProperty + nullptr, // delProperty + nullptr, // enumerate + nullptr, // newEnumerate + nullptr, // resolve + nullptr, // mayResolve + ListFormatObject::finalize, // finalize + nullptr, // call + nullptr, // construct + nullptr, // trace +}; +const JSClass ListFormatObject::class_ = { + "Intl.ListFormat", + JSCLASS_HAS_RESERVED_SLOTS(ListFormatObject::SLOT_COUNT) | + JSCLASS_HAS_CACHED_PROTO(JSProto_ListFormat) | + JSCLASS_FOREGROUND_FINALIZE, + &ListFormatObject::classOps_, &ListFormatObject::classSpec_}; + +const JSClass& ListFormatObject::protoClass_ = PlainObject::class_; + +static bool listFormat_toSource(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + args.rval().setString(cx->names().ListFormat); + return true; +} + +static const JSFunctionSpec listFormat_static_methods[] = { + JS_SELF_HOSTED_FN("supportedLocalesOf", + "Intl_ListFormat_supportedLocalesOf", 1, 0), + JS_FS_END}; + +static const JSFunctionSpec listFormat_methods[] = { + JS_SELF_HOSTED_FN("resolvedOptions", "Intl_ListFormat_resolvedOptions", 0, + 0), + JS_SELF_HOSTED_FN("format", "Intl_ListFormat_format", 1, 0), + 
JS_SELF_HOSTED_FN("formatToParts", "Intl_ListFormat_formatToParts", 1, 0), + JS_FN("toSource", listFormat_toSource, 0, 0), JS_FS_END}; + +static const JSPropertySpec listFormat_properties[] = { + JS_STRING_SYM_PS(toStringTag, "Intl.ListFormat", JSPROP_READONLY), + JS_PS_END}; + +static bool ListFormat(JSContext* cx, unsigned argc, Value* vp); + +const ClassSpec ListFormatObject::classSpec_ = { + GenericCreateConstructor<ListFormat, 0, gc::AllocKind::FUNCTION>, + GenericCreatePrototype<ListFormatObject>, + listFormat_static_methods, + nullptr, + listFormat_methods, + listFormat_properties, + nullptr, + ClassSpec::DontDefineConstructor}; + +/** + * Intl.ListFormat([ locales [, options]]) + */ +static bool ListFormat(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + + // Step 1. + if (!ThrowIfNotConstructing(cx, args, "Intl.ListFormat")) { + return false; + } + + // Step 2 (Inlined 9.1.14, OrdinaryCreateFromConstructor). + RootedObject proto(cx); + if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_ListFormat, + &proto)) { + return false; + } + + Rooted<ListFormatObject*> listFormat( + cx, NewObjectWithClassProto<ListFormatObject>(cx, proto)); + if (!listFormat) { + return false; + } + + HandleValue locales = args.get(0); + HandleValue options = args.get(1); + + // Step 3. + if (!intl::InitializeObject(cx, listFormat, cx->names().InitializeListFormat, + locales, options)) { + return false; + } + + args.rval().setObject(*listFormat); + return true; +} + +void js::ListFormatObject::finalize(JS::GCContext* gcx, JSObject* obj) { + MOZ_ASSERT(gcx->onMainThread()); + + mozilla::intl::ListFormat* lf = + obj->as<ListFormatObject>().getListFormatSlot(); + if (lf) { + intl::RemoveICUCellMemory(gcx, obj, ListFormatObject::EstimatedMemoryUse); + delete lf; + } +} + +/** + * Returns a new ListFormat with the locale and list formatting options + * of the given ListFormat. 
+ */ +static mozilla::intl::ListFormat* NewListFormat( + JSContext* cx, Handle<ListFormatObject*> listFormat) { + RootedObject internals(cx, intl::GetInternalsObject(cx, listFormat)); + if (!internals) { + return nullptr; + } + + RootedValue value(cx); + + if (!GetProperty(cx, internals, internals, cx->names().locale, &value)) { + return nullptr; + } + UniqueChars locale = intl::EncodeLocale(cx, value.toString()); + if (!locale) { + return nullptr; + } + + mozilla::intl::ListFormat::Options options; + + using ListFormatType = mozilla::intl::ListFormat::Type; + if (!GetProperty(cx, internals, internals, cx->names().type, &value)) { + return nullptr; + } + { + JSLinearString* strType = value.toString()->ensureLinear(cx); + if (!strType) { + return nullptr; + } + + if (StringEqualsLiteral(strType, "conjunction")) { + options.mType = ListFormatType::Conjunction; + } else if (StringEqualsLiteral(strType, "disjunction")) { + options.mType = ListFormatType::Disjunction; + } else { + MOZ_ASSERT(StringEqualsLiteral(strType, "unit")); + options.mType = ListFormatType::Unit; + } + } + + using ListFormatStyle = mozilla::intl::ListFormat::Style; + if (!GetProperty(cx, internals, internals, cx->names().style, &value)) { + return nullptr; + } + { + JSLinearString* strStyle = value.toString()->ensureLinear(cx); + if (!strStyle) { + return nullptr; + } + + if (StringEqualsLiteral(strStyle, "long")) { + options.mStyle = ListFormatStyle::Long; + } else if (StringEqualsLiteral(strStyle, "short")) { + options.mStyle = ListFormatStyle::Short; + } else { + MOZ_ASSERT(StringEqualsLiteral(strStyle, "narrow")); + options.mStyle = ListFormatStyle::Narrow; + } + } + + auto result = mozilla::intl::ListFormat::TryCreate( + mozilla::MakeStringSpan(locale.get()), options); + + if (result.isOk()) { + return result.unwrap().release(); + } + + js::intl::ReportInternalError(cx, result.unwrapErr()); + return nullptr; +} + +static mozilla::intl::ListFormat* GetOrCreateListFormat( + JSContext* cx, 
Handle<ListFormatObject*> listFormat) { + // Obtain a cached mozilla::intl::ListFormat object. + mozilla::intl::ListFormat* lf = listFormat->getListFormatSlot(); + if (lf) { + return lf; + } + + lf = NewListFormat(cx, listFormat); + if (!lf) { + return nullptr; + } + listFormat->setListFormatSlot(lf); + + intl::AddICUCellMemory(listFormat, ListFormatObject::EstimatedMemoryUse); + return lf; +} + +/** + * FormatList ( listFormat, list ) + */ +static bool FormatList(JSContext* cx, mozilla::intl::ListFormat* lf, + const mozilla::intl::ListFormat::StringList& list, + MutableHandleValue result) { + intl::FormatBuffer<char16_t, intl::INITIAL_CHAR_BUFFER_SIZE> formatBuffer(cx); + auto formatResult = lf->Format(list, formatBuffer); + if (formatResult.isErr()) { + js::intl::ReportInternalError(cx, formatResult.unwrapErr()); + return false; + } + + JSString* str = formatBuffer.toString(cx); + if (!str) { + return false; + } + result.setString(str); + return true; +} + +/** + * FormatListToParts ( listFormat, list ) + */ +static bool FormatListToParts(JSContext* cx, mozilla::intl::ListFormat* lf, + const mozilla::intl::ListFormat::StringList& list, + MutableHandleValue result) { + intl::FormatBuffer<char16_t, intl::INITIAL_CHAR_BUFFER_SIZE> buffer(cx); + mozilla::intl::ListFormat::PartVector parts; + auto formatResult = lf->FormatToParts(list, buffer, parts); + if (formatResult.isErr()) { + intl::ReportInternalError(cx, formatResult.unwrapErr()); + return false; + } + + RootedString overallResult(cx, buffer.toString(cx)); + if (!overallResult) { + return false; + } + + Rooted<ArrayObject*> partsArray( + cx, NewDenseFullyAllocatedArray(cx, parts.length())); + if (!partsArray) { + return false; + } + partsArray->ensureDenseInitializedLength(0, parts.length()); + + RootedObject singlePart(cx); + RootedValue val(cx); + + size_t index = 0; + size_t beginIndex = 0; + for (const mozilla::intl::ListFormat::Part& part : parts) { + singlePart = NewPlainObject(cx); + if (!singlePart) { 
+ return false; + } + + if (part.first == mozilla::intl::ListFormat::PartType::Element) { + val = StringValue(cx->names().element); + } else { + val = StringValue(cx->names().literal); + } + + if (!DefineDataProperty(cx, singlePart, cx->names().type, val)) { + return false; + } + + // There could be an empty string so the endIndex could be equal to + // beginIndex. + MOZ_ASSERT(part.second >= beginIndex); + JSLinearString* partStr = NewDependentString(cx, overallResult, beginIndex, + part.second - beginIndex); + if (!partStr) { + return false; + } + val = StringValue(partStr); + if (!DefineDataProperty(cx, singlePart, cx->names().value, val)) { + return false; + } + + beginIndex = part.second; + partsArray->initDenseElement(index++, ObjectValue(*singlePart)); + } + + MOZ_ASSERT(index == parts.length()); + MOZ_ASSERT(beginIndex == buffer.length()); + result.setObject(*partsArray); + + return true; +} + +bool js::intl_FormatList(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 3); + + Rooted<ListFormatObject*> listFormat( + cx, &args[0].toObject().as<ListFormatObject>()); + + bool formatToParts = args[2].toBoolean(); + + mozilla::intl::ListFormat* lf = GetOrCreateListFormat(cx, listFormat); + if (!lf) { + return false; + } + + // Collect all strings and their lengths. + // + // 'strings' takes the ownership of those strings, and 'list' will be passed + // to mozilla::intl::ListFormat as a Span. 
+ Vector<UniqueTwoByteChars, mozilla::intl::DEFAULT_LIST_LENGTH> strings(cx); + mozilla::intl::ListFormat::StringList list; + + Rooted<ArrayObject*> listObj(cx, &args[1].toObject().as<ArrayObject>()); + RootedValue value(cx); + uint32_t listLen = listObj->length(); + for (uint32_t i = 0; i < listLen; i++) { + if (!GetElement(cx, listObj, listObj, i, &value)) { + return false; + } + + JSLinearString* linear = value.toString()->ensureLinear(cx); + if (!linear) { + return false; + } + + size_t linearLength = linear->length(); + + UniqueTwoByteChars chars = cx->make_pod_array<char16_t>(linearLength); + if (!chars) { + return false; + } + CopyChars(chars.get(), *linear); + + if (!strings.append(std::move(chars))) { + return false; + } + + if (!list.emplaceBack(strings[i].get(), linearLength)) { + return false; + } + } + + if (formatToParts) { + return FormatListToParts(cx, lf, list, args.rval()); + } + return FormatList(cx, lf, list, args.rval()); +} diff --git a/js/src/builtin/intl/ListFormat.h b/js/src/builtin/intl/ListFormat.h new file mode 100644 index 0000000000..da0daa711b --- /dev/null +++ b/js/src/builtin/intl/ListFormat.h @@ -0,0 +1,69 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef builtin_intl_ListFormat_h +#define builtin_intl_ListFormat_h + +#include <stdint.h> + +#include "builtin/SelfHostingDefines.h" +#include "js/Class.h" +#include "js/TypeDecls.h" +#include "vm/NativeObject.h" + +namespace mozilla::intl { +class ListFormat; +} // namespace mozilla::intl + +namespace js { + +class ListFormatObject : public NativeObject { + public: + static const JSClass class_; + static const JSClass& protoClass_; + + static constexpr uint32_t INTERNALS_SLOT = 0; + static constexpr uint32_t LIST_FORMAT_SLOT = 1; + static constexpr uint32_t SLOT_COUNT = 2; + + static_assert(INTERNALS_SLOT == INTL_INTERNALS_OBJECT_SLOT, + "INTERNALS_SLOT must match self-hosting define for internals " + "object slot"); + + // Estimated memory use for UListFormatter (see IcuMemoryUsage). + static constexpr size_t EstimatedMemoryUse = 24; + + mozilla::intl::ListFormat* getListFormatSlot() const { + const auto& slot = getFixedSlot(LIST_FORMAT_SLOT); + if (slot.isUndefined()) { + return nullptr; + } + return static_cast<mozilla::intl::ListFormat*>(slot.toPrivate()); + } + + void setListFormatSlot(mozilla::intl::ListFormat* format) { + setFixedSlot(LIST_FORMAT_SLOT, PrivateValue(format)); + } + + private: + static const JSClassOps classOps_; + static const ClassSpec classSpec_; + + static void finalize(JS::GCContext* gcx, JSObject* obj); +}; + +/** + * Returns a string representing the array of string values |list| according to + * the effective locale and the formatting options of the given ListFormat. 
+ * + * Usage: formatted = intl_FormatList(listFormat, list, formatToParts) + */ +[[nodiscard]] extern bool intl_FormatList(JSContext* cx, unsigned argc, + Value* vp); + +} // namespace js + +#endif /* builtin_intl_ListFormat_h */ diff --git a/js/src/builtin/intl/ListFormat.js b/js/src/builtin/intl/ListFormat.js new file mode 100644 index 0000000000..8ee537d0b9 --- /dev/null +++ b/js/src/builtin/intl/ListFormat.js @@ -0,0 +1,330 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * ListFormat internal properties. + */ +function listFormatLocaleData() { + // ListFormat doesn't support any extension keys. + return {}; +} +var listFormatInternalProperties = { + localeData: listFormatLocaleData, + relevantExtensionKeys: [], +}; + +/** + * Intl.ListFormat ( [ locales [ , options ] ] ) + * + * Compute an internal properties object from |lazyListFormatData|. + */ +function resolveListFormatInternals(lazyListFormatData) { + assert(IsObject(lazyListFormatData), "lazy data not an object?"); + + var internalProps = std_Object_create(null); + + var ListFormat = listFormatInternalProperties; + + // Compute effective locale. + + // Step 9. + var localeData = ListFormat.localeData; + + // Step 10. + var r = ResolveLocale( + "ListFormat", + lazyListFormatData.requestedLocales, + lazyListFormatData.opt, + ListFormat.relevantExtensionKeys, + localeData + ); + + // Step 11. + internalProps.locale = r.locale; + + // Step 13. + internalProps.type = lazyListFormatData.type; + + // Step 15. + internalProps.style = lazyListFormatData.style; + + // Steps 16-23 (not applicable in our implementation). + + // The caller is responsible for associating |internalProps| with the right + // object using |setInternalProperties|. 
+ return internalProps; +} + +/** + * Returns an object containing the ListFormat internal properties of |obj|. + */ +function getListFormatInternals(obj) { + assert(IsObject(obj), "getListFormatInternals called with non-object"); + assert( + intl_GuardToListFormat(obj) !== null, + "getListFormatInternals called with non-ListFormat" + ); + + var internals = getIntlObjectInternals(obj); + assert( + internals.type === "ListFormat", + "bad type escaped getIntlObjectInternals" + ); + + // If internal properties have already been computed, use them. + var internalProps = maybeInternalProperties(internals); + if (internalProps) { + return internalProps; + } + + // Otherwise it's time to fully create them. + internalProps = resolveListFormatInternals(internals.lazyData); + setInternalProperties(internals, internalProps); + return internalProps; +} + +/** + * Intl.ListFormat ( [ locales [ , options ] ] ) + * + * Initializes an object as a ListFormat. + * + * This method is complicated a moderate bit by its implementing initialization + * as a *lazy* concept. Everything that must happen now, does -- but we defer + * all the work we can until the object is actually used as a ListFormat. + * This later work occurs in |resolveListFormatInternals|; steps not noted + * here occur there. 
+ */ +function InitializeListFormat(listFormat, locales, options) { + assert(IsObject(listFormat), "InitializeListFormat called with non-object"); + assert( + intl_GuardToListFormat(listFormat) !== null, + "InitializeListFormat called with non-ListFormat" + ); + + // Lazy ListFormat data has the following structure: + // + // { + // requestedLocales: List of locales, + // type: "conjunction" / "disjunction" / "unit", + // style: "long" / "short" / "narrow", + // + // opt: // opt object computed in InitializeListFormat + // { + // localeMatcher: "lookup" / "best fit", + // } + // } + // + // Note that lazy data is only installed as a final step of initialization, + // so every ListFormat lazy data object has *all* these properties, never a + // subset of them. + var lazyListFormatData = std_Object_create(null); + + // Step 3. + var requestedLocales = CanonicalizeLocaleList(locales); + lazyListFormatData.requestedLocales = requestedLocales; + + // Steps 4-5. + if (options === undefined) { + options = std_Object_create(null); + } else if (!IsObject(options)) { + ThrowTypeError( + JSMSG_OBJECT_REQUIRED, + options === null ? "null" : typeof options + ); + } + + // Step 6. + var opt = new_Record(); + lazyListFormatData.opt = opt; + + // Steps 7-8. + var matcher = GetOption( + options, + "localeMatcher", + "string", + ["lookup", "best fit"], + "best fit" + ); + opt.localeMatcher = matcher; + + // Compute formatting options. + + // Steps 12-13. + var type = GetOption( + options, + "type", + "string", + ["conjunction", "disjunction", "unit"], + "conjunction" + ); + lazyListFormatData.type = type; + + // Steps 14-15. + var style = GetOption( + options, + "style", + "string", + ["long", "short", "narrow"], + "long" + ); + lazyListFormatData.style = style; + + // We've done everything that must be done now: mark the lazy data as fully + // computed and install it. 
+ initializeIntlObject(listFormat, "ListFormat", lazyListFormatData); +} + +/** + * Returns the subset of the given locale list for which this locale list has a + * matching (possibly fallback) locale. Locales appear in the same order in the + * returned list as in the input list. + */ +function Intl_ListFormat_supportedLocalesOf(locales /*, options*/) { + var options = ArgumentsLength() > 1 ? GetArgument(1) : undefined; + + // Step 1. + var availableLocales = "ListFormat"; + + // Step 2. + var requestedLocales = CanonicalizeLocaleList(locales); + + // Step 3. + return SupportedLocales(availableLocales, requestedLocales, options); +} + +/** + * StringListFromIterable ( iterable ) + */ +function StringListFromIterable(iterable, methodName) { + // Step 1. + if (iterable === undefined) { + return []; + } + + // Step 3. + var list = []; + + // Steps 2, 4-5. + for (var element of allowContentIter(iterable)) { + // Step 5.b.ii. + if (typeof element !== "string") { + ThrowTypeError( + JSMSG_NOT_EXPECTED_TYPE, + methodName, + "string", + typeof element + ); + } + + // Step 5.b.iii. + DefineDataProperty(list, list.length, element); + } + + // Step 6. + return list; +} + +/** + * Intl.ListFormat.prototype.format ( list ) + */ +function Intl_ListFormat_format(list) { + // Step 1. + var listFormat = this; + + // Steps 2-3. + if ( + !IsObject(listFormat) || + (listFormat = intl_GuardToListFormat(listFormat)) === null + ) { + return callFunction( + intl_CallListFormatMethodIfWrapped, + this, + list, + "Intl_ListFormat_format" + ); + } + + // Step 4. + var stringList = StringListFromIterable(list, "format"); + + // We can directly return if |stringList| contains less than two elements. + if (stringList.length < 2) { + return stringList.length === 0 ? "" : stringList[0]; + } + + // Ensure the ListFormat internals are resolved. + getListFormatInternals(listFormat); + + // Step 5. 
+ return intl_FormatList(listFormat, stringList, /* formatToParts = */ false); +} + +/** + * Intl.ListFormat.prototype.formatToParts ( list ) + */ +function Intl_ListFormat_formatToParts(list) { + // Step 1. + var listFormat = this; + + // Steps 2-3. + if ( + !IsObject(listFormat) || + (listFormat = intl_GuardToListFormat(listFormat)) === null + ) { + return callFunction( + intl_CallListFormatMethodIfWrapped, + this, + list, + "Intl_ListFormat_formatToParts" + ); + } + + // Step 4. + var stringList = StringListFromIterable(list, "formatToParts"); + + // We can directly return if |stringList| contains less than two elements. + if (stringList.length < 2) { + return stringList.length === 0 + ? [] + : [{ type: "element", value: stringList[0] }]; + } + + // Ensure the ListFormat internals are resolved. + getListFormatInternals(listFormat); + + // Step 5. + return intl_FormatList(listFormat, stringList, /* formatToParts = */ true); +} + +/** + * Returns the resolved options for a ListFormat object. + */ +function Intl_ListFormat_resolvedOptions() { + // Step 1. + var listFormat = this; + + // Steps 2-3. + if ( + !IsObject(listFormat) || + (listFormat = intl_GuardToListFormat(listFormat)) === null + ) { + return callFunction( + intl_CallListFormatMethodIfWrapped, + this, + "Intl_ListFormat_resolvedOptions" + ); + } + + var internals = getListFormatInternals(listFormat); + + // Steps 4-5. + var result = { + locale: internals.locale, + type: internals.type, + style: internals.style, + }; + + // Step 6. + return result; +} diff --git a/js/src/builtin/intl/Locale.cpp b/js/src/builtin/intl/Locale.cpp new file mode 100644 index 0000000000..d93f1e62a2 --- /dev/null +++ b/js/src/builtin/intl/Locale.cpp @@ -0,0 +1,1520 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Intl.Locale implementation. */ + +#include "builtin/intl/Locale.h" + +#include "mozilla/ArrayUtils.h" +#include "mozilla/Assertions.h" +#include "mozilla/intl/Locale.h" +#include "mozilla/Maybe.h" +#include "mozilla/Span.h" +#include "mozilla/TextUtils.h" + +#include <algorithm> +#include <string> +#include <string.h> +#include <utility> + +#include "builtin/Boolean.h" +#include "builtin/intl/CommonFunctions.h" +#include "builtin/intl/FormatBuffer.h" +#include "builtin/intl/LanguageTag.h" +#include "builtin/intl/StringAsciiChars.h" +#include "builtin/String.h" +#include "js/Conversions.h" +#include "js/friend/ErrorMessages.h" // js::GetErrorMessage, JSMSG_* +#include "js/Printer.h" +#include "js/TypeDecls.h" +#include "js/Wrapper.h" +#include "vm/Compartment.h" +#include "vm/GlobalObject.h" +#include "vm/JSContext.h" +#include "vm/PlainObject.h" // js::PlainObject +#include "vm/StringType.h" + +#include "vm/JSObject-inl.h" +#include "vm/NativeObject-inl.h" + +using namespace js; +using namespace mozilla::intl::LanguageTagLimits; + +const JSClass LocaleObject::class_ = { + "Intl.Locale", + JSCLASS_HAS_RESERVED_SLOTS(LocaleObject::SLOT_COUNT) | + JSCLASS_HAS_CACHED_PROTO(JSProto_Locale), + JS_NULL_CLASS_OPS, &LocaleObject::classSpec_}; + +const JSClass& LocaleObject::protoClass_ = PlainObject::class_; + +static inline bool IsLocale(HandleValue v) { + return v.isObject() && v.toObject().is<LocaleObject>(); +} + +// Return the length of the base-name subtags. 
+static size_t BaseNameLength(const mozilla::intl::Locale& tag) { + size_t baseNameLength = tag.Language().Length(); + if (tag.Script().Present()) { + baseNameLength += 1 + tag.Script().Length(); + } + if (tag.Region().Present()) { + baseNameLength += 1 + tag.Region().Length(); + } + for (const auto& variant : tag.Variants()) { + baseNameLength += 1 + variant.size(); + } + return baseNameLength; +} + +struct IndexAndLength { + size_t index; + size_t length; + + IndexAndLength(size_t index, size_t length) : index(index), length(length){}; + + template <typename T> + mozilla::Span<const T> spanOf(const T* ptr) const { + return {ptr + index, length}; + } +}; + +// Compute the Unicode extension's index and length in the extension subtag. +static mozilla::Maybe<IndexAndLength> UnicodeExtensionPosition( + const mozilla::intl::Locale& tag) { + size_t index = 0; + for (const auto& extension : tag.Extensions()) { + MOZ_ASSERT(!mozilla::IsAsciiUppercaseAlpha(extension[0]), + "extensions are case normalized to lowercase"); + + size_t extensionLength = extension.size(); + if (extension[0] == 'u') { + return mozilla::Some(IndexAndLength{index, extensionLength}); + } + + // Add +1 to skip over the preceding separator. 
+ index += 1 + extensionLength; + } + return mozilla::Nothing(); +} + +static LocaleObject* CreateLocaleObject(JSContext* cx, HandleObject prototype, + const mozilla::intl::Locale& tag) { + intl::FormatBuffer<char, intl::INITIAL_CHAR_BUFFER_SIZE> buffer(cx); + if (auto result = tag.ToString(buffer); result.isErr()) { + intl::ReportInternalError(cx, result.unwrapErr()); + return nullptr; + } + + RootedString tagStr(cx, buffer.toAsciiString(cx)); + if (!tagStr) { + return nullptr; + } + + size_t baseNameLength = BaseNameLength(tag); + + RootedString baseName(cx, NewDependentString(cx, tagStr, 0, baseNameLength)); + if (!baseName) { + return nullptr; + } + + RootedValue unicodeExtension(cx, UndefinedValue()); + if (auto result = UnicodeExtensionPosition(tag)) { + JSString* str = NewDependentString( + cx, tagStr, baseNameLength + 1 + result->index, result->length); + if (!str) { + return nullptr; + } + + unicodeExtension.setString(str); + } + + auto* locale = NewObjectWithClassProto<LocaleObject>(cx, prototype); + if (!locale) { + return nullptr; + } + + locale->setFixedSlot(LocaleObject::LANGUAGE_TAG_SLOT, StringValue(tagStr)); + locale->setFixedSlot(LocaleObject::BASENAME_SLOT, StringValue(baseName)); + locale->setFixedSlot(LocaleObject::UNICODE_EXTENSION_SLOT, unicodeExtension); + + return locale; +} + +static inline bool IsValidUnicodeExtensionValue(JSContext* cx, + JSLinearString* linear, + bool* isValid) { + if (linear->length() == 0) { + *isValid = false; + return true; + } + + if (!StringIsAscii(linear)) { + *isValid = false; + return true; + } + + intl::StringAsciiChars chars(linear); + if (!chars.init(cx)) { + return false; + } + + *isValid = + mozilla::intl::LocaleParser::CanParseUnicodeExtensionType(chars).isOk(); + return true; +} + +/** Iterate through (sep keyword) in a valid, lowercased Unicode extension. 
*/ +template <typename CharT> +class SepKeywordIterator { + const CharT* iter_; + const CharT* const end_; + + public: + SepKeywordIterator(const CharT* unicodeExtensionBegin, + const CharT* unicodeExtensionEnd) + : iter_(unicodeExtensionBegin), end_(unicodeExtensionEnd) {} + + /** + * Return (sep keyword) in the Unicode locale extension from begin to end. + * The first call after all (sep keyword) are consumed returns |nullptr|; no + * further calls are allowed. + */ + const CharT* next() { + MOZ_ASSERT(iter_ != nullptr, + "can't call next() once it's returned nullptr"); + + constexpr size_t SepKeyLength = 1 + UnicodeKeyLength; // "-co"/"-nu"/etc. + + MOZ_ASSERT(iter_ + SepKeyLength <= end_, + "overall Unicode locale extension or non-leading subtags must " + "be at least key-sized"); + + MOZ_ASSERT((iter_[0] == 'u' && iter_[1] == '-') || iter_[0] == '-'); + + while (true) { + // Skip past '-' so |std::char_traits::find| makes progress. Skipping + // 'u' is harmless -- skip or not, |find| returns the first '-'. + iter_++; + + // Find the next separator. + iter_ = std::char_traits<CharT>::find( + iter_, mozilla::PointerRangeSize(iter_, end_), CharT('-')); + if (!iter_) { + return nullptr; + } + + MOZ_ASSERT(iter_ + SepKeyLength <= end_, + "non-leading subtags in a Unicode locale extension are all " + "at least as long as a key"); + + if (iter_ + SepKeyLength == end_ || // key is terminal subtag + iter_[SepKeyLength] == '-') { // key is followed by more subtags + break; + } + } + + MOZ_ASSERT(iter_[0] == '-'); + MOZ_ASSERT(mozilla::IsAsciiLowercaseAlpha(iter_[1]) || + mozilla::IsAsciiDigit(iter_[1])); + MOZ_ASSERT(mozilla::IsAsciiLowercaseAlpha(iter_[2])); + MOZ_ASSERT_IF(iter_ + SepKeyLength < end_, iter_[SepKeyLength] == '-'); + return iter_; + } +}; + +/** + * 9.2.10 GetOption ( options, property, type, values, fallback ) + * + * If the requested property is present and not-undefined, set the result string + * to |ToString(value)|. 
Otherwise set the result string to nullptr. + */ +static bool GetStringOption(JSContext* cx, HandleObject options, + Handle<PropertyName*> name, + MutableHandle<JSLinearString*> string) { + // Step 1. + RootedValue option(cx); + if (!GetProperty(cx, options, options, name, &option)) { + return false; + } + + // Step 2. + JSLinearString* linear = nullptr; + if (!option.isUndefined()) { + // Steps 2.a-b, 2.d (not applicable). + + // Steps 2.c, 2.e. + JSString* str = ToString(cx, option); + if (!str) { + return false; + } + linear = str->ensureLinear(cx); + if (!linear) { + return false; + } + } + + // Step 3. + string.set(linear); + return true; +} + +/** + * 9.2.10 GetOption ( options, property, type, values, fallback ) + * + * If the requested property is present and not-undefined, set the result string + * to |ToString(ToBoolean(value))|. Otherwise set the result string to nullptr. + */ +static bool GetBooleanOption(JSContext* cx, HandleObject options, + Handle<PropertyName*> name, + MutableHandle<JSLinearString*> string) { + // Step 1. + RootedValue option(cx); + if (!GetProperty(cx, options, options, name, &option)) { + return false; + } + + // Step 2. + JSLinearString* linear = nullptr; + if (!option.isUndefined()) { + // Steps 2.a, 2.c-d (not applicable). + + // Steps 2.c, 2.e. + linear = BooleanToString(cx, ToBoolean(option)); + } + + // Step 3. + string.set(linear); + return true; +} + +/** + * ApplyOptionsToTag ( tag, options ) + */ +static bool ApplyOptionsToTag(JSContext* cx, mozilla::intl::Locale& tag, + HandleObject options) { + // Steps 1-2 (Already performed in caller). + + Rooted<JSLinearString*> option(cx); + + // Step 3. + if (!GetStringOption(cx, options, cx->names().language, &option)) { + return false; + } + + // Step 4. 
+ mozilla::intl::LanguageSubtag language; + if (option && !intl::ParseStandaloneLanguageTag(option, language)) { + if (UniqueChars str = QuoteString(cx, option, '"')) { + JS_ReportErrorNumberASCII(cx, js::GetErrorMessage, nullptr, + JSMSG_INVALID_OPTION_VALUE, "language", + str.get()); + } + return false; + } + + // Step 5. + if (!GetStringOption(cx, options, cx->names().script, &option)) { + return false; + } + + // Step 6. + mozilla::intl::ScriptSubtag script; + if (option && !intl::ParseStandaloneScriptTag(option, script)) { + if (UniqueChars str = QuoteString(cx, option, '"')) { + JS_ReportErrorNumberASCII(cx, js::GetErrorMessage, nullptr, + JSMSG_INVALID_OPTION_VALUE, "script", + str.get()); + } + return false; + } + + // Step 7. + if (!GetStringOption(cx, options, cx->names().region, &option)) { + return false; + } + + // Step 8. + mozilla::intl::RegionSubtag region; + if (option && !intl::ParseStandaloneRegionTag(option, region)) { + if (UniqueChars str = QuoteString(cx, option, '"')) { + JS_ReportErrorNumberASCII(cx, js::GetErrorMessage, nullptr, + JSMSG_INVALID_OPTION_VALUE, "region", + str.get()); + } + return false; + } + + // Step 9 (Already performed in caller). + + // Skip steps 10-13 when no subtags were modified. + if (language.Present() || script.Present() || region.Present()) { + // Step 10. + if (language.Present()) { + tag.SetLanguage(language); + } + + // Step 11. + if (script.Present()) { + tag.SetScript(script); + } + + // Step 12. + if (region.Present()) { + tag.SetRegion(region); + } + + // Step 13. + // Optimized to only canonicalize the base-name subtags. All other + // canonicalization steps will happen later. 
+ auto result = tag.CanonicalizeBaseName(); + if (result.isErr()) { + if (result.unwrapErr() == + mozilla::intl::Locale::CanonicalizationError::DuplicateVariant) { + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, + JSMSG_DUPLICATE_VARIANT_SUBTAG); + } else { + intl::ReportInternalError(cx); + } + return false; + } + } + + return true; +} + +/** + * ApplyUnicodeExtensionToTag( tag, options, relevantExtensionKeys ) + */ +bool js::intl::ApplyUnicodeExtensionToTag( + JSContext* cx, mozilla::intl::Locale& tag, + JS::HandleVector<intl::UnicodeExtensionKeyword> keywords) { + // If no Unicode extensions were present in the options object, we can skip + // everything below and directly return. + if (keywords.length() == 0) { + return true; + } + + Vector<char, 32> newExtension(cx); + if (!newExtension.append('u')) { + return false; + } + + // Check if there's an existing Unicode extension subtag. + + const char* unicodeExtensionEnd = nullptr; + const char* unicodeExtensionKeywords = nullptr; + if (auto unicodeExtension = tag.GetUnicodeExtension()) { + const char* unicodeExtensionBegin = unicodeExtension->data(); + unicodeExtensionEnd = unicodeExtensionBegin + unicodeExtension->size(); + + SepKeywordIterator<char> iter(unicodeExtensionBegin, unicodeExtensionEnd); + + // Find the start of the first keyword. + unicodeExtensionKeywords = iter.next(); + + // Copy any attributes present before the first keyword. + const char* attributesEnd = unicodeExtensionKeywords + ? unicodeExtensionKeywords + : unicodeExtensionEnd; + if (!newExtension.append(unicodeExtensionBegin + 1, attributesEnd)) { + return false; + } + } + + // Append the new keywords before any existing keywords. That way any previous + // keyword with the same key is detected as a duplicate when canonicalizing + // the Unicode extension subtag and gets discarded. 
+ + for (const auto& keyword : keywords) { + UnicodeExtensionKeyword::UnicodeKeySpan key = keyword.key(); + if (!newExtension.append('-')) { + return false; + } + if (!newExtension.append(key.data(), key.size())) { + return false; + } + if (!newExtension.append('-')) { + return false; + } + + JS::AutoCheckCannotGC nogc; + JSLinearString* type = keyword.type(); + if (type->hasLatin1Chars()) { + if (!newExtension.append(type->latin1Chars(nogc), type->length())) { + return false; + } + } else { + if (!newExtension.append(type->twoByteChars(nogc), type->length())) { + return false; + } + } + } + + // Append the remaining keywords from the previous Unicode extension subtag. + if (unicodeExtensionKeywords) { + if (!newExtension.append(unicodeExtensionKeywords, unicodeExtensionEnd)) { + return false; + } + } + + if (auto res = tag.SetUnicodeExtension(newExtension); res.isErr()) { + intl::ReportInternalError(cx, res.unwrapErr()); + return false; + } + + return true; +} + +static JS::Result<JSString*> LanguageTagFromMaybeWrappedLocale(JSContext* cx, + JSObject* obj) { + if (obj->is<LocaleObject>()) { + return obj->as<LocaleObject>().languageTag(); + } + + JSObject* unwrapped = CheckedUnwrapStatic(obj); + if (!unwrapped) { + ReportAccessDenied(cx); + return cx->alreadyReportedError(); + } + + if (!unwrapped->is<LocaleObject>()) { + return nullptr; + } + + RootedString tagStr(cx, unwrapped->as<LocaleObject>().languageTag()); + if (!cx->compartment()->wrap(cx, &tagStr)) { + return cx->alreadyReportedError(); + } + return tagStr.get(); +} + +/** + * Intl.Locale( tag[, options] ) + */ +static bool Locale(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + + // Step 1. + if (!ThrowIfNotConstructing(cx, args, "Intl.Locale")) { + return false; + } + + // Steps 2-6 (Inlined 9.1.14, OrdinaryCreateFromConstructor). 
+ RootedObject proto(cx); + if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_Locale, &proto)) { + return false; + } + + // Steps 7-9. + HandleValue tagValue = args.get(0); + JSString* tagStr; + if (tagValue.isObject()) { + JS_TRY_VAR_OR_RETURN_FALSE( + cx, tagStr, + LanguageTagFromMaybeWrappedLocale(cx, &tagValue.toObject())); + if (!tagStr) { + tagStr = ToString(cx, tagValue); + if (!tagStr) { + return false; + } + } + } else if (tagValue.isString()) { + tagStr = tagValue.toString(); + } else { + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, + JSMSG_INVALID_LOCALES_ELEMENT); + return false; + } + + Rooted<JSLinearString*> tagLinearStr(cx, tagStr->ensureLinear(cx)); + if (!tagLinearStr) { + return false; + } + + // Steps 10-11. + RootedObject options(cx); + if (args.hasDefined(1)) { + options = ToObject(cx, args[1]); + if (!options) { + return false; + } + } + + // ApplyOptionsToTag, steps 2 and 9. + mozilla::intl::Locale tag; + if (!intl::ParseLocale(cx, tagLinearStr, tag)) { + return false; + } + + if (auto result = tag.CanonicalizeBaseName(); result.isErr()) { + if (result.unwrapErr() == + mozilla::intl::Locale::CanonicalizationError::DuplicateVariant) { + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, + JSMSG_DUPLICATE_VARIANT_SUBTAG); + } else { + intl::ReportInternalError(cx); + } + return false; + } + + if (options) { + // Step 12. + if (!ApplyOptionsToTag(cx, tag, options)) { + return false; + } + + // Step 13. + JS::RootedVector<intl::UnicodeExtensionKeyword> keywords(cx); + + // Step 14. + Rooted<JSLinearString*> calendar(cx); + if (!GetStringOption(cx, options, cx->names().calendar, &calendar)) { + return false; + } + + // Steps 15-16. 
+ if (calendar) { + bool isValid; + if (!IsValidUnicodeExtensionValue(cx, calendar, &isValid)) { + return false; + } + + if (!isValid) { + if (UniqueChars str = QuoteString(cx, calendar, '"')) { + JS_ReportErrorNumberASCII(cx, js::GetErrorMessage, nullptr, + JSMSG_INVALID_OPTION_VALUE, "calendar", + str.get()); + } + return false; + } + + if (!keywords.emplaceBack("ca", calendar)) { + return false; + } + } + + // Step 17. + Rooted<JSLinearString*> collation(cx); + if (!GetStringOption(cx, options, cx->names().collation, &collation)) { + return false; + } + + // Steps 18-19. + if (collation) { + bool isValid; + if (!IsValidUnicodeExtensionValue(cx, collation, &isValid)) { + return false; + } + + if (!isValid) { + if (UniqueChars str = QuoteString(cx, collation, '"')) { + JS_ReportErrorNumberASCII(cx, js::GetErrorMessage, nullptr, + JSMSG_INVALID_OPTION_VALUE, "collation", + str.get()); + } + return false; + } + + if (!keywords.emplaceBack("co", collation)) { + return false; + } + } + + // Step 20 (without validation). + Rooted<JSLinearString*> hourCycle(cx); + if (!GetStringOption(cx, options, cx->names().hourCycle, &hourCycle)) { + return false; + } + + // Steps 20-21. + if (hourCycle) { + if (!StringEqualsLiteral(hourCycle, "h11") && + !StringEqualsLiteral(hourCycle, "h12") && + !StringEqualsLiteral(hourCycle, "h23") && + !StringEqualsLiteral(hourCycle, "h24")) { + if (UniqueChars str = QuoteString(cx, hourCycle, '"')) { + JS_ReportErrorNumberASCII(cx, js::GetErrorMessage, nullptr, + JSMSG_INVALID_OPTION_VALUE, "hourCycle", + str.get()); + } + return false; + } + + if (!keywords.emplaceBack("hc", hourCycle)) { + return false; + } + } + + // Step 22 (without validation). + Rooted<JSLinearString*> caseFirst(cx); + if (!GetStringOption(cx, options, cx->names().caseFirst, &caseFirst)) { + return false; + } + + // Steps 22-23. 
+ if (caseFirst) { + if (!StringEqualsLiteral(caseFirst, "upper") && + !StringEqualsLiteral(caseFirst, "lower") && + !StringEqualsLiteral(caseFirst, "false")) { + if (UniqueChars str = QuoteString(cx, caseFirst, '"')) { + JS_ReportErrorNumberASCII(cx, js::GetErrorMessage, nullptr, + JSMSG_INVALID_OPTION_VALUE, "caseFirst", + str.get()); + } + return false; + } + + if (!keywords.emplaceBack("kf", caseFirst)) { + return false; + } + } + + // Steps 24-25. + Rooted<JSLinearString*> numeric(cx); + if (!GetBooleanOption(cx, options, cx->names().numeric, &numeric)) { + return false; + } + + // Step 26. + if (numeric) { + if (!keywords.emplaceBack("kn", numeric)) { + return false; + } + } + + // Step 27. + Rooted<JSLinearString*> numberingSystem(cx); + if (!GetStringOption(cx, options, cx->names().numberingSystem, + &numberingSystem)) { + return false; + } + + // Steps 28-29. + if (numberingSystem) { + bool isValid; + if (!IsValidUnicodeExtensionValue(cx, numberingSystem, &isValid)) { + return false; + } + if (!isValid) { + if (UniqueChars str = QuoteString(cx, numberingSystem, '"')) { + JS_ReportErrorNumberASCII(cx, js::GetErrorMessage, nullptr, + JSMSG_INVALID_OPTION_VALUE, + "numberingSystem", str.get()); + } + return false; + } + + if (!keywords.emplaceBack("nu", numberingSystem)) { + return false; + } + } + + // Step 30. + if (!ApplyUnicodeExtensionToTag(cx, tag, keywords)) { + return false; + } + } + + // ApplyOptionsToTag, steps 9 and 13. + // ApplyUnicodeExtensionToTag, step 9. + if (auto result = tag.CanonicalizeExtensions(); result.isErr()) { + if (result.unwrapErr() == + mozilla::intl::Locale::CanonicalizationError::DuplicateVariant) { + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, + JSMSG_DUPLICATE_VARIANT_SUBTAG); + } else { + intl::ReportInternalError(cx); + } + return false; + } + + // Steps 6, 31-37. + JSObject* obj = CreateLocaleObject(cx, proto, tag); + if (!obj) { + return false; + } + + // Step 38. 
+ args.rval().setObject(*obj); + return true; +} + +using UnicodeKey = const char (&)[UnicodeKeyLength + 1]; + +// Returns the tuple [index, length] of the `type` in the `keyword` in Unicode +// locale extension |extension| that has |key| as its `key`. If `keyword` lacks +// a type, the returned |index| will be where `type` would have been, and +// |length| will be set to zero. +template <typename CharT> +static mozilla::Maybe<IndexAndLength> FindUnicodeExtensionType( + const CharT* extension, size_t length, UnicodeKey key) { + MOZ_ASSERT(extension[0] == 'u'); + MOZ_ASSERT(extension[1] == '-'); + + const CharT* end = extension + length; + + SepKeywordIterator<CharT> iter(extension, end); + + // Search all keywords until a match was found. + const CharT* beginKey; + while (true) { + beginKey = iter.next(); + if (!beginKey) { + return mozilla::Nothing(); + } + + // Add +1 to skip over the separator preceding the keyword. + MOZ_ASSERT(beginKey[0] == '-'); + beginKey++; + + // Exit the loop on the first match. + if (std::equal(beginKey, beginKey + UnicodeKeyLength, key)) { + break; + } + } + + // Skip over the key. + const CharT* beginType = beginKey + UnicodeKeyLength; + + // Find the start of the next keyword. + const CharT* endType = iter.next(); + + // No further keyword present, the current keyword ends the Unicode extension. + if (!endType) { + endType = end; + } + + // If the keyword has a type, skip over the separator preceding the type. + if (beginType != endType) { + MOZ_ASSERT(beginType[0] == '-'); + beginType++; + } + return mozilla::Some(IndexAndLength{size_t(beginType - extension), + size_t(endType - beginType)}); +} + +static inline auto FindUnicodeExtensionType(JSLinearString* unicodeExtension, + UnicodeKey key) { + JS::AutoCheckCannotGC nogc; + return unicodeExtension->hasLatin1Chars() + ? 
FindUnicodeExtensionType( + reinterpret_cast<const char*>( + unicodeExtension->latin1Chars(nogc)), + unicodeExtension->length(), key) + : FindUnicodeExtensionType(unicodeExtension->twoByteChars(nogc), + unicodeExtension->length(), key); +} + +// Return the sequence of types for the Unicode extension keyword specified by +// key or undefined when the keyword isn't present. +static bool GetUnicodeExtension(JSContext* cx, LocaleObject* locale, + UnicodeKey key, MutableHandleValue value) { + // Return undefined when no Unicode extension subtag is present. + const Value& unicodeExtensionValue = locale->unicodeExtension(); + if (unicodeExtensionValue.isUndefined()) { + value.setUndefined(); + return true; + } + + JSLinearString* unicodeExtension = + unicodeExtensionValue.toString()->ensureLinear(cx); + if (!unicodeExtension) { + return false; + } + + // Find the type of the requested key in the Unicode extension subtag. + auto result = FindUnicodeExtensionType(unicodeExtension, key); + + // Return undefined if the requested key isn't present in the extension. + if (!result) { + value.setUndefined(); + return true; + } + + size_t index = result->index; + size_t length = result->length; + + // Otherwise return the type value of the found keyword. + JSString* str = NewDependentString(cx, unicodeExtension, index, length); + if (!str) { + return false; + } + value.setString(str); + return true; +} + +struct BaseNamePartsResult { + IndexAndLength language; + mozilla::Maybe<IndexAndLength> script; + mozilla::Maybe<IndexAndLength> region; +}; + +// Returns [language-length, script-index, region-index, region-length]. +template <typename CharT> +static BaseNamePartsResult BaseNameParts(const CharT* baseName, size_t length) { + size_t languageLength; + size_t scriptIndex = 0; + size_t regionIndex = 0; + size_t regionLength = 0; + + // Search the first separator to find the end of the language subtag. 
+ if (const CharT* sep = std::char_traits<CharT>::find(baseName, length, '-')) { + languageLength = sep - baseName; + + // Add +1 to skip over the separator character. + size_t nextSubtag = languageLength + 1; + + // Script subtags are always four characters long, but take care for a four + // character long variant subtag. These start with a digit. + if ((nextSubtag + ScriptLength == length || + (nextSubtag + ScriptLength < length && + baseName[nextSubtag + ScriptLength] == '-')) && + mozilla::IsAsciiAlpha(baseName[nextSubtag])) { + scriptIndex = nextSubtag; + nextSubtag = scriptIndex + ScriptLength + 1; + } + + // Region subtags can be either two or three characters long. + if (nextSubtag < length) { + for (size_t rlen : {AlphaRegionLength, DigitRegionLength}) { + MOZ_ASSERT(nextSubtag + rlen <= length); + if (nextSubtag + rlen == length || baseName[nextSubtag + rlen] == '-') { + regionIndex = nextSubtag; + regionLength = rlen; + break; + } + } + } + } else { + // No separator found, the base-name consists of just a language subtag. + languageLength = length; + } + + // Tell the analysis the |IsStructurallyValid*Tag| functions can't GC. + JS::AutoSuppressGCAnalysis nogc; + + IndexAndLength language{0, languageLength}; + MOZ_ASSERT( + mozilla::intl::IsStructurallyValidLanguageTag(language.spanOf(baseName))); + + mozilla::Maybe<IndexAndLength> script{}; + if (scriptIndex) { + script.emplace(scriptIndex, ScriptLength); + MOZ_ASSERT( + mozilla::intl::IsStructurallyValidScriptTag(script->spanOf(baseName))); + } + + mozilla::Maybe<IndexAndLength> region{}; + if (regionIndex) { + region.emplace(regionIndex, regionLength); + MOZ_ASSERT( + mozilla::intl::IsStructurallyValidRegionTag(region->spanOf(baseName))); + } + + return {language, script, region}; +} + +static inline auto BaseNameParts(JSLinearString* baseName) { + JS::AutoCheckCannotGC nogc; + return baseName->hasLatin1Chars() + ? 
BaseNameParts( + reinterpret_cast<const char*>(baseName->latin1Chars(nogc)), + baseName->length()) + : BaseNameParts(baseName->twoByteChars(nogc), baseName->length()); +} + +// Intl.Locale.prototype.maximize () +static bool Locale_maximize(JSContext* cx, const CallArgs& args) { + MOZ_ASSERT(IsLocale(args.thisv())); + + // Step 3. + auto* locale = &args.thisv().toObject().as<LocaleObject>(); + Rooted<JSLinearString*> tagStr(cx, locale->languageTag()->ensureLinear(cx)); + if (!tagStr) { + return false; + } + + mozilla::intl::Locale tag; + if (!intl::ParseLocale(cx, tagStr, tag)) { + return false; + } + + if (auto result = tag.AddLikelySubtags(); result.isErr()) { + intl::ReportInternalError(cx, result.unwrapErr()); + return false; + } + + // Step 4. + auto* result = CreateLocaleObject(cx, nullptr, tag); + if (!result) { + return false; + } + args.rval().setObject(*result); + return true; +} + +// Intl.Locale.prototype.maximize () +static bool Locale_maximize(JSContext* cx, unsigned argc, Value* vp) { + // Steps 1-2. + CallArgs args = CallArgsFromVp(argc, vp); + return CallNonGenericMethod<IsLocale, Locale_maximize>(cx, args); +} + +// Intl.Locale.prototype.minimize () +static bool Locale_minimize(JSContext* cx, const CallArgs& args) { + MOZ_ASSERT(IsLocale(args.thisv())); + + // Step 3. + auto* locale = &args.thisv().toObject().as<LocaleObject>(); + Rooted<JSLinearString*> tagStr(cx, locale->languageTag()->ensureLinear(cx)); + if (!tagStr) { + return false; + } + + mozilla::intl::Locale tag; + if (!intl::ParseLocale(cx, tagStr, tag)) { + return false; + } + + if (auto result = tag.RemoveLikelySubtags(); result.isErr()) { + intl::ReportInternalError(cx, result.unwrapErr()); + return false; + } + + // Step 4. 
+ auto* result = CreateLocaleObject(cx, nullptr, tag); + if (!result) { + return false; + } + args.rval().setObject(*result); + return true; +} + +// Intl.Locale.prototype.minimize () +static bool Locale_minimize(JSContext* cx, unsigned argc, Value* vp) { + // Steps 1-2. + CallArgs args = CallArgsFromVp(argc, vp); + return CallNonGenericMethod<IsLocale, Locale_minimize>(cx, args); +} + +// Intl.Locale.prototype.toString () +static bool Locale_toString(JSContext* cx, const CallArgs& args) { + MOZ_ASSERT(IsLocale(args.thisv())); + + // Step 3. + auto* locale = &args.thisv().toObject().as<LocaleObject>(); + args.rval().setString(locale->languageTag()); + return true; +} + +// Intl.Locale.prototype.toString () +static bool Locale_toString(JSContext* cx, unsigned argc, Value* vp) { + // Steps 1-2. + CallArgs args = CallArgsFromVp(argc, vp); + return CallNonGenericMethod<IsLocale, Locale_toString>(cx, args); +} + +// get Intl.Locale.prototype.baseName +static bool Locale_baseName(JSContext* cx, const CallArgs& args) { + MOZ_ASSERT(IsLocale(args.thisv())); + + // Steps 3-4. + auto* locale = &args.thisv().toObject().as<LocaleObject>(); + args.rval().setString(locale->baseName()); + return true; +} + +// get Intl.Locale.prototype.baseName +static bool Locale_baseName(JSContext* cx, unsigned argc, Value* vp) { + // Steps 1-2. + CallArgs args = CallArgsFromVp(argc, vp); + return CallNonGenericMethod<IsLocale, Locale_baseName>(cx, args); +} + +// get Intl.Locale.prototype.calendar +static bool Locale_calendar(JSContext* cx, const CallArgs& args) { + MOZ_ASSERT(IsLocale(args.thisv())); + + // Step 3. + auto* locale = &args.thisv().toObject().as<LocaleObject>(); + return GetUnicodeExtension(cx, locale, "ca", args.rval()); +} + +// get Intl.Locale.prototype.calendar +static bool Locale_calendar(JSContext* cx, unsigned argc, Value* vp) { + // Steps 1-2. 
+ CallArgs args = CallArgsFromVp(argc, vp); + return CallNonGenericMethod<IsLocale, Locale_calendar>(cx, args); +} + +// get Intl.Locale.prototype.caseFirst +static bool Locale_caseFirst(JSContext* cx, const CallArgs& args) { + MOZ_ASSERT(IsLocale(args.thisv())); + + // Step 3. + auto* locale = &args.thisv().toObject().as<LocaleObject>(); + return GetUnicodeExtension(cx, locale, "kf", args.rval()); +} + +// get Intl.Locale.prototype.caseFirst +static bool Locale_caseFirst(JSContext* cx, unsigned argc, Value* vp) { + // Steps 1-2. + CallArgs args = CallArgsFromVp(argc, vp); + return CallNonGenericMethod<IsLocale, Locale_caseFirst>(cx, args); +} + +// get Intl.Locale.prototype.collation +static bool Locale_collation(JSContext* cx, const CallArgs& args) { + MOZ_ASSERT(IsLocale(args.thisv())); + + // Step 3. + auto* locale = &args.thisv().toObject().as<LocaleObject>(); + return GetUnicodeExtension(cx, locale, "co", args.rval()); +} + +// get Intl.Locale.prototype.collation +static bool Locale_collation(JSContext* cx, unsigned argc, Value* vp) { + // Steps 1-2. + CallArgs args = CallArgsFromVp(argc, vp); + return CallNonGenericMethod<IsLocale, Locale_collation>(cx, args); +} + +// get Intl.Locale.prototype.hourCycle +static bool Locale_hourCycle(JSContext* cx, const CallArgs& args) { + MOZ_ASSERT(IsLocale(args.thisv())); + + // Step 3. + auto* locale = &args.thisv().toObject().as<LocaleObject>(); + return GetUnicodeExtension(cx, locale, "hc", args.rval()); +} + +// get Intl.Locale.prototype.hourCycle +static bool Locale_hourCycle(JSContext* cx, unsigned argc, Value* vp) { + // Steps 1-2. + CallArgs args = CallArgsFromVp(argc, vp); + return CallNonGenericMethod<IsLocale, Locale_hourCycle>(cx, args); +} + +// get Intl.Locale.prototype.numeric +static bool Locale_numeric(JSContext* cx, const CallArgs& args) { + MOZ_ASSERT(IsLocale(args.thisv())); + + // Step 3. 
+ auto* locale = &args.thisv().toObject().as<LocaleObject>(); + RootedValue value(cx); + if (!GetUnicodeExtension(cx, locale, "kn", &value)) { + return false; + } + + // Compare against the empty string per Intl.Locale, step 36.a. The Unicode + // extension is already canonicalized, so we don't need to compare against + // "true" at this point. + MOZ_ASSERT(value.isUndefined() || value.isString()); + MOZ_ASSERT_IF(value.isString(), + !StringEqualsLiteral(&value.toString()->asLinear(), "true")); + + args.rval().setBoolean(value.isString() && value.toString()->empty()); + return true; +} + +// get Intl.Locale.prototype.numeric +static bool Locale_numeric(JSContext* cx, unsigned argc, Value* vp) { + // Steps 1-2. + CallArgs args = CallArgsFromVp(argc, vp); + return CallNonGenericMethod<IsLocale, Locale_numeric>(cx, args); +} + +// get Intl.Locale.prototype.numberingSystem +static bool Intl_Locale_numberingSystem(JSContext* cx, const CallArgs& args) { + MOZ_ASSERT(IsLocale(args.thisv())); + + // Step 3. + auto* locale = &args.thisv().toObject().as<LocaleObject>(); + return GetUnicodeExtension(cx, locale, "nu", args.rval()); +} + +// get Intl.Locale.prototype.numberingSystem +static bool Locale_numberingSystem(JSContext* cx, unsigned argc, Value* vp) { + // Steps 1-2. + CallArgs args = CallArgsFromVp(argc, vp); + return CallNonGenericMethod<IsLocale, Intl_Locale_numberingSystem>(cx, args); +} + +// get Intl.Locale.prototype.language +static bool Locale_language(JSContext* cx, const CallArgs& args) { + MOZ_ASSERT(IsLocale(args.thisv())); + + // Step 3. + auto* locale = &args.thisv().toObject().as<LocaleObject>(); + JSLinearString* baseName = locale->baseName()->ensureLinear(cx); + if (!baseName) { + return false; + } + + // Step 4 (Unnecessary assertion). + + auto language = BaseNameParts(baseName).language; + + size_t index = language.index; + size_t length = language.length; + + // Step 5. 
+ JSString* str = NewDependentString(cx, baseName, index, length); + if (!str) { + return false; + } + + args.rval().setString(str); + return true; +} + +// get Intl.Locale.prototype.language +static bool Locale_language(JSContext* cx, unsigned argc, Value* vp) { + // Steps 1-2. + CallArgs args = CallArgsFromVp(argc, vp); + return CallNonGenericMethod<IsLocale, Locale_language>(cx, args); +} + +// get Intl.Locale.prototype.script +static bool Locale_script(JSContext* cx, const CallArgs& args) { + MOZ_ASSERT(IsLocale(args.thisv())); + + // Step 3. + auto* locale = &args.thisv().toObject().as<LocaleObject>(); + JSLinearString* baseName = locale->baseName()->ensureLinear(cx); + if (!baseName) { + return false; + } + + // Step 4 (Unnecessary assertion). + + auto script = BaseNameParts(baseName).script; + + // Step 5. + if (!script) { + args.rval().setUndefined(); + return true; + } + + size_t index = script->index; + size_t length = script->length; + + // Step 6. + JSString* str = NewDependentString(cx, baseName, index, length); + if (!str) { + return false; + } + + args.rval().setString(str); + return true; +} + +// get Intl.Locale.prototype.script +static bool Locale_script(JSContext* cx, unsigned argc, Value* vp) { + // Steps 1-2. + CallArgs args = CallArgsFromVp(argc, vp); + return CallNonGenericMethod<IsLocale, Locale_script>(cx, args); +} + +// get Intl.Locale.prototype.region +static bool Locale_region(JSContext* cx, const CallArgs& args) { + MOZ_ASSERT(IsLocale(args.thisv())); + + // Step 3. + auto* locale = &args.thisv().toObject().as<LocaleObject>(); + JSLinearString* baseName = locale->baseName()->ensureLinear(cx); + if (!baseName) { + return false; + } + + // Step 4 (Unnecessary assertion). + + auto region = BaseNameParts(baseName).region; + + // Step 5. + if (!region) { + args.rval().setUndefined(); + return true; + } + + size_t index = region->index; + size_t length = region->length; + + // Step 6. 
+ JSString* str = NewDependentString(cx, baseName, index, length); + if (!str) { + return false; + } + + args.rval().setString(str); + return true; +} + +// get Intl.Locale.prototype.region +static bool Locale_region(JSContext* cx, unsigned argc, Value* vp) { + // Steps 1-2. + CallArgs args = CallArgsFromVp(argc, vp); + return CallNonGenericMethod<IsLocale, Locale_region>(cx, args); +} + +static bool Locale_toSource(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + args.rval().setString(cx->names().Locale); + return true; +} + +static const JSFunctionSpec locale_methods[] = { + JS_FN("maximize", Locale_maximize, 0, 0), + JS_FN("minimize", Locale_minimize, 0, 0), + JS_FN("toString", Locale_toString, 0, 0), + JS_FN("toSource", Locale_toSource, 0, 0), JS_FS_END}; + +static const JSPropertySpec locale_properties[] = { + JS_PSG("baseName", Locale_baseName, 0), + JS_PSG("calendar", Locale_calendar, 0), + JS_PSG("caseFirst", Locale_caseFirst, 0), + JS_PSG("collation", Locale_collation, 0), + JS_PSG("hourCycle", Locale_hourCycle, 0), + JS_PSG("numeric", Locale_numeric, 0), + JS_PSG("numberingSystem", Locale_numberingSystem, 0), + JS_PSG("language", Locale_language, 0), + JS_PSG("script", Locale_script, 0), + JS_PSG("region", Locale_region, 0), + JS_STRING_SYM_PS(toStringTag, "Intl.Locale", JSPROP_READONLY), + JS_PS_END}; + +const ClassSpec LocaleObject::classSpec_ = { + GenericCreateConstructor<Locale, 1, gc::AllocKind::FUNCTION>, + GenericCreatePrototype<LocaleObject>, + nullptr, + nullptr, + locale_methods, + locale_properties, + nullptr, + ClassSpec::DontDefineConstructor}; + +bool js::intl_ValidateAndCanonicalizeLanguageTag(JSContext* cx, unsigned argc, + Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 2); + + HandleValue tagValue = args[0]; + bool applyToString = args[1].toBoolean(); + + if (tagValue.isObject()) { + JSString* tagStr; + JS_TRY_VAR_OR_RETURN_FALSE( + cx, tagStr, + 
LanguageTagFromMaybeWrappedLocale(cx, &tagValue.toObject())); + if (tagStr) { + args.rval().setString(tagStr); + return true; + } + } + + if (!applyToString && !tagValue.isString()) { + args.rval().setNull(); + return true; + } + + JSString* tagStr = ToString(cx, tagValue); + if (!tagStr) { + return false; + } + + Rooted<JSLinearString*> tagLinearStr(cx, tagStr->ensureLinear(cx)); + if (!tagLinearStr) { + return false; + } + + // Handle the common case (a standalone language) first. + // Only the following Unicode BCP 47 locale identifier subset is accepted: + // unicode_locale_id = unicode_language_id + // unicode_language_id = unicode_language_subtag + // unicode_language_subtag = alpha{2,3} + JSString* language; + JS_TRY_VAR_OR_RETURN_FALSE( + cx, language, intl::ParseStandaloneISO639LanguageTag(cx, tagLinearStr)); + if (language) { + args.rval().setString(language); + return true; + } + + mozilla::intl::Locale tag; + if (!intl::ParseLocale(cx, tagLinearStr, tag)) { + return false; + } + + auto result = tag.Canonicalize(); + if (result.isErr()) { + if (result.unwrapErr() == + mozilla::intl::Locale::CanonicalizationError::DuplicateVariant) { + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, + JSMSG_DUPLICATE_VARIANT_SUBTAG); + } else { + intl::ReportInternalError(cx); + } + return false; + } + + intl::FormatBuffer<char, intl::INITIAL_CHAR_BUFFER_SIZE> buffer(cx); + if (auto result = tag.ToString(buffer); result.isErr()) { + intl::ReportInternalError(cx, result.unwrapErr()); + return false; + } + + JSString* resultStr = buffer.toAsciiString(cx); + if (!resultStr) { + return false; + } + + args.rval().setString(resultStr); + return true; +} + +bool js::intl_TryValidateAndCanonicalizeLanguageTag(JSContext* cx, + unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 1); + + Rooted<JSLinearString*> linear(cx, args[0].toString()->ensureLinear(cx)); + if (!linear) { + return false; + } + + mozilla::intl::Locale 
tag; + { + if (!StringIsAscii(linear)) { + // The caller handles invalid inputs. + args.rval().setNull(); + return true; + } + + intl::StringAsciiChars chars(linear); + if (!chars.init(cx)) { + return false; + } + + if (mozilla::intl::LocaleParser::TryParse(chars, tag).isErr()) { + // The caller handles invalid inputs. + args.rval().setNull(); + return true; + } + } + + auto result = tag.Canonicalize(); + if (result.isErr()) { + if (result.unwrapErr() == + mozilla::intl::Locale::CanonicalizationError::DuplicateVariant) { + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, + JSMSG_DUPLICATE_VARIANT_SUBTAG); + } else { + intl::ReportInternalError(cx); + } + return false; + } + + intl::FormatBuffer<char, intl::INITIAL_CHAR_BUFFER_SIZE> buffer(cx); + if (auto result = tag.ToString(buffer); result.isErr()) { + intl::ReportInternalError(cx, result.unwrapErr()); + return false; + } + + JSString* resultStr = buffer.toAsciiString(cx); + if (!resultStr) { + return false; + } + args.rval().setString(resultStr); + return true; +} + +bool js::intl_ValidateAndCanonicalizeUnicodeExtensionType(JSContext* cx, + unsigned argc, + Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 3); + + HandleValue typeArg = args[0]; + MOZ_ASSERT(typeArg.isString(), "type must be a string"); + + HandleValue optionArg = args[1]; + MOZ_ASSERT(optionArg.isString(), "option name must be a string"); + + HandleValue keyArg = args[2]; + MOZ_ASSERT(keyArg.isString(), "key must be a string"); + + Rooted<JSLinearString*> unicodeType(cx, typeArg.toString()->ensureLinear(cx)); + if (!unicodeType) { + return false; + } + + bool isValid; + if (!IsValidUnicodeExtensionValue(cx, unicodeType, &isValid)) { + return false; + } + if (!isValid) { + UniqueChars optionChars = EncodeAscii(cx, optionArg.toString()); + if (!optionChars) { + return false; + } + + UniqueChars unicodeTypeChars = QuoteString(cx, unicodeType, '"'); + if (!unicodeTypeChars) { + return false; + } + + 
JS_ReportErrorNumberASCII(cx, js::GetErrorMessage, nullptr, + JSMSG_INVALID_OPTION_VALUE, optionChars.get(), + unicodeTypeChars.get()); + return false; + } + + char unicodeKey[UnicodeKeyLength]; + { + JSLinearString* str = keyArg.toString()->ensureLinear(cx); + if (!str) { + return false; + } + MOZ_ASSERT(str->length() == UnicodeKeyLength); + + for (size_t i = 0; i < UnicodeKeyLength; i++) { + char16_t ch = str->latin1OrTwoByteChar(i); + MOZ_ASSERT(mozilla::IsAscii(ch)); + unicodeKey[i] = char(ch); + } + } + + UniqueChars unicodeTypeChars = EncodeAscii(cx, unicodeType); + if (!unicodeTypeChars) { + return false; + } + + size_t unicodeTypeLength = unicodeType->length(); + MOZ_ASSERT(strlen(unicodeTypeChars.get()) == unicodeTypeLength); + + // Convert into canonical case before searching for replacements. + mozilla::intl::AsciiToLowerCase(unicodeTypeChars.get(), unicodeTypeLength, + unicodeTypeChars.get()); + + auto key = mozilla::Span(unicodeKey, UnicodeKeyLength); + auto type = mozilla::Span(unicodeTypeChars.get(), unicodeTypeLength); + + // Search if there's a replacement for the current Unicode keyword. + JSString* result; + if (const char* replacement = + mozilla::intl::Locale::ReplaceUnicodeExtensionType(key, type)) { + result = NewStringCopyZ<CanGC>(cx, replacement); + } else { + result = StringToLowerCase(cx, unicodeType); + } + if (!result) { + return false; + } + + args.rval().setString(result); + return true; +} diff --git a/js/src/builtin/intl/Locale.h b/js/src/builtin/intl/Locale.h new file mode 100644 index 0000000000..93b618528a --- /dev/null +++ b/js/src/builtin/intl/Locale.h @@ -0,0 +1,61 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef builtin_intl_Locale_h +#define builtin_intl_Locale_h + +#include <stdint.h> + +#include "js/Class.h" +#include "vm/NativeObject.h" + +namespace js { +/** + * Native object backing Intl.Locale instances. The complete language tag, its + * base name and the Unicode extension are stored in fixed slots and read back + * through the accessors below. + */ +class LocaleObject : public NativeObject { + public: + static const JSClass class_; + static const JSClass& protoClass_; + /** + * Fixed-slot layout. Each slot index is read by the accessor of the matching + * name below; SLOT_COUNT is one past the highest slot index. + */ + static constexpr uint32_t LANGUAGE_TAG_SLOT = 0; + static constexpr uint32_t BASENAME_SLOT = 1; + static constexpr uint32_t UNICODE_EXTENSION_SLOT = 2; + static constexpr uint32_t SLOT_COUNT = 3; + + /** + * Returns the complete language tag, including any extensions and privateuse + * subtags. + */ + JSString* languageTag() const { + return getFixedSlot(LANGUAGE_TAG_SLOT).toString(); + } + + /** + * Returns the basename subtags, i.e. excluding any extensions and privateuse + * subtags. + */ + JSString* baseName() const { return getFixedSlot(BASENAME_SLOT).toString(); } + /** + * Returns the raw Value held in the Unicode extension slot. Unlike the two + * accessors above, the value is not converted to a JSString here. + * NOTE(review): presumably the slot holds a non-string value when the tag has + * no Unicode extension; confirm against the code that fills the slot. + */ + const Value& unicodeExtension() const { + return getFixedSlot(UNICODE_EXTENSION_SLOT); + } + + private: + static const ClassSpec classSpec_; +}; +/** + * Native taking exactly two arguments (asserted): a tag value and a boolean + * |applyToString|. + * + * If the tag value is an object from which a Locale language tag can be read, + * that tag string is returned. Otherwise, when |applyToString| is false and + * the value is not a string, null is returned. In all remaining cases the + * value is converted to a string, parsed and canonicalized, and the canonical + * language tag string is returned. + * + * Returns false with an error reported when conversion, parsing or + * canonicalization fails (for example on a duplicate variant subtag). + */ +[[nodiscard]] extern bool intl_ValidateAndCanonicalizeLanguageTag(JSContext* cx, + unsigned argc, + Value* vp); +/** + * Native taking exactly one argument (asserted), which must be a string. + * + * Returns null, without reporting an error, when the string is not ASCII or + * does not parse as a locale; the caller is expected to handle that case. + * Otherwise returns the canonicalized language tag string. A failure during + * canonicalization itself is still reported as an error. + */ +[[nodiscard]] extern bool intl_TryValidateAndCanonicalizeLanguageTag( + JSContext* cx, unsigned argc, Value* vp); +/** + * Native taking exactly three string arguments (asserted): the Unicode + * extension type, the option name used in error messages, and the Unicode + * extension key. + * + * Reports JSMSG_INVALID_OPTION_VALUE when the type is not a valid Unicode + * extension value. Otherwise the type is lower-cased and the result is either + * the replacement registered for this key/type pair or, when there is none, + * the lower-cased type itself. + */ +[[nodiscard]] extern bool intl_ValidateAndCanonicalizeUnicodeExtensionType( + JSContext* cx, unsigned argc, Value* vp); + +} // namespace js + +#endif /* builtin_intl_Locale_h */ diff --git a/js/src/builtin/intl/NumberFormat.cpp b/js/src/builtin/intl/NumberFormat.cpp new file mode 100644 index 0000000000..0f849aefff --- /dev/null +++ b/js/src/builtin/intl/NumberFormat.cpp @@ -0,0 +1,1318 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +/* Intl.NumberFormat implementation. */ + +#include "builtin/intl/NumberFormat.h" + +#include "mozilla/Assertions.h" +#include "mozilla/Casting.h" +#include "mozilla/FloatingPoint.h" +#include "mozilla/intl/Locale.h" +#include "mozilla/intl/MeasureUnit.h" +#include "mozilla/intl/MeasureUnitGenerated.h" +#include "mozilla/intl/NumberFormat.h" +#include "mozilla/intl/NumberingSystem.h" +#include "mozilla/intl/NumberRangeFormat.h" +#include "mozilla/Span.h" +#include "mozilla/TextUtils.h" +#include "mozilla/UniquePtr.h" + +#include <algorithm> +#include <stddef.h> +#include <stdint.h> +#include <string> +#include <string_view> +#include <type_traits> + +#include "builtin/Array.h" +#include "builtin/intl/CommonFunctions.h" +#include "builtin/intl/FormatBuffer.h" +#include "builtin/intl/LanguageTag.h" +#include "builtin/intl/RelativeTimeFormat.h" +#include "gc/GCContext.h" +#include "js/CharacterEncoding.h" +#include "js/PropertySpec.h" +#include "js/RootingAPI.h" +#include "js/TypeDecls.h" +#include "util/Text.h" +#include "vm/BigIntType.h" +#include "vm/GlobalObject.h" +#include "vm/JSContext.h" +#include "vm/PlainObject.h" // js::PlainObject +#include "vm/StringType.h" + +#include "vm/GeckoProfiler-inl.h" +#include "vm/JSObject-inl.h" +#include "vm/NativeObject-inl.h" + +using namespace js; + +using mozilla::AssertedCast; + +using js::intl::DateTimeFormatOptions; +using js::intl::FieldType; + +const JSClassOps NumberFormatObject::classOps_ = { + nullptr, // addProperty + nullptr, // delProperty + nullptr, // enumerate + nullptr, // newEnumerate + nullptr, // resolve + nullptr, // mayResolve + NumberFormatObject::finalize, // finalize + nullptr, // call + nullptr, // construct + nullptr, // trace +}; + +const JSClass NumberFormatObject::class_ = { + "Intl.NumberFormat", + JSCLASS_HAS_RESERVED_SLOTS(NumberFormatObject::SLOT_COUNT) | + JSCLASS_HAS_CACHED_PROTO(JSProto_NumberFormat) | + JSCLASS_FOREGROUND_FINALIZE, + &NumberFormatObject::classOps_, 
&NumberFormatObject::classSpec_}; + +const JSClass& NumberFormatObject::protoClass_ = PlainObject::class_; + +static bool numberFormat_toSource(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + args.rval().setString(cx->names().NumberFormat); + return true; +} + +static const JSFunctionSpec numberFormat_static_methods[] = { + JS_SELF_HOSTED_FN("supportedLocalesOf", + "Intl_NumberFormat_supportedLocalesOf", 1, 0), + JS_FS_END, +}; + +static const JSFunctionSpec numberFormat_methods[] = { + JS_SELF_HOSTED_FN("resolvedOptions", "Intl_NumberFormat_resolvedOptions", 0, + 0), + JS_SELF_HOSTED_FN("formatToParts", "Intl_NumberFormat_formatToParts", 1, 0), + JS_SELF_HOSTED_FN("formatRange", "Intl_NumberFormat_formatRange", 2, 0), + JS_SELF_HOSTED_FN("formatRangeToParts", + "Intl_NumberFormat_formatRangeToParts", 2, 0), + JS_FN("toSource", numberFormat_toSource, 0, 0), + JS_FS_END, +}; + +static const JSPropertySpec numberFormat_properties[] = { + JS_SELF_HOSTED_GET("format", "$Intl_NumberFormat_format_get", 0), + JS_STRING_SYM_PS(toStringTag, "Intl.NumberFormat", JSPROP_READONLY), + JS_PS_END, +}; + +static bool NumberFormat(JSContext* cx, unsigned argc, Value* vp); + +const ClassSpec NumberFormatObject::classSpec_ = { + GenericCreateConstructor<NumberFormat, 0, gc::AllocKind::FUNCTION>, + GenericCreatePrototype<NumberFormatObject>, + numberFormat_static_methods, + nullptr, + numberFormat_methods, + numberFormat_properties, + nullptr, + ClassSpec::DontDefineConstructor}; + +/** + * 15.1.1 Intl.NumberFormat ( [ locales [ , options ] ] ) + * + * ES2024 Intl draft rev 74ca7099f103d143431b2ea422ae640c6f43e3e6 + */ +static bool NumberFormat(JSContext* cx, const CallArgs& args, bool construct) { + AutoJSConstructorProfilerEntry pseudoFrame(cx, "Intl.NumberFormat"); + + // Step 1 (Handled by OrdinaryCreateFromConstructor fallback code). + + // Step 2 (Inlined 9.1.14, OrdinaryCreateFromConstructor). 
+ RootedObject proto(cx); + if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_NumberFormat, + &proto)) { + return false; + } + + Rooted<NumberFormatObject*> numberFormat(cx); + numberFormat = NewObjectWithClassProto<NumberFormatObject>(cx, proto); + if (!numberFormat) { + return false; + } + + RootedValue thisValue(cx, + construct ? ObjectValue(*numberFormat) : args.thisv()); + HandleValue locales = args.get(0); + HandleValue options = args.get(1); + + // Step 3. + return intl::InitializeNumberFormatObject(cx, numberFormat, thisValue, + locales, options, args.rval()); +} + +static bool NumberFormat(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + return NumberFormat(cx, args, args.isConstructing()); +} + +bool js::intl_NumberFormat(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 2); + MOZ_ASSERT(!args.isConstructing()); + // intl_NumberFormat is an intrinsic for self-hosted JavaScript, so it + // cannot be used with "new", but it still has to be treated as a + // constructor. + return NumberFormat(cx, args, true); +} + +void js::NumberFormatObject::finalize(JS::GCContext* gcx, JSObject* obj) { + MOZ_ASSERT(gcx->onMainThread()); + + auto* numberFormat = &obj->as<NumberFormatObject>(); + mozilla::intl::NumberFormat* nf = numberFormat->getNumberFormatter(); + mozilla::intl::NumberRangeFormat* nrf = + numberFormat->getNumberRangeFormatter(); + + if (nf) { + intl::RemoveICUCellMemory(gcx, obj, NumberFormatObject::EstimatedMemoryUse); + // This was allocated using `new` in mozilla::intl::NumberFormat, so we + // delete here. + delete nf; + } + + if (nrf) { + intl::RemoveICUCellMemory(gcx, obj, EstimatedRangeFormatterMemoryUse); + // This was allocated using `new` in mozilla::intl::NumberRangeFormat, so we + // delete here. 
+ delete nrf; + } +} + +bool js::intl_numberingSystem(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 1); + MOZ_ASSERT(args[0].isString()); + + UniqueChars locale = intl::EncodeLocale(cx, args[0].toString()); + if (!locale) { + return false; + } + + auto numberingSystem = + mozilla::intl::NumberingSystem::TryCreate(locale.get()); + if (numberingSystem.isErr()) { + intl::ReportInternalError(cx, numberingSystem.unwrapErr()); + return false; + } + + auto name = numberingSystem.inspect()->GetName(); + if (name.isErr()) { + intl::ReportInternalError(cx, name.unwrapErr()); + return false; + } + + JSString* jsname = NewStringCopy<CanGC>(cx, name.unwrap()); + if (!jsname) { + return false; + } + + args.rval().setString(jsname); + return true; +} + +#if DEBUG || MOZ_SYSTEM_ICU +bool js::intl_availableMeasurementUnits(JSContext* cx, unsigned argc, + Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 0); + + RootedObject measurementUnits(cx, NewPlainObjectWithProto(cx, nullptr)); + if (!measurementUnits) { + return false; + } + + auto units = mozilla::intl::MeasureUnit::GetAvailable(); + if (units.isErr()) { + intl::ReportInternalError(cx, units.unwrapErr()); + return false; + } + + Rooted<JSAtom*> unitAtom(cx); + for (auto unit : units.unwrap()) { + if (unit.isErr()) { + intl::ReportInternalError(cx); + return false; + } + auto unitIdentifier = unit.unwrap(); + + unitAtom = Atomize(cx, unitIdentifier.data(), unitIdentifier.size()); + if (!unitAtom) { + return false; + } + + if (!DefineDataProperty(cx, measurementUnits, unitAtom->asPropertyName(), + TrueHandleValue)) { + return false; + } + } + + args.rval().setObject(*measurementUnits); + return true; +} +#endif + +static constexpr size_t MaxUnitLength() { + size_t length = 0; + for (const auto& unit : mozilla::intl::simpleMeasureUnits) { + length = std::max(length, std::char_traits<char>::length(unit.name)); + } + return 
length * 2 + std::char_traits<char>::length("-per-"); +} + +static UniqueChars NumberFormatLocale(JSContext* cx, HandleObject internals) { + RootedValue value(cx); + if (!GetProperty(cx, internals, internals, cx->names().locale, &value)) { + return nullptr; + } + + // ICU expects numberingSystem as a Unicode locale extensions on locale. + + mozilla::intl::Locale tag; + { + Rooted<JSLinearString*> locale(cx, value.toString()->ensureLinear(cx)); + if (!locale) { + return nullptr; + } + + if (!intl::ParseLocale(cx, locale, tag)) { + return nullptr; + } + } + + JS::RootedVector<intl::UnicodeExtensionKeyword> keywords(cx); + + if (!GetProperty(cx, internals, internals, cx->names().numberingSystem, + &value)) { + return nullptr; + } + + { + JSLinearString* numberingSystem = value.toString()->ensureLinear(cx); + if (!numberingSystem) { + return nullptr; + } + + if (!keywords.emplaceBack("nu", numberingSystem)) { + return nullptr; + } + } + + // |ApplyUnicodeExtensionToTag| applies the new keywords to the front of + // the Unicode extension subtag. We're then relying on ICU to follow RFC + // 6067, which states that any trailing keywords using the same key + // should be ignored. 
+ if (!intl::ApplyUnicodeExtensionToTag(cx, tag, keywords)) { + return nullptr; + } + + intl::FormatBuffer<char> buffer(cx); + if (auto result = tag.ToString(buffer); result.isErr()) { + intl::ReportInternalError(cx, result.unwrapErr()); + return nullptr; + } + return buffer.extractStringZ(); +} + +struct NumberFormatOptions : public mozilla::intl::NumberRangeFormatOptions { + static_assert(std::is_base_of_v<mozilla::intl::NumberFormatOptions, + mozilla::intl::NumberRangeFormatOptions>); + + char currencyChars[3] = {}; + char unitChars[MaxUnitLength()] = {}; +}; + +static bool FillNumberFormatOptions(JSContext* cx, HandleObject internals, + NumberFormatOptions& options) { + RootedValue value(cx); + if (!GetProperty(cx, internals, internals, cx->names().style, &value)) { + return false; + } + + bool accountingSign = false; + { + JSLinearString* style = value.toString()->ensureLinear(cx); + if (!style) { + return false; + } + + if (StringEqualsLiteral(style, "currency")) { + if (!GetProperty(cx, internals, internals, cx->names().currency, + &value)) { + return false; + } + JSLinearString* currency = value.toString()->ensureLinear(cx); + if (!currency) { + return false; + } + + MOZ_RELEASE_ASSERT( + currency->length() == 3, + "IsWellFormedCurrencyCode permits only length-3 strings"); + MOZ_ASSERT(StringIsAscii(currency), + "IsWellFormedCurrencyCode permits only ASCII strings"); + CopyChars(reinterpret_cast<Latin1Char*>(options.currencyChars), + *currency); + + if (!GetProperty(cx, internals, internals, cx->names().currencyDisplay, + &value)) { + return false; + } + JSLinearString* currencyDisplay = value.toString()->ensureLinear(cx); + if (!currencyDisplay) { + return false; + } + + using CurrencyDisplay = + mozilla::intl::NumberFormatOptions::CurrencyDisplay; + + CurrencyDisplay display; + if (StringEqualsLiteral(currencyDisplay, "code")) { + display = CurrencyDisplay::Code; + } else if (StringEqualsLiteral(currencyDisplay, "symbol")) { + display = 
CurrencyDisplay::Symbol; + } else if (StringEqualsLiteral(currencyDisplay, "narrowSymbol")) { + display = CurrencyDisplay::NarrowSymbol; + } else { + MOZ_ASSERT(StringEqualsLiteral(currencyDisplay, "name")); + display = CurrencyDisplay::Name; + } + + if (!GetProperty(cx, internals, internals, cx->names().currencySign, + &value)) { + return false; + } + JSLinearString* currencySign = value.toString()->ensureLinear(cx); + if (!currencySign) { + return false; + } + + if (StringEqualsLiteral(currencySign, "accounting")) { + accountingSign = true; + } else { + MOZ_ASSERT(StringEqualsLiteral(currencySign, "standard")); + } + + options.mCurrency = mozilla::Some( + std::make_pair(std::string_view(options.currencyChars, 3), display)); + } else if (StringEqualsLiteral(style, "percent")) { + options.mPercent = true; + } else if (StringEqualsLiteral(style, "unit")) { + if (!GetProperty(cx, internals, internals, cx->names().unit, &value)) { + return false; + } + JSLinearString* unit = value.toString()->ensureLinear(cx); + if (!unit) { + return false; + } + + size_t unit_str_length = unit->length(); + + MOZ_ASSERT(StringIsAscii(unit)); + MOZ_RELEASE_ASSERT(unit_str_length <= MaxUnitLength()); + CopyChars(reinterpret_cast<Latin1Char*>(options.unitChars), *unit); + + if (!GetProperty(cx, internals, internals, cx->names().unitDisplay, + &value)) { + return false; + } + JSLinearString* unitDisplay = value.toString()->ensureLinear(cx); + if (!unitDisplay) { + return false; + } + + using UnitDisplay = mozilla::intl::NumberFormatOptions::UnitDisplay; + + UnitDisplay display; + if (StringEqualsLiteral(unitDisplay, "short")) { + display = UnitDisplay::Short; + } else if (StringEqualsLiteral(unitDisplay, "narrow")) { + display = UnitDisplay::Narrow; + } else { + MOZ_ASSERT(StringEqualsLiteral(unitDisplay, "long")); + display = UnitDisplay::Long; + } + + options.mUnit = mozilla::Some(std::make_pair( + std::string_view(options.unitChars, unit_str_length), display)); + } else { + 
MOZ_ASSERT(StringEqualsLiteral(style, "decimal")); + } + } + + bool hasMinimumSignificantDigits; + if (!HasProperty(cx, internals, cx->names().minimumSignificantDigits, + &hasMinimumSignificantDigits)) { + return false; + } + + if (hasMinimumSignificantDigits) { + if (!GetProperty(cx, internals, internals, + cx->names().minimumSignificantDigits, &value)) { + return false; + } + uint32_t minimumSignificantDigits = AssertedCast<uint32_t>(value.toInt32()); + + if (!GetProperty(cx, internals, internals, + cx->names().maximumSignificantDigits, &value)) { + return false; + } + uint32_t maximumSignificantDigits = AssertedCast<uint32_t>(value.toInt32()); + + options.mSignificantDigits = mozilla::Some( + std::make_pair(minimumSignificantDigits, maximumSignificantDigits)); + } + + bool hasMinimumFractionDigits; + if (!HasProperty(cx, internals, cx->names().minimumFractionDigits, + &hasMinimumFractionDigits)) { + return false; + } + + if (hasMinimumFractionDigits) { + if (!GetProperty(cx, internals, internals, + cx->names().minimumFractionDigits, &value)) { + return false; + } + uint32_t minimumFractionDigits = AssertedCast<uint32_t>(value.toInt32()); + + if (!GetProperty(cx, internals, internals, + cx->names().maximumFractionDigits, &value)) { + return false; + } + uint32_t maximumFractionDigits = AssertedCast<uint32_t>(value.toInt32()); + + options.mFractionDigits = mozilla::Some( + std::make_pair(minimumFractionDigits, maximumFractionDigits)); + } + + if (!GetProperty(cx, internals, internals, cx->names().roundingPriority, + &value)) { + return false; + } + + { + JSLinearString* roundingPriority = value.toString()->ensureLinear(cx); + if (!roundingPriority) { + return false; + } + + using RoundingPriority = + mozilla::intl::NumberFormatOptions::RoundingPriority; + + RoundingPriority priority; + if (StringEqualsLiteral(roundingPriority, "auto")) { + priority = RoundingPriority::Auto; + } else if (StringEqualsLiteral(roundingPriority, "morePrecision")) { + priority = 
RoundingPriority::MorePrecision; + } else { + MOZ_ASSERT(StringEqualsLiteral(roundingPriority, "lessPrecision")); + priority = RoundingPriority::LessPrecision; + } + + options.mRoundingPriority = priority; + } + + if (!GetProperty(cx, internals, internals, cx->names().minimumIntegerDigits, + &value)) { + return false; + } + options.mMinIntegerDigits = + mozilla::Some(AssertedCast<uint32_t>(value.toInt32())); + + if (!GetProperty(cx, internals, internals, cx->names().useGrouping, &value)) { + return false; + } + + if (value.isString()) { + JSLinearString* useGrouping = value.toString()->ensureLinear(cx); + if (!useGrouping) { + return false; + } + + using Grouping = mozilla::intl::NumberFormatOptions::Grouping; + + Grouping grouping; + if (StringEqualsLiteral(useGrouping, "auto")) { + grouping = Grouping::Auto; + } else if (StringEqualsLiteral(useGrouping, "always")) { + grouping = Grouping::Always; + } else { + MOZ_ASSERT(StringEqualsLiteral(useGrouping, "min2")); + grouping = Grouping::Min2; + } + + options.mGrouping = grouping; + } else { + MOZ_ASSERT(value.isBoolean()); + MOZ_ASSERT(value.toBoolean() == false); + + using Grouping = mozilla::intl::NumberFormatOptions::Grouping; + + options.mGrouping = Grouping::Never; + } + + if (!GetProperty(cx, internals, internals, cx->names().notation, &value)) { + return false; + } + + { + JSLinearString* notation = value.toString()->ensureLinear(cx); + if (!notation) { + return false; + } + + using Notation = mozilla::intl::NumberFormatOptions::Notation; + + Notation style; + if (StringEqualsLiteral(notation, "standard")) { + style = Notation::Standard; + } else if (StringEqualsLiteral(notation, "scientific")) { + style = Notation::Scientific; + } else if (StringEqualsLiteral(notation, "engineering")) { + style = Notation::Engineering; + } else { + MOZ_ASSERT(StringEqualsLiteral(notation, "compact")); + + if (!GetProperty(cx, internals, internals, cx->names().compactDisplay, + &value)) { + return false; + } + + 
JSLinearString* compactDisplay = value.toString()->ensureLinear(cx); + if (!compactDisplay) { + return false; + } + + if (StringEqualsLiteral(compactDisplay, "short")) { + style = Notation::CompactShort; + } else { + MOZ_ASSERT(StringEqualsLiteral(compactDisplay, "long")); + style = Notation::CompactLong; + } + } + + options.mNotation = style; + } + + if (!GetProperty(cx, internals, internals, cx->names().signDisplay, &value)) { + return false; + } + + { + JSLinearString* signDisplay = value.toString()->ensureLinear(cx); + if (!signDisplay) { + return false; + } + + using SignDisplay = mozilla::intl::NumberFormatOptions::SignDisplay; + + SignDisplay display; + if (StringEqualsLiteral(signDisplay, "auto")) { + if (accountingSign) { + display = SignDisplay::Accounting; + } else { + display = SignDisplay::Auto; + } + } else if (StringEqualsLiteral(signDisplay, "never")) { + display = SignDisplay::Never; + } else if (StringEqualsLiteral(signDisplay, "always")) { + if (accountingSign) { + display = SignDisplay::AccountingAlways; + } else { + display = SignDisplay::Always; + } + } else if (StringEqualsLiteral(signDisplay, "exceptZero")) { + if (accountingSign) { + display = SignDisplay::AccountingExceptZero; + } else { + display = SignDisplay::ExceptZero; + } + } else { + MOZ_ASSERT(StringEqualsLiteral(signDisplay, "negative")); + if (accountingSign) { + display = SignDisplay::AccountingNegative; + } else { + display = SignDisplay::Negative; + } + } + + options.mSignDisplay = display; + } + + if (!GetProperty(cx, internals, internals, cx->names().roundingIncrement, + &value)) { + return false; + } + options.mRoundingIncrement = AssertedCast<uint32_t>(value.toInt32()); + + if (!GetProperty(cx, internals, internals, cx->names().roundingMode, + &value)) { + return false; + } + + { + JSLinearString* roundingMode = value.toString()->ensureLinear(cx); + if (!roundingMode) { + return false; + } + + using RoundingMode = mozilla::intl::NumberFormatOptions::RoundingMode; + + 
RoundingMode rounding; + if (StringEqualsLiteral(roundingMode, "halfExpand")) { + // "halfExpand" is the default mode, so we handle it first. + rounding = RoundingMode::HalfExpand; + } else if (StringEqualsLiteral(roundingMode, "ceil")) { + rounding = RoundingMode::Ceil; + } else if (StringEqualsLiteral(roundingMode, "floor")) { + rounding = RoundingMode::Floor; + } else if (StringEqualsLiteral(roundingMode, "expand")) { + rounding = RoundingMode::Expand; + } else if (StringEqualsLiteral(roundingMode, "trunc")) { + rounding = RoundingMode::Trunc; + } else if (StringEqualsLiteral(roundingMode, "halfCeil")) { + rounding = RoundingMode::HalfCeil; + } else if (StringEqualsLiteral(roundingMode, "halfFloor")) { + rounding = RoundingMode::HalfFloor; + } else if (StringEqualsLiteral(roundingMode, "halfTrunc")) { + rounding = RoundingMode::HalfTrunc; + } else { + MOZ_ASSERT(StringEqualsLiteral(roundingMode, "halfEven")); + rounding = RoundingMode::HalfEven; + } + + options.mRoundingMode = rounding; + } + + if (!GetProperty(cx, internals, internals, cx->names().trailingZeroDisplay, + &value)) { + return false; + } + + { + JSLinearString* trailingZeroDisplay = value.toString()->ensureLinear(cx); + if (!trailingZeroDisplay) { + return false; + } + + if (StringEqualsLiteral(trailingZeroDisplay, "auto")) { + options.mStripTrailingZero = false; + } else { + MOZ_ASSERT(StringEqualsLiteral(trailingZeroDisplay, "stripIfInteger")); + options.mStripTrailingZero = true; + } + } + + return true; +} + +/** + * Returns a new mozilla::intl::Number[Range]Format with the locale and number + * formatting options of the given NumberFormat, or a nullptr if + * initialization failed. 
+ */ +template <class Formatter> +static Formatter* NewNumberFormat(JSContext* cx, + Handle<NumberFormatObject*> numberFormat) { + RootedObject internals(cx, intl::GetInternalsObject(cx, numberFormat)); + if (!internals) { + return nullptr; + } + + UniqueChars locale = NumberFormatLocale(cx, internals); + if (!locale) { + return nullptr; + } + + NumberFormatOptions options; + if (!FillNumberFormatOptions(cx, internals, options)) { + return nullptr; + } + + options.mRangeCollapse = NumberFormatOptions::RangeCollapse::Auto; + options.mRangeIdentityFallback = + NumberFormatOptions::RangeIdentityFallback::Approximately; + + mozilla::Result<mozilla::UniquePtr<Formatter>, mozilla::intl::ICUError> + result = Formatter::TryCreate(locale.get(), options); + + if (result.isOk()) { + return result.unwrap().release(); + } + + intl::ReportInternalError(cx, result.unwrapErr()); + return nullptr; +} + +static mozilla::intl::NumberFormat* GetOrCreateNumberFormat( + JSContext* cx, Handle<NumberFormatObject*> numberFormat) { + // Obtain a cached mozilla::intl::NumberFormat object. + mozilla::intl::NumberFormat* nf = numberFormat->getNumberFormatter(); + if (nf) { + return nf; + } + + nf = NewNumberFormat<mozilla::intl::NumberFormat>(cx, numberFormat); + if (!nf) { + return nullptr; + } + numberFormat->setNumberFormatter(nf); + + intl::AddICUCellMemory(numberFormat, NumberFormatObject::EstimatedMemoryUse); + return nf; +} + +static mozilla::intl::NumberRangeFormat* GetOrCreateNumberRangeFormat( + JSContext* cx, Handle<NumberFormatObject*> numberFormat) { + // Obtain a cached mozilla::intl::NumberRangeFormat object. 
+ mozilla::intl::NumberRangeFormat* nrf = + numberFormat->getNumberRangeFormatter(); + if (nrf) { + return nrf; + } + + nrf = NewNumberFormat<mozilla::intl::NumberRangeFormat>(cx, numberFormat); + if (!nrf) { + return nullptr; + } + numberFormat->setNumberRangeFormatter(nrf); + + intl::AddICUCellMemory(numberFormat, + NumberFormatObject::EstimatedRangeFormatterMemoryUse); + return nrf; +} + +static FieldType GetFieldTypeForNumberPartType( + mozilla::intl::NumberPartType type) { + switch (type) { + case mozilla::intl::NumberPartType::ApproximatelySign: + return &JSAtomState::approximatelySign; + case mozilla::intl::NumberPartType::Compact: + return &JSAtomState::compact; + case mozilla::intl::NumberPartType::Currency: + return &JSAtomState::currency; + case mozilla::intl::NumberPartType::Decimal: + return &JSAtomState::decimal; + case mozilla::intl::NumberPartType::ExponentInteger: + return &JSAtomState::exponentInteger; + case mozilla::intl::NumberPartType::ExponentMinusSign: + return &JSAtomState::exponentMinusSign; + case mozilla::intl::NumberPartType::ExponentSeparator: + return &JSAtomState::exponentSeparator; + case mozilla::intl::NumberPartType::Fraction: + return &JSAtomState::fraction; + case mozilla::intl::NumberPartType::Group: + return &JSAtomState::group; + case mozilla::intl::NumberPartType::Infinity: + return &JSAtomState::infinity; + case mozilla::intl::NumberPartType::Integer: + return &JSAtomState::integer; + case mozilla::intl::NumberPartType::Literal: + return &JSAtomState::literal; + case mozilla::intl::NumberPartType::MinusSign: + return &JSAtomState::minusSign; + case mozilla::intl::NumberPartType::Nan: + return &JSAtomState::nan; + case mozilla::intl::NumberPartType::Percent: + return &JSAtomState::percentSign; + case mozilla::intl::NumberPartType::PlusSign: + return &JSAtomState::plusSign; + case mozilla::intl::NumberPartType::Unit: + return &JSAtomState::unit; + } + + MOZ_ASSERT_UNREACHABLE( + "unenumerated, undocumented format field 
returned by iterator"); + return nullptr; +} + +static FieldType GetFieldTypeForNumberPartSource( + mozilla::intl::NumberPartSource source) { + switch (source) { + case mozilla::intl::NumberPartSource::Shared: + return &JSAtomState::shared; + case mozilla::intl::NumberPartSource::Start: + return &JSAtomState::startRange; + case mozilla::intl::NumberPartSource::End: + return &JSAtomState::endRange; + } + + MOZ_CRASH("unexpected number part source"); +} + +enum class DisplayNumberPartSource : bool { No, Yes }; + +static bool FormattedNumberToParts(JSContext* cx, HandleString str, + const mozilla::intl::NumberPartVector& parts, + DisplayNumberPartSource displaySource, + FieldType unitType, + MutableHandleValue result) { + size_t lastEndIndex = 0; + + RootedObject singlePart(cx); + RootedValue propVal(cx); + + Rooted<ArrayObject*> partsArray( + cx, NewDenseFullyAllocatedArray(cx, parts.length())); + if (!partsArray) { + return false; + } + partsArray->ensureDenseInitializedLength(0, parts.length()); + + size_t index = 0; + for (const auto& part : parts) { + FieldType type = GetFieldTypeForNumberPartType(part.type); + size_t endIndex = part.endIndex; + + MOZ_ASSERT(lastEndIndex < endIndex); + + singlePart = NewPlainObject(cx); + if (!singlePart) { + return false; + } + + propVal.setString(cx->names().*type); + if (!DefineDataProperty(cx, singlePart, cx->names().type, propVal)) { + return false; + } + + JSLinearString* partSubstr = + NewDependentString(cx, str, lastEndIndex, endIndex - lastEndIndex); + if (!partSubstr) { + return false; + } + + propVal.setString(partSubstr); + if (!DefineDataProperty(cx, singlePart, cx->names().value, propVal)) { + return false; + } + + if (displaySource == DisplayNumberPartSource::Yes) { + FieldType source = GetFieldTypeForNumberPartSource(part.source); + + propVal.setString(cx->names().*source); + if (!DefineDataProperty(cx, singlePart, cx->names().source, propVal)) { + return false; + } + } + + if (unitType != nullptr && type != 
&JSAtomState::literal) { + propVal.setString(cx->names().*unitType); + if (!DefineDataProperty(cx, singlePart, cx->names().unit, propVal)) { + return false; + } + } + + partsArray->initDenseElement(index++, ObjectValue(*singlePart)); + + lastEndIndex = endIndex; + } + + MOZ_ASSERT(index == parts.length()); + MOZ_ASSERT(lastEndIndex == str->length(), + "result array must partition the entire string"); + + result.setObject(*partsArray); + return true; +} + +bool js::intl::FormattedRelativeTimeToParts( + JSContext* cx, HandleString str, + const mozilla::intl::NumberPartVector& parts, FieldType relativeTimeUnit, + MutableHandleValue result) { + return FormattedNumberToParts(cx, str, parts, DisplayNumberPartSource::No, + relativeTimeUnit, result); +} + +// Return true if the string starts with "0[bBoOxX]", possibly skipping over +// leading whitespace. +template <typename CharT> +static bool IsNonDecimalNumber(mozilla::Range<const CharT> chars) { + const CharT* end = chars.begin().get() + chars.length(); + const CharT* start = SkipSpace(chars.begin().get(), end); + + if (end - start >= 2 && start[0] == '0') { + CharT ch = start[1]; + return ch == 'b' || ch == 'B' || ch == 'o' || ch == 'O' || ch == 'x' || + ch == 'X'; + } + return false; +} + +static bool IsNonDecimalNumber(JSLinearString* str) { + JS::AutoCheckCannotGC nogc; + return str->hasLatin1Chars() ? IsNonDecimalNumber(str->latin1Range(nogc)) + : IsNonDecimalNumber(str->twoByteRange(nogc)); +} + +/** + * 15.5.16 ToIntlMathematicalValue ( value ) + * + * ES2024 Intl draft rev 74ca7099f103d143431b2ea422ae640c6f43e3e6 + */ +static bool ToIntlMathematicalValue(JSContext* cx, MutableHandleValue value) { + // Step 1. + if (!ToPrimitive(cx, JSTYPE_NUMBER, value)) { + return false; + } + + // Step 2. + if (value.isBigInt()) { + return true; + } + + // Step 4. + if (!value.isString()) { + // Step 4.a. (Steps 4.b-10 not applicable in our implementation.) + return ToNumber(cx, value); + } + + // Step 3. 
+ JSLinearString* str = value.toString()->ensureLinear(cx); + if (!str) { + return false; + } + + // Steps 5-6, 8, and 9.a. + double number = LinearStringToNumber(str); + + // Step 7. + if (std::isnan(number)) { + // Set to NaN if the input can't be parsed as a number. + value.setNaN(); + return true; + } + + // Step 9. + if (number == 0.0 || std::isinf(number)) { + // Step 9.a. (Reordered) + + // Steps 9.b-e. + value.setDouble(number); + return true; + } + + // Step 10. + if (IsNonDecimalNumber(str)) { + // ICU doesn't accept non-decimal numbers, so we have to convert the input + // into a base-10 string. + + MOZ_ASSERT(!mozilla::IsNegative(number), + "non-decimal numbers can't be negative"); + + if (number < DOUBLE_INTEGRAL_PRECISION_LIMIT) { + // Fast-path if we can guarantee there was no loss of precision. + value.setDouble(number); + } else { + // For the slow-path convert the string into a BigInt. + + // StringToBigInt can't fail (other than OOM) when StringToNumber already + // succeeded. + RootedString rooted(cx, str); + BigInt* bi; + JS_TRY_VAR_OR_RETURN_FALSE(cx, bi, StringToBigInt(cx, rooted)); + MOZ_ASSERT(bi); + + value.setBigInt(bi); + } + } + return true; +} + +// Return the number part of the input by removing leading and trailing +// whitespace. +template <typename CharT> +static mozilla::Span<const CharT> NumberPart(const CharT* chars, + size_t length) { + const CharT* start = chars; + const CharT* end = chars + length; + + start = SkipSpace(start, end); + + // |SkipSpace| only supports forward iteration, so inline the backwards + // iteration here. + MOZ_ASSERT(start <= end); + while (end > start && unicode::IsSpace(end[-1])) { + end--; + } + + // The number part is a non-empty, ASCII-only substring. 
+ MOZ_ASSERT(start < end); + MOZ_ASSERT(mozilla::IsAscii(mozilla::Span(start, end))); + + return {start, end}; +} + +static bool NumberPart(JSContext* cx, JSLinearString* str, + const JS::AutoCheckCannotGC& nogc, + JS::UniqueChars& latin1, std::string_view& result) { + if (str->hasLatin1Chars()) { + auto span = NumberPart( + reinterpret_cast<const char*>(str->latin1Chars(nogc)), str->length()); + + result = {span.data(), span.size()}; + return true; + } + + auto span = NumberPart(str->twoByteChars(nogc), str->length()); + + latin1.reset(JS::LossyTwoByteCharsToNewLatin1CharsZ(cx, span).c_str()); + if (!latin1) { + return false; + } + + result = {latin1.get(), span.size()}; + return true; +} + +bool js::intl_FormatNumber(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 3); + MOZ_ASSERT(args[0].isObject()); + MOZ_ASSERT(args[2].isBoolean()); + + Rooted<NumberFormatObject*> numberFormat( + cx, &args[0].toObject().as<NumberFormatObject>()); + + RootedValue value(cx, args[1]); + if (!ToIntlMathematicalValue(cx, &value)) { + return false; + } + + mozilla::intl::NumberFormat* nf = GetOrCreateNumberFormat(cx, numberFormat); + if (!nf) { + return false; + } + + // Actually format the number + using ICUError = mozilla::intl::ICUError; + + bool formatToParts = args[2].toBoolean(); + mozilla::Result<std::u16string_view, ICUError> result = + mozilla::Err(ICUError::InternalError); + mozilla::intl::NumberPartVector parts; + if (value.isNumber()) { + double num = value.toNumber(); + if (formatToParts) { + result = nf->formatToParts(num, parts); + } else { + result = nf->format(num); + } + } else if (value.isBigInt()) { + RootedBigInt bi(cx, value.toBigInt()); + + int64_t num; + if (BigInt::isInt64(bi, &num)) { + if (formatToParts) { + result = nf->formatToParts(num, parts); + } else { + result = nf->format(num); + } + } else { + JSLinearString* str = BigInt::toString<CanGC>(cx, bi, 10); + if (!str) { + return 
false; + } + MOZ_RELEASE_ASSERT(str->hasLatin1Chars()); + + JS::AutoCheckCannotGC nogc; + + const char* chars = reinterpret_cast<const char*>(str->latin1Chars(nogc)); + if (formatToParts) { + result = + nf->formatToParts(std::string_view(chars, str->length()), parts); + } else { + result = nf->format(std::string_view(chars, str->length())); + } + } + } else { + JSLinearString* str = value.toString()->ensureLinear(cx); + if (!str) { + return false; + } + + JS::AutoCheckCannotGC nogc; + + // Two-byte strings have to be copied into a separate |char| buffer. + JS::UniqueChars latin1; + + std::string_view sv; + if (!NumberPart(cx, str, nogc, latin1, sv)) { + return false; + } + + if (formatToParts) { + result = nf->formatToParts(sv, parts); + } else { + result = nf->format(sv); + } + } + + if (result.isErr()) { + intl::ReportInternalError(cx, result.unwrapErr()); + return false; + } + + RootedString str(cx, NewStringCopy<CanGC>(cx, result.unwrap())); + if (!str) { + return false; + } + + if (formatToParts) { + return FormattedNumberToParts(cx, str, parts, DisplayNumberPartSource::No, + nullptr, args.rval()); + } + + args.rval().setString(str); + return true; +} + +static JSLinearString* ToLinearString(JSContext* cx, HandleValue val) { + // Special case to preserve negative zero. + if (val.isDouble() && mozilla::IsNegativeZero(val.toDouble())) { + constexpr std::string_view negativeZero = "-0"; + return NewStringCopy<CanGC>(cx, negativeZero); + } + + JSString* str = ToString(cx, val); + return str ? 
str->ensureLinear(cx) : nullptr; +}; + +bool js::intl_FormatNumberRange(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 4); + MOZ_ASSERT(args[0].isObject()); + MOZ_ASSERT(!args[1].isUndefined()); + MOZ_ASSERT(!args[2].isUndefined()); + MOZ_ASSERT(args[3].isBoolean()); + + Rooted<NumberFormatObject*> numberFormat( + cx, &args[0].toObject().as<NumberFormatObject>()); + bool formatToParts = args[3].toBoolean(); + + RootedValue start(cx, args[1]); + if (!ToIntlMathematicalValue(cx, &start)) { + return false; + } + + RootedValue end(cx, args[2]); + if (!ToIntlMathematicalValue(cx, &end)) { + return false; + } + + // PartitionNumberRangePattern, step 1. + if (start.isDouble() && std::isnan(start.toDouble())) { + JS_ReportErrorNumberASCII( + cx, GetErrorMessage, nullptr, JSMSG_NAN_NUMBER_RANGE, "start", + "NumberFormat", formatToParts ? "formatRangeToParts" : "formatRange"); + return false; + } + if (end.isDouble() && std::isnan(end.toDouble())) { + JS_ReportErrorNumberASCII( + cx, GetErrorMessage, nullptr, JSMSG_NAN_NUMBER_RANGE, "end", + "NumberFormat", formatToParts ? "formatRangeToParts" : "formatRange"); + return false; + } + + using NumberRangeFormat = mozilla::intl::NumberRangeFormat; + NumberRangeFormat* nf = GetOrCreateNumberRangeFormat(cx, numberFormat); + if (!nf) { + return false; + } + + auto valueRepresentableAsDouble = [](const Value& val, double* num) { + if (val.isNumber()) { + *num = val.toNumber(); + return true; + } + if (val.isBigInt()) { + int64_t i64; + if (BigInt::isInt64(val.toBigInt(), &i64) && + i64 < int64_t(DOUBLE_INTEGRAL_PRECISION_LIMIT) && + i64 > -int64_t(DOUBLE_INTEGRAL_PRECISION_LIMIT)) { + *num = double(i64); + return true; + } + } + return false; + }; + + // Actually format the number range. 
+ using ICUError = mozilla::intl::ICUError; + + mozilla::Result<std::u16string_view, ICUError> result = + mozilla::Err(ICUError::InternalError); + mozilla::intl::NumberPartVector parts; + + double numStart, numEnd; + if (valueRepresentableAsDouble(start, &numStart) && + valueRepresentableAsDouble(end, &numEnd)) { + if (formatToParts) { + result = nf->formatToParts(numStart, numEnd, parts); + } else { + result = nf->format(numStart, numEnd); + } + } else { + Rooted<JSLinearString*> strStart(cx, ToLinearString(cx, start)); + if (!strStart) { + return false; + } + + Rooted<JSLinearString*> strEnd(cx, ToLinearString(cx, end)); + if (!strEnd) { + return false; + } + + JS::AutoCheckCannotGC nogc; + + // Two-byte strings have to be copied into a separate |char| buffer. + JS::UniqueChars latin1Start; + JS::UniqueChars latin1End; + + std::string_view svStart; + if (!NumberPart(cx, strStart, nogc, latin1Start, svStart)) { + return false; + } + + std::string_view svEnd; + if (!NumberPart(cx, strEnd, nogc, latin1End, svEnd)) { + return false; + } + + if (formatToParts) { + result = nf->formatToParts(svStart, svEnd, parts); + } else { + result = nf->format(svStart, svEnd); + } + } + + if (result.isErr()) { + intl::ReportInternalError(cx, result.unwrapErr()); + return false; + } + + RootedString str(cx, NewStringCopy<CanGC>(cx, result.unwrap())); + if (!str) { + return false; + } + + if (formatToParts) { + return FormattedNumberToParts(cx, str, parts, DisplayNumberPartSource::Yes, + nullptr, args.rval()); + } + + args.rval().setString(str); + return true; +} diff --git a/js/src/builtin/intl/NumberFormat.h b/js/src/builtin/intl/NumberFormat.h new file mode 100644 index 0000000000..e0566d6fae --- /dev/null +++ b/js/src/builtin/intl/NumberFormat.h @@ -0,0 +1,127 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
 * If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef builtin_intl_NumberFormat_h
#define builtin_intl_NumberFormat_h

#include <stdint.h>

#include "builtin/SelfHostingDefines.h"
#include "js/Class.h"
#include "vm/NativeObject.h"

namespace mozilla::intl {
class NumberFormat;
class NumberRangeFormat;
}  // namespace mozilla::intl

namespace js {

/**
 * NativeObject subclass that keeps its self-hosted internals object and two
 * lazily created formatters (a number formatter and a number range formatter)
 * in reserved slots.
 */
class NumberFormatObject : public NativeObject {
 public:
  static const JSClass class_;
  static const JSClass& protoClass_;

  // Reserved slot layout. The formatter slots hold a PrivateValue pointer once
  // the formatter has been set; until then they are read as undefined.
  static constexpr uint32_t INTERNALS_SLOT = 0;
  static constexpr uint32_t UNUMBER_FORMATTER_SLOT = 1;
  static constexpr uint32_t UNUMBER_RANGE_FORMATTER_SLOT = 2;
  static constexpr uint32_t SLOT_COUNT = 3;

  static_assert(INTERNALS_SLOT == INTL_INTERNALS_OBJECT_SLOT,
                "INTERNALS_SLOT must match self-hosting define for internals "
                "object slot");

  // Estimated memory use for UNumberFormatter and UFormattedNumber
  // (see IcuMemoryUsage).
  static constexpr size_t EstimatedMemoryUse = 972;

  // Estimated memory use for UNumberRangeFormatter and UFormattedNumberRange
  // (see IcuMemoryUsage).
  static constexpr size_t EstimatedRangeFormatterMemoryUse = 19894;

  // Returns the stored number formatter, or nullptr if the slot is still
  // undefined (no formatter has been set yet).
  mozilla::intl::NumberFormat* getNumberFormatter() const {
    const auto& slot = getFixedSlot(UNUMBER_FORMATTER_SLOT);
    if (slot.isUndefined()) {
      return nullptr;
    }
    return static_cast<mozilla::intl::NumberFormat*>(slot.toPrivate());
  }

  // Stores |formatter| in the number formatter slot as a private pointer.
  void setNumberFormatter(mozilla::intl::NumberFormat* formatter) {
    setFixedSlot(UNUMBER_FORMATTER_SLOT, PrivateValue(formatter));
  }

  // Returns the stored number range formatter, or nullptr if the slot is still
  // undefined (no formatter has been set yet).
  mozilla::intl::NumberRangeFormat* getNumberRangeFormatter() const {
    const auto& slot = getFixedSlot(UNUMBER_RANGE_FORMATTER_SLOT);
    if (slot.isUndefined()) {
      return nullptr;
    }
    return static_cast<mozilla::intl::NumberRangeFormat*>(slot.toPrivate());
  }

  // Stores |formatter| in the number range formatter slot as a private
  // pointer.
  void setNumberRangeFormatter(mozilla::intl::NumberRangeFormat* formatter) {
    setFixedSlot(UNUMBER_RANGE_FORMATTER_SLOT, PrivateValue(formatter));
  }

 private:
  static const JSClassOps classOps_;
  static const ClassSpec classSpec_;

  // Finalizer hook; defined in the implementation file.
  // NOTE(review): presumably releases both cached formatters -- confirm
  // against the definition, which is not part of this header.
  static void finalize(JS::GCContext* gcx, JSObject* obj);
};

/**
 * Returns a new instance of the standard built-in NumberFormat constructor.
 *
 * Usage: numberFormat = intl_NumberFormat(locales, options)
 */
[[nodiscard]] extern bool intl_NumberFormat(JSContext* cx, unsigned argc,
                                            Value* vp);

/**
 * Returns the numbering system type identifier per Unicode
 * Technical Standard 35, Unicode Locale Data Markup Language, for the
 * default numbering system for the given locale.
 *
 * Usage: defaultNumberingSystem = intl_numberingSystem(locale)
 */
[[nodiscard]] extern bool intl_numberingSystem(JSContext* cx, unsigned argc,
                                               Value* vp);

/**
 * Returns a string representing the number x according to the effective
 * locale and the formatting options of the given NumberFormat.
 *
 * Spec: ECMAScript Internationalization API Specification, 11.3.2.
 *
 * When |formatToParts| is true, an array of part objects is returned instead
 * of a string.
 *
 * Usage: formatted = intl_FormatNumber(numberFormat, x, formatToParts)
 */
[[nodiscard]] extern bool intl_FormatNumber(JSContext* cx, unsigned argc,
                                            Value* vp);

/**
 * Returns a string representing the number range «x - y» according to the
 * effective locale and the formatting options of the given NumberFormat.
 *
 * When |formatToParts| is true, an array of part objects is returned instead
 * of a string. Neither x nor y may be undefined.
 *
 * Usage: formatted = intl_FormatNumberRange(numberFormat, x, y, formatToParts)
 */
[[nodiscard]] extern bool intl_FormatNumberRange(JSContext* cx, unsigned argc,
                                                 Value* vp);

#if DEBUG || MOZ_SYSTEM_ICU
/**
 * Returns an object with all available measurement units.
 *
 * Only declared in debug builds and in builds using a system ICU.
 *
 * Usage: units = intl_availableMeasurementUnits()
 */
[[nodiscard]] extern bool intl_availableMeasurementUnits(JSContext* cx,
                                                         unsigned argc,
                                                         Value* vp);
#endif

}  // namespace js

#endif /* builtin_intl_NumberFormat_h */
diff --git a/js/src/builtin/intl/NumberFormat.js b/js/src/builtin/intl/NumberFormat.js
new file mode 100644
index 0000000000..be3b74a8ac
--- /dev/null
+++ b/js/src/builtin/intl/NumberFormat.js
@@ -0,0 +1,1263 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/* Portions Copyright Norbert Lindenberg 2011-2012. */

#include "NumberingSystemsGenerated.h"

/**
 * NumberFormat internal properties.
 *
 * 9.1 Internal slots of Service Constructors
 * 15.2.3 Properties of the Intl.NumberFormat Constructor, Internal slots
 *
 * ES2024 Intl draft rev 74ca7099f103d143431b2ea422ae640c6f43e3e6
 */
var numberFormatInternalProperties = {
  localeData: numberFormatLocaleData,
  // "nu" is the only extension key resolved for NumberFormat; its resolved
  // value becomes the |numberingSystem| internal property.
  relevantExtensionKeys: ["nu"],
};

/**
 * 15.1.2 InitializeNumberFormat ( numberFormat, locales, options )
 *
 * Compute an internal properties object from |lazyNumberFormatData|.
+ * + * ES2024 Intl draft rev 74ca7099f103d143431b2ea422ae640c6f43e3e6 + */ +function resolveNumberFormatInternals(lazyNumberFormatData) { + assert(IsObject(lazyNumberFormatData), "lazy data not an object?"); + + var internalProps = std_Object_create(null); + + var NumberFormat = numberFormatInternalProperties; + + // Compute effective locale. + + // Step 9. + var localeData = NumberFormat.localeData; + + // Step 10. + var r = ResolveLocale( + "NumberFormat", + lazyNumberFormatData.requestedLocales, + lazyNumberFormatData.opt, + NumberFormat.relevantExtensionKeys, + localeData + ); + + // Steps 11-13. (Step 12 is not relevant to our implementation.) + internalProps.locale = r.locale; + internalProps.numberingSystem = r.nu; + + // Compute formatting options. + + // Step 14. SetNumberFormatUnitOptions, step 4. + var style = lazyNumberFormatData.style; + internalProps.style = style; + + // Step 14. SetNumberFormatUnitOptions, step 14. + if (style === "currency") { + internalProps.currency = lazyNumberFormatData.currency; + internalProps.currencyDisplay = lazyNumberFormatData.currencyDisplay; + internalProps.currencySign = lazyNumberFormatData.currencySign; + } + + // Step 14. SetNumberFormatUnitOptions, step 15. + if (style === "unit") { + internalProps.unit = lazyNumberFormatData.unit; + internalProps.unitDisplay = lazyNumberFormatData.unitDisplay; + } + + // Step 19. + var notation = lazyNumberFormatData.notation; + internalProps.notation = notation; + + // Step 20. SetNumberFormatDigitOptions, step 6. + internalProps.minimumIntegerDigits = + lazyNumberFormatData.minimumIntegerDigits; + + // Step 20. SetNumberFormatDigitOptions, step 14. + internalProps.roundingIncrement = lazyNumberFormatData.roundingIncrement; + + // Step 20. SetNumberFormatDigitOptions, step 15. + internalProps.roundingMode = lazyNumberFormatData.roundingMode; + + // Step 20. SetNumberFormatDigitOptions, step 16. 
+ internalProps.trailingZeroDisplay = lazyNumberFormatData.trailingZeroDisplay; + + // Step 20. SetNumberFormatDigitOptions, steps 25-26. + if ("minimumFractionDigits" in lazyNumberFormatData) { + // Note: Intl.NumberFormat.prototype.resolvedOptions() exposes the + // actual presence (versus undefined-ness) of these properties. + assert( + "maximumFractionDigits" in lazyNumberFormatData, + "min/max frac digits mismatch" + ); + internalProps.minimumFractionDigits = + lazyNumberFormatData.minimumFractionDigits; + internalProps.maximumFractionDigits = + lazyNumberFormatData.maximumFractionDigits; + } + + // Step 20. SetNumberFormatDigitOptions, steps 24 and 26. + if ("minimumSignificantDigits" in lazyNumberFormatData) { + // Note: Intl.NumberFormat.prototype.resolvedOptions() exposes the + // actual presence (versus undefined-ness) of these properties. + assert( + "maximumSignificantDigits" in lazyNumberFormatData, + "min/max sig digits mismatch" + ); + internalProps.minimumSignificantDigits = + lazyNumberFormatData.minimumSignificantDigits; + internalProps.maximumSignificantDigits = + lazyNumberFormatData.maximumSignificantDigits; + } + + // Step 20. SetNumberFormatDigitOptions, steps 26-30. + internalProps.roundingPriority = lazyNumberFormatData.roundingPriority; + + // Step 23. + if (notation === "compact") { + internalProps.compactDisplay = lazyNumberFormatData.compactDisplay; + } + + // Step 28. + internalProps.useGrouping = lazyNumberFormatData.useGrouping; + + // Step 30. + internalProps.signDisplay = lazyNumberFormatData.signDisplay; + + // The caller is responsible for associating |internalProps| with the right + // object using |setInternalProperties|. + return internalProps; +} + +/** + * Returns an object containing the NumberFormat internal properties of |obj|. 
 *
 * The properties are resolved lazily: on the first call they are computed by
 * |resolveNumberFormatInternals| from the object's lazy data and cached with
 * |setInternalProperties|; later calls return the cached object.
 */
function getNumberFormatInternals(obj) {
  assert(IsObject(obj), "getNumberFormatInternals called with non-object");
  assert(
    intl_GuardToNumberFormat(obj) !== null,
    "getNumberFormatInternals called with non-NumberFormat"
  );

  var internals = getIntlObjectInternals(obj);
  assert(
    internals.type === "NumberFormat",
    "bad type escaped getIntlObjectInternals"
  );

  // If internal properties have already been computed, use them.
  var internalProps = maybeInternalProperties(internals);
  if (internalProps) {
    return internalProps;
  }

  // Otherwise it's time to fully create them.
  internalProps = resolveNumberFormatInternals(internals.lazyData);
  setInternalProperties(internals, internalProps);
  return internalProps;
}

/**
 * 15.5.10 UnwrapNumberFormat ( nf )
 *
 * Returns the value stored under the fallback symbol when |nf| is an object
 * that is not itself a (possibly wrapped) NumberFormat but has the built-in
 * NumberFormat prototype on its prototype chain. Otherwise returns |nf|
 * unchanged.
 *
 * ES2024 Intl draft rev 74ca7099f103d143431b2ea422ae640c6f43e3e6
 */
function UnwrapNumberFormat(nf) {
  // Steps 1-3 (error handling moved to caller).
  if (
    IsObject(nf) &&
    intl_GuardToNumberFormat(nf) === null &&
    !intl_IsWrappedNumberFormat(nf) &&
    callFunction(
      std_Object_isPrototypeOf,
      GetBuiltinPrototype("NumberFormat"),
      nf
    )
  ) {
    return nf[intlFallbackSymbol()];
  }
  return nf;
}

/* eslint-disable complexity */
/**
 * 15.1.3 SetNumberFormatDigitOptions ( intlObj, options, mnfdDefault, mxfdDefault, notation )
 *
 * Applies digit options used for number formatting onto the intl object.
 *
 * Reads the digit and rounding options from |options|, validates them, and
 * stores the results on |lazyData|. |mnfdDefault| and |mxfdDefault| are the
 * default minimum and maximum fraction digits; |notation| is the already
 * resolved notation string. Throws a RangeError or TypeError for invalid
 * option values or combinations.
 *
 * ES2024 Intl draft rev 74ca7099f103d143431b2ea422ae640c6f43e3e6
 */
function SetNumberFormatDigitOptions(
  lazyData,
  options,
  mnfdDefault,
  mxfdDefault,
  notation
) {
  assert(IsObject(options), "SetNumberFormatDigitOptions");
  assert(typeof mnfdDefault === "number", "SetNumberFormatDigitOptions");
  assert(typeof mxfdDefault === "number", "SetNumberFormatDigitOptions");
  assert(mnfdDefault <= mxfdDefault, "SetNumberFormatDigitOptions");
  assert(typeof notation === "string", "SetNumberFormatDigitOptions");

  // Steps 1-5.
  //
  // The four fraction/significant digit options are read raw here and only
  // validated further below, once it is known which of them are needed.
  var mnid = GetNumberOption(options, "minimumIntegerDigits", 1, 21, 1);
  var mnfd = options.minimumFractionDigits;
  var mxfd = options.maximumFractionDigits;
  var mnsd = options.minimumSignificantDigits;
  var mxsd = options.maximumSignificantDigits;

  // Step 6.
  lazyData.minimumIntegerDigits = mnid;

  // Step 7.
  var roundingPriority = GetOption(
    options,
    "roundingPriority",
    "string",
    ["auto", "morePrecision", "lessPrecision"],
    "auto"
  );

  // Step 8.
  var roundingIncrement = GetNumberOption(
    options,
    "roundingIncrement",
    1,
    5000,
    1
  );

  // Step 9.
  //
  // Only this fixed set of increments is accepted.
  switch (roundingIncrement) {
    case 1:
    case 2:
    case 5:
    case 10:
    case 20:
    case 25:
    case 50:
    case 100:
    case 200:
    case 250:
    case 500:
    case 1000:
    case 2000:
    case 2500:
    case 5000:
      break;
    default:
      ThrowRangeError(
        JSMSG_INVALID_OPTION_VALUE,
        "roundingIncrement",
        roundingIncrement
      );
  }

  // Step 10.
  var roundingMode = GetOption(
    options,
    "roundingMode",
    "string",
    [
      "ceil",
      "floor",
      "expand",
      "trunc",
      "halfCeil",
      "halfFloor",
      "halfExpand",
      "halfTrunc",
      "halfEven",
    ],
    "halfExpand"
  );

  // Step 11.
  var trailingZeroDisplay = GetOption(
    options,
    "trailingZeroDisplay",
    "string",
    ["auto", "stripIfInteger"],
    "auto"
  );

  // Step 12. (This step is a note.)

  // Step 13.
  if (roundingIncrement !== 1) {
    mxfdDefault = mnfdDefault;
  }

  // Step 14.
  lazyData.roundingIncrement = roundingIncrement;

  // Step 15.
  lazyData.roundingMode = roundingMode;

  // Step 16.
  lazyData.trailingZeroDisplay = trailingZeroDisplay;

  // Steps 17-18.
  var hasSignificantDigits = mnsd !== undefined || mxsd !== undefined;

  // Steps 19-20.
  var hasFractionDigits = mnfd !== undefined || mxfd !== undefined;

  // Steps 21 and 23.a.
  var needSignificantDigits =
    roundingPriority !== "auto" || hasSignificantDigits;

  // Steps 22 and 23.b.i.
  var needFractionalDigits =
    roundingPriority !== "auto" ||
    !(hasSignificantDigits || (!hasFractionDigits && notation === "compact"));

  // Step 24.
  if (needSignificantDigits) {
    // Step 24.a.
    if (hasSignificantDigits) {
      // Step 24.a.i.
      mnsd = DefaultNumberOption(mnsd, 1, 21, 1);
      lazyData.minimumSignificantDigits = mnsd;

      // Step 24.a.ii.
      mxsd = DefaultNumberOption(mxsd, mnsd, 21, 21);
      lazyData.maximumSignificantDigits = mxsd;
    } else {
      // Step 24.b.i.
      lazyData.minimumSignificantDigits = 1;

      // Step 24.b.ii.
      lazyData.maximumSignificantDigits = 21;
    }
  }

  // Step 25.
  if (needFractionalDigits) {
    // Step 25.a.
    if (hasFractionDigits) {
      // Step 25.a.i.
      mnfd = DefaultNumberOption(mnfd, 0, 100, undefined);

      // Step 25.a.ii.
      mxfd = DefaultNumberOption(mxfd, 0, 100, undefined);

      // Step 25.a.iii.
      if (mnfd === undefined) {
        assert(
          mxfd !== undefined,
          "mxfd isn't undefined when mnfd is undefined"
        );
        mnfd = std_Math_min(mnfdDefault, mxfd);
      }

      // Step 25.a.iv.
      else if (mxfd === undefined) {
        mxfd = std_Math_max(mxfdDefault, mnfd);
      }

      // Step 25.a.v.
      else if (mnfd > mxfd) {
        ThrowRangeError(JSMSG_INVALID_DIGITS_VALUE, mxfd);
      }

      // Step 25.a.vi.
      lazyData.minimumFractionDigits = mnfd;

      // Step 25.a.vii.
      lazyData.maximumFractionDigits = mxfd;
    } else {
      // Step 25.b.i.
      lazyData.minimumFractionDigits = mnfdDefault;

      // Step 25.b.ii.
      lazyData.maximumFractionDigits = mxfdDefault;
    }
  }

  // Steps 26-30.
  if (!needSignificantDigits && !needFractionalDigits) {
    // Neither kind of digit setting is needed: per the assertions below this
    // is the compact-notation case with no explicit digit options.
    assert(!hasSignificantDigits, "bad significant digits in fallback case");
    assert(
      roundingPriority === "auto",
      `bad rounding in fallback case: ${roundingPriority}`
    );
    assert(
      notation === "compact",
      `bad notation in fallback case: ${notation}`
    );

    // Steps 26.a-e.
    lazyData.minimumFractionDigits = 0;
    lazyData.maximumFractionDigits = 0;
    lazyData.minimumSignificantDigits = 1;
    lazyData.maximumSignificantDigits = 2;
    lazyData.roundingPriority = "morePrecision";
  } else {
    // Steps 27-30.
    //
    // Our implementation stores |roundingPriority| instead of using
    // [[RoundingType]].
    lazyData.roundingPriority = roundingPriority;
  }

  // Step 31.
  if (roundingIncrement !== 1) {
    // Step 31.a.
    //
    // [[RoundingType]] is `fractionDigits` if |roundingPriority| is equal to
    // "auto" and |hasSignificantDigits| is false.
    if (roundingPriority !== "auto") {
      ThrowTypeError(
        JSMSG_INVALID_NUMBER_OPTION,
        "roundingIncrement",
        "roundingPriority"
      );
    }
    if (hasSignificantDigits) {
      ThrowTypeError(
        JSMSG_INVALID_NUMBER_OPTION,
        "roundingIncrement",
        "minimumSignificantDigits"
      );
    }

    // Step 31.b.
    //
    // Minimum and maximum fraction digits must be equal.
    if (
      lazyData.minimumFractionDigits !==
      lazyData.maximumFractionDigits
    ) {
      ThrowRangeError(JSMSG_UNEQUAL_FRACTION_DIGITS);
    }
  }
}
/* eslint-enable complexity */

/**
 * Convert s to upper case, but limited to characters a-z.
 *
 * Every other code unit is copied to the result unchanged.
 *
 * Spec: ECMAScript Internationalization API Specification, 6.1.
 */
function toASCIIUpperCase(s) {
  assert(typeof s === "string", "toASCIIUpperCase");

  // String.prototype.toUpperCase may map non-ASCII characters into ASCII,
  // so go character by character (actually code unit by code unit, but
  // since we only care about ASCII characters here, that's OK).
  var result = "";
  for (var i = 0; i < s.length; i++) {
    var c = callFunction(std_String_charCodeAt, s, i);
    // 0x61-0x7a is 'a'-'z'; clearing bit 0x20 yields the upper-case letter.
    result +=
      0x61 <= c && c <= 0x7a
        ? callFunction(std_String_fromCharCode, null, c & ~0x20)
        : s[i];
  }
  return result;
}

/**
 * 6.3.1 IsWellFormedCurrencyCode ( currency )
 *
 * Verifies that the given string is a well-formed ISO 4217 currency code.
+ * + * ES2024 Intl draft rev 74ca7099f103d143431b2ea422ae640c6f43e3e6 + */ +function IsWellFormedCurrencyCode(currency) { + assert(typeof currency === "string", "currency is a string value"); + + return currency.length === 3 && IsASCIIAlphaString(currency); +} + +/** + * 6.6.1 IsWellFormedUnitIdentifier ( unitIdentifier ) + * + * Verifies that the given string is a well-formed core unit identifier as + * defined in UTS #35, Part 2, Section 6. In addition to obeying the UTS #35 + * core unit identifier syntax, |unitIdentifier| must be one of the identifiers + * sanctioned by UTS #35 or be a compound unit composed of two sanctioned simple + * units. + * + * ES2024 Intl draft rev 74ca7099f103d143431b2ea422ae640c6f43e3e6 + */ +function IsWellFormedUnitIdentifier(unitIdentifier) { + assert( + typeof unitIdentifier === "string", + "unitIdentifier is a string value" + ); + + // Step 1. + if (IsSanctionedSimpleUnitIdentifier(unitIdentifier)) { + return true; + } + + // Steps 2-3. + var pos = callFunction(std_String_indexOf, unitIdentifier, "-per-"); + if (pos < 0) { + return false; + } + + // Step 4. + // + // Sanctioned single unit identifiers don't include the substring "-per-", + // so we can skip searching for the second "-per-" substring. + + var next = pos + "-per-".length; + + // Steps 5-6. + var numerator = Substring(unitIdentifier, 0, pos); + var denominator = Substring( + unitIdentifier, + next, + unitIdentifier.length - next + ); + + // Steps 7-8. + return ( + IsSanctionedSimpleUnitIdentifier(numerator) && + IsSanctionedSimpleUnitIdentifier(denominator) + ); +} + +#if DEBUG || MOZ_SYSTEM_ICU +var availableMeasurementUnits = { + value: null, +}; +#endif + +/** + * 6.6.2 IsSanctionedSingleUnitIdentifier ( unitIdentifier ) + * + * Verifies that the given string is a sanctioned simple core unit identifier. 
+ * + * Also see: https://unicode.org/reports/tr35/tr35-general.html#Unit_Elements + * + * ES2024 Intl draft rev 74ca7099f103d143431b2ea422ae640c6f43e3e6 + */ +function IsSanctionedSimpleUnitIdentifier(unitIdentifier) { + assert( + typeof unitIdentifier === "string", + "unitIdentifier is a string value" + ); + + var isSanctioned = hasOwn(unitIdentifier, sanctionedSimpleUnitIdentifiers); + +#if DEBUG || MOZ_SYSTEM_ICU + if (isSanctioned) { + if (availableMeasurementUnits.value === null) { + availableMeasurementUnits.value = intl_availableMeasurementUnits(); + } + + var isSupported = hasOwn(unitIdentifier, availableMeasurementUnits.value); + +#if MOZ_SYSTEM_ICU + // A system ICU may support fewer measurement units, so we need to make + // sure the unit is actually supported. + isSanctioned = isSupported; +#else + // Otherwise just assert that the sanctioned unit is also supported. + assert( + isSupported, + `"${unitIdentifier}" is sanctioned but not supported. Did you forget to update + intl/icu/data_filter.json to include the unit (and any implicit compound units)? + For example "speed/kilometer-per-hour" is implied by "length/kilometer" and + "duration/hour" and must therefore also be present.` + ); +#endif + } +#endif + + return isSanctioned; +} + +/* eslint-disable complexity */ +/** + * 15.1.2 InitializeNumberFormat ( numberFormat, locales, options ) + * + * Initializes an object as a NumberFormat. + * + * This method is complicated a moderate bit by its implementing initialization + * as a *lazy* concept. Everything that must happen now, does -- but we defer + * all the work we can until the object is actually used as a NumberFormat. + * This later work occurs in |resolveNumberFormatInternals|; steps not noted + * here occur there. 
+ * + * ES2024 Intl draft rev 74ca7099f103d143431b2ea422ae640c6f43e3e6 + */ +function InitializeNumberFormat(numberFormat, thisValue, locales, options) { + assert( + IsObject(numberFormat), + "InitializeNumberFormat called with non-object" + ); + assert( + intl_GuardToNumberFormat(numberFormat) !== null, + "InitializeNumberFormat called with non-NumberFormat" + ); + + // Lazy NumberFormat data has the following structure: + // + // { + // requestedLocales: List of locales, + // style: "decimal" / "percent" / "currency" / "unit", + // + // // fields present only if style === "currency": + // currency: a well-formed currency code (IsWellFormedCurrencyCode), + // currencyDisplay: "code" / "symbol" / "narrowSymbol" / "name", + // currencySign: "standard" / "accounting", + // + // // fields present only if style === "unit": + // unit: a well-formed unit identifier (IsWellFormedUnitIdentifier), + // unitDisplay: "short" / "narrow" / "long", + // + // opt: // opt object computed in InitializeNumberFormat + // { + // localeMatcher: "lookup" / "best fit", + // + // nu: string matching a Unicode extension type, // optional + // } + // + // minimumIntegerDigits: integer ∈ [1, 21], + // + // // optional, mutually exclusive with the significant-digits option + // minimumFractionDigits: integer ∈ [0, 100], + // maximumFractionDigits: integer ∈ [0, 100], + // + // // optional, mutually exclusive with the fraction-digits option + // minimumSignificantDigits: integer ∈ [1, 21], + // maximumSignificantDigits: integer ∈ [1, 21], + // + // roundingPriority: "auto" / "lessPrecision" / "morePrecision", + // + // useGrouping: "auto" / "always" / "min2" / false, + // + // notation: "standard" / "scientific" / "engineering" / "compact", + // + // // optional, if notation is "compact" + // compactDisplay: "short" / "long", + // + // signDisplay: "auto" / "never" / "always" / "exceptZero" / "negative", + // + // trailingZeroDisplay: "auto" / "stripIfInteger", + // + // roundingIncrement: 
integer ∈ (1, 2, 5, + // 10, 20, 25, 50, + // 100, 200, 250, 500, + // 1000, 2000, 2500, 5000), + // + // roundingMode: "ceil" / "floor" / "expand" / "trunc" / + // "halfCeil" / "halfFloor" / "halfExpand" / "halfTrunc" / "halfEven", + // } + // + // Note that lazy data is only installed as a final step of initialization, + // so every NumberFormat lazy data object has *all* these properties, never a + // subset of them. + var lazyNumberFormatData = std_Object_create(null); + + // Step 1. + var requestedLocales = CanonicalizeLocaleList(locales); + lazyNumberFormatData.requestedLocales = requestedLocales; + + // Step 2. (Inlined call to CoerceOptionsToObject.) + // + // If we ever need more speed here at startup, we should try to detect the + // case where |options === undefined| and then directly use the default + // value for each option. For now, just keep it simple. + if (options === undefined) { + options = std_Object_create(null); + } else { + options = ToObject(options); + } + + // Compute options that impact interpretation of locale. + + // Step 3. + var opt = new_Record(); + lazyNumberFormatData.opt = opt; + + // Steps 4-5. + var matcher = GetOption( + options, + "localeMatcher", + "string", + ["lookup", "best fit"], + "best fit" + ); + opt.localeMatcher = matcher; + + // Step 6. + var numberingSystem = GetOption( + options, + "numberingSystem", + "string", + undefined, + undefined + ); + + // Step 7. + if (numberingSystem !== undefined) { + numberingSystem = intl_ValidateAndCanonicalizeUnicodeExtensionType( + numberingSystem, + "numberingSystem", + "nu" + ); + } + + // Step 8. + opt.nu = numberingSystem; + + // Compute formatting options. + + // Step 14. SetNumberFormatUnitOptions, steps 3-4. + var style = GetOption( + options, + "style", + "string", + ["decimal", "percent", "currency", "unit"], + "decimal" + ); + lazyNumberFormatData.style = style; + + // Step 14. SetNumberFormatUnitOptions, step 5. 
+ var currency = GetOption(options, "currency", "string", undefined, undefined); + + // Step 14. SetNumberFormatUnitOptions, steps 6-7. + if (currency === undefined) { + if (style === "currency") { + ThrowTypeError(JSMSG_UNDEFINED_CURRENCY); + } + } else { + if (!IsWellFormedCurrencyCode(currency)) { + ThrowRangeError(JSMSG_INVALID_CURRENCY_CODE, currency); + } + } + + // Step 14. SetNumberFormatUnitOptions, step 8. + var currencyDisplay = GetOption( + options, + "currencyDisplay", + "string", + ["code", "symbol", "narrowSymbol", "name"], + "symbol" + ); + + // Step 14. SetNumberFormatUnitOptions, step 9. + var currencySign = GetOption( + options, + "currencySign", + "string", + ["standard", "accounting"], + "standard" + ); + + // Step 14. SetNumberFormatUnitOptions, step 14. (Reordered) + if (style === "currency") { + // Step 14. SetNumberFormatUnitOptions, step 14.a. + currency = toASCIIUpperCase(currency); + lazyNumberFormatData.currency = currency; + + // Step 14. SetNumberFormatUnitOptions, step 14.b. + lazyNumberFormatData.currencyDisplay = currencyDisplay; + + // Step 14. SetNumberFormatUnitOptions, step 14.c. + lazyNumberFormatData.currencySign = currencySign; + } + + // Step 14. SetNumberFormatUnitOptions, step 10. + var unit = GetOption(options, "unit", "string", undefined, undefined); + + // Step 14. SetNumberFormatUnitOptions, steps 11-12. + if (unit === undefined) { + if (style === "unit") { + ThrowTypeError(JSMSG_UNDEFINED_UNIT); + } + } else { + if (!IsWellFormedUnitIdentifier(unit)) { + ThrowRangeError(JSMSG_INVALID_UNIT_IDENTIFIER, unit); + } + } + + // Step 14. SetNumberFormatUnitOptions, step 13. + var unitDisplay = GetOption( + options, + "unitDisplay", + "string", + ["short", "narrow", "long"], + "short" + ); + + // Step 14. SetNumberFormatUnitOptions, step 15. + if (style === "unit") { + lazyNumberFormatData.unit = unit; + lazyNumberFormatData.unitDisplay = unitDisplay; + } + + // Steps 16-17. 
+ var mnfdDefault, mxfdDefault; + if (style === "currency") { + var cDigits = CurrencyDigits(currency); + mnfdDefault = cDigits; + mxfdDefault = cDigits; + } else { + mnfdDefault = 0; + mxfdDefault = style === "percent" ? 0 : 3; + } + + // Steps 18-19. + var notation = GetOption( + options, + "notation", + "string", + ["standard", "scientific", "engineering", "compact"], + "standard" + ); + lazyNumberFormatData.notation = notation; + + // Step 20. + SetNumberFormatDigitOptions( + lazyNumberFormatData, + options, + mnfdDefault, + mxfdDefault, + notation + ); + + // Steps 21 and 23.a. + var compactDisplay = GetOption( + options, + "compactDisplay", + "string", + ["short", "long"], + "short" + ); + if (notation === "compact") { + lazyNumberFormatData.compactDisplay = compactDisplay; + } + + // Steps 22 and 23.b. + var defaultUseGrouping = notation !== "compact" ? "auto" : "min2"; + + // Steps 24-25. + var useGrouping = GetStringOrBooleanOption( + options, + "useGrouping", + ["min2", "auto", "always", "true", "false"], + defaultUseGrouping + ); + + // Steps 26-27. + if (useGrouping === "true" || useGrouping === "false") { + useGrouping = defaultUseGrouping; + } else if (useGrouping === true) { + useGrouping = "always"; + } + + // Step 28. + assert( + useGrouping === "min2" || + useGrouping === "auto" || + useGrouping === "always" || + useGrouping === false, + `invalid 'useGrouping' value: ${useGrouping}` + ); + lazyNumberFormatData.useGrouping = useGrouping; + + // Steps 29-30. + var signDisplay = GetOption( + options, + "signDisplay", + "string", + ["auto", "never", "always", "exceptZero", "negative"], + "auto" + ); + lazyNumberFormatData.signDisplay = signDisplay; + + // Step 31. + // + // We've done everything that must be done now: mark the lazy data as fully + // computed and install it. + initializeIntlObject(numberFormat, "NumberFormat", lazyNumberFormatData); + + // 15.1.1 Intl.NumberFormat, step 4. (Inlined call to ChainNumberFormat.) 
+ if ( + numberFormat !== thisValue && + callFunction( + std_Object_isPrototypeOf, + GetBuiltinPrototype("NumberFormat"), + thisValue + ) + ) { + DefineDataProperty( + thisValue, + intlFallbackSymbol(), + numberFormat, + ATTR_NONENUMERABLE | ATTR_NONCONFIGURABLE | ATTR_NONWRITABLE + ); + + return thisValue; + } + + // 15.1.1 Intl.NumberFormat, step 5. + return numberFormat; +} +/* eslint-enable complexity */ + +/** + * 15.5.1 CurrencyDigits ( currency ) + * + * Returns the number of decimal digits to be used for the given currency. + * + * ES2024 Intl draft rev 74ca7099f103d143431b2ea422ae640c6f43e3e6 + */ +function CurrencyDigits(currency) { + assert(typeof currency === "string", "currency is a string value"); + assert(IsWellFormedCurrencyCode(currency), "currency is well-formed"); + assert(currency === toASCIIUpperCase(currency), "currency is all upper-case"); + + // Step 1. + if (hasOwn(currency, currencyDigits)) { + return currencyDigits[currency]; + } + return 2; +} + +/** + * 15.2.2 Intl.NumberFormat.supportedLocalesOf ( locales [ , options ] ) + * + * Returns the subset of the given locale list for which this locale list has a + * matching (possibly fallback) locale. Locales appear in the same order in the + * returned list as in the input list. + * + * ES2024 Intl draft rev 74ca7099f103d143431b2ea422ae640c6f43e3e6 + */ +function Intl_NumberFormat_supportedLocalesOf(locales /*, options*/) { + var options = ArgumentsLength() > 1 ? GetArgument(1) : undefined; + + // Step 1. + var availableLocales = "NumberFormat"; + + // Step 2. + var requestedLocales = CanonicalizeLocaleList(locales); + + // Step 3. + return SupportedLocales(availableLocales, requestedLocales, options); +} + +function getNumberingSystems(locale) { + // ICU doesn't have an API to determine the set of numbering systems + // supported for a locale; it generally pretends that any numbering system + // can be used with any locale. 
Supporting a decimal numbering system + // (where only the digits are replaced) is easy, so we offer them all here. + // Algorithmic numbering systems are typically tied to one locale, so for + // lack of information we don't offer them. + // The one thing we can find out from ICU is the default numbering system + // for a locale. + var defaultNumberingSystem = intl_numberingSystem(locale); + return [defaultNumberingSystem, NUMBERING_SYSTEMS_WITH_SIMPLE_DIGIT_MAPPINGS]; +} + +function numberFormatLocaleData() { + return { + nu: getNumberingSystems, + default: { + nu: intl_numberingSystem, + }, + }; +} + +/** + * 15.5.2 Number Format Functions + * + * Create function to be cached and returned by Intl.NumberFormat.prototype.format. + * + * ES2024 Intl draft rev 74ca7099f103d143431b2ea422ae640c6f43e3e6 + */ +function createNumberFormatFormat(nf) { + // This function is not inlined in $Intl_NumberFormat_format_get to avoid + // creating a call-object on each call to $Intl_NumberFormat_format_get. + return function(value) { + // Step 1 (implicit). + + // Step 2. + assert(IsObject(nf), "InitializeNumberFormat called with non-object"); + assert( + intl_GuardToNumberFormat(nf) !== null, + "InitializeNumberFormat called with non-NumberFormat" + ); + + // Steps 3-5. + return intl_FormatNumber(nf, value, /* formatToParts = */ false); + }; +} + +/** + * 15.3.3 get Intl.NumberFormat.prototype.format + * + * Returns a function bound to this NumberFormat that returns a String value + * representing the result of calling ToNumber(value) according to the + * effective locale and the formatting options of this NumberFormat. + * + * ES2024 Intl draft rev 74ca7099f103d143431b2ea422ae640c6f43e3e6 + */ +// Uncloned functions with `$` prefix are allocated as extended function +// to store the original name in `SetCanonicalName`. +function $Intl_NumberFormat_format_get() { + // Steps 1-3. 
+ var thisArg = UnwrapNumberFormat(this); + var nf = thisArg; + if (!IsObject(nf) || (nf = intl_GuardToNumberFormat(nf)) === null) { + return callFunction( + intl_CallNumberFormatMethodIfWrapped, + thisArg, + "$Intl_NumberFormat_format_get" + ); + } + + var internals = getNumberFormatInternals(nf); + + // Step 4. + if (internals.boundFormat === undefined) { + // Steps 4.a-c. + internals.boundFormat = createNumberFormatFormat(nf); + } + + // Step 5. + return internals.boundFormat; +} +SetCanonicalName($Intl_NumberFormat_format_get, "get format"); + +/** + * 15.3.4 Intl.NumberFormat.prototype.formatToParts ( value ) + * + * ES2024 Intl draft rev 74ca7099f103d143431b2ea422ae640c6f43e3e6 + */ +function Intl_NumberFormat_formatToParts(value) { + // Step 1. + var nf = this; + + // Step 2. + if (!IsObject(nf) || (nf = intl_GuardToNumberFormat(nf)) === null) { + return callFunction( + intl_CallNumberFormatMethodIfWrapped, + this, + value, + "Intl_NumberFormat_formatToParts" + ); + } + + // Steps 3-4. + return intl_FormatNumber(nf, value, /* formatToParts = */ true); +} + +/** + * 15.3.5 Intl.NumberFormat.prototype.formatRange ( start, end ) + * + * ES2024 Intl draft rev 74ca7099f103d143431b2ea422ae640c6f43e3e6 + */ +function Intl_NumberFormat_formatRange(start, end) { + // Step 1. + var nf = this; + + // Step 2. + if (!IsObject(nf) || (nf = intl_GuardToNumberFormat(nf)) === null) { + return callFunction( + intl_CallNumberFormatMethodIfWrapped, + this, + start, + end, + "Intl_NumberFormat_formatRange" + ); + } + + // Step 3. + if (start === undefined || end === undefined) { + ThrowTypeError( + JSMSG_UNDEFINED_NUMBER, + start === undefined ? "start" : "end", + "NumberFormat", + "formatRange" + ); + } + + // Steps 4-6. 
+ return intl_FormatNumberRange(nf, start, end, /* formatToParts = */ false); +} + +/** + * 15.3.6 Intl.NumberFormat.prototype.formatRangeToParts ( start, end ) + * + * ES2024 Intl draft rev 74ca7099f103d143431b2ea422ae640c6f43e3e6 + */ +function Intl_NumberFormat_formatRangeToParts(start, end) { + // Step 1. + var nf = this; + + // Step 2. + if (!IsObject(nf) || (nf = intl_GuardToNumberFormat(nf)) === null) { + return callFunction( + intl_CallNumberFormatMethodIfWrapped, + this, + start, + end, + "Intl_NumberFormat_formatRangeToParts" + ); + } + + // Step 3. + if (start === undefined || end === undefined) { + ThrowTypeError( + JSMSG_UNDEFINED_NUMBER, + start === undefined ? "start" : "end", + "NumberFormat", + "formatRangeToParts" + ); + } + + // Steps 4-6. + return intl_FormatNumberRange(nf, start, end, /* formatToParts = */ true); +} + +/** + * 15.3.7 Intl.NumberFormat.prototype.resolvedOptions ( ) + * + * Returns the resolved options for a NumberFormat object. + * + * ES2024 Intl draft rev 74ca7099f103d143431b2ea422ae640c6f43e3e6 + */ +function Intl_NumberFormat_resolvedOptions() { + // Steps 1-3. + var thisArg = UnwrapNumberFormat(this); + var nf = thisArg; + if (!IsObject(nf) || (nf = intl_GuardToNumberFormat(nf)) === null) { + return callFunction( + intl_CallNumberFormatMethodIfWrapped, + thisArg, + "Intl_NumberFormat_resolvedOptions" + ); + } + + var internals = getNumberFormatInternals(nf); + + // Steps 4-5. + var result = { + locale: internals.locale, + numberingSystem: internals.numberingSystem, + style: internals.style, + }; + + // currency, currencyDisplay, and currencySign are only present for currency + // formatters. 
+ assert( + hasOwn("currency", internals) === (internals.style === "currency"), + "currency is present iff style is 'currency'" + ); + assert( + hasOwn("currencyDisplay", internals) === (internals.style === "currency"), + "currencyDisplay is present iff style is 'currency'" + ); + assert( + hasOwn("currencySign", internals) === (internals.style === "currency"), + "currencySign is present iff style is 'currency'" + ); + + if (hasOwn("currency", internals)) { + DefineDataProperty(result, "currency", internals.currency); + DefineDataProperty(result, "currencyDisplay", internals.currencyDisplay); + DefineDataProperty(result, "currencySign", internals.currencySign); + } + + // unit and unitDisplay are only present for unit formatters. + assert( + hasOwn("unit", internals) === (internals.style === "unit"), + "unit is present iff style is 'unit'" + ); + assert( + hasOwn("unitDisplay", internals) === (internals.style === "unit"), + "unitDisplay is present iff style is 'unit'" + ); + + if (hasOwn("unit", internals)) { + DefineDataProperty(result, "unit", internals.unit); + DefineDataProperty(result, "unitDisplay", internals.unitDisplay); + } + + DefineDataProperty( + result, + "minimumIntegerDigits", + internals.minimumIntegerDigits + ); + + // Min/Max fraction digits are either both present or not present at all. + assert( + hasOwn("minimumFractionDigits", internals) === + hasOwn("maximumFractionDigits", internals), + "minimumFractionDigits is present iff maximumFractionDigits is present" + ); + + if (hasOwn("minimumFractionDigits", internals)) { + DefineDataProperty( + result, + "minimumFractionDigits", + internals.minimumFractionDigits + ); + DefineDataProperty( + result, + "maximumFractionDigits", + internals.maximumFractionDigits + ); + } + + // Min/Max significant digits are either both present or not present at all. 
+ assert( + hasOwn("minimumSignificantDigits", internals) === + hasOwn("maximumSignificantDigits", internals), + "minimumSignificantDigits is present iff maximumSignificantDigits is present" + ); + + if (hasOwn("minimumSignificantDigits", internals)) { + DefineDataProperty( + result, + "minimumSignificantDigits", + internals.minimumSignificantDigits + ); + DefineDataProperty( + result, + "maximumSignificantDigits", + internals.maximumSignificantDigits + ); + } + + DefineDataProperty(result, "useGrouping", internals.useGrouping); + + var notation = internals.notation; + DefineDataProperty(result, "notation", notation); + + // compactDisplay is only present when `notation` is "compact". + if (notation === "compact") { + DefineDataProperty(result, "compactDisplay", internals.compactDisplay); + } + + DefineDataProperty(result, "signDisplay", internals.signDisplay); + DefineDataProperty(result, "roundingMode", internals.roundingMode); + DefineDataProperty(result, "roundingIncrement", internals.roundingIncrement); + DefineDataProperty( + result, + "trailingZeroDisplay", + internals.trailingZeroDisplay + ); + + // Steps 6-8. + // + // Our implementation doesn't use [[RoundingType]], but instead directly + // stores the computed `roundingPriority` value. + DefineDataProperty(result, "roundingPriority", internals.roundingPriority); + + // Step 9. + return result; +} diff --git a/js/src/builtin/intl/NumberingSystems.yaml b/js/src/builtin/intl/NumberingSystems.yaml new file mode 100644 index 0000000000..db287c10ef --- /dev/null +++ b/js/src/builtin/intl/NumberingSystems.yaml @@ -0,0 +1,82 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +# 12.1.7 PartitionNotationSubPattern ( numberFormat, x, n, exponent ) +# +# Numbering systems with simple digit mappings +# +# https://tc39.es/ecma402/#table-numbering-system-digits + +# Run |make_intl_data numbering| to regenerate all files which reference this list +# of numbering systems. + +- adlm +- ahom +- arab +- arabext +- bali +- beng +- bhks +- brah +- cakm +- cham +- deva +- diak +- fullwide +- gong +- gonm +- gujr +- guru +- hanidec +- hmng +- hmnp +- java +- kali +- kawi +- khmr +- knda +- lana +- lanatham +- laoo +- latn +- lepc +- limb +- mathbold +- mathdbl +- mathmono +- mathsanb +- mathsans +- mlym +- modi +- mong +- mroo +- mtei +- mymr +- mymrshan +- mymrtlng +- nagm +- newa +- nkoo +- olck +- orya +- osma +- rohg +- saur +- segment +- shrd +- sind +- sinh +- sora +- sund +- takr +- talu +- tamldec +- telu +- thai +- tibt +- tirh +- tnsa +- vaii +- wara +- wcho diff --git a/js/src/builtin/intl/NumberingSystemsGenerated.h b/js/src/builtin/intl/NumberingSystemsGenerated.h new file mode 100644 index 0000000000..f51d0f9c53 --- /dev/null +++ b/js/src/builtin/intl/NumberingSystemsGenerated.h @@ -0,0 +1,83 @@ +// Generated by make_intl_data.py. DO NOT EDIT. + +/** + * The list of numbering systems with simple digit mappings. 
+ */ + +#ifndef builtin_intl_NumberingSystemsGenerated_h +#define builtin_intl_NumberingSystemsGenerated_h + +// clang-format off +#define NUMBERING_SYSTEMS_WITH_SIMPLE_DIGIT_MAPPINGS \ + "adlm", \ + "ahom", \ + "arab", \ + "arabext", \ + "bali", \ + "beng", \ + "bhks", \ + "brah", \ + "cakm", \ + "cham", \ + "deva", \ + "diak", \ + "fullwide", \ + "gong", \ + "gonm", \ + "gujr", \ + "guru", \ + "hanidec", \ + "hmng", \ + "hmnp", \ + "java", \ + "kali", \ + "kawi", \ + "khmr", \ + "knda", \ + "lana", \ + "lanatham", \ + "laoo", \ + "latn", \ + "lepc", \ + "limb", \ + "mathbold", \ + "mathdbl", \ + "mathmono", \ + "mathsanb", \ + "mathsans", \ + "mlym", \ + "modi", \ + "mong", \ + "mroo", \ + "mtei", \ + "mymr", \ + "mymrshan", \ + "mymrtlng", \ + "nagm", \ + "newa", \ + "nkoo", \ + "olck", \ + "orya", \ + "osma", \ + "rohg", \ + "saur", \ + "segment", \ + "shrd", \ + "sind", \ + "sinh", \ + "sora", \ + "sund", \ + "takr", \ + "talu", \ + "tamldec", \ + "telu", \ + "thai", \ + "tibt", \ + "tirh", \ + "tnsa", \ + "vaii", \ + "wara", \ + "wcho" +// clang-format on + +#endif // builtin_intl_NumberingSystemsGenerated_h diff --git a/js/src/builtin/intl/PluralRules.cpp b/js/src/builtin/intl/PluralRules.cpp new file mode 100644 index 0000000000..4d7907f2c8 --- /dev/null +++ b/js/src/builtin/intl/PluralRules.cpp @@ -0,0 +1,504 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Implementation of the Intl.PluralRules proposal. 
*/ + +#include "builtin/intl/PluralRules.h" + +#include "mozilla/Assertions.h" +#include "mozilla/Casting.h" +#include "mozilla/intl/PluralRules.h" + +#include "builtin/Array.h" +#include "builtin/intl/CommonFunctions.h" +#include "gc/GCContext.h" +#include "js/PropertySpec.h" +#include "vm/GlobalObject.h" +#include "vm/JSContext.h" +#include "vm/PlainObject.h" // js::PlainObject +#include "vm/StringType.h" + +#include "vm/JSObject-inl.h" +#include "vm/NativeObject-inl.h" + +using namespace js; + +using mozilla::AssertedCast; + +const JSClassOps PluralRulesObject::classOps_ = { + nullptr, // addProperty + nullptr, // delProperty + nullptr, // enumerate + nullptr, // newEnumerate + nullptr, // resolve + nullptr, // mayResolve + PluralRulesObject::finalize, // finalize + nullptr, // call + nullptr, // construct + nullptr, // trace +}; + +const JSClass PluralRulesObject::class_ = { + "Intl.PluralRules", + JSCLASS_HAS_RESERVED_SLOTS(PluralRulesObject::SLOT_COUNT) | + JSCLASS_HAS_CACHED_PROTO(JSProto_PluralRules) | + JSCLASS_FOREGROUND_FINALIZE, + &PluralRulesObject::classOps_, &PluralRulesObject::classSpec_}; + +const JSClass& PluralRulesObject::protoClass_ = PlainObject::class_; + +static bool pluralRules_toSource(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + args.rval().setString(cx->names().PluralRules); + return true; +} + +static const JSFunctionSpec pluralRules_static_methods[] = { + JS_SELF_HOSTED_FN("supportedLocalesOf", + "Intl_PluralRules_supportedLocalesOf", 1, 0), + JS_FS_END}; + +static const JSFunctionSpec pluralRules_methods[] = { + JS_SELF_HOSTED_FN("resolvedOptions", "Intl_PluralRules_resolvedOptions", 0, + 0), + JS_SELF_HOSTED_FN("select", "Intl_PluralRules_select", 1, 0), + JS_SELF_HOSTED_FN("selectRange", "Intl_PluralRules_selectRange", 2, 0), + JS_FN("toSource", pluralRules_toSource, 0, 0), JS_FS_END}; + +static const JSPropertySpec pluralRules_properties[] = { + JS_STRING_SYM_PS(toStringTag, 
"Intl.PluralRules", JSPROP_READONLY), + JS_PS_END}; + +static bool PluralRules(JSContext* cx, unsigned argc, Value* vp); + +const ClassSpec PluralRulesObject::classSpec_ = { + GenericCreateConstructor<PluralRules, 0, gc::AllocKind::FUNCTION>, + GenericCreatePrototype<PluralRulesObject>, + pluralRules_static_methods, + nullptr, + pluralRules_methods, + pluralRules_properties, + nullptr, + ClassSpec::DontDefineConstructor}; + +/** + * 16.1.1 Intl.PluralRules ( [ locales [ , options ] ] ) + * + * ES2024 Intl draft rev 74ca7099f103d143431b2ea422ae640c6f43e3e6 + */ +static bool PluralRules(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + + // Step 1. + if (!ThrowIfNotConstructing(cx, args, "Intl.PluralRules")) { + return false; + } + + // Step 2 (Inlined 9.1.14, OrdinaryCreateFromConstructor). + RootedObject proto(cx); + if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_PluralRules, + &proto)) { + return false; + } + + Rooted<PluralRulesObject*> pluralRules(cx); + pluralRules = NewObjectWithClassProto<PluralRulesObject>(cx, proto); + if (!pluralRules) { + return false; + } + + HandleValue locales = args.get(0); + HandleValue options = args.get(1); + + // Step 3. 
+ if (!intl::InitializeObject(cx, pluralRules, + cx->names().InitializePluralRules, locales, + options)) { + return false; + } + + args.rval().setObject(*pluralRules); + return true; +} + +void js::PluralRulesObject::finalize(JS::GCContext* gcx, JSObject* obj) { + MOZ_ASSERT(gcx->onMainThread()); + + auto* pluralRules = &obj->as<PluralRulesObject>(); + if (mozilla::intl::PluralRules* pr = pluralRules->getPluralRules()) { + intl::RemoveICUCellMemory( + gcx, obj, PluralRulesObject::UPluralRulesEstimatedMemoryUse); + delete pr; + } +} + +static JSString* KeywordToString(mozilla::intl::PluralRules::Keyword keyword, + JSContext* cx) { + using Keyword = mozilla::intl::PluralRules::Keyword; + switch (keyword) { + case Keyword::Zero: { + return cx->names().zero; + } + case Keyword::One: { + return cx->names().one; + } + case Keyword::Two: { + return cx->names().two; + } + case Keyword::Few: { + return cx->names().few; + } + case Keyword::Many: { + return cx->names().many; + } + case Keyword::Other: { + return cx->names().other; + } + } + MOZ_CRASH("Unexpected PluralRules keyword"); +} + +/** + * Returns a new intl::PluralRules with the locale and type options of the given + * PluralRules. 
+ */ +static mozilla::intl::PluralRules* NewPluralRules( + JSContext* cx, Handle<PluralRulesObject*> pluralRules) { + RootedObject internals(cx, intl::GetInternalsObject(cx, pluralRules)); + if (!internals) { + return nullptr; + } + + RootedValue value(cx); + + if (!GetProperty(cx, internals, internals, cx->names().locale, &value)) { + return nullptr; + } + UniqueChars locale = intl::EncodeLocale(cx, value.toString()); + if (!locale) { + return nullptr; + } + + using PluralRules = mozilla::intl::PluralRules; + mozilla::intl::PluralRulesOptions options; + + if (!GetProperty(cx, internals, internals, cx->names().type, &value)) { + return nullptr; + } + + { + JSLinearString* type = value.toString()->ensureLinear(cx); + if (!type) { + return nullptr; + } + + if (StringEqualsLiteral(type, "ordinal")) { + options.mPluralType = PluralRules::Type::Ordinal; + } else { + MOZ_ASSERT(StringEqualsLiteral(type, "cardinal")); + options.mPluralType = PluralRules::Type::Cardinal; + } + } + + bool hasMinimumSignificantDigits; + if (!HasProperty(cx, internals, cx->names().minimumSignificantDigits, + &hasMinimumSignificantDigits)) { + return nullptr; + } + + if (hasMinimumSignificantDigits) { + if (!GetProperty(cx, internals, internals, + cx->names().minimumSignificantDigits, &value)) { + return nullptr; + } + uint32_t minimumSignificantDigits = AssertedCast<uint32_t>(value.toInt32()); + + if (!GetProperty(cx, internals, internals, + cx->names().maximumSignificantDigits, &value)) { + return nullptr; + } + uint32_t maximumSignificantDigits = AssertedCast<uint32_t>(value.toInt32()); + + options.mSignificantDigits = mozilla::Some( + std::make_pair(minimumSignificantDigits, maximumSignificantDigits)); + } + + bool hasMinimumFractionDigits; + if (!HasProperty(cx, internals, cx->names().minimumFractionDigits, + &hasMinimumFractionDigits)) { + return nullptr; + } + + if (hasMinimumFractionDigits) { + if (!GetProperty(cx, internals, internals, + cx->names().minimumFractionDigits, &value)) { + 
return nullptr; + } + uint32_t minimumFractionDigits = AssertedCast<uint32_t>(value.toInt32()); + + if (!GetProperty(cx, internals, internals, + cx->names().maximumFractionDigits, &value)) { + return nullptr; + } + uint32_t maximumFractionDigits = AssertedCast<uint32_t>(value.toInt32()); + + options.mFractionDigits = mozilla::Some( + std::make_pair(minimumFractionDigits, maximumFractionDigits)); + } + + if (!GetProperty(cx, internals, internals, cx->names().roundingPriority, + &value)) { + return nullptr; + } + + { + JSLinearString* roundingPriority = value.toString()->ensureLinear(cx); + if (!roundingPriority) { + return nullptr; + } + + using RoundingPriority = + mozilla::intl::PluralRulesOptions::RoundingPriority; + + RoundingPriority priority; + if (StringEqualsLiteral(roundingPriority, "auto")) { + priority = RoundingPriority::Auto; + } else if (StringEqualsLiteral(roundingPriority, "morePrecision")) { + priority = RoundingPriority::MorePrecision; + } else { + MOZ_ASSERT(StringEqualsLiteral(roundingPriority, "lessPrecision")); + priority = RoundingPriority::LessPrecision; + } + + options.mRoundingPriority = priority; + } + + if (!GetProperty(cx, internals, internals, cx->names().minimumIntegerDigits, + &value)) { + return nullptr; + } + options.mMinIntegerDigits = + mozilla::Some(AssertedCast<uint32_t>(value.toInt32())); + + if (!GetProperty(cx, internals, internals, cx->names().roundingIncrement, + &value)) { + return nullptr; + } + options.mRoundingIncrement = AssertedCast<uint32_t>(value.toInt32()); + + if (!GetProperty(cx, internals, internals, cx->names().roundingMode, + &value)) { + return nullptr; + } + + { + JSLinearString* roundingMode = value.toString()->ensureLinear(cx); + if (!roundingMode) { + return nullptr; + } + + using RoundingMode = mozilla::intl::PluralRulesOptions::RoundingMode; + + RoundingMode rounding; + if (StringEqualsLiteral(roundingMode, "halfExpand")) { + // "halfExpand" is the default mode, so we handle it first. 
+ rounding = RoundingMode::HalfExpand; + } else if (StringEqualsLiteral(roundingMode, "ceil")) { + rounding = RoundingMode::Ceil; + } else if (StringEqualsLiteral(roundingMode, "floor")) { + rounding = RoundingMode::Floor; + } else if (StringEqualsLiteral(roundingMode, "expand")) { + rounding = RoundingMode::Expand; + } else if (StringEqualsLiteral(roundingMode, "trunc")) { + rounding = RoundingMode::Trunc; + } else if (StringEqualsLiteral(roundingMode, "halfCeil")) { + rounding = RoundingMode::HalfCeil; + } else if (StringEqualsLiteral(roundingMode, "halfFloor")) { + rounding = RoundingMode::HalfFloor; + } else if (StringEqualsLiteral(roundingMode, "halfTrunc")) { + rounding = RoundingMode::HalfTrunc; + } else { + MOZ_ASSERT(StringEqualsLiteral(roundingMode, "halfEven")); + rounding = RoundingMode::HalfEven; + } + + options.mRoundingMode = rounding; + } + + if (!GetProperty(cx, internals, internals, cx->names().trailingZeroDisplay, + &value)) { + return nullptr; + } + + { + JSLinearString* trailingZeroDisplay = value.toString()->ensureLinear(cx); + if (!trailingZeroDisplay) { + return nullptr; + } + + if (StringEqualsLiteral(trailingZeroDisplay, "auto")) { + options.mStripTrailingZero = false; + } else { + MOZ_ASSERT(StringEqualsLiteral(trailingZeroDisplay, "stripIfInteger")); + options.mStripTrailingZero = true; + } + } + + auto result = PluralRules::TryCreate(locale.get(), options); + if (result.isErr()) { + intl::ReportInternalError(cx, result.unwrapErr()); + return nullptr; + } + + return result.unwrap().release(); +} + +static mozilla::intl::PluralRules* GetOrCreatePluralRules( + JSContext* cx, Handle<PluralRulesObject*> pluralRules) { + // Obtain a cached PluralRules object. 
+ mozilla::intl::PluralRules* pr = pluralRules->getPluralRules(); + if (pr) { + return pr; + } + + pr = NewPluralRules(cx, pluralRules); + if (!pr) { + return nullptr; + } + pluralRules->setPluralRules(pr); + + intl::AddICUCellMemory(pluralRules, + PluralRulesObject::UPluralRulesEstimatedMemoryUse); + return pr; +} + +/** + * 16.5.3 ResolvePlural ( pluralRules, n ) + * 16.5.2 PluralRuleSelect ( locale, type, n, operands ) + * + * ES2024 Intl draft rev 74ca7099f103d143431b2ea422ae640c6f43e3e6 + */ +bool js::intl_SelectPluralRule(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 2); + + // Steps 1-2. + Rooted<PluralRulesObject*> pluralRules( + cx, &args[0].toObject().as<PluralRulesObject>()); + + // Step 3. + double x = args[1].toNumber(); + + // Steps 4-11. + using PluralRules = mozilla::intl::PluralRules; + PluralRules* pr = GetOrCreatePluralRules(cx, pluralRules); + if (!pr) { + return false; + } + + auto keywordResult = pr->Select(x); + if (keywordResult.isErr()) { + intl::ReportInternalError(cx, keywordResult.unwrapErr()); + return false; + } + + JSString* str = KeywordToString(keywordResult.unwrap(), cx); + MOZ_ASSERT(str); + + args.rval().setString(str); + return true; +} + +/** + * 16.5.5 ResolvePluralRange ( pluralRules, x, y ) + * 16.5.4 PluralRuleSelectRange ( locale, type, xp, yp ) + * + * ES2024 Intl draft rev 74ca7099f103d143431b2ea422ae640c6f43e3e6 + */ +bool js::intl_SelectPluralRuleRange(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 3); + + // Steps 1-2. + Rooted<PluralRulesObject*> pluralRules( + cx, &args[0].toObject().as<PluralRulesObject>()); + + // Steps 3-4. + double x = args[1].toNumber(); + double y = args[2].toNumber(); + + // Step 5. 
+ if (std::isnan(x)) { + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, + JSMSG_NAN_NUMBER_RANGE, "start", "PluralRules", + "selectRange"); + return false; + } + if (std::isnan(y)) { + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, + JSMSG_NAN_NUMBER_RANGE, "end", "PluralRules", + "selectRange"); + return false; + } + + using PluralRules = mozilla::intl::PluralRules; + PluralRules* pr = GetOrCreatePluralRules(cx, pluralRules); + if (!pr) { + return false; + } + + // Steps 6-11. + auto keywordResult = pr->SelectRange(x, y); + if (keywordResult.isErr()) { + intl::ReportInternalError(cx, keywordResult.unwrapErr()); + return false; + } + + JSString* str = KeywordToString(keywordResult.unwrap(), cx); + MOZ_ASSERT(str); + + args.rval().setString(str); + return true; +} + +bool js::intl_GetPluralCategories(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 1); + + Rooted<PluralRulesObject*> pluralRules( + cx, &args[0].toObject().as<PluralRulesObject>()); + + using PluralRules = mozilla::intl::PluralRules; + PluralRules* pr = GetOrCreatePluralRules(cx, pluralRules); + if (!pr) { + return false; + } + + auto categoriesResult = pr->Categories(); + if (categoriesResult.isErr()) { + intl::ReportInternalError(cx, categoriesResult.unwrapErr()); + return false; + } + auto categories = categoriesResult.unwrap(); + + ArrayObject* res = NewDenseFullyAllocatedArray(cx, categories.size()); + if (!res) { + return false; + } + res->setDenseInitializedLength(categories.size()); + + size_t index = 0; + for (PluralRules::Keyword keyword : categories) { + JSString* str = KeywordToString(keyword, cx); + MOZ_ASSERT(str); + + res->initDenseElement(index++, StringValue(str)); + } + MOZ_ASSERT(index == categories.size()); + + args.rval().setObject(*res); + return true; +} diff --git a/js/src/builtin/intl/PluralRules.h b/js/src/builtin/intl/PluralRules.h new file mode 100644 index 0000000000..86d8ec105d --- 
/dev/null +++ b/js/src/builtin/intl/PluralRules.h @@ -0,0 +1,98 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef builtin_intl_PluralRules_h +#define builtin_intl_PluralRules_h + +#include "builtin/SelfHostingDefines.h" +#include "js/Class.h" +#include "vm/NativeObject.h" + +namespace mozilla::intl { +class PluralRules; +} + +namespace js { + +class PluralRulesObject : public NativeObject { + public: + static const JSClass class_; + static const JSClass& protoClass_; + + static constexpr uint32_t INTERNALS_SLOT = 0; + static constexpr uint32_t PLURAL_RULES_SLOT = 1; + static constexpr uint32_t SLOT_COUNT = 2; + + static_assert(INTERNALS_SLOT == INTL_INTERNALS_OBJECT_SLOT, + "INTERNALS_SLOT must match self-hosting define for internals " + "object slot"); + + // Estimated memory use for UPluralRules (see IcuMemoryUsage). + // Includes usage for UNumberFormat and UNumberRangeFormatter since our + // PluralRules implementations contains a NumberFormat and a NumberRangeFormat + // object. 
+ static constexpr size_t UPluralRulesEstimatedMemoryUse = 5736; + + mozilla::intl::PluralRules* getPluralRules() const { + const auto& slot = getFixedSlot(PLURAL_RULES_SLOT); + if (slot.isUndefined()) { + return nullptr; + } + return static_cast<mozilla::intl::PluralRules*>(slot.toPrivate()); + } + + void setPluralRules(mozilla::intl::PluralRules* pluralRules) { + setFixedSlot(PLURAL_RULES_SLOT, PrivateValue(pluralRules)); + } + + private: + static const JSClassOps classOps_; + static const ClassSpec classSpec_; + + static void finalize(JS::GCContext* gcx, JSObject* obj); +}; + +/** + * Returns a plural rule for the number x according to the effective + * locale and the formatting options of the given PluralRules. + * + * A plural rule is a grammatical category that expresses count distinctions + * (such as "one", "two", "few" etc.). + * + * Usage: rule = intl_SelectPluralRule(pluralRules, x) + */ +[[nodiscard]] extern bool intl_SelectPluralRule(JSContext* cx, unsigned argc, + JS::Value* vp); + +/** + * Returns a plural rule for the number range «x - y» according to the effective + * locale and the formatting options of the given PluralRules. + * + * A plural rule is a grammatical category that expresses count distinctions + * (such as "one", "two", "few" etc.). + * + * Usage: rule = intl_SelectPluralRuleRange(pluralRules, x, y) + */ +[[nodiscard]] extern bool intl_SelectPluralRuleRange(JSContext* cx, + unsigned argc, + JS::Value* vp); + +/** + * Returns an array of plural rules categories for a given pluralRules object. 
/**
 * 16.1.2 InitializePluralRules ( pluralRules, locales, options )
 *
 * Builds the internal properties object for a PluralRules from the lazy data
 * recorded by |InitializePluralRules|: resolves the effective locale and
 * copies the type and digit/rounding options across.
 *
 * ES2024 Intl draft rev 74ca7099f103d143431b2ea422ae640c6f43e3e6
 */
function resolvePluralRulesInternals(lazyPluralRulesData) {
  assert(IsObject(lazyPluralRulesData), "lazy data not an object?");

  var props = std_Object_create(null);

  var ctorInternals = pluralRulesInternalProperties;

  // Compute effective locale.

  // Step 9.
  var localeDataFn = ctorInternals.localeData;

  // Step 10.
  var resolved = ResolveLocale(
    "PluralRules",
    lazyPluralRulesData.requestedLocales,
    lazyPluralRulesData.opt,
    ctorInternals.relevantExtensionKeys,
    localeDataFn
  );

  // Step 11.
  props.locale = resolved.locale;

  // Step 7.
  props.type = lazyPluralRulesData.type;

  // Step 8. SetNumberFormatDigitOptions, step 6.
  props.minimumIntegerDigits = lazyPluralRulesData.minimumIntegerDigits;

  // Step 8. SetNumberFormatDigitOptions, step 14.
  props.roundingIncrement = lazyPluralRulesData.roundingIncrement;

  // Step 8. SetNumberFormatDigitOptions, step 15.
  props.roundingMode = lazyPluralRulesData.roundingMode;

  // Step 8. SetNumberFormatDigitOptions, step 16.
  props.trailingZeroDisplay = lazyPluralRulesData.trailingZeroDisplay;

  // Step 8. SetNumberFormatDigitOptions, steps 25-26.
  // Fraction digits are copied as a min/max pair, only when present.
  if ("minimumFractionDigits" in lazyPluralRulesData) {
    assert(
      "maximumFractionDigits" in lazyPluralRulesData,
      "min/max frac digits mismatch"
    );
    props.minimumFractionDigits = lazyPluralRulesData.minimumFractionDigits;
    props.maximumFractionDigits = lazyPluralRulesData.maximumFractionDigits;
  }

  // Step 8. SetNumberFormatDigitOptions, steps 24 and 26.
  // Significant digits are copied as a min/max pair, only when present.
  if ("minimumSignificantDigits" in lazyPluralRulesData) {
    assert(
      "maximumSignificantDigits" in lazyPluralRulesData,
      "min/max sig digits mismatch"
    );
    props.minimumSignificantDigits =
      lazyPluralRulesData.minimumSignificantDigits;
    props.maximumSignificantDigits =
      lazyPluralRulesData.maximumSignificantDigits;
  }

  // Step 8. SetNumberFormatDigitOptions, steps 26-30.
  props.roundingPriority = lazyPluralRulesData.roundingPriority;

  // `pluralCategories` starts out as null and is filled in on first access.
  props.pluralCategories = null;

  return props;
}
/**
 * Returns the PluralRules internal properties object of |obj|.
 *
 * If the properties have not been computed yet, they are resolved from the
 * lazy data and stored on the internals before being returned.
 */
function getPluralRulesInternals(obj) {
  assert(IsObject(obj), "getPluralRulesInternals called with non-object");
  assert(
    intl_GuardToPluralRules(obj) !== null,
    "getPluralRulesInternals called with non-PluralRules"
  );

  var internals = getIntlObjectInternals(obj);
  assert(
    internals.type === "PluralRules",
    "bad type escaped getIntlObjectInternals"
  );

  var props = maybeInternalProperties(internals);
  if (!props) {
    // First use: resolve the properties now and remember them.
    props = resolvePluralRulesInternals(internals.lazyData);
    setInternalProperties(internals, props);
  }
  return props;
}
/**
 * 16.1.2 InitializePluralRules ( pluralRules, locales, options )
 *
 * Initializes an object as a PluralRules.
 *
 * Initialization is lazy: only the work that has to happen immediately is
 * done here, and everything else is deferred until the object is actually
 * used as a PluralRules. The deferred work lives in
 * |resolvePluralRulesInternals|; steps not noted here occur there.
 *
 * ES2024 Intl draft rev 74ca7099f103d143431b2ea422ae640c6f43e3e6
 */
function InitializePluralRules(pluralRules, locales, options) {
  assert(IsObject(pluralRules), "InitializePluralRules called with non-object");
  assert(
    intl_GuardToPluralRules(pluralRules) !== null,
    "InitializePluralRules called with non-PluralRules"
  );

  // Lazy PluralRules data has the following structure:
  //
  //   {
  //     requestedLocales: List of locales,
  //     type: "cardinal" / "ordinal",
  //
  //     opt: // opt object computed in InitializePluralRules
  //       {
  //         localeMatcher: "lookup" / "best fit",
  //       }
  //
  //     minimumIntegerDigits: integer ∈ [1, 21],
  //
  //     // optional, mutually exclusive with the significant-digits option
  //     minimumFractionDigits: integer ∈ [0, 100],
  //     maximumFractionDigits: integer ∈ [0, 100],
  //
  //     // optional, mutually exclusive with the fraction-digits option
  //     minimumSignificantDigits: integer ∈ [1, 21],
  //     maximumSignificantDigits: integer ∈ [1, 21],
  //
  //     roundingPriority: "auto" / "lessPrecision" / "morePrecision",
  //
  //     trailingZeroDisplay: "auto" / "stripIfInteger",
  //
  //     roundingIncrement: integer ∈ (1, 2, 5,
  //                                   10, 20, 25, 50,
  //                                   100, 200, 250, 500,
  //                                   1000, 2000, 2500, 5000),
  //
  //     roundingMode: "ceil" / "floor" / "expand" / "trunc" /
  //                   "halfCeil" / "halfFloor" / "halfExpand" / "halfTrunc" / "halfEven",
  //   }
  //
  // Note that lazy data is only installed as a final step of initialization,
  // so every PluralRules lazy data object has *all* these properties, never a
  // subset of them.
  var lazyData = std_Object_create(null);

  // Step 1.
  lazyData.requestedLocales = CanonicalizeLocaleList(locales);

  // Step 2. (Inlined call to CoerceOptionsToObject.)
  options =
    options === undefined ? std_Object_create(null) : ToObject(options);

  // Step 3.
  var opt = new_Record();
  lazyData.opt = opt;

  // Steps 4-5.
  opt.localeMatcher = GetOption(
    options,
    "localeMatcher",
    "string",
    ["lookup", "best fit"],
    "best fit"
  );

  // Steps 6-7.
  lazyData.type = GetOption(
    options,
    "type",
    "string",
    ["cardinal", "ordinal"],
    "cardinal"
  );

  // Step 8.
  SetNumberFormatDigitOptions(lazyData, options, 0, 3, "standard");

  // Step 12.
  //
  // Everything that must be done eagerly is done: mark the lazy data as fully
  // computed and install it.
  initializeIntlObject(pluralRules, "PluralRules", lazyData);
}
/**
 * 16.3.5 Intl.PluralRules.prototype.resolvedOptions ( )
 *
 * Returns a fresh object describing the resolved options of a PluralRules
 * object.
 *
 * ES2024 Intl draft rev 74ca7099f103d143431b2ea422ae640c6f43e3e6
 */
function Intl_PluralRules_resolvedOptions() {
  // Step 1.
  var pluralRules = this;

  // Step 2.
  if (
    !IsObject(pluralRules) ||
    (pluralRules = intl_GuardToPluralRules(pluralRules)) === null
  ) {
    return callFunction(
      intl_CallPluralRulesMethodIfWrapped,
      this,
      "Intl_PluralRules_resolvedOptions"
    );
  }

  var props = getPluralRulesInternals(pluralRules);

  // Steps 3-4.
  var result = {
    locale: props.locale,
    type: props.type,
    minimumIntegerDigits: props.minimumIntegerDigits,
  };

  // Min/Max fraction digits are either both present or not present at all.
  var hasFractionDigits = hasOwn("minimumFractionDigits", props);
  assert(
    hasFractionDigits === hasOwn("maximumFractionDigits", props),
    "minimumFractionDigits is present iff maximumFractionDigits is present"
  );

  if (hasFractionDigits) {
    DefineDataProperty(
      result,
      "minimumFractionDigits",
      props.minimumFractionDigits
    );
    DefineDataProperty(
      result,
      "maximumFractionDigits",
      props.maximumFractionDigits
    );
  }

  // Min/Max significant digits are either both present or not present at all.
  var hasSignificantDigits = hasOwn("minimumSignificantDigits", props);
  assert(
    hasSignificantDigits === hasOwn("maximumSignificantDigits", props),
    "minimumSignificantDigits is present iff maximumSignificantDigits is present"
  );

  if (hasSignificantDigits) {
    DefineDataProperty(
      result,
      "minimumSignificantDigits",
      props.minimumSignificantDigits
    );
    DefineDataProperty(
      result,
      "maximumSignificantDigits",
      props.maximumSignificantDigits
    );
  }

  DefineDataProperty(result, "roundingMode", props.roundingMode);
  DefineDataProperty(result, "roundingIncrement", props.roundingIncrement);
  DefineDataProperty(result, "trailingZeroDisplay", props.trailingZeroDisplay);

  // Step 5.
  // The category list is computed once and remembered on the internals; each
  // call hands out its own copy of that list.
  var cachedCategories = props.pluralCategories;
  if (cachedCategories === null) {
    cachedCategories = intl_GetPluralCategories(pluralRules);
    props.pluralCategories = cachedCategories;
  }

  var categoriesCopy = [];
  for (var idx = 0; idx < cachedCategories.length; idx++) {
    DefineDataProperty(categoriesCopy, idx, cachedCategories[idx]);
  }

  // Step 6.
  DefineDataProperty(result, "pluralCategories", categoriesCopy);

  // Steps 7-9.
  //
  // Our implementation doesn't use [[RoundingType]], but instead directly
  // stores the computed `roundingPriority` value.
  DefineDataProperty(result, "roundingPriority", props.roundingPriority);

  // Step 10.
  return result;
}
*/ + +#include "builtin/intl/RelativeTimeFormat.h" + +#include "mozilla/Assertions.h" +#include "mozilla/FloatingPoint.h" +#include "mozilla/intl/RelativeTimeFormat.h" + +#include "builtin/intl/CommonFunctions.h" +#include "builtin/intl/FormatBuffer.h" +#include "builtin/intl/LanguageTag.h" +#include "gc/GCContext.h" +#include "js/friend/ErrorMessages.h" // js::GetErrorMessage, JSMSG_* +#include "js/Printer.h" +#include "js/PropertySpec.h" +#include "vm/GlobalObject.h" +#include "vm/JSContext.h" +#include "vm/PlainObject.h" // js::PlainObject +#include "vm/StringType.h" + +#include "vm/NativeObject-inl.h" + +using namespace js; + +/**************** RelativeTimeFormat *****************/ + +const JSClassOps RelativeTimeFormatObject::classOps_ = { + nullptr, // addProperty + nullptr, // delProperty + nullptr, // enumerate + nullptr, // newEnumerate + nullptr, // resolve + nullptr, // mayResolve + RelativeTimeFormatObject::finalize, // finalize + nullptr, // call + nullptr, // construct + nullptr, // trace +}; + +const JSClass RelativeTimeFormatObject::class_ = { + "Intl.RelativeTimeFormat", + JSCLASS_HAS_RESERVED_SLOTS(RelativeTimeFormatObject::SLOT_COUNT) | + JSCLASS_HAS_CACHED_PROTO(JSProto_RelativeTimeFormat) | + JSCLASS_FOREGROUND_FINALIZE, + &RelativeTimeFormatObject::classOps_, + &RelativeTimeFormatObject::classSpec_}; + +const JSClass& RelativeTimeFormatObject::protoClass_ = PlainObject::class_; + +static bool relativeTimeFormat_toSource(JSContext* cx, unsigned argc, + Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + args.rval().setString(cx->names().RelativeTimeFormat); + return true; +} + +static const JSFunctionSpec relativeTimeFormat_static_methods[] = { + JS_SELF_HOSTED_FN("supportedLocalesOf", + "Intl_RelativeTimeFormat_supportedLocalesOf", 1, 0), + JS_FS_END}; + +static const JSFunctionSpec relativeTimeFormat_methods[] = { + JS_SELF_HOSTED_FN("resolvedOptions", + "Intl_RelativeTimeFormat_resolvedOptions", 0, 0), + JS_SELF_HOSTED_FN("format", 
"Intl_RelativeTimeFormat_format", 2, 0), + JS_SELF_HOSTED_FN("formatToParts", "Intl_RelativeTimeFormat_formatToParts", + 2, 0), + JS_FN("toSource", relativeTimeFormat_toSource, 0, 0), JS_FS_END}; + +static const JSPropertySpec relativeTimeFormat_properties[] = { + JS_STRING_SYM_PS(toStringTag, "Intl.RelativeTimeFormat", JSPROP_READONLY), + JS_PS_END}; + +static bool RelativeTimeFormat(JSContext* cx, unsigned argc, Value* vp); + +const ClassSpec RelativeTimeFormatObject::classSpec_ = { + GenericCreateConstructor<RelativeTimeFormat, 0, gc::AllocKind::FUNCTION>, + GenericCreatePrototype<RelativeTimeFormatObject>, + relativeTimeFormat_static_methods, + nullptr, + relativeTimeFormat_methods, + relativeTimeFormat_properties, + nullptr, + ClassSpec::DontDefineConstructor}; + +/** + * RelativeTimeFormat constructor. + * Spec: ECMAScript 402 API, RelativeTimeFormat, 1.1 + */ +static bool RelativeTimeFormat(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + + // Step 1. + if (!ThrowIfNotConstructing(cx, args, "Intl.RelativeTimeFormat")) { + return false; + } + + // Step 2 (Inlined 9.1.14, OrdinaryCreateFromConstructor). + RootedObject proto(cx); + if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_RelativeTimeFormat, + &proto)) { + return false; + } + + Rooted<RelativeTimeFormatObject*> relativeTimeFormat(cx); + relativeTimeFormat = + NewObjectWithClassProto<RelativeTimeFormatObject>(cx, proto); + if (!relativeTimeFormat) { + return false; + } + + HandleValue locales = args.get(0); + HandleValue options = args.get(1); + + // Step 3. 
+ if (!intl::InitializeObject(cx, relativeTimeFormat, + cx->names().InitializeRelativeTimeFormat, locales, + options)) { + return false; + } + + args.rval().setObject(*relativeTimeFormat); + return true; +} + +void js::RelativeTimeFormatObject::finalize(JS::GCContext* gcx, JSObject* obj) { + MOZ_ASSERT(gcx->onMainThread()); + + if (mozilla::intl::RelativeTimeFormat* rtf = + obj->as<RelativeTimeFormatObject>().getRelativeTimeFormatter()) { + intl::RemoveICUCellMemory(gcx, obj, + RelativeTimeFormatObject::EstimatedMemoryUse); + + // This was allocated using `new` in mozilla::intl::RelativeTimeFormat, + // so we delete here. + delete rtf; + } +} + +/** + * Returns a new URelativeDateTimeFormatter with the locale and options of the + * given RelativeTimeFormatObject. + */ +static mozilla::intl::RelativeTimeFormat* NewRelativeTimeFormatter( + JSContext* cx, Handle<RelativeTimeFormatObject*> relativeTimeFormat) { + RootedObject internals(cx, intl::GetInternalsObject(cx, relativeTimeFormat)); + if (!internals) { + return nullptr; + } + + RootedValue value(cx); + + if (!GetProperty(cx, internals, internals, cx->names().locale, &value)) { + return nullptr; + } + + // ICU expects numberingSystem as a Unicode locale extensions on locale. + + mozilla::intl::Locale tag; + { + Rooted<JSLinearString*> locale(cx, value.toString()->ensureLinear(cx)); + if (!locale) { + return nullptr; + } + + if (!intl::ParseLocale(cx, locale, tag)) { + return nullptr; + } + } + + JS::RootedVector<intl::UnicodeExtensionKeyword> keywords(cx); + + if (!GetProperty(cx, internals, internals, cx->names().numberingSystem, + &value)) { + return nullptr; + } + + { + JSLinearString* numberingSystem = value.toString()->ensureLinear(cx); + if (!numberingSystem) { + return nullptr; + } + + if (!keywords.emplaceBack("nu", numberingSystem)) { + return nullptr; + } + } + + // |ApplyUnicodeExtensionToTag| applies the new keywords to the front of the + // Unicode extension subtag. 
We're then relying on ICU to follow RFC 6067, + // which states that any trailing keywords using the same key should be + // ignored. + if (!intl::ApplyUnicodeExtensionToTag(cx, tag, keywords)) { + return nullptr; + } + + intl::FormatBuffer<char> buffer(cx); + if (auto result = tag.ToString(buffer); result.isErr()) { + intl::ReportInternalError(cx, result.unwrapErr()); + return nullptr; + } + + UniqueChars locale = buffer.extractStringZ(); + if (!locale) { + return nullptr; + } + + if (!GetProperty(cx, internals, internals, cx->names().style, &value)) { + return nullptr; + } + + using RelativeTimeFormatOptions = mozilla::intl::RelativeTimeFormatOptions; + RelativeTimeFormatOptions options; + { + JSLinearString* style = value.toString()->ensureLinear(cx); + if (!style) { + return nullptr; + } + + if (StringEqualsLiteral(style, "short")) { + options.style = RelativeTimeFormatOptions::Style::Short; + } else if (StringEqualsLiteral(style, "narrow")) { + options.style = RelativeTimeFormatOptions::Style::Narrow; + } else { + MOZ_ASSERT(StringEqualsLiteral(style, "long")); + options.style = RelativeTimeFormatOptions::Style::Long; + } + } + + if (!GetProperty(cx, internals, internals, cx->names().numeric, &value)) { + return nullptr; + } + + { + JSLinearString* numeric = value.toString()->ensureLinear(cx); + if (!numeric) { + return nullptr; + } + + if (StringEqualsLiteral(numeric, "auto")) { + options.numeric = RelativeTimeFormatOptions::Numeric::Auto; + } else { + MOZ_ASSERT(StringEqualsLiteral(numeric, "always")); + options.numeric = RelativeTimeFormatOptions::Numeric::Always; + } + } + + using RelativeTimeFormat = mozilla::intl::RelativeTimeFormat; + mozilla::Result<mozilla::UniquePtr<RelativeTimeFormat>, + mozilla::intl::ICUError> + result = RelativeTimeFormat::TryCreate(locale.get(), options); + + if (result.isOk()) { + return result.unwrap().release(); + } + + intl::ReportInternalError(cx, result.unwrapErr()); + return nullptr; +} + +static 
mozilla::intl::RelativeTimeFormat* GetOrCreateRelativeTimeFormat( + JSContext* cx, Handle<RelativeTimeFormatObject*> relativeTimeFormat) { + // Obtain a cached RelativeDateTimeFormatter object. + mozilla::intl::RelativeTimeFormat* rtf = + relativeTimeFormat->getRelativeTimeFormatter(); + if (rtf) { + return rtf; + } + + rtf = NewRelativeTimeFormatter(cx, relativeTimeFormat); + if (!rtf) { + return nullptr; + } + relativeTimeFormat->setRelativeTimeFormatter(rtf); + + intl::AddICUCellMemory(relativeTimeFormat, + RelativeTimeFormatObject::EstimatedMemoryUse); + return rtf; +} + +bool js::intl_FormatRelativeTime(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 4); + MOZ_ASSERT(args[0].isObject()); + MOZ_ASSERT(args[1].isNumber()); + MOZ_ASSERT(args[2].isString()); + MOZ_ASSERT(args[3].isBoolean()); + + Rooted<RelativeTimeFormatObject*> relativeTimeFormat(cx); + relativeTimeFormat = &args[0].toObject().as<RelativeTimeFormatObject>(); + + bool formatToParts = args[3].toBoolean(); + + // PartitionRelativeTimePattern, step 4. + double t = args[1].toNumber(); + if (!std::isfinite(t)) { + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, + JSMSG_DATE_NOT_FINITE, "RelativeTimeFormat", + formatToParts ? "formatToParts" : "format"); + return false; + } + + mozilla::intl::RelativeTimeFormat* rtf = + GetOrCreateRelativeTimeFormat(cx, relativeTimeFormat); + if (!rtf) { + return false; + } + + intl::FieldType jsUnitType; + using FormatUnit = mozilla::intl::RelativeTimeFormat::FormatUnit; + FormatUnit relTimeUnit; + { + JSLinearString* unit = args[2].toString()->ensureLinear(cx); + if (!unit) { + return false; + } + + // PartitionRelativeTimePattern, step 5. 
+ if (StringEqualsLiteral(unit, "second") || + StringEqualsLiteral(unit, "seconds")) { + jsUnitType = &JSAtomState::second; + relTimeUnit = FormatUnit::Second; + } else if (StringEqualsLiteral(unit, "minute") || + StringEqualsLiteral(unit, "minutes")) { + jsUnitType = &JSAtomState::minute; + relTimeUnit = FormatUnit::Minute; + } else if (StringEqualsLiteral(unit, "hour") || + StringEqualsLiteral(unit, "hours")) { + jsUnitType = &JSAtomState::hour; + relTimeUnit = FormatUnit::Hour; + } else if (StringEqualsLiteral(unit, "day") || + StringEqualsLiteral(unit, "days")) { + jsUnitType = &JSAtomState::day; + relTimeUnit = FormatUnit::Day; + } else if (StringEqualsLiteral(unit, "week") || + StringEqualsLiteral(unit, "weeks")) { + jsUnitType = &JSAtomState::week; + relTimeUnit = FormatUnit::Week; + } else if (StringEqualsLiteral(unit, "month") || + StringEqualsLiteral(unit, "months")) { + jsUnitType = &JSAtomState::month; + relTimeUnit = FormatUnit::Month; + } else if (StringEqualsLiteral(unit, "quarter") || + StringEqualsLiteral(unit, "quarters")) { + jsUnitType = &JSAtomState::quarter; + relTimeUnit = FormatUnit::Quarter; + } else if (StringEqualsLiteral(unit, "year") || + StringEqualsLiteral(unit, "years")) { + jsUnitType = &JSAtomState::year; + relTimeUnit = FormatUnit::Year; + } else { + if (auto unitChars = QuoteString(cx, unit, '"')) { + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, + JSMSG_INVALID_OPTION_VALUE, "unit", + unitChars.get()); + } + return false; + } + } + + using ICUError = mozilla::intl::ICUError; + if (formatToParts) { + mozilla::intl::NumberPartVector parts; + mozilla::Result<mozilla::Span<const char16_t>, ICUError> result = + rtf->formatToParts(t, relTimeUnit, parts); + + if (result.isErr()) { + intl::ReportInternalError(cx, result.unwrapErr()); + return false; + } + + RootedString str(cx, NewStringCopy<CanGC>(cx, result.unwrap())); + if (!str) { + return false; + } + + return js::intl::FormattedRelativeTimeToParts(cx, str, parts, 
jsUnitType, + args.rval()); + } + + js::intl::FormatBuffer<char16_t, intl::INITIAL_CHAR_BUFFER_SIZE> buffer(cx); + mozilla::Result<Ok, ICUError> result = rtf->format(t, relTimeUnit, buffer); + + if (result.isErr()) { + intl::ReportInternalError(cx, result.unwrapErr()); + return false; + } + + JSString* str = buffer.toString(cx); + if (!str) { + return false; + } + + args.rval().setString(str); + return true; +} diff --git a/js/src/builtin/intl/RelativeTimeFormat.h b/js/src/builtin/intl/RelativeTimeFormat.h new file mode 100644 index 0000000000..079f8d572c --- /dev/null +++ b/js/src/builtin/intl/RelativeTimeFormat.h @@ -0,0 +1,87 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef builtin_intl_RelativeTimeFormat_h +#define builtin_intl_RelativeTimeFormat_h + +#include "mozilla/intl/NumberPart.h" + +#include <stdint.h> + +#include "builtin/SelfHostingDefines.h" +#include "gc/Barrier.h" +#include "js/Class.h" +#include "vm/NativeObject.h" + +namespace mozilla::intl { +class RelativeTimeFormat; +} + +namespace js { + +class RelativeTimeFormatObject : public NativeObject { + public: + static const JSClass class_; + static const JSClass& protoClass_; + + static constexpr uint32_t INTERNALS_SLOT = 0; + static constexpr uint32_t URELATIVE_TIME_FORMAT_SLOT = 1; + static constexpr uint32_t SLOT_COUNT = 2; + + static_assert(INTERNALS_SLOT == INTL_INTERNALS_OBJECT_SLOT, + "INTERNALS_SLOT must match self-hosting define for internals " + "object slot"); + + // Estimated memory use for URelativeDateTimeFormatter (see IcuMemoryUsage). 
+ static constexpr size_t EstimatedMemoryUse = 8188; + + mozilla::intl::RelativeTimeFormat* getRelativeTimeFormatter() const { + const auto& slot = getFixedSlot(URELATIVE_TIME_FORMAT_SLOT); + if (slot.isUndefined()) { + return nullptr; + } + return static_cast<mozilla::intl::RelativeTimeFormat*>(slot.toPrivate()); + } + + void setRelativeTimeFormatter(mozilla::intl::RelativeTimeFormat* rtf) { + setFixedSlot(URELATIVE_TIME_FORMAT_SLOT, PrivateValue(rtf)); + } + + private: + static const JSClassOps classOps_; + static const ClassSpec classSpec_; + + static void finalize(JS::GCContext* gcx, JSObject* obj); +}; + +/** + * Returns a relative time formatted according to the effective locale and the + * formatting options of the given RelativeTimeFormat, either as a string or as + * an array of parts. + * + * |t| should be a finite number: the relative time value to be formatted. + * |unit| should be "second", "minute", "hour", "day", "week", "month", + * "quarter", or "year" (the plural forms, e.g. "seconds", are also accepted). + * |formatToParts| should be a boolean: true to return the formatted parts, + * false to return the formatted string. + * + * Usage: formatted = intl_FormatRelativeTime(relativeTimeFormat, t, + * unit, formatToParts) + */ +[[nodiscard]] extern bool intl_FormatRelativeTime(JSContext* cx, unsigned argc, + JS::Value* vp); + +namespace intl { + +using FieldType = js::ImmutableTenuredPtr<PropertyName*> JSAtomState::*; + +[[nodiscard]] bool FormattedRelativeTimeToParts( + JSContext* cx, HandleString str, + const mozilla::intl::NumberPartVector& parts, FieldType relativeTimeUnit, + MutableHandleValue result); + +} // namespace intl +} // namespace js + +#endif /* builtin_intl_RelativeTimeFormat_h */ diff --git a/js/src/builtin/intl/RelativeTimeFormat.js b/js/src/builtin/intl/RelativeTimeFormat.js new file mode 100644 index 0000000000..feedceaea1 --- /dev/null +++ b/js/src/builtin/intl/RelativeTimeFormat.js @@ -0,0 +1,329 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * RelativeTimeFormat internal properties. + * + * Spec: ECMAScript 402 API, RelativeTimeFormat, 1.3.3. + */ +var relativeTimeFormatInternalProperties = { + localeData: relativeTimeFormatLocaleData, + relevantExtensionKeys: ["nu"], +}; + +/** + * Returns the locale data passed to ResolveLocale for RelativeTimeFormat: the + * "nu" key maps to |getNumberingSystems| and its default to + * |intl_numberingSystem|. + */ +function relativeTimeFormatLocaleData() { + return { + nu: getNumberingSystems, + default: { + nu: intl_numberingSystem, + }, + }; +} + +/** + * Compute an internal properties object from |lazyRelativeTimeFormatData|. + * + * The returned object is created with std_Object_create(null) and carries the + * |locale|, |numberingSystem|, |style| and |numeric| properties. + */ +function resolveRelativeTimeFormatInternals(lazyRelativeTimeFormatData) { + assert(IsObject(lazyRelativeTimeFormatData), "lazy data not an object?"); + + var internalProps = std_Object_create(null); + + var RelativeTimeFormat = relativeTimeFormatInternalProperties; + + // Steps 10-11. + var r = ResolveLocale( + "RelativeTimeFormat", + lazyRelativeTimeFormatData.requestedLocales, + lazyRelativeTimeFormatData.opt, + RelativeTimeFormat.relevantExtensionKeys, + RelativeTimeFormat.localeData + ); + + // Steps 12-13. + internalProps.locale = r.locale; + + // Step 14. + internalProps.numberingSystem = r.nu; + + // Step 15 (Not relevant in our implementation). + + // Step 17. + internalProps.style = lazyRelativeTimeFormatData.style; + + // Step 19. + internalProps.numeric = lazyRelativeTimeFormatData.numeric; + + // Steps 20-24 (Not relevant in our implementation). + + return internalProps; +} + +/** + * Returns an object containing the RelativeTimeFormat internal properties of |obj|. 
+ */ +function getRelativeTimeFormatInternals(obj) { + assert( + IsObject(obj), + "getRelativeTimeFormatInternals called with non-object" + ); + assert( + intl_GuardToRelativeTimeFormat(obj) !== null, + "getRelativeTimeFormatInternals called with non-RelativeTimeFormat" + ); + + var internals = getIntlObjectInternals(obj); + assert( + internals.type === "RelativeTimeFormat", + "bad type escaped getIntlObjectInternals" + ); + + /* Reuse the internal properties if they have already been resolved. */ + var internalProps = maybeInternalProperties(internals); + if (internalProps) { + return internalProps; + } + + /* Otherwise resolve them from the lazy data and store them on |internals|. */ + internalProps = resolveRelativeTimeFormatInternals(internals.lazyData); + setInternalProperties(internals, internalProps); + return internalProps; +} + +/** + * Initializes an object as a RelativeTimeFormat. + * + * This method is complicated a moderate bit by its implementing initialization + * as a *lazy* concept. Everything that must happen now, does -- but we defer + * all the work we can until the object is actually used as a RelativeTimeFormat. + * This later work occurs in |resolveRelativeTimeFormatInternals|; steps not noted + * here occur there. + * + * Spec: ECMAScript 402 API, RelativeTimeFormat, 1.1.1. 
+ */ +function InitializeRelativeTimeFormat(relativeTimeFormat, locales, options) { + assert( + IsObject(relativeTimeFormat), + "InitializeRelativeimeFormat called with non-object" + ); + assert( + intl_GuardToRelativeTimeFormat(relativeTimeFormat) !== null, + "InitializeRelativeTimeFormat called with non-RelativeTimeFormat" + ); + + // Lazy RelativeTimeFormat data has the following structure: + // + // { + // requestedLocales: List of locales, + // style: "long" / "short" / "narrow", + // numeric: "always" / "auto", + // + // opt: // opt object computed in InitializeRelativeTimeFormat + // { + // localeMatcher: "lookup" / "best fit", + // } + // } + // + // Note that lazy data is only installed as a final step of initialization, + // so every RelativeTimeFormat lazy data object has *all* these properties, never a + // subset of them. + var lazyRelativeTimeFormatData = std_Object_create(null); + + // Step 1. + var requestedLocales = CanonicalizeLocaleList(locales); + lazyRelativeTimeFormatData.requestedLocales = requestedLocales; + + // Steps 2-3. + if (options === undefined) { + options = std_Object_create(null); + } else { + options = ToObject(options); + } + + // Step 4. + var opt = new_Record(); + + // Steps 5-6. + var matcher = GetOption( + options, + "localeMatcher", + "string", + ["lookup", "best fit"], + "best fit" + ); + opt.localeMatcher = matcher; + + // Steps 7-9. + var numberingSystem = GetOption( + options, + "numberingSystem", + "string", + undefined, + undefined + ); + if (numberingSystem !== undefined) { + numberingSystem = intl_ValidateAndCanonicalizeUnicodeExtensionType( + numberingSystem, + "numberingSystem", + "nu" + ); + } + opt.nu = numberingSystem; + + lazyRelativeTimeFormatData.opt = opt; + + // Steps 16-17. + var style = GetOption( + options, + "style", + "string", + ["long", "short", "narrow"], + "long" + ); + lazyRelativeTimeFormatData.style = style; + + // Steps 18-19. 
+ var numeric = GetOption( + options, + "numeric", + "string", + ["always", "auto"], + "always" + ); + lazyRelativeTimeFormatData.numeric = numeric; + + initializeIntlObject( + relativeTimeFormat, + "RelativeTimeFormat", + lazyRelativeTimeFormatData + ); +} + +/** + * Returns the subset of the given locale list for which this locale list has a + * matching (possibly fallback) locale. Locales appear in the same order in the + * returned list as in the input list. + * + * Spec: ECMAScript 402 API, RelativeTimeFormat, 1.3.2. + */ +function Intl_RelativeTimeFormat_supportedLocalesOf(locales /*, options*/) { + var options = ArgumentsLength() > 1 ? GetArgument(1) : undefined; + + // Step 1. + var availableLocales = "RelativeTimeFormat"; + + // Step 2. + var requestedLocales = CanonicalizeLocaleList(locales); + + // Step 3. + return SupportedLocales(availableLocales, requestedLocales, options); +} + +/** + * Returns a String value representing the written form of a relative date + * formatted according to the effective locale and the formatting options + * of this RelativeTimeFormat object. + * + * Spec: ECMAScript 402 API, RelativeTImeFormat, 1.4.3. + */ +function Intl_RelativeTimeFormat_format(value, unit) { + // Step 1. + var relativeTimeFormat = this; + + // Step 2. + if ( + !IsObject(relativeTimeFormat) || + (relativeTimeFormat = intl_GuardToRelativeTimeFormat( + relativeTimeFormat + )) === null + ) { + return callFunction( + intl_CallRelativeTimeFormatMethodIfWrapped, + this, + value, + unit, + "Intl_RelativeTimeFormat_format" + ); + } + + // Step 3. + var t = ToNumber(value); + + // Step 4. + var u = ToString(unit); + + // Step 5. + return intl_FormatRelativeTime(relativeTimeFormat, t, u, false); +} + +/** + * Returns an Array composed of the components of a relative date formatted + * according to the effective locale and the formatting options of this + * RelativeTimeFormat object. + * + * Spec: ECMAScript 402 API, RelativeTImeFormat, 1.4.4. 
+ */ +function Intl_RelativeTimeFormat_formatToParts(value, unit) { + // Step 1. + var relativeTimeFormat = this; + + // Step 2. If |this| is not a RelativeTimeFormat object, defer to intl_CallRelativeTimeFormatMethodIfWrapped with the original |this| and arguments. + if ( + !IsObject(relativeTimeFormat) || + (relativeTimeFormat = intl_GuardToRelativeTimeFormat( + relativeTimeFormat + )) === null + ) { + return callFunction( + intl_CallRelativeTimeFormatMethodIfWrapped, + this, + value, + unit, + "Intl_RelativeTimeFormat_formatToParts" + ); + } + + // Step 3. + var t = ToNumber(value); + + // Step 4. + var u = ToString(unit); + + // Step 5. The trailing |true| is the formatToParts flag of intl_FormatRelativeTime. + return intl_FormatRelativeTime(relativeTimeFormat, t, u, true); +} + +/** + * Returns the resolved options for a RelativeTimeFormat object. + * + * The result is a fresh object with the |locale|, |style|, |numeric| and + * |numberingSystem| properties copied from the object's internal properties. + * + * Spec: ECMAScript 402 API, RelativeTimeFormat, 1.4.5. + */ +function Intl_RelativeTimeFormat_resolvedOptions() { + // Step 1. + var relativeTimeFormat = this; + + // Steps 2-3. If |this| is not a RelativeTimeFormat object, defer to intl_CallRelativeTimeFormatMethodIfWrapped with the original |this|. + if ( + !IsObject(relativeTimeFormat) || + (relativeTimeFormat = intl_GuardToRelativeTimeFormat( + relativeTimeFormat + )) === null + ) { + return callFunction( + intl_CallRelativeTimeFormatMethodIfWrapped, + this, + "Intl_RelativeTimeFormat_resolvedOptions" + ); + } + + var internals = getRelativeTimeFormatInternals(relativeTimeFormat); + + // Steps 4-5. + var result = { + locale: internals.locale, + style: internals.style, + numeric: internals.numeric, + numberingSystem: internals.numberingSystem, + }; + + // Step 6. + return result; +} diff --git a/js/src/builtin/intl/SanctionedSimpleUnitIdentifiers.yaml b/js/src/builtin/intl/SanctionedSimpleUnitIdentifiers.yaml new file mode 100644 index 0000000000..97cb44c12c --- /dev/null +++ b/js/src/builtin/intl/SanctionedSimpleUnitIdentifiers.yaml @@ -0,0 +1,58 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +# 6.5.2 IsSanctionedSimpleUnitIdentifier ( unitIdentifier ) +# +# Simple units sanctioned for use in ECMAScript +# +# https://tc39.es/ecma402/#table-sanctioned-simple-unit-identifiers + +# Run |make_intl_data units| to regenerate all files which reference this list +# of sanctioned unit identifiers. + +- acre +- bit +- byte +- celsius +- centimeter +- day +- degree +- fahrenheit +- fluid-ounce +- foot +- gallon +- gigabit +- gigabyte +- gram +- hectare +- hour +- inch +- kilobit +- kilobyte +- kilogram +- kilometer +- liter +- megabit +- megabyte +- meter +- microsecond +- mile +- mile-scandinavian +- milliliter +- millimeter +- millisecond +- minute +- month +- nanosecond +- ounce +- percent +- petabyte +- pound +- second +- stone +- terabit +- terabyte +- week +- yard +- year diff --git a/js/src/builtin/intl/SanctionedSimpleUnitIdentifiersGenerated.js b/js/src/builtin/intl/SanctionedSimpleUnitIdentifiersGenerated.js new file mode 100644 index 0000000000..bc7b460f8e --- /dev/null +++ b/js/src/builtin/intl/SanctionedSimpleUnitIdentifiersGenerated.js @@ -0,0 +1,55 @@ +// Generated by make_intl_data.py. DO NOT EDIT. + +/** + * The list of currently supported simple unit identifiers. 
+ * + * Intl.NumberFormat Unified API Proposal + */ +// prettier-ignore +var sanctionedSimpleUnitIdentifiers = { + "acre": true, + "bit": true, + "byte": true, + "celsius": true, + "centimeter": true, + "day": true, + "degree": true, + "fahrenheit": true, + "fluid-ounce": true, + "foot": true, + "gallon": true, + "gigabit": true, + "gigabyte": true, + "gram": true, + "hectare": true, + "hour": true, + "inch": true, + "kilobit": true, + "kilobyte": true, + "kilogram": true, + "kilometer": true, + "liter": true, + "megabit": true, + "megabyte": true, + "meter": true, + "microsecond": true, + "mile": true, + "mile-scandinavian": true, + "milliliter": true, + "millimeter": true, + "millisecond": true, + "minute": true, + "month": true, + "nanosecond": true, + "ounce": true, + "percent": true, + "petabyte": true, + "pound": true, + "second": true, + "stone": true, + "terabit": true, + "terabyte": true, + "week": true, + "yard": true, + "year": true +}; diff --git a/js/src/builtin/intl/Segmenter.cpp b/js/src/builtin/intl/Segmenter.cpp new file mode 100644 index 0000000000..b7c82bb135 --- /dev/null +++ b/js/src/builtin/intl/Segmenter.cpp @@ -0,0 +1,988 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Intl.Segmenter implementation. 
*/ + +#include "builtin/intl/Segmenter.h" + +#include "mozilla/Assertions.h" +#include "mozilla/IntegerTypeTraits.h" +#include "mozilla/Range.h" +#include "mozilla/UniquePtr.h" + +#if defined(MOZ_ICU4X) +# include "mozilla/intl/ICU4XGeckoDataProvider.h" +# include "ICU4XGraphemeClusterSegmenter.h" +# include "ICU4XSentenceSegmenter.h" +# include "ICU4XWordSegmenter.h" +#endif + +#include "jspubtd.h" +#include "NamespaceImports.h" + +#include "builtin/Array.h" +#include "builtin/intl/CommonFunctions.h" +#include "gc/AllocKind.h" +#include "gc/GCContext.h" +#include "js/CallArgs.h" +#include "js/PropertyDescriptor.h" +#include "js/PropertySpec.h" +#include "js/RootingAPI.h" +#include "js/StableStringChars.h" +#include "js/TypeDecls.h" +#include "js/Value.h" +#include "util/Unicode.h" +#include "vm/ArrayObject.h" +#include "vm/GlobalObject.h" +#include "vm/JSContext.h" +#include "vm/PlainObject.h" +#include "vm/WellKnownAtom.h" + +#include "vm/JSObject-inl.h" +#include "vm/NativeObject-inl.h" + +using namespace js; + +const JSClassOps SegmenterObject::classOps_ = { + nullptr, // addProperty + nullptr, // delProperty + nullptr, // enumerate + nullptr, // newEnumerate + nullptr, // resolve + nullptr, // mayResolve + SegmenterObject::finalize, // finalize + nullptr, // call + nullptr, // construct + nullptr, // trace +}; + +const JSClass SegmenterObject::class_ = { + "Intl.Segmenter", + JSCLASS_HAS_RESERVED_SLOTS(SegmenterObject::SLOT_COUNT) | + JSCLASS_HAS_CACHED_PROTO(JSProto_Segmenter) | + JSCLASS_FOREGROUND_FINALIZE, + &SegmenterObject::classOps_, + &SegmenterObject::classSpec_, +}; + +const JSClass& SegmenterObject::protoClass_ = PlainObject::class_; + +static bool segmenter_toSource(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + args.rval().setString(cx->names().Segmenter); + return true; +} + +static const JSFunctionSpec segmenter_static_methods[] = { + JS_SELF_HOSTED_FN("supportedLocalesOf", 
"Intl_Segmenter_supportedLocalesOf", + 1, 0), + JS_FS_END, +}; + +static const JSFunctionSpec segmenter_methods[] = { + JS_SELF_HOSTED_FN("resolvedOptions", "Intl_Segmenter_resolvedOptions", 0, + 0), + JS_SELF_HOSTED_FN("segment", "Intl_Segmenter_segment", 1, 0), + JS_FN("toSource", segmenter_toSource, 0, 0), + JS_FS_END, +}; + +static const JSPropertySpec segmenter_properties[] = { + JS_STRING_SYM_PS(toStringTag, "Intl.Segmenter", JSPROP_READONLY), + JS_PS_END, +}; + +static bool Segmenter(JSContext* cx, unsigned argc, Value* vp); + +const ClassSpec SegmenterObject::classSpec_ = { + GenericCreateConstructor<Segmenter, 0, gc::AllocKind::FUNCTION>, + GenericCreatePrototype<SegmenterObject>, + segmenter_static_methods, + nullptr, + segmenter_methods, + segmenter_properties, + nullptr, + ClassSpec::DontDefineConstructor, +}; + +/** + * Intl.Segmenter ([ locales [ , options ]]) + */ +static bool Segmenter(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + + // Step 1. + if (!ThrowIfNotConstructing(cx, args, "Intl.Segmenter")) { + return false; + } + + // Steps 2-3 (Inlined 9.1.14, OrdinaryCreateFromConstructor). + Rooted<JSObject*> proto(cx); + if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_Segmenter, + &proto)) { + return false; + } + + Rooted<SegmenterObject*> segmenter(cx); + segmenter = NewObjectWithClassProto<SegmenterObject>(cx, proto); + if (!segmenter) { + return false; + } + + HandleValue locales = args.get(0); + HandleValue options = args.get(1); + + // Steps 4-13. + if (!intl::InitializeObject(cx, segmenter, cx->names().InitializeSegmenter, + locales, options)) { + return false; + } + + // Step 14. 
+ args.rval().setObject(*segmenter); + return true; +} + +const JSClassOps SegmentsObject::classOps_ = { + nullptr, // addProperty + nullptr, // delProperty + nullptr, // enumerate + nullptr, // newEnumerate + nullptr, // resolve + nullptr, // mayResolve + SegmentsObject::finalize, // finalize + nullptr, // call + nullptr, // construct + nullptr, // trace +}; + +const JSClass SegmentsObject::class_ = { + "Intl.Segments", + JSCLASS_HAS_RESERVED_SLOTS(SegmentsObject::SLOT_COUNT) | + JSCLASS_FOREGROUND_FINALIZE, + &SegmentsObject::classOps_, +}; + +static const JSFunctionSpec segments_methods[] = { + JS_SELF_HOSTED_FN("containing", "Intl_Segments_containing", 1, 0), + JS_SELF_HOSTED_SYM_FN(iterator, "Intl_Segments_iterator", 0, 0), + JS_FS_END, +}; + +bool GlobalObject::initSegmentsProto(JSContext* cx, + Handle<GlobalObject*> global) { + Rooted<JSObject*> proto( + cx, GlobalObject::createBlankPrototype<PlainObject>(cx, global)); + if (!proto) { + return false; + } + + if (!JS_DefineFunctions(cx, proto, segments_methods)) { + return false; + } + + global->initBuiltinProto(ProtoKind::SegmentsProto, proto); + return true; +} + +const JSClassOps SegmentIteratorObject::classOps_ = { + nullptr, // addProperty + nullptr, // delProperty + nullptr, // enumerate + nullptr, // newEnumerate + nullptr, // resolve + nullptr, // mayResolve + SegmentIteratorObject::finalize, // finalize + nullptr, // call + nullptr, // construct + nullptr, // trace +}; + +const JSClass SegmentIteratorObject::class_ = { + "Intl.SegmentIterator", + JSCLASS_HAS_RESERVED_SLOTS(SegmentIteratorObject::SLOT_COUNT) | + JSCLASS_FOREGROUND_FINALIZE, + &SegmentIteratorObject::classOps_, +}; + +static const JSFunctionSpec segment_iterator_methods[] = { + JS_SELF_HOSTED_FN("next", "Intl_SegmentIterator_next", 0, 0), + JS_FS_END, +}; + +static const JSPropertySpec segment_iterator_properties[] = { + JS_STRING_SYM_PS(toStringTag, "Segmenter String Iterator", JSPROP_READONLY), + JS_PS_END, +}; + +bool 
GlobalObject::initSegmentIteratorProto(JSContext* cx, + Handle<GlobalObject*> global) { + Rooted<JSObject*> iteratorProto( + cx, GlobalObject::getOrCreateIteratorPrototype(cx, global)); + if (!iteratorProto) { + return false; + } + + Rooted<JSObject*> proto( + cx, GlobalObject::createBlankPrototypeInheriting<PlainObject>( + cx, iteratorProto)); + if (!proto) { + return false; + } + + if (!JS_DefineFunctions(cx, proto, segment_iterator_methods)) { + return false; + } + + if (!JS_DefineProperties(cx, proto, segment_iterator_properties)) { + return false; + } + + global->initBuiltinProto(ProtoKind::SegmentIteratorProto, proto); + return true; +} + +struct Boundaries { + // Start index of this segmentation boundary. + int32_t startIndex = 0; + + // End index of this segmentation boundary. + int32_t endIndex = 0; + + // |true| if the segment is word-like. (Only used for word segmentation.) + bool isWordLike = false; +}; + +/** + * Find the segmentation boundary for the string character whose position is + * |index|. The end position of the last segment boundary is |previousIndex|. + */ +template <class T> +static Boundaries FindBoundaryFrom(const T& iter, int32_t previousIndex, + int32_t index) { + MOZ_ASSERT(previousIndex <= index, + "previous index must not exceed the search index"); + + int32_t previous = previousIndex; + while (true) { + // Find the next possible break index. + int32_t next = iter.next(); + + // If |next| is larger than the search index, we've found our segment end + // index. 
+ if (next > index) { + return {previous, next, iter.isWordLike()}; + } + + // Otherwise store |next| as the start index of the next segment, + previous = next; + } +} + +// TODO: Consider switching to the ICU4X C++ headers when the C++ headers +// are in better shape: https://github.com/rust-diplomat/diplomat/issues/280 + +template <typename Interface> +class SegmenterBreakIteratorType { + typename Interface::BreakIterator* impl_; + + public: + explicit SegmenterBreakIteratorType(void* impl) + : impl_(static_cast<typename Interface::BreakIterator*>(impl)) { + MOZ_ASSERT(impl); + } + + int32_t next() const { return Interface::next(impl_); } + + bool isWordLike() const { return Interface::isWordLike(impl_); } +}; + +#if defined(MOZ_ICU4X) +// Each SegmenterBreakIterator interface contains the following definitions: +// +// - BreakIterator: Type of the ICU4X break iterator. +// - Segmenter: Type of the ICU4X segmenter. +// - Char: Character type, either `JS::Latin1Char` or `char16_t`. +// - create: Static method to create a new instance of `BreakIterator`. +// - destroy: Static method to destroy an instance of `BreakIterator`. +// - next: Static method to fetch the next break iteration index. +// - isWordLike: Static method to determine if the current segment is word-like. +// +// +// Each Segmenter interface contains the following definitions: +// +// - Segmenter: Type of the ICU4X segmenter. +// - BreakIteratorLatin1: SegmenterBreakIterator interface to Latin1 strings. +// - BreakIteratorTwoByte: SegmenterBreakIterator interface to TwoByte strings. +// - create: Static method to create a new instance of `Segmenter`. +// - destroy: Static method to destroy an instance of `Segmenter`. 
+ +struct GraphemeClusterSegmenterBreakIteratorLatin1 { + using BreakIterator = capi::ICU4XGraphemeClusterBreakIteratorLatin1; + using Segmenter = capi::ICU4XGraphemeClusterSegmenter; + using Char = JS::Latin1Char; + + static constexpr auto& create = + capi::ICU4XGraphemeClusterSegmenter_segment_latin1; + static constexpr auto& destroy = + capi::ICU4XGraphemeClusterBreakIteratorLatin1_destroy; + static constexpr auto& next = + capi::ICU4XGraphemeClusterBreakIteratorLatin1_next; + + static bool isWordLike(const BreakIterator*) { return false; } +}; + +struct GraphemeClusterSegmenterBreakIteratorTwoByte { + using BreakIterator = capi::ICU4XGraphemeClusterBreakIteratorUtf16; + using Segmenter = capi::ICU4XGraphemeClusterSegmenter; + using Char = char16_t; + + static constexpr auto& create = + capi::ICU4XGraphemeClusterSegmenter_segment_utf16; + static constexpr auto& destroy = + capi::ICU4XGraphemeClusterBreakIteratorUtf16_destroy; + static constexpr auto& next = + capi::ICU4XGraphemeClusterBreakIteratorUtf16_next; + + static bool isWordLike(const BreakIterator*) { return false; } +}; + +struct GraphemeClusterSegmenter { + using Segmenter = capi::ICU4XGraphemeClusterSegmenter; + using BreakIteratorLatin1 = + SegmenterBreakIteratorType<GraphemeClusterSegmenterBreakIteratorLatin1>; + using BreakIteratorTwoByte = + SegmenterBreakIteratorType<GraphemeClusterSegmenterBreakIteratorTwoByte>; + + static constexpr auto& create = capi::ICU4XGraphemeClusterSegmenter_create; + static constexpr auto& destroy = capi::ICU4XGraphemeClusterSegmenter_destroy; +}; + +struct WordSegmenterBreakIteratorLatin1 { + using BreakIterator = capi::ICU4XWordBreakIteratorLatin1; + using Segmenter = capi::ICU4XWordSegmenter; + using Char = JS::Latin1Char; + + static constexpr auto& create = capi::ICU4XWordSegmenter_segment_latin1; + static constexpr auto& destroy = capi::ICU4XWordBreakIteratorLatin1_destroy; + static constexpr auto& next = capi::ICU4XWordBreakIteratorLatin1_next; + static constexpr 
auto& isWordLike = + capi::ICU4XWordBreakIteratorLatin1_is_word_like; +}; + +struct WordSegmenterBreakIteratorTwoByte { + using BreakIterator = capi::ICU4XWordBreakIteratorUtf16; + using Segmenter = capi::ICU4XWordSegmenter; + using Char = char16_t; + + static constexpr auto& create = capi::ICU4XWordSegmenter_segment_utf16; + static constexpr auto& destroy = capi::ICU4XWordBreakIteratorUtf16_destroy; + static constexpr auto& next = capi::ICU4XWordBreakIteratorUtf16_next; + static constexpr auto& isWordLike = + capi::ICU4XWordBreakIteratorUtf16_is_word_like; +}; + +struct WordSegmenter { + using Segmenter = capi::ICU4XWordSegmenter; + using BreakIteratorLatin1 = + SegmenterBreakIteratorType<WordSegmenterBreakIteratorLatin1>; + using BreakIteratorTwoByte = + SegmenterBreakIteratorType<WordSegmenterBreakIteratorTwoByte>; + + static constexpr auto& create = capi::ICU4XWordSegmenter_create_auto; + static constexpr auto& destroy = capi::ICU4XWordSegmenter_destroy; +}; + +struct SentenceSegmenterBreakIteratorLatin1 { + using BreakIterator = capi::ICU4XSentenceBreakIteratorLatin1; + using Segmenter = capi::ICU4XSentenceSegmenter; + using Char = JS::Latin1Char; + + static constexpr auto& create = capi::ICU4XSentenceSegmenter_segment_latin1; + static constexpr auto& destroy = + capi::ICU4XSentenceBreakIteratorLatin1_destroy; + static constexpr auto& next = capi::ICU4XSentenceBreakIteratorLatin1_next; + + static bool isWordLike(const BreakIterator*) { return false; } +}; + +struct SentenceSegmenterBreakIteratorTwoByte { + using BreakIterator = capi::ICU4XSentenceBreakIteratorUtf16; + using Segmenter = capi::ICU4XSentenceSegmenter; + using Char = char16_t; + + static constexpr auto& create = capi::ICU4XSentenceSegmenter_segment_utf16; + static constexpr auto& destroy = + capi::ICU4XSentenceBreakIteratorUtf16_destroy; + static constexpr auto& next = capi::ICU4XSentenceBreakIteratorUtf16_next; + + static bool isWordLike(const BreakIterator*) { return false; } +}; + +struct 
SentenceSegmenter { + using Segmenter = capi::ICU4XSentenceSegmenter; + using BreakIteratorLatin1 = + SegmenterBreakIteratorType<SentenceSegmenterBreakIteratorLatin1>; + using BreakIteratorTwoByte = + SegmenterBreakIteratorType<SentenceSegmenterBreakIteratorTwoByte>; + + static constexpr auto& create = capi::ICU4XSentenceSegmenter_create; + static constexpr auto& destroy = capi::ICU4XSentenceSegmenter_destroy; +}; +#endif + +/** + * Create a new ICU4X segmenter instance. + */ +template <typename Interface> +static typename Interface::Segmenter* CreateSegmenter(JSContext* cx) { + auto result = Interface::create(mozilla::intl::GetDataProvider()); + if (!result.is_ok) { + intl::ReportInternalError(cx); + return nullptr; + } + return result.ok; +} + +static bool EnsureInternalsResolved(JSContext* cx, + Handle<SegmenterObject*> segmenter) { + if (segmenter->getLocale()) { + return true; + } + + Rooted<JS::Value> value(cx); + + Rooted<JSObject*> internals(cx, intl::GetInternalsObject(cx, segmenter)); + if (!internals) { + return false; + } + + if (!GetProperty(cx, internals, internals, cx->names().locale, &value)) { + return false; + } + Rooted<JSString*> locale(cx, value.toString()); + + if (!GetProperty(cx, internals, internals, cx->names().granularity, &value)) { + return false; + } + + SegmenterGranularity granularity; + { + JSLinearString* linear = value.toString()->ensureLinear(cx); + if (!linear) { + return false; + } + + if (StringEqualsLiteral(linear, "grapheme")) { + granularity = SegmenterGranularity::Grapheme; + } else if (StringEqualsLiteral(linear, "word")) { + granularity = SegmenterGranularity::Word; + } else { + MOZ_ASSERT(StringEqualsLiteral(linear, "sentence")); + granularity = SegmenterGranularity::Sentence; + } + } + +#if defined(MOZ_ICU4X) + switch (granularity) { + case SegmenterGranularity::Grapheme: { + auto* seg = CreateSegmenter<GraphemeClusterSegmenter>(cx); + if (!seg) { + return false; + } + segmenter->setSegmenter(seg); + break; + } + case 
SegmenterGranularity::Word: { + auto* seg = CreateSegmenter<WordSegmenter>(cx); + if (!seg) { + return false; + } + segmenter->setSegmenter(seg); + break; + } + case SegmenterGranularity::Sentence: { + auto* seg = CreateSegmenter<SentenceSegmenter>(cx); + if (!seg) { + return false; + } + segmenter->setSegmenter(seg); + break; + } + } +#endif + + segmenter->setLocale(locale); + segmenter->setGranularity(granularity); + + return true; +} + +/** + * Destroy an ICU4X segmenter instance. + */ +template <typename Interface> +static void DestroySegmenter(void* seg) { + auto* segmenter = static_cast<typename Interface::Segmenter*>(seg); + Interface::destroy(segmenter); +} + +void SegmenterObject::finalize(JS::GCContext* gcx, JSObject* obj) { + MOZ_ASSERT(gcx->onMainThread()); + + auto& segmenter = obj->as<SegmenterObject>(); + if (void* seg = segmenter.getSegmenter()) { +#if defined(MOZ_ICU4X) + switch (segmenter.getGranularity()) { + case SegmenterGranularity::Grapheme: { + DestroySegmenter<GraphemeClusterSegmenter>(seg); + break; + } + case SegmenterGranularity::Word: { + DestroySegmenter<WordSegmenter>(seg); + break; + } + case SegmenterGranularity::Sentence: { + DestroySegmenter<SentenceSegmenter>(seg); + break; + } + } +#else + MOZ_CRASH("ICU4X disabled"); +#endif + } +} + +/** + * Destroy an ICU4X break iterator instance. + */ +template <typename Interface> +static void DestroyBreakIterator(void* brk) { + auto* breakIterator = static_cast<typename Interface::BreakIterator*>(brk); + Interface::destroy(breakIterator); +} + +/** + * Destroy the ICU4X break iterator attached to |segments|. 
+ */ +template <typename T> +static void DestroyBreakIterator(const T* segments) { +#if defined(MOZ_ICU4X) + void* brk = segments->getBreakIterator(); + MOZ_ASSERT(brk); + + /* Select the destroy function matching the granularity and character type. */ + bool isLatin1 = segments->getString()->hasLatin1Chars(); + + switch (segments->getGranularity()) { + case SegmenterGranularity::Grapheme: { + if (isLatin1) { + DestroyBreakIterator<GraphemeClusterSegmenterBreakIteratorLatin1>(brk); + } else { + DestroyBreakIterator<GraphemeClusterSegmenterBreakIteratorTwoByte>(brk); + } + break; + } + case SegmenterGranularity::Word: { + if (isLatin1) { + DestroyBreakIterator<WordSegmenterBreakIteratorLatin1>(brk); + } else { + DestroyBreakIterator<WordSegmenterBreakIteratorTwoByte>(brk); + } + break; + } + case SegmenterGranularity::Sentence: { + if (isLatin1) { + DestroyBreakIterator<SentenceSegmenterBreakIteratorLatin1>(brk); + } else { + DestroyBreakIterator<SentenceSegmenterBreakIteratorTwoByte>(brk); + } + break; + } + } +#else + MOZ_CRASH("ICU4X disabled"); +#endif +} + +/** + * Finalizer for SegmentsObject: frees the copied string characters, removing + * the ICU cell-memory accounting that EnsureStringChars added for them, and + * destroys the attached break iterator, if any. + */ +void SegmentsObject::finalize(JS::GCContext* gcx, JSObject* obj) { + MOZ_ASSERT(gcx->onMainThread()); + + auto* segments = &obj->as<SegmentsObject>(); + bool isLatin1 = segments->getString()->hasLatin1Chars(); + + if (void* chars = segments->getStringChars()) { + size_t length = segments->getString()->length(); + if (isLatin1) { + intl::RemoveICUCellMemory(gcx, segments, length * sizeof(JS::Latin1Char)); + } else { + intl::RemoveICUCellMemory(gcx, segments, length * sizeof(char16_t)); + } + js_free(chars); + } + + if (segments->getBreakIterator()) { + DestroyBreakIterator(segments); + } +} + +/** + * Finalizer for SegmentIteratorObject: frees the copied string characters, + * removing the ICU cell-memory accounting that EnsureStringChars added for + * them, and destroys the attached break iterator, if any. + */ +void SegmentIteratorObject::finalize(JS::GCContext* gcx, JSObject* obj) { + MOZ_ASSERT(gcx->onMainThread()); + + auto* iterator = &obj->as<SegmentIteratorObject>(); + bool isLatin1 = iterator->getString()->hasLatin1Chars(); + + if (void* chars = iterator->getStringChars()) { + size_t length = iterator->getString()->length(); + if (isLatin1) { + intl::RemoveICUCellMemory(gcx, iterator, length * 
sizeof(JS::Latin1Char)); + } else { + intl::RemoveICUCellMemory(gcx, iterator, length * sizeof(char16_t)); + } + js_free(chars); + } + + if (iterator->getBreakIterator()) { + DestroyBreakIterator(iterator); + } +} + +/** + * Wrap the break iterator attached to |segments| in |Iterator| and find the + * boundaries of the segment containing |index|, passing the current index of + * |segments| as the previous index. |index| must lie within the string. + */ +template <typename Iterator, typename T> +static Boundaries FindBoundaryFrom(Handle<T*> segments, int32_t index) { + MOZ_ASSERT(0 <= index && uint32_t(index) < segments->getString()->length()); + + Iterator iter(segments->getBreakIterator()); + return FindBoundaryFrom(iter, segments->getIndex(), index);  +} + +/** + * Find the grapheme cluster boundaries around |index|, using the Latin-1 or + * two-byte break iterator type depending on the string's characters. Crashes + * when ICU4X is disabled. + */ +template <typename T> +static Boundaries GraphemeBoundaries(Handle<T*> segments, int32_t index) { +#if defined(MOZ_ICU4X) + if (segments->getString()->hasLatin1Chars()) { + return FindBoundaryFrom<GraphemeClusterSegmenter::BreakIteratorLatin1>( + segments, index); + } + return FindBoundaryFrom<GraphemeClusterSegmenter::BreakIteratorTwoByte>( + segments, index); +#else + MOZ_CRASH("ICU4X disabled"); +#endif +} + +/** + * Find the word boundaries around |index|, using the Latin-1 or two-byte + * break iterator type depending on the string's characters. Crashes when + * ICU4X is disabled. + */ +template <typename T> +static Boundaries WordBoundaries(Handle<T*> segments, int32_t index) { +#if defined(MOZ_ICU4X) + if (segments->getString()->hasLatin1Chars()) { + return FindBoundaryFrom<WordSegmenter::BreakIteratorLatin1>(segments, + index); + } + return FindBoundaryFrom<WordSegmenter::BreakIteratorTwoByte>(segments, index); +#else + MOZ_CRASH("ICU4X disabled"); +#endif +} + +/** + * Find the sentence boundaries around |index|, using the Latin-1 or two-byte + * break iterator type depending on the string's characters. Crashes when + * ICU4X is disabled. + */ +template <typename T> +static Boundaries SentenceBoundaries(Handle<T*> segments, int32_t index) { +#if defined(MOZ_ICU4X) + if (segments->getString()->hasLatin1Chars()) { + return FindBoundaryFrom<SentenceSegmenter::BreakIteratorLatin1>(segments, + index); + } + return FindBoundaryFrom<SentenceSegmenter::BreakIteratorTwoByte>(segments, + index); +#else + MOZ_CRASH("ICU4X disabled"); +#endif +} + +/** + * Ensure the string characters have been copied into |segments| in preparation + * for passing the string characters to ICU4X. 
+ */ +template <typename T> +static bool EnsureStringChars(JSContext* cx, Handle<T*> segments) { + if (segments->hasStringChars()) { + return true; + } + + Rooted<JSLinearString*> string(cx, segments->getString()->ensureLinear(cx)); + if (!string) { + return false; + } + + size_t length = string->length(); + + JS::AutoCheckCannotGC nogc; + if (string->hasLatin1Chars()) { + auto chars = DuplicateString(cx, string->latin1Chars(nogc), length); + if (!chars) { + return false; + } + segments->setLatin1Chars(chars.release()); + + intl::AddICUCellMemory(segments, length * sizeof(JS::Latin1Char)); + } else { + auto chars = DuplicateString(cx, string->twoByteChars(nogc), length); + if (!chars) { + return false; + } + segments->setTwoByteChars(chars.release()); + + intl::AddICUCellMemory(segments, length * sizeof(char16_t)); + } + return true; +} + +/** + * Create a new ICU4X break iterator instance. + */ +template <typename Interface, typename T> +static auto* CreateBreakIterator(Handle<T*> segments) { + void* segmenter = segments->getSegmenter()->getSegmenter(); + MOZ_ASSERT(segmenter); + + void* chars = segments->getStringChars(); + MOZ_ASSERT(chars); + + size_t length = segments->getString()->length(); + + using Unsigned = typename mozilla::UnsignedStdintTypeForSize<sizeof( + typename Interface::Char)>::Type; + + auto* seg = static_cast<const typename Interface::Segmenter*>(segmenter); + auto* ch = static_cast<const Unsigned*>(chars); + return Interface::create(seg, ch, length); +} + +/** + * Ensure |segments| has a break iterator whose current segment index is at most + * |index|. + */ +template <typename T> +static bool EnsureBreakIterator(JSContext* cx, Handle<T*> segments, + int32_t index) { + if (segments->getBreakIterator()) { + // Reuse the break iterator if its current segment index is at most |index|. + if (index >= segments->getIndex()) { + return true; + } + + // Reverse iteration not supported. Destroy the previous break iterator and + // start from fresh. 
+ DestroyBreakIterator(segments.get()); + + // Reset internal state. + segments->setBreakIterator(nullptr); + segments->setIndex(0); + } + + // Ensure the string characters can be passed to ICU4X. + if (!EnsureStringChars(cx, segments)) { + return false; + } + +#if defined(MOZ_ICU4X) + bool isLatin1 = segments->getString()->hasLatin1Chars(); + + // Create a new break iterator based on the granularity and character type. + // + // Start from nullptr so that the release assertion below fails + // deterministically, instead of reading an indeterminate pointer, should the + // stored granularity ever match none of the cases. + void* brk = nullptr; + switch (segments->getGranularity()) { + case SegmenterGranularity::Grapheme: { + if (isLatin1) { + brk = CreateBreakIterator<GraphemeClusterSegmenterBreakIteratorLatin1>( + segments); + } else { + brk = CreateBreakIterator<GraphemeClusterSegmenterBreakIteratorTwoByte>( + segments); + } + break; + } + case SegmenterGranularity::Word: { + if (isLatin1) { + brk = CreateBreakIterator<WordSegmenterBreakIteratorLatin1>(segments); + } else { + brk = CreateBreakIterator<WordSegmenterBreakIteratorTwoByte>(segments); + } + break; + } + case SegmenterGranularity::Sentence: { + if (isLatin1) { + brk = + CreateBreakIterator<SentenceSegmenterBreakIteratorLatin1>(segments); + } else { + brk = CreateBreakIterator<SentenceSegmenterBreakIteratorTwoByte>( + segments); + } + break; + } + } + + // The break iterator must have been created; store it on |segments|. + MOZ_RELEASE_ASSERT(brk); + segments->setBreakIterator(brk); + + MOZ_ASSERT(segments->getIndex() == 0, "index is initially zero"); + + return true; +#else + MOZ_CRASH("ICU4X disabled"); +#endif +} + +/** + * Create the boundaries result array for self-hosted code. 
+ * + * The result is a dense three-element array: element 0 is the start index, + * element 1 is the end index, and element 2 is |isWordLike| as a boolean for + * word granularity and undefined otherwise. Returns nullptr if the array + * can't be allocated. + */ +static ArrayObject* CreateBoundaries(JSContext* cx, Boundaries boundaries, + SegmenterGranularity granularity) { + auto [startIndex, endIndex, isWordLike] = boundaries; + + auto* result = NewDenseFullyAllocatedArray(cx, 3); + if (!result) { + return nullptr; + } + result->setDenseInitializedLength(3); + result->initDenseElement(0, Int32Value(startIndex)); + result->initDenseElement(1, Int32Value(endIndex)); + if (granularity == SegmenterGranularity::Word) { + result->initDenseElement(2, BooleanValue(isWordLike)); + } else { + result->initDenseElement(2, UndefinedValue()); + } + return result; +} + +/** + * Find the segment boundaries for |index| and return them as a boundaries + * result array (see CreateBoundaries). + * + * Makes sure |segments| has a usable break iterator, dispatches on the + * granularity stored in |segments|, and records the end index of the found + * segment on |segments|. Returns nullptr if the break iterator or the result + * array can't be created. + */ +template <typename T> +static ArrayObject* FindSegmentBoundaries(JSContext* cx, Handle<T*> segments, + int32_t index) { + // Ensure break iteration can start at |index|. + if (!EnsureBreakIterator(cx, segments, index)) { + return nullptr; + } + + // Find the actual segment boundaries. + Boundaries boundaries{}; + switch (segments->getGranularity()) { + case SegmenterGranularity::Grapheme: { + boundaries = GraphemeBoundaries(segments, index); + break; + } + case SegmenterGranularity::Word: { + boundaries = WordBoundaries(segments, index); + break; + } + case SegmenterGranularity::Sentence: { + boundaries = SentenceBoundaries(segments, index); + break; + } + } + + // Remember the end index of the current boundary segment. + segments->setIndex(boundaries.endIndex); + + return CreateBoundaries(cx, boundaries, segments->getGranularity()); +} + +bool js::intl_CreateSegmentsObject(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 2); + + Rooted<SegmenterObject*> segmenter(cx, + &args[0].toObject().as<SegmenterObject>()); + Rooted<JSString*> string(cx, args[1].toString()); + + // Ensure the internal properties are resolved. 
+ if (!EnsureInternalsResolved(cx, segmenter)) { + return false; + } + + Rooted<JSObject*> proto( + cx, GlobalObject::getOrCreateSegmentsPrototype(cx, cx->global())); + if (!proto) { + return false; + } + + auto* segments = NewObjectWithGivenProto<SegmentsObject>(cx, proto); + if (!segments) { + return false; + } + + segments->setSegmenter(segmenter); + segments->setGranularity(segmenter->getGranularity()); + segments->setString(string); + segments->setIndex(0); + + args.rval().setObject(*segments); + return true; +} + +bool js::intl_CreateSegmentIterator(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 1); + + Rooted<SegmentsObject*> segments(cx, + &args[0].toObject().as<SegmentsObject>()); + + Rooted<JSObject*> proto( + cx, GlobalObject::getOrCreateSegmentIteratorPrototype(cx, cx->global())); + if (!proto) { + return false; + } + + auto* iterator = NewObjectWithGivenProto<SegmentIteratorObject>(cx, proto); + if (!iterator) { + return false; + } + + iterator->setSegmenter(segments->getSegmenter()); + iterator->setGranularity(segments->getGranularity()); + iterator->setString(segments->getString()); + iterator->setIndex(0); + + args.rval().setObject(*iterator); + return true; +} + +bool js::intl_FindSegmentBoundaries(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 2); + + Rooted<SegmentsObject*> segments(cx, + &args[0].toObject().as<SegmentsObject>()); + + int32_t index = args[1].toInt32(); + MOZ_ASSERT(index >= 0); + MOZ_ASSERT(uint32_t(index) < segments->getString()->length()); + + auto* result = FindSegmentBoundaries( + cx, static_cast<Handle<SegmentsObject*>>(segments), index); + if (!result) { + return false; + } + + args.rval().setObject(*result); + return true; +} + +bool js::intl_FindNextSegmentBoundaries(JSContext* cx, unsigned argc, + Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 
1); + + Rooted<SegmentIteratorObject*> iterator( + cx, &args[0].toObject().as<SegmentIteratorObject>()); + + int32_t index = iterator->getIndex(); + MOZ_ASSERT(index >= 0); + MOZ_ASSERT(uint32_t(index) < iterator->getString()->length()); + + auto* result = FindSegmentBoundaries( + cx, static_cast<Handle<SegmentIteratorObject*>>(iterator), index); + if (!result) { + return false; + } + + args.rval().setObject(*result); + return true; +} diff --git a/js/src/builtin/intl/Segmenter.h b/js/src/builtin/intl/Segmenter.h new file mode 100644 index 0000000000..1567a3f774 --- /dev/null +++ b/js/src/builtin/intl/Segmenter.h @@ -0,0 +1,329 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef builtin_intl_Segmenter_h +#define builtin_intl_Segmenter_h + +#include <stdint.h> + +#include "builtin/SelfHostingDefines.h" +#include "js/Class.h" +#include "js/Value.h" +#include "vm/NativeObject.h" + +struct JS_PUBLIC_API JSContext; +class JSString; + +namespace JS { +class GCContext; +} + +namespace js { + +enum class SegmenterGranularity : int8_t { Grapheme, Word, Sentence }; + +class SegmenterObject : public NativeObject { + public: + static const JSClass class_; + static const JSClass& protoClass_; + + static constexpr uint32_t INTERNALS_SLOT = 0; + static constexpr uint32_t LOCALE_SLOT = 1; + static constexpr uint32_t GRANULARITY_SLOT = 2; + static constexpr uint32_t SEGMENTER_SLOT = 3; + static constexpr uint32_t SLOT_COUNT = 4; + + static_assert(INTERNALS_SLOT == INTL_INTERNALS_OBJECT_SLOT, + "INTERNALS_SLOT must match self-hosting define for internals " + "object slot"); + + JSString* getLocale() const { + const auto& slot = getFixedSlot(LOCALE_SLOT); + if (slot.isUndefined()) { + return nullptr; + 
} + return slot.toString(); + } + + void setLocale(JSString* locale) { + setFixedSlot(LOCALE_SLOT, StringValue(locale)); + } + + SegmenterGranularity getGranularity() const { + const auto& slot = getFixedSlot(GRANULARITY_SLOT); + if (slot.isUndefined()) { + return SegmenterGranularity::Grapheme; + } + return static_cast<SegmenterGranularity>(slot.toInt32()); + } + + void setGranularity(SegmenterGranularity granularity) { + setFixedSlot(GRANULARITY_SLOT, + Int32Value(static_cast<int32_t>(granularity))); + } + + void* getSegmenter() const { + const auto& slot = getFixedSlot(SEGMENTER_SLOT); + if (slot.isUndefined()) { + return nullptr; + } + return slot.toPrivate(); + } + + void setSegmenter(void* brk) { + setFixedSlot(SEGMENTER_SLOT, PrivateValue(brk)); + } + + private: + static const ClassSpec classSpec_; + static const JSClassOps classOps_; + + static void finalize(JS::GCContext* gcx, JSObject* obj); +}; + +class SegmentsObject : public NativeObject { + public: + static const JSClass class_; + + static constexpr uint32_t SEGMENTER_SLOT = 0; + static constexpr uint32_t STRING_SLOT = 1; + static constexpr uint32_t STRING_CHARS_SLOT = 2; + static constexpr uint32_t INDEX_SLOT = 3; + static constexpr uint32_t GRANULARITY_SLOT = 4; + static constexpr uint32_t BREAK_ITERATOR_SLOT = 5; + static constexpr uint32_t SLOT_COUNT = 6; + + static_assert(STRING_SLOT == INTL_SEGMENTS_STRING_SLOT, + "STRING_SLOT must match self-hosting define for string slot"); + + SegmenterObject* getSegmenter() const { + const auto& slot = getFixedSlot(SEGMENTER_SLOT); + if (slot.isUndefined()) { + return nullptr; + } + return &slot.toObject().as<SegmenterObject>(); + } + + void setSegmenter(SegmenterObject* segmenter) { + setFixedSlot(SEGMENTER_SLOT, ObjectValue(*segmenter)); + } + + JSString* getString() const { + const auto& slot = getFixedSlot(STRING_SLOT); + if (slot.isUndefined()) { + return nullptr; + } + return slot.toString(); + } + + void setString(JSString* str) { 
setFixedSlot(STRING_SLOT, StringValue(str)); } + + bool hasStringChars() const { + return !getFixedSlot(STRING_CHARS_SLOT).isUndefined(); + } + + void* getStringChars() const { + const auto& slot = getFixedSlot(STRING_CHARS_SLOT); + if (slot.isUndefined()) { + return nullptr; + } + return slot.toPrivate(); + } + + void setLatin1Chars(JS::Latin1Char* chars) { + setFixedSlot(STRING_CHARS_SLOT, PrivateValue(chars)); + } + + void setTwoByteChars(char16_t* chars) { + setFixedSlot(STRING_CHARS_SLOT, PrivateValue(chars)); + } + + int32_t getIndex() const { + const auto& slot = getFixedSlot(INDEX_SLOT); + if (slot.isUndefined()) { + return 0; + } + return slot.toInt32(); + } + + void setIndex(int32_t index) { setFixedSlot(INDEX_SLOT, Int32Value(index)); } + + SegmenterGranularity getGranularity() const { + const auto& slot = getFixedSlot(GRANULARITY_SLOT); + if (slot.isUndefined()) { + return SegmenterGranularity::Grapheme; + } + return static_cast<SegmenterGranularity>(slot.toInt32()); + } + + void setGranularity(SegmenterGranularity granularity) { + setFixedSlot(GRANULARITY_SLOT, + Int32Value(static_cast<int32_t>(granularity))); + } + + void* getBreakIterator() const { + const auto& slot = getFixedSlot(BREAK_ITERATOR_SLOT); + if (slot.isUndefined()) { + return nullptr; + } + return slot.toPrivate(); + } + + void setBreakIterator(void* brk) { + setFixedSlot(BREAK_ITERATOR_SLOT, PrivateValue(brk)); + } + + private: + static const JSClassOps classOps_; + + static void finalize(JS::GCContext* gcx, JSObject* obj); +}; + +class SegmentIteratorObject : public NativeObject { + public: + static const JSClass class_; + + static constexpr uint32_t SEGMENTER_SLOT = 0; + static constexpr uint32_t STRING_SLOT = 1; + static constexpr uint32_t STRING_CHARS_SLOT = 2; + static constexpr uint32_t INDEX_SLOT = 3; + static constexpr uint32_t GRANULARITY_SLOT = 4; + static constexpr uint32_t BREAK_ITERATOR_SLOT = 5; + static constexpr uint32_t SLOT_COUNT = 6; + + static_assert(STRING_SLOT == 
INTL_SEGMENT_ITERATOR_STRING_SLOT, + "STRING_SLOT must match self-hosting define for string slot"); + + static_assert(INDEX_SLOT == INTL_SEGMENT_ITERATOR_INDEX_SLOT, + "INDEX_SLOT must match self-hosting define for index slot"); + + SegmenterObject* getSegmenter() const { + const auto& slot = getFixedSlot(SEGMENTER_SLOT); + if (slot.isUndefined()) { + return nullptr; + } + return &slot.toObject().as<SegmenterObject>(); + } + + void setSegmenter(SegmenterObject* segmenter) { + setFixedSlot(SEGMENTER_SLOT, ObjectOrNullValue(segmenter)); + } + + JSString* getString() const { + const auto& slot = getFixedSlot(STRING_SLOT); + if (slot.isUndefined()) { + return nullptr; + } + return slot.toString(); + } + + void setString(JSString* str) { setFixedSlot(STRING_SLOT, StringValue(str)); } + + bool hasStringChars() const { + return !getFixedSlot(STRING_CHARS_SLOT).isUndefined(); + } + + void* getStringChars() const { + const auto& slot = getFixedSlot(STRING_CHARS_SLOT); + if (slot.isUndefined()) { + return nullptr; + } + return slot.toPrivate(); + } + + void setLatin1Chars(JS::Latin1Char* chars) { + setFixedSlot(STRING_CHARS_SLOT, PrivateValue(chars)); + } + + void setTwoByteChars(char16_t* chars) { + setFixedSlot(STRING_CHARS_SLOT, PrivateValue(chars)); + } + + int32_t getIndex() const { + const auto& slot = getFixedSlot(INDEX_SLOT); + if (slot.isUndefined()) { + return 0; + } + return slot.toInt32(); + } + + void setIndex(int32_t index) { setFixedSlot(INDEX_SLOT, Int32Value(index)); } + + SegmenterGranularity getGranularity() const { + const auto& slot = getFixedSlot(GRANULARITY_SLOT); + if (slot.isUndefined()) { + return SegmenterGranularity::Grapheme; + } + return static_cast<SegmenterGranularity>(slot.toInt32()); + } + + void setGranularity(SegmenterGranularity granularity) { + setFixedSlot(GRANULARITY_SLOT, + Int32Value(static_cast<int32_t>(granularity))); + } + + void* getBreakIterator() const { + const auto& slot = getFixedSlot(BREAK_ITERATOR_SLOT); + if 
(slot.isUndefined()) { + return nullptr; + } + return slot.toPrivate(); + } + + void setBreakIterator(void* brk) { + setFixedSlot(BREAK_ITERATOR_SLOT, PrivateValue(brk)); + } + + private: + static const JSClassOps classOps_; + + static void finalize(JS::GCContext* gcx, JSObject* obj); +}; + +/** + * Create a new Segments object. + * + * Usage: segment = intl_CreateSegmentsObject(segmenter, string) + */ +[[nodiscard]] extern bool intl_CreateSegmentsObject(JSContext* cx, + unsigned argc, Value* vp); + +/** + * Create a new Segment Iterator object. + * + * Usage: iterator = intl_CreateSegmentIterator(segments) + */ +[[nodiscard]] extern bool intl_CreateSegmentIterator(JSContext* cx, + unsigned argc, Value* vp); + +/** + * Find the next and the preceding segment boundaries for the given index. The + * index must be a valid string index within the segmenter string. + * + * Return a three-element array object `[startIndex, endIndex, wordLike]`, where + * `wordLike` is either a boolean or undefined for non-word segmenters. + * + * Usage: boundaries = intl_FindSegmentBoundaries(segments, index) + */ +[[nodiscard]] extern bool intl_FindSegmentBoundaries(JSContext* cx, + unsigned argc, Value* vp); + +/** + * Find the next segment boundaries starting from the current iterator index. + * The iterator mustn't have been completed. + * + * Return a three-element array object `[startIndex, endIndex, wordLike]`, where + * `wordLike` is either a boolean or undefined for non-word segmenters. 
+ * + * Usage: boundaries = intl_FindNextSegmentBoundaries(iterator) + */ +[[nodiscard]] extern bool intl_FindNextSegmentBoundaries(JSContext* cx, + unsigned argc, + Value* vp); + +} // namespace js + +#endif /* builtin_intl_Segmenter_h */ diff --git a/js/src/builtin/intl/Segmenter.js b/js/src/builtin/intl/Segmenter.js new file mode 100644 index 0000000000..7c46091b56 --- /dev/null +++ b/js/src/builtin/intl/Segmenter.js @@ -0,0 +1,434 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * Intl.Segmenter internal properties. + */ +function segmenterLocaleData() { + // Segmenter doesn't support any extension keys. + return {}; +} +var segmenterInternalProperties = { + localeData: segmenterLocaleData, + relevantExtensionKeys: [], +}; + +/** + * Intl.Segmenter ( [ locales [ , options ] ] ) + * + * Compute an internal properties object from |lazySegmenterData|. + */ +function resolveSegmenterInternals(lazySegmenterData) { + assert(IsObject(lazySegmenterData), "lazy data not an object?"); + + var internalProps = std_Object_create(null); + + var Segmenter = segmenterInternalProperties; + + // Compute effective locale. + + // Step 9. + var localeData = Segmenter.localeData; + + // Step 10. + var r = ResolveLocale( + "Segmenter", + lazySegmenterData.requestedLocales, + lazySegmenterData.opt, + Segmenter.relevantExtensionKeys, + localeData + ); + + // Step 11. + internalProps.locale = r.locale; + + // Step 13. + internalProps.granularity = lazySegmenterData.granularity; + + // The caller is responsible for associating |internalProps| with the right + // object using |setInternalProperties|. + return internalProps; +} + +/** + * Returns an object containing the Segmenter internal properties of |obj|. 
+ */ +function getSegmenterInternals(obj) { + assert(IsObject(obj), "getSegmenterInternals called with non-object"); + assert( + intl_GuardToSegmenter(obj) !== null, + "getSegmenterInternals called with non-Segmenter" + ); + + var internals = getIntlObjectInternals(obj); + assert( + internals.type === "Segmenter", + "bad type escaped getIntlObjectInternals" + ); + + // If internal properties have already been computed, use them. + var internalProps = maybeInternalProperties(internals); + if (internalProps) { + return internalProps; + } + + // Otherwise it's time to fully create them. + internalProps = resolveSegmenterInternals(internals.lazyData); + setInternalProperties(internals, internalProps); + return internalProps; +} + +/** + * Intl.Segmenter ( [ locales [ , options ] ] ) + * + * Initializes an object as a Segmenter. + * + * This method is complicated a moderate bit by its implementing initialization + * as a *lazy* concept. Everything that must happen now, does -- but we defer + * all the work we can until the object is actually used as a Segmenter. + * This later work occurs in |resolveSegmenterInternals|; steps not noted here + * occur there. + */ +function InitializeSegmenter(segmenter, locales, options) { + assert(IsObject(segmenter), "InitializeSegmenter called with non-object"); + assert( + intl_GuardToSegmenter(segmenter) !== null, + "InitializeSegmenter called with non-Segmenter" + ); + + // Lazy Segmenter data has the following structure: + // + // { + // requestedLocales: List of locales, + // + // opt: // opt object computed in InitializeSegmenter + // { + // localeMatcher: "lookup" / "best fit", + // } + // + // granularity: "grapheme" / "word" / "sentence", + // } + // + // Note that lazy data is only installed as a final step of initialization, + // so every Segmenter lazy data object has *all* these properties, never a + // subset of them. + var lazySegmenterData = std_Object_create(null); + + // Step 4. 
+ var requestedLocales = CanonicalizeLocaleList(locales); + lazySegmenterData.requestedLocales = requestedLocales; + + // Step 5. + if (options === undefined) { + options = std_Object_create(null); + } else if (!IsObject(options)) { + ThrowTypeError( + JSMSG_OBJECT_REQUIRED, + options === null ? "null" : typeof options + ); + } + + // Step 6. + var opt = new_Record(); + lazySegmenterData.opt = opt; + + // Steps 7-8. + var matcher = GetOption( + options, + "localeMatcher", + "string", + ["lookup", "best fit"], + "best fit" + ); + opt.localeMatcher = matcher; + + // Steps 12-13. + var granularity = GetOption( + options, + "granularity", + "string", + ["grapheme", "word", "sentence"], + "grapheme" + ); + lazySegmenterData.granularity = granularity; + + // We've done everything that must be done now: mark the lazy data as fully + // computed and install it. + initializeIntlObject(segmenter, "Segmenter", lazySegmenterData); +} + +/** + * Intl.Segmenter.supportedLocalesOf ( locales [, options ]) + * + * Returns the subset of the given locale list for which this locale list has a + * matching (possibly fallback) locale. Locales appear in the same order in the + * returned list as in the input list. + */ +function Intl_Segmenter_supportedLocalesOf(locales /*, options*/) { + var options = ArgumentsLength() > 1 ? GetArgument(1) : undefined; + + // Step 1. + var availableLocales = "Segmenter"; + + // Step 2. + var requestedLocales = CanonicalizeLocaleList(locales); + + // Step 3. + return SupportedLocales(availableLocales, requestedLocales, options); +} + +/** + * Intl.Segmenter.prototype.segment ( string ) + * + * Create a new Segments object. + */ +function Intl_Segmenter_segment(value) { + // Step 1. + var segmenter = this; + + // Step 2. 
+ if ( + !IsObject(segmenter) || + (segmenter = intl_GuardToSegmenter(segmenter)) === null + ) { + return callFunction( + intl_CallSegmenterMethodIfWrapped, + this, + value, + "Intl_Segmenter_segment" + ); + } + + // Ensure the Segmenter internals are resolved. + getSegmenterInternals(segmenter); + + // Step 3. + var string = ToString(value); + + // Step 4. + return intl_CreateSegmentsObject(segmenter, string); +} + +/** + * Intl.Segmenter.prototype.resolvedOptions () + * + * Returns the resolved options for a Segmenter object. + */ +function Intl_Segmenter_resolvedOptions() { + // Step 1. + var segmenter = this; + + // Step 2. + if ( + !IsObject(segmenter) || + (segmenter = intl_GuardToSegmenter(segmenter)) === null + ) { + return callFunction( + intl_CallSegmenterMethodIfWrapped, + this, + "Intl_Segmenter_resolvedOptions" + ); + } + + var internals = getSegmenterInternals(segmenter); + + // Steps 3-4. + var options = { + locale: internals.locale, + granularity: internals.granularity, + }; + + // Step 5. + return options; +} + +/** + * CreateSegmentDataObject ( segmenter, string, startIndex, endIndex ) + */ +function CreateSegmentDataObject(string, boundaries) { + assert(typeof string === "string", "CreateSegmentDataObject"); + assert( + IsPackedArray(boundaries) && boundaries.length === 3, + "CreateSegmentDataObject" + ); + + var startIndex = boundaries[0]; + assert( + typeof startIndex === "number" && (startIndex | 0) === startIndex, + "startIndex is an int32-value" + ); + + var endIndex = boundaries[1]; + assert( + typeof endIndex === "number" && (endIndex | 0) === endIndex, + "endIndex is an int32-value" + ); + + // In our implementation |granularity| is encoded in |isWordLike|. + var isWordLike = boundaries[2]; + assert( + typeof isWordLike === "boolean" || isWordLike === undefined, + "isWordLike is either a boolean or undefined" + ); + + // Step 1 (Not applicable). + + // Step 2. + assert(startIndex >= 0, "startIndex is a positive number"); + + // Step 3. 
+ assert( + endIndex <= string.length, + "endIndex is less-than-equals the string length" + ); + + // Step 4. + assert(startIndex < endIndex, "startIndex is strictly less than endIndex"); + + // Step 6. + var segment = Substring(string, startIndex, endIndex - startIndex); + + // Steps 5, 7-12. + if (isWordLike === undefined) { + return { + segment, + index: startIndex, + input: string, + }; + } + + return { + segment, + index: startIndex, + input: string, + isWordLike, + }; +} + +/** + * %Segments.prototype%.containing ( index ) + * + * Return a Segment Data object describing the segment at the given index. If + * the index exceeds the string bounds, undefined is returned. + */ +function Intl_Segments_containing(index) { + // Step 1. + var segments = this; + + // Step 2. + if ( + !IsObject(segments) || + (segments = intl_GuardToSegments(segments)) === null + ) { + return callFunction( + intl_CallSegmentsMethodIfWrapped, + this, + index, + "Intl_Segments_containing" + ); + } + + // Step 3 (not applicable). + + // Step 4. + var string = UnsafeGetStringFromReservedSlot( + segments, + INTL_SEGMENTS_STRING_SLOT + ); + + // Step 5. + var len = string.length; + + // Step 6. + var n = ToInteger(index); + + // Step 7. + if (n < 0 || n >= len) { + return undefined; + } + + // Steps 8-9. + var boundaries = intl_FindSegmentBoundaries(segments, n | 0); + + // Step 10. + return CreateSegmentDataObject(string, boundaries); +} + +/** + * %Segments.prototype% [ @@iterator ] () + * + * Create a new Segment Iterator object. + */ +function Intl_Segments_iterator() { + // Step 1. + var segments = this; + + // Step 2. + if ( + !IsObject(segments) || + (segments = intl_GuardToSegments(segments)) === null + ) { + return callFunction( + intl_CallSegmentsMethodIfWrapped, + this, + "Intl_Segments_iterator" + ); + } + + // Steps 3-5. 
+ return intl_CreateSegmentIterator(segments); +} + +/** + * %SegmentIterator.prototype%.next () + * + * Advance the Segment iterator to the next segment within the string. + */ +function Intl_SegmentIterator_next() { + // Step 1. + var iterator = this; + + // Step 2. + if ( + !IsObject(iterator) || + (iterator = intl_GuardToSegmentIterator(iterator)) === null) + { + return callFunction( + intl_CallSegmentIteratorMethodIfWrapped, + this, + "Intl_SegmentIterator_next" + ); + } + + // Step 3 (Not applicable). + + // Step 4. + var string = UnsafeGetStringFromReservedSlot( + iterator, + INTL_SEGMENT_ITERATOR_STRING_SLOT + ); + + // Step 5. + var index = UnsafeGetInt32FromReservedSlot( + iterator, + INTL_SEGMENT_ITERATOR_INDEX_SLOT + ); + + var result = { value: undefined, done: false }; + + // Step 7. + if (index === string.length) { + result.done = true; + return result; + } + + // Steps 6, 8. + var boundaries = intl_FindNextSegmentBoundaries(iterator); + + // Step 9. + result.value = CreateSegmentDataObject(string, boundaries); + + // Step 10. + return result; +} diff --git a/js/src/builtin/intl/SharedIntlData.cpp b/js/src/builtin/intl/SharedIntlData.cpp new file mode 100644 index 0000000000..8b382f22fb --- /dev/null +++ b/js/src/builtin/intl/SharedIntlData.cpp @@ -0,0 +1,848 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Runtime-wide Intl data shared across compartments. 
*/ + +#include "builtin/intl/SharedIntlData.h" + +#include "mozilla/Assertions.h" +#include "mozilla/HashFunctions.h" +#include "mozilla/intl/Collator.h" +#include "mozilla/intl/DateTimeFormat.h" +#include "mozilla/intl/DateTimePatternGenerator.h" +#include "mozilla/intl/Locale.h" +#include "mozilla/intl/NumberFormat.h" +#include "mozilla/intl/TimeZone.h" +#include "mozilla/Span.h" +#include "mozilla/TextUtils.h" + +#include <algorithm> +#include <stdint.h> +#include <string> +#include <string.h> +#include <string_view> +#include <utility> + +#include "builtin/Array.h" +#include "builtin/intl/CommonFunctions.h" +#include "builtin/intl/TimeZoneDataGenerated.h" +#include "js/Utility.h" +#include "js/Vector.h" +#include "vm/ArrayObject.h" +#include "vm/JSAtomUtils.h" // Atomize +#include "vm/JSContext.h" +#include "vm/StringType.h" + +using js::HashNumber; + +template <typename Char> +static constexpr Char ToUpperASCII(Char c) { + return mozilla::IsAsciiLowercaseAlpha(c) ? (c - 0x20) : c; +} + +static_assert(ToUpperASCII('a') == 'A', "verifying 'a' uppercases correctly"); +static_assert(ToUpperASCII('m') == 'M', "verifying 'm' uppercases correctly"); +static_assert(ToUpperASCII('z') == 'Z', "verifying 'z' uppercases correctly"); +static_assert(ToUpperASCII(u'a') == u'A', + "verifying u'a' uppercases correctly"); +static_assert(ToUpperASCII(u'k') == u'K', + "verifying u'k' uppercases correctly"); +static_assert(ToUpperASCII(u'z') == u'Z', + "verifying u'z' uppercases correctly"); + +template <typename Char> +static HashNumber HashStringIgnoreCaseASCII(const Char* s, size_t length) { + uint32_t hash = 0; + for (size_t i = 0; i < length; i++) { + hash = mozilla::AddToHash(hash, ToUpperASCII(s[i])); + } + return hash; +} + +js::intl::SharedIntlData::TimeZoneHasher::Lookup::Lookup( + JSLinearString* timeZone) + : js::intl::SharedIntlData::LinearStringLookup(timeZone) { + if (isLatin1) { + hash = HashStringIgnoreCaseASCII(latin1Chars, length); + } else { + hash = 
HashStringIgnoreCaseASCII(twoByteChars, length); + } +} + +template <typename Char1, typename Char2> +static bool EqualCharsIgnoreCaseASCII(const Char1* s1, const Char2* s2, + size_t len) { + for (const Char1* s1end = s1 + len; s1 < s1end; s1++, s2++) { + if (ToUpperASCII(*s1) != ToUpperASCII(*s2)) { + return false; + } + } + return true; +} + +bool js::intl::SharedIntlData::TimeZoneHasher::match(TimeZoneName key, + const Lookup& lookup) { + if (key->length() != lookup.length) { + return false; + } + + // Compare time zone names ignoring ASCII case differences. + if (key->hasLatin1Chars()) { + const Latin1Char* keyChars = key->latin1Chars(lookup.nogc); + if (lookup.isLatin1) { + return EqualCharsIgnoreCaseASCII(keyChars, lookup.latin1Chars, + lookup.length); + } + return EqualCharsIgnoreCaseASCII(keyChars, lookup.twoByteChars, + lookup.length); + } + + const char16_t* keyChars = key->twoByteChars(lookup.nogc); + if (lookup.isLatin1) { + return EqualCharsIgnoreCaseASCII(lookup.latin1Chars, keyChars, + lookup.length); + } + return EqualCharsIgnoreCaseASCII(keyChars, lookup.twoByteChars, + lookup.length); +} + +static bool IsLegacyICUTimeZone(mozilla::Span<const char> timeZone) { + std::string_view timeZoneView(timeZone.data(), timeZone.size()); + for (const auto& legacyTimeZone : js::timezone::legacyICUTimeZones) { + if (timeZoneView == legacyTimeZone) { + return true; + } + } + return false; +} + +bool js::intl::SharedIntlData::ensureTimeZones(JSContext* cx) { + if (timeZoneDataInitialized) { + return true; + } + + // If ensureTimeZones() was called previously, but didn't complete due to + // OOM, clear all sets/maps and start from scratch. 
+ availableTimeZones.clearAndCompact(); + + auto timeZones = mozilla::intl::TimeZone::GetAvailableTimeZones(); + if (timeZones.isErr()) { + ReportInternalError(cx, timeZones.unwrapErr()); + return false; + } + + Rooted<JSAtom*> timeZone(cx); + for (auto timeZoneName : timeZones.unwrap()) { + if (timeZoneName.isErr()) { + ReportInternalError(cx); + return false; + } + auto timeZoneSpan = timeZoneName.unwrap(); + + // Skip legacy ICU time zone names. + if (IsLegacyICUTimeZone(timeZoneSpan)) { + continue; + } + + timeZone = Atomize(cx, timeZoneSpan.data(), timeZoneSpan.size()); + if (!timeZone) { + return false; + } + + TimeZoneHasher::Lookup lookup(timeZone); + TimeZoneSet::AddPtr p = availableTimeZones.lookupForAdd(lookup); + + // ICU shouldn't report any duplicate time zone names, but if it does, + // just ignore the duplicate name. + if (!p && !availableTimeZones.add(p, timeZone)) { + ReportOutOfMemory(cx); + return false; + } + } + + ianaZonesTreatedAsLinksByICU.clearAndCompact(); + + for (const char* rawTimeZone : timezone::ianaZonesTreatedAsLinksByICU) { + MOZ_ASSERT(rawTimeZone != nullptr); + timeZone = Atomize(cx, rawTimeZone, strlen(rawTimeZone)); + if (!timeZone) { + return false; + } + + TimeZoneHasher::Lookup lookup(timeZone); + TimeZoneSet::AddPtr p = ianaZonesTreatedAsLinksByICU.lookupForAdd(lookup); + MOZ_ASSERT(!p, "Duplicate entry in timezone::ianaZonesTreatedAsLinksByICU"); + + if (!ianaZonesTreatedAsLinksByICU.add(p, timeZone)) { + ReportOutOfMemory(cx); + return false; + } + } + + ianaLinksCanonicalizedDifferentlyByICU.clearAndCompact(); + + Rooted<JSAtom*> linkName(cx); + Rooted<JSAtom*>& target = timeZone; + for (const auto& linkAndTarget : + timezone::ianaLinksCanonicalizedDifferentlyByICU) { + const char* rawLinkName = linkAndTarget.link; + const char* rawTarget = linkAndTarget.target; + + MOZ_ASSERT(rawLinkName != nullptr); + linkName = Atomize(cx, rawLinkName, strlen(rawLinkName)); + if (!linkName) { + return false; + } + + 
MOZ_ASSERT(rawTarget != nullptr); + target = Atomize(cx, rawTarget, strlen(rawTarget)); + if (!target) { + return false; + } + + TimeZoneHasher::Lookup lookup(linkName); + TimeZoneMap::AddPtr p = + ianaLinksCanonicalizedDifferentlyByICU.lookupForAdd(lookup); + MOZ_ASSERT( + !p, + "Duplicate entry in timezone::ianaLinksCanonicalizedDifferentlyByICU"); + + if (!ianaLinksCanonicalizedDifferentlyByICU.add(p, linkName, target)) { + ReportOutOfMemory(cx); + return false; + } + } + + MOZ_ASSERT(!timeZoneDataInitialized, + "ensureTimeZones is neither reentrant nor thread-safe"); + timeZoneDataInitialized = true; + + return true; +} + +bool js::intl::SharedIntlData::validateTimeZoneName( + JSContext* cx, HandleString timeZone, MutableHandle<JSAtom*> result) { + if (!ensureTimeZones(cx)) { + return false; + } + + Rooted<JSLinearString*> timeZoneLinear(cx, timeZone->ensureLinear(cx)); + if (!timeZoneLinear) { + return false; + } + + TimeZoneHasher::Lookup lookup(timeZoneLinear); + if (TimeZoneSet::Ptr p = availableTimeZones.lookup(lookup)) { + result.set(*p); + } + + return true; +} + +bool js::intl::SharedIntlData::tryCanonicalizeTimeZoneConsistentWithIANA( + JSContext* cx, HandleString timeZone, MutableHandle<JSAtom*> result) { + if (!ensureTimeZones(cx)) { + return false; + } + + Rooted<JSLinearString*> timeZoneLinear(cx, timeZone->ensureLinear(cx)); + if (!timeZoneLinear) { + return false; + } + + TimeZoneHasher::Lookup lookup(timeZoneLinear); + MOZ_ASSERT(availableTimeZones.has(lookup), "Invalid time zone name"); + + if (TimeZoneMap::Ptr p = + ianaLinksCanonicalizedDifferentlyByICU.lookup(lookup)) { + // The effectively supported time zones aren't known at compile time, + // when + // 1. SpiderMonkey was compiled with "--with-system-icu". + // 2. ICU's dynamic time zone data loading feature was used. + // (ICU supports loading time zone files at runtime through the + // ICU_TIMEZONE_FILES_DIR environment variable.) 
+ // Ensure ICU supports the new target zone before applying the update. + TimeZoneName targetTimeZone = p->value(); + TimeZoneHasher::Lookup targetLookup(targetTimeZone); + if (availableTimeZones.has(targetLookup)) { + result.set(targetTimeZone); + } + } else if (TimeZoneSet::Ptr p = ianaZonesTreatedAsLinksByICU.lookup(lookup)) { + result.set(*p); + } + + return true; +} + +JS::Result<js::intl::SharedIntlData::TimeZoneSet::Iterator> +js::intl::SharedIntlData::availableTimeZonesIteration(JSContext* cx) { + if (!ensureTimeZones(cx)) { + return cx->alreadyReportedError(); + } + return availableTimeZones.iter(); +} + +js::intl::SharedIntlData::LocaleHasher::Lookup::Lookup(JSLinearString* locale) + : js::intl::SharedIntlData::LinearStringLookup(locale) { + if (isLatin1) { + hash = mozilla::HashString(latin1Chars, length); + } else { + hash = mozilla::HashString(twoByteChars, length); + } +} + +js::intl::SharedIntlData::LocaleHasher::Lookup::Lookup(const char* chars, + size_t length) + : js::intl::SharedIntlData::LinearStringLookup(chars, length) { + hash = mozilla::HashString(latin1Chars, length); +} + +bool js::intl::SharedIntlData::LocaleHasher::match(Locale key, + const Lookup& lookup) { + if (key->length() != lookup.length) { + return false; + } + + if (key->hasLatin1Chars()) { + const Latin1Char* keyChars = key->latin1Chars(lookup.nogc); + if (lookup.isLatin1) { + return EqualChars(keyChars, lookup.latin1Chars, lookup.length); + } + return EqualChars(keyChars, lookup.twoByteChars, lookup.length); + } + + const char16_t* keyChars = key->twoByteChars(lookup.nogc); + if (lookup.isLatin1) { + return EqualChars(lookup.latin1Chars, keyChars, lookup.length); + } + return EqualChars(keyChars, lookup.twoByteChars, lookup.length); +} + +template <class AvailableLocales> +bool js::intl::SharedIntlData::getAvailableLocales( + JSContext* cx, LocaleSet& locales, + const AvailableLocales& availableLocales) { + auto addLocale = [cx, &locales](const char* locale, size_t length) { 
+ JSAtom* atom = Atomize(cx, locale, length); + if (!atom) { + return false; + } + + LocaleHasher::Lookup lookup(atom); + LocaleSet::AddPtr p = locales.lookupForAdd(lookup); + + // ICU shouldn't report any duplicate locales, but if it does, just + // ignore the duplicated locale. + if (!p && !locales.add(p, atom)) { + ReportOutOfMemory(cx); + return false; + } + + return true; + }; + + js::Vector<char, 16> lang(cx); + + for (const char* locale : availableLocales) { + size_t length = strlen(locale); + + lang.clear(); + if (!lang.append(locale, length)) { + return false; + } + MOZ_ASSERT(lang.length() == length); + + std::replace(lang.begin(), lang.end(), '_', '-'); + + if (!addLocale(lang.begin(), length)) { + return false; + } + + // From <https://tc39.es/ecma402/#sec-internal-slots>: + // + // For locales that include a script subtag in addition to language and + // region, the corresponding locale without a script subtag must also be + // supported; that is, if an implementation recognizes "zh-Hant-TW", it is + // also expected to recognize "zh-TW". + + // 2 * Alpha language subtag + // + 1 separator + // + 4 * Alphanum script subtag + // + 1 separator + // + 2 * Alpha region subtag + using namespace mozilla::intl::LanguageTagLimits; + static constexpr size_t MinLanguageLength = 2; + static constexpr size_t MinLengthForScriptAndRegion = + MinLanguageLength + 1 + ScriptLength + 1 + AlphaRegionLength; + + // Fast case: Skip locales without script subtags. + if (length < MinLengthForScriptAndRegion) { + continue; + } + + // We don't need the full-fledged language tag parser when we just want to + // remove the script subtag. + + // Find the separator between the language and script subtags. + const char* sep = std::char_traits<char>::find(lang.begin(), length, '-'); + if (!sep) { + continue; + } + + // Possible |script| subtag start position. + const char* script = sep + 1; + + // Find the separator between the script and region subtags. 
+ sep = std::char_traits<char>::find(script, lang.end() - script, '-'); + if (!sep) { + continue; + } + + // Continue with the next locale if we didn't find a script subtag. + size_t scriptLength = sep - script; + if (!mozilla::intl::IsStructurallyValidScriptTag<char>( + {script, scriptLength})) { + continue; + } + + // Possible |region| subtag start position. + const char* region = sep + 1; + + // Search if there's yet another subtag after the region subtag. + sep = std::char_traits<char>::find(region, lang.end() - region, '-'); + + // Continue with the next locale if we didn't find a region subtag. + size_t regionLength = (sep ? sep : lang.end()) - region; + if (!mozilla::intl::IsStructurallyValidRegionTag<char>( + {region, regionLength})) { + continue; + } + + // We've found a script and a region subtag. + + static constexpr size_t ScriptWithSeparatorLength = ScriptLength + 1; + + // Remove the script subtag. Note: erase() needs non-const pointers, which + // means we can't directly pass |script|. + char* p = const_cast<char*>(script); + lang.erase(p, p + ScriptWithSeparatorLength); + + MOZ_ASSERT(lang.length() == length - ScriptWithSeparatorLength); + + // Add the locale with the script subtag removed. + if (!addLocale(lang.begin(), lang.length())) { + return false; + } + } + + // Forcibly add an entry for the last-ditch locale, in case ICU doesn't + // directly support it (but does support it through fallback, e.g. supporting + // "en-GB" indirectly using "en" support). 
+ { + const char* lastDitch = intl::LastDitchLocale(); + MOZ_ASSERT(strcmp(lastDitch, "en-GB") == 0); + +#ifdef DEBUG + static constexpr char lastDitchParent[] = "en"; + + LocaleHasher::Lookup lookup(lastDitchParent, strlen(lastDitchParent)); + MOZ_ASSERT(locales.has(lookup), + "shouldn't be a need to add every locale implied by the " + "last-ditch locale, merely just the last-ditch locale"); +#endif + + if (!addLocale(lastDitch, strlen(lastDitch))) { + return false; + } + } + + return true; +} + +#ifdef DEBUG +template <class AvailableLocales1, class AvailableLocales2> +static bool IsSameAvailableLocales(const AvailableLocales1& availableLocales1, + const AvailableLocales2& availableLocales2) { + return std::equal(std::begin(availableLocales1), std::end(availableLocales1), + std::begin(availableLocales2), std::end(availableLocales2), + [](const char* a, const char* b) { + // Intentionally comparing pointer equivalence. + return a == b; + }); +} +#endif + +bool js::intl::SharedIntlData::ensureSupportedLocales(JSContext* cx) { + if (supportedLocalesInitialized) { + return true; + } + + // If ensureSupportedLocales() was called previously, but didn't complete due + // to OOM, clear all data and start from scratch. 
+ supportedLocales.clearAndCompact(); + collatorSupportedLocales.clearAndCompact(); + + if (!getAvailableLocales(cx, supportedLocales, + mozilla::intl::Locale::GetAvailableLocales())) { + return false; + } + if (!getAvailableLocales(cx, collatorSupportedLocales, + mozilla::intl::Collator::GetAvailableLocales())) { + return false; + } + + MOZ_ASSERT(IsSameAvailableLocales( + mozilla::intl::Locale::GetAvailableLocales(), + mozilla::intl::DateTimeFormat::GetAvailableLocales())); + + MOZ_ASSERT(IsSameAvailableLocales( + mozilla::intl::Locale::GetAvailableLocales(), + mozilla::intl::NumberFormat::GetAvailableLocales())); + + MOZ_ASSERT(!supportedLocalesInitialized, + "ensureSupportedLocales is neither reentrant nor thread-safe"); + supportedLocalesInitialized = true; + + return true; +} + +bool js::intl::SharedIntlData::isSupportedLocale(JSContext* cx, + SupportedLocaleKind kind, + HandleString locale, + bool* supported) { + if (!ensureSupportedLocales(cx)) { + return false; + } + + Rooted<JSLinearString*> localeLinear(cx, locale->ensureLinear(cx)); + if (!localeLinear) { + return false; + } + + LocaleHasher::Lookup lookup(localeLinear); + + switch (kind) { + case SupportedLocaleKind::Collator: + *supported = collatorSupportedLocales.has(lookup); + return true; + case SupportedLocaleKind::DateTimeFormat: + case SupportedLocaleKind::DisplayNames: + case SupportedLocaleKind::ListFormat: + case SupportedLocaleKind::NumberFormat: + case SupportedLocaleKind::PluralRules: + case SupportedLocaleKind::RelativeTimeFormat: + case SupportedLocaleKind::Segmenter: + *supported = supportedLocales.has(lookup); + return true; + } + MOZ_CRASH("Invalid Intl constructor"); +} + +js::ArrayObject* js::intl::SharedIntlData::availableLocalesOf( + JSContext* cx, SupportedLocaleKind kind) { + if (!ensureSupportedLocales(cx)) { + return nullptr; + } + + LocaleSet* localeSet = nullptr; + switch (kind) { + case SupportedLocaleKind::Collator: + localeSet = &collatorSupportedLocales; + break; + case 
SupportedLocaleKind::DateTimeFormat: + case SupportedLocaleKind::DisplayNames: + case SupportedLocaleKind::ListFormat: + case SupportedLocaleKind::NumberFormat: + case SupportedLocaleKind::PluralRules: + case SupportedLocaleKind::RelativeTimeFormat: + case SupportedLocaleKind::Segmenter: + localeSet = &supportedLocales; + break; + default: + MOZ_CRASH("Invalid Intl constructor"); + } + + const uint32_t count = localeSet->count(); + ArrayObject* result = NewDenseFullyAllocatedArray(cx, count); + if (!result) { + return nullptr; + } + result->setDenseInitializedLength(count); + + uint32_t index = 0; + for (auto range = localeSet->iter(); !range.done(); range.next()) { + JSAtom* locale = range.get(); + cx->markAtom(locale); + + result->initDenseElement(index++, StringValue(locale)); + } + MOZ_ASSERT(index == count); + + return result; +} + +#if DEBUG || MOZ_SYSTEM_ICU +bool js::intl::SharedIntlData::ensureUpperCaseFirstLocales(JSContext* cx) { + if (upperCaseFirstInitialized) { + return true; + } + + // If ensureUpperCaseFirstLocales() was called previously, but didn't + // complete due to OOM, clear all data and start from scratch. 
+ upperCaseFirstLocales.clearAndCompact(); + + Rooted<JSAtom*> locale(cx); + for (const char* rawLocale : mozilla::intl::Collator::GetAvailableLocales()) { + auto collator = mozilla::intl::Collator::TryCreate(rawLocale); + if (collator.isErr()) { + ReportInternalError(cx, collator.unwrapErr()); + return false; + } + + auto caseFirst = collator.unwrap()->GetCaseFirst(); + if (caseFirst.isErr()) { + ReportInternalError(cx, caseFirst.unwrapErr()); + return false; + } + + if (caseFirst.unwrap() != mozilla::intl::Collator::CaseFirst::Upper) { + continue; + } + + locale = Atomize(cx, rawLocale, strlen(rawLocale)); + if (!locale) { + return false; + } + + LocaleHasher::Lookup lookup(locale); + LocaleSet::AddPtr p = upperCaseFirstLocales.lookupForAdd(lookup); + + // ICU shouldn't report any duplicate locales, but if it does, just + // ignore the duplicated locale. + if (!p && !upperCaseFirstLocales.add(p, locale)) { + ReportOutOfMemory(cx); + return false; + } + } + + MOZ_ASSERT( + !upperCaseFirstInitialized, + "ensureUpperCaseFirstLocales is neither reentrant nor thread-safe"); + upperCaseFirstInitialized = true; + + return true; +} +#endif // DEBUG || MOZ_SYSTEM_ICU + +bool js::intl::SharedIntlData::isUpperCaseFirst(JSContext* cx, + HandleString locale, + bool* isUpperFirst) { +#if DEBUG || MOZ_SYSTEM_ICU + if (!ensureUpperCaseFirstLocales(cx)) { + return false; + } +#endif + + Rooted<JSLinearString*> localeLinear(cx, locale->ensureLinear(cx)); + if (!localeLinear) { + return false; + } + +#if !MOZ_SYSTEM_ICU + // "da" (Danish) and "mt" (Maltese) are the only two supported locales using + // upper-case first. CLDR also lists "cu" (Church Slavic) as an upper-case + // first locale, but since it's not supported in ICU, we don't care about it + // here. 
+ bool isDefaultUpperCaseFirstLocale = + js::StringEqualsLiteral(localeLinear, "da") || + js::StringEqualsLiteral(localeLinear, "mt"); +#endif + +#if DEBUG || MOZ_SYSTEM_ICU + LocaleHasher::Lookup lookup(localeLinear); + *isUpperFirst = upperCaseFirstLocales.has(lookup); +#else + *isUpperFirst = isDefaultUpperCaseFirstLocale; +#endif + +#if !MOZ_SYSTEM_ICU + MOZ_ASSERT(*isUpperFirst == isDefaultUpperCaseFirstLocale, + "upper-case first locales don't match hard-coded list"); +#endif + + return true; +} + +#if DEBUG || MOZ_SYSTEM_ICU +bool js::intl::SharedIntlData::ensureIgnorePunctuationLocales(JSContext* cx) { + if (ignorePunctuationInitialized) { + return true; + } + + // If ensureIgnorePunctuationLocales() was called previously, but didn't + // complete due to OOM, clear all data and start from scratch. + ignorePunctuationLocales.clearAndCompact(); + + Rooted<JSAtom*> locale(cx); + for (const char* rawLocale : mozilla::intl::Collator::GetAvailableLocales()) { + auto collator = mozilla::intl::Collator::TryCreate(rawLocale); + if (collator.isErr()) { + ReportInternalError(cx, collator.unwrapErr()); + return false; + } + + auto ignorePunctuation = collator.unwrap()->GetIgnorePunctuation(); + if (ignorePunctuation.isErr()) { + ReportInternalError(cx, ignorePunctuation.unwrapErr()); + return false; + } + + if (!ignorePunctuation.unwrap()) { + continue; + } + + locale = Atomize(cx, rawLocale, strlen(rawLocale)); + if (!locale) { + return false; + } + + LocaleHasher::Lookup lookup(locale); + LocaleSet::AddPtr p = ignorePunctuationLocales.lookupForAdd(lookup); + + // ICU shouldn't report any duplicate locales, but if it does, just + // ignore the duplicated locale. 
+ if (!p && !ignorePunctuationLocales.add(p, locale)) { + ReportOutOfMemory(cx); + return false; + } + } + + MOZ_ASSERT( + !ignorePunctuationInitialized, + "ensureIgnorePunctuationLocales is neither reentrant nor thread-safe"); + ignorePunctuationInitialized = true; + + return true; +} +#endif // DEBUG || MOZ_SYSTEM_ICU + +bool js::intl::SharedIntlData::isIgnorePunctuation(JSContext* cx, + HandleString locale, + bool* ignorePunctuation) { +#if DEBUG || MOZ_SYSTEM_ICU + if (!ensureIgnorePunctuationLocales(cx)) { + return false; + } +#endif + + Rooted<JSLinearString*> localeLinear(cx, locale->ensureLinear(cx)); + if (!localeLinear) { + return false; + } + +#if !MOZ_SYSTEM_ICU + // "th" (Thai) is the only supported locale which ignores punctuation by + // default. + bool isDefaultIgnorePunctuationLocale = + js::StringEqualsLiteral(localeLinear, "th"); +#endif + +#if DEBUG || MOZ_SYSTEM_ICU + LocaleHasher::Lookup lookup(localeLinear); + *ignorePunctuation = ignorePunctuationLocales.has(lookup); +#else + *ignorePunctuation = isDefaultIgnorePunctuationLocale; +#endif + +#if !MOZ_SYSTEM_ICU + MOZ_ASSERT(*ignorePunctuation == isDefaultIgnorePunctuationLocale, + "ignore punctuation locales don't match hard-coded list"); +#endif + + return true; +} + +void js::intl::DateTimePatternGeneratorDeleter::operator()( + mozilla::intl::DateTimePatternGenerator* ptr) { + delete ptr; +} + +static bool StringsAreEqual(const char* s1, const char* s2) { + return !strcmp(s1, s2); +} + +mozilla::intl::DateTimePatternGenerator* +js::intl::SharedIntlData::getDateTimePatternGenerator(JSContext* cx, + const char* locale) { + // Return the cached instance if the requested locale matches the locale + // of the cached generator. 
+ if (dateTimePatternGeneratorLocale && + StringsAreEqual(dateTimePatternGeneratorLocale.get(), locale)) { + return dateTimePatternGenerator.get(); + } + + auto result = mozilla::intl::DateTimePatternGenerator::TryCreate(locale); + if (result.isErr()) { + intl::ReportInternalError(cx, result.unwrapErr()); + return nullptr; + } + // The UniquePtr needs to be recreated as it's using a different Deleter in + // order to be able to forward declare DateTimePatternGenerator in + // SharedIntlData.h. + UniqueDateTimePatternGenerator gen(result.unwrap().release()); + + JS::UniqueChars localeCopy = js::DuplicateString(cx, locale); + if (!localeCopy) { + return nullptr; + } + + dateTimePatternGenerator = std::move(gen); + dateTimePatternGeneratorLocale = std::move(localeCopy); + + return dateTimePatternGenerator.get(); +} + +void js::intl::SharedIntlData::destroyInstance() { + availableTimeZones.clearAndCompact(); + ianaZonesTreatedAsLinksByICU.clearAndCompact(); + ianaLinksCanonicalizedDifferentlyByICU.clearAndCompact(); + supportedLocales.clearAndCompact(); + collatorSupportedLocales.clearAndCompact(); +#if DEBUG || MOZ_SYSTEM_ICU + upperCaseFirstLocales.clearAndCompact(); + ignorePunctuationLocales.clearAndCompact(); +#endif +} + +void js::intl::SharedIntlData::trace(JSTracer* trc) { + // Atoms are always tenured. 
+ if (!JS::RuntimeHeapIsMinorCollecting()) { + availableTimeZones.trace(trc); + ianaZonesTreatedAsLinksByICU.trace(trc); + ianaLinksCanonicalizedDifferentlyByICU.trace(trc); + supportedLocales.trace(trc); + collatorSupportedLocales.trace(trc); +#if DEBUG || MOZ_SYSTEM_ICU + upperCaseFirstLocales.trace(trc); + ignorePunctuationLocales.trace(trc); +#endif + } +} + +size_t js::intl::SharedIntlData::sizeOfExcludingThis( + mozilla::MallocSizeOf mallocSizeOf) const { + return availableTimeZones.shallowSizeOfExcludingThis(mallocSizeOf) + + ianaZonesTreatedAsLinksByICU.shallowSizeOfExcludingThis(mallocSizeOf) + + ianaLinksCanonicalizedDifferentlyByICU.shallowSizeOfExcludingThis( + mallocSizeOf) + + supportedLocales.shallowSizeOfExcludingThis(mallocSizeOf) + + collatorSupportedLocales.shallowSizeOfExcludingThis(mallocSizeOf) + +#if DEBUG || MOZ_SYSTEM_ICU + upperCaseFirstLocales.shallowSizeOfExcludingThis(mallocSizeOf) + + ignorePunctuationLocales.shallowSizeOfExcludingThis(mallocSizeOf) + +#endif + mallocSizeOf(dateTimePatternGeneratorLocale.get()); +} diff --git a/js/src/builtin/intl/SharedIntlData.h b/js/src/builtin/intl/SharedIntlData.h new file mode 100644 index 0000000000..7bf3eb74c9 --- /dev/null +++ b/js/src/builtin/intl/SharedIntlData.h @@ -0,0 +1,355 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef builtin_intl_SharedIntlData_h +#define builtin_intl_SharedIntlData_h + +#include "mozilla/MemoryReporting.h" +#include "mozilla/UniquePtr.h" + +#include <stddef.h> + +#include "js/AllocPolicy.h" +#include "js/GCAPI.h" +#include "js/GCHashTable.h" +#include "js/Result.h" +#include "js/RootingAPI.h" +#include "js/Utility.h" +#include "vm/StringType.h" + +namespace mozilla::intl { +class DateTimePatternGenerator; +} // namespace mozilla::intl + +namespace js { + +class ArrayObject; + +namespace intl { + +/** + * This deleter class exists so that mozilla::intl::DateTimePatternGenerator + * can be a forward declaration, but still be used inside of a UniquePtr. + */ +class DateTimePatternGeneratorDeleter { + public: + void operator()(mozilla::intl::DateTimePatternGenerator* ptr); +}; + +/** + * Stores Intl data which can be shared across compartments (but not contexts). + * + * Used for data which is expensive when computed repeatedly or is not + * available through ICU. + */ +class SharedIntlData { + struct LinearStringLookup { + union { + const JS::Latin1Char* latin1Chars; + const char16_t* twoByteChars; + }; + bool isLatin1; + size_t length; + JS::AutoCheckCannotGC nogc; + HashNumber hash = 0; + + explicit LinearStringLookup(JSLinearString* string) + : isLatin1(string->hasLatin1Chars()), length(string->length()) { + if (isLatin1) { + latin1Chars = string->latin1Chars(nogc); + } else { + twoByteChars = string->twoByteChars(nogc); + } + } + + LinearStringLookup(const char* chars, size_t length) + : isLatin1(true), length(length) { + latin1Chars = reinterpret_cast<const JS::Latin1Char*>(chars); + } + }; + + public: + /** + * Information tracking the set of the supported time zone names, derived + * from the IANA time zone database <https://www.iana.org/time-zones>. + * + * There are two kinds of IANA time zone names: Zone and Link (denoted as + * such in database source files). Zone names are the canonical, preferred + * name for a time zone, e.g. 
Asia/Kolkata. Link names simply refer to + * target Zone names for their meaning, e.g. Asia/Calcutta targets + * Asia/Kolkata. That a name is a Link doesn't *necessarily* reflect a + * sense of deprecation: some Link names also exist partly for convenience, + * e.g. UTC and GMT as Link names targeting the Zone name Etc/UTC. + * + * Two data sources determine the time zone names we support: those ICU + * supports and IANA's zone information. + * + * Unfortunately the names ICU and IANA support, and their Link + * relationships from name to target, aren't identical, so we can't simply + * implicitly trust ICU's name handling. We must perform various + * preprocessing of user-provided zone names and post-processing of + * ICU-provided zone names to implement ECMA-402's IANA-consistent behavior. + * + * Also see <https://ssl.icu-project.org/trac/ticket/12044> and + * <http://unicode.org/cldr/trac/ticket/9892>. + */ + + using TimeZoneName = JSAtom*; + + struct TimeZoneHasher { + struct Lookup : LinearStringLookup { + explicit Lookup(JSLinearString* timeZone); + }; + + static js::HashNumber hash(const Lookup& lookup) { return lookup.hash; } + static bool match(TimeZoneName key, const Lookup& lookup); + }; + + using TimeZoneSet = + GCHashSet<TimeZoneName, TimeZoneHasher, SystemAllocPolicy>; + using TimeZoneMap = + GCHashMap<TimeZoneName, TimeZoneName, TimeZoneHasher, SystemAllocPolicy>; + + private: + /** + * As a threshold matter, available time zones are those time zones ICU + * supports, via ucal_openTimeZones. But ICU supports additional non-IANA + * time zones described in intl/icu/source/tools/tzcode/icuzones (listed in + * IntlTimeZoneData.cpp's |legacyICUTimeZones|) for its own backwards + * compatibility purposes. This set consists of ICU's supported time zones, + * minus all backwards-compatibility time zones. + */ + TimeZoneSet availableTimeZones; + + /** + * IANA treats some time zone names as Zones, that ICU instead treats as + * Links. 
For example, IANA considers "America/Indiana/Indianapolis" to be + * a Zone and "America/Fort_Wayne" a Link that targets it, but ICU + * considers the former a Link that targets "America/Indianapolis" (which + * IANA treats as a Link). + * + * ECMA-402 requires that we respect IANA data, so if we're asked to + * canonicalize a time zone name in this set, we must *not* return ICU's + * canonicalization. + */ + TimeZoneSet ianaZonesTreatedAsLinksByICU; + + /** + * IANA treats some time zone names as Links to one target, that ICU + * instead treats as either Zones, or Links to different targets. An + * example of the former is "Asia/Calcutta", which IANA assigns the target + * "Asia/Kolkata" but ICU considers its own Zone. An example of the latter + * is "America/Virgin", which IANA assigns the target + * "America/Port_of_Spain" but ICU assigns the target "America/St_Thomas". + * + * ECMA-402 requires that we respect IANA data, so if we're asked to + * canonicalize a time zone name that's a key in this map, we *must* return + * the corresponding value and *must not* return ICU's canonicalization. + */ + TimeZoneMap ianaLinksCanonicalizedDifferentlyByICU; + + bool timeZoneDataInitialized = false; + + /** + * Precomputes the available time zone names, because it's too expensive to + * call ucal_openTimeZones() repeatedly. + */ + bool ensureTimeZones(JSContext* cx); + + public: + /** + * Returns the validated time zone name in |result|. If the input time zone + * isn't a valid IANA time zone name, |result| remains unchanged. + */ + bool validateTimeZoneName(JSContext* cx, JS::Handle<JSString*> timeZone, + JS::MutableHandle<JSAtom*> result); + + /** + * Returns the canonical time zone name in |result|. If no canonical name + * was found, |result| remains unchanged. + * + * This method only handles time zones which are canonicalized differently + * by ICU when compared to IANA. 
+ */ + bool tryCanonicalizeTimeZoneConsistentWithIANA( + JSContext* cx, JS::Handle<JSString*> timeZone, + JS::MutableHandle<JSAtom*> result); + + /** + * Returns an iterator over all available time zones supported by ICU. The + * returned time zone names aren't canonicalized. + */ + JS::Result<TimeZoneSet::Iterator> availableTimeZonesIteration(JSContext* cx); + + private: + using Locale = JSAtom*; + + struct LocaleHasher { + struct Lookup : LinearStringLookup { + explicit Lookup(JSLinearString* locale); + Lookup(const char* chars, size_t length); + }; + + static js::HashNumber hash(const Lookup& lookup) { return lookup.hash; } + static bool match(Locale key, const Lookup& lookup); + }; + + using LocaleSet = GCHashSet<Locale, LocaleHasher, SystemAllocPolicy>; + + // Set of supported locales for all Intl service constructors except Collator, + // which uses its own set. + // + // UDateFormat: + // udat_[count,get]Available() return the same results as their + // uloc_[count,get]Available() counterparts. + // + // UNumberFormatter: + // unum_[count,get]Available() return the same results as their + // uloc_[count,get]Available() counterparts. + // + // UListFormatter, UPluralRules, and URelativeDateTimeFormatter: + // We're going to use ULocale availableLocales as per ICU recommendation: + // https://unicode-org.atlassian.net/browse/ICU-12756 + LocaleSet supportedLocales; + + // ucol_[count,get]Available() return different results compared to + // uloc_[count,get]Available(), we can't use |supportedLocales| here. + LocaleSet collatorSupportedLocales; + + bool supportedLocalesInitialized = false; + + // CountAvailable and GetAvailable describe the signatures used for ICU API + // to determine available locales for various functionality. 
+ using CountAvailable = int32_t (*)(); + using GetAvailable = const char* (*)(int32_t localeIndex); + + template <class AvailableLocales> + static bool getAvailableLocales(JSContext* cx, LocaleSet& locales, + const AvailableLocales& availableLocales); + + /** + * Precomputes the available locales sets. + */ + bool ensureSupportedLocales(JSContext* cx); + + public: + enum class SupportedLocaleKind { + Collator, + DateTimeFormat, + DisplayNames, + ListFormat, + NumberFormat, + PluralRules, + RelativeTimeFormat, + Segmenter, + }; + + /** + * Sets |supported| to true if |locale| is supported by the requested Intl + * service constructor. Otherwise sets |supported| to false. + */ + [[nodiscard]] bool isSupportedLocale(JSContext* cx, SupportedLocaleKind kind, + JS::Handle<JSString*> locale, + bool* supported); + + /** + * Returns all available locales for |kind|. + */ + ArrayObject* availableLocalesOf(JSContext* cx, SupportedLocaleKind kind); + + private: + /** + * The case first parameter (BCP47 key "kf") allows to switch the order of + * upper- and lower-case characters. ICU doesn't directly provide an API + * to query the default case first value of a given locale, but instead + * requires to instantiate a collator object and then query the case first + * attribute (UCOL_CASE_FIRST). + * To avoid instantiating an additional collator object whenever we need + * to retrieve the default case first value of a specific locale, we + * compute the default case first value for every supported locale only + * once and then keep a list of all locales which don't use the default + * case first setting. + * There is almost no difference between lower-case first and when case + * first is disabled (UCOL_LOWER_FIRST resp. UCOL_OFF), so we only need to + * track locales which use upper-case first as their default setting. 
+ * + * Instantiating collator objects for each available locale is slow + * (bug 1527879), therefore we're hardcoding the two locales using upper-case + * first ("da" (Danish) and "mt" (Maltese)) and only assert in debug-mode + * these two locales match the upper-case first locales returned by ICU. A + * system-ICU may support a different set of locales, therefore we're always + * calling into ICU to find the upper-case first locales in that case. + */ + +#if DEBUG || MOZ_SYSTEM_ICU + LocaleSet upperCaseFirstLocales; + + bool upperCaseFirstInitialized = false; + + /** + * Precomputes the available locales which use upper-case first sorting. + */ + bool ensureUpperCaseFirstLocales(JSContext* cx); +#endif + + public: + /** + * Sets |isUpperFirst| to true if |locale| sorts upper-case characters + * before lower-case characters. + */ + bool isUpperCaseFirst(JSContext* cx, JS::Handle<JSString*> locale, + bool* isUpperFirst); + + private: +#if DEBUG || MOZ_SYSTEM_ICU + LocaleSet ignorePunctuationLocales; + + bool ignorePunctuationInitialized = false; + + /** + * Precomputes the available locales which ignore punctuation. + */ + bool ensureIgnorePunctuationLocales(JSContext* cx); +#endif + + public: + /** + * Sets |ignorePunctuation| to true if |locale| ignores punctuation. + */ + bool isIgnorePunctuation(JSContext* cx, JS::Handle<JSString*> locale, + bool* ignorePunctuation); + + private: + using UniqueDateTimePatternGenerator = + mozilla::UniquePtr<mozilla::intl::DateTimePatternGenerator, + DateTimePatternGeneratorDeleter>; + + UniqueDateTimePatternGenerator dateTimePatternGenerator; + JS::UniqueChars dateTimePatternGeneratorLocale; + + public: + /** + * Get a non-owned cached instance of the DateTimePatternGenerator, which is + * expensive to instantiate. 
+ * + * See: https://bugzilla.mozilla.org/show_bug.cgi?id=1549578 + */ + mozilla::intl::DateTimePatternGenerator* getDateTimePatternGenerator( + JSContext* cx, const char* locale); + + public: + void destroyInstance(); + + void trace(JSTracer* trc); + + size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) const; +}; + +} // namespace intl + +} // namespace js + +#endif /* builtin_intl_SharedIntlData_h */ diff --git a/js/src/builtin/intl/StringAsciiChars.h b/js/src/builtin/intl/StringAsciiChars.h new file mode 100644 index 0000000000..3323544d8c --- /dev/null +++ b/js/src/builtin/intl/StringAsciiChars.h @@ -0,0 +1,77 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef builtin_intl_StringAsciiChars_h +#define builtin_intl_StringAsciiChars_h + +#include "mozilla/Assertions.h" +#include "mozilla/Attributes.h" +#include "mozilla/Maybe.h" +#include "mozilla/Span.h" + +#include <stddef.h> + +#include "js/GCAPI.h" +#include "js/TypeDecls.h" +#include "js/Vector.h" + +#include "vm/StringType.h" + +namespace js::intl { + +/** + * String view of an ASCII-only string. + * + * This holds a reference to a JSLinearString and can produce a string view + * into that string. If the string is represented by Latin1 characters, the + * span is returned directly. If the string is represented by UTF-16 + * characters, it copies the char16_t characters into a char array, and then + * returns a span based on the copy. + * + * This allows us to avoid copying for the common use case that the ASCII + * characters are represented in Latin1. + */ +class MOZ_STACK_CLASS StringAsciiChars final { + // When copying string characters, use this many bytes of inline storage. 
+ static const size_t InlineCapacity = 24; + + JS::AutoCheckCannotGC nogc_; + + JSLinearString* str_; + + mozilla::Maybe<Vector<Latin1Char, InlineCapacity>> ownChars_; + + public: + explicit StringAsciiChars(JSLinearString* str) : str_(str) { + MOZ_ASSERT(StringIsAscii(str)); + } + + operator mozilla::Span<const char>() const { + if (str_->hasLatin1Chars()) { + return mozilla::AsChars(str_->latin1Range(nogc_)); + } + return mozilla::AsChars(mozilla::Span<const Latin1Char>(*ownChars_)); + } + + [[nodiscard]] bool init(JSContext* cx) { + if (str_->hasLatin1Chars()) { + return true; + } + + ownChars_.emplace(cx); + if (!ownChars_->resize(str_->length())) { + return false; + } + + js::CopyChars(ownChars_->begin(), *str_); + + return true; + } +}; + +} // namespace js::intl + +#endif // builtin_intl_StringAsciiChars_h diff --git a/js/src/builtin/intl/TimeZoneDataGenerated.h b/js/src/builtin/intl/TimeZoneDataGenerated.h new file mode 100644 index 0000000000..75e09fc522 --- /dev/null +++ b/js/src/builtin/intl/TimeZoneDataGenerated.h @@ -0,0 +1,151 @@ +// Generated by make_intl_data.py. DO NOT EDIT. 
+// tzdata version = 2024a + +#ifndef builtin_intl_TimeZoneDataGenerated_h +#define builtin_intl_TimeZoneDataGenerated_h + +namespace js { +namespace timezone { + +// Format: +// "ZoneName" // ICU-Name [time zone file] +const char* const ianaZonesTreatedAsLinksByICU[] = { + "Africa/Asmara", // Africa/Asmera [backzone] + "Africa/Timbuktu", // Africa/Bamako [backzone] + "America/Argentina/Buenos_Aires", // America/Buenos_Aires [southamerica] + "America/Argentina/Catamarca", // America/Catamarca [southamerica] + "America/Argentina/ComodRivadavia", // America/Catamarca [backzone] + "America/Argentina/Cordoba", // America/Cordoba [southamerica] + "America/Argentina/Jujuy", // America/Jujuy [southamerica] + "America/Argentina/Mendoza", // America/Mendoza [southamerica] + "America/Atikokan", // America/Coral_Harbour [backzone] + "America/Ensenada", // America/Tijuana [backzone] + "America/Indiana/Indianapolis", // America/Indianapolis [northamerica] + "America/Kentucky/Louisville", // America/Louisville [northamerica] + "America/Montreal", // America/Toronto [backzone] + "America/Nipigon", // America/Toronto [backzone] + "America/Nuuk", // America/Godthab [europe] + "America/Pangnirtung", // America/Iqaluit [backzone] + "America/Rainy_River", // America/Winnipeg [backzone] + "America/Rosario", // America/Cordoba [backzone] + "America/Thunder_Bay", // America/Toronto [backzone] + "America/Yellowknife", // America/Edmonton [backzone] + "Asia/Chongqing", // Asia/Shanghai [backzone] + "Asia/Harbin", // Asia/Shanghai [backzone] + "Asia/Ho_Chi_Minh", // Asia/Saigon [asia] + "Asia/Kashgar", // Asia/Urumqi [backzone] + "Asia/Kathmandu", // Asia/Katmandu [asia] + "Asia/Kolkata", // Asia/Calcutta [asia] + "Asia/Tel_Aviv", // Asia/Jerusalem [backzone] + "Asia/Yangon", // Asia/Rangoon [asia] + "Atlantic/Faroe", // Atlantic/Faeroe [europe] + "Atlantic/Jan_Mayen", // Arctic/Longyearbyen [backzone] + "Australia/Currie", // Australia/Hobart [backzone] + "EST", // Etc/GMT+5 [northamerica] 
+ "Europe/Belfast", // Europe/London [backzone] + "Europe/Kyiv", // Europe/Kiev [europe] + "Europe/Tiraspol", // Europe/Chisinau [backzone] + "Europe/Uzhgorod", // Europe/Kiev [backzone] + "Europe/Zaporozhye", // Europe/Kiev [backzone] + "HST", // Etc/GMT+10 [northamerica] + "MST", // Etc/GMT+7 [northamerica] + "Pacific/Chuuk", // Pacific/Truk [backzone] + "Pacific/Johnston", // Pacific/Honolulu [backzone] + "Pacific/Kanton", // Pacific/Enderbury [australasia] + "Pacific/Pohnpei", // Pacific/Ponape [backzone] +}; + +// Format: +// "LinkName", "Target" // ICU-Target [time zone file] +struct LinkAndTarget +{ + const char* const link; + const char* const target; +}; + +const LinkAndTarget ianaLinksCanonicalizedDifferentlyByICU[] = { + { "Africa/Asmera", "Africa/Asmara" }, // Africa/Asmera [backward] + { "America/Buenos_Aires", "America/Argentina/Buenos_Aires" }, // America/Buenos_Aires [backward] + { "America/Catamarca", "America/Argentina/Catamarca" }, // America/Catamarca [backward] + { "America/Cordoba", "America/Argentina/Cordoba" }, // America/Cordoba [backward] + { "America/Fort_Wayne", "America/Indiana/Indianapolis" }, // America/Indianapolis [backward] + { "America/Godthab", "America/Nuuk" }, // America/Godthab [backward] + { "America/Indianapolis", "America/Indiana/Indianapolis" }, // America/Indianapolis [backward] + { "America/Jujuy", "America/Argentina/Jujuy" }, // America/Jujuy [backward] + { "America/Kralendijk", "America/Curacao" }, // America/Kralendijk [backward] + { "America/Louisville", "America/Kentucky/Louisville" }, // America/Louisville [backward] + { "America/Lower_Princes", "America/Curacao" }, // America/Lower_Princes [backward] + { "America/Marigot", "America/Port_of_Spain" }, // America/Marigot [backward] + { "America/Mendoza", "America/Argentina/Mendoza" }, // America/Mendoza [backward] + { "America/St_Barthelemy", "America/Port_of_Spain" }, // America/St_Barthelemy [backward] + { "Antarctica/South_Pole", "Antarctica/McMurdo" }, // 
Pacific/Auckland [backward] + { "Arctic/Longyearbyen", "Europe/Oslo" }, // Arctic/Longyearbyen [backward] + { "Asia/Calcutta", "Asia/Kolkata" }, // Asia/Calcutta [backward] + { "Asia/Chungking", "Asia/Chongqing" }, // Asia/Shanghai [backward] + { "Asia/Katmandu", "Asia/Kathmandu" }, // Asia/Katmandu [backward] + { "Asia/Rangoon", "Asia/Yangon" }, // Asia/Rangoon [backward] + { "Asia/Saigon", "Asia/Ho_Chi_Minh" }, // Asia/Saigon [backward] + { "Atlantic/Faeroe", "Atlantic/Faroe" }, // Atlantic/Faeroe [backward] + { "Europe/Bratislava", "Europe/Prague" }, // Europe/Bratislava [backward] + { "Europe/Busingen", "Europe/Zurich" }, // Europe/Busingen [backward] + { "Europe/Kiev", "Europe/Kyiv" }, // Europe/Kiev [backward] + { "Europe/Mariehamn", "Europe/Helsinki" }, // Europe/Mariehamn [backward] + { "Europe/Podgorica", "Europe/Belgrade" }, // Europe/Podgorica [backward] + { "Europe/San_Marino", "Europe/Rome" }, // Europe/San_Marino [backward] + { "Europe/Vatican", "Europe/Rome" }, // Europe/Vatican [backward] + { "Pacific/Ponape", "Pacific/Pohnpei" }, // Pacific/Ponape [backward] + { "Pacific/Truk", "Pacific/Chuuk" }, // Pacific/Truk [backward] + { "Pacific/Yap", "Pacific/Chuuk" }, // Pacific/Truk [backward] + { "US/East-Indiana", "America/Indiana/Indianapolis" }, // America/Indianapolis [backward] +}; + +// Legacy ICU time zones, these are not valid IANA time zone names. We also +// disallow the old and deprecated System V time zones. 
+// https://ssl.icu-project.org/repos/icu/trunk/icu4c/source/tools/tzcode/icuzones +const char* const legacyICUTimeZones[] = { + "ACT", + "AET", + "AGT", + "ART", + "AST", + "BET", + "BST", + "CAT", + "CNT", + "CST", + "CTT", + "Canada/East-Saskatchewan", + "EAT", + "ECT", + "IET", + "IST", + "JST", + "MIT", + "NET", + "NST", + "PLT", + "PNT", + "PRT", + "PST", + "SST", + "US/Pacific-New", + "VST", + "SystemV/AST4", + "SystemV/AST4ADT", + "SystemV/CST6", + "SystemV/CST6CDT", + "SystemV/EST5", + "SystemV/EST5EDT", + "SystemV/HST10", + "SystemV/MST7", + "SystemV/MST7MDT", + "SystemV/PST8", + "SystemV/PST8PDT", + "SystemV/YST9", + "SystemV/YST9YDT", +}; + +} // namespace timezone +} // namespace js + +#endif /* builtin_intl_TimeZoneDataGenerated_h */ diff --git a/js/src/builtin/intl/make_intl_data.py b/js/src/builtin/intl/make_intl_data.py new file mode 100755 index 0000000000..7042c0a005 --- /dev/null +++ b/js/src/builtin/intl/make_intl_data.py @@ -0,0 +1,4138 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +""" Usage: + make_intl_data.py langtags [cldr_common.zip] + make_intl_data.py tzdata + make_intl_data.py currency + make_intl_data.py units + make_intl_data.py numbering + + + Target "langtags": + This script extracts information about 1) mappings between deprecated and + current Unicode BCP 47 locale identifiers, and 2) deprecated and current + BCP 47 Unicode extension value from CLDR, and converts it to C++ mapping + code in intl/components/LocaleGenerated.cpp. The code is used in + intl/components/Locale.cpp. + + + Target "tzdata": + This script computes which time zone informations are not up-to-date in ICU + and provides the necessary mappings to workaround this problem. 
+ https://ssl.icu-project.org/trac/ticket/12044 + + + Target "currency": + Generates the mapping from currency codes to decimal digits used for them. + + + Target "units": + Generate source and test files using the list of so-called "sanctioned unit + identifiers" and verifies that the ICU data filter includes these units. + + + Target "numbering": + Generate source and test files using the list of numbering systems with + simple digit mappings and verifies that it's in sync with ICU/CLDR. +""" + +import io +import json +import os +import re +import sys +import tarfile +import tempfile +from contextlib import closing +from functools import partial, total_ordering +from itertools import chain, groupby, tee +from operator import attrgetter, itemgetter +from zipfile import ZipFile + +import yaml + +if sys.version_info.major == 2: + from itertools import ifilter as filter + from itertools import ifilterfalse as filterfalse + from itertools import imap as map + from itertools import izip_longest as zip_longest + + from urllib2 import Request as UrlRequest + from urllib2 import urlopen + from urlparse import urlsplit +else: + from itertools import filterfalse, zip_longest + from urllib.parse import urlsplit + from urllib.request import Request as UrlRequest + from urllib.request import urlopen + + +# From https://docs.python.org/3/library/itertools.html +def grouper(iterable, n, fillvalue=None): + "Collect data into fixed-length chunks or blocks" + # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx" + args = [iter(iterable)] * n + return zip_longest(*args, fillvalue=fillvalue) + + +def writeMappingHeader(println, description, source, url): + if type(description) is not list: + description = [description] + for desc in description: + println("// {0}".format(desc)) + println("// Derived from {0}.".format(source)) + println("// {0}".format(url)) + + +def writeMappingsVar(println, mapping, name, description, source, url): + """Writes a variable definition with a mapping table. 
+ + Writes the contents of dictionary |mapping| through the |println| + function with the given variable name and a comment with description, + fileDate, and URL. + """ + println("") + writeMappingHeader(println, description, source, url) + println("var {0} = {{".format(name)) + for key, value in sorted(mapping.items(), key=itemgetter(0)): + println(' "{0}": "{1}",'.format(key, value)) + println("};") + + +def writeMappingsBinarySearch( + println, + fn_name, + type_name, + name, + validate_fn, + validate_case_fn, + mappings, + tag_maxlength, + description, + source, + url, +): + """Emit code to perform a binary search on language tag subtags. + + Uses the contents of |mapping|, which can either be a dictionary or set, + to emit a mapping function to find subtag replacements. + """ + println("") + writeMappingHeader(println, description, source, url) + println( + """ +bool mozilla::intl::Locale::{0}({1} {2}) {{ + MOZ_ASSERT({3}({2}.Span())); + MOZ_ASSERT({4}({2}.Span())); +""".format( + fn_name, type_name, name, validate_fn, validate_case_fn + ).strip() + ) + writeMappingsBinarySearchBody(println, name, name, mappings, tag_maxlength) + + println( + """ +}""".lstrip( + "\n" + ) + ) + + +def writeMappingsBinarySearchBody( + println, source_name, target_name, mappings, tag_maxlength +): + def write_array(subtags, name, length, fixed): + if fixed: + println( + " static const char {}[{}][{}] = {{".format( + name, len(subtags), length + 1 + ) + ) + else: + println(" static const char* {}[{}] = {{".format(name, len(subtags))) + + # Group in pairs of ten to not exceed the 80 line column limit. + for entries in grouper(subtags, 10): + entries = ( + '"{}"'.format(tag).rjust(length + 2) + for tag in entries + if tag is not None + ) + println(" {},".format(", ".join(entries))) + + println(" };") + + trailing_return = True + + # Sort the subtags by length. 
That enables using an optimized comparator + # for the binary search, which only performs a single |memcmp| for multiple + # of two subtag lengths. + mappings_keys = mappings.keys() if type(mappings) == dict else mappings + for length, subtags in groupby(sorted(mappings_keys, key=len), len): + # Omit the length check if the current length is the maximum length. + if length != tag_maxlength: + println( + """ + if ({}.Length() == {}) {{ +""".format( + source_name, length + ).rstrip( + "\n" + ) + ) + else: + trailing_return = False + println( + """ + { +""".rstrip( + "\n" + ) + ) + + # The subtags need to be sorted for binary search to work. + subtags = sorted(subtags) + + def equals(subtag): + return """{}.EqualTo("{}")""".format(source_name, subtag) + + # Don't emit a binary search for short lists. + if len(subtags) == 1: + if type(mappings) == dict: + println( + """ + if ({}) {{ + {}.Set(mozilla::MakeStringSpan("{}")); + return true; + }} + return false; +""".format( + equals(subtags[0]), target_name, mappings[subtags[0]] + ).strip( + "\n" + ) + ) + else: + println( + """ + return {}; +""".format( + equals(subtags[0]) + ).strip( + "\n" + ) + ) + elif len(subtags) <= 4: + if type(mappings) == dict: + for subtag in subtags: + println( + """ + if ({}) {{ + {}.Set("{}"); + return true; + }} +""".format( + equals(subtag), target_name, mappings[subtag] + ).strip( + "\n" + ) + ) + + println( + """ + return false; +""".strip( + "\n" + ) + ) + else: + cond = (equals(subtag) for subtag in subtags) + cond = (" ||\n" + " " * (4 + len("return "))).join(cond) + println( + """ + return {}; +""".format( + cond + ).strip( + "\n" + ) + ) + else: + write_array(subtags, source_name + "s", length, True) + + if type(mappings) == dict: + write_array([mappings[k] for k in subtags], "aliases", length, False) + + println( + """ + if (const char* replacement = SearchReplacement({0}s, aliases, {0})) {{ + {1}.Set(mozilla::MakeStringSpan(replacement)); + return true; + }} + return false; 
+""".format( + source_name, target_name + ).rstrip() + ) + else: + println( + """ + return HasReplacement({0}s, {0}); +""".format( + source_name + ).rstrip() + ) + + println( + """ + } +""".strip( + "\n" + ) + ) + + if trailing_return: + println( + """ + return false;""" + ) + + +def writeComplexLanguageTagMappings( + println, complex_language_mappings, description, source, url +): + println("") + writeMappingHeader(println, description, source, url) + println( + """ +void mozilla::intl::Locale::PerformComplexLanguageMappings() { + MOZ_ASSERT(IsStructurallyValidLanguageTag(Language().Span())); + MOZ_ASSERT(IsCanonicallyCasedLanguageTag(Language().Span())); +""".lstrip() + ) + + # Merge duplicate language entries. + language_aliases = {} + for deprecated_language, (language, script, region) in sorted( + complex_language_mappings.items(), key=itemgetter(0) + ): + key = (language, script, region) + if key not in language_aliases: + language_aliases[key] = [] + else: + language_aliases[key].append(deprecated_language) + + first_language = True + for deprecated_language, (language, script, region) in sorted( + complex_language_mappings.items(), key=itemgetter(0) + ): + key = (language, script, region) + if deprecated_language in language_aliases[key]: + continue + + if_kind = "if" if first_language else "else if" + first_language = False + + cond = ( + 'Language().EqualTo("{}")'.format(lang) + for lang in [deprecated_language] + language_aliases[key] + ) + cond = (" ||\n" + " " * (2 + len(if_kind) + 2)).join(cond) + + println( + """ + {} ({}) {{""".format( + if_kind, cond + ).strip( + "\n" + ) + ) + + println( + """ + SetLanguage("{}");""".format( + language + ).strip( + "\n" + ) + ) + + if script is not None: + println( + """ + if (Script().Missing()) {{ + SetScript("{}"); + }}""".format( + script + ).strip( + "\n" + ) + ) + if region is not None: + println( + """ + if (Region().Missing()) {{ + SetRegion("{}"); + }}""".format( + region + ).strip( + "\n" + ) + ) + 
println( + """ + }""".strip( + "\n" + ) + ) + + println( + """ +} +""".strip( + "\n" + ) + ) + + +def writeComplexRegionTagMappings( + println, complex_region_mappings, description, source, url +): + println("") + writeMappingHeader(println, description, source, url) + println( + """ +void mozilla::intl::Locale::PerformComplexRegionMappings() { + MOZ_ASSERT(IsStructurallyValidLanguageTag(Language().Span())); + MOZ_ASSERT(IsCanonicallyCasedLanguageTag(Language().Span())); + MOZ_ASSERT(IsStructurallyValidRegionTag(Region().Span())); + MOZ_ASSERT(IsCanonicallyCasedRegionTag(Region().Span())); +""".lstrip() + ) + + # |non_default_replacements| is a list and hence not hashable. Convert it + # to a string to get a proper hashable value. + def hash_key(default, non_default_replacements): + return (default, str(sorted(str(v) for v in non_default_replacements))) + + # Merge duplicate region entries. + region_aliases = {} + for deprecated_region, (default, non_default_replacements) in sorted( + complex_region_mappings.items(), key=itemgetter(0) + ): + key = hash_key(default, non_default_replacements) + if key not in region_aliases: + region_aliases[key] = [] + else: + region_aliases[key].append(deprecated_region) + + first_region = True + for deprecated_region, (default, non_default_replacements) in sorted( + complex_region_mappings.items(), key=itemgetter(0) + ): + key = hash_key(default, non_default_replacements) + if deprecated_region in region_aliases[key]: + continue + + if_kind = "if" if first_region else "else if" + first_region = False + + cond = ( + 'Region().EqualTo("{}")'.format(region) + for region in [deprecated_region] + region_aliases[key] + ) + cond = (" ||\n" + " " * (2 + len(if_kind) + 2)).join(cond) + + println( + """ + {} ({}) {{""".format( + if_kind, cond + ).strip( + "\n" + ) + ) + + replacement_regions = sorted( + {region for (_, _, region) in non_default_replacements} + ) + + first_case = True + for replacement_region in replacement_regions: + 
replacement_language_script = sorted( + (language, script) + for (language, script, region) in (non_default_replacements) + if region == replacement_region + ) + + if_kind = "if" if first_case else "else if" + first_case = False + + def compare_tags(language, script): + if script is None: + return 'Language().EqualTo("{}")'.format(language) + return '(Language().EqualTo("{}") && Script().EqualTo("{}"))'.format( + language, script + ) + + cond = ( + compare_tags(language, script) + for (language, script) in replacement_language_script + ) + cond = (" ||\n" + " " * (4 + len(if_kind) + 2)).join(cond) + + println( + """ + {} ({}) {{ + SetRegion("{}"); + }}""".format( + if_kind, cond, replacement_region + ) + .rstrip() + .strip("\n") + ) + + println( + """ + else {{ + SetRegion("{}"); + }} + }}""".format( + default + ) + .rstrip() + .strip("\n") + ) + + println( + """ +} +""".strip( + "\n" + ) + ) + + +def writeVariantTagMappings(println, variant_mappings, description, source, url): + """Writes a function definition that maps variant subtags.""" + println( + """ +static const char* ToCharPointer(const char* str) { + return str; +} + +static const char* ToCharPointer(const mozilla::intl::UniqueChars& str) { + return str.get(); +} + +template <typename T, typename U = T> +static bool IsLessThan(const T& a, const U& b) { + return strcmp(ToCharPointer(a), ToCharPointer(b)) < 0; +} +""" + ) + writeMappingHeader(println, description, source, url) + println( + """ +bool mozilla::intl::Locale::PerformVariantMappings() { + // The variant subtags need to be sorted for binary search. 
+ MOZ_ASSERT(std::is_sorted(mVariants.begin(), mVariants.end(), + IsLessThan<decltype(mVariants)::ElementType>)); + + auto removeVariantAt = [&](size_t index) { + mVariants.erase(mVariants.begin() + index); + }; + + auto insertVariantSortedIfNotPresent = [&](const char* variant) { + auto* p = std::lower_bound( + mVariants.begin(), mVariants.end(), variant, + IsLessThan<decltype(mVariants)::ElementType, decltype(variant)>); + + // Don't insert the replacement when already present. + if (p != mVariants.end() && strcmp(p->get(), variant) == 0) { + return true; + } + + // Insert the preferred variant in sort order. + auto preferred = DuplicateStringToUniqueChars(variant); + return !!mVariants.insert(p, std::move(preferred)); + }; + + for (size_t i = 0; i < mVariants.length();) { + const char* variant = mVariants[i].get(); + MOZ_ASSERT(IsCanonicallyCasedVariantTag(mozilla::MakeStringSpan(variant))); +""".lstrip() + ) + + (no_alias, with_alias) = partition( + variant_mappings.items(), lambda item: item[1] is None + ) + + no_replacements = " ||\n ".join( + f"""strcmp(variant, "{deprecated_variant}") == 0""" + for (deprecated_variant, _) in sorted(no_alias, key=itemgetter(0)) + ) + + println( + f""" + if ({no_replacements}) {{ + removeVariantAt(i); + }} +""".strip( + "\n" + ) + ) + + for deprecated_variant, (type, replacement) in sorted( + with_alias, key=itemgetter(0) + ): + println( + f""" + else if (strcmp(variant, "{deprecated_variant}") == 0) {{ + removeVariantAt(i); +""".strip( + "\n" + ) + ) + + if type == "language": + println( + f""" + SetLanguage("{replacement}"); +""".strip( + "\n" + ) + ) + elif type == "region": + println( + f""" + SetRegion("{replacement}"); +""".strip( + "\n" + ) + ) + else: + assert type == "variant" + println( + f""" + if (!insertVariantSortedIfNotPresent("{replacement}")) {{ + return false; + }} +""".strip( + "\n" + ) + ) + + println( + """ + } +""".strip( + "\n" + ) + ) + + println( + """ + else { + i++; + } + } + return true; +} 
+""".strip( + "\n" + ) + ) + + +def writeLegacyMappingsFunction(println, legacy_mappings, description, source, url): + """Writes a function definition that maps legacy language tags.""" + println("") + writeMappingHeader(println, description, source, url) + println( + """\ +bool mozilla::intl::Locale::UpdateLegacyMappings() { + // We're mapping legacy tags to non-legacy form here. + // Other tags remain unchanged. + // + // Legacy tags are either sign language tags ("sgn") or have one or multiple + // variant subtags. Therefore we can quickly exclude most tags by checking + // these two subtags. + + MOZ_ASSERT(IsCanonicallyCasedLanguageTag(Language().Span())); + + if (!Language().EqualTo("sgn") && mVariants.length() == 0) { + return true; + } + +#ifdef DEBUG + for (const auto& variant : Variants()) { + MOZ_ASSERT(IsStructurallyValidVariantTag(variant)); + MOZ_ASSERT(IsCanonicallyCasedVariantTag(variant)); + } +#endif + + // The variant subtags need to be sorted for binary search. + MOZ_ASSERT(std::is_sorted(mVariants.begin(), mVariants.end(), + IsLessThan<decltype(mVariants)::ElementType>)); + + auto findVariant = [this](const char* variant) { + auto* p = std::lower_bound(mVariants.begin(), mVariants.end(), variant, + IsLessThan<decltype(mVariants)::ElementType, + decltype(variant)>); + + if (p != mVariants.end() && strcmp(p->get(), variant) == 0) { + return p; + } + return static_cast<decltype(p)>(nullptr); + }; + + auto insertVariantSortedIfNotPresent = [&](const char* variant) { + auto* p = std::lower_bound(mVariants.begin(), mVariants.end(), variant, + IsLessThan<decltype(mVariants)::ElementType, + decltype(variant)>); + + // Don't insert the replacement when already present. + if (p != mVariants.end() && strcmp(p->get(), variant) == 0) { + return true; + } + + // Insert the preferred variant in sort order. 
+ auto preferred = DuplicateStringToUniqueChars(variant); + return !!mVariants.insert(p, std::move(preferred)); + }; + + auto removeVariant = [&](auto* p) { + size_t index = std::distance(mVariants.begin(), p); + mVariants.erase(mVariants.begin() + index); + }; + + auto removeVariants = [&](auto* p, auto* q) { + size_t pIndex = std::distance(mVariants.begin(), p); + size_t qIndex = std::distance(mVariants.begin(), q); + MOZ_ASSERT(pIndex < qIndex, "variant subtags are sorted"); + + mVariants.erase(mVariants.begin() + qIndex); + mVariants.erase(mVariants.begin() + pIndex); + };""" + ) + + # Helper class for pattern matching. + class AnyClass: + def __eq__(self, obj): + return obj is not None + + Any = AnyClass() + + # Group the mappings by language. + legacy_mappings_by_language = {} + for type, replacement in legacy_mappings.items(): + (language, _, _, _) = type + legacy_mappings_by_language.setdefault(language, {})[type] = replacement + + # Handle the empty language case first. + if None in legacy_mappings_by_language: + # Get the mappings and remove them from the dict. + mappings = legacy_mappings_by_language.pop(None) + + # This case only applies for the "hepburn-heploc" -> "alalc97" + # mapping, so just inline it here. + from_tag = (None, None, None, "hepburn-heploc") + to_tag = (None, None, None, "alalc97") + + assert len(mappings) == 1 + assert mappings[from_tag] == to_tag + + println( + """ + if (mVariants.length() >= 2) { + if (auto* hepburn = findVariant("hepburn")) { + if (auto* heploc = findVariant("heploc")) { + removeVariants(hepburn, heploc); + + if (!insertVariantSortedIfNotPresent("alalc97")) { + return false; + } + } + } + } +""" + ) + + # Handle sign languages next. + if "sgn" in legacy_mappings_by_language: + mappings = legacy_mappings_by_language.pop("sgn") + + # Legacy sign language mappings have the form "sgn-XX" where "XX" is + # some region code. 
+ assert all(type == ("sgn", None, Any, None) for type in mappings.keys()) + + # Legacy sign languages are mapped to a single language subtag. + assert all( + replacement == (Any, None, None, None) for replacement in mappings.values() + ) + + println( + """ + if (Language().EqualTo("sgn")) { + if (Region().Present() && SignLanguageMapping(mLanguage, Region())) { + mRegion.Set(mozilla::MakeStringSpan("")); + } + } +""".rstrip().lstrip( + "\n" + ) + ) + + # Finally handle all remaining cases. + + # The remaining mappings have neither script nor region subtags in the source locale. + assert all( + type == (Any, None, None, Any) + for mappings in legacy_mappings_by_language.values() + for type in mappings.keys() + ) + + # And they have neither script nor region nor variant subtags in the target locale. + assert all( + replacement == (Any, None, None, None) + for mappings in legacy_mappings_by_language.values() + for replacement in mappings.values() + ) + + # Compact the mappings table by removing empty fields. + legacy_mappings_by_language = { + lang: { + variants: r_language + for ((_, _, _, variants), (r_language, _, _, _)) in mappings.items() + } + for (lang, mappings) in legacy_mappings_by_language.items() + } + + # Try to combine the remaining cases. + legacy_mappings_compact = {} + + # Python can't hash dicts or lists, so use the string representation as the hash key. 
+ def hash_key(mappings): + return str(sorted(mappings.items(), key=itemgetter(0))) + + for lang, mappings in sorted( + legacy_mappings_by_language.items(), key=itemgetter(0) + ): + key = hash_key(mappings) + legacy_mappings_compact.setdefault(key, []).append(lang) + + for langs in legacy_mappings_compact.values(): + language_equal_to = ( + f"""Language().EqualTo("{lang}")""" for lang in sorted(langs) + ) + cond = f""" ||\n{" " * len(" else if (")}""".join(language_equal_to) + + println( + f""" + else if ({cond}) {{ +""".rstrip().lstrip( + "\n" + ) + ) + + mappings = legacy_mappings_by_language[langs[0]] + + # Count the variant subtags to determine the sort order. + def variant_size(m): + (k, _) = m + return len(k.split("-")) + + # Alias rules are applied by largest union size first. + for size, mappings_by_size in groupby( + sorted(mappings.items(), key=variant_size, reverse=True), key=variant_size + ): + # Convert grouper object to dict. + mappings_by_size = dict(mappings_by_size) + + is_first = True + chain_if = size == 1 + + # Alias rules are applied in alphabetical order + for variants, r_language in sorted( + mappings_by_size.items(), key=itemgetter(0) + ): + sorted_variants = sorted(variants.split("-")) + len_variants = len(sorted_variants) + + maybe_else = "else " if chain_if and not is_first else "" + is_first = False + + for i, variant in enumerate(sorted_variants): + println( + f""" + {" " * i}{maybe_else}if (auto* {variant} = findVariant("{variant}")) {{ +""".rstrip().lstrip( + "\n" + ) + ) + + indent = " " * len_variants + + println( + f""" + {indent}removeVariant{"s" if len_variants > 1 else ""}({", ".join(sorted_variants)}); + {indent}SetLanguage("{r_language}"); + {indent}{"return true;" if not chain_if else ""} +""".rstrip().lstrip( + "\n" + ) + ) + + for i in range(len_variants, 0, -1): + println( + f""" + {" " * (i - 1)}}} +""".rstrip().lstrip( + "\n" + ) + ) + + println( + """ + } +""".rstrip().lstrip( + "\n" + ) + ) + + println( + """ + return 
true; +}""" + ) + + +def writeSignLanguageMappingsFunction( + println, legacy_mappings, description, source, url +): + """Writes a function definition that maps legacy sign language tags.""" + println("") + writeMappingHeader(println, description, source, url) + println( + """\ +bool mozilla::intl::Locale::SignLanguageMapping(LanguageSubtag& language, + const RegionSubtag& region) { + MOZ_ASSERT(language.EqualTo("sgn")); + MOZ_ASSERT(IsStructurallyValidRegionTag(region.Span())); + MOZ_ASSERT(IsCanonicallyCasedRegionTag(region.Span())); +""".rstrip() + ) + + region_mappings = { + rg: lg + for ((lang, _, rg, _), (lg, _, _, _)) in legacy_mappings.items() + if lang == "sgn" + } + + source_name = "region" + target_name = "language" + tag_maxlength = 3 + writeMappingsBinarySearchBody( + println, source_name, target_name, region_mappings, tag_maxlength + ) + + println( + """ +}""".lstrip() + ) + + +def readSupplementalData(core_file): + """Reads CLDR Supplemental Data and extracts information for Intl.js. + + Information extracted: + - legacyMappings: mappings from legacy tags to preferred complete language tags + - languageMappings: mappings from language subtags to preferred subtags + - complexLanguageMappings: mappings from language subtags with complex rules + - regionMappings: mappings from region subtags to preferred subtags + - complexRegionMappings: mappings from region subtags with complex rules + - variantMappings: mappings from variant subtags to preferred subtags + - likelySubtags: likely subtags used for generating test data only + Returns these mappings as dictionaries. + """ + import xml.etree.ElementTree as ET + + # From Unicode BCP 47 locale identifier <https://unicode.org/reports/tr35/>. + re_unicode_language_id = re.compile( + r""" + ^ + # unicode_language_id = unicode_language_subtag + # unicode_language_subtag = alpha{2,3} | alpha{5,8} + (?P<language>[a-z]{2,3}|[a-z]{5,8}) + + # (sep unicode_script_subtag)? 
def readSupplementalData(core_file):
    """Reads CLDR Supplemental Data and extracts information for Intl.js.

    |core_file| must provide an |open(name)| method returning a readable file
    for the named archive member (the caller in this file passes a ZipFile of
    CLDR's common.zip). The members read are
    "common/supplemental/supplementalMetadata.xml" and
    "common/supplemental/likelySubtags.xml".

    Information extracted:
    - legacyMappings: mappings from legacy tags to preferred complete language tags
    - languageMappings: mappings from language subtags to preferred subtags
    - complexLanguageMappings: mappings from language subtags with complex rules
    - scriptMappings: mappings from script subtags to preferred subtags
    - regionMappings: mappings from region subtags to preferred subtags
    - complexRegionMappings: mappings from region subtags with complex rules
    - variantMappings: mappings from variant subtags to preferred subtags
    - likelySubtags: likely subtags used for generating test data only
    Returns these mappings as dictionaries.

    Raises ValueError (or fails an assertion) when the CLDR data contains a
    kind of alias rule this script doesn't support yet.
    """
    import xml.etree.ElementTree as ET

    # From Unicode BCP 47 locale identifier <https://unicode.org/reports/tr35/>.
    re_unicode_language_id = re.compile(
        r"""
        ^
        # unicode_language_id = unicode_language_subtag
        #     unicode_language_subtag = alpha{2,3} | alpha{5,8}
        (?P<language>[a-z]{2,3}|[a-z]{5,8})

        # (sep unicode_script_subtag)?
        #     unicode_script_subtag = alpha{4}
        (?:-(?P<script>[a-z]{4}))?

        # (sep unicode_region_subtag)?
        #     unicode_region_subtag = (alpha{2} | digit{3})
        (?:-(?P<region>([a-z]{2}|[0-9]{3})))?

        # (sep unicode_variant_subtag)*
        #     unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3})
        (?P<variants>(-([a-z0-9]{5,8}|[0-9][a-z0-9]{3}))+)?
        $
        """,
        re.IGNORECASE | re.VERBOSE,
    )

    # CLDR uses "_" as the separator for some elements. Replace it with "-".
    def bcp47_id(cldr_id):
        return cldr_id.replace("_", "-")

    # Return the tuple (language, script, region, variants) and assert all
    # subtags are in canonical case.
    def bcp47_canonical(language, script, region, variants):
        # Canonical case for language subtags is lower case.
        assert language is None or language.lower() == language

        # Canonical case for script subtags is title case.
        assert script is None or script.title() == script

        # Canonical case for region subtags is upper case.
        assert region is None or region.upper() == region

        # Canonical case for variant subtags is lower case.
        assert variants is None or variants.lower() == variants

        # The "variants" regex group starts with the "-" separator; strip it.
        return (language, script, region, variants[1:] if variants else None)

    # Language ids are interpreted as multi-maps in
    # <https://www.unicode.org/reports/tr35/#LocaleId_Canonicalization>.
    #
    # See UTS35, §Annex C, Definitions - 1. Multimap interpretation.
    def language_id_to_multimap(language_id):
        match = re_unicode_language_id.match(language_id)
        assert (
            match is not None
        ), f"{language_id} invalid Unicode BCP 47 locale identifier"

        canonical_language_id = bcp47_canonical(
            *match.group("language", "script", "region", "variants")
        )
        (language, _, _, _) = canonical_language_id

        # Normalize "und" language to None, but keep the rest as is.
        return (language if language != "und" else None,) + canonical_language_id[1:]

    # Alias rules keyed by the multimap tuple of the aliased id.
    rules = {}
    # For territory aliases with several replacements: the full list of
    # replacement tuples, keyed like |rules|.
    territory_exception_rules = {}

    tree = ET.parse(core_file.open("common/supplemental/supplementalMetadata.xml"))

    # Load the rules from supplementalMetadata.xml.
    #
    # See UTS35, §Annex C, Definitions - 2. Alias elements.
    # See UTS35, §Annex C, Preprocessing.
    for alias_name in [
        "languageAlias",
        "scriptAlias",
        "territoryAlias",
        "variantAlias",
    ]:
        for alias in tree.iterfind(".//" + alias_name):
            # Replace '_' by '-'.
            type = bcp47_id(alias.get("type"))
            replacement = bcp47_id(alias.get("replacement"))

            # Prefix with "und-".
            if alias_name != "languageAlias":
                type = "und-" + type

            # Discard all rules where the type is an invalid languageId.
            if re_unicode_language_id.match(type) is None:
                continue

            type = language_id_to_multimap(type)

            # Multiple, whitespace-separated territory replacements may be present.
            if alias_name == "territoryAlias" and " " in replacement:
                replacements = replacement.split(" ")
                replacement_list = [
                    language_id_to_multimap("und-" + r) for r in replacements
                ]

                assert (
                    type not in territory_exception_rules
                ), f"Duplicate alias rule: {type}"

                territory_exception_rules[type] = replacement_list

                # The first element is the default territory replacement.
                replacement = replacements[0]

            # Prefix with "und-".
            if alias_name != "languageAlias":
                replacement = "und-" + replacement

            replacement = language_id_to_multimap(replacement)

            assert type not in rules, f"Duplicate alias rule: {type}"

            rules[type] = replacement

    # Helper class for pattern matching: an instance compares equal to every
    # value except None, so it acts as a "must be present" wildcard when
    # comparing rule tuples below.
    class AnyClass:
        def __eq__(self, obj):
            return obj is not None

    Any = AnyClass()

    modified_rules = True
    loop_count = 0

    # Repeat until a pass produces no rule that isn't already in |rules|.
    while modified_rules:
        modified_rules = False
        loop_count += 1

        # UTS 35 defines that canonicalization is applied until a fixed point has
        # been reached. This iterative application of the canonicalization algorithm
        # is only needed for a relatively small set of rules, so we can precompute
        # the transitive closure of all rules here and then perform a single pass
        # when canonicalizing language tags at runtime.
        transitive_rules = {}

        # Compute the transitive closure.
        # Any case which currently doesn't occur in the CLDR sources isn't supported
        # and will lead to throwing an error.
        for type, replacement in rules.items():
            (language, script, region, variants) = type
            (r_language, r_script, r_region, r_variants) = replacement

            for i_type, i_replacement in rules.items():
                (i_language, i_script, i_region, i_variants) = i_type
                (i_r_language, i_r_script, i_r_region, i_r_variants) = i_replacement

                if i_language is not None and i_language == r_language:
                    # This case currently only occurs when neither script nor region
                    # subtags are present. A single variant subtag may be present
                    # in |type|. And |i_type| definitely has a single variant subtag.
                    # Should this ever change, update this code accordingly.
                    assert type == (Any, None, None, None) or type == (
                        Any,
                        None,
                        None,
                        Any,
                    )
                    assert replacement == (Any, None, None, None)
                    assert i_type == (Any, None, None, Any)
                    assert i_replacement == (Any, None, None, None)

                    # This case happens for the rules
                    #   "zh-guoyu -> zh",
                    #   "zh-hakka -> hak", and
                    #   "und-hakka -> und".
                    # Given the possible input "zh-guoyu-hakka", the first rule will
                    # change it to "zh-hakka", and then the second rule can be
                    # applied. (The third rule isn't applied ever.)
                    #
                    # Let's assume there's a hypothetical rule
                    #   "zh-aaaaa" -> "en"
                    # And we have the input "zh-aaaaa-hakka", then "zh-aaaaa -> en"
                    # is applied before "zh-hakka -> hak", because rules are sorted
                    # alphabetically. That means the overall result is "en":
                    # "zh-aaaaa-hakka" is first canonicalized to "en-hakka" and then
                    # "hakka" is removed through the third rule.
                    #
                    # No current rule requires to handle this special case, so we
                    # don't yet support it.
                    assert variants is None or variants <= i_variants

                    # Combine all variants and remove duplicates.
                    vars = set(
                        i_variants.split("-")
                        + (variants.split("-") if variants else [])
                    )

                    # Add the variants alphabetically sorted.
                    n_type = (language, None, None, "-".join(sorted(vars)))

                    assert (
                        n_type not in transitive_rules
                        or transitive_rules[n_type] == i_replacement
                    )
                    transitive_rules[n_type] = i_replacement

                    continue

                if i_script is not None and i_script == r_script:
                    # This case currently doesn't occur, so we don't yet support it.
                    raise ValueError(
                        f"{type} -> {replacement} :: {i_type} -> {i_replacement}"
                    )
                if i_region is not None and i_region == r_region:
                    # This case currently only applies for sign language
                    # replacements. Similar to the language subtag case any other
                    # combination isn't currently supported.
                    assert type == (None, None, Any, None)
                    assert replacement == (None, None, Any, None)
                    assert i_type == ("sgn", None, Any, None)
                    assert i_replacement == (Any, None, None, None)

                    n_type = ("sgn", None, region, None)

                    assert n_type not in transitive_rules
                    transitive_rules[n_type] = i_replacement

                    continue

                if i_variants is not None and i_variants == r_variants:
                    # This case currently doesn't occur, so we don't yet support it.
                    raise ValueError(
                        f"{type} -> {replacement} :: {i_type} -> {i_replacement}"
                    )

        # Ensure there are no contradicting rules.
        assert all(
            rules[type] == replacement
            for (type, replacement) in transitive_rules.items()
            if type in rules
        )

        # If |transitive_rules| is not a subset of |rules|, new rules will be added.
        modified_rules = not (transitive_rules.keys() <= rules.keys())

        # Ensure we only have to iterate more than once for the "guoyu-{hakka,xiang}"
        # case. Failing this assertion means either there's a bug when computing the
        # stop condition of this loop or a new kind of legacy language tags was added.
        if modified_rules and loop_count > 1:
            new_rules = {k for k in transitive_rules.keys() if k not in rules}
            for k in new_rules:
                assert k == (Any, None, None, "guoyu-hakka") or k == (
                    Any,
                    None,
                    None,
                    "guoyu-xiang",
                )

        # Merge the transitive rules.
        rules.update(transitive_rules)

    # Computes the size of the union of all field value sets.
    def multi_map_size(locale_id):
        (language, script, region, variants) = locale_id

        return (
            (1 if language is not None else 0)
            + (1 if script is not None else 0)
            + (1 if region is not None else 0)
            + (len(variants.split("-")) if variants is not None else 0)
        )

    # Dictionary of legacy mappings, contains raw rules, e.g.
    # (None, None, None, "hepburn-heploc") -> (None, None, None, "alalc97").
    legacy_mappings = {}

    # Dictionary of simple language subtag mappings, e.g. "in" -> "id".
    language_mappings = {}

    # Dictionary of complex language subtag mappings, modifying more than one
    # subtag, e.g. "sh" -> ("sr", "Latn", None) and "cnr" -> ("sr", None, "ME").
    complex_language_mappings = {}

    # Dictionary of simple script subtag mappings, e.g. "Qaai" -> "Zinh".
    script_mappings = {}

    # Dictionary of simple region subtag mappings, e.g. "DD" -> "DE".
    region_mappings = {}

    # Dictionary of complex region subtag mappings, containing more than one
    # replacement, e.g. "SU" -> ("RU", ["AM", "AZ", "BY", ...]).
    complex_region_mappings = {}

    # Dictionary of aliased variant subtags to a tuple of preferred replacement
    # type and replacement, e.g. "arevela" -> ("language", "hy") or
    # "aaland" -> ("region", "AX") or "heploc" -> ("variant", "alalc97").
    variant_mappings = {}

    # Preprocess all rules so we can perform a single lookup per subtag at runtime.
    for type, replacement in rules.items():
        (language, script, region, variants) = type
        (r_language, r_script, r_region, r_variants) = replacement

        type_map_size = multi_map_size(type)

        # Most mappings are one-to-one and can be encoded through lookup tables.
        if type_map_size == 1:
            if language is not None:
                assert r_language is not None, "Can't remove a language subtag"

                # We don't yet support this case.
                assert (
                    r_variants is None
                ), f"Unhandled variant replacement in language alias: {replacement}"

                if replacement == (Any, None, None, None):
                    language_mappings[language] = r_language
                else:
                    # Drop the (always None) variants field.
                    complex_language_mappings[language] = replacement[:-1]
            elif script is not None:
                # We don't support removing script subtags.
                assert (
                    r_script is not None
                ), f"Can't remove a script subtag: {replacement}"

                # We only support one-to-one script mappings for now.
                assert replacement == (
                    None,
                    Any,
                    None,
                    None,
                ), f"Unhandled replacement in script alias: {replacement}"

                script_mappings[script] = r_script
            elif region is not None:
                # We don't support removing region subtags.
                assert (
                    r_region is not None
                ), f"Can't remove a region subtag: {replacement}"

                # We only support one-to-one region mappings for now.
                assert replacement == (
                    None,
                    None,
                    Any,
                    None,
                ), f"Unhandled replacement in region alias: {replacement}"

                if type not in territory_exception_rules:
                    region_mappings[region] = r_region
                else:
                    # Keep only the region field of every candidate replacement.
                    complex_region_mappings[region] = [
                        r_region
                        for (_, _, r_region, _) in territory_exception_rules[type]
                    ]
            else:
                assert variants is not None
                assert len(variants.split("-")) == 1

                # We only support one-to-one variant mappings for now.
                assert (
                    multi_map_size(replacement) <= 1
                ), f"Unhandled replacement in variant alias: {replacement}"

                if r_language is not None:
                    variant_mappings[variants] = ("language", r_language)
                elif r_script is not None:
                    variant_mappings[variants] = ("script", r_script)
                elif r_region is not None:
                    variant_mappings[variants] = ("region", r_region)
                elif r_variants is not None:
                    assert len(r_variants.split("-")) == 1
                    variant_mappings[variants] = ("variant", r_variants)
                else:
                    # Empty replacement: recorded as None.
                    variant_mappings[variants] = None
        else:
            # Alias rules which have multiple input fields must be processed
            # first. This applies only to a handful of rules, so our generated
            # code adds fast paths to skip these rules in the common case.

            # Case 1: Language and at least one variant subtag.
            if language is not None and variants is not None:
                pass

            # Case 2: Sign language and a region subtag.
            elif language == "sgn" and region is not None:
                pass

            # Case 3: "hepburn-heploc" to "alalc97" canonicalization.
            elif (
                language is None
                and variants is not None
                and len(variants.split("-")) == 2
            ):
                pass

            # Any other combination is currently unsupported.
            else:
                raise ValueError(f"{type} -> {replacement}")

            legacy_mappings[type] = replacement

    tree = ET.parse(core_file.open("common/supplemental/likelySubtags.xml"))

    # Maps (language, script, region) -> (language, script, region).
    likely_subtags = {}

    for likely_subtag in tree.iterfind(".//likelySubtag"):
        from_tag = bcp47_id(likely_subtag.get("from"))
        from_match = re_unicode_language_id.match(from_tag)
        assert (
            from_match is not None
        ), f"{from_tag} invalid Unicode BCP 47 locale identifier"
        assert (
            from_match.group("variants") is None
        ), f"unexpected variant subtags in {from_tag}"

        to_tag = bcp47_id(likely_subtag.get("to"))
        to_match = re_unicode_language_id.match(to_tag)
        assert (
            to_match is not None
        ), f"{to_tag} invalid Unicode BCP 47 locale identifier"
        assert (
            to_match.group("variants") is None
        ), f"unexpected variant subtags in {to_tag}"

        from_canonical = bcp47_canonical(
            *from_match.group("language", "script", "region", "variants")
        )

        to_canonical = bcp47_canonical(
            *to_match.group("language", "script", "region", "variants")
        )

        # Remove the empty variant subtags.
        from_canonical = from_canonical[:-1]
        to_canonical = to_canonical[:-1]

        likely_subtags[from_canonical] = to_canonical

    # Maps a deprecated region to (default region, [(language, script, region)]).
    complex_region_mappings_final = {}

    for deprecated_region, replacements in complex_region_mappings.items():
        # Find all likely subtag entries which don't already contain a region
        # subtag and whose target region is in the list of replacement regions.
        region_likely_subtags = [
            (from_language, from_script, to_region)
            for (
                (from_language, from_script, from_region),
                (_, _, to_region),
            ) in likely_subtags.items()
            if from_region is None and to_region in replacements
        ]

        # The first replacement entry is the default region.
        default = replacements[0]

        # Find all likely subtag entries whose region matches the default region.
        default_replacements = {
            (language, script)
            for (language, script, region) in region_likely_subtags
            if region == default
        }

        # And finally find those entries which don't use the default region.
        # These are the entries we're actually interested in, because those need
        # to be handled specially when selecting the correct preferred region.
        non_default_replacements = [
            (language, script, region)
            for (language, script, region) in region_likely_subtags
            if (language, script) not in default_replacements
        ]

        # Remove redundant mappings.
        #
        # For example starting with CLDR 43, the deprecated region "SU" has the
        # following non-default replacement entries for "GE":
        # - ('sva', None, 'GE')
        # - ('sva', 'Cyrl', 'GE')
        # - ('sva', 'Latn', 'GE')
        #
        # The latter two entries are redundant, because they're already handled
        # by the first entry.
        non_default_replacements = [
            (language, script, region)
            for (language, script, region) in non_default_replacements
            if script is None
            or (language, None, region) not in non_default_replacements
        ]

        # If there are no non-default replacements, we can handle the region as
        # part of the simple region mapping.
        if non_default_replacements:
            complex_region_mappings_final[deprecated_region] = (
                default,
                non_default_replacements,
            )
        else:
            region_mappings[deprecated_region] = default

    return {
        "legacyMappings": legacy_mappings,
        "languageMappings": language_mappings,
        "complexLanguageMappings": complex_language_mappings,
        "scriptMappings": script_mappings,
        "regionMappings": region_mappings,
        "complexRegionMappings": complex_region_mappings_final,
        "variantMappings": variant_mappings,
        "likelySubtags": likely_subtags,
    }
def readUnicodeExtensions(core_file):
    """Collects deprecated -> preferred values for the "u" and "t" extensions.

    |core_file| must provide |namelist()| and |open(name)|. Every member below
    "common/bcp47/" ending in ".xml" is scanned for <key>/<type> elements, then
    the <subdivisionAlias> elements of
    "common/supplemental/supplementalMetadata.xml" are added for the "rg" and
    "sd" keys.

    Returns a dict with the entries "unicodeMappings" and "transformMappings",
    each mapping a key name to a dict of deprecated -> preferred type values.
    """
    import xml.etree.ElementTree as ET

    # XML data files in the BCP 47 directory.
    bcp47_path_re = re.compile(r"^common/bcp47/.+\.xml$")

    # https://www.unicode.org/reports/tr35/#Unicode_locale_identifier
    #
    # type = alphanum{3,8} (sep alphanum{3,8})* ;
    type_re = re.compile(r"^[a-z0-9]{3,8}(-[a-z0-9]{3,8})*$")

    # https://www.unicode.org/reports/tr35/#Unicode_language_identifier
    #
    # unicode_region_subtag = alpha{2} ;
    alpha_region_re = re.compile(r"^[A-Z]{2}$", re.IGNORECASE)

    # Placeholder type names defined by UTS 35 (#CODEPOINTS, #REORDER_CODE,
    # #RG_KEY_VALUE, #SCRIPT_CODE, #SUBDIVISION_CODE, #PRIVATE_USE); they
    # don't denote concrete values and are skipped.
    special_names = (
        "CODEPOINTS",
        "REORDER_CODE",
        "RG_KEY_VALUE",
        "SCRIPT_CODE",
        "SUBDIVISION_CODE",
        "PRIVATE_USE",
    )

    # One table per extension type, looked up through the "extension" attribute.
    unicode_mappings = {}
    transform_mappings = {}
    tables = {"u": unicode_mappings, "t": transform_mappings}

    # Pass 1: the BCP 47 key/type data files.
    for member in core_file.namelist():
        if not bcp47_path_re.match(member):
            continue

        document = ET.parse(core_file.open(member))
        for key_elem in document.iterfind(".//keyword/key"):
            extension = key_elem.get("extension", "u")
            assert extension in (
                "u",
                "t",
            ), "unknown extension type: {}".format(extension)

            key_name = key_elem.get("name")

            for type_elem in key_elem.iterfind("type"):
                # The canonical (BCP 47) name of this type.
                name = type_elem.get("name")
                if name in special_names:
                    continue

                # All other names should match the 'type' production.
                assert (
                    type_re.match(name) is not None
                ), "{} matches the 'type' production".format(name)

                # "preferred" is the replacement for a deprecated name,
                # "alias" lists space-separated compatibility names.
                preferred = type_elem.get("preferred")
                aliases = type_elem.get("alias")

                if preferred is not None:
                    assert type_re.match(preferred), preferred
                    tables[extension].setdefault(key_name, {})[name] = preferred

                    # Some entries carry both attributes, e.g.
                    #
                    # <type name="islamicc"
                    #       description="Civil (algorithmic) Arabic calendar"
                    #       deprecated="true"
                    #       preferred="islamic-civil"
                    #       alias="islamic-civil"/>
                    #
                    # 'preferred' is taken as the canonical name then, so none
                    # of the aliases is recorded.
                    continue

                if aliases is None:
                    continue

                for alias_name in aliases.lower().split(" "):
                    # Ignore alias entries which don't match the 'type'
                    # production.
                    if type_re.match(alias_name) is None:
                        continue

                    # Ignore aliases equal to the name itself, e.g.
                    #
                    # <type name="pst8pdt"
                    #       description="POSIX style time zone for US Pacific Time"
                    #       alias="PST8PDT"
                    #       since="1.8"/>
                    if alias_name == name:
                        continue

                    tables[extension].setdefault(key_name, {})[alias_name] = name

    # Pass 2: subdivision and region replacements.
    #
    # <https://www.unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers>
    #
    # Replace aliases in special key values:
    # - If there is an 'sd' or 'rg' key, replace any subdivision alias
    #   in its value in the same way, using subdivisionAlias data.
    metadata = ET.parse(
        core_file.open("common/supplemental/supplementalMetadata.xml")
    )
    for alias_elem in metadata.iterfind(".//subdivisionAlias"):
        deprecated = alias_elem.get("type")
        assert (
            type_re.match(deprecated) is not None
        ), "{} matches the 'type' production".format(deprecated)

        # Take the first replacement when multiple ones are present.
        first_replacement = alias_elem.get("replacement").split(" ")[0]
        replacement = first_replacement.lower()

        # Append "zzzz" if the replacement is a two-letter region code.
        if alpha_region_re.match(replacement) is not None:
            replacement = replacement + "zzzz"

        # Assert the replacement is syntactically correct.
        assert (
            type_re.match(replacement) is not None
        ), "replacement {} matches the 'type' production".format(replacement)

        # 'subdivisionAlias' applies to 'rg' and 'sd' keys.
        for key_name in ("rg", "sd"):
            unicode_mappings.setdefault(key_name, {})[deprecated] = replacement

    return {
        "unicodeMappings": unicode_mappings,
        "transformMappings": transform_mappings,
    }
def writeCLDRLanguageTagData(println, data, url):
    """Writes the language tag data to the Intl data file.

    println: callable used to emit each piece of generated C++ text.
    data: dict providing "version" plus the mapping tables read below
        ("legacyMappings", "languageMappings", "complexLanguageMappings",
        "scriptMappings", "regionMappings", "complexRegionMappings",
        "variantMappings", "unicodeMappings", "transformMappings").
    url: written into the file header and forwarded to the individual
        mapping writers.
    """

    # File header: generated-file warning plus provenance.
    println(generatedFileWarning)
    println("// Version: CLDR-{}".format(data["version"]))
    println("// URL: {}".format(url))

    # Fixed C++ preamble: includes, the two binary-search helpers over the
    # generated subtag tables, and debug-only canonical-case predicates.
    println(
        """
#include "mozilla/Assertions.h"
#include "mozilla/Span.h"
#include "mozilla/TextUtils.h"

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <string>
#include <type_traits>

#include "mozilla/intl/Locale.h"

using namespace mozilla::intl::LanguageTagLimits;

template <size_t Length, size_t TagLength, size_t SubtagLength>
static inline bool HasReplacement(
    const char (&subtags)[Length][TagLength],
    const mozilla::intl::LanguageTagSubtag<SubtagLength>& subtag) {
  MOZ_ASSERT(subtag.Length() == TagLength - 1,
             "subtag must have the same length as the list of subtags");

  const char* ptr = subtag.Span().data();
  return std::binary_search(std::begin(subtags), std::end(subtags), ptr,
                            [](const char* a, const char* b) {
                              return memcmp(a, b, TagLength - 1) < 0;
                            });
}

template <size_t Length, size_t TagLength, size_t SubtagLength>
static inline const char* SearchReplacement(
    const char (&subtags)[Length][TagLength], const char* (&aliases)[Length],
    const mozilla::intl::LanguageTagSubtag<SubtagLength>& subtag) {
  MOZ_ASSERT(subtag.Length() == TagLength - 1,
             "subtag must have the same length as the list of subtags");

  const char* ptr = subtag.Span().data();
  auto p = std::lower_bound(std::begin(subtags), std::end(subtags), ptr,
                            [](const char* a, const char* b) {
                              return memcmp(a, b, TagLength - 1) < 0;
                            });
  if (p != std::end(subtags) && memcmp(*p, ptr, TagLength - 1) == 0) {
    return aliases[std::distance(std::begin(subtags), p)];
  }
  return nullptr;
}

#ifdef DEBUG
static bool IsAsciiLowercaseAlphanumeric(char c) {
  return mozilla::IsAsciiLowercaseAlpha(c) || mozilla::IsAsciiDigit(c);
}

static bool IsAsciiLowercaseAlphanumericOrDash(char c) {
  return IsAsciiLowercaseAlphanumeric(c) || c == '-';
}

static bool IsCanonicallyCasedLanguageTag(mozilla::Span<const char> span) {
  return std::all_of(span.begin(), span.end(),
                     mozilla::IsAsciiLowercaseAlpha<char>);
}

static bool IsCanonicallyCasedScriptTag(mozilla::Span<const char> span) {
  return mozilla::IsAsciiUppercaseAlpha(span[0]) &&
         std::all_of(span.begin() + 1, span.end(),
                     mozilla::IsAsciiLowercaseAlpha<char>);
}

static bool IsCanonicallyCasedRegionTag(mozilla::Span<const char> span) {
  return std::all_of(span.begin(), span.end(),
                     mozilla::IsAsciiUppercaseAlpha<char>) ||
         std::all_of(span.begin(), span.end(), mozilla::IsAsciiDigit<char>);
}

static bool IsCanonicallyCasedVariantTag(mozilla::Span<const char> span) {
  return std::all_of(span.begin(), span.end(), IsAsciiLowercaseAlphanumeric);
}

static bool IsCanonicallyCasedUnicodeKey(mozilla::Span<const char> key) {
  return std::all_of(key.begin(), key.end(), IsAsciiLowercaseAlphanumeric);
}

static bool IsCanonicallyCasedUnicodeType(mozilla::Span<const char> type) {
  return std::all_of(type.begin(), type.end(),
                     IsAsciiLowercaseAlphanumericOrDash);
}

static bool IsCanonicallyCasedTransformKey(mozilla::Span<const char> key) {
  return std::all_of(key.begin(), key.end(), IsAsciiLowercaseAlphanumeric);
}

static bool IsCanonicallyCasedTransformType(mozilla::Span<const char> type) {
  return std::all_of(type.begin(), type.end(),
                     IsAsciiLowercaseAlphanumericOrDash);
}
#endif
""".rstrip()
    )

    source = "CLDR Supplemental Data, version {}".format(data["version"])
    legacy_mappings = data["legacyMappings"]
    language_mappings = data["languageMappings"]
    complex_language_mappings = data["complexLanguageMappings"]
    script_mappings = data["scriptMappings"]
    region_mappings = data["regionMappings"]
    complex_region_mappings = data["complexRegionMappings"]
    variant_mappings = data["variantMappings"]
    unicode_mappings = data["unicodeMappings"]
    transform_mappings = data["transformMappings"]

    # unicode_language_subtag = alpha{2,3} | alpha{5,8} ;
    language_maxlength = 8

    # unicode_script_subtag = alpha{4} ;
    script_maxlength = 4

    # unicode_region_subtag = (alpha{2} | digit{3}) ;
    region_maxlength = 3

    # Simple one-to-one tables. For the "Complex*" variants only the keys are
    # passed here; their replacements are written separately further down.
    writeMappingsBinarySearch(
        println,
        "LanguageMapping",
        "LanguageSubtag&",
        "language",
        "IsStructurallyValidLanguageTag",
        "IsCanonicallyCasedLanguageTag",
        language_mappings,
        language_maxlength,
        "Mappings from language subtags to preferred values.",
        source,
        url,
    )
    writeMappingsBinarySearch(
        println,
        "ComplexLanguageMapping",
        "const LanguageSubtag&",
        "language",
        "IsStructurallyValidLanguageTag",
        "IsCanonicallyCasedLanguageTag",
        complex_language_mappings.keys(),
        language_maxlength,
        "Language subtags with complex mappings.",
        source,
        url,
    )
    writeMappingsBinarySearch(
        println,
        "ScriptMapping",
        "ScriptSubtag&",
        "script",
        "IsStructurallyValidScriptTag",
        "IsCanonicallyCasedScriptTag",
        script_mappings,
        script_maxlength,
        "Mappings from script subtags to preferred values.",
        source,
        url,
    )
    writeMappingsBinarySearch(
        println,
        "RegionMapping",
        "RegionSubtag&",
        "region",
        "IsStructurallyValidRegionTag",
        "IsCanonicallyCasedRegionTag",
        region_mappings,
        region_maxlength,
        "Mappings from region subtags to preferred values.",
        source,
        url,
    )
    writeMappingsBinarySearch(
        println,
        "ComplexRegionMapping",
        "const RegionSubtag&",
        "region",
        "IsStructurallyValidRegionTag",
        "IsCanonicallyCasedRegionTag",
        complex_region_mappings.keys(),
        region_maxlength,
        "Region subtags with complex mappings.",
        source,
        url,
    )

    # Replacement logic for the complex language and region subtags.
    writeComplexLanguageTagMappings(
        println,
        complex_language_mappings,
        "Language subtags with complex mappings.",
        source,
        url,
    )
    writeComplexRegionTagMappings(
        println,
        complex_region_mappings,
        "Region subtags with complex mappings.",
        source,
        url,
    )

    writeVariantTagMappings(
        println,
        variant_mappings,
        "Mappings from variant subtags to preferred values.",
        source,
        url,
    )

    # Both writers below receive the same legacy mapping table.
    writeLegacyMappingsFunction(
        println, legacy_mappings, "Canonicalize legacy locale identifiers.", source, url
    )

    writeSignLanguageMappingsFunction(
        println, legacy_mappings, "Mappings from legacy sign languages.", source, url
    )

    # Unicode ("u") and transform ("t") extension mappings.
    writeUnicodeExtensionsMappings(println, unicode_mappings, "Unicode")
    writeUnicodeExtensionsMappings(println, transform_mappings, "Transform")
def writeCLDRLanguageTagLikelySubtagsTest(println, data, url):
    """Writes the likely-subtags test file.

    Emits two JavaScript tables, |maxLikelySubtags| and |minLikelySubtags|,
    followed by loops asserting that |Intl.Locale.prototype.maximize()| and
    |Intl.Locale.prototype.minimize()| return the expected tags.

    println: callable used to emit each piece of generated text.
    data: dict providing "version", "languageMappings",
        "complexLanguageMappings", "scriptMappings", "regionMappings",
        "complexRegionMappings" and "likelySubtags".
    url: forwarded to |writeMappingsVar|.
    """

    println(generatedFileWarning)

    source = "CLDR Supplemental Data, version {}".format(data["version"])
    language_mappings = data["languageMappings"]
    complex_language_mappings = data["complexLanguageMappings"]
    script_mappings = data["scriptMappings"]
    region_mappings = data["regionMappings"]
    complex_region_mappings = data["complexRegionMappings"]
    likely_subtags = data["likelySubtags"]

    # Format a (language, script, region) tuple as a "-"-separated tag,
    # leaving out empty script and region fields.
    def bcp47(tag):
        (language, script, region) = tag
        return "{}{}{}".format(
            language, "-" + script if script else "", "-" + region if region else ""
        )

    # Replace deprecated subtags of a (language, script, region) tuple.
    def canonical(tag):
        (language, script, region) = tag

        # Map deprecated language subtags.
        if language in language_mappings:
            language = language_mappings[language]
        elif language in complex_language_mappings:
            (language2, script2, region2) = complex_language_mappings[language]
            # Script and region from the input tag win over the ones supplied
            # by the complex mapping.
            (language, script, region) = (
                language2,
                script if script else script2,
                region if region else region2,
            )

        # Map deprecated script subtags.
        if script in script_mappings:
            script = script_mappings[script]

        # Map deprecated region subtags.
        if region in region_mappings:
            region = region_mappings[region]
        else:
            # Assume no complex region mappings are needed for now.
            assert (
                region not in complex_region_mappings
            ), "unexpected region with complex mappings: {}".format(region)

        return (language, script, region)

    # https://unicode.org/reports/tr35/#Likely_Subtags

    def addLikelySubtags(tag):
        # Step 1: Canonicalize.
        (language, script, region) = canonical(tag)
        if script == "Zzzz":
            script = None
        if region == "ZZ":
            region = None

        # Step 2: Lookup.
        searches = (
            (language, script, region),
            (language, None, region),
            (language, script, None),
            (language, None, None),
            ("und", script, None),
        )
        # The first candidate present in |likely_subtags| wins; |next| raises
        # StopIteration when none of them is present.
        search = next(search for search in searches if search in likely_subtags)

        (language_s, script_s, region_s) = search
        (language_m, script_m, region_m) = likely_subtags[search]

        # Step 3: Return.
        # Fields which differ from the matched search key are kept from the
        # input; the others are taken from the likely-subtags entry.
        return (
            language if language != language_s else language_m,
            script if script != script_s else script_m,
            region if region != region_s else region_m,
        )

    # https://unicode.org/reports/tr35/#Likely_Subtags
    def removeLikelySubtags(tag):
        # Step 1: Add likely subtags.
        max = addLikelySubtags(tag)

        # Step 2: Remove variants (doesn't apply here).

        # Step 3: Find a match.
        (language, script, region) = max
        for trial in (
            (language, None, None),
            (language, None, region),
            (language, script, None),
        ):
            if addLikelySubtags(trial) == max:
                return trial

        # Step 4: Return maximized if no match found.
        return max

    def likely_canonical(from_tag, to_tag):
        # Canonicalize the input tag.
        from_tag = canonical(from_tag)

        # Update the expected result if necessary.
        if from_tag in likely_subtags:
            to_tag = likely_subtags[from_tag]

        # Canonicalize the expected output.
        to_canonical = canonical(to_tag)

        # Sanity check: This should match the result of |addLikelySubtags|.
        assert to_canonical == addLikelySubtags(from_tag)

        return to_canonical

    # |likely_subtags| contains non-canonicalized tags, so canonicalize it first.
    likely_subtags_canonical = {
        k: likely_canonical(k, v) for (k, v) in likely_subtags.items()
    }

    # Add test data for |Intl.Locale.prototype.maximize()|.
    writeMappingsVar(
        println,
        {bcp47(k): bcp47(v) for (k, v) in likely_subtags_canonical.items()},
        "maxLikelySubtags",
        "Extracted from likelySubtags.xml.",
        source,
        url,
    )

    # Use the maximalized tags as the input for the remove likely-subtags test.
    minimized = {
        tag: removeLikelySubtags(tag) for tag in likely_subtags_canonical.values()
    }

    # Add test data for |Intl.Locale.prototype.minimize()|.
    writeMappingsVar(
        println,
        {bcp47(k): bcp47(v) for (k, v) in minimized.items()},
        "minLikelySubtags",
        "Extracted from likelySubtags.xml.",
        source,
        url,
    )

    # JavaScript loops checking both tables against Intl.Locale.
    println(
        """
for (let [tag, maximal] of Object.entries(maxLikelySubtags)) {
  assertEq(new Intl.Locale(tag).maximize().toString(), maximal);
}"""
    )

    println(
        """
for (let [tag, minimal] of Object.entries(minLikelySubtags)) {
  assertEq(new Intl.Locale(tag).minimize().toString(), minimal);
}"""
    )

    println(
        """
if (typeof reportCompare === "function")
  reportCompare(0, 0);"""
    )
def readCLDRVersionFromICU():
    """Return the CLDR version recorded in ICU's supplementalData.txt.

    The file is looked up below the module-level ``topsrcdir`` at
    ``intl/icu/source/data/misc/supplementalData.txt``. The first line of the
    form ``cldrVersion{"<digits>[.<digits>]"}`` determines the result.

    Returns:
        The version as a string, e.g. ``"43"`` or ``"43.1"``.

    Raises:
        RuntimeError: if the ICU source directory does not exist or the file
            contains no ``cldrVersion`` entry.
    """
    icuDir = os.path.join(topsrcdir, "intl/icu/source")
    if not os.path.isdir(icuDir):
        raise RuntimeError("not a directory: {}".format(icuDir))

    reVersion = re.compile(r'\s*cldrVersion\{"(\d+(?:\.\d+)?)"\}')

    # Must be bound before the loop: when no line matches, the check below
    # has to raise the intended RuntimeError rather than UnboundLocalError.
    version = None

    for line in flines(os.path.join(icuDir, "data/misc/supplementalData.txt")):
        m = reVersion.match(line)
        if m:
            version = m.group(1)
            break

    if version is None:
        raise RuntimeError("can't resolve CLDR version")

    return version
def flines(filepath, encoding="utf-8"):
    """Lazily yield the lines of a text file.

    The file at `filepath` is opened for reading with the given `encoding`
    and stays open while the generator is being consumed. Lines are yielded
    unchanged, including their line terminators.
    """
    with io.open(filepath, mode="r", encoding=encoding) as handle:
        yield from handle
def partition(iterable, *predicates):
    """Split `iterable` into lazy groups according to `predicates`.

    Each predicate claims the elements it accepts from whatever the previous
    predicates rejected. With N >= 1 predicates the result is a tuple of N + 1
    lazy iterators: one per predicate, followed by the elements rejected by
    all of them. Without predicates, `iterable` itself is returned.
    """
    if not predicates:
        return iterable

    groups = []
    remainder = iterable
    for accepts in predicates:
        # Duplicate the remaining stream so that the accepted and rejected
        # elements can be consumed independently of each other.
        taken, passed = tee(remainder)
        groups.append(filter(accepts, taken))
        remainder = filterfalse(accepts, passed)

    groups.append(remainder)
    return tuple(groups)
def readIANAFiles(tzdataDir, files):
    """Read all IANA time zone files from the given iterable.

    Args:
        tzdataDir: tzdata source; only its ``resolve(name)`` and
            ``readlines(path)`` members are used here.
        files: iterable of file names to read from `tzdataDir`.

    Returns:
        A tuple ``(zones, links)``. ``zones`` is a set of ``Zone`` objects,
        one per "Zone" line. ``links`` maps the ``Zone`` of each "Link" name
        to the name of its target (a string). Every ``Zone`` records the file
        name it was read from.

    Raises:
        RuntimeError: if a line starts with "Zone" or "Link" but does not
            follow the expected syntax.
    """
    # Raw strings: "\w" and "\-" are not valid escapes in a normal literal.
    nameSyntax = r"[\w/+\-]+"
    pZone = re.compile(r"Zone\s+(?P<name>%s)\s+.*" % nameSyntax)
    pLink = re.compile(
        r"Link\s+(?P<target>%s)\s+(?P<name>%s)(?:\s+#.*)?" % (nameSyntax, nameSyntax)
    )

    def createZone(line, fname):
        match = pZone.match(line)
        if match is None:
            raise RuntimeError("malformed Zone entry in %s: %s" % (fname, line.strip()))
        name = match.group("name")
        return Zone(name, fname)

    def createLink(line, fname):
        match = pLink.match(line)
        if match is None:
            raise RuntimeError("malformed Link entry in %s: %s" % (fname, line.strip()))
        (name, target) = match.group("name", "target")
        return (Zone(name, fname), target)

    zones = set()
    links = dict()
    for filename in files:
        filepath = tzdataDir.resolve(filename)
        for line in tzdataDir.readlines(filepath):
            if line.startswith("Zone"):
                zones.add(createZone(line, filename))
            elif line.startswith("Link"):
                (link, target) = createLink(line, filename)
                links[link] = target

    return (zones, links)
+ """ + + numberValue = r"-?\d+" + stringValue = r'".+?"' + + def asVector(val): + return r"%s(?:\s*,\s*%s)*" % (val, val) + + numberVector = asVector(numberValue) + stringVector = asVector(stringValue) + + reNumberVector = re.compile(numberVector) + reStringVector = re.compile(stringVector) + reNumberValue = re.compile(numberValue) + reStringValue = re.compile(stringValue) + + def parseValue(value): + m = reNumberVector.match(value) + if m: + return [int(v) for v in reNumberValue.findall(value)] + m = reStringVector.match(value) + if m: + return [v[1:-1] for v in reStringValue.findall(value)] + raise RuntimeError("unknown value type: %s" % value) + + def extractValue(values): + if len(values) == 0: + return None + if len(values) == 1: + return values[0] + return values + + def line(*args): + maybeMultiComments = r"(?:/\*[^*]*\*/)*" + maybeSingleComment = r"(?://.*)?" + lineStart = "^%s" % maybeMultiComments + lineEnd = "%s\s*%s$" % (maybeMultiComments, maybeSingleComment) + return re.compile(r"\s*".join(chain([lineStart], args, [lineEnd]))) + + tableName = r'(?P<quote>"?)(?P<name>.+?)(?P=quote)' + tableValue = r"(?P<value>%s|%s)" % (numberVector, stringVector) + + reStartTable = line(tableName, r"\{") + reEndTable = line(r"\}") + reSingleValue = line(r",?", tableValue, r",?") + reCompactTable = line(tableName, r"\{", tableValue, r"\}") + reEmptyLine = line() + + tables = [] + + def currentTable(): + return "|".join(tables) + + values = [] + for line in flines(filename, "utf-8-sig"): + line = line.strip() + if line == "": + continue + + m = reEmptyLine.match(line) + if m: + continue + + m = reStartTable.match(line) + if m: + assert len(values) == 0 + tables.append(m.group("name")) + continue + + m = reEndTable.match(line) + if m: + yield (currentTable(), extractValue(values)) + tables.pop() + values = [] + continue + + m = reCompactTable.match(line) + if m: + assert len(values) == 0 + tables.append(m.group("name")) + yield (currentTable(), 
def readICUTimeZonesFromZoneInfo(icuTzDir):
    """Collect the zones and links listed in `icuTzDir`/zoneinfo64.txt.

    Entries of the "Zones" array are numbered in file order. A table entry is
    a zone, and an integer entry is a link whose value is the index of its
    target. The "Names" table supplies the name for each index.

    Returns the tuple (zones, links): `links` maps the Zone of every link to
    the name of its target, and `zones` holds a Zone for every remaining name.
    Both are passed through validateTimeZones() before being returned.
    """
    resourcePath = os.path.join(icuTzDir, "zoneinfo64.txt")

    names = []
    targetIndexByIndex = {}
    position = 0

    for table, payload in readICUResourceFile(resourcePath):
        if table == "zoneinfo64:table(nofallback)|Names":
            names.extend(payload)
            continue

        isZoneEntry = table == "zoneinfo64:table(nofallback)|Zones:array|:table"
        isLinkEntry = table == "zoneinfo64:table(nofallback)|Zones:array|:int"
        if isLinkEntry:
            targetIndexByIndex[position] = int(payload)
        if isZoneEntry or isLinkEntry:
            # Zones and links share a single running index.
            position += 1

    links = {}
    for linkIndex, targetIndex in targetIndexByIndex.items():
        links[Zone(names[linkIndex])] = names[targetIndex]

    zones = set()
    for entry in names:
        candidate = Zone(entry)
        if candidate not in links:
            zones.add(candidate)

    validateTimeZones(zones, links)

    return (zones, links)
This data is + # generated from tzdata files, it doesn't include "backzone" in stock ICU. + (zoneinfoZones, zoneinfoLinks) = readICUTimeZonesFromZoneInfo(icuTzDir) + + # timezoneTypes.txt contains the canonicalization information for ICU. This + # data is generated from CLDR files. It includes data about time zones from + # tzdata's "backzone" file. + (typesZones, typesLinks) = readICUTimeZonesFromTimezoneTypes(icuTzDir) + + # Remove the placeholder time zone "Factory". + # See also <https://github.com/eggert/tz/blob/master/factory>. + if ignoreFactory: + zoneinfoZones.remove(Zone("Factory")) + + # Remove the ICU placeholder time zone "Etc/Unknown". + # See also <https://unicode.org/reports/tr35/#Time_Zone_Identifiers>. + for zones in (zoneinfoZones, typesZones): + zones.remove(Zone("Etc/Unknown")) + + # Remove any outdated ICU links. + for links in (zoneinfoLinks, typesLinks): + for zone in otherICULegacyLinks().keys(): + if zone not in links: + raise KeyError(f"Can't remove non-existent link from '{zone}'") + del links[zone] + + # Information in zoneinfo64 should be a superset of timezoneTypes. + def inZoneInfo64(zone): + return zone in zoneinfoZones or zone in zoneinfoLinks + + notFoundInZoneInfo64 = [zone for zone in typesZones if not inZoneInfo64(zone)] + if notFoundInZoneInfo64: + raise RuntimeError( + "Missing time zones in zoneinfo64.txt: %s" % notFoundInZoneInfo64 + ) + + notFoundInZoneInfo64 = [ + zone for zone in typesLinks.keys() if not inZoneInfo64(zone) + ] + if notFoundInZoneInfo64: + raise RuntimeError( + "Missing time zones in zoneinfo64.txt: %s" % notFoundInZoneInfo64 + ) + + # zoneinfo64.txt only defines the supported time zones by ICU, the canonicalization + # rules are defined through timezoneTypes.txt. Merge both to get the actual zones + # and links used by ICU. 
def readICULegacyZones(icuDir):
    """Read the ICU legacy time zones from `icuDir`/tools/tzcode/icuzones
    and return the tuple (zones, links).

    The placeholder zone "Etc/Unknown" is removed from the zones, and the
    links are extended with the entries of otherICULegacyLinks().

    Raises KeyError if "Etc/Unknown" is not among the zones read, or if an
    entry of otherICULegacyLinks() conflicts with a link from icuzones.
    """
    tzcodeDir = TzDataDir(os.path.join(icuDir, "tools/tzcode"))

    # Per spec only IANA time zones and links must be recognized, but ICU
    # also recognizes various legacy, non-IANA names. Most of them are listed
    # in the icuzones file.
    legacyZones, legacyLinks = readIANAFiles(tzcodeDir, ["icuzones"])

    # Drop the ICU placeholder time zone "Etc/Unknown".
    # See also <https://unicode.org/reports/tr35/#Time_Zone_Identifiers>.
    legacyZones.remove(Zone("Etc/Unknown"))

    # A handful of non-IANA names are not in icuzones and are added manually
    # so that ICU is never invoked with them.
    for extraZone, extraTarget in otherICULegacyLinks().items():
        if extraZone in legacyLinks:
            currentTarget = legacyLinks[extraZone]
            if currentTarget != extraTarget:
                raise KeyError(
                    f"Can't overwrite link '{extraZone} -> {currentTarget}' with '{extraTarget}'"
                )
            # Same link is already present in icuzones; the manual entry is redundant.
            print(
                f"Info: Link '{extraZone} -> {extraTarget}' can be removed from otherICULegacyLinks()"
            )
        legacyLinks[extraZone] = extraTarget

    return (legacyZones, legacyLinks)
def icuTzDataVersion(icuTzDir):
    """Read the ICU time zone version from `icuTzDir`/zoneinfo64.txt.

    The version is taken from the first line that consists of a
    ``// tz version: <version>`` comment, where ``<version>`` is four digits
    followed by one lowercase letter (e.g. ``2023c``).

    Returns:
        The tzdata version string.

    Raises:
        RuntimeError: if zoneinfo64.txt does not exist or contains no such
            version comment.
    """

    def searchInFile(pattern, f):
        # Return the first capture group of the first matching line, if any.
        p = re.compile(pattern)
        for line in flines(f, "utf-8-sig"):
            m = p.search(line)
            if m:
                return m.group(1)
        return None

    zoneinfo = os.path.join(icuTzDir, "zoneinfo64.txt")
    if not os.path.isfile(zoneinfo):
        raise RuntimeError("file not found: %s" % zoneinfo)
    # Raw string: "\s" is not a valid escape sequence in a normal literal.
    version = searchInFile(r"^//\s+tz version:\s+([0-9]{4}[a-z])$", zoneinfo)
    if version is None:
        raise RuntimeError(
            "%s does not contain a valid tzdata version string" % zoneinfo
        )
    return version
def findIncorrectICULinks(ianaZones, ianaLinks, icuZones, icuLinks):
    """Report the IANA links which ICU resolves differently.

    Returns a list of (zone, ianaTarget, icuTarget) tuples sorted by zone.
    It contains every IANA link whose ICU link has another target, followed
    by every IANA link which ICU treats as a zone (its own name then serves
    as icuTarget). Entries where both targets are UTC names are left out.

    Raises RuntimeError if an IANA link is unknown to ICU, or if ICU has a
    link which is neither an IANA zone nor an IANA link.
    """
    # All links should be present in ICU.
    unknownToICU = []
    for link in ianaLinks.keys():
        if not (link in icuZones or link in icuLinks):
            unknownToICU.append(link)
    if unknownToICU:
        raise RuntimeError(
            "Not all zones are present in ICU, did you forget "
            "to run intl/update-tzdata.sh? %s" % unknownToICU
        )

    # Links which are only present in ICU?
    unknownToIANA = []
    for link in icuLinks.keys():
        if not (link in ianaZones or link in ianaLinks):
            unknownToIANA.append(link)
    if unknownToIANA:
        raise RuntimeError(
            "Additional links present in ICU, did you forget "
            "to run intl/update-tzdata.sh? %s" % unknownToIANA
        )

    mismatches = []

    # IANA links which have a different target in ICU.
    for link, ianaTarget in ianaLinks.items():
        if link in icuLinks and ianaTarget != icuLinks[link]:
            mismatches.append((link, ianaTarget, icuLinks[link]))

    # IANA links which are zones in ICU.
    for link, ianaTarget in ianaLinks.items():
        if link in icuZones:
            mismatches.append((link, ianaTarget, link.name))

    # Remove unnecessary UTC mappings: both sides already name UTC.
    utcNames = ["Etc/UTC", "Etc/UCT", "Etc/GMT"]
    relevant = [
        entry
        for entry in mismatches
        if not (entry[1] in utcNames and entry[2] in utcNames)
    ]

    relevant.sort(key=itemgetter(0))
    return relevant
>>>") + + print("Writing Intl tzdata file...") + with io.open(out, mode="w", encoding="utf-8", newline="") as f: + println = partial(print, file=f) + + println(generatedFileWarning) + println(tzdataVersionComment.format(version)) + println("") + + println("#ifndef builtin_intl_TimeZoneDataGenerated_h") + println("#define builtin_intl_TimeZoneDataGenerated_h") + println("") + + println("namespace js {") + println("namespace timezone {") + println("") + + println("// Format:") + println('// "ZoneName" // ICU-Name [time zone file]') + println("const char* const ianaZonesTreatedAsLinksByICU[] = {") + for zone, icuZone in incorrectZones: + println(' "%s", // %s [%s]' % (zone, icuZone, zone.filename)) + println("};") + println("") + + println("// Format:") + println('// "LinkName", "Target" // ICU-Target [time zone file]') + println("struct LinkAndTarget") + println("{") + println(" const char* const link;") + println(" const char* const target;") + println("};") + println("") + println("const LinkAndTarget ianaLinksCanonicalizedDifferentlyByICU[] = {") + for zone, target, icuTarget in incorrectLinks: + println( + ' { "%s", "%s" }, // %s [%s]' + % (zone, target, icuTarget, zone.filename) + ) + println("};") + println("") + + println( + "// Legacy ICU time zones, these are not valid IANA time zone names. 
We also" + ) + println("// disallow the old and deprecated System V time zones.") + println( + "// https://ssl.icu-project.org/repos/icu/trunk/icu4c/source/tools/tzcode/icuzones" + ) # NOQA: E501 + println("const char* const legacyICUTimeZones[] = {") + for zone in chain(sorted(legacyLinks.keys()), sorted(legacyZones)): + println(' "%s",' % zone) + println("};") + println("") + + println("} // namespace timezone") + println("} // namespace js") + println("") + println("#endif /* builtin_intl_TimeZoneDataGenerated_h */") + + +def updateBackzoneLinks(tzdataDir, links): + def withZone(fn): + return lambda zone_target: fn(zone_target[0]) + + (backzoneZones, backzoneLinks) = readIANAFiles(tzdataDir, ["backzone"]) + (stableZones, updatedLinks, updatedZones) = partition( + links.items(), + # Link not changed in backzone. + withZone(lambda zone: zone not in backzoneLinks and zone not in backzoneZones), + # Link has a new target. + withZone(lambda zone: zone in backzoneLinks), + ) + # Keep stable zones and links with updated target. 
+ return dict( + chain( + stableZones, + map(withZone(lambda zone: (zone, backzoneLinks[zone])), updatedLinks), + ) + ) + + +def generateTzDataLinkTestContent(testDir, version, fileName, description, links): + with io.open( + os.path.join(testDir, fileName), mode="w", encoding="utf-8", newline="" + ) as f: + println = partial(print, file=f) + + println('// |reftest| skip-if(!this.hasOwnProperty("Intl"))') + println("") + println(generatedFileWarning) + println(tzdataVersionComment.format(version)) + println( + """ +const tzMapper = [ + x => x, + x => x.toUpperCase(), + x => x.toLowerCase(), +]; +""" + ) + + println(description) + println("const links = {") + for zone, target in sorted(links, key=itemgetter(0)): + println(' "%s": "%s",' % (zone, target)) + println("};") + + println( + """ +for (let [linkName, target] of Object.entries(links)) { + if (target === "Etc/UTC" || target === "Etc/GMT") + target = "UTC"; + + for (let map of tzMapper) { + let dtf = new Intl.DateTimeFormat(undefined, {timeZone: map(linkName)}); + let resolvedTimeZone = dtf.resolvedOptions().timeZone; + assertEq(resolvedTimeZone, target, `${linkName} -> ${target}`); + } +} +""" + ) + println( + """ +if (typeof reportCompare === "function") + reportCompare(0, 0, "ok"); +""" + ) + + +def generateTzDataTestBackwardLinks(tzdataDir, version, ignoreBackzone, testDir): + (zones, links) = readIANAFiles(tzdataDir, ["backward"]) + assert len(zones) == 0 + + if not ignoreBackzone: + links = updateBackzoneLinks(tzdataDir, links) + + generateTzDataLinkTestContent( + testDir, + version, + "timeZone_backward_links.js", + "// Link names derived from IANA Time Zone Database, backward file.", + links.items(), + ) + + +def generateTzDataTestNotBackwardLinks(tzdataDir, version, ignoreBackzone, testDir): + tzfiles = filterfalse( + {"backward", "backzone"}.__contains__, listIANAFiles(tzdataDir) + ) + (zones, links) = readIANAFiles(tzdataDir, tzfiles) + + if not ignoreBackzone: + links = 
updateBackzoneLinks(tzdataDir, links) + + generateTzDataLinkTestContent( + testDir, + version, + "timeZone_notbackward_links.js", + "// Link names derived from IANA Time Zone Database, excluding backward file.", + links.items(), + ) + + +def generateTzDataTestBackzone(tzdataDir, version, ignoreBackzone, testDir): + backzoneFiles = {"backzone"} + (bkfiles, tzfiles) = partition(listIANAFiles(tzdataDir), backzoneFiles.__contains__) + + # Read zone and link infos. + (zones, links) = readIANAFiles(tzdataDir, tzfiles) + (backzones, backlinks) = readIANAFiles(tzdataDir, bkfiles) + + if not ignoreBackzone: + comment = """\ +// This file was generated with historical, pre-1970 backzone information +// respected. Therefore, every zone key listed below is its own Zone, not +// a Link to a modern-day target as IANA ignoring backzones would say. + +""" + else: + comment = """\ +// This file was generated while ignoring historical, pre-1970 backzone +// information. Therefore, every zone key listed below is part of a Link +// whose target is the corresponding value. + +""" + + generateTzDataLinkTestContent( + testDir, + version, + "timeZone_backzone.js", + comment + "// Backzone zones derived from IANA Time Zone Database.", + ( + (zone, zone if not ignoreBackzone else links[zone]) + for zone in backzones + if zone in links + ), + ) + + +def generateTzDataTestBackzoneLinks(tzdataDir, version, ignoreBackzone, testDir): + backzoneFiles = {"backzone"} + (bkfiles, tzfiles) = partition(listIANAFiles(tzdataDir), backzoneFiles.__contains__) + + # Read zone and link infos. + (zones, links) = readIANAFiles(tzdataDir, tzfiles) + (backzones, backlinks) = readIANAFiles(tzdataDir, bkfiles) + + if not ignoreBackzone: + comment = """\ +// This file was generated with historical, pre-1970 backzone information +// respected. Therefore, every zone key listed below points to a target +// in the backzone file and not to its modern-day target as IANA ignoring +// backzones would say. 
+ +""" + else: + comment = """\ +// This file was generated while ignoring historical, pre-1970 backzone +// information. Therefore, every zone key listed below is part of a Link +// whose target is the corresponding value ignoring any backzone entries. + +""" + + generateTzDataLinkTestContent( + testDir, + version, + "timeZone_backzone_links.js", + comment + "// Backzone links derived from IANA Time Zone Database.", + ( + (zone, target if not ignoreBackzone else links[zone]) + for (zone, target) in backlinks.items() + ), + ) + + +def generateTzDataTestVersion(tzdataDir, version, testDir): + fileName = "timeZone_version.js" + + with io.open( + os.path.join(testDir, fileName), mode="w", encoding="utf-8", newline="" + ) as f: + println = partial(print, file=f) + + println('// |reftest| skip-if(!this.hasOwnProperty("Intl"))') + println("") + println(generatedFileWarning) + println(tzdataVersionComment.format(version)) + println("""const tzdata = "{0}";""".format(version)) + + println( + """ +if (typeof getICUOptions === "undefined") { + var getICUOptions = SpecialPowers.Cu.getJSTestingFunctions().getICUOptions; +} + +var options = getICUOptions(); + +assertEq(options.tzdata, tzdata); + +if (typeof reportCompare === "function") + reportCompare(0, 0, "ok"); +""" + ) + + +def generateTzDataTestCanonicalZones( + tzdataDir, version, ignoreBackzone, ignoreFactory, testDir +): + fileName = "supportedValuesOf-timeZones-canonical.js" + + # Read zone and link infos. + (ianaZones, _) = readIANATimeZones(tzdataDir, ignoreBackzone, ignoreFactory) + + # Replace Etc/GMT and Etc/UTC with UTC. + ianaZones.remove(Zone("Etc/GMT")) + ianaZones.remove(Zone("Etc/UTC")) + ianaZones.add(Zone("UTC")) + + # See findIncorrectICUZones() for why Asia/Hanoi has to be special-cased. + ianaZones.remove(Zone("Asia/Hanoi")) + + if not ignoreBackzone: + comment = """\ +// This file was generated with historical, pre-1970 backzone information +// respected. 
+""" + else: + comment = """\ +// This file was generated while ignoring historical, pre-1970 backzone +// information. +""" + + with io.open( + os.path.join(testDir, fileName), mode="w", encoding="utf-8", newline="" + ) as f: + println = partial(print, file=f) + + println('// |reftest| skip-if(!this.hasOwnProperty("Intl"))') + println("") + println(generatedFileWarning) + println(tzdataVersionComment.format(version)) + println("") + println(comment) + + println("const zones = [") + for zone in sorted(ianaZones): + println(f' "{zone}",') + println("];") + + println( + """ +let supported = Intl.supportedValuesOf("timeZone"); + +assertEqArray(supported, zones); + +if (typeof reportCompare === "function") + reportCompare(0, 0, "ok"); +""" + ) + + +def generateTzDataTests(tzdataDir, version, ignoreBackzone, ignoreFactory, testDir): + dtfTestDir = os.path.join(testDir, "DateTimeFormat") + if not os.path.isdir(dtfTestDir): + raise RuntimeError("not a directory: %s" % dtfTestDir) + + generateTzDataTestBackwardLinks(tzdataDir, version, ignoreBackzone, dtfTestDir) + generateTzDataTestNotBackwardLinks(tzdataDir, version, ignoreBackzone, dtfTestDir) + generateTzDataTestBackzone(tzdataDir, version, ignoreBackzone, dtfTestDir) + generateTzDataTestBackzoneLinks(tzdataDir, version, ignoreBackzone, dtfTestDir) + generateTzDataTestVersion(tzdataDir, version, dtfTestDir) + generateTzDataTestCanonicalZones( + tzdataDir, version, ignoreBackzone, ignoreFactory, testDir + ) + + +def updateTzdata(topsrcdir, args): + """Update the time zone cpp file.""" + + icuDir = os.path.join(topsrcdir, "intl/icu/source") + if not os.path.isdir(icuDir): + raise RuntimeError("not a directory: %s" % icuDir) + + icuTzDir = os.path.join(topsrcdir, "intl/tzdata/source") + if not os.path.isdir(icuTzDir): + raise RuntimeError("not a directory: %s" % icuTzDir) + + intlTestDir = os.path.join(topsrcdir, "js/src/tests/non262/Intl") + if not os.path.isdir(intlTestDir): + raise RuntimeError("not a directory: %s" % 
def readCurrencyFile(tree):
    """Yield the currencies whose minor unit count differs from the default.

    `tree` is searched for "CcyNtry" elements. For each of them which has a
    currency code ("Ccy") and an integral minor unit count ("CcyMnrUnts")
    other than 2, the tuple (currency, minorUnits, currencyName, countryName)
    is yielded, with minorUnits converted to int.

    Asserts that every currency code consists of three uppercase letters and
    that every entry with a currency code also has a "CcyMnrUnts" element.
    """
    currencyCode = re.compile(r"^[A-Z]{3}$")
    integralUnits = re.compile(r"^\d+$")

    for entry in tree.iterfind(".//CcyNtry"):
        code = entry.findtext("Ccy")
        if code is None:
            # No currency information is available for this entry.
            continue
        assert currencyCode.match(code)

        digitsText = entry.findtext("CcyMnrUnts")
        assert digitsText is not None

        if not integralUnits.match(digitsText):
            # Not a number, so there is no minor unit count to record.
            continue
        if int(digitsText) == 2:
            # Two digits is the default and does not need to be listed.
            continue

        yield (
            code,
            int(digitsText),
            entry.findtext("CcyNm"),
            entry.findtext("CtryNm"),
        )
sure you have the newest currency code list file!") + updateFrom(filename) + else: + print("Downloading currency & funds code list...") + request = UrlRequest(url) + request.add_header( + "User-agent", + "Mozilla/5.0 (Mobile; rv:{0}.0) Gecko/{0}.0 Firefox/{0}.0".format( + randint(1, 999) + ), + ) + with closing(urlopen(request)) as currencyFile: + fname = urlsplit(currencyFile.geturl()).path.split("/")[-1] + with tempfile.NamedTemporaryFile(suffix=fname) as currencyTmpFile: + print("File stored in %s" % currencyTmpFile.name) + currencyTmpFile.write(currencyFile.read()) + currencyTmpFile.flush() + updateFrom(currencyTmpFile.name) + + +def writeUnicodeExtensionsMappings(println, mapping, extension): + println( + """ +template <size_t Length> +static inline bool Is{0}Key(mozilla::Span<const char> key, const char (&str)[Length]) {{ + static_assert(Length == {0}KeyLength + 1, + "{0} extension key is two characters long"); + return memcmp(key.data(), str, Length - 1) == 0; +}} + +template <size_t Length> +static inline bool Is{0}Type(mozilla::Span<const char> type, const char (&str)[Length]) {{ + static_assert(Length > {0}KeyLength + 1, + "{0} extension type contains more than two characters"); + return type.size() == (Length - 1) && + memcmp(type.data(), str, Length - 1) == 0; +}} +""".format( + extension + ).rstrip( + "\n" + ) + ) + + linear_search_max_length = 4 + + needs_binary_search = any( + len(replacements.items()) > linear_search_max_length + for replacements in mapping.values() + ) + + if needs_binary_search: + println( + """ +static int32_t Compare{0}Type(const char* a, mozilla::Span<const char> b) {{ + MOZ_ASSERT(!std::char_traits<char>::find(b.data(), b.size(), '\\0'), + "unexpected null-character in string"); + + using UnsignedChar = unsigned char; + for (size_t i = 0; i < b.size(); i++) {{ + // |a| is zero-terminated and |b| doesn't contain a null-terminator. So if + // we've reached the end of |a|, the below if-statement will always be true. 
+ // That ensures we don't read past the end of |a|. + if (int32_t r = UnsignedChar(a[i]) - UnsignedChar(b[i])) {{ + return r; + }} + }} + + // Return zero if both strings are equal or a positive number if |b| is a + // prefix of |a|. + return int32_t(UnsignedChar(a[b.size()])); +}} + +template <size_t Length> +static inline const char* Search{0}Replacement( + const char* (&types)[Length], const char* (&aliases)[Length], + mozilla::Span<const char> type) {{ + + auto p = std::lower_bound(std::begin(types), std::end(types), type, + [](const auto& a, const auto& b) {{ + return Compare{0}Type(a, b) < 0; + }}); + if (p != std::end(types) && Compare{0}Type(*p, type) == 0) {{ + return aliases[std::distance(std::begin(types), p)]; + }} + return nullptr; +}} +""".format( + extension + ).rstrip( + "\n" + ) + ) + + println( + """ +/** + * Mapping from deprecated BCP 47 {0} extension types to their preferred + * values. + * + * Spec: https://www.unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files + * Spec: https://www.unicode.org/reports/tr35/#t_Extension + */ +const char* mozilla::intl::Locale::Replace{0}ExtensionType( + mozilla::Span<const char> key, mozilla::Span<const char> type) {{ + MOZ_ASSERT(key.size() == {0}KeyLength); + MOZ_ASSERT(IsCanonicallyCased{0}Key(key)); + + MOZ_ASSERT(type.size() > {0}KeyLength); + MOZ_ASSERT(IsCanonicallyCased{0}Type(type)); +""".format( + extension + ) + ) + + def to_hash_key(replacements): + return str(sorted(replacements.items())) + + def write_array(subtags, name, length): + max_entries = (80 - len(" ")) // (length + len('"", ')) + + println(" static const char* {}[{}] = {{".format(name, len(subtags))) + + for entries in grouper(subtags, max_entries): + entries = ( + '"{}"'.format(tag).center(length + 2) + for tag in entries + if tag is not None + ) + println(" {},".format(", ".join(entries))) + + println(" };") + + # Merge duplicate keys. 
+ key_aliases = {} + for key, replacements in sorted(mapping.items(), key=itemgetter(0)): + hash_key = to_hash_key(replacements) + if hash_key not in key_aliases: + key_aliases[hash_key] = [] + else: + key_aliases[hash_key].append(key) + + first_key = True + for key, replacements in sorted(mapping.items(), key=itemgetter(0)): + hash_key = to_hash_key(replacements) + if key in key_aliases[hash_key]: + continue + + cond = ( + 'Is{}Key(key, "{}")'.format(extension, k) + for k in [key] + key_aliases[hash_key] + ) + + if_kind = "if" if first_key else "else if" + cond = (" ||\n" + " " * (2 + len(if_kind) + 2)).join(cond) + println( + """ + {} ({}) {{""".format( + if_kind, cond + ).strip( + "\n" + ) + ) + first_key = False + + replacements = sorted(replacements.items(), key=itemgetter(0)) + + if len(replacements) > linear_search_max_length: + types = [t for (t, _) in replacements] + preferred = [r for (_, r) in replacements] + max_len = max(len(k) for k in types + preferred) + + write_array(types, "types", max_len) + write_array(preferred, "aliases", max_len) + println( + """ + return Search{}Replacement(types, aliases, type); +""".format( + extension + ).strip( + "\n" + ) + ) + else: + for type, replacement in replacements: + println( + """ + if (Is{}Type(type, "{}")) {{ + return "{}"; + }}""".format( + extension, type, replacement + ).strip( + "\n" + ) + ) + + println( + """ + }""".lstrip( + "\n" + ) + ) + + println( + """ + return nullptr; +} +""".strip( + "\n" + ) + ) + + +def readICUUnitResourceFile(filepath): + """Return a set of unit descriptor pairs where the first entry denotes the unit type and the + second entry the unit name. 
+ + Example: + + root{ + units{ + compound{ + } + coordinate{ + } + length{ + meter{ + } + } + } + unitsNarrow:alias{"/LOCALE/unitsShort"} + unitsShort{ + duration{ + day{ + } + day-person:alias{"/LOCALE/unitsShort/duration/day"} + } + length{ + meter{ + } + } + } + } + + Returns {("length", "meter"), ("duration", "day"), ("duration", "day-person")} + """ + + start_table_re = re.compile(r"^([\w\-%:\"]+)\{$") + end_table_re = re.compile(r"^\}$") + table_entry_re = re.compile(r"^([\w\-%:\"]+)\{\"(.*?)\"\}$") + + # The current resource table. + table = {} + + # List of parent tables when parsing. + parents = [] + + # Track multi-line comments state. + in_multiline_comment = False + + for line in flines(filepath, "utf-8-sig"): + # Remove leading and trailing whitespace. + line = line.strip() + + # Skip over comments. + if in_multiline_comment: + if line.endswith("*/"): + in_multiline_comment = False + continue + + if line.startswith("//"): + continue + + if line.startswith("/*"): + in_multiline_comment = True + continue + + # Try to match the start of a table, e.g. `length{` or `meter{`. + match = start_table_re.match(line) + if match: + parents.append(table) + table_name = match.group(1) + new_table = {} + table[table_name] = new_table + table = new_table + continue + + # Try to match the end of a table. + match = end_table_re.match(line) + if match: + table = parents.pop() + continue + + # Try to match a table entry, e.g. `dnam{"meter"}`. + match = table_entry_re.match(line) + if match: + entry_key = match.group(1) + entry_value = match.group(2) + table[entry_key] = entry_value + continue + + raise Exception("unexpected line: '{}' in {}".format(line, filepath)) + + assert len(parents) == 0, "Not all tables closed" + assert len(table) == 1, "More than one root table" + + # Remove the top-level language identifier table. + (_, unit_table) = table.popitem() + + # Add all units for the three display formats "units", "unitsNarrow", and "unitsShort". 
+ # But exclude the pseudo-units "compound" and "ccoordinate". + return { + (unit_type, unit_name if not unit_name.endswith(":alias") else unit_name[:-6]) + for unit_display in ("units", "unitsNarrow", "unitsShort") + if unit_display in unit_table + for (unit_type, unit_names) in unit_table[unit_display].items() + if unit_type != "compound" and unit_type != "coordinate" + for unit_name in unit_names.keys() + } + + +def computeSupportedUnits(all_units, sanctioned_units): + """Given the set of all possible ICU unit identifiers and the set of sanctioned unit + identifiers, compute the set of effectively supported ICU unit identifiers. + """ + + def find_match(unit): + unit_match = [ + (unit_type, unit_name) + for (unit_type, unit_name) in all_units + if unit_name == unit + ] + if unit_match: + assert len(unit_match) == 1 + return unit_match[0] + return None + + def compound_unit_identifiers(): + for numerator in sanctioned_units: + for denominator in sanctioned_units: + yield "{}-per-{}".format(numerator, denominator) + + supported_simple_units = {find_match(unit) for unit in sanctioned_units} + assert None not in supported_simple_units + + supported_compound_units = { + unit_match + for unit_match in (find_match(unit) for unit in compound_unit_identifiers()) + if unit_match + } + + return supported_simple_units | supported_compound_units + + +def readICUDataFilterForUnits(data_filter_file): + with io.open(data_filter_file, mode="r", encoding="utf-8") as f: + data_filter = json.load(f) + + # Find the rule set for the "unit_tree". + unit_tree_rules = [ + entry["rules"] + for entry in data_filter["resourceFilters"] + if entry["categories"] == ["unit_tree"] + ] + assert len(unit_tree_rules) == 1 + + # Compute the list of included units from that rule set. The regular expression must match + # "+/*/length/meter" and mustn't match either "-/*" or "+/*/compound". 
+ included_unit_re = re.compile(r"^\+/\*/(.+?)/(.+)$") + filtered_units = (included_unit_re.match(unit) for unit in unit_tree_rules[0]) + + return {(unit.group(1), unit.group(2)) for unit in filtered_units if unit} + + +def writeSanctionedSimpleUnitIdentifiersFiles(all_units, sanctioned_units): + js_src_builtin_intl_dir = os.path.dirname(os.path.abspath(__file__)) + intl_components_src_dir = os.path.join( + js_src_builtin_intl_dir, "../../../../intl/components/src" + ) + + def find_unit_type(unit): + result = [ + unit_type for (unit_type, unit_name) in all_units if unit_name == unit + ] + assert result and len(result) == 1 + return result[0] + + sanctioned_js_file = os.path.join( + js_src_builtin_intl_dir, "SanctionedSimpleUnitIdentifiersGenerated.js" + ) + with io.open(sanctioned_js_file, mode="w", encoding="utf-8", newline="") as f: + println = partial(print, file=f) + + sanctioned_units_object = json.dumps( + {unit: True for unit in sorted(sanctioned_units)}, + sort_keys=True, + indent=2, + separators=(",", ": "), + ) + + println(generatedFileWarning) + + println( + """ +/** + * The list of currently supported simple unit identifiers. + * + * Intl.NumberFormat Unified API Proposal + */""" + ) + + println("// prettier-ignore") + println( + "var sanctionedSimpleUnitIdentifiers = {};".format(sanctioned_units_object) + ) + + sanctioned_h_file = os.path.join(intl_components_src_dir, "MeasureUnitGenerated.h") + with io.open(sanctioned_h_file, mode="w", encoding="utf-8", newline="") as f: + println = partial(print, file=f) + + println(generatedFileWarning) + + println( + """ +#ifndef intl_components_MeasureUnitGenerated_h +#define intl_components_MeasureUnitGenerated_h + +namespace mozilla::intl { + +struct SimpleMeasureUnit { + const char* const type; + const char* const name; +}; + +/** + * The list of currently supported simple unit identifiers. + * + * The list must be kept in alphabetical order of |name|. 
+ */ +inline constexpr SimpleMeasureUnit simpleMeasureUnits[] = { + // clang-format off""" + ) + + for unit_name in sorted(sanctioned_units): + println(' {{"{}", "{}"}},'.format(find_unit_type(unit_name), unit_name)) + + println( + """ + // clang-format on +}; + +} // namespace mozilla::intl + +#endif +""".strip( + "\n" + ) + ) + + writeUnitTestFiles(all_units, sanctioned_units) + + +def writeUnitTestFiles(all_units, sanctioned_units): + """Generate test files for unit number formatters.""" + + js_src_builtin_intl_dir = os.path.dirname(os.path.abspath(__file__)) + test_dir = os.path.join( + js_src_builtin_intl_dir, "../../tests/non262/Intl/NumberFormat" + ) + + def write_test(file_name, test_content, indent=4): + file_path = os.path.join(test_dir, file_name) + with io.open(file_path, mode="w", encoding="utf-8", newline="") as f: + println = partial(print, file=f) + + println('// |reftest| skip-if(!this.hasOwnProperty("Intl"))') + println("") + println(generatedFileWarning) + println("") + + sanctioned_units_array = json.dumps( + [unit for unit in sorted(sanctioned_units)], + indent=indent, + separators=(",", ": "), + ) + + println( + "const sanctionedSimpleUnitIdentifiers = {};".format( + sanctioned_units_array + ) + ) + + println(test_content) + + println( + """ +if (typeof reportCompare === "function") +{}reportCompare(true, true);""".format( + " " * indent + ) + ) + + write_test( + "unit-compound-combinations.js", + """ +// Test all simple unit identifier combinations are allowed. 
+ +for (const numerator of sanctionedSimpleUnitIdentifiers) { + for (const denominator of sanctionedSimpleUnitIdentifiers) { + const unit = `${numerator}-per-${denominator}`; + const nf = new Intl.NumberFormat("en", {style: "unit", unit}); + + assertEq(nf.format(1), nf.formatToParts(1).map(p => p.value).join("")); + } +}""", + ) + + all_units_array = json.dumps( + ["-".join(unit) for unit in sorted(all_units)], indent=4, separators=(",", ": ") + ) + + write_test( + "unit-well-formed.js", + """ +const allUnits = {}; +""".format( + all_units_array + ) + + """ +// Test only sanctioned unit identifiers are allowed. + +for (const typeAndUnit of allUnits) { + const [_, type, unit] = typeAndUnit.match(/(\w+)-(.+)/); + + let allowed; + if (unit.includes("-per-")) { + const [numerator, denominator] = unit.split("-per-"); + allowed = sanctionedSimpleUnitIdentifiers.includes(numerator) && + sanctionedSimpleUnitIdentifiers.includes(denominator); + } else { + allowed = sanctionedSimpleUnitIdentifiers.includes(unit); + } + + if (allowed) { + const nf = new Intl.NumberFormat("en", {style: "unit", unit}); + assertEq(nf.format(1), nf.formatToParts(1).map(p => p.value).join("")); + } else { + assertThrowsInstanceOf(() => new Intl.NumberFormat("en", {style: "unit", unit}), + RangeError, `Missing error for "${typeAndUnit}"`); + } +}""", + ) + + write_test( + "unit-formatToParts-has-unit-field.js", + """ +// Test only English and Chinese to keep the overall runtime reasonable. +// +// Chinese is included because it contains more than one "unit" element for +// certain unit combinations. +const locales = ["en", "zh"]; + +// Plural rules for English only differentiate between "one" and "other". Plural +// rules for Chinese only use "other". That means we only need to test two values +// per unit. +const values = [0, 1]; + +// Ensure unit formatters contain at least one "unit" element. 
+ +for (const locale of locales) { + for (const unit of sanctionedSimpleUnitIdentifiers) { + const nf = new Intl.NumberFormat(locale, {style: "unit", unit}); + + for (const value of values) { + assertEq(nf.formatToParts(value).some(e => e.type === "unit"), true, + `locale=${locale}, unit=${unit}`); + } + } + + for (const numerator of sanctionedSimpleUnitIdentifiers) { + for (const denominator of sanctionedSimpleUnitIdentifiers) { + const unit = `${numerator}-per-${denominator}`; + const nf = new Intl.NumberFormat(locale, {style: "unit", unit}); + + for (const value of values) { + assertEq(nf.formatToParts(value).some(e => e.type === "unit"), true, + `locale=${locale}, unit=${unit}`); + } + } + } +}""", + indent=2, + ) + + +def updateUnits(topsrcdir, args): + js_src_builtin_intl_dir = os.path.dirname(os.path.abspath(__file__)) + icu_path = os.path.join(topsrcdir, "intl", "icu") + icu_unit_path = os.path.join(icu_path, "source", "data", "unit") + + with io.open( + os.path.join(js_src_builtin_intl_dir, "SanctionedSimpleUnitIdentifiers.yaml"), + mode="r", + encoding="utf-8", + ) as f: + sanctioned_units = yaml.safe_load(f) + + # Read all possible ICU unit identifiers from the "unit/root.txt" resource. + unit_root_file = os.path.join(icu_unit_path, "root.txt") + all_units = readICUUnitResourceFile(unit_root_file) + + # Compute the set of effectively supported ICU unit identifiers. + supported_units = computeSupportedUnits(all_units, sanctioned_units) + + # Read the list of units we're including into the ICU data file. + data_filter_file = os.path.join(icu_path, "data_filter.json") + filtered_units = readICUDataFilterForUnits(data_filter_file) + + # Both sets must match to avoid resource loading errors at runtime. 
+ if supported_units != filtered_units: + + def units_to_string(units): + return ", ".join("/".join(u) for u in units) + + missing = supported_units - filtered_units + if missing: + raise RuntimeError("Missing units: {}".format(units_to_string(missing))) + + # Not exactly an error, but we currently don't have a use case where we need to support + # more units than required by ECMA-402. + extra = filtered_units - supported_units + if extra: + raise RuntimeError("Unnecessary units: {}".format(units_to_string(extra))) + + writeSanctionedSimpleUnitIdentifiersFiles(all_units, sanctioned_units) + + +def readICUNumberingSystemsResourceFile(filepath): + """Returns a dictionary of numbering systems where the key denotes the numbering system name + and the value a dictionary with additional numbering system data. + + Example: + + numberingSystems:table(nofallback){ + numberingSystems{ + latn{ + algorithmic:int{0} + desc{"0123456789"} + radix:int{10} + } + roman{ + algorithmic:int{1} + desc{"%roman-upper"} + radix:int{10} + } + } + } + + Returns {"latn": {"digits": "0123456789", "algorithmic": False}, + "roman": {"algorithmic": True}} + """ + + start_table_re = re.compile(r"^(\w+)(?:\:[\w\(\)]+)?\{$") + end_table_re = re.compile(r"^\}$") + table_entry_re = re.compile(r"^(\w+)(?:\:[\w\(\)]+)?\{(?:(?:\"(.*?)\")|(\d+))\}$") + + # The current resource table. + table = {} + + # List of parent tables when parsing. + parents = [] + + # Track multi-line comments state. + in_multiline_comment = False + + for line in flines(filepath, "utf-8-sig"): + # Remove leading and trailing whitespace. + line = line.strip() + + # Skip over comments. + if in_multiline_comment: + if line.endswith("*/"): + in_multiline_comment = False + continue + + if line.startswith("//"): + continue + + if line.startswith("/*"): + in_multiline_comment = True + continue + + # Try to match the start of a table, e.g. `latn{`. 
+ match = start_table_re.match(line) + if match: + parents.append(table) + table_name = match.group(1) + new_table = {} + table[table_name] = new_table + table = new_table + continue + + # Try to match the end of a table. + match = end_table_re.match(line) + if match: + table = parents.pop() + continue + + # Try to match a table entry, e.g. `desc{"0123456789"}`. + match = table_entry_re.match(line) + if match: + entry_key = match.group(1) + entry_value = ( + match.group(2) if match.group(2) is not None else int(match.group(3)) + ) + table[entry_key] = entry_value + continue + + raise Exception("unexpected line: '{}' in {}".format(line, filepath)) + + assert len(parents) == 0, "Not all tables closed" + assert len(table) == 1, "More than one root table" + + # Remove the two top-level "numberingSystems" tables. + (_, numbering_systems) = table.popitem() + (_, numbering_systems) = numbering_systems.popitem() + + # Assert all numbering systems use base 10. + assert all(ns["radix"] == 10 for ns in numbering_systems.values()) + + # Return the numbering systems. + return { + key: {"digits": value["desc"], "algorithmic": False} + if not bool(value["algorithmic"]) + else {"algorithmic": True} + for (key, value) in numbering_systems.items() + } + + +def writeNumberingSystemFiles(numbering_systems): + js_src_builtin_intl_dir = os.path.dirname(os.path.abspath(__file__)) + + numbering_systems_js_file = os.path.join( + js_src_builtin_intl_dir, "NumberingSystemsGenerated.h" + ) + with io.open( + numbering_systems_js_file, mode="w", encoding="utf-8", newline="" + ) as f: + println = partial(print, file=f) + + println(generatedFileWarning) + + println( + """ +/** + * The list of numbering systems with simple digit mappings. 
+ */ + +#ifndef builtin_intl_NumberingSystemsGenerated_h +#define builtin_intl_NumberingSystemsGenerated_h +""" + ) + + simple_numbering_systems = sorted( + name + for (name, value) in numbering_systems.items() + if not value["algorithmic"] + ) + + println("// clang-format off") + println("#define NUMBERING_SYSTEMS_WITH_SIMPLE_DIGIT_MAPPINGS \\") + println( + "{}".format( + ", \\\n".join( + ' "{}"'.format(name) for name in simple_numbering_systems + ) + ) + ) + println("// clang-format on") + println("") + + println("#endif // builtin_intl_NumberingSystemsGenerated_h") + + js_src_builtin_intl_dir = os.path.dirname(os.path.abspath(__file__)) + test_dir = os.path.join(js_src_builtin_intl_dir, "../../tests/non262/Intl") + + intl_shell_js_file = os.path.join(test_dir, "shell.js") + + with io.open(intl_shell_js_file, mode="w", encoding="utf-8", newline="") as f: + println = partial(print, file=f) + + println(generatedFileWarning) + + println( + """ +// source: CLDR file common/bcp47/number.xml; version CLDR {}. +// https://github.com/unicode-org/cldr/blob/master/common/bcp47/number.xml +// https://github.com/unicode-org/cldr/blob/master/common/supplemental/numberingSystems.xml +""".format( + readCLDRVersionFromICU() + ).rstrip() + ) + + numbering_systems_object = json.dumps( + numbering_systems, + indent=2, + separators=(",", ": "), + sort_keys=True, + ensure_ascii=False, + ) + println("const numberingSystems = {};".format(numbering_systems_object)) + + +def updateNumberingSystems(topsrcdir, args): + js_src_builtin_intl_dir = os.path.dirname(os.path.abspath(__file__)) + icu_path = os.path.join(topsrcdir, "intl", "icu") + icu_misc_path = os.path.join(icu_path, "source", "data", "misc") + + with io.open( + os.path.join(js_src_builtin_intl_dir, "NumberingSystems.yaml"), + mode="r", + encoding="utf-8", + ) as f: + numbering_systems = yaml.safe_load(f) + + # Read all possible ICU unit identifiers from the "misc/numberingSystems.txt" resource. 
+ misc_ns_file = os.path.join(icu_misc_path, "numberingSystems.txt") + all_numbering_systems = readICUNumberingSystemsResourceFile(misc_ns_file) + + all_numbering_systems_simple_digits = { + name + for (name, value) in all_numbering_systems.items() + if not value["algorithmic"] + } + + # Assert ICU includes support for all required numbering systems. If this assertion fails, + # something is broken in ICU. + assert all_numbering_systems_simple_digits.issuperset( + numbering_systems + ), "{}".format(numbering_systems.difference(all_numbering_systems_simple_digits)) + + # Assert the spec requires support for all numbering systems with simple digit mappings. If + # this assertion fails, file a PR at <https://github.com/tc39/ecma402> to include any new + # numbering systems. + assert all_numbering_systems_simple_digits.issubset(numbering_systems), "{}".format( + all_numbering_systems_simple_digits.difference(numbering_systems) + ) + + writeNumberingSystemFiles(all_numbering_systems) + + +if __name__ == "__main__": + import argparse + + # This script must reside in js/src/builtin/intl to work correctly. 
+ (thisDir, thisFile) = os.path.split(os.path.abspath(__file__)) + dirPaths = os.path.normpath(thisDir).split(os.sep) + if "/".join(dirPaths[-4:]) != "js/src/builtin/intl": + raise RuntimeError("%s must reside in js/src/builtin/intl" % __file__) + topsrcdir = "/".join(dirPaths[:-4]) + + def EnsureHttps(v): + if not v.startswith("https:"): + raise argparse.ArgumentTypeError("URL protocol must be https: " % v) + return v + + parser = argparse.ArgumentParser(description="Update intl data.") + subparsers = parser.add_subparsers(help="Select update mode") + + parser_cldr_tags = subparsers.add_parser( + "langtags", help="Update CLDR language tags data" + ) + parser_cldr_tags.add_argument( + "--version", metavar="VERSION", help="CLDR version number" + ) + parser_cldr_tags.add_argument( + "--url", + metavar="URL", + default="https://unicode.org/Public/cldr/<VERSION>/cldr-common-<VERSION>.0.zip", + type=EnsureHttps, + help="Download url CLDR data (default: %(default)s)", + ) + parser_cldr_tags.add_argument( + "--out", + default=os.path.join( + topsrcdir, "intl", "components", "src", "LocaleGenerated.cpp" + ), + help="Output file (default: %(default)s)", + ) + parser_cldr_tags.add_argument( + "file", nargs="?", help="Local cldr-common.zip file, if omitted uses <URL>" + ) + parser_cldr_tags.set_defaults(func=updateCLDRLangTags) + + parser_tz = subparsers.add_parser("tzdata", help="Update tzdata") + parser_tz.add_argument( + "--tz", + help="Local tzdata directory or file, if omitted downloads tzdata " + "distribution from https://www.iana.org/time-zones/", + ) + # ICU doesn't include the backzone file by default, but we still like to + # use the backzone time zone names to avoid user confusion. This does lead + # to formatting "historic" dates (pre-1970 era) with the wrong time zone, + # but that's probably acceptable for now. + parser_tz.add_argument( + "--ignore-backzone", + action="store_true", + help="Ignore tzdata's 'backzone' file. 
Can be enabled to generate more " + "accurate time zone canonicalization reflecting the actual time " + "zones as used by ICU.", + ) + parser_tz.add_argument( + "--out", + default=os.path.join(thisDir, "TimeZoneDataGenerated.h"), + help="Output file (default: %(default)s)", + ) + parser_tz.set_defaults(func=partial(updateTzdata, topsrcdir)) + + parser_currency = subparsers.add_parser( + "currency", help="Update currency digits mapping" + ) + parser_currency.add_argument( + "--url", + metavar="URL", + default="https://www.six-group.com/dam/download/financial-information/data-center/iso-currrency/lists/list-one.xml", # NOQA: E501 + type=EnsureHttps, + help="Download url for the currency & funds code list (default: " + "%(default)s)", + ) + parser_currency.add_argument( + "--out", + default=os.path.join(thisDir, "CurrencyDataGenerated.js"), + help="Output file (default: %(default)s)", + ) + parser_currency.add_argument( + "file", nargs="?", help="Local currency code list file, if omitted uses <URL>" + ) + parser_currency.set_defaults(func=partial(updateCurrency, topsrcdir)) + + parser_units = subparsers.add_parser( + "units", help="Update sanctioned unit identifiers mapping" + ) + parser_units.set_defaults(func=partial(updateUnits, topsrcdir)) + + parser_numbering_systems = subparsers.add_parser( + "numbering", help="Update numbering systems with simple digit mappings" + ) + parser_numbering_systems.set_defaults( + func=partial(updateNumberingSystems, topsrcdir) + ) + + args = parser.parse_args() + args.func(args) |