From 26a029d407be480d791972afb5975cf62c9360a6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 02:47:55 +0200 Subject: Adding upstream version 124.0.1. Signed-off-by: Daniel Baumann --- intl/components/src/DateTimeFormat.h | 593 +++++++++++++++++++++++++++++++++++ 1 file changed, 593 insertions(+) create mode 100644 intl/components/src/DateTimeFormat.h (limited to 'intl/components/src/DateTimeFormat.h') diff --git a/intl/components/src/DateTimeFormat.h b/intl/components/src/DateTimeFormat.h new file mode 100644 index 0000000000..4853d9e3b2 --- /dev/null +++ b/intl/components/src/DateTimeFormat.h @@ -0,0 +1,593 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#ifndef intl_components_DateTimeFormat_h_ +#define intl_components_DateTimeFormat_h_ +#include +#include "unicode/udat.h" + +#include "mozilla/Assertions.h" +#include "mozilla/intl/ICU4CGlue.h" +#include "mozilla/intl/ICUError.h" + +#include "mozilla/intl/DateTimePart.h" +#include "mozilla/intl/DateTimePatternGenerator.h" +#include "mozilla/Maybe.h" +#include "mozilla/Span.h" +#include "mozilla/Try.h" +#include "mozilla/UniquePtr.h" +#include "mozilla/Utf8.h" +#include "mozilla/Variant.h" +#include "mozilla/Vector.h" + +/* + * To work around webcompat problems caused by Narrow No-Break Space in + * formatted date/time output, where existing code on the web naively + * assumes there will be a normal Space, we replace any occurrences of + * U+202F in the formatted results with U+0020. + * + * The intention is to undo this hack once other major browsers are also + * ready to ship with the updated (ICU72) i18n data that uses NNBSP. + * + * See https://bugzilla.mozilla.org/show_bug.cgi?id=1806042 for details, + * and see DateIntervalFormat.cpp for the other piece of this hack. + */ +#define DATE_TIME_FORMAT_REPLACE_SPECIAL_SPACES 1 + +namespace mozilla::intl { + +#if DATE_TIME_FORMAT_REPLACE_SPECIAL_SPACES +static inline bool IsSpecialSpace(char16_t c) { + // NARROW NO-BREAK SPACE and THIN SPACE + return c == 0x202F || c == 0x2009; +} +#endif + +class Calendar; + +/** + * Intro to mozilla::intl::DateTimeFormat + * ====================================== + * + * This component is a Mozilla-focused API for the date formatting provided by + * ICU. The methods internally call out to ICU4C. This is responsible for and + * owns any resources opened through ICU, through RAII. + * + * The construction of a DateTimeFormat contains the majority of the cost + * of the DateTimeFormat operation. DateTimeFormat::TryFormat should be + * relatively inexpensive after the initial construction. + * + * This class supports creating from Styles (a fixed set of options) and from a + * components bag (a list of components and their lengths). + * + * This API serves to back the ECMA-402 Intl.DateTimeFormat API. + * https://tc39.es/ecma402/#datetimeformat-objects + * + * + * ECMA-402 Intl.DateTimeFormat API and implementation details with ICU + * skeletons and patterns. + * ==================================================================== + * + * Different locales have different ways to display dates using the same + * basic components. For example, en-US might use "Sept. 24, 2012" while + * fr-FR might use "24 Sept. 2012". The intent of Intl.DateTimeFormat is to + * permit production of a format for the locale that best matches the + * set of date-time components and their desired representation as specified + * by the API client. + * + * ICU4C supports specification of date and time formats in three ways: + * + * 1) A style is just one of the identifiers FULL, LONG, MEDIUM, or SHORT. + * The date-time components included in each style and their representation + * are defined by ICU using CLDR locale data (CLDR is the Unicode + * Consortium's Common Locale Data Repository). + * + * 2) A skeleton is a string specifying which date-time components to include, + * and which representations to use for them. For example, "yyyyMMMMdd" + * specifies a year with at least four digits, a full month name, and a + * two-digit day. It does not specify in which order the components appear, + * how they are separated, the localized strings for textual components + * (such as weekday or month), whether the month is in format or + * stand-alone form¹, or the numbering system used for numeric components. + * All that information is filled in by ICU using CLDR locale data. + * ¹ The format form is the one used in formatted strings that include a + * day; the stand-alone form is used when not including days, e.g., in + * calendar headers. The two forms differ at least in some Slavic languages, + * e.g. Russian: "22 марта 2013 г." vs. "Март 2013". + * + * 3) A pattern is a string specifying which date-time components to include, + * in which order, with which separators, in which grammatical case. For + * example, "EEEE, d MMMM y" specifies the full localized weekday name, + * followed by comma and space, followed by the day, followed by space, + * followed by the full month name in format form, followed by space, + * followed by the full year. It + * still does not specify localized strings for textual components and the + * numbering system - these are determined by ICU using CLDR locale data or + * possibly API parameters. + * + * All actual formatting in ICU4C is done with patterns; styles and skeletons + * have to be mapped to patterns before processing. + * + * The options of Intl.DateTimeFormat most closely correspond to ICU skeletons. + * This implementation therefore converts DateTimeFormat options to ICU + * skeletons, and then lets ICU map skeletons to actual ICU patterns. The + * pattern may not directly correspond to what the skeleton requests, as the + * mapper (UDateTimePatternGenerator) is constrained by the available locale + * data for the locale. + * + * An ICU pattern represents the information of the following DateTimeFormat + * internal properties described in the specification, which therefore don't + * exist separately in the implementation: + * - [[weekday]], [[era]], [[year]], [[month]], [[day]], [[hour]], [[minute]], + * [[second]], [[timeZoneName]] + * - [[hour12]] + * - [[hourCycle]] + * - [[hourNo0]] + * When needed for the resolvedOptions method, the resolveICUPattern function + * queries the UDateFormat's internal pattern and then maps the it back to the + * specified properties of the object returned by resolvedOptions. + * + * ICU date-time skeletons and patterns aren't fully documented in the ICU + * documentation (see http://bugs.icu-project.org/trac/ticket/9627). The best + * documentation at this point is in UTR 35: + * http://unicode.org/reports/tr35/tr35-dates.html#Date_Format_Patterns + * + * Future support for ICU4X + * ======================== + * This implementation exposes a components bag, and internally handles the + * complexity of working with skeletons and patterns to generate the correct + * results. In the future, if and when we switch to ICU4X, the complexities of + * manipulating patterns will be able to be removed, as ICU4X will directly know + * how to apply the components bag. + */ +class DateTimeFormat final { + public: + /** + * The hour cycle for components. + */ + enum class HourCycle { + H11, + H12, + H23, + H24, + }; + + /** + * The style for dates or times. + */ + enum class Style { + Full, + Long, + Medium, + Short, + }; + + /** + * A bag of options to determine the length of the time and date styles. The + * hour cycle can be overridden. + */ + struct StyleBag { + Maybe