From 26a029d407be480d791972afb5975cf62c9360a6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 02:47:55 +0200 Subject: Adding upstream version 124.0.1. Signed-off-by: Daniel Baumann --- third_party/rust/icu_provider/.cargo-checksum.json | 1 + third_party/rust/icu_provider/Cargo.toml | 161 +++ third_party/rust/icu_provider/LICENSE | 44 + third_party/rust/icu_provider/README.md | 122 +++ third_party/rust/icu_provider/src/any.rs | 514 +++++++++ third_party/rust/icu_provider/src/buf.rs | 168 +++ third_party/rust/icu_provider/src/constructors.rs | 373 +++++++ third_party/rust/icu_provider/src/data_provider.rs | 331 ++++++ .../icu_provider/src/datagen/data_conversion.rs | 44 + third_party/rust/icu_provider/src/datagen/iter.rs | 35 + third_party/rust/icu_provider/src/datagen/mod.rs | 203 ++++ .../rust/icu_provider/src/datagen/payload.rs | 229 ++++ third_party/rust/icu_provider/src/dynutil.rs | 256 +++++ third_party/rust/icu_provider/src/error.rs | 292 +++++ third_party/rust/icu_provider/src/fallback.rs | 201 ++++ third_party/rust/icu_provider/src/hello_world.rs | 362 +++++++ third_party/rust/icu_provider/src/key.rs | 717 +++++++++++++ third_party/rust/icu_provider/src/lib.rs | 267 +++++ third_party/rust/icu_provider/src/marker.rs | 86 ++ third_party/rust/icu_provider/src/request.rs | 1121 ++++++++++++++++++++ third_party/rust/icu_provider/src/response.rs | 748 +++++++++++++ .../rust/icu_provider/src/serde/borrow_de_utils.rs | 82 ++ third_party/rust/icu_provider/src/serde/mod.rs | 224 ++++ 23 files changed, 6581 insertions(+) create mode 100644 third_party/rust/icu_provider/.cargo-checksum.json create mode 100644 third_party/rust/icu_provider/Cargo.toml create mode 100644 third_party/rust/icu_provider/LICENSE create mode 100644 third_party/rust/icu_provider/README.md create mode 100644 third_party/rust/icu_provider/src/any.rs create mode 100644 third_party/rust/icu_provider/src/buf.rs create mode 100644 third_party/rust/icu_provider/src/constructors.rs 
create mode 100644 third_party/rust/icu_provider/src/data_provider.rs create mode 100644 third_party/rust/icu_provider/src/datagen/data_conversion.rs create mode 100644 third_party/rust/icu_provider/src/datagen/iter.rs create mode 100644 third_party/rust/icu_provider/src/datagen/mod.rs create mode 100644 third_party/rust/icu_provider/src/datagen/payload.rs create mode 100644 third_party/rust/icu_provider/src/dynutil.rs create mode 100644 third_party/rust/icu_provider/src/error.rs create mode 100644 third_party/rust/icu_provider/src/fallback.rs create mode 100644 third_party/rust/icu_provider/src/hello_world.rs create mode 100644 third_party/rust/icu_provider/src/key.rs create mode 100644 third_party/rust/icu_provider/src/lib.rs create mode 100644 third_party/rust/icu_provider/src/marker.rs create mode 100644 third_party/rust/icu_provider/src/request.rs create mode 100644 third_party/rust/icu_provider/src/response.rs create mode 100644 third_party/rust/icu_provider/src/serde/borrow_de_utils.rs create mode 100644 third_party/rust/icu_provider/src/serde/mod.rs (limited to 'third_party/rust/icu_provider') diff --git a/third_party/rust/icu_provider/.cargo-checksum.json b/third_party/rust/icu_provider/.cargo-checksum.json new file mode 100644 index 0000000000..56cbc47c42 --- /dev/null +++ b/third_party/rust/icu_provider/.cargo-checksum.json @@ -0,0 +1 @@ 
+{"files":{"Cargo.toml":"e45c32b8bdcbb9724771c93110e07499386de9bc779e652ae8a3716df3a3e25a","LICENSE":"853f87c96f3d249f200fec6db1114427bc8bdf4afddc93c576956d78152ce978","README.md":"38a5e2a91c50fbe3f5ae01b286f4b1aec56b0007ba4b2a3375b317b84cc5e7ef","src/any.rs":"c88fa7ecb3da7c76e4520702b1e2ea8d760781f8caa7db7c7353a99fcd5883d1","src/buf.rs":"64e924f1d10a5c97c52ea7c08b83f0bff9e477e3d77f57d90b7ed069adff5de9","src/constructors.rs":"a40379974ba1b0ced0ffc2098cc45eff2409e668babd1c22a0c1bb6de775dbb3","src/data_provider.rs":"6527a8c8fdf89226912a2e9f76aaf6939a554658726e6a70ec1830f6ce112fc0","src/datagen/data_conversion.rs":"ee206413cd754803b1c7e2dd5d24c854f863b4df4c46a22c876cb8fc07bf96e8","src/datagen/iter.rs":"6353ec6476596ae6dfd5aee5f3f420c3a1c1c6e71911fa7dae49e17564e1930f","src/datagen/mod.rs":"6663c8926ca78b6e9b22544418b321ed575ae47ba2335d2befc758ee63deaf37","src/datagen/payload.rs":"33df6d11e40b131a71d2deaa861d819db05ccf6c87ed41a927525763295d9c28","src/dynutil.rs":"97c7d9fed27d73b9998cc770c3b966cca35053465636e227558024a09ee4ce9e","src/error.rs":"b44b46412b67fdbdc52dbf9b151fe880f3d6c90505d0ff18b3e055ffa6564bef","src/fallback.rs":"75a3df052cb2b7c77197f23ab39ce00d31664b030346f49daafc3529a25b5741","src/hello_world.rs":"4252dac1ad57320be87a9c127a4319bdef8a607aa4fb6f7df8abfefbb76e3eb8","src/key.rs":"c1edeb791c937ccd210452fe04eecaced43a33c41ae28b8bb9285153ddf844fa","src/lib.rs":"5356e395298dc2f7330104a52dd8d3ca1f895fc2a25d4fea3e2f0e8551ecfc53","src/marker.rs":"b6fd186d4343586647c1e20e5bfb506cd9ab263204b1901750afaec14441091c","src/request.rs":"f0456c886edee9f2cbd273aaca3f7da579d5d73f8356b2a410d28e5c9187aba1","src/response.rs":"e9577781652d4ce1d98f06603fe8e102e51ae1e5ed2b41003facc9d0956a74d3","src/serde/borrow_de_utils.rs":"30bd6e712bf413f47c0801df845ed790ab7edcb49f57020b769ffc79c36b4651","src/serde/mod.rs":"b1c1c69d70d3d61c5e8e4b692470d7b7f53fb8a56568c61eaa21414962a928cc"},"package":"ba58e782287eb6950247abbf11719f83f5d4e4a5c1f2cd490d30a334bc47c2f4"} \ No newline at end of file 
diff --git a/third_party/rust/icu_provider/Cargo.toml b/third_party/rust/icu_provider/Cargo.toml new file mode 100644 index 0000000000..5fbb63a341 --- /dev/null +++ b/third_party/rust/icu_provider/Cargo.toml @@ -0,0 +1,161 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2021" +rust-version = "1.67" +name = "icu_provider" +version = "1.4.0" +authors = ["The ICU4X Project Developers"] +include = [ + "data/**/*", + "src/**/*", + "examples/**/*", + "benches/**/*", + "tests/**/*", + "Cargo.toml", + "LICENSE", + "README.md", +] +description = "Trait and struct definitions for the ICU data provider" +homepage = "https://icu4x.unicode.org" +readme = "README.md" +categories = ["internationalization"] +license-file = "LICENSE" +repository = "https://github.com/unicode-org/icu4x" + +[package.metadata.cargo-all-features] +denylist = ["macros"] +max_combination_size = 3 + +[package.metadata.docs.rs] +all-features = true + +[dependencies.bincode] +version = "1.3" +optional = true + +[dependencies.databake] +version = "0.1.7" +features = ["derive"] +optional = true +default-features = false + +[dependencies.displaydoc] +version = "0.2.3" +default-features = false + +[dependencies.erased-serde] +version = "0.3" +features = ["alloc"] +optional = true +default-features = false + +[dependencies.icu_locid] +version = "~1.4.0" +default-features = false + +[dependencies.icu_provider_macros] +version = "~1.4.0" +optional = true +default-features = false + +[dependencies.log] +version = "0.4" +optional = true + +[dependencies.postcard] 
+version = "1.0.0" +optional = true +default-features = false + +[dependencies.serde] +version = "1.0" +features = [ + "derive", + "alloc", +] +optional = true +default-features = false + +[dependencies.serde_json] +version = "1.0" +features = ["alloc"] +optional = true +default-features = false + +[dependencies.stable_deref_trait] +version = "1.2.0" +default-features = false + +[dependencies.tinystr] +version = "0.7.4" +default-features = false + +[dependencies.writeable] +version = "0.5.4" +default-features = false + +[dependencies.yoke] +version = "0.7.3" +features = [ + "alloc", + "derive", +] +default-features = false + +[dependencies.zerofrom] +version = "0.1.2" +features = [ + "alloc", + "derive", +] +default-features = false + +[dependencies.zerovec] +version = "0.10.1" +features = ["derive"] +default-features = false + +[dev-dependencies.serde_json] +version = "1.0" + +[features] +datagen = [ + "serde", + "dep:erased-serde", + "dep:databake", + "std", + "sync", +] +deserialize_bincode_1 = [ + "serde", + "dep:bincode", + "std", +] +deserialize_json = [ + "serde", + "dep:serde_json", +] +deserialize_postcard_1 = [ + "serde", + "dep:postcard", +] +experimental = [] +log_error_context = ["logging"] +logging = ["dep:log"] +macros = ["dep:icu_provider_macros"] +serde = [ + "dep:serde", + "yoke/serde", +] +std = ["icu_locid/std"] +sync = [] diff --git a/third_party/rust/icu_provider/LICENSE b/third_party/rust/icu_provider/LICENSE new file mode 100644 index 0000000000..9845aa5f48 --- /dev/null +++ b/third_party/rust/icu_provider/LICENSE @@ -0,0 +1,44 @@ +UNICODE LICENSE V3 + +COPYRIGHT AND PERMISSION NOTICE + +Copyright © 2020-2023 Unicode, Inc. + +NOTICE TO USER: Carefully read the following legal agreement. BY +DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR +SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE +TERMS AND CONDITIONS OF THIS AGREEMENT. 
IF YOU DO NOT AGREE, DO NOT +DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of data files and any associated documentation (the "Data Files") or +software and any associated documentation (the "Software") to deal in the +Data Files or Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, and/or sell +copies of the Data Files or Software, and to permit persons to whom the +Data Files or Software are furnished to do so, provided that either (a) +this copyright and permission notice appear with all copies of the Data +Files or Software, or (b) this copyright and permission notice appear in +associated Documentation. + +THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY +KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF +THIRD PARTY RIGHTS. + +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE +BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, +OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, +ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA +FILES OR SOFTWARE. + +Except as contained in this notice, the name of a copyright holder shall +not be used in advertising or otherwise to promote the sale, use or other +dealings in these Data Files or Software without prior written +authorization of the copyright holder. + +— + +Portions of ICU4X may have been adapted from ICU4C and/or ICU4J. +ICU 1.8.1 to ICU 57.1 © 1995-2016 International Business Machines Corporation and others. 
diff --git a/third_party/rust/icu_provider/README.md b/third_party/rust/icu_provider/README.md new file mode 100644 index 0000000000..0dd698b1fd --- /dev/null +++ b/third_party/rust/icu_provider/README.md @@ -0,0 +1,122 @@ +# icu_provider [![crates.io](https://img.shields.io/crates/v/icu_provider)](https://crates.io/crates/icu_provider) + + + +`icu_provider` is one of the [`ICU4X`] components. + +Unicode's experience with ICU4X's parent projects, ICU4C and ICU4J, led the team to realize +that data management is the most critical aspect of deploying internationalization, and that it requires +a high level of customization for the needs of the platform it is embedded in. As a result +ICU4X comes with a selection of providers that should allow for ICU4X to naturally fit into +different business and technological needs of customers. + +`icu_provider` defines traits and structs for transmitting data through the ICU4X locale +data pipeline. The primary trait is [`DataProvider`]. It is parameterized by a +[`KeyedDataMarker`], which contains the data type and a [`DataKey`]. It has one method, +[`DataProvider::load`], which transforms a [`DataRequest`] +into a [`DataResponse`]. + +- [`DataKey`] is a fixed identifier for the data type, such as `"plurals/cardinal@1"`. +- [`DataRequest`] contains additional annotations to choose a specific variant of the key, + such as a locale. +- [`DataResponse`] contains the data if the request was successful. + +In addition, there are three other traits which are widely implemented: + +- [`AnyProvider`] returns data as `dyn Any` trait objects. +- [`BufferProvider`] returns data as `[u8]` buffers. +- [`DynamicDataProvider`] returns structured data but is not specific to a key. + +The most common types required for this crate are included via the prelude: + +```rust +use icu_provider::prelude::*; +``` + +### Types of Data Providers + +All nontrivial data providers can fit into one of two classes. + +1. 
[`AnyProvider`]: Those whose data originates as structured Rust objects +2. [`BufferProvider`]: Those whose data originates as unstructured `[u8]` buffers + +**✨ Key Insight:** A given data provider is generally *either* an [`AnyProvider`] *or* a +[`BufferProvider`]. Which type depends on the data source, and it is not generally possible +to convert one to the other. + +See also [crate::constructors]. + +#### AnyProvider + +These providers are able to return structured data cast into `dyn Any` trait objects. Users +can call [`as_downcasting()`] to get an object implementing [`DataProvider`] by downcasting +the trait objects. + +Examples of AnyProviders: + +- [`DatagenProvider`] reads structured data from CLDR source files and returns ICU4X data structs. +- [`AnyPayloadProvider`] wraps a specific data struct and returns it. +- The `BakedDataProvider` which encodes structured data directly in Rust source + +#### BufferProvider + +These providers are able to return unstructured data typically represented as +[`serde`]-serialized buffers. Users can call [`as_deserializing()`] to get an object +implementing [`DataProvider`] by invoking Serde Deserialize. + +Examples of BufferProviders: + +- [`FsDataProvider`] reads individual buffers from the filesystem. +- [`BlobDataProvider`] reads buffers from a large in-memory blob. + +### Provider Adapters + +ICU4X offers several built-in modules to combine providers in interesting ways. +These can be found in the [`icu_provider_adapters`] crate. + +### Testing Provider + +This crate also contains a concrete provider for demonstration purposes: + +- [`HelloWorldProvider`] returns "hello world" strings in several languages. + +### Types and Lifetimes + +Types compatible with [`Yokeable`] can be passed through the data provider, so long as they are +associated with a marker type implementing [`DataMarker`]. + +Data structs should generally have one lifetime argument: `'data`. 
This lifetime allows data +structs to borrow zero-copy data. + +### Data generation API + +*This functionality is enabled with the "datagen" Cargo feature* + +The [`datagen`] module contains several APIs for data generation. See [`icu_datagen`] for the reference +data generation implementation. + +[`ICU4X`]: ../icu/index.html +[`DataProvider`]: data_provider::DataProvider +[`DataKey`]: key::DataKey +[`DataLocale`]: request::DataLocale +[`IterableDynamicDataProvider`]: datagen::IterableDynamicDataProvider +[`IterableDataProvider`]: datagen::IterableDataProvider +[`AnyPayloadProvider`]: ../icu_provider_adapters/any_payload/struct.AnyPayloadProvider.html +[`HelloWorldProvider`]: hello_world::HelloWorldProvider +[`AnyProvider`]: any::AnyProvider +[`Yokeable`]: yoke::Yokeable +[`impl_dynamic_data_provider!`]: impl_dynamic_data_provider +[`icu_provider_adapters`]: ../icu_provider_adapters/index.html +[`DatagenProvider`]: ../icu_datagen/struct.DatagenProvider.html +[`as_downcasting()`]: AsDowncastingAnyProvider::as_downcasting +[`as_deserializing()`]: AsDeserializingBufferProvider::as_deserializing +[`CldrJsonDataProvider`]: ../icu_datagen/cldr/struct.CldrJsonDataProvider.html +[`FsDataProvider`]: ../icu_provider_fs/struct.FsDataProvider.html +[`BlobDataProvider`]: ../icu_provider_blob/struct.BlobDataProvider.html +[`icu_datagen`]: ../icu_datagen/index.html + + + +## More Information + +For more information on development, authorship, contributing etc. please visit [`ICU4X home page`](https://github.com/unicode-org/icu4x). diff --git a/third_party/rust/icu_provider/src/any.rs b/third_party/rust/icu_provider/src/any.rs new file mode 100644 index 0000000000..243055d212 --- /dev/null +++ b/third_party/rust/icu_provider/src/any.rs @@ -0,0 +1,514 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! 
Traits for data providers that produce `Any` objects. + +use crate::prelude::*; +use crate::response::DataPayloadInner; +use core::any::Any; +use core::convert::TryFrom; +use core::convert::TryInto; +use yoke::trait_hack::YokeTraitHack; +use yoke::Yokeable; +use zerofrom::ZeroFrom; + +#[cfg(not(feature = "sync"))] +use alloc::rc::Rc as SelectedRc; +#[cfg(feature = "sync")] +use alloc::sync::Arc as SelectedRc; + +/// A trait that allows to specify `Send + Sync` bounds that are only required when +/// the `sync` Cargo feature is enabled. Without the Cargo feature, this is an empty bound. +#[cfg(feature = "sync")] +pub trait MaybeSendSync: Send + Sync {} +#[cfg(feature = "sync")] +impl MaybeSendSync for T {} + +#[allow(missing_docs)] // docs generated with all features +#[cfg(not(feature = "sync"))] +pub trait MaybeSendSync {} +#[cfg(not(feature = "sync"))] +impl MaybeSendSync for T {} + +/// Representations of the `Any` trait object. +/// +/// **Important Note:** The types enclosed by `StructRef` and `PayloadRc` are NOT the same! +/// The first refers to the struct itself, whereas the second refers to a `DataPayload`. +#[derive(Debug, Clone)] +enum AnyPayloadInner { + /// A reference to `M::Yokeable` + StructRef(&'static dyn Any), + /// A boxed `DataPayload`. + /// + /// Note: This needs to be reference counted, not a `Box`, so that `AnyPayload` is cloneable. + /// If an `AnyPayload` is cloned, the actual cloning of the data is delayed until + /// `downcast()` is invoked (at which point we have the concrete type). + + #[cfg(not(feature = "sync"))] + PayloadRc(SelectedRc), + + #[cfg(feature = "sync")] + PayloadRc(SelectedRc), +} + +/// A type-erased data payload. +/// +/// The only useful method on this type is [`AnyPayload::downcast()`], which transforms this into +/// a normal `DataPayload` which you can subsequently access or mutate. +/// +/// As with `DataPayload`, cloning is designed to be cheap. 
+#[derive(Debug, Clone, Yokeable)] +pub struct AnyPayload { + inner: AnyPayloadInner, + type_name: &'static str, +} + +/// The [`DataMarker`] marker type for [`AnyPayload`]. +#[allow(clippy::exhaustive_structs)] // marker type +#[derive(Debug)] +pub struct AnyMarker; + +impl DataMarker for AnyMarker { + type Yokeable = AnyPayload; +} + +impl crate::dynutil::UpcastDataPayload for AnyMarker +where + M: DataMarker, + M::Yokeable: MaybeSendSync, +{ + #[inline] + fn upcast(other: DataPayload) -> DataPayload { + DataPayload::from_owned(other.wrap_into_any_payload()) + } +} + +impl AnyPayload { + /// Transforms a type-erased `AnyPayload` into a concrete `DataPayload`. + /// + /// Because it is expected that the call site knows the identity of the AnyPayload (e.g., from + /// the data request), this function returns a `DataError` if the generic type does not match + /// the type stored in the `AnyPayload`. + pub fn downcast(self) -> Result, DataError> + where + M: DataMarker, + // For the StructRef case: + M::Yokeable: ZeroFrom<'static, M::Yokeable>, + // For the PayloadRc case: + M::Yokeable: MaybeSendSync, + for<'a> YokeTraitHack<>::Output>: Clone, + { + use AnyPayloadInner::*; + let type_name = self.type_name; + match self.inner { + StructRef(any_ref) => { + let down_ref: &'static M::Yokeable = any_ref + .downcast_ref() + .ok_or_else(|| DataError::for_type::().with_str_context(type_name))?; + Ok(DataPayload::from_static_ref(down_ref)) + } + PayloadRc(any_rc) => { + let down_rc = any_rc + .downcast::>() + .map_err(|_| DataError::for_type::().with_str_context(type_name))?; + Ok(SelectedRc::try_unwrap(down_rc).unwrap_or_else(|down_rc| (*down_rc).clone())) + } + } + } + + /// Clones and then transforms a type-erased `AnyPayload` into a concrete `DataPayload`. 
+ pub fn downcast_cloned(&self) -> Result, DataError> + where + M: DataMarker, + // For the StructRef case: + M::Yokeable: ZeroFrom<'static, M::Yokeable>, + // For the PayloadRc case: + M::Yokeable: MaybeSendSync, + for<'a> YokeTraitHack<>::Output>: Clone, + { + self.clone().downcast() + } + + /// Creates an `AnyPayload` from a static reference to a data struct. + /// + /// # Examples + /// + /// ``` + /// use icu_provider::hello_world::*; + /// use icu_provider::prelude::*; + /// use std::borrow::Cow; + /// + /// const HELLO_DATA: HelloWorldV1<'static> = HelloWorldV1 { + /// message: Cow::Borrowed("Custom Hello World"), + /// }; + /// + /// let any_payload = AnyPayload::from_static_ref(&HELLO_DATA); + /// + /// let payload: DataPayload = + /// any_payload.downcast().expect("TypeId matches"); + /// assert_eq!("Custom Hello World", payload.get().message); + /// ``` + pub fn from_static_ref(static_ref: &'static Y) -> Self + where + Y: for<'a> Yokeable<'a>, + { + AnyPayload { + inner: AnyPayloadInner::StructRef(static_ref), + // Note: This records the Yokeable type rather than the DataMarker type, + // but that is okay since this is only for debugging + type_name: core::any::type_name::(), + } + } +} + +impl DataPayload +where + M: DataMarker, + M::Yokeable: MaybeSendSync, +{ + /// Converts this DataPayload into a type-erased `AnyPayload`. Unless the payload stores a static + /// reference, this will move it to the heap. 
+ /// + /// # Examples + /// + /// ``` + /// use icu_provider::hello_world::*; + /// use icu_provider::prelude::*; + /// use std::borrow::Cow; + /// use std::rc::Rc; + /// + /// let payload: DataPayload = + /// DataPayload::from_owned(HelloWorldV1 { + /// message: Cow::Borrowed("Custom Hello World"), + /// }); + /// + /// let any_payload = payload.wrap_into_any_payload(); + /// + /// let payload: DataPayload = + /// any_payload.downcast().expect("TypeId matches"); + /// assert_eq!("Custom Hello World", payload.get().message); + /// ``` + pub fn wrap_into_any_payload(self) -> AnyPayload { + AnyPayload { + inner: match self.0 { + DataPayloadInner::StaticRef(r) => AnyPayloadInner::StructRef(r), + inner => AnyPayloadInner::PayloadRc(SelectedRc::from(Self(inner))), + }, + type_name: core::any::type_name::(), + } + } +} + +impl DataPayload { + /// Transforms a type-erased `DataPayload` into a concrete `DataPayload`. + #[inline] + pub fn downcast(self) -> Result, DataError> + where + M: DataMarker, + for<'a> YokeTraitHack<>::Output>: Clone, + M::Yokeable: ZeroFrom<'static, M::Yokeable>, + M::Yokeable: MaybeSendSync, + { + self.try_unwrap_owned()?.downcast() + } +} + +/// A [`DataResponse`] for type-erased values. +/// +/// Convertible to and from `DataResponse`. +#[allow(clippy::exhaustive_structs)] // this type is stable (the metadata is allowed to grow) +#[derive(Debug)] +pub struct AnyResponse { + /// Metadata about the returned object. + pub metadata: DataResponseMetadata, + + /// The object itself; `None` if it was not loaded. 
+ pub payload: Option, +} + +impl TryFrom> for AnyResponse { + type Error = DataError; + #[inline] + fn try_from(other: DataResponse) -> Result { + Ok(Self { + metadata: other.metadata, + payload: other.payload.map(|p| p.try_unwrap_owned()).transpose()?, + }) + } +} + +impl From for DataResponse { + #[inline] + fn from(other: AnyResponse) -> Self { + Self { + metadata: other.metadata, + payload: other.payload.map(DataPayload::from_owned), + } + } +} + +impl AnyResponse { + /// Transforms a type-erased `AnyResponse` into a concrete `DataResponse`. + #[inline] + pub fn downcast(self) -> Result, DataError> + where + M: DataMarker, + for<'a> YokeTraitHack<>::Output>: Clone, + M::Yokeable: ZeroFrom<'static, M::Yokeable>, + M::Yokeable: MaybeSendSync, + { + Ok(DataResponse { + metadata: self.metadata, + payload: self.payload.map(|p| p.downcast()).transpose()?, + }) + } + + /// Clones and then transforms a type-erased `AnyResponse` into a concrete `DataResponse`. + pub fn downcast_cloned(&self) -> Result, DataError> + where + M: DataMarker, + M::Yokeable: ZeroFrom<'static, M::Yokeable>, + M::Yokeable: MaybeSendSync, + for<'a> YokeTraitHack<>::Output>: Clone, + { + Ok(DataResponse { + metadata: self.metadata.clone(), + payload: self + .payload + .as_ref() + .map(|p| p.downcast_cloned()) + .transpose()?, + }) + } +} + +impl DataResponse +where + M: DataMarker, + M::Yokeable: MaybeSendSync, +{ + /// Moves the inner DataPayload to the heap (requiring an allocation) and returns it as an + /// erased `AnyResponse`. + pub fn wrap_into_any_response(self) -> AnyResponse { + AnyResponse { + metadata: self.metadata, + payload: self.payload.map(|p| p.wrap_into_any_payload()), + } + } +} + +/// An object-safe data provider that returns data structs cast to `dyn Any` trait objects. 
+/// +/// # Examples +/// +/// ``` +/// use icu_provider::hello_world::*; +/// use icu_provider::prelude::*; +/// use std::borrow::Cow; +/// +/// let any_provider = HelloWorldProvider.as_any_provider(); +/// +/// let req = DataRequest { +/// locale: &icu_locid::locale!("de").into(), +/// metadata: Default::default(), +/// }; +/// +/// // Downcasting manually +/// assert_eq!( +/// any_provider +/// .load_any(HelloWorldV1Marker::KEY, req) +/// .expect("load should succeed") +/// .downcast::() +/// .expect("types should match") +/// .take_payload() +/// .unwrap() +/// .get(), +/// &HelloWorldV1 { +/// message: Cow::Borrowed("Hallo Welt"), +/// }, +/// ); +/// +/// // Downcasting automatically +/// let downcasting_provider: &dyn DataProvider = +/// &any_provider.as_downcasting(); +/// +/// assert_eq!( +/// downcasting_provider +/// .load(req) +/// .expect("load should succeed") +/// .take_payload() +/// .unwrap() +/// .get(), +/// &HelloWorldV1 { +/// message: Cow::Borrowed("Hallo Welt"), +/// }, +/// ); +/// ``` +pub trait AnyProvider { + /// Loads an [`AnyPayload`] according to the key and request. 
+ fn load_any(&self, key: DataKey, req: DataRequest) -> Result; +} + +impl<'a, T: AnyProvider + ?Sized> AnyProvider for &'a T { + fn load_any(&self, key: DataKey, req: DataRequest) -> Result { + (**self).load_any(key, req) + } +} + +impl AnyProvider for alloc::boxed::Box { + fn load_any(&self, key: DataKey, req: DataRequest) -> Result { + (**self).load_any(key, req) + } +} + +impl AnyProvider for alloc::rc::Rc { + fn load_any(&self, key: DataKey, req: DataRequest) -> Result { + (**self).load_any(key, req) + } +} + +#[cfg(target_has_atomic = "ptr")] +impl AnyProvider for alloc::sync::Arc { + fn load_any(&self, key: DataKey, req: DataRequest) -> Result { + (**self).load_any(key, req) + } +} + +/// A wrapper over `DynamicDataProvider` that implements `AnyProvider` +#[allow(clippy::exhaustive_structs)] // newtype +#[derive(Debug)] +pub struct DynamicDataProviderAnyMarkerWrap<'a, P: ?Sized>(pub &'a P); + +/// Blanket-implemented trait adding the [`Self::as_any_provider()`] function. +pub trait AsDynamicDataProviderAnyMarkerWrap { + /// Returns an object implementing `AnyProvider` when called on `DynamicDataProvider` + fn as_any_provider(&self) -> DynamicDataProviderAnyMarkerWrap; +} + +impl

AsDynamicDataProviderAnyMarkerWrap for P +where + P: DynamicDataProvider + ?Sized, +{ + #[inline] + fn as_any_provider(&self) -> DynamicDataProviderAnyMarkerWrap

{ + DynamicDataProviderAnyMarkerWrap(self) + } +} + +impl

AnyProvider for DynamicDataProviderAnyMarkerWrap<'_, P> +where + P: DynamicDataProvider + ?Sized, +{ + #[inline] + fn load_any(&self, key: DataKey, req: DataRequest) -> Result { + self.0.load_data(key, req)?.try_into() + } +} + +/// A wrapper over `AnyProvider` that implements `DynamicDataProvider` via downcasting +#[allow(clippy::exhaustive_structs)] // newtype +#[derive(Debug)] +pub struct DowncastingAnyProvider<'a, P: ?Sized>(pub &'a P); + +/// Blanket-implemented trait adding the [`Self::as_downcasting()`] function. +pub trait AsDowncastingAnyProvider { + /// Returns an object implementing `DynamicDataProvider` when called on `AnyProvider` + fn as_downcasting(&self) -> DowncastingAnyProvider; +} + +impl

AsDowncastingAnyProvider for P +where + P: AnyProvider + ?Sized, +{ + #[inline] + fn as_downcasting(&self) -> DowncastingAnyProvider

{ + DowncastingAnyProvider(self) + } +} + +impl DataProvider for DowncastingAnyProvider<'_, P> +where + P: AnyProvider + ?Sized, + M: KeyedDataMarker, + for<'a> YokeTraitHack<>::Output>: Clone, + M::Yokeable: ZeroFrom<'static, M::Yokeable>, + M::Yokeable: MaybeSendSync, +{ + #[inline] + fn load(&self, req: DataRequest) -> Result, DataError> { + self.0 + .load_any(M::KEY, req)? + .downcast() + .map_err(|e| e.with_req(M::KEY, req)) + } +} + +impl DynamicDataProvider for DowncastingAnyProvider<'_, P> +where + P: AnyProvider + ?Sized, + M: DataMarker, + for<'a> YokeTraitHack<>::Output>: Clone, + M::Yokeable: ZeroFrom<'static, M::Yokeable>, + M::Yokeable: MaybeSendSync, +{ + #[inline] + fn load_data(&self, key: DataKey, req: DataRequest) -> Result, DataError> { + self.0 + .load_any(key, req)? + .downcast() + .map_err(|e| e.with_req(key, req)) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::hello_world::*; + use alloc::borrow::Cow; + + const CONST_DATA: HelloWorldV1<'static> = HelloWorldV1 { + message: Cow::Borrowed("Custom Hello World"), + }; + + #[test] + fn test_debug() { + let payload: DataPayload = DataPayload::from_owned(HelloWorldV1 { + message: Cow::Borrowed("Custom Hello World"), + }); + + let any_payload = payload.wrap_into_any_payload(); + assert_eq!( + "AnyPayload { inner: PayloadRc(Any { .. }), type_name: \"icu_provider::hello_world::HelloWorldV1Marker\" }", + format!("{any_payload:?}") + ); + + struct WrongMarker; + + impl DataMarker for WrongMarker { + type Yokeable = u8; + } + + let err = any_payload.downcast::().unwrap_err(); + assert_eq!( + "ICU4X data error: Mismatched types: tried to downcast with icu_provider::any::test::test_debug::WrongMarker, but actual type is different: icu_provider::hello_world::HelloWorldV1Marker", + format!("{err}") + ); + } + + #[test] + fn test_non_owned_any_marker() { + // This test demonstrates a code path that can trigger the InvalidState error kind. 
+ let payload_result: DataPayload = + DataPayload::from_owned_buffer(Box::new(*b"pretend we're borrowing from here")) + .map_project(|_, _| AnyPayload::from_static_ref(&CONST_DATA)); + let err = payload_result.downcast::().unwrap_err(); + assert!(matches!( + err, + DataError { + kind: DataErrorKind::InvalidState, + .. + } + )); + } +} diff --git a/third_party/rust/icu_provider/src/buf.rs b/third_party/rust/icu_provider/src/buf.rs new file mode 100644 index 0000000000..0a0ad6eb30 --- /dev/null +++ b/third_party/rust/icu_provider/src/buf.rs @@ -0,0 +1,168 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Traits for data providers that produce opaque buffers. + +use crate::prelude::*; + +/// [`DataMarker`] for raw buffers. Returned by [`BufferProvider`]. +/// +/// The data is expected to be deserialized before it can be used; see +/// [`DataPayload::into_deserialized`]. +#[allow(clippy::exhaustive_structs)] // marker type +#[derive(Debug)] +pub struct BufferMarker; + +impl DataMarker for BufferMarker { + type Yokeable = &'static [u8]; +} + +/// A data provider that returns opaque bytes. +/// +/// Generally, these bytes are expected to be deserializable with Serde. To get an object +/// implementing [`DataProvider`] via Serde, use [`as_deserializing()`]. +/// +/// Passing a `BufferProvider` to a `*_with_buffer_provider` constructor requires enabling +/// the deserialization Cargo feature for the expected format(s): +/// - `deserialize_json` +/// - `deserialize_postcard_1` +/// - `deserialize_bincode_1` +/// +/// Along with [`DataProvider`], this is one of the two foundational traits in this crate. +/// +/// [`BufferProvider`] can be made into a trait object. It is used over FFI. 
+/// +/// # Examples +/// +/// ``` +/// # #[cfg(feature = "deserialize_json")] { +/// use icu_locid::locale; +/// use icu_provider::hello_world::*; +/// use icu_provider::prelude::*; +/// use std::borrow::Cow; +/// +/// let buffer_provider = HelloWorldProvider.into_json_provider(); +/// +/// let req = DataRequest { +/// locale: &locale!("de").into(), +/// metadata: Default::default(), +/// }; +/// +/// // Deserializing manually +/// assert_eq!( +/// serde_json::from_slice::( +/// buffer_provider +/// .load_buffer(HelloWorldV1Marker::KEY, req) +/// .expect("load should succeed") +/// .take_payload() +/// .unwrap() +/// .get() +/// ) +/// .expect("should deserialize"), +/// HelloWorldV1 { +/// message: Cow::Borrowed("Hallo Welt"), +/// }, +/// ); +/// +/// // Deserialize automatically +/// let deserializing_provider: &dyn DataProvider = +/// &buffer_provider.as_deserializing(); +/// +/// assert_eq!( +/// deserializing_provider +/// .load(req) +/// .expect("load should succeed") +/// .take_payload() +/// .unwrap() +/// .get(), +/// &HelloWorldV1 { +/// message: Cow::Borrowed("Hallo Welt"), +/// }, +/// ); +/// # } +/// ``` +/// +/// [`as_deserializing()`]: AsDeserializingBufferProvider::as_deserializing +pub trait BufferProvider { + /// Loads a [`DataPayload`]`<`[`BufferMarker`]`>` according to the key and request. 
+ fn load_buffer( + &self, + key: DataKey, + req: DataRequest, + ) -> Result, DataError>; +} + +impl<'a, T: BufferProvider + ?Sized> BufferProvider for &'a T { + fn load_buffer( + &self, + key: DataKey, + req: DataRequest, + ) -> Result, DataError> { + (**self).load_buffer(key, req) + } +} + +impl BufferProvider for alloc::boxed::Box { + fn load_buffer( + &self, + key: DataKey, + req: DataRequest, + ) -> Result, DataError> { + (**self).load_buffer(key, req) + } +} + +impl BufferProvider for alloc::rc::Rc { + fn load_buffer( + &self, + key: DataKey, + req: DataRequest, + ) -> Result, DataError> { + (**self).load_buffer(key, req) + } +} + +#[cfg(target_has_atomic = "ptr")] +impl BufferProvider for alloc::sync::Arc { + fn load_buffer( + &self, + key: DataKey, + req: DataRequest, + ) -> Result, DataError> { + (**self).load_buffer(key, req) + } +} + +/// An enum expressing all Serde formats known to ICU4X. +#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[non_exhaustive] +pub enum BufferFormat { + /// Serialize using JavaScript Object Notation (JSON). + Json, + /// Serialize using Bincode version 1. + Bincode1, + /// Serialize using Postcard version 1. + Postcard1, +} + +impl BufferFormat { + /// Returns an error if the buffer format is not enabled. 
+ pub fn check_available(&self) -> Result<(), DataError> { + match self { + #[cfg(feature = "deserialize_json")] + BufferFormat::Json => Ok(()), + + #[cfg(feature = "deserialize_bincode_1")] + BufferFormat::Bincode1 => Ok(()), + + #[cfg(feature = "deserialize_postcard_1")] + BufferFormat::Postcard1 => Ok(()), + + // Allowed for cases in which all features are enabled + #[allow(unreachable_patterns)] + _ => Err(DataErrorKind::UnavailableBufferFormat(*self).into_error()), + } + } +} diff --git a/third_party/rust/icu_provider/src/constructors.rs b/third_party/rust/icu_provider/src/constructors.rs new file mode 100644 index 0000000000..f521f1feab --- /dev/null +++ b/third_party/rust/icu_provider/src/constructors.rs @@ -0,0 +1,373 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! 📚 *This module documents ICU4X constructor signatures.* +//! +//! One of the key differences between ICU4X and its parent projects, ICU4C and ICU4J, is in how +//! it deals with locale data. +//! +//! In ICU4X, data can always be explicitly passed to any function that requires data. +//! This enables ICU4X to achieve the following value propositions: +//! +//! 1. Configurable data sources (machine-readable data file, baked into code, JSON, etc). +//! 2. Dynamic data loading at runtime (load data on demand). +//! 3. Reduced overhead and code size (data is resolved locally at each call site). +//! 4. Explicit support for multiple ICU4X instances sharing data. +//! +//! However, as manual data management can be tedious, ICU4X also has a `compiled_data` +//! default Cargo feature that includes data and makes ICU4X work out-of-the box. +//! +//! Subsequently, there are 4 versions of all Rust ICU4X functions that use data: +//! +//! 1. `*` +//! 2. `*_unstable` +//! 3. `*_with_any_provider` +//! 4. `*_with_buffer_provider` +//! +//! 
# Which constructor should I use? +//! +//! ## When to use `*` +//! +//! If you don't want to customize data at runtime (i.e. if you don't care about code size, +//! updating your data, etc.) you can use the `compiled_data` Cargo feature and don't have to think +//! about where your data comes from. +//! +//! These constructors are sometimes `const` functions, this way Rust can most effectively optimize +//! your usage of ICU4X. +//! +//! ## When to use `*_unstable` +//! +//! Use this constructor if your data provider implements the [`DataProvider`] trait for all +//! data structs in *current and future* ICU4X versions. Examples: +//! +//! 1. `BakedDataProvider` generated for the specific ICU4X minor version +//! 2. Anything with a _blanket_ [`DataProvider`] impl +//! +//! Since the exact set of bounds may change at any time, including in minor SemVer releases, +//! it is the client's responsibility to guarantee that the requirement is upheld. +//! +//! ## When to use `*_with_any_provider` +//! +//! Use this constructor if you need to use a provider that implements [`AnyProvider`] but not +//! [`DataProvider`]. Examples: +//! +//! 1. [`AnyPayloadProvider`] +//! 2. [`ForkByKeyProvider`] between two providers implementing [`AnyProvider`] +//! 3. Providers that cache or override certain keys but not others and therefore +//! can't implement [`DataProvider`] +//! +//! ## When to use `*_with_buffer_provider` +//! +//! Use this constructor if your data originates as byte buffers that need to be deserialized. +//! All such providers should implement [`BufferProvider`]. Examples: +//! +//! 1. [`BlobDataProvider`] +//! 2. [`FsDataProvider`] +//! 3. [`ForkByKeyProvider`] between two providers implementing [`BufferProvider`] +//! +//! Please note that you must enable the `serde` Cargo feature on each crate in which you use the +//! `*_with_buffer_provider` constructor. +//! +//! # Data Versioning Policy +//! +//! 
The `*_with_any_provider` and `*_with_buffer_provider` functions will succeed to compile and +//! run if given a data provider supporting all of the keys required for the object being +//! constructed, either the current or any previous version within the same SemVer major release. +//! For example, if a data file is built to support FooFormatter version 1.1, then FooFormatter +//! version 1.2 will be able to read the same data file. Likewise, backwards-compatible keys can +//! always be included by `icu_datagen` to support older library versions. +//! +//! The `*_unstable` functions are only guaranteed to work on data built for the exact same minor version +//! of ICU4X. The advantage of the `*_unstable` functions is that they result in the smallest code +//! size and allow for automatic data slicing when `BakedDataProvider` is used. However, the type +//! bounds of this function may change over time, breaking SemVer guarantees. These functions +//! should therefore only be used when you have full control over your data lifecycle at compile +//! time. +//! +//! # Data Providers Over FFI +//! +//! Over FFI, there is only one data provider type: [`ICU4XDataProvider`]. Internally, it is an +//! `enum` between`dyn `[`BufferProvider`] and a unit compiled data variant. +//! +//! To control for code size, there are two Cargo features, `compiled_data` and `buffer_provider`, +//! that enable the corresponding items in the enum. +//! +//! In Rust ICU4X, a similar enum approach was not taken because: +//! +//! 1. Feature-gating the enum branches gets complex across crates. +//! 2. Without feature gating, users need to carry Serde code even if they're not using it, +//! violating one of the core value propositions of ICU4X. +//! 3. We could reduce the number of constructors from 4 to 2 but not to 1, so the educational +//! benefit is limited. +//! +//! [`DataProvider`]: crate::DataProvider +//! [`BufferProvider`]: crate::BufferProvider +//! 
[`AnyProvider`]: crate::AnyProvider +//! [`AnyPayloadProvider`]: ../../icu_provider_adapters/any_payload/struct.AnyPayloadProvider.html +//! [`ForkByKeyProvider`]: ../../icu_provider_adapters/fork/struct.ForkByKeyProvider.html +//! [`BlobDataProvider`]: ../../icu_provider_blob/struct.BlobDataProvider.html +//! [`StaticDataProvider`]: ../../icu_provider_blob/struct.StaticDataProvider.html +//! [`FsDataProvider`]: ../../icu_provider_blob/struct.FsDataProvider.html +//! [`ICU4XDataProvider`]: ../../icu_capi/provider/ffi/struct.ICU4XDataProvider.html + +#[doc(hidden)] +#[macro_export] +macro_rules! gen_any_buffer_unstable_docs { + (ANY, $data:path) => { + concat!( + "A version of [`", stringify!($data), "`] that uses custom data ", + "provided by an [`AnyProvider`](icu_provider::AnyProvider).\n\n", + "[📚 Help choosing a constructor](icu_provider::constructors)", + ) + }; + (BUFFER, $data:path) => { + concat!( + "A version of [`", stringify!($data), "`] that uses custom data ", + "provided by a [`BufferProvider`](icu_provider::BufferProvider).\n\n", + "✨ *Enabled with the `serde` feature.*\n\n", + "[📚 Help choosing a constructor](icu_provider::constructors)", + ) + }; + (UNSTABLE, $data:path) => { + concat!( + "A version of [`", stringify!($data), "`] that uses custom data ", + "provided by a [`DataProvider`](icu_provider::DataProvider).\n\n", + "[📚 Help choosing a constructor](icu_provider::constructors)\n\n", + "

⚠️ The bounds on provider may change over time, including in SemVer minor releases.
" + ) + }; +} + +#[allow(clippy::crate_in_macro_def)] // by convention each crate's data provider is `crate::provider::Baked` +#[doc(hidden)] +#[macro_export] +macro_rules! gen_any_buffer_data_constructors { + (locale: skip, options: skip, error: $error_ty:path, $(#[$doc:meta])+) => { + $crate::gen_any_buffer_data_constructors!( + locale: skip, + options: skip, + error: $error_ty, + $(#[$doc])+ + functions: [ + try_new, + try_new_with_any_provider, + try_new_with_buffer_provider, + try_new_unstable, + Self, + ] + ); + }; + (locale: skip, options: skip, error: $error_ty:path, $(#[$doc:meta])+ functions: [$baked:ident, $any:ident, $buffer:ident, $unstable:ident $(, $struct:ident)? $(,)?]) => { + #[cfg(feature = "compiled_data")] + $(#[$doc])+ + pub fn $baked() -> Result { + $($struct :: )? $unstable(&crate::provider::Baked) + } + #[doc = $crate::gen_any_buffer_unstable_docs!(ANY, $($struct ::)? $baked)] + pub fn $any(provider: &(impl $crate::AnyProvider + ?Sized)) -> Result { + use $crate::AsDowncastingAnyProvider; + $($struct :: )? $unstable(&provider.as_downcasting()) + } + #[cfg(feature = "serde")] + #[doc = $crate::gen_any_buffer_unstable_docs!(BUFFER, $($struct ::)? $baked)] + pub fn $buffer(provider: &(impl $crate::BufferProvider + ?Sized)) -> Result { + use $crate::AsDeserializingBufferProvider; + $($struct :: )? $unstable(&provider.as_deserializing()) + } + }; + + + (locale: skip, options: skip, result: $result_ty:path, $(#[$doc:meta])+ functions: [$baked:ident, $any:ident, $buffer:ident, $unstable:ident $(, $struct:ident)? $(,)?]) => { + #[cfg(feature = "compiled_data")] + $(#[$doc])+ + pub fn $baked() -> $result_ty { + $($struct :: )? $unstable(&crate::provider::Baked) + } + #[doc = $crate::gen_any_buffer_unstable_docs!(ANY, $($struct ::)? $baked)] + pub fn $any(provider: &(impl $crate::AnyProvider + ?Sized)) -> $result_ty { + use $crate::AsDowncastingAnyProvider; + $($struct :: )? 
$unstable(&provider.as_downcasting()) + } + #[cfg(feature = "serde")] + #[doc = $crate::gen_any_buffer_unstable_docs!(BUFFER, $($struct ::)? $baked)] + pub fn $buffer(provider: &(impl $crate::BufferProvider + ?Sized)) -> $result_ty { + use $crate::AsDeserializingBufferProvider; + $($struct :: )? $unstable(&provider.as_deserializing()) + } + }; + + (locale: skip, $options_arg:ident: $options_ty:ty, error: $error_ty:path, $(#[$doc:meta])+) => { + $crate::gen_any_buffer_data_constructors!( + locale: skip, + $options_arg: $options_ty, + error: $error_ty, + $(#[$doc])+ + functions: [ + try_new, + try_new_with_any_provider, + try_new_with_buffer_provider, + try_new_unstable, + Self, + ] + ); + }; + (locale: skip, $options_arg:ident: $options_ty:ty, result: $result_ty:ty, $(#[$doc:meta])+ functions: [$baked:ident, $any:ident, $buffer:ident, $unstable:ident $(, $struct:ident)? $(,)?]) => { + #[cfg(feature = "compiled_data")] + $(#[$doc])+ + /// + /// ✨ *Enabled with the `compiled_data` Cargo feature.* + /// + /// [📚 Help choosing a constructor](icu_provider::constructors) + pub fn $baked($options_arg: $options_ty) -> $result_ty { + $($struct :: )? $unstable(&crate::provider::Baked, $options_arg) + } + #[doc = $crate::gen_any_buffer_unstable_docs!(ANY, $($struct ::)? $baked)] + pub fn $any(provider: &(impl $crate::AnyProvider + ?Sized), $options_arg: $options_ty) -> $result_ty { + use $crate::AsDowncastingAnyProvider; + $($struct :: )? $unstable(&provider.as_downcasting(), $options_arg) + } + #[cfg(feature = "serde")] + #[doc = $crate::gen_any_buffer_unstable_docs!(BUFFER, $($struct ::)? $baked)] + pub fn $buffer(provider: &(impl $crate::BufferProvider + ?Sized), $options_arg: $options_ty) -> $result_ty { + use $crate::AsDeserializingBufferProvider; + $($struct :: )? 
$unstable(&provider.as_deserializing(), $options_arg) + } + }; + (locale: skip, $options_arg:ident: $options_ty:ty, error: $error_ty:ty, $(#[$doc:meta])+ functions: [$baked:ident, $any:ident, $buffer:ident, $unstable:ident $(, $struct:ident)? $(,)?]) => { + #[cfg(feature = "compiled_data")] + $(#[$doc])+ + /// + /// ✨ *Enabled with the `compiled_data` Cargo feature.* + /// + /// [📚 Help choosing a constructor](icu_provider::constructors) + pub fn $baked($options_arg: $options_ty) -> Result { + $($struct :: )? $unstable(&crate::provider::Baked, $options_arg) + } + #[doc = $crate::gen_any_buffer_unstable_docs!(ANY, $($struct ::)? $baked)] + pub fn $any(provider: &(impl $crate::AnyProvider + ?Sized), $options_arg: $options_ty) -> Result { + use $crate::AsDowncastingAnyProvider; + $($struct :: )? $unstable(&provider.as_downcasting(), $options_arg) + } + #[cfg(feature = "serde")] + #[doc = $crate::gen_any_buffer_unstable_docs!(BUFFER, $($struct ::)? $baked)] + pub fn $buffer(provider: &(impl $crate::BufferProvider + ?Sized), $options_arg: $options_ty) -> Result { + use $crate::AsDeserializingBufferProvider; + $($struct :: )? $unstable(&provider.as_deserializing(), $options_arg) + } + }; + (locale: include, options: skip, error: $error_ty:path, $(#[$doc:meta])+) => { + $crate::gen_any_buffer_data_constructors!( + locale: include, + options: skip, + error: $error_ty, + $(#[$doc])+ + functions: [ + try_new, + try_new_with_any_provider, + try_new_with_buffer_provider, + try_new_unstable, + Self, + ] + ); + }; + (locale: include, options: skip, error: $error_ty:path, $(#[$doc:meta])+ functions: [$baked:ident, $any:ident, $buffer:ident, $unstable:ident $(, $struct:ident)? $(,)?]) => { + #[cfg(feature = "compiled_data")] + $(#[$doc])+ + /// + /// ✨ *Enabled with the `compiled_data` Cargo feature.* + /// + /// [📚 Help choosing a constructor](icu_provider::constructors) + pub fn $baked(locale: &$crate::DataLocale) -> Result { + $($struct :: )? 
$unstable(&crate::provider::Baked, locale) + } + #[doc = $crate::gen_any_buffer_unstable_docs!(ANY, $($struct ::)? $baked)] + pub fn $any(provider: &(impl $crate::AnyProvider + ?Sized), locale: &$crate::DataLocale) -> Result { + use $crate::AsDowncastingAnyProvider; + $($struct :: )? $unstable(&provider.as_downcasting(), locale) + } + #[cfg(feature = "serde")] + #[doc = $crate::gen_any_buffer_unstable_docs!(BUFFER, $($struct ::)? $baked)] + pub fn $buffer(provider: &(impl $crate::BufferProvider + ?Sized), locale: &$crate::DataLocale) -> Result { + use $crate::AsDeserializingBufferProvider; + $($struct :: )? $unstable(&provider.as_deserializing(), locale) + } + }; + + (locale: include, $config_arg:ident: $config_ty:path, $options_arg:ident: $options_ty:path, error: $error_ty:path, $(#[$doc:meta])+) => { + $crate::gen_any_buffer_data_constructors!( + locale: include, + $config_arg: $config_ty, + $options_arg: $options_ty, + error: $error_ty, + $(#[$doc])+ + functions: [ + try_new, + try_new_with_any_provider, + try_new_with_buffer_provider, + try_new_unstable, + Self, + ] + ); + }; + (locale: include, $config_arg:ident: $config_ty:path, $options_arg:ident: $options_ty:path, error: $error_ty:path, $(#[$doc:meta])+ functions: [$baked:ident, $any:ident, $buffer:ident, $unstable:ident $(, $struct:ident)? $(,)?]) => { + #[cfg(feature = "compiled_data")] + $(#[$doc])+ + /// + /// ✨ *Enabled with the `compiled_data` Cargo feature.* + /// + /// [📚 Help choosing a constructor](icu_provider::constructors) + pub fn $baked(locale: &$crate::DataLocale, $config_arg: $config_ty, $options_arg: $options_ty) -> Result { + $($struct :: )? $unstable(&crate::provider::Baked, locale, $config_arg, $options_arg) + } + #[doc = $crate::gen_any_buffer_unstable_docs!(ANY, $($struct ::)? 
$baked)] + pub fn $any(provider: &(impl $crate::AnyProvider + ?Sized), locale: &$crate::DataLocale, $config_arg: $config_ty, $options_arg: $options_ty) -> Result { + use $crate::AsDowncastingAnyProvider; + $($struct :: )? $unstable(&provider.as_downcasting(), locale, $config_arg, $options_arg) + } + #[cfg(feature = "serde")] + #[doc = $crate::gen_any_buffer_unstable_docs!(BUFFER, $($struct ::)? $baked)] + pub fn $buffer(provider: &(impl $crate::BufferProvider + ?Sized), locale: &$crate::DataLocale, $config_arg: $config_ty, $options_arg: $options_ty) -> Result { + use $crate::AsDeserializingBufferProvider; + $($struct :: )? $unstable(&provider.as_deserializing(), locale, $config_arg, $options_arg) + } + }; + + (locale: include, $options_arg:ident: $options_ty:path, error: $error_ty:path, $(#[$doc:meta])+) => { + $crate::gen_any_buffer_data_constructors!( + locale: include, + $options_arg: $options_ty, + error: $error_ty, + $(#[$doc])+ + functions: [ + try_new, + try_new_with_any_provider, + try_new_with_buffer_provider, + try_new_unstable, + Self, + ] + ); + }; + (locale: include, $options_arg:ident: $options_ty:path, error: $error_ty:path, $(#[$doc:meta])+ functions: [$baked:ident, $any:ident, $buffer:ident, $unstable:ident $(, $struct:ident)? $(,)?]) => { + #[cfg(feature = "compiled_data")] + $(#[$doc])+ + /// + /// ✨ *Enabled with the `compiled_data` Cargo feature.* + /// + /// [📚 Help choosing a constructor](icu_provider::constructors) + pub fn $baked(locale: &$crate::DataLocale, $options_arg: $options_ty) -> Result { + $($struct :: )? $unstable(&crate::provider::Baked, locale, $options_arg) + } + #[doc = $crate::gen_any_buffer_unstable_docs!(ANY, $($struct ::)? $baked)] + pub fn $any(provider: &(impl $crate::AnyProvider + ?Sized), locale: &$crate::DataLocale, $options_arg: $options_ty) -> Result { + use $crate::AsDowncastingAnyProvider; + $($struct :: )? 
$unstable(&provider.as_downcasting(), locale, $options_arg) + } + #[cfg(feature = "serde")] + #[doc = $crate::gen_any_buffer_unstable_docs!(BUFFER, $($struct ::)? $baked)] + pub fn $buffer(provider: &(impl $crate::BufferProvider + ?Sized), locale: &$crate::DataLocale, $options_arg: $options_ty) -> Result { + use $crate::AsDeserializingBufferProvider; + $($struct :: )? $unstable(&provider.as_deserializing(), locale, $options_arg) + } + }; +} diff --git a/third_party/rust/icu_provider/src/data_provider.rs b/third_party/rust/icu_provider/src/data_provider.rs new file mode 100644 index 0000000000..df821956a8 --- /dev/null +++ b/third_party/rust/icu_provider/src/data_provider.rs @@ -0,0 +1,331 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::error::DataError; +use crate::key::DataKey; +use crate::marker::{DataMarker, KeyedDataMarker}; +use crate::request::DataRequest; +use crate::response::DataResponse; + +/// A data provider that loads data for a specific data type. +/// +/// Unlike [`DataProvider`], there may be multiple keys corresponding to the same data type. +/// This is often the case when returning `dyn` trait objects such as [`AnyMarker`]. +/// +/// [`AnyMarker`]: crate::any::AnyMarker +pub trait DynamicDataProvider +where + M: DataMarker, +{ + /// Query the provider for data, returning the result. + /// + /// Returns [`Ok`] if the request successfully loaded data. If data failed to load, returns an + /// Error with more information. + fn load_data(&self, key: DataKey, req: DataRequest) -> Result, DataError>; +} + +/// A data provider that loads data for a specific [`DataKey`]. +pub trait DataProvider +where + M: KeyedDataMarker, +{ + /// Query the provider for data, returning the result. + /// + /// Returns [`Ok`] if the request successfully loaded data. 
If data failed to load, returns an + /// Error with more information. + fn load(&self, req: DataRequest) -> Result, DataError>; +} + +impl DynamicDataProvider for alloc::boxed::Box

+where + M: DataMarker, + P: DynamicDataProvider + ?Sized, +{ + fn load_data(&self, key: DataKey, req: DataRequest) -> Result, DataError> { + (**self).load_data(key, req) + } +} + +#[cfg(test)] +mod test { + + use super::*; + use crate::hello_world::*; + use crate::prelude::*; + use alloc::borrow::Cow; + use alloc::string::String; + use core::fmt::Debug; + use serde::{Deserialize, Serialize}; + + // This tests DataProvider borrow semantics with a dummy data provider based on a + // JSON string. It also exercises most of the data provider code paths. + + /// Key for HelloAlt, used for testing mismatched types + const HELLO_ALT_KEY: DataKey = crate::data_key!("core/helloalt@1"); + + /// A data struct serialization-compatible with HelloWorldV1 used for testing mismatched types + #[derive( + Serialize, Deserialize, Debug, Clone, Default, PartialEq, yoke::Yokeable, zerofrom::ZeroFrom, + )] + struct HelloAlt { + #[zerofrom(clone)] + message: String, + } + + /// Marker type for [`HelloAlt`]. + struct HelloAltMarker {} + + impl DataMarker for HelloAltMarker { + type Yokeable = HelloAlt; + } + + impl KeyedDataMarker for HelloAltMarker { + const KEY: DataKey = HELLO_ALT_KEY; + } + + #[derive(Deserialize, Debug, Clone, Default, PartialEq)] + struct HelloCombined<'data> { + #[serde(borrow)] + pub hello_v1: HelloWorldV1<'data>, + pub hello_alt: HelloAlt, + } + + /// A DataProvider that owns its data, returning an Rc-variant DataPayload. + /// Supports only key::HELLO_WORLD_V1. Uses `impl_dynamic_data_provider!()`. 
+ #[derive(Debug)] + struct DataWarehouse { + hello_v1: HelloWorldV1<'static>, + hello_alt: HelloAlt, + } + + impl DataProvider for DataWarehouse { + fn load(&self, _: DataRequest) -> Result, DataError> { + Ok(DataResponse { + metadata: DataResponseMetadata::default(), + payload: Some(DataPayload::from_owned(self.hello_v1.clone())), + }) + } + } + + crate::impl_dynamic_data_provider!(DataWarehouse, [HelloWorldV1Marker,], AnyMarker); + + /// A DataProvider that supports both key::HELLO_WORLD_V1 and HELLO_ALT. + #[derive(Debug)] + struct DataProvider2 { + data: DataWarehouse, + } + + impl From for DataProvider2 { + fn from(warehouse: DataWarehouse) -> Self { + DataProvider2 { data: warehouse } + } + } + + impl DataProvider for DataProvider2 { + fn load(&self, _: DataRequest) -> Result, DataError> { + Ok(DataResponse { + metadata: DataResponseMetadata::default(), + payload: Some(DataPayload::from_owned(self.data.hello_v1.clone())), + }) + } + } + + impl DataProvider for DataProvider2 { + fn load(&self, _: DataRequest) -> Result, DataError> { + Ok(DataResponse { + metadata: DataResponseMetadata::default(), + payload: Some(DataPayload::from_owned(self.data.hello_alt.clone())), + }) + } + } + + crate::impl_dynamic_data_provider!( + DataProvider2, + [HelloWorldV1Marker, HelloAltMarker,], + AnyMarker + ); + + const DATA: &str = r#"{ + "hello_v1": { + "message": "Hello V1" + }, + "hello_alt": { + "message": "Hello Alt" + } + }"#; + + fn get_warehouse(data: &'static str) -> DataWarehouse { + let data: HelloCombined = serde_json::from_str(data).expect("Well-formed data"); + DataWarehouse { + hello_v1: data.hello_v1, + hello_alt: data.hello_alt, + } + } + + fn get_payload_v1 + ?Sized>( + provider: &P, + ) -> Result, DataError> { + provider.load(Default::default())?.take_payload() + } + + fn get_payload_alt + ?Sized>( + provider: &P, + ) -> Result, DataError> { + provider.load(Default::default())?.take_payload() + } + + #[test] + fn test_warehouse_owned() { + let warehouse = 
get_warehouse(DATA); + let hello_data = get_payload_v1(&warehouse).unwrap(); + assert!(matches!( + hello_data.get(), + HelloWorldV1 { + message: Cow::Borrowed(_), + } + )); + } + + #[test] + fn test_warehouse_owned_dyn_erased() { + let warehouse = get_warehouse(DATA); + let hello_data = get_payload_v1(&warehouse.as_any_provider().as_downcasting()).unwrap(); + assert!(matches!( + hello_data.get(), + HelloWorldV1 { + message: Cow::Borrowed(_), + } + )); + } + + #[test] + fn test_warehouse_owned_dyn_generic() { + let warehouse = get_warehouse(DATA); + let hello_data = + get_payload_v1(&warehouse as &dyn DataProvider).unwrap(); + assert!(matches!( + hello_data.get(), + HelloWorldV1 { + message: Cow::Borrowed(_), + } + )); + } + + #[test] + fn test_warehouse_owned_dyn_erased_alt() { + let warehouse = get_warehouse(DATA); + let response = get_payload_alt(&warehouse.as_any_provider().as_downcasting()); + assert!(matches!( + response, + Err(DataError { + kind: DataErrorKind::MissingDataKey, + .. + }) + )); + } + + #[test] + fn test_provider2() { + let warehouse = get_warehouse(DATA); + let provider = DataProvider2::from(warehouse); + let hello_data = get_payload_v1(&provider).unwrap(); + assert!(matches!( + hello_data.get(), + HelloWorldV1 { + message: Cow::Borrowed(_), + } + )); + } + + #[test] + fn test_provider2_dyn_erased() { + let warehouse = get_warehouse(DATA); + let provider = DataProvider2::from(warehouse); + let hello_data = get_payload_v1(&provider.as_any_provider().as_downcasting()).unwrap(); + assert!(matches!( + hello_data.get(), + HelloWorldV1 { + message: Cow::Borrowed(_), + } + )); + } + + #[test] + fn test_provider2_dyn_erased_alt() { + let warehouse = get_warehouse(DATA); + let provider = DataProvider2::from(warehouse); + let hello_data = get_payload_alt(&provider.as_any_provider().as_downcasting()).unwrap(); + assert!(matches!(hello_data.get(), HelloAlt { .. 
})); + } + + #[test] + fn test_provider2_dyn_generic() { + let warehouse = get_warehouse(DATA); + let provider = DataProvider2::from(warehouse); + let hello_data = + get_payload_v1(&provider as &dyn DataProvider).unwrap(); + assert!(matches!( + hello_data.get(), + HelloWorldV1 { + message: Cow::Borrowed(_), + } + )); + } + + #[test] + fn test_provider2_dyn_generic_alt() { + let warehouse = get_warehouse(DATA); + let provider = DataProvider2::from(warehouse); + let hello_data = get_payload_alt(&provider as &dyn DataProvider).unwrap(); + assert!(matches!(hello_data.get(), HelloAlt { .. })); + } + + #[test] + fn test_mismatched_types() { + let warehouse = get_warehouse(DATA); + let provider = DataProvider2::from(warehouse); + // Request is for v2, but type argument is for v1 + let response: Result, DataError> = AnyProvider::load_any( + &provider.as_any_provider(), + HELLO_ALT_KEY, + Default::default(), + ) + .unwrap() + .downcast(); + assert!(matches!( + response, + Err(DataError { + kind: DataErrorKind::MismatchedType(_), + .. + }) + )); + } + + fn check_v1_v2

(d: &P) + where + P: DataProvider + DataProvider + ?Sized, + { + let v1: DataPayload = + d.load(Default::default()).unwrap().take_payload().unwrap(); + let v2: DataPayload = + d.load(Default::default()).unwrap().take_payload().unwrap(); + if v1.get().message == v2.get().message { + panic!() + } + } + + #[test] + fn test_v1_v2_generic() { + let warehouse = get_warehouse(DATA); + let provider = DataProvider2::from(warehouse); + check_v1_v2(&provider); + } + + #[test] + fn test_v1_v2_dyn_erased() { + let warehouse = get_warehouse(DATA); + let provider = DataProvider2::from(warehouse); + check_v1_v2(&provider.as_any_provider().as_downcasting()); + } +} diff --git a/third_party/rust/icu_provider/src/datagen/data_conversion.rs b/third_party/rust/icu_provider/src/datagen/data_conversion.rs new file mode 100644 index 0000000000..f3ca948e1d --- /dev/null +++ b/third_party/rust/icu_provider/src/datagen/data_conversion.rs @@ -0,0 +1,44 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::prelude::*; +use crate::DataKey; +use alloc::boxed::Box; + +/// A trait that allows for converting between data payloads of different types. +/// +/// These payloads will typically be some kind of erased payload, either with +/// [`AnyMarker`], [`BufferMarker`], or [`ExportMarker`](crate::datagen::ExportMarker), where converting +/// requires reifying the type. +/// +/// A type implementing [`DataConverter`] will essentially have a "registry" mapping keys to +/// concrete marker types M, and reifying the input to a `DataPayload`, performing some conversion +/// or computation, and erasing the result to `DataPayload`. +pub trait DataConverter { + /// Attempt to convert a payload corresponding to the given data key + /// from one marker type to another marker type. 
+ /// + /// If this is not possible (for example, if the provider does not know about the key), + /// the original payload is returned back to the caller. + fn convert( + &self, + key: DataKey, + from: DataPayload, + ) -> Result, (DataPayload, DataError)>; +} + +impl DataConverter for Box

+where + MFrom: DataMarker, + MTo: DataMarker, + P: DataConverter + ?Sized, +{ + fn convert( + &self, + key: DataKey, + from: DataPayload, + ) -> Result, (DataPayload, DataError)> { + (**self).convert(key, from) + } +} diff --git a/third_party/rust/icu_provider/src/datagen/iter.rs b/third_party/rust/icu_provider/src/datagen/iter.rs new file mode 100644 index 0000000000..6175d89c6f --- /dev/null +++ b/third_party/rust/icu_provider/src/datagen/iter.rs @@ -0,0 +1,35 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Collection of iteration APIs for data providers. + +use crate::prelude::*; + +/// A [`DynamicDataProvider`] that can iterate over all supported [`DataLocale`] for a certain key. +/// +/// Implementing this trait means that a data provider knows all of the data it can successfully +/// return from a load request. +pub trait IterableDynamicDataProvider: DynamicDataProvider { + /// Given a [`DataKey`], returns a list of [`DataLocale`]. + fn supported_locales_for_key(&self, key: DataKey) -> Result, DataError>; +} + +/// A [`DataProvider`] that can iterate over all supported [`DataLocale`] for a certain key. +/// +/// Implementing this trait means that a data provider knows all of the data it can successfully +/// return from a load request. +pub trait IterableDataProvider: DataProvider { + /// Returns a list of [`DataLocale`]. + fn supported_locales(&self) -> Result, DataError>; +} + +impl IterableDynamicDataProvider for Box

+where + M: DataMarker, + P: IterableDynamicDataProvider + ?Sized, +{ + fn supported_locales_for_key(&self, key: DataKey) -> Result, DataError> { + (**self).supported_locales_for_key(key) + } +} diff --git a/third_party/rust/icu_provider/src/datagen/mod.rs b/third_party/rust/icu_provider/src/datagen/mod.rs new file mode 100644 index 0000000000..ae1779ab39 --- /dev/null +++ b/third_party/rust/icu_provider/src/datagen/mod.rs @@ -0,0 +1,203 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! This module contains various utilities required to generate ICU4X data files, typically +//! via the `icu_datagen` reference crate. End users should not need to consume anything in +//! this module as a library unless defining new types that integrate with `icu_datagen`. +//! +//! This module can be enabled with the `datagen` Cargo feature on `icu_provider`. + +mod data_conversion; +mod iter; +mod payload; +pub use data_conversion::DataConverter; +pub use iter::IterableDataProvider; + +#[doc(hidden)] // exposed for make_exportable_provider +pub use iter::IterableDynamicDataProvider; +#[doc(hidden)] // exposed for make_exportable_provider +pub use payload::{ExportBox, ExportMarker}; + +use crate::prelude::*; + +/// The type of built-in fallback that the data was generated for, if applicable. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +#[non_exhaustive] +pub enum BuiltInFallbackMode { + /// Data uses full UTS 35 fallbacking. + Standard, +} + +/// An object capable of exporting data payloads in some form. +pub trait DataExporter: Sync { + /// Save a `payload` corresponding to the given key and locale. + /// Takes non-mut self as it can be called concurrently. + fn put_payload( + &self, + key: DataKey, + locale: &DataLocale, + payload: &DataPayload, + ) -> Result<(), DataError>; + + /// Function called for singleton keys. 
+ /// Takes non-mut self as it can be called concurrently. + fn flush_singleton( + &self, + key: DataKey, + payload: &DataPayload, + ) -> Result<(), DataError> { + self.put_payload(key, &Default::default(), payload)?; + self.flush(key) + } + + /// Function called after a non-singleton key has been fully enumerated, + /// flushing that key with built-in fallback. + /// + /// Takes non-mut self as it can be called concurrently. + fn flush_with_built_in_fallback( + &self, + _key: DataKey, + _fallback_mode: BuiltInFallbackMode, + ) -> Result<(), DataError> { + Err(DataError::custom( + "Exporter does not implement built-in fallback", + )) + } + + /// Function called after a non-singleton key has been fully enumerated. + /// Does not include built-in fallback. + /// + /// Takes non-mut self as it can be called concurrently. + fn flush(&self, _key: DataKey) -> Result<(), DataError> { + Ok(()) + } + + /// This function has to be called before the object is dropped (after all + /// keys have been fully dumped). This conceptually takes ownership, so + /// clients *may not* interact with this object after close has been called. + fn close(&mut self) -> Result<(), DataError> { + Ok(()) + } + + /// Returns whether the provider supports built-in fallback. If `true`, the provider must + /// implement [`Self::flush_with_built_in_fallback()`]. + fn supports_built_in_fallback(&self) -> bool { + false + } +} + +/// A [`DynamicDataProvider`] that can be used for exporting data. +/// +/// Use [`make_exportable_provider`](crate::make_exportable_provider) to implement this. +pub trait ExportableProvider: + IterableDynamicDataProvider + DynamicDataProvider + Sync +{ +} + +impl ExportableProvider for T where + T: IterableDynamicDataProvider + DynamicDataProvider + Sync +{ +} + +/// This macro can be used on a data provider to allow it to be used for data generation. 
+/// +/// Data generation 'compiles' data by using this data provider (which usually translates data from +/// different sources and doesn't have to be efficient) to generate data structs, and then writing +/// them to an efficient format like [`BlobDataProvider`] or [`BakedDataProvider`]. The requirements +/// for `make_exportable_provider` are: +/// * The data struct has to implement [`serde::Serialize`](::serde::Serialize) and [`databake::Bake`] +/// * The provider needs to implement [`IterableDataProvider`] for all specified [`KeyedDataMarker`]s. +/// This allows the generating code to know which [`DataLocale`] to collect. +/// +/// [`BlobDataProvider`]: ../../icu_provider_blob/struct.BlobDataProvider.html +/// [`BakedDataProvider`]: ../../icu_datagen/index.html +#[macro_export] +macro_rules! make_exportable_provider { + ($provider:ty, [ $($(#[$cfg:meta])? $struct_m:ty),+, ]) => { + $crate::impl_dynamic_data_provider!( + $provider, + [ $($(#[$cfg])? $struct_m),+, ], + $crate::datagen::ExportMarker + ); + $crate::impl_dynamic_data_provider!( + $provider, + [ $($(#[$cfg])? $struct_m),+, ], + $crate::any::AnyMarker + ); + + impl $crate::datagen::IterableDynamicDataProvider<$crate::datagen::ExportMarker> for $provider { + fn supported_locales_for_key(&self, key: $crate::DataKey) -> Result, $crate::DataError> { + match key.hashed() { + $( + $(#[$cfg])? + h if h == <$struct_m as $crate::KeyedDataMarker>::KEY.hashed() => { + $crate::datagen::IterableDataProvider::<$struct_m>::supported_locales(self) + } + )+, + _ => Err($crate::DataErrorKind::MissingDataKey.with_key(key)) + } + } + } + }; +} + +/// A `DataExporter` that forks to multiple `DataExporter`s. +#[derive(Default)] +pub struct MultiExporter(Vec>); + +impl MultiExporter { + /// Creates a `MultiExporter` for the given exporters. 
+ pub const fn new(exporters: Vec>) -> Self { + Self(exporters) + } +} + +impl core::fmt::Debug for MultiExporter { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("MultiExporter") + .field("0", &format!("vec[len = {}]", self.0.len())) + .finish() + } +} + +impl DataExporter for MultiExporter { + fn put_payload( + &self, + key: DataKey, + locale: &DataLocale, + payload: &DataPayload, + ) -> Result<(), DataError> { + self.0 + .iter() + .try_for_each(|e| e.put_payload(key, locale, payload)) + } + + fn flush_singleton( + &self, + key: DataKey, + payload: &DataPayload, + ) -> Result<(), DataError> { + self.0 + .iter() + .try_for_each(|e| e.flush_singleton(key, payload)) + } + + fn flush(&self, key: DataKey) -> Result<(), DataError> { + self.0.iter().try_for_each(|e| e.flush(key)) + } + + fn flush_with_built_in_fallback( + &self, + key: DataKey, + fallback_mode: BuiltInFallbackMode, + ) -> Result<(), DataError> { + self.0 + .iter() + .try_for_each(|e| e.flush_with_built_in_fallback(key, fallback_mode)) + } + + fn close(&mut self) -> Result<(), DataError> { + self.0.iter_mut().try_for_each(|e| e.close()) + } +} diff --git a/third_party/rust/icu_provider/src/datagen/payload.rs b/third_party/rust/icu_provider/src/datagen/payload.rs new file mode 100644 index 0000000000..97e540b074 --- /dev/null +++ b/third_party/rust/icu_provider/src/datagen/payload.rs @@ -0,0 +1,229 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use core::any::Any; + +use crate::dynutil::UpcastDataPayload; +use crate::prelude::*; +use alloc::boxed::Box; +use databake::{Bake, CrateEnv, TokenStream}; +use yoke::trait_hack::YokeTraitHack; +use yoke::*; + +trait ExportableDataPayload { + fn bake_yoke(&self, env: &CrateEnv) -> TokenStream; + fn serialize_yoke( + &self, + serializer: &mut dyn erased_serde::Serializer, + ) -> Result<(), DataError>; + fn as_any(&self) -> &dyn Any; + fn eq_dyn(&self, other: &dyn ExportableDataPayload) -> bool; +} + +impl ExportableDataPayload for DataPayload +where + for<'a> >::Output: Bake + serde::Serialize, + for<'a> YokeTraitHack<>::Output>: PartialEq, +{ + fn bake_yoke(&self, ctx: &CrateEnv) -> TokenStream { + self.get().bake(ctx) + } + + fn serialize_yoke( + &self, + serializer: &mut dyn erased_serde::Serializer, + ) -> Result<(), DataError> { + use erased_serde::Serialize; + self.get() + .erased_serialize(serializer) + .map_err(|e| DataError::custom("Serde export").with_display_context(&e))?; + Ok(()) + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn eq_dyn(&self, other: &dyn ExportableDataPayload) -> bool { + match other.as_any().downcast_ref::() { + Some(downcasted) => (*self).eq(downcasted), + None => { + debug_assert!( + false, + "cannot compare ExportableDataPayloads of different types: self is {:?} but other is {:?}", + self.type_id(), + other.as_any().type_id(), + ); + false + } + } + } +} + +#[doc(hidden)] // exposed for make_exportable_provider +#[derive(yoke::Yokeable)] +pub struct ExportBox { + payload: Box, +} + +impl PartialEq for ExportBox { + fn eq(&self, other: &Self) -> bool { + self.payload.eq_dyn(&*other.payload) + } +} + +impl core::fmt::Debug for ExportBox { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("ExportBox") + .field("payload", &"") + .finish() + } +} + +impl UpcastDataPayload for ExportMarker +where + M: DataMarker, + M::Yokeable: Sync + Send, + for<'a> >::Output: Bake + serde::Serialize, + 
for<'a> YokeTraitHack<>::Output>: PartialEq, +{ + fn upcast(other: DataPayload) -> DataPayload { + DataPayload::from_owned(ExportBox { + payload: Box::new(other), + }) + } +} + +impl DataPayload { + /// Serializes this [`DataPayload`] into a serializer using Serde. + /// + /// # Examples + /// + /// ``` + /// use icu_provider::datagen::*; + /// use icu_provider::dynutil::UpcastDataPayload; + /// use icu_provider::hello_world::HelloWorldV1Marker; + /// use icu_provider::prelude::*; + /// + /// // Create an example DataPayload + /// let payload: DataPayload = Default::default(); + /// let export: DataPayload = UpcastDataPayload::upcast(payload); + /// + /// // Serialize the payload to a JSON string + /// let mut buffer: Vec = vec![]; + /// export + /// .serialize(&mut serde_json::Serializer::new(&mut buffer)) + /// .expect("Serialization should succeed"); + /// assert_eq!(r#"{"message":"(und) Hello World"}"#.as_bytes(), buffer); + /// ``` + pub fn serialize(&self, serializer: S) -> Result<(), DataError> + where + S: serde::Serializer, + S::Ok: 'static, // erased_serde requirement, cannot return values in `Ok` + { + self.get() + .payload + .serialize_yoke(&mut ::erase(serializer)) + } + + /// Serializes this [`DataPayload`]'s value into a [`TokenStream`] + /// using its [`Bake`] implementations. + /// + /// # Examples + /// + /// ``` + /// use icu_provider::datagen::*; + /// use icu_provider::dynutil::UpcastDataPayload; + /// use icu_provider::hello_world::HelloWorldV1Marker; + /// use icu_provider::prelude::*; + /// # use databake::quote; + /// # use std::collections::BTreeSet; + /// + /// // Create an example DataPayload + /// let payload: DataPayload = Default::default(); + /// let export: DataPayload = UpcastDataPayload::upcast(payload); + /// + /// let env = databake::CrateEnv::default(); + /// let tokens = export.tokenize(&env); + /// assert_eq!( + /// quote! 
{ + /// icu_provider::hello_world::HelloWorldV1 { + /// message: alloc::borrow::Cow::Borrowed("(und) Hello World"), + /// } + /// } + /// .to_string(), + /// tokens.to_string() + /// ); + /// assert_eq!( + /// env.into_iter().collect::>(), + /// ["icu_provider", "alloc"] + /// .into_iter() + /// .collect::>() + /// ); + /// ``` + pub fn tokenize(&self, env: &CrateEnv) -> TokenStream { + self.get().payload.bake_yoke(env) + } +} + +/// Marker type for [`ExportBox`]. +#[allow(clippy::exhaustive_structs)] // marker type +#[derive(Debug)] +pub struct ExportMarker {} + +impl DataMarker for ExportMarker { + type Yokeable = ExportBox; +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::hello_world::*; + + #[test] + fn test_compare_with_dyn() { + let payload1: DataPayload = DataPayload::from_owned(HelloWorldV1 { + message: "abc".into(), + }); + let payload2: DataPayload = DataPayload::from_owned(HelloWorldV1 { + message: "abc".into(), + }); + let payload3: DataPayload = DataPayload::from_owned(HelloWorldV1 { + message: "def".into(), + }); + + assert!(payload1.eq_dyn(&payload2)); + assert!(payload2.eq_dyn(&payload1)); + + assert!(!payload1.eq_dyn(&payload3)); + assert!(!payload3.eq_dyn(&payload1)); + } + + #[test] + fn test_export_marker_partial_eq() { + let payload1: DataPayload = + UpcastDataPayload::upcast(DataPayload::::from_owned( + HelloWorldV1 { + message: "abc".into(), + }, + )); + let payload2: DataPayload = + UpcastDataPayload::upcast(DataPayload::::from_owned( + HelloWorldV1 { + message: "abc".into(), + }, + )); + let payload3: DataPayload = + UpcastDataPayload::upcast(DataPayload::::from_owned( + HelloWorldV1 { + message: "def".into(), + }, + )); + + assert_eq!(payload1, payload2); + assert_eq!(payload2, payload1); + assert_ne!(payload1, payload3); + assert_ne!(payload3, payload1); + } +} diff --git a/third_party/rust/icu_provider/src/dynutil.rs b/third_party/rust/icu_provider/src/dynutil.rs new file mode 100644 index 0000000000..8ad7b7aa11 --- /dev/null 
+++ b/third_party/rust/icu_provider/src/dynutil.rs @@ -0,0 +1,256 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Utilities for using trait objects with `DataPayload`. + +/// Trait to allow conversion from `DataPayload` to `DataPayload`. +/// +/// This trait can be manually implemented in order to enable [`impl_dynamic_data_provider`](crate::impl_dynamic_data_provider). +/// +/// [`DataPayload::downcast`]: crate::DataPayload::downcast +pub trait UpcastDataPayload +where + M: crate::DataMarker, + Self: Sized + crate::DataMarker, +{ + /// Upcast a `DataPayload` to a `DataPayload` where `T` implements trait `S`. + /// + /// # Examples + /// + /// Upcast and then downcast a data struct of type `Cow` (cart type `String`) via + /// [`AnyPayload`](crate::any::AnyPayload): + /// + /// ``` + /// use icu_provider::dynutil::UpcastDataPayload; + /// use icu_provider::hello_world::*; + /// use icu_provider::prelude::*; + /// use std::borrow::Cow; + /// + /// let original = DataPayload::::from_static_str("foo"); + /// let upcasted = AnyMarker::upcast(original); + /// let downcasted = upcasted + /// .downcast::() + /// .expect("Type conversion"); + /// assert_eq!(downcasted.get().message, "foo"); + /// ``` + fn upcast(other: crate::DataPayload) -> crate::DataPayload; +} + +/// Implements [`UpcastDataPayload`] from several data markers to a single data marker +/// that all share the same [`DataMarker::Yokeable`]. 
+/// +/// # Examples +/// +/// ``` +/// use icu_provider::prelude::*; +/// use std::borrow::Cow; +/// +/// #[icu_provider::data_struct( +/// FooV1Marker, +/// BarV1Marker = "demo/bar@1", +/// BazV1Marker = "demo/baz@1" +/// )] +/// pub struct FooV1<'data> { +/// message: Cow<'data, str>, +/// }; +/// +/// icu_provider::impl_casting_upcast!( +/// FooV1Marker, +/// [BarV1Marker, BazV1Marker,] +/// ); +/// ``` +/// +/// [`DataMarker::Yokeable`]: crate::DataMarker::Yokeable +#[macro_export] +macro_rules! impl_casting_upcast { + ($dyn_m:path, [ $($struct_m:ident),+, ]) => { + $( + impl $crate::dynutil::UpcastDataPayload<$struct_m> for $dyn_m { + fn upcast(other: $crate::DataPayload<$struct_m>) -> $crate::DataPayload<$dyn_m> { + other.cast() + } + } + )+ + } +} + +/// Implements [`DynamicDataProvider`] for a marker type `S` on a type that already implements +/// [`DynamicDataProvider`] or [`DataProvider`] for one or more `M`, where `M` is a concrete type +/// that is convertible to `S` via [`UpcastDataPayload`]. +/// +/// Use this macro to add support to your data provider for: +/// +/// - [`AnyPayload`] if your provider can return typed objects as [`Any`](core::any::Any). +/// +/// ## Wrapping DataProvider +/// +/// If your type implements [`DataProvider`], pass a list of markers as the second argument. +/// This results in a `DynamicDataProvider` that delegates to a specific marker if the key +/// matches or else returns [`DataErrorKind::MissingDataKey`]. 
+/// +/// ``` +/// use icu_provider::prelude::*; +/// use icu_provider::hello_world::*; +/// # +/// # // Duplicating HelloWorldProvider because the real one already implements DynamicDataProvider +/// # struct HelloWorldProvider; +/// # impl DataProvider for HelloWorldProvider { +/// # fn load( +/// # &self, +/// # req: DataRequest, +/// # ) -> Result, DataError> { +/// # icu_provider::hello_world::HelloWorldProvider.load(req) +/// # } +/// # } +/// +/// // Implement DynamicDataProvider on HelloWorldProvider: DataProvider +/// icu_provider::impl_dynamic_data_provider!(HelloWorldProvider, [HelloWorldV1Marker,], AnyMarker); +/// +/// let req = DataRequest { +/// locale: &icu_locid::locale!("de").into(), +/// metadata: Default::default(), +/// }; +/// +/// // Successful because the key matches: +/// HelloWorldProvider.load_data(HelloWorldV1Marker::KEY, req).unwrap(); +/// +/// // MissingDataKey error as the key does not match: +/// assert_eq!( +/// HelloWorldProvider.load_data(icu_provider::data_key!("dummy@1"), req).unwrap_err().kind, +/// DataErrorKind::MissingDataKey, +/// ); +/// ``` +/// +/// ## Wrapping DynamicDataProvider +/// +/// It is also possible to wrap a [`DynamicDataProvider`] to create another [`DynamicDataProvider`]. To do this, +/// pass a match-like statement for keys as the second argument: +/// +/// ``` +/// use icu_provider::prelude::*; +/// use icu_provider::hello_world::*; +/// # +/// # struct HelloWorldProvider; +/// # impl DynamicDataProvider for HelloWorldProvider { +/// # fn load_data(&self, key: DataKey, req: DataRequest) +/// # -> Result, DataError> { +/// # icu_provider::hello_world::HelloWorldProvider.load(req) +/// # } +/// # } +/// +/// // Implement DataProvider on HelloWorldProvider: DynamicDataProvider +/// icu_provider::impl_dynamic_data_provider!(HelloWorldProvider, { +/// // Match HelloWorldV1Marker::KEY and delegate to DynamicDataProvider. 
+/// HW = HelloWorldV1Marker::KEY => HelloWorldV1Marker, +/// // Send the wildcard match also to DynamicDataProvider. +/// _ => HelloWorldV1Marker, +/// }, AnyMarker); +/// +/// let req = DataRequest { +/// locale: &icu_locid::locale!("de").into(), +/// metadata: Default::default(), +/// }; +/// +/// // Successful because the key matches: +/// HelloWorldProvider.as_any_provider().load_any(HelloWorldV1Marker::KEY, req).unwrap(); +/// +/// // Because of the wildcard, any key actually works: +/// HelloWorldProvider.as_any_provider().load_any(icu_provider::data_key!("dummy@1"), req).unwrap(); +/// ``` +/// +/// [`DynamicDataProvider`]: crate::DynamicDataProvider +/// [`DataProvider`]: crate::DataProvider +/// [`AnyPayload`]: (crate::any::AnyPayload) +/// [`DataErrorKind::MissingDataKey`]: (crate::DataErrorKind::MissingDataKey) +/// [`SerializeMarker`]: (crate::serde::SerializeMarker) +#[macro_export] +macro_rules! impl_dynamic_data_provider { + // allow passing in multiple things to do and get dispatched + ($provider:ty, $arms:tt, $one:path, $($rest:path),+) => { + $crate::impl_dynamic_data_provider!( + $provider, + $arms, + $one + ); + + $crate::impl_dynamic_data_provider!( + $provider, + $arms, + $($rest),+ + ); + }; + + ($provider:ty, { $($ident:ident = $key:path => $struct_m:ty),+, $(_ => $struct_d:ty,)?}, $dyn_m:ty) => { + impl $crate::DynamicDataProvider<$dyn_m> for $provider + { + fn load_data( + &self, + key: $crate::DataKey, + req: $crate::DataRequest, + ) -> Result< + $crate::DataResponse<$dyn_m>, + $crate::DataError, + > { + match key.hashed() { + $( + h if h == $key.hashed() => { + let result: $crate::DataResponse<$struct_m> = + $crate::DynamicDataProvider::<$struct_m>::load_data(self, key, req)?; + Ok($crate::DataResponse { + metadata: result.metadata, + payload: result.payload.map(|p| { + $crate::dynutil::UpcastDataPayload::<$struct_m>::upcast(p) + }), + }) + } + )+, + $( + _ => { + let result: $crate::DataResponse<$struct_d> = + 
$crate::DynamicDataProvider::<$struct_d>::load_data(self, key, req)?; + Ok($crate::DataResponse { + metadata: result.metadata, + payload: result.payload.map(|p| { + $crate::dynutil::UpcastDataPayload::<$struct_d>::upcast(p) + }), + }) + } + )? + _ => Err($crate::DataErrorKind::MissingDataKey.with_req(key, req)) + } + } + } + + }; + ($provider:ty, [ $($(#[$cfg:meta])? $struct_m:ty),+, ], $dyn_m:path) => { + impl $crate::DynamicDataProvider<$dyn_m> for $provider + { + fn load_data( + &self, + key: $crate::DataKey, + req: $crate::DataRequest, + ) -> Result< + $crate::DataResponse<$dyn_m>, + $crate::DataError, + > { + match key.hashed() { + $( + $(#[$cfg])? + h if h == <$struct_m>::KEY.hashed() => { + let result: $crate::DataResponse<$struct_m> = + $crate::DataProvider::load(self, req)?; + Ok($crate::DataResponse { + metadata: result.metadata, + payload: result.payload.map(|p| { + $crate::dynutil::UpcastDataPayload::<$struct_m>::upcast(p) + }), + }) + } + )+, + _ => Err($crate::DataErrorKind::MissingDataKey.with_req(key, req)) + } + } + } + }; +} diff --git a/third_party/rust/icu_provider/src/error.rs b/third_party/rust/icu_provider/src/error.rs new file mode 100644 index 0000000000..5fc19d1a0a --- /dev/null +++ b/third_party/rust/icu_provider/src/error.rs @@ -0,0 +1,292 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::buf::BufferFormat; +use crate::prelude::*; +use core::fmt; +use displaydoc::Display; + +/// A list specifying general categories of data provider error. +/// +/// Errors may be caused either by a malformed request or by the data provider +/// not being able to fulfill a well-formed request. +#[derive(Clone, Copy, Eq, PartialEq, Display, Debug)] +#[non_exhaustive] +pub enum DataErrorKind { + /// No data for the provided resource key. 
+ #[displaydoc("Missing data for key")] + MissingDataKey, + + /// There is data for the key, but not for this particular locale. + #[displaydoc("Missing data for locale")] + MissingLocale, + + /// The request should include a locale. + #[displaydoc("Request needs a locale")] + NeedsLocale, + + /// The request should not contain a locale. + #[displaydoc("Request has an extraneous locale")] + ExtraneousLocale, + + /// The resource was blocked by a filter. The resource may or may not be available. + #[displaydoc("Resource blocked by filter")] + FilteredResource, + + /// The generic type parameter does not match the TypeId. The expected type name is stored + /// as context when this error is returned. + #[displaydoc("Mismatched types: tried to downcast with {0}, but actual type is different")] + MismatchedType(&'static str), + + /// The payload is missing. This is usually caused by a previous error. + #[displaydoc("Missing payload")] + MissingPayload, + + /// A data provider object was given to an operation in an invalid state. + #[displaydoc("Invalid state")] + InvalidState, + + /// The syntax of the [`DataKey`] or [`DataLocale`] was invalid. + #[displaydoc("Parse error for data key or data locale")] + KeyLocaleSyntax, + + /// An unspecified error occurred, such as a Serde error. + /// + /// Check debug logs for potentially more information. + #[displaydoc("Custom")] + Custom, + + /// An error occurred while accessing a system resource. + #[displaydoc("I/O error: {0:?}")] + #[cfg(feature = "std")] + Io(std::io::ErrorKind), + + /// An unspecified data source containing the required data is unavailable. + #[displaydoc("Missing source data")] + #[cfg(feature = "datagen")] + MissingSourceData, + + /// An error indicating that the desired buffer format is not available. 
This usually + /// means that a required Cargo feature was not enabled + #[displaydoc("Unavailable buffer format: {0:?} (does icu_provider need to be compiled with an additional Cargo feature?)")] + UnavailableBufferFormat(BufferFormat), +} + +/// The error type for ICU4X data provider operations. +/// +/// To create one of these, either start with a [`DataErrorKind`] or use [`DataError::custom()`]. +/// +/// # Example +/// +/// Create a NeedsLocale error and attach a data request for context: +/// +/// ```no_run +/// # use icu_provider::prelude::*; +/// let key: DataKey = unimplemented!(); +/// let req: DataRequest = unimplemented!(); +/// DataErrorKind::NeedsLocale.with_req(key, req); +/// ``` +/// +/// Create a named custom error: +/// +/// ``` +/// # use icu_provider::prelude::*; +/// DataError::custom("This is an example error"); +/// ``` +#[derive(Clone, Copy, Eq, PartialEq, Debug)] +#[non_exhaustive] +pub struct DataError { + /// Broad category of the error. + pub kind: DataErrorKind, + + /// The data key of the request, if available. + pub key: Option, + + /// Additional context, if available. + pub str_context: Option<&'static str>, + + /// Whether this error was created in silent mode to not log. + pub silent: bool, +} + +impl fmt::Display for DataError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "ICU4X data error")?; + if self.kind != DataErrorKind::Custom { + write!(f, ": {}", self.kind)?; + } + if let Some(key) = self.key { + write!(f, " (key: {key})")?; + } + if let Some(str_context) = self.str_context { + write!(f, ": {str_context}")?; + } + Ok(()) + } +} + +impl DataErrorKind { + /// Converts this DataErrorKind into a DataError. + /// + /// If possible, you should attach context using a `with_` function. + #[inline] + pub const fn into_error(self) -> DataError { + DataError { + kind: self, + key: None, + str_context: None, + silent: false, + } + } + + /// Creates a DataError with a resource key context. 
+ #[inline] + pub const fn with_key(self, key: DataKey) -> DataError { + self.into_error().with_key(key) + } + + /// Creates a DataError with a string context. + #[inline] + pub const fn with_str_context(self, context: &'static str) -> DataError { + self.into_error().with_str_context(context) + } + + /// Creates a DataError with a type name context. + #[inline] + pub fn with_type_context(self) -> DataError { + self.into_error().with_type_context::() + } + + /// Creates a DataError with a request context. + #[inline] + pub fn with_req(self, key: DataKey, req: DataRequest) -> DataError { + self.into_error().with_req(key, req) + } +} + +impl DataError { + /// Returns a new, empty DataError with kind Custom and a string error message. + #[inline] + pub const fn custom(str_context: &'static str) -> Self { + Self { + kind: DataErrorKind::Custom, + key: None, + str_context: Some(str_context), + silent: false, + } + } + + /// Sets the resource key of a DataError, returning a modified error. + #[inline] + pub const fn with_key(self, key: DataKey) -> Self { + Self { + kind: self.kind, + key: Some(key), + str_context: self.str_context, + silent: self.silent, + } + } + + /// Sets the string context of a DataError, returning a modified error. + #[inline] + pub const fn with_str_context(self, context: &'static str) -> Self { + Self { + kind: self.kind, + key: self.key, + str_context: Some(context), + silent: self.silent, + } + } + + /// Sets the string context of a DataError to the given type name, returning a modified error. + #[inline] + pub fn with_type_context(self) -> Self { + #[cfg(feature = "logging")] + if !self.silent { + log::warn!("{self}: Type context: {}", core::any::type_name::()); + } + self.with_str_context(core::any::type_name::()) + } + + /// Logs the data error with the given request, returning an error containing the resource key. + /// + /// If the "logging" Cargo feature is enabled, this logs the whole request. 
Either way, + /// it returns an error with the resource key portion of the request as context. + #[cfg_attr(not(feature = "logging"), allow(unused_variables))] + pub fn with_req(mut self, key: DataKey, req: DataRequest) -> Self { + if req.metadata.silent { + self.silent = true; + } + // Don't write out a log for MissingDataKey since there is no context to add + #[cfg(feature = "logging")] + if !self.silent && self.kind != DataErrorKind::MissingDataKey { + log::warn!("{} (key: {}, request: {})", self, key, req); + } + self.with_key(key) + } + + /// Logs the data error with the given context, then return self. + /// + /// This does not modify the error, but if the "logging" Cargo feature is enabled, + /// it will print out the context. + #[cfg(feature = "std")] + #[cfg_attr(not(feature = "logging"), allow(unused_variables))] + pub fn with_path_context + ?Sized>(self, path: &P) -> Self { + #[cfg(feature = "logging")] + if !self.silent { + log::warn!("{} (path: {:?})", self, path.as_ref()); + } + self + } + + /// Logs the data error with the given context, then return self. + /// + /// This does not modify the error, but if the "logging" Cargo feature is enabled, + /// it will print out the context. + #[cfg_attr(not(feature = "logging"), allow(unused_variables))] + #[inline] + pub fn with_display_context(self, context: &D) -> Self { + #[cfg(feature = "logging")] + if !self.silent { + log::warn!("{}: {}", self, context); + } + self + } + + /// Logs the data error with the given context, then return self. + /// + /// This does not modify the error, but if the "logging" Cargo feature is enabled, + /// it will print out the context. 
+ #[cfg_attr(not(feature = "logging"), allow(unused_variables))] + #[inline] + pub fn with_debug_context(self, context: &D) -> Self { + #[cfg(feature = "logging")] + if !self.silent { + log::warn!("{}: {:?}", self, context); + } + self + } + + #[inline] + pub(crate) fn for_type() -> DataError { + DataError { + kind: DataErrorKind::MismatchedType(core::any::type_name::()), + key: None, + str_context: None, + silent: false, + } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for DataError {} + +#[cfg(feature = "std")] +impl From for DataError { + fn from(e: std::io::Error) -> Self { + #[cfg(feature = "logging")] + log::warn!("I/O error: {}", e); + DataErrorKind::Io(e.kind()).into_error() + } +} diff --git a/third_party/rust/icu_provider/src/fallback.rs b/third_party/rust/icu_provider/src/fallback.rs new file mode 100644 index 0000000000..5c4e13b8da --- /dev/null +++ b/third_party/rust/icu_provider/src/fallback.rs @@ -0,0 +1,201 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Options to define fallback behaviour. +//! +//! These options are consumed by the `LocaleFallbacker` in the `icu_locid_transforms` crate +//! (or the `icu::locid_transforms` module), but are defined here because they are used by `DataKey`. + +use icu_locid::extensions::unicode::Key; + +/// Hint for which subtag to prioritize during fallback. +/// +/// For example, `"en-US"` might fall back to either `"en"` or `"und-US"` depending +/// on this enum. +#[derive(Debug, PartialEq, Eq, Copy, Clone, PartialOrd, Ord)] +#[non_exhaustive] +pub enum LocaleFallbackPriority { + /// Prioritize the language. This is the default behavior. + /// + /// For example, `"en-US"` should go to `"en"` and then `"und"`. + Language, + /// Prioritize the region. + /// + /// For example, `"en-US"` should go to `"und-US"` and then `"und"`. 
+ Region, + /// Collation-specific fallback rules. Similar to language priority. + /// + /// For example, `"zh-Hant"` goes to `"zh"` before `"und"`. + Collation, +} + +impl LocaleFallbackPriority { + /// Const-friendly version of [`Default::default`]. + pub const fn const_default() -> Self { + Self::Language + } +} + +impl Default for LocaleFallbackPriority { + fn default() -> Self { + Self::const_default() + } +} + +/// What additional data is required to load when performing fallback. +#[derive(Debug, PartialEq, Eq, Copy, Clone, PartialOrd, Ord)] +#[non_exhaustive] +pub enum LocaleFallbackSupplement { + /// Collation supplement + Collation, +} + +/// Configuration settings for a particular fallback operation. +#[derive(Debug, Clone, PartialEq, Eq, Copy)] +#[non_exhaustive] +pub struct LocaleFallbackConfig { + /// Strategy for choosing which subtags to drop during locale fallback. + /// + /// # Examples + /// + /// Retain the language and script subtags until the final step: + /// + /// ``` + /// use icu_locid::locale; + /// use icu_locid_transform::fallback::LocaleFallbackConfig; + /// use icu_locid_transform::fallback::LocaleFallbackPriority; + /// use icu_locid_transform::LocaleFallbacker; + /// + /// // Set up the fallback iterator. + /// let fallbacker = LocaleFallbacker::new(); + /// let mut config = LocaleFallbackConfig::default(); + /// config.priority = LocaleFallbackPriority::Language; + /// let mut fallback_iterator = fallbacker + /// .for_config(config) + /// .fallback_for(locale!("ca-ES-valencia").into()); + /// + /// // Run the algorithm and check the results. 
+ /// assert_eq!(fallback_iterator.get(), &locale!("ca-ES-valencia").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("ca-ES").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("ca-valencia").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("ca").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("und").into()); + /// ``` + /// + /// Retain the region subtag until the final step: + /// + /// ``` + /// use icu_locid::locale; + /// use icu_locid_transform::fallback::LocaleFallbackConfig; + /// use icu_locid_transform::fallback::LocaleFallbackPriority; + /// use icu_locid_transform::LocaleFallbacker; + /// + /// // Set up the fallback iterator. + /// let fallbacker = LocaleFallbacker::new(); + /// let mut config = LocaleFallbackConfig::default(); + /// config.priority = LocaleFallbackPriority::Region; + /// let mut fallback_iterator = fallbacker + /// .for_config(config) + /// .fallback_for(locale!("ca-ES-valencia").into()); + /// + /// // Run the algorithm and check the results. + /// assert_eq!(fallback_iterator.get(), &locale!("ca-ES-valencia").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("ca-ES").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("und-ES-valencia").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("und-ES").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("und").into()); + /// ``` + pub priority: LocaleFallbackPriority, + /// An extension keyword to retain during locale fallback. + /// + /// # Examples + /// + /// ``` + /// use icu_locid::locale; + /// use icu_locid_transform::fallback::LocaleFallbackConfig; + /// use icu_locid_transform::LocaleFallbacker; + /// + /// // Set up the fallback iterator. 
+ /// let fallbacker = LocaleFallbacker::new(); + /// let mut config = LocaleFallbackConfig::default(); + /// config.extension_key = Some(icu_locid::extensions::unicode::key!("nu")); + /// let mut fallback_iterator = fallbacker + /// .for_config(config) + /// .fallback_for(locale!("ar-EG-u-nu-latn").into()); + /// + /// // Run the algorithm and check the results. + /// assert_eq!(fallback_iterator.get(), &locale!("ar-EG-u-nu-latn").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("ar-EG").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("ar-u-nu-latn").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("ar").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("und").into()); + /// ``` + pub extension_key: Option, + /// Fallback supplement data key to customize fallback rules. + /// + /// For example, most data keys for collation add additional parent locales, such as + /// "yue" to "zh-Hant", and data used for the `"-u-co"` extension keyword fallback. + /// + /// Currently the only supported fallback supplement is `LocaleFallbackSupplement::Collation`, but more may be + /// added in the future. + /// + /// # Examples + /// + /// ``` + /// use icu_locid::locale; + /// use icu_locid_transform::fallback::LocaleFallbackConfig; + /// use icu_locid_transform::fallback::LocaleFallbackPriority; + /// use icu_locid_transform::fallback::LocaleFallbackSupplement; + /// use icu_locid_transform::LocaleFallbacker; + /// + /// // Set up the fallback iterator. 
+ /// let fallbacker = LocaleFallbacker::new(); + /// let mut config = LocaleFallbackConfig::default(); + /// config.priority = LocaleFallbackPriority::Collation; + /// config.fallback_supplement = Some(LocaleFallbackSupplement::Collation); + /// let mut fallback_iterator = fallbacker + /// .for_config(config) + /// .fallback_for(locale!("yue-HK").into()); + /// + /// // Run the algorithm and check the results. + /// // TODO(#1964): add "zh" as a target. + /// assert_eq!(fallback_iterator.get(), &locale!("yue-HK").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("yue").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("zh-Hant").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("und").into()); + /// ``` + pub fallback_supplement: Option, +} + +impl LocaleFallbackConfig { + /// Const version of [`Default::default`]. + pub const fn const_default() -> Self { + Self { + priority: LocaleFallbackPriority::const_default(), + extension_key: None, + fallback_supplement: None, + } + } +} + +impl Default for LocaleFallbackConfig { + fn default() -> Self { + Self::const_default() + } +} diff --git a/third_party/rust/icu_provider/src/hello_world.rs b/third_party/rust/icu_provider/src/hello_world.rs new file mode 100644 index 0000000000..9a51890264 --- /dev/null +++ b/third_party/rust/icu_provider/src/hello_world.rs @@ -0,0 +1,362 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Data provider returning multilingual "Hello World" strings for testing. 
+ +#![allow(clippy::exhaustive_structs)] // data struct module + +use crate as icu_provider; + +use crate::prelude::*; +use alloc::borrow::Cow; +use alloc::string::String; +use core::fmt::Debug; +use writeable::Writeable; +use yoke::*; +use zerofrom::*; + +/// A struct containing "Hello World" in the requested language. +#[derive(Debug, PartialEq, Clone, Yokeable, ZeroFrom)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize))] +#[cfg_attr( + any(feature = "deserialize_json", feature = "datagen"), + derive(serde::Serialize) +)] +#[cfg_attr(feature = "datagen", derive(databake::Bake))] +#[cfg_attr(feature = "datagen", databake(path = icu_provider::hello_world))] +pub struct HelloWorldV1<'data> { + /// The translation of "Hello World". + #[cfg_attr(feature = "serde", serde(borrow))] + pub message: Cow<'data, str>, +} + +impl Default for HelloWorldV1<'_> { + fn default() -> Self { + HelloWorldV1 { + message: Cow::Borrowed("(und) Hello World"), + } + } +} + +/// Marker type for [`HelloWorldV1`]. +#[cfg_attr(feature = "datagen", derive(Default, databake::Bake))] +#[cfg_attr(feature = "datagen", databake(path = icu_provider::hello_world))] +#[derive(Debug)] +pub struct HelloWorldV1Marker; + +impl DataMarker for HelloWorldV1Marker { + type Yokeable = HelloWorldV1<'static>; +} + +impl KeyedDataMarker for HelloWorldV1Marker { + const KEY: DataKey = icu_provider::data_key!("core/helloworld@1"); +} + +/// A data provider returning Hello World strings in different languages. +/// +/// Mostly useful for testing. 
+/// +/// # Examples +/// +/// ``` +/// use icu_locid::locale; +/// use icu_provider::hello_world::*; +/// use icu_provider::prelude::*; +/// +/// let german_hello_world: DataPayload<HelloWorldV1Marker> = +/// HelloWorldProvider +/// .load(DataRequest { +/// locale: &locale!("de").into(), +/// metadata: Default::default(), +/// }) +/// .expect("Loading should succeed") +/// .take_payload() +/// .expect("Data should be present"); +/// +/// assert_eq!("Hallo Welt", german_hello_world.get().message); +/// ``` +/// +/// Load the reverse string using an auxiliary key: +/// +/// ``` +/// use icu_provider::hello_world::*; +/// use icu_provider::prelude::*; +/// +/// let reverse_hello_world: DataPayload<HelloWorldV1Marker> = +/// HelloWorldProvider +/// .load(DataRequest { +/// locale: &"en-x-reverse".parse().unwrap(), +/// metadata: Default::default(), +/// }) +/// .expect("Loading should succeed") +/// .take_payload() +/// .expect("Data should be present"); +/// +/// assert_eq!("Olleh Dlrow", reverse_hello_world.get().message); +/// ``` +#[derive(Debug, PartialEq, Default)] +pub struct HelloWorldProvider; + +impl HelloWorldProvider { + // Data from https://en.wiktionary.org/wiki/Hello_World#Translations + // Keep this sorted! 
+ const DATA: &'static [(&'static str, &'static str)] = &[ + ("bn", "ওহে বিশ্ব"), + ("cs", "Ahoj světe"), + ("de", "Hallo Welt"), + ("de-AT", "Servus Welt"), + ("el", "Καλημέρα κόσμε"), + ("en", "Hello World"), + ("en-001", "Hello from 🗺️"), // WORLD + ("en-002", "Hello from 🌍"), // AFRICA + ("en-019", "Hello from 🌎"), // AMERICAS + ("en-142", "Hello from 🌏"), // ASIA + ("en-GB", "Hello from 🇬🇧"), // GREAT BRITAIN + ("en-GB-u-sd-gbeng", "Hello from 🏴󠁧󠁢󠁥󠁮󠁧󠁿"), // ENGLAND + ("en-x-reverse", "Olleh Dlrow"), + ("eo", "Saluton, Mondo"), + ("fa", "سلام دنیا‎"), + ("fi", "hei maailma"), + ("is", "Halló, heimur"), + ("ja", "こんにちは世界"), + ("ja-x-reverse", "界世はちにんこ"), + ("la", "Ave, munde"), + ("pt", "Olá, mundo"), + ("ro", "Salut, lume"), + ("ru", "Привет, мир"), + ("sr", "Поздрав свете"), + ("sr-Latn", "Pozdrav svete"), + ("vi", "Xin chào thế giới"), + ("zh", "你好世界"), + ]; + + /// Converts this provider into a [`BufferProvider`] that uses JSON serialization. + #[cfg(feature = "deserialize_json")] + pub fn into_json_provider(self) -> HelloWorldJsonProvider { + HelloWorldJsonProvider + } +} + +impl DataProvider for HelloWorldProvider { + fn load(&self, req: DataRequest) -> Result, DataError> { + #[allow(clippy::indexing_slicing)] // binary_search + let data = Self::DATA + .binary_search_by(|(k, _)| req.locale.strict_cmp(k.as_bytes()).reverse()) + .map(|i| Self::DATA[i].1) + .map_err(|_| DataErrorKind::MissingLocale.with_req(HelloWorldV1Marker::KEY, req))?; + Ok(DataResponse { + metadata: Default::default(), + payload: Some(DataPayload::from_static_str(data)), + }) + } +} + +impl DataPayload { + /// Make a [`DataPayload`]`<`[`HelloWorldV1Marker`]`>` from a static string slice. + pub fn from_static_str(s: &'static str) -> DataPayload { + DataPayload::from_owned(HelloWorldV1 { + message: Cow::Borrowed(s), + }) + } +} + +// AnyProvider support. 
+#[cfg(not(feature = "datagen"))] +icu_provider::impl_dynamic_data_provider!(HelloWorldProvider, [HelloWorldV1Marker,], AnyMarker); + +#[cfg(feature = "deserialize_json")] +/// A data provider returning Hello World strings in different languages as JSON blobs. +/// +/// Mostly useful for testing. +/// +/// # Examples +/// +/// ``` +/// use icu_locid::locale; +/// use icu_provider::hello_world::*; +/// use icu_provider::prelude::*; +/// +/// let german_hello_world = HelloWorldProvider +/// .into_json_provider() +/// .load_buffer(HelloWorldV1Marker::KEY, DataRequest { +/// locale: &locale!("de").into(), +/// metadata: Default::default(), +/// }) +/// .expect("Loading should succeed") +/// .take_payload() +/// .expect("Data should be present"); +/// +/// assert_eq!(german_hello_world.get(), br#"{"message":"Hallo Welt"}"#); +#[derive(Debug)] +pub struct HelloWorldJsonProvider; + +#[cfg(feature = "deserialize_json")] +impl BufferProvider for HelloWorldJsonProvider { + fn load_buffer( + &self, + key: DataKey, + req: DataRequest, + ) -> Result, DataError> { + key.match_key(HelloWorldV1Marker::KEY)?; + let result = HelloWorldProvider.load(req)?; + let (mut metadata, old_payload) = + DataResponse::::take_metadata_and_payload(result)?; + metadata.buffer_format = Some(icu_provider::buf::BufferFormat::Json); + #[allow(clippy::unwrap_used)] // HelloWorldV1::serialize is infallible + Ok(DataResponse { + metadata, + payload: Some(DataPayload::from_owned_buffer( + serde_json::to_string(old_payload.get()) + .unwrap() + .into_bytes() + .into_boxed_slice(), + )), + }) + } +} + +#[cfg(feature = "datagen")] +impl icu_provider::datagen::IterableDataProvider for HelloWorldProvider { + fn supported_locales(&self) -> Result, DataError> { + #[allow(clippy::unwrap_used)] // datagen + Ok(Self::DATA.iter().map(|(s, _)| s.parse().unwrap()).collect()) + } +} + +#[cfg(feature = "datagen")] +icu_provider::make_exportable_provider!(HelloWorldProvider, [HelloWorldV1Marker,]); + +/// A type that 
formats localized "hello world" strings. +/// +/// This type is intended to take the shape of a typical ICU4X formatter API. +/// +/// # Examples +/// +/// ``` +/// use icu_locid::locale; +/// use icu_provider::hello_world::{HelloWorldFormatter, HelloWorldProvider}; +/// use writeable::assert_writeable_eq; +/// +/// let fmt = HelloWorldFormatter::try_new_unstable( +/// &HelloWorldProvider, +/// &locale!("eo").into(), +/// ) +/// .expect("locale exists"); +/// +/// assert_writeable_eq!(fmt.format(), "Saluton, Mondo"); +/// ``` +#[derive(Debug)] +pub struct HelloWorldFormatter { + data: DataPayload, +} + +/// A formatted hello world message. Implements [`Writeable`]. +/// +/// For an example, see [`HelloWorldFormatter`]. +#[derive(Debug)] +pub struct FormattedHelloWorld<'l> { + data: &'l HelloWorldV1<'l>, +} + +impl HelloWorldFormatter { + /// Creates a new [`HelloWorldFormatter`] for the specified locale. + /// + /// [📚 Help choosing a constructor](icu_provider::constructors) + pub fn try_new(locale: &DataLocale) -> Result { + Self::try_new_unstable(&HelloWorldProvider, locale) + } + + icu_provider::gen_any_buffer_data_constructors!(locale: include, options: skip, error: DataError, + #[cfg(skip)] + functions: [ + try_new, + try_new_with_any_provider, + try_new_with_buffer_provider, + try_new_unstable, + Self, + ]); + + #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::try_new)] + pub fn try_new_unstable

(provider: &P, locale: &DataLocale) -> Result + where + P: DataProvider, + { + let data = provider + .load(DataRequest { + locale, + metadata: Default::default(), + })? + .take_payload()?; + Ok(Self { data }) + } + + /// Formats a hello world message, returning a [`FormattedHelloWorld`]. + #[allow(clippy::needless_lifetimes)] // documentary example + pub fn format<'l>(&'l self) -> FormattedHelloWorld<'l> { + FormattedHelloWorld { + data: self.data.get(), + } + } + + /// Formats a hello world message, returning a [`String`]. + pub fn format_to_string(&self) -> String { + self.format().write_to_string().into_owned() + } +} + +impl<'l> Writeable for FormattedHelloWorld<'l> { + fn write_to(&self, sink: &mut W) -> core::fmt::Result { + self.data.message.write_to(sink) + } + + fn write_to_string(&self) -> Cow { + self.data.message.clone() + } + + fn writeable_length_hint(&self) -> writeable::LengthHint { + self.data.message.writeable_length_hint() + } +} + +writeable::impl_display_with_writeable!(FormattedHelloWorld<'_>); + +#[cfg(feature = "datagen")] +#[test] +fn test_iter() { + use crate::datagen::IterableDataProvider; + use icu_locid::locale; + + assert_eq!( + HelloWorldProvider.supported_locales().unwrap(), + vec![ + locale!("bn").into(), + locale!("cs").into(), + locale!("de").into(), + locale!("de-AT").into(), + locale!("el").into(), + locale!("en").into(), + locale!("en-001").into(), + locale!("en-002").into(), + locale!("en-019").into(), + locale!("en-142").into(), + locale!("en-GB").into(), + locale!("en-GB-u-sd-gbeng").into(), + "en-x-reverse".parse().unwrap(), + locale!("eo").into(), + locale!("fa").into(), + locale!("fi").into(), + locale!("is").into(), + locale!("ja").into(), + "ja-x-reverse".parse().unwrap(), + locale!("la").into(), + locale!("pt").into(), + locale!("ro").into(), + locale!("ru").into(), + locale!("sr").into(), + locale!("sr-Latn").into(), + locale!("vi").into(), + locale!("zh").into() + ] + ); +} diff --git 
a/third_party/rust/icu_provider/src/key.rs b/third_party/rust/icu_provider/src/key.rs new file mode 100644 index 0000000000..0e1e1006e1 --- /dev/null +++ b/third_party/rust/icu_provider/src/key.rs @@ -0,0 +1,717 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::error::{DataError, DataErrorKind}; + +use crate::fallback::{LocaleFallbackConfig, LocaleFallbackPriority, LocaleFallbackSupplement}; +use alloc::borrow::Cow; +use core::fmt; +use core::fmt::Write; +use core::ops::Deref; +use writeable::{LengthHint, Writeable}; +use zerovec::ule::*; + +#[doc(hidden)] +#[macro_export] +macro_rules! leading_tag { + () => { + "\nicu4x_key_tag" + }; +} + +#[doc(hidden)] +#[macro_export] +macro_rules! trailing_tag { + () => { + "\n" + }; +} + +#[doc(hidden)] +#[macro_export] +macro_rules! tagged { + ($without_tags:expr) => { + concat!( + $crate::leading_tag!(), + $without_tags, + $crate::trailing_tag!() + ) + }; +} + +/// A compact hash of a [`DataKey`]. Useful for keys in maps. +/// +/// The hash will be stable over time within major releases. +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, ULE)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[repr(transparent)] +pub struct DataKeyHash([u8; 4]); + +impl DataKeyHash { + const fn compute_from_path(path: DataKeyPath) -> Self { + let hash = fxhash_32( + path.tagged.as_bytes(), + leading_tag!().len(), + trailing_tag!().len(), + ); + Self(hash.to_le_bytes()) + } + + /// Gets the hash value as a byte array. + pub const fn to_bytes(self) -> [u8; 4] { + self.0 + } +} + +/// Const function to compute the FxHash of a byte array. +/// +/// FxHash is a speedy hash algorithm used within rustc. 
The algorithm is satisfactory for our +/// use case since the strings being hashed originate from a trusted source (the ICU4X +/// components), and the hashes are computed at compile time, so we can check for collisions. +/// +/// We could have considered a SHA or other cryptographic hash function. However, we are using +/// FxHash because: +/// +/// 1. There is precedent for this algorithm in Rust +/// 2. The algorithm is easy to implement as a const function +/// 3. The amount of code is small enough that we can reasonably keep the algorithm in-tree +/// 4. FxHash is designed to output 32-bit or 64-bit values, whereas SHA outputs more bits, +/// such that truncation would be required in order to fit into a u32, partially reducing +/// the benefit of a cryptographically secure algorithm +// The indexing operations in this function have been reviewed in detail and won't panic. +#[allow(clippy::indexing_slicing)] +const fn fxhash_32(bytes: &[u8], ignore_leading: usize, ignore_trailing: usize) -> u32 { + // This code is adapted from https://github.com/rust-lang/rustc-hash, + // whose license text is reproduced below. + // + // Copyright 2015 The Rust Project Developers. See the COPYRIGHT + // file at the top-level directory of this distribution and at + // http://rust-lang.org/COPYRIGHT. + // + // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT license + // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your + // option. This file may not be copied, modified, or distributed + // except according to those terms. 
+ + if ignore_leading + ignore_trailing >= bytes.len() { + return 0; + } + + #[inline] + const fn hash_word_32(mut hash: u32, word: u32) -> u32 { + const ROTATE: u32 = 5; + const SEED32: u32 = 0x9e_37_79_b9; + hash = hash.rotate_left(ROTATE); + hash ^= word; + hash = hash.wrapping_mul(SEED32); + hash + } + + let mut cursor = ignore_leading; + let end = bytes.len() - ignore_trailing; + let mut hash = 0; + + while end - cursor >= 4 { + let word = u32::from_le_bytes([ + bytes[cursor], + bytes[cursor + 1], + bytes[cursor + 2], + bytes[cursor + 3], + ]); + hash = hash_word_32(hash, word); + cursor += 4; + } + + if end - cursor >= 2 { + let word = u16::from_le_bytes([bytes[cursor], bytes[cursor + 1]]); + hash = hash_word_32(hash, word as u32); + cursor += 2; + } + + if end - cursor >= 1 { + hash = hash_word_32(hash, bytes[cursor] as u32); + } + + hash +} + +impl<'a> zerovec::maps::ZeroMapKV<'a> for DataKeyHash { + type Container = zerovec::ZeroVec<'a, DataKeyHash>; + type Slice = zerovec::ZeroSlice; + type GetType = ::ULE; + type OwnedType = DataKeyHash; +} + +impl AsULE for DataKeyHash { + type ULE = Self; + #[inline] + fn to_unaligned(self) -> Self::ULE { + self + } + #[inline] + fn from_unaligned(unaligned: Self::ULE) -> Self { + unaligned + } +} + +// Safe since the ULE type is `self`. +unsafe impl EqULE for DataKeyHash {} + +/// The string path of a data key. For example, "foo@1" +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct DataKeyPath { + // This string literal is wrapped in leading_tag!() and trailing_tag!() to make it detectable + // in a compiled binary. + tagged: &'static str, +} + +impl DataKeyPath { + /// Gets the path as a static string slice. 
+ #[inline] + pub const fn get(self) -> &'static str { + unsafe { + // Safe due to invariant that self.tagged is tagged correctly + core::str::from_utf8_unchecked(core::mem::transmute(( + self.tagged.as_ptr().add(leading_tag!().len()), + self.tagged.len() - trailing_tag!().len() - leading_tag!().len(), + ))) + } + } +} + +impl Deref for DataKeyPath { + type Target = str; + #[inline] + fn deref(&self) -> &Self::Target { + self.get() + } +} + +/// Metadata statically associated with a particular [`DataKey`]. +#[derive(Debug, PartialEq, Eq, Copy, Clone, PartialOrd, Ord)] +#[non_exhaustive] +pub struct DataKeyMetadata { + /// What to prioritize when fallbacking on this [`DataKey`]. + pub fallback_priority: LocaleFallbackPriority, + /// A Unicode extension keyword to consider when loading data for this [`DataKey`]. + pub extension_key: Option, + /// Optional choice for additional fallbacking data required for loading this marker. + /// + /// For more information, see `LocaleFallbackConfig::fallback_supplement`. + pub fallback_supplement: Option, + /// Whether the key has a singleton value, as opposed to per-locale values. Singleton + /// keys behave differently, e.g. they never perform fallback, and can be optimized + /// in data providers. + pub singleton: bool, +} + +impl DataKeyMetadata { + /// Const-friendly version of [`Default::default`]. + pub const fn const_default() -> Self { + Self { + fallback_priority: LocaleFallbackPriority::const_default(), + extension_key: None, + fallback_supplement: None, + singleton: false, + } + } + + #[doc(hidden)] + pub const fn construct_internal( + fallback_priority: LocaleFallbackPriority, + extension_key: Option, + fallback_supplement: Option, + singleton: bool, + ) -> Self { + Self { + fallback_priority, + extension_key, + fallback_supplement, + singleton, + } + } +} + +impl Default for DataKeyMetadata { + #[inline] + fn default() -> Self { + Self::const_default() + } +} + +/// Used for loading data from an ICU4X data provider. 
+/// +/// A resource key is tightly coupled with the code that uses it to load data at runtime. +/// Executables can be searched for `DataKey` instances to produce optimized data files. +/// Therefore, users should not generally create DataKey instances; they should instead use +/// the ones exported by a component. +/// +/// `DataKey`s are created with the [`data_key!`](crate::data_key) macro: +/// +/// ``` +/// # use icu_provider::DataKey; +/// const K: DataKey = icu_provider::data_key!("foo/bar@1"); +/// ``` +/// +/// The human-readable path string ends with `@` followed by one or more digits (the version +/// number). Paths do not contain characters other than ASCII letters and digits, `_`, `/`. +/// +/// Invalid paths are compile-time errors (as [`data_key!`](crate::data_key) uses `const`). +/// +/// ```compile_fail,E0080 +/// # use icu_provider::DataKey; +/// const K: DataKey = icu_provider::data_key!("foo/../bar@1"); +/// ``` +#[derive(Copy, Clone)] +pub struct DataKey { + path: DataKeyPath, + hash: DataKeyHash, + metadata: DataKeyMetadata, +} + +impl PartialEq for DataKey { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.hash == other.hash && self.path == other.path && self.metadata == other.metadata + } +} + +impl Eq for DataKey {} + +impl Ord for DataKey { + fn cmp(&self, other: &Self) -> core::cmp::Ordering { + self.path + .cmp(&other.path) + .then_with(|| self.metadata.cmp(&other.metadata)) + } +} + +impl PartialOrd for DataKey { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl core::hash::Hash for DataKey { + #[inline] + fn hash(&self, state: &mut H) { + self.hash.hash(state) + } +} + +impl DataKey { + /// Gets a human-readable representation of a [`DataKey`]. + /// + /// The human-readable path string ends with `@` followed by one or more digits (the version + /// number). Paths do not contain characters other than ASCII letters and digits, `_`, `/`. 
+ /// + /// Useful for reading and writing data to a file system. + #[inline] + pub const fn path(self) -> DataKeyPath { + self.path + } + + /// Gets a platform-independent hash of a [`DataKey`]. + /// + /// The hash is 4 bytes and allows for fast key comparison. + /// + /// # Example + /// + /// ``` + /// use icu_provider::DataKey; + /// use icu_provider::DataKeyHash; + /// + /// const KEY: DataKey = icu_provider::data_key!("foo@1"); + /// const KEY_HASH: DataKeyHash = KEY.hashed(); + /// + /// assert_eq!(KEY_HASH.to_bytes(), [0xe2, 0xb6, 0x17, 0x71]); + /// ``` + #[inline] + pub const fn hashed(self) -> DataKeyHash { + self.hash + } + + /// Gets the metadata associated with this [`DataKey`]. + #[inline] + pub const fn metadata(self) -> DataKeyMetadata { + self.metadata + } + + /// Returns the [`LocaleFallbackConfig`] for this [`DataKey`]. + #[inline] + pub const fn fallback_config(self) -> LocaleFallbackConfig { + let mut config = LocaleFallbackConfig::const_default(); + config.priority = self.metadata.fallback_priority; + config.extension_key = self.metadata.extension_key; + config.fallback_supplement = self.metadata.fallback_supplement; + config + } + + /// Constructs a [`DataKey`] from a path and metadata. + /// + /// # Examples + /// + /// ``` + /// use icu_provider::data_key; + /// use icu_provider::DataKey; + /// + /// const CONST_KEY: DataKey = data_key!("foo@1"); + /// + /// let runtime_key = + /// DataKey::from_path_and_metadata(CONST_KEY.path(), CONST_KEY.metadata()); + /// + /// assert_eq!(CONST_KEY, runtime_key); + /// ``` + #[inline] + pub const fn from_path_and_metadata(path: DataKeyPath, metadata: DataKeyMetadata) -> Self { + Self { + path, + hash: DataKeyHash::compute_from_path(path), + metadata, + } + } + + #[doc(hidden)] + // Error is a str of the expected character class and the index where it wasn't encountered + // The indexing operations in this function have been reviewed in detail and won't panic. 
+ #[allow(clippy::indexing_slicing)] + pub const fn construct_internal( + path: &'static str, + metadata: DataKeyMetadata, + ) -> Result { + if path.len() < leading_tag!().len() + trailing_tag!().len() { + return Err(("tag", 0)); + } + // Start and end of the untagged part + let start = leading_tag!().len(); + let end = path.len() - trailing_tag!().len(); + + // Check tags + let mut i = 0; + while i < leading_tag!().len() { + if path.as_bytes()[i] != leading_tag!().as_bytes()[i] { + return Err(("tag", 0)); + } + i += 1; + } + i = 0; + while i < trailing_tag!().len() { + if path.as_bytes()[end + i] != trailing_tag!().as_bytes()[i] { + return Err(("tag", end + 1)); + } + i += 1; + } + + match Self::validate_path_manual_slice(path, start, end) { + Ok(()) => (), + Err(e) => return Err(e), + }; + + let path = DataKeyPath { tagged: path }; + + Ok(Self { + path, + hash: DataKeyHash::compute_from_path(path), + metadata, + }) + } + + const fn validate_path_manual_slice( + path: &'static str, + start: usize, + end: usize, + ) -> Result<(), (&'static str, usize)> { + debug_assert!(start <= end); + debug_assert!(end <= path.len()); + // Regex: [a-zA-Z0-9_][a-zA-Z0-9_/]*@[0-9]+ + enum State { + Empty, + Body, + At, + Version, + } + use State::*; + let mut i = start; + let mut state = Empty; + loop { + let byte = if i < end { + #[allow(clippy::indexing_slicing)] // protected by debug assertion + Some(path.as_bytes()[i]) + } else { + None + }; + state = match (state, byte) { + (Empty | Body, Some(b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_')) => Body, + (Body, Some(b'/')) => Body, + (Body, Some(b'@')) => At, + (At | Version, Some(b'0'..=b'9')) => Version, + // One of these cases will be hit at the latest when i == end, so the loop converges. 
+ (Version, None) => { + return Ok(()); + } + + (Empty, _) => return Err(("[a-zA-Z0-9_]", i)), + (Body, _) => return Err(("[a-zA-z0-9_/@]", i)), + (At, _) => return Err(("[0-9]", i)), + (Version, _) => return Err(("[0-9]", i)), + }; + i += 1; + } + } + + /// Returns [`Ok`] if this data key matches the argument, or the appropriate error. + /// + /// Convenience method for data providers that support a single [`DataKey`]. + /// + /// # Examples + /// + /// ``` + /// use icu_provider::prelude::*; + /// + /// const FOO_BAR: DataKey = icu_provider::data_key!("foo/bar@1"); + /// const FOO_BAZ: DataKey = icu_provider::data_key!("foo/baz@1"); + /// const BAR_BAZ: DataKey = icu_provider::data_key!("bar/baz@1"); + /// + /// assert!(matches!(FOO_BAR.match_key(FOO_BAR), Ok(()))); + /// assert!(matches!( + /// FOO_BAR.match_key(FOO_BAZ), + /// Err(DataError { + /// kind: DataErrorKind::MissingDataKey, + /// .. + /// }) + /// )); + /// assert!(matches!( + /// FOO_BAR.match_key(BAR_BAZ), + /// Err(DataError { + /// kind: DataErrorKind::MissingDataKey, + /// .. + /// }) + /// )); + /// + /// // The error context contains the argument: + /// assert_eq!(FOO_BAR.match_key(BAR_BAZ).unwrap_err().key, Some(BAR_BAZ)); + /// ``` + pub fn match_key(self, key: Self) -> Result<(), DataError> { + if self == key { + Ok(()) + } else { + Err(DataErrorKind::MissingDataKey.with_key(key)) + } + } +} + +/// See [`DataKey`]. +#[macro_export] +macro_rules! 
data_key { + ($path:expr) => {{ + $crate::data_key!($path, $crate::DataKeyMetadata::const_default()) + }}; + ($path:expr, $metadata:expr) => {{ + // Force the DataKey into a const context + const RESOURCE_KEY_MACRO_CONST: $crate::DataKey = { + match $crate::DataKey::construct_internal($crate::tagged!($path), $metadata) { + Ok(v) => v, + #[allow(clippy::panic)] // Const context + Err(_) => panic!(concat!("Invalid resource key: ", $path)), + // TODO Once formatting is const: + // Err((expected, index)) => panic!( + // "Invalid resource key {:?}: expected {:?}, found {:?} ", + // $path, + // expected, + // $crate::tagged!($path).get(index..)) + // ); + } + }; + RESOURCE_KEY_MACRO_CONST + }}; +} + +impl fmt::Debug for DataKey { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str("DataKey{")?; + fmt::Display::fmt(self, f)?; + f.write_char('}')?; + Ok(()) + } +} + +impl Writeable for DataKey { + fn write_to(&self, sink: &mut W) -> core::fmt::Result { + self.path().write_to(sink) + } + + fn writeable_length_hint(&self) -> LengthHint { + self.path().writeable_length_hint() + } + + fn write_to_string(&self) -> Cow { + Cow::Borrowed(self.path().get()) + } +} + +writeable::impl_display_with_writeable!(DataKey); + +#[test] +fn test_path_syntax() { + // Valid keys: + DataKey::construct_internal(tagged!("hello/world@1"), Default::default()).unwrap(); + DataKey::construct_internal(tagged!("hello/world/foo@1"), Default::default()).unwrap(); + DataKey::construct_internal(tagged!("hello/world@999"), Default::default()).unwrap(); + DataKey::construct_internal(tagged!("hello_world/foo@1"), Default::default()).unwrap(); + DataKey::construct_internal(tagged!("hello_458/world@1"), Default::default()).unwrap(); + DataKey::construct_internal(tagged!("hello_world@1"), Default::default()).unwrap(); + + // No version: + assert_eq!( + DataKey::construct_internal(tagged!("hello/world"), Default::default()), + Err(( + "[a-zA-z0-9_/@]", + concat!(leading_tag!(), 
"hello/world").len() + )) + ); + + assert_eq!( + DataKey::construct_internal(tagged!("hello/world@"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "hello/world@").len())) + ); + assert_eq!( + DataKey::construct_internal(tagged!("hello/world@foo"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "hello/world@").len())) + ); + assert_eq!( + DataKey::construct_internal(tagged!("hello/world@1foo"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "hello/world@1").len())) + ); + + // Meta no longer accepted: + assert_eq!( + DataKey::construct_internal(tagged!("foo@1[R]"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "foo@1").len())) + ); + assert_eq!( + DataKey::construct_internal(tagged!("foo@1[u-ca]"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "foo@1").len())) + ); + assert_eq!( + DataKey::construct_internal(tagged!("foo@1[R][u-ca]"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "foo@1").len())) + ); + + // Invalid meta: + assert_eq!( + DataKey::construct_internal(tagged!("foo@1[U]"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "foo@1").len())) + ); + assert_eq!( + DataKey::construct_internal(tagged!("foo@1[uca]"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "foo@1").len())) + ); + assert_eq!( + DataKey::construct_internal(tagged!("foo@1[u-"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "foo@1").len())) + ); + assert_eq!( + DataKey::construct_internal(tagged!("foo@1[u-caa]"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "foo@1").len())) + ); + assert_eq!( + DataKey::construct_internal(tagged!("foo@1[R"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "foo@1").len())) + ); + + // Invalid characters: + assert_eq!( + DataKey::construct_internal(tagged!("你好/世界@1"), Default::default()), + Err(("[a-zA-Z0-9_]", leading_tag!().len())) + ); + + // Invalid tag: + assert_eq!( + DataKey::construct_internal( + 
concat!("hello/world@1", trailing_tag!()), + Default::default() + ), + Err(("tag", 0)) + ); + assert_eq!( + DataKey::construct_internal(concat!(leading_tag!(), "hello/world@1"), Default::default()), + Err(("tag", concat!(leading_tag!(), "hello/world@1").len())) + ); + assert_eq!( + DataKey::construct_internal("hello/world@1", Default::default()), + Err(("tag", 0)) + ); +} + +#[test] +fn test_key_to_string() { + struct KeyTestCase { + pub key: DataKey, + pub expected: &'static str, + } + + for cas in [ + KeyTestCase { + key: data_key!("core/cardinal@1"), + expected: "core/cardinal@1", + }, + KeyTestCase { + key: data_key!("core/maxlengthsubcatg@1"), + expected: "core/maxlengthsubcatg@1", + }, + KeyTestCase { + key: data_key!("core/cardinal@65535"), + expected: "core/cardinal@65535", + }, + ] { + writeable::assert_writeable_eq!(&cas.key, cas.expected); + assert_eq!(cas.expected, &*cas.key.path()); + } +} + +#[test] +fn test_hash_word_32() { + assert_eq!(0, fxhash_32(b"", 0, 0)); + assert_eq!(0, fxhash_32(b"a", 1, 0)); + assert_eq!(0, fxhash_32(b"a", 0, 1)); + assert_eq!(0, fxhash_32(b"a", 0, 10)); + assert_eq!(0, fxhash_32(b"a", 10, 0)); + assert_eq!(0, fxhash_32(b"a", 1, 1)); + assert_eq!(0xF3051F19, fxhash_32(b"a", 0, 0)); + assert_eq!(0x2F9DF119, fxhash_32(b"ab", 0, 0)); + assert_eq!(0xCB1D9396, fxhash_32(b"abc", 0, 0)); + assert_eq!(0x8628F119, fxhash_32(b"abcd", 0, 0)); + assert_eq!(0xBEBDB56D, fxhash_32(b"abcde", 0, 0)); + assert_eq!(0x1CE8476D, fxhash_32(b"abcdef", 0, 0)); + assert_eq!(0xC0F176A4, fxhash_32(b"abcdefg", 0, 0)); + assert_eq!(0x09AB476D, fxhash_32(b"abcdefgh", 0, 0)); + assert_eq!(0xB72F5D88, fxhash_32(b"abcdefghi", 0, 0)); +} + +#[test] +fn test_key_hash() { + struct KeyTestCase { + pub key: DataKey, + pub hash: DataKeyHash, + } + + for cas in [ + KeyTestCase { + key: data_key!("core/cardinal@1"), + hash: DataKeyHash([172, 207, 42, 236]), + }, + KeyTestCase { + key: data_key!("core/maxlengthsubcatg@1"), + hash: DataKeyHash([193, 6, 79, 61]), + 
}, + KeyTestCase { + key: data_key!("core/cardinal@65535"), + hash: DataKeyHash([176, 131, 182, 223]), + }, + ] { + assert_eq!(cas.hash, cas.key.hashed(), "{}", cas.key); + } +} diff --git a/third_party/rust/icu_provider/src/lib.rs b/third_party/rust/icu_provider/src/lib.rs new file mode 100644 index 0000000000..01cb2a3b34 --- /dev/null +++ b/third_party/rust/icu_provider/src/lib.rs @@ -0,0 +1,267 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! `icu_provider` is one of the [`ICU4X`] components. +//! +//! Unicode's experience with ICU4X's parent projects, ICU4C and ICU4J, led the team to realize +//! that data management is the most critical aspect of deploying internationalization, and that it requires +//! a high level of customization for the needs of the platform it is embedded in. As a result +//! ICU4X comes with a selection of providers that should allow for ICU4X to naturally fit into +//! different business and technological needs of customers. +//! +//! `icu_provider` defines traits and structs for transmitting data through the ICU4X locale +//! data pipeline. The primary trait is [`DataProvider`]. It is parameterized by a +//! [`KeyedDataMarker`], which contains the data type and a [`DataKey`]. It has one method, +//! [`DataProvider::load`], which transforms a [`DataRequest`] +//! into a [`DataResponse`]. +//! +//! - [`DataKey`] is a fixed identifier for the data type, such as `"plurals/cardinal@1"`. +//! - [`DataRequest`] contains additional annotations to choose a specific variant of the key, +//! such as a locale. +//! - [`DataResponse`] contains the data if the request was successful. +//! +//! In addition, there are three other traits which are widely implemented: +//! +//! - [`AnyProvider`] returns data as `dyn Any` trait objects. +//! 
- [`BufferProvider`] returns data as `[u8]` buffers. +//! - [`DynamicDataProvider`] returns structured data but is not specific to a key. +//! +//! The most common types required for this crate are included via the prelude: +//! +//! ``` +//! use icu_provider::prelude::*; +//! ``` +//! +//! ## Types of Data Providers +//! +//! All nontrivial data providers can fit into one of two classes. +//! +//! 1. [`AnyProvider`]: Those whose data originates as structured Rust objects +//! 2. [`BufferProvider`]: Those whose data originates as unstructured `[u8]` buffers +//! +//! **✨ Key Insight:** A given data provider is generally *either* an [`AnyProvider`] *or* a +//! [`BufferProvider`]. Which type depends on the data source, and it is not generally possible +//! to convert one to the other. +//! +//! See also [crate::constructors]. +//! +//! ### AnyProvider +//! +//! These providers are able to return structured data cast into `dyn Any` trait objects. Users +//! can call [`as_downcasting()`] to get an object implementing [`DataProvider`] by downcasting +//! the trait objects. +//! +//! Examples of AnyProviders: +//! +//! - [`DatagenProvider`] reads structured data from CLDR source files and returns ICU4X data structs. +//! - [`AnyPayloadProvider`] wraps a specific data struct and returns it. +//! - The `BakedDataProvider` which encodes structured data directly in Rust source +//! +//! ### BufferProvider +//! +//! These providers are able to return unstructured data typically represented as +//! [`serde`]-serialized buffers. Users can call [`as_deserializing()`] to get an object +//! implementing [`DataProvider`] by invoking Serde Deserialize. +//! +//! Examples of BufferProviders: +//! +//! - [`FsDataProvider`] reads individual buffers from the filesystem. +//! - [`BlobDataProvider`] reads buffers from a large in-memory blob. +//! +//! ## Provider Adapters +//! +//! ICU4X offers several built-in modules to combine providers in interesting ways. +//! 
These can be found in the [`icu_provider_adapters`] crate. +//! +//! ## Testing Provider +//! +//! This crate also contains a concrete provider for demonstration purposes: +//! +//! - [`HelloWorldProvider`] returns "hello world" strings in several languages. +//! +//! ## Types and Lifetimes +//! +//! Types compatible with [`Yokeable`] can be passed through the data provider, so long as they are +//! associated with a marker type implementing [`DataMarker`]. +//! +//! Data structs should generally have one lifetime argument: `'data`. This lifetime allows data +//! structs to borrow zero-copy data. +//! +//! ## Data generation API +//! +//! *This functionality is enabled with the "datagen" Cargo feature* +//! +//! The [`datagen`] module contains several APIs for data generation. See [`icu_datagen`] for the reference +//! data generation implementation. +//! +//! [`ICU4X`]: ../icu/index.html +//! [`DataProvider`]: data_provider::DataProvider +//! [`DataKey`]: key::DataKey +//! [`DataLocale`]: request::DataLocale +//! [`IterableDynamicDataProvider`]: datagen::IterableDynamicDataProvider +//! [`IterableDataProvider`]: datagen::IterableDataProvider +//! [`AnyPayloadProvider`]: ../icu_provider_adapters/any_payload/struct.AnyPayloadProvider.html +//! [`HelloWorldProvider`]: hello_world::HelloWorldProvider +//! [`AnyProvider`]: any::AnyProvider +//! [`Yokeable`]: yoke::Yokeable +//! [`impl_dynamic_data_provider!`]: impl_dynamic_data_provider +//! [`icu_provider_adapters`]: ../icu_provider_adapters/index.html +//! [`DatagenProvider`]: ../icu_datagen/struct.DatagenProvider.html +//! [`as_downcasting()`]: AsDowncastingAnyProvider::as_downcasting +//! [`as_deserializing()`]: AsDeserializingBufferProvider::as_deserializing +//! [`CldrJsonDataProvider`]: ../icu_datagen/cldr/struct.CldrJsonDataProvider.html +//! [`FsDataProvider`]: ../icu_provider_fs/struct.FsDataProvider.html +//! [`BlobDataProvider`]: ../icu_provider_blob/struct.BlobDataProvider.html +//! 
[`icu_datagen`]: ../icu_datagen/index.html + +// https://github.com/unicode-org/icu4x/blob/main/docs/process/boilerplate.md#library-annotations +#![cfg_attr(not(any(test, feature = "std")), no_std)] +#![cfg_attr( + not(test), + deny( + clippy::indexing_slicing, + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::exhaustive_structs, + clippy::exhaustive_enums, + missing_debug_implementations, + ) +)] +#![warn(missing_docs)] + +extern crate alloc; + +mod data_provider; +mod error; +#[doc(hidden)] +pub mod fallback; +mod key; +mod request; +mod response; + +pub mod any; +pub mod buf; +pub mod constructors; +#[cfg(feature = "datagen")] +pub mod datagen; +pub mod dynutil; +pub mod hello_world; +pub mod marker; +#[cfg(feature = "serde")] +pub mod serde; + +// Types from private modules +pub use crate::data_provider::DataProvider; +pub use crate::data_provider::DynamicDataProvider; +pub use crate::error::DataError; +pub use crate::error::DataErrorKind; +pub use crate::key::DataKey; +pub use crate::key::DataKeyHash; +pub use crate::key::DataKeyMetadata; +pub use crate::key::DataKeyPath; +#[cfg(feature = "experimental")] +pub use crate::request::AuxiliaryKeys; +pub use crate::request::DataLocale; +pub use crate::request::DataRequest; +pub use crate::request::DataRequestMetadata; +pub use crate::response::Cart; +pub use crate::response::DataPayload; +pub use crate::response::DataResponse; +pub use crate::response::DataResponseMetadata; +#[cfg(feature = "macros")] +pub use icu_provider_macros::data_struct; + +// Reexports from public modules +pub use crate::any::AnyMarker; +pub use crate::any::AnyPayload; +pub use crate::any::AnyProvider; +pub use crate::any::AnyResponse; +pub use crate::any::AsDowncastingAnyProvider; +pub use crate::any::AsDynamicDataProviderAnyMarkerWrap; +pub use crate::any::MaybeSendSync; +pub use crate::buf::BufferMarker; +pub use crate::buf::BufferProvider; +pub use crate::marker::DataMarker; +pub use crate::marker::KeyedDataMarker; 
+#[cfg(feature = "serde")] +pub use crate::serde::AsDeserializingBufferProvider; + +/// Core selection of APIs and structures for the ICU4X data provider. +pub mod prelude { + #[doc(no_inline)] + pub use crate::data_key; + #[doc(no_inline)] + pub use crate::AnyMarker; + #[doc(no_inline)] + pub use crate::AnyPayload; + #[doc(no_inline)] + pub use crate::AnyProvider; + #[doc(no_inline)] + pub use crate::AnyResponse; + #[doc(no_inline)] + #[cfg(feature = "serde")] + pub use crate::AsDeserializingBufferProvider; + #[doc(no_inline)] + pub use crate::AsDowncastingAnyProvider; + #[doc(no_inline)] + pub use crate::AsDynamicDataProviderAnyMarkerWrap; + #[doc(no_inline)] + #[cfg(feature = "experimental")] + pub use crate::AuxiliaryKeys; + #[doc(no_inline)] + pub use crate::BufferMarker; + #[doc(no_inline)] + pub use crate::BufferProvider; + #[doc(no_inline)] + pub use crate::DataError; + #[doc(no_inline)] + pub use crate::DataErrorKind; + #[doc(no_inline)] + pub use crate::DataKey; + #[doc(no_inline)] + pub use crate::DataKeyHash; + #[doc(no_inline)] + pub use crate::DataLocale; + #[doc(no_inline)] + pub use crate::DataMarker; + #[doc(no_inline)] + pub use crate::DataPayload; + #[doc(no_inline)] + pub use crate::DataProvider; + #[doc(no_inline)] + pub use crate::DataRequest; + #[doc(no_inline)] + pub use crate::DataRequestMetadata; + #[doc(no_inline)] + pub use crate::DataResponse; + #[doc(no_inline)] + pub use crate::DataResponseMetadata; + #[doc(no_inline)] + pub use crate::DynamicDataProvider; + #[doc(no_inline)] + pub use crate::KeyedDataMarker; + + #[doc(hidden)] + pub use yoke; + #[doc(hidden)] + pub use zerofrom; +} + +// Additional crate re-exports for compatibility +#[doc(hidden)] +pub use fallback::LocaleFallbackPriority as FallbackPriority; +#[doc(hidden)] +pub use fallback::LocaleFallbackSupplement as FallbackSupplement; +#[doc(hidden)] +pub use yoke; +#[doc(hidden)] +pub use zerofrom; + +// For macros +#[doc(hidden)] +pub mod _internal { + pub use 
super::fallback::{LocaleFallbackPriority, LocaleFallbackSupplement}; + pub use icu_locid as locid; +} diff --git a/third_party/rust/icu_provider/src/marker.rs b/third_party/rust/icu_provider/src/marker.rs new file mode 100644 index 0000000000..0065bb656c --- /dev/null +++ b/third_party/rust/icu_provider/src/marker.rs @@ -0,0 +1,86 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Marker types and traits for DataProvider. + +use crate::key::DataKey; +use yoke::Yokeable; + +/// Trait marker for data structs. All types delivered by the data provider must be associated with +/// something implementing this trait. +/// +/// Structs implementing this trait are normally generated with the [`data_struct`] macro. +/// +/// By convention, the non-standard `Marker` suffix is used by types implementing DataMarker. +/// +/// In addition to a marker type implementing DataMarker, the following impls must also be present +/// for the data struct: +/// +/// - `impl<'a> Yokeable<'a>` (required) +/// - `impl ZeroFrom` +/// +/// Also see [`KeyedDataMarker`]. +/// +/// Note: `DataMarker`s are quasi-const-generic compile-time objects, and as such are expected +/// to be unit structs. As this is not something that can be enforced by the type system, we +/// currently only have a `'static` bound on them (which is needed by a lot of our code). 
+/// +/// # Examples +/// +/// Manually implementing DataMarker for a custom type: +/// +/// ``` +/// use icu_provider::prelude::*; +/// use std::borrow::Cow; +/// use std::rc::Rc; +/// +/// #[derive(yoke::Yokeable, zerofrom::ZeroFrom)] +/// struct MyDataStruct<'data> { +/// message: Cow<'data, str>, +/// } +/// +/// struct MyDataStructMarker; +/// +/// impl DataMarker for MyDataStructMarker { +/// type Yokeable = MyDataStruct<'static>; +/// } +/// +/// // We can now use MyDataStruct with DataProvider: +/// let s = MyDataStruct { +/// message: Cow::Owned("Hello World".into()), +/// }; +/// let payload = DataPayload::<MyDataStructMarker>::from_owned(s); +/// assert_eq!(payload.get().message, "Hello World"); +/// ``` +/// +/// [`data_struct`]: crate::data_struct +pub trait DataMarker: 'static { + /// A type that implements [`Yokeable`]. This should typically be the `'static` version of a + /// data struct. + type Yokeable: for<'a> Yokeable<'a>; +} + +/// A [`DataMarker`] with a [`DataKey`] attached. +/// +/// Structs implementing this trait are normally generated with the [`data_struct!`] macro. +/// +/// Implementing this trait enables this marker to be used with the main [`DataProvider`] trait. +/// Most markers should be associated with a specific key and should therefore implement this +/// trait. +/// +/// [`BufferMarker`] and [`AnyMarker`] are examples of markers that do _not_ implement this trait +/// because they are not specific to a single key. +/// +/// Note: `KeyedDataMarker`s are quasi-const-generic compile-time objects, and as such are expected +/// to be unit structs. As this is not something that can be enforced by the type system, we +/// currently only have a `'static` bound on them (which is needed by a lot of our code). 
+/// +/// [`data_struct!`]: crate::data_struct +/// [`DataProvider`]: crate::DataProvider +/// [`BufferMarker`]: crate::BufferMarker +/// [`AnyMarker`]: crate::AnyMarker +pub trait KeyedDataMarker: DataMarker { + /// The single [`DataKey`] associated with this marker. + const KEY: DataKey; +} diff --git a/third_party/rust/icu_provider/src/request.rs b/third_party/rust/icu_provider/src/request.rs new file mode 100644 index 0000000000..1bb84f8667 --- /dev/null +++ b/third_party/rust/icu_provider/src/request.rs @@ -0,0 +1,1121 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::{DataError, DataErrorKind}; +use core::cmp::Ordering; +use core::default::Default; +use core::fmt; +use core::fmt::Debug; +use core::hash::Hash; +use core::str::FromStr; +use icu_locid::extensions::unicode as unicode_ext; +use icu_locid::subtags::{Language, Region, Script, Variants}; +use icu_locid::{LanguageIdentifier, Locale, SubtagOrderingResult}; +use writeable::{LengthHint, Writeable}; + +#[cfg(feature = "experimental")] +use alloc::string::String; +#[cfg(feature = "experimental")] +use core::ops::Deref; +#[cfg(feature = "experimental")] +use icu_locid::extensions::private::Subtag; +#[cfg(feature = "experimental")] +use tinystr::TinyAsciiStr; + +#[cfg(doc)] +use icu_locid::subtags::Variant; + +/// The request type passed into all data provider implementations. +#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)] +#[allow(clippy::exhaustive_structs)] // this type is stable +pub struct DataRequest<'a> { + /// The locale for which to load data. + /// + /// If locale fallback is enabled, the resulting data may be from a different locale + /// than the one requested here. + pub locale: &'a DataLocale, + /// Metadata that may affect the behavior of the data provider. 
+ pub metadata: DataRequestMetadata, +} + +impl fmt::Display for DataRequest<'_> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(&self.locale, f) + } +} + +/// Metadata for data requests. This is currently empty, but it may be extended with options +/// for tuning locale fallback, buffer layout, and so forth. +#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +#[non_exhaustive] +pub struct DataRequestMetadata { + /// Silent requests do not log errors. This can be used for exploratory querying, such as fallbacks. + pub silent: bool, +} + +/// A locale type optimized for use in fallbacking and the ICU4X data pipeline. +/// +/// [`DataLocale`] contains less functionality than [`Locale`] but more than +/// [`LanguageIdentifier`] for better size and performance while still meeting +/// the needs of the ICU4X data pipeline. +/// +/// # Examples +/// +/// Convert a [`Locale`] to a [`DataLocale`] and back: +/// +/// ``` +/// use icu_locid::locale; +/// use icu_provider::DataLocale; +/// +/// let locale = locale!("en-u-ca-buddhist"); +/// let data_locale = DataLocale::from(locale); +/// let locale = data_locale.into_locale(); +/// +/// assert_eq!(locale, locale!("en-u-ca-buddhist")); +/// ``` +/// +/// You can alternatively create a [`DataLocale`] from a borrowed [`Locale`], which is more +/// efficient than cloning the [`Locale`], but less efficient than converting an owned +/// [`Locale`]: +/// +/// ``` +/// use icu_locid::locale; +/// use icu_provider::DataLocale; +/// +/// let locale1 = locale!("en-u-ca-buddhist"); +/// let data_locale = DataLocale::from(&locale1); +/// let locale2 = data_locale.into_locale(); +/// +/// assert_eq!(locale1, locale2); +/// ``` +/// +/// If you are sure that you have no Unicode keywords, start with [`LanguageIdentifier`]: +/// +/// ``` +/// use icu_locid::langid; +/// use icu_provider::DataLocale; +/// +/// let langid = langid!("es-CA-valencia"); +/// let data_locale = 
DataLocale::from(langid); +/// let langid = data_locale.get_langid(); +/// +/// assert_eq!(langid, langid!("es-CA-valencia")); +/// ``` +/// +/// [`DataLocale`] only supports `-u` keywords, to reflect the current state of CLDR data +/// lookup and fallback. This may change in the future. +/// +/// ``` +/// use icu_locid::{locale, Locale}; +/// use icu_provider::DataLocale; +/// +/// let locale = "hi-t-en-h0-hybrid-u-attr-ca-buddhist" +/// .parse::<Locale>() +/// .unwrap(); +/// let data_locale = DataLocale::from(locale); +/// +/// assert_eq!(data_locale.into_locale(), locale!("hi-u-ca-buddhist")); +/// ``` +#[derive(PartialEq, Clone, Default, Eq, Hash)] +pub struct DataLocale { + langid: LanguageIdentifier, + keywords: unicode_ext::Keywords, + #[cfg(feature = "experimental")] + aux: Option<AuxiliaryKeys>, +} + +impl<'a> Default for &'a DataLocale { + fn default() -> Self { + static DEFAULT: DataLocale = DataLocale { + langid: LanguageIdentifier::UND, + keywords: unicode_ext::Keywords::new(), + #[cfg(feature = "experimental")] + aux: None, + }; + &DEFAULT + } +} + +impl fmt::Debug for DataLocale { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "DataLocale{{{self}}}") + } +} + +impl Writeable for DataLocale { + fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result { + self.langid.write_to(sink)?; + if !self.keywords.is_empty() { + sink.write_str("-u-")?; + self.keywords.write_to(sink)?; + } + #[cfg(feature = "experimental")] + if let Some(aux) = self.aux.as_ref() { + sink.write_str("-x-")?; + aux.write_to(sink)?; + } + Ok(()) + } + + fn writeable_length_hint(&self) -> LengthHint { + let mut length_hint = self.langid.writeable_length_hint(); + if !self.keywords.is_empty() { + length_hint += self.keywords.writeable_length_hint() + 3; + } + #[cfg(feature = "experimental")] + if let Some(aux) = self.aux.as_ref() { + length_hint += aux.writeable_length_hint() + 3; + } + length_hint + } + + fn write_to_string(&self) -> alloc::borrow::Cow<str> { + #[cfg_attr(not(feature = 
"experimental"), allow(unused_mut))] + let mut is_only_langid = self.keywords.is_empty(); + #[cfg(feature = "experimental")] + { + is_only_langid = is_only_langid && self.aux.is_none(); + } + if is_only_langid { + return self.langid.write_to_string(); + } + let mut string = + alloc::string::String::with_capacity(self.writeable_length_hint().capacity()); + let _ = self.write_to(&mut string); + alloc::borrow::Cow::Owned(string) + } +} + +writeable::impl_display_with_writeable!(DataLocale); + +impl From<LanguageIdentifier> for DataLocale { + fn from(langid: LanguageIdentifier) -> Self { + Self { + langid, + keywords: unicode_ext::Keywords::new(), + #[cfg(feature = "experimental")] + aux: None, + } + } +} + +impl From<Locale> for DataLocale { + fn from(locale: Locale) -> Self { + Self { + langid: locale.id, + keywords: locale.extensions.unicode.keywords, + #[cfg(feature = "experimental")] + aux: AuxiliaryKeys::try_from_iter(locale.extensions.private.iter().copied()).ok(), + } + } +} + +impl From<&LanguageIdentifier> for DataLocale { + fn from(langid: &LanguageIdentifier) -> Self { + Self { + langid: langid.clone(), + keywords: unicode_ext::Keywords::new(), + #[cfg(feature = "experimental")] + aux: None, + } + } +} + +impl From<&Locale> for DataLocale { + fn from(locale: &Locale) -> Self { + Self { + langid: locale.id.clone(), + keywords: locale.extensions.unicode.keywords.clone(), + #[cfg(feature = "experimental")] + aux: AuxiliaryKeys::try_from_iter(locale.extensions.private.iter().copied()).ok(), + } + } +} + +impl FromStr for DataLocale { + type Err = DataError; + fn from_str(s: &str) -> Result<Self, Self::Err> { + let locale = Locale::from_str(s).map_err(|e| { + DataErrorKind::KeyLocaleSyntax + .into_error() + .with_display_context(s) + .with_display_context(&e) + })?; + Ok(DataLocale::from(locale)) + } +} + +impl DataLocale { + /// Compare this [`DataLocale`] with BCP-47 bytes. 
+ /// + /// The return value is equivalent to what would happen if you first converted this + /// [`DataLocale`] to a BCP-47 string and then performed a byte comparison. + /// + /// This function is case-sensitive and results in a *total order*, so it is appropriate for + /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`. + /// + /// # Examples + /// + /// ``` + /// use icu_locid::Locale; + /// use icu_provider::DataLocale; + /// use std::cmp::Ordering; + /// + /// let bcp47_strings: &[&str] = &[ + /// "ca", + /// "ca-ES", + /// "ca-ES-u-ca-buddhist", + /// "ca-ES-valencia", + /// "ca-ES-x-gbp", + /// "ca-ES-x-gbp-short", + /// "ca-ES-x-usd", + /// "ca-ES-xyzabc", + /// "ca-x-eur", + /// "cat", + /// "pl-Latn-PL", + /// "und", + /// "und-fonipa", + /// "und-u-ca-hebrew", + /// "und-u-ca-japanese", + /// "und-x-mxn", + /// "zh", + /// ]; + /// + /// for ab in bcp47_strings.windows(2) { + /// let a = ab[0]; + /// let b = ab[1]; + /// assert_eq!(a.cmp(b), Ordering::Less, "strings: {} < {}", a, b); + /// let a_loc: DataLocale = a.parse().unwrap(); + /// assert_eq!( + /// a_loc.strict_cmp(a.as_bytes()), + /// Ordering::Equal, + /// "strict_cmp: {} == {}", + /// a_loc, + /// a + /// ); + /// assert_eq!( + /// a_loc.strict_cmp(b.as_bytes()), + /// Ordering::Less, + /// "strict_cmp: {} < {}", + /// a_loc, + /// b + /// ); + /// let b_loc: DataLocale = b.parse().unwrap(); + /// assert_eq!( + /// b_loc.strict_cmp(b.as_bytes()), + /// Ordering::Equal, + /// "strict_cmp: {} == {}", + /// b_loc, + /// b + /// ); + /// assert_eq!( + /// b_loc.strict_cmp(a.as_bytes()), + /// Ordering::Greater, + /// "strict_cmp: {} > {}", + /// b_loc, + /// a + /// ); + /// } + /// ``` + /// + /// Comparison against invalid strings: + /// + /// ``` + /// use icu_provider::DataLocale; + /// + /// let invalid_strings: &[&str] = &[ + /// // Less than "ca-ES" + /// "CA", + /// "ar-x-gbp-FOO", + /// // Greater than "ca-ES-x-gbp" + /// "ca_ES", + /// 
"ca-ES-x-gbp-FOO", + /// ]; + /// + /// let data_locale = "ca-ES-x-gbp".parse::<DataLocale>().unwrap(); + /// + /// for s in invalid_strings.iter() { + /// let expected_ordering = "ca-ES-x-gbp".cmp(s); + /// let actual_ordering = data_locale.strict_cmp(s.as_bytes()); + /// assert_eq!(expected_ordering, actual_ordering, "{}", s); + /// } + /// ``` + pub fn strict_cmp(&self, other: &[u8]) -> Ordering { + let subtags = other.split(|b| *b == b'-'); + let mut subtag_result = self.langid.strict_cmp_iter(subtags); + if self.has_unicode_ext() { + let mut subtags = match subtag_result { + SubtagOrderingResult::Subtags(s) => s, + SubtagOrderingResult::Ordering(o) => return o, + }; + match subtags.next() { + Some(b"u") => (), + Some(s) => return s.cmp(b"u").reverse(), + None => return Ordering::Greater, + } + subtag_result = self.keywords.strict_cmp_iter(subtags); + } + #[cfg(feature = "experimental")] + if let Some(aux) = self.get_aux() { + let mut subtags = match subtag_result { + SubtagOrderingResult::Subtags(s) => s, + SubtagOrderingResult::Ordering(o) => return o, + }; + match subtags.next() { + Some(b"x") => (), + Some(s) => return s.cmp(b"x").reverse(), + None => return Ordering::Greater, + } + subtag_result = aux.strict_cmp_iter(subtags); + } + subtag_result.end() + } +} + +impl DataLocale { + /// Returns whether this [`DataLocale`] has all empty fields (no components). + /// + /// See also: + /// + /// - [`DataLocale::is_und()`] + /// - [`DataLocale::is_langid_und()`] + /// + /// # Examples + /// + /// ``` + /// use icu_provider::DataLocale; + /// + /// assert!("und".parse::<DataLocale>().unwrap().is_empty()); + /// assert!(!"und-u-ca-buddhist" + /// .parse::<DataLocale>() + /// .unwrap() + /// .is_empty()); + /// assert!(!"und-x-aux".parse::<DataLocale>().unwrap().is_empty()); + /// assert!(!"ca-ES".parse::<DataLocale>().unwrap().is_empty()); + /// ``` + pub fn is_empty(&self) -> bool { + self == <&DataLocale>::default() + } + + /// Returns whether this [`DataLocale`] is `und` in the locale and extensions portion. 
+ /// + /// This ignores auxiliary keys. + /// + /// See also: + /// + /// - [`DataLocale::is_empty()`] + /// - [`DataLocale::is_langid_und()`] + /// + /// # Examples + /// + /// ``` + /// use icu_provider::DataLocale; + /// + /// assert!("und".parse::<DataLocale>().unwrap().is_und()); + /// assert!(!"und-u-ca-buddhist".parse::<DataLocale>().unwrap().is_und()); + /// assert!("und-x-aux".parse::<DataLocale>().unwrap().is_und()); + /// assert!(!"ca-ES".parse::<DataLocale>().unwrap().is_und()); + /// ``` + pub fn is_und(&self) -> bool { + self.langid == LanguageIdentifier::UND && self.keywords.is_empty() + } + + /// Returns whether the [`LanguageIdentifier`] associated with this request is `und`. + /// + /// This ignores extension keywords and auxiliary keys. + /// + /// See also: + /// + /// - [`DataLocale::is_empty()`] + /// - [`DataLocale::is_und()`] + /// + /// # Examples + /// + /// ``` + /// use icu_provider::DataLocale; + /// + /// assert!("und".parse::<DataLocale>().unwrap().is_langid_und()); + /// assert!("und-u-ca-buddhist" + /// .parse::<DataLocale>() + /// .unwrap() + /// .is_langid_und()); + /// assert!("und-x-aux".parse::<DataLocale>().unwrap().is_langid_und()); + /// assert!(!"ca-ES".parse::<DataLocale>().unwrap().is_langid_und()); + /// ``` + pub fn is_langid_und(&self) -> bool { + self.langid == LanguageIdentifier::UND + } + + /// Gets the [`LanguageIdentifier`] for this [`DataLocale`]. + /// + /// This may allocate memory if there are variant subtags. If you need only the language, + /// script, and/or region subtag, use the specific getters for those subtags: + /// + /// - [`DataLocale::language()`] + /// - [`DataLocale::script()`] + /// - [`DataLocale::region()`] + /// + /// If you have ownership over the `DataLocale`, use [`DataLocale::into_locale()`] + /// and then access the `id` field. 
+ /// + /// # Examples + /// + /// ``` + /// use icu_locid::langid; + /// use icu_provider::prelude::*; + /// + /// const FOO_BAR: DataKey = icu_provider::data_key!("foo/bar@1"); + /// + /// let req_no_langid = DataRequest { + /// locale: &Default::default(), + /// metadata: Default::default(), + /// }; + /// + /// let req_with_langid = DataRequest { + /// locale: &langid!("ar-EG").into(), + /// metadata: Default::default(), + /// }; + /// + /// assert_eq!(req_no_langid.locale.get_langid(), langid!("und")); + /// assert_eq!(req_with_langid.locale.get_langid(), langid!("ar-EG")); + /// ``` + pub fn get_langid(&self) -> LanguageIdentifier { + self.langid.clone() + } + + /// Overrides the entire [`LanguageIdentifier`] portion of this [`DataLocale`]. + #[inline] + pub fn set_langid(&mut self, lid: LanguageIdentifier) { + self.langid = lid; + } + + /// Converts this [`DataLocale`] into a [`Locale`]. + /// + /// See also [`DataLocale::get_langid()`]. + /// + /// # Examples + /// + /// ``` + /// use icu_locid::{ + /// langid, locale, + /// subtags::{language, region}, + /// Locale, + /// }; + /// use icu_provider::prelude::*; + /// + /// let locale: DataLocale = locale!("it-IT-u-ca-coptic").into(); + /// + /// assert_eq!(locale.get_langid(), langid!("it-IT")); + /// assert_eq!(locale.language(), language!("it")); + /// assert_eq!(locale.script(), None); + /// assert_eq!(locale.region(), Some(region!("IT"))); + /// + /// let locale = locale.into_locale(); + /// assert_eq!(locale, locale!("it-IT-u-ca-coptic")); + /// ``` + /// + /// Auxiliary keys are retained: + /// + /// ``` + /// use icu_locid::Locale; + /// use icu_provider::prelude::*; + /// use writeable::assert_writeable_eq; + /// + /// let locale: Locale = "und-u-nu-arab-x-gbp".parse().unwrap(); + /// let data_locale = DataLocale::from(locale); + /// assert_writeable_eq!(data_locale, "und-u-nu-arab-x-gbp"); + /// + /// let recovered_locale = data_locale.into_locale(); + /// assert_writeable_eq!(recovered_locale, 
"und-u-nu-arab-x-gbp"); + /// ``` + pub fn into_locale(self) -> Locale { + let mut loc = Locale { + id: self.langid, + ..Default::default() + }; + loc.extensions.unicode.keywords = self.keywords; + #[cfg(feature = "experimental")] + if let Some(aux) = self.aux { + loc.extensions.private = + icu_locid::extensions::private::Private::from_vec_unchecked(aux.iter().collect()); + } + loc + } + + /// Returns the [`Language`] for this [`DataLocale`]. + #[inline] + pub fn language(&self) -> Language { + self.langid.language + } + + /// Sets the [`Language`] for this [`DataLocale`]. + #[inline] + pub fn set_language(&mut self, language: Language) { + self.langid.language = language; + } + + /// Returns the [`Script`] for this [`DataLocale`]. + #[inline] + pub fn script(&self) -> Option