From ef24de24a82fe681581cc130f342363c47c0969a Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 7 Jun 2024 07:48:48 +0200 Subject: Merging upstream version 1.75.0+dfsg1. Signed-off-by: Daniel Baumann --- vendor/icu_provider/src/any.rs | 16 +- vendor/icu_provider/src/buf.rs | 5 +- vendor/icu_provider/src/constructors.rs | 302 +++++++++- vendor/icu_provider/src/datagen/data_conversion.rs | 10 +- vendor/icu_provider/src/datagen/mod.rs | 128 +++- vendor/icu_provider/src/datagen/payload.rs | 99 ++- vendor/icu_provider/src/dynutil.rs | 15 +- vendor/icu_provider/src/error.rs | 34 +- vendor/icu_provider/src/fallback.rs | 201 +++++++ vendor/icu_provider/src/hello_world.rs | 86 ++- vendor/icu_provider/src/helpers.rs | 309 ---------- vendor/icu_provider/src/key.rs | 173 ++++-- vendor/icu_provider/src/lib.rs | 30 +- vendor/icu_provider/src/request.rs | 668 +++++++++++++++++++-- vendor/icu_provider/src/response.rs | 185 ++++-- vendor/icu_provider/src/serde/mod.rs | 36 ++ 16 files changed, 1753 insertions(+), 544 deletions(-) create mode 100644 vendor/icu_provider/src/fallback.rs delete mode 100644 vendor/icu_provider/src/helpers.rs (limited to 'vendor/icu_provider/src') diff --git a/vendor/icu_provider/src/any.rs b/vendor/icu_provider/src/any.rs index c3af61fed..8e1b3c468 100644 --- a/vendor/icu_provider/src/any.rs +++ b/vendor/icu_provider/src/any.rs @@ -5,6 +5,7 @@ //! Traits for data providers that produce `Any` objects. use crate::prelude::*; +use crate::response::DataPayloadInner; use core::any::Any; use core::convert::TryFrom; use core::convert::TryInto; @@ -105,7 +106,7 @@ impl AnyPayload { let down_ref: &'static M::Yokeable = any_ref .downcast_ref() .ok_or_else(|| DataError::for_type::().with_str_context(type_name))?; - Ok(DataPayload::from_owned(M::Yokeable::zero_from(down_ref))) + Ok(DataPayload::from_static_ref(down_ref)) } PayloadRc(any_rc) => { let down_rc = any_rc @@ -166,8 +167,8 @@ where M: DataMarker, M::Yokeable: MaybeSendSync, { - /// Moves this DataPayload to the heap (requiring an allocation) and returns it as an - /// erased `AnyPayload`. + /// Converts this DataPayload into a type-erased `AnyPayload`. Unless the payload stores a static + /// reference, this will move it to the heap. /// /// # Examples /// @@ -190,7 +191,10 @@ where /// ``` pub fn wrap_into_any_payload(self) -> AnyPayload { AnyPayload { - inner: AnyPayloadInner::PayloadRc(SelectedRc::from(self)), + inner: match self.0 { + DataPayloadInner::StaticRef(r) => AnyPayloadInner::StructRef(r), + inner => AnyPayloadInner::PayloadRc(SelectedRc::from(Self(inner))), + }, type_name: core::any::type_name::(), } } @@ -219,7 +223,7 @@ pub struct AnyResponse { /// Metadata about the returned object. pub metadata: DataResponseMetadata, - /// The object itself; None if it was not loaded. + /// The object itself; `None` if it was not loaded. pub payload: Option, } @@ -365,7 +369,7 @@ pub trait AsDynamicDataProviderAnyMarkerWrap { impl

AsDynamicDataProviderAnyMarkerWrap for P where - P: DynamicDataProvider, + P: DynamicDataProvider + ?Sized, { #[inline] fn as_any_provider(&self) -> DynamicDataProviderAnyMarkerWrap

{ diff --git a/vendor/icu_provider/src/buf.rs b/vendor/icu_provider/src/buf.rs index 5e8264998..1d55e9ff9 100644 --- a/vendor/icu_provider/src/buf.rs +++ b/vendor/icu_provider/src/buf.rs @@ -21,9 +21,10 @@ impl DataMarker for BufferMarker { /// A data provider that returns opaque bytes. /// /// Generally, these bytes are expected to be deserializable with Serde. To get an object -/// implementing [`DataProvider`] via Serde, use [`as_deserializing()`], which requires -/// enabling at least one of the deserialization Cargo features: +/// implementing [`DataProvider`] via Serde, use [`as_deserializing()`]. /// +/// Passing a `BufferProvider` to a `*_with_buffer_provider` constructor requires enabling +/// the deserialization Cargo feature for the expected format(s): /// - `deserialize_json` /// - `deserialize_postcard_1` /// - `deserialize_bincode_1` diff --git a/vendor/icu_provider/src/constructors.rs b/vendor/icu_provider/src/constructors.rs index 053da0320..f521f1fea 100644 --- a/vendor/icu_provider/src/constructors.rs +++ b/vendor/icu_provider/src/constructors.rs @@ -7,7 +7,7 @@ //! One of the key differences between ICU4X and its parent projects, ICU4C and ICU4J, is in how //! it deals with locale data. //! -//! In ICU4X, the data provider is an *explicit argument* whenever it is required by the library. +//! In ICU4X, data can always be explicitly passed to any function that requires data. //! This enables ICU4X to achieve the following value propositions: //! //! 1. Configurable data sources (machine-readable data file, baked into code, JSON, etc). @@ -15,21 +15,33 @@ //! 3. Reduced overhead and code size (data is resolved locally at each call site). //! 4. Explicit support for multiple ICU4X instances sharing data. //! -//! In order to achieve these goals, there are 3 versions of all Rust ICU4X functions that -//! take a data provider argument: +//! However, as manual data management can be tedious, ICU4X also has a `compiled_data` +//! default Cargo feature that includes data and makes ICU4X work out-of-the box. //! -//! 1. `*_unstable` -//! 2. `*_with_any_provider` -//! 3. `*_with_buffer_provider` +//! Subsequently, there are 4 versions of all Rust ICU4X functions that use data: +//! +//! 1. `*` +//! 2. `*_unstable` +//! 3. `*_with_any_provider` +//! 4. `*_with_buffer_provider` //! //! # Which constructor should I use? //! +//! ## When to use `*` +//! +//! If you don't want to customize data at runtime (i.e. if you don't care about code size, +//! updating your data, etc.) you can use the `compiled_data` Cargo feature and don't have to think +//! about where your data comes from. +//! +//! These constructors are sometimes `const` functions, this way Rust can most effectively optimize +//! your usage of ICU4X. +//! //! ## When to use `*_unstable` //! //! Use this constructor if your data provider implements the [`DataProvider`] trait for all //! data structs in *current and future* ICU4X versions. Examples: //! -//! 1. `BakedDataProvider` auto-regenerated on new ICU4X versions +//! 1. `BakedDataProvider` generated for the specific ICU4X minor version //! 2. Anything with a _blanket_ [`DataProvider`] impl //! //! Since the exact set of bounds may change at any time, including in minor SemVer releases, @@ -52,9 +64,9 @@ //! //! 1. [`BlobDataProvider`] //! 2. [`FsDataProvider`] -//! 3. [`ForkByKeyProvider`] between any of the above +//! 3. [`ForkByKeyProvider`] between two providers implementing [`BufferProvider`] //! -//! Please note that you must enable the `"serde"` Cargo feature on each crate in which you use the +//! Please note that you must enable the `serde` Cargo feature on each crate in which you use the //! `*_with_buffer_provider` constructor. //! //! # Data Versioning Policy @@ -66,7 +78,7 @@ //! version 1.2 will be able to read the same data file. Likewise, backwards-compatible keys can //! always be included by `icu_datagen` to support older library versions. //! -//! The `*_unstable` functions are only guaranteed to work on data built for the exact same version +//! The `*_unstable` functions are only guaranteed to work on data built for the exact same minor version //! of ICU4X. The advantage of the `*_unstable` functions is that they result in the smallest code //! size and allow for automatic data slicing when `BakedDataProvider` is used. However, the type //! bounds of this function may change over time, breaking SemVer guarantees. These functions @@ -76,20 +88,19 @@ //! # Data Providers Over FFI //! //! Over FFI, there is only one data provider type: [`ICU4XDataProvider`]. Internally, it is an -//! `enum` between `dyn `[`AnyProvider`] and `dyn `[`BufferProvider`]. +//! `enum` between`dyn `[`BufferProvider`] and a unit compiled data variant. //! -//! To control for code size, there are two Cargo features, `any_provider` and `buffer_provider`, +//! To control for code size, there are two Cargo features, `compiled_data` and `buffer_provider`, //! that enable the corresponding items in the enum. //! -//! In Rust ICU4X, a similar buffer/any enum approach was not taken because: +//! In Rust ICU4X, a similar enum approach was not taken because: //! //! 1. Feature-gating the enum branches gets complex across crates. //! 2. Without feature gating, users need to carry Serde code even if they're not using it, //! violating one of the core value propositions of ICU4X. -//! 3. We could reduce the number of constructors from 3 to 2 but not to 1, so the educational +//! 3. We could reduce the number of constructors from 4 to 2 but not to 1, so the educational //! benefit is limited. //! -//! //! [`DataProvider`]: crate::DataProvider //! [`BufferProvider`]: crate::BufferProvider //! [`AnyProvider`]: crate::AnyProvider @@ -99,3 +110,264 @@ //! [`StaticDataProvider`]: ../../icu_provider_blob/struct.StaticDataProvider.html //! [`FsDataProvider`]: ../../icu_provider_blob/struct.FsDataProvider.html //! [`ICU4XDataProvider`]: ../../icu_capi/provider/ffi/struct.ICU4XDataProvider.html + +#[doc(hidden)] +#[macro_export] +macro_rules! gen_any_buffer_unstable_docs { + (ANY, $data:path) => { + concat!( + "A version of [`", stringify!($data), "`] that uses custom data ", + "provided by an [`AnyProvider`](icu_provider::AnyProvider).\n\n", + "[📚 Help choosing a constructor](icu_provider::constructors)", + ) + }; + (BUFFER, $data:path) => { + concat!( + "A version of [`", stringify!($data), "`] that uses custom data ", + "provided by a [`BufferProvider`](icu_provider::BufferProvider).\n\n", + "✨ *Enabled with the `serde` feature.*\n\n", + "[📚 Help choosing a constructor](icu_provider::constructors)", + ) + }; + (UNSTABLE, $data:path) => { + concat!( + "A version of [`", stringify!($data), "`] that uses custom data ", + "provided by a [`DataProvider`](icu_provider::DataProvider).\n\n", + "[📚 Help choosing a constructor](icu_provider::constructors)\n\n", + "

⚠️ The bounds on provider may change over time, including in SemVer minor releases.
" + ) + }; +} + +#[allow(clippy::crate_in_macro_def)] // by convention each crate's data provider is `crate::provider::Baked` +#[doc(hidden)] +#[macro_export] +macro_rules! gen_any_buffer_data_constructors { + (locale: skip, options: skip, error: $error_ty:path, $(#[$doc:meta])+) => { + $crate::gen_any_buffer_data_constructors!( + locale: skip, + options: skip, + error: $error_ty, + $(#[$doc])+ + functions: [ + try_new, + try_new_with_any_provider, + try_new_with_buffer_provider, + try_new_unstable, + Self, + ] + ); + }; + (locale: skip, options: skip, error: $error_ty:path, $(#[$doc:meta])+ functions: [$baked:ident, $any:ident, $buffer:ident, $unstable:ident $(, $struct:ident)? $(,)?]) => { + #[cfg(feature = "compiled_data")] + $(#[$doc])+ + pub fn $baked() -> Result { + $($struct :: )? $unstable(&crate::provider::Baked) + } + #[doc = $crate::gen_any_buffer_unstable_docs!(ANY, $($struct ::)? $baked)] + pub fn $any(provider: &(impl $crate::AnyProvider + ?Sized)) -> Result { + use $crate::AsDowncastingAnyProvider; + $($struct :: )? $unstable(&provider.as_downcasting()) + } + #[cfg(feature = "serde")] + #[doc = $crate::gen_any_buffer_unstable_docs!(BUFFER, $($struct ::)? $baked)] + pub fn $buffer(provider: &(impl $crate::BufferProvider + ?Sized)) -> Result { + use $crate::AsDeserializingBufferProvider; + $($struct :: )? $unstable(&provider.as_deserializing()) + } + }; + + + (locale: skip, options: skip, result: $result_ty:path, $(#[$doc:meta])+ functions: [$baked:ident, $any:ident, $buffer:ident, $unstable:ident $(, $struct:ident)? $(,)?]) => { + #[cfg(feature = "compiled_data")] + $(#[$doc])+ + pub fn $baked() -> $result_ty { + $($struct :: )? $unstable(&crate::provider::Baked) + } + #[doc = $crate::gen_any_buffer_unstable_docs!(ANY, $($struct ::)? $baked)] + pub fn $any(provider: &(impl $crate::AnyProvider + ?Sized)) -> $result_ty { + use $crate::AsDowncastingAnyProvider; + $($struct :: )? $unstable(&provider.as_downcasting()) + } + #[cfg(feature = "serde")] + #[doc = $crate::gen_any_buffer_unstable_docs!(BUFFER, $($struct ::)? $baked)] + pub fn $buffer(provider: &(impl $crate::BufferProvider + ?Sized)) -> $result_ty { + use $crate::AsDeserializingBufferProvider; + $($struct :: )? $unstable(&provider.as_deserializing()) + } + }; + + (locale: skip, $options_arg:ident: $options_ty:ty, error: $error_ty:path, $(#[$doc:meta])+) => { + $crate::gen_any_buffer_data_constructors!( + locale: skip, + $options_arg: $options_ty, + error: $error_ty, + $(#[$doc])+ + functions: [ + try_new, + try_new_with_any_provider, + try_new_with_buffer_provider, + try_new_unstable, + Self, + ] + ); + }; + (locale: skip, $options_arg:ident: $options_ty:ty, result: $result_ty:ty, $(#[$doc:meta])+ functions: [$baked:ident, $any:ident, $buffer:ident, $unstable:ident $(, $struct:ident)? $(,)?]) => { + #[cfg(feature = "compiled_data")] + $(#[$doc])+ + /// + /// ✨ *Enabled with the `compiled_data` Cargo feature.* + /// + /// [📚 Help choosing a constructor](icu_provider::constructors) + pub fn $baked($options_arg: $options_ty) -> $result_ty { + $($struct :: )? $unstable(&crate::provider::Baked, $options_arg) + } + #[doc = $crate::gen_any_buffer_unstable_docs!(ANY, $($struct ::)? $baked)] + pub fn $any(provider: &(impl $crate::AnyProvider + ?Sized), $options_arg: $options_ty) -> $result_ty { + use $crate::AsDowncastingAnyProvider; + $($struct :: )? $unstable(&provider.as_downcasting(), $options_arg) + } + #[cfg(feature = "serde")] + #[doc = $crate::gen_any_buffer_unstable_docs!(BUFFER, $($struct ::)? $baked)] + pub fn $buffer(provider: &(impl $crate::BufferProvider + ?Sized), $options_arg: $options_ty) -> $result_ty { + use $crate::AsDeserializingBufferProvider; + $($struct :: )? $unstable(&provider.as_deserializing(), $options_arg) + } + }; + (locale: skip, $options_arg:ident: $options_ty:ty, error: $error_ty:ty, $(#[$doc:meta])+ functions: [$baked:ident, $any:ident, $buffer:ident, $unstable:ident $(, $struct:ident)? $(,)?]) => { + #[cfg(feature = "compiled_data")] + $(#[$doc])+ + /// + /// ✨ *Enabled with the `compiled_data` Cargo feature.* + /// + /// [📚 Help choosing a constructor](icu_provider::constructors) + pub fn $baked($options_arg: $options_ty) -> Result { + $($struct :: )? $unstable(&crate::provider::Baked, $options_arg) + } + #[doc = $crate::gen_any_buffer_unstable_docs!(ANY, $($struct ::)? $baked)] + pub fn $any(provider: &(impl $crate::AnyProvider + ?Sized), $options_arg: $options_ty) -> Result { + use $crate::AsDowncastingAnyProvider; + $($struct :: )? $unstable(&provider.as_downcasting(), $options_arg) + } + #[cfg(feature = "serde")] + #[doc = $crate::gen_any_buffer_unstable_docs!(BUFFER, $($struct ::)? $baked)] + pub fn $buffer(provider: &(impl $crate::BufferProvider + ?Sized), $options_arg: $options_ty) -> Result { + use $crate::AsDeserializingBufferProvider; + $($struct :: )? $unstable(&provider.as_deserializing(), $options_arg) + } + }; + (locale: include, options: skip, error: $error_ty:path, $(#[$doc:meta])+) => { + $crate::gen_any_buffer_data_constructors!( + locale: include, + options: skip, + error: $error_ty, + $(#[$doc])+ + functions: [ + try_new, + try_new_with_any_provider, + try_new_with_buffer_provider, + try_new_unstable, + Self, + ] + ); + }; + (locale: include, options: skip, error: $error_ty:path, $(#[$doc:meta])+ functions: [$baked:ident, $any:ident, $buffer:ident, $unstable:ident $(, $struct:ident)? $(,)?]) => { + #[cfg(feature = "compiled_data")] + $(#[$doc])+ + /// + /// ✨ *Enabled with the `compiled_data` Cargo feature.* + /// + /// [📚 Help choosing a constructor](icu_provider::constructors) + pub fn $baked(locale: &$crate::DataLocale) -> Result { + $($struct :: )? $unstable(&crate::provider::Baked, locale) + } + #[doc = $crate::gen_any_buffer_unstable_docs!(ANY, $($struct ::)? $baked)] + pub fn $any(provider: &(impl $crate::AnyProvider + ?Sized), locale: &$crate::DataLocale) -> Result { + use $crate::AsDowncastingAnyProvider; + $($struct :: )? $unstable(&provider.as_downcasting(), locale) + } + #[cfg(feature = "serde")] + #[doc = $crate::gen_any_buffer_unstable_docs!(BUFFER, $($struct ::)? $baked)] + pub fn $buffer(provider: &(impl $crate::BufferProvider + ?Sized), locale: &$crate::DataLocale) -> Result { + use $crate::AsDeserializingBufferProvider; + $($struct :: )? $unstable(&provider.as_deserializing(), locale) + } + }; + + (locale: include, $config_arg:ident: $config_ty:path, $options_arg:ident: $options_ty:path, error: $error_ty:path, $(#[$doc:meta])+) => { + $crate::gen_any_buffer_data_constructors!( + locale: include, + $config_arg: $config_ty, + $options_arg: $options_ty, + error: $error_ty, + $(#[$doc])+ + functions: [ + try_new, + try_new_with_any_provider, + try_new_with_buffer_provider, + try_new_unstable, + Self, + ] + ); + }; + (locale: include, $config_arg:ident: $config_ty:path, $options_arg:ident: $options_ty:path, error: $error_ty:path, $(#[$doc:meta])+ functions: [$baked:ident, $any:ident, $buffer:ident, $unstable:ident $(, $struct:ident)? $(,)?]) => { + #[cfg(feature = "compiled_data")] + $(#[$doc])+ + /// + /// ✨ *Enabled with the `compiled_data` Cargo feature.* + /// + /// [📚 Help choosing a constructor](icu_provider::constructors) + pub fn $baked(locale: &$crate::DataLocale, $config_arg: $config_ty, $options_arg: $options_ty) -> Result { + $($struct :: )? $unstable(&crate::provider::Baked, locale, $config_arg, $options_arg) + } + #[doc = $crate::gen_any_buffer_unstable_docs!(ANY, $($struct ::)? $baked)] + pub fn $any(provider: &(impl $crate::AnyProvider + ?Sized), locale: &$crate::DataLocale, $config_arg: $config_ty, $options_arg: $options_ty) -> Result { + use $crate::AsDowncastingAnyProvider; + $($struct :: )? $unstable(&provider.as_downcasting(), locale, $config_arg, $options_arg) + } + #[cfg(feature = "serde")] + #[doc = $crate::gen_any_buffer_unstable_docs!(BUFFER, $($struct ::)? $baked)] + pub fn $buffer(provider: &(impl $crate::BufferProvider + ?Sized), locale: &$crate::DataLocale, $config_arg: $config_ty, $options_arg: $options_ty) -> Result { + use $crate::AsDeserializingBufferProvider; + $($struct :: )? $unstable(&provider.as_deserializing(), locale, $config_arg, $options_arg) + } + }; + + (locale: include, $options_arg:ident: $options_ty:path, error: $error_ty:path, $(#[$doc:meta])+) => { + $crate::gen_any_buffer_data_constructors!( + locale: include, + $options_arg: $options_ty, + error: $error_ty, + $(#[$doc])+ + functions: [ + try_new, + try_new_with_any_provider, + try_new_with_buffer_provider, + try_new_unstable, + Self, + ] + ); + }; + (locale: include, $options_arg:ident: $options_ty:path, error: $error_ty:path, $(#[$doc:meta])+ functions: [$baked:ident, $any:ident, $buffer:ident, $unstable:ident $(, $struct:ident)? $(,)?]) => { + #[cfg(feature = "compiled_data")] + $(#[$doc])+ + /// + /// ✨ *Enabled with the `compiled_data` Cargo feature.* + /// + /// [📚 Help choosing a constructor](icu_provider::constructors) + pub fn $baked(locale: &$crate::DataLocale, $options_arg: $options_ty) -> Result { + $($struct :: )? $unstable(&crate::provider::Baked, locale, $options_arg) + } + #[doc = $crate::gen_any_buffer_unstable_docs!(ANY, $($struct ::)? $baked)] + pub fn $any(provider: &(impl $crate::AnyProvider + ?Sized), locale: &$crate::DataLocale, $options_arg: $options_ty) -> Result { + use $crate::AsDowncastingAnyProvider; + $($struct :: )? $unstable(&provider.as_downcasting(), locale, $options_arg) + } + #[cfg(feature = "serde")] + #[doc = $crate::gen_any_buffer_unstable_docs!(BUFFER, $($struct ::)? $baked)] + pub fn $buffer(provider: &(impl $crate::BufferProvider + ?Sized), locale: &$crate::DataLocale, $options_arg: $options_ty) -> Result { + use $crate::AsDeserializingBufferProvider; + $($struct :: )? $unstable(&provider.as_deserializing(), locale, $options_arg) + } + }; +} diff --git a/vendor/icu_provider/src/datagen/data_conversion.rs b/vendor/icu_provider/src/datagen/data_conversion.rs index 59146352a..f3ca948e1 100644 --- a/vendor/icu_provider/src/datagen/data_conversion.rs +++ b/vendor/icu_provider/src/datagen/data_conversion.rs @@ -9,16 +9,12 @@ use alloc::boxed::Box; /// A trait that allows for converting between data payloads of different types. /// /// These payloads will typically be some kind of erased payload, either with -/// AnyMarker, BufferMarker, or SerializeMarker, where converting requires reifying the type. +/// [`AnyMarker`], [`BufferMarker`], or [`ExportMarker`](crate::datagen::ExportMarker), where converting +/// requires reifying the type. +/// /// A type implementing [`DataConverter`] will essentially have a "registry" mapping keys to /// concrete marker types M, and reifying the input to a `DataPayload`, performing some conversion /// or computation, and erasing the result to `DataPayload`. -/// -/// It will typically be implemented on data providers used in datagen. -/// -/// The [`make_exportable_provider!`] macro is able to automatically implement this trait. -/// -/// [`make_exportable_provider!`]: crate::make_exportable_provider pub trait DataConverter { /// Attempt to convert a payload corresponding to the given data key /// from one marker type to another marker type. diff --git a/vendor/icu_provider/src/datagen/mod.rs b/vendor/icu_provider/src/datagen/mod.rs index 6596a0c07..ae1779ab3 100644 --- a/vendor/icu_provider/src/datagen/mod.rs +++ b/vendor/icu_provider/src/datagen/mod.rs @@ -21,6 +21,14 @@ pub use payload::{ExportBox, ExportMarker}; use crate::prelude::*; +/// The type of built-in fallback that the data was generated for, if applicable. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +#[non_exhaustive] +pub enum BuiltInFallbackMode { + /// Data uses full UTS 35 fallbacking. + Standard, +} + /// An object capable of exporting data payloads in some form. pub trait DataExporter: Sync { /// Save a `payload` corresponding to the given key and locale. @@ -32,7 +40,34 @@ pub trait DataExporter: Sync { payload: &DataPayload, ) -> Result<(), DataError>; - /// Function called after all keys have been fully dumped. + /// Function called for singleton keys. + /// Takes non-mut self as it can be called concurrently. + fn flush_singleton( + &self, + key: DataKey, + payload: &DataPayload, + ) -> Result<(), DataError> { + self.put_payload(key, &Default::default(), payload)?; + self.flush(key) + } + + /// Function called after a non-singleton key has been fully enumerated, + /// flushing that key with built-in fallback. + /// + /// Takes non-mut self as it can be called concurrently. + fn flush_with_built_in_fallback( + &self, + _key: DataKey, + _fallback_mode: BuiltInFallbackMode, + ) -> Result<(), DataError> { + Err(DataError::custom( + "Exporter does not implement built-in fallback", + )) + } + + /// Function called after a non-singleton key has been fully enumerated. + /// Does not include built-in fallback. + /// /// Takes non-mut self as it can be called concurrently. fn flush(&self, _key: DataKey) -> Result<(), DataError> { Ok(()) @@ -44,13 +79,26 @@ pub trait DataExporter: Sync { fn close(&mut self) -> Result<(), DataError> { Ok(()) } + + /// Returns whether the provider supports built-in fallback. If `true`, the provider must + /// implement [`Self::flush_with_built_in_fallback()`]. + fn supports_built_in_fallback(&self) -> bool { + false + } } /// A [`DynamicDataProvider`] that can be used for exporting data. /// /// Use [`make_exportable_provider`](crate::make_exportable_provider) to implement this. -pub trait ExportableProvider: IterableDynamicDataProvider + Sync {} -impl ExportableProvider for T where T: IterableDynamicDataProvider + Sync {} +pub trait ExportableProvider: + IterableDynamicDataProvider + DynamicDataProvider + Sync +{ +} + +impl ExportableProvider for T where + T: IterableDynamicDataProvider + DynamicDataProvider + Sync +{ +} /// This macro can be used on a data provider to allow it to be used for data generation. /// @@ -66,28 +114,24 @@ impl ExportableProvider for T where T: IterableDynamicDataProvider { + ($provider:ty, [ $($(#[$cfg:meta])? $struct_m:ty),+, ]) => { $crate::impl_dynamic_data_provider!( $provider, - [ $($struct_m),+, ], + [ $($(#[$cfg])? $struct_m),+, ], $crate::datagen::ExportMarker ); $crate::impl_dynamic_data_provider!( $provider, - [ $($struct_m),+, ], + [ $($(#[$cfg])? $struct_m),+, ], $crate::any::AnyMarker ); impl $crate::datagen::IterableDynamicDataProvider<$crate::datagen::ExportMarker> for $provider { fn supported_locales_for_key(&self, key: $crate::DataKey) -> Result, $crate::DataError> { - #![allow(non_upper_case_globals)] - // Reusing the struct names as identifiers - $( - const $struct_m: $crate::DataKeyHash = <$struct_m as $crate::KeyedDataMarker>::KEY.hashed(); - )+ match key.hashed() { $( - $struct_m => { + $(#[$cfg])? + h if h == <$struct_m as $crate::KeyedDataMarker>::KEY.hashed() => { $crate::datagen::IterableDataProvider::<$struct_m>::supported_locales(self) } )+, @@ -97,3 +141,63 @@ macro_rules! make_exportable_provider { } }; } + +/// A `DataExporter` that forks to multiple `DataExporter`s. +#[derive(Default)] +pub struct MultiExporter(Vec>); + +impl MultiExporter { + /// Creates a `MultiExporter` for the given exporters. + pub const fn new(exporters: Vec>) -> Self { + Self(exporters) + } +} + +impl core::fmt::Debug for MultiExporter { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("MultiExporter") + .field("0", &format!("vec[len = {}]", self.0.len())) + .finish() + } +} + +impl DataExporter for MultiExporter { + fn put_payload( + &self, + key: DataKey, + locale: &DataLocale, + payload: &DataPayload, + ) -> Result<(), DataError> { + self.0 + .iter() + .try_for_each(|e| e.put_payload(key, locale, payload)) + } + + fn flush_singleton( + &self, + key: DataKey, + payload: &DataPayload, + ) -> Result<(), DataError> { + self.0 + .iter() + .try_for_each(|e| e.flush_singleton(key, payload)) + } + + fn flush(&self, key: DataKey) -> Result<(), DataError> { + self.0.iter().try_for_each(|e| e.flush(key)) + } + + fn flush_with_built_in_fallback( + &self, + key: DataKey, + fallback_mode: BuiltInFallbackMode, + ) -> Result<(), DataError> { + self.0 + .iter() + .try_for_each(|e| e.flush_with_built_in_fallback(key, fallback_mode)) + } + + fn close(&mut self) -> Result<(), DataError> { + self.0.iter_mut().try_for_each(|e| e.close()) + } +} diff --git a/vendor/icu_provider/src/datagen/payload.rs b/vendor/icu_provider/src/datagen/payload.rs index c0d0a8bb9..97e540b07 100644 --- a/vendor/icu_provider/src/datagen/payload.rs +++ b/vendor/icu_provider/src/datagen/payload.rs @@ -2,24 +2,29 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). +use core::any::Any; + use crate::dynutil::UpcastDataPayload; use crate::prelude::*; use alloc::boxed::Box; use databake::{Bake, CrateEnv, TokenStream}; +use yoke::trait_hack::YokeTraitHack; use yoke::*; -trait ExportableYoke { +trait ExportableDataPayload { fn bake_yoke(&self, env: &CrateEnv) -> TokenStream; fn serialize_yoke( &self, serializer: &mut dyn erased_serde::Serializer, ) -> Result<(), DataError>; + fn as_any(&self) -> &dyn Any; + fn eq_dyn(&self, other: &dyn ExportableDataPayload) -> bool; } -impl ExportableYoke for Yoke +impl ExportableDataPayload for DataPayload where - Y: for<'a> Yokeable<'a>, - for<'a> >::Output: Bake + serde::Serialize, + for<'a> >::Output: Bake + serde::Serialize, + for<'a> YokeTraitHack<>::Output>: PartialEq, { fn bake_yoke(&self, ctx: &CrateEnv) -> TokenStream { self.get().bake(ctx) @@ -35,12 +40,37 @@ where .map_err(|e| DataError::custom("Serde export").with_display_context(&e))?; Ok(()) } + + fn as_any(&self) -> &dyn Any { + self + } + + fn eq_dyn(&self, other: &dyn ExportableDataPayload) -> bool { + match other.as_any().downcast_ref::() { + Some(downcasted) => (*self).eq(downcasted), + None => { + debug_assert!( + false, + "cannot compare ExportableDataPayloads of different types: self is {:?} but other is {:?}", + self.type_id(), + other.as_any().type_id(), + ); + false + } + } + } } #[doc(hidden)] // exposed for make_exportable_provider #[derive(yoke::Yokeable)] pub struct ExportBox { - payload: Box, + payload: Box, +} + +impl PartialEq for ExportBox { + fn eq(&self, other: &Self) -> bool { + self.payload.eq_dyn(&*other.payload) + } } impl core::fmt::Debug for ExportBox { @@ -54,12 +84,13 @@ impl core::fmt::Debug for ExportBox { impl UpcastDataPayload for ExportMarker where M: DataMarker, - M::Yokeable: Sync, + M::Yokeable: Sync + Send, for<'a> >::Output: Bake + serde::Serialize, + for<'a> YokeTraitHack<>::Output>: PartialEq, { fn upcast(other: DataPayload) -> DataPayload { DataPayload::from_owned(ExportBox { - payload: Box::new(other.yoke), + payload: Box::new(other), }) } } @@ -117,7 +148,7 @@ impl DataPayload { /// let tokens = export.tokenize(&env); /// assert_eq!( /// quote! { - /// ::icu_provider::hello_world::HelloWorldV1 { + /// icu_provider::hello_world::HelloWorldV1 { /// message: alloc::borrow::Cow::Borrowed("(und) Hello World"), /// } /// } @@ -144,3 +175,55 @@ pub struct ExportMarker {} impl DataMarker for ExportMarker { type Yokeable = ExportBox; } + +#[cfg(test)] +mod tests { + use super::*; + use crate::hello_world::*; + + #[test] + fn test_compare_with_dyn() { + let payload1: DataPayload = DataPayload::from_owned(HelloWorldV1 { + message: "abc".into(), + }); + let payload2: DataPayload = DataPayload::from_owned(HelloWorldV1 { + message: "abc".into(), + }); + let payload3: DataPayload = DataPayload::from_owned(HelloWorldV1 { + message: "def".into(), + }); + + assert!(payload1.eq_dyn(&payload2)); + assert!(payload2.eq_dyn(&payload1)); + + assert!(!payload1.eq_dyn(&payload3)); + assert!(!payload3.eq_dyn(&payload1)); + } + + #[test] + fn test_export_marker_partial_eq() { + let payload1: DataPayload = + UpcastDataPayload::upcast(DataPayload::::from_owned( + HelloWorldV1 { + message: "abc".into(), + }, + )); + let payload2: DataPayload = + UpcastDataPayload::upcast(DataPayload::::from_owned( + HelloWorldV1 { + message: "abc".into(), + }, + )); + let payload3: DataPayload = + UpcastDataPayload::upcast(DataPayload::::from_owned( + HelloWorldV1 { + message: "def".into(), + }, + )); + + assert_eq!(payload1, payload2); + assert_eq!(payload2, payload1); + assert_ne!(payload1, payload3); + assert_ne!(payload3, payload1); + } +} diff --git a/vendor/icu_provider/src/dynutil.rs b/vendor/icu_provider/src/dynutil.rs index 5e1491e51..8ad7b7aa1 100644 --- a/vendor/icu_provider/src/dynutil.rs +++ b/vendor/icu_provider/src/dynutil.rs @@ -192,12 +192,9 @@ macro_rules! impl_dynamic_data_provider { $crate::DataResponse<$dyn_m>, $crate::DataError, > { - $( - const $ident: $crate::DataKeyHash = $key.hashed(); - )+ match key.hashed() { $( - $ident => { + h if h == $key.hashed() => { let result: $crate::DataResponse<$struct_m> = $crate::DynamicDataProvider::<$struct_m>::load_data(self, key, req)?; Ok($crate::DataResponse { @@ -226,7 +223,7 @@ macro_rules! impl_dynamic_data_provider { } }; - ($provider:ty, [ $($struct_m:ident),+, ], $dyn_m:path) => { + ($provider:ty, [ $($(#[$cfg:meta])? $struct_m:ty),+, ], $dyn_m:path) => { impl $crate::DynamicDataProvider<$dyn_m> for $provider { fn load_data( @@ -237,14 +234,10 @@ macro_rules! impl_dynamic_data_provider { $crate::DataResponse<$dyn_m>, $crate::DataError, > { - #![allow(non_upper_case_globals)] - // Reusing the struct names as identifiers - $( - const $struct_m: $crate::DataKeyHash = $struct_m::KEY.hashed(); - )+ match key.hashed() { $( - $struct_m => { + $(#[$cfg])? + h if h == <$struct_m>::KEY.hashed() => { let result: $crate::DataResponse<$struct_m> = $crate::DataProvider::load(self, req)?; Ok($crate::DataResponse { diff --git a/vendor/icu_provider/src/error.rs b/vendor/icu_provider/src/error.rs index 23f141e02..5fc19d1a0 100644 --- a/vendor/icu_provider/src/error.rs +++ b/vendor/icu_provider/src/error.rs @@ -47,6 +47,10 @@ pub enum DataErrorKind { #[displaydoc("Invalid state")] InvalidState, + /// The syntax of the [`DataKey`] or [`DataLocale`] was invalid. + #[displaydoc("Parse error for data key or data locale")] + KeyLocaleSyntax, + /// An unspecified error occurred, such as a Serde error. /// /// Check debug logs for potentially more information. @@ -198,20 +202,24 @@ impl DataError { /// Sets the string context of a DataError to the given type name, returning a modified error. #[inline] pub fn with_type_context(self) -> Self { + #[cfg(feature = "logging")] + if !self.silent { + log::warn!("{self}: Type context: {}", core::any::type_name::()); + } self.with_str_context(core::any::type_name::()) } /// Logs the data error with the given request, returning an error containing the resource key. /// - /// If the "log_error_context" Cargo feature is enabled, this logs the whole request. Either way, + /// If the "logging" Cargo feature is enabled, this logs the whole request. Either way, /// it returns an error with the resource key portion of the request as context. - #[cfg_attr(not(feature = "log_error_context"), allow(unused_variables))] + #[cfg_attr(not(feature = "logging"), allow(unused_variables))] pub fn with_req(mut self, key: DataKey, req: DataRequest) -> Self { if req.metadata.silent { self.silent = true; } // Don't write out a log for MissingDataKey since there is no context to add - #[cfg(feature = "log_error_context")] + #[cfg(feature = "logging")] if !self.silent && self.kind != DataErrorKind::MissingDataKey { log::warn!("{} (key: {}, request: {})", self, key, req); } @@ -220,12 +228,12 @@ impl DataError { /// Logs the data error with the given context, then return self. /// - /// This does not modify the error, but if the "log_error_context" Cargo feature is enabled, + /// This does not modify the error, but if the "logging" Cargo feature is enabled, /// it will print out the context. #[cfg(feature = "std")] - #[cfg_attr(not(feature = "log_error_context"), allow(unused_variables))] + #[cfg_attr(not(feature = "logging"), allow(unused_variables))] pub fn with_path_context + ?Sized>(self, path: &P) -> Self { - #[cfg(feature = "log_error_context")] + #[cfg(feature = "logging")] if !self.silent { log::warn!("{} (path: {:?})", self, path.as_ref()); } @@ -234,12 +242,12 @@ impl DataError { /// Logs the data error with the given context, then return self. /// - /// This does not modify the error, but if the "log_error_context" Cargo feature is enabled, + /// This does not modify the error, but if the "logging" Cargo feature is enabled, /// it will print out the context. - #[cfg_attr(not(feature = "log_error_context"), allow(unused_variables))] + #[cfg_attr(not(feature = "logging"), allow(unused_variables))] #[inline] pub fn with_display_context(self, context: &D) -> Self { - #[cfg(feature = "log_error_context")] + #[cfg(feature = "logging")] if !self.silent { log::warn!("{}: {}", self, context); } @@ -248,12 +256,12 @@ impl DataError { /// Logs the data error with the given context, then return self. /// - /// This does not modify the error, but if the "log_error_context" Cargo feature is enabled, + /// This does not modify the error, but if the "logging" Cargo feature is enabled, /// it will print out the context. - #[cfg_attr(not(feature = "log_error_context"), allow(unused_variables))] + #[cfg_attr(not(feature = "logging"), allow(unused_variables))] #[inline] pub fn with_debug_context(self, context: &D) -> Self { - #[cfg(feature = "log_error_context")] + #[cfg(feature = "logging")] if !self.silent { log::warn!("{}: {:?}", self, context); } @@ -277,7 +285,7 @@ impl std::error::Error for DataError {} #[cfg(feature = "std")] impl From for DataError { fn from(e: std::io::Error) -> Self { - #[cfg(feature = "log_error_context")] + #[cfg(feature = "logging")] log::warn!("I/O error: {}", e); DataErrorKind::Io(e.kind()).into_error() } diff --git a/vendor/icu_provider/src/fallback.rs b/vendor/icu_provider/src/fallback.rs new file mode 100644 index 000000000..5c4e13b8d --- /dev/null +++ b/vendor/icu_provider/src/fallback.rs @@ -0,0 +1,201 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Options to define fallback behaviour. +//! +//! These options are consumed by the `LocaleFallbacker` in the `icu_locid_transforms` crate +//! (or the `icu::locid_transforms` module), but are defined here because they are used by `DataKey`. + +use icu_locid::extensions::unicode::Key; + +/// Hint for which subtag to prioritize during fallback. +/// +/// For example, `"en-US"` might fall back to either `"en"` or `"und-US"` depending +/// on this enum. +#[derive(Debug, PartialEq, Eq, Copy, Clone, PartialOrd, Ord)] +#[non_exhaustive] +pub enum LocaleFallbackPriority { + /// Prioritize the language. This is the default behavior. + /// + /// For example, `"en-US"` should go to `"en"` and then `"und"`. + Language, + /// Prioritize the region. + /// + /// For example, `"en-US"` should go to `"und-US"` and then `"und"`. + Region, + /// Collation-specific fallback rules. Similar to language priority. + /// + /// For example, `"zh-Hant"` goes to `"zh"` before `"und"`. + Collation, +} + +impl LocaleFallbackPriority { + /// Const-friendly version of [`Default::default`]. + pub const fn const_default() -> Self { + Self::Language + } +} + +impl Default for LocaleFallbackPriority { + fn default() -> Self { + Self::const_default() + } +} + +/// What additional data is required to load when performing fallback. +#[derive(Debug, PartialEq, Eq, Copy, Clone, PartialOrd, Ord)] +#[non_exhaustive] +pub enum LocaleFallbackSupplement { + /// Collation supplement + Collation, +} + +/// Configuration settings for a particular fallback operation. +#[derive(Debug, Clone, PartialEq, Eq, Copy)] +#[non_exhaustive] +pub struct LocaleFallbackConfig { + /// Strategy for choosing which subtags to drop during locale fallback. + /// + /// # Examples + /// + /// Retain the language and script subtags until the final step: + /// + /// ``` + /// use icu_locid::locale; + /// use icu_locid_transform::fallback::LocaleFallbackConfig; + /// use icu_locid_transform::fallback::LocaleFallbackPriority; + /// use icu_locid_transform::LocaleFallbacker; + /// + /// // Set up the fallback iterator. + /// let fallbacker = LocaleFallbacker::new(); + /// let mut config = LocaleFallbackConfig::default(); + /// config.priority = LocaleFallbackPriority::Language; + /// let mut fallback_iterator = fallbacker + /// .for_config(config) + /// .fallback_for(locale!("ca-ES-valencia").into()); + /// + /// // Run the algorithm and check the results. + /// assert_eq!(fallback_iterator.get(), &locale!("ca-ES-valencia").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("ca-ES").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("ca-valencia").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("ca").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("und").into()); + /// ``` + /// + /// Retain the region subtag until the final step: + /// + /// ``` + /// use icu_locid::locale; + /// use icu_locid_transform::fallback::LocaleFallbackConfig; + /// use icu_locid_transform::fallback::LocaleFallbackPriority; + /// use icu_locid_transform::LocaleFallbacker; + /// + /// // Set up the fallback iterator. + /// let fallbacker = LocaleFallbacker::new(); + /// let mut config = LocaleFallbackConfig::default(); + /// config.priority = LocaleFallbackPriority::Region; + /// let mut fallback_iterator = fallbacker + /// .for_config(config) + /// .fallback_for(locale!("ca-ES-valencia").into()); + /// + /// // Run the algorithm and check the results. + /// assert_eq!(fallback_iterator.get(), &locale!("ca-ES-valencia").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("ca-ES").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("und-ES-valencia").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("und-ES").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("und").into()); + /// ``` + pub priority: LocaleFallbackPriority, + /// An extension keyword to retain during locale fallback. + /// + /// # Examples + /// + /// ``` + /// use icu_locid::locale; + /// use icu_locid_transform::fallback::LocaleFallbackConfig; + /// use icu_locid_transform::LocaleFallbacker; + /// + /// // Set up the fallback iterator. + /// let fallbacker = LocaleFallbacker::new(); + /// let mut config = LocaleFallbackConfig::default(); + /// config.extension_key = Some(icu_locid::extensions::unicode::key!("nu")); + /// let mut fallback_iterator = fallbacker + /// .for_config(config) + /// .fallback_for(locale!("ar-EG-u-nu-latn").into()); + /// + /// // Run the algorithm and check the results. + /// assert_eq!(fallback_iterator.get(), &locale!("ar-EG-u-nu-latn").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("ar-EG").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("ar-u-nu-latn").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("ar").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("und").into()); + /// ``` + pub extension_key: Option, + /// Fallback supplement data key to customize fallback rules. + /// + /// For example, most data keys for collation add additional parent locales, such as + /// "yue" to "zh-Hant", and data used for the `"-u-co"` extension keyword fallback. + /// + /// Currently the only supported fallback supplement is `LocaleFallbackSupplement::Collation`, but more may be + /// added in the future. + /// + /// # Examples + /// + /// ``` + /// use icu_locid::locale; + /// use icu_locid_transform::fallback::LocaleFallbackConfig; + /// use icu_locid_transform::fallback::LocaleFallbackPriority; + /// use icu_locid_transform::fallback::LocaleFallbackSupplement; + /// use icu_locid_transform::LocaleFallbacker; + /// + /// // Set up the fallback iterator. + /// let fallbacker = LocaleFallbacker::new(); + /// let mut config = LocaleFallbackConfig::default(); + /// config.priority = LocaleFallbackPriority::Collation; + /// config.fallback_supplement = Some(LocaleFallbackSupplement::Collation); + /// let mut fallback_iterator = fallbacker + /// .for_config(config) + /// .fallback_for(locale!("yue-HK").into()); + /// + /// // Run the algorithm and check the results. + /// // TODO(#1964): add "zh" as a target. + /// assert_eq!(fallback_iterator.get(), &locale!("yue-HK").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("yue").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("zh-Hant").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("und").into()); + /// ``` + pub fallback_supplement: Option, +} + +impl LocaleFallbackConfig { + /// Const version of [`Default::default`]. + pub const fn const_default() -> Self { + Self { + priority: LocaleFallbackPriority::const_default(), + extension_key: None, + fallback_supplement: None, + } + } +} + +impl Default for LocaleFallbackConfig { + fn default() -> Self { + Self::const_default() + } +} diff --git a/vendor/icu_provider/src/hello_world.rs b/vendor/icu_provider/src/hello_world.rs index ec508ac48..825557ace 100644 --- a/vendor/icu_provider/src/hello_world.rs +++ b/vendor/icu_provider/src/hello_world.rs @@ -6,8 +6,8 @@ #![allow(clippy::exhaustive_structs)] // data struct module -#[cfg(feature = "datagen")] -use crate::datagen::IterableDataProvider; +use crate as icu_provider; + use crate::prelude::*; use alloc::borrow::Cow; use alloc::string::String; @@ -50,7 +50,7 @@ impl DataMarker for HelloWorldV1Marker { } impl KeyedDataMarker for HelloWorldV1Marker { - const KEY: DataKey = crate::data_key!("core/helloworld@1"); + const KEY: DataKey = icu_provider::data_key!("core/helloworld@1"); } /// A data provider returning Hello World strings in different languages. @@ -76,6 +76,25 @@ impl KeyedDataMarker for HelloWorldV1Marker { /// /// assert_eq!("Hallo Welt", german_hello_world.get().message); /// ``` +/// +/// Load the reverse string using an auxiliary key: +/// +/// ``` +/// use icu_provider::hello_world::*; +/// use icu_provider::prelude::*; +/// +/// let reverse_hello_world: DataPayload = +/// HelloWorldProvider +/// .load(DataRequest { +/// locale: &"en+reverse".parse().unwrap(), +/// metadata: Default::default(), +/// }) +/// .expect("Loading should succeed") +/// .take_payload() +/// .expect("Data should be present"); +/// +/// assert_eq!("Olleh Dlrow", reverse_hello_world.get().message); +/// ``` #[derive(Debug, PartialEq, Default)] pub struct HelloWorldProvider; @@ -86,17 +105,28 @@ impl HelloWorldProvider { ("bn", "ওহে বিশ্ব"), ("cs", "Ahoj světe"), ("de", "Hallo Welt"), + ("de-AT", "Servus Welt"), ("el", "Καλημέρα κόσμε"), ("en", "Hello World"), + ("en+reverse", "Olleh Dlrow"), + ("en-001", "Hello from 🗺️"), // WORLD + ("en-002", "Hello from 🌍"), // AFRICA + ("en-019", "Hello from 🌎"), // AMERICAS + ("en-142", "Hello from 🌏"), // ASIA + ("en-GB", "Hello from 🇬🇧"), // GREAT BRITAIN + ("en-GB-u-sd-gbeng", "Hello from 🏴󠁧󠁢󠁥󠁮󠁧󠁿"), // ENGLAND ("eo", "Saluton, Mondo"), ("fa", "سلام دنیا‎"), ("fi", "hei maailma"), ("is", "Halló, heimur"), ("ja", "こんにちは世界"), + ("ja+reverse", "界世はちにんこ"), ("la", "Ave, munde"), ("pt", "Olá, mundo"), ("ro", "Salut, lume"), ("ru", "Привет, мир"), + ("sr", "Поздрав свете"), + ("sr-Latn", "Pozdrav svete"), ("vi", "Xin chào thế giới"), ("zh", "你好世界"), ]; @@ -133,7 +163,7 @@ impl DataPayload { // AnyProvider support. #[cfg(not(feature = "datagen"))] -impl_dynamic_data_provider!(HelloWorldProvider, [HelloWorldV1Marker,], AnyMarker); +icu_provider::impl_dynamic_data_provider!(HelloWorldProvider, [HelloWorldV1Marker,], AnyMarker); #[cfg(feature = "deserialize_json")] /// A data provider returning Hello World strings in different languages as JSON blobs. @@ -172,7 +202,7 @@ impl BufferProvider for HelloWorldJsonProvider { let result = HelloWorldProvider.load(req)?; let (mut metadata, old_payload) = DataResponse::::take_metadata_and_payload(result)?; - metadata.buffer_format = Some(crate::buf::BufferFormat::Json); + metadata.buffer_format = Some(icu_provider::buf::BufferFormat::Json); #[allow(clippy::unwrap_used)] // HelloWorldV1::serialize is infallible Ok(DataResponse { metadata, @@ -187,19 +217,15 @@ impl BufferProvider for HelloWorldJsonProvider { } #[cfg(feature = "datagen")] -impl IterableDataProvider for HelloWorldProvider { +impl icu_provider::datagen::IterableDataProvider for HelloWorldProvider { fn supported_locales(&self) -> Result, DataError> { #[allow(clippy::unwrap_used)] // datagen - Ok(Self::DATA - .iter() - .map(|(s, _)| s.parse::().unwrap()) - .map(DataLocale::from) - .collect()) + Ok(Self::DATA.iter().map(|(s, _)| s.parse().unwrap()).collect()) } } #[cfg(feature = "datagen")] -make_exportable_provider!(HelloWorldProvider, [HelloWorldV1Marker,]); +icu_provider::make_exportable_provider!(HelloWorldProvider, [HelloWorldV1Marker,]); /// A type that formats localized "hello world" strings. /// @@ -236,12 +262,22 @@ pub struct FormattedHelloWorld<'l> { impl HelloWorldFormatter { /// Creates a new [`HelloWorldFormatter`] for the specified locale. /// - /// See [`HelloWorldFormatter`] for an example. - /// - /// [📚 Help choosing a constructor](crate::constructors) - ///
- /// ⚠️ The bounds on this function may change over time, including in SemVer minor releases. - ///
+ /// [📚 Help choosing a constructor](icu_provider::constructors) + pub fn try_new(locale: &DataLocale) -> Result { + Self::try_new_unstable(&HelloWorldProvider, locale) + } + + icu_provider::gen_any_buffer_data_constructors!(locale: include, options: skip, error: DataError, + #[cfg(skip)] + functions: [ + try_new, + try_new_with_any_provider, + try_new_with_buffer_provider, + try_new_unstable, + Self, + ]); + + #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::try_new)] pub fn try_new_unstable

(provider: &P, locale: &DataLocale) -> Result where P: DataProvider, @@ -255,8 +291,6 @@ impl HelloWorldFormatter { Ok(Self { data }) } - crate::gen_any_buffer_constructors!(locale: include, options: skip, error: DataError); - /// Formats a hello world message, returning a [`FormattedHelloWorld`]. #[allow(clippy::needless_lifetimes)] // documentary example pub fn format<'l>(&'l self) -> FormattedHelloWorld<'l> { @@ -290,6 +324,7 @@ writeable::impl_display_with_writeable!(FormattedHelloWorld<'_>); #[cfg(feature = "datagen")] #[test] fn test_iter() { + use crate::datagen::IterableDataProvider; use icu_locid::locale; assert_eq!( @@ -298,17 +333,28 @@ fn test_iter() { locale!("bn").into(), locale!("cs").into(), locale!("de").into(), + locale!("de-AT").into(), locale!("el").into(), locale!("en").into(), + "en+reverse".parse().unwrap(), + locale!("en-001").into(), + locale!("en-002").into(), + locale!("en-019").into(), + locale!("en-142").into(), + locale!("en-GB").into(), + locale!("en-GB-u-sd-gbeng").into(), locale!("eo").into(), locale!("fa").into(), locale!("fi").into(), locale!("is").into(), locale!("ja").into(), + "ja+reverse".parse().unwrap(), locale!("la").into(), locale!("pt").into(), locale!("ro").into(), locale!("ru").into(), + locale!("sr").into(), + locale!("sr-Latn").into(), locale!("vi").into(), locale!("zh").into() ] diff --git a/vendor/icu_provider/src/helpers.rs b/vendor/icu_provider/src/helpers.rs deleted file mode 100644 index 998e656da..000000000 --- a/vendor/icu_provider/src/helpers.rs +++ /dev/null @@ -1,309 +0,0 @@ -// This file is part of ICU4X. For terms of use, please see the file -// called LICENSE at the top level of the ICU4X source tree -// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). - -//! Internal helper functions. - -/// Const function to compute the FxHash of a byte array with little-endian byte order. -/// -/// FxHash is a speedy hash algorithm used within rustc. The algorithm is satisfactory for our -/// use case since the strings being hashed originate from a trusted source (the ICU4X -/// components), and the hashes are computed at compile time, so we can check for collisions. -/// -/// We could have considered a SHA or other cryptographic hash function. However, we are using -/// FxHash because: -/// -/// 1. There is precedent for this algorithm in Rust -/// 2. The algorithm is easy to implement as a const function -/// 3. The amount of code is small enough that we can reasonably keep the algorithm in-tree -/// 4. FxHash is designed to output 32-bit or 64-bit values, whereas SHA outputs more bits, -/// such that truncation would be required in order to fit into a u32, partially reducing -/// the benefit of a cryptographically secure algorithm -// The indexing operations in this function have been reviewed in detail and won't panic. -#[allow(clippy::indexing_slicing)] -pub const fn fxhash_32(bytes: &[u8], ignore_leading: usize, ignore_trailing: usize) -> u32 { - // This code is adapted from https://github.com/rust-lang/rustc-hash, - // whose license text is reproduced below. - // - // Copyright 2015 The Rust Project Developers. See the COPYRIGHT - // file at the top-level directory of this distribution and at - // http://rust-lang.org/COPYRIGHT. - // - // Licensed under the Apache License, Version 2.0 or the MIT license - // , at your - // option. This file may not be copied, modified, or distributed - // except according to those terms. - - if ignore_leading + ignore_trailing >= bytes.len() { - return 0; - } - - #[inline] - const fn hash_word_32(mut hash: u32, word: u32) -> u32 { - const ROTATE: u32 = 5; - const SEED32: u32 = 0x9e_37_79_b9; - hash = hash.rotate_left(ROTATE); - hash ^= word; - hash = hash.wrapping_mul(SEED32); - hash - } - - let mut cursor = ignore_leading; - let end = bytes.len() - ignore_trailing; - let mut hash = 0; - - while end - cursor >= 4 { - let word = u32::from_le_bytes([ - bytes[cursor], - bytes[cursor + 1], - bytes[cursor + 2], - bytes[cursor + 3], - ]); - hash = hash_word_32(hash, word); - cursor += 4; - } - - if end - cursor >= 2 { - let word = u16::from_le_bytes([bytes[cursor], bytes[cursor + 1]]); - hash = hash_word_32(hash, word as u32); - cursor += 2; - } - - if end - cursor >= 1 { - hash = hash_word_32(hash, bytes[cursor] as u32); - } - - hash -} - -#[test] -fn test_hash_word_32() { - assert_eq!(0, fxhash_32(b"", 0, 0)); - assert_eq!(0, fxhash_32(b"a", 1, 0)); - assert_eq!(0, fxhash_32(b"a", 0, 1)); - assert_eq!(0, fxhash_32(b"a", 0, 10)); - assert_eq!(0, fxhash_32(b"a", 10, 0)); - assert_eq!(0, fxhash_32(b"a", 1, 1)); - assert_eq!(0xF3051F19, fxhash_32(b"a", 0, 0)); - assert_eq!(0x2F9DF119, fxhash_32(b"ab", 0, 0)); - assert_eq!(0xCB1D9396, fxhash_32(b"abc", 0, 0)); - assert_eq!(0x8628F119, fxhash_32(b"abcd", 0, 0)); - assert_eq!(0xBEBDB56D, fxhash_32(b"abcde", 0, 0)); - assert_eq!(0x1CE8476D, fxhash_32(b"abcdef", 0, 0)); - assert_eq!(0xC0F176A4, fxhash_32(b"abcdefg", 0, 0)); - assert_eq!(0x09AB476D, fxhash_32(b"abcdefgh", 0, 0)); - assert_eq!(0xB72F5D88, fxhash_32(b"abcdefghi", 0, 0)); - - assert_eq!( - fxhash_32(crate::tagged!("props/sc=Khmr@1").as_bytes(), 0, 0), - fxhash_32(crate::tagged!("props/sc=Samr@1").as_bytes(), 0, 0) - ); - - assert_ne!( - fxhash_32( - crate::tagged!("props/sc=Khmr@1").as_bytes(), - crate::leading_tag!().len(), - crate::trailing_tag!().len() - ), - fxhash_32( - crate::tagged!("props/sc=Samr@1").as_bytes(), - crate::leading_tag!().len(), - crate::trailing_tag!().len() - ) - ); -} - -#[doc(hidden)] -#[macro_export] -macro_rules! gen_any_buffer_docs { - (ANY, $krate:path, $see_also:path) => { - concat!( - "Creates a new instance using an [`AnyProvider`](", - stringify!($krate), - "::AnyProvider).\n\n", - "For details on the behavior of this function, see: [`", - stringify!($see_also), - "`]\n\n", - "[📚 Help choosing a constructor](", - stringify!($krate), - "::constructors)", - ) - }; - (BUFFER, $krate:path, $see_also:path) => { - concat!( - "✨ **Enabled with the `\"serde\"` feature.**\n\n", - "Creates a new instance using a [`BufferProvider`](", - stringify!($krate), - "::BufferProvider).\n\n", - "For details on the behavior of this function, see: [`", - stringify!($see_also), - "`]\n\n", - "[📚 Help choosing a constructor](", - stringify!($krate), - "::constructors)", - ) - }; -} - -#[doc(hidden)] -#[macro_export] -macro_rules! gen_any_buffer_constructors { - (locale: skip, options: skip, error: $error_ty:path) => { - $crate::gen_any_buffer_constructors!( - locale: skip, - options: skip, - error: $error_ty, - functions: [ - Self::try_new_unstable, - try_new_with_any_provider, - try_new_with_buffer_provider - ] - ); - }; - (locale: skip, options: skip, error: $error_ty:path, functions: [$f1:path, $f2:ident, $f3:ident]) => { - #[doc = $crate::gen_any_buffer_docs!(ANY, $crate, $f1)] - pub fn $f2(provider: &(impl $crate::AnyProvider + ?Sized)) -> Result { - use $crate::AsDowncastingAnyProvider; - $f1(&provider.as_downcasting()) - } - #[cfg(feature = "serde")] - #[doc = $crate::gen_any_buffer_docs!(BUFFER, $crate, $f1)] - pub fn $f3(provider: &(impl $crate::BufferProvider + ?Sized)) -> Result { - use $crate::AsDeserializingBufferProvider; - $f1(&provider.as_deserializing()) - } - }; - - - (locale: skip, options: skip, result: $result_ty:path, functions: [$f1:path, $f2:ident, $f3:ident]) => { - #[doc = $crate::gen_any_buffer_docs!(ANY, $crate, $f1)] - pub fn $f2(provider: &(impl $crate::AnyProvider + ?Sized)) -> $result_ty { - use $crate::AsDowncastingAnyProvider; - $f1(&provider.as_downcasting()) - } - #[cfg(feature = "serde")] - #[doc = $crate::gen_any_buffer_docs!(BUFFER, $crate, $f1)] - pub fn $f3(provider: &(impl $crate::BufferProvider + ?Sized)) -> $result_ty { - use $crate::AsDeserializingBufferProvider; - $f1(&provider.as_deserializing()) - } - }; - - (locale: skip, $options_arg:ident: $options_ty:ty, error: $error_ty:path) => { - $crate::gen_any_buffer_constructors!( - locale: skip, - $options_arg: $options_ty, - error: $error_ty, - functions: [ - Self::try_new_unstable, - try_new_with_any_provider, - try_new_with_buffer_provider - ] - ); - }; - (locale: skip, $options_arg:ident: $options_ty:ty, result: $result_ty:ty, functions: [$f1:path, $f2:ident, $f3:ident]) => { - #[doc = $crate::gen_any_buffer_docs!(ANY, $crate, $f1)] - pub fn $f2(provider: &(impl $crate::AnyProvider + ?Sized), $options_arg: $options_ty) -> $result_ty { - use $crate::AsDowncastingAnyProvider; - $f1(&provider.as_downcasting(), $options_arg) - } - #[cfg(feature = "serde")] - #[doc = $crate::gen_any_buffer_docs!(BUFFER, $crate, $f1)] - pub fn $f3(provider: &(impl $crate::BufferProvider + ?Sized), $options_arg: $options_ty) -> $result_ty { - use $crate::AsDeserializingBufferProvider; - $f1(&provider.as_deserializing(), $options_arg) - } - }; - (locale: skip, $options_arg:ident: $options_ty:ty, error: $error_ty:ty, functions: [$f1:path, $f2:ident, $f3:ident]) => { - #[doc = $crate::gen_any_buffer_docs!(ANY, $crate, $f1)] - pub fn $f2(provider: &(impl $crate::AnyProvider + ?Sized), $options_arg: $options_ty) -> Result { - use $crate::AsDowncastingAnyProvider; - $f1(&provider.as_downcasting(), $options_arg) - } - #[cfg(feature = "serde")] - #[doc = $crate::gen_any_buffer_docs!(BUFFER, $crate, $f1)] - pub fn $f3(provider: &(impl $crate::BufferProvider + ?Sized), $options_arg: $options_ty) -> Result { - use $crate::AsDeserializingBufferProvider; - $f1(&provider.as_deserializing(), $options_arg) - } - }; - (locale: include, options: skip, error: $error_ty:path) => { - $crate::gen_any_buffer_constructors!( - locale: include, - options: skip, - error: $error_ty, - functions: [ - Self::try_new_unstable, - try_new_with_any_provider, - try_new_with_buffer_provider - ] - ); - }; - (locale: include, options: skip, error: $error_ty:path, functions: [$f1:path, $f2:ident, $f3:ident]) => { - #[doc = $crate::gen_any_buffer_docs!(ANY, $crate, $f1)] - pub fn $f2(provider: &(impl $crate::AnyProvider + ?Sized), locale: &$crate::DataLocale) -> Result { - use $crate::AsDowncastingAnyProvider; - $f1(&provider.as_downcasting(), locale) - } - #[cfg(feature = "serde")] - #[doc = $crate::gen_any_buffer_docs!(BUFFER, $crate, $f1)] - pub fn $f3(provider: &(impl $crate::BufferProvider + ?Sized), locale: &$crate::DataLocale) -> Result { - use $crate::AsDeserializingBufferProvider; - $f1(&provider.as_deserializing(), locale) - } - }; - - (locale: include, $config_arg:ident: $config_ty:path, $options_arg:ident: $options_ty:path, error: $error_ty:path) => { - $crate::gen_any_buffer_constructors!( - locale: include, - $config_arg: $config_ty, - $options_arg: $options_ty, - error: $error_ty, - functions: [ - Self::try_new_unstable, - try_new_with_any_provider, - try_new_with_buffer_provider - ] - ); - }; - (locale: include, $config_arg:ident: $config_ty:path, $options_arg:ident: $options_ty:path, error: $error_ty:path, functions: [$f1:path, $f2:ident, $f3:ident]) => { - #[doc = $crate::gen_any_buffer_docs!(ANY, $crate, $f1)] - pub fn $f2(provider: &(impl $crate::AnyProvider + ?Sized), locale: &$crate::DataLocale, $config_arg: $config_ty, $options_arg: $options_ty) -> Result { - use $crate::AsDowncastingAnyProvider; - $f1(&provider.as_downcasting(), locale, $config_arg, $options_arg) - } - #[cfg(feature = "serde")] - #[doc = $crate::gen_any_buffer_docs!(BUFFER, $crate, $f1)] - pub fn $f3(provider: &(impl $crate::BufferProvider + ?Sized), locale: &$crate::DataLocale, $config_arg: $config_ty, $options_arg: $options_ty) -> Result { - use $crate::AsDeserializingBufferProvider; - $f1(&provider.as_deserializing(), locale, $config_arg, $options_arg) - } - }; - - (locale: include, $options_arg:ident: $options_ty:path, error: $error_ty:path) => { - $crate::gen_any_buffer_constructors!( - locale: include, - $options_arg: $options_ty, - error: $error_ty, - functions: [ - Self::try_new_unstable, - try_new_with_any_provider, - try_new_with_buffer_provider - ] - ); - }; - (locale: include, $options_arg:ident: $options_ty:path, error: $error_ty:path, functions: [$f1:path, $f2:ident, $f3:ident]) => { - #[doc = $crate::gen_any_buffer_docs!(ANY, $crate, $f1)] - pub fn $f2(provider: &(impl $crate::AnyProvider + ?Sized), locale: &$crate::DataLocale, $options_arg: $options_ty) -> Result { - use $crate::AsDowncastingAnyProvider; - $f1(&provider.as_downcasting(), locale, $options_arg) - } - #[cfg(feature = "serde")] - #[doc = $crate::gen_any_buffer_docs!(BUFFER, $crate, $f1)] - pub fn $f3(provider: &(impl $crate::BufferProvider + ?Sized), locale: &$crate::DataLocale, $options_arg: $options_ty) -> Result { - use $crate::AsDeserializingBufferProvider; - $f1(&provider.as_deserializing(), locale, $options_arg) - } - }; -} diff --git a/vendor/icu_provider/src/key.rs b/vendor/icu_provider/src/key.rs index 8c76608fc..0e1e1006e 100644 --- a/vendor/icu_provider/src/key.rs +++ b/vendor/icu_provider/src/key.rs @@ -3,8 +3,8 @@ // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use crate::error::{DataError, DataErrorKind}; -use crate::helpers; +use crate::fallback::{LocaleFallbackConfig, LocaleFallbackPriority, LocaleFallbackSupplement}; use alloc::borrow::Cow; use core::fmt; use core::fmt::Write; @@ -50,7 +50,7 @@ pub struct DataKeyHash([u8; 4]); impl DataKeyHash { const fn compute_from_path(path: DataKeyPath) -> Self { - let hash = helpers::fxhash_32( + let hash = fxhash_32( path.tagged.as_bytes(), leading_tag!().len(), trailing_tag!().len(), @@ -64,6 +64,79 @@ impl DataKeyHash { } } +/// Const function to compute the FxHash of a byte array. +/// +/// FxHash is a speedy hash algorithm used within rustc. The algorithm is satisfactory for our +/// use case since the strings being hashed originate from a trusted source (the ICU4X +/// components), and the hashes are computed at compile time, so we can check for collisions. +/// +/// We could have considered a SHA or other cryptographic hash function. However, we are using +/// FxHash because: +/// +/// 1. There is precedent for this algorithm in Rust +/// 2. The algorithm is easy to implement as a const function +/// 3. The amount of code is small enough that we can reasonably keep the algorithm in-tree +/// 4. FxHash is designed to output 32-bit or 64-bit values, whereas SHA outputs more bits, +/// such that truncation would be required in order to fit into a u32, partially reducing +/// the benefit of a cryptographically secure algorithm +// The indexing operations in this function have been reviewed in detail and won't panic. +#[allow(clippy::indexing_slicing)] +const fn fxhash_32(bytes: &[u8], ignore_leading: usize, ignore_trailing: usize) -> u32 { + // This code is adapted from https://github.com/rust-lang/rustc-hash, + // whose license text is reproduced below. + // + // Copyright 2015 The Rust Project Developers. See the COPYRIGHT + // file at the top-level directory of this distribution and at + // http://rust-lang.org/COPYRIGHT. + // + // Licensed under the Apache License, Version 2.0 or the MIT license + // , at your + // option. This file may not be copied, modified, or distributed + // except according to those terms. + + if ignore_leading + ignore_trailing >= bytes.len() { + return 0; + } + + #[inline] + const fn hash_word_32(mut hash: u32, word: u32) -> u32 { + const ROTATE: u32 = 5; + const SEED32: u32 = 0x9e_37_79_b9; + hash = hash.rotate_left(ROTATE); + hash ^= word; + hash = hash.wrapping_mul(SEED32); + hash + } + + let mut cursor = ignore_leading; + let end = bytes.len() - ignore_trailing; + let mut hash = 0; + + while end - cursor >= 4 { + let word = u32::from_le_bytes([ + bytes[cursor], + bytes[cursor + 1], + bytes[cursor + 2], + bytes[cursor + 3], + ]); + hash = hash_word_32(hash, word); + cursor += 4; + } + + if end - cursor >= 2 { + let word = u16::from_le_bytes([bytes[cursor], bytes[cursor + 1]]); + hash = hash_word_32(hash, word as u32); + cursor += 2; + } + + if end - cursor >= 1 { + hash = hash_word_32(hash, bytes[cursor] as u32); + } + + hash +} + impl<'a> zerovec::maps::ZeroMapKV<'a> for DataKeyHash { type Container = zerovec::ZeroVec<'a, DataKeyHash>; type Slice = zerovec::ZeroSlice; @@ -86,48 +159,6 @@ impl AsULE for DataKeyHash { // Safe since the ULE type is `self`. unsafe impl EqULE for DataKeyHash {} -/// Hint for what to prioritize during fallback when data is unavailable. -/// -/// For example, if `"en-US"` is requested, but we have no data for that specific locale, -/// fallback may take us to `"en"` or `"und-US"` to check for data. -#[derive(Debug, PartialEq, Eq, Copy, Clone, PartialOrd, Ord)] -#[non_exhaustive] -pub enum FallbackPriority { - /// Prioritize the language. This is the default behavior. - /// - /// For example, `"en-US"` should go to `"en"` and then `"und"`. - Language, - /// Prioritize the region. - /// - /// For example, `"en-US"` should go to `"und-US"` and then `"und"`. - Region, - /// Collation-specific fallback rules. Similar to language priority. - /// - /// For example, `"zh-Hant"` goes to `"zh"` before `"und"`. - Collation, -} - -impl FallbackPriority { - /// Const-friendly version of [`Default::default`]. - pub const fn const_default() -> Self { - Self::Language - } -} - -impl Default for FallbackPriority { - fn default() -> Self { - Self::const_default() - } -} - -/// What additional data to load when performing fallback. -#[derive(Debug, PartialEq, Eq, Copy, Clone, PartialOrd, Ord)] -#[non_exhaustive] -pub enum FallbackSupplement { - /// Collation supplement; see `CollationFallbackSupplementV1Marker` - Collation, -} - /// The string path of a data key. For example, "foo@1" #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] pub struct DataKeyPath { @@ -163,35 +194,42 @@ impl Deref for DataKeyPath { #[non_exhaustive] pub struct DataKeyMetadata { /// What to prioritize when fallbacking on this [`DataKey`]. - pub fallback_priority: FallbackPriority, + pub fallback_priority: LocaleFallbackPriority, /// A Unicode extension keyword to consider when loading data for this [`DataKey`]. pub extension_key: Option, /// Optional choice for additional fallbacking data required for loading this marker. /// /// For more information, see `LocaleFallbackConfig::fallback_supplement`. - pub fallback_supplement: Option, + pub fallback_supplement: Option, + /// Whether the key has a singleton value, as opposed to per-locale values. Singleton + /// keys behave differently, e.g. they never perform fallback, and can be optimized + /// in data providers. + pub singleton: bool, } impl DataKeyMetadata { /// Const-friendly version of [`Default::default`]. pub const fn const_default() -> Self { Self { - fallback_priority: FallbackPriority::const_default(), + fallback_priority: LocaleFallbackPriority::const_default(), extension_key: None, fallback_supplement: None, + singleton: false, } } #[doc(hidden)] pub const fn construct_internal( - fallback_priority: FallbackPriority, + fallback_priority: LocaleFallbackPriority, extension_key: Option, - fallback_supplement: Option, + fallback_supplement: Option, + singleton: bool, ) -> Self { Self { fallback_priority, extension_key, fallback_supplement, + singleton, } } } @@ -302,6 +340,16 @@ impl DataKey { self.metadata } + /// Returns the [`LocaleFallbackConfig`] for this [`DataKey`]. + #[inline] + pub const fn fallback_config(self) -> LocaleFallbackConfig { + let mut config = LocaleFallbackConfig::const_default(); + config.priority = self.metadata.fallback_priority; + config.extension_key = self.metadata.extension_key; + config.fallback_supplement = self.metadata.fallback_supplement; + config + } + /// Constructs a [`DataKey`] from a path and metadata. /// /// # Examples @@ -620,35 +668,50 @@ fn test_key_to_string() { }, ] { writeable::assert_writeable_eq!(&cas.key, cas.expected); + assert_eq!(cas.expected, &*cas.key.path()); } } +#[test] +fn test_hash_word_32() { + assert_eq!(0, fxhash_32(b"", 0, 0)); + assert_eq!(0, fxhash_32(b"a", 1, 0)); + assert_eq!(0, fxhash_32(b"a", 0, 1)); + assert_eq!(0, fxhash_32(b"a", 0, 10)); + assert_eq!(0, fxhash_32(b"a", 10, 0)); + assert_eq!(0, fxhash_32(b"a", 1, 1)); + assert_eq!(0xF3051F19, fxhash_32(b"a", 0, 0)); + assert_eq!(0x2F9DF119, fxhash_32(b"ab", 0, 0)); + assert_eq!(0xCB1D9396, fxhash_32(b"abc", 0, 0)); + assert_eq!(0x8628F119, fxhash_32(b"abcd", 0, 0)); + assert_eq!(0xBEBDB56D, fxhash_32(b"abcde", 0, 0)); + assert_eq!(0x1CE8476D, fxhash_32(b"abcdef", 0, 0)); + assert_eq!(0xC0F176A4, fxhash_32(b"abcdefg", 0, 0)); + assert_eq!(0x09AB476D, fxhash_32(b"abcdefgh", 0, 0)); + assert_eq!(0xB72F5D88, fxhash_32(b"abcdefghi", 0, 0)); +} + #[test] fn test_key_hash() { struct KeyTestCase { pub key: DataKey, pub hash: DataKeyHash, - pub path: &'static str, } for cas in [ KeyTestCase { key: data_key!("core/cardinal@1"), hash: DataKeyHash([172, 207, 42, 236]), - path: "core/cardinal@1", }, KeyTestCase { key: data_key!("core/maxlengthsubcatg@1"), hash: DataKeyHash([193, 6, 79, 61]), - path: "core/maxlengthsubcatg@1", }, KeyTestCase { key: data_key!("core/cardinal@65535"), hash: DataKeyHash([176, 131, 182, 223]), - path: "core/cardinal@65535", }, ] { - assert_eq!(cas.hash, cas.key.hashed(), "{}", cas.path); - assert_eq!(cas.path, &*cas.key.path(), "{}", cas.path); + assert_eq!(cas.hash, cas.key.hashed(), "{}", cas.key); } } diff --git a/vendor/icu_provider/src/lib.rs b/vendor/icu_provider/src/lib.rs index 86c2001cd..01cb2a3b3 100644 --- a/vendor/icu_provider/src/lib.rs +++ b/vendor/icu_provider/src/lib.rs @@ -80,9 +80,6 @@ //! //! - [`HelloWorldProvider`] returns "hello world" strings in several languages. //! -//! If you need a testing provider that contains the actual resource keys used by ICU4X features, -//! see the [`icu_testdata`] crate. -//! //! ## Types and Lifetimes //! //! Types compatible with [`Yokeable`] can be passed through the data provider, so long as they are @@ -116,7 +113,6 @@ //! [`CldrJsonDataProvider`]: ../icu_datagen/cldr/struct.CldrJsonDataProvider.html //! [`FsDataProvider`]: ../icu_provider_fs/struct.FsDataProvider.html //! [`BlobDataProvider`]: ../icu_provider_blob/struct.BlobDataProvider.html -//! [`icu_testdata`]: ../icu_testdata/index.html //! [`icu_datagen`]: ../icu_datagen/index.html // https://github.com/unicode-org/icu4x/blob/main/docs/process/boilerplate.md#library-annotations @@ -139,7 +135,8 @@ extern crate alloc; mod data_provider; mod error; -mod helpers; +#[doc(hidden)] +pub mod fallback; mod key; mod request; mod response; @@ -148,12 +145,9 @@ pub mod any; pub mod buf; pub mod constructors; #[cfg(feature = "datagen")] -#[macro_use] pub mod datagen; -#[macro_use] pub mod dynutil; pub mod hello_world; -#[macro_use] pub mod marker; #[cfg(feature = "serde")] pub mod serde; @@ -167,8 +161,8 @@ pub use crate::key::DataKey; pub use crate::key::DataKeyHash; pub use crate::key::DataKeyMetadata; pub use crate::key::DataKeyPath; -pub use crate::key::FallbackPriority; -pub use crate::key::FallbackSupplement; +#[cfg(feature = "experimental")] +pub use crate::request::AuxiliaryKeys; pub use crate::request::DataLocale; pub use crate::request::DataRequest; pub use crate::request::DataRequestMetadata; @@ -214,6 +208,9 @@ pub mod prelude { #[doc(no_inline)] pub use crate::AsDynamicDataProviderAnyMarkerWrap; #[doc(no_inline)] + #[cfg(feature = "experimental")] + pub use crate::AuxiliaryKeys; + #[doc(no_inline)] pub use crate::BufferMarker; #[doc(no_inline)] pub use crate::BufferProvider; @@ -252,8 +249,19 @@ pub mod prelude { pub use zerofrom; } +// Additional crate re-exports for compatibility +#[doc(hidden)] +pub use fallback::LocaleFallbackPriority as FallbackPriority; +#[doc(hidden)] +pub use fallback::LocaleFallbackSupplement as FallbackSupplement; +#[doc(hidden)] +pub use yoke; +#[doc(hidden)] +pub use zerofrom; + // For macros #[doc(hidden)] pub mod _internal { - pub use icu_locid::extensions_unicode_key; + pub use super::fallback::{LocaleFallbackPriority, LocaleFallbackSupplement}; + pub use icu_locid as locid; } diff --git a/vendor/icu_provider/src/request.rs b/vendor/icu_provider/src/request.rs index c5bdbe84b..15a6ce831 100644 --- a/vendor/icu_provider/src/request.rs +++ b/vendor/icu_provider/src/request.rs @@ -2,18 +2,30 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). +use crate::{DataError, DataErrorKind}; use core::cmp::Ordering; use core::default::Default; use core::fmt; use core::fmt::Debug; +use core::hash::Hash; +use core::str::FromStr; use icu_locid::extensions::unicode as unicode_ext; use icu_locid::subtags::{Language, Region, Script, Variants}; use icu_locid::{LanguageIdentifier, Locale, SubtagOrderingResult}; use writeable::{LengthHint, Writeable}; +#[cfg(feature = "experimental")] +use alloc::string::String; +#[cfg(feature = "experimental")] +use core::ops::Deref; +#[cfg(feature = "experimental")] +use tinystr::TinyAsciiStr; + #[cfg(doc)] use icu_locid::subtags::Variant; +const AUXILIARY_KEY_SEPARATOR: u8 = b'+'; + /// The request type passed into all data provider implementations. #[derive(Default, Debug, Clone, Copy, PartialEq, Eq)] #[allow(clippy::exhaustive_structs)] // this type is stable @@ -42,7 +54,7 @@ pub struct DataRequestMetadata { pub silent: bool, } -/// The main locale type used by the ICU4X data provider. +/// A locale type optimized for use in fallbacking and the ICU4X data pipeline. /// /// [`DataLocale`] contains less functionality than [`Locale`] but more than /// [`LanguageIdentifier`] for better size and performance while still meeting @@ -109,6 +121,8 @@ pub struct DataRequestMetadata { pub struct DataLocale { langid: LanguageIdentifier, keywords: unicode_ext::Keywords, + #[cfg(feature = "experimental")] + aux: Option, } impl<'a> Default for &'a DataLocale { @@ -116,6 +130,8 @@ impl<'a> Default for &'a DataLocale { static DEFAULT: DataLocale = DataLocale { langid: LanguageIdentifier::UND, keywords: unicode_ext::Keywords::new(), + #[cfg(feature = "experimental")] + aux: None, }; &DEFAULT } @@ -134,20 +150,34 @@ impl Writeable for DataLocale { sink.write_str("-u-")?; self.keywords.write_to(sink)?; } + #[cfg(feature = "experimental")] + if let Some(aux) = self.aux.as_ref() { + sink.write_char(AuxiliaryKeys::separator() as char)?; + aux.write_to(sink)?; + } Ok(()) } fn writeable_length_hint(&self) -> LengthHint { - self.langid.writeable_length_hint() - + if !self.keywords.is_empty() { - self.keywords.writeable_length_hint() + 3 - } else { - LengthHint::exact(0) - } + let mut length_hint = self.langid.writeable_length_hint(); + if !self.keywords.is_empty() { + length_hint += self.keywords.writeable_length_hint() + 3; + } + #[cfg(feature = "experimental")] + if let Some(aux) = self.aux.as_ref() { + length_hint += aux.writeable_length_hint() + 1; + } + length_hint } fn write_to_string(&self) -> alloc::borrow::Cow { - if self.keywords.is_empty() { + #[cfg_attr(not(feature = "experimental"), allow(unused_mut))] + let mut is_only_langid = self.keywords.is_empty(); + #[cfg(feature = "experimental")] + { + is_only_langid = is_only_langid && self.aux.is_none(); + } + if is_only_langid { return self.langid.write_to_string(); } let mut string = @@ -164,6 +194,8 @@ impl From for DataLocale { Self { langid, keywords: unicode_ext::Keywords::new(), + #[cfg(feature = "experimental")] + aux: None, } } } @@ -173,6 +205,8 @@ impl From for DataLocale { Self { langid: locale.id, keywords: locale.extensions.unicode.keywords, + #[cfg(feature = "experimental")] + aux: None, } } } @@ -182,6 +216,8 @@ impl From<&LanguageIdentifier> for DataLocale { Self { langid: langid.clone(), keywords: unicode_ext::Keywords::new(), + #[cfg(feature = "experimental")] + aux: None, } } } @@ -191,7 +227,40 @@ impl From<&Locale> for DataLocale { Self { langid: locale.id.clone(), keywords: locale.extensions.unicode.keywords.clone(), + #[cfg(feature = "experimental")] + aux: None, + } + } +} + +impl FromStr for DataLocale { + type Err = DataError; + fn from_str(s: &str) -> Result { + let mut aux_iter = s.splitn(2, AUXILIARY_KEY_SEPARATOR as char); + let Some(locale_str) = aux_iter.next() else { + return Err(DataErrorKind::KeyLocaleSyntax + .into_error() + .with_display_context(s)); + }; + let locale = Locale::from_str(locale_str).map_err(|e| { + DataErrorKind::KeyLocaleSyntax + .into_error() + .with_display_context(s) + .with_display_context(&e) + })?; + #[cfg_attr(not(feature = "experimental"), allow(unused_mut))] + let mut data_locale = DataLocale::from(locale); + #[cfg(feature = "experimental")] + if let Some(aux_str) = aux_iter.next() { + let aux = AuxiliaryKeys::from_str(aux_str)?; + data_locale.set_aux(aux); } + if aux_iter.next().is_some() { + return Err(DataErrorKind::KeyLocaleSyntax + .into_error() + .with_display_context(s)); + } + Ok(data_locale) } } @@ -212,11 +281,18 @@ impl DataLocale { /// use std::cmp::Ordering; /// /// let bcp47_strings: &[&str] = &[ + /// "ca", + /// "ca+EUR", /// "ca-ES", + /// "ca-ES+GBP", + /// "ca-ES+GBP+short", + /// "ca-ES+USD", /// "ca-ES-u-ca-buddhist", /// "ca-ES-valencia", + /// "cat", /// "pl-Latn-PL", /// "und", + /// "und+MXN", /// "und-fonipa", /// "und-u-ca-hebrew", /// "und-u-ca-japanese", @@ -226,37 +302,70 @@ impl DataLocale { /// for ab in bcp47_strings.windows(2) { /// let a = ab[0]; /// let b = ab[1]; - /// assert!(a.cmp(b) == Ordering::Less); - /// let a_loc: DataLocale = a.parse::().unwrap().into(); - /// assert!( - /// a_loc.strict_cmp(a.as_bytes()) == Ordering::Equal, - /// "{} == {}", - /// a, + /// assert_eq!(a.cmp(b), Ordering::Less, "strings: {} < {}", a, b); + /// let a_loc: DataLocale = a.parse().unwrap(); + /// assert_eq!( + /// a_loc.strict_cmp(a.as_bytes()), + /// Ordering::Equal, + /// "strict_cmp: {} == {}", + /// a_loc, /// a /// ); - /// assert!( - /// a_loc.strict_cmp(b.as_bytes()) == Ordering::Less, - /// "{} < {}", - /// a, + /// assert_eq!( + /// a_loc.strict_cmp(b.as_bytes()), + /// Ordering::Less, + /// "strict_cmp: {} < {}", + /// a_loc, /// b /// ); - /// let b_loc: DataLocale = b.parse::().unwrap().into(); - /// assert!( - /// b_loc.strict_cmp(b.as_bytes()) == Ordering::Equal, - /// "{} == {}", - /// b, + /// let b_loc: DataLocale = b.parse().unwrap(); + /// assert_eq!( + /// b_loc.strict_cmp(b.as_bytes()), + /// Ordering::Equal, + /// "strict_cmp: {} == {}", + /// b_loc, /// b /// ); - /// assert!( - /// b_loc.strict_cmp(a.as_bytes()) == Ordering::Greater, - /// "{} > {}", - /// b, + /// assert_eq!( + /// b_loc.strict_cmp(a.as_bytes()), + /// Ordering::Greater, + /// "strict_cmp: {} > {}", + /// b_loc, /// a /// ); /// } /// ``` + /// + /// Comparison against invalid strings: + /// + /// ``` + /// use icu_provider::DataLocale; + /// + /// let invalid_strings: &[&str] = &[ + /// // Less than "ca-ES" + /// "CA", + /// "ar+GBP+FOO", + /// // Greater than "ca-ES+GBP" + /// "ca_ES", + /// "ca-ES+GBP+FOO", + /// ]; + /// + /// let data_locale = "ca-ES+GBP".parse::().unwrap(); + /// + /// for s in invalid_strings.iter() { + /// let expected_ordering = "ca-ES+GBP".cmp(s); + /// let actual_ordering = data_locale.strict_cmp(s.as_bytes()); + /// assert_eq!(expected_ordering, actual_ordering, "{}", s); + /// } + /// ``` pub fn strict_cmp(&self, other: &[u8]) -> Ordering { - let subtags = other.split(|b| *b == b'-'); + let mut aux_iter = other.splitn(2, |b| *b == AUXILIARY_KEY_SEPARATOR); + let Some(locale_str) = aux_iter.next() else { + debug_assert!(other.is_empty()); + return Ordering::Greater; + }; + let aux_str = aux_iter.next(); + let subtags = locale_str.split(|b| *b == b'-'); let mut subtag_result = self.langid.strict_cmp_iter(subtags); if self.has_unicode_ext() { let mut subtags = match subtag_result { @@ -270,20 +379,114 @@ impl DataLocale { } subtag_result = self.keywords.strict_cmp_iter(subtags); } - subtag_result.end() + let has_more_subtags = match subtag_result { + SubtagOrderingResult::Subtags(mut s) => s.next().is_some(), + SubtagOrderingResult::Ordering(o) => return o, + }; + // If we get here, `self` has equal or fewer subtags than the `other`. + // There are 2^3 = 8 cases to handle for auxiliary keys, expanded below. + match (has_more_subtags, self.get_aux(), aux_str) { + (false, None, None) => { + // foo == foo + Ordering::Equal + } + (false, Some(self_aux), Some(other_aux)) => { + // foo+BAR1 ?= foo+BAR2 + let aux_ordering = self_aux.as_bytes().cmp(other_aux); + if aux_ordering != Ordering::Equal { + return aux_ordering; + } + Ordering::Equal + } + (false, Some(_), None) => { + // foo+BAR > foo + Ordering::Greater + } + (_, _, _) => { + // foo < foo-bar + // foo < foo-bar+BAR + // foo < foo+BAR + // foo+BAR < foo-bar + // foo+BAR < foo-bar+BAR + Ordering::Less + } + } } } impl DataLocale { /// Returns whether this [`DataLocale`] has all empty fields (no components). + /// + /// See also: + /// + /// - [`DataLocale::is_und()`] + /// - [`DataLocale::is_langid_und()`] + /// + /// # Examples + /// + /// ``` + /// use icu_provider::DataLocale; + /// + /// assert!("und".parse::().unwrap().is_empty()); + /// assert!(!"und-u-ca-buddhist" + /// .parse::() + /// .unwrap() + /// .is_empty()); + /// assert!(!"und+auxiliary".parse::().unwrap().is_empty()); + /// assert!(!"ca-ES".parse::().unwrap().is_empty()); + /// ``` pub fn is_empty(&self) -> bool { self == <&DataLocale>::default() } + /// Returns whether this [`DataLocale`] is `und` in the locale and extensions portion. + /// + /// This ignores auxiliary keys. + /// + /// See also: + /// + /// - [`DataLocale::is_empty()`] + /// - [`DataLocale::is_langid_und()`] + /// + /// # Examples + /// + /// ``` + /// use icu_provider::DataLocale; + /// + /// assert!("und".parse::().unwrap().is_und()); + /// assert!(!"und-u-ca-buddhist".parse::().unwrap().is_und()); + /// assert!("und+auxiliary".parse::().unwrap().is_und()); + /// assert!(!"ca-ES".parse::().unwrap().is_und()); + /// ``` + pub fn is_und(&self) -> bool { + self.langid == LanguageIdentifier::UND && self.keywords.is_empty() + } + /// Returns whether the [`LanguageIdentifier`] associated with this request is `und`. /// - /// Note that this only checks the language identifier; extension keywords may also be set. - /// To check the entire `DataLocale`, use [`DataLocale::is_empty()`]. + /// This ignores extension keywords and auxiliary keys. + /// + /// See also: + /// + /// - [`DataLocale::is_empty()`] + /// - [`DataLocale::is_und()`] + /// + /// # Examples + /// + /// ``` + /// use icu_provider::DataLocale; + /// + /// assert!("und".parse::().unwrap().is_langid_und()); + /// assert!("und-u-ca-buddhist" + /// .parse::() + /// .unwrap() + /// .is_langid_und()); + /// assert!("und+auxiliary" + /// .parse::() + /// .unwrap() + /// .is_langid_und()); + /// assert!(!"ca-ES".parse::().unwrap().is_langid_und()); + /// ``` pub fn is_langid_und(&self) -> bool { self.langid == LanguageIdentifier::UND } @@ -339,7 +542,8 @@ impl DataLocale { /// /// ``` /// use icu_locid::{ - /// langid, locale, subtags_language as language, subtags_region as region, + /// langid, locale, + /// subtags::{language, region}, /// Locale, /// }; /// use icu_provider::prelude::*; @@ -442,7 +646,7 @@ impl DataLocale { /// /// ``` /// use icu_locid::{ - /// extensions_unicode_key as key, extensions_unicode_value as value, + /// extensions::unicode::{key, value}, /// Locale, /// }; /// use icu_provider::prelude::*; @@ -484,6 +688,327 @@ impl DataLocale { { self.keywords.retain_by_key(predicate) } + + /// Gets the auxiliary key for this [`DataLocale`]. + /// + /// For more information and examples, see [`AuxiliaryKeys`]. + #[cfg(feature = "experimental")] + pub fn get_aux(&self) -> Option<&AuxiliaryKeys> { + self.aux.as_ref() + } + + #[cfg(not(feature = "experimental"))] + pub(crate) fn get_aux(&self) -> Option<&str> { + None + } + + /// Returns whether this [`DataLocale`] has an auxiliary key. + /// + /// For more information and examples, see [`AuxiliaryKeys`]. + #[cfg(feature = "experimental")] + pub fn has_aux(&self) -> bool { + self.aux.is_some() + } + + /// Sets an auxiliary key on this [`DataLocale`]. + /// + /// Returns the previous auxiliary key if present. + /// + /// For more information and examples, see [`AuxiliaryKeys`]. + #[cfg(feature = "experimental")] + pub fn set_aux(&mut self, value: AuxiliaryKeys) -> Option { + self.aux.replace(value) + } + + /// Remove an auxiliary key, if present. Returns the removed auxiliary key. + /// + /// # Examples + /// + /// ``` + /// use icu_locid::locale; + /// use icu_provider::prelude::*; + /// use writeable::assert_writeable_eq; + /// + /// let mut data_locale: DataLocale = locale!("ar-EG").into(); + /// let aux = "GBP" + /// .parse::() + /// .expect("contains valid characters"); + /// data_locale.set_aux(aux); + /// assert_writeable_eq!(data_locale, "ar-EG+GBP"); + /// + /// let maybe_aux = data_locale.remove_aux(); + /// assert_writeable_eq!(data_locale, "ar-EG"); + /// assert_writeable_eq!(maybe_aux.unwrap(), "GBP"); + /// ``` + #[cfg(feature = "experimental")] + pub fn remove_aux(&mut self) -> Option { + self.aux.take() + } +} + +/// The "auxiliary key" is an annotation on [`DataLocale`] that can contain an arbitrary +/// information that does not fit into the [`LanguageIdentifier`] or [`Keywords`]. +/// +/// A [`DataLocale`] can have multiple auxiliary keys, represented by this struct. The auxiliary +/// keys are separated from the BCP-47 locale and from each other with the character returned by +/// [`AuxiliaryKeys::separator()`]. +/// +/// An auxiliary key currently allows alphanumerics and `-`. +/// +///

+/// 🚧 This code is experimental; it may change at any time, in breaking or non-breaking ways, +/// including in SemVer minor releases. It can be enabled with the "experimental" Cargo feature +/// of the `icu_provider` crate. Use with caution. +/// #3632 +///
+/// +/// # Examples +/// +/// ``` +/// use icu_locid::locale; +/// use icu_provider::prelude::*; +/// use writeable::assert_writeable_eq; +/// +/// let mut data_locale: DataLocale = locale!("ar-EG").into(); +/// assert_writeable_eq!(data_locale, "ar-EG"); +/// assert!(!data_locale.has_aux()); +/// assert_eq!(data_locale.get_aux(), None); +/// +/// let aux = "GBP" +/// .parse::() +/// .expect("contains valid characters"); +/// +/// data_locale.set_aux(aux); +/// assert_writeable_eq!(data_locale, "ar-EG+GBP"); +/// assert!(data_locale.has_aux()); +/// assert_eq!(data_locale.get_aux(), Some(&"GBP".parse().unwrap())); +/// ``` +/// +/// Multiple auxiliary keys are allowed: +/// +/// ``` +/// use icu_locid::locale; +/// use icu_provider::prelude::*; +/// use writeable::assert_writeable_eq; +/// +/// let data_locale = "ar-EG+GBP+long".parse::().unwrap(); +/// assert_writeable_eq!(data_locale, "ar-EG+GBP+long"); +/// assert_eq!(data_locale.get_aux().unwrap().iter().count(), 2); +/// ``` +/// +/// Not all strings are valid auxiliary keys: +/// +/// ``` +/// use icu_provider::prelude::*; +/// +/// assert!("abcdefg".parse::().is_ok()); +/// assert!("ABC123".parse::().is_ok()); +/// assert!("abc-xyz".parse::().is_ok()); +/// +/// assert!("".parse::().is_err()); +/// assert!("!@#$%".parse::().is_err()); +/// assert!("abc_xyz".parse::().is_err()); +/// ``` +/// +/// [`Keywords`]: unicode_ext::Keywords +#[derive(Debug, PartialEq, Clone, Eq, Hash)] +#[cfg(feature = "experimental")] +pub struct AuxiliaryKeys { + // DISCUSS: SmallStr? TinyStrAuto? + // DISCUSS: Make this a dynamically sized type so references can be taken? + value: AuxiliaryKeysInner, +} + +#[cfg(feature = "experimental")] +#[derive(Clone)] +enum AuxiliaryKeysInner { + Boxed(alloc::boxed::Box), + Stack(TinyAsciiStr<23>), + // NOTE: In the future, a `Static` variant could be added to allow `data_locale!("...")` + // Static(&'static str), +} + +#[cfg(feature = "experimental")] +impl Deref for AuxiliaryKeysInner { + type Target = str; + #[inline] + fn deref(&self) -> &Self::Target { + match self { + Self::Boxed(s) => s.deref(), + Self::Stack(s) => s.as_str(), + } + } +} + +#[cfg(feature = "experimental")] +impl PartialEq for AuxiliaryKeysInner { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.deref() == other.deref() + } +} + +#[cfg(feature = "experimental")] +impl Eq for AuxiliaryKeysInner {} + +#[cfg(feature = "experimental")] +impl Debug for AuxiliaryKeysInner { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.deref().fmt(f) + } +} + +#[cfg(feature = "experimental")] +impl Hash for AuxiliaryKeysInner { + #[inline] + fn hash(&self, state: &mut H) { + self.deref().hash(state) + } +} + +#[cfg(feature = "experimental")] +writeable::impl_display_with_writeable!(AuxiliaryKeys); + +#[cfg(feature = "experimental")] +impl Writeable for AuxiliaryKeys { + fn write_to(&self, sink: &mut W) -> fmt::Result { + self.value.write_to(sink) + } + fn writeable_length_hint(&self) -> LengthHint { + self.value.writeable_length_hint() + } + fn write_to_string(&self) -> alloc::borrow::Cow { + self.value.write_to_string() + } +} + +#[cfg(feature = "experimental")] +impl FromStr for AuxiliaryKeys { + type Err = DataError; + + fn from_str(s: &str) -> Result { + Self::try_from_str(s) + } +} + +#[cfg(feature = "experimental")] +impl AuxiliaryKeys { + /// Returns this [`AuxiliaryKeys`] as a single byte slice. + /// + /// NOTE: Do not make this public because we might not always store these in a single string. + /// External clients who need this can use `::write_to_string`. + #[inline] + pub(crate) fn as_bytes(&self) -> &[u8] { + self.value.as_bytes() + } + + /// Creates an [`AuxiliaryKeys`] from an iterator of individual keys. + /// + /// # Examples + /// + /// ``` + /// use icu_provider::prelude::*; + /// + /// // Single auxiliary key: + /// let a = AuxiliaryKeys::try_from_iter(["abc"]).unwrap(); + /// let b = "abc".parse::().unwrap(); + /// assert_eq!(a, b); + /// + /// // Multiple auxiliary keys: + /// let a = AuxiliaryKeys::try_from_iter(["abc", "defg"]).unwrap(); + /// let b = "abc+defg".parse::().unwrap(); + /// assert_eq!(a, b); + /// ``` + /// + /// Don't include the auxiliary key separator or other invalid chars in the iterator strings: + /// + /// ``` + /// use icu_provider::prelude::*; + /// + /// assert!(AuxiliaryKeys::try_from_iter(["abc+defg"]).is_err()); + /// assert!(AuxiliaryKeys::try_from_iter(["AB$C"]).is_err()); + /// ``` + pub fn try_from_iter<'a>(iter: impl IntoIterator) -> Result { + // TODO: Avoid the allocation when possible + let mut builder = String::new(); + for item in iter { + if !item.is_empty() + && item + .bytes() + .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'-')) + { + if !builder.is_empty() { + builder.push(AuxiliaryKeys::separator() as char); + } + builder.push_str(item) + } else { + return Err(DataErrorKind::KeyLocaleSyntax + .into_error() + .with_display_context(item)); + } + } + if builder.len() <= 23 { + #[allow(clippy::unwrap_used)] // we just checked that the string is ascii + Ok(Self { + value: AuxiliaryKeysInner::Stack(builder.parse().unwrap()), + }) + } else { + Ok(Self { + value: AuxiliaryKeysInner::Boxed(builder.into()), + }) + } + } + + pub(crate) fn try_from_str(s: &str) -> Result { + if !s.is_empty() + && s.bytes() + .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'-' | b'+')) + { + if s.len() <= 23 { + #[allow(clippy::unwrap_used)] // we just checked that the string is ascii + Ok(Self { + value: AuxiliaryKeysInner::Stack(s.parse().unwrap()), + }) + } else { + Ok(Self { + value: AuxiliaryKeysInner::Boxed(s.into()), + }) + } + } else { + Err(DataErrorKind::KeyLocaleSyntax + .into_error() + .with_display_context(s)) + } + } + + /// Iterates over the components of the auxiliary key. + /// + /// # Example + /// + /// ``` + /// use icu_provider::AuxiliaryKeys; + /// + /// let aux: AuxiliaryKeys = "abc+defg".parse().unwrap(); + /// assert_eq!(aux.iter().collect::>(), vec!["abc", "defg"]); + /// ``` + pub fn iter(&self) -> impl Iterator + '_ { + self.value.split(Self::separator() as char) + } + + /// Returns the separator byte used for auxiliary keys in data locales. + /// + /// # Examples + /// + /// ``` + /// use icu_provider::AuxiliaryKeys; + /// + /// assert_eq!(AuxiliaryKeys::separator(), b'+'); + /// ``` + #[inline] + pub const fn separator() -> u8 { + AUXILIARY_KEY_SEPARATOR + } } #[test] @@ -491,24 +1016,91 @@ fn test_data_locale_to_string() { use icu_locid::locale; struct TestCase { - pub locale: DataLocale, + pub locale: Locale, + pub aux: Option<&'static str>, pub expected: &'static str, } for cas in [ TestCase { - locale: Locale::UND.into(), + locale: Locale::UND, + aux: None, expected: "und", }, TestCase { - locale: locale!("und-u-cu-gbp").into(), + locale: locale!("und-u-cu-gbp"), + aux: None, expected: "und-u-cu-gbp", }, TestCase { - locale: locale!("en-ZA-u-cu-gbp").into(), + locale: locale!("en-ZA-u-cu-gbp"), + aux: None, expected: "en-ZA-u-cu-gbp", }, + #[cfg(feature = "experimental")] + TestCase { + locale: locale!("en-ZA-u-nu-arab"), + aux: Some("GBP"), + expected: "en-ZA-u-nu-arab+GBP", + }, ] { - writeable::assert_writeable_eq!(cas.locale, cas.expected); + let mut data_locale = DataLocale::from(cas.locale); + #[cfg(feature = "experimental")] + if let Some(aux) = cas.aux { + data_locale.set_aux(aux.parse().unwrap()); + } + writeable::assert_writeable_eq!(data_locale, cas.expected); + } +} + +#[test] +fn test_data_locale_from_string() { + #[derive(Debug)] + struct TestCase { + pub input: &'static str, + pub success: bool, + } + + for cas in [ + TestCase { + input: "und", + success: true, + }, + TestCase { + input: "und-u-cu-gbp", + success: true, + }, + TestCase { + input: "en-ZA-u-cu-gbp", + success: true, + }, + TestCase { + input: "en...", + success: false, + }, + #[cfg(feature = "experimental")] + TestCase { + input: "en-ZA-u-nu-arab+GBP", + success: true, + }, + #[cfg(not(feature = "experimental"))] + TestCase { + input: "en-ZA-u-nu-arab+GBP", + success: false, + }, + ] { + let data_locale = match (DataLocale::from_str(cas.input), cas.success) { + (Ok(l), true) => l, + (Err(_), false) => { + continue; + } + (Ok(_), false) => { + panic!("DataLocale parsed but it was supposed to fail: {cas:?}"); + } + (Err(_), true) => { + panic!("DataLocale was supposed to parse but it failed: {cas:?}"); + } + }; + writeable::assert_writeable_eq!(data_locale, cas.input); } } diff --git a/vendor/icu_provider/src/response.rs b/vendor/icu_provider/src/response.rs index ad0c5865f..1b03a4fd3 100644 --- a/vendor/icu_provider/src/response.rs +++ b/vendor/icu_provider/src/response.rs @@ -71,11 +71,11 @@ pub struct DataResponseMetadata { /// /// assert_eq!("Demo", payload.get().message); /// ``` -pub struct DataPayload -where - M: DataMarker, -{ - pub(crate) yoke: Yoke>, +pub struct DataPayload(pub(crate) DataPayloadInner); + +pub(crate) enum DataPayloadInner { + Yoke(Yoke>), + StaticRef(&'static M::Yokeable), } /// The type of the "cart" that is used by `DataPayload`. @@ -136,9 +136,10 @@ where for<'a> YokeTraitHack<>::Output>: Clone, { fn clone(&self) -> Self { - Self { - yoke: self.yoke.clone(), - } + Self(match &self.0 { + DataPayloadInner::Yoke(yoke) => DataPayloadInner::Yoke(yoke.clone()), + DataPayloadInner::StaticRef(r) => DataPayloadInner::StaticRef(*r), + }) } } @@ -163,6 +164,7 @@ where fn test_clone_eq() { use crate::hello_world::*; let p1 = DataPayload::::from_static_str("Demo"); + #[allow(clippy::redundant_clone)] let p2 = p1.clone(); assert_eq!(p1, p2); } @@ -192,18 +194,24 @@ where /// assert_eq!(payload.get(), &local_struct); /// ``` #[inline] - pub fn from_owned(data: M::Yokeable) -> Self { - Self { - yoke: Yoke::new_owned(data), - } + pub const fn from_owned(data: M::Yokeable) -> Self { + Self(DataPayloadInner::Yoke(Yoke::new_owned(data))) + } + + #[doc(hidden)] + #[inline] + pub const fn from_static_ref(data: &'static M::Yokeable) -> Self { + Self(DataPayloadInner::StaticRef(data)) } /// Convert a DataPayload that was created via [`DataPayload::from_owned()`] back into the /// concrete type used to construct it. pub fn try_unwrap_owned(self) -> Result { - self.yoke - .try_into_yokeable() - .map_err(|_| DataErrorKind::InvalidState.with_str_context("try_unwrap_owned")) + match self.0 { + DataPayloadInner::Yoke(yoke) => yoke.try_into_yokeable().ok(), + DataPayloadInner::StaticRef(_) => None, + } + .ok_or(DataErrorKind::InvalidState.with_str_context("try_unwrap_owned")) } /// Mutate the data contained in this DataPayload. @@ -244,8 +252,15 @@ where pub fn with_mut<'a, F>(&'a mut self, f: F) where F: 'static + for<'b> FnOnce(&'b mut >::Output), + M::Yokeable: zerofrom::ZeroFrom<'static, M::Yokeable>, { - self.yoke.with_mut(f) + if let DataPayloadInner::StaticRef(r) = self.0 { + self.0 = DataPayloadInner::Yoke(Yoke::new_owned(zerofrom::ZeroFrom::zero_from(r))); + } + match &mut self.0 { + DataPayloadInner::Yoke(yoke) => yoke.with_mut(f), + _ => unreachable!(), + } } /// Borrows the underlying data. @@ -266,7 +281,10 @@ where #[inline] #[allow(clippy::needless_lifetimes)] pub fn get<'a>(&'a self) -> &'a >::Output { - self.yoke.get() + match &self.0 { + DataPayloadInner::Yoke(yoke) => yoke.get(), + DataPayloadInner::StaticRef(r) => Yokeable::transform(*r), + } } /// Maps `DataPayload` to `DataPayload` by projecting it with [`Yoke::map_project`]. @@ -318,10 +336,15 @@ where >::Output, PhantomData<&'a ()>, ) -> >::Output, + M::Yokeable: zerofrom::ZeroFrom<'static, M::Yokeable>, { - DataPayload { - yoke: self.yoke.map_project(f), - } + DataPayload(DataPayloadInner::Yoke( + match self.0 { + DataPayloadInner::Yoke(yoke) => yoke, + DataPayloadInner::StaticRef(r) => Yoke::new_owned(zerofrom::ZeroFrom::zero_from(r)), + } + .map_project(f), + )) } /// Version of [`DataPayload::map_project()`] that borrows `self` instead of moving `self`. @@ -362,9 +385,16 @@ where PhantomData<&'a ()>, ) -> >::Output, { - DataPayload { - yoke: self.yoke.map_project_cloned(f), - } + DataPayload(DataPayloadInner::Yoke(match &self.0 { + DataPayloadInner::Yoke(yoke) => yoke.map_project_cloned(f), + DataPayloadInner::StaticRef(r) => { + let output: >::Output = + f(Yokeable::transform(*r), PhantomData); + // Safety: >::Output is the same type as M2::Yokeable + let yokeable: M2::Yokeable = unsafe { M2::Yokeable::make(output) }; + Yoke::new_owned(yokeable) + } + })) } /// Version of [`DataPayload::map_project()`] that bubbles up an error from `f`. @@ -411,10 +441,15 @@ where >::Output, PhantomData<&'a ()>, ) -> Result<>::Output, E>, + M::Yokeable: zerofrom::ZeroFrom<'static, M::Yokeable>, { - Ok(DataPayload { - yoke: self.yoke.try_map_project(f)?, - }) + Ok(DataPayload(DataPayloadInner::Yoke( + match self.0 { + DataPayloadInner::Yoke(yoke) => yoke, + DataPayloadInner::StaticRef(r) => Yoke::new_owned(zerofrom::ZeroFrom::zero_from(r)), + } + .try_map_project(f)?, + ))) } /// Version of [`DataPayload::map_project_cloned()`] that bubbles up an error from `f`. @@ -465,18 +500,27 @@ where PhantomData<&'a ()>, ) -> Result<>::Output, E>, { - Ok(DataPayload { - yoke: self.yoke.try_map_project_cloned(f)?, - }) + Ok(DataPayload(DataPayloadInner::Yoke(match &self.0 { + DataPayloadInner::Yoke(yoke) => yoke.try_map_project_cloned(f)?, + DataPayloadInner::StaticRef(r) => { + let output: >::Output = + f(Yokeable::transform(*r), PhantomData)?; + // Safety: >::Output is the same type as M2::Yokeable + Yoke::new_owned(unsafe { M2::Yokeable::make(output) }) + } + }))) } - /// Convert between two [`DataMarker`] types that are compatible with each other. + /// Convert between two [`DataMarker`] types that are compatible with each other + /// with compile-time type checking. /// /// This happens if they both have the same [`DataMarker::Yokeable`] type. /// /// Can be used to erase the key of a data payload in cases where multiple keys correspond /// to the same data struct. /// + /// For runtime dynamic casting, use [`DataPayload::dynamic_cast_mut()`]. + /// /// # Examples /// /// ```no_run @@ -497,7 +541,76 @@ where where M2: DataMarker, { - DataPayload { yoke: self.yoke } + DataPayload(match self.0 { + DataPayloadInner::Yoke(yoke) => DataPayloadInner::Yoke(yoke), + DataPayloadInner::StaticRef(r) => DataPayloadInner::StaticRef(r), + }) + } + + /// Convert a mutable reference of a [`DataPayload`] to another mutable reference + /// of the same type with runtime type checking. + /// + /// Primarily useful to convert from a generic to a concrete marker type. + /// + /// If the `M2` type argument does not match the true marker type, a `DataError` is returned. + /// + /// For compile-time static casting, use [`DataPayload::cast()`]. + /// + /// # Examples + /// + /// Change the results of a particular request based on key: + /// + /// ``` + /// use icu_locid::locale; + /// use icu_provider::hello_world::*; + /// use icu_provider::prelude::*; + /// + /// struct MyWrapper

{ + /// inner: P, + /// } + /// + /// impl DataProvider for MyWrapper

+ /// where + /// M: KeyedDataMarker, + /// P: DataProvider, + /// { + /// #[inline] + /// fn load(&self, req: DataRequest) -> Result, DataError> { + /// let mut res = self.inner.load(req)?; + /// if let Some(ref mut generic_payload) = res.payload { + /// let mut cast_result = + /// generic_payload.dynamic_cast_mut::(); + /// if let Ok(ref mut concrete_payload) = cast_result { + /// // Add an emoji to the hello world message + /// concrete_payload.with_mut(|data| { + /// data.message.to_mut().insert_str(0, "✨ "); + /// }); + /// } + /// } + /// Ok(res) + /// } + /// } + /// + /// let provider = MyWrapper { + /// inner: HelloWorldProvider, + /// }; + /// let formatter = + /// HelloWorldFormatter::try_new_unstable(&provider, &locale!("de").into()) + /// .unwrap(); + /// + /// assert_eq!(formatter.format_to_string(), "✨ Hallo Welt"); + /// ``` + #[inline] + pub fn dynamic_cast_mut(&mut self) -> Result<&mut DataPayload, DataError> + where + M2: DataMarker, + { + let this: &mut dyn core::any::Any = self; + if let Some(this) = this.downcast_mut() { + Ok(this) + } else { + Err(DataError::for_type::().with_str_context(core::any::type_name::())) + } } } @@ -507,19 +620,17 @@ impl DataPayload { let yoke = Yoke::attach_to_cart(SelectedRc::new(buffer), |b| &**b); // Safe because cart is wrapped let yoke = unsafe { yoke.replace_cart(|b| Some(Cart(b))) }; - Self { yoke } + Self(DataPayloadInner::Yoke(yoke)) } /// Converts a yoked byte buffer into a `DataPayload`. pub fn from_yoked_buffer(yoke: Yoke<&'static [u8], Option>) -> Self { - Self { yoke } + Self(DataPayloadInner::Yoke(yoke)) } /// Converts a static byte buffer into a `DataPayload`. pub fn from_static_buffer(buffer: &'static [u8]) -> Self { - Self { - yoke: Yoke::new_owned(buffer), - } + Self(DataPayloadInner::Yoke(Yoke::new_owned(buffer))) } } @@ -542,7 +653,7 @@ where /// Metadata about the returned object. pub metadata: DataResponseMetadata, - /// The object itself; None if it was not loaded. + /// The object itself; `None` if it was not loaded. pub payload: Option>, } diff --git a/vendor/icu_provider/src/serde/mod.rs b/vendor/icu_provider/src/serde/mod.rs index 098f1e880..edd827c31 100644 --- a/vendor/icu_provider/src/serde/mod.rs +++ b/vendor/icu_provider/src/serde/mod.rs @@ -29,6 +29,13 @@ pub struct DeserializingBufferProvider<'a, P: ?Sized>(&'a P); /// Blanket-implemented trait adding the [`Self::as_deserializing()`] function. pub trait AsDeserializingBufferProvider { /// Wrap this [`BufferProvider`] in a [`DeserializingBufferProvider`]. + /// + /// This requires enabling the deserialization Cargo feature + /// for the expected format(s): + /// + /// - `deserialize_json` + /// - `deserialize_postcard_1` + /// - `deserialize_bincode_1` fn as_deserializing(&self) -> DeserializingBufferProvider; } @@ -37,6 +44,13 @@ where P: BufferProvider + ?Sized, { /// Wrap this [`BufferProvider`] in a [`DeserializingBufferProvider`]. + /// + /// This requires enabling the deserialization Cargo feature + /// for the expected format(s): + /// + /// - `deserialize_json` + /// - `deserialize_postcard_1` + /// - `deserialize_bincode_1` fn as_deserializing(&self) -> DeserializingBufferProvider { DeserializingBufferProvider(self) } @@ -90,6 +104,13 @@ impl DataPayload { /// Deserialize a [`DataPayload`]`<`[`BufferMarker`]`>` into a [`DataPayload`] of a /// specific concrete type. /// + /// This requires enabling the deserialization Cargo feature + /// for the expected format(s): + /// + /// - `deserialize_json` + /// - `deserialize_postcard_1` + /// - `deserialize_bincode_1` + /// /// This function takes the buffer format as an argument. When a buffer payload is returned /// from a data provider, the buffer format is stored in the [`DataResponseMetadata`]. /// @@ -135,6 +156,14 @@ where // Necessary workaround bound (see `yoke::trait_hack` docs): for<'de> YokeTraitHack<>::Output>: Deserialize<'de>, { + /// Converts a buffer into a concrete type by deserializing from a supported buffer format. + /// + /// This requires enabling the deserialization Cargo feature + /// for the expected format(s): + /// + /// - `deserialize_json` + /// - `deserialize_postcard_1` + /// - `deserialize_bincode_1` fn load_data(&self, key: DataKey, req: DataRequest) -> Result, DataError> { let buffer_response = BufferProvider::load_buffer(self.0, key, req)?; let buffer_format = buffer_response.metadata.buffer_format.ok_or_else(|| { @@ -161,6 +190,13 @@ where for<'de> YokeTraitHack<>::Output>: Deserialize<'de>, { /// Converts a buffer into a concrete type by deserializing from a supported buffer format. + /// + /// This requires enabling the deserialization Cargo feature + /// for the expected format(s): + /// + /// - `deserialize_json` + /// - `deserialize_postcard_1` + /// - `deserialize_bincode_1` fn load(&self, req: DataRequest) -> Result, DataError> { self.load_data(M::KEY, req) } -- cgit v1.2.3