diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /third_party/rust/icu_provider/src | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/icu_provider/src')
19 files changed, 6253 insertions, 0 deletions
diff --git a/third_party/rust/icu_provider/src/any.rs b/third_party/rust/icu_provider/src/any.rs new file mode 100644 index 0000000000..243055d212 --- /dev/null +++ b/third_party/rust/icu_provider/src/any.rs @@ -0,0 +1,514 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Traits for data providers that produce `Any` objects. + +use crate::prelude::*; +use crate::response::DataPayloadInner; +use core::any::Any; +use core::convert::TryFrom; +use core::convert::TryInto; +use yoke::trait_hack::YokeTraitHack; +use yoke::Yokeable; +use zerofrom::ZeroFrom; + +#[cfg(not(feature = "sync"))] +use alloc::rc::Rc as SelectedRc; +#[cfg(feature = "sync")] +use alloc::sync::Arc as SelectedRc; + +/// A trait that allows to specify `Send + Sync` bounds that are only required when +/// the `sync` Cargo feature is enabled. Without the Cargo feature, this is an empty bound. +#[cfg(feature = "sync")] +pub trait MaybeSendSync: Send + Sync {} +#[cfg(feature = "sync")] +impl<T: Send + Sync> MaybeSendSync for T {} + +#[allow(missing_docs)] // docs generated with all features +#[cfg(not(feature = "sync"))] +pub trait MaybeSendSync {} +#[cfg(not(feature = "sync"))] +impl<T> MaybeSendSync for T {} + +/// Representations of the `Any` trait object. +/// +/// **Important Note:** The types enclosed by `StructRef` and `PayloadRc` are NOT the same! +/// The first refers to the struct itself, whereas the second refers to a `DataPayload`. +#[derive(Debug, Clone)] +enum AnyPayloadInner { + /// A reference to `M::Yokeable` + StructRef(&'static dyn Any), + /// A boxed `DataPayload<M>`. + /// + /// Note: This needs to be reference counted, not a `Box`, so that `AnyPayload` is cloneable. + /// If an `AnyPayload` is cloned, the actual cloning of the data is delayed until + /// `downcast()` is invoked (at which point we have the concrete type). 
+ + #[cfg(not(feature = "sync"))] + PayloadRc(SelectedRc<dyn Any>), + + #[cfg(feature = "sync")] + PayloadRc(SelectedRc<dyn Any + Send + Sync>), +} + +/// A type-erased data payload. +/// +/// The only useful method on this type is [`AnyPayload::downcast()`], which transforms this into +/// a normal `DataPayload` which you can subsequently access or mutate. +/// +/// As with `DataPayload`, cloning is designed to be cheap. +#[derive(Debug, Clone, Yokeable)] +pub struct AnyPayload { + inner: AnyPayloadInner, + type_name: &'static str, +} + +/// The [`DataMarker`] marker type for [`AnyPayload`]. +#[allow(clippy::exhaustive_structs)] // marker type +#[derive(Debug)] +pub struct AnyMarker; + +impl DataMarker for AnyMarker { + type Yokeable = AnyPayload; +} + +impl<M> crate::dynutil::UpcastDataPayload<M> for AnyMarker +where + M: DataMarker, + M::Yokeable: MaybeSendSync, +{ + #[inline] + fn upcast(other: DataPayload<M>) -> DataPayload<AnyMarker> { + DataPayload::from_owned(other.wrap_into_any_payload()) + } +} + +impl AnyPayload { + /// Transforms a type-erased `AnyPayload` into a concrete `DataPayload<M>`. + /// + /// Because it is expected that the call site knows the identity of the AnyPayload (e.g., from + /// the data request), this function returns a `DataError` if the generic type does not match + /// the type stored in the `AnyPayload`. 
+ pub fn downcast<M>(self) -> Result<DataPayload<M>, DataError> + where + M: DataMarker, + // For the StructRef case: + M::Yokeable: ZeroFrom<'static, M::Yokeable>, + // For the PayloadRc case: + M::Yokeable: MaybeSendSync, + for<'a> YokeTraitHack<<M::Yokeable as Yokeable<'a>>::Output>: Clone, + { + use AnyPayloadInner::*; + let type_name = self.type_name; + match self.inner { + StructRef(any_ref) => { + let down_ref: &'static M::Yokeable = any_ref + .downcast_ref() + .ok_or_else(|| DataError::for_type::<M>().with_str_context(type_name))?; + Ok(DataPayload::from_static_ref(down_ref)) + } + PayloadRc(any_rc) => { + let down_rc = any_rc + .downcast::<DataPayload<M>>() + .map_err(|_| DataError::for_type::<M>().with_str_context(type_name))?; + Ok(SelectedRc::try_unwrap(down_rc).unwrap_or_else(|down_rc| (*down_rc).clone())) + } + } + } + + /// Clones and then transforms a type-erased `AnyPayload` into a concrete `DataPayload<M>`. + pub fn downcast_cloned<M>(&self) -> Result<DataPayload<M>, DataError> + where + M: DataMarker, + // For the StructRef case: + M::Yokeable: ZeroFrom<'static, M::Yokeable>, + // For the PayloadRc case: + M::Yokeable: MaybeSendSync, + for<'a> YokeTraitHack<<M::Yokeable as Yokeable<'a>>::Output>: Clone, + { + self.clone().downcast() + } + + /// Creates an `AnyPayload` from a static reference to a data struct. 
+ /// + /// # Examples + /// + /// ``` + /// use icu_provider::hello_world::*; + /// use icu_provider::prelude::*; + /// use std::borrow::Cow; + /// + /// const HELLO_DATA: HelloWorldV1<'static> = HelloWorldV1 { + /// message: Cow::Borrowed("Custom Hello World"), + /// }; + /// + /// let any_payload = AnyPayload::from_static_ref(&HELLO_DATA); + /// + /// let payload: DataPayload<HelloWorldV1Marker> = + /// any_payload.downcast().expect("TypeId matches"); + /// assert_eq!("Custom Hello World", payload.get().message); + /// ``` + pub fn from_static_ref<Y>(static_ref: &'static Y) -> Self + where + Y: for<'a> Yokeable<'a>, + { + AnyPayload { + inner: AnyPayloadInner::StructRef(static_ref), + // Note: This records the Yokeable type rather than the DataMarker type, + // but that is okay since this is only for debugging + type_name: core::any::type_name::<Y>(), + } + } +} + +impl<M> DataPayload<M> +where + M: DataMarker, + M::Yokeable: MaybeSendSync, +{ + /// Converts this DataPayload into a type-erased `AnyPayload`. Unless the payload stores a static + /// reference, this will move it to the heap. 
+ /// + /// # Examples + /// + /// ``` + /// use icu_provider::hello_world::*; + /// use icu_provider::prelude::*; + /// use std::borrow::Cow; + /// use std::rc::Rc; + /// + /// let payload: DataPayload<HelloWorldV1Marker> = + /// DataPayload::from_owned(HelloWorldV1 { + /// message: Cow::Borrowed("Custom Hello World"), + /// }); + /// + /// let any_payload = payload.wrap_into_any_payload(); + /// + /// let payload: DataPayload<HelloWorldV1Marker> = + /// any_payload.downcast().expect("TypeId matches"); + /// assert_eq!("Custom Hello World", payload.get().message); + /// ``` + pub fn wrap_into_any_payload(self) -> AnyPayload { + AnyPayload { + inner: match self.0 { + DataPayloadInner::StaticRef(r) => AnyPayloadInner::StructRef(r), + inner => AnyPayloadInner::PayloadRc(SelectedRc::from(Self(inner))), + }, + type_name: core::any::type_name::<M>(), + } + } +} + +impl DataPayload<AnyMarker> { + /// Transforms a type-erased `DataPayload<AnyMarker>` into a concrete `DataPayload<M>`. + #[inline] + pub fn downcast<M>(self) -> Result<DataPayload<M>, DataError> + where + M: DataMarker, + for<'a> YokeTraitHack<<M::Yokeable as Yokeable<'a>>::Output>: Clone, + M::Yokeable: ZeroFrom<'static, M::Yokeable>, + M::Yokeable: MaybeSendSync, + { + self.try_unwrap_owned()?.downcast() + } +} + +/// A [`DataResponse`] for type-erased values. +/// +/// Convertible to and from `DataResponse<AnyMarker>`. +#[allow(clippy::exhaustive_structs)] // this type is stable (the metadata is allowed to grow) +#[derive(Debug)] +pub struct AnyResponse { + /// Metadata about the returned object. + pub metadata: DataResponseMetadata, + + /// The object itself; `None` if it was not loaded. 
+ pub payload: Option<AnyPayload>, +} + +impl TryFrom<DataResponse<AnyMarker>> for AnyResponse { + type Error = DataError; + #[inline] + fn try_from(other: DataResponse<AnyMarker>) -> Result<Self, Self::Error> { + Ok(Self { + metadata: other.metadata, + payload: other.payload.map(|p| p.try_unwrap_owned()).transpose()?, + }) + } +} + +impl From<AnyResponse> for DataResponse<AnyMarker> { + #[inline] + fn from(other: AnyResponse) -> Self { + Self { + metadata: other.metadata, + payload: other.payload.map(DataPayload::from_owned), + } + } +} + +impl AnyResponse { + /// Transforms a type-erased `AnyResponse` into a concrete `DataResponse<M>`. + #[inline] + pub fn downcast<M>(self) -> Result<DataResponse<M>, DataError> + where + M: DataMarker, + for<'a> YokeTraitHack<<M::Yokeable as Yokeable<'a>>::Output>: Clone, + M::Yokeable: ZeroFrom<'static, M::Yokeable>, + M::Yokeable: MaybeSendSync, + { + Ok(DataResponse { + metadata: self.metadata, + payload: self.payload.map(|p| p.downcast()).transpose()?, + }) + } + + /// Clones and then transforms a type-erased `AnyResponse` into a concrete `DataResponse<M>`. + pub fn downcast_cloned<M>(&self) -> Result<DataResponse<M>, DataError> + where + M: DataMarker, + M::Yokeable: ZeroFrom<'static, M::Yokeable>, + M::Yokeable: MaybeSendSync, + for<'a> YokeTraitHack<<M::Yokeable as Yokeable<'a>>::Output>: Clone, + { + Ok(DataResponse { + metadata: self.metadata.clone(), + payload: self + .payload + .as_ref() + .map(|p| p.downcast_cloned()) + .transpose()?, + }) + } +} + +impl<M> DataResponse<M> +where + M: DataMarker, + M::Yokeable: MaybeSendSync, +{ + /// Moves the inner DataPayload to the heap (requiring an allocation) and returns it as an + /// erased `AnyResponse`. + pub fn wrap_into_any_response(self) -> AnyResponse { + AnyResponse { + metadata: self.metadata, + payload: self.payload.map(|p| p.wrap_into_any_payload()), + } + } +} + +/// An object-safe data provider that returns data structs cast to `dyn Any` trait objects. 
+/// +/// # Examples +/// +/// ``` +/// use icu_provider::hello_world::*; +/// use icu_provider::prelude::*; +/// use std::borrow::Cow; +/// +/// let any_provider = HelloWorldProvider.as_any_provider(); +/// +/// let req = DataRequest { +/// locale: &icu_locid::locale!("de").into(), +/// metadata: Default::default(), +/// }; +/// +/// // Downcasting manually +/// assert_eq!( +/// any_provider +/// .load_any(HelloWorldV1Marker::KEY, req) +/// .expect("load should succeed") +/// .downcast::<HelloWorldV1Marker>() +/// .expect("types should match") +/// .take_payload() +/// .unwrap() +/// .get(), +/// &HelloWorldV1 { +/// message: Cow::Borrowed("Hallo Welt"), +/// }, +/// ); +/// +/// // Downcasting automatically +/// let downcasting_provider: &dyn DataProvider<HelloWorldV1Marker> = +/// &any_provider.as_downcasting(); +/// +/// assert_eq!( +/// downcasting_provider +/// .load(req) +/// .expect("load should succeed") +/// .take_payload() +/// .unwrap() +/// .get(), +/// &HelloWorldV1 { +/// message: Cow::Borrowed("Hallo Welt"), +/// }, +/// ); +/// ``` +pub trait AnyProvider { + /// Loads an [`AnyPayload`] according to the key and request. 
+ fn load_any(&self, key: DataKey, req: DataRequest) -> Result<AnyResponse, DataError>; +} + +impl<'a, T: AnyProvider + ?Sized> AnyProvider for &'a T { + fn load_any(&self, key: DataKey, req: DataRequest) -> Result<AnyResponse, DataError> { + (**self).load_any(key, req) + } +} + +impl<T: AnyProvider + ?Sized> AnyProvider for alloc::boxed::Box<T> { + fn load_any(&self, key: DataKey, req: DataRequest) -> Result<AnyResponse, DataError> { + (**self).load_any(key, req) + } +} + +impl<T: AnyProvider + ?Sized> AnyProvider for alloc::rc::Rc<T> { + fn load_any(&self, key: DataKey, req: DataRequest) -> Result<AnyResponse, DataError> { + (**self).load_any(key, req) + } +} + +#[cfg(target_has_atomic = "ptr")] +impl<T: AnyProvider + ?Sized> AnyProvider for alloc::sync::Arc<T> { + fn load_any(&self, key: DataKey, req: DataRequest) -> Result<AnyResponse, DataError> { + (**self).load_any(key, req) + } +} + +/// A wrapper over `DynamicDataProvider<AnyMarker>` that implements `AnyProvider` +#[allow(clippy::exhaustive_structs)] // newtype +#[derive(Debug)] +pub struct DynamicDataProviderAnyMarkerWrap<'a, P: ?Sized>(pub &'a P); + +/// Blanket-implemented trait adding the [`Self::as_any_provider()`] function. 
+pub trait AsDynamicDataProviderAnyMarkerWrap { + /// Returns an object implementing `AnyProvider` when called on `DynamicDataProvider<AnyMarker>` + fn as_any_provider(&self) -> DynamicDataProviderAnyMarkerWrap<Self>; +} + +impl<P> AsDynamicDataProviderAnyMarkerWrap for P +where + P: DynamicDataProvider<AnyMarker> + ?Sized, +{ + #[inline] + fn as_any_provider(&self) -> DynamicDataProviderAnyMarkerWrap<P> { + DynamicDataProviderAnyMarkerWrap(self) + } +} + +impl<P> AnyProvider for DynamicDataProviderAnyMarkerWrap<'_, P> +where + P: DynamicDataProvider<AnyMarker> + ?Sized, +{ + #[inline] + fn load_any(&self, key: DataKey, req: DataRequest) -> Result<AnyResponse, DataError> { + self.0.load_data(key, req)?.try_into() + } +} + +/// A wrapper over `AnyProvider` that implements `DynamicDataProvider<M>` via downcasting +#[allow(clippy::exhaustive_structs)] // newtype +#[derive(Debug)] +pub struct DowncastingAnyProvider<'a, P: ?Sized>(pub &'a P); + +/// Blanket-implemented trait adding the [`Self::as_downcasting()`] function. +pub trait AsDowncastingAnyProvider { + /// Returns an object implementing `DynamicDataProvider<M>` when called on `AnyProvider` + fn as_downcasting(&self) -> DowncastingAnyProvider<Self>; +} + +impl<P> AsDowncastingAnyProvider for P +where + P: AnyProvider + ?Sized, +{ + #[inline] + fn as_downcasting(&self) -> DowncastingAnyProvider<P> { + DowncastingAnyProvider(self) + } +} + +impl<M, P> DataProvider<M> for DowncastingAnyProvider<'_, P> +where + P: AnyProvider + ?Sized, + M: KeyedDataMarker, + for<'a> YokeTraitHack<<M::Yokeable as Yokeable<'a>>::Output>: Clone, + M::Yokeable: ZeroFrom<'static, M::Yokeable>, + M::Yokeable: MaybeSendSync, +{ + #[inline] + fn load(&self, req: DataRequest) -> Result<DataResponse<M>, DataError> { + self.0 + .load_any(M::KEY, req)? 
+ .downcast() + .map_err(|e| e.with_req(M::KEY, req)) + } +} + +impl<M, P> DynamicDataProvider<M> for DowncastingAnyProvider<'_, P> +where + P: AnyProvider + ?Sized, + M: DataMarker, + for<'a> YokeTraitHack<<M::Yokeable as Yokeable<'a>>::Output>: Clone, + M::Yokeable: ZeroFrom<'static, M::Yokeable>, + M::Yokeable: MaybeSendSync, +{ + #[inline] + fn load_data(&self, key: DataKey, req: DataRequest) -> Result<DataResponse<M>, DataError> { + self.0 + .load_any(key, req)? + .downcast() + .map_err(|e| e.with_req(key, req)) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::hello_world::*; + use alloc::borrow::Cow; + + const CONST_DATA: HelloWorldV1<'static> = HelloWorldV1 { + message: Cow::Borrowed("Custom Hello World"), + }; + + #[test] + fn test_debug() { + let payload: DataPayload<HelloWorldV1Marker> = DataPayload::from_owned(HelloWorldV1 { + message: Cow::Borrowed("Custom Hello World"), + }); + + let any_payload = payload.wrap_into_any_payload(); + assert_eq!( + "AnyPayload { inner: PayloadRc(Any { .. }), type_name: \"icu_provider::hello_world::HelloWorldV1Marker\" }", + format!("{any_payload:?}") + ); + + struct WrongMarker; + + impl DataMarker for WrongMarker { + type Yokeable = u8; + } + + let err = any_payload.downcast::<WrongMarker>().unwrap_err(); + assert_eq!( + "ICU4X data error: Mismatched types: tried to downcast with icu_provider::any::test::test_debug::WrongMarker, but actual type is different: icu_provider::hello_world::HelloWorldV1Marker", + format!("{err}") + ); + } + + #[test] + fn test_non_owned_any_marker() { + // This test demonstrates a code path that can trigger the InvalidState error kind. 
+ let payload_result: DataPayload<AnyMarker> = + DataPayload::from_owned_buffer(Box::new(*b"pretend we're borrowing from here")) + .map_project(|_, _| AnyPayload::from_static_ref(&CONST_DATA)); + let err = payload_result.downcast::<HelloWorldV1Marker>().unwrap_err(); + assert!(matches!( + err, + DataError { + kind: DataErrorKind::InvalidState, + .. + } + )); + } +} diff --git a/third_party/rust/icu_provider/src/buf.rs b/third_party/rust/icu_provider/src/buf.rs new file mode 100644 index 0000000000..0a0ad6eb30 --- /dev/null +++ b/third_party/rust/icu_provider/src/buf.rs @@ -0,0 +1,168 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Traits for data providers that produce opaque buffers. + +use crate::prelude::*; + +/// [`DataMarker`] for raw buffers. Returned by [`BufferProvider`]. +/// +/// The data is expected to be deserialized before it can be used; see +/// [`DataPayload::into_deserialized`]. +#[allow(clippy::exhaustive_structs)] // marker type +#[derive(Debug)] +pub struct BufferMarker; + +impl DataMarker for BufferMarker { + type Yokeable = &'static [u8]; +} + +/// A data provider that returns opaque bytes. +/// +/// Generally, these bytes are expected to be deserializable with Serde. To get an object +/// implementing [`DataProvider`] via Serde, use [`as_deserializing()`]. +/// +/// Passing a `BufferProvider` to a `*_with_buffer_provider` constructor requires enabling +/// the deserialization Cargo feature for the expected format(s): +/// - `deserialize_json` +/// - `deserialize_postcard_1` +/// - `deserialize_bincode_1` +/// +/// Along with [`DataProvider`], this is one of the two foundational traits in this crate. +/// +/// [`BufferProvider`] can be made into a trait object. It is used over FFI. 
+/// +/// # Examples +/// +/// ``` +/// # #[cfg(feature = "deserialize_json")] { +/// use icu_locid::locale; +/// use icu_provider::hello_world::*; +/// use icu_provider::prelude::*; +/// use std::borrow::Cow; +/// +/// let buffer_provider = HelloWorldProvider.into_json_provider(); +/// +/// let req = DataRequest { +/// locale: &locale!("de").into(), +/// metadata: Default::default(), +/// }; +/// +/// // Deserializing manually +/// assert_eq!( +/// serde_json::from_slice::<HelloWorldV1>( +/// buffer_provider +/// .load_buffer(HelloWorldV1Marker::KEY, req) +/// .expect("load should succeed") +/// .take_payload() +/// .unwrap() +/// .get() +/// ) +/// .expect("should deserialize"), +/// HelloWorldV1 { +/// message: Cow::Borrowed("Hallo Welt"), +/// }, +/// ); +/// +/// // Deserialize automatically +/// let deserializing_provider: &dyn DataProvider<HelloWorldV1Marker> = +/// &buffer_provider.as_deserializing(); +/// +/// assert_eq!( +/// deserializing_provider +/// .load(req) +/// .expect("load should succeed") +/// .take_payload() +/// .unwrap() +/// .get(), +/// &HelloWorldV1 { +/// message: Cow::Borrowed("Hallo Welt"), +/// }, +/// ); +/// # } +/// ``` +/// +/// [`as_deserializing()`]: AsDeserializingBufferProvider::as_deserializing +pub trait BufferProvider { + /// Loads a [`DataPayload`]`<`[`BufferMarker`]`>` according to the key and request. 
+ fn load_buffer( + &self, + key: DataKey, + req: DataRequest, + ) -> Result<DataResponse<BufferMarker>, DataError>; +} + +impl<'a, T: BufferProvider + ?Sized> BufferProvider for &'a T { + fn load_buffer( + &self, + key: DataKey, + req: DataRequest, + ) -> Result<DataResponse<BufferMarker>, DataError> { + (**self).load_buffer(key, req) + } +} + +impl<T: BufferProvider + ?Sized> BufferProvider for alloc::boxed::Box<T> { + fn load_buffer( + &self, + key: DataKey, + req: DataRequest, + ) -> Result<DataResponse<BufferMarker>, DataError> { + (**self).load_buffer(key, req) + } +} + +impl<T: BufferProvider + ?Sized> BufferProvider for alloc::rc::Rc<T> { + fn load_buffer( + &self, + key: DataKey, + req: DataRequest, + ) -> Result<DataResponse<BufferMarker>, DataError> { + (**self).load_buffer(key, req) + } +} + +#[cfg(target_has_atomic = "ptr")] +impl<T: BufferProvider + ?Sized> BufferProvider for alloc::sync::Arc<T> { + fn load_buffer( + &self, + key: DataKey, + req: DataRequest, + ) -> Result<DataResponse<BufferMarker>, DataError> { + (**self).load_buffer(key, req) + } +} + +/// An enum expressing all Serde formats known to ICU4X. +#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[non_exhaustive] +pub enum BufferFormat { + /// Serialize using JavaScript Object Notation (JSON). + Json, + /// Serialize using Bincode version 1. + Bincode1, + /// Serialize using Postcard version 1. + Postcard1, +} + +impl BufferFormat { + /// Returns an error if the buffer format is not enabled. 
+ pub fn check_available(&self) -> Result<(), DataError> { + match self { + #[cfg(feature = "deserialize_json")] + BufferFormat::Json => Ok(()), + + #[cfg(feature = "deserialize_bincode_1")] + BufferFormat::Bincode1 => Ok(()), + + #[cfg(feature = "deserialize_postcard_1")] + BufferFormat::Postcard1 => Ok(()), + + // Allowed for cases in which all features are enabled + #[allow(unreachable_patterns)] + _ => Err(DataErrorKind::UnavailableBufferFormat(*self).into_error()), + } + } +} diff --git a/third_party/rust/icu_provider/src/constructors.rs b/third_party/rust/icu_provider/src/constructors.rs new file mode 100644 index 0000000000..f521f1feab --- /dev/null +++ b/third_party/rust/icu_provider/src/constructors.rs @@ -0,0 +1,373 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! 📚 *This module documents ICU4X constructor signatures.* +//! +//! One of the key differences between ICU4X and its parent projects, ICU4C and ICU4J, is in how +//! it deals with locale data. +//! +//! In ICU4X, data can always be explicitly passed to any function that requires data. +//! This enables ICU4X to achieve the following value propositions: +//! +//! 1. Configurable data sources (machine-readable data file, baked into code, JSON, etc). +//! 2. Dynamic data loading at runtime (load data on demand). +//! 3. Reduced overhead and code size (data is resolved locally at each call site). +//! 4. Explicit support for multiple ICU4X instances sharing data. +//! +//! However, as manual data management can be tedious, ICU4X also has a `compiled_data` +//! default Cargo feature that includes data and makes ICU4X work out-of-the box. +//! +//! Subsequently, there are 4 versions of all Rust ICU4X functions that use data: +//! +//! 1. `*` +//! 2. `*_unstable` +//! 3. `*_with_any_provider` +//! 4. `*_with_buffer_provider` +//! +//! 
# Which constructor should I use? +//! +//! ## When to use `*` +//! +//! If you don't want to customize data at runtime (i.e. if you don't care about code size, +//! updating your data, etc.) you can use the `compiled_data` Cargo feature and don't have to think +//! about where your data comes from. +//! +//! These constructors are sometimes `const` functions; this way Rust can most effectively optimize +//! your usage of ICU4X. +//! +//! ## When to use `*_unstable` +//! +//! Use this constructor if your data provider implements the [`DataProvider`] trait for all +//! data structs in *current and future* ICU4X versions. Examples: +//! +//! 1. `BakedDataProvider` generated for the specific ICU4X minor version +//! 2. Anything with a _blanket_ [`DataProvider`] impl +//! +//! Since the exact set of bounds may change at any time, including in minor SemVer releases, +//! it is the client's responsibility to guarantee that the requirement is upheld. +//! +//! ## When to use `*_with_any_provider` +//! +//! Use this constructor if you need to use a provider that implements [`AnyProvider`] but not +//! [`DataProvider`]. Examples: +//! +//! 1. [`AnyPayloadProvider`] +//! 2. [`ForkByKeyProvider`] between two providers implementing [`AnyProvider`] +//! 3. Providers that cache or override certain keys but not others and therefore +//! can't implement [`DataProvider`] +//! +//! ## When to use `*_with_buffer_provider` +//! +//! Use this constructor if your data originates as byte buffers that need to be deserialized. +//! All such providers should implement [`BufferProvider`]. Examples: +//! +//! 1. [`BlobDataProvider`] +//! 2. [`FsDataProvider`] +//! 3. [`ForkByKeyProvider`] between two providers implementing [`BufferProvider`] +//! +//! Please note that you must enable the `serde` Cargo feature on each crate in which you use the +//! `*_with_buffer_provider` constructor. +//! +//! # Data Versioning Policy +//! +//! 
The `*_with_any_provider` and `*_with_buffer_provider` functions will compile and +//! run successfully if given a data provider supporting all of the keys required for the object being +//! constructed, either the current or any previous version within the same SemVer major release. +//! For example, if a data file is built to support FooFormatter version 1.1, then FooFormatter +//! version 1.2 will be able to read the same data file. Likewise, backwards-compatible keys can +//! always be included by `icu_datagen` to support older library versions. +//! +//! The `*_unstable` functions are only guaranteed to work on data built for the exact same minor version +//! of ICU4X. The advantage of the `*_unstable` functions is that they result in the smallest code +//! size and allow for automatic data slicing when `BakedDataProvider` is used. However, the type +//! bounds of these functions may change over time, breaking SemVer guarantees. These functions +//! should therefore only be used when you have full control over your data lifecycle at compile +//! time. +//! +//! # Data Providers Over FFI +//! +//! Over FFI, there is only one data provider type: [`ICU4XDataProvider`]. Internally, it is an +//! `enum` between `dyn `[`BufferProvider`] and a unit compiled data variant. +//! +//! To control for code size, there are two Cargo features, `compiled_data` and `buffer_provider`, +//! that enable the corresponding items in the enum. +//! +//! In Rust ICU4X, a similar enum approach was not taken because: +//! +//! 1. Feature-gating the enum branches gets complex across crates. +//! 2. Without feature gating, users need to carry Serde code even if they're not using it, +//! violating one of the core value propositions of ICU4X. +//! 3. We could reduce the number of constructors from 4 to 2 but not to 1, so the educational +//! benefit is limited. +//! +//! [`DataProvider`]: crate::DataProvider +//! [`BufferProvider`]: crate::BufferProvider +//! 
[`AnyProvider`]: crate::AnyProvider +//! [`AnyPayloadProvider`]: ../../icu_provider_adapters/any_payload/struct.AnyPayloadProvider.html +//! [`ForkByKeyProvider`]: ../../icu_provider_adapters/fork/struct.ForkByKeyProvider.html +//! [`BlobDataProvider`]: ../../icu_provider_blob/struct.BlobDataProvider.html +//! [`StaticDataProvider`]: ../../icu_provider_blob/struct.StaticDataProvider.html +//! [`FsDataProvider`]: ../../icu_provider_fs/struct.FsDataProvider.html +//! [`ICU4XDataProvider`]: ../../icu_capi/provider/ffi/struct.ICU4XDataProvider.html + +#[doc(hidden)] +#[macro_export] +macro_rules! gen_any_buffer_unstable_docs { + (ANY, $data:path) => { + concat!( + "A version of [`", stringify!($data), "`] that uses custom data ", + "provided by an [`AnyProvider`](icu_provider::AnyProvider).\n\n", + "[📚 Help choosing a constructor](icu_provider::constructors)", + ) + }; + (BUFFER, $data:path) => { + concat!( + "A version of [`", stringify!($data), "`] that uses custom data ", + "provided by a [`BufferProvider`](icu_provider::BufferProvider).\n\n", + "✨ *Enabled with the `serde` feature.*\n\n", + "[📚 Help choosing a constructor](icu_provider::constructors)", + ) + }; + (UNSTABLE, $data:path) => { + concat!( + "A version of [`", stringify!($data), "`] that uses custom data ", + "provided by a [`DataProvider`](icu_provider::DataProvider).\n\n", + "[📚 Help choosing a constructor](icu_provider::constructors)\n\n", + "<div class=\"stab unstable\">⚠️ The bounds on <tt>provider</tt> may change over time, including in SemVer minor releases.</div>" + ) + }; +} + +#[allow(clippy::crate_in_macro_def)] // by convention each crate's data provider is `crate::provider::Baked` +#[doc(hidden)] +#[macro_export] +macro_rules! 
gen_any_buffer_data_constructors { + (locale: skip, options: skip, error: $error_ty:path, $(#[$doc:meta])+) => { + $crate::gen_any_buffer_data_constructors!( + locale: skip, + options: skip, + error: $error_ty, + $(#[$doc])+ + functions: [ + try_new, + try_new_with_any_provider, + try_new_with_buffer_provider, + try_new_unstable, + Self, + ] + ); + }; + (locale: skip, options: skip, error: $error_ty:path, $(#[$doc:meta])+ functions: [$baked:ident, $any:ident, $buffer:ident, $unstable:ident $(, $struct:ident)? $(,)?]) => { + #[cfg(feature = "compiled_data")] + $(#[$doc])+ + pub fn $baked() -> Result<Self, $error_ty> { + $($struct :: )? $unstable(&crate::provider::Baked) + } + #[doc = $crate::gen_any_buffer_unstable_docs!(ANY, $($struct ::)? $baked)] + pub fn $any(provider: &(impl $crate::AnyProvider + ?Sized)) -> Result<Self, $error_ty> { + use $crate::AsDowncastingAnyProvider; + $($struct :: )? $unstable(&provider.as_downcasting()) + } + #[cfg(feature = "serde")] + #[doc = $crate::gen_any_buffer_unstable_docs!(BUFFER, $($struct ::)? $baked)] + pub fn $buffer(provider: &(impl $crate::BufferProvider + ?Sized)) -> Result<Self, $error_ty> { + use $crate::AsDeserializingBufferProvider; + $($struct :: )? $unstable(&provider.as_deserializing()) + } + }; + + + (locale: skip, options: skip, result: $result_ty:path, $(#[$doc:meta])+ functions: [$baked:ident, $any:ident, $buffer:ident, $unstable:ident $(, $struct:ident)? $(,)?]) => { + #[cfg(feature = "compiled_data")] + $(#[$doc])+ + pub fn $baked() -> $result_ty { + $($struct :: )? $unstable(&crate::provider::Baked) + } + #[doc = $crate::gen_any_buffer_unstable_docs!(ANY, $($struct ::)? $baked)] + pub fn $any(provider: &(impl $crate::AnyProvider + ?Sized)) -> $result_ty { + use $crate::AsDowncastingAnyProvider; + $($struct :: )? $unstable(&provider.as_downcasting()) + } + #[cfg(feature = "serde")] + #[doc = $crate::gen_any_buffer_unstable_docs!(BUFFER, $($struct ::)? 
$baked)] + pub fn $buffer(provider: &(impl $crate::BufferProvider + ?Sized)) -> $result_ty { + use $crate::AsDeserializingBufferProvider; + $($struct :: )? $unstable(&provider.as_deserializing()) + } + }; + + (locale: skip, $options_arg:ident: $options_ty:ty, error: $error_ty:path, $(#[$doc:meta])+) => { + $crate::gen_any_buffer_data_constructors!( + locale: skip, + $options_arg: $options_ty, + error: $error_ty, + $(#[$doc])+ + functions: [ + try_new, + try_new_with_any_provider, + try_new_with_buffer_provider, + try_new_unstable, + Self, + ] + ); + }; + (locale: skip, $options_arg:ident: $options_ty:ty, result: $result_ty:ty, $(#[$doc:meta])+ functions: [$baked:ident, $any:ident, $buffer:ident, $unstable:ident $(, $struct:ident)? $(,)?]) => { + #[cfg(feature = "compiled_data")] + $(#[$doc])+ + /// + /// ✨ *Enabled with the `compiled_data` Cargo feature.* + /// + /// [📚 Help choosing a constructor](icu_provider::constructors) + pub fn $baked($options_arg: $options_ty) -> $result_ty { + $($struct :: )? $unstable(&crate::provider::Baked, $options_arg) + } + #[doc = $crate::gen_any_buffer_unstable_docs!(ANY, $($struct ::)? $baked)] + pub fn $any(provider: &(impl $crate::AnyProvider + ?Sized), $options_arg: $options_ty) -> $result_ty { + use $crate::AsDowncastingAnyProvider; + $($struct :: )? $unstable(&provider.as_downcasting(), $options_arg) + } + #[cfg(feature = "serde")] + #[doc = $crate::gen_any_buffer_unstable_docs!(BUFFER, $($struct ::)? $baked)] + pub fn $buffer(provider: &(impl $crate::BufferProvider + ?Sized), $options_arg: $options_ty) -> $result_ty { + use $crate::AsDeserializingBufferProvider; + $($struct :: )? $unstable(&provider.as_deserializing(), $options_arg) + } + }; + (locale: skip, $options_arg:ident: $options_ty:ty, error: $error_ty:ty, $(#[$doc:meta])+ functions: [$baked:ident, $any:ident, $buffer:ident, $unstable:ident $(, $struct:ident)? 
$(,)?]) => { + #[cfg(feature = "compiled_data")] + $(#[$doc])+ + /// + /// ✨ *Enabled with the `compiled_data` Cargo feature.* + /// + /// [📚 Help choosing a constructor](icu_provider::constructors) + pub fn $baked($options_arg: $options_ty) -> Result<Self, $error_ty> { + $($struct :: )? $unstable(&crate::provider::Baked, $options_arg) + } + #[doc = $crate::gen_any_buffer_unstable_docs!(ANY, $($struct ::)? $baked)] + pub fn $any(provider: &(impl $crate::AnyProvider + ?Sized), $options_arg: $options_ty) -> Result<Self, $error_ty> { + use $crate::AsDowncastingAnyProvider; + $($struct :: )? $unstable(&provider.as_downcasting(), $options_arg) + } + #[cfg(feature = "serde")] + #[doc = $crate::gen_any_buffer_unstable_docs!(BUFFER, $($struct ::)? $baked)] + pub fn $buffer(provider: &(impl $crate::BufferProvider + ?Sized), $options_arg: $options_ty) -> Result<Self, $error_ty> { + use $crate::AsDeserializingBufferProvider; + $($struct :: )? $unstable(&provider.as_deserializing(), $options_arg) + } + }; + (locale: include, options: skip, error: $error_ty:path, $(#[$doc:meta])+) => { + $crate::gen_any_buffer_data_constructors!( + locale: include, + options: skip, + error: $error_ty, + $(#[$doc])+ + functions: [ + try_new, + try_new_with_any_provider, + try_new_with_buffer_provider, + try_new_unstable, + Self, + ] + ); + }; + (locale: include, options: skip, error: $error_ty:path, $(#[$doc:meta])+ functions: [$baked:ident, $any:ident, $buffer:ident, $unstable:ident $(, $struct:ident)? $(,)?]) => { + #[cfg(feature = "compiled_data")] + $(#[$doc])+ + /// + /// ✨ *Enabled with the `compiled_data` Cargo feature.* + /// + /// [📚 Help choosing a constructor](icu_provider::constructors) + pub fn $baked(locale: &$crate::DataLocale) -> Result<Self, $error_ty> { + $($struct :: )? $unstable(&crate::provider::Baked, locale) + } + #[doc = $crate::gen_any_buffer_unstable_docs!(ANY, $($struct ::)? 
$baked)] + pub fn $any(provider: &(impl $crate::AnyProvider + ?Sized), locale: &$crate::DataLocale) -> Result<Self, $error_ty> { + use $crate::AsDowncastingAnyProvider; + $($struct :: )? $unstable(&provider.as_downcasting(), locale) + } + #[cfg(feature = "serde")] + #[doc = $crate::gen_any_buffer_unstable_docs!(BUFFER, $($struct ::)? $baked)] + pub fn $buffer(provider: &(impl $crate::BufferProvider + ?Sized), locale: &$crate::DataLocale) -> Result<Self, $error_ty> { + use $crate::AsDeserializingBufferProvider; + $($struct :: )? $unstable(&provider.as_deserializing(), locale) + } + }; + + (locale: include, $config_arg:ident: $config_ty:path, $options_arg:ident: $options_ty:path, error: $error_ty:path, $(#[$doc:meta])+) => { + $crate::gen_any_buffer_data_constructors!( + locale: include, + $config_arg: $config_ty, + $options_arg: $options_ty, + error: $error_ty, + $(#[$doc])+ + functions: [ + try_new, + try_new_with_any_provider, + try_new_with_buffer_provider, + try_new_unstable, + Self, + ] + ); + }; + (locale: include, $config_arg:ident: $config_ty:path, $options_arg:ident: $options_ty:path, error: $error_ty:path, $(#[$doc:meta])+ functions: [$baked:ident, $any:ident, $buffer:ident, $unstable:ident $(, $struct:ident)? $(,)?]) => { + #[cfg(feature = "compiled_data")] + $(#[$doc])+ + /// + /// ✨ *Enabled with the `compiled_data` Cargo feature.* + /// + /// [📚 Help choosing a constructor](icu_provider::constructors) + pub fn $baked(locale: &$crate::DataLocale, $config_arg: $config_ty, $options_arg: $options_ty) -> Result<Self, $error_ty> { + $($struct :: )? $unstable(&crate::provider::Baked, locale, $config_arg, $options_arg) + } + #[doc = $crate::gen_any_buffer_unstable_docs!(ANY, $($struct ::)? $baked)] + pub fn $any(provider: &(impl $crate::AnyProvider + ?Sized), locale: &$crate::DataLocale, $config_arg: $config_ty, $options_arg: $options_ty) -> Result<Self, $error_ty> { + use $crate::AsDowncastingAnyProvider; + $($struct :: )? 
$unstable(&provider.as_downcasting(), locale, $config_arg, $options_arg) + } + #[cfg(feature = "serde")] + #[doc = $crate::gen_any_buffer_unstable_docs!(BUFFER, $($struct ::)? $baked)] + pub fn $buffer(provider: &(impl $crate::BufferProvider + ?Sized), locale: &$crate::DataLocale, $config_arg: $config_ty, $options_arg: $options_ty) -> Result<Self, $error_ty> { + use $crate::AsDeserializingBufferProvider; + $($struct :: )? $unstable(&provider.as_deserializing(), locale, $config_arg, $options_arg) + } + }; + + (locale: include, $options_arg:ident: $options_ty:path, error: $error_ty:path, $(#[$doc:meta])+) => { + $crate::gen_any_buffer_data_constructors!( + locale: include, + $options_arg: $options_ty, + error: $error_ty, + $(#[$doc])+ + functions: [ + try_new, + try_new_with_any_provider, + try_new_with_buffer_provider, + try_new_unstable, + Self, + ] + ); + }; + (locale: include, $options_arg:ident: $options_ty:path, error: $error_ty:path, $(#[$doc:meta])+ functions: [$baked:ident, $any:ident, $buffer:ident, $unstable:ident $(, $struct:ident)? $(,)?]) => { + #[cfg(feature = "compiled_data")] + $(#[$doc])+ + /// + /// ✨ *Enabled with the `compiled_data` Cargo feature.* + /// + /// [📚 Help choosing a constructor](icu_provider::constructors) + pub fn $baked(locale: &$crate::DataLocale, $options_arg: $options_ty) -> Result<Self, $error_ty> { + $($struct :: )? $unstable(&crate::provider::Baked, locale, $options_arg) + } + #[doc = $crate::gen_any_buffer_unstable_docs!(ANY, $($struct ::)? $baked)] + pub fn $any(provider: &(impl $crate::AnyProvider + ?Sized), locale: &$crate::DataLocale, $options_arg: $options_ty) -> Result<Self, $error_ty> { + use $crate::AsDowncastingAnyProvider; + $($struct :: )? $unstable(&provider.as_downcasting(), locale, $options_arg) + } + #[cfg(feature = "serde")] + #[doc = $crate::gen_any_buffer_unstable_docs!(BUFFER, $($struct ::)? 
$baked)] + pub fn $buffer(provider: &(impl $crate::BufferProvider + ?Sized), locale: &$crate::DataLocale, $options_arg: $options_ty) -> Result<Self, $error_ty> { + use $crate::AsDeserializingBufferProvider; + $($struct :: )? $unstable(&provider.as_deserializing(), locale, $options_arg) + } + }; +} diff --git a/third_party/rust/icu_provider/src/data_provider.rs b/third_party/rust/icu_provider/src/data_provider.rs new file mode 100644 index 0000000000..df821956a8 --- /dev/null +++ b/third_party/rust/icu_provider/src/data_provider.rs @@ -0,0 +1,331 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::error::DataError; +use crate::key::DataKey; +use crate::marker::{DataMarker, KeyedDataMarker}; +use crate::request::DataRequest; +use crate::response::DataResponse; + +/// A data provider that loads data for a specific data type. +/// +/// Unlike [`DataProvider`], there may be multiple keys corresponding to the same data type. +/// This is often the case when returning `dyn` trait objects such as [`AnyMarker`]. +/// +/// [`AnyMarker`]: crate::any::AnyMarker +pub trait DynamicDataProvider<M> +where + M: DataMarker, +{ + /// Query the provider for data, returning the result. + /// + /// Returns [`Ok`] if the request successfully loaded data. If data failed to load, returns an + /// Error with more information. + fn load_data(&self, key: DataKey, req: DataRequest) -> Result<DataResponse<M>, DataError>; +} + +/// A data provider that loads data for a specific [`DataKey`]. +pub trait DataProvider<M> +where + M: KeyedDataMarker, +{ + /// Query the provider for data, returning the result. + /// + /// Returns [`Ok`] if the request successfully loaded data. If data failed to load, returns an + /// Error with more information. 
+ fn load(&self, req: DataRequest) -> Result<DataResponse<M>, DataError>; +} + +impl<M, P> DynamicDataProvider<M> for alloc::boxed::Box<P> +where + M: DataMarker, + P: DynamicDataProvider<M> + ?Sized, +{ + fn load_data(&self, key: DataKey, req: DataRequest) -> Result<DataResponse<M>, DataError> { + (**self).load_data(key, req) + } +} + +#[cfg(test)] +mod test { + + use super::*; + use crate::hello_world::*; + use crate::prelude::*; + use alloc::borrow::Cow; + use alloc::string::String; + use core::fmt::Debug; + use serde::{Deserialize, Serialize}; + + // This tests DataProvider borrow semantics with a dummy data provider based on a + // JSON string. It also exercises most of the data provider code paths. + + /// Key for HelloAlt, used for testing mismatched types + const HELLO_ALT_KEY: DataKey = crate::data_key!("core/helloalt@1"); + + /// A data struct serialization-compatible with HelloWorldV1 used for testing mismatched types + #[derive( + Serialize, Deserialize, Debug, Clone, Default, PartialEq, yoke::Yokeable, zerofrom::ZeroFrom, + )] + struct HelloAlt { + #[zerofrom(clone)] + message: String, + } + + /// Marker type for [`HelloAlt`]. + struct HelloAltMarker {} + + impl DataMarker for HelloAltMarker { + type Yokeable = HelloAlt; + } + + impl KeyedDataMarker for HelloAltMarker { + const KEY: DataKey = HELLO_ALT_KEY; + } + + #[derive(Deserialize, Debug, Clone, Default, PartialEq)] + struct HelloCombined<'data> { + #[serde(borrow)] + pub hello_v1: HelloWorldV1<'data>, + pub hello_alt: HelloAlt, + } + + /// A DataProvider that owns its data, returning an Rc-variant DataPayload. + /// Supports only key::HELLO_WORLD_V1. Uses `impl_dynamic_data_provider!()`. 
+ #[derive(Debug)] + struct DataWarehouse { + hello_v1: HelloWorldV1<'static>, + hello_alt: HelloAlt, + } + + impl DataProvider<HelloWorldV1Marker> for DataWarehouse { + fn load(&self, _: DataRequest) -> Result<DataResponse<HelloWorldV1Marker>, DataError> { + Ok(DataResponse { + metadata: DataResponseMetadata::default(), + payload: Some(DataPayload::from_owned(self.hello_v1.clone())), + }) + } + } + + crate::impl_dynamic_data_provider!(DataWarehouse, [HelloWorldV1Marker,], AnyMarker); + + /// A DataProvider that supports both key::HELLO_WORLD_V1 and HELLO_ALT. + #[derive(Debug)] + struct DataProvider2 { + data: DataWarehouse, + } + + impl From<DataWarehouse> for DataProvider2 { + fn from(warehouse: DataWarehouse) -> Self { + DataProvider2 { data: warehouse } + } + } + + impl DataProvider<HelloWorldV1Marker> for DataProvider2 { + fn load(&self, _: DataRequest) -> Result<DataResponse<HelloWorldV1Marker>, DataError> { + Ok(DataResponse { + metadata: DataResponseMetadata::default(), + payload: Some(DataPayload::from_owned(self.data.hello_v1.clone())), + }) + } + } + + impl DataProvider<HelloAltMarker> for DataProvider2 { + fn load(&self, _: DataRequest) -> Result<DataResponse<HelloAltMarker>, DataError> { + Ok(DataResponse { + metadata: DataResponseMetadata::default(), + payload: Some(DataPayload::from_owned(self.data.hello_alt.clone())), + }) + } + } + + crate::impl_dynamic_data_provider!( + DataProvider2, + [HelloWorldV1Marker, HelloAltMarker,], + AnyMarker + ); + + const DATA: &str = r#"{ + "hello_v1": { + "message": "Hello V1" + }, + "hello_alt": { + "message": "Hello Alt" + } + }"#; + + fn get_warehouse(data: &'static str) -> DataWarehouse { + let data: HelloCombined = serde_json::from_str(data).expect("Well-formed data"); + DataWarehouse { + hello_v1: data.hello_v1, + hello_alt: data.hello_alt, + } + } + + fn get_payload_v1<P: DataProvider<HelloWorldV1Marker> + ?Sized>( + provider: &P, + ) -> Result<DataPayload<HelloWorldV1Marker>, DataError> { + 
provider.load(Default::default())?.take_payload() + } + + fn get_payload_alt<P: DataProvider<HelloAltMarker> + ?Sized>( + provider: &P, + ) -> Result<DataPayload<HelloAltMarker>, DataError> { + provider.load(Default::default())?.take_payload() + } + + #[test] + fn test_warehouse_owned() { + let warehouse = get_warehouse(DATA); + let hello_data = get_payload_v1(&warehouse).unwrap(); + assert!(matches!( + hello_data.get(), + HelloWorldV1 { + message: Cow::Borrowed(_), + } + )); + } + + #[test] + fn test_warehouse_owned_dyn_erased() { + let warehouse = get_warehouse(DATA); + let hello_data = get_payload_v1(&warehouse.as_any_provider().as_downcasting()).unwrap(); + assert!(matches!( + hello_data.get(), + HelloWorldV1 { + message: Cow::Borrowed(_), + } + )); + } + + #[test] + fn test_warehouse_owned_dyn_generic() { + let warehouse = get_warehouse(DATA); + let hello_data = + get_payload_v1(&warehouse as &dyn DataProvider<HelloWorldV1Marker>).unwrap(); + assert!(matches!( + hello_data.get(), + HelloWorldV1 { + message: Cow::Borrowed(_), + } + )); + } + + #[test] + fn test_warehouse_owned_dyn_erased_alt() { + let warehouse = get_warehouse(DATA); + let response = get_payload_alt(&warehouse.as_any_provider().as_downcasting()); + assert!(matches!( + response, + Err(DataError { + kind: DataErrorKind::MissingDataKey, + .. 
+ }) + )); + } + + #[test] + fn test_provider2() { + let warehouse = get_warehouse(DATA); + let provider = DataProvider2::from(warehouse); + let hello_data = get_payload_v1(&provider).unwrap(); + assert!(matches!( + hello_data.get(), + HelloWorldV1 { + message: Cow::Borrowed(_), + } + )); + } + + #[test] + fn test_provider2_dyn_erased() { + let warehouse = get_warehouse(DATA); + let provider = DataProvider2::from(warehouse); + let hello_data = get_payload_v1(&provider.as_any_provider().as_downcasting()).unwrap(); + assert!(matches!( + hello_data.get(), + HelloWorldV1 { + message: Cow::Borrowed(_), + } + )); + } + + #[test] + fn test_provider2_dyn_erased_alt() { + let warehouse = get_warehouse(DATA); + let provider = DataProvider2::from(warehouse); + let hello_data = get_payload_alt(&provider.as_any_provider().as_downcasting()).unwrap(); + assert!(matches!(hello_data.get(), HelloAlt { .. })); + } + + #[test] + fn test_provider2_dyn_generic() { + let warehouse = get_warehouse(DATA); + let provider = DataProvider2::from(warehouse); + let hello_data = + get_payload_v1(&provider as &dyn DataProvider<HelloWorldV1Marker>).unwrap(); + assert!(matches!( + hello_data.get(), + HelloWorldV1 { + message: Cow::Borrowed(_), + } + )); + } + + #[test] + fn test_provider2_dyn_generic_alt() { + let warehouse = get_warehouse(DATA); + let provider = DataProvider2::from(warehouse); + let hello_data = get_payload_alt(&provider as &dyn DataProvider<HelloAltMarker>).unwrap(); + assert!(matches!(hello_data.get(), HelloAlt { .. 
})); + } + + #[test] + fn test_mismatched_types() { + let warehouse = get_warehouse(DATA); + let provider = DataProvider2::from(warehouse); + // Request is for v2, but type argument is for v1 + let response: Result<DataResponse<HelloWorldV1Marker>, DataError> = AnyProvider::load_any( + &provider.as_any_provider(), + HELLO_ALT_KEY, + Default::default(), + ) + .unwrap() + .downcast(); + assert!(matches!( + response, + Err(DataError { + kind: DataErrorKind::MismatchedType(_), + .. + }) + )); + } + + fn check_v1_v2<P>(d: &P) + where + P: DataProvider<HelloWorldV1Marker> + DataProvider<HelloAltMarker> + ?Sized, + { + let v1: DataPayload<HelloWorldV1Marker> = + d.load(Default::default()).unwrap().take_payload().unwrap(); + let v2: DataPayload<HelloAltMarker> = + d.load(Default::default()).unwrap().take_payload().unwrap(); + if v1.get().message == v2.get().message { + panic!() + } + } + + #[test] + fn test_v1_v2_generic() { + let warehouse = get_warehouse(DATA); + let provider = DataProvider2::from(warehouse); + check_v1_v2(&provider); + } + + #[test] + fn test_v1_v2_dyn_erased() { + let warehouse = get_warehouse(DATA); + let provider = DataProvider2::from(warehouse); + check_v1_v2(&provider.as_any_provider().as_downcasting()); + } +} diff --git a/third_party/rust/icu_provider/src/datagen/data_conversion.rs b/third_party/rust/icu_provider/src/datagen/data_conversion.rs new file mode 100644 index 0000000000..f3ca948e1d --- /dev/null +++ b/third_party/rust/icu_provider/src/datagen/data_conversion.rs @@ -0,0 +1,44 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::prelude::*; +use crate::DataKey; +use alloc::boxed::Box; + +/// A trait that allows for converting between data payloads of different types. 
+/// +/// These payloads will typically be some kind of erased payload, either with +/// [`AnyMarker`], [`BufferMarker`], or [`ExportMarker`](crate::datagen::ExportMarker), where converting +/// requires reifying the type. +/// +/// A type implementing [`DataConverter`] will essentially have a "registry" mapping keys to +/// concrete marker types M, and reifying the input to a `DataPayload<M>`, performing some conversion +/// or computation, and erasing the result to `DataPayload<MTo>`. +pub trait DataConverter<MFrom: DataMarker, MTo: DataMarker> { + /// Attempt to convert a payload corresponding to the given data key + /// from one marker type to another marker type. + /// + /// If this is not possible (for example, if the provider does not know about the key), + /// the original payload is returned back to the caller. + fn convert( + &self, + key: DataKey, + from: DataPayload<MFrom>, + ) -> Result<DataPayload<MTo>, (DataPayload<MFrom>, DataError)>; +} + +impl<MFrom, MTo, P> DataConverter<MFrom, MTo> for Box<P> +where + MFrom: DataMarker, + MTo: DataMarker, + P: DataConverter<MFrom, MTo> + ?Sized, +{ + fn convert( + &self, + key: DataKey, + from: DataPayload<MFrom>, + ) -> Result<DataPayload<MTo>, (DataPayload<MFrom>, DataError)> { + (**self).convert(key, from) + } +} diff --git a/third_party/rust/icu_provider/src/datagen/iter.rs b/third_party/rust/icu_provider/src/datagen/iter.rs new file mode 100644 index 0000000000..6175d89c6f --- /dev/null +++ b/third_party/rust/icu_provider/src/datagen/iter.rs @@ -0,0 +1,35 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Collection of iteration APIs for data providers. + +use crate::prelude::*; + +/// A [`DynamicDataProvider`] that can iterate over all supported [`DataLocale`] for a certain key. 
+/// +/// Implementing this trait means that a data provider knows all of the data it can successfully +/// return from a load request. +pub trait IterableDynamicDataProvider<M: DataMarker>: DynamicDataProvider<M> { + /// Given a [`DataKey`], returns a list of [`DataLocale`]. + fn supported_locales_for_key(&self, key: DataKey) -> Result<Vec<DataLocale>, DataError>; +} + +/// A [`DataProvider`] that can iterate over all supported [`DataLocale`] for a certain key. +/// +/// Implementing this trait means that a data provider knows all of the data it can successfully +/// return from a load request. +pub trait IterableDataProvider<M: KeyedDataMarker>: DataProvider<M> { + /// Returns a list of [`DataLocale`]. + fn supported_locales(&self) -> Result<Vec<DataLocale>, DataError>; +} + +impl<M, P> IterableDynamicDataProvider<M> for Box<P> +where + M: DataMarker, + P: IterableDynamicDataProvider<M> + ?Sized, +{ + fn supported_locales_for_key(&self, key: DataKey) -> Result<Vec<DataLocale>, DataError> { + (**self).supported_locales_for_key(key) + } +} diff --git a/third_party/rust/icu_provider/src/datagen/mod.rs b/third_party/rust/icu_provider/src/datagen/mod.rs new file mode 100644 index 0000000000..ae1779ab39 --- /dev/null +++ b/third_party/rust/icu_provider/src/datagen/mod.rs @@ -0,0 +1,203 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! This module contains various utilities required to generate ICU4X data files, typically +//! via the `icu_datagen` reference crate. End users should not need to consume anything in +//! this module as a library unless defining new types that integrate with `icu_datagen`. +//! +//! This module can be enabled with the `datagen` Cargo feature on `icu_provider`. 
+ +mod data_conversion; +mod iter; +mod payload; +pub use data_conversion::DataConverter; +pub use iter::IterableDataProvider; + +#[doc(hidden)] // exposed for make_exportable_provider +pub use iter::IterableDynamicDataProvider; +#[doc(hidden)] // exposed for make_exportable_provider +pub use payload::{ExportBox, ExportMarker}; + +use crate::prelude::*; + +/// The type of built-in fallback that the data was generated for, if applicable. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +#[non_exhaustive] +pub enum BuiltInFallbackMode { + /// Data uses full UTS 35 fallbacking. + Standard, +} + +/// An object capable of exporting data payloads in some form. +pub trait DataExporter: Sync { + /// Save a `payload` corresponding to the given key and locale. + /// Takes non-mut self as it can be called concurrently. + fn put_payload( + &self, + key: DataKey, + locale: &DataLocale, + payload: &DataPayload<ExportMarker>, + ) -> Result<(), DataError>; + + /// Function called for singleton keys. + /// Takes non-mut self as it can be called concurrently. + fn flush_singleton( + &self, + key: DataKey, + payload: &DataPayload<ExportMarker>, + ) -> Result<(), DataError> { + self.put_payload(key, &Default::default(), payload)?; + self.flush(key) + } + + /// Function called after a non-singleton key has been fully enumerated, + /// flushing that key with built-in fallback. + /// + /// Takes non-mut self as it can be called concurrently. + fn flush_with_built_in_fallback( + &self, + _key: DataKey, + _fallback_mode: BuiltInFallbackMode, + ) -> Result<(), DataError> { + Err(DataError::custom( + "Exporter does not implement built-in fallback", + )) + } + + /// Function called after a non-singleton key has been fully enumerated. + /// Does not include built-in fallback. + /// + /// Takes non-mut self as it can be called concurrently. 
+ fn flush(&self, _key: DataKey) -> Result<(), DataError> { + Ok(()) + } + + /// This function has to be called before the object is dropped (after all + /// keys have been fully dumped). This conceptually takes ownership, so + /// clients *may not* interact with this object after close has been called. + fn close(&mut self) -> Result<(), DataError> { + Ok(()) + } + + /// Returns whether the provider supports built-in fallback. If `true`, the provider must + /// implement [`Self::flush_with_built_in_fallback()`]. + fn supports_built_in_fallback(&self) -> bool { + false + } +} + +/// A [`DynamicDataProvider`] that can be used for exporting data. +/// +/// Use [`make_exportable_provider`](crate::make_exportable_provider) to implement this. +pub trait ExportableProvider: + IterableDynamicDataProvider<ExportMarker> + DynamicDataProvider<AnyMarker> + Sync +{ +} + +impl<T> ExportableProvider for T where + T: IterableDynamicDataProvider<ExportMarker> + DynamicDataProvider<AnyMarker> + Sync +{ +} + +/// This macro can be used on a data provider to allow it to be used for data generation. +/// +/// Data generation 'compiles' data by using this data provider (which usually translates data from +/// different sources and doesn't have to be efficient) to generate data structs, and then writing +/// them to an efficient format like [`BlobDataProvider`] or [`BakedDataProvider`]. The requirements +/// for `make_exportable_provider` are: +/// * The data struct has to implement [`serde::Serialize`](::serde::Serialize) and [`databake::Bake`] +/// * The provider needs to implement [`IterableDataProvider`] for all specified [`KeyedDataMarker`]s. +/// This allows the generating code to know which [`DataLocale`] to collect. +/// +/// [`BlobDataProvider`]: ../../icu_provider_blob/struct.BlobDataProvider.html +/// [`BakedDataProvider`]: ../../icu_datagen/index.html +#[macro_export] +macro_rules! make_exportable_provider { + ($provider:ty, [ $($(#[$cfg:meta])? 
$struct_m:ty),+, ]) => { + $crate::impl_dynamic_data_provider!( + $provider, + [ $($(#[$cfg])? $struct_m),+, ], + $crate::datagen::ExportMarker + ); + $crate::impl_dynamic_data_provider!( + $provider, + [ $($(#[$cfg])? $struct_m),+, ], + $crate::any::AnyMarker + ); + + impl $crate::datagen::IterableDynamicDataProvider<$crate::datagen::ExportMarker> for $provider { + fn supported_locales_for_key(&self, key: $crate::DataKey) -> Result<Vec<$crate::DataLocale>, $crate::DataError> { + match key.hashed() { + $( + $(#[$cfg])? + h if h == <$struct_m as $crate::KeyedDataMarker>::KEY.hashed() => { + $crate::datagen::IterableDataProvider::<$struct_m>::supported_locales(self) + } + )+, + _ => Err($crate::DataErrorKind::MissingDataKey.with_key(key)) + } + } + } + }; +} + +/// A `DataExporter` that forks to multiple `DataExporter`s. +#[derive(Default)] +pub struct MultiExporter(Vec<Box<dyn DataExporter>>); + +impl MultiExporter { + /// Creates a `MultiExporter` for the given exporters. + pub const fn new(exporters: Vec<Box<dyn DataExporter>>) -> Self { + Self(exporters) + } +} + +impl core::fmt::Debug for MultiExporter { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("MultiExporter") + .field("0", &format!("vec[len = {}]", self.0.len())) + .finish() + } +} + +impl DataExporter for MultiExporter { + fn put_payload( + &self, + key: DataKey, + locale: &DataLocale, + payload: &DataPayload<ExportMarker>, + ) -> Result<(), DataError> { + self.0 + .iter() + .try_for_each(|e| e.put_payload(key, locale, payload)) + } + + fn flush_singleton( + &self, + key: DataKey, + payload: &DataPayload<ExportMarker>, + ) -> Result<(), DataError> { + self.0 + .iter() + .try_for_each(|e| e.flush_singleton(key, payload)) + } + + fn flush(&self, key: DataKey) -> Result<(), DataError> { + self.0.iter().try_for_each(|e| e.flush(key)) + } + + fn flush_with_built_in_fallback( + &self, + key: DataKey, + fallback_mode: BuiltInFallbackMode, + ) -> Result<(), DataError> { 
+ self.0 + .iter() + .try_for_each(|e| e.flush_with_built_in_fallback(key, fallback_mode)) + } + + fn close(&mut self) -> Result<(), DataError> { + self.0.iter_mut().try_for_each(|e| e.close()) + } +} diff --git a/third_party/rust/icu_provider/src/datagen/payload.rs b/third_party/rust/icu_provider/src/datagen/payload.rs new file mode 100644 index 0000000000..97e540b074 --- /dev/null +++ b/third_party/rust/icu_provider/src/datagen/payload.rs @@ -0,0 +1,229 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use core::any::Any; + +use crate::dynutil::UpcastDataPayload; +use crate::prelude::*; +use alloc::boxed::Box; +use databake::{Bake, CrateEnv, TokenStream}; +use yoke::trait_hack::YokeTraitHack; +use yoke::*; + +trait ExportableDataPayload { + fn bake_yoke(&self, env: &CrateEnv) -> TokenStream; + fn serialize_yoke( + &self, + serializer: &mut dyn erased_serde::Serializer, + ) -> Result<(), DataError>; + fn as_any(&self) -> &dyn Any; + fn eq_dyn(&self, other: &dyn ExportableDataPayload) -> bool; +} + +impl<M: DataMarker> ExportableDataPayload for DataPayload<M> +where + for<'a> <M::Yokeable as Yokeable<'a>>::Output: Bake + serde::Serialize, + for<'a> YokeTraitHack<<M::Yokeable as Yokeable<'a>>::Output>: PartialEq, +{ + fn bake_yoke(&self, ctx: &CrateEnv) -> TokenStream { + self.get().bake(ctx) + } + + fn serialize_yoke( + &self, + serializer: &mut dyn erased_serde::Serializer, + ) -> Result<(), DataError> { + use erased_serde::Serialize; + self.get() + .erased_serialize(serializer) + .map_err(|e| DataError::custom("Serde export").with_display_context(&e))?; + Ok(()) + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn eq_dyn(&self, other: &dyn ExportableDataPayload) -> bool { + match other.as_any().downcast_ref::<Self>() { + Some(downcasted) => (*self).eq(downcasted), + None => { + debug_assert!( + false, 
+ "cannot compare ExportableDataPayloads of different types: self is {:?} but other is {:?}", + self.type_id(), + other.as_any().type_id(), + ); + false + } + } + } +} + +#[doc(hidden)] // exposed for make_exportable_provider +#[derive(yoke::Yokeable)] +pub struct ExportBox { + payload: Box<dyn ExportableDataPayload + Sync + Send>, +} + +impl PartialEq for ExportBox { + fn eq(&self, other: &Self) -> bool { + self.payload.eq_dyn(&*other.payload) + } +} + +impl core::fmt::Debug for ExportBox { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("ExportBox") + .field("payload", &"<payload>") + .finish() + } +} + +impl<M> UpcastDataPayload<M> for ExportMarker +where + M: DataMarker, + M::Yokeable: Sync + Send, + for<'a> <M::Yokeable as Yokeable<'a>>::Output: Bake + serde::Serialize, + for<'a> YokeTraitHack<<M::Yokeable as Yokeable<'a>>::Output>: PartialEq, +{ + fn upcast(other: DataPayload<M>) -> DataPayload<ExportMarker> { + DataPayload::from_owned(ExportBox { + payload: Box::new(other), + }) + } +} + +impl DataPayload<ExportMarker> { + /// Serializes this [`DataPayload`] into a serializer using Serde. 
+ /// + /// # Examples + /// + /// ``` + /// use icu_provider::datagen::*; + /// use icu_provider::dynutil::UpcastDataPayload; + /// use icu_provider::hello_world::HelloWorldV1Marker; + /// use icu_provider::prelude::*; + /// + /// // Create an example DataPayload + /// let payload: DataPayload<HelloWorldV1Marker> = Default::default(); + /// let export: DataPayload<ExportMarker> = UpcastDataPayload::upcast(payload); + /// + /// // Serialize the payload to a JSON string + /// let mut buffer: Vec<u8> = vec![]; + /// export + /// .serialize(&mut serde_json::Serializer::new(&mut buffer)) + /// .expect("Serialization should succeed"); + /// assert_eq!(r#"{"message":"(und) Hello World"}"#.as_bytes(), buffer); + /// ``` + pub fn serialize<S>(&self, serializer: S) -> Result<(), DataError> + where + S: serde::Serializer, + S::Ok: 'static, // erased_serde requirement, cannot return values in `Ok` + { + self.get() + .payload + .serialize_yoke(&mut <dyn erased_serde::Serializer>::erase(serializer)) + } + + /// Serializes this [`DataPayload`]'s value into a [`TokenStream`] + /// using its [`Bake`] implementations. + /// + /// # Examples + /// + /// ``` + /// use icu_provider::datagen::*; + /// use icu_provider::dynutil::UpcastDataPayload; + /// use icu_provider::hello_world::HelloWorldV1Marker; + /// use icu_provider::prelude::*; + /// # use databake::quote; + /// # use std::collections::BTreeSet; + /// + /// // Create an example DataPayload + /// let payload: DataPayload<HelloWorldV1Marker> = Default::default(); + /// let export: DataPayload<ExportMarker> = UpcastDataPayload::upcast(payload); + /// + /// let env = databake::CrateEnv::default(); + /// let tokens = export.tokenize(&env); + /// assert_eq!( + /// quote! 
{ + /// icu_provider::hello_world::HelloWorldV1 { + /// message: alloc::borrow::Cow::Borrowed("(und) Hello World"), + /// } + /// } + /// .to_string(), + /// tokens.to_string() + /// ); + /// assert_eq!( + /// env.into_iter().collect::<BTreeSet<_>>(), + /// ["icu_provider", "alloc"] + /// .into_iter() + /// .collect::<BTreeSet<_>>() + /// ); + /// ``` + pub fn tokenize(&self, env: &CrateEnv) -> TokenStream { + self.get().payload.bake_yoke(env) + } +} + +/// Marker type for [`ExportBox`]. +#[allow(clippy::exhaustive_structs)] // marker type +#[derive(Debug)] +pub struct ExportMarker {} + +impl DataMarker for ExportMarker { + type Yokeable = ExportBox; +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::hello_world::*; + + #[test] + fn test_compare_with_dyn() { + let payload1: DataPayload<HelloWorldV1Marker> = DataPayload::from_owned(HelloWorldV1 { + message: "abc".into(), + }); + let payload2: DataPayload<HelloWorldV1Marker> = DataPayload::from_owned(HelloWorldV1 { + message: "abc".into(), + }); + let payload3: DataPayload<HelloWorldV1Marker> = DataPayload::from_owned(HelloWorldV1 { + message: "def".into(), + }); + + assert!(payload1.eq_dyn(&payload2)); + assert!(payload2.eq_dyn(&payload1)); + + assert!(!payload1.eq_dyn(&payload3)); + assert!(!payload3.eq_dyn(&payload1)); + } + + #[test] + fn test_export_marker_partial_eq() { + let payload1: DataPayload<ExportMarker> = + UpcastDataPayload::upcast(DataPayload::<HelloWorldV1Marker>::from_owned( + HelloWorldV1 { + message: "abc".into(), + }, + )); + let payload2: DataPayload<ExportMarker> = + UpcastDataPayload::upcast(DataPayload::<HelloWorldV1Marker>::from_owned( + HelloWorldV1 { + message: "abc".into(), + }, + )); + let payload3: DataPayload<ExportMarker> = + UpcastDataPayload::upcast(DataPayload::<HelloWorldV1Marker>::from_owned( + HelloWorldV1 { + message: "def".into(), + }, + )); + + assert_eq!(payload1, payload2); + assert_eq!(payload2, payload1); + assert_ne!(payload1, payload3); + assert_ne!(payload3, 
payload1); + } +} diff --git a/third_party/rust/icu_provider/src/dynutil.rs b/third_party/rust/icu_provider/src/dynutil.rs new file mode 100644 index 0000000000..8ad7b7aa11 --- /dev/null +++ b/third_party/rust/icu_provider/src/dynutil.rs @@ -0,0 +1,256 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Utilities for using trait objects with `DataPayload`. + +/// Trait to allow conversion from `DataPayload<T>` to `DataPayload<S>`. +/// +/// This trait can be manually implemented in order to enable [`impl_dynamic_data_provider`](crate::impl_dynamic_data_provider). +/// +/// [`DataPayload::downcast`]: crate::DataPayload::downcast +pub trait UpcastDataPayload<M> +where + M: crate::DataMarker, + Self: Sized + crate::DataMarker, +{ + /// Upcast a `DataPayload<T>` to a `DataPayload<S>` where `T` implements trait `S`. + /// + /// # Examples + /// + /// Upcast and then downcast a data struct of type `Cow<str>` (cart type `String`) via + /// [`AnyPayload`](crate::any::AnyPayload): + /// + /// ``` + /// use icu_provider::dynutil::UpcastDataPayload; + /// use icu_provider::hello_world::*; + /// use icu_provider::prelude::*; + /// use std::borrow::Cow; + /// + /// let original = DataPayload::<HelloWorldV1Marker>::from_static_str("foo"); + /// let upcasted = AnyMarker::upcast(original); + /// let downcasted = upcasted + /// .downcast::<HelloWorldV1Marker>() + /// .expect("Type conversion"); + /// assert_eq!(downcasted.get().message, "foo"); + /// ``` + fn upcast(other: crate::DataPayload<M>) -> crate::DataPayload<Self>; +} + +/// Implements [`UpcastDataPayload`] from several data markers to a single data marker +/// that all share the same [`DataMarker::Yokeable`]. 
+/// +/// # Examples +/// +/// ``` +/// use icu_provider::prelude::*; +/// use std::borrow::Cow; +/// +/// #[icu_provider::data_struct( +/// FooV1Marker, +/// BarV1Marker = "demo/bar@1", +/// BazV1Marker = "demo/baz@1" +/// )] +/// pub struct FooV1<'data> { +/// message: Cow<'data, str>, +/// }; +/// +/// icu_provider::impl_casting_upcast!( +/// FooV1Marker, +/// [BarV1Marker, BazV1Marker,] +/// ); +/// ``` +/// +/// [`DataMarker::Yokeable`]: crate::DataMarker::Yokeable +#[macro_export] +macro_rules! impl_casting_upcast { + ($dyn_m:path, [ $($struct_m:ident),+, ]) => { + $( + impl $crate::dynutil::UpcastDataPayload<$struct_m> for $dyn_m { + fn upcast(other: $crate::DataPayload<$struct_m>) -> $crate::DataPayload<$dyn_m> { + other.cast() + } + } + )+ + } +} + +/// Implements [`DynamicDataProvider`] for a marker type `S` on a type that already implements +/// [`DynamicDataProvider`] or [`DataProvider`] for one or more `M`, where `M` is a concrete type +/// that is convertible to `S` via [`UpcastDataPayload`]. +/// +/// Use this macro to add support to your data provider for: +/// +/// - [`AnyPayload`] if your provider can return typed objects as [`Any`](core::any::Any). +/// +/// ## Wrapping DataProvider +/// +/// If your type implements [`DataProvider`], pass a list of markers as the second argument. +/// This results in a `DynamicDataProvider` that delegates to a specific marker if the key +/// matches or else returns [`DataErrorKind::MissingDataKey`]. 
+/// +/// ``` +/// use icu_provider::prelude::*; +/// use icu_provider::hello_world::*; +/// # +/// # // Duplicating HelloWorldProvider because the real one already implements DynamicDataProvider<AnyMarker> +/// # struct HelloWorldProvider; +/// # impl DataProvider<HelloWorldV1Marker> for HelloWorldProvider { +/// # fn load( +/// # &self, +/// # req: DataRequest, +/// # ) -> Result<DataResponse<HelloWorldV1Marker>, DataError> { +/// # icu_provider::hello_world::HelloWorldProvider.load(req) +/// # } +/// # } +/// +/// // Implement DynamicDataProvider<AnyMarker> on HelloWorldProvider: DataProvider<HelloWorldV1Marker> +/// icu_provider::impl_dynamic_data_provider!(HelloWorldProvider, [HelloWorldV1Marker,], AnyMarker); +/// +/// let req = DataRequest { +/// locale: &icu_locid::locale!("de").into(), +/// metadata: Default::default(), +/// }; +/// +/// // Successful because the key matches: +/// HelloWorldProvider.load_data(HelloWorldV1Marker::KEY, req).unwrap(); +/// +/// // MissingDataKey error as the key does not match: +/// assert_eq!( +/// HelloWorldProvider.load_data(icu_provider::data_key!("dummy@1"), req).unwrap_err().kind, +/// DataErrorKind::MissingDataKey, +/// ); +/// ``` +/// +/// ## Wrapping DynamicDataProvider +/// +/// It is also possible to wrap a [`DynamicDataProvider`] to create another [`DynamicDataProvider`]. 
To do this, +/// pass a match-like statement for keys as the second argument: +/// +/// ``` +/// use icu_provider::prelude::*; +/// use icu_provider::hello_world::*; +/// # +/// # struct HelloWorldProvider; +/// # impl DynamicDataProvider<HelloWorldV1Marker> for HelloWorldProvider { +/// # fn load_data(&self, key: DataKey, req: DataRequest) +/// # -> Result<DataResponse<HelloWorldV1Marker>, DataError> { +/// # icu_provider::hello_world::HelloWorldProvider.load(req) +/// # } +/// # } +/// +/// // Implement DataProvider<AnyMarker> on HelloWorldProvider: DynamicDataProvider<HelloWorldV1Marker> +/// icu_provider::impl_dynamic_data_provider!(HelloWorldProvider, { +/// // Match HelloWorldV1Marker::KEY and delegate to DynamicDataProvider<HelloWorldV1Marker>. +/// HW = HelloWorldV1Marker::KEY => HelloWorldV1Marker, +/// // Send the wildcard match also to DynamicDataProvider<HelloWorldV1Marker>. +/// _ => HelloWorldV1Marker, +/// }, AnyMarker); +/// +/// let req = DataRequest { +/// locale: &icu_locid::locale!("de").into(), +/// metadata: Default::default(), +/// }; +/// +/// // Successful because the key matches: +/// HelloWorldProvider.as_any_provider().load_any(HelloWorldV1Marker::KEY, req).unwrap(); +/// +/// // Because of the wildcard, any key actually works: +/// HelloWorldProvider.as_any_provider().load_any(icu_provider::data_key!("dummy@1"), req).unwrap(); +/// ``` +/// +/// [`DynamicDataProvider`]: crate::DynamicDataProvider +/// [`DataProvider`]: crate::DataProvider +/// [`AnyPayload`]: (crate::any::AnyPayload) +/// [`DataErrorKind::MissingDataKey`]: (crate::DataErrorKind::MissingDataKey) +/// [`SerializeMarker`]: (crate::serde::SerializeMarker) +#[macro_export] +macro_rules! 
impl_dynamic_data_provider { + // allow passing in multiple things to do and get dispatched + ($provider:ty, $arms:tt, $one:path, $($rest:path),+) => { + $crate::impl_dynamic_data_provider!( + $provider, + $arms, + $one + ); + + $crate::impl_dynamic_data_provider!( + $provider, + $arms, + $($rest),+ + ); + }; + + ($provider:ty, { $($ident:ident = $key:path => $struct_m:ty),+, $(_ => $struct_d:ty,)?}, $dyn_m:ty) => { + impl $crate::DynamicDataProvider<$dyn_m> for $provider + { + fn load_data( + &self, + key: $crate::DataKey, + req: $crate::DataRequest, + ) -> Result< + $crate::DataResponse<$dyn_m>, + $crate::DataError, + > { + match key.hashed() { + $( + h if h == $key.hashed() => { + let result: $crate::DataResponse<$struct_m> = + $crate::DynamicDataProvider::<$struct_m>::load_data(self, key, req)?; + Ok($crate::DataResponse { + metadata: result.metadata, + payload: result.payload.map(|p| { + $crate::dynutil::UpcastDataPayload::<$struct_m>::upcast(p) + }), + }) + } + )+, + $( + _ => { + let result: $crate::DataResponse<$struct_d> = + $crate::DynamicDataProvider::<$struct_d>::load_data(self, key, req)?; + Ok($crate::DataResponse { + metadata: result.metadata, + payload: result.payload.map(|p| { + $crate::dynutil::UpcastDataPayload::<$struct_d>::upcast(p) + }), + }) + } + )? + _ => Err($crate::DataErrorKind::MissingDataKey.with_req(key, req)) + } + } + } + + }; + ($provider:ty, [ $($(#[$cfg:meta])? $struct_m:ty),+, ], $dyn_m:path) => { + impl $crate::DynamicDataProvider<$dyn_m> for $provider + { + fn load_data( + &self, + key: $crate::DataKey, + req: $crate::DataRequest, + ) -> Result< + $crate::DataResponse<$dyn_m>, + $crate::DataError, + > { + match key.hashed() { + $( + $(#[$cfg])? 
+ h if h == <$struct_m>::KEY.hashed() => { + let result: $crate::DataResponse<$struct_m> = + $crate::DataProvider::load(self, req)?; + Ok($crate::DataResponse { + metadata: result.metadata, + payload: result.payload.map(|p| { + $crate::dynutil::UpcastDataPayload::<$struct_m>::upcast(p) + }), + }) + } + )+, + _ => Err($crate::DataErrorKind::MissingDataKey.with_req(key, req)) + } + } + } + }; +} diff --git a/third_party/rust/icu_provider/src/error.rs b/third_party/rust/icu_provider/src/error.rs new file mode 100644 index 0000000000..5fc19d1a0a --- /dev/null +++ b/third_party/rust/icu_provider/src/error.rs @@ -0,0 +1,292 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::buf::BufferFormat; +use crate::prelude::*; +use core::fmt; +use displaydoc::Display; + +/// A list specifying general categories of data provider error. +/// +/// Errors may be caused either by a malformed request or by the data provider +/// not being able to fulfill a well-formed request. +#[derive(Clone, Copy, Eq, PartialEq, Display, Debug)] +#[non_exhaustive] +pub enum DataErrorKind { + /// No data for the provided resource key. + #[displaydoc("Missing data for key")] + MissingDataKey, + + /// There is data for the key, but not for this particular locale. + #[displaydoc("Missing data for locale")] + MissingLocale, + + /// The request should include a locale. + #[displaydoc("Request needs a locale")] + NeedsLocale, + + /// The request should not contain a locale. + #[displaydoc("Request has an extraneous locale")] + ExtraneousLocale, + + /// The resource was blocked by a filter. The resource may or may not be available. + #[displaydoc("Resource blocked by filter")] + FilteredResource, + + /// The generic type parameter does not match the TypeId. The expected type name is stored + /// as context when this error is returned. 
+ #[displaydoc("Mismatched types: tried to downcast with {0}, but actual type is different")] + MismatchedType(&'static str), + + /// The payload is missing. This is usually caused by a previous error. + #[displaydoc("Missing payload")] + MissingPayload, + + /// A data provider object was given to an operation in an invalid state. + #[displaydoc("Invalid state")] + InvalidState, + + /// The syntax of the [`DataKey`] or [`DataLocale`] was invalid. + #[displaydoc("Parse error for data key or data locale")] + KeyLocaleSyntax, + + /// An unspecified error occurred, such as a Serde error. + /// + /// Check debug logs for potentially more information. + #[displaydoc("Custom")] + Custom, + + /// An error occurred while accessing a system resource. + #[displaydoc("I/O error: {0:?}")] + #[cfg(feature = "std")] + Io(std::io::ErrorKind), + + /// An unspecified data source containing the required data is unavailable. + #[displaydoc("Missing source data")] + #[cfg(feature = "datagen")] + MissingSourceData, + + /// An error indicating that the desired buffer format is not available. This usually + /// means that a required Cargo feature was not enabled + #[displaydoc("Unavailable buffer format: {0:?} (does icu_provider need to be compiled with an additional Cargo feature?)")] + UnavailableBufferFormat(BufferFormat), +} + +/// The error type for ICU4X data provider operations. +/// +/// To create one of these, either start with a [`DataErrorKind`] or use [`DataError::custom()`]. 
+/// +/// # Example +/// +/// Create a NeedsLocale error and attach a data request for context: +/// +/// ```no_run +/// # use icu_provider::prelude::*; +/// let key: DataKey = unimplemented!(); +/// let req: DataRequest = unimplemented!(); +/// DataErrorKind::NeedsLocale.with_req(key, req); +/// ``` +/// +/// Create a named custom error: +/// +/// ``` +/// # use icu_provider::prelude::*; +/// DataError::custom("This is an example error"); +/// ``` +#[derive(Clone, Copy, Eq, PartialEq, Debug)] +#[non_exhaustive] +pub struct DataError { + /// Broad category of the error. + pub kind: DataErrorKind, + + /// The data key of the request, if available. + pub key: Option<DataKey>, + + /// Additional context, if available. + pub str_context: Option<&'static str>, + + /// Whether this error was created in silent mode to not log. + pub silent: bool, +} + +impl fmt::Display for DataError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "ICU4X data error")?; + if self.kind != DataErrorKind::Custom { + write!(f, ": {}", self.kind)?; + } + if let Some(key) = self.key { + write!(f, " (key: {key})")?; + } + if let Some(str_context) = self.str_context { + write!(f, ": {str_context}")?; + } + Ok(()) + } +} + +impl DataErrorKind { + /// Converts this DataErrorKind into a DataError. + /// + /// If possible, you should attach context using a `with_` function. + #[inline] + pub const fn into_error(self) -> DataError { + DataError { + kind: self, + key: None, + str_context: None, + silent: false, + } + } + + /// Creates a DataError with a resource key context. + #[inline] + pub const fn with_key(self, key: DataKey) -> DataError { + self.into_error().with_key(key) + } + + /// Creates a DataError with a string context. + #[inline] + pub const fn with_str_context(self, context: &'static str) -> DataError { + self.into_error().with_str_context(context) + } + + /// Creates a DataError with a type name context. 
+ #[inline] + pub fn with_type_context<T>(self) -> DataError { + self.into_error().with_type_context::<T>() + } + + /// Creates a DataError with a request context. + #[inline] + pub fn with_req(self, key: DataKey, req: DataRequest) -> DataError { + self.into_error().with_req(key, req) + } +} + +impl DataError { + /// Returns a new, empty DataError with kind Custom and a string error message. + #[inline] + pub const fn custom(str_context: &'static str) -> Self { + Self { + kind: DataErrorKind::Custom, + key: None, + str_context: Some(str_context), + silent: false, + } + } + + /// Sets the resource key of a DataError, returning a modified error. + #[inline] + pub const fn with_key(self, key: DataKey) -> Self { + Self { + kind: self.kind, + key: Some(key), + str_context: self.str_context, + silent: self.silent, + } + } + + /// Sets the string context of a DataError, returning a modified error. + #[inline] + pub const fn with_str_context(self, context: &'static str) -> Self { + Self { + kind: self.kind, + key: self.key, + str_context: Some(context), + silent: self.silent, + } + } + + /// Sets the string context of a DataError to the given type name, returning a modified error. + #[inline] + pub fn with_type_context<T>(self) -> Self { + #[cfg(feature = "logging")] + if !self.silent { + log::warn!("{self}: Type context: {}", core::any::type_name::<T>()); + } + self.with_str_context(core::any::type_name::<T>()) + } + + /// Logs the data error with the given request, returning an error containing the resource key. + /// + /// If the "logging" Cargo feature is enabled, this logs the whole request. Either way, + /// it returns an error with the resource key portion of the request as context. 
+ #[cfg_attr(not(feature = "logging"), allow(unused_variables))] + pub fn with_req(mut self, key: DataKey, req: DataRequest) -> Self { + if req.metadata.silent { + self.silent = true; + } + // Don't write out a log for MissingDataKey since there is no context to add + #[cfg(feature = "logging")] + if !self.silent && self.kind != DataErrorKind::MissingDataKey { + log::warn!("{} (key: {}, request: {})", self, key, req); + } + self.with_key(key) + } + + /// Logs the data error with the given context, then return self. + /// + /// This does not modify the error, but if the "logging" Cargo feature is enabled, + /// it will print out the context. + #[cfg(feature = "std")] + #[cfg_attr(not(feature = "logging"), allow(unused_variables))] + pub fn with_path_context<P: AsRef<std::path::Path> + ?Sized>(self, path: &P) -> Self { + #[cfg(feature = "logging")] + if !self.silent { + log::warn!("{} (path: {:?})", self, path.as_ref()); + } + self + } + + /// Logs the data error with the given context, then return self. + /// + /// This does not modify the error, but if the "logging" Cargo feature is enabled, + /// it will print out the context. + #[cfg_attr(not(feature = "logging"), allow(unused_variables))] + #[inline] + pub fn with_display_context<D: fmt::Display + ?Sized>(self, context: &D) -> Self { + #[cfg(feature = "logging")] + if !self.silent { + log::warn!("{}: {}", self, context); + } + self + } + + /// Logs the data error with the given context, then return self. + /// + /// This does not modify the error, but if the "logging" Cargo feature is enabled, + /// it will print out the context. 
+ #[cfg_attr(not(feature = "logging"), allow(unused_variables))] + #[inline] + pub fn with_debug_context<D: fmt::Debug + ?Sized>(self, context: &D) -> Self { + #[cfg(feature = "logging")] + if !self.silent { + log::warn!("{}: {:?}", self, context); + } + self + } + + #[inline] + pub(crate) fn for_type<T>() -> DataError { + DataError { + kind: DataErrorKind::MismatchedType(core::any::type_name::<T>()), + key: None, + str_context: None, + silent: false, + } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for DataError {} + +#[cfg(feature = "std")] +impl From<std::io::Error> for DataError { + fn from(e: std::io::Error) -> Self { + #[cfg(feature = "logging")] + log::warn!("I/O error: {}", e); + DataErrorKind::Io(e.kind()).into_error() + } +} diff --git a/third_party/rust/icu_provider/src/fallback.rs b/third_party/rust/icu_provider/src/fallback.rs new file mode 100644 index 0000000000..5c4e13b8da --- /dev/null +++ b/third_party/rust/icu_provider/src/fallback.rs @@ -0,0 +1,201 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Options to define fallback behaviour. +//! +//! These options are consumed by the `LocaleFallbacker` in the `icu_locid_transforms` crate +//! (or the `icu::locid_transforms` module), but are defined here because they are used by `DataKey`. + +use icu_locid::extensions::unicode::Key; + +/// Hint for which subtag to prioritize during fallback. +/// +/// For example, `"en-US"` might fall back to either `"en"` or `"und-US"` depending +/// on this enum. +#[derive(Debug, PartialEq, Eq, Copy, Clone, PartialOrd, Ord)] +#[non_exhaustive] +pub enum LocaleFallbackPriority { + /// Prioritize the language. This is the default behavior. + /// + /// For example, `"en-US"` should go to `"en"` and then `"und"`. + Language, + /// Prioritize the region. 
+ /// + /// For example, `"en-US"` should go to `"und-US"` and then `"und"`. + Region, + /// Collation-specific fallback rules. Similar to language priority. + /// + /// For example, `"zh-Hant"` goes to `"zh"` before `"und"`. + Collation, +} + +impl LocaleFallbackPriority { + /// Const-friendly version of [`Default::default`]. + pub const fn const_default() -> Self { + Self::Language + } +} + +impl Default for LocaleFallbackPriority { + fn default() -> Self { + Self::const_default() + } +} + +/// What additional data is required to load when performing fallback. +#[derive(Debug, PartialEq, Eq, Copy, Clone, PartialOrd, Ord)] +#[non_exhaustive] +pub enum LocaleFallbackSupplement { + /// Collation supplement + Collation, +} + +/// Configuration settings for a particular fallback operation. +#[derive(Debug, Clone, PartialEq, Eq, Copy)] +#[non_exhaustive] +pub struct LocaleFallbackConfig { + /// Strategy for choosing which subtags to drop during locale fallback. + /// + /// # Examples + /// + /// Retain the language and script subtags until the final step: + /// + /// ``` + /// use icu_locid::locale; + /// use icu_locid_transform::fallback::LocaleFallbackConfig; + /// use icu_locid_transform::fallback::LocaleFallbackPriority; + /// use icu_locid_transform::LocaleFallbacker; + /// + /// // Set up the fallback iterator. + /// let fallbacker = LocaleFallbacker::new(); + /// let mut config = LocaleFallbackConfig::default(); + /// config.priority = LocaleFallbackPriority::Language; + /// let mut fallback_iterator = fallbacker + /// .for_config(config) + /// .fallback_for(locale!("ca-ES-valencia").into()); + /// + /// // Run the algorithm and check the results. 
+ /// assert_eq!(fallback_iterator.get(), &locale!("ca-ES-valencia").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("ca-ES").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("ca-valencia").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("ca").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("und").into()); + /// ``` + /// + /// Retain the region subtag until the final step: + /// + /// ``` + /// use icu_locid::locale; + /// use icu_locid_transform::fallback::LocaleFallbackConfig; + /// use icu_locid_transform::fallback::LocaleFallbackPriority; + /// use icu_locid_transform::LocaleFallbacker; + /// + /// // Set up the fallback iterator. + /// let fallbacker = LocaleFallbacker::new(); + /// let mut config = LocaleFallbackConfig::default(); + /// config.priority = LocaleFallbackPriority::Region; + /// let mut fallback_iterator = fallbacker + /// .for_config(config) + /// .fallback_for(locale!("ca-ES-valencia").into()); + /// + /// // Run the algorithm and check the results. + /// assert_eq!(fallback_iterator.get(), &locale!("ca-ES-valencia").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("ca-ES").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("und-ES-valencia").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("und-ES").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("und").into()); + /// ``` + pub priority: LocaleFallbackPriority, + /// An extension keyword to retain during locale fallback. + /// + /// # Examples + /// + /// ``` + /// use icu_locid::locale; + /// use icu_locid_transform::fallback::LocaleFallbackConfig; + /// use icu_locid_transform::LocaleFallbacker; + /// + /// // Set up the fallback iterator. 
+ /// let fallbacker = LocaleFallbacker::new(); + /// let mut config = LocaleFallbackConfig::default(); + /// config.extension_key = Some(icu_locid::extensions::unicode::key!("nu")); + /// let mut fallback_iterator = fallbacker + /// .for_config(config) + /// .fallback_for(locale!("ar-EG-u-nu-latn").into()); + /// + /// // Run the algorithm and check the results. + /// assert_eq!(fallback_iterator.get(), &locale!("ar-EG-u-nu-latn").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("ar-EG").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("ar-u-nu-latn").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("ar").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("und").into()); + /// ``` + pub extension_key: Option<Key>, + /// Fallback supplement data key to customize fallback rules. + /// + /// For example, most data keys for collation add additional parent locales, such as + /// "yue" to "zh-Hant", and data used for the `"-u-co"` extension keyword fallback. + /// + /// Currently the only supported fallback supplement is `LocaleFallbackSupplement::Collation`, but more may be + /// added in the future. + /// + /// # Examples + /// + /// ``` + /// use icu_locid::locale; + /// use icu_locid_transform::fallback::LocaleFallbackConfig; + /// use icu_locid_transform::fallback::LocaleFallbackPriority; + /// use icu_locid_transform::fallback::LocaleFallbackSupplement; + /// use icu_locid_transform::LocaleFallbacker; + /// + /// // Set up the fallback iterator. 
+ /// let fallbacker = LocaleFallbacker::new(); + /// let mut config = LocaleFallbackConfig::default(); + /// config.priority = LocaleFallbackPriority::Collation; + /// config.fallback_supplement = Some(LocaleFallbackSupplement::Collation); + /// let mut fallback_iterator = fallbacker + /// .for_config(config) + /// .fallback_for(locale!("yue-HK").into()); + /// + /// // Run the algorithm and check the results. + /// // TODO(#1964): add "zh" as a target. + /// assert_eq!(fallback_iterator.get(), &locale!("yue-HK").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("yue").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("zh-Hant").into()); + /// fallback_iterator.step(); + /// assert_eq!(fallback_iterator.get(), &locale!("und").into()); + /// ``` + pub fallback_supplement: Option<LocaleFallbackSupplement>, +} + +impl LocaleFallbackConfig { + /// Const version of [`Default::default`]. + pub const fn const_default() -> Self { + Self { + priority: LocaleFallbackPriority::const_default(), + extension_key: None, + fallback_supplement: None, + } + } +} + +impl Default for LocaleFallbackConfig { + fn default() -> Self { + Self::const_default() + } +} diff --git a/third_party/rust/icu_provider/src/hello_world.rs b/third_party/rust/icu_provider/src/hello_world.rs new file mode 100644 index 0000000000..9a51890264 --- /dev/null +++ b/third_party/rust/icu_provider/src/hello_world.rs @@ -0,0 +1,362 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Data provider returning multilingual "Hello World" strings for testing. 
+ +#![allow(clippy::exhaustive_structs)] // data struct module + +use crate as icu_provider; + +use crate::prelude::*; +use alloc::borrow::Cow; +use alloc::string::String; +use core::fmt::Debug; +use writeable::Writeable; +use yoke::*; +use zerofrom::*; + +/// A struct containing "Hello World" in the requested language. +#[derive(Debug, PartialEq, Clone, Yokeable, ZeroFrom)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize))] +#[cfg_attr( + any(feature = "deserialize_json", feature = "datagen"), + derive(serde::Serialize) +)] +#[cfg_attr(feature = "datagen", derive(databake::Bake))] +#[cfg_attr(feature = "datagen", databake(path = icu_provider::hello_world))] +pub struct HelloWorldV1<'data> { + /// The translation of "Hello World". + #[cfg_attr(feature = "serde", serde(borrow))] + pub message: Cow<'data, str>, +} + +impl Default for HelloWorldV1<'_> { + fn default() -> Self { + HelloWorldV1 { + message: Cow::Borrowed("(und) Hello World"), + } + } +} + +/// Marker type for [`HelloWorldV1`]. +#[cfg_attr(feature = "datagen", derive(Default, databake::Bake))] +#[cfg_attr(feature = "datagen", databake(path = icu_provider::hello_world))] +#[derive(Debug)] +pub struct HelloWorldV1Marker; + +impl DataMarker for HelloWorldV1Marker { + type Yokeable = HelloWorldV1<'static>; +} + +impl KeyedDataMarker for HelloWorldV1Marker { + const KEY: DataKey = icu_provider::data_key!("core/helloworld@1"); +} + +/// A data provider returning Hello World strings in different languages. +/// +/// Mostly useful for testing. 
+/// +/// # Examples +/// +/// ``` +/// use icu_locid::locale; +/// use icu_provider::hello_world::*; +/// use icu_provider::prelude::*; +/// +/// let german_hello_world: DataPayload<HelloWorldV1Marker> = +/// HelloWorldProvider +/// .load(DataRequest { +/// locale: &locale!("de").into(), +/// metadata: Default::default(), +/// }) +/// .expect("Loading should succeed") +/// .take_payload() +/// .expect("Data should be present"); +/// +/// assert_eq!("Hallo Welt", german_hello_world.get().message); +/// ``` +/// +/// Load the reverse string using an auxiliary key: +/// +/// ``` +/// use icu_provider::hello_world::*; +/// use icu_provider::prelude::*; +/// +/// let reverse_hello_world: DataPayload<HelloWorldV1Marker> = +/// HelloWorldProvider +/// .load(DataRequest { +/// locale: &"en-x-reverse".parse().unwrap(), +/// metadata: Default::default(), +/// }) +/// .expect("Loading should succeed") +/// .take_payload() +/// .expect("Data should be present"); +/// +/// assert_eq!("Olleh Dlrow", reverse_hello_world.get().message); +/// ``` +#[derive(Debug, PartialEq, Default)] +pub struct HelloWorldProvider; + +impl HelloWorldProvider { + // Data from https://en.wiktionary.org/wiki/Hello_World#Translations + // Keep this sorted! 
+ const DATA: &'static [(&'static str, &'static str)] = &[ + ("bn", "ওহে বিশ্ব"), + ("cs", "Ahoj světe"), + ("de", "Hallo Welt"), + ("de-AT", "Servus Welt"), + ("el", "Καλημέρα κόσμε"), + ("en", "Hello World"), + ("en-001", "Hello from 🗺️"), // WORLD + ("en-002", "Hello from 🌍"), // AFRICA + ("en-019", "Hello from 🌎"), // AMERICAS + ("en-142", "Hello from 🌏"), // ASIA + ("en-GB", "Hello from 🇬🇧"), // GREAT BRITAIN + ("en-GB-u-sd-gbeng", "Hello from 🏴"), // ENGLAND + ("en-x-reverse", "Olleh Dlrow"), + ("eo", "Saluton, Mondo"), + ("fa", "سلام دنیا"), + ("fi", "hei maailma"), + ("is", "Halló, heimur"), + ("ja", "こんにちは世界"), + ("ja-x-reverse", "界世はちにんこ"), + ("la", "Ave, munde"), + ("pt", "Olá, mundo"), + ("ro", "Salut, lume"), + ("ru", "Привет, мир"), + ("sr", "Поздрав свете"), + ("sr-Latn", "Pozdrav svete"), + ("vi", "Xin chào thế giới"), + ("zh", "你好世界"), + ]; + + /// Converts this provider into a [`BufferProvider`] that uses JSON serialization. + #[cfg(feature = "deserialize_json")] + pub fn into_json_provider(self) -> HelloWorldJsonProvider { + HelloWorldJsonProvider + } +} + +impl DataProvider<HelloWorldV1Marker> for HelloWorldProvider { + fn load(&self, req: DataRequest) -> Result<DataResponse<HelloWorldV1Marker>, DataError> { + #[allow(clippy::indexing_slicing)] // binary_search + let data = Self::DATA + .binary_search_by(|(k, _)| req.locale.strict_cmp(k.as_bytes()).reverse()) + .map(|i| Self::DATA[i].1) + .map_err(|_| DataErrorKind::MissingLocale.with_req(HelloWorldV1Marker::KEY, req))?; + Ok(DataResponse { + metadata: Default::default(), + payload: Some(DataPayload::from_static_str(data)), + }) + } +} + +impl DataPayload<HelloWorldV1Marker> { + /// Make a [`DataPayload`]`<`[`HelloWorldV1Marker`]`>` from a static string slice. + pub fn from_static_str(s: &'static str) -> DataPayload<HelloWorldV1Marker> { + DataPayload::from_owned(HelloWorldV1 { + message: Cow::Borrowed(s), + }) + } +} + +// AnyProvider support. 
+#[cfg(not(feature = "datagen"))] +icu_provider::impl_dynamic_data_provider!(HelloWorldProvider, [HelloWorldV1Marker,], AnyMarker); + +#[cfg(feature = "deserialize_json")] +/// A data provider returning Hello World strings in different languages as JSON blobs. +/// +/// Mostly useful for testing. +/// +/// # Examples +/// +/// ``` +/// use icu_locid::locale; +/// use icu_provider::hello_world::*; +/// use icu_provider::prelude::*; +/// +/// let german_hello_world = HelloWorldProvider +/// .into_json_provider() +/// .load_buffer(HelloWorldV1Marker::KEY, DataRequest { +/// locale: &locale!("de").into(), +/// metadata: Default::default(), +/// }) +/// .expect("Loading should succeed") +/// .take_payload() +/// .expect("Data should be present"); +/// +/// assert_eq!(german_hello_world.get(), br#"{"message":"Hallo Welt"}"#); +#[derive(Debug)] +pub struct HelloWorldJsonProvider; + +#[cfg(feature = "deserialize_json")] +impl BufferProvider for HelloWorldJsonProvider { + fn load_buffer( + &self, + key: DataKey, + req: DataRequest, + ) -> Result<DataResponse<BufferMarker>, DataError> { + key.match_key(HelloWorldV1Marker::KEY)?; + let result = HelloWorldProvider.load(req)?; + let (mut metadata, old_payload) = + DataResponse::<HelloWorldV1Marker>::take_metadata_and_payload(result)?; + metadata.buffer_format = Some(icu_provider::buf::BufferFormat::Json); + #[allow(clippy::unwrap_used)] // HelloWorldV1::serialize is infallible + Ok(DataResponse { + metadata, + payload: Some(DataPayload::from_owned_buffer( + serde_json::to_string(old_payload.get()) + .unwrap() + .into_bytes() + .into_boxed_slice(), + )), + }) + } +} + +#[cfg(feature = "datagen")] +impl icu_provider::datagen::IterableDataProvider<HelloWorldV1Marker> for HelloWorldProvider { + fn supported_locales(&self) -> Result<Vec<DataLocale>, DataError> { + #[allow(clippy::unwrap_used)] // datagen + Ok(Self::DATA.iter().map(|(s, _)| s.parse().unwrap()).collect()) + } +} + +#[cfg(feature = "datagen")] 
+icu_provider::make_exportable_provider!(HelloWorldProvider, [HelloWorldV1Marker,]); + +/// A type that formats localized "hello world" strings. +/// +/// This type is intended to take the shape of a typical ICU4X formatter API. +/// +/// # Examples +/// +/// ``` +/// use icu_locid::locale; +/// use icu_provider::hello_world::{HelloWorldFormatter, HelloWorldProvider}; +/// use writeable::assert_writeable_eq; +/// +/// let fmt = HelloWorldFormatter::try_new_unstable( +/// &HelloWorldProvider, +/// &locale!("eo").into(), +/// ) +/// .expect("locale exists"); +/// +/// assert_writeable_eq!(fmt.format(), "Saluton, Mondo"); +/// ``` +#[derive(Debug)] +pub struct HelloWorldFormatter { + data: DataPayload<HelloWorldV1Marker>, +} + +/// A formatted hello world message. Implements [`Writeable`]. +/// +/// For an example, see [`HelloWorldFormatter`]. +#[derive(Debug)] +pub struct FormattedHelloWorld<'l> { + data: &'l HelloWorldV1<'l>, +} + +impl HelloWorldFormatter { + /// Creates a new [`HelloWorldFormatter`] for the specified locale. + /// + /// [📚 Help choosing a constructor](icu_provider::constructors) + pub fn try_new(locale: &DataLocale) -> Result<Self, DataError> { + Self::try_new_unstable(&HelloWorldProvider, locale) + } + + icu_provider::gen_any_buffer_data_constructors!(locale: include, options: skip, error: DataError, + #[cfg(skip)] + functions: [ + try_new, + try_new_with_any_provider, + try_new_with_buffer_provider, + try_new_unstable, + Self, + ]); + + #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::try_new)] + pub fn try_new_unstable<P>(provider: &P, locale: &DataLocale) -> Result<Self, DataError> + where + P: DataProvider<HelloWorldV1Marker>, + { + let data = provider + .load(DataRequest { + locale, + metadata: Default::default(), + })? + .take_payload()?; + Ok(Self { data }) + } + + /// Formats a hello world message, returning a [`FormattedHelloWorld`]. 
+ #[allow(clippy::needless_lifetimes)] // documentary example + pub fn format<'l>(&'l self) -> FormattedHelloWorld<'l> { + FormattedHelloWorld { + data: self.data.get(), + } + } + + /// Formats a hello world message, returning a [`String`]. + pub fn format_to_string(&self) -> String { + self.format().write_to_string().into_owned() + } +} + +impl<'l> Writeable for FormattedHelloWorld<'l> { + fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result { + self.data.message.write_to(sink) + } + + fn write_to_string(&self) -> Cow<str> { + self.data.message.clone() + } + + fn writeable_length_hint(&self) -> writeable::LengthHint { + self.data.message.writeable_length_hint() + } +} + +writeable::impl_display_with_writeable!(FormattedHelloWorld<'_>); + +#[cfg(feature = "datagen")] +#[test] +fn test_iter() { + use crate::datagen::IterableDataProvider; + use icu_locid::locale; + + assert_eq!( + HelloWorldProvider.supported_locales().unwrap(), + vec![ + locale!("bn").into(), + locale!("cs").into(), + locale!("de").into(), + locale!("de-AT").into(), + locale!("el").into(), + locale!("en").into(), + locale!("en-001").into(), + locale!("en-002").into(), + locale!("en-019").into(), + locale!("en-142").into(), + locale!("en-GB").into(), + locale!("en-GB-u-sd-gbeng").into(), + "en-x-reverse".parse().unwrap(), + locale!("eo").into(), + locale!("fa").into(), + locale!("fi").into(), + locale!("is").into(), + locale!("ja").into(), + "ja-x-reverse".parse().unwrap(), + locale!("la").into(), + locale!("pt").into(), + locale!("ro").into(), + locale!("ru").into(), + locale!("sr").into(), + locale!("sr-Latn").into(), + locale!("vi").into(), + locale!("zh").into() + ] + ); +} diff --git a/third_party/rust/icu_provider/src/key.rs b/third_party/rust/icu_provider/src/key.rs new file mode 100644 index 0000000000..0e1e1006e1 --- /dev/null +++ b/third_party/rust/icu_provider/src/key.rs @@ -0,0 +1,717 @@ +// This file is part of ICU4X. 
/// Const function to compute the FxHash of a byte array.
///
/// FxHash is a speedy hash algorithm used within rustc. The algorithm is satisfactory for our
/// use case since the strings being hashed originate from a trusted source (the ICU4X
/// components), and the hashes are computed at compile time, so we can check for collisions.
///
/// We could have considered a SHA or other cryptographic hash function. However, we are using
/// FxHash because:
///
/// 1. There is precedent for this algorithm in Rust
/// 2. The algorithm is easy to implement as a const function
/// 3. The amount of code is small enough that we can reasonably keep the algorithm in-tree
/// 4. FxHash is designed to output 32-bit or 64-bit values, whereas SHA outputs more bits,
///    such that truncation would be required in order to fit into a u32, partially reducing
///    the benefit of a cryptographically secure algorithm
///
/// Only the bytes between the first `ignore_leading` and the last `ignore_trailing` bytes of
/// `bytes` are hashed. They are consumed as little-endian 4-byte words, followed by an
/// optional 2-byte word and an optional final single byte.
///
/// Returns `0` when the ignored prefix and suffix together cover the whole input (including
/// when their sum would not fit in a `usize`).
// The indexing operations in this function have been reviewed in detail and won't panic.
#[allow(clippy::indexing_slicing)]
const fn fxhash_32(bytes: &[u8], ignore_leading: usize, ignore_trailing: usize) -> u32 {
    // This code is adapted from https://github.com/rust-lang/rustc-hash,
    // whose license text is reproduced below.
    //
    // Copyright 2015 The Rust Project Developers. See the COPYRIGHT
    // file at the top-level directory of this distribution and at
    // http://rust-lang.org/COPYRIGHT.
    //
    // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
    // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
    // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
    // option. This file may not be copied, modified, or distributed
    // except according to those terms.

    // Nothing is left to hash if the ignored regions cover the input. `checked_add` keeps an
    // oversized pair of ignore counts from overflowing and slipping past this guard.
    match ignore_leading.checked_add(ignore_trailing) {
        Some(ignored) if ignored < bytes.len() => {}
        _ => return 0,
    }

    #[inline]
    const fn hash_word_32(mut hash: u32, word: u32) -> u32 {
        const ROTATE: u32 = 5;
        const SEED32: u32 = 0x9e_37_79_b9;
        hash = hash.rotate_left(ROTATE);
        hash ^= word;
        hash = hash.wrapping_mul(SEED32);
        hash
    }

    // The guard above guarantees `cursor < end <= bytes.len()`, so the subtractions and the
    // indexing below cannot underflow or go out of bounds.
    let mut cursor = ignore_leading;
    let end = bytes.len() - ignore_trailing;
    let mut hash = 0;

    // Full 4-byte words.
    while end - cursor >= 4 {
        let word = u32::from_le_bytes([
            bytes[cursor],
            bytes[cursor + 1],
            bytes[cursor + 2],
            bytes[cursor + 3],
        ]);
        hash = hash_word_32(hash, word);
        cursor += 4;
    }

    // At most three bytes remain: an optional 2-byte word...
    if end - cursor >= 2 {
        let word = u16::from_le_bytes([bytes[cursor], bytes[cursor + 1]]);
        hash = hash_word_32(hash, word as u32);
        cursor += 2;
    }

    // ...and an optional trailing byte.
    if end - cursor >= 1 {
        hash = hash_word_32(hash, bytes[cursor] as u32);
    }

    hash
}
+ #[inline] + pub const fn get(self) -> &'static str { + unsafe { + // Safe due to invariant that self.path is tagged correctly + core::str::from_utf8_unchecked(core::mem::transmute(( + self.tagged.as_ptr().add(leading_tag!().len()), + self.tagged.len() - trailing_tag!().len() - leading_tag!().len(), + ))) + } + } +} + +impl Deref for DataKeyPath { + type Target = str; + #[inline] + fn deref(&self) -> &Self::Target { + self.get() + } +} + +/// Metadata statically associated with a particular [`DataKey`]. +#[derive(Debug, PartialEq, Eq, Copy, Clone, PartialOrd, Ord)] +#[non_exhaustive] +pub struct DataKeyMetadata { + /// What to prioritize when fallbacking on this [`DataKey`]. + pub fallback_priority: LocaleFallbackPriority, + /// A Unicode extension keyword to consider when loading data for this [`DataKey`]. + pub extension_key: Option<icu_locid::extensions::unicode::Key>, + /// Optional choice for additional fallbacking data required for loading this marker. + /// + /// For more information, see `LocaleFallbackConfig::fallback_supplement`. + pub fallback_supplement: Option<LocaleFallbackSupplement>, + /// Whether the key has a singleton value, as opposed to per-locale values. Singleton + /// keys behave differently, e.g. they never perform fallback, and can be optimized + /// in data providers. + pub singleton: bool, +} + +impl DataKeyMetadata { + /// Const-friendly version of [`Default::default`]. 
+ pub const fn const_default() -> Self { + Self { + fallback_priority: LocaleFallbackPriority::const_default(), + extension_key: None, + fallback_supplement: None, + singleton: false, + } + } + + #[doc(hidden)] + pub const fn construct_internal( + fallback_priority: LocaleFallbackPriority, + extension_key: Option<icu_locid::extensions::unicode::Key>, + fallback_supplement: Option<LocaleFallbackSupplement>, + singleton: bool, + ) -> Self { + Self { + fallback_priority, + extension_key, + fallback_supplement, + singleton, + } + } +} + +impl Default for DataKeyMetadata { + #[inline] + fn default() -> Self { + Self::const_default() + } +} + +/// Used for loading data from an ICU4X data provider. +/// +/// A resource key is tightly coupled with the code that uses it to load data at runtime. +/// Executables can be searched for `DataKey` instances to produce optimized data files. +/// Therefore, users should not generally create DataKey instances; they should instead use +/// the ones exported by a component. +/// +/// `DataKey`s are created with the [`data_key!`](crate::data_key) macro: +/// +/// ``` +/// # use icu_provider::DataKey; +/// const K: DataKey = icu_provider::data_key!("foo/bar@1"); +/// ``` +/// +/// The human-readable path string ends with `@` followed by one or more digits (the version +/// number). Paths do not contain characters other than ASCII letters and digits, `_`, `/`. +/// +/// Invalid paths are compile-time errors (as [`data_key!`](crate::data_key) uses `const`). 
+/// +/// ```compile_fail,E0080 +/// # use icu_provider::DataKey; +/// const K: DataKey = icu_provider::data_key!("foo/../bar@1"); +/// ``` +#[derive(Copy, Clone)] +pub struct DataKey { + path: DataKeyPath, + hash: DataKeyHash, + metadata: DataKeyMetadata, +} + +impl PartialEq for DataKey { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.hash == other.hash && self.path == other.path && self.metadata == other.metadata + } +} + +impl Eq for DataKey {} + +impl Ord for DataKey { + fn cmp(&self, other: &Self) -> core::cmp::Ordering { + self.path + .cmp(&other.path) + .then_with(|| self.metadata.cmp(&other.metadata)) + } +} + +impl PartialOrd for DataKey { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> { + Some(self.cmp(other)) + } +} + +impl core::hash::Hash for DataKey { + #[inline] + fn hash<H: core::hash::Hasher>(&self, state: &mut H) { + self.hash.hash(state) + } +} + +impl DataKey { + /// Gets a human-readable representation of a [`DataKey`]. + /// + /// The human-readable path string ends with `@` followed by one or more digits (the version + /// number). Paths do not contain characters other than ASCII letters and digits, `_`, `/`. + /// + /// Useful for reading and writing data to a file system. + #[inline] + pub const fn path(self) -> DataKeyPath { + self.path + } + + /// Gets a platform-independent hash of a [`DataKey`]. + /// + /// The hash is 4 bytes and allows for fast key comparison. + /// + /// # Example + /// + /// ``` + /// use icu_provider::DataKey; + /// use icu_provider::DataKeyHash; + /// + /// const KEY: DataKey = icu_provider::data_key!("foo@1"); + /// const KEY_HASH: DataKeyHash = KEY.hashed(); + /// + /// assert_eq!(KEY_HASH.to_bytes(), [0xe2, 0xb6, 0x17, 0x71]); + /// ``` + #[inline] + pub const fn hashed(self) -> DataKeyHash { + self.hash + } + + /// Gets the metadata associated with this [`DataKey`]. 
+ #[inline] + pub const fn metadata(self) -> DataKeyMetadata { + self.metadata + } + + /// Returns the [`LocaleFallbackConfig`] for this [`DataKey`]. + #[inline] + pub const fn fallback_config(self) -> LocaleFallbackConfig { + let mut config = LocaleFallbackConfig::const_default(); + config.priority = self.metadata.fallback_priority; + config.extension_key = self.metadata.extension_key; + config.fallback_supplement = self.metadata.fallback_supplement; + config + } + + /// Constructs a [`DataKey`] from a path and metadata. + /// + /// # Examples + /// + /// ``` + /// use icu_provider::data_key; + /// use icu_provider::DataKey; + /// + /// const CONST_KEY: DataKey = data_key!("foo@1"); + /// + /// let runtime_key = + /// DataKey::from_path_and_metadata(CONST_KEY.path(), CONST_KEY.metadata()); + /// + /// assert_eq!(CONST_KEY, runtime_key); + /// ``` + #[inline] + pub const fn from_path_and_metadata(path: DataKeyPath, metadata: DataKeyMetadata) -> Self { + Self { + path, + hash: DataKeyHash::compute_from_path(path), + metadata, + } + } + + #[doc(hidden)] + // Error is a str of the expected character class and the index where it wasn't encountered + // The indexing operations in this function have been reviewed in detail and won't panic. 
+ #[allow(clippy::indexing_slicing)] + pub const fn construct_internal( + path: &'static str, + metadata: DataKeyMetadata, + ) -> Result<Self, (&'static str, usize)> { + if path.len() < leading_tag!().len() + trailing_tag!().len() { + return Err(("tag", 0)); + } + // Start and end of the untagged part + let start = leading_tag!().len(); + let end = path.len() - trailing_tag!().len(); + + // Check tags + let mut i = 0; + while i < leading_tag!().len() { + if path.as_bytes()[i] != leading_tag!().as_bytes()[i] { + return Err(("tag", 0)); + } + i += 1; + } + i = 0; + while i < trailing_tag!().len() { + if path.as_bytes()[end + i] != trailing_tag!().as_bytes()[i] { + return Err(("tag", end + 1)); + } + i += 1; + } + + match Self::validate_path_manual_slice(path, start, end) { + Ok(()) => (), + Err(e) => return Err(e), + }; + + let path = DataKeyPath { tagged: path }; + + Ok(Self { + path, + hash: DataKeyHash::compute_from_path(path), + metadata, + }) + } + + const fn validate_path_manual_slice( + path: &'static str, + start: usize, + end: usize, + ) -> Result<(), (&'static str, usize)> { + debug_assert!(start <= end); + debug_assert!(end <= path.len()); + // Regex: [a-zA-Z0-9_][a-zA-Z0-9_/]*@[0-9]+ + enum State { + Empty, + Body, + At, + Version, + } + use State::*; + let mut i = start; + let mut state = Empty; + loop { + let byte = if i < end { + #[allow(clippy::indexing_slicing)] // protected by debug assertion + Some(path.as_bytes()[i]) + } else { + None + }; + state = match (state, byte) { + (Empty | Body, Some(b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_')) => Body, + (Body, Some(b'/')) => Body, + (Body, Some(b'@')) => At, + (At | Version, Some(b'0'..=b'9')) => Version, + // One of these cases will be hit at the latest when i == end, so the loop converges. 
+ (Version, None) => { + return Ok(()); + } + + (Empty, _) => return Err(("[a-zA-Z0-9_]", i)), + (Body, _) => return Err(("[a-zA-z0-9_/@]", i)), + (At, _) => return Err(("[0-9]", i)), + (Version, _) => return Err(("[0-9]", i)), + }; + i += 1; + } + } + + /// Returns [`Ok`] if this data key matches the argument, or the appropriate error. + /// + /// Convenience method for data providers that support a single [`DataKey`]. + /// + /// # Examples + /// + /// ``` + /// use icu_provider::prelude::*; + /// + /// const FOO_BAR: DataKey = icu_provider::data_key!("foo/bar@1"); + /// const FOO_BAZ: DataKey = icu_provider::data_key!("foo/baz@1"); + /// const BAR_BAZ: DataKey = icu_provider::data_key!("bar/baz@1"); + /// + /// assert!(matches!(FOO_BAR.match_key(FOO_BAR), Ok(()))); + /// assert!(matches!( + /// FOO_BAR.match_key(FOO_BAZ), + /// Err(DataError { + /// kind: DataErrorKind::MissingDataKey, + /// .. + /// }) + /// )); + /// assert!(matches!( + /// FOO_BAR.match_key(BAR_BAZ), + /// Err(DataError { + /// kind: DataErrorKind::MissingDataKey, + /// .. + /// }) + /// )); + /// + /// // The error context contains the argument: + /// assert_eq!(FOO_BAR.match_key(BAR_BAZ).unwrap_err().key, Some(BAR_BAZ)); + /// ``` + pub fn match_key(self, key: Self) -> Result<(), DataError> { + if self == key { + Ok(()) + } else { + Err(DataErrorKind::MissingDataKey.with_key(key)) + } + } +} + +/// See [`DataKey`]. +#[macro_export] +macro_rules! 
data_key { + ($path:expr) => {{ + $crate::data_key!($path, $crate::DataKeyMetadata::const_default()) + }}; + ($path:expr, $metadata:expr) => {{ + // Force the DataKey into a const context + const RESOURCE_KEY_MACRO_CONST: $crate::DataKey = { + match $crate::DataKey::construct_internal($crate::tagged!($path), $metadata) { + Ok(v) => v, + #[allow(clippy::panic)] // Const context + Err(_) => panic!(concat!("Invalid resource key: ", $path)), + // TODO Once formatting is const: + // Err((expected, index)) => panic!( + // "Invalid resource key {:?}: expected {:?}, found {:?} ", + // $path, + // expected, + // $crate::tagged!($path).get(index..)) + // ); + } + }; + RESOURCE_KEY_MACRO_CONST + }}; +} + +impl fmt::Debug for DataKey { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str("DataKey{")?; + fmt::Display::fmt(self, f)?; + f.write_char('}')?; + Ok(()) + } +} + +impl Writeable for DataKey { + fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result { + self.path().write_to(sink) + } + + fn writeable_length_hint(&self) -> LengthHint { + self.path().writeable_length_hint() + } + + fn write_to_string(&self) -> Cow<str> { + Cow::Borrowed(self.path().get()) + } +} + +writeable::impl_display_with_writeable!(DataKey); + +#[test] +fn test_path_syntax() { + // Valid keys: + DataKey::construct_internal(tagged!("hello/world@1"), Default::default()).unwrap(); + DataKey::construct_internal(tagged!("hello/world/foo@1"), Default::default()).unwrap(); + DataKey::construct_internal(tagged!("hello/world@999"), Default::default()).unwrap(); + DataKey::construct_internal(tagged!("hello_world/foo@1"), Default::default()).unwrap(); + DataKey::construct_internal(tagged!("hello_458/world@1"), Default::default()).unwrap(); + DataKey::construct_internal(tagged!("hello_world@1"), Default::default()).unwrap(); + + // No version: + assert_eq!( + DataKey::construct_internal(tagged!("hello/world"), Default::default()), + Err(( + "[a-zA-z0-9_/@]", + 
concat!(leading_tag!(), "hello/world").len() + )) + ); + + assert_eq!( + DataKey::construct_internal(tagged!("hello/world@"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "hello/world@").len())) + ); + assert_eq!( + DataKey::construct_internal(tagged!("hello/world@foo"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "hello/world@").len())) + ); + assert_eq!( + DataKey::construct_internal(tagged!("hello/world@1foo"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "hello/world@1").len())) + ); + + // Meta no longer accepted: + assert_eq!( + DataKey::construct_internal(tagged!("foo@1[R]"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "foo@1").len())) + ); + assert_eq!( + DataKey::construct_internal(tagged!("foo@1[u-ca]"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "foo@1").len())) + ); + assert_eq!( + DataKey::construct_internal(tagged!("foo@1[R][u-ca]"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "foo@1").len())) + ); + + // Invalid meta: + assert_eq!( + DataKey::construct_internal(tagged!("foo@1[U]"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "foo@1").len())) + ); + assert_eq!( + DataKey::construct_internal(tagged!("foo@1[uca]"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "foo@1").len())) + ); + assert_eq!( + DataKey::construct_internal(tagged!("foo@1[u-"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "foo@1").len())) + ); + assert_eq!( + DataKey::construct_internal(tagged!("foo@1[u-caa]"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "foo@1").len())) + ); + assert_eq!( + DataKey::construct_internal(tagged!("foo@1[R"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "foo@1").len())) + ); + + // Invalid characters: + assert_eq!( + DataKey::construct_internal(tagged!("你好/世界@1"), Default::default()), + Err(("[a-zA-Z0-9_]", leading_tag!().len())) + ); + + // Invalid tag: + assert_eq!( + 
DataKey::construct_internal( + concat!("hello/world@1", trailing_tag!()), + Default::default() + ), + Err(("tag", 0)) + ); + assert_eq!( + DataKey::construct_internal(concat!(leading_tag!(), "hello/world@1"), Default::default()), + Err(("tag", concat!(leading_tag!(), "hello/world@1").len())) + ); + assert_eq!( + DataKey::construct_internal("hello/world@1", Default::default()), + Err(("tag", 0)) + ); +} + +#[test] +fn test_key_to_string() { + struct KeyTestCase { + pub key: DataKey, + pub expected: &'static str, + } + + for cas in [ + KeyTestCase { + key: data_key!("core/cardinal@1"), + expected: "core/cardinal@1", + }, + KeyTestCase { + key: data_key!("core/maxlengthsubcatg@1"), + expected: "core/maxlengthsubcatg@1", + }, + KeyTestCase { + key: data_key!("core/cardinal@65535"), + expected: "core/cardinal@65535", + }, + ] { + writeable::assert_writeable_eq!(&cas.key, cas.expected); + assert_eq!(cas.expected, &*cas.key.path()); + } +} + +#[test] +fn test_hash_word_32() { + assert_eq!(0, fxhash_32(b"", 0, 0)); + assert_eq!(0, fxhash_32(b"a", 1, 0)); + assert_eq!(0, fxhash_32(b"a", 0, 1)); + assert_eq!(0, fxhash_32(b"a", 0, 10)); + assert_eq!(0, fxhash_32(b"a", 10, 0)); + assert_eq!(0, fxhash_32(b"a", 1, 1)); + assert_eq!(0xF3051F19, fxhash_32(b"a", 0, 0)); + assert_eq!(0x2F9DF119, fxhash_32(b"ab", 0, 0)); + assert_eq!(0xCB1D9396, fxhash_32(b"abc", 0, 0)); + assert_eq!(0x8628F119, fxhash_32(b"abcd", 0, 0)); + assert_eq!(0xBEBDB56D, fxhash_32(b"abcde", 0, 0)); + assert_eq!(0x1CE8476D, fxhash_32(b"abcdef", 0, 0)); + assert_eq!(0xC0F176A4, fxhash_32(b"abcdefg", 0, 0)); + assert_eq!(0x09AB476D, fxhash_32(b"abcdefgh", 0, 0)); + assert_eq!(0xB72F5D88, fxhash_32(b"abcdefghi", 0, 0)); +} + +#[test] +fn test_key_hash() { + struct KeyTestCase { + pub key: DataKey, + pub hash: DataKeyHash, + } + + for cas in [ + KeyTestCase { + key: data_key!("core/cardinal@1"), + hash: DataKeyHash([172, 207, 42, 236]), + }, + KeyTestCase { + key: data_key!("core/maxlengthsubcatg@1"), + hash: 
DataKeyHash([193, 6, 79, 61]), + }, + KeyTestCase { + key: data_key!("core/cardinal@65535"), + hash: DataKeyHash([176, 131, 182, 223]), + }, + ] { + assert_eq!(cas.hash, cas.key.hashed(), "{}", cas.key); + } +} diff --git a/third_party/rust/icu_provider/src/lib.rs b/third_party/rust/icu_provider/src/lib.rs new file mode 100644 index 0000000000..01cb2a3b34 --- /dev/null +++ b/third_party/rust/icu_provider/src/lib.rs @@ -0,0 +1,267 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! `icu_provider` is one of the [`ICU4X`] components. +//! +//! Unicode's experience with ICU4X's parent projects, ICU4C and ICU4J, led the team to realize +//! that data management is the most critical aspect of deploying internationalization, and that it requires +//! a high level of customization for the needs of the platform it is embedded in. As a result +//! ICU4X comes with a selection of providers that should allow for ICU4X to naturally fit into +//! different business and technological needs of customers. +//! +//! `icu_provider` defines traits and structs for transmitting data through the ICU4X locale +//! data pipeline. The primary trait is [`DataProvider`]. It is parameterized by a +//! [`KeyedDataMarker`], which contains the data type and a [`DataKey`]. It has one method, +//! [`DataProvider::load`], which transforms a [`DataRequest`] +//! into a [`DataResponse`]. +//! +//! - [`DataKey`] is a fixed identifier for the data type, such as `"plurals/cardinal@1"`. +//! - [`DataRequest`] contains additional annotations to choose a specific variant of the key, +//! such as a locale. +//! - [`DataResponse`] contains the data if the request was successful. +//! +//! In addition, there are three other traits which are widely implemented: +//! +//! - [`AnyProvider`] returns data as `dyn Any` trait objects. +//! 
- [`BufferProvider`] returns data as `[u8]` buffers. +//! - [`DynamicDataProvider`] returns structured data but is not specific to a key. +//! +//! The most common types required for this crate are included via the prelude: +//! +//! ``` +//! use icu_provider::prelude::*; +//! ``` +//! +//! ## Types of Data Providers +//! +//! All nontrivial data providers can fit into one of two classes. +//! +//! 1. [`AnyProvider`]: Those whose data originates as structured Rust objects +//! 2. [`BufferProvider`]: Those whose data originates as unstructured `[u8]` buffers +//! +//! **✨ Key Insight:** A given data provider is generally *either* an [`AnyProvider`] *or* a +//! [`BufferProvider`]. Which type depends on the data source, and it is not generally possible +//! to convert one to the other. +//! +//! See also [crate::constructors]. +//! +//! ### AnyProvider +//! +//! These providers are able to return structured data cast into `dyn Any` trait objects. Users +//! can call [`as_downcasting()`] to get an object implementing [`DataProvider`] by downcasting +//! the trait objects. +//! +//! Examples of AnyProviders: +//! +//! - [`DatagenProvider`] reads structured data from CLDR source files and returns ICU4X data structs. +//! - [`AnyPayloadProvider`] wraps a specific data struct and returns it. +//! - The `BakedDataProvider` which encodes structured data directly in Rust source +//! +//! ### BufferProvider +//! +//! These providers are able to return unstructured data typically represented as +//! [`serde`]-serialized buffers. Users can call [`as_deserializing()`] to get an object +//! implementing [`DataProvider`] by invoking Serde Deserialize. +//! +//! Examples of BufferProviders: +//! +//! - [`FsDataProvider`] reads individual buffers from the filesystem. +//! - [`BlobDataProvider`] reads buffers from a large in-memory blob. +//! +//! ## Provider Adapters +//! +//! ICU4X offers several built-in modules to combine providers in interesting ways. +//! 
These can be found in the [`icu_provider_adapters`] crate. +//! +//! ## Testing Provider +//! +//! This crate also contains a concrete provider for demonstration purposes: +//! +//! - [`HelloWorldProvider`] returns "hello world" strings in several languages. +//! +//! ## Types and Lifetimes +//! +//! Types compatible with [`Yokeable`] can be passed through the data provider, so long as they are +//! associated with a marker type implementing [`DataMarker`]. +//! +//! Data structs should generally have one lifetime argument: `'data`. This lifetime allows data +//! structs to borrow zero-copy data. +//! +//! ## Data generation API +//! +//! *This functionality is enabled with the "datagen" Cargo feature* +//! +//! The [`datagen`] module contains several APIs for data generation. See [`icu_datagen`] for the reference +//! data generation implementation. +//! +//! [`ICU4X`]: ../icu/index.html +//! [`DataProvider`]: data_provider::DataProvider +//! [`DataKey`]: key::DataKey +//! [`DataLocale`]: request::DataLocale +//! [`IterableDynamicDataProvider`]: datagen::IterableDynamicDataProvider +//! [`IterableDataProvider`]: datagen::IterableDataProvider +//! [`AnyPayloadProvider`]: ../icu_provider_adapters/any_payload/struct.AnyPayloadProvider.html +//! [`HelloWorldProvider`]: hello_world::HelloWorldProvider +//! [`AnyProvider`]: any::AnyProvider +//! [`Yokeable`]: yoke::Yokeable +//! [`impl_dynamic_data_provider!`]: impl_dynamic_data_provider +//! [`icu_provider_adapters`]: ../icu_provider_adapters/index.html +//! [`DatagenProvider`]: ../icu_datagen/struct.DatagenProvider.html +//! [`as_downcasting()`]: AsDowncastingAnyProvider::as_downcasting +//! [`as_deserializing()`]: AsDeserializingBufferProvider::as_deserializing +//! [`CldrJsonDataProvider`]: ../icu_datagen/cldr/struct.CldrJsonDataProvider.html +//! [`FsDataProvider`]: ../icu_provider_fs/struct.FsDataProvider.html +//! [`BlobDataProvider`]: ../icu_provider_blob/struct.BlobDataProvider.html +//! 
[`icu_datagen`]: ../icu_datagen/index.html + +// https://github.com/unicode-org/icu4x/blob/main/docs/process/boilerplate.md#library-annotations +#![cfg_attr(not(any(test, feature = "std")), no_std)] +#![cfg_attr( + not(test), + deny( + clippy::indexing_slicing, + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::exhaustive_structs, + clippy::exhaustive_enums, + missing_debug_implementations, + ) +)] +#![warn(missing_docs)] + +extern crate alloc; + +mod data_provider; +mod error; +#[doc(hidden)] +pub mod fallback; +mod key; +mod request; +mod response; + +pub mod any; +pub mod buf; +pub mod constructors; +#[cfg(feature = "datagen")] +pub mod datagen; +pub mod dynutil; +pub mod hello_world; +pub mod marker; +#[cfg(feature = "serde")] +pub mod serde; + +// Types from private modules +pub use crate::data_provider::DataProvider; +pub use crate::data_provider::DynamicDataProvider; +pub use crate::error::DataError; +pub use crate::error::DataErrorKind; +pub use crate::key::DataKey; +pub use crate::key::DataKeyHash; +pub use crate::key::DataKeyMetadata; +pub use crate::key::DataKeyPath; +#[cfg(feature = "experimental")] +pub use crate::request::AuxiliaryKeys; +pub use crate::request::DataLocale; +pub use crate::request::DataRequest; +pub use crate::request::DataRequestMetadata; +pub use crate::response::Cart; +pub use crate::response::DataPayload; +pub use crate::response::DataResponse; +pub use crate::response::DataResponseMetadata; +#[cfg(feature = "macros")] +pub use icu_provider_macros::data_struct; + +// Reexports from public modules +pub use crate::any::AnyMarker; +pub use crate::any::AnyPayload; +pub use crate::any::AnyProvider; +pub use crate::any::AnyResponse; +pub use crate::any::AsDowncastingAnyProvider; +pub use crate::any::AsDynamicDataProviderAnyMarkerWrap; +pub use crate::any::MaybeSendSync; +pub use crate::buf::BufferMarker; +pub use crate::buf::BufferProvider; +pub use crate::marker::DataMarker; +pub use crate::marker::KeyedDataMarker; 
+#[cfg(feature = "serde")] +pub use crate::serde::AsDeserializingBufferProvider; + +/// Core selection of APIs and structures for the ICU4X data provider. +pub mod prelude { + #[doc(no_inline)] + pub use crate::data_key; + #[doc(no_inline)] + pub use crate::AnyMarker; + #[doc(no_inline)] + pub use crate::AnyPayload; + #[doc(no_inline)] + pub use crate::AnyProvider; + #[doc(no_inline)] + pub use crate::AnyResponse; + #[doc(no_inline)] + #[cfg(feature = "serde")] + pub use crate::AsDeserializingBufferProvider; + #[doc(no_inline)] + pub use crate::AsDowncastingAnyProvider; + #[doc(no_inline)] + pub use crate::AsDynamicDataProviderAnyMarkerWrap; + #[doc(no_inline)] + #[cfg(feature = "experimental")] + pub use crate::AuxiliaryKeys; + #[doc(no_inline)] + pub use crate::BufferMarker; + #[doc(no_inline)] + pub use crate::BufferProvider; + #[doc(no_inline)] + pub use crate::DataError; + #[doc(no_inline)] + pub use crate::DataErrorKind; + #[doc(no_inline)] + pub use crate::DataKey; + #[doc(no_inline)] + pub use crate::DataKeyHash; + #[doc(no_inline)] + pub use crate::DataLocale; + #[doc(no_inline)] + pub use crate::DataMarker; + #[doc(no_inline)] + pub use crate::DataPayload; + #[doc(no_inline)] + pub use crate::DataProvider; + #[doc(no_inline)] + pub use crate::DataRequest; + #[doc(no_inline)] + pub use crate::DataRequestMetadata; + #[doc(no_inline)] + pub use crate::DataResponse; + #[doc(no_inline)] + pub use crate::DataResponseMetadata; + #[doc(no_inline)] + pub use crate::DynamicDataProvider; + #[doc(no_inline)] + pub use crate::KeyedDataMarker; + + #[doc(hidden)] + pub use yoke; + #[doc(hidden)] + pub use zerofrom; +} + +// Additional crate re-exports for compatibility +#[doc(hidden)] +pub use fallback::LocaleFallbackPriority as FallbackPriority; +#[doc(hidden)] +pub use fallback::LocaleFallbackSupplement as FallbackSupplement; +#[doc(hidden)] +pub use yoke; +#[doc(hidden)] +pub use zerofrom; + +// For macros +#[doc(hidden)] +pub mod _internal { + pub use 
super::fallback::{LocaleFallbackPriority, LocaleFallbackSupplement}; + pub use icu_locid as locid; +} diff --git a/third_party/rust/icu_provider/src/marker.rs b/third_party/rust/icu_provider/src/marker.rs new file mode 100644 index 0000000000..0065bb656c --- /dev/null +++ b/third_party/rust/icu_provider/src/marker.rs @@ -0,0 +1,86 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Marker types and traits for DataProvider. + +use crate::key::DataKey; +use yoke::Yokeable; + +/// Trait marker for data structs. All types delivered by the data provider must be associated with +/// something implementing this trait. +/// +/// Structs implementing this trait are normally generated with the [`data_struct`] macro. +/// +/// By convention, the non-standard `Marker` suffix is used by types implementing DataMarker. +/// +/// In addition to a marker type implementing DataMarker, the following impls must also be present +/// for the data struct: +/// +/// - `impl<'a> Yokeable<'a>` (required) +/// - `impl ZeroFrom<Self>` +/// +/// Also see [`KeyedDataMarker`]. +/// +/// Note: `DataMarker`s are quasi-const-generic compile-time objects, and as such are expected +/// to be unit structs. As this is not something that can be enforced by the type system, we +/// currently only have a `'static` bound on them (which is needed by a lot of our code). 
+/// +/// # Examples +/// +/// Manually implementing DataMarker for a custom type: +/// +/// ``` +/// use icu_provider::prelude::*; +/// use std::borrow::Cow; +/// use std::rc::Rc; +/// +/// #[derive(yoke::Yokeable, zerofrom::ZeroFrom)] +/// struct MyDataStruct<'data> { +/// message: Cow<'data, str>, +/// } +/// +/// struct MyDataStructMarker; +/// +/// impl DataMarker for MyDataStructMarker { +/// type Yokeable = MyDataStruct<'static>; +/// } +/// +/// // We can now use MyDataStruct with DataProvider: +/// let s = MyDataStruct { +/// message: Cow::Owned("Hello World".into()), +/// }; +/// let payload = DataPayload::<MyDataStructMarker>::from_owned(s); +/// assert_eq!(payload.get().message, "Hello World"); +/// ``` +/// +/// [`data_struct`]: crate::data_struct +pub trait DataMarker: 'static { + /// A type that implements [`Yokeable`]. This should typically be the `'static` version of a + /// data struct. + type Yokeable: for<'a> Yokeable<'a>; +} + +/// A [`DataMarker`] with a [`DataKey`] attached. +/// +/// Structs implementing this trait are normally generated with the [`data_struct!`] macro. +/// +/// Implementing this trait enables this marker to be used with the main [`DataProvider`] trait. +/// Most markers should be associated with a specific key and should therefore implement this +/// trait. +/// +/// [`BufferMarker`] and [`AnyMarker`] are examples of markers that do _not_ implement this trait +/// because they are not specific to a single key. +/// +/// Note: `KeyedDataMarker`s are quasi-const-generic compile-time objects, and as such are expected +/// to be unit structs. As this is not something that can be enforced by the type system, we +/// currently only have a `'static` bound on them (which is needed by a lot of our code). 
+/// +/// [`data_struct!`]: crate::data_struct +/// [`DataProvider`]: crate::DataProvider +/// [`BufferMarker`]: crate::BufferMarker +/// [`AnyMarker`]: crate::AnyMarker +pub trait KeyedDataMarker: DataMarker { + /// The single [`DataKey`] associated with this marker. + const KEY: DataKey; +} diff --git a/third_party/rust/icu_provider/src/request.rs b/third_party/rust/icu_provider/src/request.rs new file mode 100644 index 0000000000..1bb84f8667 --- /dev/null +++ b/third_party/rust/icu_provider/src/request.rs @@ -0,0 +1,1121 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::{DataError, DataErrorKind}; +use core::cmp::Ordering; +use core::default::Default; +use core::fmt; +use core::fmt::Debug; +use core::hash::Hash; +use core::str::FromStr; +use icu_locid::extensions::unicode as unicode_ext; +use icu_locid::subtags::{Language, Region, Script, Variants}; +use icu_locid::{LanguageIdentifier, Locale, SubtagOrderingResult}; +use writeable::{LengthHint, Writeable}; + +#[cfg(feature = "experimental")] +use alloc::string::String; +#[cfg(feature = "experimental")] +use core::ops::Deref; +#[cfg(feature = "experimental")] +use icu_locid::extensions::private::Subtag; +#[cfg(feature = "experimental")] +use tinystr::TinyAsciiStr; + +#[cfg(doc)] +use icu_locid::subtags::Variant; + +/// The request type passed into all data provider implementations. +#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)] +#[allow(clippy::exhaustive_structs)] // this type is stable +pub struct DataRequest<'a> { + /// The locale for which to load data. + /// + /// If locale fallback is enabled, the resulting data may be from a different locale + /// than the one requested here. + pub locale: &'a DataLocale, + /// Metadata that may affect the behavior of the data provider. 
+ pub metadata: DataRequestMetadata, +} + +impl fmt::Display for DataRequest<'_> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(&self.locale, f) + } +} + +/// Metadata for data requests. This currently carries only the `silent` flag, but it may be extended with options +/// for tuning locale fallback, buffer layout, and so forth. +#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +#[non_exhaustive] +pub struct DataRequestMetadata { + /// Silent requests do not log errors. This can be used for exploratory querying, such as fallbacks. + pub silent: bool, +} + +/// A locale type optimized for use in fallbacking and the ICU4X data pipeline. +/// +/// [`DataLocale`] contains less functionality than [`Locale`] but more than +/// [`LanguageIdentifier`] for better size and performance while still meeting +/// the needs of the ICU4X data pipeline. +/// +/// # Examples +/// +/// Convert a [`Locale`] to a [`DataLocale`] and back: +/// +/// ``` +/// use icu_locid::locale; +/// use icu_provider::DataLocale; +/// +/// let locale = locale!("en-u-ca-buddhist"); +/// let data_locale = DataLocale::from(locale); +/// let locale = data_locale.into_locale(); +/// +/// assert_eq!(locale, locale!("en-u-ca-buddhist")); +/// ``` +/// +/// You can alternatively create a [`DataLocale`] from a borrowed [`Locale`], which is more +/// efficient than cloning the [`Locale`], but less efficient than converting an owned +/// [`Locale`]: +/// +/// ``` +/// use icu_locid::locale; +/// use icu_provider::DataLocale; +/// +/// let locale1 = locale!("en-u-ca-buddhist"); +/// let data_locale = DataLocale::from(&locale1); +/// let locale2 = data_locale.into_locale(); +/// +/// assert_eq!(locale1, locale2); +/// ``` +/// +/// If you are sure that you have no Unicode keywords, start with [`LanguageIdentifier`]: +/// +/// ``` +/// use icu_locid::langid; +/// use icu_provider::DataLocale; +/// +/// let langid = langid!("es-CA-valencia"); +/// let data_locale = 
DataLocale::from(langid); +/// let langid = data_locale.get_langid(); +/// +/// assert_eq!(langid, langid!("es-CA-valencia")); +/// ``` +/// +/// [`DataLocale`] only supports `-u` keywords, to reflect the current state of CLDR data +/// lookup and fallback. This may change in the future. +/// +/// ``` +/// use icu_locid::{locale, Locale}; +/// use icu_provider::DataLocale; +/// +/// let locale = "hi-t-en-h0-hybrid-u-attr-ca-buddhist" +/// .parse::<Locale>() +/// .unwrap(); +/// let data_locale = DataLocale::from(locale); +/// +/// assert_eq!(data_locale.into_locale(), locale!("hi-u-ca-buddhist")); +/// ``` +#[derive(PartialEq, Clone, Default, Eq, Hash)] +pub struct DataLocale { + langid: LanguageIdentifier, + keywords: unicode_ext::Keywords, + #[cfg(feature = "experimental")] + aux: Option<AuxiliaryKeys>, +} + +impl<'a> Default for &'a DataLocale { + fn default() -> Self { + static DEFAULT: DataLocale = DataLocale { + langid: LanguageIdentifier::UND, + keywords: unicode_ext::Keywords::new(), + #[cfg(feature = "experimental")] + aux: None, + }; + &DEFAULT + } +} + +impl fmt::Debug for DataLocale { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "DataLocale{{{self}}}") + } +} + +impl Writeable for DataLocale { + fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result { + self.langid.write_to(sink)?; + if !self.keywords.is_empty() { + sink.write_str("-u-")?; + self.keywords.write_to(sink)?; + } + #[cfg(feature = "experimental")] + if let Some(aux) = self.aux.as_ref() { + sink.write_str("-x-")?; + aux.write_to(sink)?; + } + Ok(()) + } + + fn writeable_length_hint(&self) -> LengthHint { + let mut length_hint = self.langid.writeable_length_hint(); + if !self.keywords.is_empty() { + length_hint += self.keywords.writeable_length_hint() + 3; + } + #[cfg(feature = "experimental")] + if let Some(aux) = self.aux.as_ref() { + length_hint += aux.writeable_length_hint() + 3; + } + length_hint + } + + fn write_to_string(&self) -> 
alloc::borrow::Cow<str> { + #[cfg_attr(not(feature = "experimental"), allow(unused_mut))] + let mut is_only_langid = self.keywords.is_empty(); + #[cfg(feature = "experimental")] + { + is_only_langid = is_only_langid && self.aux.is_none(); + } + if is_only_langid { + return self.langid.write_to_string(); + } + let mut string = + alloc::string::String::with_capacity(self.writeable_length_hint().capacity()); + let _ = self.write_to(&mut string); + alloc::borrow::Cow::Owned(string) + } +} + +writeable::impl_display_with_writeable!(DataLocale); + +impl From<LanguageIdentifier> for DataLocale { + fn from(langid: LanguageIdentifier) -> Self { + Self { + langid, + keywords: unicode_ext::Keywords::new(), + #[cfg(feature = "experimental")] + aux: None, + } + } +} + +impl From<Locale> for DataLocale { + fn from(locale: Locale) -> Self { + Self { + langid: locale.id, + keywords: locale.extensions.unicode.keywords, + #[cfg(feature = "experimental")] + aux: AuxiliaryKeys::try_from_iter(locale.extensions.private.iter().copied()).ok(), + } + } +} + +impl From<&LanguageIdentifier> for DataLocale { + fn from(langid: &LanguageIdentifier) -> Self { + Self { + langid: langid.clone(), + keywords: unicode_ext::Keywords::new(), + #[cfg(feature = "experimental")] + aux: None, + } + } +} + +impl From<&Locale> for DataLocale { + fn from(locale: &Locale) -> Self { + Self { + langid: locale.id.clone(), + keywords: locale.extensions.unicode.keywords.clone(), + #[cfg(feature = "experimental")] + aux: AuxiliaryKeys::try_from_iter(locale.extensions.private.iter().copied()).ok(), + } + } +} + +impl FromStr for DataLocale { + type Err = DataError; + fn from_str(s: &str) -> Result<Self, Self::Err> { + let locale = Locale::from_str(s).map_err(|e| { + DataErrorKind::KeyLocaleSyntax + .into_error() + .with_display_context(s) + .with_display_context(&e) + })?; + Ok(DataLocale::from(locale)) + } +} + +impl DataLocale { + /// Compare this [`DataLocale`] with BCP-47 bytes. 
+ /// + /// The return value is equivalent to what would happen if you first converted this + /// [`DataLocale`] to a BCP-47 string and then performed a byte comparison. + /// + /// This function is case-sensitive and results in a *total order*, so it is appropriate for + /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`. + /// + /// # Examples + /// + /// ``` + /// use icu_locid::Locale; + /// use icu_provider::DataLocale; + /// use std::cmp::Ordering; + /// + /// let bcp47_strings: &[&str] = &[ + /// "ca", + /// "ca-ES", + /// "ca-ES-u-ca-buddhist", + /// "ca-ES-valencia", + /// "ca-ES-x-gbp", + /// "ca-ES-x-gbp-short", + /// "ca-ES-x-usd", + /// "ca-ES-xyzabc", + /// "ca-x-eur", + /// "cat", + /// "pl-Latn-PL", + /// "und", + /// "und-fonipa", + /// "und-u-ca-hebrew", + /// "und-u-ca-japanese", + /// "und-x-mxn", + /// "zh", + /// ]; + /// + /// for ab in bcp47_strings.windows(2) { + /// let a = ab[0]; + /// let b = ab[1]; + /// assert_eq!(a.cmp(b), Ordering::Less, "strings: {} < {}", a, b); + /// let a_loc: DataLocale = a.parse().unwrap(); + /// assert_eq!( + /// a_loc.strict_cmp(a.as_bytes()), + /// Ordering::Equal, + /// "strict_cmp: {} == {}", + /// a_loc, + /// a + /// ); + /// assert_eq!( + /// a_loc.strict_cmp(b.as_bytes()), + /// Ordering::Less, + /// "strict_cmp: {} < {}", + /// a_loc, + /// b + /// ); + /// let b_loc: DataLocale = b.parse().unwrap(); + /// assert_eq!( + /// b_loc.strict_cmp(b.as_bytes()), + /// Ordering::Equal, + /// "strict_cmp: {} == {}", + /// b_loc, + /// b + /// ); + /// assert_eq!( + /// b_loc.strict_cmp(a.as_bytes()), + /// Ordering::Greater, + /// "strict_cmp: {} > {}", + /// b_loc, + /// a + /// ); + /// } + /// ``` + /// + /// Comparison against invalid strings: + /// + /// ``` + /// use icu_provider::DataLocale; + /// + /// let invalid_strings: &[&str] = &[ + /// // Less than "ca-ES" + /// "CA", + /// "ar-x-gbp-FOO", + /// // Greater than "ca-ES-x-gbp" + /// "ca_ES", + /// 
"ca-ES-x-gbp-FOO", + /// ]; + /// + /// let data_locale = "ca-ES-x-gbp".parse::<DataLocale>().unwrap(); + /// + /// for s in invalid_strings.iter() { + /// let expected_ordering = "ca-ES-x-gbp".cmp(s); + /// let actual_ordering = data_locale.strict_cmp(s.as_bytes()); + /// assert_eq!(expected_ordering, actual_ordering, "{}", s); + /// } + /// ``` + pub fn strict_cmp(&self, other: &[u8]) -> Ordering { + let subtags = other.split(|b| *b == b'-'); + let mut subtag_result = self.langid.strict_cmp_iter(subtags); + if self.has_unicode_ext() { + let mut subtags = match subtag_result { + SubtagOrderingResult::Subtags(s) => s, + SubtagOrderingResult::Ordering(o) => return o, + }; + match subtags.next() { + Some(b"u") => (), + Some(s) => return s.cmp(b"u").reverse(), + None => return Ordering::Greater, + } + subtag_result = self.keywords.strict_cmp_iter(subtags); + } + #[cfg(feature = "experimental")] + if let Some(aux) = self.get_aux() { + let mut subtags = match subtag_result { + SubtagOrderingResult::Subtags(s) => s, + SubtagOrderingResult::Ordering(o) => return o, + }; + match subtags.next() { + Some(b"x") => (), + Some(s) => return s.cmp(b"x").reverse(), + None => return Ordering::Greater, + } + subtag_result = aux.strict_cmp_iter(subtags); + } + subtag_result.end() + } +} + +impl DataLocale { + /// Returns whether this [`DataLocale`] has all empty fields (no components). 
+ /// + /// See also: + /// + /// - [`DataLocale::is_und()`] + /// - [`DataLocale::is_langid_und()`] + /// + /// # Examples + /// + /// ``` + /// use icu_provider::DataLocale; + /// + /// assert!("und".parse::<DataLocale>().unwrap().is_empty()); + /// assert!(!"und-u-ca-buddhist" + /// .parse::<DataLocale>() + /// .unwrap() + /// .is_empty()); + /// assert!(!"und-x-aux".parse::<DataLocale>().unwrap().is_empty()); + /// assert!(!"ca-ES".parse::<DataLocale>().unwrap().is_empty()); + /// ``` + pub fn is_empty(&self) -> bool { + self == <&DataLocale>::default() + } + + /// Returns whether this [`DataLocale`] is `und` in the locale and extensions portion. + /// + /// This ignores auxiliary keys. + /// + /// See also: + /// + /// - [`DataLocale::is_empty()`] + /// - [`DataLocale::is_langid_und()`] + /// + /// # Examples + /// + /// ``` + /// use icu_provider::DataLocale; + /// + /// assert!("und".parse::<DataLocale>().unwrap().is_und()); + /// assert!(!"und-u-ca-buddhist".parse::<DataLocale>().unwrap().is_und()); + /// assert!("und-x-aux".parse::<DataLocale>().unwrap().is_und()); + /// assert!(!"ca-ES".parse::<DataLocale>().unwrap().is_und()); + /// ``` + pub fn is_und(&self) -> bool { + self.langid == LanguageIdentifier::UND && self.keywords.is_empty() + } + + /// Returns whether the [`LanguageIdentifier`] associated with this request is `und`. + /// + /// This ignores extension keywords and auxiliary keys. 
+ /// + /// See also: + /// + /// - [`DataLocale::is_empty()`] + /// - [`DataLocale::is_und()`] + /// + /// # Examples + /// + /// ``` + /// use icu_provider::DataLocale; + /// + /// assert!("und".parse::<DataLocale>().unwrap().is_langid_und()); + /// assert!("und-u-ca-buddhist" + /// .parse::<DataLocale>() + /// .unwrap() + /// .is_langid_und()); + /// assert!("und-x-aux".parse::<DataLocale>().unwrap().is_langid_und()); + /// assert!(!"ca-ES".parse::<DataLocale>().unwrap().is_langid_und()); + /// ``` + pub fn is_langid_und(&self) -> bool { + self.langid == LanguageIdentifier::UND + } + + /// Gets the [`LanguageIdentifier`] for this [`DataLocale`]. + /// + /// This may allocate memory if there are variant subtags. If you need only the language, + /// script, and/or region subtag, use the specific getters for those subtags: + /// + /// - [`DataLocale::language()`] + /// - [`DataLocale::script()`] + /// - [`DataLocale::region()`] + /// + /// If you have ownership over the `DataLocale`, use [`DataLocale::into_locale()`] + /// and then access the `id` field. + /// + /// # Examples + /// + /// ``` + /// use icu_locid::langid; + /// use icu_provider::prelude::*; + /// + /// const FOO_BAR: DataKey = icu_provider::data_key!("foo/bar@1"); + /// + /// let req_no_langid = DataRequest { + /// locale: &Default::default(), + /// metadata: Default::default(), + /// }; + /// + /// let req_with_langid = DataRequest { + /// locale: &langid!("ar-EG").into(), + /// metadata: Default::default(), + /// }; + /// + /// assert_eq!(req_no_langid.locale.get_langid(), langid!("und")); + /// assert_eq!(req_with_langid.locale.get_langid(), langid!("ar-EG")); + /// ``` + pub fn get_langid(&self) -> LanguageIdentifier { + self.langid.clone() + } + + /// Overrides the entire [`LanguageIdentifier`] portion of this [`DataLocale`]. + #[inline] + pub fn set_langid(&mut self, lid: LanguageIdentifier) { + self.langid = lid; + } + + /// Converts this [`DataLocale`] into a [`Locale`]. 
+ /// + /// See also [`DataLocale::get_langid()`]. + /// + /// # Examples + /// + /// ``` + /// use icu_locid::{ + /// langid, locale, + /// subtags::{language, region}, + /// Locale, + /// }; + /// use icu_provider::prelude::*; + /// + /// let locale: DataLocale = locale!("it-IT-u-ca-coptic").into(); + /// + /// assert_eq!(locale.get_langid(), langid!("it-IT")); + /// assert_eq!(locale.language(), language!("it")); + /// assert_eq!(locale.script(), None); + /// assert_eq!(locale.region(), Some(region!("IT"))); + /// + /// let locale = locale.into_locale(); + /// assert_eq!(locale, locale!("it-IT-u-ca-coptic")); + /// ``` + /// + /// Auxiliary keys are retained: + /// + /// ``` + /// use icu_locid::Locale; + /// use icu_provider::prelude::*; + /// use writeable::assert_writeable_eq; + /// + /// let locale: Locale = "und-u-nu-arab-x-gbp".parse().unwrap(); + /// let data_locale = DataLocale::from(locale); + /// assert_writeable_eq!(data_locale, "und-u-nu-arab-x-gbp"); + /// + /// let recovered_locale = data_locale.into_locale(); + /// assert_writeable_eq!(recovered_locale, "und-u-nu-arab-x-gbp"); + /// ``` + pub fn into_locale(self) -> Locale { + let mut loc = Locale { + id: self.langid, + ..Default::default() + }; + loc.extensions.unicode.keywords = self.keywords; + #[cfg(feature = "experimental")] + if let Some(aux) = self.aux { + loc.extensions.private = + icu_locid::extensions::private::Private::from_vec_unchecked(aux.iter().collect()); + } + loc + } + + /// Returns the [`Language`] for this [`DataLocale`]. + #[inline] + pub fn language(&self) -> Language { + self.langid.language + } + + /// Sets the [`Language`] for this [`DataLocale`]. + #[inline] + pub fn set_language(&mut self, language: Language) { + self.langid.language = language; + } + + /// Returns the [`Script`] for this [`DataLocale`]. + #[inline] + pub fn script(&self) -> Option<Script> { + self.langid.script + } + + /// Sets the [`Script`] for this [`DataLocale`]. 
+ #[inline] + pub fn set_script(&mut self, script: Option<Script>) { + self.langid.script = script; + } + + /// Returns the [`Region`] for this [`DataLocale`]. + #[inline] + pub fn region(&self) -> Option<Region> { + self.langid.region + } + + /// Sets the [`Region`] for this [`DataLocale`]. + #[inline] + pub fn set_region(&mut self, region: Option<Region>) { + self.langid.region = region; + } + + /// Returns whether there are any [`Variant`] subtags in this [`DataLocale`]. + #[inline] + pub fn has_variants(&self) -> bool { + !self.langid.variants.is_empty() + } + + /// Sets all [`Variants`] on this [`DataLocale`], overwriting any that were there previously. + #[inline] + pub fn set_variants(&mut self, variants: Variants) { + self.langid.variants = variants; + } + + /// Removes all [`Variant`] subtags in this [`DataLocale`]. + #[inline] + pub fn clear_variants(&mut self) -> Variants { + self.langid.variants.clear() + } + + /// Gets the value of the specified Unicode extension keyword for this [`DataLocale`]. + #[inline] + pub fn get_unicode_ext(&self, key: &unicode_ext::Key) -> Option<unicode_ext::Value> { + self.keywords.get(key).cloned() + } + + /// Returns whether there are any Unicode extension keywords in this [`DataLocale`]. + #[inline] + pub fn has_unicode_ext(&self) -> bool { + !self.keywords.is_empty() + } + + /// Returns whether a specific Unicode extension keyword is present in this [`DataLocale`]. + #[inline] + pub fn contains_unicode_ext(&self, key: &unicode_ext::Key) -> bool { + self.keywords.contains_key(key) + } + + /// Returns whether this [`DataLocale`] contains a Unicode extension keyword + /// with the specified key and value. 
+ /// + /// # Examples + /// + /// ``` + /// use icu_locid::{ + /// extensions::unicode::{key, value}, + /// Locale, + /// }; + /// use icu_provider::prelude::*; + /// + /// let locale: Locale = "it-IT-u-ca-coptic".parse().expect("Valid BCP-47"); + /// let locale: DataLocale = locale.into(); + /// + /// assert_eq!(locale.get_unicode_ext(&key!("hc")), None); + /// assert_eq!(locale.get_unicode_ext(&key!("ca")), Some(value!("coptic"))); + /// assert!(locale.matches_unicode_ext(&key!("ca"), &value!("coptic"),)); + /// ``` + #[inline] + pub fn matches_unicode_ext(&self, key: &unicode_ext::Key, value: &unicode_ext::Value) -> bool { + self.keywords.get(key) == Some(value) + } + + /// Sets the value for a specific Unicode extension keyword on this [`DataLocale`]. + #[inline] + pub fn set_unicode_ext( + &mut self, + key: unicode_ext::Key, + value: unicode_ext::Value, + ) -> Option<unicode_ext::Value> { + self.keywords.set(key, value) + } + + /// Removes a specific Unicode extension keyword from this [`DataLocale`], returning + /// the value if it was present. + #[inline] + pub fn remove_unicode_ext(&mut self, key: &unicode_ext::Key) -> Option<unicode_ext::Value> { + self.keywords.remove(key) + } + + /// Retains a subset of keywords as specified by the predicate function. + #[inline] + pub fn retain_unicode_ext<F>(&mut self, predicate: F) + where + F: FnMut(&unicode_ext::Key) -> bool, + { + self.keywords.retain_by_key(predicate) + } + + /// Gets the auxiliary key for this [`DataLocale`]. + /// + /// For more information and examples, see [`AuxiliaryKeys`]. + #[cfg(feature = "experimental")] + pub fn get_aux(&self) -> Option<&AuxiliaryKeys> { + self.aux.as_ref() + } + + /// Returns whether this [`DataLocale`] has an auxiliary key. + /// + /// For more information and examples, see [`AuxiliaryKeys`]. + #[cfg(feature = "experimental")] + pub fn has_aux(&self) -> bool { + self.aux.is_some() + } + + /// Sets an auxiliary key on this [`DataLocale`]. 
+ /// + /// Returns the previous auxiliary key if present. + /// + /// For more information and examples, see [`AuxiliaryKeys`]. + #[cfg(feature = "experimental")] + pub fn set_aux(&mut self, value: AuxiliaryKeys) -> Option<AuxiliaryKeys> { + self.aux.replace(value) + } + + /// Remove an auxiliary key, if present. Returns the removed auxiliary key. + /// + /// # Examples + /// + /// ``` + /// use icu_locid::locale; + /// use icu_provider::prelude::*; + /// use writeable::assert_writeable_eq; + /// + /// let mut data_locale: DataLocale = locale!("ar-EG").into(); + /// let aux = "gbp" + /// .parse::<AuxiliaryKeys>() + /// .expect("contains valid characters"); + /// data_locale.set_aux(aux); + /// assert_writeable_eq!(data_locale, "ar-EG-x-gbp"); + /// + /// let maybe_aux = data_locale.remove_aux(); + /// assert_writeable_eq!(data_locale, "ar-EG"); + /// assert_writeable_eq!(maybe_aux.unwrap(), "gbp"); + /// ``` + #[cfg(feature = "experimental")] + pub fn remove_aux(&mut self) -> Option<AuxiliaryKeys> { + self.aux.take() + } +} + +/// The "auxiliary key" is an annotation on [`DataLocale`] that can contain arbitrary +/// information that does not fit into the [`LanguageIdentifier`] or [`Keywords`]. +/// +/// A [`DataLocale`] can have multiple auxiliary keys, represented by this struct. The auxiliary +/// keys are stored as private use subtags following `-x-`. +/// +/// An auxiliary key currently allows 1-8 lowercase alphanumerics. +/// +/// <div class="stab unstable"> +/// 🚧 This code is experimental; it may change at any time, in breaking or non-breaking ways, +/// including in SemVer minor releases. It can be enabled with the "experimental" Cargo feature +/// of the `icu_provider` crate. Use with caution. 
+/// <a href="https://github.com/unicode-org/icu4x/issues/3632">#3632</a> +/// </div> +/// +/// # Examples +/// +/// ``` +/// use icu_locid::locale; +/// use icu_provider::prelude::*; +/// use writeable::assert_writeable_eq; +/// +/// let mut data_locale: DataLocale = locale!("ar-EG").into(); +/// assert_writeable_eq!(data_locale, "ar-EG"); +/// assert!(!data_locale.has_aux()); +/// assert_eq!(data_locale.get_aux(), None); +/// +/// let aux = "gbp" +/// .parse::<AuxiliaryKeys>() +/// .expect("contains valid characters"); +/// +/// data_locale.set_aux(aux); +/// assert_writeable_eq!(data_locale, "ar-EG-x-gbp"); +/// assert!(data_locale.has_aux()); +/// assert_eq!(data_locale.get_aux(), Some(&"gbp".parse().unwrap())); +/// ``` +/// +/// Multiple auxiliary keys are allowed: +/// +/// ``` +/// use icu_locid::locale; +/// use icu_provider::prelude::*; +/// use writeable::assert_writeable_eq; +/// +/// let data_locale = "ar-EG-x-gbp-long".parse::<DataLocale>().unwrap(); +/// assert_writeable_eq!(data_locale, "ar-EG-x-gbp-long"); +/// assert_eq!(data_locale.get_aux().unwrap().iter().count(), 2); +/// ``` +/// +/// Not all strings are valid auxiliary keys. 
+/// The string must be well-formed and case-normalized: +/// +/// ``` +/// use icu_provider::prelude::*; +/// +/// assert!("abcdefg".parse::<AuxiliaryKeys>().is_ok()); +/// assert!("abc-xyz".parse::<AuxiliaryKeys>().is_ok()); +/// +/// assert!("".parse::<AuxiliaryKeys>().is_err()); +/// assert!("!@#$%".parse::<AuxiliaryKeys>().is_err()); +/// assert!("abc_xyz".parse::<AuxiliaryKeys>().is_err()); +/// assert!("ABC123".parse::<AuxiliaryKeys>().is_err()); +/// ``` +/// +/// [`Keywords`]: unicode_ext::Keywords +#[derive(Debug, PartialEq, Clone, Eq, Hash)] +#[cfg(feature = "experimental")] +pub struct AuxiliaryKeys { + value: AuxiliaryKeysInner, +} + +#[cfg(feature = "experimental")] +#[derive(Clone)] +enum AuxiliaryKeysInner { + Boxed(alloc::boxed::Box<str>), + Stack(TinyAsciiStr<23>), + // NOTE: In the future, a `Static` variant could be added to allow `data_locale!("...")` + // Static(&'static str), +} + +#[cfg(feature = "experimental")] +impl Deref for AuxiliaryKeysInner { + type Target = str; + #[inline] + fn deref(&self) -> &Self::Target { + match self { + Self::Boxed(s) => s.deref(), + Self::Stack(s) => s.as_str(), + } + } +} + +#[cfg(feature = "experimental")] +impl PartialEq for AuxiliaryKeysInner { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.deref() == other.deref() + } +} + +#[cfg(feature = "experimental")] +impl Eq for AuxiliaryKeysInner {} + +#[cfg(feature = "experimental")] +impl Debug for AuxiliaryKeysInner { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.deref().fmt(f) + } +} + +#[cfg(feature = "experimental")] +impl Hash for AuxiliaryKeysInner { + #[inline] + fn hash<H: core::hash::Hasher>(&self, state: &mut H) { + self.deref().hash(state) + } +} + +#[cfg(feature = "experimental")] +writeable::impl_display_with_writeable!(AuxiliaryKeys); + +#[cfg(feature = "experimental")] +impl Writeable for AuxiliaryKeys { + fn write_to<W: fmt::Write + ?Sized>(&self, sink: &mut W) -> fmt::Result { + 
self.value.write_to(sink) + } + fn writeable_length_hint(&self) -> LengthHint { + self.value.writeable_length_hint() + } + fn write_to_string(&self) -> alloc::borrow::Cow<str> { + self.value.write_to_string() + } +} + +#[cfg(feature = "experimental")] +impl FromStr for AuxiliaryKeys { + type Err = DataError; + + fn from_str(s: &str) -> Result<Self, Self::Err> { + Self::try_from_str(s) + } +} + +#[cfg(feature = "experimental")] +impl AuxiliaryKeys { + /// Creates an [`AuxiliaryKeys`] from an iterator of individual keys. + /// + /// # Examples + /// + /// ``` + /// use icu_locid::extensions::private::subtag; + /// use icu_provider::prelude::*; + /// + /// // Single auxiliary key: + /// let a = AuxiliaryKeys::try_from_iter([subtag!("abc")]).unwrap(); + /// let b = "abc".parse::<AuxiliaryKeys>().unwrap(); + /// assert_eq!(a, b); + /// + /// // Multiple auxiliary keys: + /// let a = AuxiliaryKeys::try_from_iter([subtag!("abc"), subtag!("defg")]) + /// .unwrap(); + /// let b = "abc-defg".parse::<AuxiliaryKeys>().unwrap(); + /// assert_eq!(a, b); + /// ``` + /// + /// The iterator can't be empty: + /// + /// ``` + /// use icu_provider::prelude::*; + /// + /// assert!(AuxiliaryKeys::try_from_iter([]).is_err()); + /// ``` + pub fn try_from_iter(iter: impl IntoIterator<Item = Subtag>) -> Result<Self, DataError> { + // TODO: Avoid the allocation when possible + let mut builder = String::new(); + for item in iter { + if !builder.is_empty() { + builder.push(AuxiliaryKeys::separator()); + } + builder.push_str(item.as_str()) + } + if builder.is_empty() { + return Err(DataErrorKind::KeyLocaleSyntax.with_str_context("empty aux iterator")); + } + if builder.len() <= 23 { + #[allow(clippy::unwrap_used)] // we just checked that the string is ascii + Ok(Self { + value: AuxiliaryKeysInner::Stack(builder.parse().unwrap()), + }) + } else { + Ok(Self { + value: AuxiliaryKeysInner::Boxed(builder.into()), + }) + } + } + + /// Creates an [`AuxiliaryKeys`] from a single subtag. 
+ /// + /// # Examples + /// + /// ``` + /// use icu_locid::extensions::private::subtag; + /// use icu_provider::prelude::*; + /// + /// // Single auxiliary key: + /// let a = AuxiliaryKeys::from_subtag(subtag!("abc")); + /// let b = "abc".parse::<AuxiliaryKeys>().unwrap(); + /// assert_eq!(a, b); + /// ``` + pub const fn from_subtag(input: Subtag) -> Self { + Self { + value: AuxiliaryKeysInner::Stack(input.into_tinystr().resize()), + } + } + + pub(crate) fn try_from_str(s: &str) -> Result<Self, DataError> { + if !s.is_empty() + && s.split(Self::separator()).all(|b| { + if let Ok(subtag) = Subtag::from_str(b) { + // Enforces normalization: + b == subtag.as_str() + } else { + false + } + }) + { + if s.len() <= 23 { + #[allow(clippy::unwrap_used)] // we just checked that the string is ascii + Ok(Self { + value: AuxiliaryKeysInner::Stack(s.parse().unwrap()), + }) + } else { + Ok(Self { + value: AuxiliaryKeysInner::Boxed(s.into()), + }) + } + } else { + Err(DataErrorKind::KeyLocaleSyntax + .into_error() + .with_display_context(s)) + } + } + + /// Iterates over the components of the auxiliary key. 
+ /// + /// # Example + /// + /// ``` + /// use icu_locid::extensions::private::subtag; + /// use icu_provider::AuxiliaryKeys; + /// + /// let aux: AuxiliaryKeys = "abc-defg".parse().unwrap(); + /// assert_eq!( + /// aux.iter().collect::<Vec<_>>(), + /// vec![subtag!("abc"), subtag!("defg")] + /// ); + /// ``` + pub fn iter(&self) -> impl Iterator<Item = Subtag> + '_ { + self.value + .split(Self::separator()) + .filter_map(|x| match x.parse() { + Ok(x) => Some(x), + Err(_) => { + debug_assert!(false, "failed to convert to subtag: {x}"); + None + } + }) + } + + pub(crate) fn strict_cmp_iter<'l, I>(&self, mut subtags: I) -> SubtagOrderingResult<I> + where + I: Iterator<Item = &'l [u8]>, + { + for subtag in self.value.split(Self::separator()) { + if let Some(other) = subtags.next() { + match subtag.as_bytes().cmp(other) { + Ordering::Equal => (), + not_equal => return SubtagOrderingResult::Ordering(not_equal), + } + } else { + return SubtagOrderingResult::Ordering(Ordering::Greater); + } + } + SubtagOrderingResult::Subtags(subtags) + } + + /// Returns the internal separator byte used for auxiliary keys in data locales. + /// + /// This is, according to BCP-47, an ASCII hyphen. 
+ #[inline] + pub(crate) const fn separator() -> char { + '-' + } +} + +#[cfg(feature = "experimental")] +impl From<Subtag> for AuxiliaryKeys { + fn from(subtag: Subtag) -> Self { + #[allow(clippy::expect_used)] // subtags definitely fit within auxiliary keys + Self { + value: AuxiliaryKeysInner::Stack( + TinyAsciiStr::from_bytes(subtag.as_str().as_bytes()) + .expect("Subtags are capped to 8 elements, AuxiliaryKeys supports up to 23"), + ), + } + } +} + +#[test] +fn test_data_locale_to_string() { + use icu_locid::locale; + + struct TestCase { + pub locale: Locale, + pub aux: Option<&'static str>, + pub expected: &'static str, + } + + for cas in [ + TestCase { + locale: Locale::UND, + aux: None, + expected: "und", + }, + TestCase { + locale: locale!("und-u-cu-gbp"), + aux: None, + expected: "und-u-cu-gbp", + }, + TestCase { + locale: locale!("en-ZA-u-cu-gbp"), + aux: None, + expected: "en-ZA-u-cu-gbp", + }, + #[cfg(feature = "experimental")] + TestCase { + locale: locale!("en-ZA-u-nu-arab"), + aux: Some("gbp"), + expected: "en-ZA-u-nu-arab-x-gbp", + }, + ] { + let mut data_locale = DataLocale::from(cas.locale); + #[cfg(feature = "experimental")] + if let Some(aux) = cas.aux { + data_locale.set_aux(aux.parse().unwrap()); + } + writeable::assert_writeable_eq!(data_locale, cas.expected); + } +} + +#[test] +fn test_data_locale_from_string() { + #[derive(Debug)] + struct TestCase { + pub input: &'static str, + pub success: bool, + } + + for cas in [ + TestCase { + input: "und", + success: true, + }, + TestCase { + input: "und-u-cu-gbp", + success: true, + }, + TestCase { + input: "en-ZA-u-cu-gbp", + success: true, + }, + TestCase { + input: "en...", + success: false, + }, + #[cfg(feature = "experimental")] + TestCase { + input: "en-ZA-u-nu-arab-x-gbp", + success: true, + }, + #[cfg(not(feature = "experimental"))] + TestCase { + input: "en-ZA-u-nu-arab-x-gbp", + success: false, + }, + ] { + let data_locale = match (DataLocale::from_str(cas.input), cas.success) { + (Ok(l), 
true) => l, + (Err(_), false) => { + continue; + } + (Ok(_), false) => { + panic!("DataLocale parsed but it was supposed to fail: {cas:?}"); + } + (Err(_), true) => { + panic!("DataLocale was supposed to parse but it failed: {cas:?}"); + } + }; + writeable::assert_writeable_eq!(data_locale, cas.input); + } +} diff --git a/third_party/rust/icu_provider/src/response.rs b/third_party/rust/icu_provider/src/response.rs new file mode 100644 index 0000000000..09b11830fa --- /dev/null +++ b/third_party/rust/icu_provider/src/response.rs @@ -0,0 +1,748 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::buf::BufferMarker; +use crate::error::{DataError, DataErrorKind}; +use crate::marker::DataMarker; +use crate::request::DataLocale; +use alloc::boxed::Box; +use core::convert::TryFrom; +use core::fmt::Debug; +use core::marker::PhantomData; +use core::ops::Deref; +use yoke::trait_hack::YokeTraitHack; +use yoke::*; + +#[cfg(not(feature = "sync"))] +use alloc::rc::Rc as SelectedRc; +#[cfg(feature = "sync")] +use alloc::sync::Arc as SelectedRc; + +/// A response object containing metadata about the returned data. +#[derive(Debug, Clone, PartialEq, Default)] +#[non_exhaustive] +pub struct DataResponseMetadata { + /// The resolved locale of the returned data, if locale fallbacking was performed. + pub locale: Option<DataLocale>, + /// The format of the buffer for buffer-backed data, if known (for example, JSON). + pub buffer_format: Option<crate::buf::BufferFormat>, +} + +/// A container for data payloads returned from a data provider. +/// +/// [`DataPayload`] is built on top of the [`yoke`] framework, which allows for cheap, zero-copy +/// operations on data via the use of self-references. +/// +/// The type of the data stored in [`DataPayload`] is determined by the [`DataMarker`] type parameter. 
+/// +/// ## Accessing the data +/// +/// To get a reference to the data inside [`DataPayload`], use [`DataPayload::get()`]. If you need +/// to store the data for later use, you need to store the [`DataPayload`] itself, since `get` only +/// returns a reference with an ephemeral lifetime. +/// +/// ## Mutating the data +/// +/// To modify the data stored in a [`DataPayload`], use [`DataPayload::with_mut()`]. +/// +/// ## Transforming the data to a different type +/// +/// To transform a [`DataPayload`] to a different type backed by the same data store (cart), use +/// [`DataPayload::map_project()`] or one of its sister methods. +/// +/// # Cargo feature: `sync` +/// +/// By default, the payload uses non-concurrent reference counting internally, and hence is neither +/// [`Sync`] nor [`Send`]; if these traits are required, the `sync` Cargo feature can be enabled. +/// +/// # Examples +/// +/// Basic usage, using the `HelloWorldV1Marker` marker: +/// +/// ``` +/// use icu_provider::hello_world::*; +/// use icu_provider::prelude::*; +/// use std::borrow::Cow; +/// +/// let payload = DataPayload::<HelloWorldV1Marker>::from_owned(HelloWorldV1 { +/// message: Cow::Borrowed("Demo"), +/// }); +/// +/// assert_eq!("Demo", payload.get().message); +/// ``` +pub struct DataPayload<M: DataMarker>(pub(crate) DataPayloadInner<M>); + +pub(crate) enum DataPayloadInner<M: DataMarker> { + Yoke(Yoke<M::Yokeable, Option<Cart>>), + StaticRef(&'static M::Yokeable), +} + +/// The type of the "cart" that is used by `DataPayload`. +#[derive(Clone, Debug)] +#[allow(clippy::redundant_allocation)] // false positive, it's cheaper to wrap an existing Box in an Rc than to reallocate a huge Rc +pub struct Cart(SelectedRc<Box<[u8]>>); + +impl Deref for Cart { + type Target = Box<[u8]>; + fn deref(&self) -> &Self::Target { + &self.0 + } +} +// Safe because both Rc and Arc are StableDeref, and our impl delegates. 
+unsafe impl stable_deref_trait::StableDeref for Cart {} +// Safe because both Rc and Arc are CloneableCart, and our impl delegates. +unsafe impl yoke::CloneableCart for Cart {} + +impl Cart { + /// Creates a `Yoke<Y, Option<Cart>>` from owned bytes by applying `f`. + pub fn try_make_yoke<Y, F, E>(cart: Box<[u8]>, f: F) -> Result<Yoke<Y, Option<Self>>, E> + where + for<'a> Y: Yokeable<'a>, + F: FnOnce(&[u8]) -> Result<<Y as Yokeable>::Output, E>, + { + Yoke::try_attach_to_cart(SelectedRc::new(cart), |b| f(b)) + // Safe because the cart is only wrapped + .map(|yoke| unsafe { yoke.replace_cart(Cart) }) + .map(Yoke::wrap_cart_in_option) + } +} + +impl<M> Debug for DataPayload<M> +where + M: DataMarker, + for<'a> &'a <M::Yokeable as Yokeable<'a>>::Output: Debug, +{ + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + self.get().fmt(f) + } +} + +/// Cloning a DataPayload is generally a cheap operation. +/// See notes in the `Clone` impl for [`Yoke`]. +/// +/// # Examples +/// +/// ```no_run +/// use icu_provider::hello_world::*; +/// use icu_provider::prelude::*; +/// +/// let resp1: DataPayload<HelloWorldV1Marker> = todo!(); +/// let resp2 = resp1.clone(); +/// ``` +impl<M> Clone for DataPayload<M> +where + M: DataMarker, + for<'a> YokeTraitHack<<M::Yokeable as Yokeable<'a>>::Output>: Clone, +{ + fn clone(&self) -> Self { + Self(match &self.0 { + DataPayloadInner::Yoke(yoke) => DataPayloadInner::Yoke(yoke.clone()), + DataPayloadInner::StaticRef(r) => DataPayloadInner::StaticRef(*r), + }) + } +} + +impl<M> PartialEq for DataPayload<M> +where + M: DataMarker, + for<'a> YokeTraitHack<<M::Yokeable as Yokeable<'a>>::Output>: PartialEq, +{ + fn eq(&self, other: &Self) -> bool { + YokeTraitHack(self.get()).into_ref() == YokeTraitHack(other.get()).into_ref() + } +} + +impl<M> Eq for DataPayload<M> +where + M: DataMarker, + for<'a> YokeTraitHack<<M::Yokeable as Yokeable<'a>>::Output>: Eq, +{ +} + +#[test] +fn test_clone_eq() { + use crate::hello_world::*; 
+ let p1 = DataPayload::<HelloWorldV1Marker>::from_static_str("Demo"); + #[allow(clippy::redundant_clone)] + let p2 = p1.clone(); + assert_eq!(p1, p2); +} + +impl<M> DataPayload<M> +where + M: DataMarker, +{ + /// Convert a fully owned (`'static`) data struct into a DataPayload. + /// + /// This constructor creates `'static` payloads. + /// + /// # Examples + /// + /// ``` + /// use icu_provider::hello_world::*; + /// use icu_provider::prelude::*; + /// use std::borrow::Cow; + /// + /// let local_struct = HelloWorldV1 { + /// message: Cow::Owned("example".to_owned()), + /// }; + /// + /// let payload = + /// DataPayload::<HelloWorldV1Marker>::from_owned(local_struct.clone()); + /// + /// assert_eq!(payload.get(), &local_struct); + /// ``` + #[inline] + pub const fn from_owned(data: M::Yokeable) -> Self { + Self(DataPayloadInner::Yoke(Yoke::new_owned(data))) + } + + #[doc(hidden)] + #[inline] + pub const fn from_static_ref(data: &'static M::Yokeable) -> Self { + Self(DataPayloadInner::StaticRef(data)) + } + + /// Convert a DataPayload that was created via [`DataPayload::from_owned()`] back into the + /// concrete type used to construct it. + pub fn try_unwrap_owned(self) -> Result<M::Yokeable, DataError> { + match self.0 { + DataPayloadInner::Yoke(yoke) => yoke.try_into_yokeable().ok(), + DataPayloadInner::StaticRef(_) => None, + } + .ok_or(DataErrorKind::InvalidState.with_str_context("try_unwrap_owned")) + } + + /// Mutate the data contained in this DataPayload. + /// + /// For safety, all mutation operations must take place within a helper function that cannot + /// borrow data from the surrounding context. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use icu_provider::hello_world::HelloWorldV1Marker; + /// use icu_provider::prelude::*; + /// + /// let mut payload = + /// DataPayload::<HelloWorldV1Marker>::from_static_str("Hello"); + /// + /// payload.with_mut(|s| s.message.to_mut().push_str(" World")); + /// + /// assert_eq!("Hello World", payload.get().message); + /// ``` + /// + /// To transfer data from the context into the data struct, use the `move` keyword: + /// + /// ``` + /// use icu_provider::hello_world::HelloWorldV1Marker; + /// use icu_provider::prelude::*; + /// + /// let mut payload = + /// DataPayload::<HelloWorldV1Marker>::from_static_str("Hello"); + /// + /// let suffix = " World"; + /// payload.with_mut(move |s| s.message.to_mut().push_str(suffix)); + /// + /// assert_eq!("Hello World", payload.get().message); + /// ``` + pub fn with_mut<'a, F>(&'a mut self, f: F) + where + F: 'static + for<'b> FnOnce(&'b mut <M::Yokeable as Yokeable<'a>>::Output), + M::Yokeable: zerofrom::ZeroFrom<'static, M::Yokeable>, + { + if let DataPayloadInner::StaticRef(r) = self.0 { + self.0 = DataPayloadInner::Yoke(Yoke::new_owned(zerofrom::ZeroFrom::zero_from(r))); + } + match &mut self.0 { + DataPayloadInner::Yoke(yoke) => yoke.with_mut(f), + _ => unreachable!(), + } + } + + /// Borrows the underlying data. + /// + /// This function should be used like `Deref` would normally be used. For more information on + /// why DataPayload cannot implement `Deref`, see the `yoke` crate. 
+ /// + /// # Examples + /// + /// ``` + /// use icu_provider::hello_world::HelloWorldV1Marker; + /// use icu_provider::prelude::*; + /// + /// let payload = DataPayload::<HelloWorldV1Marker>::from_static_str("Demo"); + /// + /// assert_eq!("Demo", payload.get().message); + /// ``` + #[inline] + #[allow(clippy::needless_lifetimes)] + pub fn get<'a>(&'a self) -> &'a <M::Yokeable as Yokeable<'a>>::Output { + match &self.0 { + DataPayloadInner::Yoke(yoke) => yoke.get(), + DataPayloadInner::StaticRef(r) => Yokeable::transform(*r), + } + } + + /// Maps `DataPayload<M>` to `DataPayload<M2>` by projecting it with [`Yoke::map_project`]. + /// + /// This is accomplished by a function that takes `M`'s data type and returns `M2`'s data + /// type. The function takes a second argument which should be ignored. For more details, + /// see [`Yoke::map_project()`]. + /// + /// The standard [`DataPayload::map_project()`] function moves `self` and cannot capture any + /// data from its context. Use one of the sister methods if you need these capabilities: + /// + /// - [`DataPayload::map_project_cloned()`] if you don't have ownership of `self` + /// - [`DataPayload::try_map_project()`] to bubble up an error + /// - [`DataPayload::try_map_project_cloned()`] to do both of the above + /// + /// # Examples + /// + /// Map from `HelloWorldV1` to a `Cow<str>` containing just the message: + /// + /// ``` + /// use icu_provider::hello_world::*; + /// use icu_provider::prelude::*; + /// use std::borrow::Cow; + /// + /// // A custom marker type is required when using `map_project`. The Yokeable should be the + /// // target type, and the Cart should correspond to the type being transformed. 
+ /// + /// struct HelloWorldV1MessageMarker; + /// impl DataMarker for HelloWorldV1MessageMarker { + /// type Yokeable = Cow<'static, str>; + /// } + /// + /// let p1: DataPayload<HelloWorldV1Marker> = DataPayload::from_owned(HelloWorldV1 { + /// message: Cow::Borrowed("Hello World"), + /// }); + /// + /// assert_eq!("Hello World", p1.get().message); + /// + /// let p2: DataPayload<HelloWorldV1MessageMarker> = p1.map_project(|obj, _| obj.message); + /// + /// // Note: at this point, p1 has been moved. + /// assert_eq!("Hello World", p2.get()); + /// ``` + #[allow(clippy::type_complexity)] + pub fn map_project<M2, F>(self, f: F) -> DataPayload<M2> + where + M2: DataMarker, + F: for<'a> FnOnce( + <M::Yokeable as Yokeable<'a>>::Output, + PhantomData<&'a ()>, + ) -> <M2::Yokeable as Yokeable<'a>>::Output, + M::Yokeable: zerofrom::ZeroFrom<'static, M::Yokeable>, + { + DataPayload(DataPayloadInner::Yoke( + match self.0 { + DataPayloadInner::Yoke(yoke) => yoke, + DataPayloadInner::StaticRef(r) => Yoke::new_owned(zerofrom::ZeroFrom::zero_from(r)), + } + .map_project(f), + )) + } + + /// Version of [`DataPayload::map_project()`] that borrows `self` instead of moving `self`. + /// + /// # Examples + /// + /// Same example as above, but this time, do not move out of `p1`: + /// + /// ``` + /// // Same imports and definitions as above + /// # use icu_provider::hello_world::*; + /// # use icu_provider::prelude::*; + /// # use std::borrow::Cow; + /// # struct HelloWorldV1MessageMarker; + /// # impl DataMarker for HelloWorldV1MessageMarker { + /// # type Yokeable = Cow<'static, str>; + /// # } + /// + /// let p1: DataPayload<HelloWorldV1Marker> = + /// DataPayload::from_owned(HelloWorldV1 { + /// message: Cow::Borrowed("Hello World"), + /// }); + /// + /// assert_eq!("Hello World", p1.get().message); + /// + /// let p2: DataPayload<HelloWorldV1MessageMarker> = + /// p1.map_project_cloned(|obj, _| obj.message.clone()); + /// + /// // Note: p1 is still valid. 
+ /// assert_eq!(p1.get().message, *p2.get()); + /// ``` + #[allow(clippy::type_complexity)] + pub fn map_project_cloned<'this, M2, F>(&'this self, f: F) -> DataPayload<M2> + where + M2: DataMarker, + F: for<'a> FnOnce( + &'this <M::Yokeable as Yokeable<'a>>::Output, + PhantomData<&'a ()>, + ) -> <M2::Yokeable as Yokeable<'a>>::Output, + { + DataPayload(DataPayloadInner::Yoke(match &self.0 { + DataPayloadInner::Yoke(yoke) => yoke.map_project_cloned(f), + DataPayloadInner::StaticRef(r) => { + let output: <M2::Yokeable as Yokeable<'static>>::Output = + f(Yokeable::transform(*r), PhantomData); + // Safety: <M2::Yokeable as Yokeable<'static>>::Output is the same type as M2::Yokeable; + // we're going from 'static to 'static, however in a generic context it's not + // clear to the compiler that that is the case. We have to use the unsafe make API to do this. + let yokeable: M2::Yokeable = unsafe { M2::Yokeable::make(output) }; + Yoke::new_owned(yokeable) + } + })) + } + + /// Version of [`DataPayload::map_project()`] that bubbles up an error from `f`. 
+ /// + /// # Examples + /// + /// Same example as above, but bubble up an error: + /// + /// ``` + /// // Same imports and definitions as above + /// # use icu_provider::hello_world::*; + /// # use icu_provider::prelude::*; + /// # use std::borrow::Cow; + /// # struct HelloWorldV1MessageMarker; + /// # impl DataMarker for HelloWorldV1MessageMarker { + /// # type Yokeable = Cow<'static, str>; + /// # } + /// + /// let p1: DataPayload<HelloWorldV1Marker> = + /// DataPayload::from_owned(HelloWorldV1 { + /// message: Cow::Borrowed("Hello World"), + /// }); + /// + /// assert_eq!("Hello World", p1.get().message); + /// + /// let string_to_append = "Extra"; + /// let p2: DataPayload<HelloWorldV1MessageMarker> = + /// p1.try_map_project(|mut obj, _| { + /// if obj.message.is_empty() { + /// return Err("Example error"); + /// } + /// obj.message.to_mut().push_str(string_to_append); + /// Ok(obj.message) + /// })?; + /// + /// assert_eq!("Hello WorldExtra", p2.get()); + /// # Ok::<(), &'static str>(()) + /// ``` + #[allow(clippy::type_complexity)] + pub fn try_map_project<M2, F, E>(self, f: F) -> Result<DataPayload<M2>, E> + where + M2: DataMarker, + F: for<'a> FnOnce( + <M::Yokeable as Yokeable<'a>>::Output, + PhantomData<&'a ()>, + ) -> Result<<M2::Yokeable as Yokeable<'a>>::Output, E>, + M::Yokeable: zerofrom::ZeroFrom<'static, M::Yokeable>, + { + Ok(DataPayload(DataPayloadInner::Yoke( + match self.0 { + DataPayloadInner::Yoke(yoke) => yoke, + DataPayloadInner::StaticRef(r) => Yoke::new_owned(zerofrom::ZeroFrom::zero_from(r)), + } + .try_map_project(f)?, + ))) + } + + /// Version of [`DataPayload::map_project_cloned()`] that bubbles up an error from `f`. 
+ /// + /// # Examples + /// + /// Same example as above, but bubble up an error: + /// + /// ``` + /// // Same imports and definitions as above + /// # use icu_provider::hello_world::*; + /// # use icu_provider::prelude::*; + /// # use std::borrow::Cow; + /// # struct HelloWorldV1MessageMarker; + /// # impl DataMarker for HelloWorldV1MessageMarker { + /// # type Yokeable = Cow<'static, str>; + /// # } + /// + /// let p1: DataPayload<HelloWorldV1Marker> = + /// DataPayload::from_owned(HelloWorldV1 { + /// message: Cow::Borrowed("Hello World"), + /// }); + /// + /// assert_eq!("Hello World", p1.get().message); + /// + /// let string_to_append = "Extra"; + /// let p2: DataPayload<HelloWorldV1MessageMarker> = p1 + /// .try_map_project_cloned(|obj, _| { + /// if obj.message.is_empty() { + /// return Err("Example error"); + /// } + /// let mut message = obj.message.clone(); + /// message.to_mut().push_str(string_to_append); + /// Ok(message) + /// })?; + /// + /// // Note: p1 is still valid, but the values no longer equal. 
+ /// assert_ne!(p1.get().message, *p2.get()); + /// assert_eq!("Hello WorldExtra", p2.get()); + /// # Ok::<(), &'static str>(()) + /// ``` + #[allow(clippy::type_complexity)] + pub fn try_map_project_cloned<'this, M2, F, E>(&'this self, f: F) -> Result<DataPayload<M2>, E> + where + M2: DataMarker, + F: for<'a> FnOnce( + &'this <M::Yokeable as Yokeable<'a>>::Output, + PhantomData<&'a ()>, + ) -> Result<<M2::Yokeable as Yokeable<'a>>::Output, E>, + { + Ok(DataPayload(DataPayloadInner::Yoke(match &self.0 { + DataPayloadInner::Yoke(yoke) => yoke.try_map_project_cloned(f)?, + DataPayloadInner::StaticRef(r) => { + let output: <M2::Yokeable as Yokeable<'static>>::Output = + f(Yokeable::transform(*r), PhantomData)?; + // Safety: <M2::Yokeable as Yokeable<'static>>::Output is the same type as M2::Yokeable + Yoke::new_owned(unsafe { M2::Yokeable::make(output) }) + } + }))) + } + + /// Convert between two [`DataMarker`] types that are compatible with each other + /// with compile-time type checking. + /// + /// This happens if they both have the same [`DataMarker::Yokeable`] type. + /// + /// Can be used to erase the key of a data payload in cases where multiple keys correspond + /// to the same data struct. + /// + /// For runtime dynamic casting, use [`DataPayload::dynamic_cast_mut()`]. 
+ /// + /// # Examples + /// + /// ```no_run + /// use icu_locid::locale; + /// use icu_provider::hello_world::*; + /// use icu_provider::prelude::*; + /// + /// struct CustomHelloWorldV1Marker; + /// impl DataMarker for CustomHelloWorldV1Marker { + /// type Yokeable = HelloWorldV1<'static>; + /// } + /// + /// let hello_world: DataPayload<HelloWorldV1Marker> = todo!(); + /// let custom: DataPayload<CustomHelloWorldV1Marker> = hello_world.cast(); + /// ``` + #[inline] + pub fn cast<M2>(self) -> DataPayload<M2> + where + M2: DataMarker<Yokeable = M::Yokeable>, + { + DataPayload(match self.0 { + DataPayloadInner::Yoke(yoke) => DataPayloadInner::Yoke(yoke), + DataPayloadInner::StaticRef(r) => DataPayloadInner::StaticRef(r), + }) + } + + /// Convert a mutable reference of a [`DataPayload`] to another mutable reference + /// of the same type with runtime type checking. + /// + /// Primarily useful to convert from a generic to a concrete marker type. + /// + /// If the `M2` type argument does not match the true marker type, a `DataError` is returned. + /// + /// For compile-time static casting, use [`DataPayload::cast()`]. 
+ /// + /// # Examples + /// + /// Change the results of a particular request based on key: + /// + /// ``` + /// use icu_locid::locale; + /// use icu_provider::hello_world::*; + /// use icu_provider::prelude::*; + /// + /// struct MyWrapper<P> { + /// inner: P, + /// } + /// + /// impl<M, P> DataProvider<M> for MyWrapper<P> + /// where + /// M: KeyedDataMarker, + /// P: DataProvider<M>, + /// { + /// #[inline] + /// fn load(&self, req: DataRequest) -> Result<DataResponse<M>, DataError> { + /// let mut res = self.inner.load(req)?; + /// if let Some(ref mut generic_payload) = res.payload { + /// let mut cast_result = + /// generic_payload.dynamic_cast_mut::<HelloWorldV1Marker>(); + /// if let Ok(ref mut concrete_payload) = cast_result { + /// // Add an emoji to the hello world message + /// concrete_payload.with_mut(|data| { + /// data.message.to_mut().insert_str(0, "✨ "); + /// }); + /// } + /// } + /// Ok(res) + /// } + /// } + /// + /// let provider = MyWrapper { + /// inner: HelloWorldProvider, + /// }; + /// let formatter = + /// HelloWorldFormatter::try_new_unstable(&provider, &locale!("de").into()) + /// .unwrap(); + /// + /// assert_eq!(formatter.format_to_string(), "✨ Hallo Welt"); + /// ``` + #[inline] + pub fn dynamic_cast_mut<M2>(&mut self) -> Result<&mut DataPayload<M2>, DataError> + where + M2: DataMarker, + { + let this: &mut dyn core::any::Any = self; + if let Some(this) = this.downcast_mut() { + Ok(this) + } else { + Err(DataError::for_type::<M2>().with_str_context(core::any::type_name::<M>())) + } + } +} + +impl DataPayload<BufferMarker> { + /// Converts an owned byte buffer into a `DataPayload<BufferMarker>`. + pub fn from_owned_buffer(buffer: Box<[u8]>) -> Self { + let yoke = Yoke::attach_to_cart(SelectedRc::new(buffer), |b| &**b); + // Safe because cart is wrapped + let yoke = unsafe { yoke.replace_cart(|b| Some(Cart(b))) }; + Self(DataPayloadInner::Yoke(yoke)) + } + + /// Converts a yoked byte buffer into a `DataPayload<BufferMarker>`. 
+ pub fn from_yoked_buffer(yoke: Yoke<&'static [u8], Option<Cart>>) -> Self { + Self(DataPayloadInner::Yoke(yoke)) + } + + /// Converts a static byte buffer into a `DataPayload<BufferMarker>`. + pub fn from_static_buffer(buffer: &'static [u8]) -> Self { + Self(DataPayloadInner::Yoke(Yoke::new_owned(buffer))) + } +} + +impl<M> Default for DataPayload<M> +where + M: DataMarker, + M::Yokeable: Default, +{ + fn default() -> Self { + Self::from_owned(Default::default()) + } +} + +/// A response object containing an object as payload and metadata about it. +#[allow(clippy::exhaustive_structs)] // this type is stable +pub struct DataResponse<M> +where + M: DataMarker, +{ + /// Metadata about the returned object. + pub metadata: DataResponseMetadata, + + /// The object itself; `None` if it was not loaded. + pub payload: Option<DataPayload<M>>, +} + +impl<M> DataResponse<M> +where + M: DataMarker, +{ + /// Takes ownership of the underlying payload. Error if not present. + /// + /// To take the metadata, too, use [`Self::take_metadata_and_payload()`]. + #[inline] + pub fn take_payload(self) -> Result<DataPayload<M>, DataError> { + Ok(self.take_metadata_and_payload()?.1) + } + + /// Takes ownership of the underlying metadata and payload. Error if payload is not present. 
+ #[inline] + pub fn take_metadata_and_payload( + self, + ) -> Result<(DataResponseMetadata, DataPayload<M>), DataError> { + Ok(( + self.metadata, + self.payload + .ok_or_else(|| DataErrorKind::MissingPayload.with_type_context::<M>())?, + )) + } +} + +impl<M> TryFrom<DataResponse<M>> for DataPayload<M> +where + M: DataMarker, +{ + type Error = DataError; + + fn try_from(response: DataResponse<M>) -> Result<Self, Self::Error> { + response.take_payload() + } +} + +impl<M> Debug for DataResponse<M> +where + M: DataMarker, + for<'a> &'a <M::Yokeable as Yokeable<'a>>::Output: Debug, +{ + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!( + f, + "DataResponse {{ metadata: {:?}, payload: {:?} }}", + self.metadata, self.payload + ) + } +} + +/// Cloning a DataResponse is generally a cheap operation. +/// See notes in the `Clone` impl for [`Yoke`]. +/// +/// # Examples +/// +/// ```no_run +/// use icu_provider::hello_world::*; +/// use icu_provider::prelude::*; +/// +/// let resp1: DataResponse<HelloWorldV1Marker> = todo!(); +/// let resp2 = resp1.clone(); +/// ``` +impl<M> Clone for DataResponse<M> +where + M: DataMarker, + for<'a> YokeTraitHack<<M::Yokeable as Yokeable<'a>>::Output>: Clone, +{ + fn clone(&self) -> Self { + Self { + metadata: self.metadata.clone(), + payload: self.payload.clone(), + } + } +} + +#[test] +fn test_debug() { + use crate::hello_world::*; + use alloc::borrow::Cow; + let resp = DataResponse::<HelloWorldV1Marker> { + metadata: Default::default(), + payload: Some(DataPayload::from_owned(HelloWorldV1 { + message: Cow::Borrowed("foo"), + })), + }; + assert_eq!("DataResponse { metadata: DataResponseMetadata { locale: None, buffer_format: None }, payload: Some(HelloWorldV1 { message: \"foo\" }) }", format!("{resp:?}")); +} diff --git a/third_party/rust/icu_provider/src/serde/borrow_de_utils.rs b/third_party/rust/icu_provider/src/serde/borrow_de_utils.rs new file mode 100644 index 0000000000..d614bc9ecb --- /dev/null +++ 
b/third_party/rust/icu_provider/src/serde/borrow_de_utils.rs @@ -0,0 +1,82 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use alloc::borrow::Cow; +use serde::de::Deserializer; +use serde::Deserialize; + +#[derive(Deserialize)] +#[serde(transparent)] +// Cows fail to borrow in some situations (array, option), but structs of Cows don't. +#[allow(clippy::exhaustive_structs)] // newtype +#[derive(Debug)] +pub struct CowWrap<'data>(#[serde(borrow)] pub Cow<'data, str>); + +#[derive(Deserialize)] +#[serde(transparent)] +// Cows fail to borrow in some situations (array, option), but structs of Cows don't. +#[allow(clippy::exhaustive_structs)] // newtype +#[derive(Debug)] +pub struct CowBytesWrap<'data>(#[serde(borrow)] pub Cow<'data, [u8]>); + +pub fn array_of_cow<'de, D, const N: usize>(deserializer: D) -> Result<[Cow<'de, str>; N], D::Error> +where + D: Deserializer<'de>, + [CowWrap<'de>; N]: Deserialize<'de>, +{ + <[CowWrap<'de>; N]>::deserialize(deserializer).map(|array| array.map(|wrap| wrap.0)) +} + +pub fn option_of_cow<'de, D>(deserializer: D) -> Result<Option<Cow<'de, str>>, D::Error> +where + D: Deserializer<'de>, +{ + <Option<CowWrap<'de>>>::deserialize(deserializer).map(|opt| opt.map(|wrap| wrap.0)) +} + +pub fn tuple_of_cow<'de, D>(deserializer: D) -> Result<(Cow<'de, str>, Cow<'de, str>), D::Error> +where + D: Deserializer<'de>, + (CowWrap<'de>, CowWrap<'de>): Deserialize<'de>, +{ + <(CowWrap<'de>, CowWrap<'de>)>::deserialize(deserializer).map(|x| (x.0 .0, x.1 .0)) +} + +#[test] +fn test_option() { + #[derive(Debug, PartialEq, serde::Serialize, serde::Deserialize)] + struct Demo<'s>(#[serde(borrow, deserialize_with = "option_of_cow")] Option<Cow<'s, str>>); + + let data_orig = Demo(Some("Hello world".into())); + let json = serde_json::to_string(&data_orig).expect("serialize"); + let data_new = 
serde_json::from_str::<Demo>(&json).expect("deserialize"); + assert_eq!(data_orig, data_new); + assert!(matches!(data_new.0, Some(Cow::Borrowed(_)))); +} + +#[test] +fn test_tuple() { + #[derive(Debug, PartialEq, serde::Serialize, serde::Deserialize)] + struct Demo<'s>( + #[serde(borrow, deserialize_with = "tuple_of_cow")] (Cow<'s, str>, Cow<'s, str>), + ); + + let data_orig = Demo(("Hello world".into(), "Hello earth".into())); + let json = serde_json::to_string(&data_orig).expect("serialize"); + let data_new = serde_json::from_str::<Demo>(&json).expect("deserialize"); + assert_eq!(data_orig, data_new); + assert!(matches!(data_new.0, (Cow::Borrowed(_), Cow::Borrowed(_)))); +} + +#[test] +fn test_array() { + #[derive(Debug, PartialEq, serde::Serialize, serde::Deserialize)] + struct Demo<'s>(#[serde(borrow, deserialize_with = "array_of_cow")] [Cow<'s, str>; 1]); + + let data_orig = Demo(["Hello world".into()]); + let json = serde_json::to_string(&data_orig).expect("serialize"); + let data_new = serde_json::from_str::<Demo>(&json).expect("deserialize"); + assert_eq!(data_orig, data_new); + assert!(matches!(data_new.0, [Cow::Borrowed(_)])); +} diff --git a/third_party/rust/icu_provider/src/serde/mod.rs b/third_party/rust/icu_provider/src/serde/mod.rs new file mode 100644 index 0000000000..edd827c312 --- /dev/null +++ b/third_party/rust/icu_provider/src/serde/mod.rs @@ -0,0 +1,224 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Provides the [`DeserializingBufferProvider`] wrapper, which deserializes data using Serde. +//! +//! Providers that produce opaque buffers that need to be deserialized into concrete data structs, +//! such as `FsDataProvider`, should implement [`BufferProvider`]. These can be converted into +//! 
[`DeserializingBufferProvider`] using the [`as_deserializing`](AsDeserializingBufferProvider::as_deserializing) +//! convenience method. +//! +//! [`BufferProvider`]: crate::buf::BufferProvider + +// Hidden for now, but could be made public-stable in the future. +#[doc(hidden)] +pub mod borrow_de_utils; + +use crate::buf::BufferFormat; +use crate::buf::BufferProvider; +use crate::prelude::*; +use serde::de::Deserialize; +use yoke::trait_hack::YokeTraitHack; +use yoke::Yokeable; + +/// A [`BufferProvider`] that deserializes its data using Serde. +#[derive(Debug)] +pub struct DeserializingBufferProvider<'a, P: ?Sized>(&'a P); + +/// Blanket-implemented trait adding the [`Self::as_deserializing()`] function. +pub trait AsDeserializingBufferProvider { + /// Wrap this [`BufferProvider`] in a [`DeserializingBufferProvider`]. + /// + /// This requires enabling the deserialization Cargo feature + /// for the expected format(s): + /// + /// - `deserialize_json` + /// - `deserialize_postcard_1` + /// - `deserialize_bincode_1` + fn as_deserializing(&self) -> DeserializingBufferProvider<Self>; +} + +impl<P> AsDeserializingBufferProvider for P +where + P: BufferProvider + ?Sized, +{ + /// Wrap this [`BufferProvider`] in a [`DeserializingBufferProvider`]. 
+ /// + /// This requires enabling the deserialization Cargo feature + /// for the expected format(s): + /// + /// - `deserialize_json` + /// - `deserialize_postcard_1` + /// - `deserialize_bincode_1` + fn as_deserializing(&self) -> DeserializingBufferProvider<Self> { + DeserializingBufferProvider(self) + } +} + +fn deserialize_impl<'data, M>( + // Allow `bytes` to be unused in case all buffer formats are disabled + #[allow(unused_variables)] bytes: &'data [u8], + buffer_format: BufferFormat, +) -> Result<<M::Yokeable as Yokeable<'data>>::Output, DataError> +where + M: DataMarker, + // Actual bound: + // for<'de> <M::Yokeable as Yokeable<'de>>::Output: Deserialize<'de>, + // Necessary workaround bound (see `yoke::trait_hack` docs): + for<'de> YokeTraitHack<<M::Yokeable as Yokeable<'de>>::Output>: Deserialize<'de>, +{ + match buffer_format { + #[cfg(feature = "deserialize_json")] + BufferFormat::Json => { + let mut d = serde_json::Deserializer::from_slice(bytes); + let data = YokeTraitHack::<<M::Yokeable as Yokeable>::Output>::deserialize(&mut d)?; + Ok(data.0) + } + + #[cfg(feature = "deserialize_bincode_1")] + BufferFormat::Bincode1 => { + use bincode::Options; + let options = bincode::DefaultOptions::new() + .with_fixint_encoding() + .allow_trailing_bytes(); + let mut d = bincode::de::Deserializer::from_slice(bytes, options); + let data = YokeTraitHack::<<M::Yokeable as Yokeable>::Output>::deserialize(&mut d)?; + Ok(data.0) + } + + #[cfg(feature = "deserialize_postcard_1")] + BufferFormat::Postcard1 => { + let mut d = postcard::Deserializer::from_bytes(bytes); + let data = YokeTraitHack::<<M::Yokeable as Yokeable>::Output>::deserialize(&mut d)?; + Ok(data.0) + } + + // Allowed for cases in which all features are enabled + #[allow(unreachable_patterns)] + _ => Err(DataErrorKind::UnavailableBufferFormat(buffer_format).into_error()), + } +} + +impl DataPayload<BufferMarker> { + /// Deserialize a [`DataPayload`]`<`[`BufferMarker`]`>` into a [`DataPayload`] of a + /// 
specific concrete type. + /// + /// This requires enabling the deserialization Cargo feature + /// for the expected format(s): + /// + /// - `deserialize_json` + /// - `deserialize_postcard_1` + /// - `deserialize_bincode_1` + /// + /// This function takes the buffer format as an argument. When a buffer payload is returned + /// from a data provider, the buffer format is stored in the [`DataResponseMetadata`]. + /// + /// # Examples + /// + /// Requires the `deserialize_json` Cargo feature: + /// + /// ``` + /// use icu_provider::buf::BufferFormat; + /// use icu_provider::hello_world::*; + /// use icu_provider::prelude::*; + /// + /// let buffer: &[u8] = br#"{"message":"Hallo Welt"}"#; + /// + /// let buffer_payload = DataPayload::from_owned(buffer); + /// let payload: DataPayload<HelloWorldV1Marker> = buffer_payload + /// .into_deserialized(BufferFormat::Json) + /// .expect("Deserialization successful"); + /// + /// assert_eq!(payload.get().message, "Hallo Welt"); + /// ``` + pub fn into_deserialized<M>( + self, + buffer_format: BufferFormat, + ) -> Result<DataPayload<M>, DataError> + where + M: DataMarker, + // Actual bound: + // for<'de> <M::Yokeable as Yokeable<'de>>::Output: Deserialize<'de>, + // Necessary workaround bound (see `yoke::trait_hack` docs): + for<'de> YokeTraitHack<<M::Yokeable as Yokeable<'de>>::Output>: Deserialize<'de>, + { + self.try_map_project(|bytes, _| deserialize_impl::<M>(bytes, buffer_format)) + } +} + +impl<P, M> DynamicDataProvider<M> for DeserializingBufferProvider<'_, P> +where + M: DataMarker, + P: BufferProvider + ?Sized, + // Actual bound: + // for<'de> <M::Yokeable as Yokeable<'de>>::Output: serde::de::Deserialize<'de>, + // Necessary workaround bound (see `yoke::trait_hack` docs): + for<'de> YokeTraitHack<<M::Yokeable as Yokeable<'de>>::Output>: Deserialize<'de>, +{ + /// Converts a buffer into a concrete type by deserializing from a supported buffer format. 
+ /// + /// This requires enabling the deserialization Cargo feature + /// for the expected format(s): + /// + /// - `deserialize_json` + /// - `deserialize_postcard_1` + /// - `deserialize_bincode_1` + fn load_data(&self, key: DataKey, req: DataRequest) -> Result<DataResponse<M>, DataError> { + let buffer_response = BufferProvider::load_buffer(self.0, key, req)?; + let buffer_format = buffer_response.metadata.buffer_format.ok_or_else(|| { + DataError::custom("BufferProvider didn't set BufferFormat").with_req(key, req) + })?; + Ok(DataResponse { + metadata: buffer_response.metadata, + payload: buffer_response + .payload + .map(|p| p.into_deserialized(buffer_format)) + .transpose() + .map_err(|e| e.with_req(key, req))?, + }) + } +} + +impl<P, M> DataProvider<M> for DeserializingBufferProvider<'_, P> +where + M: KeyedDataMarker, + P: BufferProvider + ?Sized, + // Actual bound: + // for<'de> <M::Yokeable as Yokeable<'de>>::Output: Deserialize<'de>, + // Necessary workaround bound (see `yoke::trait_hack` docs): + for<'de> YokeTraitHack<<M::Yokeable as Yokeable<'de>>::Output>: Deserialize<'de>, +{ + /// Converts a buffer into a concrete type by deserializing from a supported buffer format. 
+ /// + /// This requires enabling the deserialization Cargo feature + /// for the expected format(s): + /// + /// - `deserialize_json` + /// - `deserialize_postcard_1` + /// - `deserialize_bincode_1` + fn load(&self, req: DataRequest) -> Result<DataResponse<M>, DataError> { + self.load_data(M::KEY, req) + } +} + +#[cfg(feature = "deserialize_json")] +impl From<serde_json::error::Error> for crate::DataError { + fn from(e: serde_json::error::Error) -> Self { + crate::DataError::custom("JSON deserialize").with_display_context(&e) + } +} + +#[cfg(feature = "deserialize_bincode_1")] +impl From<bincode::Error> for crate::DataError { + fn from(e: bincode::Error) -> Self { + crate::DataError::custom("Bincode deserialize").with_display_context(&e) + } +} + +#[cfg(feature = "deserialize_postcard_1")] +impl From<postcard::Error> for crate::DataError { + fn from(e: postcard::Error) -> Self { + crate::DataError::custom("Postcard deserialize").with_display_context(&e) + } +} |