From 4547b622d8d29df964fa2914213088b148c498fc Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:18:32 +0200 Subject: Merging upstream version 1.67.1+dfsg1. Signed-off-by: Daniel Baumann --- vendor/icu_provider/src/any.rs | 416 +++++++++++++ vendor/icu_provider/src/buf.rs | 106 ++++ vendor/icu_provider/src/constructors.rs | 101 ++++ vendor/icu_provider/src/data_provider.rs | 333 +++++++++++ vendor/icu_provider/src/datagen/data_conversion.rs | 48 ++ vendor/icu_provider/src/datagen/heap_measure.rs | 59 ++ vendor/icu_provider/src/datagen/iter.rs | 35 ++ vendor/icu_provider/src/datagen/mod.rs | 120 ++++ vendor/icu_provider/src/datagen/payload.rs | 137 +++++ vendor/icu_provider/src/dynutil.rs | 263 ++++++++ vendor/icu_provider/src/error.rs | 267 +++++++++ vendor/icu_provider/src/hello_world.rs | 307 ++++++++++ vendor/icu_provider/src/helpers.rs | 369 ++++++++++++ vendor/icu_provider/src/key.rs | 660 +++++++++++++++++++++ vendor/icu_provider/src/lib.rs | 214 +++++++ vendor/icu_provider/src/marker.rs | 80 +++ vendor/icu_provider/src/request.rs | 513 ++++++++++++++++ vendor/icu_provider/src/response.rs | 635 ++++++++++++++++++++ vendor/icu_provider/src/serde/borrow_de_utils.rs | 80 +++ vendor/icu_provider/src/serde/mod.rs | 187 ++++++ 20 files changed, 4930 insertions(+) create mode 100644 vendor/icu_provider/src/any.rs create mode 100644 vendor/icu_provider/src/buf.rs create mode 100644 vendor/icu_provider/src/constructors.rs create mode 100644 vendor/icu_provider/src/data_provider.rs create mode 100644 vendor/icu_provider/src/datagen/data_conversion.rs create mode 100644 vendor/icu_provider/src/datagen/heap_measure.rs create mode 100644 vendor/icu_provider/src/datagen/iter.rs create mode 100644 vendor/icu_provider/src/datagen/mod.rs create mode 100644 vendor/icu_provider/src/datagen/payload.rs create mode 100644 vendor/icu_provider/src/dynutil.rs create mode 100644 vendor/icu_provider/src/error.rs create mode 100644 vendor/icu_provider/src/hello_world.rs create mode 100644 vendor/icu_provider/src/helpers.rs create mode 100644 vendor/icu_provider/src/key.rs create mode 100644 vendor/icu_provider/src/lib.rs create mode 100644 vendor/icu_provider/src/marker.rs create mode 100644 vendor/icu_provider/src/request.rs create mode 100644 vendor/icu_provider/src/response.rs create mode 100644 vendor/icu_provider/src/serde/borrow_de_utils.rs create mode 100644 vendor/icu_provider/src/serde/mod.rs (limited to 'vendor/icu_provider/src') diff --git a/vendor/icu_provider/src/any.rs b/vendor/icu_provider/src/any.rs new file mode 100644 index 000000000..1c7a60435 --- /dev/null +++ b/vendor/icu_provider/src/any.rs @@ -0,0 +1,416 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Traits for data providers that produce `Any` objects. + +use crate::prelude::*; +use core::any::Any; +use core::convert::TryFrom; +use core::convert::TryInto; +use yoke::trait_hack::YokeTraitHack; +use yoke::Yokeable; +use zerofrom::ZeroFrom; + +#[cfg(not(feature = "sync"))] +use alloc::rc::Rc as SelectedRc; +#[cfg(feature = "sync")] +use alloc::sync::Arc as SelectedRc; + +/// A trait that allows to specify `Send + Sync` bounds that are only required when +/// the `sync` feature is enabled. Without the feature, this is an empty bound. +#[cfg(feature = "sync")] +pub trait MaybeSendSync: Send + Sync {} +#[cfg(feature = "sync")] +impl MaybeSendSync for T {} + +#[allow(missing_docs)] // docs generated with all features +#[cfg(not(feature = "sync"))] +pub trait MaybeSendSync {} +#[cfg(not(feature = "sync"))] +impl MaybeSendSync for T {} + +/// Representations of the `Any` trait object. +/// +/// **Important Note:** The types enclosed by `StructRef` and `PayloadRc` are NOT the same! +/// The first refers to the struct itself, whereas the second refers to a `DataPayload`. +#[derive(Debug, Clone)] +enum AnyPayloadInner { + /// A reference to `M::Yokeable` + StructRef(&'static dyn Any), + /// A boxed `DataPayload`. + /// + /// Note: This needs to be reference counted, not a `Box`, so that `AnyPayload` is cloneable. + /// If an `AnyPayload` is cloned, the actual cloning of the data is delayed until + /// `downcast()` is invoked (at which point we have the concrete type). + + #[cfg(not(feature = "sync"))] + PayloadRc(SelectedRc), + + #[cfg(feature = "sync")] + PayloadRc(SelectedRc), +} + +/// A type-erased data payload. +/// +/// The only useful method on this type is [`AnyPayload::downcast()`], which transforms this into +/// a normal `DataPayload` which you can subsequently access or mutate. +/// +/// As with `DataPayload`, cloning is designed to be cheap. +#[derive(Debug, Clone, Yokeable)] +pub struct AnyPayload { + inner: AnyPayloadInner, + type_name: &'static str, +} + +/// The [`DataMarker`] marker type for [`AnyPayload`]. +#[allow(clippy::exhaustive_structs)] // marker type +pub struct AnyMarker; + +impl DataMarker for AnyMarker { + type Yokeable = AnyPayload; +} + +impl crate::dynutil::UpcastDataPayload for AnyMarker +where + M: DataMarker + 'static, + M::Yokeable: MaybeSendSync, +{ + #[inline] + fn upcast(other: DataPayload) -> DataPayload { + DataPayload::from_owned(other.wrap_into_any_payload()) + } +} + +impl AnyPayload { + /// Transforms a type-erased `AnyPayload` into a concrete `DataPayload`. + /// + /// Because it is expected that the call site knows the identity of the AnyPayload (e.g., from + /// the data request), this function returns a `DataError` if the generic type does not match + /// the type stored in the `AnyPayload`. + pub fn downcast(self) -> Result, DataError> + where + M: DataMarker + 'static, + // For the StructRef case: + M::Yokeable: ZeroFrom<'static, M::Yokeable>, + // For the PayloadRc case: + M::Yokeable: MaybeSendSync, + for<'a> YokeTraitHack<>::Output>: Clone, + { + use AnyPayloadInner::*; + let type_name = self.type_name; + match self.inner { + StructRef(any_ref) => { + let down_ref: &'static M::Yokeable = any_ref + .downcast_ref() + .ok_or_else(|| DataError::for_type::().with_str_context(type_name))?; + Ok(DataPayload::from_owned(M::Yokeable::zero_from(down_ref))) + } + PayloadRc(any_rc) => { + let down_rc = any_rc + .downcast::>() + .map_err(|_| DataError::for_type::().with_str_context(type_name))?; + Ok(SelectedRc::try_unwrap(down_rc).unwrap_or_else(|down_rc| (*down_rc).clone())) + } + } + } + + /// Creates an `AnyPayload` from a static reference to a data struct. + /// + /// # Examples + /// + /// ``` + /// use icu_provider::hello_world::*; + /// use icu_provider::prelude::*; + /// use std::borrow::Cow; + /// + /// const HELLO_DATA: HelloWorldV1<'static> = HelloWorldV1 { + /// message: Cow::Borrowed("Custom Hello World"), + /// }; + /// + /// let any_payload = AnyPayload::from_static_ref(&HELLO_DATA); + /// + /// let payload: DataPayload = + /// any_payload.downcast().expect("TypeId matches"); + /// assert_eq!("Custom Hello World", payload.get().message); + /// ``` + pub fn from_static_ref(static_ref: &'static Y) -> Self + where + Y: for<'a> Yokeable<'a>, + { + AnyPayload { + inner: AnyPayloadInner::StructRef(static_ref), + // Note: This records the Yokeable type rather than the DataMarker type, + // but that is okay since this is only for debugging + type_name: core::any::type_name::(), + } + } +} + +impl DataPayload +where + M: DataMarker + 'static, + M::Yokeable: MaybeSendSync, +{ + /// Moves this DataPayload to the heap (requiring an allocation) and returns it as an + /// erased `AnyPayload`. + /// + /// # Examples + /// + /// ``` + /// use icu_provider::hello_world::*; + /// use icu_provider::prelude::*; + /// use std::borrow::Cow; + /// use std::rc::Rc; + /// + /// let payload: DataPayload = + /// DataPayload::from_owned(HelloWorldV1 { + /// message: Cow::Borrowed("Custom Hello World"), + /// }); + /// + /// let any_payload = payload.wrap_into_any_payload(); + /// + /// let payload: DataPayload = + /// any_payload.downcast().expect("TypeId matches"); + /// assert_eq!("Custom Hello World", payload.get().message); + /// ``` + pub fn wrap_into_any_payload(self) -> AnyPayload { + AnyPayload { + inner: AnyPayloadInner::PayloadRc(SelectedRc::from(self)), + type_name: core::any::type_name::(), + } + } +} + +impl DataPayload { + /// Transforms a type-erased `DataPayload` into a concrete `DataPayload`. + #[inline] + pub fn downcast(self) -> Result, DataError> + where + M: DataMarker + 'static, + for<'a> YokeTraitHack<>::Output>: Clone, + M::Yokeable: ZeroFrom<'static, M::Yokeable>, + M::Yokeable: MaybeSendSync, + { + self.try_unwrap_owned()?.downcast() + } +} + +/// A [`DataResponse`] for type-erased values. +/// +/// Convertible to and from `DataResponse`. +#[allow(clippy::exhaustive_structs)] // this type is stable (the metadata is allowed to grow) +pub struct AnyResponse { + /// Metadata about the returned object. + pub metadata: DataResponseMetadata, + + /// The object itself; None if it was not loaded. + pub payload: Option, +} + +impl TryFrom> for AnyResponse { + type Error = DataError; + #[inline] + fn try_from(other: DataResponse) -> Result { + Ok(Self { + metadata: other.metadata, + payload: other.payload.map(|p| p.try_unwrap_owned()).transpose()?, + }) + } +} + +impl From for DataResponse { + #[inline] + fn from(other: AnyResponse) -> Self { + Self { + metadata: other.metadata, + payload: other.payload.map(DataPayload::from_owned), + } + } +} + +impl AnyResponse { + /// Transforms a type-erased `DataResponse` into a concrete `DataResponse`. + #[inline] + pub fn downcast(self) -> Result, DataError> + where + M: DataMarker + 'static, + for<'a> YokeTraitHack<>::Output>: Clone, + M::Yokeable: ZeroFrom<'static, M::Yokeable>, + M::Yokeable: MaybeSendSync, + { + Ok(DataResponse { + metadata: self.metadata, + payload: self.payload.map(|p| p.downcast()).transpose()?, + }) + } +} + +/// An object-safe data provider that returns data structs cast to `dyn Any` trait objects. +/// +/// # Examples +/// +/// ``` +/// use icu_provider::hello_world::*; +/// use icu_provider::prelude::*; +/// use std::borrow::Cow; +/// +/// let any_response = HelloWorldProvider +/// .as_any_provider() +/// .load_any( +/// HelloWorldV1Marker::KEY, +/// DataRequest { +/// locale: &icu_locid::locale!("de").into(), +/// metadata: Default::default(), +/// }, +/// ) +/// .expect("Load should succeed"); +/// +/// // Downcast to something useful +/// let response: DataResponse = +/// any_response.downcast().expect("Types match"); +/// +/// let payload = response.take_payload().expect("Data should be present"); +/// +/// assert_eq!(payload.get().message, "Hallo Welt"); +/// ``` +pub trait AnyProvider { + /// Loads an [`AnyPayload`] according to the key and request. + fn load_any(&self, key: DataKey, req: DataRequest) -> Result; +} + +impl AnyProvider for alloc::boxed::Box { + fn load_any(&self, key: DataKey, req: DataRequest) -> Result { + (**self).load_any(key, req) + } +} + +/// A wrapper over `DynamicDataProvider` that implements `AnyProvider` +#[allow(clippy::exhaustive_structs)] // newtype +pub struct DynamicDataProviderAnyMarkerWrap<'a, P: ?Sized>(pub &'a P); + +/// Blanket-implemented trait adding the [`Self::as_any_provider()`] function. +pub trait AsDynamicDataProviderAnyMarkerWrap { + /// Returns an object implementing `AnyProvider` when called on `DynamicDataProvider` + fn as_any_provider(&self) -> DynamicDataProviderAnyMarkerWrap; +} + +impl

AsDynamicDataProviderAnyMarkerWrap for P +where + P: DynamicDataProvider, +{ + #[inline] + fn as_any_provider(&self) -> DynamicDataProviderAnyMarkerWrap

{ + DynamicDataProviderAnyMarkerWrap(self) + } +} + +impl

AnyProvider for DynamicDataProviderAnyMarkerWrap<'_, P> +where + P: DynamicDataProvider + ?Sized, +{ + #[inline] + fn load_any(&self, key: DataKey, req: DataRequest) -> Result { + self.0.load_data(key, req)?.try_into() + } +} + +/// A wrapper over `AnyProvider` that implements `DynamicDataProvider` via downcasting +#[allow(clippy::exhaustive_structs)] // newtype +pub struct DowncastingAnyProvider<'a, P: ?Sized>(pub &'a P); + +/// Blanket-implemented trait adding the [`Self::as_downcasting()`] function. +pub trait AsDowncastingAnyProvider { + /// Returns an object implementing `DynamicDataProvider` when called on `AnyProvider` + fn as_downcasting(&self) -> DowncastingAnyProvider; +} + +impl

AsDowncastingAnyProvider for P +where + P: AnyProvider + ?Sized, +{ + #[inline] + fn as_downcasting(&self) -> DowncastingAnyProvider

{ + DowncastingAnyProvider(self) + } +} + +impl DataProvider for DowncastingAnyProvider<'_, P> +where + P: AnyProvider + ?Sized, + M: KeyedDataMarker + 'static, + for<'a> YokeTraitHack<>::Output>: Clone, + M::Yokeable: ZeroFrom<'static, M::Yokeable>, + M::Yokeable: MaybeSendSync, +{ + #[inline] + fn load(&self, req: DataRequest) -> Result, DataError> { + self.0.load_any(M::KEY, req)?.downcast() + } +} + +impl DynamicDataProvider for DowncastingAnyProvider<'_, P> +where + P: AnyProvider + ?Sized, + M: DataMarker + 'static, + for<'a> YokeTraitHack<>::Output>: Clone, + M::Yokeable: ZeroFrom<'static, M::Yokeable>, + M::Yokeable: MaybeSendSync, +{ + #[inline] + fn load_data(&self, key: DataKey, req: DataRequest) -> Result, DataError> { + self.0.load_any(key, req)?.downcast() + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::hello_world::*; + use alloc::borrow::Cow; + + const CONST_DATA: HelloWorldV1<'static> = HelloWorldV1 { + message: Cow::Borrowed("Custom Hello World"), + }; + + #[test] + fn test_debug() { + let payload: DataPayload = DataPayload::from_owned(HelloWorldV1 { + message: Cow::Borrowed("Custom Hello World"), + }); + + let any_payload = payload.wrap_into_any_payload(); + assert_eq!( + "AnyPayload { inner: PayloadRc(Any { .. }), type_name: \"icu_provider::hello_world::HelloWorldV1Marker\" }", + format!("{:?}", any_payload) + ); + + struct WrongMarker; + + impl DataMarker for WrongMarker { + type Yokeable = u8; + } + + let err = any_payload.downcast::().unwrap_err(); + assert_eq!( + "ICU4X data error: Mismatched types: tried to downcast with icu_provider::any::test::test_debug::WrongMarker, but actual type is different: icu_provider::hello_world::HelloWorldV1Marker", + format!("{}", err) + ); + } + + #[test] + fn test_non_owned_any_marker() { + // This test demonstrates a code path that can trigger the InvalidState error kind. + let payload_result: DataPayload = + DataPayload::from_owned_buffer(Box::new(*b"pretend we're borrowing from here")) + .map_project(|_, _| AnyPayload::from_static_ref(&CONST_DATA)); + let err = payload_result.downcast::().unwrap_err(); + assert!(matches!( + err, + DataError { + kind: DataErrorKind::InvalidState, + .. + } + )); + } +} diff --git a/vendor/icu_provider/src/buf.rs b/vendor/icu_provider/src/buf.rs new file mode 100644 index 000000000..73bc0f165 --- /dev/null +++ b/vendor/icu_provider/src/buf.rs @@ -0,0 +1,106 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Traits for data providers that produce opaque buffers. + +use crate::prelude::*; + +/// [`DataMarker`] for raw buffers. Returned by [`BufferProvider`]. +/// +/// The data is expected to be deserialized before it can be used; see +/// [`DataPayload::into_deserialized`]. +#[allow(clippy::exhaustive_structs)] // marker type +pub struct BufferMarker; + +impl DataMarker for BufferMarker { + type Yokeable = &'static [u8]; +} + +/// A data provider that returns opaque bytes. +/// +/// Generally, these bytes are expected to be deserializable with Serde. To get an object +/// implementing [`DataProvider`] via Serde, use [`as_deserializing()`], which requires +/// enabling at least one of the Serde features. +/// +/// Along with [`DataProvider`], this is one of the two foundational traits in this crate. +/// +/// [`BufferProvider`] can be made into a trait object. It is used over FFI. +/// +/// # Examples +/// +/// ``` +/// # #[cfg(feature = "deserialize_json")] { +/// use icu_locid::locale; +/// use icu_provider::hello_world::*; +/// use icu_provider::prelude::*; +/// +/// let buffer_provider = HelloWorldProvider.into_json_provider(); +/// +/// let data_provider = buffer_provider.as_deserializing(); +/// +/// let german_hello_world: DataPayload = data_provider +/// .load(DataRequest { +/// locale: &locale!("de").into(), +/// metadata: Default::default(), +/// }) +/// .expect("Loading should succeed") +/// .take_payload() +/// .expect("Data should be present"); +/// +/// assert_eq!("Hallo Welt", german_hello_world.get().message); +/// # } +/// ``` +/// +/// [`as_deserializing()`]: AsDeserializingBufferProvider::as_deserializing +pub trait BufferProvider { + /// Loads a [`DataPayload`]`<`[`BufferMarker`]`>` according to the key and request. + fn load_buffer( + &self, + key: DataKey, + req: DataRequest, + ) -> Result, DataError>; +} + +impl BufferProvider for alloc::boxed::Box { + fn load_buffer( + &self, + key: DataKey, + req: DataRequest, + ) -> Result, DataError> { + (**self).load_buffer(key, req) + } +} + +/// An enum expressing all Serde formats known to ICU4X. +#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[non_exhaustive] +pub enum BufferFormat { + /// Serialize using JavaScript Object Notation (JSON). + Json, + /// Serialize using Bincode version 1. + Bincode1, + /// Serialize using Postcard version 0.7. + Postcard1, +} + +impl BufferFormat { + /// Returns an error if the buffer format is not enabled. + pub fn check_available(&self) -> Result<(), DataError> { + match self { + #[cfg(feature = "deserialize_json")] + BufferFormat::Json => Ok(()), + + #[cfg(feature = "deserialize_bincode_1")] + BufferFormat::Bincode1 => Ok(()), + + #[cfg(feature = "deserialize_postcard_1")] + BufferFormat::Postcard1 => Ok(()), + + // Allowed for cases in which all features are enabled + #[allow(unreachable_patterns)] + _ => Err(DataErrorKind::UnavailableBufferFormat(*self).into_error()), + } + } +} diff --git a/vendor/icu_provider/src/constructors.rs b/vendor/icu_provider/src/constructors.rs new file mode 100644 index 000000000..a9330c3f2 --- /dev/null +++ b/vendor/icu_provider/src/constructors.rs @@ -0,0 +1,101 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! 📚 *This module documents ICU4X constructor signatures.* +//! +//! One of the key differences between ICU4X and its parent projects, ICU4C and ICU4J, is in how +//! it deals with locale data. +//! +//! In ICU4X, the data provider is an *explicit argument* whenever it is required by the library. +//! This enables ICU4X to achieve the following value propositions: +//! +//! 1. Configurable data sources (machine-readable data file, baked into code, JSON, etc). +//! 2. Dynamic data loading at runtime (load data on demand). +//! 3. Reduced overhead and code size (data is resolved locally at each call site). +//! 4. Explicit support for multiple ICU4X instances sharing data. +//! +//! In order to achieve these goals, there are 3 versions of all Rust ICU4X functions that +//! take a data provider argument: +//! +//! 1. `*_unstable` +//! 2. `*_with_any_provider` +//! 3. `*_with_buffer_provider` +//! +//! # Which constructor should I use? +//! +//! ## When to use `*_unstable` +//! +//! Use this constructor if your data provider implements the [`DataProvider`] trait for all +//! data structs in *current and future* ICU4X versions. Examples: +//! +//! 1. `BakedDataProvider` auto-regenerated on new ICU4X versions +//! 2. Anything with a _blanket_ [`DataProvider`] impl +//! +//! Since the exact set of bounds may change at any time, including in minor SemVer releases, +//! it is the client's responsibility to guarantee that the requirement is upheld. +//! +//! ## When to use `*_with_any_provider` +//! +//! Use this constructor if you need to use a provider that implements [`AnyProvider`] but not +//! [`DataProvider`]. Examples: +//! +//! 1. [`AnyPayloadProvider`] +//! 2. [`ForkByKeyProvider`] between two providers implementing [`AnyProvider`] +//! 3. Providers that cache or override certain keys but not others and therefore +//! can't implement [`DataProvider`] +//! +//! ## When to use `*_with_buffer_provider` +//! +//! Use this constructor if your data originates as byte buffers that need to be deserialized. +//! All such providers should implement [`BufferProvider`]. Examples: +//! +//! 1. [`BlobDataProvider`] +//! 2. [`FsDataProvider`] +//! 3. [`ForkByKeyProvider`] between any of the above +//! +//! Please note that you must enable the `"serde"` feature on each crate in which you use the +//! `*_with_buffer_provider` constructor. +//! +//! # Data Versioning Policy +//! +//! The `*_with_any_provider` and `*_with_buffer_provider` functions will succeed to compile and +//! run if given a data provider supporting all of the keys required for the object being +//! constructed, either the current or any previous version within the same SemVer major release. +//! For example, if a data file is built to support FooFormatter version 1.1, then FooFormatter +//! version 1.2 will be able to read the same data file. Likewise, backwards-compatible keys can +//! always be included by `icu_datagen` to support older library versions. +//! +//! The `*_unstable` functions are only guaranteed to work on data built for the exact same version +//! of ICU4X. The advantage of the `*_unstable` functions is that they result in the smallest code +//! size and allow for automatic data slicing when `BakedDataProvider` is used. However, the type +//! bounds of this function may change over time, breaking SemVer guarantees. These functions +//! should therefore only be used when you have full control over your data lifecycle at compile +//! time. +//! +//! # Data Providers Over FFI +//! +//! Over FFI, there is only one data provider type: [`ICU4XDataProvider`]. Internally, it is an +//! `enum` between `dyn `[`AnyProvider`] and `dyn `[`BufferProvider`]. +//! +//! To control for code size, there are two features, `any_provider` and `buffer_provider`, that +//! enable the corresponding items in the enum. +//! +//! In Rust ICU4X, a similar buffer/any enum approach was not taken because: +//! +//! 1. Feature-gating the enum branches gets complex across crates. +//! 2. Without feature gating, users need to carry Serde code even if they're not using it, +//! violating one of the core value propositions of ICU4X. +//! 3. We could reduce the number of constructors from 3 to 2 but not to 1, so the educational +//! benefit is limited. +//! +//! +//! [`DataProvider`]: crate::DataProvider +//! [`BufferProvider`]: crate::BufferProvider +//! [`AnyProvider`]: crate::AnyProvider +//! [`AnyPayloadProvider`]: ../../icu_provider_adapters/any_payload/struct.AnyPayloadProvider.html +//! [`ForkByKeyProvider`]: ../../icu_provider_adapters/fork/struct.ForkByKeyProvider.html +//! [`BlobDataProvider`]: ../../icu_provider_blob/struct.BlobDataProvider.html +//! [`StaticDataProvider`]: ../../icu_provider_blob/struct.StaticDataProvider.html +//! [`FsDataProvider`]: ../../icu_provider_blob/struct.FsDataProvider.html +//! [`ICU4XDataProvider`]: ../../icu_capi/provider/ffi/struct.ICU4XDataProvider.html diff --git a/vendor/icu_provider/src/data_provider.rs b/vendor/icu_provider/src/data_provider.rs new file mode 100644 index 000000000..af98a252a --- /dev/null +++ b/vendor/icu_provider/src/data_provider.rs @@ -0,0 +1,333 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::error::DataError; +use crate::key::DataKey; +use crate::marker::{DataMarker, KeyedDataMarker}; +use crate::request::DataRequest; +use crate::response::DataResponse; + +/// A data provider that loads data for a specific data type. +/// +/// Unlike [`DataProvider`], there may be multiple keys corresponding to the same data type. +/// This is often the case when returning `dyn` trait objects such as [`AnyMarker`]. +/// +/// [`AnyMarker`]: crate::any::AnyMarker +pub trait DynamicDataProvider +where + M: DataMarker, +{ + /// Query the provider for data, returning the result. + /// + /// Returns [`Ok`] if the request successfully loaded data. If data failed to load, returns an + /// Error with more information. + fn load_data(&self, key: DataKey, req: DataRequest) -> Result, DataError>; +} + +/// A data provider that loads data for a specific [`DataKey`]. +pub trait DataProvider +where + M: KeyedDataMarker, +{ + /// Query the provider for data, returning the result. + /// + /// Returns [`Ok`] if the request successfully loaded data. If data failed to load, returns an + /// Error with more information. + fn load(&self, req: DataRequest) -> Result, DataError>; +} + +impl DynamicDataProvider for alloc::boxed::Box

+where + M: DataMarker, + P: DynamicDataProvider + ?Sized, +{ + fn load_data(&self, key: DataKey, req: DataRequest) -> Result, DataError> { + (**self).load_data(key, req) + } +} + +#[cfg(test)] +mod test { + + use super::*; + use crate::hello_world::*; + use crate::prelude::*; + use crate::yoke::Yokeable; + use crate::zerofrom; + use alloc::borrow::Cow; + use alloc::string::String; + use core::fmt::Debug; + use serde::{Deserialize, Serialize}; + + // This tests DataProvider borrow semantics with a dummy data provider based on a + // JSON string. It also exercises most of the data provider code paths. + + /// Key for HelloAlt, used for testing mismatched types + const HELLO_ALT_KEY: DataKey = crate::data_key!("core/helloalt@1"); + + /// A data struct serialization-compatible with HelloWorldV1 used for testing mismatched types + #[derive( + Serialize, Deserialize, Debug, Clone, Default, PartialEq, Yokeable, zerofrom::ZeroFrom, + )] + struct HelloAlt { + #[zerofrom(clone)] + message: String, + } + + /// Marker type for [`HelloAlt`]. + struct HelloAltMarker {} + + impl DataMarker for HelloAltMarker { + type Yokeable = HelloAlt; + } + + impl KeyedDataMarker for HelloAltMarker { + const KEY: DataKey = HELLO_ALT_KEY; + } + + #[derive(Deserialize, Debug, Clone, Default, PartialEq)] + struct HelloCombined<'data> { + #[serde(borrow)] + pub hello_v1: HelloWorldV1<'data>, + pub hello_alt: HelloAlt, + } + + /// A DataProvider that owns its data, returning an Rc-variant DataPayload. + /// Supports only key::HELLO_WORLD_V1. Uses `impl_dynamic_data_provider!()`. + #[derive(Debug)] + struct DataWarehouse { + hello_v1: HelloWorldV1<'static>, + hello_alt: HelloAlt, + } + + impl DataProvider for DataWarehouse { + fn load(&self, _: DataRequest) -> Result, DataError> { + Ok(DataResponse { + metadata: DataResponseMetadata::default(), + payload: Some(DataPayload::from_owned(self.hello_v1.clone())), + }) + } + } + + crate::impl_dynamic_data_provider!(DataWarehouse, [HelloWorldV1Marker,], AnyMarker); + + /// A DataProvider that supports both key::HELLO_WORLD_V1 and HELLO_ALT. + #[derive(Debug)] + struct DataProvider2 { + data: DataWarehouse, + } + + impl From for DataProvider2 { + fn from(warehouse: DataWarehouse) -> Self { + DataProvider2 { data: warehouse } + } + } + + impl DataProvider for DataProvider2 { + fn load(&self, _: DataRequest) -> Result, DataError> { + Ok(DataResponse { + metadata: DataResponseMetadata::default(), + payload: Some(DataPayload::from_owned(self.data.hello_v1.clone())), + }) + } + } + + impl DataProvider for DataProvider2 { + fn load(&self, _: DataRequest) -> Result, DataError> { + Ok(DataResponse { + metadata: DataResponseMetadata::default(), + payload: Some(DataPayload::from_owned(self.data.hello_alt.clone())), + }) + } + } + + crate::impl_dynamic_data_provider!( + DataProvider2, + [HelloWorldV1Marker, HelloAltMarker,], + AnyMarker + ); + + const DATA: &str = r#"{ + "hello_v1": { + "message": "Hello V1" + }, + "hello_alt": { + "message": "Hello Alt" + } + }"#; + + fn get_warehouse(data: &'static str) -> DataWarehouse { + let data: HelloCombined = serde_json::from_str(data).expect("Well-formed data"); + DataWarehouse { + hello_v1: data.hello_v1, + hello_alt: data.hello_alt, + } + } + + fn get_payload_v1 + ?Sized>( + provider: &P, + ) -> Result, DataError> { + provider.load(Default::default())?.take_payload() + } + + fn get_payload_alt + ?Sized>( + provider: &P, + ) -> Result, DataError> { + provider.load(Default::default())?.take_payload() + } + + #[test] + fn test_warehouse_owned() { + let warehouse = get_warehouse(DATA); + let hello_data = get_payload_v1(&warehouse).unwrap(); + assert!(matches!( + hello_data.get(), + HelloWorldV1 { + message: Cow::Borrowed(_), + } + )); + } + + #[test] + fn test_warehouse_owned_dyn_erased() { + let warehouse = get_warehouse(DATA); + let hello_data = get_payload_v1(&warehouse.as_any_provider().as_downcasting()).unwrap(); + assert!(matches!( + hello_data.get(), + HelloWorldV1 { + message: Cow::Borrowed(_), + } + )); + } + + #[test] + fn test_warehouse_owned_dyn_generic() { + let warehouse = get_warehouse(DATA); + let hello_data = + get_payload_v1(&warehouse as &dyn DataProvider).unwrap(); + assert!(matches!( + hello_data.get(), + HelloWorldV1 { + message: Cow::Borrowed(_), + } + )); + } + + #[test] + fn test_warehouse_owned_dyn_erased_alt() { + let warehouse = get_warehouse(DATA); + let response = get_payload_alt(&warehouse.as_any_provider().as_downcasting()); + assert!(matches!( + response, + Err(DataError { + kind: DataErrorKind::MissingDataKey, + .. + }) + )); + } + + #[test] + fn test_provider2() { + let warehouse = get_warehouse(DATA); + let provider = DataProvider2::from(warehouse); + let hello_data = get_payload_v1(&provider).unwrap(); + assert!(matches!( + hello_data.get(), + HelloWorldV1 { + message: Cow::Borrowed(_), + } + )); + } + + #[test] + fn test_provider2_dyn_erased() { + let warehouse = get_warehouse(DATA); + let provider = DataProvider2::from(warehouse); + let hello_data = get_payload_v1(&provider.as_any_provider().as_downcasting()).unwrap(); + assert!(matches!( + hello_data.get(), + HelloWorldV1 { + message: Cow::Borrowed(_), + } + )); + } + + #[test] + fn test_provider2_dyn_erased_alt() { + let warehouse = get_warehouse(DATA); + let provider = DataProvider2::from(warehouse); + let hello_data = get_payload_alt(&provider.as_any_provider().as_downcasting()).unwrap(); + assert!(matches!(hello_data.get(), HelloAlt { .. })); + } + + #[test] + fn test_provider2_dyn_generic() { + let warehouse = get_warehouse(DATA); + let provider = DataProvider2::from(warehouse); + let hello_data = + get_payload_v1(&provider as &dyn DataProvider).unwrap(); + assert!(matches!( + hello_data.get(), + HelloWorldV1 { + message: Cow::Borrowed(_), + } + )); + } + + #[test] + fn test_provider2_dyn_generic_alt() { + let warehouse = get_warehouse(DATA); + let provider = DataProvider2::from(warehouse); + let hello_data = get_payload_alt(&provider as &dyn DataProvider).unwrap(); + assert!(matches!(hello_data.get(), HelloAlt { .. })); + } + + #[test] + fn test_mismatched_types() { + let warehouse = get_warehouse(DATA); + let provider = DataProvider2::from(warehouse); + // Request is for v2, but type argument is for v1 + let response: Result, DataError> = AnyProvider::load_any( + &provider.as_any_provider(), + HELLO_ALT_KEY, + Default::default(), + ) + .unwrap() + .downcast(); + assert!(matches!( + response, + Err(DataError { + kind: DataErrorKind::MismatchedType(_), + .. + }) + )); + } + + fn check_v1_v2

(d: &P) + where + P: DataProvider + DataProvider + ?Sized, + { + let v1: DataPayload = + d.load(Default::default()).unwrap().take_payload().unwrap(); + let v2: DataPayload = + d.load(Default::default()).unwrap().take_payload().unwrap(); + if v1.get().message == v2.get().message { + panic!() + } + } + + #[test] + fn test_v1_v2_generic() { + let warehouse = get_warehouse(DATA); + let provider = DataProvider2::from(warehouse); + check_v1_v2(&provider); + } + + #[test] + fn test_v1_v2_dyn_erased() { + let warehouse = get_warehouse(DATA); + let provider = DataProvider2::from(warehouse); + check_v1_v2(&provider.as_any_provider().as_downcasting()); + } +} diff --git a/vendor/icu_provider/src/datagen/data_conversion.rs b/vendor/icu_provider/src/datagen/data_conversion.rs new file mode 100644 index 000000000..59146352a --- /dev/null +++ b/vendor/icu_provider/src/datagen/data_conversion.rs @@ -0,0 +1,48 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::prelude::*; +use crate::DataKey; +use alloc::boxed::Box; + +/// A trait that allows for converting between data payloads of different types. +/// +/// These payloads will typically be some kind of erased payload, either with +/// AnyMarker, BufferMarker, or SerializeMarker, where converting requires reifying the type. +/// A type implementing [`DataConverter`] will essentially have a "registry" mapping keys to +/// concrete marker types M, and reifying the input to a `DataPayload`, performing some conversion +/// or computation, and erasing the result to `DataPayload`. +/// +/// It will typically be implemented on data providers used in datagen. +/// +/// The [`make_exportable_provider!`] macro is able to automatically implement this trait. +/// +/// [`make_exportable_provider!`]: crate::make_exportable_provider +pub trait DataConverter { + /// Attempt to convert a payload corresponding to the given data key + /// from one marker type to another marker type. + /// + /// If this is not possible (for example, if the provider does not know about the key), + /// the original payload is returned back to the caller. + fn convert( + &self, + key: DataKey, + from: DataPayload, + ) -> Result, (DataPayload, DataError)>; +} + +impl DataConverter for Box

+where + MFrom: DataMarker, + MTo: DataMarker, + P: DataConverter + ?Sized, +{ + fn convert( + &self, + key: DataKey, + from: DataPayload, + ) -> Result, (DataPayload, DataError)> { + (**self).convert(key, from) + } +} diff --git a/vendor/icu_provider/src/datagen/heap_measure.rs b/vendor/icu_provider/src/datagen/heap_measure.rs new file mode 100644 index 000000000..d451f3ebe --- /dev/null +++ b/vendor/icu_provider/src/datagen/heap_measure.rs @@ -0,0 +1,59 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::buf::{BufferFormat, BufferMarker}; +use crate::prelude::*; +use yoke::trait_hack::YokeTraitHack; + +/// Stats on the heap size needed when attempting to zero-copy-deserialize +/// a postcard-formatted data struct. +#[derive(Debug, Copy, Clone, yoke::Yokeable, Default)] +#[non_exhaustive] +pub struct HeapStats { + /// Total bytes allocated during deserialization + pub total_bytes_allocated: u64, + /// Total bytes allocated during deserialization that have not yet been freed + pub net_bytes_allocated: usize, +} + +/// The [`DataMarker`] marker type for [`HeapStats`]. +#[allow(clippy::exhaustive_structs)] // marker type +pub struct HeapStatsMarker; + +impl DataMarker for HeapStatsMarker { + type Yokeable = HeapStats; +} + +impl DataPayload { + /// Given a buffer known to be in postcard-0.7 format, attempt to zero-copy + /// deserialize it and record the amount of heap allocations that occurred. + /// + /// Ideally, this number should be zero. + /// + /// [`dhat`]'s profiler must be initialized before using this. + /// + /// # Panics + /// + /// Panics if the buffer is not in postcard-0.7 format. + #[allow(clippy::expect_used)] // The function documents when panics may occur. + pub fn attempt_zero_copy_heap_size(self) -> HeapStats + where + M: DataMarker, + for<'a> &'a >::Output: serde::Serialize, + for<'de> YokeTraitHack<>::Output>: + serde::Deserialize<'de>, + { + let stats_before = dhat::HeapStats::get(); + // reify, but do nothing with the type + let _reified_data: DataPayload = self + .into_deserialized(BufferFormat::Postcard1) + .expect("Failed to deserialize BufferMarker as postcard-0.7"); + let stats_after = dhat::HeapStats::get(); + + HeapStats { + total_bytes_allocated: stats_after.total_bytes - stats_before.total_bytes, + net_bytes_allocated: stats_after.curr_bytes - stats_before.curr_bytes, + } + } +} diff --git a/vendor/icu_provider/src/datagen/iter.rs b/vendor/icu_provider/src/datagen/iter.rs new file mode 100644 index 000000000..6175d89c6 --- /dev/null +++ b/vendor/icu_provider/src/datagen/iter.rs @@ -0,0 +1,35 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Collection of iteration APIs for data providers. + +use crate::prelude::*; + +/// A [`DynamicDataProvider`] that can iterate over all supported [`DataLocale`] for a certain key. +/// +/// Implementing this trait means that a data provider knows all of the data it can successfully +/// return from a load request. +pub trait IterableDynamicDataProvider: DynamicDataProvider { + /// Given a [`DataKey`], returns a list of [`DataLocale`]. + fn supported_locales_for_key(&self, key: DataKey) -> Result, DataError>; +} + +/// A [`DataProvider`] that can iterate over all supported [`DataLocale`] for a certain key. +/// +/// Implementing this trait means that a data provider knows all of the data it can successfully +/// return from a load request. +pub trait IterableDataProvider: DataProvider { + /// Returns a list of [`DataLocale`]. + fn supported_locales(&self) -> Result, DataError>; +} + +impl IterableDynamicDataProvider for Box

+where + M: DataMarker, + P: IterableDynamicDataProvider + ?Sized, +{ + fn supported_locales_for_key(&self, key: DataKey) -> Result, DataError> { + (**self).supported_locales_for_key(key) + } +} diff --git a/vendor/icu_provider/src/datagen/mod.rs b/vendor/icu_provider/src/datagen/mod.rs new file mode 100644 index 000000000..5ede82275 --- /dev/null +++ b/vendor/icu_provider/src/datagen/mod.rs @@ -0,0 +1,120 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! This module contains various utilities required to generate ICU4X data files, typically +//! via the `icu_datagen` reference crate. End users should not need to consume anything in +//! this module as a library unless defining new types that integrate with `icu_datagen`. +//! +//! This module can be enabled with the `datagen` feature on `icu_provider`. + +mod data_conversion; +mod heap_measure; +mod iter; +mod payload; +pub use data_conversion::DataConverter; +pub use heap_measure::{HeapStats, HeapStatsMarker}; +pub use iter::IterableDataProvider; + +#[doc(hidden)] // exposed for make_exportable_provider +pub use iter::IterableDynamicDataProvider; +#[doc(hidden)] // exposed for make_exportable_provider +pub use payload::{ExportBox, ExportMarker}; + +use crate::prelude::*; + +/// An object capable of exporting data payloads in some form. +pub trait DataExporter: Sync { + /// Save a `payload` corresponding to the given key and locale. + /// Takes non-mut self as it can be called concurrently. + fn put_payload( + &self, + key: DataKey, + locale: &DataLocale, + payload: &DataPayload, + ) -> Result<(), DataError>; + + /// Function called after all keys have been fully dumped. + /// Takes non-mut self as it can be called concurrently. + fn flush(&self, _key: DataKey) -> Result<(), DataError> { + Ok(()) + } + + /// This function has to be called before the object is dropped (after all + /// keys have been fully dumped). This conceptually takes ownership, so + /// clients *may not* interact with this object after close has been called. + fn close(&mut self) -> Result<(), DataError> { + Ok(()) + } +} + +/// A [`DynamicDataProvider`] that can be used for exporting data. +/// +/// Use [`make_exportable_provider`] to implement this. +pub trait ExportableProvider: IterableDynamicDataProvider + Sync {} +impl ExportableProvider for T where T: IterableDynamicDataProvider + Sync {} + +/// This macro can be used on a data provider to allow it to be used for data generation. +/// +/// Data generation 'compiles' data by using this data provider (which usually translates data from +/// different sources and doesn't have to be efficient) to generate data structs, and then writing +/// them to an efficient format like [`BlobDataProvider`] or [`BakedDataProvider`]. The requirements +/// for `make_exportable_provider` are: +/// * The data struct has to implement [`serde::Serialize`](::serde::Serialize) and [`databake::Bake`] +/// * The provider needs to implement [`IterableDataProvider`] for all specified [`KeyedDataMarker`]s. +/// This allows the generating code to know which [`DataLocale`] to collect. +/// +/// [`BlobDataProvider`]: ../../icu_provider_blob/struct.BlobDataProvider.html +/// [`BakedDataProvider`]: ../../icu_datagen/index.html +#[macro_export] +macro_rules! make_exportable_provider { + ($provider:ty, [ $($struct_m:ident),+, ]) => { + $crate::impl_dynamic_data_provider!( + $provider, + [ $($struct_m),+, ], + $crate::datagen::ExportMarker + ); + $crate::impl_dynamic_data_provider!( + $provider, + [ $($struct_m),+, ], + $crate::any::AnyMarker + ); + + impl $crate::datagen::IterableDynamicDataProvider<$crate::datagen::ExportMarker> for $provider { + fn supported_locales_for_key(&self, key: $crate::DataKey) -> Result, $crate::DataError> { + #![allow(non_upper_case_globals)] + // Reusing the struct names as identifiers + $( + const $struct_m: $crate::DataKeyHash = <$struct_m as $crate::KeyedDataMarker>::KEY.hashed(); + )+ + match key.hashed() { + $( + $struct_m => { + $crate::datagen::IterableDataProvider::<$struct_m>::supported_locales(self) + } + )+, + _ => Err($crate::DataErrorKind::MissingDataKey.with_key(key)) + } + } + } + + impl $crate::datagen::DataConverter<$crate::buf::BufferMarker, $crate::datagen::HeapStatsMarker> for $provider { + fn convert(&self, key: $crate::DataKey, from: $crate::DataPayload<$crate::buf::BufferMarker>) -> Result<$crate::DataPayload<$crate::datagen::HeapStatsMarker>, ($crate::DataPayload<$crate::buf::BufferMarker>, $crate::DataError)> { + #![allow(non_upper_case_globals)] + // Reusing the struct names as identifiers + $( + const $struct_m: $crate::DataKeyHash = <$struct_m as $crate::KeyedDataMarker>::KEY.hashed(); + )+ + match key.hashed() { + $( + $struct_m => { + let heap_stats = from.attempt_zero_copy_heap_size::<$struct_m>(); + return Ok($crate::DataPayload::from_owned(heap_stats)); + } + )+, + _ => Err((from, $crate::DataErrorKind::MissingDataKey.with_key(key))) + } + } + } + }; +} diff --git a/vendor/icu_provider/src/datagen/payload.rs b/vendor/icu_provider/src/datagen/payload.rs new file mode 100644 index 000000000..b6ea8049f --- /dev/null +++ b/vendor/icu_provider/src/datagen/payload.rs @@ -0,0 +1,137 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::dynutil::UpcastDataPayload; +use crate::prelude::*; +use crate::yoke::*; +use alloc::boxed::Box; +use databake::{Bake, CrateEnv, TokenStream}; + +trait ExportableYoke { + fn bake_yoke(&self, env: &CrateEnv) -> TokenStream; + fn serialize_yoke( + &self, + serializer: &mut dyn erased_serde::Serializer, + ) -> Result<(), DataError>; +} + +impl ExportableYoke for Yoke +where + Y: for<'a> Yokeable<'a>, + for<'a> >::Output: Bake + serde::Serialize, +{ + fn bake_yoke(&self, ctx: &CrateEnv) -> TokenStream { + self.get().bake(ctx) + } + + fn serialize_yoke( + &self, + serializer: &mut dyn erased_serde::Serializer, + ) -> Result<(), DataError> { + use erased_serde::Serialize; + self.get() + .erased_serialize(serializer) + .map_err(|e| DataError::custom("Serde export").with_display_context(&e))?; + Ok(()) + } +} + +#[doc(hidden)] // exposed for make_exportable_provider +#[derive(yoke::Yokeable)] +pub struct ExportBox { + payload: Box, +} + +impl UpcastDataPayload for ExportMarker +where + M: DataMarker, + M::Yokeable: Sync, + for<'a> >::Output: Bake + serde::Serialize, +{ + fn upcast(other: DataPayload) -> DataPayload { + DataPayload::from_owned(ExportBox { + payload: Box::new(other.yoke), + }) + } +} + +impl DataPayload { + /// Serializes this [`DataPayload`] into a serializer using Serde. + /// + /// # Examples + /// + /// ``` + /// use icu_provider::datagen::*; + /// use icu_provider::dynutil::UpcastDataPayload; + /// use icu_provider::hello_world::HelloWorldV1Marker; + /// use icu_provider::prelude::*; + /// + /// // Create an example DataPayload + /// let payload: DataPayload = Default::default(); + /// let export: DataPayload = UpcastDataPayload::upcast(payload); + /// + /// // Serialize the payload to a JSON string + /// let mut buffer: Vec = vec![]; + /// export + /// .serialize(&mut serde_json::Serializer::new(&mut buffer)) + /// .expect("Serialization should succeed"); + /// assert_eq!("{\"message\":\"(und) Hello World\"}".as_bytes(), buffer); + /// ``` + pub fn serialize(&self, serializer: S) -> Result<(), DataError> + where + S: serde::Serializer, + S::Ok: 'static, // erased_serde requirement, cannot return values in `Ok` + { + self.get() + .payload + .serialize_yoke(&mut ::erase(serializer)) + } + + /// Serializes this [`DataPayload`]'s value into a [`TokenStream`] + /// using its [`Bake`] implementations. + /// + /// # Examples + /// + /// ``` + /// use icu_provider::datagen::*; + /// use icu_provider::dynutil::UpcastDataPayload; + /// use icu_provider::hello_world::HelloWorldV1Marker; + /// use icu_provider::prelude::*; + /// # use databake::quote; + /// # use std::collections::BTreeSet; + /// + /// // Create an example DataPayload + /// let payload: DataPayload = Default::default(); + /// let export: DataPayload = UpcastDataPayload::upcast(payload); + /// + /// let env = databake::CrateEnv::default(); + /// let tokens = export.tokenize(&env); + /// assert_eq!( + /// quote! { + /// ::icu_provider::hello_world::HelloWorldV1 { + /// message: alloc::borrow::Cow::Borrowed("(und) Hello World"), + /// } + /// } + /// .to_string(), + /// tokens.to_string() + /// ); + /// assert_eq!( + /// env.into_iter().collect::>(), + /// ["icu_provider", "alloc"] + /// .into_iter() + /// .collect::>() + /// ); + /// ``` + pub fn tokenize(&self, env: &CrateEnv) -> TokenStream { + self.get().payload.bake_yoke(env) + } +} + +/// Marker type for [`ExportBox`]. +#[allow(clippy::exhaustive_structs)] // marker type +pub struct ExportMarker {} + +impl DataMarker for ExportMarker { + type Yokeable = ExportBox; +} diff --git a/vendor/icu_provider/src/dynutil.rs b/vendor/icu_provider/src/dynutil.rs new file mode 100644 index 000000000..89d7d7ded --- /dev/null +++ b/vendor/icu_provider/src/dynutil.rs @@ -0,0 +1,263 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Utilities for using trait objects with `DataPayload`. + +/// Trait to allow conversion from `DataPayload` to `DataPayload`. +/// +/// This trait can be manually implemented in order to enable [`impl_dynamic_data_provider`]. +/// +/// [`DataPayload::downcast`]: crate::DataPayload::downcast +pub trait UpcastDataPayload +where + M: crate::DataMarker, + Self: Sized + crate::DataMarker, +{ + /// Upcast a `DataPayload` to a `DataPayload` where `T` implements trait `S`. + /// + /// # Examples + /// + /// Upcast and then downcast a data struct of type `Cow` (cart type `String`) via + /// [`AnyPayload`](crate::any::AnyPayload): + /// + /// ``` + /// use icu_provider::dynutil::UpcastDataPayload; + /// use icu_provider::hello_world::*; + /// use icu_provider::prelude::*; + /// use std::borrow::Cow; + /// + /// let original = DataPayload::::from_static_str("foo"); + /// let upcasted = AnyMarker::upcast(original); + /// let downcasted = upcasted + /// .downcast::() + /// .expect("Type conversion"); + /// assert_eq!(downcasted.get().message, "foo"); + /// ``` + fn upcast(other: crate::DataPayload) -> crate::DataPayload; +} + +/// Implements [`UpcastDataPayload`] from several data markers to a single data marker +/// that all share the same [`DataMarker::Yokeable`]. +/// +/// # Examples +/// +/// ``` +/// use icu_provider::prelude::*; +/// use std::borrow::Cow; +/// +/// #[icu_provider::data_struct( +/// FooV1Marker, +/// BarV1Marker = "demo/bar@1", +/// BazV1Marker = "demo/baz@1" +/// )] +/// pub struct FooV1<'data> { +/// message: Cow<'data, str>, +/// }; +/// +/// icu_provider::impl_casting_upcast!( +/// FooV1Marker, +/// [BarV1Marker, BazV1Marker,] +/// ); +/// ``` +/// +/// [`DataMarker::Yokeable`]: crate::DataMarker::Yokeable +#[macro_export] +macro_rules! impl_casting_upcast { + ($dyn_m:path, [ $($struct_m:ident),+, ]) => { + $( + impl $crate::dynutil::UpcastDataPayload<$struct_m> for $dyn_m { + fn upcast(other: $crate::DataPayload<$struct_m>) -> $crate::DataPayload<$dyn_m> { + other.cast() + } + } + )+ + } +} + +/// Implements [`DynamicDataProvider`] for a marker type `S` on a type that already implements +/// [`DynamicDataProvider`] or [`DataProvider`] for one or more `M`, where `M` is a concrete type +/// that is convertible to `S` via [`UpcastDataPayload`]. +/// +/// Use this macro to add support to your data provider for: +/// +/// - [`AnyPayload`] if your provider can return typed objects as [`Any`](core::any::Any). +/// +/// ## Wrapping DataProvider +/// +/// If your type implements [`DataProvider`], pass a list of markers as the second argument. +/// This results in a `DynamicDataProvider` that delegates to a specific marker if the key +/// matches or else returns [`DataErrorKind::MissingDataKey`]. +/// +/// ``` +/// use icu_provider::prelude::*; +/// use icu_provider::hello_world::*; +/// # +/// # // Duplicating HelloWorldProvider because the real one already implements DynamicDataProvider +/// # struct HelloWorldProvider; +/// # impl DataProvider for HelloWorldProvider { +/// # fn load( +/// # &self, +/// # req: DataRequest, +/// # ) -> Result, DataError> { +/// # icu_provider::hello_world::HelloWorldProvider.load(req) +/// # } +/// # } +/// +/// // Implement DynamicDataProvider on HelloWorldProvider: DataProvider +/// icu_provider::impl_dynamic_data_provider!(HelloWorldProvider, [HelloWorldV1Marker,], AnyMarker); +/// +/// let req = DataRequest { +/// locale: &icu_locid::locale!("de").into(), +/// metadata: Default::default(), +/// }; +/// +/// // Successful because the key matches: +/// HelloWorldProvider.load_data(HelloWorldV1Marker::KEY, req).unwrap(); +/// +/// // MissingDataKey error as the key does not match: +/// assert_eq!( +/// HelloWorldProvider.load_data(icu_provider::data_key!("dummy@1"), req).unwrap_err().kind, +/// DataErrorKind::MissingDataKey, +/// ); +/// ``` +/// +/// ## Wrapping DynamicDataProvider +/// +/// It is also possible to wrap a [`DynamicDataProvider`] to create another [`DynamicDataProvider`]. To do this, +/// pass a match-like statement for keys as the second argument: +/// +/// ``` +/// use icu_provider::prelude::*; +/// use icu_provider::hello_world::*; +/// # +/// # struct HelloWorldProvider; +/// # impl DynamicDataProvider for HelloWorldProvider { +/// # fn load_data(&self, key: DataKey, req: DataRequest) +/// # -> Result, DataError> { +/// # icu_provider::hello_world::HelloWorldProvider.load(req) +/// # } +/// # } +/// +/// // Implement DataProvider on HelloWorldProvider: DynamicDataProvider +/// icu_provider::impl_dynamic_data_provider!(HelloWorldProvider, { +/// // Match HelloWorldV1Marker::KEY and delegate to DynamicDataProvider. +/// HW = HelloWorldV1Marker::KEY => HelloWorldV1Marker, +/// // Send the wildcard match also to DynamicDataProvider. +/// _ => HelloWorldV1Marker, +/// }, AnyMarker); +/// +/// let req = DataRequest { +/// locale: &icu_locid::locale!("de").into(), +/// metadata: Default::default(), +/// }; +/// +/// // Successful because the key matches: +/// HelloWorldProvider.as_any_provider().load_any(HelloWorldV1Marker::KEY, req).unwrap(); +/// +/// // Because of the wildcard, any key actually works: +/// HelloWorldProvider.as_any_provider().load_any(icu_provider::data_key!("dummy@1"), req).unwrap(); +/// ``` +/// +/// [`DynamicDataProvider`]: crate::DynamicDataProvider +/// [`DataProvider`]: crate::DataProvider +/// [`AnyPayload`]: (crate::any::AnyPayload) +/// [`DataErrorKind::MissingDataKey`]: (crate::DataErrorKind::MissingDataKey) +/// [`SerializeMarker`]: (crate::serde::SerializeMarker) +#[macro_export] +macro_rules! impl_dynamic_data_provider { + // allow passing in multiple things to do and get dispatched + ($provider:ty, $arms:tt, $one:path, $($rest:path),+) => { + $crate::impl_dynamic_data_provider!( + $provider, + $arms, + $one + ); + + $crate::impl_dynamic_data_provider!( + $provider, + $arms, + $($rest),+ + ); + }; + + ($provider:ty, { $($ident:ident = $key:path => $struct_m:ty),+, $(_ => $struct_d:ty,)?}, $dyn_m:ty) => { + impl $crate::DynamicDataProvider<$dyn_m> for $provider + { + fn load_data( + &self, + key: $crate::DataKey, + req: $crate::DataRequest, + ) -> Result< + $crate::DataResponse<$dyn_m>, + $crate::DataError, + > { + $( + const $ident: $crate::DataKeyHash = $key.hashed(); + )+ + match key.hashed() { + $( + $ident => { + let result: $crate::DataResponse<$struct_m> = + $crate::DynamicDataProvider::<$struct_m>::load_data(self, key, req)?; + Ok($crate::DataResponse { + metadata: result.metadata, + payload: result.payload.map(|p| { + $crate::dynutil::UpcastDataPayload::<$struct_m>::upcast(p) + }), + }) + } + )+, + $( + _ => { + let result: $crate::DataResponse<$struct_d> = + $crate::DynamicDataProvider::<$struct_d>::load_data(self, key, req)?; + Ok($crate::DataResponse { + metadata: result.metadata, + payload: result.payload.map(|p| { + $crate::dynutil::UpcastDataPayload::<$struct_d>::upcast(p) + }), + }) + } + )? + _ => Err($crate::DataErrorKind::MissingDataKey.with_req(key, req)) + } + } + } + + }; + ($provider:ty, [ $($struct_m:ident),+, ], $dyn_m:path) => { + impl $crate::DynamicDataProvider<$dyn_m> for $provider + { + fn load_data( + &self, + key: $crate::DataKey, + req: $crate::DataRequest, + ) -> Result< + $crate::DataResponse<$dyn_m>, + $crate::DataError, + > { + #![allow(non_upper_case_globals)] + // Reusing the struct names as identifiers + $( + const $struct_m: $crate::DataKeyHash = $struct_m::KEY.hashed(); + )+ + match key.hashed() { + $( + $struct_m => { + let result: $crate::DataResponse<$struct_m> = + $crate::DataProvider::load(self, req)?; + Ok($crate::DataResponse { + metadata: result.metadata, + payload: result.payload.map(|p| { + $crate::dynutil::UpcastDataPayload::<$struct_m>::upcast(p) + }), + }) + } + )+, + _ => Err($crate::DataErrorKind::MissingDataKey.with_req(key, req)) + } + } + } + }; +} diff --git a/vendor/icu_provider/src/error.rs b/vendor/icu_provider/src/error.rs new file mode 100644 index 000000000..39bd1d0bb --- /dev/null +++ b/vendor/icu_provider/src/error.rs @@ -0,0 +1,267 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::buf::BufferFormat; +use crate::prelude::*; +use core::fmt; +use displaydoc::Display; + +/// A list specifying general categories of data provider error. +/// +/// Errors may be caused either by a malformed request or by the data provider +/// not being able to fulfill a well-formed request. +#[derive(Clone, Copy, Eq, PartialEq, Display, Debug)] +#[non_exhaustive] +pub enum DataErrorKind { + /// No data for the provided resource key. + #[displaydoc("Missing data for key")] + MissingDataKey, + + /// There is data for the key, but not for this particular locale. + #[displaydoc("Missing data for locale")] + MissingLocale, + + /// The request should include a locale. + #[displaydoc("Request needs a locale")] + NeedsLocale, + + /// The request should not contain a locale. + #[displaydoc("Request has an extraneous locale")] + ExtraneousLocale, + + /// The resource was blocked by a filter. The resource may or may not be available. + #[displaydoc("Resource blocked by filter")] + FilteredResource, + + /// The generic type parameter does not match the TypeId. The expected type name is stored + /// as context when this error is returned. + #[displaydoc("Mismatched types: tried to downcast with {0}, but actual type is different")] + MismatchedType(&'static str), + + /// The payload is missing. This is usually caused by a previous error. + #[displaydoc("Missing payload")] + MissingPayload, + + /// A data provider object was given to an operation in an invalid state. + #[displaydoc("Invalid state")] + InvalidState, + + /// An unspecified error occurred, such as a Serde error. + /// + /// Check debug logs for potentially more information. + #[displaydoc("Custom")] + Custom, + + /// An error occurred while accessing a system resource. + #[displaydoc("I/O error: {0:?}")] + #[cfg(feature = "std")] + Io(std::io::ErrorKind), + + /// An unspecified data source containing the required data is unavailable. + #[displaydoc("Missing source data")] + #[cfg(feature = "datagen")] + MissingSourceData, + + /// An error indicating that the desired buffer format is not available. This usually + /// means that a required feature was not enabled + #[displaydoc("Unavailable buffer format: {0:?} (does icu_provider need to be compiled with an additional feature?)")] + UnavailableBufferFormat(BufferFormat), +} + +/// The error type for ICU4X data provider operations. +/// +/// To create one of these, either start with a [`DataErrorKind`] or use [`DataError::custom()`]. +/// +/// # Example +/// +/// Create a NeedsLocale error and attach a data request for context: +/// +/// ```no_run +/// # use icu_provider::prelude::*; +/// let key: DataKey = unimplemented!(); +/// let req: DataRequest = unimplemented!(); +/// DataErrorKind::NeedsLocale.with_req(key, req); +/// ``` +/// +/// Create a named custom error: +/// +/// ``` +/// # use icu_provider::prelude::*; +/// DataError::custom("This is an example error"); +/// ``` +#[derive(Clone, Copy, Eq, PartialEq, Debug)] +#[non_exhaustive] +pub struct DataError { + /// Broad category of the error. + pub kind: DataErrorKind, + + /// The data key of the request, if available. + pub key: Option, + + /// Additional context, if available. + pub str_context: Option<&'static str>, +} + +impl fmt::Display for DataError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "ICU4X data error")?; + if self.kind != DataErrorKind::Custom { + write!(f, ": {}", self.kind)?; + } + if let Some(key) = self.key { + write!(f, " (key: {})", key)?; + } + if let Some(str_context) = self.str_context { + write!(f, ": {}", str_context)?; + } + Ok(()) + } +} + +impl DataErrorKind { + /// Converts this DataErrorKind into a DataError. + /// + /// If possible, you should attach context using a `with_` function. + #[inline] + pub const fn into_error(self) -> DataError { + DataError { + kind: self, + key: None, + str_context: None, + } + } + + /// Creates a DataError with a resource key context. + #[inline] + pub const fn with_key(self, key: DataKey) -> DataError { + self.into_error().with_key(key) + } + + /// Creates a DataError with a string context. + #[inline] + pub const fn with_str_context(self, context: &'static str) -> DataError { + self.into_error().with_str_context(context) + } + + /// Creates a DataError with a type name context. + #[inline] + pub fn with_type_context(self) -> DataError { + self.into_error().with_type_context::() + } + + /// Creates a DataError with a request context. + #[inline] + pub fn with_req(self, key: DataKey, req: DataRequest) -> DataError { + self.into_error().with_req(key, req) + } +} + +impl DataError { + /// Returns a new, empty DataError with kind Custom and a string error message. + #[inline] + pub const fn custom(str_context: &'static str) -> Self { + Self { + kind: DataErrorKind::Custom, + key: None, + str_context: Some(str_context), + } + } + + /// Sets the resource key of a DataError, returning a modified error. + #[inline] + pub const fn with_key(self, key: DataKey) -> Self { + Self { + kind: self.kind, + key: Some(key), + str_context: self.str_context, + } + } + + /// Sets the string context of a DataError, returning a modified error. + #[inline] + pub const fn with_str_context(self, context: &'static str) -> Self { + Self { + kind: self.kind, + key: self.key, + str_context: Some(context), + } + } + + /// Sets the string context of a DataError to the given type name, returning a modified error. + #[inline] + pub fn with_type_context(self) -> Self { + self.with_str_context(core::any::type_name::()) + } + + /// Logs the data error with the given request, returning an error containing the resource key. + /// + /// If the "log_error_context" feature is enabled, this logs the whole request. Either way, + /// it returns an error with the resource key portion of the request as context. + #[cfg_attr(not(feature = "log_error_context"), allow(unused_variables))] + pub fn with_req(self, key: DataKey, req: DataRequest) -> Self { + // Don't write out a log for MissingDataKey since there is no context to add + #[cfg(feature = "log_error_context")] + if self.kind != DataErrorKind::MissingDataKey { + log::warn!("{} (key: {}, request: {})", self, key, req); + } + self.with_key(key) + } + + /// Logs the data error with the given context, then return self. + /// + /// This does not modify the error, but if the "log_error_context" feature is enabled, + /// it will print out the context. + #[cfg(feature = "std")] + #[cfg_attr(not(feature = "log_error_context"), allow(unused_variables))] + pub fn with_path_context + ?Sized>(self, path: &P) -> Self { + #[cfg(feature = "log_error_context")] + log::warn!("{} (path: {:?})", self, path.as_ref()); + self + } + + /// Logs the data error with the given context, then return self. + /// + /// This does not modify the error, but if the "log_error_context" feature is enabled, + /// it will print out the context. + #[cfg_attr(not(feature = "log_error_context"), allow(unused_variables))] + #[inline] + pub fn with_display_context(self, context: &D) -> Self { + #[cfg(feature = "log_error_context")] + log::warn!("{}: {}", self, context); + self + } + + /// Logs the data error with the given context, then return self. + /// + /// This does not modify the error, but if the "log_error_context" feature is enabled, + /// it will print out the context. + #[cfg_attr(not(feature = "log_error_context"), allow(unused_variables))] + #[inline] + pub fn with_debug_context(self, context: &D) -> Self { + #[cfg(feature = "log_error_context")] + log::warn!("{}: {:?}", self, context); + self + } + + #[inline] + pub(crate) fn for_type() -> DataError { + DataError { + kind: DataErrorKind::MismatchedType(core::any::type_name::()), + key: None, + str_context: None, + } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for DataError {} + +#[cfg(feature = "std")] +impl From for DataError { + fn from(e: std::io::Error) -> Self { + #[cfg(feature = "log_error_context")] + log::warn!("I/O error: {}", e); + DataErrorKind::Io(e.kind()).into_error() + } +} diff --git a/vendor/icu_provider/src/hello_world.rs b/vendor/icu_provider/src/hello_world.rs new file mode 100644 index 000000000..5fa671d84 --- /dev/null +++ b/vendor/icu_provider/src/hello_world.rs @@ -0,0 +1,307 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Data provider returning multilingual "Hello World" strings for testing. + +#![allow(clippy::exhaustive_structs)] // data struct module + +use crate::buf::BufferFormat; +#[cfg(feature = "datagen")] +use crate::datagen::IterableDataProvider; +use crate::helpers; +use crate::prelude::*; +use crate::yoke::{self, *}; +use crate::zerofrom::{self, *}; +use alloc::borrow::Cow; +use alloc::string::String; +use core::fmt::Debug; +use writeable::Writeable; + +/// A struct containing "Hello World" in the requested language. +#[derive(Debug, PartialEq, Clone, Yokeable, ZeroFrom)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize))] +#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))] +#[cfg_attr(feature = "datagen", databake(path = icu_provider::hello_world))] +pub struct HelloWorldV1<'data> { + /// The translation of "Hello World". + #[cfg_attr(feature = "serde", serde(borrow))] + pub message: Cow<'data, str>, +} + +impl Default for HelloWorldV1<'_> { + fn default() -> Self { + HelloWorldV1 { + message: Cow::Borrowed("(und) Hello World"), + } + } +} + +/// Marker type for [`HelloWorldV1`]. +#[cfg_attr(feature = "datagen", derive(Default, databake::Bake))] +#[cfg_attr(feature = "datagen", databake(path = icu_provider::hello_world))] +pub struct HelloWorldV1Marker; + +impl DataMarker for HelloWorldV1Marker { + type Yokeable = HelloWorldV1<'static>; +} + +impl KeyedDataMarker for HelloWorldV1Marker { + const KEY: DataKey = crate::data_key!("core/helloworld@1"); +} + +/// A data provider returning Hello World strings in different languages. +/// +/// Mostly useful for testing. +/// +/// # Examples +/// +/// ``` +/// use icu_locid::locale; +/// use icu_provider::hello_world::*; +/// use icu_provider::prelude::*; +/// +/// let german_hello_world: DataPayload = +/// HelloWorldProvider +/// .load(DataRequest { +/// locale: &locale!("de").into(), +/// metadata: Default::default(), +/// }) +/// .expect("Loading should succeed") +/// .take_payload() +/// .expect("Data should be present"); +/// +/// assert_eq!("Hallo Welt", german_hello_world.get().message); +/// ``` +#[derive(Debug, PartialEq, Default)] +pub struct HelloWorldProvider; + +impl HelloWorldProvider { + // Data from https://en.wiktionary.org/wiki/Hello_World#Translations + // Keep this sorted! + const DATA: &'static [(&'static str, &'static str)] = &[ + ("bn", "ওহে বিশ্ব"), + ("cs", "Ahoj světe"), + ("de", "Hallo Welt"), + ("el", "Καλημέρα κόσμε"), + ("en", "Hello World"), + ("eo", "Saluton, Mondo"), + ("fa", "سلام دنیا‎"), + ("fi", "hei maailma"), + ("is", "Halló, heimur"), + ("ja", "こんにちは世界"), + ("la", "Ave, munde"), + ("pt", "Olá, mundo"), + ("ro", "Salut, lume"), + ("ru", "Привет, мир"), + ("vi", "Xin chào thế giới"), + ("zh", "你好世界"), + ]; + + /// Converts this provider into one that serves JSON blobs of the same data. + pub fn into_json_provider(self) -> HelloWorldJsonProvider { + HelloWorldJsonProvider(self) + } +} + +impl DataProvider for HelloWorldProvider { + fn load(&self, req: DataRequest) -> Result, DataError> { + #[allow(clippy::indexing_slicing)] // binary_search + let data = Self::DATA + .binary_search_by(|(k, _)| req.locale.strict_cmp(k.as_bytes()).reverse()) + .map(|i| Self::DATA[i].1) + .map(|s| HelloWorldV1 { + message: Cow::Borrowed(s), + }) + .map_err(|_| DataErrorKind::MissingLocale.with_req(HelloWorldV1Marker::KEY, req))?; + Ok(DataResponse { + metadata: Default::default(), + payload: Some(DataPayload::from_owned(data)), + }) + } +} + +impl DataPayload { + /// Make a [`DataPayload`]`<`[`HelloWorldV1Marker`]`>` from a static string slice. + pub fn from_static_str(s: &'static str) -> DataPayload { + DataPayload::from_owned(HelloWorldV1 { + message: Cow::Borrowed(s), + }) + } +} + +#[cfg(not(feature = "datagen"))] +impl_dynamic_data_provider!(HelloWorldProvider, [HelloWorldV1Marker,], AnyMarker); + +#[cfg(feature = "datagen")] +make_exportable_provider!(HelloWorldProvider, [HelloWorldV1Marker,]); + +/// A data provider returning Hello World strings in different languages as JSON blobs. +/// +/// Mostly useful for testing. +/// +/// # Examples +/// +/// ``` +/// use icu_locid::locale; +/// use icu_provider::hello_world::*; +/// use icu_provider::prelude::*; +/// +/// let german_hello_world = HelloWorldProvider +/// .into_json_provider() +/// .load_buffer(HelloWorldV1Marker::KEY, DataRequest { +/// locale: &locale!("de").into(), +/// metadata: Default::default(), +/// }) +/// .expect("Loading should succeed") +/// .take_payload() +/// .expect("Data should be present"); +/// +/// assert_eq!(b"{\"message\":\"Hallo Welt\"}", german_hello_world.get()); +pub struct HelloWorldJsonProvider(HelloWorldProvider); + +impl BufferProvider for HelloWorldJsonProvider { + fn load_buffer( + &self, + key: DataKey, + req: DataRequest, + ) -> Result, DataError> { + key.match_key(HelloWorldV1Marker::KEY)?; + let result = self.0.load(req)?; + let (mut metadata, old_payload) = + DataResponse::::take_metadata_and_payload(result)?; + metadata.buffer_format = Some(BufferFormat::Json); + let mut buffer = String::new(); + buffer.push_str("{\"message\":\""); + helpers::escape_for_json(&old_payload.get().message, &mut buffer); + buffer.push_str("\"}"); + Ok(DataResponse { + metadata, + payload: Some(DataPayload::from_owned_buffer( + buffer.into_bytes().into_boxed_slice(), + )), + }) + } +} + +/// A type that formats localized "hello world" strings. +/// +/// This type is intended to take the shape of a typical ICU4X formatter API. +/// +/// # Examples +/// +/// ``` +/// use icu_locid::locale; +/// use icu_provider::hello_world::{HelloWorldFormatter, HelloWorldProvider}; +/// use writeable::assert_writeable_eq; +/// +/// let fmt = HelloWorldFormatter::try_new_unstable( +/// &HelloWorldProvider, +/// &locale!("eo").into(), +/// ) +/// .expect("locale exists"); +/// +/// assert_writeable_eq!(fmt.format(), "Saluton, Mondo"); +/// ``` +pub struct HelloWorldFormatter { + data: DataPayload, +} + +/// A formatted hello world message. Implements [`Writeable`]. +/// +/// For an example, see [`HelloWorldFormatter`]. +pub struct FormattedHelloWorld<'l> { + data: &'l HelloWorldV1<'l>, +} + +impl HelloWorldFormatter { + /// Creates a new [`HelloWorldFormatter`] for the specified locale. + /// + /// See [`HelloWorldFormatter`] for an example. + /// + /// [📚 Help choosing a constructor](crate::constructors) + ///

+ /// ⚠️ The bounds on this function may change over time, including in SemVer minor releases. + ///
+ pub fn try_new_unstable

(provider: &P, locale: &DataLocale) -> Result + where + P: DataProvider, + { + let data = provider + .load(DataRequest { + locale, + metadata: Default::default(), + })? + .take_payload()?; + Ok(Self { data }) + } + + crate::gen_any_buffer_constructors!(locale: include, options: skip, error: DataError); + + /// Formats a hello world message, returning a [`FormattedHelloWorld`]. + #[allow(clippy::needless_lifetimes)] // documentary example + pub fn format<'l>(&'l self) -> FormattedHelloWorld<'l> { + FormattedHelloWorld { + data: self.data.get(), + } + } + + /// Formats a hello world message, returning a [`String`]. + pub fn format_to_string(&self) -> String { + self.format().write_to_string().into_owned() + } +} + +impl<'l> Writeable for FormattedHelloWorld<'l> { + fn write_to(&self, sink: &mut W) -> core::fmt::Result { + self.data.message.write_to(sink) + } + + fn write_to_string(&self) -> Cow { + self.data.message.clone() + } + + fn writeable_length_hint(&self) -> writeable::LengthHint { + self.data.message.writeable_length_hint() + } +} + +#[cfg(feature = "datagen")] +impl IterableDataProvider for HelloWorldProvider { + fn supported_locales(&self) -> Result, DataError> { + #[allow(clippy::unwrap_used)] // datagen + Ok(Self::DATA + .iter() + .map(|(s, _)| s.parse::().unwrap()) + .map(DataLocale::from) + .collect()) + } +} + +#[cfg(feature = "datagen")] +#[test] +fn test_iter() { + use icu_locid::locale; + + assert_eq!( + HelloWorldProvider.supported_locales().unwrap(), + vec![ + locale!("bn").into(), + locale!("cs").into(), + locale!("de").into(), + locale!("el").into(), + locale!("en").into(), + locale!("eo").into(), + locale!("fa").into(), + locale!("fi").into(), + locale!("is").into(), + locale!("ja").into(), + locale!("la").into(), + locale!("pt").into(), + locale!("ro").into(), + locale!("ru").into(), + locale!("vi").into(), + locale!("zh").into() + ] + ); +} diff --git a/vendor/icu_provider/src/helpers.rs b/vendor/icu_provider/src/helpers.rs new file mode 100644 index 000000000..8d499f7ef --- /dev/null +++ b/vendor/icu_provider/src/helpers.rs @@ -0,0 +1,369 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Internal helper functions. + +use alloc::string::String; + +/// Prints a JSON-safe string to the output. +pub fn escape_for_json<'o>(input: &str, output: &'o mut String) -> &'o mut String { + // From the ECMA-404 specification: + // "A string is a sequence of Unicode code points wrapped with quotation marks (U+0022). + // All code points may be placed within the quotation marks except for the code points + // that must be escaped: quotation mark (U+0022), reverse solidus (U+005C), and the + // control characters U+0000 to U+001F. There are two-character escape sequence + // representations of some characters." + for cp in input.chars() { + let str_to_append = match cp { + '\u{0000}' => "\\u0000", + '\u{0001}' => "\\u0001", + '\u{0002}' => "\\u0002", + '\u{0003}' => "\\u0003", + '\u{0004}' => "\\u0004", + '\u{0005}' => "\\u0005", + '\u{0006}' => "\\u0006", + '\u{0007}' => "\\u0007", + '\u{0008}' => "\\b", + '\u{0009}' => "\\t", + '\u{000A}' => "\\n", + '\u{000B}' => "\\u000B", + '\u{000C}' => "\\f", + '\u{000D}' => "\\r", + '\u{000E}' => "\\u000E", + '\u{000F}' => "\\u000F", + '\u{0010}' => "\\u0010", + '\u{0011}' => "\\u0011", + '\u{0012}' => "\\u0012", + '\u{0013}' => "\\u0013", + '\u{0014}' => "\\u0014", + '\u{0015}' => "\\u0015", + '\u{0016}' => "\\u0016", + '\u{0017}' => "\\u0017", + '\u{0018}' => "\\u0018", + '\u{0019}' => "\\u0019", + '\u{001A}' => "\\u001A", + '\u{001B}' => "\\u001B", + '\u{001C}' => "\\u001C", + '\u{001D}' => "\\u001D", + '\u{001E}' => "\\u001E", + '\u{001F}' => "\\u001F", + '\u{0022}' => "\\\"", + '\u{005C}' => "\\\\", + cp => { + output.push(cp); + continue; + } + }; + output.push_str(str_to_append); + } + output +} + +#[test] +fn test_escape_for_json() { + assert_eq!("", escape_for_json("", &mut String::new())); + assert_eq!("abc", escape_for_json("abc", &mut String::new())); + assert_eq!("ab\\nc", escape_for_json("ab\nc", &mut String::new())); + assert_eq!("ab\\\\c", escape_for_json("ab\\c", &mut String::new())); + assert_eq!("ab\\\"c", escape_for_json("ab\"c", &mut String::new())); + assert_eq!( + "ab\\u0000c", + escape_for_json("ab\u{0000}c", &mut String::new()) + ); + assert_eq!( + "ab\\u001Fc", + escape_for_json("ab\u{001F}c", &mut String::new()) + ); +} + +/// Const function to compute the FxHash of a byte array with little-endian byte order. +/// +/// FxHash is a speedy hash algorithm used within rustc. The algorithm is satisfactory for our +/// use case since the strings being hashed originate from a trusted source (the ICU4X +/// components), and the hashes are computed at compile time, so we can check for collisions. +/// +/// We could have considered a SHA or other cryptographic hash function. However, we are using +/// FxHash because: +/// +/// 1. There is precedent for this algorithm in Rust +/// 2. The algorithm is easy to implement as a const function +/// 3. The amount of code is small enough that we can reasonably keep the algorithm in-tree +/// 4. FxHash is designed to output 32-bit or 64-bit values, whereas SHA outputs more bits, +/// such that truncation would be required in order to fit into a u32, partially reducing +/// the benefit of a cryptographically secure algorithm +// The indexing operations in this function have been reviewed in detail and won't panic. +#[allow(clippy::indexing_slicing)] +pub const fn fxhash_32(bytes: &[u8], ignore_leading: usize, ignore_trailing: usize) -> u32 { + // This code is adapted from https://github.com/rust-lang/rustc-hash, + // whose license text is reproduced below. + // + // Copyright 2015 The Rust Project Developers. See the COPYRIGHT + // file at the top-level directory of this distribution and at + // http://rust-lang.org/COPYRIGHT. + // + // Licensed under the Apache License, Version 2.0 or the MIT license + // , at your + // option. This file may not be copied, modified, or distributed + // except according to those terms. + + if ignore_leading + ignore_trailing >= bytes.len() { + return 0; + } + + #[inline] + const fn hash_word_32(mut hash: u32, word: u32) -> u32 { + const ROTATE: u32 = 5; + const SEED32: u32 = 0x9e_37_79_b9; + hash = hash.rotate_left(ROTATE); + hash ^= word; + hash = hash.wrapping_mul(SEED32); + hash + } + + let mut cursor = ignore_leading; + let end = bytes.len() - ignore_trailing; + let mut hash = 0; + + while end - cursor >= 4 { + let word = u32::from_le_bytes([ + bytes[cursor], + bytes[cursor + 1], + bytes[cursor + 2], + bytes[cursor + 3], + ]); + hash = hash_word_32(hash, word); + cursor += 4; + } + + if end - cursor >= 2 { + let word = u16::from_le_bytes([bytes[cursor], bytes[cursor + 1]]); + hash = hash_word_32(hash, word as u32); + cursor += 2; + } + + if end - cursor >= 1 { + hash = hash_word_32(hash, bytes[cursor] as u32); + } + + hash +} + +#[test] +fn test_hash_word_32() { + assert_eq!(0, fxhash_32(b"", 0, 0)); + assert_eq!(0, fxhash_32(b"a", 1, 0)); + assert_eq!(0, fxhash_32(b"a", 0, 1)); + assert_eq!(0, fxhash_32(b"a", 0, 10)); + assert_eq!(0, fxhash_32(b"a", 10, 0)); + assert_eq!(0, fxhash_32(b"a", 1, 1)); + assert_eq!(0xF3051F19, fxhash_32(b"a", 0, 0)); + assert_eq!(0x2F9DF119, fxhash_32(b"ab", 0, 0)); + assert_eq!(0xCB1D9396, fxhash_32(b"abc", 0, 0)); + assert_eq!(0x8628F119, fxhash_32(b"abcd", 0, 0)); + assert_eq!(0xBEBDB56D, fxhash_32(b"abcde", 0, 0)); + assert_eq!(0x1CE8476D, fxhash_32(b"abcdef", 0, 0)); + assert_eq!(0xC0F176A4, fxhash_32(b"abcdefg", 0, 0)); + assert_eq!(0x09AB476D, fxhash_32(b"abcdefgh", 0, 0)); + assert_eq!(0xB72F5D88, fxhash_32(b"abcdefghi", 0, 0)); + + assert_eq!( + fxhash_32(crate::tagged!("props/sc=Khmr@1").as_bytes(), 0, 0), + fxhash_32(crate::tagged!("props/sc=Samr@1").as_bytes(), 0, 0) + ); + + assert_ne!( + fxhash_32( + crate::tagged!("props/sc=Khmr@1").as_bytes(), + crate::leading_tag!().len(), + crate::trailing_tag!().len() + ), + fxhash_32( + crate::tagged!("props/sc=Samr@1").as_bytes(), + crate::leading_tag!().len(), + crate::trailing_tag!().len() + ) + ); +} + +#[doc(hidden)] +#[macro_export] +macro_rules! gen_any_buffer_docs { + (ANY, $krate:path, $see_also:path) => { + concat!( + "Creates a new instance using an [`AnyProvider`](", + stringify!($krate), + "::AnyProvider).\n\n", + "For details on the behavior of this function, see: [`", + stringify!($see_also), + "`]\n\n", + "[📚 Help choosing a constructor](", + stringify!($krate), + "::constructors)", + ) + }; + (BUFFER, $krate:path, $see_also:path) => { + concat!( + "✨ **Enabled with the `\"serde\"` feature.**\n\n", + "Creates a new instance using a [`BufferProvider`](", + stringify!($krate), + "::BufferProvider).\n\n", + "For details on the behavior of this function, see: [`", + stringify!($see_also), + "`]\n\n", + "[📚 Help choosing a constructor](", + stringify!($krate), + "::constructors)", + ) + }; +} + +#[doc(hidden)] +#[macro_export] +macro_rules! gen_any_buffer_constructors { + (locale: skip, options: skip, error: $error_ty:path) => { + $crate::gen_any_buffer_constructors!( + locale: skip, + options: skip, + error: $error_ty, + functions: [ + Self::try_new_unstable, + try_new_with_any_provider, + try_new_with_buffer_provider + ] + ); + }; + (locale: skip, options: skip, error: $error_ty:path, functions: [$f1:path, $f2:ident, $f3:ident]) => { + #[doc = $crate::gen_any_buffer_docs!(ANY, $crate, $f1)] + pub fn $f2(provider: &(impl $crate::AnyProvider + ?Sized)) -> Result { + use $crate::AsDowncastingAnyProvider; + $f1(&provider.as_downcasting()) + } + #[cfg(feature = "serde")] + #[doc = $crate::gen_any_buffer_docs!(BUFFER, $crate, $f1)] + pub fn $f3(provider: &(impl $crate::BufferProvider + ?Sized)) -> Result { + use $crate::AsDeserializingBufferProvider; + $f1(&provider.as_deserializing()) + } + }; + + (locale: skip, $options_arg:ident: $options_ty:path, error: $error_ty:path) => { + $crate::gen_any_buffer_constructors!( + locale: skip, + $options_arg: $options_ty, + error: $error_ty, + functions: [ + Self::try_new_unstable, + try_new_with_any_provider, + try_new_with_buffer_provider + ] + ); + }; + (locale: skip, $options_arg:ident: $options_ty:path, error: $error_ty:path, functions: [$f1:path, $f2:ident, $f3:ident]) => { + #[doc = $crate::gen_any_buffer_docs!(ANY, $crate, $f1)] + pub fn $f2(provider: &(impl $crate::AnyProvider + ?Sized), $options_arg: $options_ty) -> Result { + use $crate::AsDowncastingAnyProvider; + $f1(&provider.as_downcasting(), $options_arg) + } + #[cfg(feature = "serde")] + #[doc = $crate::gen_any_buffer_docs!(BUFFER, $crate, $f1)] + pub fn $f3(provider: &(impl $crate::BufferProvider + ?Sized), $options_arg: $options_ty) -> Result { + use $crate::AsDeserializingBufferProvider; + $f1(&provider.as_deserializing(), $options_arg) + } + }; + + (locale: skip, options: skip, result: $result_ty:path, functions: [$f1:path, $f2:ident, $f3:ident]) => { + #[doc = $crate::gen_any_buffer_docs!(ANY, $crate, $f1)] + pub fn $f2(provider: &(impl $crate::AnyProvider + ?Sized)) -> $result_ty { + use $crate::AsDowncastingAnyProvider; + $f1(&provider.as_downcasting()) + } + #[cfg(feature = "serde")] + #[doc = $crate::gen_any_buffer_docs!(BUFFER, $crate, $f1)] + pub fn $f3(provider: &(impl $crate::BufferProvider + ?Sized)) -> $result_ty { + use $crate::AsDeserializingBufferProvider; + $f1(&provider.as_deserializing()) + } + }; + + (locale: include, options: skip, error: $error_ty:path) => { + $crate::gen_any_buffer_constructors!( + locale: include, + options: skip, + error: $error_ty, + functions: [ + Self::try_new_unstable, + try_new_with_any_provider, + try_new_with_buffer_provider + ] + ); + }; + (locale: include, options: skip, error: $error_ty:path, functions: [$f1:path, $f2:ident, $f3:ident]) => { + #[doc = $crate::gen_any_buffer_docs!(ANY, $crate, $f1)] + pub fn $f2(provider: &(impl $crate::AnyProvider + ?Sized), locale: &$crate::DataLocale) -> Result { + use $crate::AsDowncastingAnyProvider; + $f1(&provider.as_downcasting(), locale) + } + #[cfg(feature = "serde")] + #[doc = $crate::gen_any_buffer_docs!(BUFFER, $crate, $f1)] + pub fn $f3(provider: &(impl $crate::BufferProvider + ?Sized), locale: &$crate::DataLocale) -> Result { + use $crate::AsDeserializingBufferProvider; + $f1(&provider.as_deserializing(), locale) + } + }; + + (locale: include, $config_arg:ident: $config_ty:path, $options_arg:ident: $options_ty:path, error: $error_ty:path) => { + $crate::gen_any_buffer_constructors!( + locale: include, + $config_arg: $config_ty, + $options_arg: $options_ty, + error: $error_ty, + functions: [ + Self::try_new_unstable, + try_new_with_any_provider, + try_new_with_buffer_provider + ] + ); + }; + (locale: include, $config_arg:ident: $config_ty:path, $options_arg:ident: $options_ty:path, error: $error_ty:path, functions: [$f1:path, $f2:ident, $f3:ident]) => { + #[doc = $crate::gen_any_buffer_docs!(ANY, $crate, $f1)] + pub fn $f2(provider: &(impl $crate::AnyProvider + ?Sized), locale: &$crate::DataLocale, $config_arg: $config_ty, $options_arg: $options_ty) -> Result { + use $crate::AsDowncastingAnyProvider; + $f1(&provider.as_downcasting(), locale, $config_arg, $options_arg) + } + #[cfg(feature = "serde")] + #[doc = $crate::gen_any_buffer_docs!(BUFFER, $crate, $f1)] + pub fn $f3(provider: &(impl $crate::BufferProvider + ?Sized), locale: &$crate::DataLocale, $config_arg: $config_ty, $options_arg: $options_ty) -> Result { + use $crate::AsDeserializingBufferProvider; + $f1(&provider.as_deserializing(), locale, $config_arg, $options_arg) + } + }; + + (locale: include, $options_arg:ident: $options_ty:path, error: $error_ty:path) => { + $crate::gen_any_buffer_constructors!( + locale: include, + $options_arg: $options_ty, + error: $error_ty, + functions: [ + Self::try_new_unstable, + try_new_with_any_provider, + try_new_with_buffer_provider + ] + ); + }; + (locale: include, $options_arg:ident: $options_ty:path, error: $error_ty:path, functions: [$f1:path, $f2:ident, $f3:ident]) => { + #[doc = $crate::gen_any_buffer_docs!(ANY, $crate, $f1)] + pub fn $f2(provider: &(impl $crate::AnyProvider + ?Sized), locale: &$crate::DataLocale, $options_arg: $options_ty) -> Result { + use $crate::AsDowncastingAnyProvider; + $f1(&provider.as_downcasting(), locale, $options_arg) + } + #[cfg(feature = "serde")] + #[doc = $crate::gen_any_buffer_docs!(BUFFER, $crate, $f1)] + pub fn $f3(provider: &(impl $crate::BufferProvider + ?Sized), locale: &$crate::DataLocale, $options_arg: $options_ty) -> Result { + use $crate::AsDeserializingBufferProvider; + $f1(&provider.as_deserializing(), locale, $options_arg) + } + }; +} diff --git a/vendor/icu_provider/src/key.rs b/vendor/icu_provider/src/key.rs new file mode 100644 index 000000000..2f55e4d46 --- /dev/null +++ b/vendor/icu_provider/src/key.rs @@ -0,0 +1,660 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::error::{DataError, DataErrorKind}; +use crate::helpers; + +use alloc::borrow::Cow; +use core::fmt; +use core::fmt::Write; +use core::ops::Deref; +use writeable::{LengthHint, Writeable}; +use zerovec::ule::*; + +#[doc(hidden)] +#[macro_export] +macro_rules! leading_tag { + () => { + "\nicu4x_key_tag" + }; +} + +#[doc(hidden)] +#[macro_export] +macro_rules! trailing_tag { + () => { + "\n" + }; +} + +#[doc(hidden)] +#[macro_export] +macro_rules! tagged { + ($without_tags:expr) => { + concat!( + $crate::leading_tag!(), + $without_tags, + $crate::trailing_tag!() + ) + }; +} + +/// A compact hash of a [`DataKey`]. Useful for keys in maps. +/// +/// The hash will be stable over time within major releases. +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, ULE)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[repr(transparent)] +pub struct DataKeyHash([u8; 4]); + +impl DataKeyHash { + const fn compute_from_path(path: DataKeyPath) -> Self { + let hash = helpers::fxhash_32( + path.tagged.as_bytes(), + leading_tag!().len(), + trailing_tag!().len(), + ); + Self(hash.to_le_bytes()) + } + + /// Gets the hash value as a byte array. + pub const fn to_bytes(self) -> [u8; 4] { + self.0 + } +} + +impl<'a> zerovec::maps::ZeroMapKV<'a> for DataKeyHash { + type Container = zerovec::ZeroVec<'a, DataKeyHash>; + type Slice = zerovec::ZeroSlice; + type GetType = ::ULE; + type OwnedType = DataKeyHash; +} + +impl AsULE for DataKeyHash { + type ULE = Self; + #[inline] + fn to_unaligned(self) -> Self::ULE { + self + } + #[inline] + fn from_unaligned(unaligned: Self::ULE) -> Self { + unaligned + } +} + +// Safe since the ULE type is `self`. +unsafe impl EqULE for DataKeyHash {} + +/// Hint for what to prioritize during fallback when data is unavailable. +/// +/// For example, if `"en-US"` is requested, but we have no data for that specific locale, +/// fallback may take us to `"en"` or `"und-US"` to check for data. +#[derive(Debug, PartialEq, Eq, Copy, Clone, PartialOrd, Ord)] +#[non_exhaustive] +pub enum FallbackPriority { + /// Prioritize the language. This is the default behavior. + /// + /// For example, `"en-US"` should go to `"en"` and then `"und"`. + Language, + /// Prioritize the region. + /// + /// For example, `"en-US"` should go to `"und-US"` and then `"und"`. + Region, + /// Collation-specific fallback rules. Similar to language priority. + /// + /// For example, `"zh-Hant"` goes to `"zh"` before `"und"`. + Collation, +} + +impl FallbackPriority { + /// Const-friendly version of [`Default::default`]. + pub const fn const_default() -> Self { + Self::Language + } +} + +impl Default for FallbackPriority { + fn default() -> Self { + Self::const_default() + } +} + +/// What additional data to load when performing fallback. +#[derive(Debug, PartialEq, Eq, Copy, Clone, PartialOrd, Ord)] +#[non_exhaustive] +pub enum FallbackSupplement { + /// Collation supplement; see `CollationFallbackSupplementV1Marker` + Collation, +} + +/// The string path of a data key. For example, "foo@1" +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct DataKeyPath { + // This string literal is wrapped in leading_tag!() and trailing_tag!() to make it detectable + // in a compiled binary. + tagged: &'static str, +} + +impl DataKeyPath { + /// Gets the path as a static string slice. + #[inline] + pub const fn get(self) -> &'static str { + /// core::slice::from_raw_parts(a, b) = core::mem::transmute((a, b)) hack + /// ```compile_fail + /// const unsafe fn canary() { core::slice::from_raw_parts(0 as *const u8, 0); } + /// ``` + const _: () = (); + unsafe { + // Safe due to invariant that self.path is tagged correctly + core::str::from_utf8_unchecked(core::mem::transmute(( + self.tagged.as_ptr().add(leading_tag!().len()), + self.tagged.len() - trailing_tag!().len() - leading_tag!().len(), + ))) + } + } +} + +impl Deref for DataKeyPath { + type Target = str; + #[inline] + fn deref(&self) -> &Self::Target { + self.get() + } +} + +/// Metadata statically associated with a particular [`DataKey`]. +#[derive(Debug, PartialEq, Eq, Copy, Clone, PartialOrd, Ord)] +#[non_exhaustive] +pub struct DataKeyMetadata { + /// What to prioritize when fallbacking on this [`DataKey`]. + pub fallback_priority: FallbackPriority, + /// A Unicode extension keyword to consider when loading data for this [`DataKey`]. + pub extension_key: Option, + /// Optional choice for additional fallbacking data required for loading this marker. + /// + /// For more information, see `LocaleFallbackConfig::fallback_supplement`. + pub fallback_supplement: Option, +} + +impl DataKeyMetadata { + /// Const-friendly version of [`Default::default`]. + pub const fn const_default() -> Self { + Self { + fallback_priority: FallbackPriority::const_default(), + extension_key: None, + fallback_supplement: None, + } + } + + #[doc(hidden)] + pub const fn construct_internal( + fallback_priority: FallbackPriority, + extension_key: Option, + fallback_supplement: Option, + ) -> Self { + Self { + fallback_priority, + extension_key, + fallback_supplement, + } + } +} + +impl Default for DataKeyMetadata { + #[inline] + fn default() -> Self { + Self::const_default() + } +} + +/// Used for loading data from an ICU4X data provider. +/// +/// A resource key is tightly coupled with the code that uses it to load data at runtime. +/// Executables can be searched for `DataKey` instances to produce optimized data files. +/// Therefore, users should not generally create DataKey instances; they should instead use +/// the ones exported by a component. +/// +/// `DataKey`s are created with the [`data_key!`] macro: +/// +/// ``` +/// # use icu_provider::prelude::DataKey; +/// const K: DataKey = icu_provider::data_key!("foo/bar@1"); +/// ``` +/// +/// The human-readable path string ends with `@` followed by one or more digits (the version +/// number). Paths do not contain characters other than ASCII letters and digits, `_`, `/`. +/// +/// Invalid paths are compile-time errors (as [`data_key!`] uses `const`). +/// +/// ```compile_fail,E0080 +/// # use icu_provider::prelude::DataKey; +/// const K: DataKey = icu_provider::data_key!("foo/../bar@1"); +/// ``` +#[derive(Copy, Clone)] +pub struct DataKey { + path: DataKeyPath, + hash: DataKeyHash, + metadata: DataKeyMetadata, +} + +impl PartialEq for DataKey { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.hash == other.hash && self.path == other.path && self.metadata == other.metadata + } +} + +impl Eq for DataKey {} + +impl Ord for DataKey { + fn cmp(&self, other: &Self) -> core::cmp::Ordering { + self.path + .cmp(&other.path) + .then_with(|| self.metadata.cmp(&other.metadata)) + } +} + +impl PartialOrd for DataKey { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl core::hash::Hash for DataKey { + #[inline] + fn hash(&self, state: &mut H) { + self.hash.hash(state) + } +} + +impl DataKey { + /// Gets a human-readable representation of a [`DataKey`]. + /// + /// The human-readable path string ends with `@` followed by one or more digits (the version + /// number). Paths do not contain characters other than ASCII letters and digits, `_`, `/`. + /// + /// Useful for reading and writing data to a file system. + #[inline] + pub const fn path(self) -> DataKeyPath { + self.path + } + + /// Gets a platform-independent hash of a [`DataKey`]. + /// + /// The hash is 4 bytes and allows for fast key comparison. + /// + /// # Example + /// + /// ``` + /// use icu_provider::DataKey; + /// use icu_provider::DataKeyHash; + /// + /// const KEY: DataKey = icu_provider::data_key!("foo@1"); + /// const KEY_HASH: DataKeyHash = KEY.hashed(); + /// + /// assert_eq!(KEY_HASH.to_bytes(), [0xe2, 0xb6, 0x17, 0x71]); + /// ``` + #[inline] + pub const fn hashed(self) -> DataKeyHash { + self.hash + } + + /// Gets the metadata associated with this [`DataKey`]. + #[inline] + pub const fn metadata(self) -> DataKeyMetadata { + self.metadata + } + + /// Constructs a [`DataKey`] from a path and metadata. + /// + /// # Examples + /// + /// ``` + /// use icu_provider::data_key; + /// use icu_provider::DataKey; + /// + /// const CONST_KEY: DataKey = data_key!("foo@1"); + /// + /// let runtime_key = + /// DataKey::from_path_and_metadata(CONST_KEY.path(), CONST_KEY.metadata()); + /// + /// assert_eq!(CONST_KEY, runtime_key); + /// ``` + #[inline] + pub const fn from_path_and_metadata(path: DataKeyPath, metadata: DataKeyMetadata) -> Self { + Self { + path, + hash: DataKeyHash::compute_from_path(path), + metadata, + } + } + + #[doc(hidden)] + // Error is a str of the expected character class and the index where it wasn't encountered + // The indexing operations in this function have been reviewed in detail and won't panic. + #[allow(clippy::indexing_slicing)] + pub const fn construct_internal( + path: &'static str, + metadata: DataKeyMetadata, + ) -> Result { + if path.len() < leading_tag!().len() + trailing_tag!().len() { + return Err(("tag", 0)); + } + // Start and end of the untagged part + let start = leading_tag!().len(); + let end = path.len() - trailing_tag!().len(); + + // Check tags + let mut i = 0; + while i < leading_tag!().len() { + if path.as_bytes()[i] != leading_tag!().as_bytes()[i] { + return Err(("tag", 0)); + } + i += 1; + } + i = 0; + while i < trailing_tag!().len() { + if path.as_bytes()[end + i] != trailing_tag!().as_bytes()[i] { + return Err(("tag", end + 1)); + } + i += 1; + } + + match Self::validate_path_manual_slice(path, start, end) { + Ok(()) => (), + Err(e) => return Err(e), + }; + + let path = DataKeyPath { tagged: path }; + + Ok(Self { + path, + hash: DataKeyHash::compute_from_path(path), + metadata, + }) + } + + const fn validate_path_manual_slice( + path: &'static str, + start: usize, + end: usize, + ) -> Result<(), (&'static str, usize)> { + debug_assert!(start <= end); + debug_assert!(end <= path.len()); + // Regex: [a-zA-Z0-9_][a-zA-Z0-9_/]*@[0-9]+ + enum State { + Empty, + Body, + At, + Version, + } + use State::*; + let mut i = start; + let mut state = Empty; + loop { + let byte = if i < end { + #[allow(clippy::indexing_slicing)] // protected by debug assertion + Some(path.as_bytes()[i]) + } else { + None + }; + state = match (state, byte) { + (Empty | Body, Some(b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_')) => Body, + (Body, Some(b'/')) => Body, + (Body, Some(b'@')) => At, + (At | Version, Some(b'0'..=b'9')) => Version, + // One of these cases will be hit at the latest when i == end, so the loop converges. + (Version, None) => { + return Ok(()); + } + + (Empty, _) => return Err(("[a-zA-Z0-9_]", i)), + (Body, _) => return Err(("[a-zA-z0-9_/@]", i)), + (At, _) => return Err(("[0-9]", i)), + (Version, _) => return Err(("[0-9]", i)), + }; + i += 1; + } + } + + /// Returns [`Ok`] if this data key matches the argument, or the appropriate error. + /// + /// Convenience method for data providers that support a single [`DataKey`]. + /// + /// # Examples + /// + /// ``` + /// use icu_provider::prelude::*; + /// + /// const FOO_BAR: DataKey = icu_provider::data_key!("foo/bar@1"); + /// const FOO_BAZ: DataKey = icu_provider::data_key!("foo/baz@1"); + /// const BAR_BAZ: DataKey = icu_provider::data_key!("bar/baz@1"); + /// + /// assert!(matches!(FOO_BAR.match_key(FOO_BAR), Ok(()))); + /// assert!(matches!( + /// FOO_BAR.match_key(FOO_BAZ), + /// Err(DataError { + /// kind: DataErrorKind::MissingDataKey, + /// .. + /// }) + /// )); + /// assert!(matches!( + /// FOO_BAR.match_key(BAR_BAZ), + /// Err(DataError { + /// kind: DataErrorKind::MissingDataKey, + /// .. + /// }) + /// )); + /// + /// // The error context contains the argument: + /// assert_eq!(FOO_BAR.match_key(BAR_BAZ).unwrap_err().key, Some(BAR_BAZ)); + /// ``` + pub fn match_key(self, key: Self) -> Result<(), DataError> { + if self == key { + Ok(()) + } else { + Err(DataErrorKind::MissingDataKey.with_key(key)) + } + } +} + +/// See [`DataKey`]. +#[macro_export] +macro_rules! data_key { + ($path:expr) => {{ + $crate::data_key!($path, $crate::DataKeyMetadata::const_default()) + }}; + ($path:expr, $metadata:expr) => {{ + // Force the DataKey into a const context + const RESOURCE_KEY_MACRO_CONST: $crate::DataKey = { + match $crate::DataKey::construct_internal($crate::tagged!($path), $metadata) { + Ok(v) => v, + #[allow(clippy::panic)] // Const context + Err(_) => panic!(concat!("Invalid resource key: ", $path)), + // TODO Once formatting is const: + // Err((expected, index)) => panic!( + // "Invalid resource key {:?}: expected {:?}, found {:?} ", + // $path, + // expected, + // $crate::tagged!($path).get(index..)) + // ); + } + }; + RESOURCE_KEY_MACRO_CONST + }}; +} + +impl fmt::Debug for DataKey { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str("DataKey{")?; + fmt::Display::fmt(self, f)?; + f.write_char('}')?; + Ok(()) + } +} + +impl Writeable for DataKey { + fn write_to(&self, sink: &mut W) -> core::fmt::Result { + self.path().write_to(sink) + } + + fn writeable_length_hint(&self) -> LengthHint { + self.path().writeable_length_hint() + } + + fn write_to_string(&self) -> Cow { + Cow::Borrowed(self.path().get()) + } +} + +writeable::impl_display_with_writeable!(DataKey); + +#[test] +fn test_path_syntax() { + // Valid keys: + DataKey::construct_internal(tagged!("hello/world@1"), Default::default()).unwrap(); + DataKey::construct_internal(tagged!("hello/world/foo@1"), Default::default()).unwrap(); + DataKey::construct_internal(tagged!("hello/world@999"), Default::default()).unwrap(); + DataKey::construct_internal(tagged!("hello_world/foo@1"), Default::default()).unwrap(); + DataKey::construct_internal(tagged!("hello_458/world@1"), Default::default()).unwrap(); + DataKey::construct_internal(tagged!("hello_world@1"), Default::default()).unwrap(); + + // No version: + assert_eq!( + DataKey::construct_internal(tagged!("hello/world"), Default::default()), + Err(( + "[a-zA-z0-9_/@]", + concat!(leading_tag!(), "hello/world").len() + )) + ); + + assert_eq!( + DataKey::construct_internal(tagged!("hello/world@"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "hello/world@").len())) + ); + assert_eq!( + DataKey::construct_internal(tagged!("hello/world@foo"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "hello/world@").len())) + ); + assert_eq!( + DataKey::construct_internal(tagged!("hello/world@1foo"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "hello/world@1").len())) + ); + + // Meta no longer accepted: + assert_eq!( + DataKey::construct_internal(tagged!("foo@1[R]"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "foo@1").len())) + ); + assert_eq!( + DataKey::construct_internal(tagged!("foo@1[u-ca]"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "foo@1").len())) + ); + assert_eq!( + DataKey::construct_internal(tagged!("foo@1[R][u-ca]"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "foo@1").len())) + ); + + // Invalid meta: + assert_eq!( + DataKey::construct_internal(tagged!("foo@1[U]"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "foo@1").len())) + ); + assert_eq!( + DataKey::construct_internal(tagged!("foo@1[uca]"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "foo@1").len())) + ); + assert_eq!( + DataKey::construct_internal(tagged!("foo@1[u-"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "foo@1").len())) + ); + assert_eq!( + DataKey::construct_internal(tagged!("foo@1[u-caa]"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "foo@1").len())) + ); + assert_eq!( + DataKey::construct_internal(tagged!("foo@1[R"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "foo@1").len())) + ); + + // Invalid characters: + assert_eq!( + DataKey::construct_internal(tagged!("你好/世界@1"), Default::default()), + Err(("[a-zA-Z0-9_]", leading_tag!().len())) + ); + + // Invalid tag: + assert_eq!( + DataKey::construct_internal( + concat!("hello/world@1", trailing_tag!()), + Default::default() + ), + Err(("tag", 0)) + ); + assert_eq!( + DataKey::construct_internal(concat!(leading_tag!(), "hello/world@1"), Default::default()), + Err(("tag", concat!(leading_tag!(), "hello/world@1").len())) + ); + assert_eq!( + DataKey::construct_internal("hello/world@1", Default::default()), + Err(("tag", 0)) + ); +} + +#[test] +fn test_key_to_string() { + struct KeyTestCase { + pub key: DataKey, + pub expected: &'static str, + } + + for cas in [ + KeyTestCase { + key: data_key!("core/cardinal@1"), + expected: "core/cardinal@1", + }, + KeyTestCase { + key: data_key!("core/maxlengthsubcatg@1"), + expected: "core/maxlengthsubcatg@1", + }, + KeyTestCase { + key: data_key!("core/cardinal@65535"), + expected: "core/cardinal@65535", + }, + ] { + assert_eq!(cas.expected, cas.key.to_string()); + writeable::assert_writeable_eq!(&cas.key, cas.expected); + } +} + +#[test] +fn test_key_hash() { + struct KeyTestCase { + pub key: DataKey, + pub hash: DataKeyHash, + pub path: &'static str, + } + + for cas in [ + KeyTestCase { + key: data_key!("core/cardinal@1"), + hash: DataKeyHash([172, 207, 42, 236]), + path: "core/cardinal@1", + }, + KeyTestCase { + key: data_key!("core/maxlengthsubcatg@1"), + hash: DataKeyHash([193, 6, 79, 61]), + path: "core/maxlengthsubcatg@1", + }, + KeyTestCase { + key: data_key!("core/cardinal@65535"), + hash: DataKeyHash([176, 131, 182, 223]), + path: "core/cardinal@65535", + }, + ] { + assert_eq!(cas.hash, cas.key.hashed(), "{}", cas.path); + assert_eq!(cas.path, &*cas.key.path(), "{}", cas.path); + } +} diff --git a/vendor/icu_provider/src/lib.rs b/vendor/icu_provider/src/lib.rs new file mode 100644 index 000000000..594e872f4 --- /dev/null +++ b/vendor/icu_provider/src/lib.rs @@ -0,0 +1,214 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! `icu_provider` is one of the [`ICU4X`] components. +//! +//! Unicode's experience with ICU4X's parent projects, ICU4C and ICU4J, led the team to realize +//! that data management is the most critical aspect of deploying internationalization, and that it requires +//! a high level of customization for the needs of the platform it is embedded in. As a result +//! ICU4X comes with a selection of providers that should allow for ICU4X to naturally fit into +//! different business and technological needs of customers. +//! +//! `icu_provider` defines traits and structs for transmitting data through the ICU4X locale +//! data pipeline. The primary trait is [`DataProvider`]. It is parameterized by a +//! [`KeyedDataMarker`], which contains the data type and a [`DataKey`]. It has one method, +//! [`DataProvider::load`], which transforms a [`DataRequest`] +//! into a [`DataResponse`]. +//! +//! - [`DataKey`] is a fixed identifier for the data type, such as `"plurals/cardinal@1"`. +//! - [`DataRequest`] contains additional annotations to choose a specific variant of the key, +//! such as a locale. +//! - [`DataResponse`] contains the data if the request was successful. +//! +//! In addition, there are three other traits which are widely implemented: +//! +//! - [`AnyProvider`] returns data as `dyn Any` trait objects. +//! - [`BufferProvider`] returns data as `[u8]` buffers. +//! - [`DynamicDataProvider`] returns structured data but is not specific to a key. +//! +//! The most common types required for this crate are included via the prelude: +//! +//! ``` +//! use icu_provider::prelude::*; +//! ``` +//! +//! ## Types of Data Providers +//! +//! All nontrivial data providers can fit into one of two classes. +//! +//! 1. [`AnyProvider`]: Those whose data originates as structured Rust objects +//! 2. [`BufferProvider`]: Those whose data originates as unstructured `[u8]` buffers +//! +//! **✨ Key Insight:** A given data provider is generally *either* an [`AnyProvider`] *or* a +//! [`BufferProvider`]. Which type depends on the data source, and it is not generally possible +//! to convert one to the other. +//! +//! See also [crate::constructors]. +//! +//! ### AnyProvider +//! +//! These providers are able to return structured data cast into `dyn Any` trait objects. Users +//! can call [`as_downcasting()`] to get an object implementing [`DataProvider`] by downcasting +//! the trait objects. +//! +//! Examples of AnyProviders: +//! +//! - [`CldrJsonDataProvider`] reads structured data from CLDR JSON source files and returns +//! structured Rust objects. +//! - [`AnyPayloadProvider`] wraps a specific data struct and returns it. +//! - The `BakedDataProvider` which encodes structured data directly in Rust source +//! +//! ### BufferProvider +//! +//! These providers are able to return unstructured data typically represented as +//! [`serde`]-serialized buffers. Users can call [`as_deserializing()`] to get an object +//! implementing [`DataProvider`] by invoking Serde Deserialize. +//! +//! Examples of BufferProviders: +//! +//! - [`FsDataProvider`] reads individual buffers from the filesystem. +//! - [`BlobDataProvider`] reads buffers from a large in-memory blob. +//! +//! ## Provider Adapters +//! +//! ICU4X offers several built-in modules to combine providers in interesting ways. +//! These can be found in the [`icu_provider_adapters`] crate. +//! +//! ## Testing Provider +//! +//! This crate also contains a concrete provider for testing purposes: +//! +//! - [`HelloWorldProvider`] returns "hello world" strings in several languages. +//! +//! If you need a testing provider that contains the actual resource keys used by ICU4X features, +//! see the [`icu_testdata`] crate. +//! +//! ## Types and Lifetimes +//! +//! Types compatible with [`Yokeable`] can be passed through the data provider, so long as they are +//! associated with a marker type implementing [`DataMarker`]. +//! +//! Data structs should generally have one lifetime argument: `'data`. This lifetime allows data +//! structs to borrow zero-copy data. +//! +//! ## Data generation API +//! +//! *This functionality is enabled with the "datagen" feature* +//! +//! The [`datagen`] module contains several APIs for data generation. See [`icu_datagen`] for the reference +//! data generation implementation. +//! +//! [`ICU4X`]: ../icu/index.html +//! [`DataProvider`]: data_provider::DataProvider +//! [`DataKey`]: key::DataKey +//! [`DataLocale`]: request::DataLocale +//! [`IterableDynamicDataProvider`]: datagen::IterableDynamicDataProvider +//! [`IterableDataProvider`]: datagen::IterableDataProvider +//! [`AnyPayloadProvider`]: ../icu_provider_adapters/any_payload/struct.AnyPayloadProvider.html +//! [`HelloWorldProvider`]: hello_world::HelloWorldProvider +//! [`AnyProvider`]: any::AnyProvider +//! [`Yokeable`]: yoke::Yokeable +//! [`impl_dynamic_data_provider!`]: impl_dynamic_data_provider +//! [`icu_provider_adapters`]: ../icu_provider_adapters/index.html +//! [`as_downcasting()`]: AsDowncastingAnyProvider::as_downcasting +//! [`as_deserializing()`]: AsDeserializingBufferProvider::as_deserializing +//! [`CldrJsonDataProvider`]: ../icu_datagen/cldr/struct.CldrJsonDataProvider.html +//! [`FsDataProvider`]: ../icu_provider_fs/struct.FsDataProvider.html +//! [`BlobDataProvider`]: ../icu_provider_blob/struct.BlobDataProvider.html +//! [`icu_testdata`]: ../icu_testdata/index.html +//! [`icu_datagen`]: ../icu_datagen/index.html + +// https://github.com/unicode-org/icu4x/blob/main/docs/process/boilerplate.md#library-annotations +#![cfg_attr(not(any(test, feature = "std")), no_std)] +#![cfg_attr( + not(test), + deny( + clippy::indexing_slicing, + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::exhaustive_structs, + clippy::exhaustive_enums, + // TODO(#2266): enable missing_debug_implementations, + ) +)] +#![warn(missing_docs)] + +extern crate alloc; + +pub mod any; +pub mod buf; +pub mod constructors; +mod data_provider; +#[cfg(feature = "datagen")] +#[macro_use] +pub mod datagen; +#[macro_use] +pub mod dynutil; +mod error; +pub mod hello_world; +mod helpers; +#[macro_use] +mod key; +pub mod marker; +mod request; +mod response; +#[cfg(feature = "serde")] +pub mod serde; + +#[cfg(feature = "macros")] +pub use icu_provider_macros::data_struct; + +pub mod prelude { + //! Core selection of APIs and structures for the ICU4X data provider. + pub use crate::any::AnyMarker; + pub use crate::any::AnyPayload; + pub use crate::any::AnyProvider; + pub use crate::any::AnyResponse; + pub use crate::buf::BufferMarker; + pub use crate::buf::BufferProvider; + pub use crate::data_key; + pub use crate::data_provider::DataProvider; + pub use crate::data_provider::DynamicDataProvider; + pub use crate::error::DataError; + pub use crate::error::DataErrorKind; + pub use crate::key::DataKey; + pub use crate::key::DataKeyHash; + pub use crate::marker::DataMarker; + pub use crate::marker::KeyedDataMarker; + pub use crate::request::DataLocale; + pub use crate::request::DataRequest; + pub use crate::response::DataPayload; + pub use crate::response::DataResponse; + pub use crate::response::DataResponseMetadata; + + pub use crate::any::AsDowncastingAnyProvider; + pub use crate::any::AsDynamicDataProviderAnyMarkerWrap; + #[cfg(feature = "serde")] + pub use crate::serde::AsDeserializingBufferProvider; + + /// Re-export of the yoke and zerofrom crates for convenience of downstream implementors. + #[doc(hidden)] + pub use yoke; + #[doc(hidden)] + pub use zerofrom; +} + +// Also include the same symbols at the top level for selective inclusion +pub use prelude::*; + +// Less important non-prelude items +pub use crate::any::MaybeSendSync; +pub use crate::key::DataKeyMetadata; +pub use crate::key::DataKeyPath; +pub use crate::key::FallbackPriority; +pub use crate::key::FallbackSupplement; +pub use crate::request::DataRequestMetadata; +pub use crate::response::Cart; + +// For macros +#[doc(hidden)] +pub mod _internal { + pub use icu_locid::extensions_unicode_key; +} diff --git a/vendor/icu_provider/src/marker.rs b/vendor/icu_provider/src/marker.rs new file mode 100644 index 000000000..870885797 --- /dev/null +++ b/vendor/icu_provider/src/marker.rs @@ -0,0 +1,80 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Marker types and traits for DataProvider. + +use crate::key::DataKey; +use crate::yoke::Yokeable; + +/// Trait marker for data structs. All types delivered by the data provider must be associated with +/// something implementing this trait. +/// +/// Structs implementing this trait are normally generated with the [`data_struct`] macro. +/// +/// By convention, the non-standard `Marker` suffix is used by types implementing DataMarker. +/// +/// In addition to a marker type implementing DataMarker, the following impls must also be present +/// for the data struct: +/// +/// - `impl<'a> Yokeable<'a>` (required) +/// - `impl ZeroFrom` +/// +/// Also see [`KeyedDataMarker`]. +/// +/// # Examples +/// +/// Manually implementing DataMarker for a custom type: +/// +/// ``` +/// use icu_provider::prelude::*; +/// use icu_provider::yoke::*; +/// use icu_provider::zerofrom::*; +/// use std::borrow::Cow; +/// use std::rc::Rc; +/// +/// #[derive(Yokeable, ZeroFrom)] +/// struct MyDataStruct<'data> { +/// message: Cow<'data, str>, +/// } +/// +/// struct MyDataStructMarker; +/// +/// impl DataMarker for MyDataStructMarker { +/// type Yokeable = MyDataStruct<'static>; +/// } +/// +/// // We can now use MyDataStruct with DataProvider: +/// let s = MyDataStruct { +/// message: Cow::Owned("Hello World".into()), +/// }; +/// let payload = DataPayload::::from_owned(s); +/// assert_eq!(payload.get().message, "Hello World"); +/// ``` +/// +/// [`data_struct`]: crate::data_struct +pub trait DataMarker { + /// A type that implements [`Yokeable`]. This should typically be the `'static` version of a + /// data struct. + type Yokeable: for<'a> Yokeable<'a>; +} + +/// A [`DataMarker`] with a [`DataKey`] attached. +/// +/// Structs implementing this trait are normally generated with the [`data_struct!`] macro. +/// +/// Implementing this trait enables this marker to be used with the main [`DataProvider`] trait. +/// Most markers should be associated with a specific key and should therefore implement this +/// trait. +/// +/// [`BufferMarker`] and [`AnyMarker`] are examples of markers that do _not_ implement this trait +/// because they are not specific to a single key. +/// +/// [`data_struct!`]: crate::data_struct +/// [`DataProvider`]: crate::DataProvider +/// [`BufferMarker`]: crate::BufferMarker +/// [`AnyMarker`]: crate::AnyMarker +pub trait KeyedDataMarker: DataMarker { + /// The single [`DataKey`] associated with this marker. + const KEY: DataKey; +} diff --git a/vendor/icu_provider/src/request.rs b/vendor/icu_provider/src/request.rs new file mode 100644 index 000000000..7f6bb5911 --- /dev/null +++ b/vendor/icu_provider/src/request.rs @@ -0,0 +1,513 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use core::cmp::Ordering; +use core::default::Default; +use core::fmt; +use core::fmt::Debug; +use icu_locid::extensions::unicode as unicode_ext; +use icu_locid::subtags::{Language, Region, Script, Variants}; +use icu_locid::{LanguageIdentifier, Locale, SubtagOrderingResult}; +use writeable::{LengthHint, Writeable}; + +#[cfg(doc)] +use icu_locid::subtags::Variant; + +/// The request type passed into all data provider implementations. +#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)] +#[allow(clippy::exhaustive_structs)] // this type is stable +pub struct DataRequest<'a> { + /// The locale for which to load data. + /// + /// If locale fallback is enabled, the resulting data may be from a different locale + /// than the one requested here. + pub locale: &'a DataLocale, + /// Metadata that may affect the behavior of the data provider. + pub metadata: DataRequestMetadata, +} + +impl fmt::Display for DataRequest<'_> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(&self.locale, f) + } +} + +/// Metadata for data requests. This is currently empty, but it may be extended with options +/// for tuning locale fallback, buffer layout, and so forth. +#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +#[non_exhaustive] +pub struct DataRequestMetadata; + +/// The main locale type used by the ICU4X data provider. +/// +/// [`DataLocale`] contains less functionality than [`Locale`] but more than +/// [`LanguageIdentifier`] for better size and performance while still meeting +/// the needs of the ICU4X data pipeline. +/// +/// # Examples +/// +/// Convert a [`Locale`] to a [`DataLocale`] and back: +/// +/// ``` +/// use icu_locid::locale; +/// use icu_provider::DataLocale; +/// +/// let locale1 = locale!("en-u-ca-buddhist"); +/// let data_locale = DataLocale::from(locale1); +/// let locale2 = data_locale.into_locale(); +/// +/// assert_eq!(locale2.to_string(), "en-u-ca-buddhist"); +/// ``` +/// +/// You can alternatively create a [`DataLocale`] from a borrowed [`Locale`], which is more +/// efficient than cloning the [`Locale`], but less efficient than converting an owned +/// [`Locale`]: +/// +/// ``` +/// use icu_locid::locale; +/// use icu_provider::DataLocale; +/// +/// let locale1 = locale!("en-u-ca-buddhist"); +/// let data_locale = DataLocale::from(&locale1); +/// let locale2 = data_locale.into_locale(); +/// +/// assert_eq!(locale1, locale2); +/// ``` +/// +/// If you are sure that you have no Unicode keywords, start with [`LanguageIdentifier`]: +/// +/// ``` +/// use icu_locid::langid; +/// use icu_provider::DataLocale; +/// +/// let langid1 = langid!("es-CA-valencia"); +/// let data_locale = DataLocale::from(langid1); +/// let langid2 = data_locale.get_langid(); +/// +/// assert_eq!(langid2.to_string(), "es-CA-valencia"); +/// ``` +/// +/// [`DataLocale`] only supports `-u` keywords, to reflect the current state of CLDR data +/// lookup and fallback. This may change in the future. +/// +/// ``` +/// use icu_locid::Locale; +/// use icu_provider::DataLocale; +/// +/// let locale = "hi-t-en-h0-hybrid-u-attr-ca-buddhist" +/// .parse::() +/// .unwrap(); +/// let data_locale = DataLocale::from(locale); +/// +/// assert_eq!(data_locale.to_string(), "hi-u-ca-buddhist"); +/// ``` +#[derive(PartialEq, Clone, Default, Eq, Hash)] +pub struct DataLocale { + langid: LanguageIdentifier, + keywords: unicode_ext::Keywords, +} + +impl<'a> Default for &'a DataLocale { + fn default() -> Self { + static DEFAULT: DataLocale = DataLocale { + langid: LanguageIdentifier::UND, + keywords: unicode_ext::Keywords::new(), + }; + &DEFAULT + } +} + +impl fmt::Debug for DataLocale { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "DataLocale{{{}}}", self) + } +} + +impl Writeable for DataLocale { + fn write_to(&self, sink: &mut W) -> core::fmt::Result { + self.langid.write_to(sink)?; + if !self.keywords.is_empty() { + sink.write_str("-u-")?; + self.keywords.write_to(sink)?; + } + Ok(()) + } + + fn writeable_length_hint(&self) -> LengthHint { + self.langid.writeable_length_hint() + + if !self.keywords.is_empty() { + self.keywords.writeable_length_hint() + 3 + } else { + LengthHint::exact(0) + } + } + + fn write_to_string(&self) -> alloc::borrow::Cow { + if self.keywords.is_empty() { + return self.langid.write_to_string(); + } + let mut string = + alloc::string::String::with_capacity(self.writeable_length_hint().capacity()); + let _ = self.write_to(&mut string); + alloc::borrow::Cow::Owned(string) + } +} + +writeable::impl_display_with_writeable!(DataLocale); + +impl From for DataLocale { + fn from(langid: LanguageIdentifier) -> Self { + Self { + langid, + keywords: unicode_ext::Keywords::new(), + } + } +} + +impl From for DataLocale { + fn from(locale: Locale) -> Self { + Self { + langid: locale.id, + keywords: locale.extensions.unicode.keywords, + } + } +} + +impl From<&LanguageIdentifier> for DataLocale { + fn from(langid: &LanguageIdentifier) -> Self { + Self { + langid: langid.clone(), + keywords: unicode_ext::Keywords::new(), + } + } +} + +impl From<&Locale> for DataLocale { + fn from(locale: &Locale) -> Self { + Self { + langid: locale.id.clone(), + keywords: locale.extensions.unicode.keywords.clone(), + } + } +} + +impl DataLocale { + /// Compare this [`DataLocale`] with BCP-47 bytes. + /// + /// The return value is equivalent to what would happen if you first converted this + /// [`DataLocale`] to a BCP-47 string and then performed a byte comparison. + /// + /// This function is case-sensitive and results in a *total order*, so it is appropriate for + /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`. + /// + /// # Examples + /// + /// ``` + /// use icu_locid::Locale; + /// use icu_provider::DataLocale; + /// use std::cmp::Ordering; + /// + /// let bcp47_strings: &[&str] = &[ + /// "ca-ES", + /// "ca-ES-u-ca-buddhist", + /// "ca-ES-valencia", + /// "pl-Latn-PL", + /// "und", + /// "und-fonipa", + /// "und-u-ca-hebrew", + /// "und-u-ca-japanese", + /// "zh", + /// ]; + /// + /// for ab in bcp47_strings.windows(2) { + /// let a = ab[0]; + /// let b = ab[1]; + /// assert!(a.cmp(b) == Ordering::Less); + /// let a_loc: DataLocale = a.parse::().unwrap().into(); + /// assert_eq!(a, a_loc.to_string()); + /// assert!( + /// a_loc.strict_cmp(a.as_bytes()) == Ordering::Equal, + /// "{} == {}", + /// a, + /// a + /// ); + /// assert!( + /// a_loc.strict_cmp(b.as_bytes()) == Ordering::Less, + /// "{} < {}", + /// a, + /// b + /// ); + /// let b_loc: DataLocale = b.parse::().unwrap().into(); + /// assert_eq!(b, b_loc.to_string()); + /// assert!( + /// b_loc.strict_cmp(b.as_bytes()) == Ordering::Equal, + /// "{} == {}", + /// b, + /// b + /// ); + /// assert!( + /// b_loc.strict_cmp(a.as_bytes()) == Ordering::Greater, + /// "{} > {}", + /// b, + /// a + /// ); + /// } + /// ``` + pub fn strict_cmp(&self, other: &[u8]) -> Ordering { + let subtags = other.split(|b| *b == b'-'); + let mut subtag_result = self.langid.strict_cmp_iter(subtags); + if self.has_unicode_ext() { + let mut subtags = match subtag_result { + SubtagOrderingResult::Subtags(s) => s, + SubtagOrderingResult::Ordering(o) => return o, + }; + match subtags.next() { + Some(b"u") => (), + Some(s) => return s.cmp(b"u").reverse(), + None => return Ordering::Greater, + } + subtag_result = self.keywords.strict_cmp_iter(subtags); + } + subtag_result.end() + } +} + +impl DataLocale { + /// Returns whether this [`DataLocale`] has all empty fields (no components). + pub fn is_empty(&self) -> bool { + self == <&DataLocale>::default() + } + + /// Returns whether the [`LanguageIdentifier`] associated with this request is `und`. + /// + /// Note that this only checks the language identifier; extension keywords may also be set. + /// To check the entire `DataLocale`, use [`DataLocale::is_empty()`]. + pub fn is_langid_und(&self) -> bool { + self.langid == LanguageIdentifier::UND + } + + /// Gets the [`LanguageIdentifier`] for this [`DataLocale`]. + /// + /// This may allocate memory if there are variant subtags. If you need only the language, + /// script, and/or region subtag, use the specific getters for those subtags: + /// + /// - [`DataLocale::language()`] + /// - [`DataLocale::script()`] + /// - [`DataLocale::region()`] + /// + /// If you have ownership over the `DataLocale`, use [`DataLocale::into_locale()`] + /// and then access the `id` field. + /// + /// # Examples + /// + /// ``` + /// use icu_locid::langid; + /// use icu_provider::prelude::*; + /// + /// const FOO_BAR: DataKey = icu_provider::data_key!("foo/bar@1"); + /// + /// let req_no_langid = DataRequest { + /// locale: &Default::default(), + /// metadata: Default::default(), + /// }; + /// + /// let req_with_langid = DataRequest { + /// locale: &langid!("ar-EG").into(), + /// metadata: Default::default(), + /// }; + /// + /// assert_eq!(req_no_langid.locale.get_langid(), langid!("und")); + /// assert_eq!(req_with_langid.locale.get_langid(), langid!("ar-EG")); + /// ``` + pub fn get_langid(&self) -> LanguageIdentifier { + self.langid.clone() + } + + /// Overrides the entire [`LanguageIdentifier`] portion of this [`DataLocale`]. + #[inline] + pub fn set_langid(&mut self, lid: LanguageIdentifier) { + self.langid = lid; + } + + /// Converts this [`DataLocale`] into a [`Locale`]. + /// + /// See also [`DataLocale::get_langid()`]. + /// + /// # Examples + /// + /// ``` + /// use icu_locid::{ + /// langid, subtags_language as language, subtags_region as region, Locale, + /// }; + /// use icu_provider::prelude::*; + /// + /// let locale: Locale = "it-IT-u-ca-coptic".parse().expect("Valid BCP-47"); + /// let locale: DataLocale = locale.into(); + /// + /// assert_eq!(locale.to_string(), "it-IT-u-ca-coptic"); + /// assert_eq!(locale.get_langid(), langid!("it-IT")); + /// assert_eq!(locale.language(), language!("it")); + /// assert_eq!(locale.script(), None); + /// assert_eq!(locale.region(), Some(region!("IT"))); + /// + /// let locale = locale.into_locale(); + /// assert_eq!(locale.to_string(), "it-IT-u-ca-coptic"); + /// ``` + pub fn into_locale(self) -> Locale { + let mut loc = Locale { + id: self.langid, + ..Default::default() + }; + loc.extensions.unicode.keywords = self.keywords; + loc + } + + /// Returns the [`Language`] for this [`DataLocale`]. + #[inline] + pub fn language(&self) -> Language { + self.langid.language + } + + /// Returns the [`Language`] for this [`DataLocale`]. + #[inline] + pub fn set_language(&mut self, language: Language) { + self.langid.language = language; + } + + /// Returns the [`Script`] for this [`DataLocale`]. + #[inline] + pub fn script(&self) -> Option