diff options
Diffstat (limited to 'vendor/icu_provider/src/datagen')
-rw-r--r-- | vendor/icu_provider/src/datagen/data_conversion.rs | 48 | ||||
-rw-r--r-- | vendor/icu_provider/src/datagen/heap_measure.rs | 59 | ||||
-rw-r--r-- | vendor/icu_provider/src/datagen/iter.rs | 35 | ||||
-rw-r--r-- | vendor/icu_provider/src/datagen/mod.rs | 120 | ||||
-rw-r--r-- | vendor/icu_provider/src/datagen/payload.rs | 137 |
5 files changed, 399 insertions, 0 deletions
diff --git a/vendor/icu_provider/src/datagen/data_conversion.rs b/vendor/icu_provider/src/datagen/data_conversion.rs
new file mode 100644
index 000000000..59146352a
--- /dev/null
+++ b/vendor/icu_provider/src/datagen/data_conversion.rs
@@ -0,0 +1,48 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::prelude::*;
+use crate::DataKey;
+use alloc::boxed::Box;
+
+/// A trait that allows for converting between data payloads of different types.
+///
+/// These payloads will typically be some kind of erased payload, either with
+/// AnyMarker, BufferMarker, or SerializeMarker, where converting requires reifying the type.
+/// A type implementing [`DataConverter`] will essentially have a "registry" mapping keys to
+/// concrete marker types M, and reifying the input to a `DataPayload<M>`, performing some conversion
+/// or computation, and erasing the result to `DataPayload<MTo>`.
+///
+/// It will typically be implemented on data providers used in datagen.
+///
+/// The [`make_exportable_provider!`] macro is able to automatically implement this trait.
+///
+/// [`make_exportable_provider!`]: crate::make_exportable_provider
+pub trait DataConverter<MFrom: DataMarker, MTo: DataMarker> {
+    /// Attempt to convert a payload corresponding to the given data key
+    /// from one marker type to another marker type.
+    ///
+    /// If this is not possible (for example, if the provider does not know about the key),
+    /// the original payload is returned back to the caller.
+    fn convert(
+        &self,
+        key: DataKey,
+        from: DataPayload<MFrom>,
+    ) -> Result<DataPayload<MTo>, (DataPayload<MFrom>, DataError)>;
+}
+
+impl<MFrom, MTo, P> DataConverter<MFrom, MTo> for Box<P>
+where
+    MFrom: DataMarker,
+    MTo: DataMarker,
+    P: DataConverter<MFrom, MTo> + ?Sized,
+{
+    fn convert(
+        &self,
+        key: DataKey,
+        from: DataPayload<MFrom>,
+    ) -> Result<DataPayload<MTo>, (DataPayload<MFrom>, DataError)> {
+        (**self).convert(key, from)
+    }
+}
diff --git a/vendor/icu_provider/src/datagen/heap_measure.rs b/vendor/icu_provider/src/datagen/heap_measure.rs
new file mode 100644
index 000000000..d451f3ebe
--- /dev/null
+++ b/vendor/icu_provider/src/datagen/heap_measure.rs
@@ -0,0 +1,59 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::buf::{BufferFormat, BufferMarker};
+use crate::prelude::*;
+use yoke::trait_hack::YokeTraitHack;
+
+/// Stats on the heap size needed when attempting to zero-copy-deserialize
+/// a postcard-formatted data struct.
+#[derive(Debug, Copy, Clone, yoke::Yokeable, Default)]
+#[non_exhaustive]
+pub struct HeapStats {
+    /// Total bytes allocated during deserialization
+    pub total_bytes_allocated: u64,
+    /// Total bytes allocated during deserialization that have not yet been freed
+    pub net_bytes_allocated: usize,
+}
+
+/// The [`DataMarker`] marker type for [`HeapStats`].
+#[allow(clippy::exhaustive_structs)] // marker type
+pub struct HeapStatsMarker;
+
+impl DataMarker for HeapStatsMarker {
+    type Yokeable = HeapStats;
+}
+
+impl DataPayload<BufferMarker> {
+    /// Given a buffer known to be in postcard-1 format, attempt to zero-copy
+    /// deserialize it and record the amount of heap allocations that occurred.
+    ///
+    /// Ideally, this number should be zero.
+    ///
+    /// [`dhat`]'s profiler must be initialized before using this.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the buffer is not in postcard-1 format.
+    #[allow(clippy::expect_used)] // The function documents when panics may occur.
+    pub fn attempt_zero_copy_heap_size<M>(self) -> HeapStats
+    where
+        M: DataMarker,
+        for<'a> &'a <M::Yokeable as yoke::Yokeable<'a>>::Output: serde::Serialize,
+        for<'de> YokeTraitHack<<M::Yokeable as yoke::Yokeable<'de>>::Output>:
+            serde::Deserialize<'de>,
+    {
+        let stats_before = dhat::HeapStats::get();
+        // reify, but do nothing with the type
+        let _reified_data: DataPayload<M> = self
+            .into_deserialized(BufferFormat::Postcard1)
+            .expect("Failed to deserialize BufferMarker as postcard-1");
+        let stats_after = dhat::HeapStats::get();
+        // Live bytes may be lower in the second snapshot; saturate rather than underflow `usize`.
+        HeapStats {
+            total_bytes_allocated: stats_after.total_bytes - stats_before.total_bytes,
+            net_bytes_allocated: stats_after.curr_bytes.saturating_sub(stats_before.curr_bytes),
+        }
+    }
+}
diff --git a/vendor/icu_provider/src/datagen/iter.rs b/vendor/icu_provider/src/datagen/iter.rs
new file mode 100644
index 000000000..6175d89c6
--- /dev/null
+++ b/vendor/icu_provider/src/datagen/iter.rs
@@ -0,0 +1,35 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Collection of iteration APIs for data providers.
+
+use crate::prelude::*;
+
+/// A [`DynamicDataProvider`] that can iterate over all supported [`DataLocale`] for a certain key.
+///
+/// Implementing this trait means that a data provider knows all of the data it can successfully
+/// return from a load request.
+pub trait IterableDynamicDataProvider<M: DataMarker>: DynamicDataProvider<M> {
+    /// Given a [`DataKey`], returns a list of [`DataLocale`].
+    fn supported_locales_for_key(&self, key: DataKey) -> Result<Vec<DataLocale>, DataError>;
+}
+
+/// A [`DataProvider`] that can iterate over all supported [`DataLocale`] for a certain key.
+///
+/// Implementing this trait means that a data provider knows all of the data it can successfully
+/// return from a load request.
+pub trait IterableDataProvider<M: KeyedDataMarker>: DataProvider<M> {
+    /// Returns a list of [`DataLocale`].
+    fn supported_locales(&self) -> Result<Vec<DataLocale>, DataError>;
+}
+
+impl<M, P> IterableDynamicDataProvider<M> for Box<P>
+where
+    M: DataMarker,
+    P: IterableDynamicDataProvider<M> + ?Sized,
+{
+    fn supported_locales_for_key(&self, key: DataKey) -> Result<Vec<DataLocale>, DataError> {
+        (**self).supported_locales_for_key(key)
+    }
+}
diff --git a/vendor/icu_provider/src/datagen/mod.rs b/vendor/icu_provider/src/datagen/mod.rs
new file mode 100644
index 000000000..5ede82275
--- /dev/null
+++ b/vendor/icu_provider/src/datagen/mod.rs
@@ -0,0 +1,120 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! This module contains various utilities required to generate ICU4X data files, typically
+//! via the `icu_datagen` reference crate. End users should not need to consume anything in
+//! this module as a library unless defining new types that integrate with `icu_datagen`.
+//!
+//! This module can be enabled with the `datagen` feature on `icu_provider`.
+
+mod data_conversion;
+mod heap_measure;
+mod iter;
+mod payload;
+pub use data_conversion::DataConverter;
+pub use heap_measure::{HeapStats, HeapStatsMarker};
+pub use iter::IterableDataProvider;
+
+#[doc(hidden)] // exposed for make_exportable_provider
+pub use iter::IterableDynamicDataProvider;
+#[doc(hidden)] // exposed for make_exportable_provider
+pub use payload::{ExportBox, ExportMarker};
+
+use crate::prelude::*;
+
+/// An object capable of exporting data payloads in some form.
+pub trait DataExporter: Sync {
+    /// Save a `payload` corresponding to the given key and locale.
+    /// Takes non-mut self as it can be called concurrently.
+    fn put_payload(
+        &self,
+        key: DataKey,
+        locale: &DataLocale,
+        payload: &DataPayload<ExportMarker>,
+    ) -> Result<(), DataError>;
+
+    /// Function called after all keys have been fully dumped.
+    /// Takes non-mut self as it can be called concurrently.
+    fn flush(&self, _key: DataKey) -> Result<(), DataError> {
+        Ok(())
+    }
+
+    /// This function has to be called before the object is dropped (after all
+    /// keys have been fully dumped). This conceptually takes ownership, so
+    /// clients *may not* interact with this object after close has been called.
+    fn close(&mut self) -> Result<(), DataError> {
+        Ok(())
+    }
+}
+
+/// A [`DynamicDataProvider`] that can be used for exporting data.
+///
+/// Use [`make_exportable_provider!`](crate::make_exportable_provider) to implement this.
+pub trait ExportableProvider: IterableDynamicDataProvider<ExportMarker> + Sync {}
+impl<T> ExportableProvider for T where T: IterableDynamicDataProvider<ExportMarker> + Sync {}
+
+/// This macro can be used on a data provider to allow it to be used for data generation.
+///
+/// Data generation 'compiles' data by using this data provider (which usually translates data from
+/// different sources and doesn't have to be efficient) to generate data structs, and then writing
+/// them to an efficient format like [`BlobDataProvider`] or [`BakedDataProvider`]. The requirements
+/// for `make_exportable_provider` are:
+/// * The data struct has to implement [`serde::Serialize`](::serde::Serialize) and [`databake::Bake`]
+/// * The provider needs to implement [`IterableDataProvider`] for all specified [`KeyedDataMarker`]s.
+///   This allows the generating code to know which [`DataLocale`] to collect.
+///
+/// [`BlobDataProvider`]: ../../icu_provider_blob/struct.BlobDataProvider.html
+/// [`BakedDataProvider`]: ../../icu_datagen/index.html
+#[macro_export]
+macro_rules! make_exportable_provider {
+    ($provider:ty, [ $($struct_m:ident),+, ]) => {
+        $crate::impl_dynamic_data_provider!(
+            $provider,
+            [ $($struct_m),+, ],
+            $crate::datagen::ExportMarker
+        );
+        $crate::impl_dynamic_data_provider!(
+            $provider,
+            [ $($struct_m),+, ],
+            $crate::any::AnyMarker
+        );
+
+        impl $crate::datagen::IterableDynamicDataProvider<$crate::datagen::ExportMarker> for $provider {
+            fn supported_locales_for_key(&self, key: $crate::DataKey) -> Result<Vec<$crate::DataLocale>, $crate::DataError> {
+                #![allow(non_upper_case_globals)]
+                // Reusing the struct names as identifiers
+                $(
+                    const $struct_m: $crate::DataKeyHash = <$struct_m as $crate::KeyedDataMarker>::KEY.hashed();
+                )+
+                match key.hashed() {
+                    $(
+                        $struct_m => {
+                            $crate::datagen::IterableDataProvider::<$struct_m>::supported_locales(self)
+                        }
+                    )+,
+                    _ => Err($crate::DataErrorKind::MissingDataKey.with_key(key))
+                }
+            }
+        }
+
+        impl $crate::datagen::DataConverter<$crate::buf::BufferMarker, $crate::datagen::HeapStatsMarker> for $provider {
+            fn convert(&self, key: $crate::DataKey, from: $crate::DataPayload<$crate::buf::BufferMarker>) -> Result<$crate::DataPayload<$crate::datagen::HeapStatsMarker>, ($crate::DataPayload<$crate::buf::BufferMarker>, $crate::DataError)> {
+                #![allow(non_upper_case_globals)]
+                // Reusing the struct names as identifiers
+                $(
+                    const $struct_m: $crate::DataKeyHash = <$struct_m as $crate::KeyedDataMarker>::KEY.hashed();
+                )+
+                match key.hashed() {
+                    $(
+                        $struct_m => {
+                            let heap_stats = from.attempt_zero_copy_heap_size::<$struct_m>();
+                            Ok($crate::DataPayload::from_owned(heap_stats))
+                        }
+                    )+,
+                    _ => Err((from, $crate::DataErrorKind::MissingDataKey.with_key(key)))
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/icu_provider/src/datagen/payload.rs b/vendor/icu_provider/src/datagen/payload.rs
new file mode 100644
index 000000000..b6ea8049f
--- /dev/null
+++ b/vendor/icu_provider/src/datagen/payload.rs
@@ -0,0 +1,137 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::dynutil::UpcastDataPayload;
+use crate::prelude::*;
+use crate::yoke::*;
+use alloc::boxed::Box;
+use databake::{Bake, CrateEnv, TokenStream};
+
+trait ExportableYoke { // used as `dyn` inside `ExportBox`, erasing the yoke's `Y` and `C`
+    fn bake_yoke(&self, env: &CrateEnv) -> TokenStream;
+    fn serialize_yoke(
+        &self,
+        serializer: &mut dyn erased_serde::Serializer,
+    ) -> Result<(), DataError>;
+}
+
+impl<Y, C> ExportableYoke for Yoke<Y, C>
+where
+    Y: for<'a> Yokeable<'a>,
+    for<'a> <Y as Yokeable<'a>>::Output: Bake + serde::Serialize,
+{
+    fn bake_yoke(&self, env: &CrateEnv) -> TokenStream {
+        self.get().bake(env)
+    }
+
+    fn serialize_yoke(
+        &self,
+        serializer: &mut dyn erased_serde::Serializer,
+    ) -> Result<(), DataError> {
+        use erased_serde::Serialize;
+        self.get()
+            .erased_serialize(serializer)
+            .map_err(|e| DataError::custom("Serde export").with_display_context(&e))?;
+        Ok(())
+    }
+}
+
+#[doc(hidden)] // exposed for make_exportable_provider
+#[derive(yoke::Yokeable)]
+pub struct ExportBox {
+    payload: Box<dyn ExportableYoke + Sync>,
+}
+
+impl<M> UpcastDataPayload<M> for ExportMarker
+where
+    M: DataMarker,
+    M::Yokeable: Sync,
+    for<'a> <M::Yokeable as Yokeable<'a>>::Output: Bake + serde::Serialize,
+{
+    fn upcast(other: DataPayload<M>) -> DataPayload<ExportMarker> {
+        DataPayload::from_owned(ExportBox {
+            payload: Box::new(other.yoke),
+        })
+    }
+}
+
+impl DataPayload<ExportMarker> {
+    /// Serializes this [`DataPayload`] into a serializer using Serde.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use icu_provider::datagen::*;
+    /// use icu_provider::dynutil::UpcastDataPayload;
+    /// use icu_provider::hello_world::HelloWorldV1Marker;
+    /// use icu_provider::prelude::*;
+    ///
+    /// // Create an example DataPayload
+    /// let payload: DataPayload<HelloWorldV1Marker> = Default::default();
+    /// let export: DataPayload<ExportMarker> = UpcastDataPayload::upcast(payload);
+    ///
+    /// // Serialize the payload to a JSON string
+    /// let mut buffer: Vec<u8> = vec![];
+    /// export
+    ///     .serialize(&mut serde_json::Serializer::new(&mut buffer))
+    ///     .expect("Serialization should succeed");
+    /// assert_eq!("{\"message\":\"(und) Hello World\"}".as_bytes(), buffer);
+    /// ```
+    pub fn serialize<S>(&self, serializer: S) -> Result<(), DataError>
+    where
+        S: serde::Serializer,
+        S::Ok: 'static, // erased_serde requirement, cannot return values in `Ok`
+    {
+        self.get()
+            .payload
+            .serialize_yoke(&mut <dyn erased_serde::Serializer>::erase(serializer))
+    }
+
+    /// Serializes this [`DataPayload`]'s value into a [`TokenStream`]
+    /// using its [`Bake`] implementations.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use icu_provider::datagen::*;
+    /// use icu_provider::dynutil::UpcastDataPayload;
+    /// use icu_provider::hello_world::HelloWorldV1Marker;
+    /// use icu_provider::prelude::*;
+    /// # use databake::quote;
+    /// # use std::collections::BTreeSet;
+    ///
+    /// // Create an example DataPayload
+    /// let payload: DataPayload<HelloWorldV1Marker> = Default::default();
+    /// let export: DataPayload<ExportMarker> = UpcastDataPayload::upcast(payload);
+    ///
+    /// let env = databake::CrateEnv::default();
+    /// let tokens = export.tokenize(&env);
+    /// assert_eq!(
+    ///     quote! {
+    ///         ::icu_provider::hello_world::HelloWorldV1 {
+    ///             message: alloc::borrow::Cow::Borrowed("(und) Hello World"),
+    ///         }
+    ///     }
+    ///     .to_string(),
+    ///     tokens.to_string()
+    /// );
+    /// assert_eq!(
+    ///     env.into_iter().collect::<BTreeSet<_>>(),
+    ///     ["icu_provider", "alloc"]
+    ///         .into_iter()
+    ///         .collect::<BTreeSet<_>>()
+    /// );
+    /// ```
+    pub fn tokenize(&self, env: &CrateEnv) -> TokenStream {
+        self.get().payload.bake_yoke(env)
+    }
+}
+
+/// Marker type for [`ExportBox`].
+#[allow(clippy::exhaustive_structs)] // marker type
+pub struct ExportMarker {}
+
+impl DataMarker for ExportMarker {
+    type Yokeable = ExportBox;
+}
+|