summaryrefslogtreecommitdiffstats
path: root/vendor/icu_provider/src/datagen
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/icu_provider/src/datagen')
-rw-r--r-- vendor/icu_provider/src/datagen/data_conversion.rs | 48
-rw-r--r-- vendor/icu_provider/src/datagen/heap_measure.rs | 59
-rw-r--r-- vendor/icu_provider/src/datagen/iter.rs | 35
-rw-r--r-- vendor/icu_provider/src/datagen/mod.rs | 120
-rw-r--r-- vendor/icu_provider/src/datagen/payload.rs | 137
5 files changed, 399 insertions, 0 deletions
diff --git a/vendor/icu_provider/src/datagen/data_conversion.rs b/vendor/icu_provider/src/datagen/data_conversion.rs
new file mode 100644
index 000000000..59146352a
--- /dev/null
+++ b/vendor/icu_provider/src/datagen/data_conversion.rs
@@ -0,0 +1,48 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::prelude::*;
+use crate::DataKey;
+use alloc::boxed::Box;
+
+/// A trait that allows for converting between data payloads of different types.
+///
+/// These payloads will typically be some kind of erased payload, either with
+/// `AnyMarker`, `BufferMarker`, or `SerializeMarker`, where converting requires reifying the type.
+/// A type implementing [`DataConverter`] essentially acts as a "registry" that maps keys to
+/// concrete marker types `M`: it reifies the input into a `DataPayload<M>`, performs some
+/// conversion or computation, and erases the result into a `DataPayload<MTo>`.
+///
+/// It will typically be implemented on data providers used in datagen.
+///
+/// The [`make_exportable_provider!`] macro is able to automatically implement this trait.
+///
+/// [`make_exportable_provider!`]: crate::make_exportable_provider
+pub trait DataConverter<MFrom: DataMarker, MTo: DataMarker> {
+ /// Attempt to convert a payload corresponding to the given data key
+ /// from one marker type to another marker type.
+ ///
+ /// If this is not possible (for example, if the provider does not know about the key),
+ /// the original payload is returned back to the caller.
+ ///
+ /// # Errors
+ ///
+ /// On failure, the `Err` variant is a tuple of the unconverted input payload
+ /// (handed back so the caller keeps ownership of it) and the [`DataError`]
+ /// describing why the conversion did not happen.
+ fn convert(
+ &self,
+ key: DataKey,
+ from: DataPayload<MFrom>,
+ ) -> Result<DataPayload<MTo>, (DataPayload<MFrom>, DataError)>;
+}
+
+/// Forwards [`DataConverter`] through a [`Box`], so that a boxed converter
+/// (including an unsized one, since `P` may be `?Sized`) can itself be used as a converter.
+impl<MFrom: DataMarker, MTo: DataMarker, P: DataConverter<MFrom, MTo> + ?Sized>
+    DataConverter<MFrom, MTo> for Box<P>
+{
+    fn convert(
+        &self,
+        key: DataKey,
+        from: DataPayload<MFrom>,
+    ) -> Result<DataPayload<MTo>, (DataPayload<MFrom>, DataError)> {
+        // Deref-coerce `&Box<P>` to `&P` and delegate to the wrapped converter.
+        let inner: &P = self;
+        <P as DataConverter<MFrom, MTo>>::convert(inner, key, from)
+    }
+}
diff --git a/vendor/icu_provider/src/datagen/heap_measure.rs b/vendor/icu_provider/src/datagen/heap_measure.rs
new file mode 100644
index 000000000..d451f3ebe
--- /dev/null
+++ b/vendor/icu_provider/src/datagen/heap_measure.rs
@@ -0,0 +1,59 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::buf::{BufferFormat, BufferMarker};
+use crate::prelude::*;
+use yoke::trait_hack::YokeTraitHack;
+
+/// Stats on the heap size needed when attempting to zero-copy-deserialize
+/// a postcard-formatted data struct.
+///
+/// Values of this type are produced by
+/// `DataPayload::<BufferMarker>::attempt_zero_copy_heap_size`, which fills both
+/// fields with the difference between two `dhat` heap snapshots.
+#[derive(Debug, Copy, Clone, yoke::Yokeable, Default)]
+#[non_exhaustive]
+pub struct HeapStats {
+ /// Total number of bytes allocated during deserialization, whether or not
+ /// they were freed again afterwards.
+ pub total_bytes_allocated: u64,
+ /// Number of bytes allocated during deserialization that have not yet been freed
+ /// at the time the measurement ends.
+ pub net_bytes_allocated: usize,
+}
+
+/// The [`DataMarker`] marker type for [`HeapStats`].
+///
+/// This is the target marker of the `DataConverter` implementation that
+/// `make_exportable_provider!` generates for `BufferMarker` payloads.
+#[allow(clippy::exhaustive_structs)] // marker type
+pub struct HeapStatsMarker;
+
+impl DataMarker for HeapStatsMarker {
+ // Payloads tagged with this marker carry a `HeapStats` value.
+ type Yokeable = HeapStats;
+}
+
+impl DataPayload<BufferMarker> {
+    /// Given a buffer known to be in postcard-1.0 format, attempt to zero-copy
+    /// deserialize it and record the amount of heap allocations that occurred.
+    ///
+    /// Ideally, this number should be zero.
+    ///
+    /// The result is the difference between two [`dhat`] heap snapshots, one taken
+    /// immediately before and one immediately after deserializing the buffer into a
+    /// `DataPayload<M>`. The deserialized payload is still alive when the second
+    /// snapshot is taken and is dropped when this function returns.
+    ///
+    /// [`dhat`]'s profiler must be initialized before using this.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the buffer is not in postcard-1.0 format, i.e. if deserializing it
+    /// with [`BufferFormat::Postcard1`] fails.
+    #[allow(clippy::expect_used)] // The function documents when panics may occur.
+    pub fn attempt_zero_copy_heap_size<M>(self) -> HeapStats
+    where
+        M: DataMarker,
+        for<'a> &'a <M::Yokeable as yoke::Yokeable<'a>>::Output: serde::Serialize,
+        for<'de> YokeTraitHack<<M::Yokeable as yoke::Yokeable<'de>>::Output>:
+            serde::Deserialize<'de>,
+    {
+        let stats_before = dhat::HeapStats::get();
+        // Reify, but do nothing with the type. The binding only exists to keep the
+        // deserialized payload alive until the second snapshot has been taken.
+        let _reified_data: DataPayload<M> = self
+            .into_deserialized(BufferFormat::Postcard1)
+            .expect("Failed to deserialize BufferMarker as postcard-1.0");
+        let stats_after = dhat::HeapStats::get();
+
+        HeapStats {
+            total_bytes_allocated: stats_after.total_bytes - stats_before.total_bytes,
+            // The live-byte count can be lower after than before if memory was released
+            // between the two snapshots (presumably possible because the snapshots are not
+            // scoped to this call); report zero in that case instead of underflowing.
+            net_bytes_allocated: stats_after
+                .curr_bytes
+                .saturating_sub(stats_before.curr_bytes),
+        }
+    }
+}
diff --git a/vendor/icu_provider/src/datagen/iter.rs b/vendor/icu_provider/src/datagen/iter.rs
new file mode 100644
index 000000000..6175d89c6
--- /dev/null
+++ b/vendor/icu_provider/src/datagen/iter.rs
@@ -0,0 +1,35 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Collection of iteration APIs for data providers.
+
+use crate::prelude::*;
+
+/// A [`DynamicDataProvider`] that can iterate over all supported [`DataLocale`] for a certain key.
+///
+/// Implementing this trait means that a data provider knows all of the data it can successfully
+/// return from a load request.
+pub trait IterableDynamicDataProvider<M: DataMarker>: DynamicDataProvider<M> {
+ /// Given a [`DataKey`], returns the list of [`DataLocale`]s supported for that key.
+ ///
+ /// # Errors
+ ///
+ /// Returns a [`DataError`] if the list cannot be produced. The implementation
+ /// generated by `make_exportable_provider!` reports `DataErrorKind::MissingDataKey`
+ /// for keys that are not among the provider's registered markers.
+ fn supported_locales_for_key(&self, key: DataKey) -> Result<Vec<DataLocale>, DataError>;
+}
+
+/// A [`DataProvider`] that can iterate over all supported [`DataLocale`] for a certain key.
+///
+/// Implementing this trait means that a data provider knows all of the data it can successfully
+/// return from a load request.
+///
+/// Unlike `IterableDynamicDataProvider`, no key is passed at call time: the marker `M`
+/// is a [`KeyedDataMarker`], so the key is fixed by the marker type.
+pub trait IterableDataProvider<M: KeyedDataMarker>: DataProvider<M> {
+ /// Returns the list of [`DataLocale`]s supported for the marker `M`.
+ ///
+ /// # Errors
+ ///
+ /// Returns a [`DataError`] if the list of locales cannot be produced.
+ fn supported_locales(&self) -> Result<Vec<DataLocale>, DataError>;
+}
+
+/// Forwards [`IterableDynamicDataProvider`] through a `Box`, so that a boxed provider
+/// (including an unsized one, since `P` may be `?Sized`) can be iterated like the provider itself.
+impl<M: DataMarker, P: IterableDynamicDataProvider<M> + ?Sized> IterableDynamicDataProvider<M>
+    for Box<P>
+{
+    fn supported_locales_for_key(&self, key: DataKey) -> Result<Vec<DataLocale>, DataError> {
+        // Deref-coerce `&Box<P>` to `&P` and delegate to the wrapped provider.
+        let inner: &P = self;
+        <P as IterableDynamicDataProvider<M>>::supported_locales_for_key(inner, key)
+    }
+}
diff --git a/vendor/icu_provider/src/datagen/mod.rs b/vendor/icu_provider/src/datagen/mod.rs
new file mode 100644
index 000000000..5ede82275
--- /dev/null
+++ b/vendor/icu_provider/src/datagen/mod.rs
@@ -0,0 +1,120 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! This module contains various utilities required to generate ICU4X data files, typically
+//! via the `icu_datagen` reference crate. End users should not need to consume anything in
+//! this module as a library unless defining new types that integrate with `icu_datagen`.
+//!
+//! This module can be enabled with the `datagen` feature on `icu_provider`.
+
+mod data_conversion;
+mod heap_measure;
+mod iter;
+mod payload;
+pub use data_conversion::DataConverter;
+pub use heap_measure::{HeapStats, HeapStatsMarker};
+pub use iter::IterableDataProvider;
+
+#[doc(hidden)] // exposed for make_exportable_provider
+pub use iter::IterableDynamicDataProvider;
+#[doc(hidden)] // exposed for make_exportable_provider
+pub use payload::{ExportBox, ExportMarker};
+
+use crate::prelude::*;
+
+/// An object capable of exporting data payloads in some form.
+///
+/// Implementors must be [`Sync`]: `put_payload` and `flush` take `&self` so that they
+/// can be called concurrently, while `close` requires exclusive access.
+pub trait DataExporter: Sync {
+ /// Save a `payload` corresponding to the given key and locale.
+ /// Takes non-mut self as it can be called concurrently.
+ ///
+ /// # Errors
+ ///
+ /// Returns a [`DataError`] if the payload could not be saved.
+ fn put_payload(
+ &self,
+ key: DataKey,
+ locale: &DataLocale,
+ payload: &DataPayload<ExportMarker>,
+ ) -> Result<(), DataError>;
+
+ /// Function called after all keys have been fully dumped.
+ /// Takes non-mut self as it can be called concurrently.
+ ///
+ /// The default implementation does nothing and returns `Ok(())`.
+ ///
+ /// NOTE(review): the signature receives a single [`DataKey`], which suggests this is
+ /// invoked once per key after that key has been dumped — confirm against the callers.
+ fn flush(&self, _key: DataKey) -> Result<(), DataError> {
+ Ok(())
+ }
+
+ /// This function has to be called before the object is dropped (after all
+ /// keys have been fully dumped). This conceptually takes ownership, so
+ /// clients *may not* interact with this object after close has been called.
+ ///
+ /// The default implementation does nothing and returns `Ok(())`.
+ fn close(&mut self) -> Result<(), DataError> {
+ Ok(())
+ }
+}
+
+/// A [`DynamicDataProvider`] that can be used for exporting data.
+///
+/// This trait has no methods of its own: it is shorthand for a provider that is [`Sync`]
+/// and implements [`IterableDynamicDataProvider`] for [`ExportMarker`].
+///
+/// Use [`make_exportable_provider!`](crate::make_exportable_provider) to implement this.
+pub trait ExportableProvider: IterableDynamicDataProvider<ExportMarker> + Sync {}
+
+/// Blanket implementation: every type that satisfies the supertrait bounds is exportable.
+impl<T: IterableDynamicDataProvider<ExportMarker> + Sync> ExportableProvider for T {}
+
+/// This macro can be used on a data provider to allow it to be used for data generation.
+///
+/// Data generation 'compiles' data by using this data provider (which usually translates data from
+/// different sources and doesn't have to be efficient) to generate data structs, and then writing
+/// them to an efficient format like [`BlobDataProvider`] or [`BakedDataProvider`]. The requirements
+/// for `make_exportable_provider` are:
+/// * The data struct has to implement [`serde::Serialize`](::serde::Serialize) and [`databake::Bake`]
+/// * The provider needs to implement [`IterableDataProvider`] for all specified [`KeyedDataMarker`]s.
+///   This allows the generating code to know which [`DataLocale`] to collect.
+///
+/// The macro is invoked as `make_exportable_provider!(Provider, [MarkerA, MarkerB]);`; a trailing
+/// comma after the last marker is accepted but not required. For the given provider type it emits:
+/// * two `impl_dynamic_data_provider!` expansions, one for `ExportMarker` and one for `AnyMarker`
+/// * an `IterableDynamicDataProvider<ExportMarker>` implementation that dispatches on the key hash
+///   to the matching `IterableDataProvider::supported_locales`
+/// * a `DataConverter<BufferMarker, HeapStatsMarker>` implementation that dispatches on the key hash
+///   to `attempt_zero_copy_heap_size` for the matching marker
+///
+/// Both generated dispatchers fail with `DataErrorKind::MissingDataKey` for a key that does not
+/// belong to any of the listed markers.
+///
+/// [`BlobDataProvider`]: ../../icu_provider_blob/struct.BlobDataProvider.html
+/// [`BakedDataProvider`]: ../../icu_datagen/index.html
+#[macro_export]
+macro_rules! make_exportable_provider {
+    // `$(,)?` makes the trailing comma of the marker list optional.
+    ($provider:ty, [ $($struct_m:ident),+ $(,)? ]) => {
+        $crate::impl_dynamic_data_provider!(
+            $provider,
+            [ $($struct_m),+, ],
+            $crate::datagen::ExportMarker
+        );
+        $crate::impl_dynamic_data_provider!(
+            $provider,
+            [ $($struct_m),+, ],
+            $crate::any::AnyMarker
+        );
+
+        impl $crate::datagen::IterableDynamicDataProvider<$crate::datagen::ExportMarker> for $provider {
+            fn supported_locales_for_key(&self, key: $crate::DataKey) -> Result<Vec<$crate::DataLocale>, $crate::DataError> {
+                #![allow(non_upper_case_globals)]
+                // Reusing the struct names as identifiers, so that each marker's key hash
+                // is available as a constant pattern in the `match` below.
+                $(
+                    const $struct_m: $crate::DataKeyHash = <$struct_m as $crate::KeyedDataMarker>::KEY.hashed();
+                )+
+                match key.hashed() {
+                    $(
+                        $struct_m => {
+                            $crate::datagen::IterableDataProvider::<$struct_m>::supported_locales(self)
+                        }
+                    )+,
+                    _ => Err($crate::DataErrorKind::MissingDataKey.with_key(key))
+                }
+            }
+        }
+
+        impl $crate::datagen::DataConverter<$crate::buf::BufferMarker, $crate::datagen::HeapStatsMarker> for $provider {
+            fn convert(&self, key: $crate::DataKey, from: $crate::DataPayload<$crate::buf::BufferMarker>) -> Result<$crate::DataPayload<$crate::datagen::HeapStatsMarker>, ($crate::DataPayload<$crate::buf::BufferMarker>, $crate::DataError)> {
+                #![allow(non_upper_case_globals)]
+                // Reusing the struct names as identifiers, so that each marker's key hash
+                // is available as a constant pattern in the `match` below.
+                $(
+                    const $struct_m: $crate::DataKeyHash = <$struct_m as $crate::KeyedDataMarker>::KEY.hashed();
+                )+
+                match key.hashed() {
+                    $(
+                        $struct_m => {
+                            let heap_stats = from.attempt_zero_copy_heap_size::<$struct_m>();
+                            Ok($crate::DataPayload::from_owned(heap_stats))
+                        }
+                    )+,
+                    // Unknown key: hand the untouched buffer back together with the error.
+                    _ => Err((from, $crate::DataErrorKind::MissingDataKey.with_key(key)))
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/icu_provider/src/datagen/payload.rs b/vendor/icu_provider/src/datagen/payload.rs
new file mode 100644
index 000000000..b6ea8049f
--- /dev/null
+++ b/vendor/icu_provider/src/datagen/payload.rs
@@ -0,0 +1,137 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::dynutil::UpcastDataPayload;
+use crate::prelude::*;
+use crate::yoke::*;
+use alloc::boxed::Box;
+use databake::{Bake, CrateEnv, TokenStream};
+
+/// Internal interface over a yoke whose contents can be exported.
+///
+/// It is used as a trait object (`dyn ExportableYoke`) inside `ExportBox`, which lets
+/// payloads of different concrete types be baked or serialized through one type.
+trait ExportableYoke {
+ /// Produces the [`TokenStream`] for the yoked value, using `env` as the [`CrateEnv`].
+ fn bake_yoke(&self, env: &CrateEnv) -> TokenStream;
+ /// Writes the yoked value into the given type-erased serializer.
+ ///
+ /// Returns a [`DataError`] if serialization fails.
+ fn serialize_yoke(
+ &self,
+ serializer: &mut dyn erased_serde::Serializer,
+ ) -> Result<(), DataError>;
+}
+
+/// Any `Yoke` whose borrowed output implements both `Bake` and `serde::Serialize`
+/// can be exported, regardless of its cart type `C`.
+impl<Y, C> ExportableYoke for Yoke<Y, C>
+where
+    Y: for<'a> Yokeable<'a>,
+    for<'a> <Y as Yokeable<'a>>::Output: Bake + serde::Serialize,
+{
+    fn bake_yoke(&self, env: &CrateEnv) -> TokenStream {
+        self.get().bake(env)
+    }
+
+    fn serialize_yoke(
+        &self,
+        serializer: &mut dyn erased_serde::Serializer,
+    ) -> Result<(), DataError> {
+        use erased_serde::Serialize;
+        match self.get().erased_serialize(serializer) {
+            // The serializer's success value is not needed; only success itself matters.
+            Ok(_) => Ok(()),
+            // Wrap the serde failure in a `DataError`, keeping its message as context.
+            Err(e) => Err(DataError::custom("Serde export").with_display_context(&e)),
+        }
+    }
+}
+
+/// Type-erased container for an exportable payload; this is the `Yokeable` of `ExportMarker`.
+#[doc(hidden)] // exposed for make_exportable_provider
+#[derive(yoke::Yokeable)]
+pub struct ExportBox {
+ // The original payload's yoke, boxed behind the internal `ExportableYoke` interface.
+ payload: Box<dyn ExportableYoke + Sync>,
+}
+
+/// Allows any payload whose data is `Sync`, bakeable and serializable to be erased
+/// into a `DataPayload<ExportMarker>`.
+impl<M> UpcastDataPayload<M> for ExportMarker
+where
+    M: DataMarker,
+    M::Yokeable: Sync,
+    for<'a> <M::Yokeable as Yokeable<'a>>::Output: Bake + serde::Serialize,
+{
+    fn upcast(other: DataPayload<M>) -> DataPayload<ExportMarker> {
+        // Move the concrete yoke behind the type-erased export interface.
+        let payload: Box<dyn ExportableYoke + Sync> = Box::new(other.yoke);
+        DataPayload::from_owned(ExportBox { payload })
+    }
+}
+
+impl DataPayload<ExportMarker> {
+    /// Writes the value held by this [`DataPayload`] into the given Serde serializer.
+    ///
+    /// The serializer is type-erased internally before the payload is written to it,
+    /// and any value the serializer would return on success is discarded.
+    ///
+    /// # Errors
+    ///
+    /// Returns a [`DataError`] if serialization fails.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use icu_provider::datagen::*;
+    /// use icu_provider::dynutil::UpcastDataPayload;
+    /// use icu_provider::hello_world::HelloWorldV1Marker;
+    /// use icu_provider::prelude::*;
+    ///
+    /// // Create an example DataPayload
+    /// let payload: DataPayload<HelloWorldV1Marker> = Default::default();
+    /// let export: DataPayload<ExportMarker> = UpcastDataPayload::upcast(payload);
+    ///
+    /// // Serialize the payload to a JSON string
+    /// let mut buffer: Vec<u8> = vec![];
+    /// export
+    ///     .serialize(&mut serde_json::Serializer::new(&mut buffer))
+    ///     .expect("Serialization should succeed");
+    /// assert_eq!("{\"message\":\"(und) Hello World\"}".as_bytes(), buffer);
+    /// ```
+    pub fn serialize<S>(&self, serializer: S) -> Result<(), DataError>
+    where
+        S: serde::Serializer,
+        S::Ok: 'static, // erased_serde requirement, cannot return values in `Ok`
+    {
+        let mut erased = <dyn erased_serde::Serializer>::erase(serializer);
+        self.get().payload.serialize_yoke(&mut erased)
+    }
+
+    /// Converts the value held by this [`DataPayload`] into a [`TokenStream`]
+    /// by way of its [`Bake`] implementation.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use icu_provider::datagen::*;
+    /// use icu_provider::dynutil::UpcastDataPayload;
+    /// use icu_provider::hello_world::HelloWorldV1Marker;
+    /// use icu_provider::prelude::*;
+    /// # use databake::quote;
+    /// # use std::collections::BTreeSet;
+    ///
+    /// // Create an example DataPayload
+    /// let payload: DataPayload<HelloWorldV1Marker> = Default::default();
+    /// let export: DataPayload<ExportMarker> = UpcastDataPayload::upcast(payload);
+    ///
+    /// let env = databake::CrateEnv::default();
+    /// let tokens = export.tokenize(&env);
+    /// assert_eq!(
+    ///     quote! {
+    ///         ::icu_provider::hello_world::HelloWorldV1 {
+    ///             message: alloc::borrow::Cow::Borrowed("(und) Hello World"),
+    ///         }
+    ///     }
+    ///     .to_string(),
+    ///     tokens.to_string()
+    /// );
+    /// assert_eq!(
+    ///     env.into_iter().collect::<BTreeSet<_>>(),
+    ///     ["icu_provider", "alloc"]
+    ///         .into_iter()
+    ///         .collect::<BTreeSet<_>>()
+    /// );
+    /// ```
+    pub fn tokenize(&self, env: &CrateEnv) -> TokenStream {
+        let export_box = self.get();
+        export_box.payload.bake_yoke(env)
+    }
+}
+
+/// Marker type for [`ExportBox`].
+///
+/// A `DataPayload<ExportMarker>` is obtained by upcasting a concrete payload through
+/// `UpcastDataPayload`, and can then be serialized or turned into a token stream.
+#[allow(clippy::exhaustive_structs)] // marker type
+pub struct ExportMarker {}
+
+impl DataMarker for ExportMarker {
+ // Payloads tagged with this marker carry a type-erased `ExportBox`.
+ type Yokeable = ExportBox;
+}