diff options
Diffstat (limited to 'intl/l10n/rust/l10nregistry-rs/src')
16 files changed, 2767 insertions, 0 deletions
diff --git a/intl/l10n/rust/l10nregistry-rs/src/env.rs b/intl/l10n/rust/l10nregistry-rs/src/env.rs new file mode 100644 index 0000000000..7cd1ff30f4 --- /dev/null +++ b/intl/l10n/rust/l10nregistry-rs/src/env.rs @@ -0,0 +1,5 @@ +use crate::errors::L10nRegistryError; + +pub trait ErrorReporter { + fn report_errors(&self, errors: Vec<L10nRegistryError>); +} diff --git a/intl/l10n/rust/l10nregistry-rs/src/errors.rs b/intl/l10n/rust/l10nregistry-rs/src/errors.rs new file mode 100644 index 0000000000..d58f02ea8e --- /dev/null +++ b/intl/l10n/rust/l10nregistry-rs/src/errors.rs @@ -0,0 +1,74 @@ +use fluent_bundle::FluentError; +use fluent_fallback::types::ResourceId; +use std::error::Error; +use unic_langid::LanguageIdentifier; + +#[derive(Debug, Clone, PartialEq)] +pub enum L10nRegistryError { + FluentError { + resource_id: ResourceId, + loc: Option<(usize, usize)>, + error: FluentError, + }, + MissingResource { + locale: LanguageIdentifier, + resource_id: ResourceId, + }, +} + +impl std::fmt::Display for L10nRegistryError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::MissingResource { + locale, + resource_id, + } => { + write!( + f, + "Missing resource in locale {}: {}", + locale, resource_id.value + ) + } + Self::FluentError { + resource_id, + loc, + error, + } => { + if let Some(loc) = loc { + write!( + f, + "Fluent Error in {}[line: {}, col: {}]: {}", + resource_id.value, loc.0, loc.1, error + ) + } else { + write!(f, "Fluent Error in {}: {}", resource_id.value, error) + } + } + } + } +} + +impl Error for L10nRegistryError {} + +#[derive(Debug, Clone, PartialEq)] +pub enum L10nRegistrySetupError { + RegistryLocked, + DuplicatedSource { name: String }, + MissingSource { name: String }, +} + +impl std::fmt::Display for L10nRegistrySetupError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::RegistryLocked => write!(f, "Can't modify a registry when locked."), + 
Self::DuplicatedSource { name } => { + write!(f, "Source with a name {} is already registered.", &name) + } + Self::MissingSource { name } => { + write!(f, "Cannot find a source with a name {}.", &name) + } + } + } +} + +impl Error for L10nRegistrySetupError {} diff --git a/intl/l10n/rust/l10nregistry-rs/src/fluent.rs b/intl/l10n/rust/l10nregistry-rs/src/fluent.rs new file mode 100644 index 0000000000..b6ac2a12ab --- /dev/null +++ b/intl/l10n/rust/l10nregistry-rs/src/fluent.rs @@ -0,0 +1,5 @@ +use fluent_bundle::FluentBundle as FluentBundleBase; +pub use fluent_bundle::{FluentError, FluentResource}; +use std::rc::Rc; + +pub type FluentBundle = FluentBundleBase<Rc<FluentResource>>; diff --git a/intl/l10n/rust/l10nregistry-rs/src/lib.rs b/intl/l10n/rust/l10nregistry-rs/src/lib.rs new file mode 100644 index 0000000000..cbd72c09ce --- /dev/null +++ b/intl/l10n/rust/l10nregistry-rs/src/lib.rs @@ -0,0 +1,8 @@ +pub mod env; +pub mod errors; +pub mod fluent; +pub mod registry; +pub mod solver; +pub mod source; +#[cfg(feature = "test-fluent")] +pub mod testing; diff --git a/intl/l10n/rust/l10nregistry-rs/src/registry/asynchronous.rs b/intl/l10n/rust/l10nregistry-rs/src/registry/asynchronous.rs new file mode 100644 index 0000000000..bfcff941b5 --- /dev/null +++ b/intl/l10n/rust/l10nregistry-rs/src/registry/asynchronous.rs @@ -0,0 +1,294 @@ +use std::{ + pin::Pin, + task::{Context, Poll}, +}; + +use crate::{ + env::ErrorReporter, + errors::{L10nRegistryError, L10nRegistrySetupError}, + fluent::{FluentBundle, FluentError}, + registry::{BundleAdapter, L10nRegistry, MetaSources}, + solver::{AsyncTester, ParallelProblemSolver}, + source::{ResourceOption, ResourceStatus}, +}; + +use fluent_fallback::{generator::BundleStream, types::ResourceId}; +use futures::{ + stream::{Collect, FuturesOrdered}, + Stream, StreamExt, +}; +use std::future::Future; +use unic_langid::LanguageIdentifier; + +impl<P, B> L10nRegistry<P, B> +where + P: Clone, + B: Clone, +{ + /// This method is useful for 
testing various configurations. + #[cfg(feature = "test-fluent")] + pub fn generate_bundles_for_lang( + &self, + langid: LanguageIdentifier, + resource_ids: Vec<ResourceId>, + ) -> Result<GenerateBundles<P, B>, L10nRegistrySetupError> { + let lang_ids = vec![langid]; + + Ok(GenerateBundles::new( + self.clone(), + lang_ids.into_iter(), + resource_ids, + // Cheaply create an immutable shallow copy of the [MetaSources]. + self.try_borrow_metasources()?.clone(), + )) + } + + // Asynchronously generate the bundles. + pub fn generate_bundles( + &self, + locales: std::vec::IntoIter<LanguageIdentifier>, + resource_ids: Vec<ResourceId>, + ) -> Result<GenerateBundles<P, B>, L10nRegistrySetupError> { + Ok(GenerateBundles::new( + self.clone(), + locales, + resource_ids, + // Cheaply create an immutable shallow copy of the [MetaSources]. + self.try_borrow_metasources()?.clone(), + )) + } +} + +/// This enum contains the various states the [GenerateBundles] can be in during the +/// asynchronous generation step. +enum State<P, B> { + Empty, + Locale(LanguageIdentifier), + Solver { + locale: LanguageIdentifier, + solver: ParallelProblemSolver<GenerateBundles<P, B>>, + }, +} + +impl<P, B> Default for State<P, B> { + fn default() -> Self { + Self::Empty + } +} + +impl<P, B> State<P, B> { + fn get_locale(&self) -> &LanguageIdentifier { + match self { + Self::Locale(locale) => locale, + Self::Solver { locale, .. 
} => locale, + Self::Empty => unreachable!("Attempting to get a locale for an empty state."), + } + } + + fn take_solver(&mut self) -> ParallelProblemSolver<GenerateBundles<P, B>> { + replace_with::replace_with_or_default_and_return(self, |self_| match self_ { + Self::Solver { locale, solver } => (solver, Self::Locale(locale)), + _ => unreachable!("Attempting to take a solver in an invalid state."), + }) + } + + fn put_back_solver(&mut self, solver: ParallelProblemSolver<GenerateBundles<P, B>>) { + replace_with::replace_with_or_default(self, |self_| match self_ { + Self::Locale(locale) => Self::Solver { locale, solver }, + _ => unreachable!("Attempting to put back a solver in an invalid state."), + }) + } +} + +pub struct GenerateBundles<P, B> { + /// Do not access the metasources in the registry, as they may be mutated between + /// async iterations. + reg: L10nRegistry<P, B>, + /// This is an immutable shallow copy of the MetaSources that should not be mutated + /// during the iteration process. This ensures that the iterator will still be + /// valid if the L10nRegistry is mutated while iterating through the sources. 
+ metasources: MetaSources, + locales: std::vec::IntoIter<LanguageIdentifier>, + current_metasource: usize, + resource_ids: Vec<ResourceId>, + state: State<P, B>, +} + +impl<P, B> GenerateBundles<P, B> { + fn new( + reg: L10nRegistry<P, B>, + locales: std::vec::IntoIter<LanguageIdentifier>, + resource_ids: Vec<ResourceId>, + metasources: MetaSources, + ) -> Self { + Self { + reg, + metasources, + locales, + current_metasource: 0, + resource_ids, + state: State::Empty, + } + } +} + +pub type ResourceSetStream = Collect<FuturesOrdered<ResourceStatus>, Vec<ResourceOption>>; +pub struct TestResult(ResourceSetStream); +impl std::marker::Unpin for TestResult {} + +impl Future for TestResult { + type Output = Vec<bool>; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> { + let pinned = Pin::new(&mut self.0); + pinned + .poll(cx) + .map(|set| set.iter().map(|c| !c.is_required_and_missing()).collect()) + } +} + +impl<'l, P, B> AsyncTester for GenerateBundles<P, B> { + type Result = TestResult; + + fn test_async(&self, query: Vec<(usize, usize)>) -> Self::Result { + let locale = self.state.get_locale(); + + let stream = query + .iter() + .map(|(res_idx, source_idx)| { + let resource_id = &self.resource_ids[*res_idx]; + self.metasources + .filesource(self.current_metasource, *source_idx) + .fetch_file(locale, resource_id) + }) + .collect::<FuturesOrdered<_>>(); + TestResult(stream.collect::<_>()) + } +} + +#[async_trait::async_trait(?Send)] +impl<P, B> BundleStream for GenerateBundles<P, B> { + async fn prefetch_async(&mut self) { + todo!(); + } +} + +/// Generate [FluentBundles](FluentBundle) asynchronously. +impl<P, B> Stream for GenerateBundles<P, B> +where + P: ErrorReporter, + B: BundleAdapter, +{ + type Item = Result<FluentBundle, (FluentBundle, Vec<FluentError>)>; + + /// Asynchronously try and get a solver, and then with the solver generate a bundle. 
+ /// If the solver is not ready yet, then this function will return as `Pending`, and + /// the Future runner will need to re-enter at a later point to try again. + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> { + if self.metasources.is_empty() { + // There are no metasources available, so no bundles can be generated. + return None.into(); + } + loop { + if let State::Solver { .. } = self.state { + // A solver has already been set up, continue iterating through the + // resources and generating a bundle. + + // Pin the solver so that the async try_poll_next can be called. + let mut solver = self.state.take_solver(); + let pinned_solver = Pin::new(&mut solver); + + if let std::task::Poll::Ready(solver_result) = + pinned_solver.try_poll_next(cx, &self, false) + { + // The solver is ready, but may not have generated an ordering. + + if let Ok(Some(order)) = solver_result { + // The solver resolved an ordering, and a bundle may be able + // to be generated. + + let bundle = self.metasources.bundle_from_order( + self.current_metasource, + self.state.get_locale().clone(), + &order, + &self.resource_ids, + &self.reg.shared.provider, + self.reg.shared.bundle_adapter.as_ref(), + ); + + self.state.put_back_solver(solver); + + if bundle.is_some() { + // The bundle was successfully generated. + return bundle.into(); + } + + // No bundle was generated, continue on. + continue; + } + + // There is no bundle ordering available. + + if self.current_metasource > 0 { + // There are more metasources, create a new solver and try the + // next metasource. If there is an error in the solver_result + // ignore it for now, since there are more metasources. 
+ self.current_metasource -= 1; + let solver = ParallelProblemSolver::new( + self.resource_ids.len(), + self.metasources.get(self.current_metasource).len(), + ); + self.state = State::Solver { + locale: self.state.get_locale().clone(), + solver, + }; + continue; + } + + if let Err(idx) = solver_result { + // Since there are no more metasources, and there is an error, + // report it instead of ignoring it. + self.reg.shared.provider.report_errors(vec![ + L10nRegistryError::MissingResource { + locale: self.state.get_locale().clone(), + resource_id: self.resource_ids[idx].clone(), + }, + ]); + } + + // There are no more metasources. + self.state = State::Empty; + continue; + } + + // The solver is not ready yet, so exit out of this async task + // and mark it as pending. It can be tried again later. + self.state.put_back_solver(solver); + return std::task::Poll::Pending; + } + + // There are no more metasources to search. + + // Try the next locale. + if let Some(locale) = self.locales.next() { + // Restart at the end of the metasources for this locale, and iterate + // backwards. + let last_metasource_idx = self.metasources.len() - 1; + self.current_metasource = last_metasource_idx; + + let solver = ParallelProblemSolver::new( + self.resource_ids.len(), + self.metasources.get(self.current_metasource).len(), + ); + self.state = State::Solver { locale, solver }; + + // Continue iterating on the next solver. + continue; + } + + // There are no more locales or metasources to search. This iterator + // is done. 
+ return None.into(); + } + } +} diff --git a/intl/l10n/rust/l10nregistry-rs/src/registry/mod.rs b/intl/l10n/rust/l10nregistry-rs/src/registry/mod.rs new file mode 100644 index 0000000000..c342aa55aa --- /dev/null +++ b/intl/l10n/rust/l10nregistry-rs/src/registry/mod.rs @@ -0,0 +1,363 @@ +mod asynchronous; +mod synchronous; + +use crate::{ + env::ErrorReporter, + errors::L10nRegistrySetupError, + fluent::FluentBundle, + source::{FileSource, ResourceId}, +}; +use fluent_bundle::FluentResource; +use fluent_fallback::generator::BundleGenerator; +use rustc_hash::FxHashSet; +use std::{ + cell::{Ref, RefCell, RefMut}, + collections::HashSet, + rc::Rc, +}; +use unic_langid::LanguageIdentifier; + +pub use asynchronous::GenerateBundles; +pub use synchronous::GenerateBundlesSync; + +pub type FluentResourceSet = Vec<Rc<FluentResource>>; + +/// The shared information that makes up the configuration of the L10nRegistry. It is +/// broken out into a separate struct so that it can be shared via an Rc pointer. +#[derive(Default)] +struct Shared<P, B> { + metasources: RefCell<MetaSources>, + provider: P, + bundle_adapter: Option<B>, +} + +/// [FileSources](FileSource) represent a single directory location to look for .ftl +/// files. These are stored in a [Vec]. For instance, in a built version of Firefox with +/// the en-US locale, each [FileSource] may represent a different folder with many +/// different files. +/// +/// Firefox supports other *meta sources* for localization files in the form of language +/// packs which can be downloaded from the addon store. These language packs then would +/// be a separate metasource from the app's language. This [MetaSources] adds another [Vec] +/// over the [Vec] of [FileSources](FileSource) in order to provide a unified way to +/// iterate over all possible [FileSource] locations to finally obtain the final bundle. 
+/// +/// This structure uses an [Rc] to point to the [FileSource] so that a shallow copy +/// of these [FileSources](FileSource) can be obtained for iteration. This makes +/// it quick to copy the list of [MetaSources] for iteration, and guards against +/// invalidating the async nature of iteration when the underlying data mutates. +/// +/// Note that the async iteration of bundles is still only happening in one thread, +/// and is not multi-threaded. The processing is just split over time. +/// +/// The [MetaSources] are ultimately owned by the [Shared] in a [RefCell] so that the +/// source of truth can be mutated, and shallow copies of the [MetaSources] used for +/// iteration will be unaffected. +/// +/// Deriving [Clone] here is a relatively cheap operation, since the [Rc] will be cloned, +/// and point to the original [FileSource]. +#[derive(Default, Clone)] +pub struct MetaSources(Vec<Vec<Rc<FileSource>>>); + +impl MetaSources { + /// Iterate over all FileSources in all MetaSources. + pub fn filesources(&self) -> impl Iterator<Item = &Rc<FileSource>> { + self.0.iter().flatten() + } + + /// Iterate mutably over the metasources, each of which is a [Vec] of [FileSources](FileSource). + pub fn iter_mut(&mut self) -> impl Iterator<Item = &mut Vec<Rc<FileSource>>> { + self.0.iter_mut() + } + + /// The number of metasources. + pub fn len(&self) -> usize { + self.0.len() + } + + /// Returns true if there are no metasources. + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Clears out all metasources. + pub fn clear(&mut self) { + self.0.clear(); + } + + /// Clears out only empty metasources. + pub fn clear_empty_metasources(&mut self) { + self.0.retain(|metasource| !metasource.is_empty()); + } + + /// Adds a [FileSource] to its appropriate metasource. + pub fn add_filesource(&mut self, new_source: FileSource) { + if let Some(metasource) = self + .0 + .iter_mut() + .find(|source| source[0].metasource == new_source.metasource) + { + // A metasource was found, add to the existing one. 
+ metasource.push(Rc::new(new_source)); + } else { + // Create a new metasource. + self.0.push(vec![Rc::new(new_source)]); + } + } + + /// Replaces an existing, equal [FileSource] in its metasource; returns `false` if none was found. + pub fn update_filesource(&mut self, new_source: &FileSource) -> bool { + if let Some(metasource) = self + .0 + .iter_mut() + .find(|source| source[0].metasource == new_source.metasource) + { + if let Some(idx) = metasource.iter().position(|source| **source == *new_source) { + *metasource.get_mut(idx).unwrap() = Rc::new(new_source.clone()); + return true; + } + } + false + } + + /// Get a metasource by index, but provide a nice error message if the index + /// is out of bounds. + pub fn get(&self, metasource_idx: usize) -> &Vec<Rc<FileSource>> { + if let Some(metasource) = self.0.get(metasource_idx) { + return &metasource; + } + panic!( + "Metasource index of {} is out of range of the list of {} meta sources.", + metasource_idx, + self.0.len() + ); + } + + /// Get a [FileSource] from a metasource, counting `filesource_idx` from the end of the + /// metasource, but provide a nice error message if the index is out of bounds. + pub fn filesource(&self, metasource_idx: usize, filesource_idx: usize) -> &FileSource { + let metasource = self.get(metasource_idx); + let reversed_idx = metasource.len() - 1 - filesource_idx; + if let Some(file_source) = metasource.get(reversed_idx) { + return file_source; + } + panic!( + "File source index of {} is out of range of the list of {} file sources.", + filesource_idx, + metasource.len() + ); + } + + /// Get a [FileSource] by name from a metasource. This is useful for testing. + #[cfg(feature = "test-fluent")] + pub fn file_source_by_name(&self, metasource_idx: usize, name: &str) -> Option<&FileSource> { + use std::borrow::Borrow; + self.get(metasource_idx) + .iter() + .find(|&source| source.name == name) + .map(|source| source.borrow()) + } + + /// Get an iterator for the [FileSources](FileSource) that match the [LanguageIdentifier] + /// and [ResourceId]. 
+ #[cfg(feature = "test-fluent")] + pub fn get_sources_for_resource<'l>( + &'l self, + metasource_idx: usize, + langid: &'l LanguageIdentifier, + resource_id: &'l ResourceId, + ) -> impl Iterator<Item = &FileSource> { + use std::borrow::Borrow; + self.get(metasource_idx) + .iter() + .filter(move |source| source.has_file(langid, resource_id) != Some(false)) + .map(|source| source.borrow()) + } +} + +/// The [BundleAdapter] can adapt the bundle to the environment with such actions as +/// setting the platform, and hooking up functions such as Fluent's DATETIME and +/// NUMBER formatting functions. +pub trait BundleAdapter { + fn adapt_bundle(&self, bundle: &mut FluentBundle); +} + +/// The L10nRegistry is the main struct for owning the registry information. +/// +/// `P` - A provider +/// `B` - A bundle adapter +#[derive(Clone)] +pub struct L10nRegistry<P, B> { + shared: Rc<Shared<P, B>>, +} + +impl<P, B> L10nRegistry<P, B> { + /// Create a new [L10nRegistry] from a provider. + pub fn with_provider(provider: P) -> Self { + Self { + shared: Rc::new(Shared { + metasources: Default::default(), + provider, + bundle_adapter: None, + }), + } + } + + /// Set the bundle adapter. See [BundleAdapter] for more information. 
+ pub fn set_bundle_adapter(&mut self, bundle_adapter: B) -> Result<(), L10nRegistrySetupError> + where + B: BundleAdapter, + { + let shared = Rc::get_mut(&mut self.shared).ok_or(L10nRegistrySetupError::RegistryLocked)?; + shared.bundle_adapter = Some(bundle_adapter); + Ok(()) + } + + pub fn try_borrow_metasources(&self) -> Result<Ref<MetaSources>, L10nRegistrySetupError> { + self.shared + .metasources + .try_borrow() + .map_err(|_| L10nRegistrySetupError::RegistryLocked) + } + + pub fn try_borrow_metasources_mut( + &self, + ) -> Result<RefMut<MetaSources>, L10nRegistrySetupError> { + self.shared + .metasources + .try_borrow_mut() + .map_err(|_| L10nRegistrySetupError::RegistryLocked) + } + + /// Adds a new [FileSource] to the registry and to its appropriate metasource. If the + /// metasource for this [FileSource] does not exist, then it is created. + pub fn register_sources( + &self, + new_sources: Vec<FileSource>, + ) -> Result<(), L10nRegistrySetupError> { + for new_source in new_sources { + self.try_borrow_metasources_mut()? + .add_filesource(new_source); + } + Ok(()) + } + + /// Update the information about sources already stored in the registry. Each + /// [FileSource] provided must exist, or else a [L10nRegistrySetupError] will + /// be returned. + pub fn update_sources( + &self, + new_sources: Vec<FileSource>, + ) -> Result<(), L10nRegistrySetupError> { + for new_source in new_sources { + if !self + .try_borrow_metasources_mut()? + .update_filesource(&new_source) + { + return Err(L10nRegistrySetupError::MissingSource { + name: new_source.name, + }); + } + } + Ok(()) + } + + /// Remove the provided sources. If a metasource becomes empty after this operation, + /// the metasource is also removed. 
+ pub fn remove_sources<S>(&self, del_sources: Vec<S>) -> Result<(), L10nRegistrySetupError> + where + S: ToString, + { + let del_sources: Vec<String> = del_sources.into_iter().map(|s| s.to_string()).collect(); + + for metasource in self.try_borrow_metasources_mut()?.iter_mut() { + metasource.retain(|source| !del_sources.contains(&source.name)); + } + + self.try_borrow_metasources_mut()?.clear_empty_metasources(); + + Ok(()) + } + + /// Clears out all metasources and sources. + pub fn clear_sources(&self) -> Result<(), L10nRegistrySetupError> { + self.try_borrow_metasources_mut()?.clear(); + Ok(()) + } + + /// Flattens out all metasources and returns the complete list of source names. + pub fn get_source_names(&self) -> Result<Vec<String>, L10nRegistrySetupError> { + Ok(self + .try_borrow_metasources()? + .filesources() + .map(|s| s.name.clone()) + .collect()) + } + + /// Checks if any metasources has a source, by the name. + pub fn has_source(&self, name: &str) -> Result<bool, L10nRegistrySetupError> { + Ok(self + .try_borrow_metasources()? + .filesources() + .any(|source| source.name == name)) + } + + /// Get a [FileSource] by name by searching through all meta sources. + pub fn file_source_by_name( + &self, + name: &str, + ) -> Result<Option<FileSource>, L10nRegistrySetupError> { + Ok(self + .try_borrow_metasources()? + .filesources() + .find(|source| source.name == name) + .map(|source| (**source).clone())) + } + + /// Returns a unique list of locale names from all sources. + pub fn get_available_locales(&self) -> Result<Vec<LanguageIdentifier>, L10nRegistrySetupError> { + let mut result = HashSet::new(); + let metasources = self.try_borrow_metasources()?; + for source in metasources.filesources() { + for locale in source.locales() { + result.insert(locale); + } + } + Ok(result.into_iter().map(|l| l.to_owned()).collect()) + } +} + +/// Defines how to generate bundles synchronously and asynchronously. 
+impl<P, B> BundleGenerator for L10nRegistry<P, B> +where + P: ErrorReporter + Clone, + B: BundleAdapter + Clone, +{ + type Resource = Rc<FluentResource>; + type Iter = GenerateBundlesSync<P, B>; + type Stream = GenerateBundles<P, B>; + type LocalesIter = std::vec::IntoIter<LanguageIdentifier>; + + /// The synchronous version of the bundle generator. This is hooked into Gecko + /// code via the `l10nregistry_generate_bundles_sync` function. + fn bundles_iter( + &self, + locales: Self::LocalesIter, + resource_ids: FxHashSet<ResourceId>, + ) -> Self::Iter { + let resource_ids = resource_ids.into_iter().collect(); + self.generate_bundles_sync(locales, resource_ids) + } + + /// The asynchronous version of the bundle generator. This is hooked into Gecko + /// code via the `l10nregistry_generate_bundles` function. + fn bundles_stream( + &self, + locales: Self::LocalesIter, + resource_ids: FxHashSet<ResourceId>, + ) -> Self::Stream { + let resource_ids = resource_ids.into_iter().collect(); + self.generate_bundles(locales, resource_ids) + .expect("Unable to get the MetaSources.") + } +} diff --git a/intl/l10n/rust/l10nregistry-rs/src/registry/synchronous.rs b/intl/l10n/rust/l10nregistry-rs/src/registry/synchronous.rs new file mode 100644 index 0000000000..097ca68eee --- /dev/null +++ b/intl/l10n/rust/l10nregistry-rs/src/registry/synchronous.rs @@ -0,0 +1,307 @@ +use super::{BundleAdapter, L10nRegistry, MetaSources}; +use crate::env::ErrorReporter; +use crate::errors::L10nRegistryError; +use crate::fluent::{FluentBundle, FluentError}; +use crate::solver::{SerialProblemSolver, SyncTester}; +use crate::source::ResourceOption; +use fluent_fallback::{generator::BundleIterator, types::ResourceId}; +use unic_langid::LanguageIdentifier; + +impl MetaSources { + pub(crate) fn bundle_from_order<P, B>( + &self, + metasource: usize, + locale: LanguageIdentifier, + source_order: &[usize], + resource_ids: &[ResourceId], + error_reporter: &P, + bundle_adapter: Option<&B>, + ) -> 
Option<Result<FluentBundle, (FluentBundle, Vec<FluentError>)>> + where + P: ErrorReporter, + B: BundleAdapter, + { + let mut bundle = FluentBundle::new(vec![locale.clone()]); + + if let Some(bundle_adapter) = bundle_adapter { + bundle_adapter.adapt_bundle(&mut bundle); + } + + let mut errors = vec![]; + + for (&source_idx, resource_id) in source_order.iter().zip(resource_ids.iter()) { + let source = self.filesource(metasource, source_idx); + if let ResourceOption::Some(res) = + source.fetch_file_sync(&locale, resource_id, /* overload */ true) + { + if source.options.allow_override { + bundle.add_resource_overriding(res); + } else if let Err(err) = bundle.add_resource(res) { + errors.extend(err.into_iter().map(|error| L10nRegistryError::FluentError { + resource_id: resource_id.clone(), + loc: None, + error, + })); + } + } else if resource_id.is_required() { + return None; + } + } + + if !errors.is_empty() { + error_reporter.report_errors(errors); + } + Some(Ok(bundle)) + } +} + +impl<P, B> L10nRegistry<P, B> +where + P: Clone, + B: Clone, +{ + /// A test-only function for easily generating bundles for a single langid. + #[cfg(feature = "test-fluent")] + pub fn generate_bundles_for_lang_sync( + &self, + langid: LanguageIdentifier, + resource_ids: Vec<ResourceId>, + ) -> GenerateBundlesSync<P, B> { + let lang_ids = vec![langid]; + + GenerateBundlesSync::new(self.clone(), lang_ids.into_iter(), resource_ids) + } + + /// Wiring for hooking up the synchronous bundle generation to the + /// [BundleGenerator] trait. 
+ pub fn generate_bundles_sync( + &self, + locales: std::vec::IntoIter<LanguageIdentifier>, + resource_ids: Vec<ResourceId>, + ) -> GenerateBundlesSync<P, B> { + GenerateBundlesSync::new(self.clone(), locales, resource_ids) + } +} + +enum State { + Empty, + Locale(LanguageIdentifier), + Solver { + locale: LanguageIdentifier, + solver: SerialProblemSolver, + }, +} + +impl Default for State { + fn default() -> Self { + Self::Empty + } +} + +impl State { + fn get_locale(&self) -> &LanguageIdentifier { + match self { + Self::Locale(locale) => locale, + Self::Solver { locale, .. } => locale, + Self::Empty => unreachable!("Attempting to get a locale for an empty state."), + } + } + + fn take_solver(&mut self) -> SerialProblemSolver { + replace_with::replace_with_or_default_and_return(self, |self_| match self_ { + Self::Solver { locale, solver } => (solver, Self::Locale(locale)), + _ => unreachable!("Attempting to take a solver in an invalid state."), + }) + } + + fn put_back_solver(&mut self, solver: SerialProblemSolver) { + replace_with::replace_with_or_default(self, |self_| match self_ { + Self::Locale(locale) => Self::Solver { locale, solver }, + _ => unreachable!("Attempting to put back a solver in an invalid state."), + }) + } +} + +pub struct GenerateBundlesSync<P, B> { + reg: L10nRegistry<P, B>, + locales: std::vec::IntoIter<LanguageIdentifier>, + current_metasource: usize, + resource_ids: Vec<ResourceId>, + state: State, +} + +impl<P, B> GenerateBundlesSync<P, B> { + fn new( + reg: L10nRegistry<P, B>, + locales: std::vec::IntoIter<LanguageIdentifier>, + resource_ids: Vec<ResourceId>, + ) -> Self { + Self { + reg, + locales, + current_metasource: 0, + resource_ids, + state: State::Empty, + } + } +} + +impl<P, B> SyncTester for GenerateBundlesSync<P, B> { + fn test_sync(&self, res_idx: usize, source_idx: usize) -> bool { + let locale = self.state.get_locale(); + let resource_id = &self.resource_ids[res_idx]; + !self + .reg + .try_borrow_metasources() + 
.expect("Unable to get the MetaSources.") + .filesource(self.current_metasource, source_idx) + .fetch_file_sync(locale, resource_id, /* overload */ true) + .is_required_and_missing() + } +} + +impl<P, B> BundleIterator for GenerateBundlesSync<P, B> +where + P: ErrorReporter, +{ + fn prefetch_sync(&mut self) { + if let State::Solver { .. } = self.state { + let mut solver = self.state.take_solver(); + if let Err(idx) = solver.try_next(self, true) { + self.reg + .shared + .provider + .report_errors(vec![L10nRegistryError::MissingResource { + locale: self.state.get_locale().clone(), + resource_id: self.resource_ids[idx].clone(), + }]); + } + self.state.put_back_solver(solver); + return; + } + + if let Some(locale) = self.locales.next() { + let mut solver = SerialProblemSolver::new( + self.resource_ids.len(), + self.reg + .try_borrow_metasources() + .expect("Unable to get the MetaSources.") + .get(self.current_metasource) + .len(), + ); + self.state = State::Locale(locale.clone()); + if let Err(idx) = solver.try_next(self, true) { + self.reg + .shared + .provider + .report_errors(vec![L10nRegistryError::MissingResource { + locale, + resource_id: self.resource_ids[idx].clone(), + }]); + } + self.state.put_back_solver(solver); + } + } +} + +impl<P, B> Iterator for GenerateBundlesSync<P, B> +where + P: ErrorReporter, + B: BundleAdapter, +{ + type Item = Result<FluentBundle, (FluentBundle, Vec<FluentError>)>; + + /// Synchronously generate a bundle based on a solver. + fn next(&mut self) -> Option<Self::Item> { + let metasources = self + .reg + .try_borrow_metasources() + .expect("Unable to get the MetaSources."); + + if metasources.is_empty() { + // There are no metasources available, so no bundles can be generated. + return None; + } + + loop { + if let State::Solver { .. } = self.state { + // A solver has already been set up, continue iterating through the + // resources and generating a bundle. 
+ let mut solver = self.state.take_solver(); + let solver_result = solver.try_next(self, false); + + if let Ok(Some(order)) = solver_result { + // The solver resolved an ordering, and a bundle may be able + // to be generated. + + let bundle = metasources.bundle_from_order( + self.current_metasource, + self.state.get_locale().clone(), + &order, + &self.resource_ids, + &self.reg.shared.provider, + self.reg.shared.bundle_adapter.as_ref(), + ); + + self.state.put_back_solver(solver); + + if bundle.is_some() { + // The bundle was successfully generated. + return bundle; + } + + // No bundle was generated, continue on. + continue; + } + + // There is no bundle ordering available. + + if self.current_metasource > 0 { + // There are more metasources, create a new solver and try the + // next metasource. If there is an error in the solver_result + // ignore it for now, since there are more metasources. + self.current_metasource -= 1; + let solver = SerialProblemSolver::new( + self.resource_ids.len(), + metasources.get(self.current_metasource).len(), + ); + self.state = State::Solver { + locale: self.state.get_locale().clone(), + solver, + }; + continue; + } + + if let Err(idx) = solver_result { + // Since there are no more metasources, and there is an error, + // report it instead of ignoring it. + self.reg.shared.provider.report_errors(vec![ + L10nRegistryError::MissingResource { + locale: self.state.get_locale().clone(), + resource_id: self.resource_ids[idx].clone(), + }, + ]); + } + + self.state = State::Empty; + continue; + } + + // Try the next locale, or break out of the loop if there are none left. + let locale = self.locales.next()?; + + // Restart at the end of the metasources for this locale, and iterate + // backwards. 
+ let last_metasource_idx = metasources.len() - 1; + self.current_metasource = last_metasource_idx; + + let solver = SerialProblemSolver::new( + self.resource_ids.len(), + metasources.get(self.current_metasource).len(), + ); + + // Continue iterating on the next solver. + self.state = State::Solver { locale, solver }; + } + } +} diff --git a/intl/l10n/rust/l10nregistry-rs/src/solver/README.md b/intl/l10n/rust/l10nregistry-rs/src/solver/README.md new file mode 100644 index 0000000000..acd56b52b4 --- /dev/null +++ b/intl/l10n/rust/l10nregistry-rs/src/solver/README.md @@ -0,0 +1,239 @@ + +Source Order Problem Solver +====================== + +This module contains an algorithm used to power the `FluentBundle` generator in `L10nRegistry`. + +The main concept behind it is a problem solver which takes a list of resources and a list of sources and computes all possible iterations of valid combinations of source orders that allow for the creation of a `FluentBundle` with the requested resources. + +The algorithm is notoriously hard to read, write, and modify, which prompts this documentation to be extensive and provide an example with diagram presentations to aid the reader. + +# Example +For the purpose of a graphical illustration of the example, we will evaluate a scenario with two sources and three resources. + +The sources and resource identifiers will be named in a concise way (*1* or *A*) to simplify diagrams, while more tangible names derived from real-world Firefox use cases will be listed in their initial definitions. + +### Sources +A source can be a packaged directory, a language pack, or any other directory, zip file, or remote source which contains localization resource files. +In the example, we have two sources: +* Source 1 named ***0*** (e.g. `browser`) +* Source 2 named ***1*** (e.g. `toolkit`) + +### Resources +A resource is a single Fluent Translation List file. 
`FluentBundle` is a combination of such resources used together to resolve translations. This algorithm operates on lists of resource identifiers which represent relative paths within the source. +In the example we have three resources: +* Resource 1 named ***A*** (e.g. `branding/brand.ftl`) +* Resource 2 named ***B*** (e.g. `errors/common.ftl`) +* Resource 3 named ***C*** (e.g. `menu/list.ftl`) + +## Task +The task in this example is to generate all possible iterations of the three resources from the given two sources. Since I/O is expensive, and in most production scenarios all necessary translations are available in the first set, the iterator is used to lazily fall back on the alternative sets only in case of missing translations. + +If all resources are available in both sources, the iterator should produce the following results: +1. `[A0, B0, C0]` +2. `[A0, B0, C1]` +3. `[A0, B1, C0]` +4. `[A0, B1, C1]` +5. `[A1, B0, C0]` +6. `[A1, B0, C1]` +7. `[A1, B1, C0]` +8. `[A1, B1, C1]` + +Since the resources are defined by their column, we can store the resources as `[A, B, C]` separately and simplify the notation to just: +1. `[0, 0, 0]` +2. `[0, 0, 1]` +3. `[0, 1, 0]` +4. `[0, 1, 1]` +5. `[1, 0, 0]` +6. `[1, 0, 1]` +7. `[1, 1, 0]` +8. `[1, 1, 1]` + +This notation will be used from now on. + +## State + +For the in-detail diagrams on the algorithm, we'll use another way to look at the iterator - by evaluating its state. At every point of the algorithm, there is a *partial solution* which may lead to a *complete solution*. 
It is encoded as: + +```rust +struct Solution { + candidate: Vec<usize>, + idx: usize, +} +``` + +and which starting point can be visualized as: + +```text + ▼ +┌┲━┱┬───┬───┐ +│┃0┃│ │ │ +└╂─╂┴───┴───┘ + ┃ ┃ + ┗━┛ +``` +###### Diagrams generated with use of http://marklodato.github.io/js-boxdrawing/ + +where the horizontal block is a candidate, vertical block is a set of sources possible for each resource, and the arrow represents the index of a resource the iterator is currently evaluating. + +With those tools introduced, we can now guide the reader through how the algorithm works. +But before we do that, it is important to justify writing a custom algorithm in place of existing generic solutions, and explain the two testing strategies which heavily impact the algorithm. + +# Existing libraries +Intuitively, the starting point to exploration of the problem scope would be to look at it as some variation of the [Cartesian Product](https://en.wikipedia.org/wiki/Cartesian_product) iterator. 
+ +#### Python + +In Python, the `itertools` package provides a function [`itertools.product`](https://docs.python.org/3/library/itertools.html#itertools.product) which can be used to generate such an iterator: +```python +import itertools + +for set in itertools.product(range(2), repeat=3): + print(set) +``` + +#### Rust + +In Rust, the crate [`itertools`](https://crates.io/crates/itertools) provides [`multi_cartesian_product`](https://docs.rs/itertools/0.9.0/itertools/trait.Itertools.html#method.multi_cartesian_product) which can be used like this: +```rust +use itertools::Itertools; + +let multi_prod = (0..3).map(|i| 0..2) + .multi_cartesian_product(); + +for set in multi_prod { + println!("{:?}", set); +} +``` +([playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=6ef231f6b011b234babb0aa3e68b78ab)) + +#### Reasons for a custom algorithm + +Unfortunately, the computational complexity of generating all possible sets is growing exponentially, both in the cost of CPU and memory use. +On a high-end laptop, computing the sets for all possible variations of the example above generates *8* sets and takes only *700 nanoseconds*, but computing the same for four sources and 16 resources (a scenario theoretically possible in Firefox with one language pack and Preferences UI for example) generates over *4 billion* sets and takes over *2 minutes*. + +Since one part of static cost is the I/O, the application of a [Memoization](https://en.wikipedia.org/wiki/Memoization) technique allows us to minimize the cost of constructing, storing and retrieving sets. + +The second important observation is that in most scenarios any resource exists in only some of the sources, and the ability to bail out from a branch of candidates that cannot lead to a solution yields significantly fewer permutations as a result. + +## Optimizations + +The algorithm used here is highly efficient. 
For the conservative scenario listed above, where 4 sources and 16 resources are all present in every source, the total time on the reference hardware is cut from *2 minutes* to *24 seconds*, while generating the same *4 billion* sets for a **5x** performance improvement. + +### Streaming Iterator +Unlike a regular iterator, a streaming iterator allows a borrowed reference to be returned, which in this case, where the solver yields a read-only "view" of a solution, allows us to avoid having to clone it. + +### Cache +Memory is much less of a problem for the algorithm than CPU usage, so the solver uses a matrix of source/resource `Option` to memoize visited cells. This allows for each source/resource combination to be tested only once after which all future tests can be skipped. + +### Backtracking +This optimization takes advantage of the fact that most resources are only available in some sources. +Instead of generating all possible sets and then ignoring ones which are incomplete, it allows the algorithm to [backtrack](https://en.wikipedia.org/wiki/Backtracking) from partial candidates that cannot lead to a complete solution. + +That technique is very powerful in the `L10nRegistry` use case and in many scenarios leads to 10-100x speed ups even in cases where all sets have to be generated. + +# Serial vs Parallel Testing +At the core of the solver is a *tester* component which is responsible for eagerly evaluating candidates to allow for early bailouts from partial solutions which cannot lead to a complete solution. + +This can be performed in one of two ways: + +### Serial + The algorithm is synchronous and each extension of the candidate is evaluated serially, one by one, allowing for *backtracking* as soon as a given extension of a partial solution is confirmed to not lead to a complete solution. 
+ +Bringing back the initial state of the solver: + +```text + ▼ +┌┲━┱┬───┬───┐ +│┃0┃│ │ │ +└╂─╂┴───┴───┘ + ┃ ┃ + ┗━┛ +``` + +The tester will evaluate whether the first resource **A** is available in the first source **0**. The testing will be performed synchronously, and the result will inform the algorithm on whether the candidate may lead to a complete solution, or this branch should be bailed out from, and the next candidate must be tried. + +#### Success case + +If the test returns a success, the extension of the candidate is generated: +```text + ▼ +┌┲━┱┬┲━┱┬───┐ +│┃0┃│┃0┃│ │ +└╂─╂┴╂─╂┴───┘ + ┃ ┃ ┃ ┃ + ┗━┛ ┗━┛ +``` + +When a candidate is complete, in other words, when the last cell of a candidate has been tested and did not lead to a backtrack, we know that the candidate is a solution to the problem, and we can yield it from the iterator. + +#### Failure case + +If the test returns a failure, the next step is to evaluate an alternative source for the same resource. Let's assume that *Source 0* had *Resource A* but it does not have *Resource B*. In such a case, the algorithm will increment the second cell's source index: + +```text + ▼ + ┏━┓ + ┃0┃ +┌┲━┱┬╂─╂┬───┐ +│┃0┃│┃1┃│ │ +└╂─╂┴┺━┹┴───┘ + ┃ ┃ + ┗━┛ + ``` + +and that will potentially lead to a partial solution `[0, 1, ]` to be stored for the next iteration. + +If the test fails and no more sources can be generated, the algorithm will *backtrack* from the current cell looking for a cell with the **highest** index prior to the cell that was being evaluated which is not yet on the last source. If such a cell is found, the results of all cells **to the right** of the newfound cell are **erased** and the next branch can be evaluated. + +If no such cell can be found, that means that the iterator is complete. + +### Parallel + +If the testing can be performed in parallel, like an asynchronous I/O, the above *serial* solution is sub-optimal as it misses out on the benefit of testing multiple cells at once. 
+ +In such a scenario, the algorithm will construct a candidate that *can* be valid (bailing only from candidates that have been already memoized as unavailable), and then test all of the untested cells in that candidate at once. + +```text + ▼ +┌┲━┱┬┲━┱┬┲━┱┐ +│┃0┃│┃0┃│┃0┃│ +└╂─╂┴╂─╂┴╂─╂┘ + ┃ ┃ ┃ ┃ ┃ ┃ + ┗━┛ ┗━┛ ┗━┛ +``` + +When the parallel execution returns, the algorithm memoizes all new cell results and tests if the candidate is now a valid complete solution. + +#### Success case + +If the result is a set of successes, the candidate is returned as a solution, and the algorithm proceeds to the same operation as if it was a failure. + +#### Failure case +If the result contains failures, the iterator will now backtrack to find the closest lower or equal cell to the current index which can be advanced to the next source. +In the example state above, the current cell can be advanced to *source 1* and then just a set of `[None, None, 1]` is to be evaluated by the tester (since we know that *A0* and *B0* are valid). + +If that is successful, the `[0, 0, 1]` set is a complete solution and is yielded. + +Then, if the iterator is resumed, the next state to be tested is: + +```text + ▼ + ┏━┓ + ┃0┃ +┌┲━┱┬╂─╂┬┲━┱┐ +│┃0┃│┃1┃│┃0┃│ +└╂─╂┴┺━┹┴╂─╂┘ + ┃ ┃ ┃ ┃ + ┗━┛ ┗━┛ +``` + +since cell *2* was at the highest index, cell *1* is the highest lower than *2* that was not at the highest source index position. That cell is advanced, and all cells after it are *pruned* (in this case, cell *2* is the only one). Then, the memoization kicks in, and since *A0* and *C0* are already cached as valid, the tester receives just `[None, 1, None]` to be tested and the algorithm continues. + +# Summary + +The algorithm explained above is tailored to the problem domain of `L10nRegistry` and is designed to be further extended in the future. + +It is important to maintain this guide up to date as any changes to the algorithm are to be made. + +Good luck. 
diff --git a/intl/l10n/rust/l10nregistry-rs/src/solver/mod.rs b/intl/l10n/rust/l10nregistry-rs/src/solver/mod.rs new file mode 100644 index 0000000000..f14fbfe641 --- /dev/null +++ b/intl/l10n/rust/l10nregistry-rs/src/solver/mod.rs @@ -0,0 +1,122 @@ +mod parallel; +mod serial; +pub mod testing; + +pub use parallel::{AsyncTester, ParallelProblemSolver}; +pub use serial::{SerialProblemSolver, SyncTester}; + +pub struct ProblemSolver { + width: usize, + depth: usize, + + cache: Vec<Vec<Option<bool>>>, + + solution: Vec<usize>, + idx: usize, + + dirty: bool, +} + +impl ProblemSolver { + pub fn new(width: usize, depth: usize) -> Self { + Self { + width, + depth, + cache: vec![vec![None; depth]; width], + + solution: vec![0; width], + idx: 0, + + dirty: false, + } + } +} + +impl ProblemSolver { + pub fn bail(&mut self) -> bool { + if self.try_advance_source() { + true + } else { + self.try_backtrack() + } + } + + pub fn has_missing_cell(&self) -> Option<usize> { + for res_idx in 0..self.width { + if self.cache[res_idx].iter().all(|c| *c == Some(false)) { + return Some(res_idx); + } + } + None + } + + fn is_cell_missing(&self, res_idx: usize, source_idx: usize) -> bool { + if let Some(false) = self.cache[res_idx][source_idx] { + return true; + } + false + } + + fn is_current_cell_missing(&self) -> bool { + let res_idx = self.idx; + let source_idx = self.solution[res_idx]; + let cell = &self.cache[res_idx][source_idx]; + if let Some(false) = cell { + return true; + } + false + } + + pub fn try_advance_resource(&mut self) -> bool { + if self.idx >= self.width - 1 { + false + } else { + self.idx += 1; + while self.is_current_cell_missing() { + if !self.try_advance_source() { + return false; + } + } + true + } + } + + pub fn try_advance_source(&mut self) -> bool { + while self.solution[self.idx] < self.depth - 1 { + self.solution[self.idx] += 1; + if !self.is_current_cell_missing() { + return true; + } + } + false + } + + pub fn try_backtrack(&mut self) -> bool { + while 
self.solution[self.idx] == self.depth - 1 { + if self.idx == 0 { + return false; + } + self.idx -= 1; + } + self.solution[self.idx] += 1; + self.prune() + } + + pub fn prune(&mut self) -> bool { + for i in self.idx + 1..self.width { + let mut source_idx = 0; + while self.is_cell_missing(i, source_idx) { + if source_idx >= self.depth - 1 { + return false; + } + source_idx += 1; + } + self.solution[i] = source_idx; + } + true + } + + pub fn is_complete(&self) -> bool { + self.idx == self.width - 1 + } +} diff --git a/intl/l10n/rust/l10nregistry-rs/src/solver/parallel.rs b/intl/l10n/rust/l10nregistry-rs/src/solver/parallel.rs new file mode 100644 index 0000000000..320ad65c89 --- /dev/null +++ b/intl/l10n/rust/l10nregistry-rs/src/solver/parallel.rs @@ -0,0 +1,175 @@ +use super::ProblemSolver; +use std::ops::{Deref, DerefMut}; + +use futures::ready; +use std::future::Future; +use std::pin::Pin; + +pub trait AsyncTester { + type Result: Future<Output = Vec<bool>>; + + fn test_async(&self, query: Vec<(usize, usize)>) -> Self::Result; +} + +pub struct ParallelProblemSolver<T> +where + T: AsyncTester, +{ + solver: ProblemSolver, + current_test: Option<(T::Result, Vec<usize>)>, +} + +impl<T: AsyncTester> Deref for ParallelProblemSolver<T> { + type Target = ProblemSolver; + + fn deref(&self) -> &Self::Target { + &self.solver + } +} + +impl<T: AsyncTester> DerefMut for ParallelProblemSolver<T> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.solver + } +} + +impl<T: AsyncTester> ParallelProblemSolver<T> { + pub fn new(width: usize, depth: usize) -> Self { + Self { + solver: ProblemSolver::new(width, depth), + current_test: None, + } + } +} + +type TestQuery = (Vec<(usize, usize)>, Vec<usize>); + +impl<T: AsyncTester> ParallelProblemSolver<T> { + pub fn try_generate_complete_candidate(&mut self) -> bool { + while !self.is_complete() { + while self.is_current_cell_missing() { + if !self.try_advance_source() { + return false; + } + } + if 
!self.try_advance_resource() { + return false; + } + } + true + } + + fn try_generate_test_query(&mut self) -> Result<TestQuery, usize> { + let mut test_cells = vec![]; + let query = self + .solution + .iter() + .enumerate() + .filter_map(|(res_idx, source_idx)| { + let cell = self.cache[res_idx][*source_idx]; + match cell { + None => { + test_cells.push(res_idx); + Some(Ok((res_idx, *source_idx))) + } + Some(false) => Some(Err(res_idx)), + Some(true) => None, + } + }) + .collect::<Result<_, _>>()?; + Ok((query, test_cells)) + } + + fn apply_test_result( + &mut self, + resources: Vec<bool>, + testing_cells: Vec<usize>, + ) -> Result<(), usize> { + let mut first_missing = None; + for (result, res_idx) in resources.into_iter().zip(testing_cells) { + let source_idx = self.solution[res_idx]; + self.cache[res_idx][source_idx] = Some(result); + if !result && first_missing.is_none() { + first_missing = Some(res_idx); + } + } + if let Some(idx) = first_missing { + Err(idx) + } else { + Ok(()) + } + } + + pub fn try_poll_next( + mut self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + tester: &T, + prefetch: bool, + ) -> std::task::Poll<Result<Option<Vec<usize>>, usize>> + where + <T as AsyncTester>::Result: Unpin, + { + if self.width == 0 || self.depth == 0 { + return Ok(None).into(); + } + + 'outer: loop { + if let Some((test, testing_cells)) = &mut self.current_test { + let pinned = Pin::new(test); + let set = ready!(pinned.poll(cx)); + let testing_cells = testing_cells.clone(); + + if let Err(res_idx) = self.apply_test_result(set, testing_cells) { + self.idx = res_idx; + self.prune(); + if !self.bail() { + if let Some(res_idx) = self.has_missing_cell() { + return Err(res_idx).into(); + } else { + return Ok(None).into(); + } + } + self.current_test = None; + continue 'outer; + } else { + self.current_test = None; + if !prefetch { + self.dirty = true; + } + return Ok(Some(self.solution.clone())).into(); + } + } else { + if self.dirty { + if !self.bail() { 
+ if let Some(res_idx) = self.has_missing_cell() { + return Err(res_idx).into(); + } else { + return Ok(None).into(); + } + } + self.dirty = false; + } + while self.try_generate_complete_candidate() { + match self.try_generate_test_query() { + Ok((query, testing_cells)) => { + self.current_test = Some((tester.test_async(query), testing_cells)); + continue 'outer; + } + Err(res_idx) => { + self.idx = res_idx; + self.prune(); + if !self.bail() { + if let Some(res_idx) = self.has_missing_cell() { + return Err(res_idx).into(); + } else { + return Ok(None).into(); + } + } + } + } + } + return Ok(None).into(); + } + } + } +} diff --git a/intl/l10n/rust/l10nregistry-rs/src/solver/serial.rs b/intl/l10n/rust/l10nregistry-rs/src/solver/serial.rs new file mode 100644 index 0000000000..9368c12c9e --- /dev/null +++ b/intl/l10n/rust/l10nregistry-rs/src/solver/serial.rs @@ -0,0 +1,76 @@ +use super::ProblemSolver; +use std::ops::{Deref, DerefMut}; + +pub trait SyncTester { + fn test_sync(&self, res_idx: usize, source_idx: usize) -> bool; +} + +pub struct SerialProblemSolver(ProblemSolver); + +impl Deref for SerialProblemSolver { + type Target = ProblemSolver; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for SerialProblemSolver { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +impl SerialProblemSolver { + pub fn new(width: usize, depth: usize) -> Self { + Self(ProblemSolver::new(width, depth)) + } +} + +impl SerialProblemSolver { + fn test_current_cell<T>(&mut self, tester: &T) -> bool + where + T: SyncTester, + { + let res_idx = self.idx; + let source_idx = self.solution[res_idx]; + let cell = &mut self.cache[res_idx][source_idx]; + *cell.get_or_insert_with(|| tester.test_sync(res_idx, source_idx)) + } + + pub fn try_next<T>(&mut self, tester: &T, prefetch: bool) -> Result<Option<&[usize]>, usize> + where + T: SyncTester, + { + if self.width == 0 || self.depth == 0 { + return Ok(None); + } + if self.dirty { + if !self.bail() { + 
return Ok(None); + } + self.dirty = false; + } + loop { + if !self.test_current_cell(tester) { + if !self.bail() { + if let Some(res_idx) = self.has_missing_cell() { + return Err(res_idx); + } else { + return Ok(None); + } + } + continue; + } + if self.is_complete() { + if !prefetch { + self.dirty = true; + } + return Ok(Some(&self.solution)); + } + if !self.try_advance_resource() { + return Ok(None); + } + } + } +} diff --git a/intl/l10n/rust/l10nregistry-rs/src/solver/testing/mod.rs b/intl/l10n/rust/l10nregistry-rs/src/solver/testing/mod.rs new file mode 100644 index 0000000000..68f566250e --- /dev/null +++ b/intl/l10n/rust/l10nregistry-rs/src/solver/testing/mod.rs @@ -0,0 +1,38 @@ +mod scenarios; + +pub use scenarios::get_scenarios; + +/// Define a testing scenario. +pub struct Scenario { + /// Name of the scenario. + pub name: String, + /// Number of resources. + pub width: usize, + /// Number of sources. + pub depth: usize, + /// Vector of resources, containing a vector of sources, with true indicating + /// whether the resource is present in that source. + pub values: Vec<Vec<bool>>, + /// Vector of solutions, each containing a vector of resources, with the index + /// indicating from which source the resource is chosen. + /// TODO(issue#17): This field is currently unused! 
+ pub solutions: Vec<Vec<usize>>, +} + +impl Scenario { + pub fn new<S: ToString>( + name: S, + width: usize, + depth: usize, + values: Vec<Vec<bool>>, + solutions: Vec<Vec<usize>>, + ) -> Self { + Self { + name: name.to_string(), + width, + depth, + values, + solutions, + } + } +} diff --git a/intl/l10n/rust/l10nregistry-rs/src/solver/testing/scenarios.rs b/intl/l10n/rust/l10nregistry-rs/src/solver/testing/scenarios.rs new file mode 100644 index 0000000000..8addec979b --- /dev/null +++ b/intl/l10n/rust/l10nregistry-rs/src/solver/testing/scenarios.rs @@ -0,0 +1,151 @@ +use super::*; + +pub fn get_scenarios() -> Vec<Scenario> { + vec![ + Scenario::new("no-sources", 1, 0, vec![], vec![]), + Scenario::new("no-resources", 1, 0, vec![vec![true]], vec![]), + Scenario::new("no-keys", 0, 1, vec![], vec![]), + Scenario::new( + "one-res-two-sources", + 1, + 2, + vec![vec![true, true]], + vec![vec![0], vec![1]], + ), + Scenario::new( + "two-res-two-sources", + 2, + 2, + vec![vec![false, true], vec![true, false]], + vec![vec![1, 0]], + ), + Scenario::new( + "small", + 3, + 2, + vec![vec![true, true], vec![true, true], vec![true, true]], + vec![ + vec![0, 0, 0], + vec![0, 0, 1], + vec![0, 1, 0], + vec![0, 1, 1], + vec![1, 0, 0], + vec![1, 0, 1], + vec![1, 1, 0], + vec![1, 1, 1], + ], + ), + Scenario::new( + "incomplete", + 3, + 2, + vec![vec![true, false], vec![false, true], vec![true, true]], + vec![vec![0, 1, 0], vec![0, 1, 1]], + ), + Scenario::new( + "preferences", + 19, + 2, + vec![ + vec![true, false], + vec![true, false], + vec![true, false], + vec![true, false], + vec![true, false], + vec![true, false], + vec![true, false], + vec![true, false], + vec![true, false], + vec![true, false], + vec![true, false], + vec![true, false], + vec![true, false], + vec![true, false], + vec![true, false], + vec![true, false], + vec![false, true], + vec![false, true], + vec![false, true], + ], + vec![vec![ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, + ]], + ), + 
Scenario::new( + "langpack", + 3, + 4, + vec![ + vec![true, true, true, true], + vec![true, true, true, true], + vec![true, true, true, true], + ], + vec![ + vec![0, 0, 0], + vec![0, 0, 1], + vec![0, 0, 2], + vec![0, 0, 3], + vec![0, 1, 0], + vec![0, 1, 1], + vec![0, 1, 2], + vec![0, 1, 3], + vec![0, 2, 0], + vec![0, 2, 1], + vec![0, 2, 2], + vec![0, 2, 3], + vec![0, 3, 0], + vec![0, 3, 1], + vec![0, 3, 2], + vec![0, 3, 3], + vec![1, 0, 0], + vec![1, 0, 1], + vec![1, 0, 2], + vec![1, 0, 3], + vec![1, 1, 0], + vec![1, 1, 1], + vec![1, 1, 2], + vec![1, 1, 3], + vec![1, 2, 0], + vec![1, 2, 1], + vec![1, 2, 2], + vec![1, 2, 3], + vec![1, 3, 0], + vec![1, 3, 1], + vec![1, 3, 2], + vec![1, 3, 3], + vec![2, 0, 0], + vec![2, 0, 1], + vec![2, 0, 2], + vec![2, 0, 3], + vec![2, 1, 0], + vec![2, 1, 1], + vec![2, 1, 2], + vec![2, 1, 3], + vec![2, 2, 0], + vec![2, 2, 1], + vec![2, 2, 2], + vec![2, 2, 3], + vec![2, 3, 0], + vec![2, 3, 1], + vec![2, 3, 2], + vec![2, 3, 3], + vec![3, 0, 0], + vec![3, 0, 1], + vec![3, 0, 2], + vec![3, 0, 3], + vec![3, 1, 0], + vec![3, 1, 1], + vec![3, 1, 2], + vec![3, 1, 3], + vec![3, 2, 0], + vec![3, 2, 1], + vec![3, 2, 2], + vec![3, 2, 3], + vec![3, 3, 0], + vec![3, 3, 1], + vec![3, 3, 2], + vec![3, 3, 3], + ], + ), + ] +} diff --git a/intl/l10n/rust/l10nregistry-rs/src/source/fetcher.rs b/intl/l10n/rust/l10nregistry-rs/src/source/fetcher.rs new file mode 100644 index 0000000000..3a022990a6 --- /dev/null +++ b/intl/l10n/rust/l10nregistry-rs/src/source/fetcher.rs @@ -0,0 +1,30 @@ +use async_trait::async_trait; +use fluent_fallback::types::ResourceId; +use std::io; + +/// The users of [`FileSource`] implement this trait to provide loading of +/// resources, returning the contents of a resource as a +/// `String`. [`FileSource`] handles the conversion from string representation +/// into `FluentResource`. 
+/// +/// [`FileSource`]: source/struct.FileSource.html +#[async_trait(?Send)] +pub trait FileFetcher { + /// Return the `String` representation for `path`. This version is + /// blocking. + /// + /// See [`fetch`](#tymethod.fetch). + fn fetch_sync(&self, resource_id: &ResourceId) -> io::Result<String>; + + /// Return the `String` representation for `path`. + /// + /// On success, returns `Poll::Ready(Ok(..))`. + /// + /// If no resource is available to be fetched, the method returns + /// `Poll::Pending` and arranges for the current task (via + /// `cx.waker().wake_by_ref()`) to receive a notification when the resource + /// is available. + /// + /// See [`fetch_sync`](#tymethod.fetch_sync) + async fn fetch(&self, path: &ResourceId) -> io::Result<String>; +} diff --git a/intl/l10n/rust/l10nregistry-rs/src/source/mod.rs b/intl/l10n/rust/l10nregistry-rs/src/source/mod.rs new file mode 100644 index 0000000000..1c72065b38 --- /dev/null +++ b/intl/l10n/rust/l10nregistry-rs/src/source/mod.rs @@ -0,0 +1,558 @@ +mod fetcher; +pub use fetcher::FileFetcher; +pub use fluent_fallback::types::{ResourceId, ToResourceId}; + +use crate::env::ErrorReporter; +use crate::errors::L10nRegistryError; +use crate::fluent::FluentResource; + +use std::{ + borrow::Borrow, + cell::RefCell, + fmt, + hash::{Hash, Hasher}, + pin::Pin, + rc::Rc, + task::Poll, +}; + +use futures::{future::Shared, Future, FutureExt}; +use rustc_hash::FxHashMap; +use unic_langid::LanguageIdentifier; + +pub type RcResource = Rc<FluentResource>; + +/// An option type whose None variant is either optional or required. +/// +/// This behaves similarly to the standard-library [`Option`] type +/// except that there are two [`None`]-like variants: +/// [`ResourceOption::MissingOptional`] and [`ResourceOption::MissingRequired`]. +#[derive(Clone, Debug)] +pub enum ResourceOption { + /// An available resource. + Some(RcResource), + /// A missing optional resource. + MissingOptional, + /// A missing required resource. 
+ MissingRequired, +} + +impl ResourceOption { + /// Creates a resource option that is either [`ResourceOption::MissingRequired`] + /// or [`ResourceOption::MissingOptional`] based on whether the given [`ResourceId`] + /// is required or optional. + pub fn missing_resource(resource_id: &ResourceId) -> Self { + if resource_id.is_required() { + Self::MissingRequired + } else { + Self::MissingOptional + } + } + + /// Returns [`true`] if this option contains a resource, otherwise [`false`]. + pub fn is_some(&self) -> bool { + matches!(self, Self::Some(_)) + } + + /// Returns [`true`] if this option is missing a resource of any type, otherwise [`false`]. + pub fn is_none(&self) -> bool { + matches!(self, Self::MissingOptional | Self::MissingRequired) + } + + /// Returns [`true`] if this option is missing a required resource, otherwise [`false`]. + pub fn is_required_and_missing(&self) -> bool { + matches!(self, Self::MissingRequired) + } +} + +impl From<ResourceOption> for Option<RcResource> { + fn from(other: ResourceOption) -> Self { + match other { + ResourceOption::Some(id) => Some(id), + _ => None, + } + } +} + +pub type ResourceFuture = Shared<Pin<Box<dyn Future<Output = ResourceOption>>>>; + +#[derive(Debug, Clone)] +pub enum ResourceStatus { + /// The resource is missing. Don't bother trying to fetch. + MissingRequired, + MissingOptional, + /// The resource is loading and future will deliver the result. + Loading(ResourceFuture), + /// The resource is loaded and parsed. 
+ Loaded(RcResource), +} + +impl From<ResourceOption> for ResourceStatus { + fn from(input: ResourceOption) -> Self { + match input { + ResourceOption::Some(res) => Self::Loaded(res), + ResourceOption::MissingOptional => Self::MissingOptional, + ResourceOption::MissingRequired => Self::MissingRequired, + } + } +} + +impl Future for ResourceStatus { + type Output = ResourceOption; + + fn poll(mut self: Pin<&mut Self>, cx: &mut std::task::Context<'_>) -> Poll<Self::Output> { + use ResourceStatus::*; + + let this = &mut *self; + + match this { + MissingRequired => ResourceOption::MissingRequired.into(), + MissingOptional => ResourceOption::MissingOptional.into(), + Loaded(res) => ResourceOption::Some(res.clone()).into(), + Loading(res) => Pin::new(res).poll(cx), + } + } +} + +/// `FileSource` provides a generic fetching and caching of fluent resources. +/// The user of `FileSource` provides a [`FileFetcher`](trait.FileFetcher.html) +/// implementation and `FileSource` takes care of the rest. +#[derive(Clone)] +pub struct FileSource { + /// Name of the FileSource, e.g. "browser" + pub name: String, + /// Pre-formatted path for the FileSource, e.g. "/browser/data/locale/{locale}/" + pub pre_path: String, + /// Metasource name for the FileSource, e.g. "app", "langpack" + /// Only sources from the same metasource are passed into the solver. + pub metasource: String, + /// The locales for which data is present in the FileSource, e.g. 
["en-US", "pl"] + locales: Vec<LanguageIdentifier>, + shared: Rc<Inner>, + index: Option<Vec<String>>, + pub options: FileSourceOptions, +} + +struct Inner { + fetcher: Box<dyn FileFetcher>, + error_reporter: Option<RefCell<Box<dyn ErrorReporter>>>, + entries: RefCell<FxHashMap<String, ResourceStatus>>, +} + +impl fmt::Display for FileSource { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.name) + } +} + +impl PartialEq<FileSource> for FileSource { + fn eq(&self, other: &Self) -> bool { + self.name == other.name && self.metasource == other.metasource + } +} + +impl Eq for FileSource {} + +impl Hash for FileSource { + fn hash<H: Hasher>(&self, state: &mut H) { + self.name.hash(state) + } +} + +#[derive(PartialEq, Clone, Debug)] +pub struct FileSourceOptions { + pub allow_override: bool, +} + +impl Default for FileSourceOptions { + fn default() -> Self { + Self { + allow_override: false, + } + } +} + +impl FileSource { + /// Create a `FileSource` using the provided [`FileFetcher`](../trait.FileFetcher.html). 
+ pub fn new( + name: String, + metasource: Option<String>, + locales: Vec<LanguageIdentifier>, + pre_path: String, + options: FileSourceOptions, + fetcher: impl FileFetcher + 'static, + ) -> Self { + FileSource { + name, + metasource: metasource.unwrap_or_default(), + pre_path, + locales, + index: None, + shared: Rc::new(Inner { + entries: RefCell::new(FxHashMap::default()), + fetcher: Box::new(fetcher), + error_reporter: None, + }), + options, + } + } + + pub fn new_with_index( + name: String, + metasource: Option<String>, + locales: Vec<LanguageIdentifier>, + pre_path: String, + options: FileSourceOptions, + fetcher: impl FileFetcher + 'static, + index: Vec<String>, + ) -> Self { + FileSource { + name, + metasource: metasource.unwrap_or_default(), + pre_path, + locales, + index: Some(index), + shared: Rc::new(Inner { + entries: RefCell::new(FxHashMap::default()), + fetcher: Box::new(fetcher), + error_reporter: None, + }), + options, + } + } + + pub fn set_reporter(&mut self, reporter: impl ErrorReporter + 'static) { + let mut shared = Rc::get_mut(&mut self.shared).unwrap(); + shared.error_reporter = Some(RefCell::new(Box::new(reporter))); + } +} + +fn calculate_pos_in_source(source: &str, idx: usize) -> (usize, usize) { + let mut ptr = 0; + let mut result = (1, 1); + for line in source.lines() { + let bytes = line.as_bytes().len(); + if ptr + bytes < idx { + ptr += bytes + 1; + result.0 += 1; + } else { + result.1 = idx - ptr + 1; + break; + } + } + result +} + +impl FileSource { + fn get_path(&self, locale: &LanguageIdentifier, resource_id: &ResourceId) -> String { + format!( + "{}{}", + self.pre_path.replace("{locale}", &locale.to_string()), + resource_id.value, + ) + } + + fn fetch_sync(&self, resource_id: &ResourceId) -> ResourceOption { + self.shared + .fetcher + .fetch_sync(resource_id) + .ok() + .map(|source| match FluentResource::try_new(source) { + Ok(res) => ResourceOption::Some(Rc::new(res)), + Err((res, errors)) => { + if let Some(reporter) = 
&self.shared.error_reporter { + reporter.borrow().report_errors( + errors + .into_iter() + .map(|e| L10nRegistryError::FluentError { + resource_id: resource_id.clone(), + loc: Some(calculate_pos_in_source(res.source(), e.pos.start)), + error: e.into(), + }) + .collect(), + ); + } + ResourceOption::Some(Rc::new(res)) + } + }) + .unwrap_or_else(|| ResourceOption::missing_resource(resource_id)) + } + + /// Attempt to synchronously fetch resource for the combination of `locale` + /// and `path`. Returns `Some(ResourceResult)` if the resource is available, + /// else `None`. + pub fn fetch_file_sync( + &self, + locale: &LanguageIdentifier, + resource_id: &ResourceId, + overload: bool, + ) -> ResourceOption { + use ResourceStatus::*; + + if self.has_file(locale, resource_id) == Some(false) { + return ResourceOption::missing_resource(resource_id); + } + + let full_path_id = self + .get_path(locale, resource_id) + .to_resource_id(resource_id.resource_type); + + let res = self.shared.lookup_resource(full_path_id.clone(), || { + self.fetch_sync(&full_path_id).into() + }); + + match res { + MissingRequired => ResourceOption::MissingRequired, + MissingOptional => ResourceOption::MissingOptional, + Loaded(res) => ResourceOption::Some(res), + Loading(..) if overload => { + // A sync load has been requested for the same resource that has + // a pending async load in progress. How do we handle this? + // + // Ideally, we would sync load and resolve all the pending + // futures with the result. With the current Futures and + // combinators, it's unclear how to proceed. One potential + // solution is to store a oneshot::Sender and + // Shared<oneshot::Receiver>. When the async loading future + // resolves it would check that the state is still `Loading`, + // and if so, send the result. The sync load would do the same + // send on the oneshot::Sender. + // + // For now, we warn and return the resource, paying the cost of + // duplication of the resource. 
+ self.fetch_sync(&full_path_id) + } + Loading(..) => { + panic!("[l10nregistry] Attempting to synchronously load file {} while it's being loaded asynchronously.", &full_path_id.value); + } + } + } + + /// Attempt to fetch resource for the combination of `locale` and `path`. + /// Returns [`ResourceStatus`](enum.ResourceStatus.html) which is + /// a `Future` that can be polled. + pub fn fetch_file( + &self, + locale: &LanguageIdentifier, + resource_id: &ResourceId, + ) -> ResourceStatus { + use ResourceStatus::*; + + if self.has_file(locale, resource_id) == Some(false) { + return ResourceOption::missing_resource(resource_id).into(); + } + + let full_path_id = self + .get_path(locale, resource_id) + .to_resource_id(resource_id.resource_type); + + self.shared.lookup_resource(full_path_id.clone(), || { + let shared = self.shared.clone(); + Loading(read_resource(full_path_id, shared).boxed_local().shared()) + }) + } + + /// Determine if the `FileSource` has a loaded resource for the combination + /// of `locale` and `path`. Returns `Some(true)` if the file is loaded, else + /// `Some(false)`. `None` is returned if there is an outstanding async fetch + /// pending and the status is yet to be determined. 
+ pub fn has_file<L: Borrow<LanguageIdentifier>>( + &self, + locale: L, + path: &ResourceId, + ) -> Option<bool> { + let locale = locale.borrow(); + if !self.locales.contains(locale) { + Some(false) + } else { + let full_path = self.get_path(locale, path); + if let Some(index) = &self.index { + return Some(index.iter().any(|p| p == &full_path)); + } + self.shared.has_file(&full_path) + } + } + + pub fn locales(&self) -> &[LanguageIdentifier] { + &self.locales + } + + pub fn get_index(&self) -> Option<&Vec<String>> { + self.index.as_ref() + } +} + +impl std::fmt::Debug for FileSource { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> fmt::Result { + if let Some(index) = &self.index { + f.debug_struct("FileSource") + .field("name", &self.name) + .field("metasource", &self.metasource) + .field("locales", &self.locales) + .field("pre_path", &self.pre_path) + .field("index", index) + .finish() + } else { + f.debug_struct("FileSource") + .field("name", &self.name) + .field("metasource", &self.metasource) + .field("locales", &self.locales) + .field("pre_path", &self.pre_path) + .finish() + } + } +} + +impl Inner { + fn lookup_resource<F>(&self, resource_id: ResourceId, f: F) -> ResourceStatus + where + F: FnOnce() -> ResourceStatus, + { + let mut lock = self.entries.borrow_mut(); + lock.entry(resource_id.value).or_insert_with(|| f()).clone() + } + + fn update_resource(&self, resource_id: ResourceId, resource: ResourceOption) -> ResourceOption { + let mut lock = self.entries.borrow_mut(); + let entry = lock.get_mut(&resource_id.value); + match entry { + Some(entry) => *entry = resource.clone().into(), + _ => panic!("Expected "), + } + resource + } + + pub fn has_file(&self, full_path: &str) -> Option<bool> { + match self.entries.borrow().get(full_path) { + Some(ResourceStatus::MissingRequired) => Some(false), + Some(ResourceStatus::MissingOptional) => Some(false), + Some(ResourceStatus::Loaded(_)) => Some(true), + Some(ResourceStatus::Loading(_)) | None => None, + } + } 
}

/// Asynchronously fetches and parses the file identified by `resource_id`
/// and stores the outcome in the cache of `shared`.
///
/// A fetch failure yields `ResourceOption::missing_resource`. A file that
/// parses with errors is still returned, using the resource that
/// `FluentResource::try_new` hands back alongside the errors; the errors are
/// forwarded to the installed reporter, if any.
///
/// The cache entry for `resource_id` must already exist (`update_resource`
/// panics otherwise).
async fn read_resource(resource_id: ResourceId, shared: Rc<Inner>) -> ResourceOption {
    let resource = match shared.fetcher.fetch(&resource_id).await {
        Ok(source) => match FluentResource::try_new(source) {
            Ok(res) => ResourceOption::Some(Rc::new(res)),
            Err((res, errors)) => {
                // Read the field directly, exactly like the synchronous
                // loader does.
                if let Some(reporter) = &shared.error_reporter {
                    reporter.borrow().report_errors(
                        errors
                            .into_iter()
                            .map(|e| L10nRegistryError::FluentError {
                                resource_id: resource_id.clone(),
                                loc: Some(calculate_pos_in_source(res.source(), e.pos.start)),
                                error: e.into(),
                            })
                            .collect(),
                    );
                }
                ResourceOption::Some(Rc::new(res))
            }
        },
        Err(_) => ResourceOption::missing_resource(&resource_id),
    };
    // insert the resource into the cache
    shared.update_resource(resource_id, resource)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Byte offsets are mapped to 1-based `(line, column)` pairs.
    #[test]
    fn calculate_source_pos() {
        let source = r#"
key = Value

key2 = Value 2
"#
        .trim();
        let result = calculate_pos_in_source(source, 0);
        assert_eq!(result, (1, 1));

        let result = calculate_pos_in_source(source, 1);
        assert_eq!(result, (1, 2));

        let result = calculate_pos_in_source(source, 12);
        assert_eq!(result, (2, 1));

        let result = calculate_pos_in_source(source, 13);
        assert_eq!(result, (3, 1));
    }
}

#[cfg(test)]
#[cfg(all(feature = "tokio", feature = "test-fluent"))]
mod tests_tokio {
    use super::*;
    use crate::testing::TestFileFetcher;

    static FTL_RESOURCE_PRESENT: &str = "toolkit/global/textActions.ftl";
    static FTL_RESOURCE_MISSING: &str = "missing.ftl";

    #[tokio::test]
    async fn file_source_fetch() {
        let fetcher = TestFileFetcher::new();
        let en_us: LanguageIdentifier = "en-US".parse().unwrap();
        let fs1 =
            fetcher.get_test_file_source("toolkit", None, vec![en_us.clone()], "toolkit/{locale}/");

        let file = fs1.fetch_file(&en_us, &FTL_RESOURCE_PRESENT.into()).await;
        assert!(file.is_some());
    }

    #[tokio::test]
    async fn file_source_fetch_missing() {
        let fetcher = TestFileFetcher::new();
        let en_us: LanguageIdentifier = "en-US".parse().unwrap();
        let fs1 =
            fetcher.get_test_file_source("toolkit", None, vec![en_us.clone()], "toolkit/{locale}/");

        let file = fs1.fetch_file(&en_us, &FTL_RESOURCE_MISSING.into()).await;
        assert!(file.is_none());
    }

    #[tokio::test]
    async fn file_source_already_loaded() {
        let fetcher = TestFileFetcher::new();
        let en_us: LanguageIdentifier = "en-US".parse().unwrap();
        let fs1 =
            fetcher.get_test_file_source("toolkit", None, vec![en_us.clone()], "toolkit/{locale}/");

        let file = fs1.fetch_file(&en_us, &FTL_RESOURCE_PRESENT.into()).await;
        assert!(file.is_some());
        let file = fs1.fetch_file(&en_us, &FTL_RESOURCE_PRESENT.into()).await;
        assert!(file.is_some());
    }

    #[tokio::test]
    async fn file_source_concurrent() {
        let fetcher = TestFileFetcher::new();
        let en_us: LanguageIdentifier = "en-US".parse().unwrap();
        let fs1 =
            fetcher.get_test_file_source("toolkit", None, vec![en_us.clone()], "toolkit/{locale}/");

        let file1 = fs1.fetch_file(&en_us, &FTL_RESOURCE_PRESENT.into());
        let file2 = fs1.fetch_file(&en_us, &FTL_RESOURCE_PRESENT.into());
        assert!(file1.await.is_some());
        assert!(file2.await.is_some());
    }

    /// A sync fetch issued while an async fetch of the same file is pending
    /// succeeds when `overload` is `true`.
    #[test]
    fn file_source_sync_after_async_fail() {
        let fetcher = TestFileFetcher::new();
        let en_us: LanguageIdentifier = "en-US".parse().unwrap();
        let fs1 =
            fetcher.get_test_file_source("toolkit", None, vec![en_us.clone()], "toolkit/{locale}/");

        let _ = fs1.fetch_file(&en_us, &FTL_RESOURCE_PRESENT.into());
        let file2 = fs1.fetch_file_sync(&en_us, &FTL_RESOURCE_PRESENT.into(), true);
        assert!(file2.is_some());
    }
}

// diff --git a/intl/l10n/rust/l10nregistry-rs/src/testing.rs b/intl/l10n/rust/l10nregistry-rs/src/testing.rs
// new file mode 100644
// index 0000000000..f1ff47e2db
// --- /dev/null
// +++ b/intl/l10n/rust/l10nregistry-rs/src/testing.rs
// @@ -0,0 +1,322 @@

use crate::env::ErrorReporter;
use crate::errors::L10nRegistryError;
use crate::fluent::FluentBundle;
use crate::registry::BundleAdapter;
use crate::registry::L10nRegistry;
use crate::source::FileFetcher;
use async_trait::async_trait;
use fluent_fallback::{env::LocalesProvider, types::ResourceId};
use fluent_testing::MockFileSystem;
use std::cell::RefCell;
use std::rc::Rc;
use unic_langid::LanguageIdentifier;

/// Description of a registry to build for a test: its name, the file sources
/// to register and the locales made available through the environment.
pub struct RegistrySetup {
    pub name: String,
    pub file_sources: Vec<FileSource>,
    pub locales: Vec<LanguageIdentifier>,
}

/// Plain-data description of a file source used by [`RegistrySetup`]; it is
/// turned into a `crate::source::FileSource` when the registry is built.
pub struct FileSource {
    pub name: String,
    pub metasource: String,
    pub locales: Vec<LanguageIdentifier>,
    pub path_scheme: String,
}

/// Bundle adapter that leaves every bundle untouched.
#[derive(Clone)]
pub struct MockBundleAdapter;

impl BundleAdapter for MockBundleAdapter {
    fn adapt_bundle(&self, _bundle: &mut FluentBundle) {}
}

impl FileSource {
    /// Creates a source description; a `metasource` of `None` is stored as an
    /// empty string.
    pub fn new<S>(
        name: S,
        metasource: Option<S>,
        locales: Vec<LanguageIdentifier>,
        path_scheme: S,
    ) -> Self
    where
        S: ToString,
    {
        Self {
            name: name.to_string(),
            metasource: metasource.map(|s| s.to_string()).unwrap_or_default(),
            locales,
            path_scheme: path_scheme.to_string(),
        }
    }
}

impl RegistrySetup {
    /// Creates a setup from its parts.
    pub fn new(
        name: &str,
        file_sources: Vec<FileSource>,
        locales: Vec<LanguageIdentifier>,
    ) -> Self {
        Self {
            name: name.to_string(),
            file_sources,
            locales,
        }
    }
}

impl From<fluent_testing::scenarios::structs::Scenario> for RegistrySetup {
    /// Converts an owned scenario; sources get no metasource.
    ///
    /// Panics if a locale string of the scenario fails to parse.
    fn from(s: fluent_testing::scenarios::structs::Scenario) -> Self {
        Self {
            name: s.name,
            file_sources: s
                .file_sources
                .into_iter()
                .map(|source| {
                    FileSource::new(
                        source.name,
                        None,
                        source
                            .locales
                            .into_iter()
                            .map(|l| l.parse().unwrap())
                            .collect(),
                        source.path_scheme,
                    )
                })
                .collect(),
            locales: s
                .locales
                .into_iter()
                .map(|loc| loc.parse().unwrap())
                .collect(),
        }
    }
}

impl From<&fluent_testing::scenarios::structs::Scenario> for RegistrySetup {
    /// Converts a borrowed scenario, cloning its strings; sources get no
    /// metasource.
    ///
    /// Panics if a locale string of the scenario fails to parse.
    fn from(s: &fluent_testing::scenarios::structs::Scenario) -> Self {
        Self {
            name: s.name.clone(),
            file_sources: s
                .file_sources
                .iter()
                .map(|source| {
                    FileSource::new(
                        source.name.clone(),
                        None,
                        source.locales.iter().map(|l| l.parse().unwrap()).collect(),
                        source.path_scheme.clone(),
                    )
                })
                .collect(),
            locales: s.locales.iter().map(|loc| loc.parse().unwrap()).collect(),
        }
    }
}

#[derive(Default)]
struct InnerFileFetcher {
    fs: MockFileSystem,
}

/// File fetcher backed by a `MockFileSystem`; clones share the same file
/// system instance.
#[derive(Clone, Default)]
pub struct TestFileFetcher {
    inner: Rc<InnerFileFetcher>,
}

impl TestFileFetcher {
    /// Creates a fetcher with a default mock file system.
    pub fn new() -> Self {
        Self::default()
    }

    /// Creates a real file source (without index) that reads through a clone
    /// of this fetcher and uses default options.
    pub fn get_test_file_source(
        &self,
        name: &str,
        metasource: Option<String>,
        locales: Vec<LanguageIdentifier>,
        path: &str,
    ) -> crate::source::FileSource {
        crate::source::FileSource::new(
            name.to_string(),
            metasource,
            locales,
            path.to_string(),
            Default::default(),
            self.clone(),
        )
    }

    /// Same as [`TestFileFetcher::get_test_file_source`], but the source is
    /// created with the given `index` of available paths.
    pub fn get_test_file_source_with_index(
        &self,
        name: &str,
        metasource: Option<String>,
        locales: Vec<LanguageIdentifier>,
        path: &str,
        index: Vec<&str>,
    ) -> crate::source::FileSource {
        crate::source::FileSource::new_with_index(
            name.to_string(),
            metasource,
            locales,
            path.to_string(),
            Default::default(),
            self.clone(),
            index.into_iter().map(|s| s.to_string()).collect(),
        )
    }

    /// Turns one source description into a real file source that reports its
    /// errors to `reporter`.
    fn reporting_source(
        &self,
        source: FileSource,
        reporter: &TestEnvironment,
    ) -> crate::source::FileSource {
        let mut file_source = self.get_test_file_source(
            &source.name,
            Some(source.metasource),
            source.locales,
            &source.path_scheme,
        );
        file_source.set_reporter(reporter.clone());
        file_source
    }

    /// Builds a registry for `setup`, discarding the environment handle.
    pub fn get_registry<S>(&self, setup: S) -> L10nRegistry<TestEnvironment, MockBundleAdapter>
    where
        S: Into<RegistrySetup>,
    {
        self.get_registry_and_environment(setup).1
    }

    /// Builds a registry for `setup` together with the `TestEnvironment` that
    /// provides its locales and collects the errors of its sources.
    ///
    /// Panics if the sources cannot be registered.
    pub fn get_registry_and_environment<S>(
        &self,
        setup: S,
    ) -> (
        TestEnvironment,
        L10nRegistry<TestEnvironment, MockBundleAdapter>,
    )
    where
        S: Into<RegistrySetup>,
    {
        let setup: RegistrySetup = setup.into();
        let provider = TestEnvironment::new(setup.locales);

        let reg = L10nRegistry::with_provider(provider.clone());
        let sources = setup
            .file_sources
            .into_iter()
            .map(|source| self.reporting_source(source, &provider))
            .collect();
        reg.register_sources(sources).unwrap();
        (provider, reg)
    }

    /// Like [`TestFileFetcher::get_registry_and_environment`], but installs
    /// `bundle_adapter` on the registry. The metasource of every source
    /// description is forwarded, matching the adapter-less variant.
    ///
    /// Panics if the sources cannot be registered or the adapter cannot be
    /// set.
    pub fn get_registry_and_environment_with_adapter<S, B>(
        &self,
        setup: S,
        bundle_adapter: B,
    ) -> (TestEnvironment, L10nRegistry<TestEnvironment, B>)
    where
        S: Into<RegistrySetup>,
        B: BundleAdapter,
    {
        let setup: RegistrySetup = setup.into();
        let provider = TestEnvironment::new(setup.locales);

        let mut reg = L10nRegistry::with_provider(provider.clone());
        let sources = setup
            .file_sources
            .into_iter()
            .map(|source| self.reporting_source(source, &provider))
            .collect();
        reg.register_sources(sources).unwrap();
        reg.set_bundle_adapter(bundle_adapter)
            .expect("Failed to set bundle adapter.");
        (provider, reg)
    }
}

#[async_trait(?Send)]
impl FileFetcher for TestFileFetcher {
    fn fetch_sync(&self, resource_id: &ResourceId) -> std::io::Result<String> {
        self.inner.fs.get_test_file_sync(&resource_id.value)
    }

    async fn fetch(&self, resource_id: &ResourceId) -> std::io::Result<String> {
        self.inner.fs.get_test_file_async(&resource_id.value).await
    }
}

/// What `TestEnvironment` does with reported errors: panic, print them to
/// stderr (test builds only), or stay silent. Unless it panics, the errors
/// are recorded either way.
pub enum ErrorStrategy {
    Panic,
    Report,
    Nothing,
}

pub struct InnerTestEnvironment {
    locales: Vec<LanguageIdentifier>,
    errors: Vec<L10nRegistryError>,
    error_strategy: ErrorStrategy,
}

/// Test environment providing locales and collecting reported errors; clones
/// share the same state.
#[derive(Clone)]
pub struct TestEnvironment {
    inner: Rc<RefCell<InnerTestEnvironment>>,
}

impl TestEnvironment {
    /// Creates an environment with the given locales, no recorded errors and
    /// the `Report` error strategy.
    pub fn new(locales: Vec<LanguageIdentifier>) -> Self {
        Self {
            inner: Rc::new(RefCell::new(InnerTestEnvironment {
                locales,
                errors: vec![],
                error_strategy: ErrorStrategy::Report,
            })),
        }
    }

    /// Replaces the locales handed out by this environment.
    pub fn set_locales(&self, locales: Vec<LanguageIdentifier>) {
        self.inner.borrow_mut().locales = locales;
    }

    /// Returns a copy of all errors recorded so far.
    pub fn errors(&self) -> Vec<L10nRegistryError> {
        self.inner.borrow().errors.clone()
    }

    /// Forgets all recorded errors.
    pub fn clear_errors(&self) {
        self.inner.borrow_mut().errors.clear()
    }
}

impl LocalesProvider for TestEnvironment {
    type Iter = std::vec::IntoIter<LanguageIdentifier>;

    fn locales(&self) -> Self::Iter {
        self.inner.borrow().locales.clone().into_iter()
    }
}

impl ErrorReporter for TestEnvironment {
    /// Handles `errors` according to the error strategy, then appends them to
    /// the recorded errors.
    fn report_errors(&self, errors: Vec<L10nRegistryError>) {
        match self.inner.borrow().error_strategy {
            ErrorStrategy::Panic => {
                panic!("Errors: {:#?}", errors);
            }
            ErrorStrategy::Report => {
                #[cfg(test)] // Don't let printing affect benchmarks
                eprintln!("Errors: {:#?}", errors);
            }
            ErrorStrategy::Nothing => {}
        }
        // The shared borrow taken for the `match` has ended here, so the
        // mutable borrow below cannot conflict with it.
        self.inner.borrow_mut().errors.extend(errors);
    }
}