summary refs log tree commit diff stats
path: root/vendor/icu_provider/src/request.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/icu_provider/src/request.rs')
-rw-r--r-- vendor/icu_provider/src/request.rs 513
1 files changed, 513 insertions, 0 deletions
diff --git a/vendor/icu_provider/src/request.rs b/vendor/icu_provider/src/request.rs
new file mode 100644
index 000000000..7f6bb5911
--- /dev/null
+++ b/vendor/icu_provider/src/request.rs
@@ -0,0 +1,513 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use core::cmp::Ordering;
+use core::default::Default;
+use core::fmt;
+use core::fmt::Debug;
+use icu_locid::extensions::unicode as unicode_ext;
+use icu_locid::subtags::{Language, Region, Script, Variants};
+use icu_locid::{LanguageIdentifier, Locale, SubtagOrderingResult};
+use writeable::{LengthHint, Writeable};
+
+#[cfg(doc)]
+use icu_locid::subtags::Variant;
+
+/// The request type passed into all data provider implementations.
+///
+/// A request only borrows its [`DataLocale`], which is what allows the type to be `Copy`.
+/// The derived `Default` relies on the `Default` impl for `&DataLocale` in this file,
+/// which refers to the `und` locale with no keywords.
+#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
+#[allow(clippy::exhaustive_structs)] // this type is stable
+pub struct DataRequest<'a> {
+ /// The locale for which to load data.
+ ///
+ /// If locale fallback is enabled, the resulting data may be from a different locale
+ /// than the one requested here.
+ pub locale: &'a DataLocale,
+ /// Metadata that may affect the behavior of the data provider.
+ pub metadata: DataRequestMetadata,
+}
+
+impl fmt::Display for DataRequest<'_> {
+    /// Displays the request as its locale; the metadata is not printed.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let requested_locale: &DataLocale = self.locale;
+        fmt::Display::fmt(requested_locale, f)
+    }
+}
+
+/// Metadata for data requests. This is currently empty, but it may be extended with options
+/// for tuning locale fallback, buffer layout, and so forth.
+///
+/// The type is marked `#[non_exhaustive]`, so code outside this crate should obtain a value
+/// through `Default::default()`, as the `DataRequest` doc examples in this file do.
+#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+#[non_exhaustive]
+pub struct DataRequestMetadata;
+
+/// The main locale type used by the ICU4X data provider.
+///
+/// [`DataLocale`] contains less functionality than [`Locale`] but more than
+/// [`LanguageIdentifier`] for better size and performance while still meeting
+/// the needs of the ICU4X data pipeline.
+///
+/// # Examples
+///
+/// Convert a [`Locale`] to a [`DataLocale`] and back:
+///
+/// ```
+/// use icu_locid::locale;
+/// use icu_provider::DataLocale;
+///
+/// let locale1 = locale!("en-u-ca-buddhist");
+/// let data_locale = DataLocale::from(locale1);
+/// let locale2 = data_locale.into_locale();
+///
+/// assert_eq!(locale2.to_string(), "en-u-ca-buddhist");
+/// ```
+///
+/// You can alternatively create a [`DataLocale`] from a borrowed [`Locale`], which is more
+/// efficient than cloning the [`Locale`], but less efficient than converting an owned
+/// [`Locale`]:
+///
+/// ```
+/// use icu_locid::locale;
+/// use icu_provider::DataLocale;
+///
+/// let locale1 = locale!("en-u-ca-buddhist");
+/// let data_locale = DataLocale::from(&locale1);
+/// let locale2 = data_locale.into_locale();
+///
+/// assert_eq!(locale1, locale2);
+/// ```
+///
+/// If you are sure that you have no Unicode keywords, start with [`LanguageIdentifier`]:
+///
+/// ```
+/// use icu_locid::langid;
+/// use icu_provider::DataLocale;
+///
+/// let langid1 = langid!("es-CA-valencia");
+/// let data_locale = DataLocale::from(langid1);
+/// let langid2 = data_locale.get_langid();
+///
+/// assert_eq!(langid2.to_string(), "es-CA-valencia");
+/// ```
+///
+/// [`DataLocale`] only supports `-u` keywords, to reflect the current state of CLDR data
+/// lookup and fallback. This may change in the future.
+///
+/// ```
+/// use icu_locid::Locale;
+/// use icu_provider::DataLocale;
+///
+/// let locale = "hi-t-en-h0-hybrid-u-attr-ca-buddhist"
+/// .parse::<Locale>()
+/// .unwrap();
+/// let data_locale = DataLocale::from(locale);
+///
+/// assert_eq!(data_locale.to_string(), "hi-u-ca-buddhist");
+/// ```
+#[derive(PartialEq, Clone, Default, Eq, Hash)]
+pub struct DataLocale {
+ // Language identifier portion: language, script, region, and variants.
+ langid: LanguageIdentifier,
+ // Unicode (`-u-`) extension keywords; the `From<Locale>` impls copy nothing else
+ // out of a locale's extensions.
+ keywords: unicode_ext::Keywords,
+}
+
+impl Default for &DataLocale {
+    /// Returns a reference to a shared static `und` locale that has no keywords.
+    fn default() -> Self {
+        static UND: DataLocale = DataLocale {
+            keywords: unicode_ext::Keywords::new(),
+            langid: LanguageIdentifier::UND,
+        };
+        &UND
+    }
+}
+
+impl fmt::Debug for DataLocale {
+    /// Debug-prints the locale as `DataLocale{...}`, with its `Display` form inside the braces.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_fmt(format_args!("DataLocale{{{}}}", self))
+    }
+}
+
+impl Writeable for DataLocale {
+    /// Writes the language identifier, followed by `-u-` and the keywords when any exist.
+    fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
+        self.langid.write_to(sink)?;
+        if self.keywords.is_empty() {
+            return Ok(());
+        }
+        sink.write_str("-u-")?;
+        self.keywords.write_to(sink)
+    }
+
+    /// Length hint of the language identifier plus, when keywords exist, the keywords
+    /// and the three bytes of the `-u-` separator.
+    fn writeable_length_hint(&self) -> LengthHint {
+        let keywords_hint = if self.keywords.is_empty() {
+            LengthHint::exact(0)
+        } else {
+            self.keywords.writeable_length_hint() + 3
+        };
+        self.langid.writeable_length_hint() + keywords_hint
+    }
+
+    /// Delegates to the language identifier when there are no keywords; otherwise writes
+    /// into a `String` pre-sized from the length hint.
+    fn write_to_string(&self) -> alloc::borrow::Cow<str> {
+        if self.keywords.is_empty() {
+            return self.langid.write_to_string();
+        }
+        let capacity = self.writeable_length_hint().capacity();
+        let mut buffer = alloc::string::String::with_capacity(capacity);
+        // Writing into a `String` sink is not expected to fail, so the result is discarded.
+        let _ = self.write_to(&mut buffer);
+        alloc::borrow::Cow::Owned(buffer)
+    }
+}
+
+// Provides `Display` for `DataLocale` on top of its `Writeable` impl; the `Debug` impl
+// and the `to_string()` calls in this file rely on it.
+writeable::impl_display_with_writeable!(DataLocale);
+
+impl From<LanguageIdentifier> for DataLocale {
+    /// Wraps an owned [`LanguageIdentifier`], starting with no Unicode extension keywords.
+    fn from(langid: LanguageIdentifier) -> Self {
+        let keywords = unicode_ext::Keywords::new();
+        Self { langid, keywords }
+    }
+}
+
+impl From<Locale> for DataLocale {
+    /// Moves the language identifier and the `-u-` keywords out of an owned [`Locale`];
+    /// the rest of the locale's extensions is dropped.
+    fn from(locale: Locale) -> Self {
+        let langid = locale.id;
+        let keywords = locale.extensions.unicode.keywords;
+        Self { langid, keywords }
+    }
+}
+
+impl From<&LanguageIdentifier> for DataLocale {
+    /// Clones a borrowed [`LanguageIdentifier`] into a [`DataLocale`] with no keywords.
+    fn from(langid: &LanguageIdentifier) -> Self {
+        Self::from(langid.clone())
+    }
+}
+
+impl From<&Locale> for DataLocale {
+    /// Clones only the language identifier and the `-u-` keywords of a borrowed [`Locale`].
+    fn from(locale: &Locale) -> Self {
+        let langid = locale.id.clone();
+        let keywords = locale.extensions.unicode.keywords.clone();
+        Self { langid, keywords }
+    }
+}
+
+impl DataLocale {
+    /// Orders this [`DataLocale`] against a BCP-47 byte string.
+    ///
+    /// The result is the same as converting this [`DataLocale`] to its BCP-47 string and
+    /// comparing that string byte-by-byte with `other`.
+    ///
+    /// The comparison is case-sensitive and forms a *total order*, so it is suitable for
+    /// binary search. [`Ordering::Equal`] is produced only when `other` is exactly
+    /// `self.to_string()`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use icu_locid::Locale;
+    /// use icu_provider::DataLocale;
+    /// use std::cmp::Ordering;
+    ///
+    /// let bcp47_strings: &[&str] = &[
+    ///     "ca-ES",
+    ///     "ca-ES-u-ca-buddhist",
+    ///     "ca-ES-valencia",
+    ///     "pl-Latn-PL",
+    ///     "und",
+    ///     "und-fonipa",
+    ///     "und-u-ca-hebrew",
+    ///     "und-u-ca-japanese",
+    ///     "zh",
+    /// ];
+    ///
+    /// for ab in bcp47_strings.windows(2) {
+    ///     let a = ab[0];
+    ///     let b = ab[1];
+    ///     assert!(a.cmp(b) == Ordering::Less);
+    ///     let a_loc: DataLocale = a.parse::<Locale>().unwrap().into();
+    ///     assert_eq!(a, a_loc.to_string());
+    ///     assert!(
+    ///         a_loc.strict_cmp(a.as_bytes()) == Ordering::Equal,
+    ///         "{} == {}",
+    ///         a,
+    ///         a
+    ///     );
+    ///     assert!(
+    ///         a_loc.strict_cmp(b.as_bytes()) == Ordering::Less,
+    ///         "{} < {}",
+    ///         a,
+    ///         b
+    ///     );
+    ///     let b_loc: DataLocale = b.parse::<Locale>().unwrap().into();
+    ///     assert_eq!(b, b_loc.to_string());
+    ///     assert!(
+    ///         b_loc.strict_cmp(b.as_bytes()) == Ordering::Equal,
+    ///         "{} == {}",
+    ///         b,
+    ///         b
+    ///     );
+    ///     assert!(
+    ///         b_loc.strict_cmp(a.as_bytes()) == Ordering::Greater,
+    ///         "{} > {}",
+    ///         b,
+    ///         a
+    ///     );
+    /// }
+    /// ```
+    pub fn strict_cmp(&self, other: &[u8]) -> Ordering {
+        // Walk `other` one `-`-separated subtag at a time, language identifier first.
+        let langid_result = self.langid.strict_cmp_iter(other.split(|b| *b == b'-'));
+        if !self.has_unicode_ext() {
+            return langid_result.end();
+        }
+        // The language identifier part either decided the ordering already, or it hands
+        // back the subtags of `other` that have not been consumed yet.
+        let mut remaining = match langid_result {
+            SubtagOrderingResult::Subtags(rest) => rest,
+            SubtagOrderingResult::Ordering(decided) => return decided,
+        };
+        match remaining.next() {
+            // `other` also continues with the `u` singleton: compare the keywords next.
+            Some(b"u") => self.keywords.strict_cmp_iter(remaining).end(),
+            // `cmp` orders `other`'s subtag against our "u"; flip it to get self-vs-other.
+            Some(subtag) => subtag.cmp(b"u").reverse(),
+            // We still have `-u-...` to write but `other` is exhausted, so we sort after it.
+            None => Ordering::Greater,
+        }
+    }
+}
+
+impl DataLocale {
+    /// Returns whether this [`DataLocale`] has all empty fields (no components).
+    ///
+    /// This compares `self` against the default `&DataLocale` (`und` with no keywords).
+    pub fn is_empty(&self) -> bool {
+        self == <&DataLocale>::default()
+    }
+
+    /// Returns whether the [`LanguageIdentifier`] associated with this request is `und`.
+    ///
+    /// Note that this only checks the language identifier; extension keywords may also be set.
+    /// To check the entire `DataLocale`, use [`DataLocale::is_empty()`].
+    pub fn is_langid_und(&self) -> bool {
+        self.langid == LanguageIdentifier::UND
+    }
+
+    /// Gets the [`LanguageIdentifier`] for this [`DataLocale`].
+    ///
+    /// This may allocate memory if there are variant subtags. If you need only the language,
+    /// script, and/or region subtag, use the specific getters for those subtags:
+    ///
+    /// - [`DataLocale::language()`]
+    /// - [`DataLocale::script()`]
+    /// - [`DataLocale::region()`]
+    ///
+    /// If you have ownership over the `DataLocale`, use [`DataLocale::into_locale()`]
+    /// and then access the `id` field.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use icu_locid::langid;
+    /// use icu_provider::prelude::*;
+    ///
+    /// const FOO_BAR: DataKey = icu_provider::data_key!("foo/bar@1");
+    ///
+    /// let req_no_langid = DataRequest {
+    ///     locale: &Default::default(),
+    ///     metadata: Default::default(),
+    /// };
+    ///
+    /// let req_with_langid = DataRequest {
+    ///     locale: &langid!("ar-EG").into(),
+    ///     metadata: Default::default(),
+    /// };
+    ///
+    /// assert_eq!(req_no_langid.locale.get_langid(), langid!("und"));
+    /// assert_eq!(req_with_langid.locale.get_langid(), langid!("ar-EG"));
+    /// ```
+    pub fn get_langid(&self) -> LanguageIdentifier {
+        self.langid.clone()
+    }
+
+    /// Overrides the entire [`LanguageIdentifier`] portion of this [`DataLocale`].
+    ///
+    /// The Unicode extension keywords are left untouched.
+    #[inline]
+    pub fn set_langid(&mut self, lid: LanguageIdentifier) {
+        self.langid = lid;
+    }
+
+    /// Converts this [`DataLocale`] into a [`Locale`].
+    ///
+    /// See also [`DataLocale::get_langid()`].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use icu_locid::{
+    ///     langid, subtags_language as language, subtags_region as region, Locale,
+    /// };
+    /// use icu_provider::prelude::*;
+    ///
+    /// let locale: Locale = "it-IT-u-ca-coptic".parse().expect("Valid BCP-47");
+    /// let locale: DataLocale = locale.into();
+    ///
+    /// assert_eq!(locale.to_string(), "it-IT-u-ca-coptic");
+    /// assert_eq!(locale.get_langid(), langid!("it-IT"));
+    /// assert_eq!(locale.language(), language!("it"));
+    /// assert_eq!(locale.script(), None);
+    /// assert_eq!(locale.region(), Some(region!("IT")));
+    ///
+    /// let locale = locale.into_locale();
+    /// assert_eq!(locale.to_string(), "it-IT-u-ca-coptic");
+    /// ```
+    pub fn into_locale(self) -> Locale {
+        // Start from a default `Locale` so every extension other than the `-u-` keywords
+        // stays at its default value.
+        let mut loc = Locale {
+            id: self.langid,
+            ..Default::default()
+        };
+        loc.extensions.unicode.keywords = self.keywords;
+        loc
+    }
+
+    /// Returns the [`Language`] for this [`DataLocale`].
+    #[inline]
+    pub fn language(&self) -> Language {
+        self.langid.language
+    }
+
+    /// Sets the [`Language`] for this [`DataLocale`].
+    #[inline]
+    pub fn set_language(&mut self, language: Language) {
+        self.langid.language = language;
+    }
+
+    /// Returns the [`Script`] for this [`DataLocale`].
+    #[inline]
+    pub fn script(&self) -> Option<Script> {
+        self.langid.script
+    }
+
+    /// Sets the [`Script`] for this [`DataLocale`].
+    #[inline]
+    pub fn set_script(&mut self, script: Option<Script>) {
+        self.langid.script = script;
+    }
+
+    /// Returns the [`Region`] for this [`DataLocale`].
+    #[inline]
+    pub fn region(&self) -> Option<Region> {
+        self.langid.region
+    }
+
+    /// Sets the [`Region`] for this [`DataLocale`].
+    #[inline]
+    pub fn set_region(&mut self, region: Option<Region>) {
+        self.langid.region = region;
+    }
+
+    /// Returns whether there are any [`Variant`] subtags in this [`DataLocale`].
+    #[inline]
+    pub fn has_variants(&self) -> bool {
+        !self.langid.variants.is_empty()
+    }
+
+    /// Sets all [`Variants`] on this [`DataLocale`], overwriting any that were there previously.
+    #[inline]
+    pub fn set_variants(&mut self, variants: Variants) {
+        self.langid.variants = variants;
+    }
+
+    /// Removes all [`Variant`] subtags in this [`DataLocale`], returning the removed
+    /// [`Variants`].
+    #[inline]
+    pub fn clear_variants(&mut self) -> Variants {
+        self.langid.variants.clear()
+    }
+
+    /// Gets the value of the specified Unicode extension keyword for this [`DataLocale`].
+    ///
+    /// Returns a clone of the stored value, or `None` if the keyword is not present.
+    #[inline]
+    pub fn get_unicode_ext(&self, key: &unicode_ext::Key) -> Option<unicode_ext::Value> {
+        self.keywords.get(key).cloned()
+    }
+
+    /// Returns whether there are any Unicode extension keywords in this [`DataLocale`].
+    #[inline]
+    pub fn has_unicode_ext(&self) -> bool {
+        !self.keywords.is_empty()
+    }
+
+    /// Returns whether a specific Unicode extension keyword is present in this [`DataLocale`].
+    #[inline]
+    pub fn contains_unicode_ext(&self, key: &unicode_ext::Key) -> bool {
+        self.keywords.contains_key(key)
+    }
+
+    /// Returns whether this [`DataLocale`] contains a Unicode extension keyword
+    /// with the specified key and value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use icu_locid::{
+    ///     extensions_unicode_key as key, extensions_unicode_value as value,
+    ///     Locale,
+    /// };
+    /// use icu_provider::prelude::*;
+    ///
+    /// let locale: Locale = "it-IT-u-ca-coptic".parse().expect("Valid BCP-47");
+    /// let locale: DataLocale = locale.into();
+    ///
+    /// assert_eq!(locale.get_unicode_ext(&key!("hc")), None);
+    /// assert_eq!(locale.get_unicode_ext(&key!("ca")), Some(value!("coptic")));
+    /// assert!(locale.matches_unicode_ext(&key!("ca"), &value!("coptic"),));
+    /// ```
+    #[inline]
+    pub fn matches_unicode_ext(&self, key: &unicode_ext::Key, value: &unicode_ext::Value) -> bool {
+        self.keywords.get(key) == Some(value)
+    }
+
+    /// Sets the value for a specific Unicode extension keyword on this [`DataLocale`].
+    ///
+    /// Returns whatever the underlying keyword map returns for the insertion: the value
+    /// previously stored under `key`, if there was one.
+    #[inline]
+    pub fn set_unicode_ext(
+        &mut self,
+        key: unicode_ext::Key,
+        value: unicode_ext::Value,
+    ) -> Option<unicode_ext::Value> {
+        self.keywords.set(key, value)
+    }
+
+    /// Removes a specific Unicode extension keyword from this [`DataLocale`], returning
+    /// the value if it was present.
+    #[inline]
+    pub fn remove_unicode_ext(&mut self, key: &unicode_ext::Key) -> Option<unicode_ext::Value> {
+        self.keywords.remove(key)
+    }
+
+    /// Retains a subset of keywords as specified by the predicate function.
+    ///
+    /// The predicate is called with each keyword's key.
+    #[inline]
+    pub fn retain_unicode_ext<F>(&mut self, predicate: F)
+    where
+        F: FnMut(&unicode_ext::Key) -> bool,
+    {
+        self.keywords.retain_by_key(predicate)
+    }
+}
+
+#[test]
+fn test_data_locale_to_string() {
+    // Each entry pairs a locale with the exact BCP-47 string it must serialize to.
+    let cases: [(DataLocale, &str); 3] = [
+        (Locale::UND.into(), "und"),
+        (
+            "und-u-cu-gbp".parse::<Locale>().unwrap().into(),
+            "und-u-cu-gbp",
+        ),
+        (
+            "en-ZA-u-cu-gbp".parse::<Locale>().unwrap().into(),
+            "en-ZA-u-cu-gbp",
+        ),
+    ];
+
+    for (locale, expected) in cases {
+        // Check both the `Display` path and the `Writeable` path.
+        assert_eq!(expected, locale.to_string());
+        writeable::assert_writeable_eq!(&locale, expected);
+    }
+}