summaryrefslogtreecommitdiffstats
path: root/vendor/icu_locid/src/extensions/unicode
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/icu_locid/src/extensions/unicode')
-rw-r--r--vendor/icu_locid/src/extensions/unicode/attribute.rs34
-rw-r--r--vendor/icu_locid/src/extensions/unicode/attributes.rs115
-rw-r--r--vendor/icu_locid/src/extensions/unicode/key.rs31
-rw-r--r--vendor/icu_locid/src/extensions/unicode/keywords.rs404
-rw-r--r--vendor/icu_locid/src/extensions/unicode/mod.rs233
-rw-r--r--vendor/icu_locid/src/extensions/unicode/value.rs199
6 files changed, 1016 insertions, 0 deletions
diff --git a/vendor/icu_locid/src/extensions/unicode/attribute.rs b/vendor/icu_locid/src/extensions/unicode/attribute.rs
new file mode 100644
index 000000000..ba4b70924
--- /dev/null
+++ b/vendor/icu_locid/src/extensions/unicode/attribute.rs
@@ -0,0 +1,34 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+impl_tinystr_subtag!(
+ /// An attribute used in a set of [`Attributes`](super::Attributes).
+ ///
+ /// An attribute has to be a sequence of alphanumerical characters no
+ /// shorter than three and no longer than eight characters.
+ ///
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Attribute;
+ /// use icu::locid::extensions_unicode_attribute as attribute;
+ ///
+ /// let attr: Attribute =
+ /// "buddhist".parse().expect("Failed to parse an Attribute.");
+ ///
+ /// assert_eq!(attr, attribute!("buddhist"));
+ /// ```
+ Attribute,
+ extensions::unicode::Attribute,
+ extensions_unicode_attribute,
+ 3..=8,
+ s,
+ s.is_ascii_alphanumeric(),
+ s.to_ascii_lowercase(),
+ s.is_ascii_alphanumeric() && s.is_ascii_lowercase(),
+ InvalidExtension,
+ ["foo12"],
+ ["no", "toolooong"],
+);
diff --git a/vendor/icu_locid/src/extensions/unicode/attributes.rs b/vendor/icu_locid/src/extensions/unicode/attributes.rs
new file mode 100644
index 000000000..1f9536bfa
--- /dev/null
+++ b/vendor/icu_locid/src/extensions/unicode/attributes.rs
@@ -0,0 +1,115 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::Attribute;
+
+use alloc::vec::Vec;
+use core::ops::Deref;
+
+/// A set of [`Attribute`] elements as defined in [`Unicode Extension Attributes`].
+///
+/// [`Unicode Extension Attributes`]: https://unicode.org/reports/tr35/tr35.html#u_Extension
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::extensions::unicode::{Attribute, Attributes};
+///
+/// let attribute1: Attribute =
+/// "foobar".parse().expect("Failed to parse a variant subtag.");
+///
+/// let attribute2: Attribute = "testing"
+/// .parse()
+/// .expect("Failed to parse a variant subtag.");
+/// let mut v = vec![attribute1, attribute2];
+/// v.sort();
+/// v.dedup();
+///
+/// let attributes: Attributes = Attributes::from_vec_unchecked(v);
+/// assert_eq!(attributes.to_string(), "foobar-testing");
+/// ```
+#[derive(Default, Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord)]
+pub struct Attributes(Vec<Attribute>);
+
+impl Attributes {
+ /// Returns a new empty set of attributes. Same as [`default()`](Default::default()), but is `const`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Attributes;
+ ///
+ /// assert_eq!(Attributes::new(), Attributes::default());
+ /// ```
+ #[inline]
+ pub const fn new() -> Self {
+ Self(Vec::new())
+ }
+
+ /// A constructor which takes a pre-sorted list of [`Attribute`] elements.
+ ///
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::{Attribute, Attributes};
+ ///
+ /// let attribute1: Attribute = "foobar".parse().expect("Parsing failed.");
+ /// let attribute2: Attribute = "testing".parse().expect("Parsing failed.");
+ /// let mut v = vec![attribute1, attribute2];
+ /// v.sort();
+ /// v.dedup();
+ ///
+ /// let attributes = Attributes::from_vec_unchecked(v);
+ /// ```
+ ///
+ /// Notice: For performance- and memory-constrained environments, it is recommended
+ /// for the caller to use [`binary_search`](slice::binary_search) instead of [`sort`](slice::sort)
+ /// and [`dedup`](Vec::dedup()).
+ pub fn from_vec_unchecked(input: Vec<Attribute>) -> Self {
+ Self(input)
+ }
+
+ /// Empties the [`Attributes`] list.
+ ///
+ /// Returns the old list.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::{Attribute, Attributes};
+ ///
+ /// let attribute1: Attribute = "foobar".parse().expect("Parsing failed.");
+ /// let attribute2: Attribute = "testing".parse().expect("Parsing failed.");
+ /// let mut v = vec![attribute1, attribute2];
+ ///
+ /// let mut attributes: Attributes = Attributes::from_vec_unchecked(v);
+ ///
+ /// assert_eq!(attributes.to_string(), "foobar-testing");
+ ///
+ /// attributes.clear();
+ ///
+ /// assert_eq!(attributes.to_string(), "");
+ /// ```
+ pub fn clear(&mut self) -> Self {
+ core::mem::take(self)
+ }
+
+ pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
+ where
+ F: FnMut(&str) -> Result<(), E>,
+ {
+ self.deref().iter().map(|t| t.as_str()).try_for_each(f)
+ }
+}
+
+impl_writeable_for_subtag_list!(Attributes, "foobar", "testing");
+
+impl Deref for Attributes {
+ type Target = [Attribute];
+
+ fn deref(&self) -> &[Attribute] {
+ self.0.deref()
+ }
+}
diff --git a/vendor/icu_locid/src/extensions/unicode/key.rs b/vendor/icu_locid/src/extensions/unicode/key.rs
new file mode 100644
index 000000000..bdfdd4e5c
--- /dev/null
+++ b/vendor/icu_locid/src/extensions/unicode/key.rs
@@ -0,0 +1,31 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+impl_tinystr_subtag!(
+ /// A key used in a list of [`Keywords`](super::Keywords).
+ ///
+/// The key has to be two ASCII characters long, with the first
+/// character being alphanumeric, and the second being alphabetic.
+ ///
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Key;
+ ///
+ /// assert!("ca".parse::<Key>().is_ok());
+ /// ```
+ Key,
+ extensions::unicode::Key,
+ extensions_unicode_key,
+ 2..=2,
+ s,
+ s.all_bytes()[0].is_ascii_alphanumeric() && s.all_bytes()[1].is_ascii_alphabetic(),
+ s.to_ascii_lowercase(),
+ (s.all_bytes()[0].is_ascii_lowercase() || s.all_bytes()[0].is_ascii_digit())
+ && s.all_bytes()[1].is_ascii_lowercase(),
+ InvalidExtension,
+ ["ca", "8a"],
+ ["a", "a8", "abc"],
+);
diff --git a/vendor/icu_locid/src/extensions/unicode/keywords.rs b/vendor/icu_locid/src/extensions/unicode/keywords.rs
new file mode 100644
index 000000000..dc9a15921
--- /dev/null
+++ b/vendor/icu_locid/src/extensions/unicode/keywords.rs
@@ -0,0 +1,404 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use core::borrow::Borrow;
+use core::cmp::Ordering;
+use core::iter::FromIterator;
+use litemap::LiteMap;
+
+use super::Key;
+use super::Value;
+use crate::helpers::ShortVec;
+use crate::ordering::SubtagOrderingResult;
+
+/// A list of [`Key`]-[`Value`] pairs representing functional information
+/// about a locale's internationalization preferences.
+///
+/// Here are examples of fields used in Unicode:
+/// - `hc` - Hour Cycle (`h11`, `h12`, `h23`, `h24`)
+/// - `ca` - Calendar (`buddhist`, `gregory`, ...)
+/// - `fw` - First Day Of the Week (`sun`, `mon`, `sat`, ...)
+///
+/// You can find the full list in [`Unicode BCP 47 U Extension`] section of LDML.
+///
+/// [`Unicode BCP 47 U Extension`]: https://unicode.org/reports/tr35/tr35.html#Key_And_Type_Definitions_
+///
+/// # Examples
+///
+/// Manually build up a [`Keywords`] object:
+///
+/// ```
+/// use icu::locid::extensions::unicode::{Key, Keywords, Value};
+///
+/// let key: Key = "hc".parse().expect("Failed to parse a Key.");
+/// let value: Value = "h23".parse().expect("Failed to parse a Value.");
+/// let keywords: Keywords = vec![(key, value)].into_iter().collect();
+///
+/// assert_eq!(&keywords.to_string(), "hc-h23");
+/// ```
+///
+/// Access a [`Keywords`] object from a [`Locale`]:
+///
+/// ```
+/// use icu::locid::{
+/// extensions_unicode_key as key, extensions_unicode_value as value,
+/// Locale,
+/// };
+///
+/// let loc: Locale = "und-u-hc-h23-kc-true".parse().expect("Valid BCP-47");
+///
+/// assert_eq!(loc.extensions.unicode.keywords.get(&key!("ca")), None);
+/// assert_eq!(
+/// loc.extensions.unicode.keywords.get(&key!("hc")),
+/// Some(&value!("h23"))
+/// );
+/// assert_eq!(
+/// loc.extensions.unicode.keywords.get(&key!("kc")),
+/// Some(&value!("true"))
+/// );
+///
+/// assert_eq!(loc.extensions.unicode.keywords.to_string(), "hc-h23-kc");
+/// ```
+///
+/// [`Locale`]: crate::Locale
+#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)]
+pub struct Keywords(LiteMap<Key, Value, ShortVec<(Key, Value)>>);
+
+impl Keywords {
+ /// Returns a new empty list of key-value pairs. Same as [`default()`](Default::default()), but is `const`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Keywords;
+ ///
+ /// assert_eq!(Keywords::new(), Keywords::default());
+ /// ```
+ #[inline]
+ pub const fn new() -> Self {
+ Self(LiteMap::new())
+ }
+
+ /// Create a new list of key-value pairs having exactly one pair, callable in a `const` context.
+ #[inline]
+ pub const fn new_single(key: Key, value: Value) -> Self {
+ Self(LiteMap::from_sorted_store_unchecked(ShortVec::new_single(
+ (key, value),
+ )))
+ }
+
+ /// Returns `true` if there are no keywords.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Keywords;
+ /// use icu::locid::locale;
+ /// use icu::locid::Locale;
+ ///
+ /// let loc1 = Locale::try_from_bytes(b"und-t-h0-hybrid").unwrap();
+ /// let loc2 = locale!("und-u-ca-buddhist");
+ ///
+ /// assert!(loc1.extensions.unicode.keywords.is_empty());
+ /// assert!(!loc2.extensions.unicode.keywords.is_empty());
+ /// ```
+ pub fn is_empty(&self) -> bool {
+ self.0.is_empty()
+ }
+
+ /// Returns `true` if the list contains a [`Value`] for the specified [`Key`].
+ ///
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::{Key, Keywords, Value};
+ /// use litemap::LiteMap;
+ ///
+ /// let key: Key = "ca".parse().expect("Failed to parse a Key.");
+ /// let value: Value = "gregory".parse().expect("Failed to parse a Value.");
+ /// let keywords: Keywords = vec![(key, value)].into_iter().collect();
+ ///
+ /// let key: Key = "ca".parse().expect("Failed to parse a Key.");
+ /// assert!(&keywords.contains_key(&key));
+ /// ```
+ pub fn contains_key<Q>(&self, key: &Q) -> bool
+ where
+ Key: Borrow<Q>,
+ Q: Ord,
+ {
+ self.0.contains_key(key)
+ }
+
+ /// Returns a reference to the [`Value`] corresponding to the [`Key`].
+ ///
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::{Key, Keywords, Value};
+ ///
+ /// let key: Key = "ca".parse().expect("Failed to parse a Key.");
+ /// let value: Value = "buddhist".parse().expect("Failed to parse a Value.");
+ /// let keywords: Keywords = vec![(key, value)].into_iter().collect();
+ ///
+ /// let key: Key = "ca".parse().expect("Failed to parse a Key.");
+ /// assert_eq!(
+ /// keywords.get(&key).map(|v| v.to_string()),
+ /// Some("buddhist".to_string())
+ /// );
+ /// ```
+ pub fn get<Q>(&self, key: &Q) -> Option<&Value>
+ where
+ Key: Borrow<Q>,
+ Q: Ord,
+ {
+ self.0.get(key)
+ }
+
+ /// Returns a mutable reference to the [`Value`] corresponding to the [`Key`].
+ ///
+ /// Returns `None` if the key doesn't exist or if the key has no value.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::{Key, Keywords, Value};
+ ///
+ /// let key: Key = "ca".parse().expect("Failed to parse a Key.");
+ /// let value: Value = "buddhist".parse().expect("Failed to parse a Value.");
+ /// let mut keywords: Keywords = vec![(key, value)].into_iter().collect();
+ ///
+ /// let key: Key = "ca".parse().expect("Failed to parse a Key.");
+ /// if let Some(value) = keywords.get_mut(&key) {
+ /// *value = "gregory".parse().expect("Failed to parse a Value.");
+ /// }
+ /// assert_eq!(
+ /// keywords.get(&key).map(|v| v.to_string()),
+ /// Some("gregory".to_string())
+ /// );
+ /// ```
+ pub fn get_mut<Q>(&mut self, key: &Q) -> Option<&mut Value>
+ where
+ Key: Borrow<Q>,
+ Q: Ord,
+ {
+ self.0.get_mut(key)
+ }
+
+ /// Sets the specified keyword, returning the old value if it already existed.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Key;
+ /// use icu::locid::extensions::unicode::Value;
+ /// use icu::locid::Locale;
+ /// use icu::locid::{
+ /// extensions_unicode_key as key, extensions_unicode_value as value,
+ /// };
+ ///
+ /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12"
+ /// .parse()
+ /// .expect("valid BCP-47 identifier");
+ /// let old_value = loc
+ /// .extensions
+ /// .unicode
+ /// .keywords
+ /// .set(key!("ca"), value!("japanese"));
+ ///
+ /// assert_eq!(old_value, Some(value!("buddhist")));
+ /// assert_eq!(loc, "und-u-hello-ca-japanese-hc-h12".parse().unwrap());
+ /// ```
+ pub fn set(&mut self, key: Key, value: Value) -> Option<Value> {
+ self.0.insert(key, value)
+ }
+
+ /// Removes the specified keyword, returning the old value if it existed.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Key;
+ /// use icu::locid::extensions_unicode_key as key;
+ /// use icu::locid::Locale;
+ ///
+ /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12"
+ /// .parse()
+ /// .expect("valid BCP-47 identifier");
+ /// loc.extensions.unicode.keywords.remove(key!("ca"));
+ /// assert_eq!(loc, "und-u-hello-hc-h12".parse().unwrap());
+ /// ```
+ pub fn remove<Q: Borrow<Key>>(&mut self, key: Q) -> Option<Value> {
+ self.0.remove(key.borrow())
+ }
+
+ /// Clears all Unicode extension keywords, leaving Unicode attributes.
+ ///
+ /// Returns the old Unicode extension keywords.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use icu::locid::Locale;
+ ///
+ /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12".parse().unwrap();
+ /// loc.extensions.unicode.keywords.clear();
+ /// assert_eq!(loc, "und-u-hello".parse().unwrap());
+ /// ```
+ pub fn clear(&mut self) -> Self {
+ core::mem::take(self)
+ }
+
+ /// Retains a subset of keywords as specified by the predicate function.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions_unicode_key as key;
+ /// use icu::locid::Locale;
+ ///
+ /// let mut loc: Locale = "und-u-ca-buddhist-hc-h12-ms-metric".parse().unwrap();
+ ///
+ /// loc.extensions
+ /// .unicode
+ /// .keywords
+ /// .retain_by_key(|&k| k == key!("hc"));
+ /// assert_eq!(loc, "und-u-hc-h12".parse().unwrap());
+ ///
+ /// loc.extensions
+ /// .unicode
+ /// .keywords
+ /// .retain_by_key(|&k| k == key!("ms"));
+ /// assert_eq!(loc, Locale::UND);
+ /// ```
+ pub fn retain_by_key<F>(&mut self, mut predicate: F)
+ where
+ F: FnMut(&Key) -> bool,
+ {
+ self.0.retain(|k, _| predicate(k))
+ }
+
+ /// Compare this [`Keywords`] with BCP-47 bytes.
+ ///
+ /// The return value is equivalent to what would happen if you first converted this
+ /// [`Keywords`] to a BCP-47 string and then performed a byte comparison.
+ ///
+ /// This function is case-sensitive and results in a *total order*, so it is appropriate for
+ /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Keywords;
+ /// use icu::locid::Locale;
+ /// use std::cmp::Ordering;
+ ///
+ /// let bcp47_strings: &[&str] =
+ /// &["ca-hebrew", "ca-japanese", "ca-japanese-nu-latn", "nu-latn"];
+ ///
+ /// for ab in bcp47_strings.windows(2) {
+ /// let a = ab[0];
+ /// let b = ab[1];
+ /// assert!(a.cmp(b) == Ordering::Less);
+ /// let a_kwds = format!("und-u-{}", a)
+ /// .parse::<Locale>()
+ /// .unwrap()
+ /// .extensions
+ /// .unicode
+ /// .keywords;
+ /// assert_eq!(a, a_kwds.to_string());
+ /// assert!(a_kwds.strict_cmp(a.as_bytes()) == Ordering::Equal);
+ /// assert!(a_kwds.strict_cmp(b.as_bytes()) == Ordering::Less);
+ /// }
+ /// ```
+ pub fn strict_cmp(&self, other: &[u8]) -> Ordering {
+ self.strict_cmp_iter(other.split(|b| *b == b'-')).end()
+ }
+
+ /// Compare this [`Keywords`] with an iterator of BCP-47 subtags.
+ ///
+ /// This function has the same equality semantics as [`Keywords::strict_cmp`]. It is intended as
+ /// a more modular version that allows multiple subtag iterators to be chained together.
+ ///
+ /// For an additional example, see [`SubtagOrderingResult`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Keywords;
+ /// use icu::locid::locale;
+ /// use std::cmp::Ordering;
+ ///
+ /// let subtags: &[&[u8]] = &[b"ca", b"buddhist"];
+ ///
+ /// let kwds = locale!("und-u-ca-buddhist").extensions.unicode.keywords;
+ /// assert_eq!(
+ /// Ordering::Equal,
+ /// kwds.strict_cmp_iter(subtags.iter().copied()).end()
+ /// );
+ ///
+ /// let kwds = locale!("und").extensions.unicode.keywords;
+ /// assert_eq!(
+ /// Ordering::Less,
+ /// kwds.strict_cmp_iter(subtags.iter().copied()).end()
+ /// );
+ ///
+ /// let kwds = locale!("und-u-nu-latn").extensions.unicode.keywords;
+ /// assert_eq!(
+ /// Ordering::Greater,
+ /// kwds.strict_cmp_iter(subtags.iter().copied()).end()
+ /// );
+ /// ```
+ pub fn strict_cmp_iter<'l, I>(&self, mut subtags: I) -> SubtagOrderingResult<I>
+ where
+ I: Iterator<Item = &'l [u8]>,
+ {
+ let r = self.for_each_subtag_str(&mut |subtag| {
+ if let Some(other) = subtags.next() {
+ match subtag.as_bytes().cmp(other) {
+ Ordering::Equal => Ok(()),
+ not_equal => Err(not_equal),
+ }
+ } else {
+ Err(Ordering::Greater)
+ }
+ });
+ match r {
+ Ok(_) => SubtagOrderingResult::Subtags(subtags),
+ Err(o) => SubtagOrderingResult::Ordering(o),
+ }
+ }
+
+ pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
+ where
+ F: FnMut(&str) -> Result<(), E>,
+ {
+ for (k, v) in self.0.iter() {
+ f(k.as_str())?;
+ v.for_each_subtag_str(f)?;
+ }
+ Ok(())
+ }
+
+ /// This needs to be its own method to help with type inference in helpers.rs
+ #[cfg(test)]
+ pub(crate) fn from_tuple_vec(v: Vec<(Key, Value)>) -> Self {
+ v.into_iter().collect()
+ }
+}
+
+impl From<LiteMap<Key, Value, ShortVec<(Key, Value)>>> for Keywords {
+ fn from(map: LiteMap<Key, Value, ShortVec<(Key, Value)>>) -> Self {
+ Self(map)
+ }
+}
+
+impl FromIterator<(Key, Value)> for Keywords {
+ fn from_iter<I: IntoIterator<Item = (Key, Value)>>(iter: I) -> Self {
+ LiteMap::from_iter(iter).into()
+ }
+}
+
+impl_writeable_for_key_value!(Keywords, "ca", "islamic-civil", "mm", "mm");
diff --git a/vendor/icu_locid/src/extensions/unicode/mod.rs b/vendor/icu_locid/src/extensions/unicode/mod.rs
new file mode 100644
index 000000000..fabf1036c
--- /dev/null
+++ b/vendor/icu_locid/src/extensions/unicode/mod.rs
@@ -0,0 +1,233 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Unicode Extensions provide information about user preferences in a given locale.
+//!
+//! The main struct for this extension is [`Unicode`] which contains [`Keywords`] and
+//! [`Attributes`].
+//!
+//!
+//! # Examples
+//!
+//! ```
+//! use icu::locid::extensions::unicode::{Attribute, Key, Unicode, Value};
+//! use icu::locid::{LanguageIdentifier, Locale};
+//!
+//! let mut loc: Locale =
+//! "en-US-u-foobar-hc-h12".parse().expect("Parsing failed.");
+//!
+//! let key: Key = "hc".parse().expect("Parsing key failed.");
+//! let value: Value = "h12".parse().expect("Parsing value failed.");
+//! let attribute: Attribute =
+//! "foobar".parse().expect("Parsing attribute failed.");
+//!
+//! assert_eq!(loc.extensions.unicode.keywords.get(&key), Some(&value));
+//! assert!(loc.extensions.unicode.attributes.contains(&attribute));
+//!
+//! assert_eq!(&loc.extensions.unicode.to_string(), "-u-foobar-hc-h12");
+//! ```
+mod attribute;
+mod attributes;
+mod key;
+mod keywords;
+mod value;
+
+use alloc::vec;
+pub use attribute::Attribute;
+pub use attributes::Attributes;
+pub use key::Key;
+pub use keywords::Keywords;
+pub use value::Value;
+
+use crate::parser::ParserError;
+use crate::parser::SubtagIterator;
+use litemap::LiteMap;
+
+/// Unicode Extensions provide information about user preferences in a given locale.
+///
+/// A list of [`Unicode BCP47 U Extensions`] as defined in [`Unicode Locale
+/// Identifier`] specification.
+///
+/// Unicode extensions provide subtags that specify language and/or locale-based behavior
+/// or refinements to language tags, according to work done by the Unicode Consortium.
+/// (See [`RFC 6067`] for details).
+///
+/// [`Unicode BCP47 U Extensions`]: https://unicode.org/reports/tr35/#u_Extension
+/// [`RFC 6067`]: https://www.ietf.org/rfc/rfc6067.txt
+/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::extensions::unicode::{Key, Value};
+/// use icu::locid::Locale;
+///
+/// let mut loc: Locale =
+/// "de-u-hc-h12-ca-buddhist".parse().expect("Parsing failed.");
+///
+/// let key: Key = "ca".parse().expect("Parsing key failed.");
+/// let value: Value = "buddhist".parse().expect("Parsing value failed.");
+/// assert_eq!(loc.extensions.unicode.keywords.get(&key), Some(&value));
+/// ```
+#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)]
+#[allow(clippy::exhaustive_structs)] // spec-backed stable datastructure
+pub struct Unicode {
+ /// The key-value pairs present in this locale extension, with each extension key subtag
+ /// associated to its provided value subtag.
+ pub keywords: Keywords,
+ /// A canonically ordered sequence of single standalone subtags for this locale extension.
+ pub attributes: Attributes,
+}
+
+impl Unicode {
+ /// Returns a new empty map of Unicode extensions. Same as [`default()`](Default::default()), but is `const`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Unicode;
+ ///
+ /// assert_eq!(Unicode::new(), Unicode::default());
+ /// ```
+ #[inline]
+ pub const fn new() -> Self {
+ Self {
+ keywords: Keywords::new(),
+ attributes: Attributes::new(),
+ }
+ }
+
+ /// Returns [`true`] if the list of keywords and attributes is empty.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::Locale;
+ ///
+ /// let loc: Locale = "en-US-u-foo".parse().expect("Parsing failed.");
+ ///
+ /// assert!(!loc.extensions.unicode.is_empty());
+ /// ```
+ pub fn is_empty(&self) -> bool {
+ self.keywords.is_empty() && self.attributes.is_empty()
+ }
+
+ /// Clears all Unicode extension keywords and attributes, effectively removing
+ /// the Unicode extension.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use icu::locid::Locale;
+ ///
+ /// let mut loc: Locale =
+ /// "und-t-mul-u-hello-ca-buddhist-hc-h12".parse().unwrap();
+ /// loc.extensions.unicode.clear();
+ /// assert_eq!(loc, "und-t-mul".parse().unwrap());
+ /// ```
+ pub fn clear(&mut self) {
+ self.keywords.clear();
+ self.attributes.clear();
+ }
+
+ pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParserError> {
+ let mut attributes = vec![];
+ let mut keywords = LiteMap::new();
+
+ let mut current_keyword = None;
+ let mut current_type = vec![];
+
+ while let Some(subtag) = iter.peek() {
+ if let Ok(attr) = Attribute::try_from_bytes(subtag) {
+ if let Err(idx) = attributes.binary_search(&attr) {
+ attributes.insert(idx, attr);
+ }
+ } else {
+ break;
+ }
+ iter.next();
+ }
+
+ while let Some(subtag) = iter.peek() {
+ let slen = subtag.len();
+ if slen == 2 {
+ if let Some(kw) = current_keyword.take() {
+ keywords.try_insert(kw, Value::from_vec_unchecked(current_type));
+ current_type = vec![];
+ }
+ current_keyword = Some(Key::try_from_bytes(subtag)?);
+ } else if current_keyword.is_some() {
+ match Value::parse_subtag(subtag) {
+ Ok(Some(t)) => current_type.push(t),
+ Ok(None) => {}
+ Err(_) => break,
+ }
+ } else {
+ break;
+ }
+ iter.next();
+ }
+
+ if let Some(kw) = current_keyword.take() {
+ keywords.try_insert(kw, Value::from_vec_unchecked(current_type));
+ }
+
+ // Ensure we've defined at least one attribute or keyword
+ if attributes.is_empty() && keywords.is_empty() {
+ return Err(ParserError::InvalidExtension);
+ }
+
+ Ok(Self {
+ keywords: keywords.into(),
+ attributes: Attributes::from_vec_unchecked(attributes),
+ })
+ }
+
+ pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
+ where
+ F: FnMut(&str) -> Result<(), E>,
+ {
+ if self.is_empty() {
+ return Ok(());
+ }
+ f("u")?;
+ self.attributes.for_each_subtag_str(f)?;
+ self.keywords.for_each_subtag_str(f)?;
+ Ok(())
+ }
+}
+
+writeable::impl_display_with_writeable!(Unicode);
+
+impl writeable::Writeable for Unicode {
+ fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
+ if self.is_empty() {
+ return Ok(());
+ }
+ sink.write_str("-u")?;
+ if !self.attributes.is_empty() {
+ sink.write_char('-')?;
+ writeable::Writeable::write_to(&self.attributes, sink)?;
+ }
+ if !self.keywords.is_empty() {
+ sink.write_char('-')?;
+ writeable::Writeable::write_to(&self.keywords, sink)?;
+ }
+ Ok(())
+ }
+
+ fn writeable_length_hint(&self) -> writeable::LengthHint {
+ if self.is_empty() {
+ return writeable::LengthHint::exact(0);
+ }
+ let mut result = writeable::LengthHint::exact(2);
+ if !self.attributes.is_empty() {
+ result += writeable::Writeable::writeable_length_hint(&self.attributes) + 1;
+ }
+ if !self.keywords.is_empty() {
+ result += writeable::Writeable::writeable_length_hint(&self.keywords) + 1;
+ }
+ result
+ }
+}
diff --git a/vendor/icu_locid/src/extensions/unicode/value.rs b/vendor/icu_locid/src/extensions/unicode/value.rs
new file mode 100644
index 000000000..ce9982a4c
--- /dev/null
+++ b/vendor/icu_locid/src/extensions/unicode/value.rs
@@ -0,0 +1,199 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::helpers::ShortVec;
+use crate::parser::{get_subtag_iterator, ParserError};
+use alloc::vec::Vec;
+use core::ops::RangeInclusive;
+use core::str::FromStr;
+use tinystr::TinyAsciiStr;
+
+/// A value used in a list of [`Keywords`](super::Keywords).
+///
+/// The value has to be a sequence of one or more alphanumerical strings
+/// separated by `-`.
+/// Each part of the sequence has to be no shorter than three characters and no
+/// longer than eight.
+///
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::extensions::unicode::Value;
+///
+/// let value1: Value = "gregory".parse().expect("Failed to parse a Value.");
+/// let value2: Value =
+/// "islamic-civil".parse().expect("Failed to parse a Value.");
+/// let value3: Value = "true".parse().expect("Failed to parse a Value.");
+///
+/// assert_eq!(&value1.to_string(), "gregory");
+/// assert_eq!(&value2.to_string(), "islamic-civil");
+///
+/// // The value "true" is special-cased to an empty value
+/// assert_eq!(&value3.to_string(), "");
+/// ```
+#[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord)]
+pub struct Value(ShortVec<TinyAsciiStr<{ *VALUE_LENGTH.end() }>>);
+
+const VALUE_LENGTH: RangeInclusive<usize> = 3..=8;
+const TRUE_VALUE: TinyAsciiStr<8> = tinystr::tinystr!(8, "true");
+
+impl Value {
+ /// A constructor which takes a utf8 slice, parses it and
+ /// produces a well-formed [`Value`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Value;
+ ///
+ /// let value = Value::try_from_bytes(b"buddhist").expect("Parsing failed.");
+ ///
+ /// assert_eq!(&value.to_string(), "buddhist");
+ /// ```
+ pub fn try_from_bytes(input: &[u8]) -> Result<Self, ParserError> {
+ let mut v = ShortVec::new();
+
+ if !input.is_empty() {
+ for subtag in get_subtag_iterator(input) {
+ let val = Self::subtag_from_bytes(subtag)?;
+ if let Some(val) = val {
+ v.push(val);
+ }
+ }
+ }
+ Ok(Self(v))
+ }
+
+ /// Const constructor for when the value contains only a single subtag.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Value;
+ ///
+ /// Value::try_from_single_subtag(b"buddhist").expect("valid subtag");
+ /// Value::try_from_single_subtag(b"#####").expect_err("invalid subtag");
+ /// Value::try_from_single_subtag(b"foo-bar").expect_err("not a single subtag");
+ /// ```
+ pub const fn try_from_single_subtag(subtag: &[u8]) -> Result<Self, ParserError> {
+ match Self::subtag_from_bytes(subtag) {
+ Err(_) => Err(ParserError::InvalidExtension),
+ Ok(option) => Ok(Self::from_tinystr(option)),
+ }
+ }
+
+ #[doc(hidden)]
+ pub fn as_tinystr_slice(&self) -> &[TinyAsciiStr<8>] {
+ self.0.as_slice()
+ }
+
+ #[doc(hidden)]
+ pub const fn as_single_subtag(&self) -> Option<&TinyAsciiStr<8>> {
+ self.0.single()
+ }
+
+ #[doc(hidden)]
+ pub const fn from_tinystr(subtag: Option<TinyAsciiStr<8>>) -> Self {
+ match subtag {
+ None => Self(ShortVec::new()),
+ Some(val) => {
+ debug_assert!(val.is_ascii_alphanumeric());
+ debug_assert!(!matches!(val, TRUE_VALUE));
+ Self(ShortVec::new_single(val))
+ }
+ }
+ }
+
+ pub(crate) fn from_vec_unchecked(input: Vec<TinyAsciiStr<8>>) -> Self {
+ Self(input.into())
+ }
+
+ #[doc(hidden)]
+ pub const fn subtag_from_bytes(bytes: &[u8]) -> Result<Option<TinyAsciiStr<8>>, ParserError> {
+ Self::parse_subtag_from_bytes_manual_slice(bytes, 0, bytes.len())
+ }
+
+ pub(crate) fn parse_subtag(t: &[u8]) -> Result<Option<TinyAsciiStr<8>>, ParserError> {
+ Self::parse_subtag_from_bytes_manual_slice(t, 0, t.len())
+ }
+
+ pub(crate) const fn parse_subtag_from_bytes_manual_slice(
+ bytes: &[u8],
+ start: usize,
+ end: usize,
+ ) -> Result<Option<TinyAsciiStr<8>>, ParserError> {
+ let slice_len = end - start;
+ if slice_len > *VALUE_LENGTH.end() || slice_len < *VALUE_LENGTH.start() {
+ return Err(ParserError::InvalidExtension);
+ }
+
+ match TinyAsciiStr::from_bytes_manual_slice(bytes, start, end) {
+ Ok(TRUE_VALUE) => Ok(None),
+ Ok(s) if s.is_ascii_alphanumeric() => Ok(Some(s.to_ascii_lowercase())),
+ Ok(_) => Err(ParserError::InvalidExtension),
+ Err(_) => Err(ParserError::InvalidSubtag),
+ }
+ }
+
+ pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
+ where
+ F: FnMut(&str) -> Result<(), E>,
+ {
+ self.0.as_slice().iter().map(|t| t.as_str()).try_for_each(f)
+ }
+}
+
+impl FromStr for Value {
+ type Err = ParserError;
+
+ fn from_str(source: &str) -> Result<Self, Self::Err> {
+ Self::try_from_bytes(source.as_bytes())
+ }
+}
+
+impl_writeable_for_tinystr_list!(Value, "", "islamic", "civil");
+
+/// A macro allowing for compile-time construction of valid Unicode [`Value`] subtag.
+///
+/// The macro only supports single-subtag values.
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::Locale;
+/// use icu::locid::{
+/// extensions_unicode_key as key, extensions_unicode_value as value,
+/// };
+///
+/// let loc: Locale = "de-u-ca-buddhist".parse().unwrap();
+///
+/// assert_eq!(
+/// loc.extensions.unicode.keywords.get(&key!("ca")),
+/// Some(&value!("buddhist"))
+/// );
+/// ```
+///
+/// [`Value`]: crate::extensions::unicode::Value
+#[macro_export]
+macro_rules! extensions_unicode_value {
+ ($value:literal) => {{
+ // What we want:
+ // const R: $crate::extensions::unicode::Value =
+ // match $crate::extensions::unicode::Value::try_from_single_subtag($value.as_bytes()) {
+ // Ok(r) => r,
+ // #[allow(clippy::panic)] // const context
+ // _ => panic!(concat!("Invalid Unicode extension value: ", $value)),
+ // };
+ // Workaround until https://github.com/rust-lang/rust/issues/73255 lands:
+ const R: $crate::extensions::unicode::Value =
+ $crate::extensions::unicode::Value::from_tinystr(
+ match $crate::extensions::unicode::Value::subtag_from_bytes($value.as_bytes()) {
+ Ok(r) => r,
+ _ => panic!(concat!("Invalid Unicode extension value: ", $value)),
+ },
+ );
+ R
+ }};
+}