diff options
Diffstat (limited to 'vendor/icu_locid/src/extensions')
-rw-r--r-- | vendor/icu_locid/src/extensions/mod.rs | 313 | ||||
-rw-r--r-- | vendor/icu_locid/src/extensions/other/mod.rs | 157 | ||||
-rw-r--r-- | vendor/icu_locid/src/extensions/other/subtag.rs | 37 | ||||
-rw-r--r-- | vendor/icu_locid/src/extensions/private/mod.rs | 167 | ||||
-rw-r--r-- | vendor/icu_locid/src/extensions/private/other.rs | 31 | ||||
-rw-r--r-- | vendor/icu_locid/src/extensions/transform/fields.rs | 228 | ||||
-rw-r--r-- | vendor/icu_locid/src/extensions/transform/key.rs | 31 | ||||
-rw-r--r-- | vendor/icu_locid/src/extensions/transform/mod.rs | 236 | ||||
-rw-r--r-- | vendor/icu_locid/src/extensions/transform/value.rs | 119 | ||||
-rw-r--r-- | vendor/icu_locid/src/extensions/unicode/attribute.rs | 34 | ||||
-rw-r--r-- | vendor/icu_locid/src/extensions/unicode/attributes.rs | 115 | ||||
-rw-r--r-- | vendor/icu_locid/src/extensions/unicode/key.rs | 31 | ||||
-rw-r--r-- | vendor/icu_locid/src/extensions/unicode/keywords.rs | 404 | ||||
-rw-r--r-- | vendor/icu_locid/src/extensions/unicode/mod.rs | 233 | ||||
-rw-r--r-- | vendor/icu_locid/src/extensions/unicode/value.rs | 199 |
15 files changed, 2335 insertions, 0 deletions
diff --git a/vendor/icu_locid/src/extensions/mod.rs b/vendor/icu_locid/src/extensions/mod.rs new file mode 100644 index 000000000..42bfcd3c9 --- /dev/null +++ b/vendor/icu_locid/src/extensions/mod.rs @@ -0,0 +1,313 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Unicode Extensions provide a mechanism to extend the [`LanguageIdentifier`] with +//! additional bits of information - a combination of a [`LanguageIdentifier`] and [`Extensions`] +//! is called [`Locale`]. +//! +//! There are four types of extensions: +//! +//! * [`Unicode Extensions`] - marked as `u`. +//! * [`Transform Extensions`] - marked as `t`. +//! * [`Private Use Extensions`] - marked as `x`. +//! * [`Other Extensions`] - marked as any `a-z` except for `u`, `t` and `x`. +//! +//! One can think of extensions as a bag of extra information on top of basic 4 [`subtags`]. +//! +//! Notice: `Other` extension type is currently not supported. +//! +//! # Examples +//! +//! ``` +//! use icu::locid::extensions::unicode::{Key, Value}; +//! use icu::locid::Locale; +//! +//! let loc: Locale = "en-US-u-ca-buddhist-t-en-US-h0-hybrid-x-foo" +//! .parse() +//! .expect("Failed to parse."); +//! +//! assert_eq!(loc.id.language, "en".parse().unwrap()); +//! assert_eq!(loc.id.script, None); +//! assert_eq!(loc.id.region, Some("US".parse().unwrap())); +//! assert_eq!(loc.id.variants.len(), 0); +//! +//! let key: Key = "ca".parse().expect("Parsing key failed."); +//! let value: Value = "buddhist".parse().expect("Parsing value failed."); +//! assert_eq!(loc.extensions.unicode.keywords.get(&key), Some(&value)); +//! ``` +//! +//! [`LanguageIdentifier`]: super::LanguageIdentifier +//! [`Locale`]: super::Locale +//! [`subtags`]: super::subtags +//! [`Other Extensions`]: other +//! [`Private Use Extensions`]: private +//! [`Transform Extensions`]: transform +//! 
[`Unicode Extensions`]: unicode +pub mod other; +pub mod private; +pub mod transform; +pub mod unicode; + +use other::Other; +use private::Private; +use transform::Transform; +use unicode::Unicode; + +use alloc::vec::Vec; + +use crate::parser::ParserError; +use crate::parser::SubtagIterator; + +/// Defines the type of extension. +#[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord, Copy)] +#[non_exhaustive] +pub enum ExtensionType { + /// Transform Extension Type marked as `t`. + Transform, + /// Unicode Extension Type marked as `u`. + Unicode, + /// Private Extension Type marked as `x`. + Private, + /// All other extension types. + Other(u8), +} + +impl ExtensionType { + pub(crate) const fn try_from_byte(key: u8) -> Result<Self, ParserError> { + let key = key.to_ascii_lowercase(); + match key { + b'u' => Ok(Self::Unicode), + b't' => Ok(Self::Transform), + b'x' => Ok(Self::Private), + b'a'..=b'z' => Ok(Self::Other(key)), + _ => Err(ParserError::InvalidExtension), + } + } + + pub(crate) const fn try_from_bytes_manual_slice( + bytes: &[u8], + start: usize, + end: usize, + ) -> Result<Self, ParserError> { + if end - start != 1 { + return Err(ParserError::InvalidExtension); + } + #[allow(clippy::indexing_slicing)] + Self::try_from_byte(bytes[start]) + } +} + +/// A map of extensions associated with a given [`Locale`](crate::Locale). +#[derive(Debug, Default, PartialEq, Eq, Clone, Hash)] +#[non_exhaustive] +pub struct Extensions { + /// A representation of the data for a Unicode extension, when present in the locale identifier. + pub unicode: Unicode, + /// A representation of the data for a transform extension, when present in the locale identifier. + pub transform: Transform, + /// A representation of the data for a private-use extension, when present in the locale identifier. 
+ pub private: Private, + /// A sequence of any other extensions that are present in the locale identifier but are not formally + /// [defined](https://unicode.org/reports/tr35/) and represented explicitly as [`Unicode`], [`Transform`], + /// and [`Private`] are. + pub other: Vec<Other>, +} + +impl Extensions { + /// Returns a new empty map of extensions. Same as [`default()`](Default::default()), but is `const`. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::Extensions; + /// + /// assert_eq!(Extensions::new(), Extensions::default()); + /// ``` + #[inline] + pub const fn new() -> Self { + Self { + unicode: Unicode::new(), + transform: Transform::new(), + private: Private::new(), + other: Vec::new(), + } + } + + /// Function to create a new map of extensions containing exactly one unicode extension, callable in `const` + /// context. + #[inline] + pub const fn from_unicode(unicode: Unicode) -> Self { + Self { + unicode, + transform: Transform::new(), + private: Private::new(), + other: Vec::new(), + } + } + + /// Returns whether there are no extensions present. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::Locale; + /// + /// let loc: Locale = "en-US-u-foo".parse().expect("Parsing failed."); + /// + /// assert!(!loc.extensions.is_empty()); + /// ``` + pub fn is_empty(&self) -> bool { + self.unicode.is_empty() + && self.transform.is_empty() + && self.private.is_empty() + && self.other.is_empty() + } + + /// Retains the specified extension types, clearing all others. 
+ /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::ExtensionType; + /// use icu::locid::Locale; + /// + /// let loc: Locale = + /// "und-a-hello-t-mul-u-world-z-zzz-x-extra".parse().unwrap(); + /// + /// let mut only_unicode = loc.clone(); + /// only_unicode + /// .extensions + /// .retain_by_type(|t| t == ExtensionType::Unicode); + /// assert_eq!(only_unicode, "und-u-world".parse().unwrap()); + /// + /// let mut only_t_z = loc.clone(); + /// only_t_z.extensions.retain_by_type(|t| { + /// t == ExtensionType::Transform || t == ExtensionType::Other(b'z') + /// }); + /// assert_eq!(only_t_z, "und-t-mul-z-zzz".parse().unwrap()); + /// ``` + pub fn retain_by_type<F>(&mut self, mut predicate: F) + where + F: FnMut(ExtensionType) -> bool, + { + if !predicate(ExtensionType::Unicode) { + self.unicode.clear(); + } + if !predicate(ExtensionType::Transform) { + self.transform.clear(); + } + if !predicate(ExtensionType::Private) { + self.private.clear(); + } + self.other + .retain(|o| predicate(ExtensionType::Other(o.get_ext_byte()))); + } + + pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParserError> { + let mut unicode = None; + let mut transform = None; + let mut private = None; + let mut other = Vec::new(); + + let mut st = iter.next(); + while let Some(subtag) = st { + match subtag.get(0).map(|b| ExtensionType::try_from_byte(*b)) { + Some(Ok(ExtensionType::Unicode)) => { + unicode = Some(Unicode::try_from_iter(iter)?); + } + Some(Ok(ExtensionType::Transform)) => { + transform = Some(Transform::try_from_iter(iter)?); + } + Some(Ok(ExtensionType::Private)) => { + private = Some(Private::try_from_iter(iter)?); + } + Some(Ok(ExtensionType::Other(ext))) => { + let parsed = Other::try_from_iter(ext, iter)?; + if let Err(idx) = other.binary_search(&parsed) { + other.insert(idx, parsed); + } else { + return Err(ParserError::InvalidExtension); + } + } + None => {} + _ => return Err(ParserError::InvalidExtension), + } + + st = 
iter.next(); + } + + Ok(Self { + unicode: unicode.unwrap_or_default(), + transform: transform.unwrap_or_default(), + private: private.unwrap_or_default(), + other, + }) + } + + pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E> + where + F: FnMut(&str) -> Result<(), E>, + { + let mut wrote_tu = false; + // Alphabetic by singleton + self.other.iter().try_for_each(|other| { + if other.get_ext() > 't' && !wrote_tu { + // Since 't' and 'u' are next to each other in alphabetical + // order, write both now. + self.transform.for_each_subtag_str(f)?; + self.unicode.for_each_subtag_str(f)?; + wrote_tu = true; + } + other.for_each_subtag_str(f)?; + Ok(()) + })?; + + if !wrote_tu { + self.transform.for_each_subtag_str(f)?; + self.unicode.for_each_subtag_str(f)?; + } + + // Private must be written last, since it allows single character + // keys. Extensions must also be written in alphabetical order, + // which would seem to imply that other extensions `y` and `z` are + // invalid, but this is not specified. 
+ self.private.for_each_subtag_str(f)?; + Ok(()) + } +} + +impl_writeable_for_each_subtag_str_no_test!(Extensions); + +#[test] +fn test_writeable() { + use crate::Locale; + use writeable::assert_writeable_eq; + assert_writeable_eq!(Extensions::new(), "",); + assert_writeable_eq!( + "my-t-my-d0-zawgyi".parse::<Locale>().unwrap().extensions, + "t-my-d0-zawgyi", + ); + assert_writeable_eq!( + "ar-SA-u-ca-islamic-civil" + .parse::<Locale>() + .unwrap() + .extensions, + "u-ca-islamic-civil", + ); + assert_writeable_eq!( + "en-001-x-foo-bar".parse::<Locale>().unwrap().extensions, + "x-foo-bar", + ); + assert_writeable_eq!( + "und-t-m0-true".parse::<Locale>().unwrap().extensions, + "t-m0-true", + ); + assert_writeable_eq!( + "und-a-foo-t-foo-u-foo-w-foo-z-foo-x-foo" + .parse::<Locale>() + .unwrap() + .extensions, + "a-foo-t-foo-u-foo-w-foo-z-foo-x-foo", + ); +} diff --git a/vendor/icu_locid/src/extensions/other/mod.rs b/vendor/icu_locid/src/extensions/other/mod.rs new file mode 100644 index 000000000..36dbc49b6 --- /dev/null +++ b/vendor/icu_locid/src/extensions/other/mod.rs @@ -0,0 +1,157 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Other Use Extensions is a list of extensions other than unicode, +//! transform or private. +//! +//! Those extensions are treated as a pass-through, and no Unicode related +//! behavior depends on them. +//! +//! The main struct for this extension is [`Other`] which is a list of [`Subtag`]s. +//! +//! # Examples +//! +//! ``` +//! use icu::locid::extensions::other::Other; +//! use icu::locid::Locale; +//! +//! let mut loc: Locale = "en-US-a-foo-faa".parse().expect("Parsing failed."); +//! 
``` + +mod subtag; + +use crate::parser::ParserError; +use crate::parser::SubtagIterator; +use alloc::vec::Vec; +pub use subtag::Subtag; + +/// A list of [`Other Use Extensions`] as defined in [`Unicode Locale +/// Identifier`] specification. +/// +/// Those extensions are treated as a pass-through, and no Unicode related +/// behavior depends on them. +/// +/// # Examples +/// +/// ``` +/// use icu::locid::extensions::other::{Other, Subtag}; +/// +/// let subtag1: Subtag = "foo".parse().expect("Failed to parse a Subtag."); +/// let subtag2: Subtag = "bar".parse().expect("Failed to parse a Subtag."); +/// +/// let other = Other::from_vec_unchecked(b'a', vec![subtag1, subtag2]); +/// assert_eq!(&other.to_string(), "-a-foo-bar"); +/// ``` +/// +/// [`Other Use Extensions`]: https://unicode.org/reports/tr35/#other_extensions +/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier +#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)] +pub struct Other((u8, Vec<Subtag>)); + +impl Other { + /// A constructor which takes a pre-sorted list of [`Subtag`]. + /// + /// # Panics + /// + /// Panics if `ext` is not ASCII alphabetic. 
+ /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::other::{Other, Subtag}; + /// + /// let subtag1: Subtag = "foo".parse().expect("Failed to parse a Subtag."); + /// let subtag2: Subtag = "bar".parse().expect("Failed to parse a Subtag."); + /// + /// let other = Other::from_vec_unchecked(b'a', vec![subtag1, subtag2]); + /// assert_eq!(&other.to_string(), "-a-foo-bar"); + /// ``` + pub fn from_vec_unchecked(ext: u8, input: Vec<Subtag>) -> Self { + assert!(ext.is_ascii_alphabetic()); + Self((ext, input)) + } + + /// Collects the subtags that follow the `ext` singleton into an [`Other`], stopping at the + /// first peeked subtag that `Subtag::valid_key` rejects (that subtag is left in `iter`). + /// + /// NOTE(review): a subtag that passes `Subtag::valid_key` but fails `Subtag::try_from_bytes` + /// is consumed and dropped without reporting an error - confirm this is intended. + pub(crate) fn try_from_iter(ext: u8, iter: &mut SubtagIterator) -> Result<Self, ParserError> { + debug_assert!(ext.is_ascii_alphabetic()); + + let mut keys = Vec::new(); + while let Some(subtag) = iter.peek() { + if !Subtag::valid_key(subtag) { + break; + } + if let Ok(key) = Subtag::try_from_bytes(subtag) { + keys.push(key); + } + iter.next(); + } + + Ok(Self::from_vec_unchecked(ext, keys)) + } + + /// Gets the tag character for this extension as a char. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::Locale; + /// + /// let loc: Locale = "und-a-hello-world".parse().unwrap(); + /// let other_ext = &loc.extensions.other[0]; + /// assert_eq!(other_ext.get_ext(), 'a'); + /// ``` + pub fn get_ext(&self) -> char { + self.get_ext_byte() as char + } + + /// Gets the tag character for this extension as a byte. 
+ /// + /// # Examples + /// + /// ``` + /// use icu::locid::Locale; + /// + /// let loc: Locale = "und-a-hello-world".parse().unwrap(); + /// let other_ext = &loc.extensions.other[0]; + /// assert_eq!(other_ext.get_ext_byte(), b'a'); + /// ``` + pub fn get_ext_byte(&self) -> u8 { + self.0 .0 + } + + pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E> + where + F: FnMut(&str) -> Result<(), E>, + { + let (ext, keys) = &self.0; + debug_assert!(ext.is_ascii_alphabetic()); + // Safety: ext is ascii_alphabetic, so it is valid UTF-8 + let ext_str = unsafe { core::str::from_utf8_unchecked(core::slice::from_ref(ext)) }; + f(ext_str)?; + keys.iter().map(|t| t.as_str()).try_for_each(f) + } +} + +writeable::impl_display_with_writeable!(Other); + +impl writeable::Writeable for Other { + fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result { + let (ext, keys) = &self.0; + sink.write_char('-')?; + sink.write_char(*ext as char)?; + for key in keys.iter() { + sink.write_char('-')?; + writeable::Writeable::write_to(key, sink)?; + } + + Ok(()) + } + + fn writeable_length_hint(&self) -> writeable::LengthHint { + let mut result = writeable::LengthHint::exact(2); + for key in self.0 .1.iter() { + result += writeable::Writeable::writeable_length_hint(key) + 1; + } + result + } +} diff --git a/vendor/icu_locid/src/extensions/other/subtag.rs b/vendor/icu_locid/src/extensions/other/subtag.rs new file mode 100644 index 000000000..60995c395 --- /dev/null +++ b/vendor/icu_locid/src/extensions/other/subtag.rs @@ -0,0 +1,37 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +impl_tinystr_subtag!( + /// A single item used in a list of [`Other`](super::Other) extensions. 
+ /// + /// The subtag has to be an ASCII alphanumerical string no shorter than + /// two characters and no longer than eight. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::other::Subtag; + /// + /// let subtag: Subtag = "Foo".parse().expect("Failed to parse a Subtag."); + /// + /// assert_eq!(subtag.as_str(), "foo"); + /// ``` + Subtag, + extensions::other::Subtag, + extensions_other_subtag, + 2..=8, + s, + s.is_ascii_alphanumeric(), + s.to_ascii_lowercase(), + s.is_ascii_alphanumeric() && s.is_ascii_lowercase(), + InvalidExtension, + ["foo12"], + ["y", "toolooong"], +); + +impl Subtag { + pub(crate) const fn valid_key(v: &[u8]) -> bool { + 2 <= v.len() && v.len() <= 8 + } +} diff --git a/vendor/icu_locid/src/extensions/private/mod.rs b/vendor/icu_locid/src/extensions/private/mod.rs new file mode 100644 index 000000000..13090c94a --- /dev/null +++ b/vendor/icu_locid/src/extensions/private/mod.rs @@ -0,0 +1,167 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Private Use Extensions is a list of extensions intended for +//! private use. +//! +//! Those extensions are treated as a pass-through, and no Unicode related +//! behavior depends on them. +//! +//! The main struct for this extension is [`Private`] which is a list of [`Subtag`]s. +//! +//! # Examples +//! +//! ``` +//! use icu::locid::extensions::private::{Private, Subtag}; +//! use icu::locid::Locale; +//! +//! let mut loc: Locale = "en-US-x-foo-faa".parse().expect("Parsing failed."); +//! +//! let subtag: Subtag = "foo".parse().expect("Parsing subtag failed."); +//! assert!(loc.extensions.private.contains(&subtag)); +//! assert_eq!(loc.extensions.private.iter().next(), Some(&subtag)); +//! loc.extensions.private.clear(); +//! assert_eq!(loc.to_string(), "en-US"); +//! 
``` + +mod other; + +use alloc::vec::Vec; +use core::ops::Deref; + +pub use other::Subtag; + +use crate::parser::ParserError; +use crate::parser::SubtagIterator; + +/// A list of [`Private Use Extensions`] as defined in [`Unicode Locale +/// Identifier`] specification. +/// +/// Those extensions are treated as a pass-through, and no Unicode related +/// behavior depends on them. +/// +/// # Examples +/// +/// ``` +/// use icu::locid::extensions::private::{Private, Subtag}; +/// +/// let subtag1: Subtag = "foo".parse().expect("Failed to parse a Subtag."); +/// let subtag2: Subtag = "bar".parse().expect("Failed to parse a Subtag."); +/// +/// let private = Private::from_vec_unchecked(vec![subtag1, subtag2]); +/// assert_eq!(&private.to_string(), "-x-foo-bar"); +/// ``` +/// +/// [`Private Use Extensions`]: https://unicode.org/reports/tr35/#pu_extensions +/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier +#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)] +pub struct Private(Vec<Subtag>); + +impl Private { + /// Returns a new empty list of private-use extensions. Same as [`default()`](Default::default()), but is `const`. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::private::Private; + /// + /// assert_eq!(Private::new(), Private::default()); + /// ``` + #[inline] + pub const fn new() -> Self { + Self(Vec::new()) + } + + /// A constructor which takes a pre-sorted list of [`Subtag`]. 
+ /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::private::{Private, Subtag}; + /// + /// let subtag1: Subtag = "foo".parse().expect("Failed to parse a Subtag."); + /// let subtag2: Subtag = "bar".parse().expect("Failed to parse a Subtag."); + /// + /// let private = Private::from_vec_unchecked(vec![subtag1, subtag2]); + /// assert_eq!(&private.to_string(), "-x-foo-bar"); + /// ``` + pub fn from_vec_unchecked(input: Vec<Subtag>) -> Self { + Self(input) + } + + /// Empties the [`Private`] list. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::private::{Private, Subtag}; + /// + /// let subtag1: Subtag = "foo".parse().expect("Failed to parse a Subtag."); + /// let subtag2: Subtag = "bar".parse().expect("Failed to parse a Subtag."); + /// let mut private = Private::from_vec_unchecked(vec![subtag1, subtag2]); + /// + /// assert_eq!(&private.to_string(), "-x-foo-bar"); + /// + /// private.clear(); + /// + /// assert_eq!(&private.to_string(), ""); + /// ``` + pub fn clear(&mut self) { + self.0.clear(); + } + + pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParserError> { + let keys = iter + .map(Subtag::try_from_bytes) + .collect::<Result<Vec<_>, _>>()?; + + Ok(Self::from_vec_unchecked(keys)) + } + + pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E> + where + F: FnMut(&str) -> Result<(), E>, + { + if self.is_empty() { + return Ok(()); + } + f("x")?; + self.deref().iter().map(|t| t.as_str()).try_for_each(f) + } +} + +writeable::impl_display_with_writeable!(Private); + +impl writeable::Writeable for Private { + fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result { + if self.is_empty() { + return Ok(()); + } + sink.write_str("-x")?; + for key in self.iter() { + sink.write_char('-')?; + writeable::Writeable::write_to(key, sink)?; + } + Ok(()) + } + + fn writeable_length_hint(&self) -> writeable::LengthHint { + if self.is_empty() { + return 
writeable::LengthHint::exact(0); + } + let mut result = writeable::LengthHint::exact(2); + for key in self.iter() { + result += writeable::Writeable::writeable_length_hint(key) + 1; + } + result + } +} + +impl Deref for Private { + type Target = [Subtag]; + + fn deref(&self) -> &Self::Target { + self.0.deref() + } +} diff --git a/vendor/icu_locid/src/extensions/private/other.rs b/vendor/icu_locid/src/extensions/private/other.rs new file mode 100644 index 000000000..a91e12855 --- /dev/null +++ b/vendor/icu_locid/src/extensions/private/other.rs @@ -0,0 +1,31 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +impl_tinystr_subtag!( + /// A single item used in a list of [`Private`](super::Private) extensions. + /// + /// The subtag has to be an ASCII alphanumerical string no shorter than + /// one character and no longer than eight. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::private::Subtag; + /// + /// let subtag1: Subtag = "Foo".parse().expect("Failed to parse a Subtag."); + /// + /// assert_eq!(subtag1.as_str(), "foo"); + /// ``` + Subtag, + extensions::private::Subtag, + extensions_private_subtag, + 1..=8, + s, + s.is_ascii_alphanumeric(), + s.to_ascii_lowercase(), + s.is_ascii_alphanumeric() && s.is_ascii_lowercase(), + InvalidExtension, + ["foo12"], + ["toolooong"], +); diff --git a/vendor/icu_locid/src/extensions/transform/fields.rs b/vendor/icu_locid/src/extensions/transform/fields.rs new file mode 100644 index 000000000..ca10000a7 --- /dev/null +++ b/vendor/icu_locid/src/extensions/transform/fields.rs @@ -0,0 +1,228 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use core::borrow::Borrow; +use core::iter::FromIterator; +use litemap::LiteMap; + +use super::Key; +use super::Value; + +/// A list of [`Key`]-[`Value`] pairs representing functional information +/// about content transformations. +/// +/// Here are examples of fields used in Unicode: +/// - `s0`, `d0` - Transform source/destination +/// - `t0` - Machine Translation +/// - `h0` - Hybrid Locale Identifiers +/// +/// You can find the full list in [`Unicode BCP 47 T Extension`] section of LDML. +/// +/// [`Unicode BCP 47 T Extension`]: https://unicode.org/reports/tr35/tr35.html#BCP47_T_Extension +/// +/// # Examples +/// +/// ``` +/// use icu::locid::extensions::transform::{Fields, Key, Value}; +/// +/// let key: Key = "h0".parse().expect("Failed to parse a Key."); +/// let value: Value = "hybrid".parse().expect("Failed to parse a Value."); +/// let fields: Fields = vec![(key, value)].into_iter().collect(); +/// +/// assert_eq!(&fields.to_string(), "h0-hybrid"); +/// ``` +#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)] +pub struct Fields(LiteMap<Key, Value>); + +impl Fields { + /// Returns a new empty list of key-value pairs. Same as [`default()`](Default::default()), but is `const`. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::transform::Fields; + /// + /// assert_eq!(Fields::new(), Fields::default()); + /// ``` + #[inline] + pub const fn new() -> Self { + Self(LiteMap::new()) + } + + /// Returns `true` if there are no fields. 
+ /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::transform::Fields; + /// use icu::locid::locale; + /// use icu::locid::Locale; + /// + /// let loc1 = Locale::try_from_bytes(b"und-t-h0-hybrid").unwrap(); + /// let loc2 = locale!("und-u-ca-buddhist"); + /// + /// assert!(!loc1.extensions.transform.fields.is_empty()); + /// assert!(loc2.extensions.transform.fields.is_empty()); + /// ``` + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Empties the [`Fields`] list. + /// + /// Returns the old list. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::transform::{Fields, Key, Value}; + /// + /// let key: Key = "h0".parse().expect("Failed to parse a Key."); + /// let value: Value = "hybrid".parse().expect("Failed to parse a Value."); + /// let mut fields: Fields = vec![(key, value)].into_iter().collect(); + /// + /// assert_eq!(&fields.to_string(), "h0-hybrid"); + /// + /// fields.clear(); + /// + /// assert_eq!(&fields.to_string(), ""); + /// ``` + pub fn clear(&mut self) -> Self { + core::mem::take(self) + } + + /// Returns `true` if the list contains a [`Value`] for the specified [`Key`]. + /// + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::transform::{Fields, Key, Value}; + /// + /// let key: Key = "h0".parse().expect("Failed to parse a Key."); + /// let value: Value = "hybrid".parse().expect("Failed to parse a Value."); + /// let mut fields: Fields = vec![(key, value)].into_iter().collect(); + /// + /// let key: Key = "h0".parse().expect("Failed to parse a Key."); + /// assert!(&fields.contains_key(&key)); + /// ``` + pub fn contains_key<Q>(&self, key: &Q) -> bool + where + Key: Borrow<Q>, + Q: Ord, + { + self.0.contains_key(key) + } + + /// Returns a reference to the [`Value`] corresponding to the [`Key`]. 
+ /// + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::transform::{Fields, Key, Value}; + /// + /// let key: Key = "h0".parse().expect("Failed to parse a Key."); + /// let value: Value = "hybrid".parse().expect("Failed to parse a Value."); + /// let mut fields: Fields = vec![(key, value)].into_iter().collect(); + /// + /// let key: Key = "h0".parse().expect("Failed to parse a Key."); + /// assert_eq!( + /// fields.get(&key).map(|v| v.to_string()), + /// Some("hybrid".to_string()) + /// ); + /// ``` + pub fn get<Q>(&self, key: &Q) -> Option<&Value> + where + Key: Borrow<Q>, + Q: Ord, + { + self.0.get(key) + } + + /// Sets the specified keyword, returning the old value if it already existed. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::transform::Key; + /// use icu::locid::extensions::transform::Value; + /// use icu::locid::extensions_transform_key as key; + /// use icu::locid::Locale; + /// + /// let lower = "lower".parse::<Value>().expect("valid extension subtag"); + /// let casefold = "casefold".parse::<Value>().expect("valid extension subtag"); + /// + /// let mut loc: Locale = "en-t-hi-d0-casefold" + /// .parse() + /// .expect("valid BCP-47 identifier"); + /// let old_value = loc.extensions.transform.fields.set(key!("d0"), lower); + /// + /// assert_eq!(old_value, Some(casefold)); + /// assert_eq!(loc, "en-t-hi-d0-lower".parse().unwrap()); + /// ``` + pub fn set(&mut self, key: Key, value: Value) -> Option<Value> { + self.0.insert(key, value) + } + + /// Retains a subset of fields as specified by the predicate function. 
+ /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions_transform_key as key; + /// use icu::locid::Locale; + /// + /// let mut loc: Locale = "und-t-h0-hybrid-d0-hex-m0-xml".parse().unwrap(); + /// + /// loc.extensions + /// .transform + /// .fields + /// .retain_by_key(|&k| k == key!("h0")); + /// assert_eq!(loc, "und-t-h0-hybrid".parse().unwrap()); + /// + /// loc.extensions + /// .transform + /// .fields + /// .retain_by_key(|&k| k == key!("d0")); + /// assert_eq!(loc, Locale::UND); + /// ``` + pub fn retain_by_key<F>(&mut self, mut predicate: F) + where + F: FnMut(&Key) -> bool, + { + self.0.retain(|k, _| predicate(k)) + } + + pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E> + where + F: FnMut(&str) -> Result<(), E>, + { + for (k, v) in self.0.iter() { + f(k.as_str())?; + v.for_each_subtag_str(f)?; + } + Ok(()) + } + + /// This needs to be its own method to help with type inference in helpers.rs + #[cfg(test)] + pub(crate) fn from_tuple_vec(v: Vec<(Key, Value)>) -> Self { + v.into_iter().collect() + } +} + +impl From<LiteMap<Key, Value>> for Fields { + fn from(map: LiteMap<Key, Value>) -> Self { + Self(map) + } +} + +impl FromIterator<(Key, Value)> for Fields { + fn from_iter<I: IntoIterator<Item = (Key, Value)>>(iter: I) -> Self { + LiteMap::from_iter(iter).into() + } +} + +impl_writeable_for_key_value!(Fields, "h0", "hybrid", "m0", "m0-true"); diff --git a/vendor/icu_locid/src/extensions/transform/key.rs b/vendor/icu_locid/src/extensions/transform/key.rs new file mode 100644 index 000000000..5400988a1 --- /dev/null +++ b/vendor/icu_locid/src/extensions/transform/key.rs @@ -0,0 +1,31 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +impl_tinystr_subtag!( + /// A key used in a list of [`Fields`](super::Fields). 
+ /// + /// The key has to be two ASCII characters long, with the first + /// character being alphabetic, and the second being a digit. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::transform::Key; + /// + /// let key1: Key = "k0".parse().expect("Failed to parse a Key."); + /// + /// assert_eq!(key1.as_str(), "k0"); + /// ``` + Key, + extensions::transform::Key, + extensions_transform_key, + 2..=2, + s, + s.all_bytes()[0].is_ascii_alphabetic() && s.all_bytes()[1].is_ascii_digit(), + s.to_ascii_lowercase(), + s.all_bytes()[0].is_ascii_lowercase() && s.all_bytes()[1].is_ascii_digit(), + InvalidExtension, + ["k0"], + ["", "k", "0k", "k12"], +); diff --git a/vendor/icu_locid/src/extensions/transform/mod.rs b/vendor/icu_locid/src/extensions/transform/mod.rs new file mode 100644 index 000000000..a8c605146 --- /dev/null +++ b/vendor/icu_locid/src/extensions/transform/mod.rs @@ -0,0 +1,236 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Transform Extensions provide information on content transformations in a given locale. +//! +//! The main struct for this extension is [`Transform`] which contains [`Fields`] and an +//! optional [`LanguageIdentifier`]. +//! +//! [`LanguageIdentifier`]: super::super::LanguageIdentifier +//! +//! # Examples +//! +//! ``` +//! use icu::locid::extensions::transform::{Fields, Key, Transform, Value}; +//! use icu::locid::{LanguageIdentifier, Locale}; +//! +//! let mut loc: Locale = +//! "en-US-t-es-AR-h0-hybrid".parse().expect("Parsing failed."); +//! +//! let lang: LanguageIdentifier = +//! "es-AR".parse().expect("Parsing LanguageIdentifier failed."); +//! +//! let key: Key = "h0".parse().expect("Parsing key failed."); +//! let value: Value = "hybrid".parse().expect("Parsing value failed."); +//! +//! 
assert_eq!(loc.extensions.transform.lang, Some(lang)); +//! assert!(loc.extensions.transform.fields.contains_key(&key)); +//! assert_eq!(loc.extensions.transform.fields.get(&key), Some(&value)); +//! +//! assert_eq!(&loc.extensions.transform.to_string(), "-t-es-AR-h0-hybrid"); +//! ``` +mod fields; +mod key; +mod value; + +pub use fields::Fields; +pub use key::Key; +pub use value::Value; + +use crate::parser::SubtagIterator; +use crate::parser::{parse_language_identifier_from_iter, ParserError, ParserMode}; +use crate::subtags::Language; +use crate::LanguageIdentifier; +use alloc::vec; +use litemap::LiteMap; + +/// A list of [`Unicode BCP47 T Extensions`] as defined in [`Unicode Locale +/// Identifier`] specification. +/// +/// Transform extension carries information about source language or script of +/// transformed content, including content that has been transliterated, transcribed, +/// or translated, or in some other way influenced by the source (See [`RFC 6497`] for details). +/// +/// # Examples +/// +/// ``` +/// use icu::locid::extensions::transform::{Key, Value}; +/// use icu::locid::{LanguageIdentifier, Locale}; +/// +/// let mut loc: Locale = +/// "de-t-en-US-h0-hybrid".parse().expect("Parsing failed."); +/// +/// let en_us: LanguageIdentifier = "en-US".parse().expect("Parsing failed."); +/// +/// assert_eq!(loc.extensions.transform.lang, Some(en_us)); +/// let key: Key = "h0".parse().expect("Parsing key failed."); +/// let value: Value = "hybrid".parse().expect("Parsing value failed."); +/// assert_eq!(loc.extensions.transform.fields.get(&key), Some(&value)); +/// ``` +/// [`Unicode BCP47 T Extensions`]: https://unicode.org/reports/tr35/#t_Extension +/// [`RFC 6497`]: https://www.ietf.org/rfc/rfc6497.txt +/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier +#[derive(Clone, PartialEq, Eq, Debug, Default, Hash)] +#[allow(clippy::exhaustive_structs)] // spec-backed stable datastructure +pub struct Transform { + 
/// The [`LanguageIdentifier`] specified with this locale extension, or `None` if not present. + pub lang: Option<LanguageIdentifier>, + /// The key-value pairs present in this locale extension, with each extension key subtag + /// associated to its provided value subtag. + pub fields: Fields, +} + +impl Transform { + /// Returns a new empty map of Transform extensions. Same as [`default()`](Default::default()), but is `const`. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::transform::Transform; + /// + /// assert_eq!(Transform::new(), Transform::default()); + /// ``` + #[inline] + pub const fn new() -> Self { + Self { + lang: None, + fields: Fields::new(), + } + } + + /// Returns `true` if there are no tfields and no tlang in the `TransformExtensionList`. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::Locale; + /// + /// let mut loc: Locale = "en-US-t-es-AR".parse().expect("Parsing failed."); + /// + /// assert!(!loc.extensions.transform.is_empty()); + /// ``` + pub fn is_empty(&self) -> bool { + self.lang.is_none() && self.fields.is_empty() + } + + /// Clears the transform extension, effectively removing it from the locale. 
+ /// + /// # Examples + /// + /// ``` + /// use icu::locid::Locale; + /// + /// let mut loc: Locale = "en-US-t-es-AR".parse().unwrap(); + /// loc.extensions.transform.clear(); + /// assert_eq!(loc, "en-US".parse().unwrap()); + /// ``` + pub fn clear(&mut self) { + self.lang = None; + self.fields.clear(); + } + + pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParserError> { + let mut tlang = None; + let mut tfields = LiteMap::new(); + + if let Some(subtag) = iter.peek() { + if Language::try_from_bytes(subtag).is_ok() { + tlang = Some(parse_language_identifier_from_iter( + iter, + ParserMode::Partial, + )?); + } + } + + let mut current_tkey = None; + let mut current_tvalue = vec![]; + + while let Some(subtag) = iter.peek() { + if let Some(tkey) = current_tkey { + if let Ok(val) = Value::parse_subtag(subtag) { + current_tvalue.push(val); + } else { + if current_tvalue.is_empty() { + return Err(ParserError::InvalidExtension); + } + tfields.try_insert( + tkey, + Value::from_vec_unchecked(current_tvalue.drain(..).flatten().collect()), + ); + current_tkey = None; + continue; + } + } else if let Ok(tkey) = Key::try_from_bytes(subtag) { + current_tkey = Some(tkey); + } else { + break; + } + + iter.next(); + } + + if let Some(tkey) = current_tkey { + if current_tvalue.is_empty() { + return Err(ParserError::InvalidExtension); + } + tfields.try_insert( + tkey, + Value::from_vec_unchecked(current_tvalue.into_iter().flatten().collect()), + ); + } + + Ok(Self { + lang: tlang, + fields: tfields.into(), + }) + } + + pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E> + where + F: FnMut(&str) -> Result<(), E>, + { + if self.is_empty() { + return Ok(()); + } + f("t")?; + if let Some(lang) = &self.lang { + lang.for_each_subtag_str(f)?; + } + self.fields.for_each_subtag_str(f) + } +} + +writeable::impl_display_with_writeable!(Transform); + +impl writeable::Writeable for Transform { + fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: 
&mut W) -> core::fmt::Result { + if self.is_empty() { + return Ok(()); + } + sink.write_str("-t")?; + if let Some(lang) = &self.lang { + sink.write_char('-')?; + writeable::Writeable::write_to(lang, sink)?; + } + if !self.fields.is_empty() { + sink.write_char('-')?; + writeable::Writeable::write_to(&self.fields, sink)?; + } + Ok(()) + } + + fn writeable_length_hint(&self) -> writeable::LengthHint { + if self.is_empty() { + return writeable::LengthHint::exact(0); + } + let mut result = writeable::LengthHint::exact(2); + if let Some(lang) = &self.lang { + result += writeable::Writeable::writeable_length_hint(lang) + 1; + } + if !self.fields.is_empty() { + result += writeable::Writeable::writeable_length_hint(&self.fields) + 1; + } + result + } +} diff --git a/vendor/icu_locid/src/extensions/transform/value.rs b/vendor/icu_locid/src/extensions/transform/value.rs new file mode 100644 index 000000000..84468361a --- /dev/null +++ b/vendor/icu_locid/src/extensions/transform/value.rs @@ -0,0 +1,119 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::parser::{get_subtag_iterator, ParserError}; +use alloc::vec; +use alloc::vec::Vec; +use core::ops::RangeInclusive; +use core::str::FromStr; +use tinystr::TinyAsciiStr; + +/// A value used in a list of [`Fields`](super::Fields). +/// +/// The value has to be a sequence of one or more alphanumerical strings +/// separated by `-`. +/// Each part of the sequence has to be no shorter than three characters and no +/// longer than 8. 
+/// +/// +/// # Examples +/// +/// ``` +/// use icu::locid::extensions::transform::Value; +/// +/// let value1: Value = "hybrid".parse().expect("Failed to parse a Value."); +/// let value2: Value = +/// "hybrid-foobar".parse().expect("Failed to parse a Value."); +/// +/// assert_eq!(&value1.to_string(), "hybrid"); +/// assert_eq!(&value2.to_string(), "hybrid-foobar"); +/// ``` +#[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord)] +pub struct Value(Vec<TinyAsciiStr<{ *TYPE_LENGTH.end() }>>); + +const TYPE_LENGTH: RangeInclusive<usize> = 3..=8; +const TRUE_TVALUE: TinyAsciiStr<8> = tinystr::tinystr!(8, "true"); + +impl Value { + /// A constructor which takes a utf8 slice, parses it and + /// produces a well-formed [`Value`]. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::transform::Value; + /// + /// let value = Value::try_from_bytes(b"hybrid").expect("Parsing failed."); + /// + /// assert_eq!(&value.to_string(), "hybrid"); + /// ``` + pub fn try_from_bytes(input: &[u8]) -> Result<Self, ParserError> { + let mut v = vec![]; + let mut has_value = false; + + for subtag in get_subtag_iterator(input) { + if !Self::is_type_subtag(subtag) { + return Err(ParserError::InvalidExtension); + } + has_value = true; + let val = + TinyAsciiStr::from_bytes(subtag).map_err(|_| ParserError::InvalidExtension)?; + if val != TRUE_TVALUE { + v.push(val); + } + } + + if !has_value { + return Err(ParserError::InvalidExtension); + } + Ok(Self(v)) + } + + pub(crate) fn from_vec_unchecked(input: Vec<TinyAsciiStr<{ *TYPE_LENGTH.end() }>>) -> Self { + Self(input) + } + + pub(crate) fn is_type_subtag(t: &[u8]) -> bool { + TYPE_LENGTH.contains(&t.len()) && !t.iter().any(|c: &u8| !c.is_ascii_alphanumeric()) + } + + pub(crate) fn parse_subtag( + t: &[u8], + ) -> Result<Option<TinyAsciiStr<{ *TYPE_LENGTH.end() }>>, ParserError> { + let s = TinyAsciiStr::from_bytes(t).map_err(|_| ParserError::InvalidSubtag)?; + if !TYPE_LENGTH.contains(&t.len()) || 
!s.is_ascii_alphanumeric() { + return Err(ParserError::InvalidExtension); + } + + let s = s.to_ascii_lowercase(); + + if s == TRUE_TVALUE { + Ok(None) + } else { + Ok(Some(s)) + } + } + + pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E> + where + F: FnMut(&str) -> Result<(), E>, + { + if self.0.is_empty() { + f("true")?; + } else { + self.0.iter().map(TinyAsciiStr::as_str).try_for_each(f)?; + } + Ok(()) + } +} + +impl FromStr for Value { + type Err = ParserError; + + fn from_str(source: &str) -> Result<Self, Self::Err> { + Self::try_from_bytes(source.as_bytes()) + } +} + +impl_writeable_for_tinystr_list!(Value, "true", "hybrid", "foobar"); diff --git a/vendor/icu_locid/src/extensions/unicode/attribute.rs b/vendor/icu_locid/src/extensions/unicode/attribute.rs new file mode 100644 index 000000000..ba4b70924 --- /dev/null +++ b/vendor/icu_locid/src/extensions/unicode/attribute.rs @@ -0,0 +1,34 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +impl_tinystr_subtag!( + /// An attribute used in a set of [`Attributes`](super::Attributes). + /// + /// An attribute has to be a sequence of alphanumerical characters no + /// shorter than three and no longer than eight characters. 
+ /// + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::Attribute; + /// use icu::locid::extensions_unicode_attribute as attribute; + /// + /// let attr: Attribute = + /// "buddhist".parse().expect("Failed to parse an Attribute."); + /// + /// assert_eq!(attr, attribute!("buddhist")); + /// ``` + Attribute, + extensions::unicode::Attribute, + extensions_unicode_attribute, + 3..=8, + s, + s.is_ascii_alphanumeric(), + s.to_ascii_lowercase(), + s.is_ascii_alphanumeric() && s.is_ascii_lowercase(), + InvalidExtension, + ["foo12"], + ["no", "toolooong"], +); diff --git a/vendor/icu_locid/src/extensions/unicode/attributes.rs b/vendor/icu_locid/src/extensions/unicode/attributes.rs new file mode 100644 index 000000000..1f9536bfa --- /dev/null +++ b/vendor/icu_locid/src/extensions/unicode/attributes.rs @@ -0,0 +1,115 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use super::Attribute; + +use alloc::vec::Vec; +use core::ops::Deref; + +/// A set of [`Attribute`] elements as defined in [`Unicode Extension Attributes`]. 
+/// +/// [`Unicode Extension Attributes`]: https://unicode.org/reports/tr35/tr35.html#u_Extension +/// +/// # Examples +/// +/// ``` +/// use icu::locid::extensions::unicode::{Attribute, Attributes}; +/// +/// let attribute1: Attribute = +/// "foobar".parse().expect("Failed to parse a variant subtag."); +/// +/// let attribute2: Attribute = "testing" +/// .parse() +/// .expect("Failed to parse a variant subtag."); +/// let mut v = vec![attribute1, attribute2]; +/// v.sort(); +/// v.dedup(); +/// +/// let attributes: Attributes = Attributes::from_vec_unchecked(v); +/// assert_eq!(attributes.to_string(), "foobar-testing"); +/// ``` +#[derive(Default, Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord)] +pub struct Attributes(Vec<Attribute>); + +impl Attributes { + /// Returns a new empty set of attributes. Same as [`default()`](Default::default()), but is `const`. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::Attributes; + /// + /// assert_eq!(Attributes::new(), Attributes::default()); + /// ``` + #[inline] + pub const fn new() -> Self { + Self(Vec::new()) + } + + /// A constructor which takes a pre-sorted list of [`Attribute`] elements. + /// + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::{Attribute, Attributes}; + /// + /// let attribute1: Attribute = "foobar".parse().expect("Parsing failed."); + /// let attribute2: Attribute = "testing".parse().expect("Parsing failed."); + /// let mut v = vec![attribute1, attribute2]; + /// v.sort(); + /// v.dedup(); + /// + /// let attributes = Attributes::from_vec_unchecked(v); + /// ``` + /// + /// Notice: For performance- and memory-constrained environments, it is recommended + /// for the caller to use [`binary_search`](slice::binary_search) instead of [`sort`](slice::sort) + /// and [`dedup`](Vec::dedup()). + pub fn from_vec_unchecked(input: Vec<Attribute>) -> Self { + Self(input) + } + + /// Empties the [`Attributes`] list. 
+ /// + /// Returns the old list. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::{Attribute, Attributes}; + /// + /// let attribute1: Attribute = "foobar".parse().expect("Parsing failed."); + /// let attribute2: Attribute = "testing".parse().expect("Parsing failed."); + /// let mut v = vec![attribute1, attribute2]; + /// + /// let mut attributes: Attributes = Attributes::from_vec_unchecked(v); + /// + /// assert_eq!(attributes.to_string(), "foobar-testing"); + /// + /// attributes.clear(); + /// + /// assert_eq!(attributes.to_string(), ""); + /// ``` + pub fn clear(&mut self) -> Self { + core::mem::take(self) + } + + pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E> + where + F: FnMut(&str) -> Result<(), E>, + { + self.deref().iter().map(|t| t.as_str()).try_for_each(f) + } +} + +impl_writeable_for_subtag_list!(Attributes, "foobar", "testing"); + +impl Deref for Attributes { + type Target = [Attribute]; + + fn deref(&self) -> &[Attribute] { + self.0.deref() + } +} diff --git a/vendor/icu_locid/src/extensions/unicode/key.rs b/vendor/icu_locid/src/extensions/unicode/key.rs new file mode 100644 index 000000000..bdfdd4e5c --- /dev/null +++ b/vendor/icu_locid/src/extensions/unicode/key.rs @@ -0,0 +1,31 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +impl_tinystr_subtag!( + /// A key used in a list of [`Keywords`](super::Keywords). + /// + /// The key has to be exactly two ASCII characters long, with the first + /// character being alphanumeric and the second being alphabetic. 
+ /// + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::Key; + /// + /// assert!("ca".parse::<Key>().is_ok()); + /// ``` + Key, + extensions::unicode::Key, + extensions_unicode_key, + 2..=2, + s, + s.all_bytes()[0].is_ascii_alphanumeric() && s.all_bytes()[1].is_ascii_alphabetic(), + s.to_ascii_lowercase(), + (s.all_bytes()[0].is_ascii_lowercase() || s.all_bytes()[0].is_ascii_digit()) + && s.all_bytes()[1].is_ascii_lowercase(), + InvalidExtension, + ["ca", "8a"], + ["a", "a8", "abc"], +); diff --git a/vendor/icu_locid/src/extensions/unicode/keywords.rs b/vendor/icu_locid/src/extensions/unicode/keywords.rs new file mode 100644 index 000000000..dc9a15921 --- /dev/null +++ b/vendor/icu_locid/src/extensions/unicode/keywords.rs @@ -0,0 +1,404 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use core::borrow::Borrow; +use core::cmp::Ordering; +use core::iter::FromIterator; +use litemap::LiteMap; + +use super::Key; +use super::Value; +use crate::helpers::ShortVec; +use crate::ordering::SubtagOrderingResult; + +/// A list of [`Key`]-[`Value`] pairs representing functional information +/// about locale's internationalization preferences. +/// +/// Here are examples of fields used in Unicode: +/// - `hc` - Hour Cycle (`h11`, `h12`, `h23`, `h24`) +/// - `ca` - Calendar (`buddhist`, `gregory`, ...) +/// - `fw` - First Day Of the Week (`sun`, `mon`, `sat`, ...) +/// +/// You can find the full list in [`Unicode BCP 47 U Extension`] section of LDML. 
+/// +/// [`Unicode BCP 47 U Extension`]: https://unicode.org/reports/tr35/tr35.html#Key_And_Type_Definitions_ +/// +/// # Examples +/// +/// Manually build up a [`Keywords`] object: +/// +/// ``` +/// use icu::locid::extensions::unicode::{Key, Keywords, Value}; +/// +/// let key: Key = "hc".parse().expect("Failed to parse a Key."); +/// let value: Value = "h23".parse().expect("Failed to parse a Value."); +/// let keywords: Keywords = vec![(key, value)].into_iter().collect(); +/// +/// assert_eq!(&keywords.to_string(), "hc-h23"); +/// ``` +/// +/// Access a [`Keywords`] object from a [`Locale`]: +/// +/// ``` +/// use icu::locid::{ +/// extensions_unicode_key as key, extensions_unicode_value as value, +/// Locale, +/// }; +/// +/// let loc: Locale = "und-u-hc-h23-kc-true".parse().expect("Valid BCP-47"); +/// +/// assert_eq!(loc.extensions.unicode.keywords.get(&key!("ca")), None); +/// assert_eq!( +/// loc.extensions.unicode.keywords.get(&key!("hc")), +/// Some(&value!("h23")) +/// ); +/// assert_eq!( +/// loc.extensions.unicode.keywords.get(&key!("kc")), +/// Some(&value!("true")) +/// ); +/// +/// assert_eq!(loc.extensions.unicode.keywords.to_string(), "hc-h23-kc"); +/// ``` +/// +/// [`Locale`]: crate::Locale +#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)] +pub struct Keywords(LiteMap<Key, Value, ShortVec<(Key, Value)>>); + +impl Keywords { + /// Returns a new empty list of key-value pairs. Same as [`default()`](Default::default()), but is `const`. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::Keywords; + /// + /// assert_eq!(Keywords::new(), Keywords::default()); + /// ``` + #[inline] + pub const fn new() -> Self { + Self(LiteMap::new()) + } + + /// Create a new list of key-value pairs having exactly one pair, callable in a `const` context. 
+ #[inline] + pub const fn new_single(key: Key, value: Value) -> Self { + Self(LiteMap::from_sorted_store_unchecked(ShortVec::new_single( + (key, value), + ))) + } + + /// Returns `true` if there are no keywords. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::Keywords; + /// use icu::locid::locale; + /// use icu::locid::Locale; + /// + /// let loc1 = Locale::try_from_bytes(b"und-t-h0-hybrid").unwrap(); + /// let loc2 = locale!("und-u-ca-buddhist"); + /// + /// assert!(loc1.extensions.unicode.keywords.is_empty()); + /// assert!(!loc2.extensions.unicode.keywords.is_empty()); + /// ``` + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Returns `true` if the list contains a [`Value`] for the specified [`Key`]. + /// + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::{Key, Keywords, Value}; + /// use litemap::LiteMap; + /// + /// let key: Key = "ca".parse().expect("Failed to parse a Key."); + /// let value: Value = "gregory".parse().expect("Failed to parse a Value."); + /// let keywords: Keywords = vec![(key, value)].into_iter().collect(); + /// + /// let key: Key = "ca".parse().expect("Failed to parse a Key."); + /// assert!(&keywords.contains_key(&key)); + /// ``` + pub fn contains_key<Q>(&self, key: &Q) -> bool + where + Key: Borrow<Q>, + Q: Ord, + { + self.0.contains_key(key) + } + + /// Returns a reference to the [`Value`] corresponding to the [`Key`]. 
+ /// + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::{Key, Keywords, Value}; + /// + /// let key: Key = "ca".parse().expect("Failed to parse a Key."); + /// let value: Value = "buddhist".parse().expect("Failed to parse a Value."); + /// let keywords: Keywords = vec![(key, value)].into_iter().collect(); + /// + /// let key: Key = "ca".parse().expect("Failed to parse a Key."); + /// assert_eq!( + /// keywords.get(&key).map(|v| v.to_string()), + /// Some("buddhist".to_string()) + /// ); + /// ``` + pub fn get<Q>(&self, key: &Q) -> Option<&Value> + where + Key: Borrow<Q>, + Q: Ord, + { + self.0.get(key) + } + + /// Returns a mutable reference to the [`Value`] corresponding to the [`Key`]. + /// + /// Returns `None` if the key doesn't exist or if the key has no value. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::{Key, Keywords, Value}; + /// + /// let key: Key = "ca".parse().expect("Failed to parse a Key."); + /// let value: Value = "buddhist".parse().expect("Failed to parse a Value."); + /// let mut keywords: Keywords = vec![(key, value)].into_iter().collect(); + /// + /// let key: Key = "ca".parse().expect("Failed to parse a Key."); + /// if let Some(value) = keywords.get_mut(&key) { + /// *value = "gregory".parse().expect("Failed to parse a Value."); + /// } + /// assert_eq!( + /// keywords.get(&key).map(|v| v.to_string()), + /// Some("gregory".to_string()) + /// ); + /// ``` + pub fn get_mut<Q>(&mut self, key: &Q) -> Option<&mut Value> + where + Key: Borrow<Q>, + Q: Ord, + { + self.0.get_mut(key) + } + + /// Sets the specified keyword, returning the old value if it already existed. 
+ /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::Key; + /// use icu::locid::extensions::unicode::Value; + /// use icu::locid::Locale; + /// use icu::locid::{ + /// extensions_unicode_key as key, extensions_unicode_value as value, + /// }; + /// + /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12" + /// .parse() + /// .expect("valid BCP-47 identifier"); + /// let old_value = loc + /// .extensions + /// .unicode + /// .keywords + /// .set(key!("ca"), value!("japanese")); + /// + /// assert_eq!(old_value, Some(value!("buddhist"))); + /// assert_eq!(loc, "und-u-hello-ca-japanese-hc-h12".parse().unwrap()); + /// ``` + pub fn set(&mut self, key: Key, value: Value) -> Option<Value> { + self.0.insert(key, value) + } + + /// Removes the specified keyword, returning the old value if it existed. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::Key; + /// use icu::locid::extensions_unicode_key as key; + /// use icu::locid::Locale; + /// + /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12" + /// .parse() + /// .expect("valid BCP-47 identifier"); + /// loc.extensions.unicode.keywords.remove(key!("ca")); + /// assert_eq!(loc, "und-u-hello-hc-h12".parse().unwrap()); + /// ``` + pub fn remove<Q: Borrow<Key>>(&mut self, key: Q) -> Option<Value> { + self.0.remove(key.borrow()) + } + + /// Clears all Unicode extension keywords, leaving Unicode attributes. + /// + /// Returns the old Unicode extension keywords. + /// + /// # Example + /// + /// ``` + /// use icu::locid::Locale; + /// + /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12".parse().unwrap(); + /// loc.extensions.unicode.keywords.clear(); + /// assert_eq!(loc, "und-u-hello".parse().unwrap()); + /// ``` + pub fn clear(&mut self) -> Self { + core::mem::take(self) + } + + /// Retains a subset of keywords as specified by the predicate function. 
+ /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions_unicode_key as key; + /// use icu::locid::Locale; + /// + /// let mut loc: Locale = "und-u-ca-buddhist-hc-h12-ms-metric".parse().unwrap(); + /// + /// loc.extensions + /// .unicode + /// .keywords + /// .retain_by_key(|&k| k == key!("hc")); + /// assert_eq!(loc, "und-u-hc-h12".parse().unwrap()); + /// + /// loc.extensions + /// .unicode + /// .keywords + /// .retain_by_key(|&k| k == key!("ms")); + /// assert_eq!(loc, Locale::UND); + /// ``` + pub fn retain_by_key<F>(&mut self, mut predicate: F) + where + F: FnMut(&Key) -> bool, + { + self.0.retain(|k, _| predicate(k)) + } + + /// Compare this [`Keywords`] with BCP-47 bytes. + /// + /// The return value is equivalent to what would happen if you first converted this + /// [`Keywords`] to a BCP-47 string and then performed a byte comparison. + /// + /// This function is case-sensitive and results in a *total order*, so it is appropriate for + /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::Keywords; + /// use icu::locid::Locale; + /// use std::cmp::Ordering; + /// + /// let bcp47_strings: &[&str] = + /// &["ca-hebrew", "ca-japanese", "ca-japanese-nu-latn", "nu-latn"]; + /// + /// for ab in bcp47_strings.windows(2) { + /// let a = ab[0]; + /// let b = ab[1]; + /// assert!(a.cmp(b) == Ordering::Less); + /// let a_kwds = format!("und-u-{}", a) + /// .parse::<Locale>() + /// .unwrap() + /// .extensions + /// .unicode + /// .keywords; + /// assert_eq!(a, a_kwds.to_string()); + /// assert!(a_kwds.strict_cmp(a.as_bytes()) == Ordering::Equal); + /// assert!(a_kwds.strict_cmp(b.as_bytes()) == Ordering::Less); + /// } + /// ``` + pub fn strict_cmp(&self, other: &[u8]) -> Ordering { + self.strict_cmp_iter(other.split(|b| *b == b'-')).end() + } + + /// Compare this [`Keywords`] with an iterator of BCP-47 subtags. 
+ /// + /// This function has the same equality semantics as [`Keywords::strict_cmp`]. It is intended as + /// a more modular version that allows multiple subtag iterators to be chained together. + /// + /// For an additional example, see [`SubtagOrderingResult`]. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::Keywords; + /// use icu::locid::locale; + /// use std::cmp::Ordering; + /// + /// let subtags: &[&[u8]] = &[b"ca", b"buddhist"]; + /// + /// let kwds = locale!("und-u-ca-buddhist").extensions.unicode.keywords; + /// assert_eq!( + /// Ordering::Equal, + /// kwds.strict_cmp_iter(subtags.iter().copied()).end() + /// ); + /// + /// let kwds = locale!("und").extensions.unicode.keywords; + /// assert_eq!( + /// Ordering::Less, + /// kwds.strict_cmp_iter(subtags.iter().copied()).end() + /// ); + /// + /// let kwds = locale!("und-u-nu-latn").extensions.unicode.keywords; + /// assert_eq!( + /// Ordering::Greater, + /// kwds.strict_cmp_iter(subtags.iter().copied()).end() + /// ); + /// ``` + pub fn strict_cmp_iter<'l, I>(&self, mut subtags: I) -> SubtagOrderingResult<I> + where + I: Iterator<Item = &'l [u8]>, + { + let r = self.for_each_subtag_str(&mut |subtag| { + if let Some(other) = subtags.next() { + match subtag.as_bytes().cmp(other) { + Ordering::Equal => Ok(()), + not_equal => Err(not_equal), + } + } else { + Err(Ordering::Greater) + } + }); + match r { + Ok(_) => SubtagOrderingResult::Subtags(subtags), + Err(o) => SubtagOrderingResult::Ordering(o), + } + } + + pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E> + where + F: FnMut(&str) -> Result<(), E>, + { + for (k, v) in self.0.iter() { + f(k.as_str())?; + v.for_each_subtag_str(f)?; + } + Ok(()) + } + + /// This needs to be its own method to help with type inference in helpers.rs + #[cfg(test)] + pub(crate) fn from_tuple_vec(v: Vec<(Key, Value)>) -> Self { + v.into_iter().collect() + } +} + +impl From<LiteMap<Key, Value, ShortVec<(Key, Value)>>> for 
Keywords { + fn from(map: LiteMap<Key, Value, ShortVec<(Key, Value)>>) -> Self { + Self(map) + } +} + +impl FromIterator<(Key, Value)> for Keywords { + fn from_iter<I: IntoIterator<Item = (Key, Value)>>(iter: I) -> Self { + LiteMap::from_iter(iter).into() + } +} + +impl_writeable_for_key_value!(Keywords, "ca", "islamic-civil", "mm", "mm"); diff --git a/vendor/icu_locid/src/extensions/unicode/mod.rs b/vendor/icu_locid/src/extensions/unicode/mod.rs new file mode 100644 index 000000000..fabf1036c --- /dev/null +++ b/vendor/icu_locid/src/extensions/unicode/mod.rs @@ -0,0 +1,233 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Unicode Extensions provide information about user preferences in a given locale. +//! +//! The main struct for this extension is [`Unicode`] which contains [`Keywords`] and +//! [`Attributes`]. +//! +//! +//! # Examples +//! +//! ``` +//! use icu::locid::extensions::unicode::{Attribute, Key, Unicode, Value}; +//! use icu::locid::{LanguageIdentifier, Locale}; +//! +//! let mut loc: Locale = +//! "en-US-u-foobar-hc-h12".parse().expect("Parsing failed."); +//! +//! let key: Key = "hc".parse().expect("Parsing key failed."); +//! let value: Value = "h12".parse().expect("Parsing value failed."); +//! let attribute: Attribute = +//! "foobar".parse().expect("Parsing attribute failed."); +//! +//! assert_eq!(loc.extensions.unicode.keywords.get(&key), Some(&value)); +//! assert!(loc.extensions.unicode.attributes.contains(&attribute)); +//! +//! assert_eq!(&loc.extensions.unicode.to_string(), "-u-foobar-hc-h12"); +//! 
``` +mod attribute; +mod attributes; +mod key; +mod keywords; +mod value; + +use alloc::vec; +pub use attribute::Attribute; +pub use attributes::Attributes; +pub use key::Key; +pub use keywords::Keywords; +pub use value::Value; + +use crate::parser::ParserError; +use crate::parser::SubtagIterator; +use litemap::LiteMap; + +/// Unicode Extensions provide information about user preferences in a given locale. +/// +/// A list of [`Unicode BCP47 U Extensions`] as defined in [`Unicode Locale +/// Identifier`] specification. +/// +/// Unicode extensions provide subtags that specify language and/or locale-based behavior +/// or refinements to language tags, according to work done by the Unicode Consortium. +/// (See [`RFC 6067`] for details). +/// +/// [`Unicode BCP47 U Extensions`]: https://unicode.org/reports/tr35/#u_Extension +/// [`RFC 6067`]: https://www.ietf.org/rfc/rfc6067.txt +/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier +/// +/// # Examples +/// +/// ``` +/// use icu::locid::extensions::unicode::{Key, Value}; +/// use icu::locid::Locale; +/// +/// let mut loc: Locale = +/// "de-u-hc-h12-ca-buddhist".parse().expect("Parsing failed."); +/// +/// let key: Key = "ca".parse().expect("Parsing key failed."); +/// let value: Value = "buddhist".parse().expect("Parsing value failed."); +/// assert_eq!(loc.extensions.unicode.keywords.get(&key), Some(&value)); +/// ``` +#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)] +#[allow(clippy::exhaustive_structs)] // spec-backed stable datastructure +pub struct Unicode { + /// The key-value pairs present in this locale extension, with each extension key subtag + /// associated to its provided value subtag. + pub keywords: Keywords, + /// A canonically ordered sequence of single standalone subtags for this locale extension. + pub attributes: Attributes, +} + +impl Unicode { + /// Returns a new empty map of Unicode extensions. 
Same as [`default()`](Default::default()), but is `const`. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::Unicode; + /// + /// assert_eq!(Unicode::new(), Unicode::default()); + /// ``` + #[inline] + pub const fn new() -> Self { + Self { + keywords: Keywords::new(), + attributes: Attributes::new(), + } + } + + /// Returns [`true`] if both the list of keywords and the list of attributes are empty. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::Locale; + /// + /// let loc: Locale = "en-US-u-foo".parse().expect("Parsing failed."); + /// + /// assert!(!loc.extensions.unicode.is_empty()); + /// ``` + pub fn is_empty(&self) -> bool { + self.keywords.is_empty() && self.attributes.is_empty() + } + + /// Clears all Unicode extension keywords and attributes, effectively removing + /// the Unicode extension. + /// + /// # Example + /// + /// ``` + /// use icu::locid::Locale; + /// + /// let mut loc: Locale = + /// "und-t-mul-u-hello-ca-buddhist-hc-h12".parse().unwrap(); + /// loc.extensions.unicode.clear(); + /// assert_eq!(loc, "und-t-mul".parse().unwrap()); + /// ``` + pub fn clear(&mut self) { + self.keywords.clear(); + self.attributes.clear(); + } + + pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParserError> { + let mut attributes = vec![]; + let mut keywords = LiteMap::new(); + + let mut current_keyword = None; + let mut current_type = vec![]; + + while let Some(subtag) = iter.peek() { + if let Ok(attr) = Attribute::try_from_bytes(subtag) { + if let Err(idx) = attributes.binary_search(&attr) { + attributes.insert(idx, attr); + } + } else { + break; + } + iter.next(); + } + + while let Some(subtag) = iter.peek() { + let slen = subtag.len(); + if slen == 2 { + if let Some(kw) = current_keyword.take() { + keywords.try_insert(kw, Value::from_vec_unchecked(current_type)); + current_type = vec![]; + } + current_keyword = Some(Key::try_from_bytes(subtag)?); + } else if current_keyword.is_some() { + match 
Value::parse_subtag(subtag) { + Ok(Some(t)) => current_type.push(t), + Ok(None) => {} + Err(_) => break, + } + } else { + break; + } + iter.next(); + } + + if let Some(kw) = current_keyword.take() { + keywords.try_insert(kw, Value::from_vec_unchecked(current_type)); + } + + // Ensure we've defined at least one attribute or keyword + if attributes.is_empty() && keywords.is_empty() { + return Err(ParserError::InvalidExtension); + } + + Ok(Self { + keywords: keywords.into(), + attributes: Attributes::from_vec_unchecked(attributes), + }) + } + + pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E> + where + F: FnMut(&str) -> Result<(), E>, + { + if self.is_empty() { + return Ok(()); + } + f("u")?; + self.attributes.for_each_subtag_str(f)?; + self.keywords.for_each_subtag_str(f)?; + Ok(()) + } +} + +writeable::impl_display_with_writeable!(Unicode); + +impl writeable::Writeable for Unicode { + fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result { + if self.is_empty() { + return Ok(()); + } + sink.write_str("-u")?; + if !self.attributes.is_empty() { + sink.write_char('-')?; + writeable::Writeable::write_to(&self.attributes, sink)?; + } + if !self.keywords.is_empty() { + sink.write_char('-')?; + writeable::Writeable::write_to(&self.keywords, sink)?; + } + Ok(()) + } + + fn writeable_length_hint(&self) -> writeable::LengthHint { + if self.is_empty() { + return writeable::LengthHint::exact(0); + } + let mut result = writeable::LengthHint::exact(2); + if !self.attributes.is_empty() { + result += writeable::Writeable::writeable_length_hint(&self.attributes) + 1; + } + if !self.keywords.is_empty() { + result += writeable::Writeable::writeable_length_hint(&self.keywords) + 1; + } + result + } +} diff --git a/vendor/icu_locid/src/extensions/unicode/value.rs b/vendor/icu_locid/src/extensions/unicode/value.rs new file mode 100644 index 000000000..ce9982a4c --- /dev/null +++ b/vendor/icu_locid/src/extensions/unicode/value.rs @@ 
-0,0 +1,199 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::helpers::ShortVec; +use crate::parser::{get_subtag_iterator, ParserError}; +use alloc::vec::Vec; +use core::ops::RangeInclusive; +use core::str::FromStr; +use tinystr::TinyAsciiStr; + +/// A value used in a list of [`Keywords`](super::Keywords). +/// +/// The value has to be a sequence of one or more alphanumerical strings +/// separated by `-`. +/// Each part of the sequence has to be no shorter than three characters and no +/// longer than 8. +/// +/// +/// # Examples +/// +/// ``` +/// use icu::locid::extensions::unicode::Value; +/// +/// let value1: Value = "gregory".parse().expect("Failed to parse a Value."); +/// let value2: Value = +/// "islamic-civil".parse().expect("Failed to parse a Value."); +/// let value3: Value = "true".parse().expect("Failed to parse a Value."); +/// +/// assert_eq!(&value1.to_string(), "gregory"); +/// assert_eq!(&value2.to_string(), "islamic-civil"); +/// +/// // The value "true" is special-cased to an empty value +/// assert_eq!(&value3.to_string(), ""); +/// ``` +#[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord)] +pub struct Value(ShortVec<TinyAsciiStr<{ *VALUE_LENGTH.end() }>>); + +const VALUE_LENGTH: RangeInclusive<usize> = 3..=8; +const TRUE_VALUE: TinyAsciiStr<8> = tinystr::tinystr!(8, "true"); + +impl Value { + /// A constructor which takes a utf8 slice, parses it and + /// produces a well-formed [`Value`]. 
+ /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::Value; + /// + /// let value = Value::try_from_bytes(b"buddhist").expect("Parsing failed."); + /// + /// assert_eq!(&value.to_string(), "buddhist"); + /// ``` + pub fn try_from_bytes(input: &[u8]) -> Result<Self, ParserError> { + let mut v = ShortVec::new(); + + if !input.is_empty() { + for subtag in get_subtag_iterator(input) { + let val = Self::subtag_from_bytes(subtag)?; + if let Some(val) = val { + v.push(val); + } + } + } + Ok(Self(v)) + } + + /// Const constructor for when the value contains only a single subtag. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::Value; + /// + /// Value::try_from_single_subtag(b"buddhist").expect("valid subtag"); + /// Value::try_from_single_subtag(b"#####").expect_err("invalid subtag"); + /// Value::try_from_single_subtag(b"foo-bar").expect_err("not a single subtag"); + /// ``` + pub const fn try_from_single_subtag(subtag: &[u8]) -> Result<Self, ParserError> { + match Self::subtag_from_bytes(subtag) { + Err(_) => Err(ParserError::InvalidExtension), + Ok(option) => Ok(Self::from_tinystr(option)), + } + } + + #[doc(hidden)] + pub fn as_tinystr_slice(&self) -> &[TinyAsciiStr<8>] { + self.0.as_slice() + } + + #[doc(hidden)] + pub const fn as_single_subtag(&self) -> Option<&TinyAsciiStr<8>> { + self.0.single() + } + + #[doc(hidden)] + pub const fn from_tinystr(subtag: Option<TinyAsciiStr<8>>) -> Self { + match subtag { + None => Self(ShortVec::new()), + Some(val) => { + debug_assert!(val.is_ascii_alphanumeric()); + debug_assert!(!matches!(val, TRUE_VALUE)); + Self(ShortVec::new_single(val)) + } + } + } + + pub(crate) fn from_vec_unchecked(input: Vec<TinyAsciiStr<8>>) -> Self { + Self(input.into()) + } + + #[doc(hidden)] + pub const fn subtag_from_bytes(bytes: &[u8]) -> Result<Option<TinyAsciiStr<8>>, ParserError> { + Self::parse_subtag_from_bytes_manual_slice(bytes, 0, bytes.len()) + } + + pub(crate) fn 
parse_subtag(t: &[u8]) -> Result<Option<TinyAsciiStr<8>>, ParserError> { + Self::parse_subtag_from_bytes_manual_slice(t, 0, t.len()) + } + + pub(crate) const fn parse_subtag_from_bytes_manual_slice( + bytes: &[u8], + start: usize, + end: usize, + ) -> Result<Option<TinyAsciiStr<8>>, ParserError> { + let slice_len = end - start; + if slice_len > *VALUE_LENGTH.end() || slice_len < *VALUE_LENGTH.start() { + return Err(ParserError::InvalidExtension); + } + + match TinyAsciiStr::from_bytes_manual_slice(bytes, start, end) { + Ok(TRUE_VALUE) => Ok(None), + Ok(s) if s.is_ascii_alphanumeric() => Ok(Some(s.to_ascii_lowercase())), + Ok(_) => Err(ParserError::InvalidExtension), + Err(_) => Err(ParserError::InvalidSubtag), + } + } + + pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E> + where + F: FnMut(&str) -> Result<(), E>, + { + self.0.as_slice().iter().map(|t| t.as_str()).try_for_each(f) + } +} + +impl FromStr for Value { + type Err = ParserError; + + fn from_str(source: &str) -> Result<Self, Self::Err> { + Self::try_from_bytes(source.as_bytes()) + } +} + +impl_writeable_for_tinystr_list!(Value, "", "islamic", "civil"); + +/// A macro allowing for compile-time construction of valid Unicode [`Value`] subtag. +/// +/// The macro only supports single-subtag values. +/// +/// # Examples +/// +/// ``` +/// use icu::locid::Locale; +/// use icu::locid::{ +/// extensions_unicode_key as key, extensions_unicode_value as value, +/// }; +/// +/// let loc: Locale = "de-u-ca-buddhist".parse().unwrap(); +/// +/// assert_eq!( +/// loc.extensions.unicode.keywords.get(&key!("ca")), +/// Some(&value!("buddhist")) +/// ); +/// ``` +/// +/// [`Value`]: crate::extensions::unicode::Value +#[macro_export] +macro_rules! 
extensions_unicode_value { + ($value:literal) => {{ + // What we want: + // const R: $crate::extensions::unicode::Value = + // match $crate::extensions::unicode::Value::try_from_single_subtag($value.as_bytes()) { + // Ok(r) => r, + // #[allow(clippy::panic)] // const context + // _ => panic!(concat!("Invalid Unicode extension value: ", $value)), + // }; + // Workaround until https://github.com/rust-lang/rust/issues/73255 lands: + const R: $crate::extensions::unicode::Value = + $crate::extensions::unicode::Value::from_tinystr( + match $crate::extensions::unicode::Value::subtag_from_bytes($value.as_bytes()) { + Ok(r) => r, + _ => panic!(concat!("Invalid Unicode extension value: ", $value)), + }, + ); + R + }}; +} |