diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:18:32 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:18:32 +0000 |
commit | 4547b622d8d29df964fa2914213088b148c498fc (patch) | |
tree | 9fc6b25f3c3add6b745be9a2400a6e96140046e9 /vendor/icu_locid/src/parser | |
parent | Releasing progress-linux version 1.66.0+dfsg1-1~progress7.99u1. (diff) | |
download | rustc-4547b622d8d29df964fa2914213088b148c498fc.tar.xz rustc-4547b622d8d29df964fa2914213088b148c498fc.zip |
Merging upstream version 1.67.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/icu_locid/src/parser')
-rw-r--r-- | vendor/icu_locid/src/parser/errors.rs | 54 | ||||
-rw-r--r-- | vendor/icu_locid/src/parser/langid.rs | 269 | ||||
-rw-r--r-- | vendor/icu_locid/src/parser/locale.rs | 42 | ||||
-rw-r--r-- | vendor/icu_locid/src/parser/mod.rs | 98 |
4 files changed, 463 insertions, 0 deletions
diff --git a/vendor/icu_locid/src/parser/errors.rs b/vendor/icu_locid/src/parser/errors.rs new file mode 100644 index 000000000..a989bcc60 --- /dev/null +++ b/vendor/icu_locid/src/parser/errors.rs @@ -0,0 +1,54 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use displaydoc::Display; + +/// List of parser errors that can be generated +/// while parsing [`LanguageIdentifier`](crate::LanguageIdentifier), [`Locale`](crate::Locale), +/// [`subtags`](crate::subtags) or [`extensions`](crate::extensions). +#[derive(Display, Debug, PartialEq, Copy, Clone)] +#[non_exhaustive] +pub enum ParserError { + /// Invalid language subtag. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::subtags::Language; + /// use icu::locid::ParserError; + /// + /// assert_eq!("x2".parse::<Language>(), Err(ParserError::InvalidLanguage)); + /// ``` + #[displaydoc("The given language subtag is invalid")] + InvalidLanguage, + + /// Invalid script, region or variant subtag. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::subtags::Region; + /// use icu::locid::ParserError; + /// + /// assert_eq!("#@2X".parse::<Region>(), Err(ParserError::InvalidSubtag)); + /// ``` + #[displaydoc("Invalid subtag")] + InvalidSubtag, + + /// Invalid extension subtag. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::Key; + /// use icu::locid::ParserError; + /// + /// assert_eq!("#@2X".parse::<Key>(), Err(ParserError::InvalidExtension)); + /// ``` + #[displaydoc("Invalid extension")] + InvalidExtension, +} + +#[cfg(feature = "std")] +impl std::error::Error for ParserError {} diff --git a/vendor/icu_locid/src/parser/langid.rs b/vendor/icu_locid/src/parser/langid.rs new file mode 100644 index 000000000..9efa078ac --- /dev/null +++ b/vendor/icu_locid/src/parser/langid.rs @@ -0,0 +1,269 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +pub use super::errors::ParserError; +use crate::extensions::unicode::{Attribute, Key, Value}; +use crate::extensions::ExtensionType; +use crate::parser::{get_subtag_iterator, SubtagIterator}; +use crate::LanguageIdentifier; +use crate::{extensions, subtags}; +use alloc::vec::Vec; +use tinystr::TinyAsciiStr; + +#[derive(PartialEq, Clone, Copy)] +pub enum ParserMode { + LanguageIdentifier, + Locale, + Partial, +} + +#[derive(PartialEq, Clone, Copy)] +enum ParserPosition { + Script, + Region, + Variant, +} + +pub fn parse_language_identifier_from_iter( + iter: &mut SubtagIterator, + mode: ParserMode, +) -> Result<LanguageIdentifier, ParserError> { + let mut script = None; + let mut region = None; + let mut variants = Vec::new(); + + let language = if let Some(subtag) = iter.next() { + subtags::Language::try_from_bytes(subtag)? + } else { + return Err(ParserError::InvalidLanguage); + }; + + let mut position = ParserPosition::Script; + + while let Some(subtag) = iter.peek() { + if mode != ParserMode::LanguageIdentifier && subtag.len() == 1 { + break; + } + + if position == ParserPosition::Script { + if let Ok(s) = subtags::Script::try_from_bytes(subtag) { + script = Some(s); + position = ParserPosition::Region; + } else if let Ok(s) = subtags::Region::try_from_bytes(subtag) { + region = Some(s); + position = ParserPosition::Variant; + } else if let Ok(v) = subtags::Variant::try_from_bytes(subtag) { + if let Err(idx) = variants.binary_search(&v) { + variants.insert(idx, v); + } + position = ParserPosition::Variant; + } else if mode == ParserMode::Partial { + break; + } else { + return Err(ParserError::InvalidSubtag); + } + } else if position == ParserPosition::Region { + if let Ok(s) = subtags::Region::try_from_bytes(subtag) { + region = Some(s); + position = ParserPosition::Variant; + } else if let Ok(v) = subtags::Variant::try_from_bytes(subtag) { + if let Err(idx) = variants.binary_search(&v) { + variants.insert(idx, v); + } + position = ParserPosition::Variant; + } else if mode == ParserMode::Partial { + break; + } else { + return Err(ParserError::InvalidSubtag); + } + } else if let Ok(v) = subtags::Variant::try_from_bytes(subtag) { + if let Err(idx) = variants.binary_search(&v) { + variants.insert(idx, v); + } else { + return Err(ParserError::InvalidSubtag); + } + } else if mode == ParserMode::Partial { + break; + } else { + return Err(ParserError::InvalidSubtag); + } + iter.next(); + } + + Ok(LanguageIdentifier { + language, + script, + region, + variants: subtags::Variants::from_vec_unchecked(variants), + }) +} + +pub fn parse_language_identifier( + t: &[u8], + mode: ParserMode, +) -> Result<LanguageIdentifier, ParserError> { + let mut iter = get_subtag_iterator(t); + parse_language_identifier_from_iter(&mut iter, mode) +} + +#[allow(clippy::type_complexity)] +pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter( + mut iter: SubtagIterator, + mode: ParserMode, +) -> Result< + ( + subtags::Language, + Option<subtags::Script>, + Option<subtags::Region>, + Option<subtags::Variant>, + Option<(extensions::unicode::Key, Option<TinyAsciiStr<8>>)>, + ), + ParserError, +> { + let language; + let mut script = None; + let mut region = None; + let mut variant = None; + let mut keyword = None; + + if let (i, Some((t, start, end))) = iter.next_manual() { + iter = i; + match subtags::Language::try_from_bytes_manual_slice(t, start, end) { + Ok(l) => language = l, + Err(e) => return Err(e), + } + } else { + return Err(ParserError::InvalidLanguage); + } + + let mut position = ParserPosition::Script; + + while let Some((t, start, end)) = iter.peek_manual() { + if !matches!(mode, ParserMode::LanguageIdentifier) && end - start == 1 { + break; + } + + if matches!(position, ParserPosition::Script) { + if let Ok(s) = subtags::Script::try_from_bytes_manual_slice(t, start, end) { + script = Some(s); + position = ParserPosition::Region; + } else if let Ok(r) = subtags::Region::try_from_bytes_manual_slice(t, start, end) { + region = Some(r); + position = ParserPosition::Variant; + } else if let Ok(v) = subtags::Variant::try_from_bytes_manual_slice(t, start, end) { + // We cannot handle multiple variants in a const context + debug_assert!(variant.is_none()); + variant = Some(v); + position = ParserPosition::Variant; + } else if matches!(mode, ParserMode::Partial) { + break; + } else { + return Err(ParserError::InvalidSubtag); + } + } else if matches!(position, ParserPosition::Region) { + if let Ok(s) = subtags::Region::try_from_bytes_manual_slice(t, start, end) { + region = Some(s); + position = ParserPosition::Variant; + } else if let Ok(v) = subtags::Variant::try_from_bytes_manual_slice(t, start, end) { + // We cannot handle multiple variants in a const context + debug_assert!(variant.is_none()); + variant = Some(v); + position = ParserPosition::Variant; + } else if matches!(mode, ParserMode::Partial) { + break; + } else { + return Err(ParserError::InvalidSubtag); + } + } else if let Ok(v) = subtags::Variant::try_from_bytes_manual_slice(t, start, end) { + debug_assert!(matches!(position, ParserPosition::Variant)); + if variant.is_some() { + // We cannot handle multiple variants in a const context + return Err(ParserError::InvalidSubtag); + } + variant = Some(v); + } else if matches!(mode, ParserMode::Partial) { + break; + } else { + return Err(ParserError::InvalidSubtag); + } + + iter = iter.next_manual().0; + } + + if matches!(mode, ParserMode::Locale) { + if let Some((bytes, start, end)) = iter.peek_manual() { + match ExtensionType::try_from_bytes_manual_slice(bytes, start, end) { + Ok(ExtensionType::Unicode) => { + iter = iter.next_manual().0; + if let Some((bytes, start, end)) = iter.peek_manual() { + if Attribute::try_from_bytes_manual_slice(bytes, start, end).is_ok() { + // We cannot handle Attributes in a const context + return Err(ParserError::InvalidSubtag); + } + } + + let mut key = None; + let mut current_type = None; + + while let Some((bytes, start, end)) = iter.peek_manual() { + let slen = end - start; + if slen == 2 { + if key.is_some() { + // We cannot handle more than one Key in a const context + return Err(ParserError::InvalidSubtag); + } + match Key::try_from_bytes_manual_slice(bytes, start, end) { + Ok(k) => key = Some(k), + Err(e) => return Err(e), + }; + } else if key.is_some() { + match Value::parse_subtag_from_bytes_manual_slice(bytes, start, end) { + Ok(Some(t)) => { + if current_type.is_some() { + // We cannot handle more than one type in a const context + return Err(ParserError::InvalidSubtag); + } + current_type = Some(t); + } + Ok(None) => {} + Err(e) => return Err(e), + } + } else { + break; + } + iter = iter.next_manual().0 + } + if let Some(k) = key { + keyword = Some((k, current_type)); + } + } + // We cannot handle Transform, Private, Other extensions in a const context + Ok(_) => return Err(ParserError::InvalidSubtag), + Err(e) => return Err(e), + } + } + } + + Ok((language, script, region, variant, keyword)) +} + +#[allow(clippy::type_complexity)] +pub const fn parse_language_identifier_with_single_variant( + t: &[u8], + mode: ParserMode, +) -> Result< + ( + subtags::Language, + Option<subtags::Script>, + Option<subtags::Region>, + Option<subtags::Variant>, + ), + ParserError, +> { + let iter = get_subtag_iterator(t); + match parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter(iter, mode) { + Ok((l, s, r, v, _)) => Ok((l, s, r, v)), + Err(e) => Err(e), + } +} diff --git a/vendor/icu_locid/src/parser/locale.rs b/vendor/icu_locid/src/parser/locale.rs new file mode 100644 index 000000000..805b6c290 --- /dev/null +++ b/vendor/icu_locid/src/parser/locale.rs @@ -0,0 +1,42 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use tinystr::TinyAsciiStr; + +use crate::extensions::{self, Extensions}; +use crate::parser::errors::ParserError; +use crate::parser::{get_subtag_iterator, parse_language_identifier_from_iter, ParserMode}; +use crate::{subtags, Locale}; + +use super::parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter; + +pub fn parse_locale(t: &[u8]) -> Result<Locale, ParserError> { + let mut iter = get_subtag_iterator(t); + + let id = parse_language_identifier_from_iter(&mut iter, ParserMode::Locale)?; + let extensions = if iter.peek().is_some() { + Extensions::try_from_iter(&mut iter)? + } else { + Extensions::default() + }; + Ok(Locale { id, extensions }) +} + +#[allow(clippy::type_complexity)] +pub const fn parse_locale_with_single_variant_single_keyword_unicode_keyword_extension( + t: &[u8], + mode: ParserMode, +) -> Result< + ( + subtags::Language, + Option<subtags::Script>, + Option<subtags::Region>, + Option<subtags::Variant>, + Option<(extensions::unicode::Key, Option<TinyAsciiStr<8>>)>, + ), + ParserError, +> { + let iter = get_subtag_iterator(t); + parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter(iter, mode) +} diff --git a/vendor/icu_locid/src/parser/mod.rs b/vendor/icu_locid/src/parser/mod.rs new file mode 100644 index 000000000..fef10b0ab --- /dev/null +++ b/vendor/icu_locid/src/parser/mod.rs @@ -0,0 +1,98 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +pub mod errors; +mod langid; +mod locale; + +pub use errors::ParserError; +pub use langid::{ + parse_language_identifier, parse_language_identifier_from_iter, + parse_language_identifier_with_single_variant, + parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter, ParserMode, +}; + +pub use locale::{ + parse_locale, parse_locale_with_single_variant_single_keyword_unicode_keyword_extension, +}; + +pub const fn get_subtag_iterator(slice: &[u8]) -> SubtagIterator { + let mut current_start = 0; + #[allow(clippy::indexing_slicing)] + while current_start < slice.len() + && (slice[current_start] == b'-' || slice[current_start] == b'_') + { + current_start += 1; + } + let mut current_end = current_start; + #[allow(clippy::indexing_slicing)] + while current_end < slice.len() && slice[current_end] != b'-' && slice[current_end] != b'_' { + current_end += 1; + } + SubtagIterator { + slice, + current_start, + current_end, + } +} + +#[derive(Copy, Clone, Debug)] +pub struct SubtagIterator<'a> { + slice: &'a [u8], + current_start: usize, + current_end: usize, +} + +pub type ManualSlice<'a> = (&'a [u8], usize, usize); + +impl<'a> SubtagIterator<'a> { + pub const fn next_manual(mut self) -> (Self, Option<ManualSlice<'a>>) { + if self.current_start == self.current_end { + (self, None) + } else { + let r = (self.slice, self.current_start, self.current_end); + self.current_start = self.current_end; + #[allow(clippy::indexing_slicing)] + while self.current_start < self.slice.len() + && (self.slice[self.current_start] == b'-' + || self.slice[self.current_start] == b'_') + { + self.current_start += 1; + } + self.current_end = self.current_start; + #[allow(clippy::indexing_slicing)] + while self.current_end < self.slice.len() + && self.slice[self.current_end] != b'-' + && self.slice[self.current_end] != b'_' + { + self.current_end += 1; + } + (self, Some(r)) + } + } + + pub const fn peek_manual(&self) -> Option<ManualSlice<'a>> { + if self.current_start == self.current_end { + None + } else { + Some((self.slice, self.current_start, self.current_end)) + } + } + + pub fn peek(&self) -> Option<&'a [u8]> { + #[allow(clippy::indexing_slicing)] // peek_manual returns valid indices + self.peek_manual().map(|(t, s, e)| &t[s..e]) + } +} + +impl<'a> Iterator for SubtagIterator<'a> { + type Item = &'a [u8]; + + fn next(&mut self) -> Option<Self::Item> { + let (s, res) = self.next_manual(); + self.clone_from(&s); + #[allow(clippy::indexing_slicing)] // next_manual returns valid indices + res.map(|(t, s, e)| &t[s..e]) + } +} |