summaryrefslogtreecommitdiffstats
path: root/vendor/icu_locid/src/parser
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-17 12:18:32 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-17 12:18:32 +0000
commit 4547b622d8d29df964fa2914213088b148c498fc (patch)
tree 9fc6b25f3c3add6b745be9a2400a6e96140046e9 /vendor/icu_locid/src/parser
parent Releasing progress-linux version 1.66.0+dfsg1-1~progress7.99u1. (diff)
download rustc-4547b622d8d29df964fa2914213088b148c498fc.tar.xz
download rustc-4547b622d8d29df964fa2914213088b148c498fc.zip
Merging upstream version 1.67.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/icu_locid/src/parser')
-rw-r--r-- vendor/icu_locid/src/parser/errors.rs 54
-rw-r--r-- vendor/icu_locid/src/parser/langid.rs 269
-rw-r--r-- vendor/icu_locid/src/parser/locale.rs 42
-rw-r--r-- vendor/icu_locid/src/parser/mod.rs 98
4 files changed, 463 insertions, 0 deletions
diff --git a/vendor/icu_locid/src/parser/errors.rs b/vendor/icu_locid/src/parser/errors.rs
new file mode 100644
index 000000000..a989bcc60
--- /dev/null
+++ b/vendor/icu_locid/src/parser/errors.rs
@@ -0,0 +1,54 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use displaydoc::Display;
+
+/// List of parser errors that can be generated
+/// while parsing [`LanguageIdentifier`](crate::LanguageIdentifier), [`Locale`](crate::Locale),
+/// [`subtags`](crate::subtags) or [`extensions`](crate::extensions).
+#[derive(Display, Debug, PartialEq, Copy, Clone)]
+#[non_exhaustive]
+pub enum ParserError {
+ /// Invalid language subtag, or no subtag at all where a language subtag was expected.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Language;
+ /// use icu::locid::ParserError;
+ ///
+ /// assert_eq!("x2".parse::<Language>(), Err(ParserError::InvalidLanguage));
+ /// ```
+ #[displaydoc("The given language subtag is invalid")]
+ InvalidLanguage,
+
+ /// Invalid script, region or variant subtag; the const parsers also return it for input they cannot handle in a const context.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Region;
+ /// use icu::locid::ParserError;
+ ///
+ /// assert_eq!("#@2X".parse::<Region>(), Err(ParserError::InvalidSubtag));
+ /// ```
+ #[displaydoc("Invalid subtag")]
+ InvalidSubtag,
+
+ /// Invalid extension subtag.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Key;
+ /// use icu::locid::ParserError;
+ ///
+ /// assert_eq!("#@2X".parse::<Key>(), Err(ParserError::InvalidExtension));
+ /// ```
+ #[displaydoc("Invalid extension")]
+ InvalidExtension,
+}
+
+#[cfg(feature = "std")] // only compiled when the crate's "std" feature is enabled
+impl std::error::Error for ParserError {} // marker impl: no methods are overridden
diff --git a/vendor/icu_locid/src/parser/langid.rs b/vendor/icu_locid/src/parser/langid.rs
new file mode 100644
index 000000000..9efa078ac
--- /dev/null
+++ b/vendor/icu_locid/src/parser/langid.rs
@@ -0,0 +1,269 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+pub use super::errors::ParserError;
+use crate::extensions::unicode::{Attribute, Key, Value};
+use crate::extensions::ExtensionType;
+use crate::parser::{get_subtag_iterator, SubtagIterator};
+use crate::LanguageIdentifier;
+use crate::{extensions, subtags};
+use alloc::vec::Vec;
+use tinystr::TinyAsciiStr;
+
+#[derive(PartialEq, Clone, Copy)]
+pub enum ParserMode { // selects how the language-identifier parsers in this file treat one-byte and unrecognised subtags
+ LanguageIdentifier, // one-byte subtags get no special treatment; an unrecognised subtag is an error
+ Locale, // a one-byte subtag ends the language identifier; the const parser then goes on to read a Unicode extension
+ Partial, // stops quietly (no error) at a one-byte subtag or at the first subtag that does not fit
+}
+
+#[derive(PartialEq, Clone, Copy)]
+enum ParserPosition { // which subtag kinds the parser will still accept after the language subtag
+ Script, // next subtag is tried as script, then region, then variant
+ Region, // next subtag is tried as region, then variant
+ Variant, // only variants are accepted from here on
+}
+
+pub fn parse_language_identifier_from_iter( // parses language, optional script, optional region and any variants from `iter`
+ iter: &mut SubtagIterator, // advanced past each accepted subtag; a subtag that stops parsing is left unconsumed
+ mode: ParserMode, // decides one-byte-subtag handling and whether an unrecognised subtag is an error or a stop
+) -> Result<LanguageIdentifier, ParserError> {
+ let mut script = None;
+ let mut region = None;
+ let mut variants = Vec::new();
+
+ let language = if let Some(subtag) = iter.next() { // the first subtag must parse as a language
+ subtags::Language::try_from_bytes(subtag)?
+ } else {
+ return Err(ParserError::InvalidLanguage); // no subtags at all
+ };
+
+ let mut position = ParserPosition::Script;
+
+ while let Some(subtag) = iter.peek() { // peek only: the subtag is consumed by iter.next() at the bottom once accepted
+ if mode != ParserMode::LanguageIdentifier && subtag.len() == 1 { // outside LanguageIdentifier mode a one-byte subtag ends the identifier
+ break;
+ }
+
+ if position == ParserPosition::Script {
+ if let Ok(s) = subtags::Script::try_from_bytes(subtag) {
+ script = Some(s);
+ position = ParserPosition::Region;
+ } else if let Ok(s) = subtags::Region::try_from_bytes(subtag) {
+ region = Some(s);
+ position = ParserPosition::Variant;
+ } else if let Ok(v) = subtags::Variant::try_from_bytes(subtag) {
+ if let Err(idx) = variants.binary_search(&v) { // Err(idx): not present yet, idx is the sorted insertion point
+ variants.insert(idx, v);
+ }
+ position = ParserPosition::Variant;
+ } else if mode == ParserMode::Partial {
+ break; // Partial mode: leave the unrecognised subtag in the iterator
+ } else {
+ return Err(ParserError::InvalidSubtag);
+ }
+ } else if position == ParserPosition::Region {
+ if let Ok(s) = subtags::Region::try_from_bytes(subtag) {
+ region = Some(s);
+ position = ParserPosition::Variant;
+ } else if let Ok(v) = subtags::Variant::try_from_bytes(subtag) {
+ if let Err(idx) = variants.binary_search(&v) { // Err(idx): not present yet, idx is the sorted insertion point
+ variants.insert(idx, v);
+ }
+ position = ParserPosition::Variant;
+ } else if mode == ParserMode::Partial {
+ break; // Partial mode: leave the unrecognised subtag in the iterator
+ } else {
+ return Err(ParserError::InvalidSubtag);
+ }
+ } else if let Ok(v) = subtags::Variant::try_from_bytes(subtag) { // position is Variant here
+ if let Err(idx) = variants.binary_search(&v) {
+ variants.insert(idx, v); // keeps `variants` sorted
+ } else {
+ return Err(ParserError::InvalidSubtag); // the same variant was already seen: duplicates are rejected
+ }
+ } else if mode == ParserMode::Partial {
+ break; // Partial mode: leave the unrecognised subtag in the iterator
+ } else {
+ return Err(ParserError::InvalidSubtag);
+ }
+ iter.next(); // consume the subtag that was just accepted
+ }
+
+ Ok(LanguageIdentifier {
+ language,
+ script,
+ region,
+ variants: subtags::Variants::from_vec_unchecked(variants), // `variants` is sorted and duplicate-free thanks to the binary_search inserts above
+ })
+}
+
+pub fn parse_language_identifier( // byte-slice entry point: builds a subtag iterator over `t` and parses from it
+ t: &[u8], // raw identifier bytes; subtags are separated by b'-' or b'_'
+ mode: ParserMode, // forwarded unchanged to parse_language_identifier_from_iter
+) -> Result<LanguageIdentifier, ParserError> {
+ let mut iter = get_subtag_iterator(t);
+ parse_language_identifier_from_iter(&mut iter, mode) // any subtags left in `iter` afterwards are dropped with it
+}
+
+#[allow(clippy::type_complexity)] // presumably for the five-element tuple in the return type
+pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter( // const parser: at most one variant and one Unicode keyword
+ mut iter: SubtagIterator, // taken by value; advanced by reassigning the iterator returned from next_manual()
+ mode: ParserMode, // one-byte/unrecognised subtag handling; the extension is only read in Locale mode
+) -> Result<
+ (
+ subtags::Language,
+ Option<subtags::Script>,
+ Option<subtags::Region>,
+ Option<subtags::Variant>,
+ Option<(extensions::unicode::Key, Option<TinyAsciiStr<8>>)>, // Unicode keyword: key plus an optional single value subtag
+ ),
+ ParserError,
+> {
+ let language;
+ let mut script = None;
+ let mut region = None;
+ let mut variant = None;
+ let mut keyword = None;
+
+ if let (i, Some((t, start, end))) = iter.next_manual() { // the first subtag must parse as a language
+ iter = i;
+ match subtags::Language::try_from_bytes_manual_slice(t, start, end) {
+ Ok(l) => language = l,
+ Err(e) => return Err(e),
+ }
+ } else {
+ return Err(ParserError::InvalidLanguage); // no subtags at all
+ }
+
+ let mut position = ParserPosition::Script;
+
+ while let Some((t, start, end)) = iter.peek_manual() { // peek only: the subtag is consumed at the bottom of the loop once accepted
+ if !matches!(mode, ParserMode::LanguageIdentifier) && end - start == 1 { // end - start is the subtag length; one byte ends the identifier
+ break;
+ }
+
+ if matches!(position, ParserPosition::Script) {
+ if let Ok(s) = subtags::Script::try_from_bytes_manual_slice(t, start, end) {
+ script = Some(s);
+ position = ParserPosition::Region;
+ } else if let Ok(r) = subtags::Region::try_from_bytes_manual_slice(t, start, end) {
+ region = Some(r);
+ position = ParserPosition::Variant;
+ } else if let Ok(v) = subtags::Variant::try_from_bytes_manual_slice(t, start, end) {
+ // We cannot handle multiple variants in a const context
+ debug_assert!(variant.is_none());
+ variant = Some(v);
+ position = ParserPosition::Variant;
+ } else if matches!(mode, ParserMode::Partial) {
+ break; // Partial mode: stop without error at a subtag that does not fit
+ } else {
+ return Err(ParserError::InvalidSubtag);
+ }
+ } else if matches!(position, ParserPosition::Region) {
+ if let Ok(s) = subtags::Region::try_from_bytes_manual_slice(t, start, end) {
+ region = Some(s);
+ position = ParserPosition::Variant;
+ } else if let Ok(v) = subtags::Variant::try_from_bytes_manual_slice(t, start, end) {
+ // We cannot handle multiple variants in a const context
+ debug_assert!(variant.is_none());
+ variant = Some(v);
+ position = ParserPosition::Variant;
+ } else if matches!(mode, ParserMode::Partial) {
+ break; // Partial mode: stop without error at a subtag that does not fit
+ } else {
+ return Err(ParserError::InvalidSubtag);
+ }
+ } else if let Ok(v) = subtags::Variant::try_from_bytes_manual_slice(t, start, end) {
+ debug_assert!(matches!(position, ParserPosition::Variant));
+ if variant.is_some() {
+ // We cannot handle multiple variants in a const context
+ return Err(ParserError::InvalidSubtag);
+ }
+ variant = Some(v);
+ } else if matches!(mode, ParserMode::Partial) {
+ break; // Partial mode: stop without error at a subtag that does not fit
+ } else {
+ return Err(ParserError::InvalidSubtag);
+ }
+
+ iter = iter.next_manual().0; // consume the subtag that was just accepted
+ }
+
+ if matches!(mode, ParserMode::Locale) { // extensions are only examined in Locale mode
+ if let Some((bytes, start, end)) = iter.peek_manual() {
+ match ExtensionType::try_from_bytes_manual_slice(bytes, start, end) {
+ Ok(ExtensionType::Unicode) => {
+ iter = iter.next_manual().0; // consume the subtag that selected the Unicode extension
+ if let Some((bytes, start, end)) = iter.peek_manual() {
+ if Attribute::try_from_bytes_manual_slice(bytes, start, end).is_ok() {
+ // We cannot handle Attributes in a const context
+ return Err(ParserError::InvalidSubtag);
+ }
+ }
+
+ let mut key = None;
+ let mut current_type = None;
+
+ while let Some((bytes, start, end)) = iter.peek_manual() {
+ let slen = end - start;
+ if slen == 2 { // a two-byte subtag is treated as a keyword key
+ if key.is_some() {
+ // We cannot handle more than one Key in a const context
+ return Err(ParserError::InvalidSubtag);
+ }
+ match Key::try_from_bytes_manual_slice(bytes, start, end) {
+ Ok(k) => key = Some(k),
+ Err(e) => return Err(e),
+ };
+ } else if key.is_some() { // any other length after a key is parsed as part of that key's value
+ match Value::parse_subtag_from_bytes_manual_slice(bytes, start, end) {
+ Ok(Some(t)) => {
+ if current_type.is_some() {
+ // We cannot handle more than one type in a const context
+ return Err(ParserError::InvalidSubtag);
+ }
+ current_type = Some(t);
+ }
+ Ok(None) => {} // accepted, but nothing to store for this subtag
+ Err(e) => return Err(e),
+ }
+ } else {
+ break; // a non-key subtag before any key was seen: stop reading the extension
+ }
+ iter = iter.next_manual().0
+ }
+ if let Some(k) = key { // without a key there is no keyword to report
+ keyword = Some((k, current_type));
+ }
+ }
+ // We cannot handle Transform, Private, Other extensions in a const context
+ Ok(_) => return Err(ParserError::InvalidSubtag),
+ Err(e) => return Err(e),
+ }
+ }
+ }
+
+ Ok((language, script, region, variant, keyword))
+}
+
+#[allow(clippy::type_complexity)] // presumably for the four-element tuple in the return type
+pub const fn parse_language_identifier_with_single_variant( // const parser returning the language-identifier parts only
+ t: &[u8], // raw identifier bytes; subtags are separated by b'-' or b'_'
+ mode: ParserMode, // forwarded unchanged to the shared const parser
+) -> Result<
+ (
+ subtags::Language,
+ Option<subtags::Script>,
+ Option<subtags::Region>,
+ Option<subtags::Variant>,
+ ),
+ ParserError,
+> {
+ let iter = get_subtag_iterator(t);
+ match parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter(iter, mode) {
+ Ok((l, s, r, v, _)) => Ok((l, s, r, v)), // the keyword slot is discarded; in Locale mode an extension is still parsed and can still fail
+ Err(e) => Err(e),
+ }
+}
diff --git a/vendor/icu_locid/src/parser/locale.rs b/vendor/icu_locid/src/parser/locale.rs
new file mode 100644
index 000000000..805b6c290
--- /dev/null
+++ b/vendor/icu_locid/src/parser/locale.rs
@@ -0,0 +1,42 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use tinystr::TinyAsciiStr;
+
+use crate::extensions::{self, Extensions};
+use crate::parser::errors::ParserError;
+use crate::parser::{get_subtag_iterator, parse_language_identifier_from_iter, ParserMode};
+use crate::{subtags, Locale};
+
+use super::parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter;
+
+pub fn parse_locale(t: &[u8]) -> Result<Locale, ParserError> { // parses a full locale: language identifier followed by optional extensions
+ let mut iter = get_subtag_iterator(t);
+
+ let id = parse_language_identifier_from_iter(&mut iter, ParserMode::Locale)?; // Locale mode stops at the first one-byte subtag and leaves it in `iter`
+ let extensions = if iter.peek().is_some() { // anything left over is handed to the extensions parser
+ Extensions::try_from_iter(&mut iter)?
+ } else {
+ Extensions::default() // nothing follows the language identifier
+ };
+ Ok(Locale { id, extensions })
+}
+
+#[allow(clippy::type_complexity)] // presumably for the five-element tuple in the return type
+pub const fn parse_locale_with_single_variant_single_keyword_unicode_keyword_extension( // byte-slice entry point for the const locale parser
+ t: &[u8], // raw locale bytes; subtags are separated by b'-' or b'_'
+ mode: ParserMode, // forwarded unchanged; the callee only reads an extension when this is ParserMode::Locale
+) -> Result<
+ (
+ subtags::Language,
+ Option<subtags::Script>,
+ Option<subtags::Region>,
+ Option<subtags::Variant>,
+ Option<(extensions::unicode::Key, Option<TinyAsciiStr<8>>)>, // Unicode keyword: key plus an optional single value subtag
+ ),
+ ParserError,
+> {
+ let iter = get_subtag_iterator(t);
+ parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter(iter, mode) // result is returned as-is
+}
diff --git a/vendor/icu_locid/src/parser/mod.rs b/vendor/icu_locid/src/parser/mod.rs
new file mode 100644
index 000000000..fef10b0ab
--- /dev/null
+++ b/vendor/icu_locid/src/parser/mod.rs
@@ -0,0 +1,98 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+pub mod errors;
+mod langid;
+mod locale;
+
+pub use errors::ParserError;
+pub use langid::{
+ parse_language_identifier, parse_language_identifier_from_iter,
+ parse_language_identifier_with_single_variant,
+ parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter, ParserMode,
+};
+
+pub use locale::{
+ parse_locale, parse_locale_with_single_variant_single_keyword_unicode_keyword_extension,
+};
+
+pub const fn get_subtag_iterator(slice: &[u8]) -> SubtagIterator { // builds an iterator positioned on the first subtag of `slice`
+ let mut current_start = 0;
+ #[allow(clippy::indexing_slicing)] // index is checked against slice.len() in the loop condition
+ while current_start < slice.len()
+ && (slice[current_start] == b'-' || slice[current_start] == b'_')
+ { // skip every leading separator, so leading '-'/'_' never produce an empty subtag
+ current_start += 1;
+ }
+ let mut current_end = current_start;
+ #[allow(clippy::indexing_slicing)] // index is checked against slice.len() in the loop condition
+ while current_end < slice.len() && slice[current_end] != b'-' && slice[current_end] != b'_' { // extend to the next separator or the end of input
+ current_end += 1;
+ }
+ SubtagIterator {
+ slice,
+ current_start,
+ current_end, // equals current_start when the input holds no subtag at all
+ }
+}
+
+#[derive(Copy, Clone, Debug)]
+pub struct SubtagIterator<'a> { // cursor over the b'-' / b'_' separated subtags of a byte slice
+ slice: &'a [u8], // the whole input; never re-sliced, only indexed
+ current_start: usize, // start offset of the pending subtag
+ current_end: usize, // end offset (exclusive) of the pending subtag; equal to current_start when exhausted
+}
+
+pub type ManualSlice<'a> = (&'a [u8], usize, usize); // (whole input, start, end) as produced by next_manual/peek_manual
+
+impl<'a> SubtagIterator<'a> {
+ pub const fn next_manual(mut self) -> (Self, Option<ManualSlice<'a>>) { // const-friendly next(): returns the advanced iterator plus the pending subtag
+ if self.current_start == self.current_end { // empty range means no subtag is pending
+ (self, None)
+ } else {
+ let r = (self.slice, self.current_start, self.current_end); // remember the subtag being yielded
+ self.current_start = self.current_end;
+ #[allow(clippy::indexing_slicing)] // index is checked against slice.len() in the loop condition
+ while self.current_start < self.slice.len()
+ && (self.slice[self.current_start] == b'-'
+ || self.slice[self.current_start] == b'_')
+ { // skip the whole run of separators, so consecutive separators never produce an empty subtag
+ self.current_start += 1;
+ }
+ self.current_end = self.current_start;
+ #[allow(clippy::indexing_slicing)] // index is checked against slice.len() in the loop condition
+ while self.current_end < self.slice.len()
+ && self.slice[self.current_end] != b'-'
+ && self.slice[self.current_end] != b'_'
+ { // extend to the next separator or the end of input
+ self.current_end += 1;
+ }
+ (self, Some(r))
+ }
+ }
+
+ pub const fn peek_manual(&self) -> Option<ManualSlice<'a>> { // pending subtag as (input, start, end) without advancing
+ if self.current_start == self.current_end {
+ None
+ } else {
+ Some((self.slice, self.current_start, self.current_end))
+ }
+ }
+
+ pub fn peek(&self) -> Option<&'a [u8]> { // pending subtag as a byte slice, without advancing
+ #[allow(clippy::indexing_slicing)] // peek_manual returns valid indices
+ self.peek_manual().map(|(t, s, e)| &t[s..e])
+ }
+}
+
+impl<'a> Iterator for SubtagIterator<'a> { // yields each subtag as a byte slice borrowed from the input
+ type Item = &'a [u8];
+
+ fn next(&mut self) -> Option<Self::Item> {
+ let (s, res) = self.next_manual(); // next_manual takes self by value, so it advances a copy
+ self.clone_from(&s); // write the advanced state back into this iterator
+ #[allow(clippy::indexing_slicing)] // next_manual returns valid indices
+ res.map(|(t, s, e)| &t[s..e])
+ }
+}