From 26a029d407be480d791972afb5975cf62c9360a6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 02:47:55 +0200 Subject: Adding upstream version 124.0.1. Signed-off-by: Daniel Baumann --- .../rust/unic-langid-impl/src/parser/errors.rs | 20 ++++++ .../rust/unic-langid-impl/src/parser/mod.rs | 83 ++++++++++++++++++++++ 2 files changed, 103 insertions(+) create mode 100644 third_party/rust/unic-langid-impl/src/parser/errors.rs create mode 100644 third_party/rust/unic-langid-impl/src/parser/mod.rs (limited to 'third_party/rust/unic-langid-impl/src/parser') diff --git a/third_party/rust/unic-langid-impl/src/parser/errors.rs b/third_party/rust/unic-langid-impl/src/parser/errors.rs new file mode 100644 index 0000000000..acc36bff96 --- /dev/null +++ b/third_party/rust/unic-langid-impl/src/parser/errors.rs @@ -0,0 +1,20 @@ +use std::error::Error; +use std::fmt::{self, Display}; + +#[derive(Debug, PartialEq)] +pub enum ParserError { + InvalidLanguage, + InvalidSubtag, +} + +impl Error for ParserError {} + +impl Display for ParserError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let value = match self { + ParserError::InvalidLanguage => "The given language subtag is invalid", + ParserError::InvalidSubtag => "Invalid subtag", + }; + f.write_str(value) + } +} diff --git a/third_party/rust/unic-langid-impl/src/parser/mod.rs b/third_party/rust/unic-langid-impl/src/parser/mod.rs new file mode 100644 index 0000000000..d750ebba98 --- /dev/null +++ b/third_party/rust/unic-langid-impl/src/parser/mod.rs @@ -0,0 +1,83 @@ +pub mod errors; + +use std::iter::Peekable; + +pub use self::errors::ParserError; +use crate::subtags; +use crate::LanguageIdentifier; + +pub fn parse_language_identifier_from_iter<'a>( + iter: &mut Peekable>, + allow_extension: bool, +) -> Result { + let language = if let Some(subtag) = iter.next() { + subtags::Language::from_bytes(subtag)? + } else { + subtags::Language::default() + }; + + let mut script = None; + let mut region = None; + let mut variants = vec![]; + + let mut position = 1; + + while let Some(subtag) = iter.peek() { + if position == 1 { + if let Ok(s) = subtags::Script::from_bytes(subtag) { + script = Some(s); + position = 2; + } else if let Ok(s) = subtags::Region::from_bytes(subtag) { + region = Some(s); + position = 3; + } else if let Ok(v) = subtags::Variant::from_bytes(subtag) { + variants.push(v); + position = 3; + } else { + break; + } + } else if position == 2 { + if let Ok(s) = subtags::Region::from_bytes(subtag) { + region = Some(s); + position = 3; + } else if let Ok(v) = subtags::Variant::from_bytes(subtag) { + variants.push(v); + position = 3; + } else { + break; + } + } else { + // Variants + if let Ok(v) = subtags::Variant::from_bytes(subtag) { + variants.push(v); + } else { + break; + } + } + iter.next(); + } + + if !allow_extension && iter.peek().is_some() { + return Err(ParserError::InvalidSubtag); + } + + let variants = if variants.is_empty() { + None + } else { + variants.sort_unstable(); + variants.dedup(); + Some(variants.into_boxed_slice()) + }; + + Ok(LanguageIdentifier { + language, + script, + region, + variants, + }) +} + +pub fn parse_language_identifier(t: &[u8]) -> Result { + let mut iter = t.split(|c| *c == b'-' || *c == b'_').peekable(); + parse_language_identifier_from_iter(&mut iter, false) +} -- cgit v1.2.3