summary refs log tree commit diff stats
path: root/vendor/unic-langid-impl/src/parser
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/unic-langid-impl/src/parser')
-rw-r--r-- vendor/unic-langid-impl/src/parser/errors.rs 20
-rw-r--r-- vendor/unic-langid-impl/src/parser/mod.rs 83
2 files changed, 103 insertions, 0 deletions
diff --git a/vendor/unic-langid-impl/src/parser/errors.rs b/vendor/unic-langid-impl/src/parser/errors.rs
new file mode 100644
index 000000000..acc36bff9
--- /dev/null
+++ b/vendor/unic-langid-impl/src/parser/errors.rs
@@ -0,0 +1,20 @@
+use std::error::Error;
+use std::fmt::{self, Display};
+
+/// Errors reported while parsing a language identifier.
+///
+/// The type is a plain field-less enum, so it is cheap to copy and can be
+/// compared for full equality.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum ParserError {
+    /// The language subtag was rejected.
+    InvalidLanguage,
+    /// A subtag other than the language was rejected.
+    InvalidSubtag,
+}
+
+impl Error for ParserError {}
+
+impl Display for ParserError {
+    /// Writes the fixed, human-readable message associated with the variant.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(match self {
+            ParserError::InvalidLanguage => "The given language subtag is invalid",
+            ParserError::InvalidSubtag => "Invalid subtag",
+        })
+    }
+}
diff --git a/vendor/unic-langid-impl/src/parser/mod.rs b/vendor/unic-langid-impl/src/parser/mod.rs
new file mode 100644
index 000000000..d750ebba9
--- /dev/null
+++ b/vendor/unic-langid-impl/src/parser/mod.rs
@@ -0,0 +1,83 @@
+pub mod errors;
+
+use std::iter::Peekable;
+
+pub use self::errors::ParserError;
+use crate::subtags;
+use crate::LanguageIdentifier;
+
+/// Parses a language identifier from a stream of already-split subtags.
+///
+/// The first item of `iter` is parsed as the language subtag; when `iter` is
+/// empty the default language is used instead. The items that follow are
+/// consumed as an optional script, an optional region and any number of
+/// variants, in that order. Consumption stops at the first item that fits
+/// none of the kinds still permitted, and that item is left un-consumed in
+/// `iter`.
+///
+/// Collected variants are sorted and de-duplicated before being stored; an
+/// identifier without variants stores `None`.
+///
+/// # Errors
+///
+/// * Propagates the error from `subtags::Language::from_bytes` when the first
+///   subtag is rejected.
+/// * Returns [`ParserError::InvalidSubtag`] when `allow_extension` is `false`
+///   and an item is still pending in `iter` after the known subtags were
+///   consumed.
+pub fn parse_language_identifier_from_iter<'a>(
+    iter: &mut Peekable<impl Iterator<Item = &'a [u8]>>,
+    allow_extension: bool,
+) -> Result<LanguageIdentifier, ParserError> {
+    /// Which optional subtag kinds may still appear at the current item.
+    #[derive(Clone, Copy, PartialEq)]
+    enum Stage {
+        /// Directly after the language: script, region or variant.
+        AfterLanguage,
+        /// A script was consumed: region or variant.
+        AfterScript,
+        /// A region or a variant was consumed: variants only.
+        VariantsOnly,
+    }
+
+    let language = match iter.next() {
+        Some(raw) => subtags::Language::from_bytes(raw)?,
+        None => subtags::Language::default(),
+    };
+
+    let mut script = None;
+    let mut region = None;
+    let mut variants = Vec::new();
+    let mut stage = Stage::AfterLanguage;
+
+    // Peek first so that an item we cannot classify stays in the iterator.
+    while let Some(&raw) = iter.peek() {
+        if stage == Stage::AfterLanguage {
+            if let Ok(parsed) = subtags::Script::from_bytes(raw) {
+                script = Some(parsed);
+                stage = Stage::AfterScript;
+                iter.next();
+                continue;
+            }
+        }
+
+        if stage != Stage::VariantsOnly {
+            if let Ok(parsed) = subtags::Region::from_bytes(raw) {
+                region = Some(parsed);
+                stage = Stage::VariantsOnly;
+                iter.next();
+                continue;
+            }
+        }
+
+        match subtags::Variant::from_bytes(raw) {
+            Ok(parsed) => {
+                variants.push(parsed);
+                stage = Stage::VariantsOnly;
+                iter.next();
+            }
+            Err(_) => break,
+        }
+    }
+
+    if !allow_extension {
+        // Anything still pending is a subtag nobody is going to handle.
+        if iter.peek().is_some() {
+            return Err(ParserError::InvalidSubtag);
+        }
+    }
+
+    // Sorting and de-duplicating an empty vector is a no-op, so this can run
+    // unconditionally before deciding between `None` and `Some`.
+    variants.sort_unstable();
+    variants.dedup();
+    let variants = if variants.is_empty() {
+        None
+    } else {
+        Some(variants.into_boxed_slice())
+    };
+
+    Ok(LanguageIdentifier {
+        language,
+        script,
+        region,
+        variants,
+    })
+}
+
+/// Parses a complete language identifier from raw bytes.
+///
+/// `t` is split into subtags on every `-` or `_` byte and handed to
+/// [`parse_language_identifier_from_iter`] with extensions disallowed, so any
+/// subtag left over after the language, script, region and variants yields
+/// an error.
+pub fn parse_language_identifier(t: &[u8]) -> Result<LanguageIdentifier, ParserError> {
+    let is_separator = |byte: &u8| matches!(*byte, b'-' | b'_');
+    let mut parts = t.split(is_separator).peekable();
+    parse_language_identifier_from_iter(&mut parts, false)
+}