summaryrefslogtreecommitdiffstats
path: root/third_party/rust/unic-langid-impl/src/parser/mod.rs
blob: d750ebba987983a5bb1afd7c8dddde3e335475e7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
pub mod errors;

use std::iter::Peekable;

pub use self::errors::ParserError;
use crate::subtags;
use crate::LanguageIdentifier;

/// Builds a `LanguageIdentifier` from an iterator of raw subtags.
///
/// The first item pulled from `iter` is parsed as the language subtag and a
/// parse failure is propagated to the caller. If `iter` yields nothing at
/// all, `subtags::Language::default()` is used instead.
///
/// Each following subtag is peeked and tried, in order, as a script (only
/// directly after the language), a region (only while no region or variant
/// has been accepted yet) and finally a variant. The first subtag that
/// matches none of these stops the scan and is left unconsumed in `iter`.
///
/// Collected variants are sorted and de-duplicated; when none were found the
/// `variants` field is `None`.
///
/// # Errors
///
/// * Whatever `subtags::Language::from_bytes` returns for the first subtag.
/// * `ParserError::InvalidSubtag` when `allow_extension` is `false` and
///   `iter` still has subtags left after the scan.
pub fn parse_language_identifier_from_iter<'a>(
    iter: &mut Peekable<impl Iterator<Item = &'a [u8]>>,
    allow_extension: bool,
) -> Result<LanguageIdentifier, ParserError> {
    let language = match iter.next() {
        Some(first) => subtags::Language::from_bytes(first)?,
        None => subtags::Language::default(),
    };

    let mut script = None;
    let mut region = None;
    let mut variants = Vec::new();

    // Tracks which kinds of subtag are still acceptable:
    //   1 => script, region or variant
    //   2 => region or variant
    //   3 => variants only
    let mut slot = 1;

    while let Some(candidate) = iter.peek() {
        if slot == 1 {
            if let Ok(parsed) = subtags::Script::from_bytes(candidate) {
                script = Some(parsed);
                slot = 2;
                iter.next();
                continue;
            }
        }

        if slot <= 2 {
            if let Ok(parsed) = subtags::Region::from_bytes(candidate) {
                region = Some(parsed);
                slot = 3;
                iter.next();
                continue;
            }
        }

        match subtags::Variant::from_bytes(candidate) {
            Ok(parsed) => {
                variants.push(parsed);
                slot = 3;
            }
            // Not a recognised subtag: leave it in the iterator and stop.
            Err(_) => break,
        }
        iter.next();
    }

    // `allow_extension` is tested first so the iterator is only inspected
    // when leftover subtags would actually be an error.
    let remainder_ok = allow_extension || iter.peek().is_none();
    if !remainder_ok {
        return Err(ParserError::InvalidSubtag);
    }

    // Both calls are no-ops on an empty vector, so they can run
    // unconditionally before deciding between `None` and `Some`.
    variants.sort_unstable();
    variants.dedup();
    let variants = if variants.is_empty() {
        None
    } else {
        Some(variants.into_boxed_slice())
    };

    Ok(LanguageIdentifier {
        language,
        script,
        region,
        variants,
    })
}

/// Parses a language identifier from a byte string.
///
/// The input is split into subtags on every `-` or `_` byte and handed to
/// `parse_language_identifier_from_iter` with extensions disallowed, so any
/// subtag left over after the language/script/region/variant scan results in
/// an error.
pub fn parse_language_identifier(t: &[u8]) -> Result<LanguageIdentifier, ParserError> {
    let allow_extension = false;
    let mut parts = t
        .split(|&byte| byte == b'-' || byte == b'_')
        .peekable();
    parse_language_identifier_from_iter(&mut parts, allow_extension)
}