summaryrefslogtreecommitdiffstats
path: root/third_party/rust/strck_ident/src/unicode.rs
blob: 5cf991ed940e798cad4b9932c08e228d483e276a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
//! Checked strings containing Unicode identifiers according to the
//! [Unicode Standard Annex #31](https://www.unicode.org/reports/tr31/).
//!
//! # Examples
//!
//! ```rust
//! use strck_ident::{IntoCk, unicode::UnicodeIdent};
//!
//! assert!("foo".ck::<UnicodeIdent>().is_ok());
//! assert!("struct".ck::<UnicodeIdent>().is_ok());
//! assert!("Москва".ck::<UnicodeIdent>().is_ok());
//! assert!("東京".ck::<UnicodeIdent>().is_ok());
//!
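//! // `_` is not XID_Start, so it cannot begin an identifier (it may appear later).
//! assert!("_identifier".ck::<UnicodeIdent>().is_err());
//! // `#` is not XID_Continue, so Rust's raw-identifier syntax is rejected.
//! assert!("r#try".ck::<UnicodeIdent>().is_err());
//! assert!("👍".ck::<UnicodeIdent>().is_err());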
//! ```
//!
//! # Aliases
//!
//! This module exposes [`Ident`] and [`IdentBuf`], which alias `Ck<UnicodeIdent>`
//! and `Check<UnicodeIdent>` respectively. These aliases are preferred to keep
//! type signatures succinct.
//!
//! These are also exported under the root, and can be accessed as
//! `strck_ident::Ident` and `strck_ident::IdentBuf`.
use core::fmt;
use strck::{Check, Ck, Invariant};

/// An [`Invariant`] for unicode identifiers according to
/// [Unicode Standard Annex #31](https://www.unicode.org/reports/tr31/).
///
/// This is a unit struct used purely as a type-level marker; it carries no
/// data of its own.
///
/// # Invariants
///
/// * The string is nonempty.
/// * The first character is XID_Start.
/// * Any following characters are XID_Continue.
///
/// Note that `_` is not XID_Start, so an identifier with a leading underscore
/// is rejected even though an underscore is accepted in later positions.
#[derive(Clone, Debug)]
pub struct UnicodeIdent;

/// Borrowed checked string containing a Unicode identifier.
///
/// This is an alias for `Ck<UnicodeIdent>`, provided to keep type signatures
/// short.
///
/// See [`UnicodeIdent`] for more details.
pub type Ident = Ck<UnicodeIdent>;

/// Owned checked string containing a Unicode identifier.
///
/// This is an alias for `Check<UnicodeIdent, B>`. The type parameter `B` is
/// forwarded unchanged to [`Check`] and defaults to [`String`].
///
/// See [`UnicodeIdent`] for more details.
pub type IdentBuf<B = String> = Check<UnicodeIdent, B>;

/// The error type returned from checking invariants of [`UnicodeIdent`].
///
/// The `Start` and `Continue` variants carry the first character that
/// violated the corresponding rule.
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum Error {
    /// Empty string.
    Empty,

    /// The first character isn't XID_Start; holds that character.
    Start(char),

    /// A trailing character isn't XID_Continue; holds the first such character.
    Continue(char),
}

// The default `std::error::Error` methods are used as-is; the human-readable
// message comes from the `Display` impl.
impl std::error::Error for Error {}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Error::Empty => f.pad("empty"),
            Error::Start(ch) => write!(f, "invalid start '{ch}'"),
            Error::Continue(ch) => write!(f, "invalid continue '{ch}'"),
        }
    }
}

impl Invariant for UnicodeIdent {
    type Error = Error;

    fn check(slice: &str) -> Result<(), Self::Error> {
        let mut chars = slice.chars();
        let start = chars.next().ok_or(Error::Empty)?;

        if !unicode_ident::is_xid_start(start) {
            return Err(Error::Start(start));
        }

        for ch in chars {
            if !unicode_ident::is_xid_continue(ch) {
                return Err(Error::Continue(ch));
            }
        }
        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use super::{Error, UnicodeIdent};
    use strck::IntoCk;

    /// Each rejected input must report the exact variant and offending character.
    #[test]
    fn test_invalid() {
        let rejected = [
            ("", Error::Empty),
            ("12345", Error::Start('1')),
            ("😂_foo", Error::Start('😂')),
            ("foo_😂", Error::Continue('😂')),
            ("hello.there", Error::Continue('.')),
            ("\\as2mkf", Error::Start('\\')),
            ("the book", Error::Continue(' ')),
            (" book", Error::Start(' ')),
            ("\n", Error::Start('\n')),
            ("_underscore", Error::Start('_')),
            ("r#try", Error::Continue('#')),
        ];

        for (input, expected) in rejected {
            assert_eq!(
                input.ck::<UnicodeIdent>().unwrap_err(),
                expected,
                "unexpected error for input {input:?}"
            );
        }
    }

    /// ASCII, keyword-like, and non-Latin identifiers are all accepted.
    #[test]
    fn test_valid() {
        let accepted = ["a2345", "foo", "snake_case", "impl", "岡林"];

        for input in accepted {
            assert!(
                input.ck::<UnicodeIdent>().is_ok(),
                "input {input:?} should be a valid identifier"
            );
        }
    }
}