diff options
Diffstat (limited to 'third_party/rust/strck_ident/src')
-rw-r--r-- | third_party/rust/strck_ident/src/lib.rs | 40 | ||||
-rw-r--r-- | third_party/rust/strck_ident/src/rust.rs | 136 | ||||
-rw-r--r-- | third_party/rust/strck_ident/src/unicode.rs | 145 |
3 files changed, 321 insertions, 0 deletions
diff --git a/third_party/rust/strck_ident/src/lib.rs b/third_party/rust/strck_ident/src/lib.rs new file mode 100644 index 0000000000..ab2669ebaa --- /dev/null +++ b/third_party/rust/strck_ident/src/lib.rs @@ -0,0 +1,40 @@ +//! [![github-img]][github-url] [![crates-img]][crates-url] [![docs-img]][docs-url] +//! +//! [github-url]: https://github.com/QnnOkabayashi/strck_ident +//! [crates-url]: https://crates.io/crates/strck_ident +//! [docs-url]: crate +//! [github-img]: https://img.shields.io/badge/github-8da0cb?style=for-the-badge&labelColor=555555&logo=github +//! [crates-img]: https://img.shields.io/badge/crates.io-fc8d62?style=for-the-badge&labelColor=555555&logo=rust +//! [docs-img]: https://img.shields.io/badge/docs.rs-66c2a5?style=for-the-badge&labelColor=555555&logoColor=white&logo=data:image/svg+xml;base64,PHN2ZyByb2xlPSJpbWciIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgdmlld0JveD0iMCAwIDUxMiA1MTIiPjxwYXRoIGZpbGw9IiNmNWY1ZjUiIGQ9Ik00ODguNiAyNTAuMkwzOTIgMjE0VjEwNS41YzAtMTUtOS4zLTI4LjQtMjMuNC0zMy43bC0xMDAtMzcuNWMtOC4xLTMuMS0xNy4xLTMuMS0yNS4zIDBsLTEwMCAzNy41Yy0xNC4xIDUuMy0yMy40IDE4LjctMjMuNCAzMy43VjIxNGwtOTYuNiAzNi4yQzkuMyAyNTUuNSAwIDI2OC45IDAgMjgzLjlWMzk0YzAgMTMuNiA3LjcgMjYuMSAxOS45IDMyLjJsMTAwIDUwYzEwLjEgNS4xIDIyLjEgNS4xIDMyLjIgMGwxMDMuOS01MiAxMDMuOSA1MmMxMC4xIDUuMSAyMi4xIDUuMSAzMi4yIDBsMTAwLTUwYzEyLjItNi4xIDE5LjktMTguNiAxOS45LTMyLjJWMjgzLjljMC0xNS05LjMtMjguNC0yMy40LTMzLjd6TTM1OCAyMTQuOGwtODUgMzEuOXYtNjguMmw4NS0zN3Y3My4zek0xNTQgMTA0LjFsMTAyLTM4LjIgMTAyIDM4LjJ2LjZsLTEwMiA0MS40LTEwMi00MS40di0uNnptODQgMjkxLjFsLTg1IDQyLjV2LTc5LjFsODUtMzguOHY3NS40em0wLTExMmwtMTAyIDQxLjQtMTAyLTQxLjR2LS42bDEwMi0zOC4yIDEwMiAzOC4ydi42em0yNDAgMTEybC04NSA0Mi41di03OS4xbDg1LTM4Ljh2NzUuNHptMC0xMTJsLTEwMiA0MS40LTEwMi00MS40di0uNmwxMDItMzguMiAxMDIgMzguMnYuNnoiPjwvcGF0aD48L3N2Zz4K +//! +//! Checked owned and borrowed Unicode-based identifiers. +//! +//! # Overview +//! +//! [`strck`] is a crate for creating checked owned and borrowed strings with +//! 
arbitrary invariants at the type level. This crate extends `strck` by providing +//! [`Invariant`]s for [Unicode identifiers][unicode] and [Rust identifiers][rust]. +//! In the future, this crate may support identifiers for other languages as well. +//! +//! This crate re-exports [`Check`], [`Ck`], [`IntoCheck`], and [`IntoCk`] from +//! `strck`, so other libraries only have to depend on this crate. +//! +//! # Feature flags +//! * `rust`: Provide the [`rust`] module, containing an [`Invariant`] and type +//! aliases to [`Ck`] and [`Check`] for Rust identifiers. Disabled by default. +//! +//! [`Invariant`]: strck::Invariant +//! [`RustIdent`]: rust::RustIdent +//! [`Ck`]: strck::Ck +//! [`Check`]: strck::Check + +pub mod unicode; + +#[doc(no_inline)] +pub use unicode::{Ident, IdentBuf}; + +#[cfg(feature = "rust")] +pub mod rust; + +#[doc(no_inline)] +pub use strck::{Check, Ck, IntoCheck, IntoCk, Invariant}; diff --git a/third_party/rust/strck_ident/src/rust.rs b/third_party/rust/strck_ident/src/rust.rs new file mode 100644 index 0000000000..2ed3c9b066 --- /dev/null +++ b/third_party/rust/strck_ident/src/rust.rs @@ -0,0 +1,136 @@ +//! Checked strings containing Rust identifiers. +//! +//! Raw identifiers are unsupported. +//! +//! # Examples +//! +//! ```rust +//! use strck_ident::{IntoCk, rust::RustIdent}; +//! +//! assert!("foo".ck::<RustIdent>().is_ok()); +//! assert!("_identifier".ck::<RustIdent>().is_ok()); +//! assert!("Москва".ck::<RustIdent>().is_ok()); +//! assert!("東京".ck::<RustIdent>().is_ok()); +//! +//! assert!("struct".ck::<RustIdent>().is_err()); +//! assert!("r#try".ck::<RustIdent>().is_err()); +//! assert!("👍".ck::<RustIdent>().is_err()); +//! ``` +//! +//! # Aliases +//! +//! This module exposes [`Ident`] and [`IdentBuf`], which alias `Ck<RustIdent>` +//! and `Check<RustIdent>` respectively. These aliases are preferred to keep +//! type signatures succinct. +//! +//! # Requirements +//! +//! 
This module is only available when the `rust` feature flag is enabled. +use crate::unicode; +use core::fmt; +use strck::{Check, Ck, Invariant}; + +/// An [`Invariant`] for Rust identifiers. +/// +/// Raw identifiers are unsupported. +/// +/// # Invariants +/// +/// * The string is nonempty. +/// * The first character is `_` or XID_Start. +/// * Any following characters are XID_Continue. +/// * The string isn't a single underscore, e.g. `"_"`. +/// * The string isn't a [strict] or [reserved] keyword. +/// +/// [strict]: https://doc.rust-lang.org/reference/keywords.html#strict-keywords +/// [reserved]: https://doc.rust-lang.org/reference/keywords.html#reserved-keywords +#[derive(Clone, Debug)] +pub struct RustIdent; + +/// Borrowed checked string containing a Rust identifier. +/// +/// See [`RustIdent`] for more details. +pub type Ident = Ck<RustIdent>; + +/// Owned checked string containing a Rust identifier. +/// +/// See [`RustIdent`] for more details. +pub type IdentBuf<B = String> = Check<RustIdent, B>; + +/// The error type returned from checking the invariants of [`RustIdent`]. +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub enum Error { + /// An invalid unicode identifier. + Unicode(unicode::Error), + + /// A [strict] or [reserved] keyword. + /// + /// [strict]: https://doc.rust-lang.org/reference/keywords.html#strict-keywords + /// [reserved]: https://doc.rust-lang.org/reference/keywords.html#reserved-keywords + Keyword(&'static str), + + /// A single underscore. 
+ Wildcard, +} + +impl std::error::Error for Error {} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Error::Unicode(err) => err.fmt(f), + Error::Keyword(keyword) => { + write!(f, "Rust keyword: '{keyword}'") + } + Error::Wildcard => f.pad("wildcard '_' isn't a valid Rust ident"), + } + } +} + +impl Invariant for RustIdent { + type Error = Error; + + /// Checks that `slice` is a Rust identifier: a Unicode identifier that may + /// also start with `_`, is not the lone wildcard `_`, and is not a keyword. + fn check(slice: &str) -> Result<(), Self::Error> { + match unicode::UnicodeIdent::check(slice) { + Ok(()) => match KEYWORDS.binary_search(&slice) { + Ok(index) => Err(Error::Keyword(KEYWORDS[index])), + Err(_) => Ok(()), + }, + // `UnicodeIdent` rejects a leading `_` before looking at the rest of + // the string, so the remaining characters must be validated here. + Err(unicode::Error::Start('_')) => { + match slice.chars().skip(1).find(|&ch| !unicode_ident::is_xid_continue(ch)) { + Some(ch) => Err(Error::Unicode(unicode::Error::Continue(ch))), + None if slice.len() == 1 => Err(Error::Wildcard), // `_` isn't ok + None => Ok(()), // `_x` is ok + } + } + Err(e) => Err(Error::Unicode(e)), + } + } +} + +static KEYWORDS: [&str; 51] = [ + "Self", "abstract", "as", "async", "await", "become", "box", "break", "const", "continue", + "crate", "do", "dyn", "else", "enum", "extern", "false", "final", "fn", "for", "if", "impl", + "in", "let", "loop", "macro", "match", "mod", "move", "mut", "override", "priv", "pub", "ref", + "return", "self", "static", "struct", "super", "trait", "true", "try", "type", "typeof", + "unsafe", "unsized", "use", "virtual", "where", "while", "yield", +]; + +#[cfg(test)] +mod tests { + use super::{Error, RustIdent}; + use strck::IntoCk; + + #[test] + fn test_underscore() { + assert_eq!("_".ck::<RustIdent>().unwrap_err(), Error::Wildcard); + assert!("_unused".ck::<RustIdent>().is_ok()); + assert!("__private".ck::<RustIdent>().is_ok()); + assert!("snake_case".ck::<RustIdent>().is_ok()); + assert!("_1".ck::<RustIdent>().is_ok()); + assert!("_foo bar".ck::<RustIdent>().is_err()); + } + + #[test] + fn test_rust_reference() { + assert!("foo".ck::<RustIdent>().is_ok()); + assert!("_identifier".ck::<RustIdent>().is_ok()); + assert!("Москва".ck::<RustIdent>().is_ok()); + assert!("東京".ck::<RustIdent>().is_ok()); + } +} diff --git a/third_party/rust/strck_ident/src/unicode.rs b/third_party/rust/strck_ident/src/unicode.rs new file mode 
100644 index 0000000000..5cf991ed94 --- /dev/null +++ b/third_party/rust/strck_ident/src/unicode.rs @@ -0,0 +1,145 @@ +//! Checked strings containing Unicode identifiers according to the +//! [Unicode Standard Annex #31](https://www.unicode.org/reports/tr31/). +//! +//! # Examples +//! +//! ```rust +//! use strck_ident::{IntoCk, unicode::UnicodeIdent}; +//! +//! assert!("foo".ck::<UnicodeIdent>().is_ok()); +//! assert!("struct".ck::<UnicodeIdent>().is_ok()); +//! assert!("Москва".ck::<UnicodeIdent>().is_ok()); +//! assert!("東京".ck::<UnicodeIdent>().is_ok()); +//! +//! assert!("_identifier".ck::<UnicodeIdent>().is_err()); +//! assert!("r#try".ck::<UnicodeIdent>().is_err()); +//! assert!("👍".ck::<UnicodeIdent>().is_err()); +//! ``` +//! +//! # Aliases +//! +//! This module exposes [`Ident`] and [`IdentBuf`], which alias `Ck<UnicodeIdent>` +//! and `Check<UnicodeIdent>` respectively. These aliases are preferred to keep +//! type signatures succinct. +//! +//! These are also exported under the root, and can be accessed as +//! `strck_ident::Ident` and `strck_ident::IdentBuf`. +use core::fmt; +use strck::{Check, Ck, Invariant}; + +/// An [`Invariant`] for unicode identifiers according to +/// [Unicode Standard Annex #31](https://www.unicode.org/reports/tr31/). +/// +/// # Invariants +/// +/// * The string is nonempty. +/// * The first character is XID_Start. +/// * Any following characters are XID_Continue. +#[derive(Clone, Debug)] +pub struct UnicodeIdent; + +/// Borrowed checked string containing a Unicode identifier. +/// +/// See [`UnicodeIdent`] for more details. +pub type Ident = Ck<UnicodeIdent>; + +/// Owned checked string containing a Unicode identifier. +/// +/// See [`UnicodeIdent`] for more details. +pub type IdentBuf<B = String> = Check<UnicodeIdent, B>; + +/// The error type returned from checking invariants of [`UnicodeIdent`]. +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub enum Error { + /// Empty string. 
+ Empty, + + /// The first character isn't XID_Start. + Start(char), + + /// A trailing character isn't XID_Continue. + Continue(char), +} + +impl std::error::Error for Error {} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Error::Empty => f.pad("empty"), + Error::Start(ch) => write!(f, "invalid start '{ch}'"), + Error::Continue(ch) => write!(f, "invalid continue '{ch}'"), + } + } +} + +impl Invariant for UnicodeIdent { + type Error = Error; + + fn check(slice: &str) -> Result<(), Self::Error> { + let mut chars = slice.chars(); + let start = chars.next().ok_or(Error::Empty)?; + + if !unicode_ident::is_xid_start(start) { + return Err(Error::Start(start)); + } + + for ch in chars { + if !unicode_ident::is_xid_continue(ch) { + return Err(Error::Continue(ch)); + } + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::{Error, UnicodeIdent}; + use strck::IntoCk; + + #[test] + fn test_invalid() { + assert_eq!("".ck::<UnicodeIdent>().unwrap_err(), Error::Empty); + assert_eq!("12345".ck::<UnicodeIdent>().unwrap_err(), Error::Start('1')); + assert_eq!( + "😂_foo".ck::<UnicodeIdent>().unwrap_err(), + Error::Start('😂') + ); + assert_eq!( + "foo_😂".ck::<UnicodeIdent>().unwrap_err(), + Error::Continue('😂') + ); + assert_eq!( + "hello.there".ck::<UnicodeIdent>().unwrap_err(), + Error::Continue('.') + ); + assert_eq!( + "\\as2mkf".ck::<UnicodeIdent>().unwrap_err(), + Error::Start('\\') + ); + assert_eq!( + "the book".ck::<UnicodeIdent>().unwrap_err(), + Error::Continue(' ') + ); + assert_eq!(" book".ck::<UnicodeIdent>().unwrap_err(), Error::Start(' ')); + assert_eq!("\n".ck::<UnicodeIdent>().unwrap_err(), Error::Start('\n')); + assert_eq!( + "_underscore".ck::<UnicodeIdent>().unwrap_err(), + Error::Start('_') + ); + assert_eq!( + "r#try".ck::<UnicodeIdent>().unwrap_err(), + Error::Continue('#') + ); + } + + #[test] + fn test_valid() { + assert!("a2345".ck::<UnicodeIdent>().is_ok()); + 
assert!("foo".ck::<UnicodeIdent>().is_ok()); + assert!("snake_case".ck::<UnicodeIdent>().is_ok()); + assert!("impl".ck::<UnicodeIdent>().is_ok()); + assert!("岡林".ck::<UnicodeIdent>().is_ok()); + } +} |