summaryrefslogtreecommitdiffstats
path: root/third_party/rust/strck_ident/src
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
commit26a029d407be480d791972afb5975cf62c9360a6 (patch)
treef435a8308119effd964b339f76abb83a57c29483 /third_party/rust/strck_ident/src
parentInitial commit. (diff)
downloadfirefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/strck_ident/src')
-rw-r--r--third_party/rust/strck_ident/src/lib.rs40
-rw-r--r--third_party/rust/strck_ident/src/rust.rs136
-rw-r--r--third_party/rust/strck_ident/src/unicode.rs145
3 files changed, 321 insertions, 0 deletions
diff --git a/third_party/rust/strck_ident/src/lib.rs b/third_party/rust/strck_ident/src/lib.rs
new file mode 100644
index 0000000000..ab2669ebaa
--- /dev/null
+++ b/third_party/rust/strck_ident/src/lib.rs
@@ -0,0 +1,40 @@
+//! [![github-img]][github-url] [![crates-img]][crates-url] [![docs-img]][docs-url]
+//!
+//! [github-url]: https://github.com/QnnOkabayashi/strck_ident
+//! [crates-url]: https://crates.io/crates/strck_ident
+//! [docs-url]: crate
+//! [github-img]: https://img.shields.io/badge/github-8da0cb?style=for-the-badge&labelColor=555555&logo=github
+//! [crates-img]: https://img.shields.io/badge/crates.io-fc8d62?style=for-the-badge&labelColor=555555&logo=rust
+//! [docs-img]: https://img.shields.io/badge/docs.rs-66c2a5?style=for-the-badge&labelColor=555555&logoColor=white&logo=
+//!
+//! Checked owned and borrowed Unicode-based identifiers.
+//!
+//! # Overview
+//!
+//! [`strck`] is a crate for creating checked owned and borrowed strings with
+//! arbitrary invariants at the type level. This crate extends `strck` by providing
+//! [`Invariant`]s for [Unicode identifiers][unicode] and [Rust identifiers][rust].
+//! In the future, this crate may support identifiers for other languages as well.
+//!
+//! This crate re-exports [`Check`], [`Ck`], [`IntoCheck`], [`IntoCk`], and
+//! [`Invariant`] from `strck`, so other libraries only have to depend on this crate.
+//!
+//! # Feature flags
+//! * `rust`: Provide the [`rust`] module, containing an [`Invariant`] and type
+//! aliases to [`Ck`] and [`Check`] for Rust identifiers. Disabled by default.
+//!
+//! [`Invariant`]: strck::Invariant
+//! [`RustIdent`]: rust::RustIdent
+//! [`Ck`]: strck::Ck
+//! [`Check`]: strck::Check
+
+pub mod unicode;
+
+#[doc(no_inline)]
+pub use unicode::{Ident, IdentBuf};
+
+#[cfg(feature = "rust")]
+pub mod rust;
+
+#[doc(no_inline)]
+pub use strck::{Check, Ck, IntoCheck, IntoCk, Invariant};
diff --git a/third_party/rust/strck_ident/src/rust.rs b/third_party/rust/strck_ident/src/rust.rs
new file mode 100644
index 0000000000..2ed3c9b066
--- /dev/null
+++ b/third_party/rust/strck_ident/src/rust.rs
@@ -0,0 +1,136 @@
+//! Checked strings containing Rust identifiers.
+//!
+//! Raw identifiers are unsupported.
+//!
+//! # Examples
+//!
+//! ```rust
+//! use strck_ident::{IntoCk, rust::RustIdent};
+//!
+//! assert!("foo".ck::<RustIdent>().is_ok());
+//! assert!("_identifier".ck::<RustIdent>().is_ok());
+//! assert!("Москва".ck::<RustIdent>().is_ok());
+//! assert!("東京".ck::<RustIdent>().is_ok());
+//!
+//! assert!("struct".ck::<RustIdent>().is_err());
+//! assert!("r#try".ck::<RustIdent>().is_err());
+//! assert!("👍".ck::<RustIdent>().is_err());
+//! ```
+//!
+//! # Aliases
+//!
+//! This module exposes [`Ident`] and [`IdentBuf`], which alias `Ck<RustIdent>`
+//! and `Check<RustIdent>` respectively. These aliases are preferred to keep
+//! type signatures succinct.
+//!
+//! # Requirements
+//!
+//! This module is only available when the `rust` feature flag is enabled.
+use crate::unicode;
+use core::fmt;
+use strck::{Check, Ck, Invariant};
+
+/// An [`Invariant`] for Rust identifiers.
+///
+/// Raw identifiers are unsupported.
+///
+/// # Invariants
+///
+/// * The string is nonempty.
+/// * The first character is `_` or XID_Start.
+/// * Any following characters are XID_Continue.
+/// * The string isn't a single underscore, i.e. `"_"`.
+/// * The string isn't a [strict] or [reserved] keyword.
+///
+/// [strict]: https://doc.rust-lang.org/reference/keywords.html#strict-keywords
+/// [reserved]: https://doc.rust-lang.org/reference/keywords.html#reserved-keywords
+#[derive(Clone, Debug)]
+pub struct RustIdent;
+
+/// Borrowed checked string containing a Rust identifier.
+///
+/// See [`RustIdent`] for more details.
+pub type Ident = Ck<RustIdent>;
+
+/// Owned checked string containing a Rust identifier.
+///
+/// See [`RustIdent`] for more details.
+pub type IdentBuf<B = String> = Check<RustIdent, B>;
+
+/// The error type returned from checking the invariants of [`RustIdent`].
+#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
+pub enum Error {
+    /// An invalid unicode identifier.
+    ///
+    /// Carries the underlying error reported by the Unicode identifier check.
+    Unicode(unicode::Error),
+
+    /// A [strict] or [reserved] keyword.
+    ///
+    /// The payload is the keyword that the checked string matched.
+    ///
+    /// [strict]: https://doc.rust-lang.org/reference/keywords.html#strict-keywords
+    /// [reserved]: https://doc.rust-lang.org/reference/keywords.html#reserved-keywords
+    Keyword(&'static str),
+
+    /// A single underscore.
+    Wildcard,
+}
+
+impl std::error::Error for Error {}
+
+impl fmt::Display for Error {
+    /// Writes a human-readable description of the error.
+    ///
+    /// [`Error::Unicode`] forwards to the wrapped error's own `Display`
+    /// output; the other variants produce a fixed message.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            // Fully qualified on purpose: only the `fmt` module is imported,
+            // so a bare `err.fmt(f)` has no formatting trait in scope to
+            // resolve against (and the inner type implements both `Debug`
+            // and `Display`).
+            Error::Unicode(err) => fmt::Display::fmt(err, f),
+            Error::Keyword(keyword) => {
+                write!(f, "Rust keyword: '{keyword}'")
+            }
+            Error::Wildcard => f.pad("wildcard '_' isn't a valid Rust ident"),
+        }
+    }
+}
+
+impl Invariant for RustIdent {
+    type Error = Error;
+
+    /// Checks that `slice` is a valid, non-raw Rust identifier.
+    ///
+    /// The string is first validated as a Unicode identifier; if that
+    /// succeeds it is rejected when it appears in the keyword table. A
+    /// leading `_` is not XID_Start, so the Unicode check reports it as an
+    /// invalid start and that case is handled here: a lone `_` is the
+    /// wildcard, and anything longer is accepted only if every character
+    /// after the underscore is XID_Continue.
+    ///
+    /// # Errors
+    ///
+    /// * [`Error::Unicode`] if the string is empty, starts with a character
+    ///   that is neither `_` nor XID_Start, or contains a following character
+    ///   that is not XID_Continue.
+    /// * [`Error::Keyword`] if the string is listed in the keyword table.
+    /// * [`Error::Wildcard`] if the string is exactly `_`.
+    fn check(slice: &str) -> Result<(), Self::Error> {
+        match unicode::UnicodeIdent::check(slice) {
+            Ok(()) => match KEYWORDS.binary_search(&slice) {
+                Ok(index) => Err(Error::Keyword(KEYWORDS[index])),
+                Err(_) => Ok(()),
+            },
+            Err(unicode::Error::Start('_')) => {
+                // `_` is a single byte, so index 1 is always a char boundary.
+                let rest = &slice[1..];
+                if rest.is_empty() {
+                    // `_` on its own isn't ok.
+                    return Err(Error::Wildcard);
+                }
+                // The Unicode check bailed out at the underscore and never
+                // looked at the remaining characters, so validate them here.
+                // No keyword starts with `_`, so no keyword lookup is needed.
+                match rest.chars().find(|&ch| !unicode_ident::is_xid_continue(ch)) {
+                    Some(ch) => Err(Error::Unicode(unicode::Error::Continue(ch))),
+                    None => Ok(()), // `_x` is ok
+                }
+            }
+            Err(e) => Err(Error::Unicode(e)),
+        }
+    }
+}
+
+/// Keywords that are rejected as Rust identifiers.
+///
+/// Entries are looked up with `binary_search`, so the table must stay sorted
+/// according to `str` ordering (byte-wise, which is why the capitalised
+/// `Self` comes before every lowercase entry). Update the array length when
+/// adding or removing a keyword.
+static KEYWORDS: [&str; 51] = [
+ "Self", "abstract", "as", "async", "await", "become", "box", "break", "const", "continue",
+ "crate", "do", "dyn", "else", "enum", "extern", "false", "final", "fn", "for", "if", "impl",
+ "in", "let", "loop", "macro", "match", "mod", "move", "mut", "override", "priv", "pub", "ref",
+ "return", "self", "static", "struct", "super", "trait", "true", "try", "type", "typeof",
+ "unsafe", "unsized", "use", "virtual", "where", "while", "yield",
+];
+
+#[cfg(test)]
+mod tests {
+    use super::{Error, RustIdent};
+    use crate::unicode;
+    use strck::IntoCk;
+
+    /// A lone underscore is the wildcard pattern; longer names may start with `_`.
+    #[test]
+    fn test_underscore() {
+        assert_eq!("_".ck::<RustIdent>().unwrap_err(), Error::Wildcard);
+        assert!("_unused".ck::<RustIdent>().is_ok());
+        assert!("__private".ck::<RustIdent>().is_ok());
+        assert!("snake_case".ck::<RustIdent>().is_ok());
+    }
+
+    /// The valid identifier examples used in the module documentation.
+    #[test]
+    fn test_rust_reference() {
+        assert!("foo".ck::<RustIdent>().is_ok());
+        assert!("_identifier".ck::<RustIdent>().is_ok());
+        assert!("Москва".ck::<RustIdent>().is_ok());
+        assert!("東京".ck::<RustIdent>().is_ok());
+    }
+
+    /// Keywords are rejected and reported back; near-misses are accepted.
+    #[test]
+    fn test_keywords() {
+        assert_eq!(
+            "struct".ck::<RustIdent>().unwrap_err(),
+            Error::Keyword("struct")
+        );
+        assert_eq!("Self".ck::<RustIdent>().unwrap_err(), Error::Keyword("Self"));
+        assert_eq!("yield".ck::<RustIdent>().unwrap_err(), Error::Keyword("yield"));
+        assert!("selfish".ck::<RustIdent>().is_ok());
+        assert!("structure".ck::<RustIdent>().is_ok());
+    }
+
+    /// Failures of the underlying Unicode check are wrapped, not swallowed.
+    #[test]
+    fn test_unicode_errors() {
+        assert_eq!(
+            "".ck::<RustIdent>().unwrap_err(),
+            Error::Unicode(unicode::Error::Empty)
+        );
+        assert_eq!(
+            "r#try".ck::<RustIdent>().unwrap_err(),
+            Error::Unicode(unicode::Error::Continue('#'))
+        );
+        assert_eq!(
+            "👍".ck::<RustIdent>().unwrap_err(),
+            Error::Unicode(unicode::Error::Start('👍'))
+        );
+    }
+}
diff --git a/third_party/rust/strck_ident/src/unicode.rs b/third_party/rust/strck_ident/src/unicode.rs
new file mode 100644
index 0000000000..5cf991ed94
--- /dev/null
+++ b/third_party/rust/strck_ident/src/unicode.rs
@@ -0,0 +1,145 @@
+//! Checked strings containing Unicode identifiers according to the
+//! [Unicode Standard Annex #31](https://www.unicode.org/reports/tr31/).
+//!
+//! # Examples
+//!
+//! ```rust
+//! use strck_ident::{IntoCk, unicode::UnicodeIdent};
+//!
+//! assert!("foo".ck::<UnicodeIdent>().is_ok());
+//! assert!("struct".ck::<UnicodeIdent>().is_ok());
+//! assert!("Москва".ck::<UnicodeIdent>().is_ok());
+//! assert!("東京".ck::<UnicodeIdent>().is_ok());
+//!
+//! assert!("_identifier".ck::<UnicodeIdent>().is_err());
+//! assert!("r#try".ck::<UnicodeIdent>().is_err());
+//! assert!("👍".ck::<UnicodeIdent>().is_err());
+//! ```
+//!
+//! # Aliases
+//!
+//! This module exposes [`Ident`] and [`IdentBuf`], which alias `Ck<UnicodeIdent>`
+//! and `Check<UnicodeIdent>` respectively. These aliases are preferred to keep
+//! type signatures succinct.
+//!
+//! These are also exported under the root, and can be accessed as
+//! `strck_ident::Ident` and `strck_ident::IdentBuf`.
+use core::fmt;
+use strck::{Check, Ck, Invariant};
+
+/// An [`Invariant`] for unicode identifiers according to
+/// [Unicode Standard Annex #31](https://www.unicode.org/reports/tr31/).
+///
+/// # Invariants
+///
+/// * The string is nonempty.
+/// * The first character is XID_Start.
+/// * Any following characters are XID_Continue.
+#[derive(Clone, Debug)]
+pub struct UnicodeIdent;
+
+/// Borrowed checked string containing a Unicode identifier.
+///
+/// See [`UnicodeIdent`] for more details.
+pub type Ident = Ck<UnicodeIdent>;
+
+/// Owned checked string containing a Unicode identifier.
+///
+/// See [`UnicodeIdent`] for more details.
+pub type IdentBuf<B = String> = Check<UnicodeIdent, B>;
+
+/// Reasons a string can fail the [`UnicodeIdent`] invariant.
+#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
+pub enum Error {
+    /// The string has no characters at all.
+    Empty,
+
+    /// The leading character (carried in the variant) isn't XID_Start.
+    Start(char),
+
+    /// A character after the first (carried in the variant) isn't XID_Continue.
+    Continue(char),
+}
+
+impl std::error::Error for Error {}
+
+impl fmt::Display for Error {
+    /// Renders a short description, quoting the offending character if any.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        // The enum is `Copy`, so match on the value rather than a reference.
+        match *self {
+            Self::Empty => f.pad("empty"),
+            Self::Start(ch) => write!(f, "invalid start '{ch}'"),
+            Self::Continue(ch) => write!(f, "invalid continue '{ch}'"),
+        }
+    }
+}
+
+impl Invariant for UnicodeIdent {
+    type Error = Error;
+
+    /// Validates `slice` as a Unicode identifier.
+    ///
+    /// # Errors
+    ///
+    /// * [`Error::Empty`] when `slice` has no characters.
+    /// * [`Error::Start`] when the first character isn't XID_Start.
+    /// * [`Error::Continue`] with the first later character that isn't
+    ///   XID_Continue.
+    fn check(slice: &str) -> Result<(), Self::Error> {
+        let mut rest = slice.chars();
+
+        match rest.next() {
+            None => Err(Error::Empty),
+            Some(first) if !unicode_ident::is_xid_start(first) => Err(Error::Start(first)),
+            // `rest` now yields only the characters after the first one.
+            Some(_) => match rest.find(|&ch| !unicode_ident::is_xid_continue(ch)) {
+                Some(offender) => Err(Error::Continue(offender)),
+                None => Ok(()),
+            },
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::{Error, UnicodeIdent};
+ use strck::IntoCk;
+
+ // Each rejected input must report the specific variant and the offending
+ // character: `Start` for a bad first character, `Continue` for a later one.
+ #[test]
+ fn test_invalid() {
+ assert_eq!("".ck::<UnicodeIdent>().unwrap_err(), Error::Empty);
+ assert_eq!("12345".ck::<UnicodeIdent>().unwrap_err(), Error::Start('1'));
+ assert_eq!(
+ "😂_foo".ck::<UnicodeIdent>().unwrap_err(),
+ Error::Start('😂')
+ );
+ assert_eq!(
+ "foo_😂".ck::<UnicodeIdent>().unwrap_err(),
+ Error::Continue('😂')
+ );
+ assert_eq!(
+ "hello.there".ck::<UnicodeIdent>().unwrap_err(),
+ Error::Continue('.')
+ );
+ assert_eq!(
+ "\\as2mkf".ck::<UnicodeIdent>().unwrap_err(),
+ Error::Start('\\')
+ );
+ assert_eq!(
+ "the book".ck::<UnicodeIdent>().unwrap_err(),
+ Error::Continue(' ')
+ );
+ assert_eq!(" book".ck::<UnicodeIdent>().unwrap_err(), Error::Start(' '));
+ assert_eq!("\n".ck::<UnicodeIdent>().unwrap_err(), Error::Start('\n'));
+ // A leading underscore is not XID_Start, so plain Unicode identifiers
+ // reject it.
+ assert_eq!(
+ "_underscore".ck::<UnicodeIdent>().unwrap_err(),
+ Error::Start('_')
+ );
+ // Raw-identifier syntax fails on the `#`, not on the leading `r`.
+ assert_eq!(
+ "r#try".ck::<UnicodeIdent>().unwrap_err(),
+ Error::Continue('#')
+ );
+ }
+
+ // Accepted inputs, including an interior underscore and a Rust keyword
+ // (`impl`), which this invariant does not treat specially.
+ #[test]
+ fn test_valid() {
+ assert!("a2345".ck::<UnicodeIdent>().is_ok());
+ assert!("foo".ck::<UnicodeIdent>().is_ok());
+ assert!("snake_case".ck::<UnicodeIdent>().is_ok());
+ assert!("impl".ck::<UnicodeIdent>().is_ok());
+ assert!("岡林".ck::<UnicodeIdent>().is_ok());
+ }
+}