summaryrefslogtreecommitdiffstats
path: root/third_party/rust/litrs/src/lib.rs
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/litrs/src/lib.rs')
-rw-r--r--third_party/rust/litrs/src/lib.rs370
1 files changed, 370 insertions, 0 deletions
diff --git a/third_party/rust/litrs/src/lib.rs b/third_party/rust/litrs/src/lib.rs
new file mode 100644
index 0000000000..64ed7813c9
--- /dev/null
+++ b/third_party/rust/litrs/src/lib.rs
@@ -0,0 +1,370 @@
+//! Parsing and inspecting Rust literal tokens.
+//!
+//! This library offers functionality to parse Rust literals, i.e. tokens in the
+//! Rust programming language that represent fixed values. The grammar for
+//! those is defined [here][ref].
+//!
+//! This kind of functionality already exists in the crate `syn`. However, as
+//! you oftentimes don't need (nor want) the full power of `syn`, `litrs` was
+//! built. This crate also offers a bit more flexibility compared to `syn`
+//! (only regarding literals, of course).
+//!
+//!
+//! # Quick start
+//!
+//! | **`StringLit::try_from(tt)?.value()`** |
+//! | - |
+//!
+//! ... where `tt` is a `proc_macro::TokenTree` and where [`StringLit`] can be
+//! replaced with [`Literal`] or other types of literals (e.g. [`FloatLit`]).
+//! Calling `value()` returns the value that is represented by the literal.
+//!
+//! **Mini Example**
+//!
+//! ```ignore
+//! use proc_macro::TokenStream;
+//!
+//! #[proc_macro]
+//! pub fn foo(input: TokenStream) -> TokenStream {
+//! let first_token = input.into_iter().next().unwrap(); // Do proper error handling!
+//! let string_value = match litrs::StringLit::try_from(first_token) {
+//! Ok(string_lit) => string_lit.value(),
+//! Err(e) => return e.to_compile_error(),
+//! };
+//!
+//! // `string_value` is the string value with all escapes resolved.
+//! todo!()
+//! }
+//! ```
+//!
+//! # Overview
+//!
+//! The main types of this library are [`Literal`], representing any kind of
+//! literal, and `*Lit`, like [`StringLit`] or [`FloatLit`], representing a
+//! specific kind of literal.
+//!
+//! There are different ways to obtain such a literal type:
+//!
+//! - **`parse`**: parses a `&str` or `String` and returns `Result<_,
+//! ParseError>`. For example: [`Literal::parse`] and
+//! [`IntegerLit::parse`].
+//!
+//! - **`From<proc_macro::Literal> for Literal`**: turns a `Literal` value from
+//! the `proc_macro` crate into a `Literal` from this crate.
+//!
+//! - **`TryFrom<proc_macro::Literal> for *Lit`**: tries to turn a
+//! `proc_macro::Literal` into a specific literal type of this crate. If
+//! the input is a literal of a different kind, `Err(InvalidToken)` is
+//! returned.
+//!
+//! - **`TryFrom<proc_macro::TokenTree>`**: attempts to turn a token tree into a
+//! literal type of this crate. An error is returned if the token tree is
+//! not a literal, or if you are trying to turn it into a specific kind of
+//! literal and the token tree is a different kind of literal.
+//!
+//! All of the `From` and `TryFrom` conversions also work for references to
+//! `proc_macro` types. Additionally, if the crate feature `proc-macro2` is
+//! enabled (which it is by default), all these `From` and `TryFrom` impls also
+//! exist for the corresponding `proc_macro2` types.
+//!
+//! **Note**: `true` and `false` are `Ident`s when passed to your proc macro.
+//! The `TryFrom<TokenTree>` impls check for those two special idents and
+//! return a [`BoolLit`] appropriately. For that reason, there is also no
+//! `TryFrom<proc_macro::Literal>` impl for [`BoolLit`]. The `proc_macro::Literal`
+//! simply cannot represent bool literals.
+//!
+//!
+//! # Examples
+//!
+//! In a proc-macro:
+//!
+//! ```ignore
+//! use std::convert::TryFrom;
+//! use proc_macro::TokenStream;
+//! use litrs::FloatLit;
+//!
+//! #[proc_macro]
+//! pub fn foo(input: TokenStream) -> TokenStream {
+//! let mut input = input.into_iter().collect::<Vec<_>>();
+//! if input.len() != 1 {
+//! // Please do proper error handling in your real code!
+//! panic!("expected exactly one token as input");
+//! }
+//! let token = input.remove(0);
+//!
+//! match FloatLit::try_from(token) {
+//! Ok(float_lit) => { /* do something */ }
+//! Err(e) => return e.to_compile_error(),
+//! }
+//!
+//! // Dummy output
+//! TokenStream::new()
+//! }
+//! ```
+//!
+//! Parsing from string:
+//!
+//! ```
+//! use litrs::{FloatLit, Literal};
+//!
+//! // Parse a specific kind of literal (float in this case):
+//! let float_lit = FloatLit::parse("3.14f32");
+//! assert!(float_lit.is_ok());
+//! assert_eq!(float_lit.unwrap().suffix(), "f32");
+//! assert!(FloatLit::parse("'c'").is_err());
+//!
+//! // Parse any kind of literal. After parsing, you can inspect the literal
+//! // and decide what to do in each case.
+//! let lit = Literal::parse("0xff80").expect("failed to parse literal");
+//! match lit {
+//! Literal::Integer(lit) => { /* ... */ }
+//! Literal::Float(lit) => { /* ... */ }
+//! Literal::Bool(lit) => { /* ... */ }
+//! Literal::Char(lit) => { /* ... */ }
+//! Literal::String(lit) => { /* ... */ }
+//! Literal::Byte(lit) => { /* ... */ }
+//! Literal::ByteString(lit) => { /* ... */ }
+//! }
+//! ```
+//!
+//!
+//!
+//! # Crate features
+//!
+//! - `proc-macro2` (**default**): adds the dependency `proc_macro2`, a bunch of
+//! `From` and `TryFrom` impls, and [`InvalidToken::to_compile_error2`].
+//! - `check_suffix`: if enabled, `parse` functions will exactly verify that the
+//! literal suffix is valid. Adds the dependency `unicode-xid`. If disabled,
+//! only an approximate check (only in ASCII range) is done. If you are
+//! writing a proc macro, you don't need to enable this as the suffix is
+//! already checked by the compiler.
+//!
+//!
+//! [ref]: https://doc.rust-lang.org/reference/tokens.html#literals
+//!
+
+#![deny(missing_debug_implementations)]
+
+extern crate proc_macro;
+
+#[cfg(test)]
+#[macro_use]
+mod test_util;
+
+#[cfg(test)]
+mod tests;
+
+mod bool;
+mod byte;
+mod bytestr;
+mod char;
+mod err;
+mod escape;
+mod float;
+mod impls;
+mod integer;
+mod parse;
+mod string;
+
+
+use std::{borrow::{Borrow, Cow}, fmt, ops::{Deref, Range}};
+
+pub use self::{
+ bool::BoolLit,
+ byte::ByteLit,
+ bytestr::ByteStringLit,
+ char::CharLit,
+ err::{InvalidToken, ParseError},
+ float::{FloatLit, FloatType},
+ integer::{FromIntegerLiteral, IntegerLit, IntegerBase, IntegerType},
+ string::StringLit,
+};
+
+
+// ==============================================================================================
+// ===== `Literal` and type defs
+// ==============================================================================================
+
+/// A literal. This is the main type of this library.
+///
+/// This type is generic over the underlying buffer `B`, which can be `&str` or
+/// `String`.
+///
+/// To create this type, you have to either call [`Literal::parse`] with an
+/// input string or use the `From<_>` impls of this type. The impls are only
+/// available if the corresponding crate features are enabled (they are enabled
+/// by default).
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum Literal<B: Buffer> {
+ Bool(BoolLit), // the only variant whose payload is not generic over `B`
+ Integer(IntegerLit<B>),
+ Float(FloatLit<B>),
+ Char(CharLit<B>),
+ String(StringLit<B>),
+ Byte(ByteLit<B>),
+ ByteString(ByteStringLit<B>),
+}
+
+impl<B: Buffer> Literal<B> {
+ /// Parses the given input as a Rust literal, returning a `ParseError` on failure.
+ pub fn parse(input: B) -> Result<Self, ParseError> {
+ parse::parse(input)
+ }
+
+ /// Returns the suffix of this literal or `""` if it doesn't have one.
+ ///
+ /// Rust token grammar actually allows suffixes for all kinds of tokens.
+ /// Most Rust programmers only know the type suffixes for integers and
+ /// floats, e.g. `0u32`. And in normal Rust code, everything else causes an
+ /// error. But it is possible to pass literals with arbitrary suffixes to
+ /// proc macros, for example:
+ ///
+ /// ```ignore
+ /// some_macro!(3.14f33 16px '🦊'good_boy "toph"beifong);
+ /// ```
+ ///
+ /// Boolean literals, not actually being literals, but idents, cannot have
+ /// suffixes and this method always returns `""` for those.
+ ///
+ /// There are some edge cases to be aware of:
+ /// - Integer suffixes must not start with `e` or `E` as that conflicts with
+ /// the exponent grammar for floats. `0e1` is a float; `0eel` is also
+ /// parsed as a float and results in an error.
+ /// - Hexadecimal integers eagerly parse digits, so `0x5abcdefgh` has a
+ /// suffix of `gh`.
+ /// - Suffixes can contain and start with `_`, but for integer and float
+ /// literals, `_` is eagerly parsed as part of the number, so `1_x` has
+ /// the suffix `x`.
+ /// - The input `55f32` is regarded as integer literal with suffix `f32`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use litrs::Literal;
+ ///
+ /// assert_eq!(Literal::parse(r##"3.14f33"##).unwrap().suffix(), "f33");
+ /// assert_eq!(Literal::parse(r##"123hackerman"##).unwrap().suffix(), "hackerman");
+ /// assert_eq!(Literal::parse(r##"0x0fuck"##).unwrap().suffix(), "uck");
+ /// assert_eq!(Literal::parse(r##"'🦊'good_boy"##).unwrap().suffix(), "good_boy");
+ /// assert_eq!(Literal::parse(r##""toph"beifong"##).unwrap().suffix(), "beifong");
+ /// ```
+ pub fn suffix(&self) -> &str {
+ match self {
+ Literal::Bool(_) => "", // bool "literals" are idents and never carry a suffix
+ Literal::Integer(l) => l.suffix(),
+ Literal::Float(l) => l.suffix(),
+ Literal::Char(l) => l.suffix(),
+ Literal::String(l) => l.suffix(),
+ Literal::Byte(l) => l.suffix(),
+ Literal::ByteString(l) => l.suffix(),
+ }
+ }
+}
+
+impl Literal<&str> {
+ /// Converts this borrowed literal into a `Literal<String>` by copying the
+ /// underlying buffer; the variant (kind of literal) stays the same.
+ pub fn into_owned(self) -> Literal<String> {
+ match self {
+ Literal::Bool(l) => Literal::Bool(l.to_owned()),
+ Literal::Integer(l) => Literal::Integer(l.to_owned()),
+ Literal::Float(l) => Literal::Float(l.to_owned()),
+ Literal::Char(l) => Literal::Char(l.to_owned()),
+ Literal::String(l) => Literal::String(l.into_owned()), // NOTE(review): `into_owned`, not `to_owned` — presumably consumes `l`; confirm in `string.rs`
+ Literal::Byte(l) => Literal::Byte(l.to_owned()),
+ Literal::ByteString(l) => Literal::ByteString(l.into_owned()), // NOTE(review): same as `String` above; confirm in `bytestr.rs`
+ }
+ }
+}
+
+impl<B: Buffer> fmt::Display for Literal<B> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ match self { // every arm forwards to the `fmt` of the literal it wraps
+ Literal::Bool(l) => l.fmt(f),
+ Literal::Integer(l) => l.fmt(f),
+ Literal::Float(l) => l.fmt(f),
+ Literal::Char(l) => l.fmt(f),
+ Literal::String(l) => l.fmt(f),
+ Literal::Byte(l) => l.fmt(f),
+ Literal::ByteString(l) => l.fmt(f),
+ }
+ }
+}
+
+
+// ==============================================================================================
+// ===== Buffer
+// ==============================================================================================
+
+/// A shared or owned string buffer. Implemented for `String` and `&str`. *Implementation detail*.
+///
+/// This trait is an implementation detail of this library, cannot be
+/// implemented in other crates and is not subject to semantic versioning.
+/// `litrs` only guarantees that this trait is implemented for `String` and
+/// `for<'a> &'a str`.
+pub trait Buffer: sealed::Sealed + Deref<Target = str> {
+ /// This is `Cow<'static, str>` for `String`, and `Cow<'a, str>` for `&'a str`.
+ type Cow: From<String> + AsRef<str> + Borrow<str> + Deref<Target = str>;
+
+ #[doc(hidden)]
+ fn into_cow(self) -> Self::Cow; // consumes the buffer and yields it as `Self::Cow`
+
+ /// This is `Cow<'static, [u8]>` for `String`, and `Cow<'a, [u8]>` for `&'a str`.
+ type ByteCow: From<Vec<u8>> + AsRef<[u8]> + Borrow<[u8]> + Deref<Target = [u8]>;
+
+ #[doc(hidden)]
+ fn into_byte_cow(self) -> Self::ByteCow; // same as `into_cow`, but as raw bytes
+
+ /// Keeps only the part of the buffer inside `range` (byte indices), cutting away
+ /// everything before and after it. The given range has to be in bounds.
+ #[doc(hidden)]
+ fn cut(self, range: Range<usize>) -> Self;
+}
+
+mod sealed { // private module: other crates cannot name `Sealed`, so they cannot implement `Buffer`
+ pub trait Sealed {}
+}
+
+impl<'a> sealed::Sealed for &'a str {}
+impl<'a> Buffer for &'a str {
+ #[doc(hidden)]
+ fn cut(self, range: Range<usize>) -> Self {
+ &self[range] // re-borrows the sub-slice; `str` indexing panics if `range` is out of bounds
+ }
+
+ type Cow = Cow<'a, str>;
+ #[doc(hidden)]
+ fn into_cow(self) -> Self::Cow {
+ self.into() // `&str` -> `Cow::Borrowed`, nothing is copied
+ }
+ type ByteCow = Cow<'a, [u8]>;
+ #[doc(hidden)]
+ fn into_byte_cow(self) -> Self::ByteCow {
+ self.as_bytes().into() // `&[u8]` -> `Cow::Borrowed`, nothing is copied
+ }
+}
+
+impl sealed::Sealed for String {}
+impl Buffer for String {
+ #[doc(hidden)]
+ fn cut(mut self, range: Range<usize>) -> Self {
+ // This is not the most efficient way, but it works. First we cut the
+ // end, then the beginning (in that order, so `range.start` is still a valid
+ // index). `drain` removes the range even though its iterator is never consumed.
+ self.truncate(range.end);
+ self.drain(..range.start);
+ self
+ }
+
+ type Cow = Cow<'static, str>;
+ #[doc(hidden)]
+ fn into_cow(self) -> Self::Cow {
+ self.into() // `String` -> `Cow::Owned`, the string is moved, not copied
+ }
+
+ type ByteCow = Cow<'static, [u8]>;
+ #[doc(hidden)]
+ fn into_byte_cow(self) -> Self::ByteCow {
+ self.into_bytes().into() // `Vec<u8>` -> `Cow::Owned`, the bytes are moved, not copied
+ }
+}