//! Parsing and inspecting Rust literal tokens.
//!
//! This library offers functionality to parse Rust literals, i.e. tokens in the
//! Rust programming language that represent fixed values. The grammar for
//! those is defined [here][ref].
//!
//! This kind of functionality already exists in the crate `syn`. However, as
//! you oftentimes don't need (nor want) the full power of `syn`, `litrs` was
//! built. This crate also offers a bit more flexibility compared to `syn`
//! (only regarding literals, of course).
//!
//!
//! # Quick start
//!
//! | **`StringLit::try_from(tt)?.value()`** |
//! | - |
//!
//! ... where `tt` is a `proc_macro::TokenTree` and where [`StringLit`] can be
//! replaced with [`Literal`] or other types of literals (e.g. [`FloatLit`]).
//! Calling `value()` returns the value that is represented by the literal.
//!
//! **Mini Example**
//!
//! ```ignore
//! use proc_macro::TokenStream;
//!
//! #[proc_macro]
//! pub fn foo(input: TokenStream) -> TokenStream {
//!     let first_token = input.into_iter().next().unwrap(); // Do proper error handling!
//!     let string_value = match litrs::StringLit::try_from(first_token) {
//!         Ok(string_lit) => string_lit.value(),
//!         Err(e) => return e.to_compile_error(),
//!     };
//!
//!     // `string_value` is the string value with all escapes resolved.
//!     todo!()
//! }
//! ```
//!
//! # Overview
//!
//! The main types of this library are [`Literal`], representing any kind of
//! literal, and `*Lit`, like [`StringLit`] or [`FloatLit`], representing a
//! specific kind of literal.
//!
//! There are different ways to obtain such a literal type:
//!
//! - **`parse`**: parses a `&str` or `String` and returns
//!   `Result<_, ParseError>`. For example: [`Literal::parse`] and
//!   [`IntegerLit::parse`].
//!
//! - **`From<proc_macro::Literal> for Literal`**: turns a `Literal` value from
//!   the `proc_macro` crate into a `Literal` from this crate.
//!
//! - **`TryFrom<proc_macro::Literal> for *Lit`**: tries to turn a
//!   `proc_macro::Literal` into a specific literal type of this crate. If
//!   the input is a literal of a different kind, `Err(InvalidToken)` is
//!   returned.
//!
//! - **`TryFrom<TokenTree>`**: attempts to turn a token tree into a
//!   literal type of this crate. An error is returned if the token tree is
//!   not a literal, or if you are trying to turn it into a specific kind of
//!   literal and the token tree is a different kind of literal.
//!
//! All of the `From` and `TryFrom` conversions also work for references to
//! `proc_macro` types. Additionally, if the crate feature `proc-macro2` is
//! enabled (which it is by default), all these `From` and `TryFrom` impls also
//! exist for the corresponding `proc_macro2` types.
//!
//! **Note**: `true` and `false` are `Ident`s when passed to your proc macro.
//! The `TryFrom<TokenTree>` impls check for those two special idents and
//! return a [`BoolLit`] appropriately. For that reason, there is also no
//! `TryFrom<proc_macro::Literal>` impl for [`BoolLit`]. The `proc_macro::Literal`
//! simply cannot represent bool literals.
//!
//!
//! # Examples
//!
//! In a proc-macro:
//!
//! ```ignore
//! use std::convert::TryFrom;
//! use proc_macro::TokenStream;
//! use litrs::FloatLit;
//!
//! #[proc_macro]
//! pub fn foo(input: TokenStream) -> TokenStream {
//!     let mut input = input.into_iter().collect::<Vec<_>>();
//!     if input.len() != 1 {
//!         // Please do proper error handling in your real code!
//!         panic!("expected exactly one token as input");
//!     }
//!     let token = input.remove(0);
//!
//!     match FloatLit::try_from(token) {
//!         Ok(float_lit) => { /* do something */ }
//!         Err(e) => return e.to_compile_error(),
//!     }
//!
//!     // Dummy output
//!     TokenStream::new()
//! }
//! ```
//!
//! Parsing from string:
//!
//! ```
//! use litrs::{FloatLit, Literal};
//!
//! // Parse a specific kind of literal (float in this case):
//! let float_lit = FloatLit::parse("3.14f32");
//! assert!(float_lit.is_ok());
//! assert_eq!(float_lit.unwrap().suffix(), "f32");
//! assert!(FloatLit::parse("'c'").is_err());
//!
//! // Parse any kind of literal. After parsing, you can inspect the literal
//! // and decide what to do in each case.
//! let lit = Literal::parse("0xff80").expect("failed to parse literal");
//! match lit {
//!     Literal::Integer(lit) => { /* ... */ }
//!     Literal::Float(lit) => { /* ... */ }
//!     Literal::Bool(lit) => { /* ... */ }
//!     Literal::Char(lit) => { /* ... */ }
//!     Literal::String(lit) => { /* ... */ }
//!     Literal::Byte(lit) => { /* ... */ }
//!     Literal::ByteString(lit) => { /* ... */ }
//! }
//! ```
//!
//!
//!
//! # Crate features
//!
//! - `proc-macro2` (**default**): adds the dependency `proc_macro2`, a bunch of
//!   `From` and `TryFrom` impls, and [`InvalidToken::to_compile_error2`].
//! - `check_suffix`: if enabled, `parse` functions will exactly verify that the
//!   literal suffix is valid. Adds the dependency `unicode-xid`. If disabled,
//!   only an approximate check (only in ASCII range) is done. If you are
//!   writing a proc macro, you don't need to enable this as the suffix is
//!   already checked by the compiler.
//!
//!
//! [ref]: https://doc.rust-lang.org/reference/tokens.html#literals
//!

#![deny(missing_debug_implementations)]

// Makes the compiler-provided `proc_macro` crate available to this library.
extern crate proc_macro;

// `#[macro_use]` modules have to be declared before the modules that use
// their macros, so this declaration stays ahead of `tests`.
#[cfg(test)]
#[macro_use]
mod test_util;

#[cfg(test)]
mod tests;

mod bool;
mod byte;
mod bytestr;
mod char;
mod err;
mod escape;
mod float;
mod impls;
mod integer;
mod parse;
mod string;


use std::{borrow::{Borrow, Cow}, fmt, ops::{Deref, Range}};

// Public API surface: the literal types and error types are re-exported at
// the crate root.
pub use self::{
    bool::BoolLit,
    byte::ByteLit,
    bytestr::ByteStringLit,
    char::CharLit,
    err::{InvalidToken, ParseError},
    float::{FloatLit, FloatType},
    integer::{FromIntegerLiteral, IntegerLit, IntegerBase, IntegerType},
    string::StringLit,
};


// ==============================================================================================
// ===== `Literal` and type defs
// ==============================================================================================

/// A literal.
This is the main type of this library. /// /// This type is generic over the underlying buffer `B`, which can be `&str` or /// `String`. /// /// To create this type, you have to either call [`Literal::parse`] with an /// input string or use the `From<_>` impls of this type. The impls are only /// available of the corresponding crate features are enabled (they are enabled /// by default). #[derive(Debug, Clone, PartialEq, Eq)] pub enum Literal { Bool(BoolLit), Integer(IntegerLit), Float(FloatLit), Char(CharLit), String(StringLit), Byte(ByteLit), ByteString(ByteStringLit), } impl Literal { /// Parses the given input as a Rust literal. pub fn parse(input: B) -> Result { parse::parse(input) } /// Returns the suffix of this literal or `""` if it doesn't have one. /// /// Rust token grammar actually allows suffixes for all kinds of tokens. /// Most Rust programmer only know the type suffixes for integer and /// floats, e.g. `0u32`. And in normal Rust code, everything else causes an /// error. But it is possible to pass literals with arbitrary suffixes to /// proc macros, for example: /// /// ```ignore /// some_macro!(3.14f33 16px '🦊'good_boy "toph"beifong); /// ``` /// /// Boolean literals, not actually being literals, but idents, cannot have /// suffixes and this method always returns `""` for those. /// /// There are some edge cases to be aware of: /// - Integer suffixes must not start with `e` or `E` as that conflicts with /// the exponent grammar for floats. `0e1` is a float; `0eel` is also /// parsed as a float and results in an error. /// - Hexadecimal integers eagerly parse digits, so `0x5abcdefgh` has a /// suffix von `gh`. /// - Suffixes can contain and start with `_`, but for integer and number /// literals, `_` is eagerly parsed as part of the number, so `1_x` has /// the suffix `x`. /// - The input `55f32` is regarded as integer literal with suffix `f32`. 
/// /// # Example /// /// ``` /// use litrs::Literal; /// /// assert_eq!(Literal::parse(r##"3.14f33"##).unwrap().suffix(), "f33"); /// assert_eq!(Literal::parse(r##"123hackerman"##).unwrap().suffix(), "hackerman"); /// assert_eq!(Literal::parse(r##"0x0fuck"##).unwrap().suffix(), "uck"); /// assert_eq!(Literal::parse(r##"'🦊'good_boy"##).unwrap().suffix(), "good_boy"); /// assert_eq!(Literal::parse(r##""toph"beifong"##).unwrap().suffix(), "beifong"); /// ``` pub fn suffix(&self) -> &str { match self { Literal::Bool(_) => "", Literal::Integer(l) => l.suffix(), Literal::Float(l) => l.suffix(), Literal::Char(l) => l.suffix(), Literal::String(l) => l.suffix(), Literal::Byte(l) => l.suffix(), Literal::ByteString(l) => l.suffix(), } } } impl Literal<&str> { /// Makes a copy of the underlying buffer and returns the owned version of /// `Self`. pub fn into_owned(self) -> Literal { match self { Literal::Bool(l) => Literal::Bool(l.to_owned()), Literal::Integer(l) => Literal::Integer(l.to_owned()), Literal::Float(l) => Literal::Float(l.to_owned()), Literal::Char(l) => Literal::Char(l.to_owned()), Literal::String(l) => Literal::String(l.into_owned()), Literal::Byte(l) => Literal::Byte(l.to_owned()), Literal::ByteString(l) => Literal::ByteString(l.into_owned()), } } } impl fmt::Display for Literal { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Literal::Bool(l) => l.fmt(f), Literal::Integer(l) => l.fmt(f), Literal::Float(l) => l.fmt(f), Literal::Char(l) => l.fmt(f), Literal::String(l) => l.fmt(f), Literal::Byte(l) => l.fmt(f), Literal::ByteString(l) => l.fmt(f), } } } // ============================================================================================== // ===== Buffer // ============================================================================================== /// A shared or owned string buffer. Implemented for `String` and `&str`. *Implementation detail*. 
///
/// This trait is an implementation detail of this library, cannot be
/// implemented in other crates and is not subject to semantic versioning.
/// `litrs` only guarantees that this trait is implemented for `String` and
/// `for<'a> &'a str`.
pub trait Buffer: sealed::Sealed + Deref<Target = str> {
    /// This is `Cow<'static, str>` for `String`, and `Cow<'a, str>` for `&'a str`.
    type Cow: From<String> + AsRef<str> + Borrow<str> + Deref<Target = str>;

    /// Converts the buffer into its [`Buffer::Cow`] representation.
    #[doc(hidden)]
    fn into_cow(self) -> Self::Cow;

    /// This is `Cow<'static, [u8]>` for `String`, and `Cow<'a, [u8]>` for `&'a str`.
    type ByteCow: From<Vec<u8>> + AsRef<[u8]> + Borrow<[u8]> + Deref<Target = [u8]>;

    /// Converts the buffer into its [`Buffer::ByteCow`] representation.
    #[doc(hidden)]
    fn into_byte_cow(self) -> Self::ByteCow;

    /// Cuts away some characters at the beginning and some at the end. Given
    /// range has to be in bounds.
    #[doc(hidden)]
    fn cut(self, range: Range<usize>) -> Self;
}

// Sealing: `Sealed` is a supertrait of `Buffer` but lives in a private module,
// so code outside this crate cannot name it and therefore cannot implement
// `Buffer`.
mod sealed {
    pub trait Sealed {}
}

impl<'a> sealed::Sealed for &'a str {}
impl<'a> Buffer for &'a str {
    #[doc(hidden)]
    fn cut(self, range: Range<usize>) -> Self {
        // Plain sub-slicing; panics if `range` is out of bounds or not on a
        // char boundary.
        &self[range]
    }

    type Cow = Cow<'a, str>;
    #[doc(hidden)]
    fn into_cow(self) -> Self::Cow {
        self.into()
    }

    type ByteCow = Cow<'a, [u8]>;
    #[doc(hidden)]
    fn into_byte_cow(self) -> Self::ByteCow {
        self.as_bytes().into()
    }
}

impl sealed::Sealed for String {}
impl Buffer for String {
    #[doc(hidden)]
    fn cut(mut self, range: Range<usize>) -> Self {
        // This is not the most efficient way, but it works. First we cut the
        // end, then the beginning. Note that `drain` also removes the range if
        // the iterator is not consumed.
        self.truncate(range.end);
        self.drain(..range.start);
        self
    }

    type Cow = Cow<'static, str>;
    #[doc(hidden)]
    fn into_cow(self) -> Self::Cow {
        self.into()
    }

    type ByteCow = Cow<'static, [u8]>;
    #[doc(hidden)]
    fn into_byte_cow(self) -> Self::ByteCow {
        self.into_bytes().into()
    }
}