diff options
Diffstat (limited to 'third_party/rust/litrs/src/lib.rs')
-rw-r--r-- | third_party/rust/litrs/src/lib.rs | 370 |
1 files changed, 370 insertions, 0 deletions
diff --git a/third_party/rust/litrs/src/lib.rs b/third_party/rust/litrs/src/lib.rs new file mode 100644 index 0000000000..64ed7813c9 --- /dev/null +++ b/third_party/rust/litrs/src/lib.rs @@ -0,0 +1,370 @@ +//! Parsing and inspecting Rust literal tokens. +//! +//! This library offers functionality to parse Rust literals, i.e. tokens in the +//! Rust programming language that represent fixed values. The grammar for +//! those is defined [here][ref]. +//! +//! This kind of functionality already exists in the crate `syn`. However, as +//! you oftentimes don't need (nor want) the full power of `syn`, `litrs` was +//! built. This crate also offers a bit more flexibility compared to `syn` +//! (only regarding literals, of course). +//! +//! +//! # Quick start +//! +//! | **`StringLit::try_from(tt)?.value()`** | +//! | - | +//! +//! ... where `tt` is a `proc_macro::TokenTree` and where [`StringLit`] can be +//! replaced with [`Literal`] or other types of literals (e.g. [`FloatLit`]). +//! Calling `value()` returns the value that is represented by the literal. +//! +//! **Mini Example** +//! +//! ```ignore +//! use proc_macro::TokenStream; +//! +//! #[proc_macro] +//! pub fn foo(input: TokenStream) -> TokenStream { +//! let first_token = input.into_iter().next().unwrap(); // Do proper error handling! +//! let string_value = match litrs::StringLit::try_from(first_token) { +//! Ok(string_lit) => string_lit.value(), +//! Err(e) => return e.to_compile_error(), +//! }; +//! +//! // `string_value` is the string value with all escapes resolved. +//! todo!() +//! } +//! ``` +//! +//! # Overview +//! +//! The main types of this library are [`Literal`], representing any kind of +//! literal, and `*Lit`, like [`StringLit`] or [`FloatLit`], representing a +//! specific kind of literal. +//! +//! There are different ways to obtain such a literal type: +//! +//! - **`parse`**: parses a `&str` or `String` and returns `Result<_, +//! ParseError>`. For example: [`Literal::parse`] and +//! [`IntegerLit::parse`]. +//! +//! - **`From<proc_macro::Literal> for Literal`**: turns a `Literal` value from +//! the `proc_macro` crate into a `Literal` from this crate. +//! +//! - **`TryFrom<proc_macro::Literal> for *Lit`**: tries to turn a +//! `proc_macro::Literal` into a specific literal type of this crate. If +//! the input is a literal of a different kind, `Err(InvalidToken)` is +//! returned. +//! +//! - **`TryFrom<proc_macro::TokenTree>`**: attempts to turn a token tree into a +//! literal type of this crate. An error is returned if the token tree is +//! not a literal, or if you are trying to turn it into a specific kind of +//! literal and the token tree is a different kind of literal. +//! +//! All of the `From` and `TryFrom` conversions also work for reference to +//! `proc_macro` types. Additionally, if the crate feature `proc-macro2` is +//! enabled (which it is by default), all these `From` and `TryFrom` impls also +//! exist for the corresponding `proc_macro2` types. +//! +//! **Note**: `true` and `false` are `Ident`s when passed to your proc macro. +//! The `TryFrom<TokenTree>` impls check for those two special idents and +//! return a [`BoolLit`] appropriately. For that reason, there is also no +//! `TryFrom<proc_macro::Literal>` impl for [`BoolLit`]. The `proc_macro::Literal` +//! simply cannot represent bool literals. +//! +//! +//! # Examples +//! +//! In a proc-macro: +//! +//! ```ignore +//! use std::convert::TryFrom; +//! use proc_macro::TokenStream; +//! use litrs::FloatLit; +//! +//! #[proc_macro] +//! pub fn foo(input: TokenStream) -> TokenStream { +//! let mut input = input.into_iter().collect::<Vec<_>>(); +//! if input.len() != 1 { +//! // Please do proper error handling in your real code! +//! panic!("expected exactly one token as input"); +//! } +//! let token = input.remove(0); +//! +//! match FloatLit::try_from(token) { +//! Ok(float_lit) => { /* do something */ } +//! Err(e) => return e.to_compile_error(), +//! } +//! +//! // Dummy output +//! TokenStream::new() +//! } +//! ``` +//! +//! Parsing from string: +//! +//! ``` +//! use litrs::{FloatLit, Literal}; +//! +//! // Parse a specific kind of literal (float in this case): +//! let float_lit = FloatLit::parse("3.14f32"); +//! assert!(float_lit.is_ok()); +//! assert_eq!(float_lit.unwrap().suffix(), "f32"); +//! assert!(FloatLit::parse("'c'").is_err()); +//! +//! // Parse any kind of literal. After parsing, you can inspect the literal +//! // and decide what to do in each case. +//! let lit = Literal::parse("0xff80").expect("failed to parse literal"); +//! match lit { +//! Literal::Integer(lit) => { /* ... */ } +//! Literal::Float(lit) => { /* ... */ } +//! Literal::Bool(lit) => { /* ... */ } +//! Literal::Char(lit) => { /* ... */ } +//! Literal::String(lit) => { /* ... */ } +//! Literal::Byte(lit) => { /* ... */ } +//! Literal::ByteString(lit) => { /* ... */ } +//! } +//! ``` +//! +//! +//! +//! # Crate features +//! +//! - `proc-macro2` (**default**): adds the dependency `proc_macro2`, a bunch of +//! `From` and `TryFrom` impls, and [`InvalidToken::to_compile_error2`]. +//! - `check_suffix`: if enabled, `parse` functions will exactly verify that the +//! literal suffix is valid. Adds the dependency `unicode-xid`. If disabled, +//! only an approximate check (only in ASCII range) is done. If you are +//! writing a proc macro, you don't need to enable this as the suffix is +//! already checked by the compiler. +//! +//! +//! [ref]: https://doc.rust-lang.org/reference/tokens.html#literals +//! + +#![deny(missing_debug_implementations)] + +extern crate proc_macro; + +#[cfg(test)] +#[macro_use] +mod test_util; + +#[cfg(test)] +mod tests; + +mod bool; +mod byte; +mod bytestr; +mod char; +mod err; +mod escape; +mod float; +mod impls; +mod integer; +mod parse; +mod string; + + +use std::{borrow::{Borrow, Cow}, fmt, ops::{Deref, Range}}; + +pub use self::{ + bool::BoolLit, + byte::ByteLit, + bytestr::ByteStringLit, + char::CharLit, + err::{InvalidToken, ParseError}, + float::{FloatLit, FloatType}, + integer::{FromIntegerLiteral, IntegerLit, IntegerBase, IntegerType}, + string::StringLit, +}; + + +// ============================================================================================== +// ===== `Literal` and type defs +// ============================================================================================== + +/// A literal. This is the main type of this library. +/// +/// This type is generic over the underlying buffer `B`, which can be `&str` or +/// `String`. +/// +/// To create this type, you have to either call [`Literal::parse`] with an +/// input string or use the `From<_>` impls of this type. The impls are only +/// available of the corresponding crate features are enabled (they are enabled +/// by default). +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Literal<B: Buffer> { + Bool(BoolLit), + Integer(IntegerLit<B>), + Float(FloatLit<B>), + Char(CharLit<B>), + String(StringLit<B>), + Byte(ByteLit<B>), + ByteString(ByteStringLit<B>), +} + +impl<B: Buffer> Literal<B> { + /// Parses the given input as a Rust literal. + pub fn parse(input: B) -> Result<Self, ParseError> { + parse::parse(input) + } + + /// Returns the suffix of this literal or `""` if it doesn't have one. + /// + /// Rust token grammar actually allows suffixes for all kinds of tokens. + /// Most Rust programmer only know the type suffixes for integer and + /// floats, e.g. `0u32`. And in normal Rust code, everything else causes an + /// error. But it is possible to pass literals with arbitrary suffixes to + /// proc macros, for example: + /// + /// ```ignore + /// some_macro!(3.14f33 16px '🦊'good_boy "toph"beifong); + /// ``` + /// + /// Boolean literals, not actually being literals, but idents, cannot have + /// suffixes and this method always returns `""` for those. + /// + /// There are some edge cases to be aware of: + /// - Integer suffixes must not start with `e` or `E` as that conflicts with + /// the exponent grammar for floats. `0e1` is a float; `0eel` is also + /// parsed as a float and results in an error. + /// - Hexadecimal integers eagerly parse digits, so `0x5abcdefgh` has a + /// suffix von `gh`. + /// - Suffixes can contain and start with `_`, but for integer and number + /// literals, `_` is eagerly parsed as part of the number, so `1_x` has + /// the suffix `x`. + /// - The input `55f32` is regarded as integer literal with suffix `f32`. + /// + /// # Example + /// + /// ``` + /// use litrs::Literal; + /// + /// assert_eq!(Literal::parse(r##"3.14f33"##).unwrap().suffix(), "f33"); + /// assert_eq!(Literal::parse(r##"123hackerman"##).unwrap().suffix(), "hackerman"); + /// assert_eq!(Literal::parse(r##"0x0fuck"##).unwrap().suffix(), "uck"); + /// assert_eq!(Literal::parse(r##"'🦊'good_boy"##).unwrap().suffix(), "good_boy"); + /// assert_eq!(Literal::parse(r##""toph"beifong"##).unwrap().suffix(), "beifong"); + /// ``` + pub fn suffix(&self) -> &str { + match self { + Literal::Bool(_) => "", + Literal::Integer(l) => l.suffix(), + Literal::Float(l) => l.suffix(), + Literal::Char(l) => l.suffix(), + Literal::String(l) => l.suffix(), + Literal::Byte(l) => l.suffix(), + Literal::ByteString(l) => l.suffix(), + } + } +} + +impl Literal<&str> { + /// Makes a copy of the underlying buffer and returns the owned version of + /// `Self`. + pub fn into_owned(self) -> Literal<String> { + match self { + Literal::Bool(l) => Literal::Bool(l.to_owned()), + Literal::Integer(l) => Literal::Integer(l.to_owned()), + Literal::Float(l) => Literal::Float(l.to_owned()), + Literal::Char(l) => Literal::Char(l.to_owned()), + Literal::String(l) => Literal::String(l.into_owned()), + Literal::Byte(l) => Literal::Byte(l.to_owned()), + Literal::ByteString(l) => Literal::ByteString(l.into_owned()), + } + } +} + +impl<B: Buffer> fmt::Display for Literal<B> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Literal::Bool(l) => l.fmt(f), + Literal::Integer(l) => l.fmt(f), + Literal::Float(l) => l.fmt(f), + Literal::Char(l) => l.fmt(f), + Literal::String(l) => l.fmt(f), + Literal::Byte(l) => l.fmt(f), + Literal::ByteString(l) => l.fmt(f), + } + } +} + + +// ============================================================================================== +// ===== Buffer +// ============================================================================================== + +/// A shared or owned string buffer. Implemented for `String` and `&str`. *Implementation detail*. +/// +/// This is trait is implementation detail of this library, cannot be +/// implemented in other crates and is not subject to semantic versioning. +/// `litrs` only guarantees that this trait is implemented for `String` and +/// `for<'a> &'a str`. +pub trait Buffer: sealed::Sealed + Deref<Target = str> { + /// This is `Cow<'static, str>` for `String`, and `Cow<'a, str>` for `&'a str`. + type Cow: From<String> + AsRef<str> + Borrow<str> + Deref<Target = str>; + + #[doc(hidden)] + fn into_cow(self) -> Self::Cow; + + /// This is `Cow<'static, [u8]>` for `String`, and `Cow<'a, [u8]>` for `&'a str`. + type ByteCow: From<Vec<u8>> + AsRef<[u8]> + Borrow<[u8]> + Deref<Target = [u8]>; + + #[doc(hidden)] + fn into_byte_cow(self) -> Self::ByteCow; + + /// Cuts away some characters at the beginning and some at the end. Given + /// range has to be in bounds. + #[doc(hidden)] + fn cut(self, range: Range<usize>) -> Self; +} + +mod sealed { + pub trait Sealed {} +} + +impl<'a> sealed::Sealed for &'a str {} +impl<'a> Buffer for &'a str { + #[doc(hidden)] + fn cut(self, range: Range<usize>) -> Self { + &self[range] + } + + type Cow = Cow<'a, str>; + #[doc(hidden)] + fn into_cow(self) -> Self::Cow { + self.into() + } + type ByteCow = Cow<'a, [u8]>; + #[doc(hidden)] + fn into_byte_cow(self) -> Self::ByteCow { + self.as_bytes().into() + } +} + +impl sealed::Sealed for String {} +impl Buffer for String { + #[doc(hidden)] + fn cut(mut self, range: Range<usize>) -> Self { + // This is not the most efficient way, but it works. First we cut the + // end, then the beginning. Note that `drain` also removes the range if + // the iterator is not consumed. + self.truncate(range.end); + self.drain(..range.start); + self + } + + type Cow = Cow<'static, str>; + #[doc(hidden)] + fn into_cow(self) -> Self::Cow { + self.into() + } + + type ByteCow = Cow<'static, [u8]>; + #[doc(hidden)] + fn into_byte_cow(self) -> Self::ByteCow { + self.into_bytes().into() + } +} |