diff options
Diffstat (limited to 'third_party/rust/litrs/src')
21 files changed, 4712 insertions, 0 deletions
diff --git a/third_party/rust/litrs/src/bool/mod.rs b/third_party/rust/litrs/src/bool/mod.rs new file mode 100644 index 0000000000..d7b54a1b9f --- /dev/null +++ b/third_party/rust/litrs/src/bool/mod.rs @@ -0,0 +1,55 @@ +use std::fmt; + +use crate::{ParseError, err::{perr, ParseErrorKind::*}}; + + +/// A bool literal: `true` or `false`. Also see [the reference][ref]. +/// +/// Notice that, strictly speaking, from Rust point of view "boolean literals" are not +/// actual literals but [keywords]. +/// +/// [ref]: https://doc.rust-lang.org/reference/expressions/literal-expr.html#boolean-literal-expressions +/// [keywords]: https://doc.rust-lang.org/reference/keywords.html#strict-keywords +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum BoolLit { + False, + True, +} + +impl BoolLit { + /// Parses the input as a bool literal. Returns an error if the input is + /// invalid or represents a different kind of literal. + pub fn parse(s: &str) -> Result<Self, ParseError> { + match s { + "false" => Ok(Self::False), + "true" => Ok(Self::True), + _ => Err(perr(None, InvalidLiteral)), + } + } + + /// Returns the actual Boolean value of this literal. + pub fn value(self) -> bool { + match self { + Self::False => false, + Self::True => true, + } + } + + /// Returns the literal as string. + pub fn as_str(&self) -> &'static str { + match self { + Self::False => "false", + Self::True => "true", + } + } +} + +impl fmt::Display for BoolLit { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.pad(self.as_str()) + } +} + + +#[cfg(test)] +mod tests; diff --git a/third_party/rust/litrs/src/bool/tests.rs b/third_party/rust/litrs/src/bool/tests.rs new file mode 100644 index 0000000000..4b829244b8 --- /dev/null +++ b/third_party/rust/litrs/src/bool/tests.rs @@ -0,0 +1,48 @@ +use crate::{ + Literal, BoolLit, + test_util::assert_parse_ok_eq, +}; + +macro_rules! 
assert_bool_parse {
    // Checks that both the generic `Literal::parse` and the dedicated
    // `BoolLit::parse` accept `$input` and produce `$expected`.
    ($input:literal, $expected:expr) => {
        assert_parse_ok_eq(
            $input, Literal::parse($input), Literal::Bool($expected), "Literal::parse");
        assert_parse_ok_eq($input, BoolLit::parse($input), $expected, "BoolLit::parse");
    };
}



#[test]
fn parse_ok() {
    assert_bool_parse!("false", BoolLit::False);
    assert_bool_parse!("true", BoolLit::True);
}

#[test]
fn parse_err() {
    // Prefixes, surrounding whitespace and wrong capitalization of `false`.
    assert!(Literal::parse("fa").is_err());
    assert!(Literal::parse("fal").is_err());
    assert!(Literal::parse("fals").is_err());
    assert!(Literal::parse(" false").is_err());
    assert!(Literal::parse("false ").is_err());
    assert!(Literal::parse("False").is_err());

    // The same for `true`.
    assert!(Literal::parse("tr").is_err());
    assert!(Literal::parse("tru").is_err());
    assert!(Literal::parse(" true").is_err());
    assert!(Literal::parse("true ").is_err());
    assert!(Literal::parse("True").is_err());
}

#[test]
fn value() {
    assert!(!BoolLit::False.value());
    assert!(BoolLit::True.value());
}

#[test]
fn as_str() {
    assert_eq!(BoolLit::False.as_str(), "false");
    assert_eq!(BoolLit::True.as_str(), "true");
}
// diff --git a/third_party/rust/litrs/src/byte/mod.rs b/third_party/rust/litrs/src/byte/mod.rs new file mode 100644 index 0000000000..ffdff5d04a --- /dev/null +++ b/third_party/rust/litrs/src/byte/mod.rs @@ -0,0 +1,107 @@
use core::fmt;

use crate::{
    Buffer, ParseError,
    err::{perr, ParseErrorKind::*},
    escape::unescape,
    parse::check_suffix,
};


/// A single byte literal such as `b'k'` or `b'!'`.
///
/// More details can be found in [the reference][ref].
///
/// [ref]: https://doc.rust-lang.org/reference/tokens.html#byte-literals
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ByteLit<B: Buffer> {
    /// The input exactly as it was handed to `parse`.
    raw: B,
    /// Start index of the suffix or `raw.len()` if there is no suffix.
    start_suffix: usize,
    /// The byte the literal evaluates to.
    value: u8,
}

impl<B: Buffer> ByteLit<B> {
    /// Parses `input` as a byte literal.
    ///
    /// Fails with `Empty` for an empty input, with `InvalidByteLiteralStart`
    /// when the input does not begin with `b'`, and otherwise with whatever
    /// error `parse_impl` reports.
    pub fn parse(input: B) -> Result<Self, ParseError> {
        if input.is_empty() {
            Err(perr(None, Empty))
        } else if !input.starts_with("b'") {
            Err(perr(None, InvalidByteLiteralStart))
        } else {
            // `parse_impl` only borrows the buffer, so it can be moved into
            // the result afterwards.
            parse_impl(&input).map(|(value, start_suffix)| Self { raw: input, value, start_suffix })
        }
    }

    /// Returns the byte this literal evaluates to.
    pub fn value(&self) -> u8 {
        self.value
    }

    /// Returns the suffix following the closing quote, or `""` if there is
    /// none.
    pub fn suffix(&self) -> &str {
        let raw: &str = &self.raw;
        &raw[self.start_suffix..]
    }

    /// Borrows the input that was passed to `parse`.
    pub fn raw_input(&self) -> &str {
        &self.raw
    }

    /// Consumes the literal and hands back the buffer that was passed to
    /// `parse` (owned if `B` is an owned buffer).
    pub fn into_raw_input(self) -> B {
        let Self { raw, .. } = self;
        raw
    }

}

impl ByteLit<&str> {
    /// Copies the borrowed buffer into a `String` and returns the owned
    /// version of `Self`.
    pub fn to_owned(&self) -> ByteLit<String> {
        let Self { raw, start_suffix, value } = *self;
        ByteLit { raw: raw.to_owned(), start_suffix, value }
    }
}

impl<B: Buffer> fmt::Display for ByteLit<B> {
    /// Writes the raw input via `Formatter::pad`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.pad(self.raw_input())
    }
}

/// Parses the part after the leading `b'` and returns the byte value together
/// with the start index of the suffix.
///
/// Precondition: `input` must start with `b'`.
#[inline(never)]
pub(crate) fn parse_impl(input: &str) -> Result<(u8, usize), ParseError> {
    let bytes = input.as_bytes();

    // Inspect the first byte after `b'` to decide how the content is encoded.
    let (value, consumed) = match bytes.get(2).copied() {
        None => return Err(perr(None, UnterminatedByteLiteral)),
        Some(b'\'') => {
            // `b''` is empty, while `b'''` contains a quote that lacks its backslash.
            let err = if bytes.get(3) == Some(&b'\'') {
                perr(2, UnescapedSingleQuote)
            } else {
                perr(None, EmptyByteLiteral)
            };
            return Err(err);
        }
        Some(b'\n') | Some(b'\t') | Some(b'\r') => {
            return Err(perr(2, UnescapedSpecialWhitespace));
        }
        Some(b'\\') => unescape::<u8>(&input[2..], 2)?,
        Some(byte) if byte.is_ascii() => (byte, 1),
        Some(_) => return Err(perr(2, NonAsciiInByteLiteral)),
    };

    // The closing quote has to follow the content immediately.
    let closing = 2 + consumed;
    let tail = &input[closing..];
    if !tail.starts_with('\'') {
        let kind = if tail.contains('\'') {
            OverlongByteLiteral
        } else {
            UnterminatedByteLiteral
        };
        return Err(perr(None, kind));
    }

    // Whatever comes after the closing quote must be a valid suffix.
    let start_suffix = closing + 1;
    if let Err(kind) = check_suffix(&input[start_suffix..]) {
        return Err(perr(start_suffix, kind));
    }

    Ok((value, start_suffix))
}

#[cfg(test)]
mod tests;
// diff --git a/third_party/rust/litrs/src/byte/tests.rs b/third_party/rust/litrs/src/byte/tests.rs new file mode 100644 index 0000000000..3cf16b5fc2 --- /dev/null +++ b/third_party/rust/litrs/src/byte/tests.rs @@ -0,0 +1,188 @@
use crate::{ByteLit, Literal, test_util::{assert_parse_ok_eq, assert_roundtrip}};

// ===== Utility functions =======================================================================

macro_rules!
check { + ($lit:literal) => { check!($lit, stringify!($lit), "") }; + ($lit:literal, $input:expr, $suffix:literal) => { + let input = $input; + let expected = ByteLit { + raw: input, + start_suffix: input.len() - $suffix.len(), + value: $lit, + }; + + assert_parse_ok_eq(input, ByteLit::parse(input), expected.clone(), "ByteLit::parse"); + assert_parse_ok_eq(input, Literal::parse(input), Literal::Byte(expected), "Literal::parse"); + let lit = ByteLit::parse(input).unwrap(); + assert_eq!(lit.value(), $lit); + assert_eq!(lit.suffix(), $suffix); + assert_roundtrip(expected.to_owned(), input); + }; +} + + +// ===== Actual tests ============================================================================ + +#[test] +fn alphanumeric() { + check!(b'a'); + check!(b'b'); + check!(b'y'); + check!(b'z'); + check!(b'A'); + check!(b'B'); + check!(b'Y'); + check!(b'Z'); + + check!(b'0'); + check!(b'1'); + check!(b'8'); + check!(b'9'); +} + +#[test] +fn special_chars() { + check!(b' '); + check!(b'!'); + check!(b'"'); + check!(b'#'); + check!(b'$'); + check!(b'%'); + check!(b'&'); + check!(b'('); + check!(b')'); + check!(b'*'); + check!(b'+'); + check!(b','); + check!(b'-'); + check!(b'.'); + check!(b'/'); + check!(b':'); + check!(b';'); + check!(b'<'); + check!(b'='); + check!(b'>'); + check!(b'?'); + check!(b'@'); + check!(b'['); + check!(b']'); + check!(b'^'); + check!(b'_'); + check!(b'`'); + check!(b'{'); + check!(b'|'); + check!(b'}'); + check!(b'~'); +} + +#[test] +fn quote_escapes() { + check!(b'\''); + check!(b'\"'); +} + +#[test] +fn ascii_escapes() { + check!(b'\n'); + check!(b'\r'); + check!(b'\t'); + check!(b'\\'); + check!(b'\0'); + + check!(b'\x00'); + check!(b'\x01'); + check!(b'\x0c'); + check!(b'\x0D'); + check!(b'\x13'); + check!(b'\x30'); + check!(b'\x30'); + check!(b'\x4B'); + check!(b'\x6b'); + check!(b'\x7F'); + check!(b'\x7f'); +} + +#[test] +fn byte_escapes() { + check!(b'\x80'); + check!(b'\x8a'); + check!(b'\x8C'); + check!(b'\x99'); + check!(b'\xa0'); + 
check!(b'\xAd'); + check!(b'\xfe'); + check!(b'\xFe'); + check!(b'\xfF'); + check!(b'\xFF'); +} + +#[test] +fn suffixes() { + check!(b'a', r##"b'a'peter"##, "peter"); + check!(b'#', r##"b'#'peter"##, "peter"); + check!(b'\n', r##"b'\n'peter"##, "peter"); + check!(b'\'', r##"b'\''peter"##, "peter"); + check!(b'\"', r##"b'\"'peter"##, "peter"); + check!(b'\xFF', r##"b'\xFF'peter"##, "peter"); +} + +#[test] +fn invald_escapes() { + assert_err!(ByteLit, r"b'\a'", UnknownEscape, 2..4); + assert_err!(ByteLit, r"b'\y'", UnknownEscape, 2..4); + assert_err!(ByteLit, r"b'\", UnterminatedEscape, 2..3); + assert_err!(ByteLit, r"b'\x'", UnterminatedEscape, 2..5); + assert_err!(ByteLit, r"b'\x1'", InvalidXEscape, 2..6); + assert_err!(ByteLit, r"b'\xaj'", InvalidXEscape, 2..6); + assert_err!(ByteLit, r"b'\xjb'", InvalidXEscape, 2..6); +} + +#[test] +fn unicode_escape_not_allowed() { + assert_err!(ByteLit, r"b'\u{0}'", UnicodeEscapeInByteLiteral, 2..4); + assert_err!(ByteLit, r"b'\u{00}'", UnicodeEscapeInByteLiteral, 2..4); + assert_err!(ByteLit, r"b'\u{b}'", UnicodeEscapeInByteLiteral, 2..4); + assert_err!(ByteLit, r"b'\u{B}'", UnicodeEscapeInByteLiteral, 2..4); + assert_err!(ByteLit, r"b'\u{7e}'", UnicodeEscapeInByteLiteral, 2..4); + assert_err!(ByteLit, r"b'\u{E4}'", UnicodeEscapeInByteLiteral, 2..4); + assert_err!(ByteLit, r"b'\u{e4}'", UnicodeEscapeInByteLiteral, 2..4); + assert_err!(ByteLit, r"b'\u{fc}'", UnicodeEscapeInByteLiteral, 2..4); + assert_err!(ByteLit, r"b'\u{Fc}'", UnicodeEscapeInByteLiteral, 2..4); + assert_err!(ByteLit, r"b'\u{fC}'", UnicodeEscapeInByteLiteral, 2..4); + assert_err!(ByteLit, r"b'\u{FC}'", UnicodeEscapeInByteLiteral, 2..4); + assert_err!(ByteLit, r"b'\u{b10}'", UnicodeEscapeInByteLiteral, 2..4); + assert_err!(ByteLit, r"b'\u{B10}'", UnicodeEscapeInByteLiteral, 2..4); + assert_err!(ByteLit, r"b'\u{0b10}'", UnicodeEscapeInByteLiteral, 2..4); + assert_err!(ByteLit, r"b'\u{2764}'", UnicodeEscapeInByteLiteral, 2..4); + assert_err!(ByteLit, 
r"b'\u{1f602}'", UnicodeEscapeInByteLiteral, 2..4); + assert_err!(ByteLit, r"b'\u{1F602}'", UnicodeEscapeInByteLiteral, 2..4); +} + +#[test] +fn parse_err() { + assert_err!(ByteLit, r"b''", EmptyByteLiteral, None); + assert_err!(ByteLit, r"b' ''", UnexpectedChar, 4..5); + + assert_err!(ByteLit, r"b'", UnterminatedByteLiteral, None); + assert_err!(ByteLit, r"b'a", UnterminatedByteLiteral, None); + assert_err!(ByteLit, r"b'\n", UnterminatedByteLiteral, None); + assert_err!(ByteLit, r"b'\x35", UnterminatedByteLiteral, None); + + assert_err!(ByteLit, r"b'ab'", OverlongByteLiteral, None); + assert_err!(ByteLit, r"b'a _'", OverlongByteLiteral, None); + assert_err!(ByteLit, r"b'\n3'", OverlongByteLiteral, None); + + assert_err!(ByteLit, r"", Empty, None); + + assert_err!(ByteLit, r"b'''", UnescapedSingleQuote, 2); + assert_err!(ByteLit, r"b''''", UnescapedSingleQuote, 2); + + assert_err!(ByteLit, "b'\n'", UnescapedSpecialWhitespace, 2); + assert_err!(ByteLit, "b'\t'", UnescapedSpecialWhitespace, 2); + assert_err!(ByteLit, "b'\r'", UnescapedSpecialWhitespace, 2); + + assert_err!(ByteLit, "b'న'", NonAsciiInByteLiteral, 2); + assert_err!(ByteLit, "b'犬'", NonAsciiInByteLiteral, 2); + assert_err!(ByteLit, "b'🦊'", NonAsciiInByteLiteral, 2); +} diff --git a/third_party/rust/litrs/src/bytestr/mod.rs b/third_party/rust/litrs/src/bytestr/mod.rs new file mode 100644 index 0000000000..a0e09727f4 --- /dev/null +++ b/third_party/rust/litrs/src/bytestr/mod.rs @@ -0,0 +1,126 @@ +use std::{fmt, ops::Range}; + +use crate::{ + Buffer, ParseError, + err::{perr, ParseErrorKind::*}, + escape::{scan_raw_string, unescape_string}, +}; + + +/// A byte string or raw byte string literal, e.g. `b"hello"` or `br#"abc"def"#`. +/// +/// See [the reference][ref] for more information. +/// +/// [ref]: https://doc.rust-lang.org/reference/tokens.html#byte-string-literals +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ByteStringLit<B: Buffer> { + /// The raw input. 
+ raw: B, + + /// The string value (with all escaped unescaped), or `None` if there were + /// no escapes. In the latter case, `input` is the string value. + value: Option<Vec<u8>>, + + /// The number of hash signs in case of a raw string literal, or `None` if + /// it's not a raw string literal. + num_hashes: Option<u32>, + + /// Start index of the suffix or `raw.len()` if there is no suffix. + start_suffix: usize, +} + +impl<B: Buffer> ByteStringLit<B> { + /// Parses the input as a (raw) byte string literal. Returns an error if the + /// input is invalid or represents a different kind of literal. + pub fn parse(input: B) -> Result<Self, ParseError> { + if input.is_empty() { + return Err(perr(None, Empty)); + } + if !input.starts_with(r#"b""#) && !input.starts_with("br") { + return Err(perr(None, InvalidByteStringLiteralStart)); + } + + let (value, num_hashes, start_suffix) = parse_impl(&input)?; + Ok(Self { raw: input, value, num_hashes, start_suffix }) + } + + /// Returns the string value this literal represents (where all escapes have + /// been turned into their respective values). + pub fn value(&self) -> &[u8] { + self.value.as_deref().unwrap_or(&self.raw.as_bytes()[self.inner_range()]) + } + + /// Like `value` but returns a potentially owned version of the value. + /// + /// The return value is either `Cow<'static, [u8]>` if `B = String`, or + /// `Cow<'a, [u8]>` if `B = &'a str`. + pub fn into_value(self) -> B::ByteCow { + let inner_range = self.inner_range(); + let Self { raw, value, .. } = self; + value.map(B::ByteCow::from).unwrap_or_else(|| raw.cut(inner_range).into_byte_cow()) + } + + /// The optional suffix. Returns `""` if the suffix is empty/does not exist. + pub fn suffix(&self) -> &str { + &(*self.raw)[self.start_suffix..] + } + + /// Returns whether this literal is a raw string literal (starting with + /// `r`). + pub fn is_raw_byte_string(&self) -> bool { + self.num_hashes.is_some() + } + + /// Returns the raw input that was passed to `parse`. 
+ pub fn raw_input(&self) -> &str { + &self.raw + } + + /// Returns the raw input that was passed to `parse`, potentially owned. + pub fn into_raw_input(self) -> B { + self.raw + } + + /// The range within `self.raw` that excludes the quotes and potential `r#`. + fn inner_range(&self) -> Range<usize> { + match self.num_hashes { + None => 2..self.start_suffix - 1, + Some(n) => 2 + n as usize + 1..self.start_suffix - n as usize - 1, + } + } +} + +impl ByteStringLit<&str> { + /// Makes a copy of the underlying buffer and returns the owned version of + /// `Self`. + pub fn into_owned(self) -> ByteStringLit<String> { + ByteStringLit { + raw: self.raw.to_owned(), + value: self.value, + num_hashes: self.num_hashes, + start_suffix: self.start_suffix, + } + } +} + +impl<B: Buffer> fmt::Display for ByteStringLit<B> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.pad(&self.raw) + } +} + + +/// Precondition: input has to start with either `b"` or `br`. +#[inline(never)] +fn parse_impl(input: &str) -> Result<(Option<Vec<u8>>, Option<u32>, usize), ParseError> { + if input.starts_with("br") { + scan_raw_string::<u8>(&input, 2) + .map(|(v, num, start_suffix)| (v.map(String::into_bytes), Some(num), start_suffix)) + } else { + unescape_string::<u8>(&input, 2) + .map(|(v, start_suffix)| (v.map(String::into_bytes), None, start_suffix)) + } +} + +#[cfg(test)] +mod tests; diff --git a/third_party/rust/litrs/src/bytestr/tests.rs b/third_party/rust/litrs/src/bytestr/tests.rs new file mode 100644 index 0000000000..2afef5a99c --- /dev/null +++ b/third_party/rust/litrs/src/bytestr/tests.rs @@ -0,0 +1,224 @@ +use crate::{Literal, ByteStringLit, test_util::{assert_parse_ok_eq, assert_roundtrip}}; + +// ===== Utility functions ======================================================================= + +macro_rules! 
check { + ($lit:literal, $has_escapes:expr, $num_hashes:expr) => { + check!($lit, stringify!($lit), $has_escapes, $num_hashes, "") + }; + ($lit:literal, $input:expr, $has_escapes:expr, $num_hashes:expr, $suffix:literal) => { + let input = $input; + let expected = ByteStringLit { + raw: input, + value: if $has_escapes { Some($lit.to_vec()) } else { None }, + num_hashes: $num_hashes, + start_suffix: input.len() - $suffix.len(), + }; + + assert_parse_ok_eq( + input, ByteStringLit::parse(input), expected.clone(), "ByteStringLit::parse"); + assert_parse_ok_eq( + input, Literal::parse(input), Literal::ByteString(expected.clone()), "Literal::parse"); + let lit = ByteStringLit::parse(input).unwrap(); + assert_eq!(lit.value(), $lit); + assert_eq!(lit.suffix(), $suffix); + assert_eq!(lit.into_value().as_ref(), $lit); + assert_roundtrip(expected.into_owned(), input); + }; +} + + +// ===== Actual tests ============================================================================ + +#[test] +fn simple() { + check!(b"", false, None); + check!(b"a", false, None); + check!(b"peter", false, None); +} + +#[test] +fn special_whitespace() { + let strings = ["\n", "\t", "foo\tbar", "baz\n"]; + + for &s in &strings { + let input = format!(r#"b"{}""#, s); + let input_raw = format!(r#"br"{}""#, s); + for (input, num_hashes) in vec![(input, None), (input_raw, Some(0))] { + let expected = ByteStringLit { + raw: &*input, + value: None, + num_hashes, + start_suffix: input.len(), + }; + assert_parse_ok_eq( + &input, ByteStringLit::parse(&*input), expected.clone(), "ByteStringLit::parse"); + assert_parse_ok_eq( + &input, Literal::parse(&*input), Literal::ByteString(expected), "Literal::parse"); + assert_eq!(ByteStringLit::parse(&*input).unwrap().value(), s.as_bytes()); + assert_eq!(ByteStringLit::parse(&*input).unwrap().into_value(), s.as_bytes()); + } + } + + let res = ByteStringLit::parse("br\"\r\"").expect("failed to parse"); + assert_eq!(res.value(), b"\r"); +} + +#[test] +fn 
simple_escapes() { + check!(b"a\nb", true, None); + check!(b"\nb", true, None); + check!(b"a\n", true, None); + check!(b"\n", true, None); + + check!(b"\x60foo \t bar\rbaz\n banana \0kiwi", true, None); + check!(b"foo \\ferris", true, None); + check!(b"baz \\ferris\"box", true, None); + check!(b"\\foo\\ banana\" baz\"", true, None); + check!(b"\"foo \\ferris \" baz\\", true, None); + + check!(b"\x00", true, None); + check!(b" \x01", true, None); + check!(b"\x0c foo", true, None); + check!(b" foo\x0D ", true, None); + check!(b"\\x13", true, None); + check!(b"\"x30", true, None); +} + +#[test] +fn string_continue() { + check!(b"foo\ + bar", true, None); + check!(b"foo\ +bar", true, None); + + check!(b"foo\ + + banana", true, None); + + // Weird whitespace characters + let lit = ByteStringLit::parse("b\"foo\\\n\r\t\n \n\tbar\"").expect("failed to parse"); + assert_eq!(lit.value(), b"foobar"); + + // Raw strings do not handle "string continues" + check!(br"foo\ + bar", false, Some(0)); +} + +#[test] +fn crlf_newlines() { + let lit = ByteStringLit::parse("b\"foo\r\nbar\"").expect("failed to parse"); + assert_eq!(lit.value(), b"foo\nbar"); + + let lit = ByteStringLit::parse("b\"\r\nbar\"").expect("failed to parse"); + assert_eq!(lit.value(), b"\nbar"); + + let lit = ByteStringLit::parse("b\"foo\r\n\"").expect("failed to parse"); + assert_eq!(lit.value(), b"foo\n"); + + let lit = ByteStringLit::parse("br\"foo\r\nbar\"").expect("failed to parse"); + assert_eq!(lit.value(), b"foo\nbar"); + + let lit = ByteStringLit::parse("br#\"\r\nbar\"#").expect("failed to parse"); + assert_eq!(lit.value(), b"\nbar"); + + let lit = ByteStringLit::parse("br##\"foo\r\n\"##").expect("failed to parse"); + assert_eq!(lit.value(), b"foo\n"); +} + +#[test] +fn raw_byte_string() { + check!(br"", false, Some(0)); + check!(br"a", false, Some(0)); + check!(br"peter", false, Some(0)); + check!(br"Greetings jason!", false, Some(0)); + + check!(br#""#, false, Some(1)); + check!(br#"a"#, false, 
Some(1)); + check!(br##"peter"##, false, Some(2)); + check!(br###"Greetings # Jason!"###, false, Some(3)); + check!(br########"we ## need #### more ####### hashtags"########, false, Some(8)); + + check!(br#"foo " bar"#, false, Some(1)); + check!(br##"foo " bar"##, false, Some(2)); + check!(br#"foo """" '"'" bar"#, false, Some(1)); + check!(br#""foo""#, false, Some(1)); + check!(br###""foo'"###, false, Some(3)); + check!(br#""x'#_#s'"#, false, Some(1)); + check!(br"#", false, Some(0)); + check!(br"foo#", false, Some(0)); + check!(br"##bar", false, Some(0)); + check!(br###""##foo"##bar'"###, false, Some(3)); + + check!(br"foo\n\t\r\0\\x60\u{123}doggo", false, Some(0)); + check!(br#"cat\n\t\r\0\\x60\u{123}doggo"#, false, Some(1)); +} + +#[test] +fn suffixes() { + check!(b"hello", r###"b"hello"suffix"###, false, None, "suffix"); + check!(b"fox", r#"b"fox"peter"#, false, None, "peter"); + check!(b"a\x0cb\\", r#"b"a\x0cb\\"_jürgen"#, true, None, "_jürgen"); + check!(br"a\x0cb\\", r###"br#"a\x0cb\\"#_jürgen"###, false, Some(1), "_jürgen"); +} + +#[test] +fn parse_err() { + assert_err!(ByteStringLit, r#"b""#, UnterminatedString, None); + assert_err!(ByteStringLit, r#"b"cat"#, UnterminatedString, None); + assert_err!(ByteStringLit, r#"b"Jurgen"#, UnterminatedString, None); + assert_err!(ByteStringLit, r#"b"foo bar baz"#, UnterminatedString, None); + + assert_err!(ByteStringLit, r#"b"fox"peter""#, InvalidSuffix, 6); + assert_err!(ByteStringLit, r###"br#"foo "# bar"#"###, UnexpectedChar, 10); + + assert_err!(ByteStringLit, "b\"\r\"", IsolatedCr, 2); + assert_err!(ByteStringLit, "b\"fo\rx\"", IsolatedCr, 4); + + assert_err!(ByteStringLit, r##"br####""##, UnterminatedRawString, None); + assert_err!(ByteStringLit, r#####"br##"foo"#bar"#####, UnterminatedRawString, None); + assert_err!(ByteStringLit, r##"br####"##, InvalidLiteral, None); + assert_err!(ByteStringLit, r##"br####x"##, InvalidLiteral, None); +} + +#[test] +fn non_ascii() { + assert_err!(ByteStringLit, r#"b"న""#, 
NonAsciiInByteLiteral, 2); + assert_err!(ByteStringLit, r#"b"foo犬""#, NonAsciiInByteLiteral, 5); + assert_err!(ByteStringLit, r#"b"x🦊baz""#, NonAsciiInByteLiteral, 3); + assert_err!(ByteStringLit, r#"br"న""#, NonAsciiInByteLiteral, 3); + assert_err!(ByteStringLit, r#"br"foo犬""#, NonAsciiInByteLiteral, 6); + assert_err!(ByteStringLit, r#"br"x🦊baz""#, NonAsciiInByteLiteral, 4); +} + +#[test] +fn invalid_escapes() { + assert_err!(ByteStringLit, r#"b"\a""#, UnknownEscape, 2..4); + assert_err!(ByteStringLit, r#"b"foo\y""#, UnknownEscape, 5..7); + assert_err!(ByteStringLit, r#"b"\"#, UnterminatedEscape, 2); + assert_err!(ByteStringLit, r#"b"\x""#, UnterminatedEscape, 2..4); + assert_err!(ByteStringLit, r#"b"foo\x1""#, UnterminatedEscape, 5..8); + assert_err!(ByteStringLit, r#"b" \xaj""#, InvalidXEscape, 3..7); + assert_err!(ByteStringLit, r#"b"\xjbbaz""#, InvalidXEscape, 2..6); +} + +#[test] +fn unicode_escape_not_allowed() { + assert_err!(ByteStringLit, r#"b"\u{0}""#, UnicodeEscapeInByteLiteral, 2..4); + assert_err!(ByteStringLit, r#"b"\u{00}""#, UnicodeEscapeInByteLiteral, 2..4); + assert_err!(ByteStringLit, r#"b"\u{b}""#, UnicodeEscapeInByteLiteral, 2..4); + assert_err!(ByteStringLit, r#"b"\u{B}""#, UnicodeEscapeInByteLiteral, 2..4); + assert_err!(ByteStringLit, r#"b"\u{7e}""#, UnicodeEscapeInByteLiteral, 2..4); + assert_err!(ByteStringLit, r#"b"\u{E4}""#, UnicodeEscapeInByteLiteral, 2..4); + assert_err!(ByteStringLit, r#"b"\u{e4}""#, UnicodeEscapeInByteLiteral, 2..4); + assert_err!(ByteStringLit, r#"b"\u{fc}""#, UnicodeEscapeInByteLiteral, 2..4); + assert_err!(ByteStringLit, r#"b"\u{Fc}""#, UnicodeEscapeInByteLiteral, 2..4); + assert_err!(ByteStringLit, r#"b"\u{fC}""#, UnicodeEscapeInByteLiteral, 2..4); + assert_err!(ByteStringLit, r#"b"\u{FC}""#, UnicodeEscapeInByteLiteral, 2..4); + assert_err!(ByteStringLit, r#"b"\u{b10}""#, UnicodeEscapeInByteLiteral, 2..4); + assert_err!(ByteStringLit, r#"b"\u{B10}""#, UnicodeEscapeInByteLiteral, 2..4); + 
assert_err!(ByteStringLit, r#"b"\u{0b10}""#, UnicodeEscapeInByteLiteral, 2..4); + assert_err!(ByteStringLit, r#"b"\u{2764}""#, UnicodeEscapeInByteLiteral, 2..4); + assert_err!(ByteStringLit, r#"b"\u{1f602}""#, UnicodeEscapeInByteLiteral, 2..4); + assert_err!(ByteStringLit, r#"b"\u{1F602}""#, UnicodeEscapeInByteLiteral, 2..4); +} diff --git a/third_party/rust/litrs/src/char/mod.rs b/third_party/rust/litrs/src/char/mod.rs new file mode 100644 index 0000000000..54f6f1137f --- /dev/null +++ b/third_party/rust/litrs/src/char/mod.rs @@ -0,0 +1,105 @@ +use std::fmt; + +use crate::{ + Buffer, ParseError, + err::{perr, ParseErrorKind::*}, + escape::unescape, + parse::{first_byte_or_empty, check_suffix}, +}; + + +/// A character literal, e.g. `'g'` or `'🦊'`. +/// +/// See [the reference][ref] for more information. +/// +/// [ref]: https://doc.rust-lang.org/reference/tokens.html#character-literals +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct CharLit<B: Buffer> { + raw: B, + /// Start index of the suffix or `raw.len()` if there is no suffix. + start_suffix: usize, + value: char, +} + +impl<B: Buffer> CharLit<B> { + /// Parses the input as a character literal. Returns an error if the input + /// is invalid or represents a different kind of literal. + pub fn parse(input: B) -> Result<Self, ParseError> { + match first_byte_or_empty(&input)? { + b'\'' => { + let (value, start_suffix) = parse_impl(&input)?; + Ok(Self { raw: input, value, start_suffix }) + }, + _ => Err(perr(0, DoesNotStartWithQuote)), + } + } + + /// Returns the character value that this literal represents. + pub fn value(&self) -> char { + self.value + } + + /// The optional suffix. Returns `""` if the suffix is empty/does not exist. + pub fn suffix(&self) -> &str { + &(*self.raw)[self.start_suffix..] + } + + /// Returns the raw input that was passed to `parse`. + pub fn raw_input(&self) -> &str { + &self.raw + } + + /// Returns the raw input that was passed to `parse`, potentially owned. 
+ pub fn into_raw_input(self) -> B { + self.raw + } + +} + +impl CharLit<&str> { + /// Makes a copy of the underlying buffer and returns the owned version of + /// `Self`. + pub fn to_owned(&self) -> CharLit<String> { + CharLit { + raw: self.raw.to_owned(), + start_suffix: self.start_suffix, + value: self.value, + } + } +} + +impl<B: Buffer> fmt::Display for CharLit<B> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.pad(&self.raw) + } +} + +/// Precondition: first character in input must be `'`. +#[inline(never)] +pub(crate) fn parse_impl(input: &str) -> Result<(char, usize), ParseError> { + let first = input.chars().nth(1).ok_or(perr(None, UnterminatedCharLiteral))?; + let (c, len) = match first { + '\'' if input.chars().nth(2) == Some('\'') => return Err(perr(1, UnescapedSingleQuote)), + '\'' => return Err(perr(None, EmptyCharLiteral)), + '\n' | '\t' | '\r' + => return Err(perr(1, UnescapedSpecialWhitespace)), + + '\\' => unescape::<char>(&input[1..], 1)?, + other => (other, other.len_utf8()), + }; + + match input[1 + len..].find('\'') { + Some(0) => {} + Some(_) => return Err(perr(None, OverlongCharLiteral)), + None => return Err(perr(None, UnterminatedCharLiteral)), + } + + let start_suffix = 1 + len + 1; + let suffix = &input[start_suffix..]; + check_suffix(suffix).map_err(|kind| perr(start_suffix, kind))?; + + Ok((c, start_suffix)) +} + +#[cfg(test)] +mod tests; diff --git a/third_party/rust/litrs/src/char/tests.rs b/third_party/rust/litrs/src/char/tests.rs new file mode 100644 index 0000000000..19219db73b --- /dev/null +++ b/third_party/rust/litrs/src/char/tests.rs @@ -0,0 +1,227 @@ +use crate::{Literal, test_util::{assert_parse_ok_eq, assert_roundtrip}}; +use super::CharLit; + +// ===== Utility functions ======================================================================= + +macro_rules! 
check { + ($lit:literal) => { check!($lit, stringify!($lit), "") }; + ($lit:literal, $input:expr, $suffix:literal) => { + let input = $input; + let expected = CharLit { + raw: input, + start_suffix: input.len() - $suffix.len(), + value: $lit, + }; + + assert_parse_ok_eq(input, CharLit::parse(input), expected.clone(), "CharLit::parse"); + assert_parse_ok_eq(input, Literal::parse(input), Literal::Char(expected), "Literal::parse"); + let lit = CharLit::parse(input).unwrap(); + assert_eq!(lit.value(), $lit); + assert_eq!(lit.suffix(), $suffix); + assert_roundtrip(expected.to_owned(), input); + }; +} + + +// ===== Actual tests ============================================================================ + +#[test] +fn alphanumeric() { + check!('a'); + check!('b'); + check!('y'); + check!('z'); + check!('A'); + check!('B'); + check!('Y'); + check!('Z'); + + check!('0'); + check!('1'); + check!('8'); + check!('9'); +} + +#[test] +fn special_chars() { + check!(' '); + check!('!'); + check!('"'); + check!('#'); + check!('$'); + check!('%'); + check!('&'); + check!('('); + check!(')'); + check!('*'); + check!('+'); + check!(','); + check!('-'); + check!('.'); + check!('/'); + check!(':'); + check!(';'); + check!('<'); + check!('='); + check!('>'); + check!('?'); + check!('@'); + check!('['); + check!(']'); + check!('^'); + check!('_'); + check!('`'); + check!('{'); + check!('|'); + check!('}'); + check!('~'); +} + +#[test] +fn unicode() { + check!('న'); + check!('犬'); + check!('🦊'); +} + +#[test] +fn quote_escapes() { + check!('\''); + check!('\"'); +} + +#[test] +fn ascii_escapes() { + check!('\n'); + check!('\r'); + check!('\t'); + check!('\\'); + check!('\0'); + + check!('\x00'); + check!('\x01'); + check!('\x0c'); + check!('\x0D'); + check!('\x13'); + check!('\x30'); + check!('\x30'); + check!('\x4B'); + check!('\x6b'); + check!('\x7F'); + check!('\x7f'); +} + +#[test] +fn unicode_escapes() { + check!('\u{0}'); + check!('\u{00}'); + check!('\u{b}'); + check!('\u{B}'); + 
check!('\u{7e}'); + check!('\u{E4}'); + check!('\u{e4}'); + check!('\u{fc}'); + check!('\u{Fc}'); + check!('\u{fC}'); + check!('\u{FC}'); + check!('\u{b10}'); + check!('\u{B10}'); + check!('\u{0b10}'); + check!('\u{2764}'); + check!('\u{1f602}'); + check!('\u{1F602}'); + + check!('\u{0}'); + check!('\u{0__}'); + check!('\u{3_b}'); + check!('\u{1_F_6_0_2}'); + check!('\u{1_F6_02_____}'); +} + +#[test] +fn suffixes() { + check!('a', r##"'a'peter"##, "peter"); + check!('#', r##"'#'peter"##, "peter"); + check!('\n', r##"'\n'peter"##, "peter"); + check!('\'', r##"'\''peter"##, "peter"); + check!('\"', r##"'\"'peter"##, "peter"); +} + +#[test] +fn invald_ascii_escapes() { + assert_err!(CharLit, r"'\x80'", NonAsciiXEscape, 1..5); + assert_err!(CharLit, r"'\x81'", NonAsciiXEscape, 1..5); + assert_err!(CharLit, r"'\x8a'", NonAsciiXEscape, 1..5); + assert_err!(CharLit, r"'\x8F'", NonAsciiXEscape, 1..5); + assert_err!(CharLit, r"'\xa0'", NonAsciiXEscape, 1..5); + assert_err!(CharLit, r"'\xB0'", NonAsciiXEscape, 1..5); + assert_err!(CharLit, r"'\xc3'", NonAsciiXEscape, 1..5); + assert_err!(CharLit, r"'\xDf'", NonAsciiXEscape, 1..5); + assert_err!(CharLit, r"'\xff'", NonAsciiXEscape, 1..5); + assert_err!(CharLit, r"'\xfF'", NonAsciiXEscape, 1..5); + assert_err!(CharLit, r"'\xFf'", NonAsciiXEscape, 1..5); + assert_err!(CharLit, r"'\xFF'", NonAsciiXEscape, 1..5); +} + +#[test] +fn invalid_escapes() { + assert_err!(CharLit, r"'\a'", UnknownEscape, 1..3); + assert_err!(CharLit, r"'\y'", UnknownEscape, 1..3); + assert_err!(CharLit, r"'\", UnterminatedEscape, 1); + assert_err!(CharLit, r"'\x'", UnterminatedEscape, 1..4); + assert_err!(CharLit, r"'\x1'", InvalidXEscape, 1..5); + assert_err!(CharLit, r"'\xaj'", InvalidXEscape, 1..5); + assert_err!(CharLit, r"'\xjb'", InvalidXEscape, 1..5); +} + +#[test] +fn invalid_unicode_escapes() { + assert_err!(CharLit, r"'\u'", UnicodeEscapeWithoutBrace, 1..3); + assert_err!(CharLit, r"'\u '", UnicodeEscapeWithoutBrace, 1..3); + 
// General (non-escape) failure modes of `CharLit::parse`: empty, unterminated
// and overlong literals, empty input, unescaped quotes and unescaped control
// whitespace.
#[test]
fn parse_err() {
    // Nothing between the quotes, or trailing garbage after the closing quote.
    assert_err!(CharLit, r"''", EmptyCharLiteral, None);
    assert_err!(CharLit, r"' ''", UnexpectedChar, 3);

    // Missing closing quote, with and without an escape as content.
    assert_err!(CharLit, r"'", UnterminatedCharLiteral, None);
    assert_err!(CharLit, r"'a", UnterminatedCharLiteral, None);
    assert_err!(CharLit, r"'\n", UnterminatedCharLiteral, None);
    assert_err!(CharLit, r"'\x35", UnterminatedCharLiteral, None);

    // More than one character between the quotes.
    assert_err!(CharLit, r"'ab'", OverlongCharLiteral, None);
    assert_err!(CharLit, r"'a _'", OverlongCharLiteral, None);
    assert_err!(CharLit, r"'\n3'", OverlongCharLiteral, None);

    // Completely empty input.
    assert_err!(CharLit, r"", Empty, None);

    // A bare `'` as the content of the literal.
    assert_err!(CharLit, r"'''", UnescapedSingleQuote, 1);
    assert_err!(CharLit, r"''''", UnescapedSingleQuote, 1);

    // These inputs are NOT raw strings: the literal under test contains the
    // actual newline / tab / carriage-return character, not an escape.
    assert_err!(CharLit, "'\n'", UnescapedSpecialWhitespace, 1);
    assert_err!(CharLit, "'\t'", UnescapedSpecialWhitespace, 1);
    assert_err!(CharLit, "'\r'", UnescapedSpecialWhitespace, 1);
}
+ #[cfg(feature = "proc-macro2")] + pub fn to_compile_error2(&self) -> proc_macro2::TokenStream { + use proc_macro2::{Delimiter, Ident, Group, Punct, Spacing, TokenTree}; + + let span = match self.span { + Span::One(s) => proc_macro2::Span::from(s), + Span::Two(s) => s, + }; + let msg = self.to_string(); + let tokens = vec![ + TokenTree::from(Ident::new("compile_error", span)), + TokenTree::from(Punct::new('!', Spacing::Alone)), + TokenTree::from(Group::new( + Delimiter::Parenthesis, + TokenTree::from(proc_macro2::Literal::string(&msg)).into(), + )), + ]; + + + tokens.into_iter().map(|mut t| { t.set_span(span); t }).collect() + } +} + +impl std::error::Error for InvalidToken {} + +impl fmt::Display for InvalidToken { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fn kind_desc(kind: TokenKind) -> &'static str { + match kind { + TokenKind::Punct => "a punctuation character", + TokenKind::Ident => "an identifier", + TokenKind::Group => "a group", + TokenKind::Literal => "a literal", + TokenKind::BoolLit => "a bool literal (`true` or `false`)", + TokenKind::ByteLit => "a byte literal (e.g. `b'r')", + TokenKind::ByteStringLit => r#"a byte string literal (e.g. `b"fox"`)"#, + TokenKind::CharLit => "a character literal (e.g. `'P'`)", + TokenKind::FloatLit => "a float literal (e.g. `3.14`)", + TokenKind::IntegerLit => "an integer literal (e.g. `27`)", + TokenKind::StringLit => r#"a string literal (e.g. "Ferris")"#, + } + } + + write!(f, "expected {}, but found {}", kind_desc(self.expected), kind_desc(self.actual)) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum TokenKind { + Punct, + Ident, + Group, + Literal, + BoolLit, + ByteLit, + ByteStringLit, + CharLit, + FloatLit, + IntegerLit, + StringLit, +} + +/// Unfortunately, we have to deal with both cases. 
/// Errors during parsing.
///
/// This type should be seen primarily for error reporting and not for catching
/// specific cases. The span and error kind are not guaranteed to be stable
/// over different versions of this library, meaning that a returned error can
/// change from one version to the next. There are simply too many fringe cases
/// that are not easy to classify as a specific error kind. It depends entirely
/// on the specific parser code how an invalid input is categorized.
///
/// Consider these examples:
/// - `'\` can be seen as
///     - invalid escape in character literal, or
///     - unterminated character literal.
/// - `'''` can be seen as
///     - empty character literal, or
///     - unescaped quote character in character literal.
/// - `0b64` can be seen as
///     - binary integer literal with invalid digit 6, or
///     - binary integer literal with invalid digit 4, or
///     - decimal integer literal with invalid digit b, or
///     - decimal integer literal 0 with unknown type suffix `b64`.
///
/// If you want to see more of these examples, feel free to check out the unit
/// tests of this library.
///
/// While this library does its best to emit sensible and precise errors, and to
/// keep the returned errors as stable as possible, full stability cannot be
/// guaranteed.
#[derive(Debug, Clone)]
pub struct ParseError {
    /// Location of the error inside the parsed input, or `None` if the error
    /// cannot be attributed to a specific position.
    // NOTE(review): the parsers appear to build these ranges from byte
    // indices into the input string -- confirm before relying on it.
    pub(crate) span: Option<Range<usize>>,
    /// What went wrong.
    pub(crate) kind: ParseErrorKind,
}
/// Kinds of errors.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub(crate) enum ParseErrorKind {
    /// The input was an empty string
    Empty,

    /// An unexpected char was encountered.
    UnexpectedChar,

    /// Literal was not recognized.
    InvalidLiteral,

    /// Input does not start with decimal digit when trying to parse an integer.
    DoesNotStartWithDigit,

    /// A digit invalid for the specified integer base was found.
    InvalidDigit,

    /// Integer literal does not contain any valid digits.
    NoDigits,

    /// Exponent of a float literal does not contain any digits.
    NoExponentDigits,

    /// An unknown escape code, e.g. `\b`.
    UnknownEscape,

    /// A started escape sequence where the input ended before the escape was
    /// finished.
    UnterminatedEscape,

    /// An `\x` escape where the two digits are not valid hex digits.
    InvalidXEscape,

    /// A string or character literal using the `\xNN` escape where `NN > 0x7F`.
    NonAsciiXEscape,

    /// A `\u{...}` escape in a byte or byte string literal.
    UnicodeEscapeInByteLiteral,

    /// A Unicode escape that does not start with a hex digit.
    InvalidStartOfUnicodeEscape,

    /// A `\u{...}` escape that lacks the opening brace.
    UnicodeEscapeWithoutBrace,

    /// In a `\u{...}` escape, a non-hex digit and non-underscore character was
    /// found.
    NonHexDigitInUnicodeEscape,

    /// More than 6 digits found in unicode escape.
    TooManyDigitInUnicodeEscape,

    /// The value from a unicode escape does not represent a valid character.
    InvalidUnicodeEscapeChar,

    /// A `\u{..` escape that is not terminated (lacks the closing brace).
    UnterminatedUnicodeEscape,

    /// A character literal that's not terminated.
    UnterminatedCharLiteral,

    /// A character literal that contains more than one character.
    OverlongCharLiteral,

    /// An empty character literal, i.e. `''`.
    EmptyCharLiteral,

    /// A byte literal that's not terminated.
    UnterminatedByteLiteral,
    /// A byte literal that contains more than one byte.
    OverlongByteLiteral,
    /// An empty byte literal.
    EmptyByteLiteral,
    /// A non-ASCII character in a byte or byte string literal.
    NonAsciiInByteLiteral,

    /// A `'` character was not escaped in a character or byte literal, or a `"`
    /// character was not escaped in a string or byte string literal.
    UnescapedSingleQuote,

    /// A \n, \t or \r raw character in a char or byte literal.
    UnescapedSpecialWhitespace,

    /// When parsing a character, byte, string or byte string literal directly
    /// and the input does not start with the corresponding quote character
    /// (plus optional raw string prefix).
    DoesNotStartWithQuote,

    /// Unterminated raw string literal.
    UnterminatedRawString,

    /// String literal without a `"` at the end.
    UnterminatedString,

    /// Invalid start for a string literal.
    InvalidStringLiteralStart,

    /// Invalid start for a byte literal.
    InvalidByteLiteralStart,

    /// Invalid start for a byte string literal.
    InvalidByteStringLiteralStart,

    /// A literal `\r` character not followed by a `\n` character in a
    /// (raw) string or byte string literal.
    IsolatedCr,

    /// Literal suffix is not a valid identifier.
    InvalidSuffix,

    /// Returned by `Float::parse` if an integer literal (no fractional nor
    /// exponent part) is passed.
    UnexpectedIntegerLit,

    /// Integer suffixes cannot start with `e` or `E` as this conflicts with the
    /// grammar for float literals.
    IntegerSuffixStartingWithE,
}
UnterminatedByteLiteral => "byte literal is not terminated", + OverlongByteLiteral => "byte literal contains more than one byte", + EmptyByteLiteral => "empty byte literal", + NonAsciiInByteLiteral => "non ASCII character in byte (string) literal", + UnescapedSingleQuote => "character literal contains unescaped ' character", + UnescapedSpecialWhitespace => r"unescaped newline (\n), tab (\t) or cr (\r) character", + DoesNotStartWithQuote => "invalid start for char/byte/string literal", + UnterminatedRawString => "unterminated raw (byte) string literal", + UnterminatedString => "unterminated (byte) string literal", + InvalidStringLiteralStart => "invalid start for string literal", + InvalidByteLiteralStart => "invalid start for byte literal", + InvalidByteStringLiteralStart => "invalid start for byte string literal", + IsolatedCr => r"`\r` not immediately followed by `\n` in string", + InvalidSuffix => "literal suffix is not a valid identifier", + UnexpectedIntegerLit => "expected float literal, but found integer", + IntegerSuffixStartingWithE => "integer literal suffix must not start with 'e' or 'E'", + }; + + description.fmt(f)?; + if let Some(span) = &self.span { + write!(f, " (at {}..{})", span.start, span.end)?; + } + + Ok(()) + } +} diff --git a/third_party/rust/litrs/src/escape.rs b/third_party/rust/litrs/src/escape.rs new file mode 100644 index 0000000000..5eb8382bc4 --- /dev/null +++ b/third_party/rust/litrs/src/escape.rs @@ -0,0 +1,262 @@ +use crate::{ParseError, err::{perr, ParseErrorKind::*}, parse::{hex_digit_value, check_suffix}}; + + +/// Must start with `\` +pub(crate) fn unescape<E: Escapee>(input: &str, offset: usize) -> Result<(E, usize), ParseError> { + let first = input.as_bytes().get(1) + .ok_or(perr(offset, UnterminatedEscape))?; + let out = match first { + // Quote escapes + b'\'' => (E::from_byte(b'\''), 2), + b'"' => (E::from_byte(b'"'), 2), + + // Ascii escapes + b'n' => (E::from_byte(b'\n'), 2), + b'r' => (E::from_byte(b'\r'), 2), + b't' 
=> (E::from_byte(b'\t'), 2), + b'\\' => (E::from_byte(b'\\'), 2), + b'0' => (E::from_byte(b'\0'), 2), + b'x' => { + let hex_string = input.get(2..4) + .ok_or(perr(offset..offset + input.len(), UnterminatedEscape))? + .as_bytes(); + let first = hex_digit_value(hex_string[0]) + .ok_or(perr(offset..offset + 4, InvalidXEscape))?; + let second = hex_digit_value(hex_string[1]) + .ok_or(perr(offset..offset + 4, InvalidXEscape))?; + let value = second + 16 * first; + + if E::SUPPORTS_UNICODE && value > 0x7F { + return Err(perr(offset..offset + 4, NonAsciiXEscape)); + } + + (E::from_byte(value), 4) + }, + + // Unicode escape + b'u' => { + if !E::SUPPORTS_UNICODE { + return Err(perr(offset..offset + 2, UnicodeEscapeInByteLiteral)); + } + + if input.as_bytes().get(2) != Some(&b'{') { + return Err(perr(offset..offset + 2, UnicodeEscapeWithoutBrace)); + } + + let closing_pos = input.bytes().position(|b| b == b'}') + .ok_or(perr(offset..offset + input.len(), UnterminatedUnicodeEscape))?; + + let inner = &input[3..closing_pos]; + if inner.as_bytes().first() == Some(&b'_') { + return Err(perr(4, InvalidStartOfUnicodeEscape)); + } + + let mut v: u32 = 0; + let mut digit_count = 0; + for (i, b) in inner.bytes().enumerate() { + if b == b'_'{ + continue; + } + + let digit = hex_digit_value(b) + .ok_or(perr(offset + 3 + i, NonHexDigitInUnicodeEscape))?; + + if digit_count == 6 { + return Err(perr(offset + 3 + i, TooManyDigitInUnicodeEscape)); + } + digit_count += 1; + v = 16 * v + digit as u32; + } + + let c = std::char::from_u32(v) + .ok_or(perr(offset..closing_pos + 1, InvalidUnicodeEscapeChar))?; + + (E::from_char(c), closing_pos + 1) + } + + _ => return Err(perr(offset..offset + 2, UnknownEscape)), + }; + + Ok(out) +} + +pub(crate) trait Escapee: Into<char> { + const SUPPORTS_UNICODE: bool; + fn from_byte(b: u8) -> Self; + fn from_char(c: char) -> Self; +} + +impl Escapee for u8 { + const SUPPORTS_UNICODE: bool = false; + fn from_byte(b: u8) -> Self { + b + } + fn from_char(_: 
char) -> Self { + panic!("bug: `<u8 as Escapee>::from_char` was called"); + } +} + +impl Escapee for char { + const SUPPORTS_UNICODE: bool = true; + fn from_byte(b: u8) -> Self { + b.into() + } + fn from_char(c: char) -> Self { + c + } +} + +/// Checks whether the character is skipped after a string continue start +/// (unescaped backlash followed by `\n`). +fn is_string_continue_skipable_whitespace(b: u8) -> bool { + b == b' ' || b == b'\t' || b == b'\n' || b == b'\r' +} + +/// Unescapes a whole string or byte string. +#[inline(never)] +pub(crate) fn unescape_string<E: Escapee>( + input: &str, + offset: usize, +) -> Result<(Option<String>, usize), ParseError> { + let mut closing_quote_pos = None; + let mut i = offset; + let mut end_last_escape = offset; + let mut value = String::new(); + while i < input.len() { + match input.as_bytes()[i] { + // Handle "string continue". + b'\\' if input.as_bytes().get(i + 1) == Some(&b'\n') => { + value.push_str(&input[end_last_escape..i]); + + // Find the first non-whitespace character. 
+ let end_escape = input[i + 2..].bytes() + .position(|b| !is_string_continue_skipable_whitespace(b)) + .ok_or(perr(None, UnterminatedString))?; + + i += 2 + end_escape; + end_last_escape = i; + } + b'\\' => { + let (c, len) = unescape::<E>(&input[i..input.len() - 1], i)?; + value.push_str(&input[end_last_escape..i]); + value.push(c.into()); + i += len; + end_last_escape = i; + } + b'\r' => { + if input.as_bytes().get(i + 1) == Some(&b'\n') { + value.push_str(&input[end_last_escape..i]); + value.push('\n'); + i += 2; + end_last_escape = i; + } else { + return Err(perr(i, IsolatedCr)) + } + } + b'"' => { + closing_quote_pos = Some(i); + break; + }, + b if !E::SUPPORTS_UNICODE && !b.is_ascii() + => return Err(perr(i, NonAsciiInByteLiteral)), + _ => i += 1, + } + } + + let closing_quote_pos = closing_quote_pos.ok_or(perr(None, UnterminatedString))?; + + let start_suffix = closing_quote_pos + 1; + let suffix = &input[start_suffix..]; + check_suffix(suffix).map_err(|kind| perr(start_suffix, kind))?; + + // `value` is only empty if there was no escape in the input string + // (with the special case of the input being empty). This means the + // string value basically equals the input, so we store `None`. + let value = if value.is_empty() { + None + } else { + // There was an escape in the string, so we need to push the + // remaining unescaped part of the string still. + value.push_str(&input[end_last_escape..closing_quote_pos]); + Some(value) + }; + + Ok((value, start_suffix)) +} + +/// Reads and checks a raw (byte) string literal, converting `\r\n` sequences to +/// just `\n` sequences. Returns an optional new string (if the input contained +/// any `\r\n`) and the number of hashes used by the literal. 
+#[inline(never)] +pub(crate) fn scan_raw_string<E: Escapee>( + input: &str, + offset: usize, +) -> Result<(Option<String>, u32, usize), ParseError> { + // Raw string literal + let num_hashes = input[offset..].bytes().position(|b| b != b'#') + .ok_or(perr(None, InvalidLiteral))?; + + if input.as_bytes().get(offset + num_hashes) != Some(&b'"') { + return Err(perr(None, InvalidLiteral)); + } + let start_inner = offset + num_hashes + 1; + let hashes = &input[offset..num_hashes + offset]; + + let mut closing_quote_pos = None; + let mut i = start_inner; + let mut end_last_escape = start_inner; + let mut value = String::new(); + while i < input.len() { + let b = input.as_bytes()[i]; + if b == b'"' && input[i + 1..].starts_with(hashes) { + closing_quote_pos = Some(i); + break; + } + + if b == b'\r' { + // Convert `\r\n` into `\n`. This is currently not well documented + // in the Rust reference, but is done even for raw strings. That's + // because rustc simply converts all line endings when reading + // source files. + if input.as_bytes().get(i + 1) == Some(&b'\n') { + value.push_str(&input[end_last_escape..i]); + value.push('\n'); + i += 2; + end_last_escape = i; + continue; + } else if E::SUPPORTS_UNICODE { + // If no \n follows the \r and we are scanning a raw string + // (not raw byte string), we error. + return Err(perr(i, IsolatedCr)) + } + } + + if !E::SUPPORTS_UNICODE { + if !b.is_ascii() { + return Err(perr(i, NonAsciiInByteLiteral)); + } + } + + i += 1; + } + + let closing_quote_pos = closing_quote_pos.ok_or(perr(None, UnterminatedRawString))?; + + let start_suffix = closing_quote_pos + num_hashes + 1; + let suffix = &input[start_suffix..]; + check_suffix(suffix).map_err(|kind| perr(start_suffix, kind))?; + + // `value` is only empty if there was no \r\n in the input string (with the + // special case of the input being empty). This means the string value + // equals the input, so we store `None`. 
+ let value = if value.is_empty() { + None + } else { + // There was an \r\n in the string, so we need to push the remaining + // unescaped part of the string still. + value.push_str(&input[end_last_escape..closing_quote_pos]); + Some(value) + }; + + Ok((value, num_hashes as u32, start_suffix)) +} diff --git a/third_party/rust/litrs/src/float/mod.rs b/third_party/rust/litrs/src/float/mod.rs new file mode 100644 index 0000000000..0518633a6b --- /dev/null +++ b/third_party/rust/litrs/src/float/mod.rs @@ -0,0 +1,257 @@ +use std::{fmt, str::FromStr}; + +use crate::{ + Buffer, ParseError, + err::{perr, ParseErrorKind::*}, + parse::{end_dec_digits, first_byte_or_empty, check_suffix}, +}; + + + +/// A floating point literal, e.g. `3.14`, `8.`, `135e12`, or `1.956e2f64`. +/// +/// This kind of literal has several forms, but generally consists of a main +/// number part, an optional exponent and an optional type suffix. See +/// [the reference][ref] for more information. +/// +/// A leading minus sign `-` is not part of the literal grammar! `-3.14` are two +/// tokens in the Rust grammar. Further, `27` and `27f32` are both not float, +/// but integer literals! Consequently `FloatLit::parse` will reject them. +/// +/// +/// [ref]: https://doc.rust-lang.org/reference/tokens.html#floating-point-literals +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct FloatLit<B: Buffer> { + /// The whole raw input. The `usize` fields in this struct partition this + /// string. Always true: `end_integer_part <= end_fractional_part`. + /// + /// ```text + /// 12_3.4_56e789f32 + /// ╷ ╷ ╷ + /// | | └ end_number_part = 13 + /// | └ end_fractional_part = 9 + /// └ end_integer_part = 4 + /// + /// 246. + /// ╷╷ + /// |└ end_fractional_part = end_number_part = 4 + /// └ end_integer_part = 3 + /// + /// 1234e89 + /// ╷ ╷ + /// | └ end_number_part = 7 + /// └ end_integer_part = end_fractional_part = 4 + /// ``` + raw: B, + + /// The first index not part of the integer part anymore. 
Since the integer + /// part is at the start, this is also the length of that part. + end_integer_part: usize, + + /// The first index after the fractional part. + end_fractional_part: usize, + + /// The first index after the whole number part (everything except type suffix). + end_number_part: usize, +} + +impl<B: Buffer> FloatLit<B> { + /// Parses the input as a floating point literal. Returns an error if the + /// input is invalid or represents a different kind of literal. Will also + /// reject decimal integer literals like `23` or `17f32`, in accordance + /// with the spec. + pub fn parse(s: B) -> Result<Self, ParseError> { + match first_byte_or_empty(&s)? { + b'0'..=b'9' => { + // TODO: simplify once RFC 2528 is stabilized + let FloatLit { + end_integer_part, + end_fractional_part, + end_number_part, + .. + } = parse_impl(&s)?; + + Ok(Self { raw: s, end_integer_part, end_fractional_part, end_number_part }) + }, + _ => Err(perr(0, DoesNotStartWithDigit)), + } + } + + /// Returns the number part (including integer part, fractional part and + /// exponent), but without the suffix. If you want an actual floating + /// point value, you need to parse this string, e.g. with `f32::from_str` + /// or an external crate. + pub fn number_part(&self) -> &str { + &(*self.raw)[..self.end_number_part] + } + + /// Returns the non-empty integer part of this literal. + pub fn integer_part(&self) -> &str { + &(*self.raw)[..self.end_integer_part] + } + + /// Returns the optional fractional part of this literal. Does not include + /// the period. If a period exists in the input, `Some` is returned, `None` + /// otherwise. Note that `Some("")` might be returned, e.g. for `3.`. + pub fn fractional_part(&self) -> Option<&str> { + if self.end_integer_part == self.end_fractional_part { + None + } else { + Some(&(*self.raw)[self.end_integer_part + 1..self.end_fractional_part]) + } + } + + /// Optional exponent part. Might be empty if there was no exponent part in + /// the input. 
Includes the `e` or `E` at the beginning. + pub fn exponent_part(&self) -> &str { + &(*self.raw)[self.end_fractional_part..self.end_number_part] + } + + /// The optional suffix. Returns `""` if the suffix is empty/does not exist. + pub fn suffix(&self) -> &str { + &(*self.raw)[self.end_number_part..] + } + + /// Returns the raw input that was passed to `parse`. + pub fn raw_input(&self) -> &str { + &self.raw + } + + /// Returns the raw input that was passed to `parse`, potentially owned. + pub fn into_raw_input(self) -> B { + self.raw + } +} + +impl FloatLit<&str> { + /// Makes a copy of the underlying buffer and returns the owned version of + /// `Self`. + pub fn to_owned(&self) -> FloatLit<String> { + FloatLit { + raw: self.raw.to_owned(), + end_integer_part: self.end_integer_part, + end_fractional_part: self.end_fractional_part, + end_number_part: self.end_number_part, + } + } +} + +impl<B: Buffer> fmt::Display for FloatLit<B> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", &*self.raw) + } +} + +/// Precondition: first byte of string has to be in `b'0'..=b'9'`. +#[inline(never)] +pub(crate) fn parse_impl(input: &str) -> Result<FloatLit<&str>, ParseError> { + // Integer part. + let end_integer_part = end_dec_digits(input.as_bytes()); + let rest = &input[end_integer_part..]; + + + // Fractional part. + let end_fractional_part = if rest.as_bytes().get(0) == Some(&b'.') { + // The fractional part must not start with `_`. + if rest.as_bytes().get(1) == Some(&b'_') { + return Err(perr(end_integer_part + 1, UnexpectedChar)); + } + + end_dec_digits(rest[1..].as_bytes()) + 1 + end_integer_part + } else { + end_integer_part + }; + let rest = &input[end_fractional_part..]; + + // If we have a period that is not followed by decimal digits, the + // literal must end now. + if end_integer_part + 1 == end_fractional_part && !rest.is_empty() { + return Err(perr(end_integer_part + 1, UnexpectedChar)); + } + + // Optional exponent. 
+ let end_number_part = if rest.starts_with('e') || rest.starts_with('E') { + // Strip single - or + sign at the beginning. + let exp_number_start = match rest.as_bytes().get(1) { + Some(b'-') | Some(b'+') => 2, + _ => 1, + }; + + // Find end of exponent and make sure there is at least one digit. + let end_exponent = end_dec_digits(rest[exp_number_start..].as_bytes()) + exp_number_start; + if !rest[exp_number_start..end_exponent].bytes().any(|b| matches!(b, b'0'..=b'9')) { + return Err(perr( + end_fractional_part..end_fractional_part + end_exponent, + NoExponentDigits, + )); + } + + end_exponent + end_fractional_part + } else { + end_fractional_part + }; + + // Make sure the suffix is valid. + let suffix = &input[end_number_part..]; + check_suffix(suffix).map_err(|kind| perr(end_number_part..input.len(), kind))?; + + // A float literal needs either a fractional or exponent part, otherwise its + // an integer literal. + if end_integer_part == end_number_part { + return Err(perr(None, UnexpectedIntegerLit)); + } + + Ok(FloatLit { + raw: input, + end_integer_part, + end_fractional_part, + end_number_part, + }) +} + + +/// All possible float type suffixes. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[non_exhaustive] +pub enum FloatType { + F32, + F64, +} + +impl FloatType { + /// Returns the type corresponding to the given suffix (e.g. `"f32"` is + /// mapped to `Self::F32`). If the suffix is not a valid float type, `None` + /// is returned. + pub fn from_suffix(suffix: &str) -> Option<Self> { + match suffix { + "f32" => Some(FloatType::F32), + "f64" => Some(FloatType::F64), + _ => None, + } + } + + /// Returns the suffix for this type, e.g. `"f32"` for `Self::F32`. 
+ pub fn suffix(self) -> &'static str { + match self { + Self::F32 => "f32", + Self::F64 => "f64", + } + } +} + +impl FromStr for FloatType { + type Err = (); + fn from_str(s: &str) -> Result<Self, Self::Err> { + Self::from_suffix(s).ok_or(()) + } +} + +impl fmt::Display for FloatType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.suffix().fmt(f) + } +} + + +#[cfg(test)] +mod tests; diff --git a/third_party/rust/litrs/src/float/tests.rs b/third_party/rust/litrs/src/float/tests.rs new file mode 100644 index 0000000000..f22443bd19 --- /dev/null +++ b/third_party/rust/litrs/src/float/tests.rs @@ -0,0 +1,253 @@ +use crate::{ + Literal, ParseError, + test_util::{assert_parse_ok_eq, assert_roundtrip}, +}; +use super::{FloatLit, FloatType}; + + +// ===== Utility functions ======================================================================= + +/// Helper macro to check parsing a float. +/// +/// This macro contains quite a bit of logic itself (which can be buggy of +/// course), so we have a few test functions below to test a bunch of cases +/// manually. +macro_rules! 
check { + ($intpart:literal $fracpart:literal $exppart:literal $suffix:tt) => { + let input = concat!($intpart, $fracpart, $exppart, check!(@stringify_suffix $suffix)); + let expected_float = FloatLit { + raw: input, + end_integer_part: $intpart.len(), + end_fractional_part: $intpart.len() + $fracpart.len(), + end_number_part: $intpart.len() + $fracpart.len() + $exppart.len(), + }; + + assert_parse_ok_eq( + input, FloatLit::parse(input), expected_float.clone(), "FloatLit::parse"); + assert_parse_ok_eq( + input, Literal::parse(input), Literal::Float(expected_float), "Literal::parse"); + assert_eq!(FloatLit::parse(input).unwrap().suffix(), check!(@ty $suffix)); + assert_roundtrip(expected_float.to_owned(), input); + }; + (@ty f32) => { "f32" }; + (@ty f64) => { "f64" }; + (@ty -) => { "" }; + (@stringify_suffix -) => { "" }; + (@stringify_suffix $suffix:ident) => { stringify!($suffix) }; +} + + +// ===== Actual tests =========================================================================== + +#[test] +fn manual_without_suffix() -> Result<(), ParseError> { + let f = FloatLit::parse("3.14")?; + assert_eq!(f.number_part(), "3.14"); + assert_eq!(f.integer_part(), "3"); + assert_eq!(f.fractional_part(), Some("14")); + assert_eq!(f.exponent_part(), ""); + assert_eq!(f.suffix(), ""); + + let f = FloatLit::parse("9.")?; + assert_eq!(f.number_part(), "9."); + assert_eq!(f.integer_part(), "9"); + assert_eq!(f.fractional_part(), Some("")); + assert_eq!(f.exponent_part(), ""); + assert_eq!(f.suffix(), ""); + + let f = FloatLit::parse("8e1")?; + assert_eq!(f.number_part(), "8e1"); + assert_eq!(f.integer_part(), "8"); + assert_eq!(f.fractional_part(), None); + assert_eq!(f.exponent_part(), "e1"); + assert_eq!(f.suffix(), ""); + + let f = FloatLit::parse("8E3")?; + assert_eq!(f.number_part(), "8E3"); + assert_eq!(f.integer_part(), "8"); + assert_eq!(f.fractional_part(), None); + assert_eq!(f.exponent_part(), "E3"); + assert_eq!(f.suffix(), ""); + + let f = 
FloatLit::parse("8_7_6.1_23e15")?; + assert_eq!(f.number_part(), "8_7_6.1_23e15"); + assert_eq!(f.integer_part(), "8_7_6"); + assert_eq!(f.fractional_part(), Some("1_23")); + assert_eq!(f.exponent_part(), "e15"); + assert_eq!(f.suffix(), ""); + + let f = FloatLit::parse("8.2e-_04_9")?; + assert_eq!(f.number_part(), "8.2e-_04_9"); + assert_eq!(f.integer_part(), "8"); + assert_eq!(f.fractional_part(), Some("2")); + assert_eq!(f.exponent_part(), "e-_04_9"); + assert_eq!(f.suffix(), ""); + + Ok(()) +} + +#[test] +fn manual_with_suffix() -> Result<(), ParseError> { + let f = FloatLit::parse("3.14f32")?; + assert_eq!(f.number_part(), "3.14"); + assert_eq!(f.integer_part(), "3"); + assert_eq!(f.fractional_part(), Some("14")); + assert_eq!(f.exponent_part(), ""); + assert_eq!(FloatType::from_suffix(f.suffix()), Some(FloatType::F32)); + + let f = FloatLit::parse("8e1f64")?; + assert_eq!(f.number_part(), "8e1"); + assert_eq!(f.integer_part(), "8"); + assert_eq!(f.fractional_part(), None); + assert_eq!(f.exponent_part(), "e1"); + assert_eq!(FloatType::from_suffix(f.suffix()), Some(FloatType::F64)); + + let f = FloatLit::parse("8_7_6.1_23e15f32")?; + assert_eq!(f.number_part(), "8_7_6.1_23e15"); + assert_eq!(f.integer_part(), "8_7_6"); + assert_eq!(f.fractional_part(), Some("1_23")); + assert_eq!(f.exponent_part(), "e15"); + assert_eq!(FloatType::from_suffix(f.suffix()), Some(FloatType::F32)); + + let f = FloatLit::parse("8.2e-_04_9f64")?; + assert_eq!(f.number_part(), "8.2e-_04_9"); + assert_eq!(f.integer_part(), "8"); + assert_eq!(f.fractional_part(), Some("2")); + assert_eq!(f.exponent_part(), "e-_04_9"); + assert_eq!(FloatType::from_suffix(f.suffix()), Some(FloatType::F64)); + + Ok(()) +} + +#[test] +fn simple() { + check!("3" ".14" "" -); + check!("3" ".14" "" f32); + check!("3" ".14" "" f64); + + check!("3" "" "e987654321" -); + check!("3" "" "e987654321" f64); + + check!("42_888" ".05" "" -); + check!("42_888" ".05" "E5___" f32); + check!("123456789" "" "e_1" f64); + 
check!("123456789" ".99" "e_1" f64); + check!("123456789" ".99" "" f64); + check!("123456789" ".99" "" -); + + check!("147" ".3_33" "" -); + check!("147" ".3_33__" "E3" f64); + check!("147" ".3_33__" "" f32); + + check!("147" ".333" "e-10" -); + check!("147" ".333" "e-_7" f32); + check!("147" ".333" "e+10" -); + check!("147" ".333" "e+_7" f32); + + check!("86" "." "" -); + check!("0" "." "" -); + check!("0_" "." "" -); + check!("0" ".0000001" "" -); + check!("0" ".000_0001" "" -); + + check!("0" ".0" "e+0" -); + check!("0" "" "E+0" -); + check!("34" "" "e+0" -); + check!("0" ".9182" "E+0" f32); +} + +#[test] +fn non_standard_suffixes() { + #[track_caller] + fn check_suffix( + input: &str, + integer_part: &str, + fractional_part: Option<&str>, + exponent_part: &str, + suffix: &str, + ) { + let lit = FloatLit::parse(input) + .unwrap_or_else(|e| panic!("expected to parse '{}' but got {}", input, e)); + assert_eq!(lit.integer_part(), integer_part); + assert_eq!(lit.fractional_part(), fractional_part); + assert_eq!(lit.exponent_part(), exponent_part); + assert_eq!(lit.suffix(), suffix); + + let lit = match Literal::parse(input) { + Ok(Literal::Float(f)) => f, + other => panic!("Expected float literal, but got {:?} for '{}'", other, input), + }; + assert_eq!(lit.integer_part(), integer_part); + assert_eq!(lit.fractional_part(), fractional_part); + assert_eq!(lit.exponent_part(), exponent_part); + assert_eq!(lit.suffix(), suffix); + } + + check_suffix("7.1f23", "7", Some("1"), "", "f23"); + check_suffix("7.1f320", "7", Some("1"), "", "f320"); + check_suffix("7.1f64_", "7", Some("1"), "", "f64_"); + check_suffix("8.1f649", "8", Some("1"), "", "f649"); + check_suffix("8.1f64f32", "8", Some("1"), "", "f64f32"); + check_suffix("23e2_banana", "23", None, "e2_", "banana"); + check_suffix("23.2_banana", "23", Some("2_"), "", "banana"); + check_suffix("23e2pe55ter", "23", None, "e2", "pe55ter"); + check_suffix("23e2p_e55ter", "23", None, "e2", "p_e55ter"); + 
check_suffix("3.15Jürgen", "3", Some("15"), "", "Jürgen"); + check_suffix("3e2e5", "3", None, "e2", "e5"); + check_suffix("3e2e5f", "3", None, "e2", "e5f"); +} + +#[test] +fn parse_err() { + assert_err!(FloatLit, "", Empty, None); + assert_err_single!(FloatLit::parse("."), DoesNotStartWithDigit, 0); + assert_err_single!(FloatLit::parse("+"), DoesNotStartWithDigit, 0); + assert_err_single!(FloatLit::parse("-"), DoesNotStartWithDigit, 0); + assert_err_single!(FloatLit::parse("e"), DoesNotStartWithDigit, 0); + assert_err_single!(FloatLit::parse("e8"), DoesNotStartWithDigit, 0); + assert_err!(FloatLit, "0e", NoExponentDigits, 1..2); + assert_err_single!(FloatLit::parse("f32"), DoesNotStartWithDigit, 0); + assert_err_single!(FloatLit::parse("foo"), DoesNotStartWithDigit, 0); + + assert_err_single!(FloatLit::parse("inf"), DoesNotStartWithDigit, 0); + assert_err_single!(FloatLit::parse("nan"), DoesNotStartWithDigit, 0); + assert_err_single!(FloatLit::parse("NaN"), DoesNotStartWithDigit, 0); + assert_err_single!(FloatLit::parse("NAN"), DoesNotStartWithDigit, 0); + + assert_err_single!(FloatLit::parse("_2.7"), DoesNotStartWithDigit, 0); + assert_err_single!(FloatLit::parse(".5"), DoesNotStartWithDigit, 0); + assert_err!(FloatLit, "1e", NoExponentDigits, 1..2); + assert_err!(FloatLit, "1.e4", UnexpectedChar, 2); + assert_err!(FloatLit, "3._4", UnexpectedChar, 2); + assert_err!(FloatLit, "3.f32", UnexpectedChar, 2); + assert_err!(FloatLit, "3.e5", UnexpectedChar, 2); + assert_err!(FloatLit, "12345._987", UnexpectedChar, 6); + assert_err!(FloatLit, "46._", UnexpectedChar, 3); + assert_err!(FloatLit, "46.f32", UnexpectedChar, 3); + assert_err!(FloatLit, "46.e3", UnexpectedChar, 3); + assert_err!(FloatLit, "46._e3", UnexpectedChar, 3); + assert_err!(FloatLit, "46.e3f64", UnexpectedChar, 3); + assert_err!(FloatLit, "23.4e_", NoExponentDigits, 4..6); + assert_err!(FloatLit, "23E___f32", NoExponentDigits, 2..6); + assert_err!(FloatLit, "55e3.1", UnexpectedChar, 4..6); + + 
assert_err!(FloatLit, "3.7+", UnexpectedChar, 3..4); + assert_err!(FloatLit, "3.7+2", UnexpectedChar, 3..5); + assert_err!(FloatLit, "3.7-", UnexpectedChar, 3..4); + assert_err!(FloatLit, "3.7-2", UnexpectedChar, 3..5); + assert_err!(FloatLit, "3.7e+", NoExponentDigits, 3..5); + assert_err!(FloatLit, "3.7e-", NoExponentDigits, 3..5); + assert_err!(FloatLit, "3.7e-+3", NoExponentDigits, 3..5); // suboptimal error + assert_err!(FloatLit, "3.7e+-3", NoExponentDigits, 3..5); // suboptimal error + assert_err_single!(FloatLit::parse("0x44.5"), InvalidSuffix, 1..6); + + assert_err_single!(FloatLit::parse("3"), UnexpectedIntegerLit, None); + assert_err_single!(FloatLit::parse("35_389"), UnexpectedIntegerLit, None); + assert_err_single!(FloatLit::parse("9_8_7f32"), UnexpectedIntegerLit, None); + assert_err_single!(FloatLit::parse("9_8_7banana"), UnexpectedIntegerLit, None); + assert_err_single!(FloatLit::parse("7f23"), UnexpectedIntegerLit, None); + assert_err_single!(FloatLit::parse("7f320"), UnexpectedIntegerLit, None); + assert_err_single!(FloatLit::parse("7f64_"), UnexpectedIntegerLit, None); + assert_err_single!(FloatLit::parse("8f649"), UnexpectedIntegerLit, None); + assert_err_single!(FloatLit::parse("8f64f32"), UnexpectedIntegerLit, None); +} diff --git a/third_party/rust/litrs/src/impls.rs b/third_party/rust/litrs/src/impls.rs new file mode 100644 index 0000000000..61a314dd84 --- /dev/null +++ b/third_party/rust/litrs/src/impls.rs @@ -0,0 +1,401 @@ +use std::convert::TryFrom; + +use crate::{Literal, err::{InvalidToken, TokenKind}}; + + +/// Helper macro to call a `callback` macro four times for all combinations of +/// `proc_macro`/`proc_macro2` and `&`/owned. +macro_rules! 
helper { + ($callback:ident, $($input:tt)*) => { + $callback!([proc_macro::] => $($input)*); + $callback!([&proc_macro::] => $($input)*); + #[cfg(feature = "proc-macro2")] + $callback!([proc_macro2::] => $($input)*); + #[cfg(feature = "proc-macro2")] + $callback!([&proc_macro2::] => $($input)*); + }; +} + +/// Like `helper!` but without reference types. +macro_rules! helper_no_refs { + ($callback:ident, $($input:tt)*) => { + $callback!([proc_macro::] => $($input)*); + #[cfg(feature = "proc-macro2")] + $callback!([proc_macro2::] => $($input)*); + }; +} + + +// ============================================================================================== +// ===== `From<*Lit> for Literal` +// ============================================================================================== + +macro_rules! impl_specific_lit_to_lit { + ($ty:ty, $variant:ident) => { + impl<B: crate::Buffer> From<$ty> for Literal<B> { + fn from(src: $ty) -> Self { + Literal::$variant(src) + } + } + }; +} + +impl_specific_lit_to_lit!(crate::BoolLit, Bool); +impl_specific_lit_to_lit!(crate::IntegerLit<B>, Integer); +impl_specific_lit_to_lit!(crate::FloatLit<B>, Float); +impl_specific_lit_to_lit!(crate::CharLit<B>, Char); +impl_specific_lit_to_lit!(crate::StringLit<B>, String); +impl_specific_lit_to_lit!(crate::ByteLit<B>, Byte); +impl_specific_lit_to_lit!(crate::ByteStringLit<B>, ByteString); + + + +// ============================================================================================== +// ===== `From<pm::Literal> for Literal` +// ============================================================================================== + + +macro_rules! impl_tt_to_lit { + ([$($prefix:tt)*] => ) => { + impl From<$($prefix)* Literal> for Literal<String> { + fn from(src: $($prefix)* Literal) -> Self { + // We call `expect` in all these impls: this library aims to implement exactly + // the Rust grammar, so if we have a valid Rust literal, we should always be + // able to parse it. 
+ Self::parse(src.to_string()) + .expect("bug: failed to parse output of `Literal::to_string`") + } + } + } +} + +helper!(impl_tt_to_lit, ); + + +// ============================================================================================== +// ===== `TryFrom<pm::TokenTree> for Literal` +// ============================================================================================== + +macro_rules! impl_tt_to_lit { + ([$($prefix:tt)*] => ) => { + impl TryFrom<$($prefix)* TokenTree> for Literal<String> { + type Error = InvalidToken; + fn try_from(tt: $($prefix)* TokenTree) -> Result<Self, Self::Error> { + let span = tt.span(); + let res = match tt { + $($prefix)* TokenTree::Group(_) => Err(TokenKind::Group), + $($prefix)* TokenTree::Punct(_) => Err(TokenKind::Punct), + $($prefix)* TokenTree::Ident(ref ident) if ident.to_string() == "true" + => return Ok(Literal::Bool(crate::BoolLit::True)), + $($prefix)* TokenTree::Ident(ref ident) if ident.to_string() == "false" + => return Ok(Literal::Bool(crate::BoolLit::False)), + $($prefix)* TokenTree::Ident(_) => Err(TokenKind::Ident), + $($prefix)* TokenTree::Literal(ref lit) => Ok(lit), + }; + + match res { + Ok(lit) => Ok(From::from(lit)), + Err(actual) => Err(InvalidToken { + actual, + expected: TokenKind::Literal, + span: span.into(), + }), + } + } + } + } +} + +helper!(impl_tt_to_lit, ); + + +// ============================================================================================== +// ===== `TryFrom<pm::Literal>`, `TryFrom<pm::TokenTree>` for non-bool `*Lit` +// ============================================================================================== + +fn kind_of(lit: &Literal<String>) -> TokenKind { + match lit { + Literal::String(_) => TokenKind::StringLit, + Literal::Bool(_) => TokenKind::BoolLit, + Literal::Integer(_) => TokenKind::IntegerLit, + Literal::Float(_) => TokenKind::FloatLit, + Literal::Char(_) => TokenKind::CharLit, + Literal::Byte(_) => TokenKind::ByteLit, + Literal::ByteString(_) => 
TokenKind::ByteStringLit, + } +} + +macro_rules! impl_for_specific_lit { + ([$($prefix:tt)*] => $ty:ty, $variant:ident, $kind:ident) => { + impl TryFrom<$($prefix)* Literal> for $ty { + type Error = InvalidToken; + fn try_from(src: $($prefix)* Literal) -> Result<Self, Self::Error> { + let span = src.span(); + let lit: Literal<String> = src.into(); + match lit { + Literal::$variant(s) => Ok(s), + other => Err(InvalidToken { + expected: TokenKind::$kind, + actual: kind_of(&other), + span: span.into(), + }), + } + } + } + + impl TryFrom<$($prefix)* TokenTree> for $ty { + type Error = InvalidToken; + fn try_from(tt: $($prefix)* TokenTree) -> Result<Self, Self::Error> { + let span = tt.span(); + let res = match tt { + $($prefix)* TokenTree::Group(_) => Err(TokenKind::Group), + $($prefix)* TokenTree::Punct(_) => Err(TokenKind::Punct), + $($prefix)* TokenTree::Ident(_) => Err(TokenKind::Ident), + $($prefix)* TokenTree::Literal(ref lit) => Ok(lit), + }; + + match res { + Ok(lit) => <$ty>::try_from(lit), + Err(actual) => Err(InvalidToken { + actual, + expected: TokenKind::$kind, + span: span.into(), + }), + } + } + } + }; +} + +helper!(impl_for_specific_lit, crate::IntegerLit<String>, Integer, IntegerLit); +helper!(impl_for_specific_lit, crate::FloatLit<String>, Float, FloatLit); +helper!(impl_for_specific_lit, crate::CharLit<String>, Char, CharLit); +helper!(impl_for_specific_lit, crate::StringLit<String>, String, StringLit); +helper!(impl_for_specific_lit, crate::ByteLit<String>, Byte, ByteLit); +helper!(impl_for_specific_lit, crate::ByteStringLit<String>, ByteString, ByteStringLit); + + +// ============================================================================================== +// ===== `From<*Lit> for pm::Literal` +// ============================================================================================== + +macro_rules! 
impl_specific_lit_to_pm_lit { + ([$($prefix:tt)*] => $ty:ident, $variant:ident, $kind:ident) => { + impl<B: crate::Buffer> From<crate::$ty<B>> for $($prefix)* Literal { + fn from(l: crate::$ty<B>) -> Self { + // This should never fail: an input that is parsed successfuly + // as one of our literal types should always parse as a + // proc_macro literal as well! + l.raw_input().parse().unwrap_or_else(|e| { + panic!( + "failed to parse `{}` as `{}`: {}", + l.raw_input(), + std::any::type_name::<Self>(), + e, + ) + }) + } + } + }; +} + +helper_no_refs!(impl_specific_lit_to_pm_lit, IntegerLit, Integer, IntegerLit); +helper_no_refs!(impl_specific_lit_to_pm_lit, FloatLit, Float, FloatLit); +helper_no_refs!(impl_specific_lit_to_pm_lit, CharLit, Char, CharLit); +helper_no_refs!(impl_specific_lit_to_pm_lit, StringLit, String, StringLit); +helper_no_refs!(impl_specific_lit_to_pm_lit, ByteLit, Byte, ByteLit); +helper_no_refs!(impl_specific_lit_to_pm_lit, ByteStringLit, ByteString, ByteStringLit); + + +// ============================================================================================== +// ===== `TryFrom<pm::TokenTree> for BoolLit` +// ============================================================================================== + +macro_rules! 
impl_from_tt_for_bool { + ([$($prefix:tt)*] => ) => { + impl TryFrom<$($prefix)* TokenTree> for crate::BoolLit { + type Error = InvalidToken; + fn try_from(tt: $($prefix)* TokenTree) -> Result<Self, Self::Error> { + let span = tt.span(); + let actual = match tt { + $($prefix)* TokenTree::Ident(ref ident) if ident.to_string() == "true" + => return Ok(crate::BoolLit::True), + $($prefix)* TokenTree::Ident(ref ident) if ident.to_string() == "false" + => return Ok(crate::BoolLit::False), + + $($prefix)* TokenTree::Group(_) => TokenKind::Group, + $($prefix)* TokenTree::Punct(_) => TokenKind::Punct, + $($prefix)* TokenTree::Ident(_) => TokenKind::Ident, + $($prefix)* TokenTree::Literal(ref lit) => kind_of(&Literal::from(lit)), + }; + + Err(InvalidToken { + actual, + expected: TokenKind::BoolLit, + span: span.into(), + }) + } + } + }; +} + +helper!(impl_from_tt_for_bool, ); + +// ============================================================================================== +// ===== `From<BoolLit> for pm::Ident` +// ============================================================================================== + +macro_rules! impl_bool_lit_to_pm_lit { + ([$($prefix:tt)*] => ) => { + impl From<crate::BoolLit> for $($prefix)* Ident { + fn from(l: crate::BoolLit) -> Self { + Self::new(l.as_str(), $($prefix)* Span::call_site()) + } + } + }; +} + +helper_no_refs!(impl_bool_lit_to_pm_lit, ); + + +mod tests { + //! # Tests + //! + //! ```no_run + //! extern crate proc_macro; + //! + //! use std::convert::TryFrom; + //! use litrs::Literal; + //! + //! fn give<T>() -> T { + //! panic!() + //! } + //! + //! let _ = litrs::Literal::<String>::from(give::<litrs::BoolLit>()); + //! let _ = litrs::Literal::<String>::from(give::<litrs::IntegerLit<String>>()); + //! let _ = litrs::Literal::<String>::from(give::<litrs::FloatLit<String>>()); + //! let _ = litrs::Literal::<String>::from(give::<litrs::CharLit<String>>()); + //! 
let _ = litrs::Literal::<String>::from(give::<litrs::StringLit<String>>()); + //! let _ = litrs::Literal::<String>::from(give::<litrs::ByteLit<String>>()); + //! let _ = litrs::Literal::<String>::from(give::<litrs::ByteStringLit<String>>()); + //! + //! let _ = litrs::Literal::<&'static str>::from(give::<litrs::BoolLit>()); + //! let _ = litrs::Literal::<&'static str>::from(give::<litrs::IntegerLit<&'static str>>()); + //! let _ = litrs::Literal::<&'static str>::from(give::<litrs::FloatLit<&'static str>>()); + //! let _ = litrs::Literal::<&'static str>::from(give::<litrs::CharLit<&'static str>>()); + //! let _ = litrs::Literal::<&'static str>::from(give::<litrs::StringLit<&'static str>>()); + //! let _ = litrs::Literal::<&'static str>::from(give::<litrs::ByteLit<&'static str>>()); + //! let _ = litrs::Literal::<&'static str>::from(give::<litrs::ByteStringLit<&'static str>>()); + //! + //! + //! let _ = litrs::Literal::from(give::<proc_macro::Literal>()); + //! let _ = litrs::Literal::from(give::<&proc_macro::Literal>()); + //! + //! let _ = litrs::Literal::try_from(give::<proc_macro::TokenTree>()); + //! let _ = litrs::Literal::try_from(give::<&proc_macro::TokenTree>()); + //! + //! + //! let _ = litrs::IntegerLit::try_from(give::<proc_macro::Literal>()); + //! let _ = litrs::IntegerLit::try_from(give::<&proc_macro::Literal>()); + //! + //! let _ = litrs::FloatLit::try_from(give::<proc_macro::Literal>()); + //! let _ = litrs::FloatLit::try_from(give::<&proc_macro::Literal>()); + //! + //! let _ = litrs::CharLit::try_from(give::<proc_macro::Literal>()); + //! let _ = litrs::CharLit::try_from(give::<&proc_macro::Literal>()); + //! + //! let _ = litrs::StringLit::try_from(give::<proc_macro::Literal>()); + //! let _ = litrs::StringLit::try_from(give::<&proc_macro::Literal>()); + //! + //! let _ = litrs::ByteLit::try_from(give::<proc_macro::Literal>()); + //! let _ = litrs::ByteLit::try_from(give::<&proc_macro::Literal>()); + //! + //! 
let _ = litrs::ByteStringLit::try_from(give::<proc_macro::Literal>()); + //! let _ = litrs::ByteStringLit::try_from(give::<&proc_macro::Literal>()); + //! + //! + //! let _ = litrs::BoolLit::try_from(give::<proc_macro::TokenTree>()); + //! let _ = litrs::BoolLit::try_from(give::<&proc_macro::TokenTree>()); + //! + //! let _ = litrs::IntegerLit::try_from(give::<proc_macro::TokenTree>()); + //! let _ = litrs::IntegerLit::try_from(give::<&proc_macro::TokenTree>()); + //! + //! let _ = litrs::FloatLit::try_from(give::<proc_macro::TokenTree>()); + //! let _ = litrs::FloatLit::try_from(give::<&proc_macro::TokenTree>()); + //! + //! let _ = litrs::CharLit::try_from(give::<proc_macro::TokenTree>()); + //! let _ = litrs::CharLit::try_from(give::<&proc_macro::TokenTree>()); + //! + //! let _ = litrs::StringLit::try_from(give::<proc_macro::TokenTree>()); + //! let _ = litrs::StringLit::try_from(give::<&proc_macro::TokenTree>()); + //! + //! let _ = litrs::ByteLit::try_from(give::<proc_macro::TokenTree>()); + //! let _ = litrs::ByteLit::try_from(give::<&proc_macro::TokenTree>()); + //! + //! let _ = litrs::ByteStringLit::try_from(give::<proc_macro::TokenTree>()); + //! let _ = litrs::ByteStringLit::try_from(give::<&proc_macro::TokenTree>()); + //! ``` +} + +#[cfg(feature = "proc-macro2")] +mod tests_proc_macro2 { + //! # Tests + //! + //! ```no_run + //! extern crate proc_macro; + //! + //! use std::convert::TryFrom; + //! use litrs::Literal; + //! + //! fn give<T>() -> T { + //! panic!() + //! } + //! + //! let _ = litrs::Literal::from(give::<proc_macro2::Literal>()); + //! let _ = litrs::Literal::from(give::<&proc_macro2::Literal>()); + //! + //! let _ = litrs::Literal::try_from(give::<proc_macro2::TokenTree>()); + //! let _ = litrs::Literal::try_from(give::<&proc_macro2::TokenTree>()); + //! + //! + //! let _ = litrs::IntegerLit::try_from(give::<proc_macro2::Literal>()); + //! let _ = litrs::IntegerLit::try_from(give::<&proc_macro2::Literal>()); + //! + //! 
let _ = litrs::FloatLit::try_from(give::<proc_macro2::Literal>()); + //! let _ = litrs::FloatLit::try_from(give::<&proc_macro2::Literal>()); + //! + //! let _ = litrs::CharLit::try_from(give::<proc_macro2::Literal>()); + //! let _ = litrs::CharLit::try_from(give::<&proc_macro2::Literal>()); + //! + //! let _ = litrs::StringLit::try_from(give::<proc_macro2::Literal>()); + //! let _ = litrs::StringLit::try_from(give::<&proc_macro2::Literal>()); + //! + //! let _ = litrs::ByteLit::try_from(give::<proc_macro2::Literal>()); + //! let _ = litrs::ByteLit::try_from(give::<&proc_macro2::Literal>()); + //! + //! let _ = litrs::ByteStringLit::try_from(give::<proc_macro2::Literal>()); + //! let _ = litrs::ByteStringLit::try_from(give::<&proc_macro2::Literal>()); + //! + //! + //! let _ = litrs::BoolLit::try_from(give::<proc_macro2::TokenTree>()); + //! let _ = litrs::BoolLit::try_from(give::<&proc_macro2::TokenTree>()); + //! + //! let _ = litrs::IntegerLit::try_from(give::<proc_macro2::TokenTree>()); + //! let _ = litrs::IntegerLit::try_from(give::<&proc_macro2::TokenTree>()); + //! + //! let _ = litrs::FloatLit::try_from(give::<proc_macro2::TokenTree>()); + //! let _ = litrs::FloatLit::try_from(give::<&proc_macro2::TokenTree>()); + //! + //! let _ = litrs::CharLit::try_from(give::<proc_macro2::TokenTree>()); + //! let _ = litrs::CharLit::try_from(give::<&proc_macro2::TokenTree>()); + //! + //! let _ = litrs::StringLit::try_from(give::<proc_macro2::TokenTree>()); + //! let _ = litrs::StringLit::try_from(give::<&proc_macro2::TokenTree>()); + //! + //! let _ = litrs::ByteLit::try_from(give::<proc_macro2::TokenTree>()); + //! let _ = litrs::ByteLit::try_from(give::<&proc_macro2::TokenTree>()); + //! + //! let _ = litrs::ByteStringLit::try_from(give::<proc_macro2::TokenTree>()); + //! let _ = litrs::ByteStringLit::try_from(give::<&proc_macro2::TokenTree>()); + //! 
``` +} diff --git a/third_party/rust/litrs/src/integer/mod.rs b/third_party/rust/litrs/src/integer/mod.rs new file mode 100644 index 0000000000..cecd79d3fb --- /dev/null +++ b/third_party/rust/litrs/src/integer/mod.rs @@ -0,0 +1,356 @@ +use std::{fmt, str::FromStr}; + +use crate::{ + Buffer, ParseError, + err::{perr, ParseErrorKind::*}, + parse::{first_byte_or_empty, hex_digit_value, check_suffix}, +}; + + +/// An integer literal, e.g. `27`, `0x7F`, `0b101010u8` or `5_000_000i64`. +/// +/// An integer literal consists of an optional base prefix (`0b`, `0o`, `0x`), +/// the main part (digits and underscores), and an optional type suffix +/// (e.g. `u64` or `i8`). See [the reference][ref] for more information. +/// +/// Note that integer literals are always positive: the grammar does not contain +/// the minus sign at all. The minus sign is just the unary negate operator, +/// not part of the literal. Which is interesting for cases like `- 128i8`: +/// here, the literal itself would overflow the specified type (`i8` cannot +/// represent 128). That's why in rustc, the literal overflow check is +/// performed as a lint after parsing, not during the lexing stage. Similarly, +/// [`IntegerLit::parse`] does not perform an overflow check. +/// +/// [ref]: https://doc.rust-lang.org/reference/tokens.html#integer-literals +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[non_exhaustive] +pub struct IntegerLit<B: Buffer> { + /// The raw literal. Grammar: `<prefix?><main part><suffix?>`. + raw: B, + /// First index of the main number part (after the base prefix). + start_main_part: usize, + /// First index not part of the main number part. + end_main_part: usize, + /// Parsed `raw[..start_main_part]`. + base: IntegerBase, +} + +impl<B: Buffer> IntegerLit<B> { + /// Parses the input as an integer literal. Returns an error if the input is + /// invalid or represents a different kind of literal. 
+ pub fn parse(input: B) -> Result<Self, ParseError> { + match first_byte_or_empty(&input)? { + digit @ b'0'..=b'9' => { + // TODO: simplify once RFC 2528 is stabilized + let IntegerLit { + start_main_part, + end_main_part, + base, + .. + } = parse_impl(&input, digit)?; + + Ok(Self { raw: input, start_main_part, end_main_part, base }) + }, + _ => Err(perr(0, DoesNotStartWithDigit)), + } + } + + /// Performs the actual string to int conversion to obtain the integer + /// value. The optional type suffix of the literal **is ignored by this + /// method**. This means `N` does not need to match the type suffix! + /// + /// Returns `None` if the literal overflows `N`. + /// + /// Hint: `u128` can represent all possible values integer literal values, + /// as there are no negative literals (see type docs). Thus you can, for + /// example, safely use `lit.value::<u128>().to_string()` to get a decimal + /// string. (Technically, Rust integer literals can represent arbitrarily + /// large numbers, but those would be rejected at a later stage by the Rust + /// compiler). + pub fn value<N: FromIntegerLiteral>(&self) -> Option<N> { + let base = N::from_small_number(self.base.value()); + + let mut acc = N::from_small_number(0); + for digit in self.raw_main_part().bytes() { + if digit == b'_' { + continue; + } + + // We don't actually need the base here: we already know this main + // part only contains digits valid for the specified base. + let digit = hex_digit_value(digit) + .unwrap_or_else(|| unreachable!("bug: integer main part contains non-digit")); + + acc = acc.checked_mul(base)?; + acc = acc.checked_add(N::from_small_number(digit))?; + } + + Some(acc) + } + + /// The base of this integer literal. + pub fn base(&self) -> IntegerBase { + self.base + } + + /// The main part containing the digits and potentially `_`. Do not try to + /// parse this directly as that would ignore the base! 
+ pub fn raw_main_part(&self) -> &str { + &(*self.raw)[self.start_main_part..self.end_main_part] + } + + /// The optional suffix. Returns `""` if the suffix is empty/does not exist. + /// + /// If you want the type, try `IntegerType::from_suffix(lit.suffix())`. + pub fn suffix(&self) -> &str { + &(*self.raw)[self.end_main_part..] + } + + /// Returns the raw input that was passed to `parse`. + pub fn raw_input(&self) -> &str { + &self.raw + } + + /// Returns the raw input that was passed to `parse`, potentially owned. + pub fn into_raw_input(self) -> B { + self.raw + } +} + +impl IntegerLit<&str> { + /// Makes a copy of the underlying buffer and returns the owned version of + /// `Self`. + pub fn to_owned(&self) -> IntegerLit<String> { + IntegerLit { + raw: self.raw.to_owned(), + start_main_part: self.start_main_part, + end_main_part: self.end_main_part, + base: self.base, + } + } +} + +impl<B: Buffer> fmt::Display for IntegerLit<B> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", &*self.raw) + } +} + +/// Integer literal types. *Implementation detail*. +/// +/// Implemented for all integer literal types. This trait is sealed and cannot +/// be implemented outside of this crate. The trait's methods are implementation +/// detail of this library and are not subject to semver. +pub trait FromIntegerLiteral: self::sealed::Sealed + Copy { + /// Creates itself from the given number. `n` is guaranteed to be `<= 16`. + #[doc(hidden)] + fn from_small_number(n: u8) -> Self; + + #[doc(hidden)] + fn checked_add(self, rhs: Self) -> Option<Self>; + + #[doc(hidden)] + fn checked_mul(self, rhs: Self) -> Option<Self>; + + #[doc(hidden)] + fn ty() -> IntegerType; +} + +macro_rules! 
impl_from_int_literal { + ($( $ty:ty => $variant:ident ,)* ) => { + $( + impl self::sealed::Sealed for $ty {} + impl FromIntegerLiteral for $ty { + fn from_small_number(n: u8) -> Self { + n as Self + } + fn checked_add(self, rhs: Self) -> Option<Self> { + self.checked_add(rhs) + } + fn checked_mul(self, rhs: Self) -> Option<Self> { + self.checked_mul(rhs) + } + fn ty() -> IntegerType { + IntegerType::$variant + } + } + )* + }; +} + +impl_from_int_literal!( + u8 => U8, u16 => U16, u32 => U32, u64 => U64, u128 => U128, usize => Usize, + i8 => I8, i16 => I16, i32 => I32, i64 => I64, i128 => I128, isize => Isize, +); + +mod sealed { + pub trait Sealed {} +} + +/// Precondition: first byte of string has to be in `b'0'..=b'9'`. +#[inline(never)] +pub(crate) fn parse_impl(input: &str, first: u8) -> Result<IntegerLit<&str>, ParseError> { + // Figure out base and strip prefix base, if it exists. + let (end_prefix, base) = match (first, input.as_bytes().get(1)) { + (b'0', Some(b'b')) => (2, IntegerBase::Binary), + (b'0', Some(b'o')) => (2, IntegerBase::Octal), + (b'0', Some(b'x')) => (2, IntegerBase::Hexadecimal), + + // Everything else is treated as decimal. Several cases are caught + // by this: + // - "123" + // - "0" + // - "0u8" + // - "0r" -> this will error later + _ => (0, IntegerBase::Decimal), + }; + let without_prefix = &input[end_prefix..]; + + + // Scan input to find the first character that's not a valid digit. 
+ let is_valid_digit = match base { + IntegerBase::Binary => |b| matches!(b, b'0' | b'1' | b'_'), + IntegerBase::Octal => |b| matches!(b, b'0'..=b'7' | b'_'), + IntegerBase::Decimal => |b| matches!(b, b'0'..=b'9' | b'_'), + IntegerBase::Hexadecimal => |b| matches!(b, b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F' | b'_'), + }; + let end_main = without_prefix.bytes() + .position(|b| !is_valid_digit(b)) + .unwrap_or(without_prefix.len()); + let (main_part, suffix) = without_prefix.split_at(end_main); + + check_suffix(suffix).map_err(|kind| { + // This is just to have a nicer error kind for this special case. If the + // suffix is invalid, it is non-empty -> unwrap ok. + let first = suffix.as_bytes()[0]; + if !is_valid_digit(first) && first.is_ascii_digit() { + perr(end_main + end_prefix, InvalidDigit) + } else { + perr(end_main + end_prefix..input.len(), kind) + } + })?; + if suffix.starts_with('e') || suffix.starts_with('E') { + return Err(perr(end_main, IntegerSuffixStartingWithE)); + } + + // Make sure main number part is not empty. + if main_part.bytes().filter(|&b| b != b'_').count() == 0 { + return Err(perr(end_prefix..end_prefix + end_main, NoDigits)); + } + + Ok(IntegerLit { + raw: input, + start_main_part: end_prefix, + end_main_part: end_main + end_prefix, + base, + }) +} + + +/// The bases in which an integer can be specified. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum IntegerBase { + Binary, + Octal, + Decimal, + Hexadecimal, +} + +impl IntegerBase { + /// Returns the literal prefix that indicates this base, i.e. `"0b"`, + /// `"0o"`, `""` and `"0x"`. + pub fn prefix(self) -> &'static str { + match self { + Self::Binary => "0b", + Self::Octal => "0o", + Self::Decimal => "", + Self::Hexadecimal => "0x", + } + } + + /// Returns the base value, i.e. 2, 8, 10 or 16. + pub fn value(self) -> u8 { + match self { + Self::Binary => 2, + Self::Octal => 8, + Self::Decimal => 10, + Self::Hexadecimal => 16, + } + } +} + +/// All possible integer type suffixes. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[non_exhaustive] +pub enum IntegerType { + U8, + U16, + U32, + U64, + U128, + Usize, + I8, + I16, + I32, + I64, + I128, + Isize, +} + +impl IntegerType { + /// Returns the type corresponding to the given suffix (e.g. `"u8"` is + /// mapped to `Self::U8`). If the suffix is not a valid integer type, + /// `None` is returned. + pub fn from_suffix(suffix: &str) -> Option<Self> { + match suffix { + "u8" => Some(Self::U8), + "u16" => Some(Self::U16), + "u32" => Some(Self::U32), + "u64" => Some(Self::U64), + "u128" => Some(Self::U128), + "usize" => Some(Self::Usize), + "i8" => Some(Self::I8), + "i16" => Some(Self::I16), + "i32" => Some(Self::I32), + "i64" => Some(Self::I64), + "i128" => Some(Self::I128), + "isize" => Some(Self::Isize), + _ => None, + } + } + + /// Returns the suffix for this type, e.g. `"u8"` for `Self::U8`. + pub fn suffix(self) -> &'static str { + match self { + Self::U8 => "u8", + Self::U16 => "u16", + Self::U32 => "u32", + Self::U64 => "u64", + Self::U128 => "u128", + Self::Usize => "usize", + Self::I8 => "i8", + Self::I16 => "i16", + Self::I32 => "i32", + Self::I64 => "i64", + Self::I128 => "i128", + Self::Isize => "isize", + } + } +} + +impl FromStr for IntegerType { + type Err = (); + fn from_str(s: &str) -> Result<Self, Self::Err> { + Self::from_suffix(s).ok_or(()) + } +} + +impl fmt::Display for IntegerType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.suffix().fmt(f) + } +} + + +#[cfg(test)] +mod tests; diff --git a/third_party/rust/litrs/src/integer/tests.rs b/third_party/rust/litrs/src/integer/tests.rs new file mode 100644 index 0000000000..e6dad3f031 --- /dev/null +++ b/third_party/rust/litrs/src/integer/tests.rs @@ -0,0 +1,357 @@ +use std::fmt::{Debug, Display}; +use crate::{ + FromIntegerLiteral, Literal, IntegerLit, IntegerType as Ty, IntegerBase, IntegerBase::*, + test_util::{assert_parse_ok_eq, assert_roundtrip}, +}; + + +// ===== Utility functions 
======================================================================= + +#[track_caller] +fn check<T: FromIntegerLiteral + PartialEq + Debug + Display>( + input: &str, + value: T, + base: IntegerBase, + main_part: &str, + type_suffix: Option<Ty>, +) { + let expected_integer = IntegerLit { + raw: input, + start_main_part: base.prefix().len(), + end_main_part: base.prefix().len() + main_part.len(), + base, + }; + assert_parse_ok_eq( + input, IntegerLit::parse(input), expected_integer.clone(), "IntegerLit::parse"); + assert_parse_ok_eq( + input, Literal::parse(input), Literal::Integer(expected_integer), "Literal::parse"); + assert_roundtrip(expected_integer.to_owned(), input); + assert_eq!(Ty::from_suffix(IntegerLit::parse(input).unwrap().suffix()), type_suffix); + + let actual_value = IntegerLit::parse(input) + .unwrap() + .value::<T>() + .unwrap_or_else(|| panic!("unexpected overflow in `IntegerLit::value` for `{}`", input)); + if actual_value != value { + panic!( + "Parsing int literal `{}` should give value `{}`, but actually resulted in `{}`", + input, + value, + actual_value, + ); + } +} + + +// ===== Actual tests =========================================================================== + +#[test] +fn parse_decimal() { + check("0", 0u128, Decimal, "0", None); + check("1", 1u8, Decimal, "1", None); + check("8", 8u16, Decimal, "8", None); + check("9", 9u32, Decimal, "9", None); + check("10", 10u64, Decimal, "10", None); + check("11", 11i8, Decimal, "11", None); + check("123456789", 123456789i128, Decimal, "123456789", None); + + check("05", 5i16, Decimal, "05", None); + check("00005", 5i32, Decimal, "00005", None); + check("0123456789", 123456789i64, Decimal, "0123456789", None); + + check("123_456_789", 123_456_789, Decimal, "123_456_789", None); + check("0___4", 4, Decimal, "0___4", None); + check("0___4_3", 43, Decimal, "0___4_3", None); + check("0___4_3", 43, Decimal, "0___4_3", None); + check("123___________", 123, Decimal, "123___________", None); + + 
check( + "340282366920938463463374607431768211455", + 340282366920938463463374607431768211455u128, + Decimal, + "340282366920938463463374607431768211455", + None, + ); + check( + "340_282_366_920_938_463_463_374_607_431_768_211_455", + 340282366920938463463374607431768211455u128, + Decimal, + "340_282_366_920_938_463_463_374_607_431_768_211_455", + None, + ); + check( + "3_40_282_3669_20938_463463_3746074_31768211_455___", + 340282366920938463463374607431768211455u128, + Decimal, + "3_40_282_3669_20938_463463_3746074_31768211_455___", + None, + ); +} + +#[test] +fn parse_binary() { + check("0b0", 0b0, Binary, "0", None); + check("0b000", 0b000, Binary, "000", None); + check("0b1", 0b1, Binary, "1", None); + check("0b01", 0b01, Binary, "01", None); + check("0b101010", 0b101010, Binary, "101010", None); + check("0b10_10_10", 0b10_10_10, Binary, "10_10_10", None); + check("0b01101110____", 0b01101110____, Binary, "01101110____", None); + + check("0b10010u8", 0b10010u8, Binary, "10010", Some(Ty::U8)); + check("0b10010i8", 0b10010u8, Binary, "10010", Some(Ty::I8)); + check("0b10010u64", 0b10010u64, Binary, "10010", Some(Ty::U64)); + check("0b10010i64", 0b10010i64, Binary, "10010", Some(Ty::I64)); + check( + "0b1011001_00110000_00101000_10100101u32", + 0b1011001_00110000_00101000_10100101u32, + Binary, + "1011001_00110000_00101000_10100101", + Some(Ty::U32), + ); +} + +#[test] +fn parse_octal() { + check("0o0", 0o0, Octal, "0", None); + check("0o1", 0o1, Octal, "1", None); + check("0o6", 0o6, Octal, "6", None); + check("0o7", 0o7, Octal, "7", None); + check("0o17", 0o17, Octal, "17", None); + check("0o123", 0o123, Octal, "123", None); + check("0o7654321", 0o7654321, Octal, "7654321", None); + check("0o7_53_1", 0o7_53_1, Octal, "7_53_1", None); + check("0o66_", 0o66_, Octal, "66_", None); + + check("0o755u16", 0o755u16, Octal, "755", Some(Ty::U16)); + check("0o755i128", 0o755i128, Octal, "755", Some(Ty::I128)); +} + +#[test] +fn parse_hexadecimal() { + check("0x0", 0x0, 
Hexadecimal, "0", None); + check("0x1", 0x1, Hexadecimal, "1", None); + check("0x9", 0x9, Hexadecimal, "9", None); + + check("0xa", 0xa, Hexadecimal, "a", None); + check("0xf", 0xf, Hexadecimal, "f", None); + check("0x17", 0x17, Hexadecimal, "17", None); + check("0x1b", 0x1b, Hexadecimal, "1b", None); + check("0x123", 0x123, Hexadecimal, "123", None); + check("0xace", 0xace, Hexadecimal, "ace", None); + check("0xfdb971", 0xfdb971, Hexadecimal, "fdb971", None); + check("0xa_54_f", 0xa_54_f, Hexadecimal, "a_54_f", None); + check("0x6d_", 0x6d_, Hexadecimal, "6d_", None); + + check("0xA", 0xA, Hexadecimal, "A", None); + check("0xF", 0xF, Hexadecimal, "F", None); + check("0x17", 0x17, Hexadecimal, "17", None); + check("0x1B", 0x1B, Hexadecimal, "1B", None); + check("0x123", 0x123, Hexadecimal, "123", None); + check("0xACE", 0xACE, Hexadecimal, "ACE", None); + check("0xFDB971", 0xFDB971, Hexadecimal, "FDB971", None); + check("0xA_54_F", 0xA_54_F, Hexadecimal, "A_54_F", None); + check("0x6D_", 0x6D_, Hexadecimal, "6D_", None); + + check("0xFdB97a1", 0xFdB97a1, Hexadecimal, "FdB97a1", None); + check("0xfdB97A1", 0xfdB97A1, Hexadecimal, "fdB97A1", None); + + check("0x40u16", 0x40u16, Hexadecimal, "40", Some(Ty::U16)); + check("0xffi128", 0xffi128, Hexadecimal, "ff", Some(Ty::I128)); +} + +#[test] +fn starting_underscore() { + check("0b_1", 1, Binary, "_1", None); + check("0b_010i16", 0b_010, Binary, "_010", Some(Ty::I16)); + + check("0o_5", 5, Octal, "_5", None); + check("0o_750u128", 0o_750u128, Octal, "_750", Some(Ty::U128)); + + check("0x_c", 0xc, Hexadecimal, "_c", None); + check("0x_cf3i8", 0x_cf3, Hexadecimal, "_cf3", Some(Ty::I8)); +} + +#[test] +fn parse_overflowing_just_fine() { + check("256u8", 256u16, Decimal, "256", Some(Ty::U8)); + check("123_456_789u8", 123_456_789u32, Decimal, "123_456_789", Some(Ty::U8)); + check("123_456_789u16", 123_456_789u32, Decimal, "123_456_789", Some(Ty::U16)); + + check("123_123_456_789u8", 123_123_456_789u64, Decimal, 
"123_123_456_789", Some(Ty::U8)); + check("123_123_456_789u16", 123_123_456_789u64, Decimal, "123_123_456_789", Some(Ty::U16)); + check("123_123_456_789u32", 123_123_456_789u64, Decimal, "123_123_456_789", Some(Ty::U32)); +} + +#[test] +fn suffixes() { + [ + ("123i8", Ty::I8), + ("123i16", Ty::I16), + ("123i32", Ty::I32), + ("123i64", Ty::I64), + ("123i128", Ty::I128), + ("123u8", Ty::U8), + ("123u16", Ty::U16), + ("123u32", Ty::U32), + ("123u64", Ty::U64), + ("123u128", Ty::U128), + ].iter().for_each(|&(s, ty)| { + assert_eq!(Ty::from_suffix(IntegerLit::parse(s).unwrap().suffix()), Some(ty)); + }); +} + +#[test] +fn overflow_u128() { + let inputs = [ + "340282366920938463463374607431768211456", + "0x100000000000000000000000000000000", + "0o4000000000000000000000000000000000000000000", + "0b1000000000000000000000000000000000000000000000000000000000000000000\ + 00000000000000000000000000000000000000000000000000000000000000", + "340282366920938463463374607431768211456u128", + "340282366920938463463374607431768211457", + "3_40_282_3669_20938_463463_3746074_31768211_456___", + "3_40_282_3669_20938_463463_3746074_31768211_455___1", + "3_40_282_3669_20938_463463_3746074_31768211_455___0u128", + "3402823669209384634633746074317682114570", + ]; + + for &input in &inputs { + let lit = IntegerLit::parse(input).expect("failed to parse"); + assert!(lit.value::<u128>().is_none()); + } +} + +#[test] +fn overflow_u8() { + let inputs = [ + "256", "0x100", "0o400", "0b100000000", + "257", "0x101", "0o401", "0b100000001", + "300", + "1548", + "2548985", + "256u128", + "256u8", + "2_5_6", + "256_____1", + "256__", + ]; + + for &input in &inputs { + let lit = IntegerLit::parse(input).expect("failed to parse"); + assert!(lit.value::<u8>().is_none()); + } +} + +#[test] +fn parse_err() { + assert_err!(IntegerLit, "", Empty, None); + assert_err_single!(IntegerLit::parse("a"), DoesNotStartWithDigit, 0); + assert_err_single!(IntegerLit::parse(";"), DoesNotStartWithDigit, 0); + 
assert_err_single!(IntegerLit::parse("0;"), UnexpectedChar, 1..2); + assert_err!(IntegerLit, "0b", NoDigits, 2..2); + assert_err_single!(IntegerLit::parse(" 0"), DoesNotStartWithDigit, 0); + assert_err_single!(IntegerLit::parse("0 "), UnexpectedChar, 1); + assert_err!(IntegerLit, "0b3", InvalidDigit, 2); + assert_err_single!(IntegerLit::parse("_"), DoesNotStartWithDigit, 0); + assert_err_single!(IntegerLit::parse("_3"), DoesNotStartWithDigit, 0); + assert_err!(IntegerLit, "0x44.5", UnexpectedChar, 4..6); + assert_err_single!(IntegerLit::parse("123em"), IntegerSuffixStartingWithE, 3); +} + +#[test] +fn invalid_digits() { + assert_err!(IntegerLit, "0b10201", InvalidDigit, 4); + assert_err!(IntegerLit, "0b9", InvalidDigit, 2); + assert_err!(IntegerLit, "0b07", InvalidDigit, 3); + + assert_err!(IntegerLit, "0o12380", InvalidDigit, 5); + assert_err!(IntegerLit, "0o192", InvalidDigit, 3); + + assert_err_single!(IntegerLit::parse("a_123"), DoesNotStartWithDigit, 0); + assert_err_single!(IntegerLit::parse("B_123"), DoesNotStartWithDigit, 0); +} + +#[test] +fn no_valid_digits() { + assert_err!(IntegerLit, "0x_", NoDigits, 2..3); + assert_err!(IntegerLit, "0x__", NoDigits, 2..4); + assert_err!(IntegerLit, "0x________", NoDigits, 2..10); + assert_err!(IntegerLit, "0x_i8", NoDigits, 2..3); + assert_err!(IntegerLit, "0x_u8", NoDigits, 2..3); + assert_err!(IntegerLit, "0x_isize", NoDigits, 2..3); + assert_err!(IntegerLit, "0x_usize", NoDigits, 2..3); + + assert_err!(IntegerLit, "0o_", NoDigits, 2..3); + assert_err!(IntegerLit, "0o__", NoDigits, 2..4); + assert_err!(IntegerLit, "0o________", NoDigits, 2..10); + assert_err!(IntegerLit, "0o_i32", NoDigits, 2..3); + assert_err!(IntegerLit, "0o_u32", NoDigits, 2..3); + + assert_err!(IntegerLit, "0b_", NoDigits, 2..3); + assert_err!(IntegerLit, "0b__", NoDigits, 2..4); + assert_err!(IntegerLit, "0b________", NoDigits, 2..10); + assert_err!(IntegerLit, "0b_i128", NoDigits, 2..3); + assert_err!(IntegerLit, "0b_u128", NoDigits, 2..3); +} 
+ +#[test] +fn non_standard_suffixes() { + #[track_caller] + fn check_suffix<T: FromIntegerLiteral + PartialEq + Debug + Display>( + input: &str, + value: T, + base: IntegerBase, + main_part: &str, + suffix: &str, + ) { + check(input, value, base, main_part, None); + assert_eq!(IntegerLit::parse(input).unwrap().suffix(), suffix); + } + + check_suffix("5u7", 5, Decimal, "5", "u7"); + check_suffix("5u7", 5, Decimal, "5", "u7"); + check_suffix("5u9", 5, Decimal, "5", "u9"); + check_suffix("5u0", 5, Decimal, "5", "u0"); + check_suffix("33u12", 33, Decimal, "33", "u12"); + check_suffix("84u17", 84, Decimal, "84", "u17"); + check_suffix("99u80", 99, Decimal, "99", "u80"); + check_suffix("1234uu16", 1234, Decimal, "1234", "uu16"); + + check_suffix("5i7", 5, Decimal, "5", "i7"); + check_suffix("5i9", 5, Decimal, "5", "i9"); + check_suffix("5i0", 5, Decimal, "5", "i0"); + check_suffix("33i12", 33, Decimal, "33", "i12"); + check_suffix("84i17", 84, Decimal, "84", "i17"); + check_suffix("99i80", 99, Decimal, "99", "i80"); + check_suffix("1234ii16", 1234, Decimal, "1234", "ii16"); + + check_suffix("0ui32", 0, Decimal, "0", "ui32"); + check_suffix("1iu32", 1, Decimal, "1", "iu32"); + check_suffix("54321a64", 54321, Decimal, "54321", "a64"); + check_suffix("54321b64", 54321, Decimal, "54321", "b64"); + check_suffix("54321x64", 54321, Decimal, "54321", "x64"); + check_suffix("54321o64", 54321, Decimal, "54321", "o64"); + + check_suffix("0a", 0, Decimal, "0", "a"); + check_suffix("0a3", 0, Decimal, "0", "a3"); + check_suffix("0z", 0, Decimal, "0", "z"); + check_suffix("0z3", 0, Decimal, "0", "z3"); + check_suffix("0b0a", 0, Binary, "0", "a"); + check_suffix("0b0A", 0, Binary, "0", "A"); + check_suffix("0b01f", 1, Binary, "01", "f"); + check_suffix("0b01F", 1, Binary, "01", "F"); + check_suffix("0o7a_", 7, Octal, "7", "a_"); + check_suffix("0o7A_", 7, Octal, "7", "A_"); + check_suffix("0o72f_0", 0o72, Octal, "72", "f_0"); + check_suffix("0o72F_0", 0o72, Octal, "72", "F_0"); + + 
check_suffix("0x8cg", 0x8c, Hexadecimal, "8c", "g"); + check_suffix("0x8cG", 0x8c, Hexadecimal, "8c", "G"); + check_suffix("0x8c1h_", 0x8c1, Hexadecimal, "8c1", "h_"); + check_suffix("0x8c1H_", 0x8c1, Hexadecimal, "8c1", "H_"); + check_suffix("0x8czu16", 0x8c, Hexadecimal, "8c", "zu16"); + + check_suffix("123_foo", 123, Decimal, "123_", "foo"); +} diff --git a/third_party/rust/litrs/src/lib.rs b/third_party/rust/litrs/src/lib.rs new file mode 100644 index 0000000000..64ed7813c9 --- /dev/null +++ b/third_party/rust/litrs/src/lib.rs @@ -0,0 +1,370 @@ +//! Parsing and inspecting Rust literal tokens. +//! +//! This library offers functionality to parse Rust literals, i.e. tokens in the +//! Rust programming language that represent fixed values. The grammar for +//! those is defined [here][ref]. +//! +//! This kind of functionality already exists in the crate `syn`. However, as +//! you oftentimes don't need (nor want) the full power of `syn`, `litrs` was +//! built. This crate also offers a bit more flexibility compared to `syn` +//! (only regarding literals, of course). +//! +//! +//! # Quick start +//! +//! | **`StringLit::try_from(tt)?.value()`** | +//! | - | +//! +//! ... where `tt` is a `proc_macro::TokenTree` and where [`StringLit`] can be +//! replaced with [`Literal`] or other types of literals (e.g. [`FloatLit`]). +//! Calling `value()` returns the value that is represented by the literal. +//! +//! **Mini Example** +//! +//! ```ignore +//! use proc_macro::TokenStream; +//! +//! #[proc_macro] +//! pub fn foo(input: TokenStream) -> TokenStream { +//! let first_token = input.into_iter().next().unwrap(); // Do proper error handling! +//! let string_value = match litrs::StringLit::try_from(first_token) { +//! Ok(string_lit) => string_lit.value(), +//! Err(e) => return e.to_compile_error(), +//! }; +//! +//! // `string_value` is the string value with all escapes resolved. +//! todo!() +//! } +//! ``` +//! +//! # Overview +//! +//! 
The main types of this library are [`Literal`], representing any kind of +//! literal, and `*Lit`, like [`StringLit`] or [`FloatLit`], representing a +//! specific kind of literal. +//! +//! There are different ways to obtain such a literal type: +//! +//! - **`parse`**: parses a `&str` or `String` and returns `Result<_, +//! ParseError>`. For example: [`Literal::parse`] and +//! [`IntegerLit::parse`]. +//! +//! - **`From<proc_macro::Literal> for Literal`**: turns a `Literal` value from +//! the `proc_macro` crate into a `Literal` from this crate. +//! +//! - **`TryFrom<proc_macro::Literal> for *Lit`**: tries to turn a +//! `proc_macro::Literal` into a specific literal type of this crate. If +//! the input is a literal of a different kind, `Err(InvalidToken)` is +//! returned. +//! +//! - **`TryFrom<proc_macro::TokenTree>`**: attempts to turn a token tree into a +//! literal type of this crate. An error is returned if the token tree is +//! not a literal, or if you are trying to turn it into a specific kind of +//! literal and the token tree is a different kind of literal. +//! +//! All of the `From` and `TryFrom` conversions also work for reference to +//! `proc_macro` types. Additionally, if the crate feature `proc-macro2` is +//! enabled (which it is by default), all these `From` and `TryFrom` impls also +//! exist for the corresponding `proc_macro2` types. +//! +//! **Note**: `true` and `false` are `Ident`s when passed to your proc macro. +//! The `TryFrom<TokenTree>` impls check for those two special idents and +//! return a [`BoolLit`] appropriately. For that reason, there is also no +//! `TryFrom<proc_macro::Literal>` impl for [`BoolLit`]. The `proc_macro::Literal` +//! simply cannot represent bool literals. +//! +//! +//! # Examples +//! +//! In a proc-macro: +//! +//! ```ignore +//! use std::convert::TryFrom; +//! use proc_macro::TokenStream; +//! use litrs::FloatLit; +//! +//! #[proc_macro] +//! pub fn foo(input: TokenStream) -> TokenStream { +//! 
let mut input = input.into_iter().collect::<Vec<_>>(); +//! if input.len() != 1 { +//! // Please do proper error handling in your real code! +//! panic!("expected exactly one token as input"); +//! } +//! let token = input.remove(0); +//! +//! match FloatLit::try_from(token) { +//! Ok(float_lit) => { /* do something */ } +//! Err(e) => return e.to_compile_error(), +//! } +//! +//! // Dummy output +//! TokenStream::new() +//! } +//! ``` +//! +//! Parsing from string: +//! +//! ``` +//! use litrs::{FloatLit, Literal}; +//! +//! // Parse a specific kind of literal (float in this case): +//! let float_lit = FloatLit::parse("3.14f32"); +//! assert!(float_lit.is_ok()); +//! assert_eq!(float_lit.unwrap().suffix(), "f32"); +//! assert!(FloatLit::parse("'c'").is_err()); +//! +//! // Parse any kind of literal. After parsing, you can inspect the literal +//! // and decide what to do in each case. +//! let lit = Literal::parse("0xff80").expect("failed to parse literal"); +//! match lit { +//! Literal::Integer(lit) => { /* ... */ } +//! Literal::Float(lit) => { /* ... */ } +//! Literal::Bool(lit) => { /* ... */ } +//! Literal::Char(lit) => { /* ... */ } +//! Literal::String(lit) => { /* ... */ } +//! Literal::Byte(lit) => { /* ... */ } +//! Literal::ByteString(lit) => { /* ... */ } +//! } +//! ``` +//! +//! +//! +//! # Crate features +//! +//! - `proc-macro2` (**default**): adds the dependency `proc_macro2`, a bunch of +//! `From` and `TryFrom` impls, and [`InvalidToken::to_compile_error2`]. +//! - `check_suffix`: if enabled, `parse` functions will exactly verify that the +//! literal suffix is valid. Adds the dependency `unicode-xid`. If disabled, +//! only an approximate check (only in ASCII range) is done. If you are +//! writing a proc macro, you don't need to enable this as the suffix is +//! already checked by the compiler. +//! +//! +//! [ref]: https://doc.rust-lang.org/reference/tokens.html#literals +//! 
+ +#![deny(missing_debug_implementations)] + +extern crate proc_macro; + +#[cfg(test)] +#[macro_use] +mod test_util; + +#[cfg(test)] +mod tests; + +mod bool; +mod byte; +mod bytestr; +mod char; +mod err; +mod escape; +mod float; +mod impls; +mod integer; +mod parse; +mod string; + + +use std::{borrow::{Borrow, Cow}, fmt, ops::{Deref, Range}}; + +pub use self::{ + bool::BoolLit, + byte::ByteLit, + bytestr::ByteStringLit, + char::CharLit, + err::{InvalidToken, ParseError}, + float::{FloatLit, FloatType}, + integer::{FromIntegerLiteral, IntegerLit, IntegerBase, IntegerType}, + string::StringLit, +}; + + +// ============================================================================================== +// ===== `Literal` and type defs +// ============================================================================================== + +/// A literal. This is the main type of this library. +/// +/// This type is generic over the underlying buffer `B`, which can be `&str` or +/// `String`. +/// +/// To create this type, you have to either call [`Literal::parse`] with an +/// input string or use the `From<_>` impls of this type. The impls are only +/// available if the corresponding crate features are enabled (they are enabled +/// by default). +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Literal<B: Buffer> { + Bool(BoolLit), + Integer(IntegerLit<B>), + Float(FloatLit<B>), + Char(CharLit<B>), + String(StringLit<B>), + Byte(ByteLit<B>), + ByteString(ByteStringLit<B>), +} + +impl<B: Buffer> Literal<B> { + /// Parses the given input as a Rust literal. + pub fn parse(input: B) -> Result<Self, ParseError> { + parse::parse(input) + } + + /// Returns the suffix of this literal or `""` if it doesn't have one. + /// + /// Rust token grammar actually allows suffixes for all kinds of tokens. + /// Most Rust programmers only know the type suffixes for integers and + /// floats, e.g. `0u32`. And in normal Rust code, everything else causes an + /// error.
But it is possible to pass literals with arbitrary suffixes to + /// proc macros, for example: + /// + /// ```ignore + /// some_macro!(3.14f33 16px '🦊'good_boy "toph"beifong); + /// ``` + /// + /// Boolean literals, not actually being literals, but idents, cannot have + /// suffixes and this method always returns `""` for those. + /// + /// There are some edge cases to be aware of: + /// - Integer suffixes must not start with `e` or `E` as that conflicts with + /// the exponent grammar for floats. `0e1` is a float; `0eel` is also + /// parsed as a float and results in an error. + /// - Hexadecimal integers eagerly parse digits, so `0x5abcdefgh` has a + /// suffix of `gh`. + /// - Suffixes can contain and start with `_`, but for integer and float + /// literals, `_` is eagerly parsed as part of the number, so `1_x` has + /// the suffix `x`. + /// - The input `55f32` is regarded as an integer literal with suffix `f32`. + /// + /// # Example + /// + /// ``` + /// use litrs::Literal; + /// + /// assert_eq!(Literal::parse(r##"3.14f33"##).unwrap().suffix(), "f33"); + /// assert_eq!(Literal::parse(r##"123hackerman"##).unwrap().suffix(), "hackerman"); + /// assert_eq!(Literal::parse(r##"0x0fuck"##).unwrap().suffix(), "uck"); + /// assert_eq!(Literal::parse(r##"'🦊'good_boy"##).unwrap().suffix(), "good_boy"); + /// assert_eq!(Literal::parse(r##""toph"beifong"##).unwrap().suffix(), "beifong"); + /// ``` + pub fn suffix(&self) -> &str { + match self { + Literal::Bool(_) => "", + Literal::Integer(l) => l.suffix(), + Literal::Float(l) => l.suffix(), + Literal::Char(l) => l.suffix(), + Literal::String(l) => l.suffix(), + Literal::Byte(l) => l.suffix(), + Literal::ByteString(l) => l.suffix(), + } + } +} + +impl Literal<&str> { + /// Makes a copy of the underlying buffer and returns the owned version of + /// `Self`.
+ pub fn into_owned(self) -> Literal<String> { + match self { + Literal::Bool(l) => Literal::Bool(l.to_owned()), + Literal::Integer(l) => Literal::Integer(l.to_owned()), + Literal::Float(l) => Literal::Float(l.to_owned()), + Literal::Char(l) => Literal::Char(l.to_owned()), + Literal::String(l) => Literal::String(l.into_owned()), + Literal::Byte(l) => Literal::Byte(l.to_owned()), + Literal::ByteString(l) => Literal::ByteString(l.into_owned()), + } + } +} + +impl<B: Buffer> fmt::Display for Literal<B> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Literal::Bool(l) => l.fmt(f), + Literal::Integer(l) => l.fmt(f), + Literal::Float(l) => l.fmt(f), + Literal::Char(l) => l.fmt(f), + Literal::String(l) => l.fmt(f), + Literal::Byte(l) => l.fmt(f), + Literal::ByteString(l) => l.fmt(f), + } + } +} + + +// ============================================================================================== +// ===== Buffer +// ============================================================================================== + +/// A shared or owned string buffer. Implemented for `String` and `&str`. *Implementation detail*. +/// +/// This trait is an implementation detail of this library, cannot be +/// implemented in other crates and is not subject to semantic versioning. +/// `litrs` only guarantees that this trait is implemented for `String` and +/// `for<'a> &'a str`. +pub trait Buffer: sealed::Sealed + Deref<Target = str> { + /// This is `Cow<'static, str>` for `String`, and `Cow<'a, str>` for `&'a str`. + type Cow: From<String> + AsRef<str> + Borrow<str> + Deref<Target = str>; + + #[doc(hidden)] + fn into_cow(self) -> Self::Cow; + + /// This is `Cow<'static, [u8]>` for `String`, and `Cow<'a, [u8]>` for `&'a str`. + type ByteCow: From<Vec<u8>> + AsRef<[u8]> + Borrow<[u8]> + Deref<Target = [u8]>; + + #[doc(hidden)] + fn into_byte_cow(self) -> Self::ByteCow; + + /// Cuts away some characters at the beginning and some at the end.
Given + /// range has to be in bounds. + #[doc(hidden)] + fn cut(self, range: Range<usize>) -> Self; +} + +mod sealed { + pub trait Sealed {} +} + +impl<'a> sealed::Sealed for &'a str {} +impl<'a> Buffer for &'a str { + #[doc(hidden)] + fn cut(self, range: Range<usize>) -> Self { + &self[range] + } + + type Cow = Cow<'a, str>; + #[doc(hidden)] + fn into_cow(self) -> Self::Cow { + self.into() + } + type ByteCow = Cow<'a, [u8]>; + #[doc(hidden)] + fn into_byte_cow(self) -> Self::ByteCow { + self.as_bytes().into() + } +} + +impl sealed::Sealed for String {} +impl Buffer for String { + #[doc(hidden)] + fn cut(mut self, range: Range<usize>) -> Self { + // This is not the most efficient way, but it works. First we cut the + // end, then the beginning. Note that `drain` also removes the range if + // the iterator is not consumed. + self.truncate(range.end); + self.drain(..range.start); + self + } + + type Cow = Cow<'static, str>; + #[doc(hidden)] + fn into_cow(self) -> Self::Cow { + self.into() + } + + type ByteCow = Cow<'static, [u8]>; + #[doc(hidden)] + fn into_byte_cow(self) -> Self::ByteCow { + self.into_bytes().into() + } +} diff --git a/third_party/rust/litrs/src/parse.rs b/third_party/rust/litrs/src/parse.rs new file mode 100644 index 0000000000..efc6b870f6 --- /dev/null +++ b/third_party/rust/litrs/src/parse.rs @@ -0,0 +1,125 @@ +use crate::{ + BoolLit, + Buffer, + ByteLit, + ByteStringLit, + CharLit, + ParseError, + FloatLit, + IntegerLit, + Literal, + StringLit, + err::{perr, ParseErrorKind::{*, self}}, +}; + + +pub fn parse<B: Buffer>(input: B) -> Result<Literal<B>, ParseError> { + let (first, rest) = input.as_bytes().split_first().ok_or(perr(None, Empty))?; + let second = input.as_bytes().get(1).copied(); + + match first { + b'f' if &*input == "false" => Ok(Literal::Bool(BoolLit::False)), + b't' if &*input == "true" => Ok(Literal::Bool(BoolLit::True)), + + // A number literal (integer or float). 
+ b'0'..=b'9' => { + // To figure out whether this is a float or integer, we do some + // quick inspection here. Yes, this is technically duplicate + // work with what is happening in the integer/float parse + // methods, but it makes the code way easier for now and won't + // be a huge performance loss. + // + // The first non-decimal char in a float literal must + // be '.', 'e' or 'E'. + match input.as_bytes().get(1 + end_dec_digits(rest)) { + Some(b'.') | Some(b'e') | Some(b'E') + => FloatLit::parse(input).map(Literal::Float), + + _ => IntegerLit::parse(input).map(Literal::Integer), + } + }, + + b'\'' => CharLit::parse(input).map(Literal::Char), + b'"' | b'r' => StringLit::parse(input).map(Literal::String), + + b'b' if second == Some(b'\'') => ByteLit::parse(input).map(Literal::Byte), + b'b' if second == Some(b'r') || second == Some(b'"') + => ByteStringLit::parse(input).map(Literal::ByteString), + + _ => Err(perr(None, InvalidLiteral)), + } +} + + +pub(crate) fn first_byte_or_empty(s: &str) -> Result<u8, ParseError> { + s.as_bytes().get(0).copied().ok_or(perr(None, Empty)) +} + +/// Returns the index of the first non-underscore, non-decimal digit in `input`, +/// or the `input.len()` if all characters are decimal digits. +pub(crate) fn end_dec_digits(input: &[u8]) -> usize { + input.iter() + .position(|b| !matches!(b, b'_' | b'0'..=b'9')) + .unwrap_or(input.len()) +} + +pub(crate) fn hex_digit_value(digit: u8) -> Option<u8> { + match digit { + b'0'..=b'9' => Some(digit - b'0'), + b'a'..=b'f' => Some(digit - b'a' + 10), + b'A'..=b'F' => Some(digit - b'A' + 10), + _ => None, + } +} + +/// Makes sure that `s` is a valid literal suffix. 
+pub(crate) fn check_suffix(s: &str) -> Result<(), ParseErrorKind> { + if s.is_empty() { + return Ok(()); + } + + let mut chars = s.chars(); + let first = chars.next().unwrap(); + let rest = chars.as_str(); + if first == '_' && rest.is_empty() { + return Err(InvalidSuffix); + } + + // This is just an extra check to improve the error message. If the first + // character of the "suffix" is already some invalid ASCII + // char, "unexpected character" seems like the more fitting error. + if first.is_ascii() && !(first.is_ascii_alphabetic() || first == '_') { + return Err(UnexpectedChar); + } + + // Proper check is optional as it's not really necessary in proc macro + // context. + #[cfg(feature = "check_suffix")] + fn is_valid_suffix(first: char, rest: &str) -> bool { + use unicode_xid::UnicodeXID; + + (first == '_' || first.is_xid_start()) + && rest.chars().all(|c| c.is_xid_continue()) + } + + // When avoiding the dependency on `unicode_xid`, we just do a best effort + // to catch the most common errors. + #[cfg(not(feature = "check_suffix"))] + fn is_valid_suffix(first: char, rest: &str) -> bool { + if first.is_ascii() && !(first.is_ascii_alphabetic() || first == '_') { + return false; + } + for c in rest.chars() { + if c.is_ascii() && !(c.is_ascii_alphanumeric() || c == '_') { + return false; + } + } + true + } + + if is_valid_suffix(first, rest) { + Ok(()) + } else { + Err(InvalidSuffix) + } +} diff --git a/third_party/rust/litrs/src/string/mod.rs b/third_party/rust/litrs/src/string/mod.rs new file mode 100644 index 0000000000..d2034a62a9 --- /dev/null +++ b/third_party/rust/litrs/src/string/mod.rs @@ -0,0 +1,125 @@ +use std::{fmt, ops::Range}; + +use crate::{ + Buffer, ParseError, + err::{perr, ParseErrorKind::*}, + escape::{scan_raw_string, unescape_string}, + parse::first_byte_or_empty, +}; + + +/// A string or raw string literal, e.g. `"foo"`, `"Grüße"` or `r#"a🦊c"d🦀f"#`. +/// +/// See [the reference][ref] for more information. 
+/// +/// [ref]: https://doc.rust-lang.org/reference/tokens.html#string-literals +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct StringLit<B: Buffer> { + /// The raw input. + raw: B, + + /// The string value (with all escapes unescaped), or `None` if there were + /// no escapes. In the latter case, the string value is in `raw`. + value: Option<String>, + + /// The number of hash signs in case of a raw string literal, or `None` if + /// it's not a raw string literal. + num_hashes: Option<u32>, + + /// Start index of the suffix or `raw.len()` if there is no suffix. + start_suffix: usize, +} + +impl<B: Buffer> StringLit<B> { + /// Parses the input as a (raw) string literal. Returns an error if the + /// input is invalid or represents a different kind of literal. + pub fn parse(input: B) -> Result<Self, ParseError> { + match first_byte_or_empty(&input)? { + b'r' | b'"' => { + let (value, num_hashes, start_suffix) = parse_impl(&input)?; + Ok(Self { raw: input, value, num_hashes, start_suffix }) + } + _ => Err(perr(0, InvalidStringLiteralStart)), + } + } + + /// Returns the string value this literal represents (where all escapes have + /// been turned into their respective values). + pub fn value(&self) -> &str { + self.value.as_deref().unwrap_or(&self.raw[self.inner_range()]) + } + + /// Like `value` but returns a potentially owned version of the value. + /// + /// The return value is either `Cow<'static, str>` if `B = String`, or + /// `Cow<'a, str>` if `B = &'a str`. + pub fn into_value(self) -> B::Cow { + let inner_range = self.inner_range(); + let Self { raw, value, .. } = self; + value.map(B::Cow::from).unwrap_or_else(|| raw.cut(inner_range).into_cow()) + } + + /// The optional suffix. Returns `""` if the suffix is empty/does not exist. + pub fn suffix(&self) -> &str { + &(*self.raw)[self.start_suffix..] + } + + /// Returns whether this literal is a raw string literal (starting with + /// `r`). 
+ pub fn is_raw_string(&self) -> bool { + self.num_hashes.is_some() + } + + /// Returns the raw input that was passed to `parse`. + pub fn raw_input(&self) -> &str { + &self.raw + } + + /// Returns the raw input that was passed to `parse`, potentially owned. + pub fn into_raw_input(self) -> B { + self.raw + } + + /// The range within `self.raw` that excludes the quotes and potential `r#`. + fn inner_range(&self) -> Range<usize> { + match self.num_hashes { + None => 1..self.start_suffix - 1, + Some(n) => 1 + n as usize + 1..self.start_suffix - n as usize - 1, + } + } +} + +impl StringLit<&str> { + /// Makes a copy of the underlying buffer and returns the owned version of + /// `Self`. + pub fn into_owned(self) -> StringLit<String> { + StringLit { + raw: self.raw.to_owned(), + value: self.value, + num_hashes: self.num_hashes, + start_suffix: self.start_suffix, + } + } +} + +impl<B: Buffer> fmt::Display for StringLit<B> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.pad(&self.raw) + } +} + +/// Precondition: input has to start with either `"` or `r`. +#[inline(never)] +pub(crate) fn parse_impl(input: &str) -> Result<(Option<String>, Option<u32>, usize), ParseError> { + if input.starts_with('r') { + scan_raw_string::<char>(&input, 1) + .map(|(v, hashes, start_suffix)| (v, Some(hashes), start_suffix)) + } else { + unescape_string::<char>(&input, 1) + .map(|(v, start_suffix)| (v, None, start_suffix)) + } +} + + +#[cfg(test)] +mod tests; diff --git a/third_party/rust/litrs/src/string/tests.rs b/third_party/rust/litrs/src/string/tests.rs new file mode 100644 index 0000000000..1c0cb63061 --- /dev/null +++ b/third_party/rust/litrs/src/string/tests.rs @@ -0,0 +1,278 @@ +use crate::{Literal, StringLit, test_util::{assert_parse_ok_eq, assert_roundtrip}}; + +// ===== Utility functions ======================================================================= + +macro_rules! 
check { + ($lit:literal, $has_escapes:expr, $num_hashes:expr) => { + check!($lit, stringify!($lit), $has_escapes, $num_hashes, "") + }; + ($lit:literal, $input:expr, $has_escapes:expr, $num_hashes:expr, $suffix:literal) => { + let input = $input; + let expected = StringLit { + raw: input, + value: if $has_escapes { Some($lit.to_string()) } else { None }, + num_hashes: $num_hashes, + start_suffix: input.len() - $suffix.len(), + }; + + assert_parse_ok_eq(input, StringLit::parse(input), expected.clone(), "StringLit::parse"); + assert_parse_ok_eq( + input, Literal::parse(input), Literal::String(expected.clone()), "Literal::parse"); + let lit = StringLit::parse(input).unwrap(); + assert_eq!(lit.value(), $lit); + assert_eq!(lit.suffix(), $suffix); + assert_eq!(lit.into_value(), $lit); + assert_roundtrip(expected.into_owned(), input); + }; +} + + +// ===== Actual tests ============================================================================ + +#[test] +fn simple() { + check!("", false, None); + check!("a", false, None); + check!("peter", false, None); + check!("Sei gegrüßt, Bärthelt!", false, None); + check!("أنا لا أتحدث العربية", false, None); + check!("お前はもう死んでいる", false, None); + check!("Пушки - интересные музыкальные инструменты", false, None); + check!("lit 👌 😂 af", false, None); +} + +#[test] +fn special_whitespace() { + let strings = ["\n", "\t", "foo\tbar", "🦊\n"]; + + for &s in &strings { + let input = format!(r#""{}""#, s); + let input_raw = format!(r#"r"{}""#, s); + for (input, num_hashes) in vec![(input, None), (input_raw, Some(0))] { + let expected = StringLit { + raw: &*input, + value: None, + num_hashes, + start_suffix: input.len(), + }; + assert_parse_ok_eq( + &input, StringLit::parse(&*input), expected.clone(), "StringLit::parse"); + assert_parse_ok_eq( + &input, Literal::parse(&*input), Literal::String(expected), "Literal::parse"); + assert_eq!(StringLit::parse(&*input).unwrap().value(), s); + 
assert_eq!(StringLit::parse(&*input).unwrap().into_value(), s); + } + } +} + +#[test] +fn simple_escapes() { + check!("a\nb", true, None); + check!("\nb", true, None); + check!("a\n", true, None); + check!("\n", true, None); + + check!("\x60犬 \t 猫\r馬\n うさぎ \0ネズミ", true, None); + check!("నా \\పిల్లి లావుగా ఉంది", true, None); + check!("నా \\పిల్లి లావుగా 🐈\"ఉంది", true, None); + check!("\\నా\\ పిల్లి లావుగా\" ఉంది\"", true, None); + check!("\"నా \\🐈 పిల్లి లావుగా \" ఉంది\\", true, None); + + check!("\x00", true, None); + check!(" \x01", true, None); + check!("\x0c 🦊", true, None); + check!(" 🦊\x0D ", true, None); + check!("\\x13", true, None); + check!("\"x30", true, None); +} + +#[test] +fn unicode_escapes() { + check!("\u{0}", true, None); + check!(" \u{00}", true, None); + check!("\u{b} ", true, None); + check!(" \u{B} ", true, None); + check!("\u{7e}", true, None); + check!("నక్క\u{E4}", true, None); + check!("\u{e4} నక్క", true, None); + check!(" \u{fc}నక్క ", true, None); + check!("\u{Fc}", true, None); + check!("\u{fC}🦊\nлиса", true, None); + check!("лиса\u{FC}", true, None); + check!("лиса\u{b10}నక్క🦊", true, None); + check!("\"నక్క\u{B10}", true, None); + check!("лиса\\\u{0b10}", true, None); + check!("ли🦊са\\\"\u{0b10}", true, None); + check!("నక్క\\\\u{0b10}", true, None); + check!("\u{2764}Füchsin", true, None); + check!("Füchse \u{1f602}", true, None); + check!("cd\u{1F602}ab", true, None); + + check!("\u{0}🦊", true, None); + check!("лиса\u{0__}", true, None); + check!("\\🦊\u{3_b}", true, None); + check!("🦊\u{1_F_6_0_2}Füchsin", true, None); + check!("నక్క\\\u{1_F6_02_____}నక్క", true, None); +} + +#[test] +fn string_continue() { + check!("నక్క\ + bar", true, None); + check!("foo\ +🦊", true, None); + + check!("foo\ + + banana", true, None); + + // Weird whitespace characters + let lit = StringLit::parse("\"foo\\\n\r\t\n \n\tbar\"").expect("failed to parse"); + assert_eq!(lit.value(), "foobar"); + let lit = 
StringLit::parse("\"foo\\\n\u{85}bar\"").expect("failed to parse"); + assert_eq!(lit.value(), "foo\u{85}bar"); + let lit = StringLit::parse("\"foo\\\n\u{a0}bar\"").expect("failed to parse"); + assert_eq!(lit.value(), "foo\u{a0}bar"); + + // Raw strings do not handle "string continues" + check!(r"foo\ + bar", false, Some(0)); +} + +#[test] +fn crlf_newlines() { + let lit = StringLit::parse("\"foo\r\nbar\"").expect("failed to parse"); + assert_eq!(lit.value(), "foo\nbar"); + + let lit = StringLit::parse("\"\r\nbar\"").expect("failed to parse"); + assert_eq!(lit.value(), "\nbar"); + + let lit = StringLit::parse("\"лиса\r\n\"").expect("failed to parse"); + assert_eq!(lit.value(), "лиса\n"); + + let lit = StringLit::parse("r\"foo\r\nbar\"").expect("failed to parse"); + assert_eq!(lit.value(), "foo\nbar"); + + let lit = StringLit::parse("r#\"\r\nbar\"#").expect("failed to parse"); + assert_eq!(lit.value(), "\nbar"); + + let lit = StringLit::parse("r##\"лиса\r\n\"##").expect("failed to parse"); + assert_eq!(lit.value(), "лиса\n"); +} + +#[test] +fn raw_string() { + check!(r"", false, Some(0)); + check!(r"a", false, Some(0)); + check!(r"peter", false, Some(0)); + check!(r"Sei gegrüßt, Bärthelt!", false, Some(0)); + check!(r"أنا لا أتحدث العربية", false, Some(0)); + check!(r"お前はもう死んでいる", false, Some(0)); + check!(r"Пушки - интересные музыкальные инструменты", false, Some(0)); + check!(r"lit 👌 😂 af", false, Some(0)); + + check!(r#""#, false, Some(1)); + check!(r#"a"#, false, Some(1)); + check!(r##"peter"##, false, Some(2)); + check!(r###"Sei gegrüßt, Bärthelt!"###, false, Some(3)); + check!(r########"lit 👌 😂 af"########, false, Some(8)); + + check!(r#"foo " bar"#, false, Some(1)); + check!(r##"foo " bar"##, false, Some(2)); + check!(r#"foo """" '"'" bar"#, false, Some(1)); + check!(r#""foo""#, false, Some(1)); + check!(r###""foo'"###, false, Some(3)); + check!(r#""x'#_#s'"#, false, Some(1)); + check!(r"#", false, Some(0)); + check!(r"foo#", false, Some(0)); + 
check!(r"##bar", false, Some(0)); + check!(r###""##foo"##bar'"###, false, Some(3)); + + check!(r"さび\n\t\r\0\\x60\u{123}フェリス", false, Some(0)); + check!(r#"さび\n\t\r\0\\x60\u{123}フェリス"#, false, Some(1)); +} + +#[test] +fn suffixes() { + check!("hello", r###""hello"suffix"###, false, None, "suffix"); + check!(r"お前はもう死んでいる", r###"r"お前はもう死んでいる"_banana"###, false, Some(0), "_banana"); + check!("fox", r#""fox"peter"#, false, None, "peter"); + check!("🦊", r#""🦊"peter"#, false, None, "peter"); + check!("నక్క\\\\u{0b10}", r###""నక్క\\\\u{0b10}"jü_rgen"###, true, None, "jü_rgen"); +} + +#[test] +fn parse_err() { + assert_err!(StringLit, r#"""#, UnterminatedString, None); + assert_err!(StringLit, r#""犬"#, UnterminatedString, None); + assert_err!(StringLit, r#""Jürgen"#, UnterminatedString, None); + assert_err!(StringLit, r#""foo bar baz"#, UnterminatedString, None); + + assert_err!(StringLit, r#""fox"peter""#, InvalidSuffix, 5); + assert_err!(StringLit, r###"r#"foo "# bar"#"###, UnexpectedChar, 9); + + assert_err!(StringLit, "\"\r\"", IsolatedCr, 1); + assert_err!(StringLit, "\"fo\rx\"", IsolatedCr, 3); + assert_err!(StringLit, "r\"\r\"", IsolatedCr, 2); + assert_err!(StringLit, "r\"fo\rx\"", IsolatedCr, 4); + + assert_err!(StringLit, r##"r####""##, UnterminatedRawString, None); + assert_err!(StringLit, r#####"r##"foo"#bar"#####, UnterminatedRawString, None); + assert_err!(StringLit, r##"r####"##, InvalidLiteral, None); + assert_err!(StringLit, r##"r####x"##, InvalidLiteral, None); +} + +#[test] +fn invald_ascii_escapes() { + assert_err!(StringLit, r#""\x80""#, NonAsciiXEscape, 1..5); + assert_err!(StringLit, r#""🦊\x81""#, NonAsciiXEscape, 5..9); + assert_err!(StringLit, r#"" \x8a""#, NonAsciiXEscape, 2..6); + assert_err!(StringLit, r#""\x8Ff""#, NonAsciiXEscape, 1..5); + assert_err!(StringLit, r#""\xa0 ""#, NonAsciiXEscape, 1..5); + assert_err!(StringLit, r#""నక్క\xB0""#, NonAsciiXEscape, 13..17); + assert_err!(StringLit, r#""\xc3నక్క""#, NonAsciiXEscape, 1..5); + 
assert_err!(StringLit, r#""\xDf🦊""#, NonAsciiXEscape, 1..5); + assert_err!(StringLit, r#""నక్క\xffనక్క""#, NonAsciiXEscape, 13..17); + assert_err!(StringLit, r#""\xfF ""#, NonAsciiXEscape, 1..5); + assert_err!(StringLit, r#"" \xFf""#, NonAsciiXEscape, 2..6); + assert_err!(StringLit, r#""నక్క \xFF""#, NonAsciiXEscape, 15..19); +} + +#[test] +fn invalid_escapes() { + assert_err!(StringLit, r#""\a""#, UnknownEscape, 1..3); + assert_err!(StringLit, r#""foo\y""#, UnknownEscape, 4..6); + assert_err!(StringLit, r#""\"#, UnterminatedEscape, 1); + assert_err!(StringLit, r#""\x""#, UnterminatedEscape, 1..3); + assert_err!(StringLit, r#""🦊\x1""#, UnterminatedEscape, 5..8); + assert_err!(StringLit, r#"" \xaj""#, InvalidXEscape, 2..6); + assert_err!(StringLit, r#""నక్క\xjb""#, InvalidXEscape, 13..17); +} + +#[test] +fn invalid_unicode_escapes() { + assert_err!(StringLit, r#""\u""#, UnicodeEscapeWithoutBrace, 1..3); + assert_err!(StringLit, r#""🦊\u ""#, UnicodeEscapeWithoutBrace, 5..7); + assert_err!(StringLit, r#""\u3""#, UnicodeEscapeWithoutBrace, 1..3); + + assert_err!(StringLit, r#""\u{""#, UnterminatedUnicodeEscape, 1..4); + assert_err!(StringLit, r#""\u{12""#, UnterminatedUnicodeEscape, 1..6); + assert_err!(StringLit, r#""🦊\u{a0b""#, UnterminatedUnicodeEscape, 5..11); + assert_err!(StringLit, r#""\u{a0_b ""#, UnterminatedUnicodeEscape, 1..10); + + assert_err!(StringLit, r#""\u{_}నక్క""#, InvalidStartOfUnicodeEscape, 4); + assert_err!(StringLit, r#""\u{_5f}""#, InvalidStartOfUnicodeEscape, 4); + + assert_err!(StringLit, r#""fox\u{x}""#, NonHexDigitInUnicodeEscape, 7); + assert_err!(StringLit, r#""\u{0x}🦊""#, NonHexDigitInUnicodeEscape, 5); + assert_err!(StringLit, r#""నక్క\u{3bx}""#, NonHexDigitInUnicodeEscape, 18); + assert_err!(StringLit, r#""\u{3b_x}лиса""#, NonHexDigitInUnicodeEscape, 7); + assert_err!(StringLit, r#""\u{4x_}""#, NonHexDigitInUnicodeEscape, 5); + + assert_err!(StringLit, r#""\u{1234567}""#, TooManyDigitInUnicodeEscape, 10); + assert_err!(StringLit, 
r#""నక్క\u{1234567}🦊""#, TooManyDigitInUnicodeEscape, 22); + assert_err!(StringLit, r#""నక్క\u{1_23_4_56_7}""#, TooManyDigitInUnicodeEscape, 26); + assert_err!(StringLit, r#""\u{abcdef123}лиса""#, TooManyDigitInUnicodeEscape, 10); + + assert_err!(StringLit, r#""\u{110000}fox""#, InvalidUnicodeEscapeChar, 1..10); +} diff --git a/third_party/rust/litrs/src/test_util.rs b/third_party/rust/litrs/src/test_util.rs new file mode 100644 index 0000000000..fd284e984e --- /dev/null +++ b/third_party/rust/litrs/src/test_util.rs @@ -0,0 +1,128 @@ +use crate::*; +use std::fmt::{Debug, Display}; + + +#[track_caller] +pub(crate) fn assert_parse_ok_eq<T: PartialEq + Debug + Display>( + input: &str, + result: Result<T, ParseError>, + expected: T, + parse_method: &str, +) { + match result { + Ok(actual) if actual == expected => { + if actual.to_string() != input { + panic!( + "formatting does not yield original input `{}`: {:?}", + input, + actual, + ); + } + } + Ok(actual) => { + panic!( + "unexpected parsing result (with `{}`) for `{}`:\nactual: {:?}\nexpected: {:?}", + parse_method, + input, + actual, + expected, + ); + } + Err(e) => { + panic!( + "expected `{}` to be parsed (with `{}`) successfully, but it failed: {:?}", + input, + parse_method, + e, + ); + } + } +} + +// This is not ideal, but to perform this check we need `proc-macro2`. So we +// just don't do anything if that feature is not enabled. 
/// Stand-in used when the `proc-macro2` feature is disabled: does nothing.
#[cfg(not(feature = "proc-macro2"))]
pub(crate) fn assert_roundtrip<T>(_: T, _: &str) {}

/// Checks that `ours` converts to and from `proc_macro2::Literal` consistently.
///
/// `input` is parsed as a `proc_macro2::Literal`. The function panics if
/// converting `ours` into a `proc_macro2::Literal` prints differently from that
/// parsed literal, or if converting the parsed literal back into `T` fails or
/// yields a value different from `ours`.
#[cfg(feature = "proc-macro2")]
#[track_caller]
pub(crate) fn assert_roundtrip<T>(ours: T, input: &str)
where
    T: std::convert::TryFrom<proc_macro2::Literal> + fmt::Debug + PartialEq + Clone,
    proc_macro2::Literal: From<T>,
    <T as std::convert::TryFrom<proc_macro2::Literal>>::Error: std::fmt::Display,
{
    let pm_lit = input.parse::<proc_macro2::Literal>()
        .expect("failed to parse input as proc_macro2::Literal");
    let t_name = std::any::type_name::<T>();

    // Unfortunately, `proc_macro2::Literal` does not implement `PartialEq`, so
    // this is the next best thing.
    if proc_macro2::Literal::from(ours.clone()).to_string() != pm_lit.to_string() {
        panic!(
            "Converting {} to proc_macro2::Literal has unexpected result:\
                \nconverted: {:?}\nexpected: {:?}",
            t_name,
            proc_macro2::Literal::from(ours),
            pm_lit,
        );
    }

    // Opposite direction: `proc_macro2::Literal` -> `T`.
    match T::try_from(pm_lit) {
        Err(e) => {
            panic!("Trying to convert proc_macro2::Literal to {} results in error: {}", t_name, e);
        }
        Ok(res) => {
            if res != ours {
                panic!(
                    "Converting proc_macro2::Literal to {} has unexpected result:\
                        \nactual: {:?}\nexpected: {:?}",
                    t_name,
                    res,
                    ours,
                );
            }
        }
    }
}

/// Asserts that both `$ty::parse($input)` and `Literal::parse($input)` fail
/// with error kind `$kind` and the given span.
///
/// The span is written as `None`, as a single offset `n` or as a range
/// `a..b`; see `assert_err_single!` for how these are interpreted.
macro_rules! assert_err {
    ($ty:ident, $input:literal, $kind:ident, $( $span:tt )+ ) => {
        assert_err_single!($ty::parse($input), $kind, $($span)+);
        assert_err_single!($crate::Literal::parse($input), $kind, $($span)+);
    };
}

/// Asserts that `$expr` evaluates to an `Err` whose `kind` is
/// `ParseErrorKind::$kind` and whose `span` equals the expected span.
///
/// Span forms (handled by the internal `@span` arms):
/// - `a .. b` expects `Some(a .. b)`,
/// - a single literal `n` expects `Some(n .. n + 1)`,
/// - `None` expects no span.
macro_rules! assert_err_single {
    ($expr:expr, $kind:ident, $( $span:tt )+ ) => {
        let res = $expr;
        let err = match res {
            Err(e) => e,
            Ok(v) => panic!(
                "Expected `{}` to return an error, but it returned Ok({:?})",
                stringify!($expr),
                v,
            ),
        };
        if err.kind != $crate::err::ParseErrorKind::$kind {
            panic!(
                "Expected error kind {} for `{}` but got {:?}",
                stringify!($kind),
                stringify!($expr),
                err.kind,
            )
        }
        let expected_span = assert_err_single!(@span $($span)+);
        if err.span != expected_span {
            panic!(
                "Expected error span {:?} for `{}` but got {:?}",
                expected_span,
                stringify!($expr),
                err.span,
            )
        }
    };
    (@span $start:literal .. $end:literal) => { Some($start .. $end) };
    (@span $at:literal) => { Some($at.. $at + 1) };
    (@span None) => { None };
}

// diff --git a/third_party/rust/litrs/src/tests.rs b/third_party/rust/litrs/src/tests.rs
// new file mode 100644
// index 0000000000..613b429540
// --- /dev/null
// +++ b/third_party/rust/litrs/src/tests.rs
// @@ -0,0 +1,349 @@
use crate::Literal;


// The empty input is reported with its own error kind, for every literal type
// checked via `assert_err!`.
#[test]
fn empty() {
    assert_err!(Literal, "", Empty, None);
}

// Inputs that are not the start of any literal: `InvalidLiteral` without span.
#[test]
fn invalid_literals() {
    assert_err_single!(Literal::parse("."), InvalidLiteral, None);
    assert_err_single!(Literal::parse("+"), InvalidLiteral, None);
    assert_err_single!(Literal::parse("-"), InvalidLiteral, None);
    assert_err_single!(Literal::parse("e"), InvalidLiteral, None);
    assert_err_single!(Literal::parse("e8"), InvalidLiteral, None);
    assert_err_single!(Literal::parse("f32"), InvalidLiteral, None);
    assert_err_single!(Literal::parse("foo"), InvalidLiteral, None);
    assert_err_single!(Literal::parse("inf"), InvalidLiteral, None);
    assert_err_single!(Literal::parse("nan"), InvalidLiteral, None);
    assert_err_single!(Literal::parse("NaN"), InvalidLiteral, None);
    assert_err_single!(Literal::parse("NAN"), InvalidLiteral, None);
    assert_err_single!(Literal::parse("_2.7"), InvalidLiteral, None);
    assert_err_single!(Literal::parse(".5"), InvalidLiteral, None);
}

// Assorted malformed inputs: stray characters after or before a number are
// reported either as `UnexpectedChar` with a span or as `InvalidLiteral`.
#[test]
fn misc() {
    assert_err_single!(Literal::parse("0x44.5"), UnexpectedChar, 4..6);
    assert_err_single!(Literal::parse("a"), InvalidLiteral, None);
    assert_err_single!(Literal::parse(";"), InvalidLiteral, None);
    assert_err_single!(Literal::parse("0;"), UnexpectedChar, 1);
    assert_err_single!(Literal::parse(" 0"), InvalidLiteral, None);
    assert_err_single!(Literal::parse("0 "), UnexpectedChar, 1);
    assert_err_single!(Literal::parse("_"), InvalidLiteral, None);
    assert_err_single!(Literal::parse("_3"), InvalidLiteral, None);
    assert_err_single!(Literal::parse("a_123"), InvalidLiteral, None);
    assert_err_single!(Literal::parse("B_123"), InvalidLiteral, None);
}

// Runs every `parse` function of the crate on the given byte array (which has
// to be valid UTF-8) and makes sure none of them panics. The parse results
// themselves are ignored. If a panic is caught, the offending input is printed
// and the panic is re-raised so that the test fails.
macro_rules! assert_no_panic {
    ($input:expr) => {
        let arr = $input;
        let input = std::str::from_utf8(&arr).expect("not unicode");
        let res = std::panic::catch_unwind(move || {
            let _ = Literal::parse(input);
            let _ = crate::BoolLit::parse(input);
            let _ = crate::IntegerLit::parse(input);
            let _ = crate::FloatLit::parse(input);
            let _ = crate::CharLit::parse(input);
            let _ = crate::StringLit::parse(input);
            let _ = crate::ByteLit::parse(input);
            let _ = crate::ByteStringLit::parse(input);
        });

        if let Err(e) = res {
            println!("\n!!! panic for: {:?}", input);
            std::panic::resume_unwind(e);
        }
    };
}

// Tries every input of length 1, 2 and 3 built from the bytes 0..128.
// Marked `#[ignore]`, so it is skipped unless ignored tests are requested.
#[test]
#[ignore]
fn never_panic_up_to_3() {
    for a in 0..128 {
        assert_no_panic!([a]);
        for b in 0..128 {
            assert_no_panic!([a, b]);
            for c in 0..128 {
                assert_no_panic!([a, b, c]);
            }
        }
    }
}

// This test takes super long in debug mode, but in release mode it's fine.
// Tries every input of length 4 built from the bytes 0..128 (128^4 inputs).
#[test]
#[ignore]
fn never_panic_len_4() {
    for a in 0..128 {
        for b in 0..128 {
            for c in 0..128 {
                for d in 0..128 {
                    assert_no_panic!([a, b, c, d]);
                }
            }
        }
    }
}

// Conversions between `proc_macro2` tokens and the literal types of this
// crate, including the `InvalidToken` errors for mismatching token kinds.
#[cfg(feature = "proc-macro2")]
#[test]
fn proc_macro() {
    use std::convert::TryFrom;
    use proc_macro2::{
        self as pm2, TokenTree, Group, TokenStream, Delimiter, Spacing, Punct, Span, Ident,
    };
    use crate::{
        BoolLit, ByteLit, ByteStringLit, CharLit, FloatLit, IntegerLit, StringLit, err::TokenKind
    };


    // Unwraps the error of `$input` and compares its `expected` and `actual`
    // token kinds with the given ones.
    macro_rules! assert_invalid_token {
        ($input:expr, expected: $expected:path, actual: $actual:path $(,)?) => {
            let err = $input.unwrap_err();
            if err.expected != $expected {
                panic!(
                    "err.expected was expected to be {:?}, but is {:?}",
                    $expected,
                    err.expected,
                );
            }
            if err.actual != $actual {
                panic!("err.actual was expected to be {:?}, but is {:?}", $actual, err.actual);
            }
        };
    }


    // Literals constructed through the `proc_macro2` API ...
    let pm_u16_lit = pm2::Literal::u16_suffixed(2700);
    let pm_i16_lit = pm2::Literal::i16_unsuffixed(3912);
    let pm_f32_lit = pm2::Literal::f32_unsuffixed(3.14);
    let pm_f64_lit = pm2::Literal::f64_suffixed(99.3);
    let pm_string_lit = pm2::Literal::string("hello 🦊");
    let pm_bytestr_lit = pm2::Literal::byte_string(b"hello \nfoxxo");
    let pm_char_lit = pm2::Literal::character('🦀');

    // ... and the literals they are expected to be equal to, parsed from text.
    let u16_lit = Literal::parse("2700u16".to_string()).unwrap();
    let i16_lit = Literal::parse("3912".to_string()).unwrap();
    let f32_lit = Literal::parse("3.14".to_string()).unwrap();
    let f64_lit = Literal::parse("99.3f64".to_string()).unwrap();
    let string_lit = Literal::parse(r#""hello 🦊""#.to_string()).unwrap();
    let bytestr_lit = Literal::parse(r#"b"hello \nfoxxo""#.to_string()).unwrap();
    let char_lit = Literal::parse("'🦀'".to_string()).unwrap();

    // `&proc_macro2::Literal` -> `Literal`
    assert_eq!(Literal::from(&pm_u16_lit), u16_lit);
    assert_eq!(Literal::from(&pm_i16_lit), i16_lit);
    assert_eq!(Literal::from(&pm_f32_lit), f32_lit);
    assert_eq!(Literal::from(&pm_f64_lit), f64_lit);
    assert_eq!(Literal::from(&pm_string_lit), string_lit);
    assert_eq!(Literal::from(&pm_bytestr_lit), bytestr_lit);
    assert_eq!(Literal::from(&pm_char_lit), char_lit);


    let group = TokenTree::from(Group::new(Delimiter::Brace, TokenStream::new()));
    let punct = TokenTree::from(Punct::new(':', Spacing::Alone));
    let ident = TokenTree::from(Ident::new("peter", Span::call_site()));

    // `TokenTree` -> `Literal`: only literal tokens are accepted.
    assert_eq!(
        Literal::try_from(TokenTree::Literal(pm2::Literal::string("hello 🦊"))).unwrap(),
        Literal::String(StringLit::parse(r#""hello 🦊""#.to_string()).unwrap()),
    );
    assert_invalid_token!(
        Literal::try_from(punct.clone()),
        expected: TokenKind::Literal,
        actual: TokenKind::Punct,
    );
    assert_invalid_token!(
        Literal::try_from(group.clone()),
        expected: TokenKind::Literal,
        actual: TokenKind::Group,
    );
    assert_invalid_token!(
        Literal::try_from(ident.clone()),
        expected: TokenKind::Literal,
        actual: TokenKind::Ident,
    );


    // `proc_macro2::Literal` -> specific literal type, matching kind.
    assert_eq!(Literal::from(IntegerLit::try_from(pm_u16_lit.clone()).unwrap()), u16_lit);
    assert_eq!(Literal::from(IntegerLit::try_from(pm_i16_lit.clone()).unwrap()), i16_lit);
    assert_eq!(Literal::from(FloatLit::try_from(pm_f32_lit.clone()).unwrap()), f32_lit);
    assert_eq!(Literal::from(FloatLit::try_from(pm_f64_lit.clone()).unwrap()), f64_lit);
    assert_eq!(Literal::from(StringLit::try_from(pm_string_lit.clone()).unwrap()), string_lit);
    assert_eq!(
        Literal::from(ByteStringLit::try_from(pm_bytestr_lit.clone()).unwrap()),
        bytestr_lit,
    );
    assert_eq!(Literal::from(CharLit::try_from(pm_char_lit.clone()).unwrap()), char_lit);

    // `proc_macro2::Literal` -> specific literal type, mismatching kind.
    assert_invalid_token!(
        StringLit::try_from(pm_u16_lit.clone()),
        expected: TokenKind::StringLit,
        actual: TokenKind::IntegerLit,
    );
    assert_invalid_token!(
        StringLit::try_from(pm_f32_lit.clone()),
        expected: TokenKind::StringLit,
        actual: TokenKind::FloatLit,
    );
    assert_invalid_token!(
        ByteLit::try_from(pm_bytestr_lit.clone()),
        expected: TokenKind::ByteLit,
        actual: TokenKind::ByteStringLit,
    );
    assert_invalid_token!(
        ByteLit::try_from(pm_i16_lit.clone()),
        expected: TokenKind::ByteLit,
        actual: TokenKind::IntegerLit,
    );
    assert_invalid_token!(
        IntegerLit::try_from(pm_string_lit.clone()),
        expected: TokenKind::IntegerLit,
        actual: TokenKind::StringLit,
    );
    assert_invalid_token!(
        IntegerLit::try_from(pm_char_lit.clone()),
        expected: TokenKind::IntegerLit,
        actual: TokenKind::CharLit,
    );


    // `TokenTree` -> specific literal type, matching kind.
    assert_eq!(
        Literal::from(IntegerLit::try_from(TokenTree::from(pm_u16_lit.clone())).unwrap()),
        u16_lit,
    );
    assert_eq!(
        Literal::from(IntegerLit::try_from(TokenTree::from(pm_i16_lit.clone())).unwrap()),
        i16_lit,
    );
    assert_eq!(
        Literal::from(FloatLit::try_from(TokenTree::from(pm_f32_lit.clone())).unwrap()),
        f32_lit,
    );
    assert_eq!(
        Literal::from(FloatLit::try_from(TokenTree::from(pm_f64_lit.clone())).unwrap()),
        f64_lit,
    );
    assert_eq!(
        Literal::from(StringLit::try_from(TokenTree::from(pm_string_lit.clone())).unwrap()),
        string_lit,
    );
    assert_eq!(
        Literal::from(ByteStringLit::try_from(TokenTree::from(pm_bytestr_lit.clone())).unwrap()),
        bytestr_lit,
    );
    assert_eq!(
        Literal::from(CharLit::try_from(TokenTree::from(pm_char_lit.clone())).unwrap()),
        char_lit,
    );

    // `TokenTree` -> specific literal type, mismatching literal kind.
    assert_invalid_token!(
        StringLit::try_from(TokenTree::from(pm_u16_lit.clone())),
        expected: TokenKind::StringLit,
        actual: TokenKind::IntegerLit,
    );
    assert_invalid_token!(
        StringLit::try_from(TokenTree::from(pm_f32_lit.clone())),
        expected: TokenKind::StringLit,
        actual: TokenKind::FloatLit,
    );
    assert_invalid_token!(
        BoolLit::try_from(TokenTree::from(pm_bytestr_lit.clone())),
        expected: TokenKind::BoolLit,
        actual: TokenKind::ByteStringLit,
    );
    assert_invalid_token!(
        BoolLit::try_from(TokenTree::from(pm_i16_lit.clone())),
        expected: TokenKind::BoolLit,
        actual: TokenKind::IntegerLit,
    );
    assert_invalid_token!(
        IntegerLit::try_from(TokenTree::from(pm_string_lit.clone())),
        expected: TokenKind::IntegerLit,
        actual: TokenKind::StringLit,
    );
    assert_invalid_token!(
        IntegerLit::try_from(TokenTree::from(pm_char_lit.clone())),
        expected: TokenKind::IntegerLit,
        actual: TokenKind::CharLit,
    );

    // `TokenTree` -> specific literal type, token is not a literal at all.
    assert_invalid_token!(
        StringLit::try_from(TokenTree::from(group)),
        expected: TokenKind::StringLit,
        actual: TokenKind::Group,
    );
    assert_invalid_token!(
        BoolLit::try_from(TokenTree::from(punct)),
        expected: TokenKind::BoolLit,
        actual: TokenKind::Punct,
    );
    assert_invalid_token!(
        FloatLit::try_from(TokenTree::from(ident)),
        expected: TokenKind::FloatLit,
        actual: TokenKind::Ident,
    );
}

// Bool literals arrive as identifiers in token trees: exactly `true` and
// `false` convert, every other identifier is an error.
#[cfg(feature = "proc-macro2")]
#[test]
fn bool_try_from_tt() {
    use std::convert::TryFrom;
    use proc_macro2::{Ident, Span, TokenTree};
    use crate::BoolLit;


    let ident = |s: &str| Ident::new(s, Span::call_site());

    assert_eq!(BoolLit::try_from(TokenTree::Ident(ident("true"))).unwrap(), BoolLit::True);
    assert_eq!(BoolLit::try_from(TokenTree::Ident(ident("false"))).unwrap(), BoolLit::False);

    assert!(BoolLit::try_from(TokenTree::Ident(ident("falsex"))).is_err());
    assert!(BoolLit::try_from(TokenTree::Ident(ident("_false"))).is_err());
    assert!(BoolLit::try_from(TokenTree::Ident(ident("False"))).is_err());
    assert!(BoolLit::try_from(TokenTree::Ident(ident("True"))).is_err());
    assert!(BoolLit::try_from(TokenTree::Ident(ident("ltrue"))).is_err());


    // Same checks through the generic `Literal` type.
    assert_eq!(
        Literal::try_from(TokenTree::Ident(ident("true"))).unwrap(),
        Literal::Bool(BoolLit::True),
    );
    assert_eq!(
        Literal::try_from(TokenTree::Ident(ident("false"))).unwrap(),
        Literal::Bool(BoolLit::False),
    );

    assert!(Literal::try_from(TokenTree::Ident(ident("falsex"))).is_err());
    assert!(Literal::try_from(TokenTree::Ident(ident("_false"))).is_err());
    assert!(Literal::try_from(TokenTree::Ident(ident("False"))).is_err());
    assert!(Literal::try_from(TokenTree::Ident(ident("True"))).is_err());
    assert!(Literal::try_from(TokenTree::Ident(ident("ltrue"))).is_err());
}

// Pins the exact `Display` text of `InvalidToken` for two kind combinations.
#[cfg(feature = "proc-macro2")]
#[test]
fn invalid_token_display() {
    use crate::{InvalidToken, err::TokenKind};

    let span = crate::err::Span::Two(proc_macro2::Span::call_site());
    assert_eq!(
        InvalidToken {
            actual: TokenKind::StringLit,
            expected: TokenKind::FloatLit,
            span,
        }.to_string(),
        r#"expected a float literal (e.g. `3.14`), but found a string literal (e.g. "Ferris")"#,
    );

    assert_eq!(
        InvalidToken {
            actual: TokenKind::Punct,
            expected: TokenKind::Literal,
            span,
        }.to_string(),
        r#"expected a literal, but found a punctuation character"#,
    );
}