use crate::Error; use proc_macro2::{Delimiter, Ident, Literal, Span, TokenStream, TokenTree}; use std::char; macro_rules! unexpected_content { () => { "expected one of: byte string literal, string literal, identifier" }; } pub(crate) fn parse_input(mut input: TokenStream) -> Result<(Vec, Span), Error> { loop { let mut tokens = input.into_iter(); let token = match tokens.next() { Some(token) => token, None => { return Err(Error( Span::call_site(), concat!("unexpected end of input, ", unexpected_content!()), )) } }; let span = token.span(); let result = match token { // Unwrap any empty group which may be created from macro expansion. TokenTree::Group(group) if group.delimiter() == Delimiter::None => Err(group), TokenTree::Literal(literal) => match parse_literal(literal) { Ok(result) => Ok(result), Err(msg) => return Err(Error(span, msg)), }, TokenTree::Ident(ident) => Ok(parse_ident(ident)), _ => return Err(Error(span, unexpected_content!())), }; if let Some(token) = tokens.next() { return Err(Error(token.span(), "unexpected token")); } match result { Ok(result) => return Ok((result, span)), Err(group) => input = group.stream(), } } } fn parse_literal(literal: Literal) -> Result, &'static str> { let s = literal.to_string(); let s = s.as_bytes(); match s[0] { b'"' => Ok(parse_cooked_content(s)), b'r' => Ok(parse_raw_content(&s[1..])), b'b' => match s[1] { b'"' => Ok(parse_cooked_content(&s[1..])), b'r' => Ok(parse_raw_content(&s[2..])), _ => Err(unexpected_content!()), }, _ => Err(unexpected_content!()), } } fn all_pounds(bytes: &[u8]) -> bool { bytes.iter().all(|b| *b == b'#') } /// Parses raw string / bytes content after `r` prefix. fn parse_raw_content(s: &[u8]) -> Vec { let q_start = s.iter().position(|b| *b == b'"').unwrap(); let q_end = s.iter().rposition(|b| *b == b'"').unwrap(); assert!(all_pounds(&s[0..q_start])); assert!(all_pounds(&s[q_end + 1..q_end + q_start + 1])); Vec::from(&s[q_start + 1..q_end]) } /// Parses the cooked string / bytes content within quotes. fn parse_cooked_content(mut s: &[u8]) -> Vec { s = &s[1..s.iter().rposition(|b| *b == b'"').unwrap()]; let mut result = Vec::new(); while !s.is_empty() { match s[0] { b'\\' => {} b'\r' => { assert_eq!(s[1], b'\n'); result.push(b'\n'); s = &s[2..]; continue; } b => { result.push(b); s = &s[1..]; continue; } } let b = s[1]; s = &s[2..]; match b { b'x' => { let (b, rest) = backslash_x(s); result.push(b); s = rest; } b'u' => { let (c, rest) = backslash_u(s); result.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes()); s = rest; } b'n' => result.push(b'\n'), b'r' => result.push(b'\r'), b't' => result.push(b'\t'), b'\\' => result.push(b'\\'), b'0' => result.push(b'\0'), b'\'' => result.push(b'\''), b'"' => result.push(b'"'), b'\r' | b'\n' => { let next = s.iter().position(|b| { let ch = char::from_u32(u32::from(*b)).unwrap(); !ch.is_whitespace() }); match next { Some(pos) => s = &s[pos..], None => s = b"", } } b => panic!("unexpected byte {:?} after \\", b), } } result } fn backslash_x(s: &[u8]) -> (u8, &[u8]) { let ch = hex_to_u8(s[0]) * 0x10 + hex_to_u8(s[1]); (ch, &s[2..]) } fn hex_to_u8(b: u8) -> u8 { match b { b'0'..=b'9' => b - b'0', b'a'..=b'f' => b - b'a' + 10, b'A'..=b'F' => b - b'A' + 10, _ => unreachable!("unexpected non-hex character {:?} after \\x", b), } } fn backslash_u(s: &[u8]) -> (char, &[u8]) { assert_eq!(s[0], b'{'); let end = s[1..].iter().position(|b| *b == b'}').unwrap(); let mut ch = 0; for b in &s[1..=end] { ch *= 0x10; ch += u32::from(hex_to_u8(*b)); } (char::from_u32(ch).unwrap(), &s[end + 2..]) } fn parse_ident(ident: Ident) -> Vec { ident.to_string().into_bytes() } #[cfg(test)] mod tests { use super::*; use std::str::FromStr; // Tests below were modified from // https://github.com/dtolnay/syn/blob/cd5fdc0f530f822446fccaf831669cd0cf4a0fc9/tests/test_lit.rs fn lit(s: &str) -> Vec { match TokenStream::from_str(s) .unwrap() .into_iter() .next() .unwrap() { TokenTree::Literal(lit) => parse_literal(lit).unwrap(), _ => panic!(), } } #[test] fn strings() { #[track_caller] fn test_string(s: &str, value: &[u8]) { assert_eq!(lit(s), value); } test_string("\"a\"", b"a"); test_string("\"\\n\"", b"\n"); test_string("\"\\r\"", b"\r"); test_string("\"\\t\"", b"\t"); test_string("\"🐕\"", b"\xf0\x9f\x90\x95"); // NOTE: This is an emoji test_string("\"\\\"\"", b"\""); test_string("\"'\"", b"'"); test_string("\"\"", b""); test_string("\"\\u{1F415}\"", b"\xf0\x9f\x90\x95"); test_string( "\"contains\nnewlines\\\nescaped newlines\"", b"contains\nnewlinesescaped newlines", ); test_string("r\"raw\nstring\\\nhere\"", b"raw\nstring\\\nhere"); test_string("\"...\"q", b"..."); test_string("r\"...\"q", b"..."); test_string("r##\"...\"##q", b"..."); } #[test] fn byte_strings() { #[track_caller] fn test_byte_string(s: &str, value: &[u8]) { assert_eq!(lit(s), value); } test_byte_string("b\"a\"", b"a"); test_byte_string("b\"\\n\"", b"\n"); test_byte_string("b\"\\r\"", b"\r"); test_byte_string("b\"\\t\"", b"\t"); test_byte_string("b\"\\\"\"", b"\""); test_byte_string("b\"'\"", b"'"); test_byte_string("b\"\"", b""); test_byte_string( "b\"contains\nnewlines\\\nescaped newlines\"", b"contains\nnewlinesescaped newlines", ); test_byte_string("br\"raw\nstring\\\nhere\"", b"raw\nstring\\\nhere"); test_byte_string("b\"...\"q", b"..."); test_byte_string("br\"...\"q", b"..."); test_byte_string("br##\"...\"##q", b"..."); } }