diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 13:39:29 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 13:39:29 +0000 |
commit | 1bbf3d3cebcb6ff09231ee43296d62d6a93fcfd0 (patch) | |
tree | 6dc13e28a6078185b871acc5b7f9a2df6c5d948a /src/parse.rs | |
parent | Initial commit. (diff) | |
download | rust-proc-macro2-1bbf3d3cebcb6ff09231ee43296d62d6a93fcfd0.tar.xz rust-proc-macro2-1bbf3d3cebcb6ff09231ee43296d62d6a93fcfd0.zip |
Adding upstream version 1.0.76.upstream/1.0.76
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/parse.rs')
-rw-r--r-- | src/parse.rs | 996 |
1 files changed, 996 insertions, 0 deletions
diff --git a/src/parse.rs b/src/parse.rs new file mode 100644 index 0000000..07239bc --- /dev/null +++ b/src/parse.rs @@ -0,0 +1,996 @@ +use crate::fallback::{ + self, is_ident_continue, is_ident_start, Group, LexError, Literal, Span, TokenStream, + TokenStreamBuilder, +}; +use crate::{Delimiter, Punct, Spacing, TokenTree}; +use core::char; +use core::str::{Bytes, CharIndices, Chars}; + +#[derive(Copy, Clone, Eq, PartialEq)] +pub(crate) struct Cursor<'a> { + pub rest: &'a str, + #[cfg(span_locations)] + pub off: u32, +} + +impl<'a> Cursor<'a> { + pub fn advance(&self, bytes: usize) -> Cursor<'a> { + let (_front, rest) = self.rest.split_at(bytes); + Cursor { + rest, + #[cfg(span_locations)] + off: self.off + _front.chars().count() as u32, + } + } + + pub fn starts_with(&self, s: &str) -> bool { + self.rest.starts_with(s) + } + + pub fn starts_with_char(&self, ch: char) -> bool { + self.rest.starts_with(ch) + } + + pub fn starts_with_fn<Pattern>(&self, f: Pattern) -> bool + where + Pattern: FnMut(char) -> bool, + { + self.rest.starts_with(f) + } + + pub fn is_empty(&self) -> bool { + self.rest.is_empty() + } + + fn len(&self) -> usize { + self.rest.len() + } + + fn as_bytes(&self) -> &'a [u8] { + self.rest.as_bytes() + } + + fn bytes(&self) -> Bytes<'a> { + self.rest.bytes() + } + + fn chars(&self) -> Chars<'a> { + self.rest.chars() + } + + fn char_indices(&self) -> CharIndices<'a> { + self.rest.char_indices() + } + + fn parse(&self, tag: &str) -> Result<Cursor<'a>, Reject> { + if self.starts_with(tag) { + Ok(self.advance(tag.len())) + } else { + Err(Reject) + } + } +} + +pub(crate) struct Reject; +type PResult<'a, O> = Result<(Cursor<'a>, O), Reject>; + +fn skip_whitespace(input: Cursor) -> Cursor { + let mut s = input; + + while !s.is_empty() { + let byte = s.as_bytes()[0]; + if byte == b'/' { + if s.starts_with("//") + && (!s.starts_with("///") || s.starts_with("////")) + && !s.starts_with("//!") + { + let (cursor, _) = take_until_newline_or_eof(s); + s = cursor; + continue; + } else if s.starts_with("/**/") { + s = s.advance(4); + continue; + } else if s.starts_with("/*") + && (!s.starts_with("/**") || s.starts_with("/***")) + && !s.starts_with("/*!") + { + match block_comment(s) { + Ok((rest, _)) => { + s = rest; + continue; + } + Err(Reject) => return s, + } + } + } + match byte { + b' ' | 0x09..=0x0d => { + s = s.advance(1); + continue; + } + b if b.is_ascii() => {} + _ => { + let ch = s.chars().next().unwrap(); + if is_whitespace(ch) { + s = s.advance(ch.len_utf8()); + continue; + } + } + } + return s; + } + s +} + +fn block_comment(input: Cursor) -> PResult<&str> { + if !input.starts_with("/*") { + return Err(Reject); + } + + let mut depth = 0usize; + let bytes = input.as_bytes(); + let mut i = 0usize; + let upper = bytes.len() - 1; + + while i < upper { + if bytes[i] == b'/' && bytes[i + 1] == b'*' { + depth += 1; + i += 1; // eat '*' + } else if bytes[i] == b'*' && bytes[i + 1] == b'/' { + depth -= 1; + if depth == 0 { + return Ok((input.advance(i + 2), &input.rest[..i + 2])); + } + i += 1; // eat '/' + } + i += 1; + } + + Err(Reject) +} + +fn is_whitespace(ch: char) -> bool { + // Rust treats left-to-right mark and right-to-left mark as whitespace + ch.is_whitespace() || ch == '\u{200e}' || ch == '\u{200f}' +} + +fn word_break(input: Cursor) -> Result<Cursor, Reject> { + match input.chars().next() { + Some(ch) if is_ident_continue(ch) => Err(Reject), + Some(_) | None => Ok(input), + } +} + +// Rustc's representation of a macro expansion error in expression position or +// type position. +const ERROR: &str = "(/*ERROR*/)"; + +pub(crate) fn token_stream(mut input: Cursor) -> Result<TokenStream, LexError> { + let mut trees = TokenStreamBuilder::new(); + let mut stack = Vec::new(); + + loop { + input = skip_whitespace(input); + + if let Ok((rest, ())) = doc_comment(input, &mut trees) { + input = rest; + continue; + } + + #[cfg(span_locations)] + let lo = input.off; + + let first = match input.bytes().next() { + Some(first) => first, + None => match stack.last() { + None => return Ok(trees.build()), + #[cfg(span_locations)] + Some((lo, _frame)) => { + return Err(LexError { + span: Span { lo: *lo, hi: *lo }, + }) + } + #[cfg(not(span_locations))] + Some(_frame) => return Err(LexError { span: Span {} }), + }, + }; + + if let Some(open_delimiter) = match first { + b'(' if !input.starts_with(ERROR) => Some(Delimiter::Parenthesis), + b'[' => Some(Delimiter::Bracket), + b'{' => Some(Delimiter::Brace), + _ => None, + } { + input = input.advance(1); + let frame = (open_delimiter, trees); + #[cfg(span_locations)] + let frame = (lo, frame); + stack.push(frame); + trees = TokenStreamBuilder::new(); + } else if let Some(close_delimiter) = match first { + b')' => Some(Delimiter::Parenthesis), + b']' => Some(Delimiter::Bracket), + b'}' => Some(Delimiter::Brace), + _ => None, + } { + let frame = match stack.pop() { + Some(frame) => frame, + None => return Err(lex_error(input)), + }; + #[cfg(span_locations)] + let (lo, frame) = frame; + let (open_delimiter, outer) = frame; + if open_delimiter != close_delimiter { + return Err(lex_error(input)); + } + input = input.advance(1); + let mut g = Group::new(open_delimiter, trees.build()); + g.set_span(Span { + #[cfg(span_locations)] + lo, + #[cfg(span_locations)] + hi: input.off, + }); + trees = outer; + trees.push_token_from_parser(TokenTree::Group(crate::Group::_new_fallback(g))); + } else { + let (rest, mut tt) = match leaf_token(input) { + Ok((rest, tt)) => (rest, tt), + Err(Reject) => return Err(lex_error(input)), + }; + tt.set_span(crate::Span::_new_fallback(Span { + #[cfg(span_locations)] + lo, + #[cfg(span_locations)] + hi: rest.off, + })); + trees.push_token_from_parser(tt); + input = rest; + } + } +} + +fn lex_error(cursor: Cursor) -> LexError { + #[cfg(not(span_locations))] + let _ = cursor; + LexError { + span: Span { + #[cfg(span_locations)] + lo: cursor.off, + #[cfg(span_locations)] + hi: cursor.off, + }, + } +} + +fn leaf_token(input: Cursor) -> PResult<TokenTree> { + if let Ok((input, l)) = literal(input) { + // must be parsed before ident + Ok((input, TokenTree::Literal(crate::Literal::_new_fallback(l)))) + } else if let Ok((input, p)) = punct(input) { + Ok((input, TokenTree::Punct(p))) + } else if let Ok((input, i)) = ident(input) { + Ok((input, TokenTree::Ident(i))) + } else if input.starts_with(ERROR) { + let rest = input.advance(ERROR.len()); + let repr = crate::Literal::_new_fallback(Literal::_new(ERROR.to_owned())); + Ok((rest, TokenTree::Literal(repr))) + } else { + Err(Reject) + } +} + +fn ident(input: Cursor) -> PResult<crate::Ident> { + if [ + "r\"", "r#\"", "r##", "b\"", "b\'", "br\"", "br#", "c\"", "cr\"", "cr#", + ] + .iter() + .any(|prefix| input.starts_with(prefix)) + { + Err(Reject) + } else { + ident_any(input) + } +} + +fn ident_any(input: Cursor) -> PResult<crate::Ident> { + let raw = input.starts_with("r#"); + let rest = input.advance((raw as usize) << 1); + + let (rest, sym) = ident_not_raw(rest)?; + + if !raw { + let ident = crate::Ident::_new(crate::imp::Ident::new_unchecked( + sym, + fallback::Span::call_site(), + )); + return Ok((rest, ident)); + } + + match sym { + "_" | "super" | "self" | "Self" | "crate" => return Err(Reject), + _ => {} + } + + let ident = crate::Ident::_new(crate::imp::Ident::new_raw_unchecked( + sym, + fallback::Span::call_site(), + )); + Ok((rest, ident)) +} + +fn ident_not_raw(input: Cursor) -> PResult<&str> { + let mut chars = input.char_indices(); + + match chars.next() { + Some((_, ch)) if is_ident_start(ch) => {} + _ => return Err(Reject), + } + + let mut end = input.len(); + for (i, ch) in chars { + if !is_ident_continue(ch) { + end = i; + break; + } + } + + Ok((input.advance(end), &input.rest[..end])) +} + +pub(crate) fn literal(input: Cursor) -> PResult<Literal> { + let rest = literal_nocapture(input)?; + let end = input.len() - rest.len(); + Ok((rest, Literal::_new(input.rest[..end].to_string()))) +} + +fn literal_nocapture(input: Cursor) -> Result<Cursor, Reject> { + if let Ok(ok) = string(input) { + Ok(ok) + } else if let Ok(ok) = byte_string(input) { + Ok(ok) + } else if let Ok(ok) = c_string(input) { + Ok(ok) + } else if let Ok(ok) = byte(input) { + Ok(ok) + } else if let Ok(ok) = character(input) { + Ok(ok) + } else if let Ok(ok) = float(input) { + Ok(ok) + } else if let Ok(ok) = int(input) { + Ok(ok) + } else { + Err(Reject) + } +} + +fn literal_suffix(input: Cursor) -> Cursor { + match ident_not_raw(input) { + Ok((input, _)) => input, + Err(Reject) => input, + } +} + +fn string(input: Cursor) -> Result<Cursor, Reject> { + if let Ok(input) = input.parse("\"") { + cooked_string(input) + } else if let Ok(input) = input.parse("r") { + raw_string(input) + } else { + Err(Reject) + } +} + +fn cooked_string(mut input: Cursor) -> Result<Cursor, Reject> { + let mut chars = input.char_indices(); + + while let Some((i, ch)) = chars.next() { + match ch { + '"' => { + let input = input.advance(i + 1); + return Ok(literal_suffix(input)); + } + '\r' => match chars.next() { + Some((_, '\n')) => {} + _ => break, + }, + '\\' => match chars.next() { + Some((_, 'x')) => { + backslash_x_char(&mut chars)?; + } + Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"' | '0')) => {} + Some((_, 'u')) => { + backslash_u(&mut chars)?; + } + Some((newline, ch @ ('\n' | '\r'))) => { + input = input.advance(newline + 1); + trailing_backslash(&mut input, ch as u8)?; + chars = input.char_indices(); + } + _ => break, + }, + _ch => {} + } + } + Err(Reject) +} + +fn raw_string(input: Cursor) -> Result<Cursor, Reject> { + let (input, delimiter) = delimiter_of_raw_string(input)?; + let mut bytes = input.bytes().enumerate(); + while let Some((i, byte)) = bytes.next() { + match byte { + b'"' if input.rest[i + 1..].starts_with(delimiter) => { + let rest = input.advance(i + 1 + delimiter.len()); + return Ok(literal_suffix(rest)); + } + b'\r' => match bytes.next() { + Some((_, b'\n')) => {} + _ => break, + }, + _ => {} + } + } + Err(Reject) +} + +fn byte_string(input: Cursor) -> Result<Cursor, Reject> { + if let Ok(input) = input.parse("b\"") { + cooked_byte_string(input) + } else if let Ok(input) = input.parse("br") { + raw_byte_string(input) + } else { + Err(Reject) + } +} + +fn cooked_byte_string(mut input: Cursor) -> Result<Cursor, Reject> { + let mut bytes = input.bytes().enumerate(); + while let Some((offset, b)) = bytes.next() { + match b { + b'"' => { + let input = input.advance(offset + 1); + return Ok(literal_suffix(input)); + } + b'\r' => match bytes.next() { + Some((_, b'\n')) => {} + _ => break, + }, + b'\\' => match bytes.next() { + Some((_, b'x')) => { + backslash_x_byte(&mut bytes)?; + } + Some((_, b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"')) => {} + Some((newline, b @ (b'\n' | b'\r'))) => { + input = input.advance(newline + 1); + trailing_backslash(&mut input, b)?; + bytes = input.bytes().enumerate(); + } + _ => break, + }, + b if b.is_ascii() => {} + _ => break, + } + } + Err(Reject) +} + +fn delimiter_of_raw_string(input: Cursor) -> PResult<&str> { + for (i, byte) in input.bytes().enumerate() { + match byte { + b'"' => { + if i > 255 { + // https://github.com/rust-lang/rust/pull/95251 + return Err(Reject); + } + return Ok((input.advance(i + 1), &input.rest[..i])); + } + b'#' => {} + _ => break, + } + } + Err(Reject) +} + +fn raw_byte_string(input: Cursor) -> Result<Cursor, Reject> { + let (input, delimiter) = delimiter_of_raw_string(input)?; + let mut bytes = input.bytes().enumerate(); + while let Some((i, byte)) = bytes.next() { + match byte { + b'"' if input.rest[i + 1..].starts_with(delimiter) => { + let rest = input.advance(i + 1 + delimiter.len()); + return Ok(literal_suffix(rest)); + } + b'\r' => match bytes.next() { + Some((_, b'\n')) => {} + _ => break, + }, + other => { + if !other.is_ascii() { + break; + } + } + } + } + Err(Reject) +} + +fn c_string(input: Cursor) -> Result<Cursor, Reject> { + if let Ok(input) = input.parse("c\"") { + cooked_c_string(input) + } else if let Ok(input) = input.parse("cr") { + raw_c_string(input) + } else { + Err(Reject) + } +} + +fn raw_c_string(input: Cursor) -> Result<Cursor, Reject> { + let (input, delimiter) = delimiter_of_raw_string(input)?; + let mut bytes = input.bytes().enumerate(); + while let Some((i, byte)) = bytes.next() { + match byte { + b'"' if input.rest[i + 1..].starts_with(delimiter) => { + let rest = input.advance(i + 1 + delimiter.len()); + return Ok(literal_suffix(rest)); + } + b'\r' => match bytes.next() { + Some((_, b'\n')) => {} + _ => break, + }, + b'\0' => break, + _ => {} + } + } + Err(Reject) +} + +fn cooked_c_string(mut input: Cursor) -> Result<Cursor, Reject> { + let mut chars = input.char_indices(); + + while let Some((i, ch)) = chars.next() { + match ch { + '"' => { + let input = input.advance(i + 1); + return Ok(literal_suffix(input)); + } + '\r' => match chars.next() { + Some((_, '\n')) => {} + _ => break, + }, + '\\' => match chars.next() { + Some((_, 'x')) => { + backslash_x_nonzero(&mut chars)?; + } + Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"')) => {} + Some((_, 'u')) => { + if backslash_u(&mut chars)? == '\0' { + break; + } + } + Some((newline, ch @ ('\n' | '\r'))) => { + input = input.advance(newline + 1); + trailing_backslash(&mut input, ch as u8)?; + chars = input.char_indices(); + } + _ => break, + }, + '\0' => break, + _ch => {} + } + } + Err(Reject) +} + +fn byte(input: Cursor) -> Result<Cursor, Reject> { + let input = input.parse("b'")?; + let mut bytes = input.bytes().enumerate(); + let ok = match bytes.next().map(|(_, b)| b) { + Some(b'\\') => match bytes.next().map(|(_, b)| b) { + Some(b'x') => backslash_x_byte(&mut bytes).is_ok(), + Some(b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"') => true, + _ => false, + }, + b => b.is_some(), + }; + if !ok { + return Err(Reject); + } + let (offset, _) = bytes.next().ok_or(Reject)?; + if !input.chars().as_str().is_char_boundary(offset) { + return Err(Reject); + } + let input = input.advance(offset).parse("'")?; + Ok(literal_suffix(input)) +} + +fn character(input: Cursor) -> Result<Cursor, Reject> { + let input = input.parse("'")?; + let mut chars = input.char_indices(); + let ok = match chars.next().map(|(_, ch)| ch) { + Some('\\') => match chars.next().map(|(_, ch)| ch) { + Some('x') => backslash_x_char(&mut chars).is_ok(), + Some('u') => backslash_u(&mut chars).is_ok(), + Some('n' | 'r' | 't' | '\\' | '0' | '\'' | '"') => true, + _ => false, + }, + ch => ch.is_some(), + }; + if !ok { + return Err(Reject); + } + let (idx, _) = chars.next().ok_or(Reject)?; + let input = input.advance(idx).parse("'")?; + Ok(literal_suffix(input)) +} + +macro_rules! next_ch { + ($chars:ident @ $pat:pat) => { + match $chars.next() { + Some((_, ch)) => match ch { + $pat => ch, + _ => return Err(Reject), + }, + None => return Err(Reject), + } + }; +} + +fn backslash_x_char<I>(chars: &mut I) -> Result<(), Reject> +where + I: Iterator<Item = (usize, char)>, +{ + next_ch!(chars @ '0'..='7'); + next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F'); + Ok(()) +} + +fn backslash_x_byte<I>(chars: &mut I) -> Result<(), Reject> +where + I: Iterator<Item = (usize, u8)>, +{ + next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F'); + next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F'); + Ok(()) +} + +fn backslash_x_nonzero<I>(chars: &mut I) -> Result<(), Reject> +where + I: Iterator<Item = (usize, char)>, +{ + let first = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F'); + let second = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F'); + if first == '0' && second == '0' { + Err(Reject) + } else { + Ok(()) + } +} + +fn backslash_u<I>(chars: &mut I) -> Result<char, Reject> +where + I: Iterator<Item = (usize, char)>, +{ + next_ch!(chars @ '{'); + let mut value = 0; + let mut len = 0; + for (_, ch) in chars { + let digit = match ch { + '0'..='9' => ch as u8 - b'0', + 'a'..='f' => 10 + ch as u8 - b'a', + 'A'..='F' => 10 + ch as u8 - b'A', + '_' if len > 0 => continue, + '}' if len > 0 => return char::from_u32(value).ok_or(Reject), + _ => break, + }; + if len == 6 { + break; + } + value *= 0x10; + value += u32::from(digit); + len += 1; + } + Err(Reject) +} + +fn trailing_backslash(input: &mut Cursor, mut last: u8) -> Result<(), Reject> { + let mut whitespace = input.bytes().enumerate(); + loop { + if last == b'\r' && whitespace.next().map_or(true, |(_, b)| b != b'\n') { + return Err(Reject); + } + match whitespace.next() { + Some((_, b @ (b' ' | b'\t' | b'\n' | b'\r'))) => { + last = b; + } + Some((offset, _)) => { + *input = input.advance(offset); + return Ok(()); + } + None => return Err(Reject), + } + } +} + +fn float(input: Cursor) -> Result<Cursor, Reject> { + let mut rest = float_digits(input)?; + if let Some(ch) = rest.chars().next() { + if is_ident_start(ch) { + rest = ident_not_raw(rest)?.0; + } + } + word_break(rest) +} + +fn float_digits(input: Cursor) -> Result<Cursor, Reject> { + let mut chars = input.chars().peekable(); + match chars.next() { + Some(ch) if '0' <= ch && ch <= '9' => {} + _ => return Err(Reject), + } + + let mut len = 1; + let mut has_dot = false; + let mut has_exp = false; + while let Some(&ch) = chars.peek() { + match ch { + '0'..='9' | '_' => { + chars.next(); + len += 1; + } + '.' => { + if has_dot { + break; + } + chars.next(); + if chars + .peek() + .map_or(false, |&ch| ch == '.' || is_ident_start(ch)) + { + return Err(Reject); + } + len += 1; + has_dot = true; + } + 'e' | 'E' => { + chars.next(); + len += 1; + has_exp = true; + break; + } + _ => break, + } + } + + if !(has_dot || has_exp) { + return Err(Reject); + } + + if has_exp { + let token_before_exp = if has_dot { + Ok(input.advance(len - 1)) + } else { + Err(Reject) + }; + let mut has_sign = false; + let mut has_exp_value = false; + while let Some(&ch) = chars.peek() { + match ch { + '+' | '-' => { + if has_exp_value { + break; + } + if has_sign { + return token_before_exp; + } + chars.next(); + len += 1; + has_sign = true; + } + '0'..='9' => { + chars.next(); + len += 1; + has_exp_value = true; + } + '_' => { + chars.next(); + len += 1; + } + _ => break, + } + } + if !has_exp_value { + return token_before_exp; + } + } + + Ok(input.advance(len)) +} + +fn int(input: Cursor) -> Result<Cursor, Reject> { + let mut rest = digits(input)?; + if let Some(ch) = rest.chars().next() { + if is_ident_start(ch) { + rest = ident_not_raw(rest)?.0; + } + } + word_break(rest) +} + +fn digits(mut input: Cursor) -> Result<Cursor, Reject> { + let base = if input.starts_with("0x") { + input = input.advance(2); + 16 + } else if input.starts_with("0o") { + input = input.advance(2); + 8 + } else if input.starts_with("0b") { + input = input.advance(2); + 2 + } else { + 10 + }; + + let mut len = 0; + let mut empty = true; + for b in input.bytes() { + match b { + b'0'..=b'9' => { + let digit = (b - b'0') as u64; + if digit >= base { + return Err(Reject); + } + } + b'a'..=b'f' => { + let digit = 10 + (b - b'a') as u64; + if digit >= base { + break; + } + } + b'A'..=b'F' => { + let digit = 10 + (b - b'A') as u64; + if digit >= base { + break; + } + } + b'_' => { + if empty && base == 10 { + return Err(Reject); + } + len += 1; + continue; + } + _ => break, + }; + len += 1; + empty = false; + } + if empty { + Err(Reject) + } else { + Ok(input.advance(len)) + } +} + +fn punct(input: Cursor) -> PResult<Punct> { + let (rest, ch) = punct_char(input)?; + if ch == '\'' { + if ident_any(rest)?.0.starts_with_char('\'') { + Err(Reject) + } else { + Ok((rest, Punct::new('\'', Spacing::Joint))) + } + } else { + let kind = match punct_char(rest) { + Ok(_) => Spacing::Joint, + Err(Reject) => Spacing::Alone, + }; + Ok((rest, Punct::new(ch, kind))) + } +} + +fn punct_char(input: Cursor) -> PResult<char> { + if input.starts_with("//") || input.starts_with("/*") { + // Do not accept `/` of a comment as a punct. + return Err(Reject); + } + + let mut chars = input.chars(); + let first = match chars.next() { + Some(ch) => ch, + None => { + return Err(Reject); + } + }; + let recognized = "~!@#$%^&*-=+|;:,<.>/?'"; + if recognized.contains(first) { + Ok((input.advance(first.len_utf8()), first)) + } else { + Err(Reject) + } +} + +fn doc_comment<'a>(input: Cursor<'a>, trees: &mut TokenStreamBuilder) -> PResult<'a, ()> { + #[cfg(span_locations)] + let lo = input.off; + let (rest, (comment, inner)) = doc_comment_contents(input)?; + let fallback_span = Span { + #[cfg(span_locations)] + lo, + #[cfg(span_locations)] + hi: rest.off, + }; + let span = crate::Span::_new_fallback(fallback_span); + + let mut scan_for_bare_cr = comment; + while let Some(cr) = scan_for_bare_cr.find('\r') { + let rest = &scan_for_bare_cr[cr + 1..]; + if !rest.starts_with('\n') { + return Err(Reject); + } + scan_for_bare_cr = rest; + } + + let mut pound = Punct::new('#', Spacing::Alone); + pound.set_span(span); + trees.push_token_from_parser(TokenTree::Punct(pound)); + + if inner { + let mut bang = Punct::new('!', Spacing::Alone); + bang.set_span(span); + trees.push_token_from_parser(TokenTree::Punct(bang)); + } + + let doc_ident = crate::Ident::_new(crate::imp::Ident::new_unchecked("doc", fallback_span)); + let mut equal = Punct::new('=', Spacing::Alone); + equal.set_span(span); + let mut literal = crate::Literal::string(comment); + literal.set_span(span); + let mut bracketed = TokenStreamBuilder::with_capacity(3); + bracketed.push_token_from_parser(TokenTree::Ident(doc_ident)); + bracketed.push_token_from_parser(TokenTree::Punct(equal)); + bracketed.push_token_from_parser(TokenTree::Literal(literal)); + let group = Group::new(Delimiter::Bracket, bracketed.build()); + let mut group = crate::Group::_new_fallback(group); + group.set_span(span); + trees.push_token_from_parser(TokenTree::Group(group)); + + Ok((rest, ())) +} + +fn doc_comment_contents(input: Cursor) -> PResult<(&str, bool)> { + if input.starts_with("//!") { + let input = input.advance(3); + let (input, s) = take_until_newline_or_eof(input); + Ok((input, (s, true))) + } else if input.starts_with("/*!") { + let (input, s) = block_comment(input)?; + Ok((input, (&s[3..s.len() - 2], true))) + } else if input.starts_with("///") { + let input = input.advance(3); + if input.starts_with_char('/') { + return Err(Reject); + } + let (input, s) = take_until_newline_or_eof(input); + Ok((input, (s, false))) + } else if input.starts_with("/**") && !input.rest[3..].starts_with('*') { + let (input, s) = block_comment(input)?; + Ok((input, (&s[3..s.len() - 2], false))) + } else { + Err(Reject) + } +} + +fn take_until_newline_or_eof(input: Cursor) -> (Cursor, &str) { + let chars = input.char_indices(); + + for (i, ch) in chars { + if ch == '\n' { + return (input.advance(i), &input.rest[..i]); + } else if ch == '\r' && input.rest[i + 1..].starts_with('\n') { + return (input.advance(i + 1), &input.rest[..i]); + } + } + + (input.advance(input.len()), input.rest) +} |