use crate::token::CommentKind; use rustc_span::source_map::SourceMap; use rustc_span::{BytePos, CharPos, FileName, Pos, Symbol}; #[cfg(test)] mod tests; #[derive(Clone, Copy, PartialEq, Debug)] pub enum CommentStyle { /// No code on either side of each line of the comment Isolated, /// Code exists to the left of the comment Trailing, /// Code before /* foo */ and after the comment Mixed, /// Just a manual blank line "\n\n", for layout BlankLine, } #[derive(Clone)] pub struct Comment { pub style: CommentStyle, pub lines: Vec, pub pos: BytePos, } /// A fast conservative estimate on whether the string can contain documentation links. /// A pair of square brackets `[]` must exist in the string, but we only search for the /// opening bracket because brackets always go in pairs in practice. #[inline] pub fn may_have_doc_links(s: &str) -> bool { s.contains('[') } /// Makes a doc string more presentable to users. /// Used by rustdoc and perhaps other tools, but not by rustc. pub fn beautify_doc_string(data: Symbol, kind: CommentKind) -> Symbol { fn get_vertical_trim(lines: &[&str]) -> Option<(usize, usize)> { let mut i = 0; let mut j = lines.len(); // first line of all-stars should be omitted if !lines.is_empty() && lines[0].chars().all(|c| c == '*') { i += 1; } // like the first, a last line of all stars should be omitted if j > i && !lines[j - 1].is_empty() && lines[j - 1].chars().all(|c| c == '*') { j -= 1; } if i != 0 || j != lines.len() { Some((i, j)) } else { None } } fn get_horizontal_trim<'a>(lines: &'a [&str], kind: CommentKind) -> Option { let mut i = usize::MAX; let mut first = true; // In case we have doc comments like `/**` or `/*!`, we want to remove stars if they are // present. However, we first need to strip the empty lines so they don't get in the middle // when we try to compute the "horizontal trim". let lines = if kind == CommentKind::Block { // Whatever happens, we skip the first line. let mut i = lines .get(0) .map(|l| if l.trim_start().starts_with('*') { 0 } else { 1 }) .unwrap_or(0); let mut j = lines.len(); while i < j && lines[i].trim().is_empty() { i += 1; } while j > i && lines[j - 1].trim().is_empty() { j -= 1; } &lines[i..j] } else { lines }; for line in lines { for (j, c) in line.chars().enumerate() { if j > i || !"* \t".contains(c) { return None; } if c == '*' { if first { i = j; first = false; } else if i != j { return None; } break; } } if i >= line.len() { return None; } } if lines.is_empty() { None } else { Some(lines[0][..i].into()) } } let data_s = data.as_str(); if data_s.contains('\n') { let mut lines = data_s.lines().collect::>(); let mut changes = false; let lines = if let Some((i, j)) = get_vertical_trim(&lines) { changes = true; // remove whitespace-only lines from the start/end of lines &mut lines[i..j] } else { &mut lines }; if let Some(horizontal) = get_horizontal_trim(&lines, kind) { changes = true; // remove a "[ \t]*\*" block from each line, if possible for line in lines.iter_mut() { if let Some(tmp) = line.strip_prefix(&horizontal) { *line = tmp; if kind == CommentKind::Block && (*line == "*" || line.starts_with("* ") || line.starts_with("**")) { *line = &line[1..]; } } } } if changes { return Symbol::intern(&lines.join("\n")); } } data } /// Returns `None` if the first `col` chars of `s` contain a non-whitespace char. /// Otherwise returns `Some(k)` where `k` is first char offset after that leading /// whitespace. Note that `k` may be outside bounds of `s`. fn all_whitespace(s: &str, col: CharPos) -> Option { let mut idx = 0; for (i, ch) in s.char_indices().take(col.to_usize()) { if !ch.is_whitespace() { return None; } idx = i + ch.len_utf8(); } Some(idx) } fn trim_whitespace_prefix(s: &str, col: CharPos) -> &str { let len = s.len(); match all_whitespace(&s, col) { Some(col) => { if col < len { &s[col..] } else { "" } } None => s, } } fn split_block_comment_into_lines(text: &str, col: CharPos) -> Vec { let mut res: Vec = vec![]; let mut lines = text.lines(); // just push the first line res.extend(lines.next().map(|it| it.to_string())); // for other lines, strip common whitespace prefix for line in lines { res.push(trim_whitespace_prefix(line, col).to_string()) } res } // it appears this function is called only from pprust... that's // probably not a good thing. pub fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec { let sm = SourceMap::new(sm.path_mapping().clone()); let source_file = sm.new_source_file(path, src); let text = (*source_file.src.as_ref().unwrap()).clone(); let text: &str = text.as_str(); let start_bpos = source_file.start_pos; let mut pos = 0; let mut comments: Vec = Vec::new(); let mut code_to_the_left = false; if let Some(shebang_len) = rustc_lexer::strip_shebang(text) { comments.push(Comment { style: CommentStyle::Isolated, lines: vec![text[..shebang_len].to_string()], pos: start_bpos, }); pos += shebang_len; } for token in rustc_lexer::tokenize(&text[pos..]) { let token_text = &text[pos..pos + token.len as usize]; match token.kind { rustc_lexer::TokenKind::Whitespace => { if let Some(mut idx) = token_text.find('\n') { code_to_the_left = false; while let Some(next_newline) = &token_text[idx + 1..].find('\n') { idx += 1 + next_newline; comments.push(Comment { style: CommentStyle::BlankLine, lines: vec![], pos: start_bpos + BytePos((pos + idx) as u32), }); } } } rustc_lexer::TokenKind::BlockComment { doc_style, .. } => { if doc_style.is_none() { let code_to_the_right = !matches!( text[pos + token.len as usize..].chars().next(), Some('\r' | '\n') ); let style = match (code_to_the_left, code_to_the_right) { (_, true) => CommentStyle::Mixed, (false, false) => CommentStyle::Isolated, (true, false) => CommentStyle::Trailing, }; // Count the number of chars since the start of the line by rescanning. let pos_in_file = start_bpos + BytePos(pos as u32); let line_begin_in_file = source_file.line_begin_pos(pos_in_file); let line_begin_pos = (line_begin_in_file - start_bpos).to_usize(); let col = CharPos(text[line_begin_pos..pos].chars().count()); let lines = split_block_comment_into_lines(token_text, col); comments.push(Comment { style, lines, pos: pos_in_file }) } } rustc_lexer::TokenKind::LineComment { doc_style } => { if doc_style.is_none() { comments.push(Comment { style: if code_to_the_left { CommentStyle::Trailing } else { CommentStyle::Isolated }, lines: vec![token_text.to_string()], pos: start_bpos + BytePos(pos as u32), }) } } _ => { code_to_the_left = true; } } pos += token.len as usize; } comments }