//! Detects unescaped backticks (\`) in doc comments. use crate::clean::Item; use crate::core::DocContext; use crate::html::markdown::main_body_opts; use pulldown_cmark::{BrokenLink, Event, Parser}; use rustc_errors::DiagnosticBuilder; use rustc_lint_defs::Applicability; use rustc_resolve::rustdoc::source_span_for_markdown_range; use std::ops::Range; pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item) { let tcx = cx.tcx; let Some(hir_id) = DocContext::as_local_hir_id(tcx, item.item_id) else { // If non-local, no need to check anything. return; }; let dox = item.doc_value(); if dox.is_empty() { return; } let link_names = item.link_names(&cx.cache); let mut replacer = |broken_link: BrokenLink<'_>| { link_names .iter() .find(|link| *link.original_text == *broken_link.reference) .map(|link| ((*link.href).into(), (*link.new_text).into())) }; let parser = Parser::new_with_broken_link_callback(&dox, main_body_opts(), Some(&mut replacer)) .into_offset_iter(); let mut element_stack = Vec::new(); let mut prev_text_end = 0; for (event, event_range) in parser { match event { Event::Start(_) => { element_stack.push(Element::new(event_range)); } Event::End(_) => { let element = element_stack.pop().unwrap(); let Some(backtick_index) = element.backtick_index else { continue; }; // If we can't get a span of the backtick, because it is in a `#[doc = ""]` attribute, // use the span of the entire attribute as a fallback. let span = source_span_for_markdown_range( tcx, &dox, &(backtick_index..backtick_index + 1), &item.attrs.doc_strings, ) .unwrap_or_else(|| item.attr_span(tcx)); tcx.struct_span_lint_hir(crate::lint::UNESCAPED_BACKTICKS, hir_id, span, "unescaped backtick", |lint| { let mut help_emitted = false; match element.prev_code_guess { PrevCodeGuess::None => {} PrevCodeGuess::Start { guess, .. } => { // "foo` `bar`" -> "`foo` `bar`" if let Some(suggest_index) = clamp_start(guess, &element.suggestible_ranges) && can_suggest_backtick(&dox, suggest_index) { suggest_insertion(cx, item, &dox, lint, suggest_index, '`', "the opening backtick of a previous inline code may be missing"); help_emitted = true; } } PrevCodeGuess::End { guess, .. } => { // "`foo `bar`" -> "`foo` `bar`" // Don't `clamp_end` here, because the suggestion is guaranteed to be inside // an inline code node and we intentionally "break" the inline code here. let suggest_index = guess; if can_suggest_backtick(&dox, suggest_index) { suggest_insertion(cx, item, &dox, lint, suggest_index, '`', "a previous inline code might be longer than expected"); help_emitted = true; } } } if !element.prev_code_guess.is_confident() { // "`foo` bar`" -> "`foo` `bar`" if let Some(guess) = guess_start_of_code(&dox, element.element_range.start..backtick_index) && let Some(suggest_index) = clamp_start(guess, &element.suggestible_ranges) && can_suggest_backtick(&dox, suggest_index) { suggest_insertion(cx, item, &dox, lint, suggest_index, '`', "the opening backtick of an inline code may be missing"); help_emitted = true; } // "`foo` `bar" -> "`foo` `bar`" // Don't suggest closing backtick after single trailing char, // if we already suggested opening backtick. For example: // "foo`." -> "`foo`." or "foo`s" -> "`foo`s". if let Some(guess) = guess_end_of_code(&dox, backtick_index + 1..element.element_range.end) && let Some(suggest_index) = clamp_end(guess, &element.suggestible_ranges) && can_suggest_backtick(&dox, suggest_index) && (!help_emitted || suggest_index - backtick_index > 2) { suggest_insertion(cx, item, &dox, lint, suggest_index, '`', "the closing backtick of an inline code may be missing"); help_emitted = true; } } if !help_emitted { lint.help("the opening or closing backtick of an inline code may be missing"); } suggest_insertion(cx, item, &dox, lint, backtick_index, '\\', "if you meant to use a literal backtick, escape it"); lint }); } Event::Code(_) => { let element = element_stack .last_mut() .expect("expected inline code node to be inside of an element"); assert!( event_range.start >= element.element_range.start && event_range.end <= element.element_range.end ); // This inline code might be longer than it's supposed to be. // Only check single backtick inline code for now. if !element.prev_code_guess.is_confident() && dox.as_bytes().get(event_range.start) == Some(&b'`') && dox.as_bytes().get(event_range.start + 1) != Some(&b'`') { let range_inside = event_range.start + 1..event_range.end - 1; let text_inside = &dox[range_inside.clone()]; let is_confident = text_inside.starts_with(char::is_whitespace) || text_inside.ends_with(char::is_whitespace); if let Some(guess) = guess_end_of_code(&dox, range_inside) { // Find earlier end of code. element.prev_code_guess = PrevCodeGuess::End { guess, is_confident }; } else { // Find alternate start of code. let range_before = element.element_range.start..event_range.start; if let Some(guess) = guess_start_of_code(&dox, range_before) { element.prev_code_guess = PrevCodeGuess::Start { guess, is_confident }; } } } } Event::Text(text) => { let element = element_stack .last_mut() .expect("expected inline text node to be inside of an element"); assert!( event_range.start >= element.element_range.start && event_range.end <= element.element_range.end ); // The first char is escaped if the prev char is \ and not part of a text node. let is_escaped = prev_text_end < event_range.start && dox.as_bytes()[event_range.start - 1] == b'\\'; // Don't lint backslash-escaped (\`) or html-escaped (`) backticks. if *text == *"`" && !is_escaped && *text == dox[event_range.clone()] { // We found a stray backtick. assert!( element.backtick_index.is_none(), "expected at most one unescaped backtick per element", ); element.backtick_index = Some(event_range.start); } prev_text_end = event_range.end; if is_escaped { // Ensure that we suggest "`\x" and not "\`x". element.suggestible_ranges.push(event_range.start - 1..event_range.end); } else { element.suggestible_ranges.push(event_range); } } _ => {} } } } /// A previous inline code node, that looks wrong. /// /// `guess` is the position, where we want to suggest a \` and the guess `is_confident` if an /// inline code starts or ends with a whitespace. #[derive(Debug)] enum PrevCodeGuess { None, /// Missing \` at start. /// /// ```markdown /// foo` `bar` /// ``` Start { guess: usize, is_confident: bool, }, /// Missing \` at end. /// /// ```markdown /// `foo `bar` /// ``` End { guess: usize, is_confident: bool, }, } impl PrevCodeGuess { fn is_confident(&self) -> bool { match *self { PrevCodeGuess::None => false, PrevCodeGuess::Start { is_confident, .. } | PrevCodeGuess::End { is_confident, .. } => { is_confident } } } } /// A markdown [tagged element], which may or may not contain an unescaped backtick. /// /// [tagged element]: https://docs.rs/pulldown-cmark/0.9/pulldown_cmark/enum.Tag.html #[derive(Debug)] struct Element { /// The full range (span) of the element in the doc string. element_range: Range, /// The ranges where we're allowed to put backticks. /// This is used to prevent breaking markdown elements like links or lists. suggestible_ranges: Vec>, /// The unescaped backtick. backtick_index: Option, /// Suggest a different start or end of an inline code. prev_code_guess: PrevCodeGuess, } impl Element { const fn new(element_range: Range) -> Self { Self { element_range, suggestible_ranges: Vec::new(), backtick_index: None, prev_code_guess: PrevCodeGuess::None, } } } /// Given a potentially unclosed inline code, attempt to find the start. fn guess_start_of_code(dox: &str, range: Range) -> Option { assert!(dox.as_bytes()[range.end] == b'`'); let mut braces = 0; let mut guess = 0; for (idx, ch) in dox[range.clone()].char_indices().rev() { match ch { ')' | ']' | '}' => braces += 1, '(' | '[' | '{' => { if braces == 0 { guess = idx + 1; break; } braces -= 1; } ch if ch.is_whitespace() && braces == 0 => { guess = idx + 1; break; } _ => (), } } guess += range.start; // Don't suggest empty inline code or duplicate backticks. can_suggest_backtick(dox, guess).then_some(guess) } /// Given a potentially unclosed inline code, attempt to find the end. fn guess_end_of_code(dox: &str, range: Range) -> Option { // Punctuation that should be outside of the inline code. const TRAILING_PUNCTUATION: &[u8] = b".,"; assert!(dox.as_bytes()[range.start - 1] == b'`'); let text = dox[range.clone()].trim_end(); let mut braces = 0; let mut guess = text.len(); for (idx, ch) in text.char_indices() { match ch { '(' | '[' | '{' => braces += 1, ')' | ']' | '}' => { if braces == 0 { guess = idx; break; } braces -= 1; } ch if ch.is_whitespace() && braces == 0 => { guess = idx; break; } _ => (), } } // Strip a single trailing punctuation. if guess >= 1 && TRAILING_PUNCTUATION.contains(&text.as_bytes()[guess - 1]) && (guess < 2 || !TRAILING_PUNCTUATION.contains(&text.as_bytes()[guess - 2])) { guess -= 1; } guess += range.start; // Don't suggest empty inline code or duplicate backticks. can_suggest_backtick(dox, guess).then_some(guess) } /// Returns whether inserting a backtick at `dox[index]` will not produce double backticks. fn can_suggest_backtick(dox: &str, index: usize) -> bool { (index == 0 || dox.as_bytes()[index - 1] != b'`') && (index == dox.len() || dox.as_bytes()[index] != b'`') } /// Increase the index until it is inside or one past the end of one of the ranges. /// /// The ranges must be sorted for this to work correctly. fn clamp_start(index: usize, ranges: &[Range]) -> Option { for range in ranges { if range.start >= index { return Some(range.start); } if index <= range.end { return Some(index); } } None } /// Decrease the index until it is inside or one past the end of one of the ranges. /// /// The ranges must be sorted for this to work correctly. fn clamp_end(index: usize, ranges: &[Range]) -> Option { for range in ranges.iter().rev() { if range.end <= index { return Some(range.end); } if index >= range.start { return Some(index); } } None } /// Try to emit a span suggestion and fall back to help messages if we can't find a suitable span. /// /// This helps finding backticks in huge macro-generated docs. fn suggest_insertion( cx: &DocContext<'_>, item: &Item, dox: &str, lint: &mut DiagnosticBuilder<'_, ()>, insert_index: usize, suggestion: char, message: &'static str, ) { /// Maximum bytes of context to show around the insertion. const CONTEXT_MAX_LEN: usize = 80; if let Some(span) = source_span_for_markdown_range( cx.tcx, &dox, &(insert_index..insert_index), &item.attrs.doc_strings, ) { lint.span_suggestion(span, message, suggestion, Applicability::MaybeIncorrect); } else { let line_start = dox[..insert_index].rfind('\n').map_or(0, |idx| idx + 1); let line_end = dox[insert_index..].find('\n').map_or(dox.len(), |idx| idx + insert_index); let context_before_max_len = if insert_index - line_start < CONTEXT_MAX_LEN / 2 { insert_index - line_start } else if line_end - insert_index < CONTEXT_MAX_LEN / 2 { CONTEXT_MAX_LEN - (line_end - insert_index) } else { CONTEXT_MAX_LEN / 2 }; let context_after_max_len = CONTEXT_MAX_LEN - context_before_max_len; let (prefix, context_start) = if insert_index - line_start <= context_before_max_len { ("", line_start) } else { ("...", dox.ceil_char_boundary(insert_index - context_before_max_len)) }; let (suffix, context_end) = if line_end - insert_index <= context_after_max_len { ("", line_end) } else { ("...", dox.floor_char_boundary(insert_index + context_after_max_len)) }; let context_full = &dox[context_start..context_end].trim_end(); let context_before = &dox[context_start..insert_index]; let context_after = &dox[insert_index..context_end].trim_end(); lint.help(format!( "{message}\n change: {prefix}{context_full}{suffix}\nto this: {prefix}{context_before}{suggestion}{context_after}{suffix}" )); } }