From 837b550238aa671a591ccf282dddeab29cadb206 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 18 May 2024 04:49:42 +0200 Subject: Merging upstream version 1.71.1+dfsg1. Signed-off-by: Daniel Baumann --- src/librustdoc/passes/lint/bare_urls.rs | 2 +- .../passes/lint/check_code_block_syntax.rs | 8 +- src/librustdoc/passes/lint/html_tags.rs | 2 +- src/librustdoc/passes/lint/unescaped_backticks.rs | 416 +++++++++++++++++++++ 4 files changed, 422 insertions(+), 6 deletions(-) create mode 100644 src/librustdoc/passes/lint/unescaped_backticks.rs (limited to 'src/librustdoc/passes/lint') diff --git a/src/librustdoc/passes/lint/bare_urls.rs b/src/librustdoc/passes/lint/bare_urls.rs index 423230cfe..a10d5fdb4 100644 --- a/src/librustdoc/passes/lint/bare_urls.rs +++ b/src/librustdoc/passes/lint/bare_urls.rs @@ -18,7 +18,7 @@ pub(super) fn visit_item(cx: &DocContext<'_>, item: &Item) { // If non-local, no need to check anything. return; }; - let dox = item.attrs.collapsed_doc_value().unwrap_or_default(); + let dox = item.doc_value(); if !dox.is_empty() { let report_diag = |cx: &DocContext<'_>, msg: &str, url: &str, range: Range| { let sp = source_span_for_markdown_range(cx.tcx, &dox, &range, &item.attrs) diff --git a/src/librustdoc/passes/lint/check_code_block_syntax.rs b/src/librustdoc/passes/lint/check_code_block_syntax.rs index 26fbb03a4..f489f5081 100644 --- a/src/librustdoc/passes/lint/check_code_block_syntax.rs +++ b/src/librustdoc/passes/lint/check_code_block_syntax.rs @@ -17,7 +17,7 @@ use crate::html::markdown::{self, RustCodeBlock}; use crate::passes::source_span_for_markdown_range; pub(crate) fn visit_item(cx: &DocContext<'_>, item: &clean::Item) { - if let Some(dox) = &item.attrs.collapsed_doc_value() { + if let Some(dox) = &item.opt_doc_value() { let sp = item.attr_span(cx.tcx); let extra = crate::html::markdown::ExtraInfo::new(cx.tcx, item.item_id.expect_def_id(), sp); for code_block in markdown::rust_code_blocks(dox, &extra) { @@ -108,7 +108,7 @@ fn check_rust_syntax( // just give a `help` instead. lint.span_help( sp.from_inner(InnerSpan::new(0, 3)), - &format!("{}: ```text", explanation), + format!("{}: ```text", explanation), ); } else if empty_block { lint.span_suggestion( @@ -119,12 +119,12 @@ fn check_rust_syntax( ); } } else if empty_block || is_ignore { - lint.help(&format!("{}: ```text", explanation)); + lint.help(format!("{}: ```text", explanation)); } // FIXME(#67563): Provide more context for these errors by displaying the spans inline. for message in buffer.messages.iter() { - lint.note(message); + lint.note(message.clone()); } lint diff --git a/src/librustdoc/passes/lint/html_tags.rs b/src/librustdoc/passes/lint/html_tags.rs index 4f72df5a5..f0403647a 100644 --- a/src/librustdoc/passes/lint/html_tags.rs +++ b/src/librustdoc/passes/lint/html_tags.rs @@ -15,7 +15,7 @@ pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item) { let Some(hir_id) = DocContext::as_local_hir_id(tcx, item.item_id) // If non-local, no need to check anything. else { return }; - let dox = item.attrs.collapsed_doc_value().unwrap_or_default(); + let dox = item.doc_value(); if !dox.is_empty() { let report_diag = |msg: &str, range: &Range, is_open_tag: bool| { let sp = match source_span_for_markdown_range(tcx, &dox, range, &item.attrs) { diff --git a/src/librustdoc/passes/lint/unescaped_backticks.rs b/src/librustdoc/passes/lint/unescaped_backticks.rs new file mode 100644 index 000000000..865212205 --- /dev/null +++ b/src/librustdoc/passes/lint/unescaped_backticks.rs @@ -0,0 +1,416 @@ +//! Detects unescaped backticks (\`) in doc comments. + +use crate::clean::Item; +use crate::core::DocContext; +use crate::html::markdown::main_body_opts; +use crate::passes::source_span_for_markdown_range; +use pulldown_cmark::{BrokenLink, Event, Parser}; +use rustc_errors::DiagnosticBuilder; +use rustc_lint_defs::Applicability; +use std::ops::Range; + +pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item) { + let tcx = cx.tcx; + let Some(hir_id) = DocContext::as_local_hir_id(tcx, item.item_id) else { + // If non-local, no need to check anything. + return; + }; + + let dox = item.doc_value(); + if dox.is_empty() { + return; + } + + let link_names = item.link_names(&cx.cache); + let mut replacer = |broken_link: BrokenLink<'_>| { + link_names + .iter() + .find(|link| *link.original_text == *broken_link.reference) + .map(|link| ((*link.href).into(), (*link.new_text).into())) + }; + let parser = Parser::new_with_broken_link_callback(&dox, main_body_opts(), Some(&mut replacer)) + .into_offset_iter(); + + let mut element_stack = Vec::new(); + + let mut prev_text_end = 0; + for (event, event_range) in parser { + match event { + Event::Start(_) => { + element_stack.push(Element::new(event_range)); + } + Event::End(_) => { + let element = element_stack.pop().unwrap(); + + let Some(backtick_index) = element.backtick_index else { + continue; + }; + + // If we can't get a span of the backtick, because it is in a `#[doc = ""]` attribute, + // use the span of the entire attribute as a fallback. + let span = source_span_for_markdown_range( + tcx, + &dox, + &(backtick_index..backtick_index + 1), + &item.attrs, + ) + .unwrap_or_else(|| item.attr_span(tcx)); + + tcx.struct_span_lint_hir(crate::lint::UNESCAPED_BACKTICKS, hir_id, span, "unescaped backtick", |lint| { + let mut help_emitted = false; + + match element.prev_code_guess { + PrevCodeGuess::None => {} + PrevCodeGuess::Start { guess, .. } => { + // "foo` `bar`" -> "`foo` `bar`" + if let Some(suggest_index) = clamp_start(guess, &element.suggestible_ranges) + && can_suggest_backtick(&dox, suggest_index) + { + suggest_insertion(cx, item, &dox, lint, suggest_index, '`', "the opening backtick of a previous inline code may be missing"); + help_emitted = true; + } + } + PrevCodeGuess::End { guess, .. } => { + // "`foo `bar`" -> "`foo` `bar`" + // Don't `clamp_end` here, because the suggestion is guaranteed to be inside + // an inline code node and we intentionally "break" the inline code here. + let suggest_index = guess; + if can_suggest_backtick(&dox, suggest_index) { + suggest_insertion(cx, item, &dox, lint, suggest_index, '`', "a previous inline code might be longer than expected"); + help_emitted = true; + } + } + } + + if !element.prev_code_guess.is_confident() { + // "`foo` bar`" -> "`foo` `bar`" + if let Some(guess) = guess_start_of_code(&dox, element.element_range.start..backtick_index) + && let Some(suggest_index) = clamp_start(guess, &element.suggestible_ranges) + && can_suggest_backtick(&dox, suggest_index) + { + suggest_insertion(cx, item, &dox, lint, suggest_index, '`', "the opening backtick of an inline code may be missing"); + help_emitted = true; + } + + // "`foo` `bar" -> "`foo` `bar`" + // Don't suggest closing backtick after single trailing char, + // if we already suggested opening backtick. For example: + // "foo`." -> "`foo`." or "foo`s" -> "`foo`s". + if let Some(guess) = guess_end_of_code(&dox, backtick_index + 1..element.element_range.end) + && let Some(suggest_index) = clamp_end(guess, &element.suggestible_ranges) + && can_suggest_backtick(&dox, suggest_index) + && (!help_emitted || suggest_index - backtick_index > 2) + { + suggest_insertion(cx, item, &dox, lint, suggest_index, '`', "the closing backtick of an inline code may be missing"); + help_emitted = true; + } + } + + if !help_emitted { + lint.help("the opening or closing backtick of an inline code may be missing"); + } + + suggest_insertion(cx, item, &dox, lint, backtick_index, '\\', "if you meant to use a literal backtick, escape it"); + + lint + }); + } + Event::Code(_) => { + let element = element_stack + .last_mut() + .expect("expected inline code node to be inside of an element"); + assert!( + event_range.start >= element.element_range.start + && event_range.end <= element.element_range.end + ); + + // This inline code might be longer than it's supposed to be. + // Only check single backtick inline code for now. + if !element.prev_code_guess.is_confident() + && dox.as_bytes().get(event_range.start) == Some(&b'`') + && dox.as_bytes().get(event_range.start + 1) != Some(&b'`') + { + let range_inside = event_range.start + 1..event_range.end - 1; + let text_inside = &dox[range_inside.clone()]; + + let is_confident = text_inside.starts_with(char::is_whitespace) + || text_inside.ends_with(char::is_whitespace); + + if let Some(guess) = guess_end_of_code(&dox, range_inside) { + // Find earlier end of code. + element.prev_code_guess = PrevCodeGuess::End { guess, is_confident }; + } else { + // Find alternate start of code. + let range_before = element.element_range.start..event_range.start; + if let Some(guess) = guess_start_of_code(&dox, range_before) { + element.prev_code_guess = PrevCodeGuess::Start { guess, is_confident }; + } + } + } + } + Event::Text(text) => { + let element = element_stack + .last_mut() + .expect("expected inline text node to be inside of an element"); + assert!( + event_range.start >= element.element_range.start + && event_range.end <= element.element_range.end + ); + + // The first char is escaped if the prev char is \ and not part of a text node. + let is_escaped = prev_text_end < event_range.start + && dox.as_bytes()[event_range.start - 1] == b'\\'; + + // Don't lint backslash-escaped (\`) or html-escaped (`) backticks. + if *text == *"`" && !is_escaped && *text == dox[event_range.clone()] { + // We found a stray backtick. + assert!( + element.backtick_index.is_none(), + "expected at most one unescaped backtick per element", + ); + element.backtick_index = Some(event_range.start); + } + + prev_text_end = event_range.end; + + if is_escaped { + // Ensure that we suggest "`\x" and not "\`x". + element.suggestible_ranges.push(event_range.start - 1..event_range.end); + } else { + element.suggestible_ranges.push(event_range); + } + } + _ => {} + } + } +} + +/// A previous inline code node, that looks wrong. +/// +/// `guess` is the position, where we want to suggest a \` and the guess `is_confident` if an +/// inline code starts or ends with a whitespace. +#[derive(Debug)] +enum PrevCodeGuess { + None, + + /// Missing \` at start. + /// + /// ```markdown + /// foo` `bar` + /// ``` + Start { + guess: usize, + is_confident: bool, + }, + + /// Missing \` at end. + /// + /// ```markdown + /// `foo `bar` + /// ``` + End { + guess: usize, + is_confident: bool, + }, +} + +impl PrevCodeGuess { + fn is_confident(&self) -> bool { + match *self { + PrevCodeGuess::None => false, + PrevCodeGuess::Start { is_confident, .. } | PrevCodeGuess::End { is_confident, .. } => { + is_confident + } + } + } +} + +/// A markdown [tagged element], which may or may not contain an unescaped backtick. +/// +/// [tagged element]: https://docs.rs/pulldown-cmark/0.9/pulldown_cmark/enum.Tag.html +#[derive(Debug)] +struct Element { + /// The full range (span) of the element in the doc string. + element_range: Range, + + /// The ranges where we're allowed to put backticks. + /// This is used to prevent breaking markdown elements like links or lists. + suggestible_ranges: Vec>, + + /// The unescaped backtick. + backtick_index: Option, + + /// Suggest a different start or end of an inline code. + prev_code_guess: PrevCodeGuess, +} + +impl Element { + const fn new(element_range: Range) -> Self { + Self { + element_range, + suggestible_ranges: Vec::new(), + backtick_index: None, + prev_code_guess: PrevCodeGuess::None, + } + } +} + +/// Given a potentially unclosed inline code, attempt to find the start. +fn guess_start_of_code(dox: &str, range: Range) -> Option { + assert!(dox.as_bytes()[range.end] == b'`'); + + let mut braces = 0; + let mut guess = 0; + for (idx, ch) in dox[range.clone()].char_indices().rev() { + match ch { + ')' | ']' | '}' => braces += 1, + '(' | '[' | '{' => { + if braces == 0 { + guess = idx + 1; + break; + } + braces -= 1; + } + ch if ch.is_whitespace() && braces == 0 => { + guess = idx + 1; + break; + } + _ => (), + } + } + + guess += range.start; + + // Don't suggest empty inline code or duplicate backticks. + can_suggest_backtick(dox, guess).then_some(guess) +} + +/// Given a potentially unclosed inline code, attempt to find the end. +fn guess_end_of_code(dox: &str, range: Range) -> Option { + // Punctuation that should be outside of the inline code. + const TRAILING_PUNCTUATION: &[u8] = b".,"; + + assert!(dox.as_bytes()[range.start - 1] == b'`'); + + let text = dox[range.clone()].trim_end(); + let mut braces = 0; + let mut guess = text.len(); + for (idx, ch) in text.char_indices() { + match ch { + '(' | '[' | '{' => braces += 1, + ')' | ']' | '}' => { + if braces == 0 { + guess = idx; + break; + } + braces -= 1; + } + ch if ch.is_whitespace() && braces == 0 => { + guess = idx; + break; + } + _ => (), + } + } + + // Strip a single trailing punctuation. + if guess >= 1 + && TRAILING_PUNCTUATION.contains(&text.as_bytes()[guess - 1]) + && (guess < 2 || !TRAILING_PUNCTUATION.contains(&text.as_bytes()[guess - 2])) + { + guess -= 1; + } + + guess += range.start; + + // Don't suggest empty inline code or duplicate backticks. + can_suggest_backtick(dox, guess).then_some(guess) +} + +/// Returns whether inserting a backtick at `dox[index]` will not produce double backticks. +fn can_suggest_backtick(dox: &str, index: usize) -> bool { + (index == 0 || dox.as_bytes()[index - 1] != b'`') + && (index == dox.len() || dox.as_bytes()[index] != b'`') +} + +/// Increase the index until it is inside or one past the end of one of the ranges. +/// +/// The ranges must be sorted for this to work correctly. +fn clamp_start(index: usize, ranges: &[Range]) -> Option { + for range in ranges { + if range.start >= index { + return Some(range.start); + } + if index <= range.end { + return Some(index); + } + } + None +} + +/// Decrease the index until it is inside or one past the end of one of the ranges. +/// +/// The ranges must be sorted for this to work correctly. +fn clamp_end(index: usize, ranges: &[Range]) -> Option { + for range in ranges.iter().rev() { + if range.end <= index { + return Some(range.end); + } + if index >= range.start { + return Some(index); + } + } + None +} + +/// Try to emit a span suggestion and fall back to help messages if we can't find a suitable span. +/// +/// This helps finding backticks in huge macro-generated docs. +fn suggest_insertion( + cx: &DocContext<'_>, + item: &Item, + dox: &str, + lint: &mut DiagnosticBuilder<'_, ()>, + insert_index: usize, + suggestion: char, + message: &str, +) { + /// Maximum bytes of context to show around the insertion. + const CONTEXT_MAX_LEN: usize = 80; + + if let Some(span) = + source_span_for_markdown_range(cx.tcx, &dox, &(insert_index..insert_index), &item.attrs) + { + lint.span_suggestion(span, message, suggestion, Applicability::MaybeIncorrect); + } else { + let line_start = dox[..insert_index].rfind('\n').map_or(0, |idx| idx + 1); + let line_end = dox[insert_index..].find('\n').map_or(dox.len(), |idx| idx + insert_index); + + let context_before_max_len = if insert_index - line_start < CONTEXT_MAX_LEN / 2 { + insert_index - line_start + } else if line_end - insert_index < CONTEXT_MAX_LEN / 2 { + CONTEXT_MAX_LEN - (line_end - insert_index) + } else { + CONTEXT_MAX_LEN / 2 + }; + let context_after_max_len = CONTEXT_MAX_LEN - context_before_max_len; + + let (prefix, context_start) = if insert_index - line_start <= context_before_max_len { + ("", line_start) + } else { + ("...", dox.ceil_char_boundary(insert_index - context_before_max_len)) + }; + let (suffix, context_end) = if line_end - insert_index <= context_after_max_len { + ("", line_end) + } else { + ("...", dox.floor_char_boundary(insert_index + context_after_max_len)) + }; + + let context_full = &dox[context_start..context_end].trim_end(); + let context_before = &dox[context_start..insert_index]; + let context_after = &dox[insert_index..context_end].trim_end(); + lint.help(format!( + "{message}\n change: {prefix}{context_full}{suffix}\nto this: {prefix}{context_before}{suggestion}{context_after}{suffix}" + )); + } +} -- cgit v1.2.3