From 698f8c2f01ea549d77d7dc3338a12e04c11057b9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:02:58 +0200 Subject: Adding upstream version 1.64.0+dfsg1. Signed-off-by: Daniel Baumann --- src/librustdoc/passes/bare_urls.rs | 112 +++++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 src/librustdoc/passes/bare_urls.rs (limited to 'src/librustdoc/passes/bare_urls.rs') diff --git a/src/librustdoc/passes/bare_urls.rs b/src/librustdoc/passes/bare_urls.rs new file mode 100644 index 000000000..392e26ea6 --- /dev/null +++ b/src/librustdoc/passes/bare_urls.rs @@ -0,0 +1,112 @@ +//! Detects links that are not linkified, e.g., in Markdown such as `Go to https://example.com/.` +//! Suggests wrapping the link with angle brackets: `Go to .` to linkify it. +use super::Pass; +use crate::clean::*; +use crate::core::DocContext; +use crate::html::markdown::main_body_opts; +use crate::visit::DocVisitor; +use core::ops::Range; +use pulldown_cmark::{Event, Parser, Tag}; +use regex::Regex; +use rustc_errors::Applicability; +use std::mem; +use std::sync::LazyLock; + +pub(crate) const CHECK_BARE_URLS: Pass = Pass { + name: "check-bare-urls", + run: check_bare_urls, + description: "detects URLs that are not hyperlinks", +}; + +static URL_REGEX: LazyLock = LazyLock::new(|| { + Regex::new(concat!( + r"https?://", // url scheme + r"([-a-zA-Z0-9@:%._\+~#=]{2,256}\.)+", // one or more subdomains + r"[a-zA-Z]{2,63}", // root domain + r"\b([-a-zA-Z0-9@:%_\+.~#?&/=]*)" // optional query or url fragments + )) + .expect("failed to build regex") +}); + +struct BareUrlsLinter<'a, 'tcx> { + cx: &'a mut DocContext<'tcx>, +} + +impl<'a, 'tcx> BareUrlsLinter<'a, 'tcx> { + fn find_raw_urls( + &self, + text: &str, + range: Range, + f: &impl Fn(&DocContext<'_>, &str, &str, Range), + ) { + trace!("looking for raw urls in {}", text); + // For now, we only check "full" URLs (meaning, starting with "http://" or "https://"). + for match_ in URL_REGEX.find_iter(text) { + let url = match_.as_str(); + let url_range = match_.range(); + f( + self.cx, + "this URL is not a hyperlink", + url, + Range { start: range.start + url_range.start, end: range.start + url_range.end }, + ); + } + } +} + +pub(crate) fn check_bare_urls(krate: Crate, cx: &mut DocContext<'_>) -> Crate { + BareUrlsLinter { cx }.visit_crate(&krate); + krate +} + +impl<'a, 'tcx> DocVisitor for BareUrlsLinter<'a, 'tcx> { + fn visit_item(&mut self, item: &Item) { + let Some(hir_id) = DocContext::as_local_hir_id(self.cx.tcx, item.item_id) + else { + // If non-local, no need to check anything. + return; + }; + let dox = item.attrs.collapsed_doc_value().unwrap_or_default(); + if !dox.is_empty() { + let report_diag = |cx: &DocContext<'_>, msg: &str, url: &str, range: Range| { + let sp = super::source_span_for_markdown_range(cx.tcx, &dox, &range, &item.attrs) + .unwrap_or_else(|| item.attr_span(cx.tcx)); + cx.tcx.struct_span_lint_hir(crate::lint::BARE_URLS, hir_id, sp, |lint| { + lint.build(msg) + .note("bare URLs are not automatically turned into clickable links") + .span_suggestion( + sp, + "use an automatic link instead", + format!("<{}>", url), + Applicability::MachineApplicable, + ) + .emit(); + }); + }; + + let mut p = Parser::new_ext(&dox, main_body_opts()).into_offset_iter(); + + while let Some((event, range)) = p.next() { + match event { + Event::Text(s) => self.find_raw_urls(&s, range, &report_diag), + // We don't want to check the text inside code blocks or links. + Event::Start(tag @ (Tag::CodeBlock(_) | Tag::Link(..))) => { + while let Some((event, _)) = p.next() { + match event { + Event::End(end) + if mem::discriminant(&end) == mem::discriminant(&tag) => + { + break; + } + _ => {} + } + } + } + _ => {} + } + } + } + + self.visit_item_recur(item) + } +} -- cgit v1.2.3