//! Detects links that are not linkified, e.g., in Markdown such as `Go to https://example.com/.` //! Suggests wrapping the link with angle brackets: `Go to .` to linkify it. use super::Pass; use crate::clean::*; use crate::core::DocContext; use crate::html::markdown::main_body_opts; use crate::visit::DocVisitor; use core::ops::Range; use pulldown_cmark::{Event, Parser, Tag}; use regex::Regex; use rustc_errors::Applicability; use std::mem; use std::sync::LazyLock; pub(crate) const CHECK_BARE_URLS: Pass = Pass { name: "check-bare-urls", run: check_bare_urls, description: "detects URLs that are not hyperlinks", }; static URL_REGEX: LazyLock = LazyLock::new(|| { Regex::new(concat!( r"https?://", // url scheme r"([-a-zA-Z0-9@:%._\+~#=]{2,256}\.)+", // one or more subdomains r"[a-zA-Z]{2,63}", // root domain r"\b([-a-zA-Z0-9@:%_\+.~#?&/=]*)" // optional query or url fragments )) .expect("failed to build regex") }); struct BareUrlsLinter<'a, 'tcx> { cx: &'a mut DocContext<'tcx>, } impl<'a, 'tcx> BareUrlsLinter<'a, 'tcx> { fn find_raw_urls( &self, text: &str, range: Range, f: &impl Fn(&DocContext<'_>, &str, &str, Range), ) { trace!("looking for raw urls in {}", text); // For now, we only check "full" URLs (meaning, starting with "http://" or "https://"). for match_ in URL_REGEX.find_iter(text) { let url = match_.as_str(); let url_range = match_.range(); f( self.cx, "this URL is not a hyperlink", url, Range { start: range.start + url_range.start, end: range.start + url_range.end }, ); } } } pub(crate) fn check_bare_urls(krate: Crate, cx: &mut DocContext<'_>) -> Crate { BareUrlsLinter { cx }.visit_crate(&krate); krate } impl<'a, 'tcx> DocVisitor for BareUrlsLinter<'a, 'tcx> { fn visit_item(&mut self, item: &Item) { let Some(hir_id) = DocContext::as_local_hir_id(self.cx.tcx, item.item_id) else { // If non-local, no need to check anything. return; }; let dox = item.attrs.collapsed_doc_value().unwrap_or_default(); if !dox.is_empty() { let report_diag = |cx: &DocContext<'_>, msg: &str, url: &str, range: Range| { let sp = super::source_span_for_markdown_range(cx.tcx, &dox, &range, &item.attrs) .unwrap_or_else(|| item.attr_span(cx.tcx)); cx.tcx.struct_span_lint_hir(crate::lint::BARE_URLS, hir_id, sp, |lint| { lint.build(msg) .note("bare URLs are not automatically turned into clickable links") .span_suggestion( sp, "use an automatic link instead", format!("<{}>", url), Applicability::MachineApplicable, ) .emit(); }); }; let mut p = Parser::new_ext(&dox, main_body_opts()).into_offset_iter(); while let Some((event, range)) = p.next() { match event { Event::Text(s) => self.find_raw_urls(&s, range, &report_diag), // We don't want to check the text inside code blocks or links. Event::Start(tag @ (Tag::CodeBlock(_) | Tag::Link(..))) => { while let Some((event, _)) = p.next() { match event { Event::End(end) if mem::discriminant(&end) == mem::discriminant(&tag) => { break; } _ => {} } } } _ => {} } } } self.visit_item_recur(item) } }