//! Detects invalid HTML (like an unclosed ``) in doc comments. use super::Pass; use crate::clean::*; use crate::core::DocContext; use crate::html::markdown::main_body_opts; use crate::visit::DocVisitor; use pulldown_cmark::{BrokenLink, Event, LinkType, Parser, Tag}; use std::iter::Peekable; use std::ops::Range; use std::str::CharIndices; pub(crate) const CHECK_INVALID_HTML_TAGS: Pass = Pass { name: "check-invalid-html-tags", run: check_invalid_html_tags, description: "detects invalid HTML tags in doc comments", }; struct InvalidHtmlTagsLinter<'a, 'tcx> { cx: &'a mut DocContext<'tcx>, } pub(crate) fn check_invalid_html_tags(krate: Crate, cx: &mut DocContext<'_>) -> Crate { if cx.tcx.sess.is_nightly_build() { let mut coll = InvalidHtmlTagsLinter { cx }; coll.visit_crate(&krate); } krate } const ALLOWED_UNCLOSED: &[&str] = &[ "area", "base", "br", "col", "embed", "hr", "img", "input", "keygen", "link", "meta", "param", "source", "track", "wbr", ]; fn drop_tag( tags: &mut Vec<(String, Range)>, tag_name: String, range: Range, f: &impl Fn(&str, &Range, bool), ) { let tag_name_low = tag_name.to_lowercase(); if let Some(pos) = tags.iter().rposition(|(t, _)| t.to_lowercase() == tag_name_low) { // If the tag is nested inside a "` (the `h2` tag isn't required // but it helps for the visualization). f(&format!("unopened HTML tag `{}`", tag_name), &range, false); } } fn extract_path_backwards(text: &str, end_pos: usize) -> Option { use rustc_lexer::{is_id_continue, is_id_start}; let mut current_pos = end_pos; loop { if current_pos >= 2 && text[..current_pos].ends_with("::") { current_pos -= 2; } let new_pos = text[..current_pos] .char_indices() .rev() .take_while(|(_, c)| is_id_start(*c) || is_id_continue(*c)) .reduce(|_accum, item| item) .and_then(|(new_pos, c)| is_id_start(c).then_some(new_pos)); if let Some(new_pos) = new_pos { if current_pos != new_pos { current_pos = new_pos; continue; } } break; } if current_pos == end_pos { None } else { Some(current_pos) } } fn is_valid_for_html_tag_name(c: char, is_empty: bool) -> bool { // https://spec.commonmark.org/0.30/#raw-html // // > A tag name consists of an ASCII letter followed by zero or more ASCII letters, digits, or // > hyphens (-). c.is_ascii_alphabetic() || !is_empty && (c == '-' || c.is_ascii_digit()) } fn extract_html_tag( tags: &mut Vec<(String, Range)>, text: &str, range: &Range, start_pos: usize, iter: &mut Peekable>, f: &impl Fn(&str, &Range, bool), ) { let mut tag_name = String::new(); let mut is_closing = false; let mut prev_pos = start_pos; loop { let (pos, c) = match iter.peek() { Some((pos, c)) => (*pos, *c), // In case we reached the of the doc comment, we want to check that it's an // unclosed HTML tag. For example "/// (prev_pos, '\0'), }; prev_pos = pos; // Checking if this is a closing tag (like `` for ``). if c == '/' && tag_name.is_empty() { is_closing = true; } else if is_valid_for_html_tag_name(c, tag_name.is_empty()) { tag_name.push(c); } else { if !tag_name.is_empty() { let mut r = Range { start: range.start + start_pos, end: range.start + pos }; if c == '>' { // In case we have a tag without attribute, we can consider the span to // refer to it fully. r.end += 1; } if is_closing { // In case we have "" or even "". if c != '>' { if !c.is_whitespace() { // It seems like it's not a valid HTML tag. break; } let mut found = false; for (new_pos, c) in text[pos..].char_indices() { if !c.is_whitespace() { if c == '>' { r.end = range.start + new_pos + 1; found = true; } break; } } if !found { break; } } drop_tag(tags, tag_name, r, f); } else { tags.push((tag_name, r)); } } break; } iter.next(); } } fn extract_tags( tags: &mut Vec<(String, Range)>, text: &str, range: Range, is_in_comment: &mut Option>, f: &impl Fn(&str, &Range, bool), ) { let mut iter = text.char_indices().peekable(); while let Some((start_pos, c)) = iter.next() { if is_in_comment.is_some() { if text[start_pos..].starts_with("-->") { *is_in_comment = None; } } else if c == '<' { if text[start_pos..].starts_with("