summaryrefslogtreecommitdiffstats
path: root/src/librustdoc/passes/html_tags.rs
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:11:28 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:11:28 +0000
commit94a0819fe3a0d679c3042a77bfe6a2afc505daea (patch)
tree2b827afe6a05f3538db3f7803a88c4587fe85648 /src/librustdoc/passes/html_tags.rs
parentAdding upstream version 1.64.0+dfsg1. (diff)
downloadrustc-94a0819fe3a0d679c3042a77bfe6a2afc505daea.tar.xz
rustc-94a0819fe3a0d679c3042a77bfe6a2afc505daea.zip
Adding upstream version 1.66.0+dfsg1.upstream/1.66.0+dfsg1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/librustdoc/passes/html_tags.rs')
-rw-r--r--src/librustdoc/passes/html_tags.rs153
1 files changed, 140 insertions, 13 deletions
diff --git a/src/librustdoc/passes/html_tags.rs b/src/librustdoc/passes/html_tags.rs
index f3a3c853c..a89ed7c7e 100644
--- a/src/librustdoc/passes/html_tags.rs
+++ b/src/librustdoc/passes/html_tags.rs
@@ -22,10 +22,8 @@ struct InvalidHtmlTagsLinter<'a, 'tcx> {
}
pub(crate) fn check_invalid_html_tags(krate: Crate, cx: &mut DocContext<'_>) -> Crate {
- if cx.tcx.sess.is_nightly_build() {
- let mut coll = InvalidHtmlTagsLinter { cx };
- coll.visit_crate(&krate);
- }
+ let mut coll = InvalidHtmlTagsLinter { cx };
+ coll.visit_crate(&krate);
krate
}
@@ -94,6 +92,34 @@ fn extract_path_backwards(text: &str, end_pos: usize) -> Option<usize> {
if current_pos == end_pos { None } else { Some(current_pos) }
}
+fn extract_path_forward(text: &str, start_pos: usize) -> Option<usize> {
+ use rustc_lexer::{is_id_continue, is_id_start};
+ let mut current_pos = start_pos;
+ loop {
+ if current_pos < text.len() && text[current_pos..].starts_with("::") {
+ current_pos += 2;
+ } else {
+ break;
+ }
+ let mut chars = text[current_pos..].chars();
+ if let Some(c) = chars.next() {
+ if is_id_start(c) {
+ current_pos += c.len_utf8();
+ } else {
+ break;
+ }
+ }
+ while let Some(c) = chars.next() {
+ if is_id_continue(c) {
+ current_pos += c.len_utf8();
+ } else {
+ break;
+ }
+ }
+ }
+ if current_pos == start_pos { None } else { Some(current_pos) }
+}
+
fn is_valid_for_html_tag_name(c: char, is_empty: bool) -> bool {
// https://spec.commonmark.org/0.30/#raw-html
//
@@ -158,7 +184,60 @@ fn extract_html_tag(
}
drop_tag(tags, tag_name, r, f);
} else {
- tags.push((tag_name, r));
+ let mut is_self_closing = false;
+ let mut quote_pos = None;
+ if c != '>' {
+ let mut quote = None;
+ let mut after_eq = false;
+ for (i, c) in text[pos..].char_indices() {
+ if !c.is_whitespace() {
+ if let Some(q) = quote {
+ if c == q {
+ quote = None;
+ quote_pos = None;
+ after_eq = false;
+ }
+ } else if c == '>' {
+ break;
+ } else if c == '/' && !after_eq {
+ is_self_closing = true;
+ } else {
+ if is_self_closing {
+ is_self_closing = false;
+ }
+ if (c == '"' || c == '\'') && after_eq {
+ quote = Some(c);
+ quote_pos = Some(pos + i);
+ } else if c == '=' {
+ after_eq = true;
+ }
+ }
+ } else if quote.is_none() {
+ after_eq = false;
+ }
+ }
+ }
+ if let Some(quote_pos) = quote_pos {
+ let qr = Range { start: quote_pos, end: quote_pos };
+ f(
+ &format!("unclosed quoted HTML attribute on tag `{}`", tag_name),
+ &qr,
+ false,
+ );
+ }
+ if is_self_closing {
+ // https://html.spec.whatwg.org/#parse-error-non-void-html-element-start-tag-with-trailing-solidus
+ let valid = ALLOWED_UNCLOSED.contains(&&tag_name[..])
+ || tags.iter().take(pos + 1).any(|(at, _)| {
+ let at = at.to_lowercase();
+ at == "svg" || at == "math"
+ });
+ if !valid {
+ f(&format!("invalid self-closing HTML tag `{}`", tag_name), &r, false);
+ }
+ } else {
+ tags.push((tag_name, r));
+ }
}
}
break;
@@ -212,27 +291,74 @@ impl<'a, 'tcx> DocVisitor for InvalidHtmlTagsLinter<'a, 'tcx> {
Some(sp) => sp,
None => item.attr_span(tcx),
};
- tcx.struct_span_lint_hir(crate::lint::INVALID_HTML_TAGS, hir_id, sp, |lint| {
+ tcx.struct_span_lint_hir(crate::lint::INVALID_HTML_TAGS, hir_id, sp, msg, |lint| {
use rustc_lint_defs::Applicability;
- let mut diag = lint.build(msg);
// If a tag looks like `<this>`, it might actually be a generic.
// We don't try to detect stuff `<like, this>` because that's not valid HTML,
// and we don't try to detect stuff `<like this>` because that's not valid Rust.
- if let Some(Some(generics_start)) = (is_open_tag
- && dox[..range.end].ends_with('>'))
+ let mut generics_end = range.end;
+ if let Some(Some(mut generics_start)) = (is_open_tag
+ && dox[..generics_end].ends_with('>'))
.then(|| extract_path_backwards(&dox, range.start))
{
+ while generics_start != 0
+ && generics_end < dox.len()
+ && dox.as_bytes()[generics_start - 1] == b'<'
+ && dox.as_bytes()[generics_end] == b'>'
+ {
+ generics_end += 1;
+ generics_start -= 1;
+ if let Some(new_start) = extract_path_backwards(&dox, generics_start) {
+ generics_start = new_start;
+ }
+ if let Some(new_end) = extract_path_forward(&dox, generics_end) {
+ generics_end = new_end;
+ }
+ }
+ if let Some(new_end) = extract_path_forward(&dox, generics_end) {
+ generics_end = new_end;
+ }
let generics_sp = match super::source_span_for_markdown_range(
tcx,
&dox,
- &(generics_start..range.end),
+ &(generics_start..generics_end),
&item.attrs,
) {
Some(sp) => sp,
None => item.attr_span(tcx),
};
+ // Sometimes, we only extract part of a path. For example, consider this:
+ //
+ // <[u32] as IntoIter<u32>>::Item
+ // ^^^^^ unclosed HTML tag `u32`
+ //
+ // We don't have any code for parsing fully-qualified trait paths.
+ // In theory, we could add it, but doing it correctly would require
+ // parsing the entire path grammar, which is problematic because of
+ // overlap between the path grammar and Markdown.
+ //
+ // The example above shows that ambiguity. Is `[u32]` intended to be an
+ // intra-doc link to the u32 primitive, or is it intended to be a slice?
+ //
+ // If the below conditional were removed, we would suggest this, which is
+ // not what the user probably wants.
+ //
+ // <[u32] as `IntoIter<u32>`>::Item
+ //
+ // We know that the user actually wants to wrap the whole thing in a code
+ // block, but the only reason we know that is because `u32` does not, in
+ // fact, implement IntoIter. If the example looks like this:
+ //
+ // <[Vec<i32>] as IntoIter<i32>::Item
+ //
+ // The ideal fix would be significantly different.
+ if (generics_start > 0 && dox.as_bytes()[generics_start - 1] == b'<')
+ || (generics_end < dox.len() && dox.as_bytes()[generics_end] == b'>')
+ {
+ return lint;
+ }
// multipart form is chosen here because ``Vec<i32>`` would be confusing.
- diag.multipart_suggestion(
+ lint.multipart_suggestion(
"try marking as source code",
vec![
(generics_sp.shrink_to_lo(), String::from("`")),
@@ -241,7 +367,8 @@ impl<'a, 'tcx> DocVisitor for InvalidHtmlTagsLinter<'a, 'tcx> {
Applicability::MaybeIncorrect,
);
}
- diag.emit()
+
+ lint
});
};
@@ -278,7 +405,7 @@ impl<'a, 'tcx> DocVisitor for InvalidHtmlTagsLinter<'a, 'tcx> {
for (event, range) in p {
match event {
Event::Start(Tag::CodeBlock(_)) => in_code_block = true,
- Event::Html(text) | Event::Text(text) if !in_code_block => {
+ Event::Html(text) if !in_code_block => {
extract_tags(&mut tags, &text, range, &mut is_in_comment, &report_diag)
}
Event::End(Tag::CodeBlock(_)) => in_code_block = false,