Adding upstream version 1.66.0+dfsg1. (tag: upstream/1.66.0+dfsg1)

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-17 12:11:28 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-17 12:11:28 +0000
commit: 94a0819fe3a0d679c3042a77bfe6a2afc505daea (patch)
tree: 2b827afe6a05f3538db3f7803a88c4587fe85648 /src/librustdoc/passes/html_tags.rs
parent: Adding upstream version 1.64.0+dfsg1. (diff)
download: rustc-94a0819fe3a0d679c3042a77bfe6a2afc505daea.tar.xz
rustc-94a0819fe3a0d679c3042a77bfe6a2afc505daea.zip
1 files changed, 140 insertions, 13 deletions
diff --git a/src/librustdoc/passes/html_tags.rs b/src/librustdoc/passes/html_tags.rs
index f3a3c853c..a89ed7c7e 100644
--- a/src/librustdoc/passes/html_tags.rs
+++ b/src/librustdoc/passes/html_tags.rs
@@ -22,10 +22,8 @@ struct InvalidHtmlTagsLinter<'a, 'tcx> {
 }
 
 pub(crate) fn check_invalid_html_tags(krate: Crate, cx: &mut DocContext<'_>) -> Crate {
-    if cx.tcx.sess.is_nightly_build() {
-        let mut coll = InvalidHtmlTagsLinter { cx };
-        coll.visit_crate(&krate);
-    }
+    let mut coll = InvalidHtmlTagsLinter { cx };
+    coll.visit_crate(&krate);
     krate
 }
 
@@ -94,6 +92,34 @@ fn extract_path_backwards(text: &str, end_pos: usize) -> Option<usize> {
     if current_pos == end_pos { None } else { Some(current_pos) }
 }
 
+fn extract_path_forward(text: &str, start_pos: usize) -> Option<usize> {
+    use rustc_lexer::{is_id_continue, is_id_start};
+    let mut current_pos = start_pos;
+    loop {
+        if current_pos < text.len() && text[current_pos..].starts_with("::") {
+            current_pos += 2;
+        } else {
+            break;
+        }
+        let mut chars = text[current_pos..].chars();
+        if let Some(c) = chars.next() {
+            if is_id_start(c) {
+                current_pos += c.len_utf8();
+            } else {
+                break;
+            }
+        }
+        while let Some(c) = chars.next() {
+            if is_id_continue(c) {
+                current_pos += c.len_utf8();
+            } else {
+                break;
+            }
+        }
+    }
+    if current_pos == start_pos { None } else { Some(current_pos) }
+}
+
 fn is_valid_for_html_tag_name(c: char, is_empty: bool) -> bool {
     // https://spec.commonmark.org/0.30/#raw-html
     //
@@ -158,7 +184,60 @@ fn extract_html_tag(
                     }
                     drop_tag(tags, tag_name, r, f);
                 } else {
-                    tags.push((tag_name, r));
+                    let mut is_self_closing = false;
+                    let mut quote_pos = None;
+                    if c != '>' {
+                        let mut quote = None;
+                        let mut after_eq = false;
+                        for (i, c) in text[pos..].char_indices() {
+                            if !c.is_whitespace() {
+                                if let Some(q) = quote {
+                                    if c == q {
+                                        quote = None;
+                                        quote_pos = None;
+                                        after_eq = false;
+                                    }
+                                } else if c == '>' {
+                                    break;
+                                } else if c == '/' && !after_eq {
+                                    is_self_closing = true;
+                                } else {
+                                    if is_self_closing {
+                                        is_self_closing = false;
+                                    }
+                                    if (c == '"' || c == '\'') && after_eq {
+                                        quote = Some(c);
+                                        quote_pos = Some(pos + i);
+                                    } else if c == '=' {
+                                        after_eq = true;
+                                    }
+                                }
+                            } else if quote.is_none() {
+                                after_eq = false;
+                            }
+                        }
+                    }
+                    if let Some(quote_pos) = quote_pos {
+                        let qr = Range { start: quote_pos, end: quote_pos };
+                        f(
+                            &format!("unclosed quoted HTML attribute on tag `{}`", tag_name),
+                            &qr,
+                            false,
+                        );
+                    }
+                    if is_self_closing {
+                        // https://html.spec.whatwg.org/#parse-error-non-void-html-element-start-tag-with-trailing-solidus
+                        let valid = ALLOWED_UNCLOSED.contains(&&tag_name[..])
+                            || tags.iter().take(pos + 1).any(|(at, _)| {
+                                let at = at.to_lowercase();
+                                at == "svg" || at == "math"
+                            });
+                        if !valid {
+                            f(&format!("invalid self-closing HTML tag `{}`", tag_name), &r, false);
+                        }
+                    } else {
+                        tags.push((tag_name, r));
+                    }
                 }
             }
             break;
@@ -212,27 +291,74 @@ impl<'a, 'tcx> DocVisitor for InvalidHtmlTagsLinter<'a, 'tcx> {
                     Some(sp) => sp,
                     None => item.attr_span(tcx),
                 };
-                tcx.struct_span_lint_hir(crate::lint::INVALID_HTML_TAGS, hir_id, sp, |lint| {
+                tcx.struct_span_lint_hir(crate::lint::INVALID_HTML_TAGS, hir_id, sp, msg, |lint| {
                     use rustc_lint_defs::Applicability;
-                    let mut diag = lint.build(msg);
                     // If a tag looks like `<this>`, it might actually be a generic.
                     // We don't try to detect stuff `<like, this>` because that's not valid HTML,
                     // and we don't try to detect stuff `<like this>` because that's not valid Rust.
-                    if let Some(Some(generics_start)) = (is_open_tag
-                        && dox[..range.end].ends_with('>'))
+                    let mut generics_end = range.end;
+                    if let Some(Some(mut generics_start)) = (is_open_tag
+                        && dox[..generics_end].ends_with('>'))
                     .then(|| extract_path_backwards(&dox, range.start))
                     {
+                        while generics_start != 0
+                            && generics_end < dox.len()
+                            && dox.as_bytes()[generics_start - 1] == b'<'
+                            && dox.as_bytes()[generics_end] == b'>'
+                        {
+                            generics_end += 1;
+                            generics_start -= 1;
+                            if let Some(new_start) = extract_path_backwards(&dox, generics_start) {
+                                generics_start = new_start;
+                            }
+                            if let Some(new_end) = extract_path_forward(&dox, generics_end) {
+                                generics_end = new_end;
+                            }
+                        }
+                        if let Some(new_end) = extract_path_forward(&dox, generics_end) {
+                            generics_end = new_end;
+                        }
                         let generics_sp = match super::source_span_for_markdown_range(
                             tcx,
                             &dox,
-                            &(generics_start..range.end),
+                            &(generics_start..generics_end),
                             &item.attrs,
                         ) {
                             Some(sp) => sp,
                             None => item.attr_span(tcx),
                         };
+                        // Sometimes, we only extract part of a path. For example, consider this:
+                        //
+                        //     <[u32] as IntoIter<u32>>::Item
+                        //                       ^^^^^ unclosed HTML tag `u32`
+                        //
+                        // We don't have any code for parsing fully-qualified trait paths.
+                        // In theory, we could add it, but doing it correctly would require
+                        // parsing the entire path grammar, which is problematic because of
+                        // overlap between the path grammar and Markdown.
+                        //
+                        // The example above shows that ambiguity. Is `[u32]` intended to be an
+                        // intra-doc link to the u32 primitive, or is it intended to be a slice?
+                        //
+                        // If the below conditional were removed, we would suggest this, which is
+                        // not what the user probably wants.
+                        //
+                        //     <[u32] as `IntoIter<u32>`>::Item
+                        //
+                        // We know that the user actually wants to wrap the whole thing in a code
+                        // block, but the only reason we know that is because `u32` does not, in
+                        // fact, implement IntoIter. If the example looks like this:
+                        //
+                        //     <[Vec<i32>] as IntoIter<i32>>::Item
+                        //
+                        // The ideal fix would be significantly different.
+                        if (generics_start > 0 && dox.as_bytes()[generics_start - 1] == b'<')
+                            || (generics_end < dox.len() && dox.as_bytes()[generics_end] == b'>')
+                        {
+                            return lint;
+                        }
                         // multipart form is chosen here because ``Vec<i32>`` would be confusing.
-                        diag.multipart_suggestion(
+                        lint.multipart_suggestion(
                             "try marking as source code",
                             vec![
                                 (generics_sp.shrink_to_lo(), String::from("`")),
@@ -241,7 +367,8 @@ impl<'a, 'tcx> DocVisitor for InvalidHtmlTagsLinter<'a, 'tcx> {
                             Applicability::MaybeIncorrect,
                         );
                     }
-                    diag.emit()
+
+                    lint
                 });
             };
 
@@ -278,7 +405,7 @@ impl<'a, 'tcx> DocVisitor for InvalidHtmlTagsLinter<'a, 'tcx> {
             for (event, range) in p {
                 match event {
                     Event::Start(Tag::CodeBlock(_)) => in_code_block = true,
-                    Event::Html(text) | Event::Text(text) if !in_code_block => {
+                    Event::Html(text) if !in_code_block => {
                         extract_tags(&mut tags, &text, range, &mut is_in_comment, &report_diag)
                     }
                     Event::End(Tag::CodeBlock(_)) => in_code_block = false,
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-17 12:11:28 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-17 12:11:28 +0000
commit	94a0819fe3a0d679c3042a77bfe6a2afc505daea (patch)
tree	2b827afe6a05f3538db3f7803a88c4587fe85648 /src/librustdoc/passes/html_tags.rs
parent	Adding upstream version 1.64.0+dfsg1. (diff)
download	rustc-94a0819fe3a0d679c3042a77bfe6a2afc505daea.tar.xz rustc-94a0819fe3a0d679c3042a77bfe6a2afc505daea.zip