Diffstat (limited to 'src/librustdoc/passes/lint')
-rw-r--r--   src/librustdoc/passes/lint/bare_urls.rs                |  89
-rw-r--r--   src/librustdoc/passes/lint/check_code_block_syntax.rs  | 170
-rw-r--r--   src/librustdoc/passes/lint/html_tags.rs                | 407
3 files changed, 666 insertions, 0 deletions
diff --git a/src/librustdoc/passes/lint/bare_urls.rs b/src/librustdoc/passes/lint/bare_urls.rs
new file mode 100644
index 000000000..423230cfe
--- /dev/null
+++ b/src/librustdoc/passes/lint/bare_urls.rs
@@ -0,0 +1,89 @@
+//! Detects links that are not linkified, e.g., in Markdown such as `Go to https://example.com/.`
+//! Suggests wrapping the link with angle brackets: `Go to <https://example.com/>.` to linkify it.
+
+use crate::clean::*;
+use crate::core::DocContext;
+use crate::html::markdown::main_body_opts;
+use crate::passes::source_span_for_markdown_range;
+use core::ops::Range;
+use pulldown_cmark::{Event, Parser, Tag};
+use regex::Regex;
+use rustc_errors::Applicability;
+use std::mem;
+use std::sync::LazyLock;
+
+pub(super) fn visit_item(cx: &DocContext<'_>, item: &Item) {
+ let Some(hir_id) = DocContext::as_local_hir_id(cx.tcx, item.item_id)
+ else {
+ // If non-local, no need to check anything.
+ return;
+ };
+ let dox = item.attrs.collapsed_doc_value().unwrap_or_default();
+ if !dox.is_empty() {
+ let report_diag = |cx: &DocContext<'_>, msg: &str, url: &str, range: Range<usize>| {
+ let sp = source_span_for_markdown_range(cx.tcx, &dox, &range, &item.attrs)
+ .unwrap_or_else(|| item.attr_span(cx.tcx));
+ cx.tcx.struct_span_lint_hir(crate::lint::BARE_URLS, hir_id, sp, msg, |lint| {
+ lint.note("bare URLs are not automatically turned into clickable links")
+ .span_suggestion(
+ sp,
+ "use an automatic link instead",
+ format!("<{}>", url),
+ Applicability::MachineApplicable,
+ )
+ });
+ };
+
+ let mut p = Parser::new_ext(&dox, main_body_opts()).into_offset_iter();
+
+ while let Some((event, range)) = p.next() {
+ match event {
+ Event::Text(s) => find_raw_urls(cx, &s, range, &report_diag),
+ // We don't want to check the text inside code blocks or links.
+ Event::Start(tag @ (Tag::CodeBlock(_) | Tag::Link(..))) => {
+ while let Some((event, _)) = p.next() {
+ match event {
+ Event::End(end)
+ if mem::discriminant(&end) == mem::discriminant(&tag) =>
+ {
+ break;
+ }
+ _ => {}
+ }
+ }
+ }
+ _ => {}
+ }
+ }
+ }
+}
+
+static URL_REGEX: LazyLock<Regex> = LazyLock::new(|| {
+ Regex::new(concat!(
+ r"https?://", // url scheme
+ r"([-a-zA-Z0-9@:%._\+~#=]{2,256}\.)+", // one or more subdomains
+ r"[a-zA-Z]{2,63}", // root domain
+ r"\b([-a-zA-Z0-9@:%_\+.~#?&/=]*)" // optional query or url fragments
+ ))
+ .expect("failed to build regex")
+});
+
+fn find_raw_urls(
+ cx: &DocContext<'_>,
+ text: &str,
+ range: Range<usize>,
+ f: &impl Fn(&DocContext<'_>, &str, &str, Range<usize>),
+) {
+ trace!("looking for raw urls in {}", text);
+ // For now, we only check "full" URLs (meaning, starting with "http://" or "https://").
+ for match_ in URL_REGEX.find_iter(text) {
+ let url = match_.as_str();
+ let url_range = match_.range();
+ f(
+ cx,
+ "this URL is not a hyperlink",
+ url,
+ Range { start: range.start + url_range.start, end: range.start + url_range.end },
+ );
+ }
+}
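
The pass above walks the collapsed doc text with pulldown-cmark, inspects only `Event::Text` events (skipping everything inside code blocks and existing links), and runs `URL_REGEX` over each text chunk, offsetting every match by the event's source range so the `<...>` suggestion lands on the right span. The following is a minimal standalone sketch of that detection logic, not rustdoc code: it assumes the same pulldown-cmark 0.9-style API the pass uses (`Tag::Link(..)`, `Event::End(Tag)`), and the function name `find_bare_urls` and the simplified regex are illustrative.

```rust
use pulldown_cmark::{Event, Parser, Tag};
use regex::Regex;
use std::mem;
use std::ops::Range;

/// Returns every bare URL found in `dox` together with its byte range in `dox`.
fn find_bare_urls(dox: &str) -> Vec<(String, Range<usize>)> {
    // Simplified stand-in for the pass's `URL_REGEX`.
    let url_regex = Regex::new(r"https?://[-a-zA-Z0-9@:%._/+~#=?&]+").unwrap();
    let mut found = Vec::new();
    let mut parser = Parser::new(dox).into_offset_iter();
    while let Some((event, range)) = parser.next() {
        match event {
            // Only plain Markdown text is checked.
            Event::Text(text) => {
                for m in url_regex.find_iter(&text) {
                    let url_range = range.start + m.start()..range.start + m.end();
                    found.push((m.as_str().to_string(), url_range));
                }
            }
            // Skip ahead to the matching end event, as the pass does.
            Event::Start(tag @ (Tag::CodeBlock(_) | Tag::Link(..))) => {
                for (event, _) in parser.by_ref() {
                    if matches!(&event, Event::End(end)
                        if mem::discriminant(end) == mem::discriminant(&tag))
                    {
                        break;
                    }
                }
            }
            _ => {}
        }
    }
    found
}

fn main() {
    let dox = "Go to https://example.com/ for details. A [link](https://already.example/) is fine.";
    for (url, range) in find_bare_urls(dox) {
        println!("{url} at {range:?} -> suggest <{url}>");
    }
}
```

On the sample string only the first URL is reported: the second one appears solely as a link destination, never as an `Event::Text`, and the skip loop additionally guards against URLs written literally inside link text or code blocks.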
diff --git a/src/librustdoc/passes/lint/check_code_block_syntax.rs b/src/librustdoc/passes/lint/check_code_block_syntax.rs
new file mode 100644
index 000000000..5aa4f238b
--- /dev/null
+++ b/src/librustdoc/passes/lint/check_code_block_syntax.rs
@@ -0,0 +1,170 @@
+//! Validates syntax inside Rust code blocks (\`\`\`rust).
+use rustc_data_structures::sync::{Lock, Lrc};
+use rustc_errors::{
+ emitter::Emitter,
+ translation::{to_fluent_args, Translate},
+ Applicability, Diagnostic, Handler, LazyFallbackBundle,
+};
+use rustc_parse::parse_stream_from_source_str;
+use rustc_session::parse::ParseSess;
+use rustc_span::hygiene::{AstPass, ExpnData, ExpnKind, LocalExpnId};
+use rustc_span::source_map::{FilePathMapping, SourceMap};
+use rustc_span::{FileName, InnerSpan, DUMMY_SP};
+
+use crate::clean;
+use crate::core::DocContext;
+use crate::html::markdown::{self, RustCodeBlock};
+use crate::passes::source_span_for_markdown_range;
+
+pub(crate) fn visit_item(cx: &DocContext<'_>, item: &clean::Item) {
+ if let Some(dox) = &item.attrs.collapsed_doc_value() {
+ let sp = item.attr_span(cx.tcx);
+ let extra =
+ crate::html::markdown::ExtraInfo::new_did(cx.tcx, item.item_id.expect_def_id(), sp);
+ for code_block in markdown::rust_code_blocks(dox, &extra) {
+ check_rust_syntax(cx, item, dox, code_block);
+ }
+ }
+}
+
+fn check_rust_syntax(
+ cx: &DocContext<'_>,
+ item: &clean::Item,
+ dox: &str,
+ code_block: RustCodeBlock,
+) {
+ let buffer = Lrc::new(Lock::new(Buffer::default()));
+ let fallback_bundle =
+ rustc_errors::fallback_fluent_bundle(rustc_errors::DEFAULT_LOCALE_RESOURCES, false);
+ let emitter = BufferEmitter { buffer: Lrc::clone(&buffer), fallback_bundle };
+
+ let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
+ let handler = Handler::with_emitter(false, None, Box::new(emitter));
+ let source = dox[code_block.code].to_owned();
+ let sess = ParseSess::with_span_handler(handler, sm);
+
+ let edition = code_block.lang_string.edition.unwrap_or_else(|| cx.tcx.sess.edition());
+ let expn_data =
+ ExpnData::default(ExpnKind::AstPass(AstPass::TestHarness), DUMMY_SP, edition, None, None);
+ let expn_id = cx.tcx.with_stable_hashing_context(|hcx| LocalExpnId::fresh(expn_data, hcx));
+ let span = DUMMY_SP.fresh_expansion(expn_id);
+
+ let is_empty = rustc_driver::catch_fatal_errors(|| {
+ parse_stream_from_source_str(
+ FileName::Custom(String::from("doctest")),
+ source,
+ &sess,
+ Some(span),
+ )
+ .is_empty()
+ })
+ .unwrap_or(false);
+ let buffer = buffer.borrow();
+
+ if !buffer.has_errors && !is_empty {
+ // No errors in a non-empty program.
+ return;
+ }
+
+ let Some(local_id) = item.item_id.as_def_id().and_then(|x| x.as_local())
+ else {
+ // We don't need to check the syntax for other crates, so returning
+ // without doing anything should not be a problem.
+ return;
+ };
+
+ let hir_id = cx.tcx.hir().local_def_id_to_hir_id(local_id);
+ let empty_block = code_block.lang_string == Default::default() && code_block.is_fenced;
+ let is_ignore = code_block.lang_string.ignore != markdown::Ignore::None;
+
+ // The span and whether it is precise or not.
+ let (sp, precise_span) =
+ match source_span_for_markdown_range(cx.tcx, dox, &code_block.range, &item.attrs) {
+ Some(sp) => (sp, true),
+ None => (item.attr_span(cx.tcx), false),
+ };
+
+ let msg = if buffer.has_errors {
+ "could not parse code block as Rust code"
+ } else {
+ "Rust code block is empty"
+ };
+
+ // Finally build and emit the completed diagnostic.
+ // All points of divergence have been handled earlier so this can be
+ // done the same way whether the span is precise or not.
+ cx.tcx.struct_span_lint_hir(crate::lint::INVALID_RUST_CODEBLOCKS, hir_id, sp, msg, |lint| {
+ let explanation = if is_ignore {
+ "`ignore` code blocks require valid Rust code for syntax highlighting; \
+ mark blocks that do not contain Rust code as text"
+ } else {
+ "mark blocks that do not contain Rust code as text"
+ };
+
+ if precise_span {
+ if is_ignore {
+ // Giving an accurate suggestion is hard because `ignore` might not have come
+ // first in the list, so just give a `help` instead.
+ lint.span_help(
+ sp.from_inner(InnerSpan::new(0, 3)),
+ &format!("{}: ```text", explanation),
+ );
+ } else if empty_block {
+ lint.span_suggestion(
+ sp.from_inner(InnerSpan::new(0, 3)).shrink_to_hi(),
+ explanation,
+ "text",
+ Applicability::MachineApplicable,
+ );
+ }
+ } else if empty_block || is_ignore {
+ lint.help(&format!("{}: ```text", explanation));
+ }
+
+ // FIXME(#67563): Provide more context for these errors by displaying the spans inline.
+ for message in buffer.messages.iter() {
+ lint.note(message);
+ }
+
+ lint
+ });
+}
+
+#[derive(Default)]
+struct Buffer {
+ messages: Vec<String>,
+ has_errors: bool,
+}
+
+struct BufferEmitter {
+ buffer: Lrc<Lock<Buffer>>,
+ fallback_bundle: LazyFallbackBundle,
+}
+
+impl Translate for BufferEmitter {
+ fn fluent_bundle(&self) -> Option<&Lrc<rustc_errors::FluentBundle>> {
+ None
+ }
+
+ fn fallback_fluent_bundle(&self) -> &rustc_errors::FluentBundle {
+ &**self.fallback_bundle
+ }
+}
+
+impl Emitter for BufferEmitter {
+ fn emit_diagnostic(&mut self, diag: &Diagnostic) {
+ let mut buffer = self.buffer.borrow_mut();
+
+ let fluent_args = to_fluent_args(diag.args());
+ let translated_main_message = self.translate_message(&diag.message[0].0, &fluent_args);
+
+ buffer.messages.push(format!("error from rustc: {}", translated_main_message));
+ if diag.is_error() {
+ buffer.has_errors = true;
+ }
+ }
+
+ fn source_map(&self) -> Option<&Lrc<SourceMap>> {
+ None
+ }
+}
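
From a crate author's point of view, the pass above surfaces as the `rustdoc::invalid_rust_codeblocks` lint: parse errors from the throwaway `ParseSess` are collected by `BufferEmitter` and replayed as notes, and the lint fires either when a Rust code block fails to parse or when a fenced block with the default language string is empty. The doc comments below are an illustrative sketch (the item name is made up) of inputs that trigger each message, together with the `text` fence that the machine-applicable suggestion adds for empty blocks.

````rust
/// Treated as Rust by default but does not parse, so the pass reports
/// "could not parse code block as Rust code" and replays the parse error as a note:
///
/// ```
/// let x = ;
/// ```
///
/// A fenced block with no language string and no tokens at all is reported as
/// "Rust code block is empty", with a suggestion to mark it as `text`:
///
/// ```
/// ```
///
/// Marking non-Rust content as text keeps the lint quiet:
///
/// ```text
/// plain prose, not Rust code
/// ```
pub fn illustrative_item() {}
````

Blocks tagged `ignore` only get a `help` pointing at the language string rather than a structured suggestion, because `ignore` may not be the first word in the fence.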
diff --git a/src/librustdoc/passes/lint/html_tags.rs b/src/librustdoc/passes/lint/html_tags.rs
new file mode 100644
index 000000000..070c0aab5
--- /dev/null
+++ b/src/librustdoc/passes/lint/html_tags.rs
@@ -0,0 +1,407 @@
+//! Detects invalid HTML (like an unclosed `<span>`) in doc comments.
+use crate::clean::*;
+use crate::core::DocContext;
+use crate::html::markdown::main_body_opts;
+use crate::passes::source_span_for_markdown_range;
+
+use pulldown_cmark::{BrokenLink, Event, LinkType, Parser, Tag};
+
+use std::iter::Peekable;
+use std::ops::Range;
+use std::str::CharIndices;
+
+pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item) {
+ let tcx = cx.tcx;
+ let Some(hir_id) = DocContext::as_local_hir_id(tcx, item.item_id)
+ // If non-local, no need to check anything.
+ else { return };
+ let dox = item.attrs.collapsed_doc_value().unwrap_or_default();
+ if !dox.is_empty() {
+ let report_diag = |msg: &str, range: &Range<usize>, is_open_tag: bool| {
+ let sp = match source_span_for_markdown_range(tcx, &dox, range, &item.attrs) {
+ Some(sp) => sp,
+ None => item.attr_span(tcx),
+ };
+ tcx.struct_span_lint_hir(crate::lint::INVALID_HTML_TAGS, hir_id, sp, msg, |lint| {
+ use rustc_lint_defs::Applicability;
+ // If a tag looks like `<this>`, it might actually be a generic.
+ // We don't try to detect stuff `<like, this>` because that's not valid HTML,
+ // and we don't try to detect stuff `<like this>` because that's not valid Rust.
+ let mut generics_end = range.end;
+ if let Some(Some(mut generics_start)) = (is_open_tag
+ && dox[..generics_end].ends_with('>'))
+ .then(|| extract_path_backwards(&dox, range.start))
+ {
+ while generics_start != 0
+ && generics_end < dox.len()
+ && dox.as_bytes()[generics_start - 1] == b'<'
+ && dox.as_bytes()[generics_end] == b'>'
+ {
+ generics_end += 1;
+ generics_start -= 1;
+ if let Some(new_start) = extract_path_backwards(&dox, generics_start) {
+ generics_start = new_start;
+ }
+ if let Some(new_end) = extract_path_forward(&dox, generics_end) {
+ generics_end = new_end;
+ }
+ }
+ if let Some(new_end) = extract_path_forward(&dox, generics_end) {
+ generics_end = new_end;
+ }
+ let generics_sp = match source_span_for_markdown_range(
+ tcx,
+ &dox,
+ &(generics_start..generics_end),
+ &item.attrs,
+ ) {
+ Some(sp) => sp,
+ None => item.attr_span(tcx),
+ };
+ // Sometimes, we only extract part of a path. For example, consider this:
+ //
+ // <[u32] as IntoIter<u32>>::Item
+ // ^^^^^ unclosed HTML tag `u32`
+ //
+ // We don't have any code for parsing fully-qualified trait paths.
+ // In theory, we could add it, but doing it correctly would require
+ // parsing the entire path grammar, which is problematic because of
+ // overlap between the path grammar and Markdown.
+ //
+ // The example above shows that ambiguity. Is `[u32]` intended to be an
+ // intra-doc link to the u32 primitive, or is it intended to be a slice?
+ //
+ // If the below conditional were removed, we would suggest this, which is
+ // not what the user probably wants.
+ //
+ // <[u32] as `IntoIter<u32>`>::Item
+ //
+ // We know that the user actually wants to wrap the whole thing in a code
+ // block, but the only reason we know that is because `u32` does not, in
+ // fact, implement IntoIter. If the example looks like this:
+ //
+ // <[Vec<i32>] as IntoIter<i32>::Item
+ //
+ // The ideal fix would be significantly different.
+ if (generics_start > 0 && dox.as_bytes()[generics_start - 1] == b'<')
+ || (generics_end < dox.len() && dox.as_bytes()[generics_end] == b'>')
+ {
+ return lint;
+ }
+ // multipart form is chosen here because ``Vec<i32>`` would be confusing.
+ lint.multipart_suggestion(
+ "try marking as source code",
+ vec![
+ (generics_sp.shrink_to_lo(), String::from("`")),
+ (generics_sp.shrink_to_hi(), String::from("`")),
+ ],
+ Applicability::MaybeIncorrect,
+ );
+ }
+
+ lint
+ });
+ };
+
+ let mut tags = Vec::new();
+ let mut is_in_comment = None;
+ let mut in_code_block = false;
+
+ let link_names = item.link_names(&cx.cache);
+
+ let mut replacer = |broken_link: BrokenLink<'_>| {
+ if let Some(link) =
+ link_names.iter().find(|link| *link.original_text == *broken_link.reference)
+ {
+ Some((link.href.as_str().into(), link.new_text.as_str().into()))
+ } else if matches!(
+ &broken_link.link_type,
+ LinkType::Reference | LinkType::ReferenceUnknown
+ ) {
+ // If the link is shaped [like][this], suppress any broken HTML in the [this] part.
+ // The `broken_intra_doc_links` lint will report typos in there anyway.
+ Some((
+ broken_link.reference.to_string().into(),
+ broken_link.reference.to_string().into(),
+ ))
+ } else {
+ None
+ }
+ };
+
+ let p = Parser::new_with_broken_link_callback(&dox, main_body_opts(), Some(&mut replacer))
+ .into_offset_iter();
+
+ for (event, range) in p {
+ match event {
+ Event::Start(Tag::CodeBlock(_)) => in_code_block = true,
+ Event::Html(text) if !in_code_block => {
+ extract_tags(&mut tags, &text, range, &mut is_in_comment, &report_diag)
+ }
+ Event::End(Tag::CodeBlock(_)) => in_code_block = false,
+ _ => {}
+ }
+ }
+
+ for (tag, range) in tags.iter().filter(|(t, _)| {
+ let t = t.to_lowercase();
+ !ALLOWED_UNCLOSED.contains(&t.as_str())
+ }) {
+ report_diag(&format!("unclosed HTML tag `{}`", tag), range, true);
+ }
+
+ if let Some(range) = is_in_comment {
+ report_diag("Unclosed HTML comment", &range, false);
+ }
+ }
+}
+
+const ALLOWED_UNCLOSED: &[&str] = &[
+ "area", "base", "br", "col", "embed", "hr", "img", "input", "keygen", "link", "meta", "param",
+ "source", "track", "wbr",
+];
+
+fn drop_tag(
+ tags: &mut Vec<(String, Range<usize>)>,
+ tag_name: String,
+ range: Range<usize>,
+ f: &impl Fn(&str, &Range<usize>, bool),
+) {
+ let tag_name_low = tag_name.to_lowercase();
+ if let Some(pos) = tags.iter().rposition(|(t, _)| t.to_lowercase() == tag_name_low) {
+ // If the tag is nested inside a "<script>" or a "<style>" tag, no warning should
+ // be emitted.
+ let should_not_warn = tags.iter().take(pos + 1).any(|(at, _)| {
+ let at = at.to_lowercase();
+ at == "script" || at == "style"
+ });
+ for (last_tag_name, last_tag_span) in tags.drain(pos + 1..) {
+ if should_not_warn {
+ continue;
+ }
+ let last_tag_name_low = last_tag_name.to_lowercase();
+ if ALLOWED_UNCLOSED.contains(&last_tag_name_low.as_str()) {
+ continue;
+ }
+ // `tags` is used as a stack, meaning that everything after `pos` is still open.
+ // So `<h2><h3></h2>` will look like `["h2", "h3"]`, and when closing `h2` we will
+ // still have `h3` on the stack, meaning that tag wasn't closed as it should have been.
+ f(&format!("unclosed HTML tag `{}`", last_tag_name), &last_tag_span, true);
+ }
+ // Remove the `tag_name` that was originally closed
+ tags.pop();
+ } else {
+ // This can happen, for example, in a case like `<h2></script></h2>` (the `h2` tag isn't
+ // required, but it helps the visualization).
+ f(&format!("unopened HTML tag `{}`", tag_name), &range, false);
+ }
+}
+
+fn extract_path_backwards(text: &str, end_pos: usize) -> Option<usize> {
+ use rustc_lexer::{is_id_continue, is_id_start};
+ let mut current_pos = end_pos;
+ loop {
+ if current_pos >= 2 && text[..current_pos].ends_with("::") {
+ current_pos -= 2;
+ }
+ let new_pos = text[..current_pos]
+ .char_indices()
+ .rev()
+ .take_while(|(_, c)| is_id_start(*c) || is_id_continue(*c))
+ .reduce(|_accum, item| item)
+ .and_then(|(new_pos, c)| is_id_start(c).then_some(new_pos));
+ if let Some(new_pos) = new_pos {
+ if current_pos != new_pos {
+ current_pos = new_pos;
+ continue;
+ }
+ }
+ break;
+ }
+ if current_pos == end_pos { None } else { Some(current_pos) }
+}
+
+fn extract_path_forward(text: &str, start_pos: usize) -> Option<usize> {
+ use rustc_lexer::{is_id_continue, is_id_start};
+ let mut current_pos = start_pos;
+ loop {
+ if current_pos < text.len() && text[current_pos..].starts_with("::") {
+ current_pos += 2;
+ } else {
+ break;
+ }
+ let mut chars = text[current_pos..].chars();
+ if let Some(c) = chars.next() {
+ if is_id_start(c) {
+ current_pos += c.len_utf8();
+ } else {
+ break;
+ }
+ }
+ while let Some(c) = chars.next() {
+ if is_id_continue(c) {
+ current_pos += c.len_utf8();
+ } else {
+ break;
+ }
+ }
+ }
+ if current_pos == start_pos { None } else { Some(current_pos) }
+}
+
+fn is_valid_for_html_tag_name(c: char, is_empty: bool) -> bool {
+ // https://spec.commonmark.org/0.30/#raw-html
+ //
+ // > A tag name consists of an ASCII letter followed by zero or more ASCII letters, digits, or
+ // > hyphens (-).
+ c.is_ascii_alphabetic() || !is_empty && (c == '-' || c.is_ascii_digit())
+}
+
+fn extract_html_tag(
+ tags: &mut Vec<(String, Range<usize>)>,
+ text: &str,
+ range: &Range<usize>,
+ start_pos: usize,
+ iter: &mut Peekable<CharIndices<'_>>,
+ f: &impl Fn(&str, &Range<usize>, bool),
+) {
+ let mut tag_name = String::new();
+ let mut is_closing = false;
+ let mut prev_pos = start_pos;
+
+ loop {
+ let (pos, c) = match iter.peek() {
+ Some((pos, c)) => (*pos, *c),
+ // In case we reached the end of the doc comment, we want to check that it's an
+ // unclosed HTML tag. For example "/// <h3".
+ None => (prev_pos, '\0'),
+ };
+ prev_pos = pos;
+ // Checking if this is a closing tag (like `</a>` for `<a>`).
+ if c == '/' && tag_name.is_empty() {
+ is_closing = true;
+ } else if is_valid_for_html_tag_name(c, tag_name.is_empty()) {
+ tag_name.push(c);
+ } else {
+ if !tag_name.is_empty() {
+ let mut r = Range { start: range.start + start_pos, end: range.start + pos };
+ if c == '>' {
+ // In case we have a tag without attributes, we can consider the span to
+ // refer to it fully.
+ r.end += 1;
+ }
+ if is_closing {
+ // In case we have "</div >" or even "</div >".
+ if c != '>' {
+ if !c.is_whitespace() {
+ // It seems like it's not a valid HTML tag.
+ break;
+ }
+ let mut found = false;
+ for (new_pos, c) in text[pos..].char_indices() {
+ if !c.is_whitespace() {
+ if c == '>' {
+ r.end = range.start + new_pos + 1;
+ found = true;
+ }
+ break;
+ }
+ }
+ if !found {
+ break;
+ }
+ }
+ drop_tag(tags, tag_name, r, f);
+ } else {
+ let mut is_self_closing = false;
+ let mut quote_pos = None;
+ if c != '>' {
+ let mut quote = None;
+ let mut after_eq = false;
+ for (i, c) in text[pos..].char_indices() {
+ if !c.is_whitespace() {
+ if let Some(q) = quote {
+ if c == q {
+ quote = None;
+ quote_pos = None;
+ after_eq = false;
+ }
+ } else if c == '>' {
+ break;
+ } else if c == '/' && !after_eq {
+ is_self_closing = true;
+ } else {
+ if is_self_closing {
+ is_self_closing = false;
+ }
+ if (c == '"' || c == '\'') && after_eq {
+ quote = Some(c);
+ quote_pos = Some(pos + i);
+ } else if c == '=' {
+ after_eq = true;
+ }
+ }
+ } else if quote.is_none() {
+ after_eq = false;
+ }
+ }
+ }
+ if let Some(quote_pos) = quote_pos {
+ let qr = Range { start: quote_pos, end: quote_pos };
+ f(
+ &format!("unclosed quoted HTML attribute on tag `{}`", tag_name),
+ &qr,
+ false,
+ );
+ }
+ if is_self_closing {
+ // https://html.spec.whatwg.org/#parse-error-non-void-html-element-start-tag-with-trailing-solidus
+ let valid = ALLOWED_UNCLOSED.contains(&&tag_name[..])
+ || tags.iter().take(pos + 1).any(|(at, _)| {
+ let at = at.to_lowercase();
+ at == "svg" || at == "math"
+ });
+ if !valid {
+ f(&format!("invalid self-closing HTML tag `{}`", tag_name), &r, false);
+ }
+ } else {
+ tags.push((tag_name, r));
+ }
+ }
+ }
+ break;
+ }
+ iter.next();
+ }
+}
+
+fn extract_tags(
+ tags: &mut Vec<(String, Range<usize>)>,
+ text: &str,
+ range: Range<usize>,
+ is_in_comment: &mut Option<Range<usize>>,
+ f: &impl Fn(&str, &Range<usize>, bool),
+) {
+ let mut iter = text.char_indices().peekable();
+
+ while let Some((start_pos, c)) = iter.next() {
+ if is_in_comment.is_some() {
+ if text[start_pos..].starts_with("-->") {
+ *is_in_comment = None;
+ }
+ } else if c == '<' {
+ if text[start_pos..].starts_with("<!--") {
+ // We skip the "!--" part. (Once `advance_by` is stable, might be nice to use it!)
+ iter.next();
+ iter.next();
+ iter.next();
+ *is_in_comment = Some(Range {
+ start: range.start + start_pos,
+ end: range.start + start_pos + 3,
+ });
+ } else {
+ extract_html_tag(tags, text, &range, start_pos, &mut iter, f);
+ }
+ }
+ }
+}
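
Seen through the `rustdoc::invalid_html_tags` lint that the pass above implements, open tags are pushed onto `tags`, `drop_tag` pops them when a matching closing tag arrives, whatever remains (apart from the void elements in `ALLOWED_UNCLOSED`) is reported, and an open tag that looks like a path followed by generics gets the backtick suggestion instead. The doc comments below are an illustrative sketch of inputs that exercise each branch when rustdoc processes them; the struct names are made up.

```rust
/// Never closed, so it is still on the `tags` stack at the end of the doc:
/// reports "unclosed HTML tag `span`".
/// <span>some emphasized text
pub struct Unclosed;

/// `drop_tag` finds no matching opener: reports "unopened HTML tag `div`".
/// </div>
pub struct Unopened;

/// `<i32>` parses as an unclosed tag, but `extract_path_backwards` finds `Vec`
/// right before the `<`, so the lint suggests "try marking as source code",
/// i.e. wrapping `Vec<i32>` in backticks.
/// Returns a Vec<i32> with the results.
pub struct LooksLikeGenerics;

/// Void elements listed in `ALLOWED_UNCLOSED`, such as <br>, may stay unclosed.
/// First line.<br>
/// Second line.
pub struct VoidElementIsFine;

/// A comment opener with no matching `-->` leaves `is_in_comment` set:
/// reports "Unclosed HTML comment".
/// <!-- forgot to close this comment
pub struct UnclosedComment;
```

Anything inside a fenced Markdown code block is skipped entirely, and unclosed tags nested inside `<script>` or `<style>` are deliberately not reported.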