diff options
Diffstat (limited to 'compiler/rustc_resolve/src/rustdoc.rs')
-rw-r--r-- | compiler/rustc_resolve/src/rustdoc.rs | 388 |
1 files changed, 388 insertions, 0 deletions
diff --git a/compiler/rustc_resolve/src/rustdoc.rs b/compiler/rustc_resolve/src/rustdoc.rs new file mode 100644 index 000000000..b8853c174 --- /dev/null +++ b/compiler/rustc_resolve/src/rustdoc.rs @@ -0,0 +1,388 @@ +use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag}; +use rustc_ast as ast; +use rustc_ast::util::comments::beautify_doc_string; +use rustc_data_structures::fx::FxHashMap; +use rustc_span::def_id::DefId; +use rustc_span::symbol::{kw, Symbol}; +use rustc_span::Span; +use std::{cmp, mem}; + +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum DocFragmentKind { + /// A doc fragment created from a `///` or `//!` doc comment. + SugaredDoc, + /// A doc fragment created from a "raw" `#[doc=""]` attribute. + RawDoc, +} + +/// A portion of documentation, extracted from a `#[doc]` attribute. +/// +/// Each variant contains the line number within the complete doc-comment where the fragment +/// starts, as well as the Span where the corresponding doc comment or attribute is located. +/// +/// Included files are kept separate from inline doc comments so that proper line-number +/// information can be given when a doctest fails. Sugared doc comments and "raw" doc comments are +/// kept separate because of issue #42760. +#[derive(Clone, PartialEq, Eq, Debug)] +pub struct DocFragment { + pub span: Span, + /// The module this doc-comment came from. + /// + /// This allows distinguishing between the original documentation and a pub re-export. + /// If it is `None`, the item was not re-exported. + pub parent_module: Option<DefId>, + pub doc: Symbol, + pub kind: DocFragmentKind, + pub indent: usize, +} + +#[derive(Clone, Copy, Debug)] +pub enum MalformedGenerics { + /// This link has unbalanced angle brackets. + /// + /// For example, `Vec<T` should trigger this, as should `Vec<T>>`. + UnbalancedAngleBrackets, + /// The generics are not attached to a type. + /// + /// For example, `<T>` should trigger this. + /// + /// This is detected by checking if the path is empty after the generics are stripped. + MissingType, + /// The link uses fully-qualified syntax, which is currently unsupported. + /// + /// For example, `<Vec as IntoIterator>::into_iter` should trigger this. + /// + /// This is detected by checking if ` as ` (the keyword `as` with spaces around it) is inside + /// angle brackets. + HasFullyQualifiedSyntax, + /// The link has an invalid path separator. + /// + /// For example, `Vec:<T>:new()` should trigger this. Note that `Vec:new()` will **not** + /// trigger this because it has no generics and thus [`strip_generics_from_path`] will not be + /// called. + /// + /// Note that this will also **not** be triggered if the invalid path separator is inside angle + /// brackets because rustdoc mostly ignores what's inside angle brackets (except for + /// [`HasFullyQualifiedSyntax`](MalformedGenerics::HasFullyQualifiedSyntax)). + /// + /// This is detected by checking if there is a colon followed by a non-colon in the link. + InvalidPathSeparator, + /// The link has too many angle brackets. + /// + /// For example, `Vec<<T>>` should trigger this. + TooManyAngleBrackets, + /// The link has empty angle brackets. + /// + /// For example, `Vec<>` should trigger this. + EmptyAngleBrackets, +} + +/// Removes excess indentation on comments in order for the Markdown +/// to be parsed correctly. This is necessary because the convention for +/// writing documentation is to provide a space between the /// or //! marker +/// and the doc text, but Markdown is whitespace-sensitive. For example, +/// a block of text with four-space indentation is parsed as a code block, +/// so if we didn't unindent comments, these list items +/// +/// /// A list: +/// /// +/// /// - Foo +/// /// - Bar +/// +/// would be parsed as if they were in a code block, which is likely not what the user intended. +pub fn unindent_doc_fragments(docs: &mut [DocFragment]) { + // `add` is used in case the most common sugared doc syntax is used ("/// "). The other + // fragments kind's lines are never starting with a whitespace unless they are using some + // markdown formatting requiring it. Therefore, if the doc block have a mix between the two, + // we need to take into account the fact that the minimum indent minus one (to take this + // whitespace into account). + // + // For example: + // + // /// hello! + // #[doc = "another"] + // + // In this case, you want "hello! another" and not "hello! another". + let add = if docs.windows(2).any(|arr| arr[0].kind != arr[1].kind) + && docs.iter().any(|d| d.kind == DocFragmentKind::SugaredDoc) + { + // In case we have a mix of sugared doc comments and "raw" ones, we want the sugared one to + // "decide" how much the minimum indent will be. + 1 + } else { + 0 + }; + + // `min_indent` is used to know how much whitespaces from the start of each lines must be + // removed. Example: + // + // /// hello! + // #[doc = "another"] + // + // In here, the `min_indent` is 1 (because non-sugared fragment are always counted with minimum + // 1 whitespace), meaning that "hello!" will be considered a codeblock because it starts with 4 + // (5 - 1) whitespaces. + let Some(min_indent) = docs + .iter() + .map(|fragment| { + fragment.doc.as_str().lines().fold(usize::MAX, |min_indent, line| { + if line.chars().all(|c| c.is_whitespace()) { + min_indent + } else { + // Compare against either space or tab, ignoring whether they are + // mixed or not. + let whitespace = line.chars().take_while(|c| *c == ' ' || *c == '\t').count(); + cmp::min(min_indent, whitespace) + + if fragment.kind == DocFragmentKind::SugaredDoc { 0 } else { add } + } + }) + }) + .min() + else { + return; + }; + + for fragment in docs { + if fragment.doc == kw::Empty { + continue; + } + + let min_indent = if fragment.kind != DocFragmentKind::SugaredDoc && min_indent > 0 { + min_indent - add + } else { + min_indent + }; + + fragment.indent = min_indent; + } +} + +/// The goal of this function is to apply the `DocFragment` transformation that is required when +/// transforming into the final Markdown, which is applying the computed indent to each line in +/// each doc fragment (a `DocFragment` can contain multiple lines in case of `#[doc = ""]`). +/// +/// Note: remove the trailing newline where appropriate +pub fn add_doc_fragment(out: &mut String, frag: &DocFragment) { + let s = frag.doc.as_str(); + let mut iter = s.lines(); + if s.is_empty() { + out.push('\n'); + return; + } + while let Some(line) = iter.next() { + if line.chars().any(|c| !c.is_whitespace()) { + assert!(line.len() >= frag.indent); + out.push_str(&line[frag.indent..]); + } else { + out.push_str(line); + } + out.push('\n'); + } +} + +pub fn attrs_to_doc_fragments<'a>( + attrs: impl Iterator<Item = (&'a ast::Attribute, Option<DefId>)>, + doc_only: bool, +) -> (Vec<DocFragment>, ast::AttrVec) { + let mut doc_fragments = Vec::new(); + let mut other_attrs = ast::AttrVec::new(); + for (attr, parent_module) in attrs { + if let Some((doc_str, comment_kind)) = attr.doc_str_and_comment_kind() { + let doc = beautify_doc_string(doc_str, comment_kind); + let kind = if attr.is_doc_comment() { + DocFragmentKind::SugaredDoc + } else { + DocFragmentKind::RawDoc + }; + let fragment = DocFragment { span: attr.span, doc, kind, parent_module, indent: 0 }; + doc_fragments.push(fragment); + } else if !doc_only { + other_attrs.push(attr.clone()); + } + } + + unindent_doc_fragments(&mut doc_fragments); + + (doc_fragments, other_attrs) +} + +/// Return the doc-comments on this item, grouped by the module they came from. +/// The module can be different if this is a re-export with added documentation. +/// +/// The last newline is not trimmed so the produced strings are reusable between +/// early and late doc link resolution regardless of their position. +pub fn prepare_to_doc_link_resolution( + doc_fragments: &[DocFragment], +) -> FxHashMap<Option<DefId>, String> { + let mut res = FxHashMap::default(); + for fragment in doc_fragments { + let out_str = res.entry(fragment.parent_module).or_default(); + add_doc_fragment(out_str, fragment); + } + res +} + +/// Options for rendering Markdown in the main body of documentation. +pub fn main_body_opts() -> Options { + Options::ENABLE_TABLES + | Options::ENABLE_FOOTNOTES + | Options::ENABLE_STRIKETHROUGH + | Options::ENABLE_TASKLISTS + | Options::ENABLE_SMART_PUNCTUATION +} + +fn strip_generics_from_path_segment(segment: Vec<char>) -> Result<String, MalformedGenerics> { + let mut stripped_segment = String::new(); + let mut param_depth = 0; + + let mut latest_generics_chunk = String::new(); + + for c in segment { + if c == '<' { + param_depth += 1; + latest_generics_chunk.clear(); + } else if c == '>' { + param_depth -= 1; + if latest_generics_chunk.contains(" as ") { + // The segment tries to use fully-qualified syntax, which is currently unsupported. + // Give a helpful error message instead of completely ignoring the angle brackets. + return Err(MalformedGenerics::HasFullyQualifiedSyntax); + } + } else { + if param_depth == 0 { + stripped_segment.push(c); + } else { + latest_generics_chunk.push(c); + } + } + } + + if param_depth == 0 { + Ok(stripped_segment) + } else { + // The segment has unbalanced angle brackets, e.g. `Vec<T` or `Vec<T>>` + Err(MalformedGenerics::UnbalancedAngleBrackets) + } +} + +pub fn strip_generics_from_path(path_str: &str) -> Result<Box<str>, MalformedGenerics> { + if !path_str.contains(['<', '>']) { + return Ok(path_str.into()); + } + let mut stripped_segments = vec![]; + let mut path = path_str.chars().peekable(); + let mut segment = Vec::new(); + + while let Some(chr) = path.next() { + match chr { + ':' => { + if path.next_if_eq(&':').is_some() { + let stripped_segment = + strip_generics_from_path_segment(mem::take(&mut segment))?; + if !stripped_segment.is_empty() { + stripped_segments.push(stripped_segment); + } + } else { + return Err(MalformedGenerics::InvalidPathSeparator); + } + } + '<' => { + segment.push(chr); + + match path.next() { + Some('<') => { + return Err(MalformedGenerics::TooManyAngleBrackets); + } + Some('>') => { + return Err(MalformedGenerics::EmptyAngleBrackets); + } + Some(chr) => { + segment.push(chr); + + while let Some(chr) = path.next_if(|c| *c != '>') { + segment.push(chr); + } + } + None => break, + } + } + _ => segment.push(chr), + } + trace!("raw segment: {:?}", segment); + } + + if !segment.is_empty() { + let stripped_segment = strip_generics_from_path_segment(segment)?; + if !stripped_segment.is_empty() { + stripped_segments.push(stripped_segment); + } + } + + debug!("path_str: {:?}\nstripped segments: {:?}", path_str, &stripped_segments); + + let stripped_path = stripped_segments.join("::"); + + if !stripped_path.is_empty() { + Ok(stripped_path.into()) + } else { + Err(MalformedGenerics::MissingType) + } +} + +/// Returns whether the first doc-comment is an inner attribute. +/// +//// If there are no doc-comments, return true. +/// FIXME(#78591): Support both inner and outer attributes on the same item. +pub fn inner_docs(attrs: &[ast::Attribute]) -> bool { + attrs.iter().find(|a| a.doc_str().is_some()).map_or(true, |a| a.style == ast::AttrStyle::Inner) +} + +/// Simplified version of the corresponding function in rustdoc. +/// If the rustdoc version returns a successful result, this function must return the same result. +/// Otherwise this function may return anything. +fn preprocess_link(link: &str) -> Box<str> { + let link = link.replace('`', ""); + let link = link.split('#').next().unwrap(); + let link = link.trim(); + let link = link.rsplit('@').next().unwrap(); + let link = link.strip_suffix("()").unwrap_or(link); + let link = link.strip_suffix("{}").unwrap_or(link); + let link = link.strip_suffix("[]").unwrap_or(link); + let link = if link != "!" { link.strip_suffix('!').unwrap_or(link) } else { link }; + strip_generics_from_path(link).unwrap_or_else(|_| link.into()) +} + +/// Keep inline and reference links `[]`, +/// but skip autolinks `<>` which we never consider to be intra-doc links. +pub fn may_be_doc_link(link_type: LinkType) -> bool { + match link_type { + LinkType::Inline + | LinkType::Reference + | LinkType::ReferenceUnknown + | LinkType::Collapsed + | LinkType::CollapsedUnknown + | LinkType::Shortcut + | LinkType::ShortcutUnknown => true, + LinkType::Autolink | LinkType::Email => false, + } +} + +/// Simplified version of `preprocessed_markdown_links` from rustdoc. +/// Must return at least the same links as it, but may add some more links on top of that. +pub(crate) fn attrs_to_preprocessed_links(attrs: &[ast::Attribute]) -> Vec<Box<str>> { + let (doc_fragments, _) = attrs_to_doc_fragments(attrs.iter().map(|attr| (attr, None)), true); + let doc = prepare_to_doc_link_resolution(&doc_fragments).into_values().next().unwrap(); + + Parser::new_with_broken_link_callback( + &doc, + main_body_opts(), + Some(&mut |link: BrokenLink<'_>| Some((link.reference, "".into()))), + ) + .filter_map(|event| match event { + Event::Start(Tag::Link(link_type, dest, _)) if may_be_doc_link(link_type) => { + Some(preprocess_link(&dest)) + } + _ => None, + }) + .collect() +} |