summaryrefslogtreecommitdiffstats
path: root/compiler/rustc_resolve/src/rustdoc.rs
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/rustc_resolve/src/rustdoc.rs')
-rw-r--r--compiler/rustc_resolve/src/rustdoc.rs388
1 files changed, 388 insertions, 0 deletions
diff --git a/compiler/rustc_resolve/src/rustdoc.rs b/compiler/rustc_resolve/src/rustdoc.rs
new file mode 100644
index 000000000..b8853c174
--- /dev/null
+++ b/compiler/rustc_resolve/src/rustdoc.rs
@@ -0,0 +1,388 @@
+use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag};
+use rustc_ast as ast;
+use rustc_ast::util::comments::beautify_doc_string;
+use rustc_data_structures::fx::FxHashMap;
+use rustc_span::def_id::DefId;
+use rustc_span::symbol::{kw, Symbol};
+use rustc_span::Span;
+use std::{cmp, mem};
+
/// The syntactic form a [`DocFragment`] was written in.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DocFragmentKind {
    /// The fragment comes from a doc comment, i.e. `///` or `//!`.
    SugaredDoc,
    /// The fragment comes from an explicit ("raw") `#[doc = "..."]` attribute.
    RawDoc,
}
+
+/// A portion of documentation, extracted from a `#[doc]` attribute.
+///
+/// Each variant contains the line number within the complete doc-comment where the fragment
+/// starts, as well as the Span where the corresponding doc comment or attribute is located.
+///
+/// Included files are kept separate from inline doc comments so that proper line-number
+/// information can be given when a doctest fails. Sugared doc comments and "raw" doc comments are
+/// kept separate because of issue #42760.
+#[derive(Clone, PartialEq, Eq, Debug)]
+pub struct DocFragment {
+ pub span: Span,
+ /// The module this doc-comment came from.
+ ///
+ /// This allows distinguishing between the original documentation and a pub re-export.
+ /// If it is `None`, the item was not re-exported.
+ pub parent_module: Option<DefId>,
+ pub doc: Symbol,
+ pub kind: DocFragmentKind,
+ pub indent: usize,
+}
+
/// The ways in which the generics of a doc link path can be malformed.
#[derive(Debug, Clone, Copy)]
pub enum MalformedGenerics {
    /// The angle brackets in the link do not pair up.
    ///
    /// Both `Vec<T` and `Vec<T>>` are examples.
    UnbalancedAngleBrackets,
    /// There are generics, but no type for them to belong to.
    ///
    /// `<T>` is an example.
    ///
    /// Detected by the path being empty once the generics have been stripped.
    MissingType,
    /// The link is written in fully-qualified syntax, which is currently unsupported.
    ///
    /// `<Vec as IntoIterator>::into_iter` is an example.
    ///
    /// Detected by ` as ` (the keyword `as` surrounded by spaces) appearing inside angle
    /// brackets.
    HasFullyQualifiedSyntax,
    /// The link contains a path separator that is not `::`.
    ///
    /// `Vec:<T>:new()` is an example. `Vec:new()` does **not** trigger this: it contains no
    /// angle brackets, so [`strip_generics_from_path`] hands it back untouched.
    ///
    /// An invalid separator located *inside* angle brackets does **not** trigger this either,
    /// because the contents of angle brackets are mostly ignored (except for
    /// [`HasFullyQualifiedSyntax`](MalformedGenerics::HasFullyQualifiedSyntax)).
    ///
    /// Detected by a colon that is not immediately followed by a second colon.
    InvalidPathSeparator,
    /// An opening angle bracket is directly followed by another one.
    ///
    /// `Vec<<T>>` is an example.
    TooManyAngleBrackets,
    /// A pair of angle brackets has nothing between them.
    ///
    /// `Vec<>` is an example.
    EmptyAngleBrackets,
}
+
+/// Removes excess indentation on comments in order for the Markdown
+/// to be parsed correctly. This is necessary because the convention for
+/// writing documentation is to provide a space between the /// or //! marker
+/// and the doc text, but Markdown is whitespace-sensitive. For example,
+/// a block of text with four-space indentation is parsed as a code block,
+/// so if we didn't unindent comments, these list items
+///
+/// /// A list:
+/// ///
+/// /// - Foo
+/// /// - Bar
+///
+/// would be parsed as if they were in a code block, which is likely not what the user intended.
+pub fn unindent_doc_fragments(docs: &mut [DocFragment]) {
+ // `add` is used in case the most common sugared doc syntax is used ("/// "). The other
+ // fragments kind's lines are never starting with a whitespace unless they are using some
+ // markdown formatting requiring it. Therefore, if the doc block have a mix between the two,
+ // we need to take into account the fact that the minimum indent minus one (to take this
+ // whitespace into account).
+ //
+ // For example:
+ //
+ // /// hello!
+ // #[doc = "another"]
+ //
+ // In this case, you want "hello! another" and not "hello! another".
+ let add = if docs.windows(2).any(|arr| arr[0].kind != arr[1].kind)
+ && docs.iter().any(|d| d.kind == DocFragmentKind::SugaredDoc)
+ {
+ // In case we have a mix of sugared doc comments and "raw" ones, we want the sugared one to
+ // "decide" how much the minimum indent will be.
+ 1
+ } else {
+ 0
+ };
+
+ // `min_indent` is used to know how much whitespaces from the start of each lines must be
+ // removed. Example:
+ //
+ // /// hello!
+ // #[doc = "another"]
+ //
+ // In here, the `min_indent` is 1 (because non-sugared fragment are always counted with minimum
+ // 1 whitespace), meaning that "hello!" will be considered a codeblock because it starts with 4
+ // (5 - 1) whitespaces.
+ let Some(min_indent) = docs
+ .iter()
+ .map(|fragment| {
+ fragment.doc.as_str().lines().fold(usize::MAX, |min_indent, line| {
+ if line.chars().all(|c| c.is_whitespace()) {
+ min_indent
+ } else {
+ // Compare against either space or tab, ignoring whether they are
+ // mixed or not.
+ let whitespace = line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
+ cmp::min(min_indent, whitespace)
+ + if fragment.kind == DocFragmentKind::SugaredDoc { 0 } else { add }
+ }
+ })
+ })
+ .min()
+ else {
+ return;
+ };
+
+ for fragment in docs {
+ if fragment.doc == kw::Empty {
+ continue;
+ }
+
+ let min_indent = if fragment.kind != DocFragmentKind::SugaredDoc && min_indent > 0 {
+ min_indent - add
+ } else {
+ min_indent
+ };
+
+ fragment.indent = min_indent;
+ }
+}
+
+/// The goal of this function is to apply the `DocFragment` transformation that is required when
+/// transforming into the final Markdown, which is applying the computed indent to each line in
+/// each doc fragment (a `DocFragment` can contain multiple lines in case of `#[doc = ""]`).
+///
+/// Note: remove the trailing newline where appropriate
+pub fn add_doc_fragment(out: &mut String, frag: &DocFragment) {
+ let s = frag.doc.as_str();
+ let mut iter = s.lines();
+ if s.is_empty() {
+ out.push('\n');
+ return;
+ }
+ while let Some(line) = iter.next() {
+ if line.chars().any(|c| !c.is_whitespace()) {
+ assert!(line.len() >= frag.indent);
+ out.push_str(&line[frag.indent..]);
+ } else {
+ out.push_str(line);
+ }
+ out.push('\n');
+ }
+}
+
+pub fn attrs_to_doc_fragments<'a>(
+ attrs: impl Iterator<Item = (&'a ast::Attribute, Option<DefId>)>,
+ doc_only: bool,
+) -> (Vec<DocFragment>, ast::AttrVec) {
+ let mut doc_fragments = Vec::new();
+ let mut other_attrs = ast::AttrVec::new();
+ for (attr, parent_module) in attrs {
+ if let Some((doc_str, comment_kind)) = attr.doc_str_and_comment_kind() {
+ let doc = beautify_doc_string(doc_str, comment_kind);
+ let kind = if attr.is_doc_comment() {
+ DocFragmentKind::SugaredDoc
+ } else {
+ DocFragmentKind::RawDoc
+ };
+ let fragment = DocFragment { span: attr.span, doc, kind, parent_module, indent: 0 };
+ doc_fragments.push(fragment);
+ } else if !doc_only {
+ other_attrs.push(attr.clone());
+ }
+ }
+
+ unindent_doc_fragments(&mut doc_fragments);
+
+ (doc_fragments, other_attrs)
+}
+
+/// Return the doc-comments on this item, grouped by the module they came from.
+/// The module can be different if this is a re-export with added documentation.
+///
+/// The last newline is not trimmed so the produced strings are reusable between
+/// early and late doc link resolution regardless of their position.
+pub fn prepare_to_doc_link_resolution(
+ doc_fragments: &[DocFragment],
+) -> FxHashMap<Option<DefId>, String> {
+ let mut res = FxHashMap::default();
+ for fragment in doc_fragments {
+ let out_str = res.entry(fragment.parent_module).or_default();
+ add_doc_fragment(out_str, fragment);
+ }
+ res
+}
+
+/// Options for rendering Markdown in the main body of documentation.
+pub fn main_body_opts() -> Options {
+ Options::ENABLE_TABLES
+ | Options::ENABLE_FOOTNOTES
+ | Options::ENABLE_STRIKETHROUGH
+ | Options::ENABLE_TASKLISTS
+ | Options::ENABLE_SMART_PUNCTUATION
+}
+
+fn strip_generics_from_path_segment(segment: Vec<char>) -> Result<String, MalformedGenerics> {
+ let mut stripped_segment = String::new();
+ let mut param_depth = 0;
+
+ let mut latest_generics_chunk = String::new();
+
+ for c in segment {
+ if c == '<' {
+ param_depth += 1;
+ latest_generics_chunk.clear();
+ } else if c == '>' {
+ param_depth -= 1;
+ if latest_generics_chunk.contains(" as ") {
+ // The segment tries to use fully-qualified syntax, which is currently unsupported.
+ // Give a helpful error message instead of completely ignoring the angle brackets.
+ return Err(MalformedGenerics::HasFullyQualifiedSyntax);
+ }
+ } else {
+ if param_depth == 0 {
+ stripped_segment.push(c);
+ } else {
+ latest_generics_chunk.push(c);
+ }
+ }
+ }
+
+ if param_depth == 0 {
+ Ok(stripped_segment)
+ } else {
+ // The segment has unbalanced angle brackets, e.g. `Vec<T` or `Vec<T>>`
+ Err(MalformedGenerics::UnbalancedAngleBrackets)
+ }
+}
+
+pub fn strip_generics_from_path(path_str: &str) -> Result<Box<str>, MalformedGenerics> {
+ if !path_str.contains(['<', '>']) {
+ return Ok(path_str.into());
+ }
+ let mut stripped_segments = vec![];
+ let mut path = path_str.chars().peekable();
+ let mut segment = Vec::new();
+
+ while let Some(chr) = path.next() {
+ match chr {
+ ':' => {
+ if path.next_if_eq(&':').is_some() {
+ let stripped_segment =
+ strip_generics_from_path_segment(mem::take(&mut segment))?;
+ if !stripped_segment.is_empty() {
+ stripped_segments.push(stripped_segment);
+ }
+ } else {
+ return Err(MalformedGenerics::InvalidPathSeparator);
+ }
+ }
+ '<' => {
+ segment.push(chr);
+
+ match path.next() {
+ Some('<') => {
+ return Err(MalformedGenerics::TooManyAngleBrackets);
+ }
+ Some('>') => {
+ return Err(MalformedGenerics::EmptyAngleBrackets);
+ }
+ Some(chr) => {
+ segment.push(chr);
+
+ while let Some(chr) = path.next_if(|c| *c != '>') {
+ segment.push(chr);
+ }
+ }
+ None => break,
+ }
+ }
+ _ => segment.push(chr),
+ }
+ trace!("raw segment: {:?}", segment);
+ }
+
+ if !segment.is_empty() {
+ let stripped_segment = strip_generics_from_path_segment(segment)?;
+ if !stripped_segment.is_empty() {
+ stripped_segments.push(stripped_segment);
+ }
+ }
+
+ debug!("path_str: {:?}\nstripped segments: {:?}", path_str, &stripped_segments);
+
+ let stripped_path = stripped_segments.join("::");
+
+ if !stripped_path.is_empty() {
+ Ok(stripped_path.into())
+ } else {
+ Err(MalformedGenerics::MissingType)
+ }
+}
+
+/// Returns whether the first doc-comment is an inner attribute.
+///
+//// If there are no doc-comments, return true.
+/// FIXME(#78591): Support both inner and outer attributes on the same item.
+pub fn inner_docs(attrs: &[ast::Attribute]) -> bool {
+ attrs.iter().find(|a| a.doc_str().is_some()).map_or(true, |a| a.style == ast::AttrStyle::Inner)
+}
+
+/// Simplified version of the corresponding function in rustdoc.
+/// If the rustdoc version returns a successful result, this function must return the same result.
+/// Otherwise this function may return anything.
+fn preprocess_link(link: &str) -> Box<str> {
+ let link = link.replace('`', "");
+ let link = link.split('#').next().unwrap();
+ let link = link.trim();
+ let link = link.rsplit('@').next().unwrap();
+ let link = link.strip_suffix("()").unwrap_or(link);
+ let link = link.strip_suffix("{}").unwrap_or(link);
+ let link = link.strip_suffix("[]").unwrap_or(link);
+ let link = if link != "!" { link.strip_suffix('!').unwrap_or(link) } else { link };
+ strip_generics_from_path(link).unwrap_or_else(|_| link.into())
+}
+
+/// Keep inline and reference links `[]`,
+/// but skip autolinks `<>` which we never consider to be intra-doc links.
+pub fn may_be_doc_link(link_type: LinkType) -> bool {
+ match link_type {
+ LinkType::Inline
+ | LinkType::Reference
+ | LinkType::ReferenceUnknown
+ | LinkType::Collapsed
+ | LinkType::CollapsedUnknown
+ | LinkType::Shortcut
+ | LinkType::ShortcutUnknown => true,
+ LinkType::Autolink | LinkType::Email => false,
+ }
+}
+
+/// Simplified version of `preprocessed_markdown_links` from rustdoc.
+/// Must return at least the same links as it, but may add some more links on top of that.
+pub(crate) fn attrs_to_preprocessed_links(attrs: &[ast::Attribute]) -> Vec<Box<str>> {
+ let (doc_fragments, _) = attrs_to_doc_fragments(attrs.iter().map(|attr| (attr, None)), true);
+ let doc = prepare_to_doc_link_resolution(&doc_fragments).into_values().next().unwrap();
+
+ Parser::new_with_broken_link_callback(
+ &doc,
+ main_body_opts(),
+ Some(&mut |link: BrokenLink<'_>| Some((link.reference, "".into()))),
+ )
+ .filter_map(|event| match event {
+ Event::Start(Tag::Link(link_type, dest, _)) if may_be_doc_link(link_type) => {
+ Some(preprocess_link(&dest))
+ }
+ _ => None,
+ })
+ .collect()
+}