diff options
Diffstat (limited to 'src/librustdoc/passes')
-rw-r--r-- | src/librustdoc/passes/bare_urls.rs | 112 | ||||
-rw-r--r-- | src/librustdoc/passes/calculate_doc_coverage.rs | 276 | ||||
-rw-r--r-- | src/librustdoc/passes/check_code_block_syntax.rs | 205 | ||||
-rw-r--r-- | src/librustdoc/passes/check_doc_test_visibility.rs | 145 | ||||
-rw-r--r-- | src/librustdoc/passes/collect_intra_doc_links.rs | 2161 | ||||
-rw-r--r-- | src/librustdoc/passes/collect_intra_doc_links/early.rs | 405 | ||||
-rw-r--r-- | src/librustdoc/passes/collect_trait_impls.rs | 273 | ||||
-rw-r--r-- | src/librustdoc/passes/html_tags.rs | 303 | ||||
-rw-r--r-- | src/librustdoc/passes/mod.rs | 212 | ||||
-rw-r--r-- | src/librustdoc/passes/propagate_doc_cfg.rs | 45 | ||||
-rw-r--r-- | src/librustdoc/passes/strip_hidden.rs | 68 | ||||
-rw-r--r-- | src/librustdoc/passes/strip_priv_imports.rs | 16 | ||||
-rw-r--r-- | src/librustdoc/passes/strip_private.rs | 35 | ||||
-rw-r--r-- | src/librustdoc/passes/stripper.rs | 188 |
14 files changed, 4444 insertions, 0 deletions
diff --git a/src/librustdoc/passes/bare_urls.rs b/src/librustdoc/passes/bare_urls.rs new file mode 100644 index 000000000..392e26ea6 --- /dev/null +++ b/src/librustdoc/passes/bare_urls.rs @@ -0,0 +1,112 @@ +//! Detects links that are not linkified, e.g., in Markdown such as `Go to https://example.com/.` +//! Suggests wrapping the link with angle brackets: `Go to <https://example.com/>.` to linkify it. +use super::Pass; +use crate::clean::*; +use crate::core::DocContext; +use crate::html::markdown::main_body_opts; +use crate::visit::DocVisitor; +use core::ops::Range; +use pulldown_cmark::{Event, Parser, Tag}; +use regex::Regex; +use rustc_errors::Applicability; +use std::mem; +use std::sync::LazyLock; + +pub(crate) const CHECK_BARE_URLS: Pass = Pass { + name: "check-bare-urls", + run: check_bare_urls, + description: "detects URLs that are not hyperlinks", +}; + +static URL_REGEX: LazyLock<Regex> = LazyLock::new(|| { + Regex::new(concat!( + r"https?://", // url scheme + r"([-a-zA-Z0-9@:%._\+~#=]{2,256}\.)+", // one or more subdomains + r"[a-zA-Z]{2,63}", // root domain + r"\b([-a-zA-Z0-9@:%_\+.~#?&/=]*)" // optional query or url fragments + )) + .expect("failed to build regex") +}); + +struct BareUrlsLinter<'a, 'tcx> { + cx: &'a mut DocContext<'tcx>, +} + +impl<'a, 'tcx> BareUrlsLinter<'a, 'tcx> { + fn find_raw_urls( + &self, + text: &str, + range: Range<usize>, + f: &impl Fn(&DocContext<'_>, &str, &str, Range<usize>), + ) { + trace!("looking for raw urls in {}", text); + // For now, we only check "full" URLs (meaning, starting with "http://" or "https://"). 
+ for match_ in URL_REGEX.find_iter(text) { + let url = match_.as_str(); + let url_range = match_.range(); + f( + self.cx, + "this URL is not a hyperlink", + url, + Range { start: range.start + url_range.start, end: range.start + url_range.end }, + ); + } + } +} + +pub(crate) fn check_bare_urls(krate: Crate, cx: &mut DocContext<'_>) -> Crate { + BareUrlsLinter { cx }.visit_crate(&krate); + krate +} + +impl<'a, 'tcx> DocVisitor for BareUrlsLinter<'a, 'tcx> { + fn visit_item(&mut self, item: &Item) { + let Some(hir_id) = DocContext::as_local_hir_id(self.cx.tcx, item.item_id) + else { + // If non-local, no need to check anything. + return; + }; + let dox = item.attrs.collapsed_doc_value().unwrap_or_default(); + if !dox.is_empty() { + let report_diag = |cx: &DocContext<'_>, msg: &str, url: &str, range: Range<usize>| { + let sp = super::source_span_for_markdown_range(cx.tcx, &dox, &range, &item.attrs) + .unwrap_or_else(|| item.attr_span(cx.tcx)); + cx.tcx.struct_span_lint_hir(crate::lint::BARE_URLS, hir_id, sp, |lint| { + lint.build(msg) + .note("bare URLs are not automatically turned into clickable links") + .span_suggestion( + sp, + "use an automatic link instead", + format!("<{}>", url), + Applicability::MachineApplicable, + ) + .emit(); + }); + }; + + let mut p = Parser::new_ext(&dox, main_body_opts()).into_offset_iter(); + + while let Some((event, range)) = p.next() { + match event { + Event::Text(s) => self.find_raw_urls(&s, range, &report_diag), + // We don't want to check the text inside code blocks or links. 
+ Event::Start(tag @ (Tag::CodeBlock(_) | Tag::Link(..))) => { + while let Some((event, _)) = p.next() { + match event { + Event::End(end) + if mem::discriminant(&end) == mem::discriminant(&tag) => + { + break; + } + _ => {} + } + } + } + _ => {} + } + } + } + + self.visit_item_recur(item) + } +} diff --git a/src/librustdoc/passes/calculate_doc_coverage.rs b/src/librustdoc/passes/calculate_doc_coverage.rs new file mode 100644 index 000000000..4c6e3eb04 --- /dev/null +++ b/src/librustdoc/passes/calculate_doc_coverage.rs @@ -0,0 +1,276 @@ +//! Calculates information used for the --show-coverage flag. +use crate::clean; +use crate::core::DocContext; +use crate::html::markdown::{find_testable_code, ErrorCodes}; +use crate::passes::check_doc_test_visibility::{should_have_doc_example, Tests}; +use crate::passes::Pass; +use crate::visit::DocVisitor; +use rustc_hir as hir; +use rustc_lint::builtin::MISSING_DOCS; +use rustc_middle::lint::LintLevelSource; +use rustc_middle::ty::DefIdTree; +use rustc_session::lint; +use rustc_span::FileName; +use serde::Serialize; + +use std::collections::BTreeMap; +use std::ops; + +pub(crate) const CALCULATE_DOC_COVERAGE: Pass = Pass { + name: "calculate-doc-coverage", + run: calculate_doc_coverage, + description: "counts the number of items with and without documentation", +}; + +fn calculate_doc_coverage(krate: clean::Crate, ctx: &mut DocContext<'_>) -> clean::Crate { + let mut calc = CoverageCalculator { items: Default::default(), ctx }; + calc.visit_crate(&krate); + + calc.print_results(); + + krate +} + +#[derive(Default, Copy, Clone, Serialize, Debug)] +struct ItemCount { + total: u64, + with_docs: u64, + total_examples: u64, + with_examples: u64, +} + +impl ItemCount { + fn count_item( + &mut self, + has_docs: bool, + has_doc_example: bool, + should_have_doc_examples: bool, + should_have_docs: bool, + ) { + if has_docs || should_have_docs { + self.total += 1; + } + + if has_docs { + self.with_docs += 1; + } + if 
should_have_doc_examples || has_doc_example { + self.total_examples += 1; + } + if has_doc_example { + self.with_examples += 1; + } + } + + fn percentage(&self) -> Option<f64> { + if self.total > 0 { + Some((self.with_docs as f64 * 100.0) / self.total as f64) + } else { + None + } + } + + fn examples_percentage(&self) -> Option<f64> { + if self.total_examples > 0 { + Some((self.with_examples as f64 * 100.0) / self.total_examples as f64) + } else { + None + } + } +} + +impl ops::Sub for ItemCount { + type Output = Self; + + fn sub(self, rhs: Self) -> Self { + ItemCount { + total: self.total - rhs.total, + with_docs: self.with_docs - rhs.with_docs, + total_examples: self.total_examples - rhs.total_examples, + with_examples: self.with_examples - rhs.with_examples, + } + } +} + +impl ops::AddAssign for ItemCount { + fn add_assign(&mut self, rhs: Self) { + self.total += rhs.total; + self.with_docs += rhs.with_docs; + self.total_examples += rhs.total_examples; + self.with_examples += rhs.with_examples; + } +} + +struct CoverageCalculator<'a, 'b> { + items: BTreeMap<FileName, ItemCount>, + ctx: &'a mut DocContext<'b>, +} + +fn limit_filename_len(filename: String) -> String { + let nb_chars = filename.chars().count(); + if nb_chars > 35 { + "...".to_string() + + &filename[filename.char_indices().nth(nb_chars - 32).map(|x| x.0).unwrap_or(0)..] 
+ } else { + filename + } +} + +impl<'a, 'b> CoverageCalculator<'a, 'b> { + fn to_json(&self) -> String { + serde_json::to_string( + &self + .items + .iter() + .map(|(k, v)| (k.prefer_local().to_string(), v)) + .collect::<BTreeMap<String, &ItemCount>>(), + ) + .expect("failed to convert JSON data to string") + } + + fn print_results(&self) { + let output_format = self.ctx.output_format; + if output_format.is_json() { + println!("{}", self.to_json()); + return; + } + let mut total = ItemCount::default(); + + fn print_table_line() { + println!("+-{0:->35}-+-{0:->10}-+-{0:->10}-+-{0:->10}-+-{0:->10}-+", ""); + } + + fn print_table_record( + name: &str, + count: ItemCount, + percentage: f64, + examples_percentage: f64, + ) { + println!( + "| {:<35} | {:>10} | {:>9.1}% | {:>10} | {:>9.1}% |", + name, count.with_docs, percentage, count.with_examples, examples_percentage, + ); + } + + print_table_line(); + println!( + "| {:<35} | {:>10} | {:>10} | {:>10} | {:>10} |", + "File", "Documented", "Percentage", "Examples", "Percentage", + ); + print_table_line(); + + for (file, &count) in &self.items { + if let Some(percentage) = count.percentage() { + print_table_record( + &limit_filename_len(file.prefer_local().to_string_lossy().into()), + count, + percentage, + count.examples_percentage().unwrap_or(0.), + ); + + total += count; + } + } + + print_table_line(); + print_table_record( + "Total", + total, + total.percentage().unwrap_or(0.0), + total.examples_percentage().unwrap_or(0.0), + ); + print_table_line(); + } +} + +impl<'a, 'b> DocVisitor for CoverageCalculator<'a, 'b> { + fn visit_item(&mut self, i: &clean::Item) { + if !i.item_id.is_local() { + // non-local items are skipped because they can be out of the users control, + // especially in the case of trait impls, which rustdoc eagerly inlines + return; + } + + match *i.kind { + clean::StrippedItem(..) 
=> { + // don't count items in stripped modules + return; + } + // docs on `use` and `extern crate` statements are not displayed, so they're not + // worth counting + clean::ImportItem(..) | clean::ExternCrateItem { .. } => {} + // Don't count trait impls, the missing-docs lint doesn't so we shouldn't either. + // Inherent impls *can* be documented, and those docs show up, but in most cases it + // doesn't make sense, as all methods on a type are in one single impl block + clean::ImplItem(_) => {} + _ => { + let has_docs = !i.attrs.doc_strings.is_empty(); + let mut tests = Tests { found_tests: 0 }; + + find_testable_code( + &i.attrs.collapsed_doc_value().unwrap_or_default(), + &mut tests, + ErrorCodes::No, + false, + None, + ); + + let filename = i.span(self.ctx.tcx).filename(self.ctx.sess()); + let has_doc_example = tests.found_tests != 0; + // The `expect_def_id()` should be okay because `local_def_id_to_hir_id` + // would presumably panic if a fake `DefIndex` were passed. + let hir_id = self + .ctx + .tcx + .hir() + .local_def_id_to_hir_id(i.item_id.expect_def_id().expect_local()); + let (level, source) = self.ctx.tcx.lint_level_at_node(MISSING_DOCS, hir_id); + + // In case we have: + // + // ``` + // enum Foo { Bar(u32) } + // // or: + // struct Bar(u32); + // ``` + // + // there is no need to require documentation on the fields of tuple variants and + // tuple structs. + let should_be_ignored = i + .item_id + .as_def_id() + .and_then(|def_id| self.ctx.tcx.opt_parent(def_id)) + .and_then(|def_id| self.ctx.tcx.hir().get_if_local(def_id)) + .map(|node| { + matches!( + node, + hir::Node::Variant(hir::Variant { + data: hir::VariantData::Tuple(_, _), + .. + }) | hir::Node::Item(hir::Item { + kind: hir::ItemKind::Struct(hir::VariantData::Tuple(_, _), _), + .. + }) + ) + }) + .unwrap_or(false); + + // `missing_docs` is allow-by-default, so don't treat this as ignoring the item + // unless the user had an explicit `allow`. 
+ // + let should_have_docs = !should_be_ignored + && (level != lint::Level::Allow || matches!(source, LintLevelSource::Default)); + + debug!("counting {:?} {:?} in {:?}", i.type_(), i.name, filename); + self.items.entry(filename).or_default().count_item( + has_docs, + has_doc_example, + should_have_doc_example(self.ctx, i), + should_have_docs, + ); + } + } + + self.visit_item_recur(i) + } +} diff --git a/src/librustdoc/passes/check_code_block_syntax.rs b/src/librustdoc/passes/check_code_block_syntax.rs new file mode 100644 index 000000000..0172ef570 --- /dev/null +++ b/src/librustdoc/passes/check_code_block_syntax.rs @@ -0,0 +1,205 @@ +//! Validates syntax inside Rust code blocks (\`\`\`rust). +use rustc_data_structures::sync::{Lock, Lrc}; +use rustc_errors::{ + emitter::Emitter, Applicability, Diagnostic, Handler, LazyFallbackBundle, LintDiagnosticBuilder, +}; +use rustc_parse::parse_stream_from_source_str; +use rustc_session::parse::ParseSess; +use rustc_span::hygiene::{AstPass, ExpnData, ExpnKind, LocalExpnId}; +use rustc_span::source_map::{FilePathMapping, SourceMap}; +use rustc_span::{FileName, InnerSpan, DUMMY_SP}; + +use crate::clean; +use crate::core::DocContext; +use crate::html::markdown::{self, RustCodeBlock}; +use crate::passes::Pass; +use crate::visit::DocVisitor; + +pub(crate) const CHECK_CODE_BLOCK_SYNTAX: Pass = Pass { + name: "check-code-block-syntax", + run: check_code_block_syntax, + description: "validates syntax inside Rust code blocks", +}; + +pub(crate) fn check_code_block_syntax( + krate: clean::Crate, + cx: &mut DocContext<'_>, +) -> clean::Crate { + SyntaxChecker { cx }.visit_crate(&krate); + krate +} + +struct SyntaxChecker<'a, 'tcx> { + cx: &'a DocContext<'tcx>, +} + +impl<'a, 'tcx> SyntaxChecker<'a, 'tcx> { + fn check_rust_syntax(&self, item: &clean::Item, dox: &str, code_block: RustCodeBlock) { + let buffer = Lrc::new(Lock::new(Buffer::default())); + let fallback_bundle = + 
rustc_errors::fallback_fluent_bundle(rustc_errors::DEFAULT_LOCALE_RESOURCES, false); + let emitter = BufferEmitter { buffer: Lrc::clone(&buffer), fallback_bundle }; + + let sm = Lrc::new(SourceMap::new(FilePathMapping::empty())); + let handler = Handler::with_emitter(false, None, Box::new(emitter)); + let source = dox[code_block.code].to_owned(); + let sess = ParseSess::with_span_handler(handler, sm); + + let edition = code_block.lang_string.edition.unwrap_or_else(|| self.cx.tcx.sess.edition()); + let expn_data = ExpnData::default( + ExpnKind::AstPass(AstPass::TestHarness), + DUMMY_SP, + edition, + None, + None, + ); + let expn_id = + self.cx.tcx.with_stable_hashing_context(|hcx| LocalExpnId::fresh(expn_data, hcx)); + let span = DUMMY_SP.fresh_expansion(expn_id); + + let is_empty = rustc_driver::catch_fatal_errors(|| { + parse_stream_from_source_str( + FileName::Custom(String::from("doctest")), + source, + &sess, + Some(span), + ) + .is_empty() + }) + .unwrap_or(false); + let buffer = buffer.borrow(); + + if !buffer.has_errors && !is_empty { + // No errors in a non-empty program. + return; + } + + let Some(local_id) = item.item_id.as_def_id().and_then(|x| x.as_local()) + else { + // We don't need to check the syntax for other crates so returning + // without doing anything should not be a problem. + return; + }; + + let hir_id = self.cx.tcx.hir().local_def_id_to_hir_id(local_id); + let empty_block = code_block.lang_string == Default::default() && code_block.is_fenced; + let is_ignore = code_block.lang_string.ignore != markdown::Ignore::None; + + // The span and whether it is precise or not. + let (sp, precise_span) = match super::source_span_for_markdown_range( + self.cx.tcx, + dox, + &code_block.range, + &item.attrs, + ) { + Some(sp) => (sp, true), + None => (item.attr_span(self.cx.tcx), false), + }; + + // lambda that will use the lint to start a new diagnostic and add + // a suggestion to it when needed. 
+ let diag_builder = |lint: LintDiagnosticBuilder<'_, ()>| { + let explanation = if is_ignore { + "`ignore` code blocks require valid Rust code for syntax highlighting; \ + mark blocks that do not contain Rust code as text" + } else { + "mark blocks that do not contain Rust code as text" + }; + let msg = if buffer.has_errors { + "could not parse code block as Rust code" + } else { + "Rust code block is empty" + }; + let mut diag = lint.build(msg); + + if precise_span { + if is_ignore { + // giving an accurate suggestion is hard because `ignore` might not have come first in the list. + // just give a `help` instead. + diag.span_help( + sp.from_inner(InnerSpan::new(0, 3)), + &format!("{}: ```text", explanation), + ); + } else if empty_block { + diag.span_suggestion( + sp.from_inner(InnerSpan::new(0, 3)).shrink_to_hi(), + explanation, + "text", + Applicability::MachineApplicable, + ); + } + } else if empty_block || is_ignore { + diag.help(&format!("{}: ```text", explanation)); + } + + // FIXME(#67563): Provide more context for these errors by displaying the spans inline. + for message in buffer.messages.iter() { + diag.note(message); + } + + diag.emit(); + }; + + // Finally build and emit the completed diagnostic. + // All points of divergence have been handled earlier so this can be + // done the same way whether the span is precise or not. 
+ self.cx.tcx.struct_span_lint_hir( + crate::lint::INVALID_RUST_CODEBLOCKS, + hir_id, + sp, + diag_builder, + ); + } +} + +impl<'a, 'tcx> DocVisitor for SyntaxChecker<'a, 'tcx> { + fn visit_item(&mut self, item: &clean::Item) { + if let Some(dox) = &item.attrs.collapsed_doc_value() { + let sp = item.attr_span(self.cx.tcx); + let extra = crate::html::markdown::ExtraInfo::new_did( + self.cx.tcx, + item.item_id.expect_def_id(), + sp, + ); + for code_block in markdown::rust_code_blocks(dox, &extra) { + self.check_rust_syntax(item, dox, code_block); + } + } + + self.visit_item_recur(item) + } +} + +#[derive(Default)] +struct Buffer { + messages: Vec<String>, + has_errors: bool, +} + +struct BufferEmitter { + buffer: Lrc<Lock<Buffer>>, + fallback_bundle: LazyFallbackBundle, +} + +impl Emitter for BufferEmitter { + fn emit_diagnostic(&mut self, diag: &Diagnostic) { + let mut buffer = self.buffer.borrow_mut(); + // FIXME(davidtwco): need to support translation here eventually + buffer.messages.push(format!("error from rustc: {}", diag.message[0].0.expect_str())); + if diag.is_error() { + buffer.has_errors = true; + } + } + + fn source_map(&self) -> Option<&Lrc<SourceMap>> { + None + } + + fn fluent_bundle(&self) -> Option<&Lrc<rustc_errors::FluentBundle>> { + None + } + + fn fallback_fluent_bundle(&self) -> &rustc_errors::FluentBundle { + &**self.fallback_bundle + } +} diff --git a/src/librustdoc/passes/check_doc_test_visibility.rs b/src/librustdoc/passes/check_doc_test_visibility.rs new file mode 100644 index 000000000..e86f90833 --- /dev/null +++ b/src/librustdoc/passes/check_doc_test_visibility.rs @@ -0,0 +1,145 @@ +//! Looks for items missing (or incorrectly having) doctests. +//! +//! This pass is overloaded and runs two different lints. +//! +//! - MISSING_DOC_CODE_EXAMPLES: this lint is **UNSTABLE** and looks for public items missing doctests. +//! - PRIVATE_DOC_TESTS: this lint is **STABLE** and looks for private items with doctests. 
+ +use super::Pass; +use crate::clean; +use crate::clean::*; +use crate::core::DocContext; +use crate::html::markdown::{find_testable_code, ErrorCodes, Ignore, LangString}; +use crate::visit::DocVisitor; +use crate::visit_ast::inherits_doc_hidden; +use rustc_hir as hir; +use rustc_middle::lint::LintLevelSource; +use rustc_session::lint; +use rustc_span::symbol::sym; + +pub(crate) const CHECK_DOC_TEST_VISIBILITY: Pass = Pass { + name: "check_doc_test_visibility", + run: check_doc_test_visibility, + description: "run various visibility-related lints on doctests", +}; + +struct DocTestVisibilityLinter<'a, 'tcx> { + cx: &'a mut DocContext<'tcx>, +} + +pub(crate) fn check_doc_test_visibility(krate: Crate, cx: &mut DocContext<'_>) -> Crate { + let mut coll = DocTestVisibilityLinter { cx }; + coll.visit_crate(&krate); + krate +} + +impl<'a, 'tcx> DocVisitor for DocTestVisibilityLinter<'a, 'tcx> { + fn visit_item(&mut self, item: &Item) { + let dox = item.attrs.collapsed_doc_value().unwrap_or_default(); + + look_for_tests(self.cx, &dox, item); + + self.visit_item_recur(item) + } +} + +pub(crate) struct Tests { + pub(crate) found_tests: usize, +} + +impl crate::doctest::Tester for Tests { + fn add_test(&mut self, _: String, config: LangString, _: usize) { + if config.rust && config.ignore == Ignore::None { + self.found_tests += 1; + } + } +} + +pub(crate) fn should_have_doc_example(cx: &DocContext<'_>, item: &clean::Item) -> bool { + if !cx.cache.access_levels.is_public(item.item_id.expect_def_id()) + || matches!( + *item.kind, + clean::StructFieldItem(_) + | clean::VariantItem(_) + | clean::AssocConstItem(..) + | clean::AssocTypeItem(..) + | clean::TypedefItem(_) + | clean::StaticItem(_) + | clean::ConstantItem(_) + | clean::ExternCrateItem { .. } + | clean::ImportItem(_) + | clean::PrimitiveItem(_) + | clean::KeywordItem + // check for trait impl + | clean::ImplItem(box clean::Impl { trait_: Some(_), .. 
}) + ) + { + return false; + } + + // The `expect_def_id()` should be okay because `local_def_id_to_hir_id` + // would presumably panic if a fake `DefIndex` were passed. + let hir_id = cx.tcx.hir().local_def_id_to_hir_id(item.item_id.expect_def_id().expect_local()); + + // check if parent is trait impl + if let Some(parent_hir_id) = cx.tcx.hir().find_parent_node(hir_id) { + if let Some(parent_node) = cx.tcx.hir().find(parent_hir_id) { + if matches!( + parent_node, + hir::Node::Item(hir::Item { + kind: hir::ItemKind::Impl(hir::Impl { of_trait: Some(_), .. }), + .. + }) + ) { + return false; + } + } + } + + if cx.tcx.hir().attrs(hir_id).lists(sym::doc).has_word(sym::hidden) + || inherits_doc_hidden(cx.tcx, hir_id) + || cx.tcx.hir().span(hir_id).in_derive_expansion() + { + return false; + } + let (level, source) = cx.tcx.lint_level_at_node(crate::lint::MISSING_DOC_CODE_EXAMPLES, hir_id); + level != lint::Level::Allow || matches!(source, LintLevelSource::Default) +} + +pub(crate) fn look_for_tests<'tcx>(cx: &DocContext<'tcx>, dox: &str, item: &Item) { + let Some(hir_id) = DocContext::as_local_hir_id(cx.tcx, item.item_id) + else { + // If non-local, no need to check anything. 
+ return; + }; + + let mut tests = Tests { found_tests: 0 }; + + find_testable_code(dox, &mut tests, ErrorCodes::No, false, None); + + if tests.found_tests == 0 && cx.tcx.sess.is_nightly_build() { + if should_have_doc_example(cx, item) { + debug!("reporting error for {:?} (hir_id={:?})", item, hir_id); + let sp = item.attr_span(cx.tcx); + cx.tcx.struct_span_lint_hir( + crate::lint::MISSING_DOC_CODE_EXAMPLES, + hir_id, + sp, + |lint| { + lint.build("missing code example in this documentation").emit(); + }, + ); + } + } else if tests.found_tests > 0 + && !cx.cache.access_levels.is_exported(item.item_id.expect_def_id()) + { + cx.tcx.struct_span_lint_hir( + crate::lint::PRIVATE_DOC_TESTS, + hir_id, + item.attr_span(cx.tcx), + |lint| { + lint.build("documentation test in private item").emit(); + }, + ); + } +} diff --git a/src/librustdoc/passes/collect_intra_doc_links.rs b/src/librustdoc/passes/collect_intra_doc_links.rs new file mode 100644 index 000000000..7d7a63c53 --- /dev/null +++ b/src/librustdoc/passes/collect_intra_doc_links.rs @@ -0,0 +1,2161 @@ +//! This module implements [RFC 1946]: Intra-rustdoc-links +//! +//! 
[RFC 1946]: https://github.com/rust-lang/rfcs/blob/master/text/1946-intra-rustdoc-links.md + +use pulldown_cmark::LinkType; +use rustc_ast::util::comments::may_have_doc_links; +use rustc_data_structures::{ + fx::{FxHashMap, FxHashSet}, + intern::Interned, +}; +use rustc_errors::{Applicability, Diagnostic}; +use rustc_hir::def::Namespace::*; +use rustc_hir::def::{DefKind, Namespace, PerNS}; +use rustc_hir::def_id::{DefId, CRATE_DEF_ID}; +use rustc_hir::Mutability; +use rustc_middle::ty::{DefIdTree, Ty, TyCtxt}; +use rustc_middle::{bug, ty}; +use rustc_resolve::ParentScope; +use rustc_session::lint::Lint; +use rustc_span::hygiene::MacroKind; +use rustc_span::symbol::{sym, Ident, Symbol}; +use rustc_span::BytePos; +use smallvec::{smallvec, SmallVec}; + +use std::borrow::Cow; +use std::mem; +use std::ops::Range; + +use crate::clean::{self, utils::find_nearest_parent_module}; +use crate::clean::{Crate, Item, ItemId, ItemLink, PrimitiveType}; +use crate::core::DocContext; +use crate::html::markdown::{markdown_links, MarkdownLink}; +use crate::lint::{BROKEN_INTRA_DOC_LINKS, PRIVATE_INTRA_DOC_LINKS}; +use crate::passes::Pass; +use crate::visit::DocVisitor; + +mod early; +pub(crate) use early::early_resolve_intra_doc_links; + +pub(crate) const COLLECT_INTRA_DOC_LINKS: Pass = Pass { + name: "collect-intra-doc-links", + run: collect_intra_doc_links, + description: "resolves intra-doc links", +}; + +fn collect_intra_doc_links(krate: Crate, cx: &mut DocContext<'_>) -> Crate { + let mut collector = + LinkCollector { cx, mod_ids: Vec::new(), visited_links: FxHashMap::default() }; + collector.visit_crate(&krate); + krate +} + +#[derive(Copy, Clone, Debug, Hash)] +enum Res { + Def(DefKind, DefId), + Primitive(PrimitiveType), +} + +type ResolveRes = rustc_hir::def::Res<rustc_ast::NodeId>; + +impl Res { + fn descr(self) -> &'static str { + match self { + Res::Def(kind, id) => ResolveRes::Def(kind, id).descr(), + Res::Primitive(_) => "builtin type", + } + } + + fn article(self) -> 
&'static str { + match self { + Res::Def(kind, id) => ResolveRes::Def(kind, id).article(), + Res::Primitive(_) => "a", + } + } + + fn name(self, tcx: TyCtxt<'_>) -> Symbol { + match self { + Res::Def(_, id) => tcx.item_name(id), + Res::Primitive(prim) => prim.as_sym(), + } + } + + fn def_id(self, tcx: TyCtxt<'_>) -> DefId { + match self { + Res::Def(_, id) => id, + Res::Primitive(prim) => *PrimitiveType::primitive_locations(tcx).get(&prim).unwrap(), + } + } + + fn from_def_id(tcx: TyCtxt<'_>, def_id: DefId) -> Res { + Res::Def(tcx.def_kind(def_id), def_id) + } + + /// Used for error reporting. + fn disambiguator_suggestion(self) -> Suggestion { + let kind = match self { + Res::Primitive(_) => return Suggestion::Prefix("prim"), + Res::Def(kind, _) => kind, + }; + if kind == DefKind::Macro(MacroKind::Bang) { + return Suggestion::Macro; + } else if kind == DefKind::Fn || kind == DefKind::AssocFn { + return Suggestion::Function; + } else if kind == DefKind::Field { + return Suggestion::RemoveDisambiguator; + } + + let prefix = match kind { + DefKind::Struct => "struct", + DefKind::Enum => "enum", + DefKind::Trait => "trait", + DefKind::Union => "union", + DefKind::Mod => "mod", + DefKind::Const | DefKind::ConstParam | DefKind::AssocConst | DefKind::AnonConst => { + "const" + } + DefKind::Static(_) => "static", + DefKind::Macro(MacroKind::Derive) => "derive", + // Now handle things that don't have a specific disambiguator + _ => match kind + .ns() + .expect("tried to calculate a disambiguator for a def without a namespace?") + { + Namespace::TypeNS => "type", + Namespace::ValueNS => "value", + Namespace::MacroNS => "macro", + }, + }; + + Suggestion::Prefix(prefix) + } +} + +impl TryFrom<ResolveRes> for Res { + type Error = (); + + fn try_from(res: ResolveRes) -> Result<Self, ()> { + use rustc_hir::def::Res::*; + match res { + Def(kind, id) => Ok(Res::Def(kind, id)), + PrimTy(prim) => Ok(Res::Primitive(PrimitiveType::from_hir(prim))), + // e.g. 
`#[derive]` + NonMacroAttr(..) | Err => Result::Err(()), + other => bug!("unrecognized res {:?}", other), + } + } +} + +/// The link failed to resolve. [`resolution_failure`] should look to see if there's +/// a more helpful error that can be given. +#[derive(Debug)] +struct UnresolvedPath<'a> { + /// Item on which the link is resolved, used for resolving `Self`. + item_id: ItemId, + /// The scope the link was resolved in. + module_id: DefId, + /// If part of the link resolved, this has the `Res`. + /// + /// In `[std::io::Error::x]`, `std::io::Error` would be a partial resolution. + partial_res: Option<Res>, + /// The remaining unresolved path segments. + /// + /// In `[std::io::Error::x]`, `x` would be unresolved. + unresolved: Cow<'a, str>, +} + +#[derive(Debug)] +enum ResolutionFailure<'a> { + /// This resolved, but with the wrong namespace. + WrongNamespace { + /// What the link resolved to. + res: Res, + /// The expected namespace for the resolution, determined from the link's disambiguator. + /// + /// E.g., for `[fn@Result]` this is [`Namespace::ValueNS`], + /// even though `Result`'s actual namespace is [`Namespace::TypeNS`]. + expected_ns: Namespace, + }, + NotResolved(UnresolvedPath<'a>), +} + +#[derive(Clone, Copy, Debug)] +enum MalformedGenerics { + /// This link has unbalanced angle brackets. + /// + /// For example, `Vec<T` should trigger this, as should `Vec<T>>`. + UnbalancedAngleBrackets, + /// The generics are not attached to a type. + /// + /// For example, `<T>` should trigger this. + /// + /// This is detected by checking if the path is empty after the generics are stripped. + MissingType, + /// The link uses fully-qualified syntax, which is currently unsupported. + /// + /// For example, `<Vec as IntoIterator>::into_iter` should trigger this. + /// + /// This is detected by checking if ` as ` (the keyword `as` with spaces around it) is inside + /// angle brackets. + HasFullyQualifiedSyntax, + /// The link has an invalid path separator. 
+ /// + /// For example, `Vec:<T>:new()` should trigger this. Note that `Vec:new()` will **not** + /// trigger this because it has no generics and thus [`strip_generics_from_path`] will not be + /// called. + /// + /// Note that this will also **not** be triggered if the invalid path separator is inside angle + /// brackets because rustdoc mostly ignores what's inside angle brackets (except for + /// [`HasFullyQualifiedSyntax`](MalformedGenerics::HasFullyQualifiedSyntax)). + /// + /// This is detected by checking if there is a colon followed by a non-colon in the link. + InvalidPathSeparator, + /// The link has too many angle brackets. + /// + /// For example, `Vec<<T>>` should trigger this. + TooManyAngleBrackets, + /// The link has empty angle brackets. + /// + /// For example, `Vec<>` should trigger this. + EmptyAngleBrackets, +} + +#[derive(Clone, Debug, Hash, PartialEq, Eq)] +pub(crate) enum UrlFragment { + Item(DefId), + UserWritten(String), +} + +impl UrlFragment { + /// Render the fragment, including the leading `#`. + pub(crate) fn render(&self, s: &mut String, tcx: TyCtxt<'_>) { + s.push('#'); + match self { + &UrlFragment::Item(def_id) => { + let kind = match tcx.def_kind(def_id) { + DefKind::AssocFn => { + if tcx.impl_defaultness(def_id).has_value() { + "method." + } else { + "tymethod." + } + } + DefKind::AssocConst => "associatedconstant.", + DefKind::AssocTy => "associatedtype.", + DefKind::Variant => "variant.", + DefKind::Field => { + let parent_id = tcx.parent(def_id); + if tcx.def_kind(parent_id) == DefKind::Variant { + s.push_str("variant."); + s.push_str(tcx.item_name(parent_id).as_str()); + ".field." + } else { + "structfield." 
+ } + } + kind => bug!("unexpected associated item kind: {:?}", kind), + }; + s.push_str(kind); + s.push_str(tcx.item_name(def_id).as_str()); + } + UrlFragment::UserWritten(raw) => s.push_str(&raw), + } + } +} + +#[derive(Clone, Debug, Hash, PartialEq, Eq)] +struct ResolutionInfo { + item_id: ItemId, + module_id: DefId, + dis: Option<Disambiguator>, + path_str: String, + extra_fragment: Option<String>, +} + +#[derive(Clone)] +struct DiagnosticInfo<'a> { + item: &'a Item, + dox: &'a str, + ori_link: &'a str, + link_range: Range<usize>, +} + +struct LinkCollector<'a, 'tcx> { + cx: &'a mut DocContext<'tcx>, + /// A stack of modules used to decide what scope to resolve in. + /// + /// The last module will be used if the parent scope of the current item is + /// unknown. + mod_ids: Vec<DefId>, + /// Cache the resolved links so we can avoid resolving (and emitting errors for) the same link. + /// The link will be `None` if it could not be resolved (i.e. the error was cached). + visited_links: FxHashMap<ResolutionInfo, Option<(Res, Option<UrlFragment>)>>, +} + +impl<'a, 'tcx> LinkCollector<'a, 'tcx> { + /// Given a full link, parse it as an [enum struct variant]. + /// + /// In particular, this will return an error whenever there aren't three + /// full path segments left in the link. 
+ /// + /// [enum struct variant]: rustc_hir::VariantData::Struct + fn variant_field<'path>( + &self, + path_str: &'path str, + item_id: ItemId, + module_id: DefId, + ) -> Result<(Res, DefId), UnresolvedPath<'path>> { + let tcx = self.cx.tcx; + let no_res = || UnresolvedPath { + item_id, + module_id, + partial_res: None, + unresolved: path_str.into(), + }; + + debug!("looking for enum variant {}", path_str); + let mut split = path_str.rsplitn(3, "::"); + let variant_field_name = split + .next() + .map(|f| Symbol::intern(f)) + .expect("fold_item should ensure link is non-empty"); + let variant_name = + // we're not sure this is a variant at all, so use the full string + // If there's no second component, the link looks like `[path]`. + // So there's no partial res and we should say the whole link failed to resolve. + split.next().map(|f| Symbol::intern(f)).ok_or_else(no_res)?; + let path = split + .next() + .map(|f| f.to_owned()) + // If there's no third component, we saw `[a::b]` before and it failed to resolve. + // So there's no partial res. + .ok_or_else(no_res)?; + let ty_res = self.resolve_path(&path, TypeNS, item_id, module_id).ok_or_else(no_res)?; + + match ty_res { + Res::Def(DefKind::Enum, did) => match tcx.type_of(did).kind() { + ty::Adt(def, _) if def.is_enum() => { + if let Some(field) = def.all_fields().find(|f| f.name == variant_field_name) { + Ok((ty_res, field.did)) + } else { + Err(UnresolvedPath { + item_id, + module_id, + partial_res: Some(Res::Def(DefKind::Enum, def.did())), + unresolved: variant_field_name.to_string().into(), + }) + } + } + _ => unreachable!(), + }, + _ => Err(UnresolvedPath { + item_id, + module_id, + partial_res: Some(ty_res), + unresolved: variant_name.to_string().into(), + }), + } + } + + /// Given a primitive type, try to resolve an associated item. 
    fn resolve_primitive_associated_item(
        &self,
        prim_ty: PrimitiveType,
        ns: Namespace,
        item_name: Symbol,
    ) -> Option<(Res, DefId)> {
        let tcx = self.cx.tcx;

        // The first impl of the primitive that has a matching item wins.
        prim_ty.impls(tcx).find_map(|impl_| {
            tcx.associated_items(impl_)
                .find_by_name_and_namespace(tcx, Ident::with_dummy_span(item_name), ns, impl_)
                .map(|item| (Res::Primitive(prim_ty), item.def_id))
        })
    }

    /// Resolves the path `Self` in the type namespace, relative to `item_id`.
    ///
    /// Returns `None` for any other path or namespace, or when `item_id` has no `DefId`.
    /// For associated items, variants and fields, `Self` is looked up from the parent
    /// (for a field of a variant, from the parent of the variant). An `impl` is mapped
    /// through [`Self::def_id_to_res`].
    fn resolve_self_ty(&self, path_str: &str, ns: Namespace, item_id: ItemId) -> Option<Res> {
        if ns != TypeNS || path_str != "Self" {
            return None;
        }

        let tcx = self.cx.tcx;
        item_id
            .as_def_id()
            .map(|def_id| match tcx.def_kind(def_id) {
                def_kind @ (DefKind::AssocFn
                | DefKind::AssocConst
                | DefKind::AssocTy
                | DefKind::Variant
                | DefKind::Field) => {
                    let parent_def_id = tcx.parent(def_id);
                    // A field of an enum variant is two levels below the enum.
                    if def_kind == DefKind::Field && tcx.def_kind(parent_def_id) == DefKind::Variant
                    {
                        tcx.parent(parent_def_id)
                    } else {
                        parent_def_id
                    }
                }
                _ => def_id,
            })
            .and_then(|self_id| match tcx.def_kind(self_id) {
                DefKind::Impl => self.def_id_to_res(self_id),
                def_kind => Some(Res::Def(def_kind, self_id)),
            })
    }

    /// Convenience wrapper around `resolve_rustdoc_path`.
    ///
    /// This also handles resolving `true` and `false` as booleans.
    /// NOTE: `resolve_rustdoc_path` knows only about paths, not about types.
    /// Associated items will never be resolved by this function.
    fn resolve_path(
        &self,
        path_str: &str,
        ns: Namespace,
        item_id: ItemId,
        module_id: DefId,
    ) -> Option<Res> {
        if let res @ Some(..) = self.resolve_self_ty(path_str, ns, item_id) {
            return res;
        }

        // Resolver doesn't know about true, false, and types that aren't paths (e.g. `()`).
        // The cached resolutions are consulted first; the resolver is only entered on a miss.
        let result = self
            .cx
            .resolver_caches
            .doc_link_resolutions
            .get(&(Symbol::intern(path_str), ns, module_id))
            .copied()
            .unwrap_or_else(|| {
                self.cx.enter_resolver(|resolver| {
                    let parent_scope =
                        ParentScope::module(resolver.expect_module(module_id), resolver);
                    resolver.resolve_rustdoc_path(path_str, ns, parent_scope)
                })
            })
            .and_then(|res| res.try_into().ok())
            .or_else(|| resolve_primitive(path_str, ns));
        debug!("{} resolved to {:?} in namespace {:?}", path_str, result, ns);
        result
    }

    /// Resolves a string as a path within a particular namespace. Returns an
    /// optional URL fragment in the case of variants and methods.
    fn resolve<'path>(
        &mut self,
        path_str: &'path str,
        ns: Namespace,
        item_id: ItemId,
        module_id: DefId,
    ) -> Result<(Res, Option<DefId>), UnresolvedPath<'path>> {
        if let Some(res) = self.resolve_path(path_str, ns, item_id, module_id) {
            return Ok(match res {
                // Associated items and variants are returned as
                // (parent, Some(item)) rather than as the item itself.
                Res::Def(
                    DefKind::AssocFn | DefKind::AssocConst | DefKind::AssocTy | DefKind::Variant,
                    def_id,
                ) => (Res::from_def_id(self.cx.tcx, self.cx.tcx.parent(def_id)), Some(def_id)),
                _ => (res, None),
            });
        } else if ns == MacroNS {
            return Err(UnresolvedPath {
                item_id,
                module_id,
                partial_res: None,
                unresolved: path_str.into(),
            });
        }

        // Try looking for methods and associated items.
        let mut split = path_str.rsplitn(2, "::");
        // NB: `split`'s first element is always defined, even if the delimiter was not present.
        // NB: `item_str` could be empty when resolving in the root namespace (e.g. `::std`).
        let item_str = split.next().unwrap();
        let item_name = Symbol::intern(item_str);
        let path_root = split
            .next()
            .map(|f| f.to_owned())
            // If there's no `::`, it's not an associated item.
            // So we can be sure that `rustc_resolve` was accurate when it said it wasn't resolved.
            .ok_or_else(|| {
                debug!("found no `::`, assumming {} was correctly not in scope", item_name);
                UnresolvedPath {
                    item_id,
                    module_id,
                    partial_res: None,
                    unresolved: item_str.into(),
                }
            })?;

        // FIXME(#83862): this arbitrarily gives precedence to primitives over modules to support
        // links to primitives when `#[doc(primitive)]` is present. It should give an ambiguity
        // error instead and special case *only* modules with `#[doc(primitive)]`, not all
        // primitives.
        resolve_primitive(&path_root, TypeNS)
            .or_else(|| self.resolve_path(&path_root, TypeNS, item_id, module_id))
            .and_then(|ty_res| {
                self.resolve_associated_item(ty_res, item_name, ns, module_id).map(Ok)
            })
            .unwrap_or_else(|| {
                // Last resort in the value namespace: `Enum::Variant::field`.
                if ns == Namespace::ValueNS {
                    self.variant_field(path_str, item_id, module_id)
                } else {
                    Err(UnresolvedPath {
                        item_id,
                        module_id,
                        partial_res: None,
                        unresolved: path_root.into(),
                    })
                }
            })
            .map(|(res, def_id)| (res, Some(def_id)))
    }

    /// Convert a DefId to a Res, where possible.
    ///
    /// This is used for resolving type aliases.
    ///
    /// Returns `None` for the type kinds listed in the final match arm.
    fn def_id_to_res(&self, ty_id: DefId) -> Option<Res> {
        use PrimitiveType::*;
        Some(match *self.cx.tcx.type_of(ty_id).kind() {
            ty::Bool => Res::Primitive(Bool),
            ty::Char => Res::Primitive(Char),
            ty::Int(ity) => Res::Primitive(ity.into()),
            ty::Uint(uty) => Res::Primitive(uty.into()),
            ty::Float(fty) => Res::Primitive(fty.into()),
            ty::Str => Res::Primitive(Str),
            ty::Tuple(tys) if tys.is_empty() => Res::Primitive(Unit),
            ty::Tuple(_) => Res::Primitive(Tuple),
            ty::Array(..) => Res::Primitive(Array),
            ty::Slice(_) => Res::Primitive(Slice),
            ty::RawPtr(_) => Res::Primitive(RawPointer),
            ty::Ref(..) => Res::Primitive(Reference),
            ty::FnDef(..) => panic!("type alias to a function definition"),
            ty::FnPtr(_) => Res::Primitive(Fn),
            ty::Never => Res::Primitive(Never),
            ty::Adt(ty::AdtDef(Interned(&ty::AdtDefData { did, .. }, _)), _) | ty::Foreign(did) => {
                Res::from_def_id(self.cx.tcx, did)
            }
            ty::Projection(_)
            | ty::Closure(..)
            | ty::Generator(..)
            | ty::GeneratorWitness(_)
            | ty::Opaque(..)
            | ty::Dynamic(..)
            | ty::Param(_)
            | ty::Bound(..)
            | ty::Placeholder(_)
            | ty::Infer(_)
            | ty::Error(_) => return None,
        })
    }

    /// Convert a PrimitiveType to a Ty, where possible.
    ///
    /// This is used for resolving trait impls for primitives.
    /// Returns `None` for every primitive not listed in the match below.
    fn primitive_type_to_ty(&mut self, prim: PrimitiveType) -> Option<Ty<'tcx>> {
        use PrimitiveType::*;
        let tcx = self.cx.tcx;

        // FIXME: Only simple types are supported here, see if we can support
        // other types such as Tuple, Array, Slice, etc.
        // See https://github.com/rust-lang/rust/issues/90703#issuecomment-1004263455
        Some(tcx.mk_ty(match prim {
            Bool => ty::Bool,
            Str => ty::Str,
            Char => ty::Char,
            Never => ty::Never,
            I8 => ty::Int(ty::IntTy::I8),
            I16 => ty::Int(ty::IntTy::I16),
            I32 => ty::Int(ty::IntTy::I32),
            I64 => ty::Int(ty::IntTy::I64),
            I128 => ty::Int(ty::IntTy::I128),
            Isize => ty::Int(ty::IntTy::Isize),
            F32 => ty::Float(ty::FloatTy::F32),
            F64 => ty::Float(ty::FloatTy::F64),
            U8 => ty::Uint(ty::UintTy::U8),
            U16 => ty::Uint(ty::UintTy::U16),
            U32 => ty::Uint(ty::UintTy::U32),
            U64 => ty::Uint(ty::UintTy::U64),
            U128 => ty::Uint(ty::UintTy::U128),
            Usize => ty::Uint(ty::UintTy::Usize),
            _ => return None,
        }))
    }

    /// Resolve an associated item, returning its containing page's `Res`
    /// and the fragment targeting the associated item on its page.
    fn resolve_associated_item(
        &mut self,
        root_res: Res,
        item_name: Symbol,
        ns: Namespace,
        module_id: DefId,
    ) -> Option<(Res, DefId)> {
        let tcx = self.cx.tcx;

        match root_res {
            Res::Primitive(prim) => {
                // Inherent items of the primitive first, then items of traits implemented for it.
                self.resolve_primitive_associated_item(prim, ns, item_name).or_else(|| {
                    self.primitive_type_to_ty(prim)
                        .and_then(|ty| {
                            resolve_associated_trait_item(ty, module_id, item_name, ns, self.cx)
                        })
                        .map(|item| (root_res, item.def_id))
                })
            }
            Res::Def(DefKind::TyAlias, did) => {
                // Resolve the link on the type the alias points to.
                // FIXME: if the associated item is defined directly on the type alias,
                // it will show up on its documentation page, we should link there instead.
                let res = self.def_id_to_res(did)?;
                self.resolve_associated_item(res, item_name, ns, module_id)
            }
            Res::Def(
                def_kind @ (DefKind::Struct | DefKind::Union | DefKind::Enum | DefKind::ForeignTy),
                did,
            ) => {
                debug!("looking for associated item named {} for item {:?}", item_name, did);
                // Checks if item_name is a variant of the `SomeItem` enum
                if ns == TypeNS && def_kind == DefKind::Enum {
                    match tcx.type_of(did).kind() {
                        ty::Adt(adt_def, _) => {
                            for variant in adt_def.variants() {
                                if variant.name == item_name {
                                    return Some((root_res, variant.def_id));
                                }
                            }
                        }
                        _ => unreachable!(),
                    }
                }

                // Checks if item_name belongs to `impl SomeItem`
                let assoc_item = tcx
                    .inherent_impls(did)
                    .iter()
                    .flat_map(|&imp| {
                        tcx.associated_items(imp).find_by_name_and_namespace(
                            tcx,
                            Ident::with_dummy_span(item_name),
                            ns,
                            imp,
                        )
                    })
                    .copied()
                    // There should only ever be one associated item that matches from any inherent impl
                    .next()
                    // Check if item_name belongs to `impl SomeTrait for SomeItem`
                    // FIXME(#74563): This gives precedence to `impl SomeItem`:
                    // Although having both would be ambiguous, use impl version for compatibility's sake.
                    // To handle that properly resolve() would have to support
                    // something like [`ambi_fn`](<SomeStruct as SomeTrait>::ambi_fn)
                    .or_else(|| {
                        resolve_associated_trait_item(
                            tcx.type_of(did),
                            module_id,
                            item_name,
                            ns,
                            self.cx,
                        )
                    });

                debug!("got associated item {:?}", assoc_item);

                if let Some(item) = assoc_item {
                    return Some((root_res, item.def_id));
                }

                // Fields are only looked up in the value namespace.
                if ns != Namespace::ValueNS {
                    return None;
                }
                debug!("looking for fields named {} for {:?}", item_name, did);
                // FIXME: this doesn't really belong in `associated_item` (maybe `variant_field` is better?)
                // NOTE: it's different from variant_field because it only resolves struct fields,
                // not variant fields (2 path segments, not 3).
                //
                // We need to handle struct (and union) fields in this code because
                // syntactically their paths are identical to associated item paths:
                // `module::Type::field` and `module::Type::Assoc`.
                //
                // On the other hand, variant fields can't be mistaken for associated
                // items because they look like this: `module::Type::Variant::field`.
                //
                // Variants themselves don't need to be handled here, even though
                // they also look like associated items (`module::Type::Variant`),
                // because they are real Rust syntax (unlike the intra-doc links
                // field syntax) and are handled by the compiler's resolver.
                let def = match tcx.type_of(did).kind() {
                    ty::Adt(def, _) if !def.is_enum() => def,
                    _ => return None,
                };
                let field =
                    def.non_enum_variant().fields.iter().find(|item| item.name == item_name)?;
                Some((root_res, field.did))
            }
            // For a trait, the returned `Res` is the associated item itself, not the trait.
            Res::Def(DefKind::Trait, did) => tcx
                .associated_items(did)
                .find_by_name_and_namespace(tcx, Ident::with_dummy_span(item_name), ns, did)
                .map(|item| {
                    let res = Res::Def(item.kind.as_def_kind(), item.def_id);
                    (res, item.def_id)
                }),
            _ => None,
        }
    }
}

/// Collapses a `(base, associated item)` pair into a single `Res`: the associated
/// item's `Res` when there is one, otherwise `base`.
fn full_res(tcx: TyCtxt<'_>, (base, assoc_item): (Res, Option<DefId>)) -> Res {
    assoc_item.map_or(base, |def_id| Res::from_def_id(tcx, def_id))
}

/// Look to see if a resolved item has an associated item named `item_name`.
///
/// Given `[std::io::Error::source]`, where `source` is unresolved, this would
/// find `std::error::Error::source` and return
/// `<io::Error as error::Error>::source`.
fn resolve_associated_trait_item<'a>(
    ty: Ty<'a>,
    module: DefId,
    item_name: Symbol,
    ns: Namespace,
    cx: &mut DocContext<'a>,
) -> Option<ty::AssocItem> {
    // FIXME: this should also consider blanket impls (`impl<T> X for T`). Unfortunately
    // `get_auto_trait_and_blanket_impls` is broken because the caching behavior is wrong. In the
    // meantime, just don't look for these blanket impls.

    // Next consider explicit impls: `impl MyTrait for MyType`
    // Give precedence to inherent impls.
    let traits = trait_impls_for(cx, ty, module);
    debug!("considering traits {:?}", traits);
    let mut candidates = traits.iter().filter_map(|&(impl_, trait_)| {
        cx.tcx
            .associated_items(trait_)
            .find_by_name_and_namespace(cx.tcx, Ident::with_dummy_span(item_name), ns, trait_)
            .map(|trait_assoc| {
                // Prefer the item as written in the impl; fall back to the trait's own item.
                trait_assoc_to_impl_assoc_item(cx.tcx, impl_, trait_assoc.def_id)
                    .unwrap_or(trait_assoc)
            })
    });
    // FIXME(#74563): warn about ambiguity
    debug!("the candidates were {:?}", candidates.clone().collect::<Vec<_>>());
    candidates.next().copied()
}

/// Find the associated item in the impl `impl_id` that corresponds to the
/// trait associated item `trait_assoc_id`.
///
/// This function returns `None` if no associated item was found in the impl.
/// This can occur when the trait associated item has a default value that is
/// not overridden in the impl.
///
/// This is just a wrapper around [`TyCtxt::impl_item_implementor_ids()`] and
/// [`TyCtxt::associated_item()`] (with some helpful logging added).
#[instrument(level = "debug", skip(tcx))]
fn trait_assoc_to_impl_assoc_item<'tcx>(
    tcx: TyCtxt<'tcx>,
    impl_id: DefId,
    trait_assoc_id: DefId,
) -> Option<&'tcx ty::AssocItem> {
    let trait_to_impl_assoc_map = tcx.impl_item_implementor_ids(impl_id);
    debug!(?trait_to_impl_assoc_map);
    let impl_assoc_id = *trait_to_impl_assoc_map.get(&trait_assoc_id)?;
    debug!(?impl_assoc_id);
    let impl_assoc = tcx.associated_item(impl_assoc_id);
    debug!(?impl_assoc);
    Some(impl_assoc)
}

/// Given a type, return all trait impls in scope in `module` for that type.
/// Returns a set of pairs of `(impl_id, trait_id)`.
///
/// NOTE: this cannot be a query because more traits could be available when more crates are compiled!
/// So it is not stable to serialize cross-crate.
#[instrument(level = "debug", skip(cx))]
fn trait_impls_for<'a>(
    cx: &mut DocContext<'a>,
    ty: Ty<'a>,
    module: DefId,
) -> FxHashSet<(DefId, DefId)> {
    let tcx = cx.tcx;
    // NOTE(review): the index panics if `traits_in_scope` has no entry for `module`.
    let iter = cx.resolver_caches.traits_in_scope[&module].iter().flat_map(|trait_candidate| {
        let trait_ = trait_candidate.def_id;
        trace!("considering explicit impl for trait {:?}", trait_);

        // Look at each trait implementation to see if it's an impl for `did`
        tcx.find_map_relevant_impl(trait_, ty, |impl_| {
            let trait_ref = tcx.impl_trait_ref(impl_).expect("this is not an inherent impl");
            // Check if these are the same type.
            let impl_type = trait_ref.self_ty();
            trace!(
                "comparing type {} with kind {:?} against type {:?}",
                impl_type,
                impl_type.kind(),
                ty
            );
            // Fast path: if this is a primitive simple `==` will work
            // NOTE: the `match` is necessary; see #92662.
            // this allows us to ignore generics because the user input
            // may not include the generic placeholders
            // e.g. this allows us to match Foo (user comment) with Foo<T> (actual type)
            let saw_impl = impl_type == ty
                || match (impl_type.kind(), ty.kind()) {
                    (ty::Adt(impl_def, _), ty::Adt(ty_def, _)) => {
                        debug!("impl def_id: {:?}, ty def_id: {:?}", impl_def.did(), ty_def.did());
                        impl_def.did() == ty_def.did()
                    }
                    _ => false,
                };

            if saw_impl { Some((impl_, trait_)) } else { None }
        })
    });
    iter.collect()
}

/// Check for resolve collisions between a trait and its derive.
///
/// These are common and we should just resolve to the trait in that case.
fn is_derive_trait_collision<T>(ns: &PerNS<Result<(Res, T), ResolutionFailure<'_>>>) -> bool {
    matches!(
        *ns,
        PerNS {
            type_ns: Ok((Res::Def(DefKind::Trait, _), _)),
            macro_ns: Ok((Res::Def(DefKind::Macro(MacroKind::Derive), _), _)),
            ..
        }
    )
}

impl<'a, 'tcx> DocVisitor for LinkCollector<'a, 'tcx> {
    /// Resolves every intra-doc link in `item`'s documentation, records the results in
    /// `cx.cache.intra_doc_links`, then recurses into the item's children.
    ///
    /// Modules are pushed onto `mod_ids` while their children are visited. A module with
    /// inner docs is pushed *before* its own links are resolved, any other module after.
    fn visit_item(&mut self, item: &Item) {
        let parent_node =
            item.item_id.as_def_id().and_then(|did| find_nearest_parent_module(self.cx.tcx, did));
        if parent_node.is_some() {
            trace!("got parent node for {:?} {:?}, id {:?}", item.type_(), item.name, item.item_id);
        }

        let inner_docs = item.inner_docs(self.cx.tcx);

        if item.is_mod() && inner_docs {
            self.mod_ids.push(item.item_id.expect_def_id());
        }

        // We want to resolve in the lexical scope of the documentation.
        // In the presence of re-exports, this is not the same as the module of the item.
        // Rather than merging all documentation into one, resolve it one attribute at a time
        // so we know which module it came from.
        for (parent_module, doc) in item.attrs.prepare_to_doc_link_resolution() {
            if !may_have_doc_links(&doc) {
                continue;
            }
            debug!("combined_docs={}", doc);
            // NOTE: if there are links that start in one crate and end in another, this will not resolve them.
            // This is a degenerate case and it's not supported by rustdoc.
            let parent_node = parent_module.or(parent_node);
            // Temporarily move the link cache out of `cx` so that `self` can be borrowed
            // mutably by `resolve_link` while the cached links are iterated.
            let mut tmp_links = self
                .cx
                .resolver_caches
                .markdown_links
                .take()
                .expect("`markdown_links` are already borrowed");
            if !tmp_links.contains_key(&doc) {
                tmp_links.insert(doc.clone(), preprocessed_markdown_links(&doc));
            }
            for md_link in &tmp_links[&doc] {
                let link = self.resolve_link(item, &doc, parent_node, md_link);
                if let Some(link) = link {
                    self.cx.cache.intra_doc_links.entry(item.item_id).or_default().push(link);
                }
            }
            // Put the cache back for the next doc fragment or item.
            self.cx.resolver_caches.markdown_links = Some(tmp_links);
        }

        if item.is_mod() {
            if !inner_docs {
                self.mod_ids.push(item.item_id.expect_def_id());
            }

            self.visit_item_recur(item);
            self.mod_ids.pop();
        } else {
            self.visit_item_recur(item)
        }
    }
}

/// An error found by `preprocess_link`, stored so it can be reported later.
enum PreprocessingError {
    /// User error: `[std#x#y]` is not valid
    MultipleAnchors,
    /// The disambiguator could not be parsed: the range to report and the error message.
    Disambiguator(Range<usize>, String),
    /// The generics in the path could not be stripped: the error and the offending path.
    MalformedGenerics(MalformedGenerics, String),
}

impl PreprocessingError {
    /// Emits the diagnostic that corresponds to this error.
    fn report(&self, cx: &DocContext<'_>, diag_info: DiagnosticInfo<'_>) {
        match self {
            PreprocessingError::MultipleAnchors => report_multiple_anchors(cx, diag_info),
            PreprocessingError::Disambiguator(range, msg) => {
                disambiguator_error(cx, diag_info, range.clone(), msg)
            }
            PreprocessingError::MalformedGenerics(err, path_str) => {
                report_malformed_generics(cx, diag_info, *err, path_str)
            }
        }
    }
}

/// The pieces of a link that `preprocess_link` accepted as a possible intra-doc link.
#[derive(Clone)]
struct PreprocessingInfo {
    /// The path to resolve, with the disambiguator and generics stripped.
    path_str: String,
    /// The disambiguator parsed from the link, if any.
    disambiguator: Option<Disambiguator>,
    /// The part of the link after `#`, if any.
    extra_fragment: Option<String>,
    /// The text stored as `link_text` of the resulting `ItemLink`.
    link_text: String,
}

// Not a typedef to avoid leaking several private structures from this module.
pub(crate) struct PreprocessedMarkdownLink(
    Result<PreprocessingInfo, PreprocessingError>,
    MarkdownLink,
);

/// Returns:
/// - `None` if the link should be ignored.
+/// - `Some(Err)` if the link should emit an error +/// - `Some(Ok)` if the link is valid +/// +/// `link_buffer` is needed for lifetime reasons; it will always be overwritten and the contents ignored. +fn preprocess_link( + ori_link: &MarkdownLink, +) -> Option<Result<PreprocessingInfo, PreprocessingError>> { + // [] is mostly likely not supposed to be a link + if ori_link.link.is_empty() { + return None; + } + + // Bail early for real links. + if ori_link.link.contains('/') { + return None; + } + + let stripped = ori_link.link.replace('`', ""); + let mut parts = stripped.split('#'); + + let link = parts.next().unwrap(); + if link.trim().is_empty() { + // This is an anchor to an element of the current page, nothing to do in here! + return None; + } + let extra_fragment = parts.next(); + if parts.next().is_some() { + // A valid link can't have multiple #'s + return Some(Err(PreprocessingError::MultipleAnchors)); + } + + // Parse and strip the disambiguator from the link, if present. + let (disambiguator, path_str, link_text) = match Disambiguator::from_str(link) { + Ok(Some((d, path, link_text))) => (Some(d), path.trim(), link_text.trim()), + Ok(None) => (None, link.trim(), link.trim()), + Err((err_msg, relative_range)) => { + // Only report error if we would not have ignored this link. See issue #83859. + if !should_ignore_link_with_disambiguators(link) { + let no_backticks_range = range_between_backticks(ori_link); + let disambiguator_range = (no_backticks_range.start + relative_range.start) + ..(no_backticks_range.start + relative_range.end); + return Some(Err(PreprocessingError::Disambiguator(disambiguator_range, err_msg))); + } else { + return None; + } + } + }; + + if should_ignore_link(path_str) { + return None; + } + + // Strip generics from the path. 
+ let path_str = if path_str.contains(['<', '>'].as_slice()) { + match strip_generics_from_path(path_str) { + Ok(path) => path, + Err(err) => { + debug!("link has malformed generics: {}", path_str); + return Some(Err(PreprocessingError::MalformedGenerics(err, path_str.to_owned()))); + } + } + } else { + path_str.to_owned() + }; + + // Sanity check to make sure we don't have any angle brackets after stripping generics. + assert!(!path_str.contains(['<', '>'].as_slice())); + + // The link is not an intra-doc link if it still contains spaces after stripping generics. + if path_str.contains(' ') { + return None; + } + + Some(Ok(PreprocessingInfo { + path_str, + disambiguator, + extra_fragment: extra_fragment.map(|frag| frag.to_owned()), + link_text: link_text.to_owned(), + })) +} + +fn preprocessed_markdown_links(s: &str) -> Vec<PreprocessedMarkdownLink> { + markdown_links(s, |link| { + preprocess_link(&link).map(|pp_link| PreprocessedMarkdownLink(pp_link, link)) + }) +} + +impl LinkCollector<'_, '_> { + /// This is the entry point for resolving an intra-doc link. + /// + /// FIXME(jynelson): this is way too many arguments + fn resolve_link( + &mut self, + item: &Item, + dox: &str, + parent_node: Option<DefId>, + link: &PreprocessedMarkdownLink, + ) -> Option<ItemLink> { + let PreprocessedMarkdownLink(pp_link, ori_link) = link; + trace!("considering link '{}'", ori_link.link); + + let diag_info = DiagnosticInfo { + item, + dox, + ori_link: &ori_link.link, + link_range: ori_link.range.clone(), + }; + + let PreprocessingInfo { path_str, disambiguator, extra_fragment, link_text } = + pp_link.as_ref().map_err(|err| err.report(self.cx, diag_info.clone())).ok()?; + let disambiguator = *disambiguator; + + // In order to correctly resolve intra-doc links we need to + // pick a base AST node to work from. If the documentation for + // this module came from an inner comment (//!) then we anchor + // our name resolution *inside* the module. 
If, on the other + // hand it was an outer comment (///) then we anchor the name + // resolution in the parent module on the basis that the names + // used are more likely to be intended to be parent names. For + // this, we set base_node to None for inner comments since + // we've already pushed this node onto the resolution stack but + // for outer comments we explicitly try and resolve against the + // parent_node first. + let inner_docs = item.inner_docs(self.cx.tcx); + let base_node = + if item.is_mod() && inner_docs { self.mod_ids.last().copied() } else { parent_node }; + let module_id = base_node.expect("doc link without parent module"); + + let (mut res, fragment) = self.resolve_with_disambiguator_cached( + ResolutionInfo { + item_id: item.item_id, + module_id, + dis: disambiguator, + path_str: path_str.to_owned(), + extra_fragment: extra_fragment.clone(), + }, + diag_info.clone(), // this struct should really be Copy, but Range is not :( + // For reference-style links we want to report only one error so unsuccessful + // resolutions are cached, for other links we want to report an error every + // time so they are not cached. + matches!(ori_link.kind, LinkType::Reference | LinkType::Shortcut), + )?; + + // Check for a primitive which might conflict with a module + // Report the ambiguity and require that the user specify which one they meant. + // FIXME: could there ever be a primitive not in the type namespace? 
+ if matches!( + disambiguator, + None | Some(Disambiguator::Namespace(Namespace::TypeNS) | Disambiguator::Primitive) + ) && !matches!(res, Res::Primitive(_)) + { + if let Some(prim) = resolve_primitive(path_str, TypeNS) { + // `prim@char` + if matches!(disambiguator, Some(Disambiguator::Primitive)) { + res = prim; + } else { + // `[char]` when a `char` module is in scope + let candidates = vec![res, prim]; + ambiguity_error(self.cx, diag_info, path_str, candidates); + return None; + } + } + } + + match res { + Res::Primitive(prim) => { + if let Some(UrlFragment::Item(id)) = fragment { + // We're actually resolving an associated item of a primitive, so we need to + // verify the disambiguator (if any) matches the type of the associated item. + // This case should really follow the same flow as the `Res::Def` branch below, + // but attempting to add a call to `clean::register_res` causes an ICE. @jyn514 + // thinks `register_res` is only needed for cross-crate re-exports, but Rust + // doesn't allow statements like `use str::trim;`, making this a (hopefully) + // valid omission. See https://github.com/rust-lang/rust/pull/80660#discussion_r551585677 + // for discussion on the matter. + let kind = self.cx.tcx.def_kind(id); + self.verify_disambiguator( + path_str, + ori_link, + kind, + id, + disambiguator, + item, + &diag_info, + )?; + + // FIXME: it would be nice to check that the feature gate was enabled in the original crate, not just ignore it altogether. + // However I'm not sure how to check that across crates. 
+ if prim == PrimitiveType::RawPointer + && item.item_id.is_local() + && !self.cx.tcx.features().intra_doc_pointers + { + self.report_rawptr_assoc_feature_gate(dox, ori_link, item); + } + } else { + match disambiguator { + Some(Disambiguator::Primitive | Disambiguator::Namespace(_)) | None => {} + Some(other) => { + self.report_disambiguator_mismatch( + path_str, ori_link, other, res, &diag_info, + ); + return None; + } + } + } + + Some(ItemLink { + link: ori_link.link.clone(), + link_text: link_text.clone(), + did: res.def_id(self.cx.tcx), + fragment, + }) + } + Res::Def(kind, id) => { + let (kind_for_dis, id_for_dis) = if let Some(UrlFragment::Item(id)) = fragment { + (self.cx.tcx.def_kind(id), id) + } else { + (kind, id) + }; + self.verify_disambiguator( + path_str, + ori_link, + kind_for_dis, + id_for_dis, + disambiguator, + item, + &diag_info, + )?; + let id = clean::register_res(self.cx, rustc_hir::def::Res::Def(kind, id)); + Some(ItemLink { + link: ori_link.link.clone(), + link_text: link_text.clone(), + did: id, + fragment, + }) + } + } + } + + fn verify_disambiguator( + &self, + path_str: &str, + ori_link: &MarkdownLink, + kind: DefKind, + id: DefId, + disambiguator: Option<Disambiguator>, + item: &Item, + diag_info: &DiagnosticInfo<'_>, + ) -> Option<()> { + debug!("intra-doc link to {} resolved to {:?}", path_str, (kind, id)); + + // Disallow e.g. linking to enums with `struct@` + debug!("saw kind {:?} with disambiguator {:?}", kind, disambiguator); + match (kind, disambiguator) { + | (DefKind::Const | DefKind::ConstParam | DefKind::AssocConst | DefKind::AnonConst, Some(Disambiguator::Kind(DefKind::Const))) + // NOTE: this allows 'method' to mean both normal functions and associated functions + // This can't cause ambiguity because both are in the same namespace. 
+ | (DefKind::Fn | DefKind::AssocFn, Some(Disambiguator::Kind(DefKind::Fn))) + // These are namespaces; allow anything in the namespace to match + | (_, Some(Disambiguator::Namespace(_))) + // If no disambiguator given, allow anything + | (_, None) + // All of these are valid, so do nothing + => {} + (actual, Some(Disambiguator::Kind(expected))) if actual == expected => {} + (_, Some(specified @ Disambiguator::Kind(_) | specified @ Disambiguator::Primitive)) => { + self.report_disambiguator_mismatch(path_str,ori_link,specified, Res::Def(kind, id),diag_info); + return None; + } + } + + // item can be non-local e.g. when using #[doc(primitive = "pointer")] + if let Some((src_id, dst_id)) = id + .as_local() + // The `expect_def_id()` should be okay because `local_def_id_to_hir_id` + // would presumably panic if a fake `DefIndex` were passed. + .and_then(|dst_id| { + item.item_id.expect_def_id().as_local().map(|src_id| (src_id, dst_id)) + }) + { + if self.cx.tcx.privacy_access_levels(()).is_exported(src_id) + && !self.cx.tcx.privacy_access_levels(()).is_exported(dst_id) + { + privacy_error(self.cx, diag_info, path_str); + } + } + + Some(()) + } + + fn report_disambiguator_mismatch( + &self, + path_str: &str, + ori_link: &MarkdownLink, + specified: Disambiguator, + resolved: Res, + diag_info: &DiagnosticInfo<'_>, + ) { + // The resolved item did not match the disambiguator; give a better error than 'not found' + let msg = format!("incompatible link kind for `{}`", path_str); + let callback = |diag: &mut Diagnostic, sp: Option<rustc_span::Span>| { + let note = format!( + "this link resolved to {} {}, which is not {} {}", + resolved.article(), + resolved.descr(), + specified.article(), + specified.descr(), + ); + if let Some(sp) = sp { + diag.span_label(sp, ¬e); + } else { + diag.note(¬e); + } + suggest_disambiguator(resolved, diag, path_str, &ori_link.link, sp); + }; + report_diagnostic(self.cx.tcx, BROKEN_INTRA_DOC_LINKS, &msg, diag_info, callback); + } + + fn 
report_rawptr_assoc_feature_gate(&self, dox: &str, ori_link: &MarkdownLink, item: &Item) { + let span = + super::source_span_for_markdown_range(self.cx.tcx, dox, &ori_link.range, &item.attrs) + .unwrap_or_else(|| item.attr_span(self.cx.tcx)); + rustc_session::parse::feature_err( + &self.cx.tcx.sess.parse_sess, + sym::intra_doc_pointers, + span, + "linking to associated items of raw pointers is experimental", + ) + .note("rustdoc does not allow disambiguating between `*const` and `*mut`, and pointers are unstable until it does") + .emit(); + } + + fn resolve_with_disambiguator_cached( + &mut self, + key: ResolutionInfo, + diag: DiagnosticInfo<'_>, + // If errors are cached then they are only reported on first ocurrence + // which we want in some cases but not in others. + cache_errors: bool, + ) -> Option<(Res, Option<UrlFragment>)> { + if let Some(res) = self.visited_links.get(&key) { + if res.is_some() || cache_errors { + return res.clone(); + } + } + + let res = self.resolve_with_disambiguator(&key, diag.clone()).and_then(|(res, def_id)| { + let fragment = match (&key.extra_fragment, def_id) { + (Some(_), Some(def_id)) => { + report_anchor_conflict(self.cx, diag, def_id); + return None; + } + (Some(u_frag), None) => Some(UrlFragment::UserWritten(u_frag.clone())), + (None, Some(def_id)) => Some(UrlFragment::Item(def_id)), + (None, None) => None, + }; + Some((res, fragment)) + }); + + if res.is_some() || cache_errors { + self.visited_links.insert(key, res.clone()); + } + res + } + + /// After parsing the disambiguator, resolve the main part of the link. 
// FIXME(jynelson): wow this is just so much
//
// Resolution strategy:
// * With a disambiguator, only the namespace it implies is searched. If that fails, the
//   remaining namespaces are probed solely to turn the error into a more helpful
//   `WrongNamespace`; the failure is then handed to `resolution_failure`, whose result
//   is returned.
// * Without a disambiguator, all three namespaces are searched and the outcome depends on
//   how many of them produced a result (none: failure, one: that result, several:
//   ambiguity, except for the derive-macro/trait collision which picks the type).
fn resolve_with_disambiguator(
    &mut self,
    key: &ResolutionInfo,
    diag: DiagnosticInfo<'_>,
) -> Option<(Res, Option<DefId>)> {
    let disambiguator = key.dis;
    let path_str = &key.path_str;
    let item_id = key.item_id;
    let base_node = key.module_id;

    match disambiguator.map(Disambiguator::ns) {
        Some(expected_ns) => {
            match self.resolve(path_str, expected_ns, item_id, base_node) {
                Ok(res) => Some(res),
                Err(err) => {
                    // We only looked in one namespace. Try to give a better error if possible.
                    // FIXME: really it should be `resolution_failure` that does this, not `resolve_with_disambiguator`.
                    // See https://github.com/rust-lang/rust/pull/76955#discussion_r493953382 for a good approach.
                    let mut err = ResolutionFailure::NotResolved(err);
                    for other_ns in [TypeNS, ValueNS, MacroNS] {
                        if other_ns != expected_ns {
                            if let Ok(res) =
                                self.resolve(path_str, other_ns, item_id, base_node)
                            {
                                err = ResolutionFailure::WrongNamespace {
                                    res: full_res(self.cx.tcx, res),
                                    expected_ns,
                                };
                                break;
                            }
                        }
                    }
                    resolution_failure(self, diag, path_str, disambiguator, smallvec![err])
                }
            }
        }
        None => {
            // Try everything!
            let mut candidate = |ns| {
                self.resolve(path_str, ns, item_id, base_node)
                    .map_err(ResolutionFailure::NotResolved)
            };

            let candidates = PerNS {
                macro_ns: candidate(MacroNS),
                type_ns: candidate(TypeNS),
                value_ns: candidate(ValueNS).and_then(|(res, def_id)| {
                    match res {
                        // Constructors are picked up in the type namespace.
                        Res::Def(DefKind::Ctor(..), _) => {
                            Err(ResolutionFailure::WrongNamespace { res, expected_ns: TypeNS })
                        }
                        _ => Ok((res, def_id)),
                    }
                }),
            };

            // Number of namespaces in which the path resolved.
            let len = candidates.iter().filter(|res| res.is_ok()).count();

            if len == 0 {
                return resolution_failure(
                    self,
                    diag,
                    path_str,
                    disambiguator,
                    candidates.into_iter().filter_map(|res| res.err()).collect(),
                );
            }

            if len == 1 {
                Some(candidates.into_iter().find_map(|res| res.ok()).unwrap())
            } else if len == 2 && is_derive_trait_collision(&candidates) {
                // Exactly a derive macro and a trait sharing one name: link to the trait.
                Some(candidates.type_ns.unwrap())
            } else {
                let ignore_macro = is_derive_trait_collision(&candidates);
                // If we're reporting an ambiguity, don't mention the namespaces that failed
                let mut candidates =
                    candidates.map(|candidate| candidate.ok().map(|(res, _)| res));
                if ignore_macro {
                    candidates.macro_ns = None;
                }
                ambiguity_error(self.cx, diag, path_str, candidates.present_items().collect());
                None
            }
        }
    }
}
} // closes the enclosing `impl` block, which was opened before this point

/// Get the section of a link between the backticks,
/// or the whole link if there aren't any backticks.
///
/// The returned range is expressed in the same coordinates as `ori_link.range`:
/// its start is `ori_link.range.start` plus the length of the leading backtick
/// group, and its end is `ori_link.range.start` plus the offset of the next
/// backtick after that group (or the link length if there is none).
///
/// For example:
///
/// ```text
/// [`Foo`]
///   ^^^
/// ```
fn range_between_backticks(ori_link: &MarkdownLink) -> Range<usize> {
    let link = ori_link.link.as_bytes();
    let after_first_backtick_group = link.iter().position(|&b| b != b'`').unwrap_or(0);
    let before_second_backtick_group = link[after_first_backtick_group..]
        .iter()
        .position(|&b| b == b'`')
        // `position` counts from the start of the sub-slice, so shift the result back
        // into link-relative coordinates. Without this the end of the range came out
        // short by the length of the leading backtick group.
        .map_or(link.len(), |offset| after_first_backtick_group + offset);
    (ori_link.range.start + after_first_backtick_group)
        ..(ori_link.range.start + before_second_backtick_group)
}

/// Returns true if we should ignore `link` due to it being unlikely
/// that it is an intra-doc link. `link` should still have disambiguators
/// if there were any.
///
/// The difference between this and [`should_ignore_link()`] is that this
/// check should only be used on links that still have disambiguators.
+fn should_ignore_link_with_disambiguators(link: &str) -> bool { + link.contains(|ch: char| !(ch.is_alphanumeric() || ":_<>, !*&;@()".contains(ch))) +} + +/// Returns true if we should ignore `path_str` due to it being unlikely +/// that it is an intra-doc link. +fn should_ignore_link(path_str: &str) -> bool { + path_str.contains(|ch: char| !(ch.is_alphanumeric() || ":_<>, !*&;".contains(ch))) +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +/// Disambiguators for a link. +enum Disambiguator { + /// `prim@` + /// + /// This is buggy, see <https://github.com/rust-lang/rust/pull/77875#discussion_r503583103> + Primitive, + /// `struct@` or `f()` + Kind(DefKind), + /// `type@` + Namespace(Namespace), +} + +impl Disambiguator { + /// Given a link, parse and return `(disambiguator, path_str, link_text)`. + /// + /// This returns `Ok(Some(...))` if a disambiguator was found, + /// `Ok(None)` if no disambiguator was found, or `Err(...)` + /// if there was a problem with the disambiguator. 
+ fn from_str(link: &str) -> Result<Option<(Self, &str, &str)>, (String, Range<usize>)> { + use Disambiguator::{Kind, Namespace as NS, Primitive}; + + if let Some(idx) = link.find('@') { + let (prefix, rest) = link.split_at(idx); + let d = match prefix { + "struct" => Kind(DefKind::Struct), + "enum" => Kind(DefKind::Enum), + "trait" => Kind(DefKind::Trait), + "union" => Kind(DefKind::Union), + "module" | "mod" => Kind(DefKind::Mod), + "const" | "constant" => Kind(DefKind::Const), + "static" => Kind(DefKind::Static(Mutability::Not)), + "function" | "fn" | "method" => Kind(DefKind::Fn), + "derive" => Kind(DefKind::Macro(MacroKind::Derive)), + "type" => NS(Namespace::TypeNS), + "value" => NS(Namespace::ValueNS), + "macro" => NS(Namespace::MacroNS), + "prim" | "primitive" => Primitive, + _ => return Err((format!("unknown disambiguator `{}`", prefix), 0..idx)), + }; + Ok(Some((d, &rest[1..], &rest[1..]))) + } else { + let suffixes = [ + ("!()", DefKind::Macro(MacroKind::Bang)), + ("!{}", DefKind::Macro(MacroKind::Bang)), + ("![]", DefKind::Macro(MacroKind::Bang)), + ("()", DefKind::Fn), + ("!", DefKind::Macro(MacroKind::Bang)), + ]; + for (suffix, kind) in suffixes { + if let Some(path_str) = link.strip_suffix(suffix) { + // Avoid turning `!` or `()` into an empty string + if !path_str.is_empty() { + return Ok(Some((Kind(kind), path_str, link))); + } + } + } + Ok(None) + } + } + + fn ns(self) -> Namespace { + match self { + Self::Namespace(n) => n, + Self::Kind(k) => { + k.ns().expect("only DefKinds with a valid namespace can be disambiguators") + } + Self::Primitive => TypeNS, + } + } + + fn article(self) -> &'static str { + match self { + Self::Namespace(_) => panic!("article() doesn't make sense for namespaces"), + Self::Kind(k) => k.article(), + Self::Primitive => "a", + } + } + + fn descr(self) -> &'static str { + match self { + Self::Namespace(n) => n.descr(), + // HACK(jynelson): the source of `DefKind::descr` only uses the DefId for + // printing "module" vs 
"crate" so using the wrong ID is not a huge problem + Self::Kind(k) => k.descr(CRATE_DEF_ID.to_def_id()), + Self::Primitive => "builtin type", + } + } +} + +/// A suggestion to show in a diagnostic. +enum Suggestion { + /// `struct@` + Prefix(&'static str), + /// `f()` + Function, + /// `m!` + Macro, + /// `foo` without any disambiguator + RemoveDisambiguator, +} + +impl Suggestion { + fn descr(&self) -> Cow<'static, str> { + match self { + Self::Prefix(x) => format!("prefix with `{}@`", x).into(), + Self::Function => "add parentheses".into(), + Self::Macro => "add an exclamation mark".into(), + Self::RemoveDisambiguator => "remove the disambiguator".into(), + } + } + + fn as_help(&self, path_str: &str) -> String { + // FIXME: if this is an implied shortcut link, it's bad style to suggest `@` + match self { + Self::Prefix(prefix) => format!("{}@{}", prefix, path_str), + Self::Function => format!("{}()", path_str), + Self::Macro => format!("{}!", path_str), + Self::RemoveDisambiguator => path_str.into(), + } + } + + fn as_help_span( + &self, + path_str: &str, + ori_link: &str, + sp: rustc_span::Span, + ) -> Vec<(rustc_span::Span, String)> { + let inner_sp = match ori_link.find('(') { + Some(index) => sp.with_hi(sp.lo() + BytePos(index as _)), + None => sp, + }; + let inner_sp = match ori_link.find('!') { + Some(index) => inner_sp.with_hi(inner_sp.lo() + BytePos(index as _)), + None => inner_sp, + }; + let inner_sp = match ori_link.find('@') { + Some(index) => inner_sp.with_lo(inner_sp.lo() + BytePos(index as u32 + 1)), + None => inner_sp, + }; + match self { + Self::Prefix(prefix) => { + // FIXME: if this is an implied shortcut link, it's bad style to suggest `@` + let mut sugg = vec![(sp.with_hi(inner_sp.lo()), format!("{}@", prefix))]; + if sp.hi() != inner_sp.hi() { + sugg.push((inner_sp.shrink_to_hi().with_hi(sp.hi()), String::new())); + } + sugg + } + Self::Function => { + let mut sugg = vec![(inner_sp.shrink_to_hi().with_hi(sp.hi()), "()".to_string())]; + if 
sp.lo() != inner_sp.lo() { + sugg.push((inner_sp.shrink_to_lo().with_lo(sp.lo()), String::new())); + } + sugg + } + Self::Macro => { + let mut sugg = vec![(inner_sp.shrink_to_hi(), "!".to_string())]; + if sp.lo() != inner_sp.lo() { + sugg.push((inner_sp.shrink_to_lo().with_lo(sp.lo()), String::new())); + } + sugg + } + Self::RemoveDisambiguator => vec![(sp, path_str.into())], + } + } +} + +/// Reports a diagnostic for an intra-doc link. +/// +/// If no link range is provided, or the source span of the link cannot be determined, the span of +/// the entire documentation block is used for the lint. If a range is provided but the span +/// calculation fails, a note is added to the diagnostic pointing to the link in the markdown. +/// +/// The `decorate` callback is invoked in all cases to allow further customization of the +/// diagnostic before emission. If the span of the link was able to be determined, the second +/// parameter of the callback will contain it, and the primary span of the diagnostic will be set +/// to it. +fn report_diagnostic( + tcx: TyCtxt<'_>, + lint: &'static Lint, + msg: &str, + DiagnosticInfo { item, ori_link: _, dox, link_range }: &DiagnosticInfo<'_>, + decorate: impl FnOnce(&mut Diagnostic, Option<rustc_span::Span>), +) { + let Some(hir_id) = DocContext::as_local_hir_id(tcx, item.item_id) + else { + // If non-local, no need to check anything. 
+ info!("ignoring warning from parent crate: {}", msg); + return; + }; + + let sp = item.attr_span(tcx); + + tcx.struct_span_lint_hir(lint, hir_id, sp, |lint| { + let mut diag = lint.build(msg); + + let span = + super::source_span_for_markdown_range(tcx, dox, link_range, &item.attrs).map(|sp| { + if dox.as_bytes().get(link_range.start) == Some(&b'`') + && dox.as_bytes().get(link_range.end - 1) == Some(&b'`') + { + sp.with_lo(sp.lo() + BytePos(1)).with_hi(sp.hi() - BytePos(1)) + } else { + sp + } + }); + + if let Some(sp) = span { + diag.set_span(sp); + } else { + // blah blah blah\nblah\nblah [blah] blah blah\nblah blah + // ^ ~~~~ + // | link_range + // last_new_line_offset + let last_new_line_offset = dox[..link_range.start].rfind('\n').map_or(0, |n| n + 1); + let line = dox[last_new_line_offset..].lines().next().unwrap_or(""); + + // Print the line containing the `link_range` and manually mark it with '^'s. + diag.note(&format!( + "the link appears in this line:\n\n{line}\n\ + {indicator: <before$}{indicator:^<found$}", + line = line, + indicator = "", + before = link_range.start - last_new_line_offset, + found = link_range.len(), + )); + } + + decorate(&mut diag, span); + + diag.emit(); + }); +} + +/// Reports a link that failed to resolve. +/// +/// This also tries to resolve any intermediate path segments that weren't +/// handled earlier. For example, if passed `Item::Crate(std)` and `path_str` +/// `std::io::Error::x`, this will resolve `std::io::Error`. 
+fn resolution_failure( + collector: &mut LinkCollector<'_, '_>, + diag_info: DiagnosticInfo<'_>, + path_str: &str, + disambiguator: Option<Disambiguator>, + kinds: SmallVec<[ResolutionFailure<'_>; 3]>, +) -> Option<(Res, Option<DefId>)> { + let tcx = collector.cx.tcx; + let mut recovered_res = None; + report_diagnostic( + tcx, + BROKEN_INTRA_DOC_LINKS, + &format!("unresolved link to `{}`", path_str), + &diag_info, + |diag, sp| { + let item = |res: Res| format!("the {} `{}`", res.descr(), res.name(tcx),); + let assoc_item_not_allowed = |res: Res| { + let name = res.name(tcx); + format!( + "`{}` is {} {}, not a module or type, and cannot have associated items", + name, + res.article(), + res.descr() + ) + }; + // ignore duplicates + let mut variants_seen = SmallVec::<[_; 3]>::new(); + for mut failure in kinds { + let variant = std::mem::discriminant(&failure); + if variants_seen.contains(&variant) { + continue; + } + variants_seen.push(variant); + + if let ResolutionFailure::NotResolved(UnresolvedPath { + item_id, + module_id, + partial_res, + unresolved, + }) = &mut failure + { + use DefKind::*; + + let item_id = *item_id; + let module_id = *module_id; + // FIXME(jynelson): this might conflict with my `Self` fix in #76467 + // FIXME: maybe use itertools `collect_tuple` instead? + fn split(path: &str) -> Option<(&str, &str)> { + let mut splitter = path.rsplitn(2, "::"); + splitter.next().and_then(|right| splitter.next().map(|left| (left, right))) + } + + // Check if _any_ parent of the path gets resolved. + // If so, report it and say the first which failed; if not, say the first path segment didn't resolve. 
+ let mut name = path_str; + 'outer: loop { + let Some((start, end)) = split(name) else { + // avoid bug that marked [Quux::Z] as missing Z, not Quux + if partial_res.is_none() { + *unresolved = name.into(); + } + break; + }; + name = start; + for ns in [TypeNS, ValueNS, MacroNS] { + if let Ok(res) = collector.resolve(start, ns, item_id, module_id) { + debug!("found partial_res={:?}", res); + *partial_res = Some(full_res(collector.cx.tcx, res)); + *unresolved = end.into(); + break 'outer; + } + } + *unresolved = end.into(); + } + + let last_found_module = match *partial_res { + Some(Res::Def(DefKind::Mod, id)) => Some(id), + None => Some(module_id), + _ => None, + }; + // See if this was a module: `[path]` or `[std::io::nope]` + if let Some(module) = last_found_module { + let note = if partial_res.is_some() { + // Part of the link resolved; e.g. `std::io::nonexistent` + let module_name = tcx.item_name(module); + format!("no item named `{}` in module `{}`", unresolved, module_name) + } else { + // None of the link resolved; e.g. `Notimported` + format!("no item named `{}` in scope", unresolved) + }; + if let Some(span) = sp { + diag.span_label(span, ¬e); + } else { + diag.note(¬e); + } + + if !path_str.contains("::") { + if disambiguator.map_or(true, |d| d.ns() == MacroNS) + && let Some(&res) = collector.cx.resolver_caches.all_macro_rules + .get(&Symbol::intern(path_str)) + { + diag.note(format!( + "`macro_rules` named `{path_str}` exists in this crate, \ + but it is not in scope at this link's location" + )); + recovered_res = res.try_into().ok().map(|res| (res, None)); + } else { + // If the link has `::` in it, assume it was meant to be an + // intra-doc link. Otherwise, the `[]` might be unrelated. 
+ diag.help("to escape `[` and `]` characters, \ + add '\\' before them like `\\[` or `\\]`"); + } + } + + continue; + } + + // Otherwise, it must be an associated item or variant + let res = partial_res.expect("None case was handled by `last_found_module`"); + let name = res.name(tcx); + let kind = match res { + Res::Def(kind, _) => Some(kind), + Res::Primitive(_) => None, + }; + let path_description = if let Some(kind) = kind { + match kind { + Mod | ForeignMod => "inner item", + Struct => "field or associated item", + Enum | Union => "variant or associated item", + Variant + | Field + | Closure + | Generator + | AssocTy + | AssocConst + | AssocFn + | Fn + | Macro(_) + | Const + | ConstParam + | ExternCrate + | Use + | LifetimeParam + | Ctor(_, _) + | AnonConst + | InlineConst => { + let note = assoc_item_not_allowed(res); + if let Some(span) = sp { + diag.span_label(span, ¬e); + } else { + diag.note(¬e); + } + return; + } + Trait | TyAlias | ForeignTy | OpaqueTy | TraitAlias | TyParam + | Static(_) => "associated item", + Impl | GlobalAsm => unreachable!("not a path"), + } + } else { + "associated item" + }; + let note = format!( + "the {} `{}` has no {} named `{}`", + res.descr(), + name, + disambiguator.map_or(path_description, |d| d.descr()), + unresolved, + ); + if let Some(span) = sp { + diag.span_label(span, ¬e); + } else { + diag.note(¬e); + } + + continue; + } + let note = match failure { + ResolutionFailure::NotResolved { .. 
} => unreachable!("handled above"), + ResolutionFailure::WrongNamespace { res, expected_ns } => { + suggest_disambiguator(res, diag, path_str, diag_info.ori_link, sp); + + format!( + "this link resolves to {}, which is not in the {} namespace", + item(res), + expected_ns.descr() + ) + } + }; + if let Some(span) = sp { + diag.span_label(span, ¬e); + } else { + diag.note(¬e); + } + } + }, + ); + + recovered_res +} + +fn report_multiple_anchors(cx: &DocContext<'_>, diag_info: DiagnosticInfo<'_>) { + let msg = format!("`{}` contains multiple anchors", diag_info.ori_link); + anchor_failure(cx, diag_info, &msg, 1) +} + +fn report_anchor_conflict(cx: &DocContext<'_>, diag_info: DiagnosticInfo<'_>, def_id: DefId) { + let (link, kind) = (diag_info.ori_link, Res::from_def_id(cx.tcx, def_id).descr()); + let msg = format!("`{link}` contains an anchor, but links to {kind}s are already anchored"); + anchor_failure(cx, diag_info, &msg, 0) +} + +/// Report an anchor failure. +fn anchor_failure( + cx: &DocContext<'_>, + diag_info: DiagnosticInfo<'_>, + msg: &str, + anchor_idx: usize, +) { + report_diagnostic(cx.tcx, BROKEN_INTRA_DOC_LINKS, msg, &diag_info, |diag, sp| { + if let Some(mut sp) = sp { + if let Some((fragment_offset, _)) = + diag_info.ori_link.char_indices().filter(|(_, x)| *x == '#').nth(anchor_idx) + { + sp = sp.with_lo(sp.lo() + BytePos(fragment_offset as _)); + } + diag.span_label(sp, "invalid anchor"); + } + }); +} + +/// Report an error in the link disambiguator. 
+fn disambiguator_error( + cx: &DocContext<'_>, + mut diag_info: DiagnosticInfo<'_>, + disambiguator_range: Range<usize>, + msg: &str, +) { + diag_info.link_range = disambiguator_range; + report_diagnostic(cx.tcx, BROKEN_INTRA_DOC_LINKS, msg, &diag_info, |diag, _sp| { + let msg = format!( + "see {}/rustdoc/linking-to-items-by-name.html#namespaces-and-disambiguators for more info about disambiguators", + crate::DOC_RUST_LANG_ORG_CHANNEL + ); + diag.note(&msg); + }); +} + +fn report_malformed_generics( + cx: &DocContext<'_>, + diag_info: DiagnosticInfo<'_>, + err: MalformedGenerics, + path_str: &str, +) { + report_diagnostic( + cx.tcx, + BROKEN_INTRA_DOC_LINKS, + &format!("unresolved link to `{}`", path_str), + &diag_info, + |diag, sp| { + let note = match err { + MalformedGenerics::UnbalancedAngleBrackets => "unbalanced angle brackets", + MalformedGenerics::MissingType => "missing type for generic parameters", + MalformedGenerics::HasFullyQualifiedSyntax => { + diag.note( + "see https://github.com/rust-lang/rust/issues/74563 for more information", + ); + "fully-qualified syntax is unsupported" + } + MalformedGenerics::InvalidPathSeparator => "has invalid path separator", + MalformedGenerics::TooManyAngleBrackets => "too many angle brackets", + MalformedGenerics::EmptyAngleBrackets => "empty angle brackets", + }; + if let Some(span) = sp { + diag.span_label(span, note); + } else { + diag.note(note); + } + }, + ); +} + +/// Report an ambiguity error, where there were multiple possible resolutions. 
+fn ambiguity_error( + cx: &DocContext<'_>, + diag_info: DiagnosticInfo<'_>, + path_str: &str, + candidates: Vec<Res>, +) { + let mut msg = format!("`{}` is ", path_str); + + match candidates.as_slice() { + [first_def, second_def] => { + msg += &format!( + "both {} {} and {} {}", + first_def.article(), + first_def.descr(), + second_def.article(), + second_def.descr(), + ); + } + _ => { + let mut candidates = candidates.iter().peekable(); + while let Some(res) = candidates.next() { + if candidates.peek().is_some() { + msg += &format!("{} {}, ", res.article(), res.descr()); + } else { + msg += &format!("and {} {}", res.article(), res.descr()); + } + } + } + } + + report_diagnostic(cx.tcx, BROKEN_INTRA_DOC_LINKS, &msg, &diag_info, |diag, sp| { + if let Some(sp) = sp { + diag.span_label(sp, "ambiguous link"); + } else { + diag.note("ambiguous link"); + } + + for res in candidates { + suggest_disambiguator(res, diag, path_str, diag_info.ori_link, sp); + } + }); +} + +/// In case of an ambiguity or mismatched disambiguator, suggest the correct +/// disambiguator. +fn suggest_disambiguator( + res: Res, + diag: &mut Diagnostic, + path_str: &str, + ori_link: &str, + sp: Option<rustc_span::Span>, +) { + let suggestion = res.disambiguator_suggestion(); + let help = format!("to link to the {}, {}", res.descr(), suggestion.descr()); + + if let Some(sp) = sp { + let mut spans = suggestion.as_help_span(path_str, ori_link, sp); + if spans.len() > 1 { + diag.multipart_suggestion(&help, spans, Applicability::MaybeIncorrect); + } else { + let (sp, suggestion_text) = spans.pop().unwrap(); + diag.span_suggestion_verbose(sp, &help, suggestion_text, Applicability::MaybeIncorrect); + } + } else { + diag.help(&format!("{}: {}", help, suggestion.as_help(path_str))); + } +} + +/// Report a link from a public item to a private one. 
+fn privacy_error(cx: &DocContext<'_>, diag_info: &DiagnosticInfo<'_>, path_str: &str) { + let sym; + let item_name = match diag_info.item.name { + Some(name) => { + sym = name; + sym.as_str() + } + None => "<unknown>", + }; + let msg = + format!("public documentation for `{}` links to private item `{}`", item_name, path_str); + + report_diagnostic(cx.tcx, PRIVATE_INTRA_DOC_LINKS, &msg, diag_info, |diag, sp| { + if let Some(sp) = sp { + diag.span_label(sp, "this item is private"); + } + + let note_msg = if cx.render_options.document_private { + "this link resolves only because you passed `--document-private-items`, but will break without" + } else { + "this link will resolve properly if you pass `--document-private-items`" + }; + diag.note(note_msg); + }); +} + +/// Resolve a primitive type or value. +fn resolve_primitive(path_str: &str, ns: Namespace) -> Option<Res> { + if ns != TypeNS { + return None; + } + use PrimitiveType::*; + let prim = match path_str { + "isize" => Isize, + "i8" => I8, + "i16" => I16, + "i32" => I32, + "i64" => I64, + "i128" => I128, + "usize" => Usize, + "u8" => U8, + "u16" => U16, + "u32" => U32, + "u64" => U64, + "u128" => U128, + "f32" => F32, + "f64" => F64, + "char" => Char, + "bool" | "true" | "false" => Bool, + "str" | "&str" => Str, + // See #80181 for why these don't have symbols associated. + "slice" => Slice, + "array" => Array, + "tuple" => Tuple, + "unit" => Unit, + "pointer" | "*const" | "*mut" => RawPointer, + "reference" | "&" | "&mut" => Reference, + "fn" => Fn, + "never" | "!" 
=> Never, + _ => return None, + }; + debug!("resolved primitives {:?}", prim); + Some(Res::Primitive(prim)) +} + +fn strip_generics_from_path(path_str: &str) -> Result<String, MalformedGenerics> { + let mut stripped_segments = vec![]; + let mut path = path_str.chars().peekable(); + let mut segment = Vec::new(); + + while let Some(chr) = path.next() { + match chr { + ':' => { + if path.next_if_eq(&':').is_some() { + let stripped_segment = + strip_generics_from_path_segment(mem::take(&mut segment))?; + if !stripped_segment.is_empty() { + stripped_segments.push(stripped_segment); + } + } else { + return Err(MalformedGenerics::InvalidPathSeparator); + } + } + '<' => { + segment.push(chr); + + match path.next() { + Some('<') => { + return Err(MalformedGenerics::TooManyAngleBrackets); + } + Some('>') => { + return Err(MalformedGenerics::EmptyAngleBrackets); + } + Some(chr) => { + segment.push(chr); + + while let Some(chr) = path.next_if(|c| *c != '>') { + segment.push(chr); + } + } + None => break, + } + } + _ => segment.push(chr), + } + trace!("raw segment: {:?}", segment); + } + + if !segment.is_empty() { + let stripped_segment = strip_generics_from_path_segment(segment)?; + if !stripped_segment.is_empty() { + stripped_segments.push(stripped_segment); + } + } + + debug!("path_str: {:?}\nstripped segments: {:?}", path_str, &stripped_segments); + + let stripped_path = stripped_segments.join("::"); + + if !stripped_path.is_empty() { Ok(stripped_path) } else { Err(MalformedGenerics::MissingType) } +} + +fn strip_generics_from_path_segment(segment: Vec<char>) -> Result<String, MalformedGenerics> { + let mut stripped_segment = String::new(); + let mut param_depth = 0; + + let mut latest_generics_chunk = String::new(); + + for c in segment { + if c == '<' { + param_depth += 1; + latest_generics_chunk.clear(); + } else if c == '>' { + param_depth -= 1; + if latest_generics_chunk.contains(" as ") { + // The segment tries to use fully-qualified syntax, which is currently 
unsupported. + // Give a helpful error message instead of completely ignoring the angle brackets. + return Err(MalformedGenerics::HasFullyQualifiedSyntax); + } + } else { + if param_depth == 0 { + stripped_segment.push(c); + } else { + latest_generics_chunk.push(c); + } + } + } + + if param_depth == 0 { + Ok(stripped_segment) + } else { + // The segment has unbalanced angle brackets, e.g. `Vec<T` or `Vec<T>>` + Err(MalformedGenerics::UnbalancedAngleBrackets) + } +} diff --git a/src/librustdoc/passes/collect_intra_doc_links/early.rs b/src/librustdoc/passes/collect_intra_doc_links/early.rs new file mode 100644 index 000000000..38cfd7a27 --- /dev/null +++ b/src/librustdoc/passes/collect_intra_doc_links/early.rs @@ -0,0 +1,405 @@ +use crate::clean::Attributes; +use crate::core::ResolverCaches; +use crate::passes::collect_intra_doc_links::preprocessed_markdown_links; +use crate::passes::collect_intra_doc_links::{Disambiguator, PreprocessedMarkdownLink}; + +use rustc_ast::visit::{self, AssocCtxt, Visitor}; +use rustc_ast::{self as ast, ItemKind}; +use rustc_data_structures::fx::FxHashMap; +use rustc_hir::def::Namespace::*; +use rustc_hir::def::{DefKind, Namespace, Res}; +use rustc_hir::def_id::{DefId, DefIdMap, DefIdSet, CRATE_DEF_ID}; +use rustc_hir::TraitCandidate; +use rustc_middle::ty::{DefIdTree, Visibility}; +use rustc_resolve::{ParentScope, Resolver}; +use rustc_session::config::Externs; +use rustc_session::Session; +use rustc_span::symbol::sym; +use rustc_span::{Symbol, SyntaxContext}; + +use std::collections::hash_map::Entry; +use std::mem; + +pub(crate) fn early_resolve_intra_doc_links( + resolver: &mut Resolver<'_>, + sess: &Session, + krate: &ast::Crate, + externs: Externs, + document_private_items: bool, +) -> ResolverCaches { + let parent_scope = + ParentScope::module(resolver.expect_module(CRATE_DEF_ID.to_def_id()), resolver); + let mut link_resolver = EarlyDocLinkResolver { + resolver, + sess, + parent_scope, + visited_mods: Default::default(), + 
markdown_links: Default::default(), + doc_link_resolutions: Default::default(), + traits_in_scope: Default::default(), + all_traits: Default::default(), + all_trait_impls: Default::default(), + all_macro_rules: Default::default(), + document_private_items, + }; + + // Overridden `visit_item` below doesn't apply to the crate root, + // so we have to visit its attributes and reexports separately. + link_resolver.resolve_doc_links_local(&krate.attrs); + link_resolver.process_module_children_or_reexports(CRATE_DEF_ID.to_def_id()); + visit::walk_crate(&mut link_resolver, krate); + link_resolver.process_extern_impls(); + + // FIXME: somehow rustdoc is still missing crates even though we loaded all + // the known necessary crates. Load them all unconditionally until we find a way to fix this. + // DO NOT REMOVE THIS without first testing on the reproducer in + // https://github.com/jyn514/objr/commit/edcee7b8124abf0e4c63873e8422ff81beb11ebb + for (extern_name, _) in externs.iter().filter(|(_, entry)| entry.add_prelude) { + link_resolver.resolver.resolve_rustdoc_path(extern_name, TypeNS, parent_scope); + } + + ResolverCaches { + markdown_links: Some(link_resolver.markdown_links), + doc_link_resolutions: link_resolver.doc_link_resolutions, + traits_in_scope: link_resolver.traits_in_scope, + all_traits: Some(link_resolver.all_traits), + all_trait_impls: Some(link_resolver.all_trait_impls), + all_macro_rules: link_resolver.all_macro_rules, + } +} + +fn doc_attrs<'a>(attrs: impl Iterator<Item = &'a ast::Attribute>) -> Attributes { + Attributes::from_ast_iter(attrs.map(|attr| (attr, None)), true) +} + +struct EarlyDocLinkResolver<'r, 'ra> { + resolver: &'r mut Resolver<'ra>, + sess: &'r Session, + parent_scope: ParentScope<'ra>, + visited_mods: DefIdSet, + markdown_links: FxHashMap<String, Vec<PreprocessedMarkdownLink>>, + doc_link_resolutions: FxHashMap<(Symbol, Namespace, DefId), Option<Res<ast::NodeId>>>, + traits_in_scope: DefIdMap<Vec<TraitCandidate>>, + all_traits: 
Vec<DefId>, + all_trait_impls: Vec<DefId>, + all_macro_rules: FxHashMap<Symbol, Res<ast::NodeId>>, + document_private_items: bool, +} + +impl<'ra> EarlyDocLinkResolver<'_, 'ra> { + fn add_traits_in_scope(&mut self, def_id: DefId) { + // Calls to `traits_in_scope` are expensive, so try to avoid them if only possible. + // Keys in the `traits_in_scope` cache are always module IDs. + if let Entry::Vacant(entry) = self.traits_in_scope.entry(def_id) { + let module = self.resolver.get_nearest_non_block_module(def_id); + let module_id = module.def_id(); + let entry = if module_id == def_id { + Some(entry) + } else if let Entry::Vacant(entry) = self.traits_in_scope.entry(module_id) { + Some(entry) + } else { + None + }; + if let Some(entry) = entry { + entry.insert(self.resolver.traits_in_scope( + None, + &ParentScope::module(module, self.resolver), + SyntaxContext::root(), + None, + )); + } + } + } + + /// Add traits in scope for links in impls collected by the `collect-intra-doc-links` pass. + /// That pass filters impls using type-based information, but we don't yet have such + /// information here, so we just conservatively calculate traits in scope for *all* modules + /// having impls in them. + fn process_extern_impls(&mut self) { + // Resolving links in already existing crates may trigger loading of new crates. + let mut start_cnum = 0; + loop { + let crates = Vec::from_iter(self.resolver.cstore().crates_untracked()); + for &cnum in &crates[start_cnum..] 
{ + let all_traits = + Vec::from_iter(self.resolver.cstore().traits_in_crate_untracked(cnum)); + let all_trait_impls = + Vec::from_iter(self.resolver.cstore().trait_impls_in_crate_untracked(cnum)); + let all_inherent_impls = + Vec::from_iter(self.resolver.cstore().inherent_impls_in_crate_untracked(cnum)); + let all_incoherent_impls = Vec::from_iter( + self.resolver.cstore().incoherent_impls_in_crate_untracked(cnum), + ); + + // Querying traits in scope is expensive so we try to prune the impl and traits lists + // using privacy, private traits and impls from other crates are never documented in + // the current crate, and links in their doc comments are not resolved. + for &def_id in &all_traits { + if self.resolver.cstore().visibility_untracked(def_id).is_public() { + self.resolve_doc_links_extern_impl(def_id, false); + } + } + for &(trait_def_id, impl_def_id, simplified_self_ty) in &all_trait_impls { + if self.resolver.cstore().visibility_untracked(trait_def_id).is_public() + && simplified_self_ty.and_then(|ty| ty.def()).map_or(true, |ty_def_id| { + self.resolver.cstore().visibility_untracked(ty_def_id).is_public() + }) + { + self.resolve_doc_links_extern_impl(impl_def_id, false); + } + } + for (ty_def_id, impl_def_id) in all_inherent_impls { + if self.resolver.cstore().visibility_untracked(ty_def_id).is_public() { + self.resolve_doc_links_extern_impl(impl_def_id, true); + } + } + for impl_def_id in all_incoherent_impls { + self.resolve_doc_links_extern_impl(impl_def_id, true); + } + + self.all_traits.extend(all_traits); + self.all_trait_impls + .extend(all_trait_impls.into_iter().map(|(_, def_id, _)| def_id)); + } + + if crates.len() > start_cnum { + start_cnum = crates.len(); + } else { + break; + } + } + } + + fn resolve_doc_links_extern_impl(&mut self, def_id: DefId, is_inherent: bool) { + self.resolve_doc_links_extern_outer_fixme(def_id, def_id); + let assoc_item_def_ids = Vec::from_iter( + self.resolver.cstore().associated_item_def_ids_untracked(def_id, 
self.sess), + ); + for assoc_def_id in assoc_item_def_ids { + if !is_inherent || self.resolver.cstore().visibility_untracked(assoc_def_id).is_public() + { + self.resolve_doc_links_extern_outer_fixme(assoc_def_id, def_id); + } + } + } + + // FIXME: replace all uses with `resolve_doc_links_extern_outer` to actually resolve links, not + // just add traits in scope. This may be expensive and require benchmarking and optimization. + fn resolve_doc_links_extern_outer_fixme(&mut self, def_id: DefId, scope_id: DefId) { + if !self.resolver.cstore().may_have_doc_links_untracked(def_id) { + return; + } + if let Some(parent_id) = self.resolver.opt_parent(scope_id) { + self.add_traits_in_scope(parent_id); + } + } + + fn resolve_doc_links_extern_outer(&mut self, def_id: DefId, scope_id: DefId) { + if !self.resolver.cstore().may_have_doc_links_untracked(def_id) { + return; + } + let attrs = Vec::from_iter(self.resolver.cstore().item_attrs_untracked(def_id, self.sess)); + let parent_scope = ParentScope::module( + self.resolver.get_nearest_non_block_module( + self.resolver.opt_parent(scope_id).unwrap_or(scope_id), + ), + self.resolver, + ); + self.resolve_doc_links(doc_attrs(attrs.iter()), parent_scope); + } + + fn resolve_doc_links_extern_inner(&mut self, def_id: DefId) { + if !self.resolver.cstore().may_have_doc_links_untracked(def_id) { + return; + } + let attrs = Vec::from_iter(self.resolver.cstore().item_attrs_untracked(def_id, self.sess)); + let parent_scope = ParentScope::module(self.resolver.expect_module(def_id), self.resolver); + self.resolve_doc_links(doc_attrs(attrs.iter()), parent_scope); + } + + fn resolve_doc_links_local(&mut self, attrs: &[ast::Attribute]) { + if !attrs.iter().any(|attr| attr.may_have_doc_links()) { + return; + } + self.resolve_doc_links(doc_attrs(attrs.iter()), self.parent_scope); + } + + fn resolve_and_cache( + &mut self, + path_str: &str, + ns: Namespace, + parent_scope: &ParentScope<'ra>, + ) -> bool { + // FIXME: This caching may be incorrect 
in case of multiple `macro_rules` + // items with the same name in the same module. + self.doc_link_resolutions + .entry((Symbol::intern(path_str), ns, parent_scope.module.def_id())) + .or_insert_with_key(|(path, ns, _)| { + self.resolver.resolve_rustdoc_path(path.as_str(), *ns, *parent_scope) + }) + .is_some() + } + + fn resolve_doc_links(&mut self, attrs: Attributes, parent_scope: ParentScope<'ra>) { + let mut need_traits_in_scope = false; + for (doc_module, doc) in attrs.prepare_to_doc_link_resolution() { + assert_eq!(doc_module, None); + let mut tmp_links = mem::take(&mut self.markdown_links); + let links = + tmp_links.entry(doc).or_insert_with_key(|doc| preprocessed_markdown_links(doc)); + for PreprocessedMarkdownLink(pp_link, _) in links { + if let Ok(pinfo) = pp_link { + // The logic here is a conservative approximation for path resolution in + // `resolve_with_disambiguator`. + if let Some(ns) = pinfo.disambiguator.map(Disambiguator::ns) { + if self.resolve_and_cache(&pinfo.path_str, ns, &parent_scope) { + continue; + } + } + + // Resolve all namespaces due to no disambiguator or for diagnostics. + let mut any_resolved = false; + let mut need_assoc = false; + for ns in [TypeNS, ValueNS, MacroNS] { + if self.resolve_and_cache(&pinfo.path_str, ns, &parent_scope) { + any_resolved = true; + } else if ns != MacroNS { + need_assoc = true; + } + } + + // Resolve all prefixes for type-relative resolution or for diagnostics. 
+ if need_assoc || !any_resolved { + let mut path = &pinfo.path_str[..]; + while let Some(idx) = path.rfind("::") { + path = &path[..idx]; + need_traits_in_scope = true; + for ns in [TypeNS, ValueNS, MacroNS] { + self.resolve_and_cache(path, ns, &parent_scope); + } + } + } + } + } + self.markdown_links = tmp_links; + } + + if need_traits_in_scope { + self.add_traits_in_scope(parent_scope.module.def_id()); + } + } + + /// When reexports are inlined, they are replaced with item which they refer to, those items + /// may have links in their doc comments, those links are resolved at the item definition site, + /// so we need to know traits in scope at that definition site. + fn process_module_children_or_reexports(&mut self, module_id: DefId) { + if !self.visited_mods.insert(module_id) { + return; // avoid infinite recursion + } + + for child in self.resolver.module_children_or_reexports(module_id) { + // This condition should give a superset of `denied` from `fn clean_use_statement`. + if child.vis.is_public() + || self.document_private_items + && child.vis != Visibility::Restricted(module_id) + && module_id.is_local() + { + if let Some(def_id) = child.res.opt_def_id() && !def_id.is_local() { + let scope_id = match child.res { + Res::Def(DefKind::Variant, ..) => self.resolver.parent(def_id), + _ => def_id, + }; + self.resolve_doc_links_extern_outer(def_id, scope_id); // Outer attribute scope + if let Res::Def(DefKind::Mod, ..) = child.res { + self.resolve_doc_links_extern_inner(def_id); // Inner attribute scope + } + // `DefKind::Trait`s are processed in `process_extern_impls`. + if let Res::Def(DefKind::Mod | DefKind::Enum, ..) 
= child.res { + self.process_module_children_or_reexports(def_id); + } + if let Res::Def(DefKind::Struct | DefKind::Union | DefKind::Variant, _) = + child.res + { + let field_def_ids = Vec::from_iter( + self.resolver + .cstore() + .associated_item_def_ids_untracked(def_id, self.sess), + ); + for field_def_id in field_def_ids { + self.resolve_doc_links_extern_outer(field_def_id, scope_id); + } + } + } + } + } + } +} + +impl Visitor<'_> for EarlyDocLinkResolver<'_, '_> { + fn visit_item(&mut self, item: &ast::Item) { + self.resolve_doc_links_local(&item.attrs); // Outer attribute scope + if let ItemKind::Mod(..) = item.kind { + let module_def_id = self.resolver.local_def_id(item.id).to_def_id(); + let module = self.resolver.expect_module(module_def_id); + let old_module = mem::replace(&mut self.parent_scope.module, module); + let old_macro_rules = self.parent_scope.macro_rules; + self.resolve_doc_links_local(&item.attrs); // Inner attribute scope + self.process_module_children_or_reexports(module_def_id); + visit::walk_item(self, item); + if item + .attrs + .iter() + .all(|attr| !attr.has_name(sym::macro_use) && !attr.has_name(sym::macro_escape)) + { + self.parent_scope.macro_rules = old_macro_rules; + } + self.parent_scope.module = old_module; + } else { + match &item.kind { + ItemKind::Trait(..) => { + self.all_traits.push(self.resolver.local_def_id(item.id).to_def_id()); + } + ItemKind::Impl(box ast::Impl { of_trait: Some(..), .. 
}) => { + self.all_trait_impls.push(self.resolver.local_def_id(item.id).to_def_id()); + } + ItemKind::MacroDef(macro_def) if macro_def.macro_rules => { + let (macro_rules_scope, res) = + self.resolver.macro_rules_scope(self.resolver.local_def_id(item.id)); + self.parent_scope.macro_rules = macro_rules_scope; + self.all_macro_rules.insert(item.ident.name, res); + } + _ => {} + } + visit::walk_item(self, item); + } + } + + fn visit_assoc_item(&mut self, item: &ast::AssocItem, ctxt: AssocCtxt) { + self.resolve_doc_links_local(&item.attrs); + visit::walk_assoc_item(self, item, ctxt) + } + + fn visit_foreign_item(&mut self, item: &ast::ForeignItem) { + self.resolve_doc_links_local(&item.attrs); + visit::walk_foreign_item(self, item) + } + + fn visit_variant(&mut self, v: &ast::Variant) { + self.resolve_doc_links_local(&v.attrs); + visit::walk_variant(self, v) + } + + fn visit_field_def(&mut self, field: &ast::FieldDef) { + self.resolve_doc_links_local(&field.attrs); + visit::walk_field_def(self, field) + } + + fn visit_block(&mut self, block: &ast::Block) { + let old_macro_rules = self.parent_scope.macro_rules; + visit::walk_block(self, block); + self.parent_scope.macro_rules = old_macro_rules; + } + + // NOTE: if doc-comments are ever allowed on other nodes (e.g. function parameters), + // then this will have to implement other visitor methods too. +} diff --git a/src/librustdoc/passes/collect_trait_impls.rs b/src/librustdoc/passes/collect_trait_impls.rs new file mode 100644 index 000000000..6b699c790 --- /dev/null +++ b/src/librustdoc/passes/collect_trait_impls.rs @@ -0,0 +1,273 @@ +//! Collects trait impls for each item in the crate. For example, if a crate +//! defines a struct that implements a trait, this pass will note that the +//! struct implements that trait. 
+use super::Pass; +use crate::clean::*; +use crate::core::DocContext; +use crate::formats::cache::Cache; +use crate::visit::DocVisitor; + +use rustc_data_structures::fx::{FxHashMap, FxHashSet}; +use rustc_hir::def_id::DefId; +use rustc_middle::ty::{self, DefIdTree}; +use rustc_span::symbol::sym; + +pub(crate) const COLLECT_TRAIT_IMPLS: Pass = Pass { + name: "collect-trait-impls", + run: collect_trait_impls, + description: "retrieves trait impls for items in the crate", +}; + +pub(crate) fn collect_trait_impls(mut krate: Crate, cx: &mut DocContext<'_>) -> Crate { + let synth_impls = cx.sess().time("collect_synthetic_impls", || { + let mut synth = SyntheticImplCollector { cx, impls: Vec::new() }; + synth.visit_crate(&krate); + synth.impls + }); + + let prims: FxHashSet<PrimitiveType> = krate.primitives.iter().map(|p| p.1).collect(); + + let crate_items = { + let mut coll = ItemCollector::new(); + cx.sess().time("collect_items_for_trait_impls", || coll.visit_crate(&krate)); + coll.items + }; + + let mut new_items_external = Vec::new(); + let mut new_items_local = Vec::new(); + + // External trait impls. + cx.with_all_trait_impls(|cx, all_trait_impls| { + let _prof_timer = cx.tcx.sess.prof.generic_activity("build_extern_trait_impls"); + for &impl_def_id in all_trait_impls.iter().skip_while(|def_id| def_id.is_local()) { + inline::build_impl(cx, None, impl_def_id, None, &mut new_items_external); + } + }); + + // Local trait impls. 
+ cx.with_all_trait_impls(|cx, all_trait_impls| { + let _prof_timer = cx.tcx.sess.prof.generic_activity("build_local_trait_impls"); + let mut attr_buf = Vec::new(); + for &impl_def_id in all_trait_impls.iter().take_while(|def_id| def_id.is_local()) { + let mut parent = Some(cx.tcx.parent(impl_def_id)); + while let Some(did) = parent { + attr_buf.extend( + cx.tcx + .get_attrs(did, sym::doc) + .filter(|attr| { + if let Some([attr]) = attr.meta_item_list().as_deref() { + attr.has_name(sym::cfg) + } else { + false + } + }) + .cloned(), + ); + parent = cx.tcx.opt_parent(did); + } + inline::build_impl(cx, None, impl_def_id, Some(&attr_buf), &mut new_items_local); + attr_buf.clear(); + } + }); + + cx.tcx.sess.prof.generic_activity("build_primitive_trait_impls").run(|| { + for def_id in PrimitiveType::all_impls(cx.tcx) { + // Try to inline primitive impls from other crates. + if !def_id.is_local() { + inline::build_impl(cx, None, def_id, None, &mut new_items_external); + } + } + for (prim, did) in PrimitiveType::primitive_locations(cx.tcx) { + // Do not calculate blanket impl list for docs that are not going to be rendered. + // While the `impl` blocks themselves are only in `libcore`, the module with `doc` + // attached is directly included in `libstd` as well. + let tcx = cx.tcx; + if did.is_local() { + for def_id in prim.impls(tcx).filter(|def_id| { + // Avoid including impl blocks with filled-in generics. + // https://github.com/rust-lang/rust/issues/94937 + // + // FIXME(notriddle): https://github.com/rust-lang/rust/issues/97129 + // + // This tactic of using inherent impl blocks for getting + // auto traits and blanket impls is a hack. What we really + // want is to check if `[T]` impls `Send`, which has + // nothing to do with the inherent impl. + // + // Rustdoc currently uses these `impl` block as a source of + // the `Ty`, as well as the `ParamEnv`, `SubstsRef`, and + // `Generics`. 
To avoid relying on the `impl` block, these + // things would need to be created from wholecloth, in a + // form that is valid for use in type inference. + let ty = tcx.type_of(def_id); + match ty.kind() { + ty::Slice(ty) + | ty::Ref(_, ty, _) + | ty::RawPtr(ty::TypeAndMut { ty, .. }) => { + matches!(ty.kind(), ty::Param(..)) + } + ty::Tuple(tys) => tys.iter().all(|ty| matches!(ty.kind(), ty::Param(..))), + _ => true, + } + }) { + let impls = get_auto_trait_and_blanket_impls(cx, def_id); + new_items_external.extend(impls.filter(|i| cx.inlined.insert(i.item_id))); + } + } + } + }); + + let mut cleaner = BadImplStripper { prims, items: crate_items, cache: &cx.cache }; + let mut type_did_to_deref_target: FxHashMap<DefId, &Type> = FxHashMap::default(); + + // Follow all `Deref` targets of included items and recursively add them as valid + fn add_deref_target( + cx: &DocContext<'_>, + map: &FxHashMap<DefId, &Type>, + cleaner: &mut BadImplStripper<'_>, + targets: &mut FxHashSet<DefId>, + type_did: DefId, + ) { + if let Some(target) = map.get(&type_did) { + debug!("add_deref_target: type {:?}, target {:?}", type_did, target); + if let Some(target_prim) = target.primitive_type() { + cleaner.prims.insert(target_prim); + } else if let Some(target_did) = target.def_id(&cx.cache) { + // `impl Deref<Target = S> for S` + if !targets.insert(target_did) { + // Avoid infinite cycles + return; + } + cleaner.items.insert(target_did.into()); + add_deref_target(cx, map, cleaner, targets, target_did); + } + } + } + + // scan through included items ahead of time to splice in Deref targets to the "valid" sets + for it in new_items_external.iter().chain(new_items_local.iter()) { + if let ImplItem(box Impl { ref for_, ref trait_, ref items, .. 
}) = *it.kind { + if trait_.as_ref().map(|t| t.def_id()) == cx.tcx.lang_items().deref_trait() + && cleaner.keep_impl(for_, true) + { + let target = items + .iter() + .find_map(|item| match *item.kind { + AssocTypeItem(ref t, _) => Some(&t.type_), + _ => None, + }) + .expect("Deref impl without Target type"); + + if let Some(prim) = target.primitive_type() { + cleaner.prims.insert(prim); + } else if let Some(did) = target.def_id(&cx.cache) { + cleaner.items.insert(did.into()); + } + if let Some(for_did) = for_.def_id(&cx.cache) { + if type_did_to_deref_target.insert(for_did, target).is_none() { + // Since only the `DefId` portion of the `Type` instances is known to be same for both the + // `Deref` target type and the impl for type positions, this map of types is keyed by + // `DefId` and for convenience uses a special cleaner that accepts `DefId`s directly. + if cleaner.keep_impl_with_def_id(for_did.into()) { + let mut targets = FxHashSet::default(); + targets.insert(for_did); + add_deref_target( + cx, + &type_did_to_deref_target, + &mut cleaner, + &mut targets, + for_did, + ); + } + } + } + } + } + } + + // Filter out external items that are not needed + new_items_external.retain(|it| { + if let ImplItem(box Impl { ref for_, ref trait_, ref kind, .. }) = *it.kind { + cleaner.keep_impl( + for_, + trait_.as_ref().map(|t| t.def_id()) == cx.tcx.lang_items().deref_trait(), + ) || trait_.as_ref().map_or(false, |t| cleaner.keep_impl_with_def_id(t.def_id().into())) + || kind.is_blanket() + } else { + true + } + }); + + if let ModuleItem(Module { items, .. 
}) = &mut *krate.module.kind { + items.extend(synth_impls); + items.extend(new_items_external); + items.extend(new_items_local); + } else { + panic!("collect-trait-impls can't run"); + }; + + krate +} + +struct SyntheticImplCollector<'a, 'tcx> { + cx: &'a mut DocContext<'tcx>, + impls: Vec<Item>, +} + +impl<'a, 'tcx> DocVisitor for SyntheticImplCollector<'a, 'tcx> { + fn visit_item(&mut self, i: &Item) { + if i.is_struct() || i.is_enum() || i.is_union() { + // FIXME(eddyb) is this `doc(hidden)` check needed? + if !self.cx.tcx.is_doc_hidden(i.item_id.expect_def_id()) { + self.impls + .extend(get_auto_trait_and_blanket_impls(self.cx, i.item_id.expect_def_id())); + } + } + + self.visit_item_recur(i) + } +} + +#[derive(Default)] +struct ItemCollector { + items: FxHashSet<ItemId>, +} + +impl ItemCollector { + fn new() -> Self { + Self::default() + } +} + +impl DocVisitor for ItemCollector { + fn visit_item(&mut self, i: &Item) { + self.items.insert(i.item_id); + + self.visit_item_recur(i) + } +} + +struct BadImplStripper<'a> { + prims: FxHashSet<PrimitiveType>, + items: FxHashSet<ItemId>, + cache: &'a Cache, +} + +impl<'a> BadImplStripper<'a> { + fn keep_impl(&self, ty: &Type, is_deref: bool) -> bool { + if let Generic(_) = ty { + // keep impls made on generics + true + } else if let Some(prim) = ty.primitive_type() { + self.prims.contains(&prim) + } else if let Some(did) = ty.def_id(self.cache) { + is_deref || self.keep_impl_with_def_id(did.into()) + } else { + false + } + } + + fn keep_impl_with_def_id(&self, item_id: ItemId) -> bool { + self.items.contains(&item_id) + } +} diff --git a/src/librustdoc/passes/html_tags.rs b/src/librustdoc/passes/html_tags.rs new file mode 100644 index 000000000..f3a3c853c --- /dev/null +++ b/src/librustdoc/passes/html_tags.rs @@ -0,0 +1,303 @@ +//! Detects invalid HTML (like an unclosed `<span>`) in doc comments. 
+use super::Pass; +use crate::clean::*; +use crate::core::DocContext; +use crate::html::markdown::main_body_opts; +use crate::visit::DocVisitor; + +use pulldown_cmark::{BrokenLink, Event, LinkType, Parser, Tag}; + +use std::iter::Peekable; +use std::ops::Range; +use std::str::CharIndices; + +pub(crate) const CHECK_INVALID_HTML_TAGS: Pass = Pass { + name: "check-invalid-html-tags", + run: check_invalid_html_tags, + description: "detects invalid HTML tags in doc comments", +}; + +struct InvalidHtmlTagsLinter<'a, 'tcx> { + cx: &'a mut DocContext<'tcx>, +} + +pub(crate) fn check_invalid_html_tags(krate: Crate, cx: &mut DocContext<'_>) -> Crate { + if cx.tcx.sess.is_nightly_build() { + let mut coll = InvalidHtmlTagsLinter { cx }; + coll.visit_crate(&krate); + } + krate +} + +const ALLOWED_UNCLOSED: &[&str] = &[ + "area", "base", "br", "col", "embed", "hr", "img", "input", "keygen", "link", "meta", "param", + "source", "track", "wbr", +]; + +fn drop_tag( + tags: &mut Vec<(String, Range<usize>)>, + tag_name: String, + range: Range<usize>, + f: &impl Fn(&str, &Range<usize>, bool), +) { + let tag_name_low = tag_name.to_lowercase(); + if let Some(pos) = tags.iter().rposition(|(t, _)| t.to_lowercase() == tag_name_low) { + // If the tag is nested inside a "<script>" or a "<style>" tag, no warning should + // be emitted. + let should_not_warn = tags.iter().take(pos + 1).any(|(at, _)| { + let at = at.to_lowercase(); + at == "script" || at == "style" + }); + for (last_tag_name, last_tag_span) in tags.drain(pos + 1..) { + if should_not_warn { + continue; + } + let last_tag_name_low = last_tag_name.to_lowercase(); + if ALLOWED_UNCLOSED.contains(&last_tag_name_low.as_str()) { + continue; + } + // `tags` is used as a queue, meaning that everything after `pos` is included inside it. + // So `<h2><h3></h2>` will look like `["h2", "h3"]`. So when closing `h2`, we will still + // have `h3`, meaning the tag wasn't closed as it should have. 
+ f(&format!("unclosed HTML tag `{}`", last_tag_name), &last_tag_span, true); + } + // Remove the `tag_name` that was originally closed + tags.pop(); + } else { + // It can happen for example in this case: `<h2></script></h2>` (the `h2` tag isn't required + // but it helps for the visualization). + f(&format!("unopened HTML tag `{}`", tag_name), &range, false); + } +} + +fn extract_path_backwards(text: &str, end_pos: usize) -> Option<usize> { + use rustc_lexer::{is_id_continue, is_id_start}; + let mut current_pos = end_pos; + loop { + if current_pos >= 2 && text[..current_pos].ends_with("::") { + current_pos -= 2; + } + let new_pos = text[..current_pos] + .char_indices() + .rev() + .take_while(|(_, c)| is_id_start(*c) || is_id_continue(*c)) + .reduce(|_accum, item| item) + .and_then(|(new_pos, c)| is_id_start(c).then_some(new_pos)); + if let Some(new_pos) = new_pos { + if current_pos != new_pos { + current_pos = new_pos; + continue; + } + } + break; + } + if current_pos == end_pos { None } else { Some(current_pos) } +} + +fn is_valid_for_html_tag_name(c: char, is_empty: bool) -> bool { + // https://spec.commonmark.org/0.30/#raw-html + // + // > A tag name consists of an ASCII letter followed by zero or more ASCII letters, digits, or + // > hyphens (-). + c.is_ascii_alphabetic() || !is_empty && (c == '-' || c.is_ascii_digit()) +} + +fn extract_html_tag( + tags: &mut Vec<(String, Range<usize>)>, + text: &str, + range: &Range<usize>, + start_pos: usize, + iter: &mut Peekable<CharIndices<'_>>, + f: &impl Fn(&str, &Range<usize>, bool), +) { + let mut tag_name = String::new(); + let mut is_closing = false; + let mut prev_pos = start_pos; + + loop { + let (pos, c) = match iter.peek() { + Some((pos, c)) => (*pos, *c), + // In case we reached the of the doc comment, we want to check that it's an + // unclosed HTML tag. For example "/// <h3". + None => (prev_pos, '\0'), + }; + prev_pos = pos; + // Checking if this is a closing tag (like `</a>` for `<a>`). 
+ if c == '/' && tag_name.is_empty() { + is_closing = true; + } else if is_valid_for_html_tag_name(c, tag_name.is_empty()) { + tag_name.push(c); + } else { + if !tag_name.is_empty() { + let mut r = Range { start: range.start + start_pos, end: range.start + pos }; + if c == '>' { + // In case we have a tag without attribute, we can consider the span to + // refer to it fully. + r.end += 1; + } + if is_closing { + // In case we have "</div >" or even "</div >". + if c != '>' { + if !c.is_whitespace() { + // It seems like it's not a valid HTML tag. + break; + } + let mut found = false; + for (new_pos, c) in text[pos..].char_indices() { + if !c.is_whitespace() { + if c == '>' { + r.end = range.start + new_pos + 1; + found = true; + } + break; + } + } + if !found { + break; + } + } + drop_tag(tags, tag_name, r, f); + } else { + tags.push((tag_name, r)); + } + } + break; + } + iter.next(); + } +} + +fn extract_tags( + tags: &mut Vec<(String, Range<usize>)>, + text: &str, + range: Range<usize>, + is_in_comment: &mut Option<Range<usize>>, + f: &impl Fn(&str, &Range<usize>, bool), +) { + let mut iter = text.char_indices().peekable(); + + while let Some((start_pos, c)) = iter.next() { + if is_in_comment.is_some() { + if text[start_pos..].starts_with("-->") { + *is_in_comment = None; + } + } else if c == '<' { + if text[start_pos..].starts_with("<!--") { + // We skip the "!--" part. (Once `advance_by` is stable, might be nice to use it!) + iter.next(); + iter.next(); + iter.next(); + *is_in_comment = Some(Range { + start: range.start + start_pos, + end: range.start + start_pos + 3, + }); + } else { + extract_html_tag(tags, text, &range, start_pos, &mut iter, f); + } + } + } +} + +impl<'a, 'tcx> DocVisitor for InvalidHtmlTagsLinter<'a, 'tcx> { + fn visit_item(&mut self, item: &Item) { + let tcx = self.cx.tcx; + let Some(hir_id) = DocContext::as_local_hir_id(tcx, item.item_id) + // If non-local, no need to check anything. 
+ else { return }; + let dox = item.attrs.collapsed_doc_value().unwrap_or_default(); + if !dox.is_empty() { + let report_diag = |msg: &str, range: &Range<usize>, is_open_tag: bool| { + let sp = match super::source_span_for_markdown_range(tcx, &dox, range, &item.attrs) + { + Some(sp) => sp, + None => item.attr_span(tcx), + }; + tcx.struct_span_lint_hir(crate::lint::INVALID_HTML_TAGS, hir_id, sp, |lint| { + use rustc_lint_defs::Applicability; + let mut diag = lint.build(msg); + // If a tag looks like `<this>`, it might actually be a generic. + // We don't try to detect stuff `<like, this>` because that's not valid HTML, + // and we don't try to detect stuff `<like this>` because that's not valid Rust. + if let Some(Some(generics_start)) = (is_open_tag + && dox[..range.end].ends_with('>')) + .then(|| extract_path_backwards(&dox, range.start)) + { + let generics_sp = match super::source_span_for_markdown_range( + tcx, + &dox, + &(generics_start..range.end), + &item.attrs, + ) { + Some(sp) => sp, + None => item.attr_span(tcx), + }; + // multipart form is chosen here because ``Vec<i32>`` would be confusing. + diag.multipart_suggestion( + "try marking as source code", + vec![ + (generics_sp.shrink_to_lo(), String::from("`")), + (generics_sp.shrink_to_hi(), String::from("`")), + ], + Applicability::MaybeIncorrect, + ); + } + diag.emit() + }); + }; + + let mut tags = Vec::new(); + let mut is_in_comment = None; + let mut in_code_block = false; + + let link_names = item.link_names(&self.cx.cache); + + let mut replacer = |broken_link: BrokenLink<'_>| { + if let Some(link) = + link_names.iter().find(|link| *link.original_text == *broken_link.reference) + { + Some((link.href.as_str().into(), link.new_text.as_str().into())) + } else if matches!( + &broken_link.link_type, + LinkType::Reference | LinkType::ReferenceUnknown + ) { + // If the link is shaped [like][this], suppress any broken HTML in the [this] part. + // The `broken_intra_doc_links` will report typos in there anyway. 
+ Some(( + broken_link.reference.to_string().into(), + broken_link.reference.to_string().into(), + )) + } else { + None + } + }; + + let p = + Parser::new_with_broken_link_callback(&dox, main_body_opts(), Some(&mut replacer)) + .into_offset_iter(); + + for (event, range) in p { + match event { + Event::Start(Tag::CodeBlock(_)) => in_code_block = true, + Event::Html(text) | Event::Text(text) if !in_code_block => { + extract_tags(&mut tags, &text, range, &mut is_in_comment, &report_diag) + } + Event::End(Tag::CodeBlock(_)) => in_code_block = false, + _ => {} + } + } + + for (tag, range) in tags.iter().filter(|(t, _)| { + let t = t.to_lowercase(); + !ALLOWED_UNCLOSED.contains(&t.as_str()) + }) { + report_diag(&format!("unclosed HTML tag `{}`", tag), range, true); + } + + if let Some(range) = is_in_comment { + report_diag("Unclosed HTML comment", &range, false); + } + } + + self.visit_item_recur(item) + } +} diff --git a/src/librustdoc/passes/mod.rs b/src/librustdoc/passes/mod.rs new file mode 100644 index 000000000..f81b38ea3 --- /dev/null +++ b/src/librustdoc/passes/mod.rs @@ -0,0 +1,212 @@ +//! Contains information about "passes", used to modify crate information during the documentation +//! process. 
+ +use rustc_middle::ty::TyCtxt; +use rustc_span::{InnerSpan, Span, DUMMY_SP}; +use std::ops::Range; + +use self::Condition::*; +use crate::clean::{self, DocFragmentKind}; +use crate::core::DocContext; + +mod stripper; +pub(crate) use stripper::*; + +mod bare_urls; +pub(crate) use self::bare_urls::CHECK_BARE_URLS; + +mod strip_hidden; +pub(crate) use self::strip_hidden::STRIP_HIDDEN; + +mod strip_private; +pub(crate) use self::strip_private::STRIP_PRIVATE; + +mod strip_priv_imports; +pub(crate) use self::strip_priv_imports::STRIP_PRIV_IMPORTS; + +mod propagate_doc_cfg; +pub(crate) use self::propagate_doc_cfg::PROPAGATE_DOC_CFG; + +pub(crate) mod collect_intra_doc_links; +pub(crate) use self::collect_intra_doc_links::COLLECT_INTRA_DOC_LINKS; + +mod check_doc_test_visibility; +pub(crate) use self::check_doc_test_visibility::CHECK_DOC_TEST_VISIBILITY; + +mod collect_trait_impls; +pub(crate) use self::collect_trait_impls::COLLECT_TRAIT_IMPLS; + +mod check_code_block_syntax; +pub(crate) use self::check_code_block_syntax::CHECK_CODE_BLOCK_SYNTAX; + +mod calculate_doc_coverage; +pub(crate) use self::calculate_doc_coverage::CALCULATE_DOC_COVERAGE; + +mod html_tags; +pub(crate) use self::html_tags::CHECK_INVALID_HTML_TAGS; + +/// A single pass over the cleaned documentation. +/// +/// Runs in the compiler context, so it has access to types and traits and the like. +#[derive(Copy, Clone)] +pub(crate) struct Pass { + pub(crate) name: &'static str, + pub(crate) run: fn(clean::Crate, &mut DocContext<'_>) -> clean::Crate, + pub(crate) description: &'static str, +} + +/// In a list of passes, a pass that may or may not need to be run depending on options. +#[derive(Copy, Clone)] +pub(crate) struct ConditionalPass { + pub(crate) pass: Pass, + pub(crate) condition: Condition, +} + +/// How to decide whether to run a conditional pass. +#[derive(Copy, Clone)] +pub(crate) enum Condition { + Always, + /// When `--document-private-items` is passed. 
+ WhenDocumentPrivate, + /// When `--document-private-items` is not passed. + WhenNotDocumentPrivate, + /// When `--document-hidden-items` is not passed. + WhenNotDocumentHidden, +} + +/// The full list of passes. +pub(crate) const PASSES: &[Pass] = &[ + CHECK_DOC_TEST_VISIBILITY, + STRIP_HIDDEN, + STRIP_PRIVATE, + STRIP_PRIV_IMPORTS, + PROPAGATE_DOC_CFG, + COLLECT_INTRA_DOC_LINKS, + CHECK_CODE_BLOCK_SYNTAX, + COLLECT_TRAIT_IMPLS, + CALCULATE_DOC_COVERAGE, + CHECK_INVALID_HTML_TAGS, + CHECK_BARE_URLS, +]; + +/// The list of passes run by default. +pub(crate) const DEFAULT_PASSES: &[ConditionalPass] = &[ + ConditionalPass::always(COLLECT_TRAIT_IMPLS), + ConditionalPass::always(CHECK_DOC_TEST_VISIBILITY), + ConditionalPass::new(STRIP_HIDDEN, WhenNotDocumentHidden), + ConditionalPass::new(STRIP_PRIVATE, WhenNotDocumentPrivate), + ConditionalPass::new(STRIP_PRIV_IMPORTS, WhenDocumentPrivate), + ConditionalPass::always(COLLECT_INTRA_DOC_LINKS), + ConditionalPass::always(CHECK_CODE_BLOCK_SYNTAX), + ConditionalPass::always(CHECK_INVALID_HTML_TAGS), + ConditionalPass::always(PROPAGATE_DOC_CFG), + ConditionalPass::always(CHECK_BARE_URLS), +]; + +/// The list of default passes run when `--doc-coverage` is passed to rustdoc. +pub(crate) const COVERAGE_PASSES: &[ConditionalPass] = &[ + ConditionalPass::new(STRIP_HIDDEN, WhenNotDocumentHidden), + ConditionalPass::new(STRIP_PRIVATE, WhenNotDocumentPrivate), + ConditionalPass::always(CALCULATE_DOC_COVERAGE), +]; + +impl ConditionalPass { + pub(crate) const fn always(pass: Pass) -> Self { + Self::new(pass, Always) + } + + pub(crate) const fn new(pass: Pass, condition: Condition) -> Self { + ConditionalPass { pass, condition } + } +} + +/// Returns the given default set of passes. +pub(crate) fn defaults(show_coverage: bool) -> &'static [ConditionalPass] { + if show_coverage { COVERAGE_PASSES } else { DEFAULT_PASSES } +} + +/// Returns a span encompassing all the given attributes. 
+pub(crate) fn span_of_attrs(attrs: &clean::Attributes) -> Option<Span> { + if attrs.doc_strings.is_empty() { + return None; + } + let start = attrs.doc_strings[0].span; + if start == DUMMY_SP { + return None; + } + let end = attrs.doc_strings.last().expect("no doc strings provided").span; + Some(start.to(end)) +} + +/// Attempts to match a range of bytes from parsed markdown to a `Span` in the source code. +/// +/// This method will return `None` if we cannot construct a span from the source map or if the +/// attributes are not all sugared doc comments. It's difficult to calculate the correct span in +/// that case due to escaping and other source features. +pub(crate) fn source_span_for_markdown_range( + tcx: TyCtxt<'_>, + markdown: &str, + md_range: &Range<usize>, + attrs: &clean::Attributes, +) -> Option<Span> { + let is_all_sugared_doc = + attrs.doc_strings.iter().all(|frag| frag.kind == DocFragmentKind::SugaredDoc); + + if !is_all_sugared_doc { + return None; + } + + let snippet = tcx.sess.source_map().span_to_snippet(span_of_attrs(attrs)?).ok()?; + + let starting_line = markdown[..md_range.start].matches('\n').count(); + let ending_line = starting_line + markdown[md_range.start..md_range.end].matches('\n').count(); + + // We use `split_terminator('\n')` instead of `lines()` when counting bytes so that we treat + // CRLF and LF line endings the same way. + let mut src_lines = snippet.split_terminator('\n'); + let md_lines = markdown.split_terminator('\n'); + + // The number of bytes from the source span to the markdown span that are not part + // of the markdown, like comment markers. 
+ let mut start_bytes = 0; + let mut end_bytes = 0; + + 'outer: for (line_no, md_line) in md_lines.enumerate() { + loop { + let source_line = src_lines.next()?; + match source_line.find(md_line) { + Some(offset) => { + if line_no == starting_line { + start_bytes += offset; + + if starting_line == ending_line { + break 'outer; + } + } else if line_no == ending_line { + end_bytes += offset; + break 'outer; + } else if line_no < starting_line { + start_bytes += source_line.len() - md_line.len(); + } else { + end_bytes += source_line.len() - md_line.len(); + } + break; + } + None => { + // Since this is a source line that doesn't include a markdown line, + // we have to count the newline that we split from earlier. + if line_no <= starting_line { + start_bytes += source_line.len() + 1; + } else { + end_bytes += source_line.len() + 1; + } + } + } + } + } + + Some(span_of_attrs(attrs)?.from_inner(InnerSpan::new( + md_range.start + start_bytes, + md_range.end + start_bytes + end_bytes, + ))) +} diff --git a/src/librustdoc/passes/propagate_doc_cfg.rs b/src/librustdoc/passes/propagate_doc_cfg.rs new file mode 100644 index 000000000..0c5d83655 --- /dev/null +++ b/src/librustdoc/passes/propagate_doc_cfg.rs @@ -0,0 +1,45 @@ +//! Propagates [`#[doc(cfg(...))]`](https://github.com/rust-lang/rust/issues/43781) to child items. 
+use std::sync::Arc; + +use crate::clean::cfg::Cfg; +use crate::clean::{Crate, Item}; +use crate::core::DocContext; +use crate::fold::DocFolder; +use crate::passes::Pass; + +pub(crate) const PROPAGATE_DOC_CFG: Pass = Pass { + name: "propagate-doc-cfg", + run: propagate_doc_cfg, + description: "propagates `#[doc(cfg(...))]` to child items", +}; + +pub(crate) fn propagate_doc_cfg(cr: Crate, _: &mut DocContext<'_>) -> Crate { + CfgPropagator { parent_cfg: None }.fold_crate(cr) +} + +struct CfgPropagator { + parent_cfg: Option<Arc<Cfg>>, +} + +impl DocFolder for CfgPropagator { + fn fold_item(&mut self, mut item: Item) -> Option<Item> { + let old_parent_cfg = self.parent_cfg.clone(); + + let new_cfg = match (self.parent_cfg.take(), item.cfg.take()) { + (None, None) => None, + (Some(rc), None) | (None, Some(rc)) => Some(rc), + (Some(mut a), Some(b)) => { + let b = Arc::try_unwrap(b).unwrap_or_else(|rc| Cfg::clone(&rc)); + *Arc::make_mut(&mut a) &= b; + Some(a) + } + }; + self.parent_cfg = new_cfg.clone(); + item.cfg = new_cfg; + + let result = self.fold_item_recur(item); + self.parent_cfg = old_parent_cfg; + + Some(result) + } +} diff --git a/src/librustdoc/passes/strip_hidden.rs b/src/librustdoc/passes/strip_hidden.rs new file mode 100644 index 000000000..533e2ce46 --- /dev/null +++ b/src/librustdoc/passes/strip_hidden.rs @@ -0,0 +1,68 @@ +//! Strip all doc(hidden) items from the output. 
+use rustc_span::symbol::sym; +use std::mem; + +use crate::clean; +use crate::clean::{Item, ItemIdSet, NestedAttributesExt}; +use crate::core::DocContext; +use crate::fold::{strip_item, DocFolder}; +use crate::passes::{ImplStripper, Pass}; + +pub(crate) const STRIP_HIDDEN: Pass = Pass { + name: "strip-hidden", + run: strip_hidden, + description: "strips all `#[doc(hidden)]` items from the output", +}; + +/// Strip items marked `#[doc(hidden)]` +pub(crate) fn strip_hidden(krate: clean::Crate, cx: &mut DocContext<'_>) -> clean::Crate { + let mut retained = ItemIdSet::default(); + + // strip all #[doc(hidden)] items + let krate = { + let mut stripper = Stripper { retained: &mut retained, update_retained: true }; + stripper.fold_crate(krate) + }; + + // strip all impls referencing stripped items + let mut stripper = ImplStripper { retained: &retained, cache: &cx.cache }; + stripper.fold_crate(krate) +} + +struct Stripper<'a> { + retained: &'a mut ItemIdSet, + update_retained: bool, +} + +impl<'a> DocFolder for Stripper<'a> { + fn fold_item(&mut self, i: Item) -> Option<Item> { + if i.attrs.lists(sym::doc).has_word(sym::hidden) { + debug!("strip_hidden: stripping {:?} {:?}", i.type_(), i.name); + // Use a dedicated hidden item for fields, variants, and modules. + // We need to keep private fields and variants, so that the docs + // can show a placeholder "// some variants omitted". We need to keep + // private modules, because they can contain impl blocks, and impl + // block privacy is inherited from the type and trait, not from the + // module it's defined in. Both of these are marked "stripped," and + // not included in the final docs, but since they still have an effect + // on the final doc, cannot be completely removed from the Clean IR. + match *i.kind { + clean::StructFieldItem(..) | clean::ModuleItem(..) | clean::VariantItem(..) 
=> { + // We need to recurse into stripped modules to + // strip things like impl methods but when doing so + // we must not add any items to the `retained` set. + let old = mem::replace(&mut self.update_retained, false); + let ret = strip_item(self.fold_item_recur(i)); + self.update_retained = old; + return Some(ret); + } + _ => return None, + } + } else { + if self.update_retained { + self.retained.insert(i.item_id); + } + } + Some(self.fold_item_recur(i)) + } +} diff --git a/src/librustdoc/passes/strip_priv_imports.rs b/src/librustdoc/passes/strip_priv_imports.rs new file mode 100644 index 000000000..85be8fa10 --- /dev/null +++ b/src/librustdoc/passes/strip_priv_imports.rs @@ -0,0 +1,16 @@ +//! Strips all private import statements (use, extern crate) from a +//! crate. +use crate::clean; +use crate::core::DocContext; +use crate::fold::DocFolder; +use crate::passes::{ImportStripper, Pass}; + +pub(crate) const STRIP_PRIV_IMPORTS: Pass = Pass { + name: "strip-priv-imports", + run: strip_priv_imports, + description: "strips all private import statements (`use`, `extern crate`) from a crate", +}; + +pub(crate) fn strip_priv_imports(krate: clean::Crate, _: &mut DocContext<'_>) -> clean::Crate { + ImportStripper.fold_crate(krate) +} diff --git a/src/librustdoc/passes/strip_private.rs b/src/librustdoc/passes/strip_private.rs new file mode 100644 index 000000000..9ba841a31 --- /dev/null +++ b/src/librustdoc/passes/strip_private.rs @@ -0,0 +1,35 @@ +//! Strip all private items from the output. Additionally implies strip_priv_imports. +//! Basically, the goal is to remove items that are not relevant for public documentation. 
+use crate::clean::{self, ItemIdSet}; +use crate::core::DocContext; +use crate::fold::DocFolder; +use crate::passes::{ImplStripper, ImportStripper, Pass, Stripper}; + +pub(crate) const STRIP_PRIVATE: Pass = Pass { + name: "strip-private", + run: strip_private, + description: "strips all private items from a crate which cannot be seen externally, \ + implies strip-priv-imports", +}; + +/// Strip private items from the point of view of a crate or externally from a +/// crate, specified by the `xcrate` flag. +pub(crate) fn strip_private(mut krate: clean::Crate, cx: &mut DocContext<'_>) -> clean::Crate { + // This stripper collects all *retained* nodes. + let mut retained = ItemIdSet::default(); + + // strip all private items + { + let mut stripper = Stripper { + retained: &mut retained, + access_levels: &cx.cache.access_levels, + update_retained: true, + is_json_output: cx.output_format.is_json() && !cx.show_coverage, + }; + krate = ImportStripper.fold_crate(stripper.fold_crate(krate)); + } + + // strip all impls referencing private items + let mut stripper = ImplStripper { retained: &retained, cache: &cx.cache }; + stripper.fold_crate(krate) +} diff --git a/src/librustdoc/passes/stripper.rs b/src/librustdoc/passes/stripper.rs new file mode 100644 index 000000000..0d419042a --- /dev/null +++ b/src/librustdoc/passes/stripper.rs @@ -0,0 +1,188 @@ +//! A collection of utility functions for the `strip_*` passes. 
+use rustc_hir::def_id::DefId; +use rustc_middle::middle::privacy::AccessLevels; +use std::mem; + +use crate::clean::{self, Item, ItemId, ItemIdSet}; +use crate::fold::{strip_item, DocFolder}; +use crate::formats::cache::Cache; + +pub(crate) struct Stripper<'a> { + pub(crate) retained: &'a mut ItemIdSet, + pub(crate) access_levels: &'a AccessLevels<DefId>, + pub(crate) update_retained: bool, + pub(crate) is_json_output: bool, +} + +impl<'a> Stripper<'a> { + // We need to handle this differently for the JSON output because some non exported items could + // be used in public API. And so, we need these items as well. `is_exported` only checks if they + // are in the public API, which is not enough. + #[inline] + fn is_item_reachable(&self, item_id: ItemId) -> bool { + if self.is_json_output { + self.access_levels.is_reachable(item_id.expect_def_id()) + } else { + self.access_levels.is_exported(item_id.expect_def_id()) + } + } +} + +impl<'a> DocFolder for Stripper<'a> { + fn fold_item(&mut self, i: Item) -> Option<Item> { + match *i.kind { + clean::StrippedItem(..) => { + // We need to recurse into stripped modules to strip things + // like impl methods but when doing so we must not add any + // items to the `retained` set. + debug!("Stripper: recursing into stripped {:?} {:?}", i.type_(), i.name); + let old = mem::replace(&mut self.update_retained, false); + let ret = self.fold_item_recur(i); + self.update_retained = old; + return Some(ret); + } + // These items can all get re-exported + clean::OpaqueTyItem(..) + | clean::TypedefItem(..) + | clean::StaticItem(..) + | clean::StructItem(..) + | clean::EnumItem(..) + | clean::TraitItem(..) + | clean::FunctionItem(..) + | clean::VariantItem(..) + | clean::MethodItem(..) + | clean::ForeignFunctionItem(..) + | clean::ForeignStaticItem(..) + | clean::ConstantItem(..) + | clean::UnionItem(..) + | clean::AssocConstItem(..) + | clean::AssocTypeItem(..) + | clean::TraitAliasItem(..) + | clean::MacroItem(..) 
+ | clean::ForeignTypeItem => { + let item_id = i.item_id; + if item_id.is_local() && !self.is_item_reachable(item_id) { + debug!("Stripper: stripping {:?} {:?}", i.type_(), i.name); + return None; + } + } + + clean::StructFieldItem(..) => { + if !i.visibility.is_public() { + return Some(strip_item(i)); + } + } + + clean::ModuleItem(..) => { + if i.item_id.is_local() && !i.visibility.is_public() { + debug!("Stripper: stripping module {:?}", i.name); + let old = mem::replace(&mut self.update_retained, false); + let ret = strip_item(self.fold_item_recur(i)); + self.update_retained = old; + return Some(ret); + } + } + + // handled in the `strip-priv-imports` pass + clean::ExternCrateItem { .. } | clean::ImportItem(..) => {} + + clean::ImplItem(..) => {} + + // tymethods etc. have no control over privacy + clean::TyMethodItem(..) | clean::TyAssocConstItem(..) | clean::TyAssocTypeItem(..) => {} + + // Proc-macros are always public + clean::ProcMacroItem(..) => {} + + // Primitives are never stripped + clean::PrimitiveItem(..) => {} + + // Keywords are never stripped + clean::KeywordItem => {} + } + + let fastreturn = match *i.kind { + // nothing left to do for traits (don't want to filter their + // methods out, visibility controlled by the trait) + clean::TraitItem(..) => true, + + // implementations of traits are always public. + clean::ImplItem(ref imp) if imp.trait_.is_some() => true, + // Variant fields have inherited visibility + clean::VariantItem(clean::Variant::Struct(..) 
| clean::Variant::Tuple(..)) => true, + _ => false, + }; + + let i = if fastreturn { + if self.update_retained { + self.retained.insert(i.item_id); + } + return Some(i); + } else { + self.fold_item_recur(i) + }; + + if self.update_retained { + self.retained.insert(i.item_id); + } + Some(i) + } +} + +/// This stripper discards all impls which reference stripped items +pub(crate) struct ImplStripper<'a> { + pub(crate) retained: &'a ItemIdSet, + pub(crate) cache: &'a Cache, +} + +impl<'a> DocFolder for ImplStripper<'a> { + fn fold_item(&mut self, i: Item) -> Option<Item> { + if let clean::ImplItem(ref imp) = *i.kind { + // Impl blocks can be skipped if they are: empty; not a trait impl; and have no + // documentation. + if imp.trait_.is_none() && imp.items.is_empty() && i.doc_value().is_none() { + return None; + } + if let Some(did) = imp.for_.def_id(self.cache) { + if did.is_local() && !imp.for_.is_assoc_ty() && !self.retained.contains(&did.into()) + { + debug!("ImplStripper: impl item for stripped type; removing"); + return None; + } + } + if let Some(did) = imp.trait_.as_ref().map(|t| t.def_id()) { + if did.is_local() && !self.retained.contains(&did.into()) { + debug!("ImplStripper: impl item for stripped trait; removing"); + return None; + } + } + if let Some(generics) = imp.trait_.as_ref().and_then(|t| t.generics()) { + for typaram in generics { + if let Some(did) = typaram.def_id(self.cache) { + if did.is_local() && !self.retained.contains(&did.into()) { + debug!( + "ImplStripper: stripped item in trait's generics; removing impl" + ); + return None; + } + } + } + } + } + Some(self.fold_item_recur(i)) + } +} + +/// This stripper discards all private import statements (`use`, `extern crate`) +pub(crate) struct ImportStripper; + +impl DocFolder for ImportStripper { + fn fold_item(&mut self, i: Item) -> Option<Item> { + match *i.kind { + clean::ExternCrateItem { .. } | clean::ImportItem(..) 
if !i.visibility.is_public() => { + None + } + _ => Some(self.fold_item_recur(i)), + } + } +} |