summaryrefslogtreecommitdiffstats
path: root/src/librustdoc/scrape_examples.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/librustdoc/scrape_examples.rs')
-rw-r--r--src/librustdoc/scrape_examples.rs359
1 files changed, 359 insertions, 0 deletions
diff --git a/src/librustdoc/scrape_examples.rs b/src/librustdoc/scrape_examples.rs
new file mode 100644
index 000000000..0d9684025
--- /dev/null
+++ b/src/librustdoc/scrape_examples.rs
@@ -0,0 +1,359 @@
+//! This module analyzes crates to find call sites that can serve as examples in the documentation.
+
+use crate::clean;
+use crate::config;
+use crate::formats;
+use crate::formats::renderer::FormatRenderer;
+use crate::html::render::Context;
+
+use rustc_data_structures::fx::FxHashMap;
+use rustc_hir::{
+ self as hir,
+ intravisit::{self, Visitor},
+};
+use rustc_interface::interface;
+use rustc_macros::{Decodable, Encodable};
+use rustc_middle::hir::map::Map;
+use rustc_middle::hir::nested_filter;
+use rustc_middle::ty::{self, TyCtxt};
+use rustc_serialize::{
+ opaque::{FileEncoder, MemDecoder},
+ Decodable, Encodable,
+};
+use rustc_session::getopts;
+use rustc_span::{
+ def_id::{CrateNum, DefPathHash, LOCAL_CRATE},
+ edition::Edition,
+ BytePos, FileName, SourceFile,
+};
+
+use std::fs;
+use std::path::PathBuf;
+
+/// Command-line options that enable and configure example scraping.
+#[derive(Debug, Clone)]
+pub(crate) struct ScrapeExamplesOptions {
+ /// Value of `--scrape-examples-output-path`: the file the collected call locations are written to.
+ output_path: PathBuf,
+ /// Values of `--scrape-examples-target-crate`: names of the crates whose functions may be used as examples.
+ target_crates: Vec<String>,
+ /// Whether `--scrape-tests` was passed. It is only stored here; its consumer is outside this module.
+ pub(crate) scrape_tests: bool,
+}
+
+impl ScrapeExamplesOptions {
+    /// Builds the scraping options from the parsed command line.
+    ///
+    /// * `Ok(Some(_))` when an output path and at least one target crate were given.
+    /// * `Ok(None)` when none of the scraping flags were given.
+    /// * `Err(1)`, after reporting through `diag`, for every inconsistent combination.
+    pub(crate) fn new(
+        matches: &getopts::Matches,
+        diag: &rustc_errors::Handler,
+    ) -> Result<Option<Self>, i32> {
+        let output_path = matches.opt_str("scrape-examples-output-path");
+        let target_crates = matches.opt_strs("scrape-examples-target-crate");
+        let scrape_tests = matches.opt_present("scrape-tests");
+        let has_targets = !target_crates.is_empty();
+
+        match output_path {
+            // Both mandatory flags are present: scraping is enabled.
+            Some(path) if has_targets => Ok(Some(Self {
+                output_path: PathBuf::from(path),
+                target_crates,
+                scrape_tests,
+            })),
+            // Neither mandatory flag is present: fine unless --scrape-tests was given alone.
+            None if !has_targets => {
+                if !scrape_tests {
+                    return Ok(None);
+                }
+                diag.err("must use --scrape-examples-output-path and --scrape-examples-target-crate with --scrape-tests");
+                Err(1)
+            }
+            // Exactly one of the two mandatory flags is present.
+            _ => {
+                diag.err("must use --scrape-examples-output-path and --scrape-examples-target-crate together");
+                Err(1)
+            }
+        }
+    }
+}
+
+/// A serializable description of a source span, as both byte positions and line indices.
+#[derive(Encodable, Decodable, Debug, Clone)]
+pub(crate) struct SyntaxRange {
+ /// `(lo, hi)` byte positions as returned by `SourceFile::original_relative_byte_pos`.
+ pub(crate) byte_span: (u32, u32),
+ /// `(lo, hi)` line indices as returned by `SourceFile::lookup_line`.
+ pub(crate) line_span: (usize, usize),
+}
+
+impl SyntaxRange {
+    /// Converts `span` into byte and line ranges relative to `file`.
+    ///
+    /// Returns `None` when the line of either endpoint cannot be looked up in `file`.
+    fn new(span: rustc_span::Span, file: &SourceFile) -> Option<Self> {
+        let (lo, hi) = (span.lo(), span.hi());
+
+        let byte_span =
+            (file.original_relative_byte_pos(lo).0, file.original_relative_byte_pos(hi).0);
+        let first_line = file.lookup_line(lo)?;
+        let last_line = file.lookup_line(hi)?;
+
+        Some(Self { byte_span, line_span: (first_line, last_line) })
+    }
+}
+
+/// The source ranges describing a single scraped call site.
+#[derive(Encodable, Decodable, Debug, Clone)]
+pub(crate) struct CallLocation {
+ /// Range of the call itself.
+ pub(crate) call_expr: SyntaxRange,
+ /// Range of the callee: the function expression of a call, or the method name of a method call.
+ pub(crate) call_ident: SyntaxRange,
+ /// Range of the item that contains the call.
+ pub(crate) enclosing_item: SyntaxRange,
+}
+
+impl CallLocation {
+    /// Builds a call location from the three relevant spans, all resolved against `source_file`.
+    ///
+    /// Returns `None` as soon as one of the spans cannot be converted into a [`SyntaxRange`].
+    fn new(
+        expr_span: rustc_span::Span,
+        ident_span: rustc_span::Span,
+        enclosing_item_span: rustc_span::Span,
+        source_file: &SourceFile,
+    ) -> Option<Self> {
+        let call_expr = SyntaxRange::new(expr_span, source_file)?;
+        let call_ident = SyntaxRange::new(ident_span, source_file)?;
+        let enclosing_item = SyntaxRange::new(enclosing_item_span, source_file)?;
+        Some(Self { call_expr, call_ident, enclosing_item })
+    }
+}
+
+/// Everything recorded about the calls to one function found in one source file.
+#[derive(Encodable, Decodable, Debug, Clone)]
+pub(crate) struct CallData {
+ /// The call sites found in the file.
+ pub(crate) locations: Vec<CallLocation>,
+ /// Link produced by the render context from the span of the first call recorded for the file.
+ pub(crate) url: String,
+ /// Local path of the file as displayed to the user (not canonicalized).
+ pub(crate) display_name: String,
+ /// Edition taken from the span of the first call recorded for the file.
+ pub(crate) edition: Edition,
+}
+
+/// Calls to a single function, keyed by the canonicalized path of the file containing them.
+pub(crate) type FnCallLocations = FxHashMap<PathBuf, CallData>;
+/// Calls to every scraped function, keyed by the `DefPathHash` of the called function.
+pub(crate) type AllCallLocations = FxHashMap<DefPathHash, FnCallLocations>;
+
+/// Visitor for traversing a crate and finding instances of function calls.
+struct FindCalls<'a, 'tcx> {
+ /// Type context used to type-check bodies and resolve callees.
+ tcx: TyCtxt<'tcx>,
+ /// HIR map returned from `nested_visit_map`.
+ map: Map<'tcx>,
+ /// Render context, used to turn a call span into a link.
+ cx: Context<'tcx>,
+ /// Crates whose functions are eligible; calls resolving into any other crate are rejected.
+ target_crates: Vec<CrateNum>,
+ /// Accumulator that accepted call locations are pushed into.
+ calls: &'a mut AllCallLocations,
+}
+
+impl<'a, 'tcx> Visitor<'tcx> for FindCalls<'a, 'tcx>
+where
+    'tcx: 'a,
+{
+    type NestedFilter = nested_filter::OnlyBodies;
+
+    fn nested_visit_map(&mut self) -> Self::Map {
+        self.map
+    }
+
+    /// Records `ex` as an example call site if it is a function or method call that
+    ///
+    /// * lives inside a body that can be type-checked,
+    /// * does not come from a macro expansion (neither the call nor its enclosing item),
+    /// * has well-nested spans (item contains call, call contains callee identifier),
+    /// * resolves to a concrete `FnDef` defined in one of the target crates, and
+    /// * is located in a real local file that can be canonicalized and linked to.
+    ///
+    /// Sub-expressions are walked first, so nested calls are recorded before their parent.
+    fn visit_expr(&mut self, ex: &'tcx hir::Expr<'tcx>) {
+        intravisit::walk_expr(self, ex);
+
+        let tcx = self.tcx;
+
+        // If we visit an item that contains an expression outside a function body,
+        // then we need to exit before calling typeck (which will panic). See
+        // test/run-make/rustdoc-scrape-examples-invalid-expr for an example.
+        let hir = tcx.hir();
+        if hir.maybe_body_owned_by(ex.hir_id.owner).is_none() {
+            return;
+        }
+
+        // Get type of function if expression is a function call
+        let (ty, call_span, ident_span) = match ex.kind {
+            hir::ExprKind::Call(f, _) => {
+                let types = tcx.typeck(ex.hir_id.owner);
+
+                if let Some(ty) = types.node_type_opt(f.hir_id) {
+                    (ty, ex.span, f.span)
+                } else {
+                    trace!("node_type_opt({}) = None", f.hir_id);
+                    return;
+                }
+            }
+            hir::ExprKind::MethodCall(path, _, call_span) => {
+                let types = tcx.typeck(ex.hir_id.owner);
+                let Some(def_id) = types.type_dependent_def_id(ex.hir_id) else {
+                    trace!("type_dependent_def_id({}) = None", ex.hir_id);
+                    return;
+                };
+
+                let ident_span = path.ident.span;
+                (tcx.type_of(def_id), call_span, ident_span)
+            }
+            _ => {
+                return;
+            }
+        };
+
+        // If this span comes from a macro expansion, then the source code may not actually show
+        // a use of the given item, so it would be a poor example. Hence, we skip all uses in macros.
+        if call_span.from_expansion() {
+            trace!("Rejecting expr from macro: {call_span:?}");
+            return;
+        }
+
+        // If the enclosing item has a span coming from a proc macro, then we also don't want to include
+        // the example.
+        let enclosing_item_span = tcx
+            .hir()
+            .span_with_body(tcx.hir().local_def_id_to_hir_id(tcx.hir().get_parent_item(ex.hir_id)));
+        if enclosing_item_span.from_expansion() {
+            trace!("Rejecting expr ({call_span:?}) from macro item: {enclosing_item_span:?}");
+            return;
+        }
+
+        // If the enclosing item doesn't actually enclose the call, this means we probably have a weird
+        // macro issue even though the spans aren't tagged as being from an expansion.
+        if !enclosing_item_span.contains(call_span) {
+            warn!(
+                "Attempted to scrape call at [{call_span:?}] whose enclosing item [{enclosing_item_span:?}] doesn't contain the span of the call."
+            );
+            return;
+        }
+
+        // Similarly for the call w/ the function ident.
+        if !call_span.contains(ident_span) {
+            warn!(
+                "Attempted to scrape call at [{call_span:?}] whose identifier [{ident_span:?}] was not contained in the span of the call."
+            );
+            return;
+        }
+
+        // Save call site only if the function resolves to a concrete definition
+        let ty::FnDef(def_id, _) = ty.kind() else {
+            return;
+        };
+
+        if !self.target_crates.contains(&def_id.krate) {
+            trace!("Rejecting expr from crate not being documented: {call_span:?}");
+            return;
+        }
+
+        // Only calls located in a real file with a local path can be shown as examples.
+        let source_map = tcx.sess.source_map();
+        let file = source_map.lookup_char_pos(call_span.lo()).file;
+        let file_path = match file.name.clone() {
+            FileName::Real(real_filename) => real_filename.into_local_path(),
+            _ => None,
+        };
+        let Some(file_path) = file_path else {
+            return;
+        };
+
+        // Borrow the path: `file_path` is still needed below for the trace message and the
+        // display name, and `canonicalize` only needs `AsRef<Path>`.
+        let Ok(abs_path) = fs::canonicalize(&file_path) else {
+            trace!("Could not canonicalize file path: {}", file_path.display());
+            return;
+        };
+
+        let cx = &self.cx;
+        let clean_span = crate::clean::types::Span::new(call_span);
+        let Some(url) = cx.href_from_span(clean_span, false) else {
+            trace!(
+                "Rejecting expr ({call_span:?}) whose clean span ({clean_span:?}) cannot be turned into a link"
+            );
+            return;
+        };
+
+        // Only invoked for the first call recorded for this (function, file) pair.
+        let mk_call_data = || {
+            let display_name = file_path.display().to_string();
+            let edition = call_span.edition();
+            CallData { locations: Vec::new(), url, display_name, edition }
+        };
+
+        let fn_key = tcx.def_path_hash(*def_id);
+        let fn_entries = self.calls.entry(fn_key).or_default();
+
+        trace!("Including expr: {:?}", call_span);
+        // Extend the item span back to the preceding newline.
+        let enclosing_item_span =
+            source_map.span_extend_to_prev_char(enclosing_item_span, '\n', false);
+        let Some(location) =
+            CallLocation::new(call_span, ident_span, enclosing_item_span, &file)
+        else {
+            trace!("Could not get serializable call location for {call_span:?}");
+            return;
+        };
+        fn_entries.entry(abs_path).or_insert_with(mk_call_data).locations.push(location);
+    }
+}
+
+/// Scrapes the current crate for calls into the target crates and writes the collected
+/// call locations to `options.output_path`.
+///
+/// Any failure (context initialization, compilation errors surfaced while visiting, or
+/// writing the output) is reported as a fatal session error.
+pub(crate) fn run(
+    krate: clean::Crate,
+    mut renderopts: config::RenderOptions,
+    cache: formats::cache::Cache,
+    tcx: TyCtxt<'_>,
+    options: ScrapeExamplesOptions,
+) -> interface::Result<()> {
+    let scrape = move || -> Result<(), String> {
+        // Generates source files for examples
+        renderopts.no_emit_shared = true;
+        let (cx, _) = Context::init(krate, renderopts, cache, tcx).map_err(|e| e.to_string())?;
+
+        // Collect the crate numbers corresponding to the provided target crate names.
+        // If two different versions of a crate are in the dependency tree, then examples will
+        // be collected from both.
+        let all_crates: Vec<_> = tcx
+            .crates(())
+            .iter()
+            .chain([&LOCAL_CRATE])
+            .map(|crate_num| (crate_num, tcx.crate_name(*crate_num)))
+            .collect();
+
+        let mut target_crates = Vec::new();
+        for target in options.target_crates {
+            target_crates.extend(
+                all_crates
+                    .iter()
+                    .filter(|(_, name)| name.as_str() == target)
+                    .map(|(crate_num, _)| **crate_num),
+            );
+        }
+
+        debug!("All crates in TyCtxt: {all_crates:?}");
+        debug!("Scrape examples target_crates: {target_crates:?}");
+
+        // Run call-finder on all items
+        let mut calls = FxHashMap::default();
+        let mut finder = FindCalls { tcx, map: tcx.hir(), cx, target_crates, calls: &mut calls };
+        tcx.hir().visit_all_item_likes_in_crate(&mut finder);
+
+        // The visitor might have found a type error, which we need to
+        // promote to a fatal error
+        if tcx.sess.diagnostic().has_errors_or_lint_errors().is_some() {
+            return Err(String::from("Compilation failed, aborting rustdoc"));
+        }
+
+        // Sort call locations within a given file in document order
+        for file_calls in calls.values_mut().flat_map(|fn_calls| fn_calls.values_mut()) {
+            file_calls.locations.sort_by_key(|loc| loc.call_expr.byte_span.0);
+        }
+
+        // Save output to provided path
+        let mut encoder = FileEncoder::new(options.output_path).map_err(|e| e.to_string())?;
+        calls.encode(&mut encoder);
+        encoder.finish().map_err(|e| e.to_string())?;
+
+        Ok(())
+    };
+
+    if let Err(message) = scrape() {
+        tcx.sess.fatal(&message);
+    }
+
+    Ok(())
+}
+
+/// Reads every file in `with_examples`, decodes it as a set of call locations, and merges
+/// the results into a single map.
+///
+/// On the first file that cannot be read, the error is reported through `diag` and `Err(1)`
+/// is returned.
+// Note: the Handler must be passed in explicitly because sess isn't available while parsing options
+pub(crate) fn load_call_locations(
+    with_examples: Vec<String>,
+    diag: &rustc_errors::Handler,
+) -> Result<AllCallLocations, i32> {
+    let mut merged: AllCallLocations = FxHashMap::default();
+
+    for path in with_examples {
+        let bytes = match fs::read(&path) {
+            Ok(bytes) => bytes,
+            Err(e) => {
+                let reason: String = format!("{} (for path {})", e, path);
+                diag.err(&format!("failed to load examples: {}", reason));
+                return Err(1);
+            }
+        };
+
+        let mut decoder = MemDecoder::new(&bytes, 0);
+        let decoded = AllCallLocations::decode(&mut decoder);
+
+        // Calls to the same function found in different scrape outputs end up in one entry.
+        for (function, fn_calls) in decoded {
+            merged.entry(function).or_default().extend(fn_calls);
+        }
+    }
+
+    Ok(merged)
+}