diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
commit | 698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch) | |
tree | 173a775858bd501c378080a10dca74132f05bc50 /src/librustdoc/html/render/search_index.rs | |
parent | Initial commit. (diff) | |
download | rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip |
Adding upstream version 1.64.0+dfsg1.upstream/1.64.0+dfsg1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/librustdoc/html/render/search_index.rs')
-rw-r--r-- | src/librustdoc/html/render/search_index.rs | 589 |
1 files changed, 589 insertions, 0 deletions
diff --git a/src/librustdoc/html/render/search_index.rs b/src/librustdoc/html/render/search_index.rs new file mode 100644 index 000000000..d672f0bb5 --- /dev/null +++ b/src/librustdoc/html/render/search_index.rs @@ -0,0 +1,589 @@ +use std::collections::hash_map::Entry; +use std::collections::BTreeMap; + +use rustc_data_structures::fx::FxHashMap; +use rustc_middle::ty::TyCtxt; +use rustc_span::def_id::LOCAL_CRATE; +use rustc_span::symbol::Symbol; +use serde::ser::{Serialize, SerializeStruct, Serializer}; + +use crate::clean; +use crate::clean::types::{ + FnRetTy, Function, GenericBound, Generics, ItemId, Type, WherePredicate, +}; +use crate::formats::cache::{Cache, OrphanImplItem}; +use crate::formats::item_type::ItemType; +use crate::html::format::join_with_double_colon; +use crate::html::markdown::short_markdown_summary; +use crate::html::render::{IndexItem, IndexItemFunctionType, RenderType, RenderTypeId}; + +/// Builds the search index from the collected metadata +pub(crate) fn build_index<'tcx>( + krate: &clean::Crate, + cache: &mut Cache, + tcx: TyCtxt<'tcx>, +) -> String { + let mut itemid_to_pathid = FxHashMap::default(); + let mut crate_paths = vec![]; + + // Attach all orphan items to the type's definition if the type + // has since been learned. + for &OrphanImplItem { parent, ref item, ref impl_generics } in &cache.orphan_impl_items { + if let Some(&(ref fqp, _)) = cache.paths.get(&parent) { + let desc = item + .doc_value() + .map_or_else(String::new, |s| short_markdown_summary(&s, &item.link_names(cache))); + cache.search_index.push(IndexItem { + ty: item.type_(), + name: item.name.unwrap().to_string(), + path: join_with_double_colon(&fqp[..fqp.len() - 1]), + desc, + parent: Some(parent), + parent_idx: None, + search_type: get_function_type_for_search(item, tcx, impl_generics.as_ref(), cache), + aliases: item.attrs.get_doc_aliases(), + }); + } + } + + let crate_doc = krate + .module + .doc_value() + .map_or_else(String::new, |s| short_markdown_summary(&s, &krate.module.link_names(cache))); + + // Aliases added through `#[doc(alias = "...")]`. Since a few items can have the same alias, + // we need the alias element to have an array of items. + let mut aliases: BTreeMap<String, Vec<usize>> = BTreeMap::new(); + + // Sort search index items. This improves the compressibility of the search index. + cache.search_index.sort_unstable_by(|k1, k2| { + // `sort_unstable_by_key` produces lifetime errors + let k1 = (&k1.path, &k1.name, &k1.ty, &k1.parent); + let k2 = (&k2.path, &k2.name, &k2.ty, &k2.parent); + std::cmp::Ord::cmp(&k1, &k2) + }); + + // Set up alias indexes. + for (i, item) in cache.search_index.iter().enumerate() { + for alias in &item.aliases[..] { + aliases.entry(alias.as_str().to_lowercase()).or_default().push(i); + } + } + + // Reduce `DefId` in paths into smaller sequential numbers, + // and prune the paths that do not appear in the index. + let mut lastpath = ""; + let mut lastpathid = 0usize; + + // First, on function signatures + let mut search_index = std::mem::replace(&mut cache.search_index, Vec::new()); + for item in search_index.iter_mut() { + fn convert_render_type( + ty: &mut RenderType, + cache: &mut Cache, + itemid_to_pathid: &mut FxHashMap<ItemId, usize>, + lastpathid: &mut usize, + crate_paths: &mut Vec<(ItemType, Symbol)>, + ) { + if let Some(generics) = &mut ty.generics { + for item in generics { + convert_render_type(item, cache, itemid_to_pathid, lastpathid, crate_paths); + } + } + let Cache { ref paths, ref external_paths, .. } = *cache; + let Some(id) = ty.id.clone() else { + assert!(ty.generics.is_some()); + return; + }; + let (itemid, path, item_type) = match id { + RenderTypeId::DefId(defid) => { + if let Some(&(ref fqp, item_type)) = + paths.get(&defid).or_else(|| external_paths.get(&defid)) + { + (ItemId::DefId(defid), *fqp.last().unwrap(), item_type) + } else { + ty.id = None; + return; + } + } + RenderTypeId::Primitive(primitive) => ( + ItemId::Primitive(primitive, LOCAL_CRATE), + primitive.as_sym(), + ItemType::Primitive, + ), + RenderTypeId::Index(_) => return, + }; + match itemid_to_pathid.entry(itemid) { + Entry::Occupied(entry) => ty.id = Some(RenderTypeId::Index(*entry.get())), + Entry::Vacant(entry) => { + let pathid = *lastpathid; + entry.insert(pathid); + *lastpathid += 1; + crate_paths.push((item_type, path)); + ty.id = Some(RenderTypeId::Index(pathid)); + } + } + } + if let Some(search_type) = &mut item.search_type { + for item in &mut search_type.inputs { + convert_render_type( + item, + cache, + &mut itemid_to_pathid, + &mut lastpathid, + &mut crate_paths, + ); + } + for item in &mut search_type.output { + convert_render_type( + item, + cache, + &mut itemid_to_pathid, + &mut lastpathid, + &mut crate_paths, + ); + } + } + } + + let Cache { ref paths, .. } = *cache; + + // Then, on parent modules + let crate_items: Vec<&IndexItem> = search_index + .iter_mut() + .map(|item| { + item.parent_idx = + item.parent.and_then(|defid| match itemid_to_pathid.entry(ItemId::DefId(defid)) { + Entry::Occupied(entry) => Some(*entry.get()), + Entry::Vacant(entry) => { + let pathid = lastpathid; + entry.insert(pathid); + lastpathid += 1; + + if let Some(&(ref fqp, short)) = paths.get(&defid) { + crate_paths.push((short, *fqp.last().unwrap())); + Some(pathid) + } else { + None + } + } + }); + + // Omit the parent path if it is same to that of the prior item. + if lastpath == &item.path { + item.path.clear(); + } else { + lastpath = &item.path; + } + + &*item + }) + .collect(); + + struct CrateData<'a> { + doc: String, + items: Vec<&'a IndexItem>, + paths: Vec<(ItemType, Symbol)>, + // The String is alias name and the vec is the list of the elements with this alias. + // + // To be noted: the `usize` elements are indexes to `items`. + aliases: &'a BTreeMap<String, Vec<usize>>, + } + + impl<'a> Serialize for CrateData<'a> { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + let has_aliases = !self.aliases.is_empty(); + let mut crate_data = + serializer.serialize_struct("CrateData", if has_aliases { 9 } else { 8 })?; + crate_data.serialize_field("doc", &self.doc)?; + crate_data.serialize_field( + "t", + &self.items.iter().map(|item| &item.ty).collect::<Vec<_>>(), + )?; + crate_data.serialize_field( + "n", + &self.items.iter().map(|item| &item.name).collect::<Vec<_>>(), + )?; + crate_data.serialize_field( + "q", + &self.items.iter().map(|item| &item.path).collect::<Vec<_>>(), + )?; + crate_data.serialize_field( + "d", + &self.items.iter().map(|item| &item.desc).collect::<Vec<_>>(), + )?; + crate_data.serialize_field( + "i", + &self + .items + .iter() + .map(|item| { + assert_eq!( + item.parent.is_some(), + item.parent_idx.is_some(), + "`{}` is missing idx", + item.name + ); + // 0 is a sentinel, everything else is one-indexed + item.parent_idx.map(|x| x + 1).unwrap_or(0) + }) + .collect::<Vec<_>>(), + )?; + crate_data.serialize_field( + "f", + &self + .items + .iter() + .map(|item| { + // Fake option to get `0` out as a sentinel instead of `null`. + // We want to use `0` because it's three less bytes. + enum FunctionOption<'a> { + Function(&'a IndexItemFunctionType), + None, + } + impl<'a> Serialize for FunctionOption<'a> { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + match self { + FunctionOption::None => 0.serialize(serializer), + FunctionOption::Function(ty) => ty.serialize(serializer), + } + } + } + match &item.search_type { + Some(ty) => FunctionOption::Function(ty), + None => FunctionOption::None, + } + }) + .collect::<Vec<_>>(), + )?; + crate_data.serialize_field( + "p", + &self.paths.iter().map(|(it, s)| (it, s.to_string())).collect::<Vec<_>>(), + )?; + if has_aliases { + crate_data.serialize_field("a", &self.aliases)?; + } + crate_data.end() + } + } + + // Collect the index into a string + format!( + r#""{}":{}"#, + krate.name(tcx), + serde_json::to_string(&CrateData { + doc: crate_doc, + items: crate_items, + paths: crate_paths, + aliases: &aliases, + }) + .expect("failed serde conversion") + // All these `replace` calls are because we have to go through JS string for JSON content. + .replace('\\', r"\\") + .replace('\'', r"\'") + // We need to escape double quotes for the JSON. + .replace("\\\"", "\\\\\"") + ) +} + +pub(crate) fn get_function_type_for_search<'tcx>( + item: &clean::Item, + tcx: TyCtxt<'tcx>, + impl_generics: Option<&(clean::Type, clean::Generics)>, + cache: &Cache, +) -> Option<IndexItemFunctionType> { + let (mut inputs, mut output) = match *item.kind { + clean::FunctionItem(ref f) => get_fn_inputs_and_outputs(f, tcx, impl_generics, cache), + clean::MethodItem(ref m, _) => get_fn_inputs_and_outputs(m, tcx, impl_generics, cache), + clean::TyMethodItem(ref m) => get_fn_inputs_and_outputs(m, tcx, impl_generics, cache), + _ => return None, + }; + + inputs.retain(|a| a.id.is_some() || a.generics.is_some()); + output.retain(|a| a.id.is_some() || a.generics.is_some()); + + Some(IndexItemFunctionType { inputs, output }) +} + +fn get_index_type(clean_type: &clean::Type, generics: Vec<RenderType>) -> RenderType { + RenderType { + id: get_index_type_id(clean_type), + generics: if generics.is_empty() { None } else { Some(generics) }, + } +} + +fn get_index_type_id(clean_type: &clean::Type) -> Option<RenderTypeId> { + match *clean_type { + clean::Type::Path { ref path, .. } => Some(RenderTypeId::DefId(path.def_id())), + clean::DynTrait(ref bounds, _) => { + let path = &bounds[0].trait_; + Some(RenderTypeId::DefId(path.def_id())) + } + clean::Primitive(p) => Some(RenderTypeId::Primitive(p)), + clean::BorrowedRef { ref type_, .. } | clean::RawPointer(_, ref type_) => { + get_index_type_id(type_) + } + clean::BareFunction(_) + | clean::Generic(_) + | clean::ImplTrait(_) + | clean::Tuple(_) + | clean::Slice(_) + | clean::Array(_, _) + | clean::QPath { .. } + | clean::Infer => None, + } +} + +/// The point of this function is to replace bounds with types. +/// +/// i.e. `[T, U]` when you have the following bounds: `T: Display, U: Option<T>` will return +/// `[Display, Option]`. If a type parameter has no trait bound, it is discarded. +/// +/// Important note: It goes through generics recursively. So if you have +/// `T: Option<Result<(), ()>>`, it'll go into `Option` and then into `Result`. +#[instrument(level = "trace", skip(tcx, res, cache))] +fn add_generics_and_bounds_as_types<'tcx, 'a>( + self_: Option<&'a Type>, + generics: &Generics, + arg: &'a Type, + tcx: TyCtxt<'tcx>, + recurse: usize, + res: &mut Vec<RenderType>, + cache: &Cache, +) { + fn insert_ty(res: &mut Vec<RenderType>, ty: Type, mut generics: Vec<RenderType>) { + // generics and impl trait are both identified by their generics, + // rather than a type name itself + let anonymous = ty.is_full_generic() || ty.is_impl_trait(); + let generics_empty = generics.is_empty(); + + if anonymous { + if generics_empty { + // This is a type parameter with no trait bounds (for example: `T` in + // `fn f<T>(p: T)`, so not useful for the rustdoc search because we would end up + // with an empty type with an empty name. Let's just discard it. + return; + } else if generics.len() == 1 { + // In this case, no need to go through an intermediate state if the type parameter + // contains only one trait bound. + // + // For example: + // + // `fn foo<T: Display>(r: Option<T>) {}` + // + // In this case, it would contain: + // + // ``` + // [{ + // name: "option", + // generics: [{ + // name: "", + // generics: [ + // name: "Display", + // generics: [] + // }] + // }] + // }] + // ``` + // + // After removing the intermediate (unnecessary) type parameter, it'll become: + // + // ``` + // [{ + // name: "option", + // generics: [{ + // name: "Display", + // generics: [] + // }] + // }] + // ``` + // + // To be noted that it can work if there is ONLY ONE trait bound, otherwise we still + // need to keep it as is! + res.push(generics.pop().unwrap()); + return; + } + } + let index_ty = get_index_type(&ty, generics); + if index_ty.id.is_none() && generics_empty { + return; + } + res.push(index_ty); + } + + if recurse >= 10 { + // FIXME: remove this whole recurse thing when the recursion bug is fixed + // See #59502 for the original issue. + return; + } + + // First, check if it's "Self". + let arg = if let Some(self_) = self_ { + match &*arg { + Type::BorrowedRef { type_, .. } if type_.is_self_type() => self_, + type_ if type_.is_self_type() => self_, + arg => arg, + } + } else { + arg + }; + + // If this argument is a type parameter and not a trait bound or a type, we need to look + // for its bounds. + if let Type::Generic(arg_s) = *arg { + // First we check if the bounds are in a `where` predicate... + if let Some(where_pred) = generics.where_predicates.iter().find(|g| match g { + WherePredicate::BoundPredicate { ty, .. } => ty.def_id(cache) == arg.def_id(cache), + _ => false, + }) { + let mut ty_generics = Vec::new(); + let bounds = where_pred.get_bounds().unwrap_or_else(|| &[]); + for bound in bounds.iter() { + if let GenericBound::TraitBound(poly_trait, _) = bound { + for param_def in poly_trait.generic_params.iter() { + match ¶m_def.kind { + clean::GenericParamDefKind::Type { default: Some(ty), .. } => { + add_generics_and_bounds_as_types( + self_, + generics, + ty, + tcx, + recurse + 1, + &mut ty_generics, + cache, + ) + } + _ => {} + } + } + } + } + insert_ty(res, arg.clone(), ty_generics); + } + // Otherwise we check if the trait bounds are "inlined" like `T: Option<u32>`... + if let Some(bound) = generics.params.iter().find(|g| g.is_type() && g.name == arg_s) { + let mut ty_generics = Vec::new(); + for bound in bound.get_bounds().unwrap_or(&[]) { + if let Some(path) = bound.get_trait_path() { + let ty = Type::Path { path }; + add_generics_and_bounds_as_types( + self_, + generics, + &ty, + tcx, + recurse + 1, + &mut ty_generics, + cache, + ); + } + } + insert_ty(res, arg.clone(), ty_generics); + } + } else if let Type::ImplTrait(ref bounds) = *arg { + let mut ty_generics = Vec::new(); + for bound in bounds { + if let Some(path) = bound.get_trait_path() { + let ty = Type::Path { path }; + add_generics_and_bounds_as_types( + self_, + generics, + &ty, + tcx, + recurse + 1, + &mut ty_generics, + cache, + ); + } + } + insert_ty(res, arg.clone(), ty_generics); + } else { + // This is not a type parameter. So for example if we have `T, U: Option<T>`, and we're + // looking at `Option`, we enter this "else" condition, otherwise if it's `T`, we don't. + // + // So in here, we can add it directly and look for its own type parameters (so for `Option`, + // we will look for them but not for `T`). + let mut ty_generics = Vec::new(); + if let Some(arg_generics) = arg.generics() { + for gen in arg_generics.iter() { + add_generics_and_bounds_as_types( + self_, + generics, + gen, + tcx, + recurse + 1, + &mut ty_generics, + cache, + ); + } + } + insert_ty(res, arg.clone(), ty_generics); + } +} + +/// Return the full list of types when bounds have been resolved. +/// +/// i.e. `fn foo<A: Display, B: Option<A>>(x: u32, y: B)` will return +/// `[u32, Display, Option]`. +fn get_fn_inputs_and_outputs<'tcx>( + func: &Function, + tcx: TyCtxt<'tcx>, + impl_generics: Option<&(clean::Type, clean::Generics)>, + cache: &Cache, +) -> (Vec<RenderType>, Vec<RenderType>) { + let decl = &func.decl; + + let combined_generics; + let (self_, generics) = if let Some(&(ref impl_self, ref impl_generics)) = impl_generics { + match (impl_generics.is_empty(), func.generics.is_empty()) { + (true, _) => (Some(impl_self), &func.generics), + (_, true) => (Some(impl_self), impl_generics), + (false, false) => { + let mut params = func.generics.params.clone(); + params.extend(impl_generics.params.clone()); + let mut where_predicates = func.generics.where_predicates.clone(); + where_predicates.extend(impl_generics.where_predicates.clone()); + combined_generics = clean::Generics { params, where_predicates }; + (Some(impl_self), &combined_generics) + } + } + } else { + (None, &func.generics) + }; + + let mut all_types = Vec::new(); + for arg in decl.inputs.values.iter() { + let mut args = Vec::new(); + add_generics_and_bounds_as_types(self_, generics, &arg.type_, tcx, 0, &mut args, cache); + if !args.is_empty() { + all_types.extend(args); + } else { + all_types.push(get_index_type(&arg.type_, vec![])); + } + } + + let mut ret_types = Vec::new(); + match decl.output { + FnRetTy::Return(ref return_type) => { + add_generics_and_bounds_as_types( + self_, + generics, + return_type, + tcx, + 0, + &mut ret_types, + cache, + ); + if ret_types.is_empty() { + ret_types.push(get_index_type(return_type, vec![])); + } + } + _ => {} + }; + (all_types, ret_types) +} |