From c23a457e72abe608715ac76f076f47dc42af07a5 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Thu, 30 May 2024 20:31:44 +0200 Subject: Merging upstream version 1.74.1+dfsg1. Signed-off-by: Daniel Baumann --- compiler/rustc_metadata/messages.ftl | 3 - compiler/rustc_metadata/src/creader.rs | 10 +- compiler/rustc_metadata/src/fs.rs | 4 +- compiler/rustc_metadata/src/lib.rs | 4 +- compiler/rustc_metadata/src/locator.rs | 3 +- compiler/rustc_metadata/src/rmeta/decoder.rs | 267 ++++++++++++++++----- .../src/rmeta/decoder/cstore_impl.rs | 20 +- compiler/rustc_metadata/src/rmeta/encoder.rs | 214 +++++++++++++---- compiler/rustc_metadata/src/rmeta/mod.rs | 22 +- compiler/rustc_metadata/src/rmeta/table.rs | 157 ++++++++---- 10 files changed, 520 insertions(+), 184 deletions(-) (limited to 'compiler/rustc_metadata') diff --git a/compiler/rustc_metadata/messages.ftl b/compiler/rustc_metadata/messages.ftl index cc58d51be..633004fdd 100644 --- a/compiler/rustc_metadata/messages.ftl +++ b/compiler/rustc_metadata/messages.ftl @@ -259,9 +259,6 @@ metadata_std_required = metadata_symbol_conflicts_current = the current crate is indistinguishable from one of its dependencies: it has the same crate-name `{$crate_name}` and was compiled with the same `-C metadata` arguments. This will result in symbol conflicts between the two. -metadata_symbol_conflicts_others = - found two different crates with name `{$crate_name}` that are not distinguished by differing `-C metadata`. This will result in symbol conflicts between the two. - metadata_target_no_std_support = the `{$locator_triple}` target may not support the standard library diff --git a/compiler/rustc_metadata/src/creader.rs b/compiler/rustc_metadata/src/creader.rs index fce80ab37..692214753 100644 --- a/compiler/rustc_metadata/src/creader.rs +++ b/compiler/rustc_metadata/src/creader.rs @@ -8,7 +8,7 @@ use rustc_ast::expand::allocator::{alloc_error_handler_name, global_fn_name, All use rustc_ast::{self as ast, *}; use rustc_data_structures::fx::FxHashSet; use rustc_data_structures::svh::Svh; -use rustc_data_structures::sync::{MappedReadGuard, MappedWriteGuard, ReadGuard, WriteGuard}; +use rustc_data_structures::sync::{FreezeReadGuard, FreezeWriteGuard}; use rustc_expand::base::SyntaxExtension; use rustc_hir::def_id::{CrateNum, LocalDefId, StableCrateId, StableCrateIdMap, LOCAL_CRATE}; use rustc_hir::definitions::Definitions; @@ -134,14 +134,14 @@ impl<'a> std::fmt::Debug for CrateDump<'a> { } impl CStore { - pub fn from_tcx(tcx: TyCtxt<'_>) -> MappedReadGuard<'_, CStore> { - ReadGuard::map(tcx.untracked().cstore.read(), |cstore| { + pub fn from_tcx(tcx: TyCtxt<'_>) -> FreezeReadGuard<'_, CStore> { + FreezeReadGuard::map(tcx.untracked().cstore.read(), |cstore| { cstore.as_any().downcast_ref::().expect("`tcx.cstore` is not a `CStore`") }) } - pub fn from_tcx_mut(tcx: TyCtxt<'_>) -> MappedWriteGuard<'_, CStore> { - WriteGuard::map(tcx.untracked().cstore.write(), |cstore| { + pub fn from_tcx_mut(tcx: TyCtxt<'_>) -> FreezeWriteGuard<'_, CStore> { + FreezeWriteGuard::map(tcx.untracked().cstore.write(), |cstore| { cstore.untracked_as_any().downcast_mut().expect("`tcx.cstore` is not a `CStore`") }) } diff --git a/compiler/rustc_metadata/src/fs.rs b/compiler/rustc_metadata/src/fs.rs index 2a9662b80..7eb2a347d 100644 --- a/compiler/rustc_metadata/src/fs.rs +++ b/compiler/rustc_metadata/src/fs.rs @@ -5,7 +5,6 @@ use crate::errors::{ use crate::{encode_metadata, EncodedMetadata}; use rustc_data_structures::temp_dir::MaybeTempDir; -use rustc_hir::def_id::LOCAL_CRATE; use rustc_middle::ty::TyCtxt; use rustc_session::config::{OutFileName, OutputType}; use rustc_session::output::filename_for_metadata; @@ -40,8 +39,7 @@ pub fn emit_wrapper_file( } pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) { - let crate_name = tcx.crate_name(LOCAL_CRATE); - let out_filename = filename_for_metadata(tcx.sess, crate_name, tcx.output_filenames(())); + let out_filename = filename_for_metadata(tcx.sess, tcx.output_filenames(())); // To avoid races with another rustc process scanning the output directory, // we need to write the file somewhere else and atomically move it to its // final destination, with an `fs::rename` call. In order for the rename to diff --git a/compiler/rustc_metadata/src/lib.rs b/compiler/rustc_metadata/src/lib.rs index 87373d997..fa77b36c4 100644 --- a/compiler/rustc_metadata/src/lib.rs +++ b/compiler/rustc_metadata/src/lib.rs @@ -26,7 +26,7 @@ extern crate rustc_middle; #[macro_use] extern crate tracing; -pub use rmeta::{provide, provide_extern}; +pub use rmeta::provide; use rustc_errors::{DiagnosticMessage, SubdiagnosticMessage}; use rustc_fluent_macro::fluent_messages; @@ -42,6 +42,6 @@ pub mod locator; pub use fs::{emit_wrapper_file, METADATA_FILENAME}; pub use native_libs::find_native_static_library; -pub use rmeta::{encode_metadata, EncodedMetadata, METADATA_HEADER}; +pub use rmeta::{encode_metadata, rendered_const, EncodedMetadata, METADATA_HEADER}; fluent_messages! { "../messages.ftl" } diff --git a/compiler/rustc_metadata/src/locator.rs b/compiler/rustc_metadata/src/locator.rs index bf6004ba8..3062939d8 100644 --- a/compiler/rustc_metadata/src/locator.rs +++ b/compiler/rustc_metadata/src/locator.rs @@ -903,10 +903,11 @@ pub fn list_file_metadata( path: &Path, metadata_loader: &dyn MetadataLoader, out: &mut dyn Write, + ls_kinds: &[String], ) -> IoResult<()> { let flavor = get_flavor_from_path(path); match get_metadata_section(target, flavor, path, metadata_loader) { - Ok(metadata) => metadata.list_crate_metadata(out), + Ok(metadata) => metadata.list_crate_metadata(out, ls_kinds), Err(msg) => write!(out, "{msg}\n"), } } diff --git a/compiler/rustc_metadata/src/rmeta/decoder.rs b/compiler/rustc_metadata/src/rmeta/decoder.rs index e8f66c36a..b189e79df 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder.rs @@ -9,7 +9,7 @@ use rustc_data_structures::captures::Captures; use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::owned_slice::OwnedSlice; use rustc_data_structures::svh::Svh; -use rustc_data_structures::sync::{AppendOnlyVec, AtomicBool, Lock, Lrc, OnceCell}; +use rustc_data_structures::sync::{AppendOnlyVec, AtomicBool, Lock, Lrc, OnceLock}; use rustc_data_structures::unhash::UnhashMap; use rustc_expand::base::{SyntaxExtension, SyntaxExtensionKind}; use rustc_expand::proc_macro::{AttrProcMacro, BangProcMacro, DeriveProcMacro}; @@ -24,7 +24,6 @@ use rustc_middle::middle::exported_symbols::{ExportedSymbol, SymbolExportInfo}; use rustc_middle::mir::interpret::{AllocDecodingSession, AllocDecodingState}; use rustc_middle::ty::codec::TyDecoder; use rustc_middle::ty::fast_reject::SimplifiedType; -use rustc_middle::ty::GeneratorDiagnosticData; use rustc_middle::ty::{self, ParameterizedOverTcx, Ty, TyCtxt, Visibility}; use rustc_serialize::opaque::MemDecoder; use rustc_serialize::{Decodable, Decoder}; @@ -44,7 +43,6 @@ use std::sync::atomic::Ordering; use std::{io, iter, mem}; pub(super) use cstore_impl::provide; -pub use cstore_impl::provide_extern; use rustc_span::hygiene::HygieneDecodeContext; mod cstore_impl; @@ -93,7 +91,7 @@ pub(crate) struct CrateMetadata { /// For every definition in this crate, maps its `DefPathHash` to its `DefIndex`. def_path_hash_map: DefPathHashMapRef<'static>, /// Likewise for ExpnHash. - expn_hash_map: OnceCell>, + expn_hash_map: OnceLock>, /// Used for decoding interpret::AllocIds in a cached & thread-safe manner. alloc_decoding_state: AllocDecodingState, /// Caches decoded `DefKey`s. @@ -250,6 +248,7 @@ impl<'a, 'tcx> Metadata<'a, 'tcx> for (CrateMetadataRef<'a>, TyCtxt<'tcx>) { } impl LazyValue { + #[inline] fn decode<'a, 'tcx, M: Metadata<'a, 'tcx>>(self, metadata: M) -> T::Value<'tcx> where T::Value<'tcx>: Decodable>, @@ -294,6 +293,7 @@ unsafe impl<'a, 'tcx, T: Decodable>> TrustedLen } impl LazyArray { + #[inline] fn decode<'a, 'tcx, M: Metadata<'a, 'tcx>>( self, metadata: M, @@ -360,8 +360,8 @@ impl<'a, 'tcx> DecodeContext<'a, 'tcx> { self.read_lazy_offset_then(|pos| LazyArray::from_position_and_num_elems(pos, len)) } - fn read_lazy_table(&mut self, len: usize) -> LazyTable { - self.read_lazy_offset_then(|pos| LazyTable::from_position_and_encoded_size(pos, len)) + fn read_lazy_table(&mut self, width: usize, len: usize) -> LazyTable { + self.read_lazy_offset_then(|pos| LazyTable::from_position_and_encoded_size(pos, width, len)) } #[inline] @@ -420,6 +420,7 @@ impl<'a, 'tcx> TyDecoder for DecodeContext<'a, 'tcx> { } impl<'a, 'tcx> Decodable> for CrateNum { + #[inline] fn decode(d: &mut DecodeContext<'a, 'tcx>) -> CrateNum { let cnum = CrateNum::from_u32(d.read_u32()); d.map_encoded_cnum_to_current(cnum) @@ -427,18 +428,21 @@ impl<'a, 'tcx> Decodable> for CrateNum { } impl<'a, 'tcx> Decodable> for DefIndex { + #[inline] fn decode(d: &mut DecodeContext<'a, 'tcx>) -> DefIndex { DefIndex::from_u32(d.read_u32()) } } impl<'a, 'tcx> Decodable> for ExpnIndex { + #[inline] fn decode(d: &mut DecodeContext<'a, 'tcx>) -> ExpnIndex { ExpnIndex::from_u32(d.read_u32()) } } impl<'a, 'tcx> Decodable> for ast::AttrId { + #[inline] fn decode(d: &mut DecodeContext<'a, 'tcx>) -> ast::AttrId { let sess = d.sess.expect("can't decode AttrId without Session"); sess.parse_sess.attr_id_generator.mk_attr_id() @@ -672,6 +676,7 @@ impl<'a, 'tcx, T> Decodable> for LazyValue { } impl<'a, 'tcx, T> Decodable> for LazyArray { + #[inline] fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> Self { let len = decoder.read_usize(); if len == 0 { LazyArray::default() } else { decoder.read_lazy_array(len) } @@ -680,8 +685,9 @@ impl<'a, 'tcx, T> Decodable> for LazyArray { impl<'a, 'tcx, I: Idx, T> Decodable> for LazyTable { fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> Self { + let width = decoder.read_usize(); let len = decoder.read_usize(); - decoder.read_lazy_table(len) + decoder.read_lazy_table(width, len) } } @@ -717,25 +723,196 @@ impl MetadataBlob { LazyValue::::from_position(NonZeroUsize::new(pos).unwrap()).decode(self) } - pub(crate) fn list_crate_metadata(&self, out: &mut dyn io::Write) -> io::Result<()> { + pub(crate) fn list_crate_metadata( + &self, + out: &mut dyn io::Write, + ls_kinds: &[String], + ) -> io::Result<()> { let root = self.get_root(); - writeln!(out, "Crate info:")?; - writeln!(out, "name {}{}", root.name(), root.extra_filename)?; - writeln!(out, "hash {} stable_crate_id {:?}", root.hash(), root.stable_crate_id)?; - writeln!(out, "proc_macro {:?}", root.proc_macro_data.is_some())?; - writeln!(out, "=External Dependencies=")?; - - for (i, dep) in root.crate_deps.decode(self).enumerate() { - let CrateDep { name, extra_filename, hash, host_hash, kind, is_private } = dep; - let number = i + 1; - - writeln!( - out, - "{number} {name}{extra_filename} hash {hash} host_hash {host_hash:?} kind {kind:?} {privacy}", - privacy = if is_private { "private" } else { "public" } - )?; + + let all_ls_kinds = vec![ + "root".to_owned(), + "lang_items".to_owned(), + "features".to_owned(), + "items".to_owned(), + ]; + let ls_kinds = if ls_kinds.contains(&"all".to_owned()) { &all_ls_kinds } else { ls_kinds }; + + for kind in ls_kinds { + match &**kind { + "root" => { + writeln!(out, "Crate info:")?; + writeln!(out, "name {}{}", root.name(), root.extra_filename)?; + writeln!( + out, + "hash {} stable_crate_id {:?}", + root.hash(), + root.stable_crate_id + )?; + writeln!(out, "proc_macro {:?}", root.proc_macro_data.is_some())?; + writeln!(out, "triple {}", root.header.triple.triple())?; + writeln!(out, "edition {}", root.edition)?; + writeln!(out, "symbol_mangling_version {:?}", root.symbol_mangling_version)?; + writeln!( + out, + "required_panic_strategy {:?} panic_in_drop_strategy {:?}", + root.required_panic_strategy, root.panic_in_drop_strategy + )?; + writeln!( + out, + "has_global_allocator {} has_alloc_error_handler {} has_panic_handler {} has_default_lib_allocator {}", + root.has_global_allocator, + root.has_alloc_error_handler, + root.has_panic_handler, + root.has_default_lib_allocator + )?; + writeln!( + out, + "compiler_builtins {} needs_allocator {} needs_panic_runtime {} no_builtins {} panic_runtime {} profiler_runtime {}", + root.compiler_builtins, + root.needs_allocator, + root.needs_panic_runtime, + root.no_builtins, + root.panic_runtime, + root.profiler_runtime + )?; + + writeln!(out, "=External Dependencies=")?; + let dylib_dependency_formats = + root.dylib_dependency_formats.decode(self).collect::>(); + for (i, dep) in root.crate_deps.decode(self).enumerate() { + let CrateDep { name, extra_filename, hash, host_hash, kind, is_private } = + dep; + let number = i + 1; + + writeln!( + out, + "{number} {name}{extra_filename} hash {hash} host_hash {host_hash:?} kind {kind:?} {privacy}{linkage}", + privacy = if is_private { "private" } else { "public" }, + linkage = if dylib_dependency_formats.is_empty() { + String::new() + } else { + format!(" linkage {:?}", dylib_dependency_formats[i]) + } + )?; + } + write!(out, "\n")?; + } + + "lang_items" => { + writeln!(out, "=Lang items=")?; + for (id, lang_item) in root.lang_items.decode(self) { + writeln!( + out, + "{} = crate{}", + lang_item.name(), + DefPath::make(LOCAL_CRATE, id, |parent| root + .tables + .def_keys + .get(self, parent) + .unwrap() + .decode(self)) + .to_string_no_crate_verbose() + )?; + } + for lang_item in root.lang_items_missing.decode(self) { + writeln!(out, "{} = ", lang_item.name())?; + } + write!(out, "\n")?; + } + + "features" => { + writeln!(out, "=Lib features=")?; + for (feature, since) in root.lib_features.decode(self) { + writeln!( + out, + "{}{}", + feature, + if let Some(since) = since { + format!(" since {since}") + } else { + String::new() + } + )?; + } + write!(out, "\n")?; + } + + "items" => { + writeln!(out, "=Items=")?; + + fn print_item( + blob: &MetadataBlob, + out: &mut dyn io::Write, + item: DefIndex, + indent: usize, + ) -> io::Result<()> { + let root = blob.get_root(); + + let def_kind = root.tables.opt_def_kind.get(blob, item).unwrap(); + let def_key = root.tables.def_keys.get(blob, item).unwrap().decode(blob); + let def_name = if item == CRATE_DEF_INDEX { + rustc_span::symbol::kw::Crate + } else { + def_key + .disambiguated_data + .data + .get_opt_name() + .unwrap_or_else(|| Symbol::intern("???")) + }; + let visibility = + root.tables.visibility.get(blob, item).unwrap().decode(blob).map_id( + |index| { + format!( + "crate{}", + DefPath::make(LOCAL_CRATE, index, |parent| root + .tables + .def_keys + .get(blob, parent) + .unwrap() + .decode(blob)) + .to_string_no_crate_verbose() + ) + }, + ); + write!( + out, + "{nil: { + writeln!( + out, + "unknown -Zls kind. allowed values are: all, root, lang_items, features, items" + )?; + } + } } - write!(out, "\n")?; + Ok(()) } } @@ -1493,11 +1670,12 @@ impl<'a, 'tcx> CrateMetadataRef<'a> { // We can't reuse an existing SourceFile, so allocate a new one // containing the information we need. + let original_end_pos = source_file_to_import.end_position(); let rustc_span::SourceFile { mut name, src_hash, - start_pos, - end_pos, + start_pos: original_start_pos, + source_len, lines, multibyte_chars, non_narrow_chars, @@ -1539,59 +1717,38 @@ impl<'a, 'tcx> CrateMetadataRef<'a> { // on `try_to_translate_virtual_to_real`). try_to_translate_virtual_to_real(&mut name); - let source_length = (end_pos - start_pos).to_usize(); - let local_version = sess.source_map().new_imported_source_file( name, src_hash, name_hash, - source_length, + source_len.to_u32(), self.cnum, lines, multibyte_chars, non_narrow_chars, normalized_pos, - start_pos, source_file_index, ); debug!( "CrateMetaData::imported_source_files alloc \ - source_file {:?} original (start_pos {:?} end_pos {:?}) \ - translated (start_pos {:?} end_pos {:?})", + source_file {:?} original (start_pos {:?} source_len {:?}) \ + translated (start_pos {:?} source_len {:?})", local_version.name, - start_pos, - end_pos, + original_start_pos, + source_len, local_version.start_pos, - local_version.end_pos + local_version.source_len ); ImportedSourceFile { - original_start_pos: start_pos, - original_end_pos: end_pos, + original_start_pos, + original_end_pos, translated_source_file: local_version, } }) .clone() } - fn get_generator_diagnostic_data( - self, - tcx: TyCtxt<'tcx>, - id: DefIndex, - ) -> Option> { - self.root - .tables - .generator_diagnostic_data - .get(self, id) - .map(|param| param.decode((self, tcx))) - .map(|generator_data| GeneratorDiagnosticData { - generator_interior_types: generator_data.generator_interior_types, - hir_owner: generator_data.hir_owner, - nodes_types: generator_data.nodes_types, - adjustments: generator_data.adjustments, - }) - } - fn get_attr_flags(self, index: DefIndex) -> AttrFlags { self.root.tables.attr_flags.get(self, index) } diff --git a/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs b/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs index aeda8af6d..f27eee0d7 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs @@ -14,10 +14,11 @@ use rustc_middle::arena::ArenaAllocatable; use rustc_middle::metadata::ModChild; use rustc_middle::middle::exported_symbols::ExportedSymbol; use rustc_middle::middle::stability::DeprecationEntry; +use rustc_middle::query::ExternProviders; use rustc_middle::query::LocalCrate; -use rustc_middle::query::{ExternProviders, Providers}; use rustc_middle::ty::fast_reject::SimplifiedType; use rustc_middle::ty::{self, TyCtxt}; +use rustc_middle::util::Providers; use rustc_session::cstore::CrateStore; use rustc_session::{Session, StableCrateId}; use rustc_span::hygiene::{ExpnHash, ExpnId}; @@ -126,12 +127,12 @@ macro_rules! provide_one { // External query providers call `crate_hash` in order to register a dependency // on the crate metadata. The exception is `crate_hash` itself, which obviously // doesn't need to do this (and can't, as it would cause a query cycle). - use rustc_middle::dep_graph::DepKind; - if DepKind::$name != DepKind::crate_hash && $tcx.dep_graph.is_fully_enabled() { + use rustc_middle::dep_graph::dep_kinds; + if dep_kinds::$name != dep_kinds::crate_hash && $tcx.dep_graph.is_fully_enabled() { $tcx.ensure().crate_hash($def_id.krate); } - let cdata = rustc_data_structures::sync::MappedReadGuard::map(CStore::from_tcx($tcx), |c| { + let cdata = rustc_data_structures::sync::FreezeReadGuard::map(CStore::from_tcx($tcx), |c| { c.get_crate_data($def_id.krate).cdata }); let $cdata = crate::creader::CrateMetadataRef { @@ -147,7 +148,7 @@ macro_rules! provide_one { macro_rules! provide { ($tcx:ident, $def_id:ident, $other:ident, $cdata:ident, $($name:ident => { $($compute:tt)* })*) => { - pub fn provide_extern(providers: &mut ExternProviders) { + fn provide_extern(providers: &mut ExternProviders) { $(provide_one! { $tcx, $def_id, $other, $cdata, $name => { $($compute)* } })* @@ -209,6 +210,7 @@ provide! { tcx, def_id, other, cdata, inferred_outlives_of => { table_defaulted_array } super_predicates_of => { table } type_of => { table } + type_alias_is_lazy => { cdata.root.tables.type_alias_is_lazy.get(cdata, def_id.index) } variances_of => { table } fn_sig => { table } codegen_fn_attrs => { table } @@ -373,7 +375,6 @@ provide! { tcx, def_id, other, cdata, crate_extern_paths => { cdata.source().paths().cloned().collect() } expn_that_defined => { cdata.get_expn_that_defined(def_id.index, tcx.sess) } - generator_diagnostic_data => { cdata.get_generator_diagnostic_data(tcx, def_id.index) } is_doc_hidden => { cdata.get_attr_flags(def_id.index).contains(AttrFlags::IS_DOC_HIDDEN) } doc_link_resolutions => { tcx.arena.alloc(cdata.get_doc_link_resolutions(def_id.index)) } doc_link_traits_in_scope => { @@ -385,7 +386,7 @@ pub(in crate::rmeta) fn provide(providers: &mut Providers) { // FIXME(#44234) - almost all of these queries have no sub-queries and // therefore no actual inputs, they're just reading tables calculated in // resolve! Does this work? Unsure! That's what the issue is about - *providers = Providers { + providers.queries = rustc_middle::query::Providers { allocator_kind: |tcx, ()| CStore::from_tcx(tcx).allocator_kind(), alloc_error_handler_kind: |tcx, ()| CStore::from_tcx(tcx).alloc_error_handler_kind(), is_private_dep: |_tcx, LocalCrate| false, @@ -510,11 +511,12 @@ pub(in crate::rmeta) fn provide(providers: &mut Providers) { crates: |tcx, ()| { // The list of loaded crates is now frozen in query cache, // so make sure cstore is not mutably accessed from here on. - tcx.untracked().cstore.leak(); + tcx.untracked().cstore.freeze(); tcx.arena.alloc_from_iter(CStore::from_tcx(tcx).iter_crate_data().map(|(cnum, _)| cnum)) }, - ..*providers + ..providers.queries }; + provide_extern(&mut providers.extern_queries); } impl CStore { diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs index be91ad408..a4ba94327 100644 --- a/compiler/rustc_metadata/src/rmeta/encoder.rs +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -14,11 +14,12 @@ use rustc_data_structures::temp_dir::MaybeTempDir; use rustc_hir as hir; use rustc_hir::def::DefKind; use rustc_hir::def_id::{ - CrateNum, DefId, DefIndex, LocalDefId, CRATE_DEF_ID, CRATE_DEF_INDEX, LOCAL_CRATE, + CrateNum, DefId, DefIndex, LocalDefId, LocalDefIdSet, CRATE_DEF_ID, CRATE_DEF_INDEX, + LOCAL_CRATE, }; use rustc_hir::definitions::DefPathData; -use rustc_hir::intravisit; use rustc_hir::lang_items::LangItem; +use rustc_hir_pretty::id_to_string; use rustc_middle::middle::debugger_visualizer::DebuggerVisualizerFile; use rustc_middle::middle::dependency_format::Linkage; use rustc_middle::middle::exported_symbols::{ @@ -30,7 +31,6 @@ use rustc_middle::query::Providers; use rustc_middle::traits::specialization_graph; use rustc_middle::ty::codec::TyEncoder; use rustc_middle::ty::fast_reject::{self, SimplifiedType, TreatParams}; -use rustc_middle::ty::TypeVisitableExt; use rustc_middle::ty::{self, AssocItemContainer, SymbolName, Ty, TyCtxt}; use rustc_middle::util::common::to_readable_str; use rustc_serialize::{opaque, Decodable, Decoder, Encodable, Encoder}; @@ -50,7 +50,6 @@ pub(super) struct EncodeContext<'a, 'tcx> { opaque: opaque::FileEncoder, tcx: TyCtxt<'tcx>, feat: &'tcx rustc_feature::Features, - tables: TableBuilders, lazy_state: LazyState, @@ -131,7 +130,8 @@ impl<'a, 'tcx, T> Encodable> for LazyArray { impl<'a, 'tcx, I, T> Encodable> for LazyTable { fn encode(&self, e: &mut EncodeContext<'a, 'tcx>) { - e.emit_usize(self.encoded_size); + e.emit_usize(self.width); + e.emit_usize(self.len); e.emit_lazy_distance(self.position); } } @@ -279,8 +279,8 @@ impl<'a, 'tcx> Encodable> for SpanData { // All of this logic ensures that the final result of deserialization is a 'normal' // Span that can be used without any additional trouble. let metadata_index = { - // Introduce a new scope so that we drop the 'lock()' temporary - match &*source_file.external_src.lock() { + // Introduce a new scope so that we drop the 'read()' temporary + match &*source_file.external_src.read() { ExternalSource::Foreign { metadata_index, .. } => *metadata_index, src => panic!("Unexpected external source {src:?}"), } @@ -347,6 +347,13 @@ impl<'a, 'tcx> Encodable> for Symbol { } } +impl<'a, 'tcx> Encodable> for [u8] { + fn encode(&self, e: &mut EncodeContext<'a, 'tcx>) { + Encoder::emit_usize(e, self.len()); + e.emit_raw_bytes(self); + } +} + impl<'a, 'tcx> TyEncoder for EncodeContext<'a, 'tcx> { const CLEAR_CROSS_CRATE: bool = true; @@ -819,7 +826,7 @@ fn should_encode_span(def_kind: DefKind) -> bool { | DefKind::Enum | DefKind::Variant | DefKind::Trait - | DefKind::TyAlias { .. } + | DefKind::TyAlias | DefKind::ForeignTy | DefKind::TraitAlias | DefKind::AssocTy @@ -854,7 +861,7 @@ fn should_encode_attrs(def_kind: DefKind) -> bool { | DefKind::Enum | DefKind::Variant | DefKind::Trait - | DefKind::TyAlias { .. } + | DefKind::TyAlias | DefKind::ForeignTy | DefKind::TraitAlias | DefKind::AssocTy @@ -895,7 +902,7 @@ fn should_encode_expn_that_defined(def_kind: DefKind) -> bool { | DefKind::Variant | DefKind::Trait | DefKind::Impl { .. } => true, - DefKind::TyAlias { .. } + DefKind::TyAlias | DefKind::ForeignTy | DefKind::TraitAlias | DefKind::AssocTy @@ -930,7 +937,7 @@ fn should_encode_visibility(def_kind: DefKind) -> bool { | DefKind::Enum | DefKind::Variant | DefKind::Trait - | DefKind::TyAlias { .. } + | DefKind::TyAlias | DefKind::ForeignTy | DefKind::TraitAlias | DefKind::AssocTy @@ -974,7 +981,7 @@ fn should_encode_stability(def_kind: DefKind) -> bool { | DefKind::Const | DefKind::Fn | DefKind::ForeignMod - | DefKind::TyAlias { .. } + | DefKind::TyAlias | DefKind::OpaqueTy | DefKind::Enum | DefKind::Union @@ -994,15 +1001,31 @@ fn should_encode_stability(def_kind: DefKind) -> bool { } } -/// Whether we should encode MIR. +/// Whether we should encode MIR. Return a pair, resp. for CTFE and for LLVM. /// /// Computing, optimizing and encoding the MIR is a relatively expensive operation. /// We want to avoid this work when not required. Therefore: /// - we only compute `mir_for_ctfe` on items with const-eval semantics; /// - we skip `optimized_mir` for check runs. +/// - we only encode `optimized_mir` that could be generated in other crates, that is, a code that +/// is either generic or has inline hint, and is reachable from the other crates (contained +/// in reachable set). /// -/// Return a pair, resp. for CTFE and for LLVM. -fn should_encode_mir(tcx: TyCtxt<'_>, def_id: LocalDefId) -> (bool, bool) { +/// Note: Reachable set describes definitions that might be generated or referenced from other +/// crates and it can be used to limit optimized MIR that needs to be encoded. On the other hand, +/// the reachable set doesn't have much to say about which definitions might be evaluated at compile +/// time in other crates, so it cannot be used to omit CTFE MIR. For example, `f` below is +/// unreachable and yet it can be evaluated in other crates: +/// +/// ``` +/// const fn f() -> usize { 0 } +/// pub struct S { pub a: [usize; f()] } +/// ``` +fn should_encode_mir( + tcx: TyCtxt<'_>, + reachable_set: &LocalDefIdSet, + def_id: LocalDefId, +) -> (bool, bool) { match tcx.def_kind(def_id) { // Constructors DefKind::Ctor(_, _) => { @@ -1019,14 +1042,15 @@ fn should_encode_mir(tcx: TyCtxt<'_>, def_id: LocalDefId) -> (bool, bool) { // Full-fledged functions + closures DefKind::AssocFn | DefKind::Fn | DefKind::Closure => { let generics = tcx.generics_of(def_id); - let needs_inline = (generics.requires_monomorphization(tcx) - || tcx.codegen_fn_attrs(def_id).requests_inline()) - && tcx.sess.opts.output_types.should_codegen(); + let opt = tcx.sess.opts.unstable_opts.always_encode_mir + || (tcx.sess.opts.output_types.should_codegen() + && reachable_set.contains(&def_id) + && (generics.requires_monomorphization(tcx) + || tcx.codegen_fn_attrs(def_id).requests_inline())); // The function has a `const` modifier or is in a `#[const_trait]`. let is_const_fn = tcx.is_const_fn_raw(def_id.to_def_id()) || tcx.is_const_default_method(def_id.to_def_id()); - let always_encode_mir = tcx.sess.opts.unstable_opts.always_encode_mir; - (is_const_fn, needs_inline || always_encode_mir) + (is_const_fn, opt) } // Generators require optimized MIR to compute layout. DefKind::Generator => (false, true), @@ -1067,9 +1091,7 @@ fn should_encode_variances<'tcx>(tcx: TyCtxt<'tcx>, def_id: DefId, def_kind: Def | DefKind::Closure | DefKind::Generator | DefKind::ExternCrate => false, - DefKind::TyAlias { lazy } => { - lazy || tcx.type_of(def_id).instantiate_identity().has_opaque_types() - } + DefKind::TyAlias => tcx.type_alias_is_lazy(def_id), } } @@ -1080,7 +1102,7 @@ fn should_encode_generics(def_kind: DefKind) -> bool { | DefKind::Enum | DefKind::Variant | DefKind::Trait - | DefKind::TyAlias { .. } + | DefKind::TyAlias | DefKind::ForeignTy | DefKind::TraitAlias | DefKind::AssocTy @@ -1120,7 +1142,7 @@ fn should_encode_type(tcx: TyCtxt<'_>, def_id: LocalDefId, def_kind: DefKind) -> | DefKind::Fn | DefKind::Const | DefKind::Static(..) - | DefKind::TyAlias { .. } + | DefKind::TyAlias | DefKind::ForeignTy | DefKind::Impl { .. } | DefKind::AssocFn @@ -1180,7 +1202,7 @@ fn should_encode_fn_sig(def_kind: DefKind) -> bool { | DefKind::Const | DefKind::Static(..) | DefKind::Ctor(..) - | DefKind::TyAlias { .. } + | DefKind::TyAlias | DefKind::OpaqueTy | DefKind::ForeignTy | DefKind::Impl { .. } @@ -1221,7 +1243,7 @@ fn should_encode_constness(def_kind: DefKind) -> bool { | DefKind::AssocConst | DefKind::AnonConst | DefKind::Static(..) - | DefKind::TyAlias { .. } + | DefKind::TyAlias | DefKind::OpaqueTy | DefKind::Impl { of_trait: false } | DefKind::ForeignTy @@ -1254,7 +1276,7 @@ fn should_encode_const(def_kind: DefKind) -> bool { | DefKind::Field | DefKind::Fn | DefKind::Static(..) - | DefKind::TyAlias { .. } + | DefKind::TyAlias | DefKind::OpaqueTy | DefKind::ForeignTy | DefKind::Impl { .. } @@ -1414,7 +1436,8 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { } } if let DefKind::Generator = def_kind { - self.encode_info_for_generator(local_id); + let data = self.tcx.generator_kind(def_id).unwrap(); + record!(self.tables.generator_kind[def_id] <- data); } if let DefKind::Enum | DefKind::Struct | DefKind::Union = def_kind { self.encode_info_for_adt(local_id); @@ -1425,6 +1448,11 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { if let DefKind::Macro(_) = def_kind { self.encode_info_for_macro(local_id); } + if let DefKind::TyAlias = def_kind { + self.tables + .type_alias_is_lazy + .set(def_id.index, self.tcx.type_alias_is_lazy(def_id)); + } if let DefKind::OpaqueTy = def_kind { self.encode_explicit_item_bounds(def_id); self.tables @@ -1572,9 +1600,10 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { } let tcx = self.tcx; + let reachable_set = tcx.reachable_set(()); let keys_and_jobs = tcx.mir_keys(()).iter().filter_map(|&def_id| { - let (encode_const, encode_opt) = should_encode_mir(tcx, def_id); + let (encode_const, encode_opt) = should_encode_mir(tcx, reachable_set, def_id); if encode_const || encode_opt { Some((def_id, encode_const, encode_opt)) } else { None } }); for (def_id, encode_const, encode_opt) in keys_and_jobs { @@ -1586,8 +1615,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { record!(self.tables.closure_saved_names_of_captured_variables[def_id.to_def_id()] <- tcx.closure_saved_names_of_captured_variables(def_id)); - if tcx.sess.opts.unstable_opts.drop_tracking_mir - && let DefKind::Generator = self.tcx.def_kind(def_id) + if let DefKind::Generator = self.tcx.def_kind(def_id) && let Some(witnesses) = tcx.mir_generator_witnesses(def_id) { record!(self.tables.mir_generator_witnesses[def_id.to_def_id()] <- witnesses); @@ -1607,13 +1635,19 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { record!(self.tables.mir_const_qualif[def_id.to_def_id()] <- qualifs); let body_id = tcx.hir().maybe_body_owned_by(def_id); if let Some(body_id) = body_id { - let const_data = self.encode_rendered_const_for_body(body_id); + let const_data = rendered_const(self.tcx, body_id); record!(self.tables.rendered_const[def_id.to_def_id()] <- const_data); } } } record!(self.tables.promoted_mir[def_id.to_def_id()] <- tcx.promoted_mir(def_id)); + if let DefKind::Generator = self.tcx.def_kind(def_id) + && let Some(witnesses) = tcx.mir_generator_witnesses(def_id) + { + record!(self.tables.mir_generator_witnesses[def_id.to_def_id()] <- witnesses); + } + let instance = ty::InstanceDef::Item(def_id.to_def_id()); let unused = tcx.unused_generic_params(instance); self.tables.unused_generic_params.set(def_id.local_def_index, unused); @@ -1675,14 +1709,6 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { } } - fn encode_rendered_const_for_body(&mut self, body_id: hir::BodyId) -> String { - let hir = self.tcx.hir(); - let body = hir.body(body_id); - rustc_hir_pretty::to_string(&(&hir as &dyn intravisit::Map<'_>), |s| { - s.print_expr(&body.value) - }) - } - #[instrument(level = "debug", skip(self))] fn encode_info_for_macro(&mut self, def_id: LocalDefId) { let tcx = self.tcx; @@ -1694,15 +1720,6 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { record!(self.tables.macro_definition[def_id.to_def_id()] <- &*macro_def.body); } - #[instrument(level = "debug", skip(self))] - fn encode_info_for_generator(&mut self, def_id: LocalDefId) { - let typeck_result: &'tcx ty::TypeckResults<'tcx> = self.tcx.typeck(def_id); - let data = self.tcx.generator_kind(def_id).unwrap(); - let generator_diagnostic_data = typeck_result.get_generator_diagnostic_data(); - record!(self.tables.generator_kind[def_id.to_def_id()] <- data); - record!(self.tables.generator_diagnostic_data[def_id.to_def_id()] <- generator_diagnostic_data); - } - fn encode_native_libraries(&mut self) -> LazyArray { empty_proc_macro!(self); let used_libraries = self.tcx.native_libraries(LOCAL_CRATE); @@ -2067,8 +2084,9 @@ fn prefetch_mir(tcx: TyCtxt<'_>) { return; } + let reachable_set = tcx.reachable_set(()); par_for_each_in(tcx.mir_keys(()), |&def_id| { - let (encode_const, encode_opt) = should_encode_mir(tcx, def_id); + let (encode_const, encode_opt) = should_encode_mir(tcx, reachable_set, def_id); if encode_const { tcx.ensure_with_value().mir_for_ctfe(def_id); @@ -2284,3 +2302,97 @@ pub fn provide(providers: &mut Providers) { ..*providers } } + +/// Build a textual representation of an unevaluated constant expression. +/// +/// If the const expression is too complex, an underscore `_` is returned. +/// For const arguments, it's `{ _ }` to be precise. +/// This means that the output is not necessarily valid Rust code. +/// +/// Currently, only +/// +/// * literals (optionally with a leading `-`) +/// * unit `()` +/// * blocks (`{ … }`) around simple expressions and +/// * paths without arguments +/// +/// are considered simple enough. Simple blocks are included since they are +/// necessary to disambiguate unit from the unit type. +/// This list might get extended in the future. +/// +/// Without this censoring, in a lot of cases the output would get too large +/// and verbose. Consider `match` expressions, blocks and deeply nested ADTs. +/// Further, private and `doc(hidden)` fields of structs would get leaked +/// since HIR datatypes like the `body` parameter do not contain enough +/// semantic information for this function to be able to hide them – +/// at least not without significant performance overhead. +/// +/// Whenever possible, prefer to evaluate the constant first and try to +/// use a different method for pretty-printing. Ideally this function +/// should only ever be used as a fallback. +pub fn rendered_const<'tcx>(tcx: TyCtxt<'tcx>, body: hir::BodyId) -> String { + let hir = tcx.hir(); + let value = &hir.body(body).value; + + #[derive(PartialEq, Eq)] + enum Classification { + Literal, + Simple, + Complex, + } + + use Classification::*; + + fn classify(expr: &hir::Expr<'_>) -> Classification { + match &expr.kind { + hir::ExprKind::Unary(hir::UnOp::Neg, expr) => { + if matches!(expr.kind, hir::ExprKind::Lit(_)) { Literal } else { Complex } + } + hir::ExprKind::Lit(_) => Literal, + hir::ExprKind::Tup([]) => Simple, + hir::ExprKind::Block(hir::Block { stmts: [], expr: Some(expr), .. }, _) => { + if classify(expr) == Complex { Complex } else { Simple } + } + // Paths with a self-type or arguments are too “complex” following our measure since + // they may leak private fields of structs (with feature `adt_const_params`). + // Consider: `>::CONSTANT`. + // Paths without arguments are definitely harmless though. + hir::ExprKind::Path(hir::QPath::Resolved(_, hir::Path { segments, .. })) => { + if segments.iter().all(|segment| segment.args.is_none()) { Simple } else { Complex } + } + // FIXME: Claiming that those kinds of QPaths are simple is probably not true if the Ty + // contains const arguments. Is there a *concise* way to check for this? + hir::ExprKind::Path(hir::QPath::TypeRelative(..)) => Simple, + // FIXME: Can they contain const arguments and thus leak private struct fields? + hir::ExprKind::Path(hir::QPath::LangItem(..)) => Simple, + _ => Complex, + } + } + + let classification = classify(value); + + if classification == Literal + && !value.span.from_expansion() + && let Ok(snippet) = tcx.sess.source_map().span_to_snippet(value.span) { + // For literals, we avoid invoking the pretty-printer and use the source snippet instead to + // preserve certain stylistic choices the user likely made for the sake legibility like + // + // * hexadecimal notation + // * underscores + // * character escapes + // + // FIXME: This passes through `-/*spacer*/0` verbatim. + snippet + } else if classification == Simple { + // Otherwise we prefer pretty-printing to get rid of extraneous whitespace, comments and + // other formatting artifacts. + id_to_string(&hir, body.hir_id) + } else if tcx.def_kind(hir.body_owner_def_id(body).to_def_id()) == DefKind::AnonConst { + // FIXME: Omit the curly braces if the enclosing expression is an array literal + // with a repeated element (an `ExprKind::Repeat`) as in such case it + // would not actually need any disambiguation. + "{ _ }".to_owned() + } else { + "_".to_owned() + } +} diff --git a/compiler/rustc_metadata/src/rmeta/mod.rs b/compiler/rustc_metadata/src/rmeta/mod.rs index a89e235ff..42764af52 100644 --- a/compiler/rustc_metadata/src/rmeta/mod.rs +++ b/compiler/rustc_metadata/src/rmeta/mod.rs @@ -21,10 +21,10 @@ use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs; use rustc_middle::middle::exported_symbols::{ExportedSymbol, SymbolExportInfo}; use rustc_middle::middle::resolve_bound_vars::ObjectLifetimeDefault; use rustc_middle::mir; -use rustc_middle::query::Providers; use rustc_middle::ty::fast_reject::SimplifiedType; use rustc_middle::ty::{self, ReprOptions, Ty, UnusedGenericParams}; -use rustc_middle::ty::{DeducedParamAttrs, GeneratorDiagnosticData, ParameterizedOverTcx, TyCtxt}; +use rustc_middle::ty::{DeducedParamAttrs, ParameterizedOverTcx, TyCtxt}; +use rustc_middle::util::Providers; use rustc_serialize::opaque::FileEncoder; use rustc_session::config::SymbolManglingVersion; use rustc_session::cstore::{CrateDepKind, ForeignModule, LinkagePreference, NativeLib}; @@ -38,11 +38,10 @@ use rustc_target::spec::{PanicStrategy, TargetTriple}; use std::marker::PhantomData; use std::num::NonZeroUsize; -pub use decoder::provide_extern; use decoder::DecodeContext; pub(crate) use decoder::{CrateMetadata, CrateNumMap, MetadataBlob}; use encoder::EncodeContext; -pub use encoder::{encode_metadata, EncodedMetadata}; +pub use encoder::{encode_metadata, rendered_const, EncodedMetadata}; use rustc_span::hygiene::SyntaxContextData; mod decoder; @@ -142,7 +141,11 @@ impl LazyArray { /// eagerly and in-order. struct LazyTable { position: NonZeroUsize, - encoded_size: usize, + /// The encoded size of the elements of a table is selected at runtime to drop + /// trailing zeroes. This is the number of bytes used for each table element. + width: usize, + /// How many elements are in the table. + len: usize, _marker: PhantomData T>, } @@ -153,9 +156,10 @@ impl ParameterizedOverTcx for LazyTable LazyTable { fn from_position_and_encoded_size( position: NonZeroUsize, - encoded_size: usize, + width: usize, + len: usize, ) -> LazyTable { - LazyTable { position, encoded_size, _marker: PhantomData } + LazyTable { position, width, len, _marker: PhantomData } } } @@ -379,6 +383,7 @@ define_tables! { is_intrinsic: Table, is_macro_rules: Table, is_type_alias_impl_trait: Table, + type_alias_is_lazy: Table, attr_flags: Table, def_path_hashes: Table, explicit_item_bounds: Table, Span)>>, @@ -434,7 +439,7 @@ define_tables! { coerce_unsized_info: Table>, mir_const_qualif: Table>, rendered_const: Table>, - asyncness: Table, + asyncness: Table, fn_arg_names: Table>, generator_kind: Table>, trait_def: Table>, @@ -448,7 +453,6 @@ define_tables! { // definitions from any given crate. def_keys: Table>, proc_macro_quoted_spans: Table>, - generator_diagnostic_data: Table>>, variant_data: Table>, assoc_container: Table, macro_definition: Table>, diff --git a/compiler/rustc_metadata/src/rmeta/table.rs b/compiler/rustc_metadata/src/rmeta/table.rs index ea66c770b..bb1320942 100644 --- a/compiler/rustc_metadata/src/rmeta/table.rs +++ b/compiler/rustc_metadata/src/rmeta/table.rs @@ -5,7 +5,6 @@ use rustc_hir::def::{CtorKind, CtorOf}; use rustc_index::Idx; use rustc_middle::ty::{ParameterizedOverTcx, UnusedGenericParams}; use rustc_serialize::opaque::FileEncoder; -use rustc_serialize::Encoder as _; use rustc_span::hygiene::MacroKind; use std::marker::PhantomData; use std::num::NonZeroUsize; @@ -38,6 +37,12 @@ impl IsDefault for u32 { } } +impl IsDefault for u64 { + fn is_default(&self) -> bool { + *self == 0 + } +} + impl IsDefault for LazyArray { fn is_default(&self) -> bool { self.num_elems == 0 @@ -89,6 +94,20 @@ impl FixedSizeEncoding for u32 { } } +impl FixedSizeEncoding for u64 { + type ByteArray = [u8; 8]; + + #[inline] + fn from_bytes(b: &[u8; 8]) -> Self { + Self::from_le_bytes(*b) + } + + #[inline] + fn write_to_bytes(self, b: &mut [u8; 8]) { + *b = self.to_le_bytes(); + } +} + macro_rules! fixed_size_enum { ($ty:ty { $(($($pat:tt)*))* }) => { impl FixedSizeEncoding for Option<$ty> { @@ -126,8 +145,7 @@ fixed_size_enum! { ( Enum ) ( Variant ) ( Trait ) - ( TyAlias { lazy: false } ) - ( TyAlias { lazy: true } ) + ( TyAlias ) ( ForeignTy ) ( TraitAlias ) ( AssocTy ) @@ -186,9 +204,9 @@ fixed_size_enum! { } fixed_size_enum! { - hir::IsAsync { - ( NotAsync ) - ( Async ) + ty::Asyncness { + ( Yes ) + ( No ) } } @@ -300,21 +318,21 @@ impl FixedSizeEncoding for UnusedGenericParams { // generic `LazyValue` impl, but in the general case we might not need / want // to fit every `usize` in `u32`. impl FixedSizeEncoding for Option> { - type ByteArray = [u8; 4]; + type ByteArray = [u8; 8]; #[inline] - fn from_bytes(b: &[u8; 4]) -> Self { - let position = NonZeroUsize::new(u32::from_bytes(b) as usize)?; + fn from_bytes(b: &[u8; 8]) -> Self { + let position = NonZeroUsize::new(u64::from_bytes(b) as usize)?; Some(LazyValue::from_position(position)) } #[inline] - fn write_to_bytes(self, b: &mut [u8; 4]) { + fn write_to_bytes(self, b: &mut [u8; 8]) { match self { None => unreachable!(), Some(lazy) => { let position = lazy.position.get(); - let position: u32 = position.try_into().unwrap(); + let position: u64 = position.try_into().unwrap(); position.write_to_bytes(b) } } @@ -323,55 +341,75 @@ impl FixedSizeEncoding for Option> { impl LazyArray { #[inline] - fn write_to_bytes_impl(self, b: &mut [u8; 8]) { - let ([position_bytes, meta_bytes], []) = b.as_chunks_mut::<4>() else { panic!() }; - - let position = self.position.get(); - let position: u32 = position.try_into().unwrap(); - position.write_to_bytes(position_bytes); - - let len = self.num_elems; - let len: u32 = len.try_into().unwrap(); - len.write_to_bytes(meta_bytes); + fn write_to_bytes_impl(self, b: &mut [u8; 16]) { + let position = (self.position.get() as u64).to_le_bytes(); + let len = (self.num_elems as u64).to_le_bytes(); + + // Element width is selected at runtime on a per-table basis by omitting trailing + // zero bytes in table elements. This works very naturally when table elements are + // simple numbers but `LazyArray` is a pair of integers. If naively encoded, the second + // element would shield the trailing zeroes in the first. Interleaving the bytes + // of the position and length exposes trailing zeroes in both to the optimization. + // We encode length second because we generally expect it to be smaller. + for i in 0..8 { + b[2 * i] = position[i]; + b[2 * i + 1] = len[i]; + } } - fn from_bytes_impl(position_bytes: &[u8; 4], meta_bytes: &[u8; 4]) -> Option> { - let position = NonZeroUsize::new(u32::from_bytes(position_bytes) as usize)?; - let len = u32::from_bytes(meta_bytes) as usize; + fn from_bytes_impl(position: &[u8; 8], meta: &[u8; 8]) -> Option> { + let position = NonZeroUsize::new(u64::from_bytes(&position) as usize)?; + let len = u64::from_bytes(&meta) as usize; Some(LazyArray::from_position_and_num_elems(position, len)) } } +// Decoding helper for the encoding scheme used by `LazyArray`. +// Interleaving the bytes of the two integers exposes trailing bytes in the first integer +// to the varint scheme that we use for tables. +#[inline] +fn decode_interleaved(encoded: &[u8; 16]) -> ([u8; 8], [u8; 8]) { + let mut first = [0u8; 8]; + let mut second = [0u8; 8]; + for i in 0..8 { + first[i] = encoded[2 * i]; + second[i] = encoded[2 * i + 1]; + } + (first, second) +} + impl FixedSizeEncoding for LazyArray { - type ByteArray = [u8; 8]; + type ByteArray = [u8; 16]; #[inline] - fn from_bytes(b: &[u8; 8]) -> Self { - let ([position_bytes, meta_bytes], []) = b.as_chunks::<4>() else { panic!() }; - if *meta_bytes == [0; 4] { + fn from_bytes(b: &[u8; 16]) -> Self { + let (position, meta) = decode_interleaved(b); + + if meta == [0; 8] { return Default::default(); } - LazyArray::from_bytes_impl(position_bytes, meta_bytes).unwrap() + LazyArray::from_bytes_impl(&position, &meta).unwrap() } #[inline] - fn write_to_bytes(self, b: &mut [u8; 8]) { + fn write_to_bytes(self, b: &mut [u8; 16]) { assert!(!self.is_default()); self.write_to_bytes_impl(b) } } impl FixedSizeEncoding for Option> { - type ByteArray = [u8; 8]; + type ByteArray = [u8; 16]; #[inline] - fn from_bytes(b: &[u8; 8]) -> Self { - let ([position_bytes, meta_bytes], []) = b.as_chunks::<4>() else { panic!() }; - LazyArray::from_bytes_impl(position_bytes, meta_bytes) + fn from_bytes(b: &[u8; 16]) -> Self { + let (position, meta) = decode_interleaved(b); + + LazyArray::from_bytes_impl(&position, &meta) } #[inline] - fn write_to_bytes(self, b: &mut [u8; 8]) { + fn write_to_bytes(self, b: &mut [u8; 16]) { match self { None => unreachable!(), Some(lazy) => lazy.write_to_bytes_impl(b), @@ -381,13 +419,14 @@ impl FixedSizeEncoding for Option> { /// Helper for constructing a table's serialization (also see `Table`). pub(super) struct TableBuilder { + width: usize, blocks: IndexVec, _marker: PhantomData, } impl Default for TableBuilder { fn default() -> Self { - TableBuilder { blocks: Default::default(), _marker: PhantomData } + TableBuilder { width: 0, blocks: Default::default(), _marker: PhantomData } } } @@ -415,40 +454,66 @@ impl> TableBui // > store bit-masks of which item in each bucket is actually serialized). let block = self.blocks.ensure_contains_elem(i, || [0; N]); value.write_to_bytes(block); + if self.width != N { + let width = N - trailing_zeros(block); + self.width = self.width.max(width); + } } } pub(crate) fn encode(&self, buf: &mut FileEncoder) -> LazyTable { let pos = buf.position(); + + let width = self.width; for block in &self.blocks { - buf.emit_raw_bytes(block); + buf.write_with(|dest| { + *dest = *block; + width + }); } - let num_bytes = self.blocks.len() * N; + LazyTable::from_position_and_encoded_size( NonZeroUsize::new(pos as usize).unwrap(), - num_bytes, + width, + self.blocks.len(), ) } } +fn trailing_zeros(x: &[u8]) -> usize { + x.iter().rev().take_while(|b| **b == 0).count() +} + impl + ParameterizedOverTcx> LazyTable where for<'tcx> T::Value<'tcx>: FixedSizeEncoding, { /// Given the metadata, extract out the value at a particular index (if any). - #[inline(never)] pub(super) fn get<'a, 'tcx, M: Metadata<'a, 'tcx>>(&self, metadata: M, i: I) -> T::Value<'tcx> { - trace!("LazyTable::lookup: index={:?} len={:?}", i, self.encoded_size); + trace!("LazyTable::lookup: index={:?} len={:?}", i, self.len); + + // Access past the end of the table returns a Default + if i.index() >= self.len { + return Default::default(); + } - let start = self.position.get(); - let bytes = &metadata.blob()[start..start + self.encoded_size]; - let (bytes, []) = bytes.as_chunks::() else { panic!() }; - bytes.get(i.index()).map_or_else(Default::default, FixedSizeEncoding::from_bytes) + let width = self.width; + let start = self.position.get() + (width * i.index()); + let end = start + width; + let bytes = &metadata.blob()[start..end]; + + if let Ok(fixed) = bytes.try_into() { + FixedSizeEncoding::from_bytes(fixed) + } else { + let mut fixed = [0u8; N]; + fixed[..width].copy_from_slice(bytes); + FixedSizeEncoding::from_bytes(&fixed) + } } /// Size of the table in entries, including possible gaps. pub(super) fn size(&self) -> usize { - self.encoded_size / N + self.len } } -- cgit v1.2.3