From e02c5b5930c2c9ba3e5423fe12e2ef0155017297 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Thu, 30 May 2024 20:31:36 +0200 Subject: Merging upstream version 1.74.1+dfsg1. Signed-off-by: Daniel Baumann --- compiler/rustc_span/src/analyze_source_file.rs | 48 +- .../rustc_span/src/analyze_source_file/tests.rs | 29 +- compiler/rustc_span/src/caching_source_map_view.rs | 15 +- compiler/rustc_span/src/hygiene.rs | 126 +++-- compiler/rustc_span/src/lib.rs | 512 +++++++++++---------- compiler/rustc_span/src/source_map.rs | 217 ++++----- compiler/rustc_span/src/source_map/tests.rs | 38 +- compiler/rustc_span/src/span_encoding.rs | 255 +++++----- compiler/rustc_span/src/symbol.rs | 24 +- compiler/rustc_span/src/tests.rs | 27 +- 10 files changed, 701 insertions(+), 590 deletions(-) (limited to 'compiler/rustc_span') diff --git a/compiler/rustc_span/src/analyze_source_file.rs b/compiler/rustc_span/src/analyze_source_file.rs index 26cd54210..450d5455f 100644 --- a/compiler/rustc_span/src/analyze_source_file.rs +++ b/compiler/rustc_span/src/analyze_source_file.rs @@ -11,26 +11,19 @@ mod tests; /// is detected at runtime. pub fn analyze_source_file( src: &str, - source_file_start_pos: BytePos, -) -> (Vec, Vec, Vec) { - let mut lines = vec![source_file_start_pos]; +) -> (Vec, Vec, Vec) { + let mut lines = vec![RelativeBytePos::from_u32(0)]; let mut multi_byte_chars = vec![]; let mut non_narrow_chars = vec![]; // Calls the right implementation, depending on hardware support available. - analyze_source_file_dispatch( - src, - source_file_start_pos, - &mut lines, - &mut multi_byte_chars, - &mut non_narrow_chars, - ); + analyze_source_file_dispatch(src, &mut lines, &mut multi_byte_chars, &mut non_narrow_chars); // The code above optimistically registers a new line *after* each \n // it encounters. If that point is already outside the source_file, remove // it again. if let Some(&last_line_start) = lines.last() { - let source_file_end = source_file_start_pos + BytePos::from_usize(src.len()); + let source_file_end = RelativeBytePos::from_usize(src.len()); assert!(source_file_end >= last_line_start); if last_line_start == source_file_end { lines.pop(); @@ -43,14 +36,12 @@ pub fn analyze_source_file( cfg_if::cfg_if! { if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { fn analyze_source_file_dispatch(src: &str, - source_file_start_pos: BytePos, - lines: &mut Vec, + lines: &mut Vec, multi_byte_chars: &mut Vec, non_narrow_chars: &mut Vec) { if is_x86_feature_detected!("sse2") { unsafe { analyze_source_file_sse2(src, - source_file_start_pos, lines, multi_byte_chars, non_narrow_chars); @@ -58,7 +49,7 @@ cfg_if::cfg_if! { } else { analyze_source_file_generic(src, src.len(), - source_file_start_pos, + RelativeBytePos::from_u32(0), lines, multi_byte_chars, non_narrow_chars); @@ -72,8 +63,7 @@ cfg_if::cfg_if! { /// SSE2 intrinsics to quickly find all newlines. #[target_feature(enable = "sse2")] unsafe fn analyze_source_file_sse2(src: &str, - output_offset: BytePos, - lines: &mut Vec, + lines: &mut Vec, multi_byte_chars: &mut Vec, non_narrow_chars: &mut Vec) { #[cfg(target_arch = "x86")] @@ -129,8 +119,7 @@ cfg_if::cfg_if! { if control_char_mask == newlines_mask { // All control characters are newlines, record them let mut newlines_mask = 0xFFFF0000 | newlines_mask as u32; - let output_offset = output_offset + - BytePos::from_usize(chunk_index * CHUNK_SIZE + 1); + let output_offset = RelativeBytePos::from_usize(chunk_index * CHUNK_SIZE + 1); loop { let index = newlines_mask.trailing_zeros(); @@ -140,7 +129,7 @@ cfg_if::cfg_if! { break } - lines.push(BytePos(index) + output_offset); + lines.push(RelativeBytePos(index) + output_offset); // Clear the bit, so we can find the next one. newlines_mask &= (!1) << index; @@ -165,7 +154,7 @@ cfg_if::cfg_if! { intra_chunk_offset = analyze_source_file_generic( &src[scan_start .. ], CHUNK_SIZE - intra_chunk_offset, - BytePos::from_usize(scan_start) + output_offset, + RelativeBytePos::from_usize(scan_start), lines, multi_byte_chars, non_narrow_chars @@ -177,7 +166,7 @@ cfg_if::cfg_if! { if tail_start < src.len() { analyze_source_file_generic(&src[tail_start ..], src.len() - tail_start, - output_offset + BytePos::from_usize(tail_start), + RelativeBytePos::from_usize(tail_start), lines, multi_byte_chars, non_narrow_chars); @@ -187,13 +176,12 @@ cfg_if::cfg_if! { // The target (or compiler version) does not support SSE2 ... fn analyze_source_file_dispatch(src: &str, - source_file_start_pos: BytePos, - lines: &mut Vec, + lines: &mut Vec, multi_byte_chars: &mut Vec, non_narrow_chars: &mut Vec) { analyze_source_file_generic(src, src.len(), - source_file_start_pos, + RelativeBytePos::from_u32(0), lines, multi_byte_chars, non_narrow_chars); @@ -207,8 +195,8 @@ cfg_if::cfg_if! { fn analyze_source_file_generic( src: &str, scan_len: usize, - output_offset: BytePos, - lines: &mut Vec, + output_offset: RelativeBytePos, + lines: &mut Vec, multi_byte_chars: &mut Vec, non_narrow_chars: &mut Vec, ) -> usize { @@ -230,11 +218,11 @@ fn analyze_source_file_generic( // This is an ASCII control character, it could be one of the cases // that are interesting to us. - let pos = BytePos::from_usize(i) + output_offset; + let pos = RelativeBytePos::from_usize(i) + output_offset; match byte { b'\n' => { - lines.push(pos + BytePos(1)); + lines.push(pos + RelativeBytePos(1)); } b'\t' => { non_narrow_chars.push(NonNarrowChar::Tab(pos)); @@ -250,7 +238,7 @@ fn analyze_source_file_generic( let c = src[i..].chars().next().unwrap(); char_len = c.len_utf8(); - let pos = BytePos::from_usize(i) + output_offset; + let pos = RelativeBytePos::from_usize(i) + output_offset; if char_len > 1 { assert!((2..=4).contains(&char_len)); diff --git a/compiler/rustc_span/src/analyze_source_file/tests.rs b/compiler/rustc_span/src/analyze_source_file/tests.rs index 66aefc9a7..0c77d080c 100644 --- a/compiler/rustc_span/src/analyze_source_file/tests.rs +++ b/compiler/rustc_span/src/analyze_source_file/tests.rs @@ -3,29 +3,28 @@ use super::*; macro_rules! test { (case: $test_name:ident, text: $text:expr, - source_file_start_pos: $source_file_start_pos:expr, lines: $lines:expr, multi_byte_chars: $multi_byte_chars:expr, non_narrow_chars: $non_narrow_chars:expr,) => { #[test] fn $test_name() { - let (lines, multi_byte_chars, non_narrow_chars) = - analyze_source_file($text, BytePos($source_file_start_pos)); + let (lines, multi_byte_chars, non_narrow_chars) = analyze_source_file($text); - let expected_lines: Vec = $lines.into_iter().map(BytePos).collect(); + let expected_lines: Vec = + $lines.into_iter().map(RelativeBytePos).collect(); assert_eq!(lines, expected_lines); let expected_mbcs: Vec = $multi_byte_chars .into_iter() - .map(|(pos, bytes)| MultiByteChar { pos: BytePos(pos), bytes }) + .map(|(pos, bytes)| MultiByteChar { pos: RelativeBytePos(pos), bytes }) .collect(); assert_eq!(multi_byte_chars, expected_mbcs); let expected_nncs: Vec = $non_narrow_chars .into_iter() - .map(|(pos, width)| NonNarrowChar::new(BytePos(pos), width)) + .map(|(pos, width)| NonNarrowChar::new(RelativeBytePos(pos), width)) .collect(); assert_eq!(non_narrow_chars, expected_nncs); @@ -36,7 +35,6 @@ macro_rules! test { test!( case: empty_text, text: "", - source_file_start_pos: 0, lines: vec![], multi_byte_chars: vec![], non_narrow_chars: vec![], @@ -45,7 +43,6 @@ test!( test!( case: newlines_short, text: "a\nc", - source_file_start_pos: 0, lines: vec![0, 2], multi_byte_chars: vec![], non_narrow_chars: vec![], @@ -54,7 +51,6 @@ test!( test!( case: newlines_long, text: "012345678\nabcdef012345678\na", - source_file_start_pos: 0, lines: vec![0, 10, 26], multi_byte_chars: vec![], non_narrow_chars: vec![], @@ -63,7 +59,6 @@ test!( test!( case: newline_and_multi_byte_char_in_same_chunk, text: "01234β789\nbcdef0123456789abcdef", - source_file_start_pos: 0, lines: vec![0, 11], multi_byte_chars: vec![(5, 2)], non_narrow_chars: vec![], @@ -72,7 +67,6 @@ test!( test!( case: newline_and_control_char_in_same_chunk, text: "01234\u{07}6789\nbcdef0123456789abcdef", - source_file_start_pos: 0, lines: vec![0, 11], multi_byte_chars: vec![], non_narrow_chars: vec![(5, 0)], @@ -81,7 +75,6 @@ test!( test!( case: multi_byte_char_short, text: "aβc", - source_file_start_pos: 0, lines: vec![0], multi_byte_chars: vec![(1, 2)], non_narrow_chars: vec![], @@ -90,7 +83,6 @@ test!( test!( case: multi_byte_char_long, text: "0123456789abcΔf012345β", - source_file_start_pos: 0, lines: vec![0], multi_byte_chars: vec![(13, 2), (22, 2)], non_narrow_chars: vec![], @@ -99,7 +91,6 @@ test!( test!( case: multi_byte_char_across_chunk_boundary, text: "0123456789abcdeΔ123456789abcdef01234", - source_file_start_pos: 0, lines: vec![0], multi_byte_chars: vec![(15, 2)], non_narrow_chars: vec![], @@ -108,7 +99,6 @@ test!( test!( case: multi_byte_char_across_chunk_boundary_tail, text: "0123456789abcdeΔ....", - source_file_start_pos: 0, lines: vec![0], multi_byte_chars: vec![(15, 2)], non_narrow_chars: vec![], @@ -117,7 +107,6 @@ test!( test!( case: non_narrow_short, text: "0\t2", - source_file_start_pos: 0, lines: vec![0], multi_byte_chars: vec![], non_narrow_chars: vec![(1, 4)], @@ -126,7 +115,6 @@ test!( test!( case: non_narrow_long, text: "01\t3456789abcdef01234567\u{07}9", - source_file_start_pos: 0, lines: vec![0], multi_byte_chars: vec![], non_narrow_chars: vec![(2, 4), (24, 0)], @@ -135,8 +123,7 @@ test!( test!( case: output_offset_all, text: "01\t345\n789abcΔf01234567\u{07}9\nbcΔf", - source_file_start_pos: 1000, - lines: vec![0 + 1000, 7 + 1000, 27 + 1000], - multi_byte_chars: vec![(13 + 1000, 2), (29 + 1000, 2)], - non_narrow_chars: vec![(2 + 1000, 4), (24 + 1000, 0)], + lines: vec![0, 7, 27], + multi_byte_chars: vec![(13, 2), (29, 2)], + non_narrow_chars: vec![(2, 4), (24, 0)], ); diff --git a/compiler/rustc_span/src/caching_source_map_view.rs b/compiler/rustc_span/src/caching_source_map_view.rs index 886112769..fbfc5c22f 100644 --- a/compiler/rustc_span/src/caching_source_map_view.rs +++ b/compiler/rustc_span/src/caching_source_map_view.rs @@ -1,5 +1,5 @@ use crate::source_map::SourceMap; -use crate::{BytePos, SourceFile, SpanData}; +use crate::{BytePos, Pos, RelativeBytePos, SourceFile, SpanData}; use rustc_data_structures::sync::Lrc; use std::ops::Range; @@ -37,6 +37,7 @@ impl CacheEntry { self.file_index = file_idx; } + let pos = self.file.relative_position(pos); let line_index = self.file.lookup_line(pos).unwrap(); let line_bounds = self.file.line_bounds(line_index); self.line_number = line_index + 1; @@ -79,7 +80,7 @@ impl<'sm> CachingSourceMapView<'sm> { pub fn byte_pos_to_line_and_col( &mut self, pos: BytePos, - ) -> Option<(Lrc, usize, BytePos)> { + ) -> Option<(Lrc, usize, RelativeBytePos)> { self.time_stamp += 1; // Check if the position is in one of the cached lines @@ -88,11 +89,8 @@ impl<'sm> CachingSourceMapView<'sm> { let cache_entry = &mut self.line_cache[cache_idx as usize]; cache_entry.touch(self.time_stamp); - return Some(( - cache_entry.file.clone(), - cache_entry.line_number, - pos - cache_entry.line.start, - )); + let col = RelativeBytePos(pos.to_u32() - cache_entry.line.start.to_u32()); + return Some((cache_entry.file.clone(), cache_entry.line_number, col)); } // No cache hit ... @@ -108,7 +106,8 @@ impl<'sm> CachingSourceMapView<'sm> { let cache_entry = &mut self.line_cache[oldest]; cache_entry.update(new_file_and_idx, pos, self.time_stamp); - Some((cache_entry.file.clone(), cache_entry.line_number, pos - cache_entry.line.start)) + let col = RelativeBytePos(pos.to_u32() - cache_entry.line.start.to_u32()); + Some((cache_entry.file.clone(), cache_entry.line_number, col)) } pub fn span_data_to_lines_and_cols( diff --git a/compiler/rustc_span/src/hygiene.rs b/compiler/rustc_span/src/hygiene.rs index 9f2ff4378..88081700c 100644 --- a/compiler/rustc_span/src/hygiene.rs +++ b/compiler/rustc_span/src/hygiene.rs @@ -34,11 +34,13 @@ use rustc_data_structures::fingerprint::Fingerprint; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_data_structures::stable_hasher::HashingControls; use rustc_data_structures::stable_hasher::{Hash64, HashStable, StableHasher}; -use rustc_data_structures::sync::{Lock, Lrc}; +use rustc_data_structures::sync::{Lock, Lrc, WorkerLocal}; use rustc_data_structures::unhash::UnhashMap; use rustc_index::IndexVec; use rustc_macros::HashStable_Generic; use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; +use std::cell::RefCell; +use std::collections::hash_map::Entry; use std::fmt; use std::hash::Hash; @@ -1241,13 +1243,25 @@ impl HygieneEncodeContext { #[derive(Default)] /// Additional information used to assist in decoding hygiene data -pub struct HygieneDecodeContext { +struct HygieneDecodeContextInner { // Maps serialized `SyntaxContext` ids to a `SyntaxContext` in the current // global `HygieneData`. When we deserialize a `SyntaxContext`, we need to create // a new id in the global `HygieneData`. This map tracks the ID we end up picking, // so that multiple occurrences of the same serialized id are decoded to the same - // `SyntaxContext` - remapped_ctxts: Lock>>, + // `SyntaxContext`. This only stores `SyntaxContext`s which are completly decoded. + remapped_ctxts: Vec>, + + /// Maps serialized `SyntaxContext` ids that are currently being decoded to a `SyntaxContext`. + decoding: FxHashMap, +} + +#[derive(Default)] +/// Additional information used to assist in decoding hygiene data +pub struct HygieneDecodeContext { + inner: Lock, + + /// A set of serialized `SyntaxContext` ids that are currently being decoded on each thread. + local_in_progress: WorkerLocal>>, } /// Register an expansion which has been decoded from the on-disk-cache for the local crate. @@ -1277,11 +1291,11 @@ pub fn register_expn_id( let expn_id = ExpnId { krate, local_id }; HygieneData::with(|hygiene_data| { let _old_data = hygiene_data.foreign_expn_data.insert(expn_id, data); - debug_assert!(_old_data.is_none()); + debug_assert!(_old_data.is_none() || cfg!(parallel_compiler)); let _old_hash = hygiene_data.foreign_expn_hashes.insert(expn_id, hash); - debug_assert!(_old_hash.is_none()); + debug_assert!(_old_hash.is_none() || _old_hash == Some(hash)); let _old_id = hygiene_data.expn_hash_to_expn_id.insert(hash, expn_id); - debug_assert!(_old_id.is_none()); + debug_assert!(_old_id.is_none() || _old_id == Some(expn_id)); }); expn_id } @@ -1331,38 +1345,56 @@ pub fn decode_syntax_context SyntaxContext return SyntaxContext::root(); } - let outer_ctxts = &context.remapped_ctxts; + let ctxt = { + let mut inner = context.inner.lock(); - // Ensure that the lock() temporary is dropped early - { - if let Some(ctxt) = outer_ctxts.lock().get(raw_id as usize).copied().flatten() { + if let Some(ctxt) = inner.remapped_ctxts.get(raw_id as usize).copied().flatten() { + // This has already beeen decoded. return ctxt; } - } - // Allocate and store SyntaxContext id *before* calling the decoder function, - // as the SyntaxContextData may reference itself. - let new_ctxt = HygieneData::with(|hygiene_data| { - let new_ctxt = SyntaxContext(hygiene_data.syntax_context_data.len() as u32); - // Push a dummy SyntaxContextData to ensure that nobody else can get the - // same ID as us. This will be overwritten after call `decode_Data` - hygiene_data.syntax_context_data.push(SyntaxContextData { - outer_expn: ExpnId::root(), - outer_transparency: Transparency::Transparent, - parent: SyntaxContext::root(), - opaque: SyntaxContext::root(), - opaque_and_semitransparent: SyntaxContext::root(), - dollar_crate_name: kw::Empty, - }); - let mut ctxts = outer_ctxts.lock(); - let new_len = raw_id as usize + 1; - if ctxts.len() < new_len { - ctxts.resize(new_len, None); + match inner.decoding.entry(raw_id) { + Entry::Occupied(ctxt_entry) => { + match context.local_in_progress.borrow_mut().entry(raw_id) { + Entry::Occupied(..) => { + // We're decoding this already on the current thread. Return here + // and let the function higher up the stack finish decoding to handle + // recursive cases. + return *ctxt_entry.get(); + } + Entry::Vacant(entry) => { + entry.insert(()); + + // Some other thread is current decoding this. Race with it. + *ctxt_entry.get() + } + } + } + Entry::Vacant(entry) => { + // We are the first thread to start decoding. Mark the current thread as being progress. + context.local_in_progress.borrow_mut().insert(raw_id, ()); + + // Allocate and store SyntaxContext id *before* calling the decoder function, + // as the SyntaxContextData may reference itself. + let new_ctxt = HygieneData::with(|hygiene_data| { + let new_ctxt = SyntaxContext(hygiene_data.syntax_context_data.len() as u32); + // Push a dummy SyntaxContextData to ensure that nobody else can get the + // same ID as us. This will be overwritten after call `decode_Data` + hygiene_data.syntax_context_data.push(SyntaxContextData { + outer_expn: ExpnId::root(), + outer_transparency: Transparency::Transparent, + parent: SyntaxContext::root(), + opaque: SyntaxContext::root(), + opaque_and_semitransparent: SyntaxContext::root(), + dollar_crate_name: kw::Empty, + }); + new_ctxt + }); + entry.insert(new_ctxt); + new_ctxt + } } - ctxts[raw_id as usize] = Some(new_ctxt); - drop(ctxts); - new_ctxt - }); + }; // Don't try to decode data while holding the lock, since we need to // be able to recursively decode a SyntaxContext @@ -1375,14 +1407,32 @@ pub fn decode_syntax_context SyntaxContext // Overwrite the dummy data with our decoded SyntaxContextData HygieneData::with(|hygiene_data| { let dummy = std::mem::replace( - &mut hygiene_data.syntax_context_data[new_ctxt.as_u32() as usize], + &mut hygiene_data.syntax_context_data[ctxt.as_u32() as usize], ctxt_data, ); - // Make sure nothing weird happening while `decode_data` was running - assert_eq!(dummy.dollar_crate_name, kw::Empty); + if cfg!(not(parallel_compiler)) { + // Make sure nothing weird happened while `decode_data` was running. + // We used `kw::Empty` for the dummy value and we expect nothing to be + // modifying the dummy entry. + // This does not hold for the parallel compiler as another thread may + // have inserted the fully decoded data. + assert_eq!(dummy.dollar_crate_name, kw::Empty); + } }); - new_ctxt + // Mark the context as completed + + context.local_in_progress.borrow_mut().remove(&raw_id); + + let mut inner = context.inner.lock(); + let new_len = raw_id as usize + 1; + if inner.remapped_ctxts.len() < new_len { + inner.remapped_ctxts.resize(new_len, None); + } + inner.remapped_ctxts[raw_id as usize] = Some(ctxt); + inner.decoding.remove(&raw_id); + + ctxt } fn for_all_ctxts_in( diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs index c24b8d9ec..772e09291 100644 --- a/compiler/rustc_span/src/lib.rs +++ b/compiler/rustc_span/src/lib.rs @@ -21,9 +21,11 @@ #![feature(rustc_attrs)] #![feature(let_chains)] #![feature(round_char_boundary)] +#![feature(read_buf)] +#![feature(new_uninit)] #![deny(rustc::untranslatable_diagnostic)] #![deny(rustc::diagnostic_outside_of_impl)] -#![cfg_attr(not(bootstrap), allow(internal_features))] +#![allow(internal_features)] #[macro_use] extern crate rustc_macros; @@ -31,7 +33,7 @@ extern crate rustc_macros; #[macro_use] extern crate tracing; -use rustc_data_structures::AtomicRef; +use rustc_data_structures::{outline, AtomicRef}; use rustc_macros::HashStable_Generic; use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; @@ -62,7 +64,7 @@ pub mod fatal_error; pub mod profiling; use rustc_data_structures::stable_hasher::{Hash128, Hash64, HashStable, StableHasher}; -use rustc_data_structures::sync::{Lock, Lrc}; +use rustc_data_structures::sync::{FreezeLock, FreezeWriteGuard, Lock, Lrc}; use std::borrow::Cow; use std::cmp::{self, Ordering}; @@ -508,10 +510,6 @@ impl SpanData { pub fn is_dummy(self) -> bool { self.lo.0 == 0 && self.hi.0 == 0 } - #[inline] - pub fn is_visible(self, sm: &SourceMap) -> bool { - !self.is_dummy() && sm.is_span_accessible(self.span()) - } /// Returns `true` if `self` fully encloses `other`. pub fn contains(self, other: Self) -> bool { self.lo <= other.lo && other.hi <= self.hi @@ -571,15 +569,9 @@ impl Span { self.data().with_parent(ctxt) } - /// Returns `true` if this is a dummy span with any hygienic context. - #[inline] - pub fn is_dummy(self) -> bool { - self.data_untracked().is_dummy() - } - #[inline] pub fn is_visible(self, sm: &SourceMap) -> bool { - self.data_untracked().is_visible(sm) + !self.is_dummy() && sm.is_span_accessible(self) } /// Returns `true` if this span comes from any kind of macro, desugaring or inlining. @@ -1105,27 +1097,27 @@ impl fmt::Debug for SpanData { } /// Identifies an offset of a multi-byte character in a `SourceFile`. -#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug)] +#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug, HashStable_Generic)] pub struct MultiByteChar { - /// The absolute offset of the character in the `SourceMap`. - pub pos: BytePos, + /// The relative offset of the character in the `SourceFile`. + pub pos: RelativeBytePos, /// The number of bytes, `>= 2`. pub bytes: u8, } /// Identifies an offset of a non-narrow character in a `SourceFile`. -#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug)] +#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug, HashStable_Generic)] pub enum NonNarrowChar { /// Represents a zero-width character. - ZeroWidth(BytePos), + ZeroWidth(RelativeBytePos), /// Represents a wide (full-width) character. - Wide(BytePos), + Wide(RelativeBytePos), /// Represents a tab character, represented visually with a width of 4 characters. - Tab(BytePos), + Tab(RelativeBytePos), } impl NonNarrowChar { - fn new(pos: BytePos, width: usize) -> Self { + fn new(pos: RelativeBytePos, width: usize) -> Self { match width { 0 => NonNarrowChar::ZeroWidth(pos), 2 => NonNarrowChar::Wide(pos), @@ -1134,8 +1126,8 @@ impl NonNarrowChar { } } - /// Returns the absolute offset of the character in the `SourceMap`. - pub fn pos(&self) -> BytePos { + /// Returns the relative offset of the character in the `SourceFile`. + pub fn pos(&self) -> RelativeBytePos { match *self { NonNarrowChar::ZeroWidth(p) | NonNarrowChar::Wide(p) | NonNarrowChar::Tab(p) => p, } @@ -1151,10 +1143,10 @@ impl NonNarrowChar { } } -impl Add for NonNarrowChar { +impl Add for NonNarrowChar { type Output = Self; - fn add(self, rhs: BytePos) -> Self { + fn add(self, rhs: RelativeBytePos) -> Self { match self { NonNarrowChar::ZeroWidth(pos) => NonNarrowChar::ZeroWidth(pos + rhs), NonNarrowChar::Wide(pos) => NonNarrowChar::Wide(pos + rhs), @@ -1163,10 +1155,10 @@ impl Add for NonNarrowChar { } } -impl Sub for NonNarrowChar { +impl Sub for NonNarrowChar { type Output = Self; - fn sub(self, rhs: BytePos) -> Self { + fn sub(self, rhs: RelativeBytePos) -> Self { match self { NonNarrowChar::ZeroWidth(pos) => NonNarrowChar::ZeroWidth(pos - rhs), NonNarrowChar::Wide(pos) => NonNarrowChar::Wide(pos - rhs), @@ -1176,10 +1168,10 @@ impl Sub for NonNarrowChar { } /// Identifies an offset of a character that was normalized away from `SourceFile`. -#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug)] +#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug, HashStable_Generic)] pub struct NormalizedPos { - /// The absolute offset of the character in the `SourceMap`. - pub pos: BytePos, + /// The relative offset of the character in the `SourceFile`. + pub pos: RelativeBytePos, /// The difference between original and normalized string at position. pub diff: u32, } @@ -1204,7 +1196,6 @@ pub enum ExternalSourceKind { AbsentOk, /// A failed attempt has been made to load the external source. AbsentErr, - Unneeded, } impl ExternalSource { @@ -1291,7 +1282,7 @@ impl SourceFileHash { #[derive(Clone)] pub enum SourceFileLines { /// The source file lines, in decoded (random-access) form. - Lines(Vec), + Lines(Vec), /// The source file lines, in undecoded difference list form. Diffs(SourceFileDiffs), @@ -1312,11 +1303,6 @@ impl SourceFileLines { /// small crates where very little of `std`'s metadata is used. #[derive(Clone)] pub struct SourceFileDiffs { - /// Position of the first line. Note that this is always encoded as a - /// `BytePos` because it is often much larger than any of the - /// differences. - line_start: BytePos, - /// Always 1, 2, or 4. Always as small as possible, while being big /// enough to hold the length of the longest line in the source file. /// The 1 case is by far the most common. @@ -1346,13 +1332,13 @@ pub struct SourceFile { pub src_hash: SourceFileHash, /// The external source code (used for external crates, which will have a `None` /// value as `self.src`. - pub external_src: Lock, + pub external_src: FreezeLock, /// The start position of this source in the `SourceMap`. pub start_pos: BytePos, - /// The end position of this source in the `SourceMap`. - pub end_pos: BytePos, + /// The byte length of this source. + pub source_len: RelativeBytePos, /// Locations of lines beginnings in the source code. - pub lines: Lock, + pub lines: FreezeLock, /// Locations of multi-byte characters in the source code. pub multibyte_chars: Vec, /// Width of characters that are not narrow in the source code. @@ -1371,10 +1357,10 @@ impl Clone for SourceFile { name: self.name.clone(), src: self.src.clone(), src_hash: self.src_hash, - external_src: Lock::new(self.external_src.borrow().clone()), + external_src: self.external_src.clone(), start_pos: self.start_pos, - end_pos: self.end_pos, - lines: Lock::new(self.lines.borrow().clone()), + source_len: self.source_len, + lines: self.lines.clone(), multibyte_chars: self.multibyte_chars.clone(), non_narrow_chars: self.non_narrow_chars.clone(), normalized_pos: self.normalized_pos.clone(), @@ -1388,68 +1374,67 @@ impl Encodable for SourceFile { fn encode(&self, s: &mut S) { self.name.encode(s); self.src_hash.encode(s); - self.start_pos.encode(s); - self.end_pos.encode(s); + // Do not encode `start_pos` as it's global state for this session. + self.source_len.encode(s); // We are always in `Lines` form by the time we reach here. - assert!(self.lines.borrow().is_lines()); - self.lines(|lines| { - // Store the length. - s.emit_u32(lines.len() as u32); - - // Compute and store the difference list. - if lines.len() != 0 { - let max_line_length = if lines.len() == 1 { - 0 - } else { - lines - .array_windows() - .map(|&[fst, snd]| snd - fst) - .map(|bp| bp.to_usize()) - .max() - .unwrap() - }; - - let bytes_per_diff: usize = match max_line_length { - 0..=0xFF => 1, - 0x100..=0xFFFF => 2, - _ => 4, - }; - - // Encode the number of bytes used per diff. - s.emit_u8(bytes_per_diff as u8); - - // Encode the first element. - lines[0].encode(s); - - // Encode the difference list. - let diff_iter = lines.array_windows().map(|&[fst, snd]| snd - fst); - let num_diffs = lines.len() - 1; - let mut raw_diffs; - match bytes_per_diff { - 1 => { - raw_diffs = Vec::with_capacity(num_diffs); - for diff in diff_iter { - raw_diffs.push(diff.0 as u8); - } + assert!(self.lines.read().is_lines()); + let lines = self.lines(); + // Store the length. + s.emit_u32(lines.len() as u32); + + // Compute and store the difference list. + if lines.len() != 0 { + let max_line_length = if lines.len() == 1 { + 0 + } else { + lines + .array_windows() + .map(|&[fst, snd]| snd - fst) + .map(|bp| bp.to_usize()) + .max() + .unwrap() + }; + + let bytes_per_diff: usize = match max_line_length { + 0..=0xFF => 1, + 0x100..=0xFFFF => 2, + _ => 4, + }; + + // Encode the number of bytes used per diff. + s.emit_u8(bytes_per_diff as u8); + + // Encode the first element. + assert_eq!(lines[0], RelativeBytePos(0)); + + // Encode the difference list. + let diff_iter = lines.array_windows().map(|&[fst, snd]| snd - fst); + let num_diffs = lines.len() - 1; + let mut raw_diffs; + match bytes_per_diff { + 1 => { + raw_diffs = Vec::with_capacity(num_diffs); + for diff in diff_iter { + raw_diffs.push(diff.0 as u8); } - 2 => { - raw_diffs = Vec::with_capacity(bytes_per_diff * num_diffs); - for diff in diff_iter { - raw_diffs.extend_from_slice(&(diff.0 as u16).to_le_bytes()); - } + } + 2 => { + raw_diffs = Vec::with_capacity(bytes_per_diff * num_diffs); + for diff in diff_iter { + raw_diffs.extend_from_slice(&(diff.0 as u16).to_le_bytes()); } - 4 => { - raw_diffs = Vec::with_capacity(bytes_per_diff * num_diffs); - for diff in diff_iter { - raw_diffs.extend_from_slice(&(diff.0).to_le_bytes()); - } + } + 4 => { + raw_diffs = Vec::with_capacity(bytes_per_diff * num_diffs); + for diff in diff_iter { + raw_diffs.extend_from_slice(&(diff.0).to_le_bytes()); } - _ => unreachable!(), } - s.emit_raw_bytes(&raw_diffs); + _ => unreachable!(), } - }); + s.emit_raw_bytes(&raw_diffs); + } self.multibyte_chars.encode(s); self.non_narrow_chars.encode(s); @@ -1463,26 +1448,17 @@ impl Decodable for SourceFile { fn decode(d: &mut D) -> SourceFile { let name: FileName = Decodable::decode(d); let src_hash: SourceFileHash = Decodable::decode(d); - let start_pos: BytePos = Decodable::decode(d); - let end_pos: BytePos = Decodable::decode(d); + let source_len: RelativeBytePos = Decodable::decode(d); let lines = { let num_lines: u32 = Decodable::decode(d); if num_lines > 0 { // Read the number of bytes used per diff. let bytes_per_diff = d.read_u8() as usize; - // Read the first element. - let line_start: BytePos = Decodable::decode(d); - // Read the difference list. let num_diffs = num_lines as usize - 1; let raw_diffs = d.read_raw_bytes(bytes_per_diff * num_diffs).to_vec(); - SourceFileLines::Diffs(SourceFileDiffs { - line_start, - bytes_per_diff, - num_diffs, - raw_diffs, - }) + SourceFileLines::Diffs(SourceFileDiffs { bytes_per_diff, num_diffs, raw_diffs }) } else { SourceFileLines::Lines(vec![]) } @@ -1494,14 +1470,14 @@ impl Decodable for SourceFile { let cnum: CrateNum = Decodable::decode(d); SourceFile { name, - start_pos, - end_pos, + start_pos: BytePos::from_u32(0), + source_len, src: None, src_hash, // Unused - the metadata decoder will construct // a new SourceFile, filling in `external_src` properly - external_src: Lock::new(ExternalSource::Unneeded), - lines: Lock::new(lines), + external_src: FreezeLock::frozen(ExternalSource::Unneeded), + lines: FreezeLock::new(lines), multibyte_chars, non_narrow_chars, normalized_pos, @@ -1521,102 +1497,116 @@ impl SourceFile { pub fn new( name: FileName, mut src: String, - start_pos: BytePos, hash_kind: SourceFileHashAlgorithm, - ) -> Self { + ) -> Result { // Compute the file hash before any normalization. let src_hash = SourceFileHash::new(hash_kind, &src); - let normalized_pos = normalize_src(&mut src, start_pos); + let normalized_pos = normalize_src(&mut src); let name_hash = { let mut hasher: StableHasher = StableHasher::new(); name.hash(&mut hasher); hasher.finish() }; - let end_pos = start_pos.to_usize() + src.len(); - assert!(end_pos <= u32::MAX as usize); + let source_len = src.len(); + let source_len = u32::try_from(source_len).map_err(|_| OffsetOverflowError)?; let (lines, multibyte_chars, non_narrow_chars) = - analyze_source_file::analyze_source_file(&src, start_pos); + analyze_source_file::analyze_source_file(&src); - SourceFile { + Ok(SourceFile { name, src: Some(Lrc::new(src)), src_hash, - external_src: Lock::new(ExternalSource::Unneeded), - start_pos, - end_pos: Pos::from_usize(end_pos), - lines: Lock::new(SourceFileLines::Lines(lines)), + external_src: FreezeLock::frozen(ExternalSource::Unneeded), + start_pos: BytePos::from_u32(0), + source_len: RelativeBytePos::from_u32(source_len), + lines: FreezeLock::frozen(SourceFileLines::Lines(lines)), multibyte_chars, non_narrow_chars, normalized_pos, name_hash, cnum: LOCAL_CRATE, - } + }) } - pub fn lines(&self, f: F) -> R - where - F: FnOnce(&[BytePos]) -> R, - { - let mut guard = self.lines.borrow_mut(); - match &*guard { - SourceFileLines::Lines(lines) => f(lines), - SourceFileLines::Diffs(SourceFileDiffs { - mut line_start, - bytes_per_diff, - num_diffs, - raw_diffs, - }) => { - // Convert from "diffs" form to "lines" form. - let num_lines = num_diffs + 1; - let mut lines = Vec::with_capacity(num_lines); - lines.push(line_start); - - assert_eq!(*num_diffs, raw_diffs.len() / bytes_per_diff); - match bytes_per_diff { - 1 => { - lines.extend(raw_diffs.into_iter().map(|&diff| { - line_start = line_start + BytePos(diff as u32); - line_start - })); - } - 2 => { - lines.extend((0..*num_diffs).map(|i| { - let pos = bytes_per_diff * i; - let bytes = [raw_diffs[pos], raw_diffs[pos + 1]]; - let diff = u16::from_le_bytes(bytes); - line_start = line_start + BytePos(diff as u32); - line_start - })); - } - 4 => { - lines.extend((0..*num_diffs).map(|i| { - let pos = bytes_per_diff * i; - let bytes = [ - raw_diffs[pos], - raw_diffs[pos + 1], - raw_diffs[pos + 2], - raw_diffs[pos + 3], - ]; - let diff = u32::from_le_bytes(bytes); - line_start = line_start + BytePos(diff); - line_start - })); - } - _ => unreachable!(), - } - let res = f(&lines); - *guard = SourceFileLines::Lines(lines); - res + /// This converts the `lines` field to contain `SourceFileLines::Lines` if needed and freezes it. + fn convert_diffs_to_lines_frozen(&self) { + let mut guard = if let Some(guard) = self.lines.try_write() { guard } else { return }; + + let SourceFileDiffs { bytes_per_diff, num_diffs, raw_diffs } = match &*guard { + SourceFileLines::Diffs(diffs) => diffs, + SourceFileLines::Lines(..) => { + FreezeWriteGuard::freeze(guard); + return; + } + }; + + // Convert from "diffs" form to "lines" form. + let num_lines = num_diffs + 1; + let mut lines = Vec::with_capacity(num_lines); + let mut line_start = RelativeBytePos(0); + lines.push(line_start); + + assert_eq!(*num_diffs, raw_diffs.len() / bytes_per_diff); + match bytes_per_diff { + 1 => { + lines.extend(raw_diffs.into_iter().map(|&diff| { + line_start = line_start + RelativeBytePos(diff as u32); + line_start + })); + } + 2 => { + lines.extend((0..*num_diffs).map(|i| { + let pos = bytes_per_diff * i; + let bytes = [raw_diffs[pos], raw_diffs[pos + 1]]; + let diff = u16::from_le_bytes(bytes); + line_start = line_start + RelativeBytePos(diff as u32); + line_start + })); } + 4 => { + lines.extend((0..*num_diffs).map(|i| { + let pos = bytes_per_diff * i; + let bytes = [ + raw_diffs[pos], + raw_diffs[pos + 1], + raw_diffs[pos + 2], + raw_diffs[pos + 3], + ]; + let diff = u32::from_le_bytes(bytes); + line_start = line_start + RelativeBytePos(diff); + line_start + })); + } + _ => unreachable!(), } + + *guard = SourceFileLines::Lines(lines); + + FreezeWriteGuard::freeze(guard); + } + + pub fn lines(&self) -> &[RelativeBytePos] { + if let Some(SourceFileLines::Lines(lines)) = self.lines.get() { + return &lines[..]; + } + + outline(|| { + self.convert_diffs_to_lines_frozen(); + if let Some(SourceFileLines::Lines(lines)) = self.lines.get() { + return &lines[..]; + } + unreachable!() + }) } /// Returns the `BytePos` of the beginning of the current line. pub fn line_begin_pos(&self, pos: BytePos) -> BytePos { + let pos = self.relative_position(pos); let line_index = self.lookup_line(pos).unwrap(); - self.lines(|lines| lines[line_index]) + let line_start_pos = self.lines()[line_index]; + self.absolute_position(line_start_pos) } /// Add externally loaded source. @@ -1627,35 +1617,37 @@ impl SourceFile { where F: FnOnce() -> Option, { - if matches!( - *self.external_src.borrow(), - ExternalSource::Foreign { kind: ExternalSourceKind::AbsentOk, .. } - ) { + if !self.external_src.is_frozen() { let src = get_src(); - let mut external_src = self.external_src.borrow_mut(); - // Check that no-one else have provided the source while we were getting it - if let ExternalSource::Foreign { - kind: src_kind @ ExternalSourceKind::AbsentOk, .. - } = &mut *external_src - { - if let Some(mut src) = src { - // The src_hash needs to be computed on the pre-normalized src. - if self.src_hash.matches(&src) { - normalize_src(&mut src, BytePos::from_usize(0)); - *src_kind = ExternalSourceKind::Present(Lrc::new(src)); - return true; - } + let src = src.and_then(|mut src| { + // The src_hash needs to be computed on the pre-normalized src. + self.src_hash.matches(&src).then(|| { + normalize_src(&mut src); + src + }) + }); + + self.external_src.try_write().map(|mut external_src| { + if let ExternalSource::Foreign { + kind: src_kind @ ExternalSourceKind::AbsentOk, + .. + } = &mut *external_src + { + *src_kind = if let Some(src) = src { + ExternalSourceKind::Present(Lrc::new(src)) + } else { + ExternalSourceKind::AbsentErr + }; } else { - *src_kind = ExternalSourceKind::AbsentErr; + panic!("unexpected state {:?}", *external_src) } - false - } else { - self.src.is_some() || external_src.get_source().is_some() - } - } else { - self.src.is_some() || self.external_src.borrow().get_source().is_some() + // Freeze this so we don't try to load the source again. + FreezeWriteGuard::freeze(external_src) + }); } + + self.src.is_some() || self.external_src.read().get_source().is_some() } /// Gets a line from the list of pre-computed line-beginnings. @@ -1673,9 +1665,8 @@ impl SourceFile { } let begin = { - let line = self.lines(|lines| lines.get(line_number).copied())?; - let begin: BytePos = line - self.start_pos; - begin.to_usize() + let line = self.lines().get(line_number).copied()?; + line.to_usize() }; if let Some(ref src) = self.src { @@ -1698,30 +1689,44 @@ impl SourceFile { } pub fn count_lines(&self) -> usize { - self.lines(|lines| lines.len()) + self.lines().len() + } + + #[inline] + pub fn absolute_position(&self, pos: RelativeBytePos) -> BytePos { + BytePos::from_u32(pos.to_u32() + self.start_pos.to_u32()) + } + + #[inline] + pub fn relative_position(&self, pos: BytePos) -> RelativeBytePos { + RelativeBytePos::from_u32(pos.to_u32() - self.start_pos.to_u32()) + } + + #[inline] + pub fn end_position(&self) -> BytePos { + self.absolute_position(self.source_len) } /// Finds the line containing the given position. The return value is the /// index into the `lines` array of this `SourceFile`, not the 1-based line /// number. If the source_file is empty or the position is located before the /// first line, `None` is returned. - pub fn lookup_line(&self, pos: BytePos) -> Option { - self.lines(|lines| lines.partition_point(|x| x <= &pos).checked_sub(1)) + pub fn lookup_line(&self, pos: RelativeBytePos) -> Option { + self.lines().partition_point(|x| x <= &pos).checked_sub(1) } pub fn line_bounds(&self, line_index: usize) -> Range { if self.is_empty() { - return self.start_pos..self.end_pos; + return self.start_pos..self.start_pos; } - self.lines(|lines| { - assert!(line_index < lines.len()); - if line_index == (lines.len() - 1) { - lines[line_index]..self.end_pos - } else { - lines[line_index]..lines[line_index + 1] - } - }) + let lines = self.lines(); + assert!(line_index < lines.len()); + if line_index == (lines.len() - 1) { + self.absolute_position(lines[line_index])..self.end_position() + } else { + self.absolute_position(lines[line_index])..self.absolute_position(lines[line_index + 1]) + } } /// Returns whether or not the file contains the given `SourceMap` byte @@ -1730,27 +1735,29 @@ impl SourceFile { /// returns true still contain one byte position according to this function. #[inline] pub fn contains(&self, byte_pos: BytePos) -> bool { - byte_pos >= self.start_pos && byte_pos <= self.end_pos + byte_pos >= self.start_pos && byte_pos <= self.end_position() } #[inline] pub fn is_empty(&self) -> bool { - self.start_pos == self.end_pos + self.source_len.to_u32() == 0 } /// Calculates the original byte position relative to the start of the file /// based on the given byte position. - pub fn original_relative_byte_pos(&self, pos: BytePos) -> BytePos { + pub fn original_relative_byte_pos(&self, pos: BytePos) -> RelativeBytePos { + let pos = self.relative_position(pos); + // Diff before any records is 0. Otherwise use the previously recorded // diff as that applies to the following characters until a new diff // is recorded. let diff = match self.normalized_pos.binary_search_by(|np| np.pos.cmp(&pos)) { Ok(i) => self.normalized_pos[i].diff, - Err(i) if i == 0 => 0, + Err(0) => 0, Err(i) => self.normalized_pos[i - 1].diff, }; - BytePos::from_u32(pos.0 - self.start_pos.0 + diff) + RelativeBytePos::from_u32(pos.0 + diff) } /// Calculates a normalized byte position from a byte offset relative to the @@ -1768,15 +1775,15 @@ impl SourceFile { .binary_search_by(|np| (np.pos.0 + np.diff).cmp(&(self.start_pos.0 + offset))) { Ok(i) => self.normalized_pos[i].diff, - Err(i) if i == 0 => 0, + Err(0) => 0, Err(i) => self.normalized_pos[i - 1].diff, }; BytePos::from_u32(self.start_pos.0 + offset - diff) } - /// Converts an absolute `BytePos` to a `CharPos` relative to the `SourceFile`. - pub fn bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos { + /// Converts an relative `RelativeBytePos` to a `CharPos` relative to the `SourceFile`. + fn bytepos_to_file_charpos(&self, bpos: RelativeBytePos) -> CharPos { // The number of extra bytes due to multibyte chars in the `SourceFile`. let mut total_extra_bytes = 0; @@ -1794,18 +1801,18 @@ impl SourceFile { } } - assert!(self.start_pos.to_u32() + total_extra_bytes <= bpos.to_u32()); - CharPos(bpos.to_usize() - self.start_pos.to_usize() - total_extra_bytes as usize) + assert!(total_extra_bytes <= bpos.to_u32()); + CharPos(bpos.to_usize() - total_extra_bytes as usize) } /// Looks up the file's (1-based) line number and (0-based `CharPos`) column offset, for a - /// given `BytePos`. - pub fn lookup_file_pos(&self, pos: BytePos) -> (usize, CharPos) { + /// given `RelativeBytePos`. + fn lookup_file_pos(&self, pos: RelativeBytePos) -> (usize, CharPos) { let chpos = self.bytepos_to_file_charpos(pos); match self.lookup_line(pos) { Some(a) => { let line = a + 1; // Line numbers start at 1 - let linebpos = self.lines(|lines| lines[a]); + let linebpos = self.lines()[a]; let linechpos = self.bytepos_to_file_charpos(linebpos); let col = chpos - linechpos; debug!("byte pos {:?} is on the line at byte pos {:?}", pos, linebpos); @@ -1821,10 +1828,11 @@ impl SourceFile { /// Looks up the file's (1-based) line number, (0-based `CharPos`) column offset, and (0-based) /// column offset when displayed, for a given `BytePos`. pub fn lookup_file_pos_with_col_display(&self, pos: BytePos) -> (usize, CharPos, usize) { + let pos = self.relative_position(pos); let (line, col_or_chpos) = self.lookup_file_pos(pos); if line > 0 { let col = col_or_chpos; - let linebpos = self.lines(|lines| lines[line - 1]); + let linebpos = self.lines()[line - 1]; let col_display = { let start_width_idx = self .non_narrow_chars @@ -1859,16 +1867,10 @@ impl SourceFile { } /// Normalizes the source code and records the normalizations. -fn normalize_src(src: &mut String, start_pos: BytePos) -> Vec { +fn normalize_src(src: &mut String) -> Vec { let mut normalized_pos = vec![]; remove_bom(src, &mut normalized_pos); normalize_newlines(src, &mut normalized_pos); - - // Offset all the positions by start_pos to match the final file positions. - for np in &mut normalized_pos { - np.pos.0 += start_pos.0; - } - normalized_pos } @@ -1876,7 +1878,7 @@ fn normalize_src(src: &mut String, start_pos: BytePos) -> Vec { fn remove_bom(src: &mut String, normalized_pos: &mut Vec) { if src.starts_with('\u{feff}') { src.drain(..3); - normalized_pos.push(NormalizedPos { pos: BytePos(0), diff: 3 }); + normalized_pos.push(NormalizedPos { pos: RelativeBytePos(0), diff: 3 }); } } @@ -1911,7 +1913,7 @@ fn normalize_newlines(src: &mut String, normalized_pos: &mut Vec) cursor += idx - gap_len; gap_len += 1; normalized_pos.push(NormalizedPos { - pos: BytePos::from_usize(cursor + 1), + pos: RelativeBytePos::from_usize(cursor + 1), diff: original_gap + gap_len as u32, }); } @@ -2013,6 +2015,10 @@ impl_pos! { #[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)] pub struct BytePos(pub u32); + /// A byte offset relative to file beginning. + #[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)] + pub struct RelativeBytePos(pub u32); + /// A character offset. /// /// Because of multibyte UTF-8 characters, a byte offset @@ -2034,6 +2040,24 @@ impl Decodable for BytePos { } } +impl HashStable for RelativeBytePos { + fn hash_stable(&self, hcx: &mut H, hasher: &mut StableHasher) { + self.0.hash_stable(hcx, hasher); + } +} + +impl Encodable for RelativeBytePos { + fn encode(&self, s: &mut S) { + s.emit_u32(self.0); + } +} + +impl Decodable for RelativeBytePos { + fn decode(d: &mut D) -> RelativeBytePos { + RelativeBytePos(d.read_u32()) + } +} + // _____________________________________________________________________________ // Loc, SourceFileAndLine, SourceFileAndBytePos // diff --git a/compiler/rustc_span/src/source_map.rs b/compiler/rustc_span/src/source_map.rs index 983b2ab04..0b575c13a 100644 --- a/compiler/rustc_span/src/source_map.rs +++ b/compiler/rustc_span/src/source_map.rs @@ -14,16 +14,15 @@ pub use crate::*; use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::stable_hasher::{Hash128, Hash64, StableHasher}; -use rustc_data_structures::sync::{ - AtomicU32, IntoDynSyncSend, Lrc, MappedReadGuard, ReadGuard, RwLock, -}; +use rustc_data_structures::sync::{IntoDynSyncSend, Lrc, MappedReadGuard, ReadGuard, RwLock}; use std::cmp; use std::hash::Hash; use std::path::{self, Path, PathBuf}; -use std::sync::atomic::Ordering; use std::fs; use std::io; +use std::io::BorrowedBuf; +use std::io::Read; #[cfg(test)] mod tests; @@ -101,10 +100,13 @@ pub trait FileLoader { fn file_exists(&self, path: &Path) -> bool; /// Read the contents of a UTF-8 file into memory. + /// This function must return a String because we normalize + /// source files, which may require resizing. fn read_file(&self, path: &Path) -> io::Result; /// Read the contents of a potentially non-UTF-8 file into memory. - fn read_binary_file(&self, path: &Path) -> io::Result>; + /// We don't normalize binary files, so we can start in an Lrc. + fn read_binary_file(&self, path: &Path) -> io::Result>; } /// A FileLoader that uses std::fs to load real files. @@ -119,8 +121,45 @@ impl FileLoader for RealFileLoader { fs::read_to_string(path) } - fn read_binary_file(&self, path: &Path) -> io::Result> { - fs::read(path) + fn read_binary_file(&self, path: &Path) -> io::Result> { + let mut file = fs::File::open(path)?; + let len = file.metadata()?.len(); + + let mut bytes = Lrc::new_uninit_slice(len as usize); + let mut buf = BorrowedBuf::from(Lrc::get_mut(&mut bytes).unwrap()); + match file.read_buf_exact(buf.unfilled()) { + Ok(()) => {} + Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => { + drop(bytes); + return fs::read(path).map(Vec::into); + } + Err(e) => return Err(e), + } + // SAFETY: If the read_buf_exact call returns Ok(()), then we have + // read len bytes and initialized the buffer. + let bytes = unsafe { bytes.assume_init() }; + + // At this point, we've read all the bytes that filesystem metadata reported exist. + // But we are not guaranteed to be at the end of the file, because we did not attempt to do + // a read with a non-zero-sized buffer and get Ok(0). + // So we do small read to a fixed-size buffer. If the read returns no bytes then we're + // already done, and we just return the Lrc we built above. + // If the read returns bytes however, we just fall back to reading into a Vec then turning + // that into an Lrc, losing our nice peak memory behavior. This fallback code path should + // be rarely exercised. + + let mut probe = [0u8; 32]; + let n = loop { + match file.read(&mut probe) { + Ok(0) => return Ok(bytes), + Err(e) if e.kind() == io::ErrorKind::Interrupted => continue, + Err(e) => return Err(e), + Ok(n) => break n, + } + }; + let mut bytes: Vec = bytes.iter().copied().chain(probe[..n].iter().copied()).collect(); + file.read_to_end(&mut bytes)?; + Ok(bytes.into()) } } @@ -174,9 +213,6 @@ pub(super) struct SourceMapFiles { } pub struct SourceMap { - /// The address space below this value is currently used by the files in the source map. - used_address_space: AtomicU32, - files: RwLock, file_loader: IntoDynSyncSend>, // This is used to apply the file path remapping as specified via @@ -202,7 +238,6 @@ impl SourceMap { hash_kind: SourceFileHashAlgorithm, ) -> SourceMap { SourceMap { - used_address_space: AtomicU32::new(0), files: Default::default(), file_loader: IntoDynSyncSend(file_loader), path_mapping, @@ -228,7 +263,7 @@ impl SourceMap { /// /// Unlike `load_file`, guarantees that no normalization like BOM-removal /// takes place. - pub fn load_binary_file(&self, path: &Path) -> io::Result> { + pub fn load_binary_file(&self, path: &Path) -> io::Result> { let bytes = self.file_loader.read_binary_file(path)?; // We need to add file to the `SourceMap`, so that it is present @@ -254,26 +289,26 @@ impl SourceMap { self.files.borrow().stable_id_to_source_file.get(&stable_id).cloned() } - fn allocate_address_space(&self, size: usize) -> Result { - let size = u32::try_from(size).map_err(|_| OffsetOverflowError)?; - - loop { - let current = self.used_address_space.load(Ordering::Relaxed); - let next = current - .checked_add(size) - // Add one so there is some space between files. This lets us distinguish - // positions in the `SourceMap`, even in the presence of zero-length files. - .and_then(|next| next.checked_add(1)) - .ok_or(OffsetOverflowError)?; - - if self - .used_address_space - .compare_exchange(current, next, Ordering::Relaxed, Ordering::Relaxed) - .is_ok() - { - return Ok(usize::try_from(current).unwrap()); - } - } + fn register_source_file( + &self, + file_id: StableSourceFileId, + mut file: SourceFile, + ) -> Result, OffsetOverflowError> { + let mut files = self.files.borrow_mut(); + + file.start_pos = BytePos(if let Some(last_file) = files.source_files.last() { + // Add one so there is some space between files. This lets us distinguish + // positions in the `SourceMap`, even in the presence of zero-length files. + last_file.end_position().0.checked_add(1).ok_or(OffsetOverflowError)? + } else { + 0 + }); + + let file = Lrc::new(file); + files.source_files.push(file.clone()); + files.stable_id_to_source_file.insert(file_id, file.clone()); + + Ok(file) } /// Creates a new `SourceFile`. @@ -297,32 +332,18 @@ impl SourceMap { let (filename, _) = self.path_mapping.map_filename_prefix(&filename); let file_id = StableSourceFileId::new_from_name(&filename, LOCAL_CRATE); - - let lrc_sf = match self.source_file_by_stable_id(file_id) { - Some(lrc_sf) => lrc_sf, + match self.source_file_by_stable_id(file_id) { + Some(lrc_sf) => Ok(lrc_sf), None => { - let start_pos = self.allocate_address_space(src.len())?; - - let source_file = Lrc::new(SourceFile::new( - filename, - src, - Pos::from_usize(start_pos), - self.hash_kind, - )); + let source_file = SourceFile::new(filename, src, self.hash_kind)?; // Let's make sure the file_id we generated above actually matches // the ID we generate for the SourceFile we just created. debug_assert_eq!(StableSourceFileId::new(&source_file), file_id); - let mut files = self.files.borrow_mut(); - - files.source_files.push(source_file.clone()); - files.stable_id_to_source_file.insert(file_id, source_file.clone()); - - source_file + self.register_source_file(file_id, source_file) } - }; - Ok(lrc_sf) + } } /// Allocates a new `SourceFile` representing a source file from an external @@ -334,78 +355,37 @@ impl SourceMap { filename: FileName, src_hash: SourceFileHash, name_hash: Hash128, - source_len: usize, + source_len: u32, cnum: CrateNum, - file_local_lines: Lock, - mut file_local_multibyte_chars: Vec, - mut file_local_non_narrow_chars: Vec, - mut file_local_normalized_pos: Vec, - original_start_pos: BytePos, + file_local_lines: FreezeLock, + multibyte_chars: Vec, + non_narrow_chars: Vec, + normalized_pos: Vec, metadata_index: u32, ) -> Lrc { - let start_pos = self - .allocate_address_space(source_len) - .expect("not enough address space for imported source file"); - - let end_pos = Pos::from_usize(start_pos + source_len); - let start_pos = Pos::from_usize(start_pos); - - // Translate these positions into the new global frame of reference, - // now that the offset of the SourceFile is known. - // - // These are all unsigned values. `original_start_pos` may be larger or - // smaller than `start_pos`, but `pos` is always larger than both. - // Therefore, `(pos - original_start_pos) + start_pos` won't overflow - // but `start_pos - original_start_pos` might. So we use the former - // form rather than pre-computing the offset into a local variable. The - // compiler backend can optimize away the repeated computations in a - // way that won't trigger overflow checks. - match &mut *file_local_lines.borrow_mut() { - SourceFileLines::Lines(lines) => { - for pos in lines { - *pos = (*pos - original_start_pos) + start_pos; - } - } - SourceFileLines::Diffs(SourceFileDiffs { line_start, .. }) => { - *line_start = (*line_start - original_start_pos) + start_pos; - } - } - for mbc in &mut file_local_multibyte_chars { - mbc.pos = (mbc.pos - original_start_pos) + start_pos; - } - for swc in &mut file_local_non_narrow_chars { - *swc = (*swc - original_start_pos) + start_pos; - } - for nc in &mut file_local_normalized_pos { - nc.pos = (nc.pos - original_start_pos) + start_pos; - } + let source_len = RelativeBytePos::from_u32(source_len); - let source_file = Lrc::new(SourceFile { + let source_file = SourceFile { name: filename, src: None, src_hash, - external_src: Lock::new(ExternalSource::Foreign { + external_src: FreezeLock::new(ExternalSource::Foreign { kind: ExternalSourceKind::AbsentOk, metadata_index, }), - start_pos, - end_pos, + start_pos: BytePos(0), + source_len, lines: file_local_lines, - multibyte_chars: file_local_multibyte_chars, - non_narrow_chars: file_local_non_narrow_chars, - normalized_pos: file_local_normalized_pos, + multibyte_chars, + non_narrow_chars, + normalized_pos, name_hash, cnum, - }); - - let mut files = self.files.borrow_mut(); - - files.source_files.push(source_file.clone()); - files - .stable_id_to_source_file - .insert(StableSourceFileId::new(&source_file), source_file.clone()); + }; - source_file + let file_id = StableSourceFileId::new(&source_file); + self.register_source_file(file_id, source_file) + .expect("not enough address space for imported source file") } /// If there is a doctest offset, applies it to the line. @@ -439,6 +419,7 @@ impl SourceMap { pub fn lookup_line(&self, pos: BytePos) -> Result> { let f = self.lookup_source_file(pos); + let pos = f.relative_position(pos); match f.lookup_line(pos) { Some(line) => Ok(SourceFileAndLine { sf: f, line }), None => Err(f), @@ -534,7 +515,9 @@ impl SourceMap { return true; } let f = (*self.files.borrow().source_files)[lo].clone(); - f.lookup_line(sp.lo()) != f.lookup_line(sp.hi()) + let lo = f.relative_position(sp.lo()); + let hi = f.relative_position(sp.hi()); + f.lookup_line(lo) != f.lookup_line(hi) } #[instrument(skip(self), level = "trace")] @@ -610,11 +593,11 @@ impl SourceMap { end: (local_end.sf.name.clone(), local_end.sf.start_pos), }))) } else { - self.ensure_source_file_source_present(local_begin.sf.clone()); + self.ensure_source_file_source_present(&local_begin.sf); let start_index = local_begin.pos.to_usize(); let end_index = local_end.pos.to_usize(); - let source_len = (local_begin.sf.end_pos - local_begin.sf.start_pos).to_usize(); + let source_len = local_begin.sf.source_len.to_usize(); if start_index > end_index || end_index > source_len { return Err(SpanSnippetError::MalformedForSourcemap(MalformedSourceMapPositions { @@ -627,7 +610,7 @@ impl SourceMap { if let Some(ref src) = local_begin.sf.src { extract_source(src, start_index, end_index) - } else if let Some(src) = local_begin.sf.external_src.borrow().get_source() { + } else if let Some(src) = local_begin.sf.external_src.read().get_source() { extract_source(src, start_index, end_index) } else { Err(SpanSnippetError::SourceNotAvailable { filename: local_begin.sf.name.clone() }) @@ -919,7 +902,7 @@ impl SourceMap { let sp = sp.data(); let local_begin = self.lookup_byte_offset(sp.lo); let start_index = local_begin.pos.to_usize(); - let src = local_begin.sf.external_src.borrow(); + let src = local_begin.sf.external_src.read(); let snippet = if let Some(ref src) = local_begin.sf.src { Some(&src[start_index..]) @@ -1021,7 +1004,7 @@ impl SourceMap { return 1; } - let source_len = (local_begin.sf.end_pos - local_begin.sf.start_pos).to_usize(); + let source_len = local_begin.sf.source_len.to_usize(); debug!("source_len=`{:?}`", source_len); // Ensure indexes are also not malformed. if start_index > end_index || end_index > source_len - 1 { @@ -1029,7 +1012,7 @@ impl SourceMap { return 1; } - let src = local_begin.sf.external_src.borrow(); + let src = local_begin.sf.external_src.read(); let snippet = if let Some(src) = &local_begin.sf.src { src @@ -1076,7 +1059,7 @@ impl SourceMap { self.files().iter().fold(0, |a, f| a + f.count_lines()) } - pub fn ensure_source_file_source_present(&self, source_file: Lrc) -> bool { + pub fn ensure_source_file_source_present(&self, source_file: &SourceFile) -> bool { source_file.add_external_src(|| { let FileName::Real(ref name) = source_file.name else { return None; diff --git a/compiler/rustc_span/src/source_map/tests.rs b/compiler/rustc_span/src/source_map/tests.rs index 686b3b00d..a12f50c87 100644 --- a/compiler/rustc_span/src/source_map/tests.rs +++ b/compiler/rustc_span/src/source_map/tests.rs @@ -1,6 +1,6 @@ use super::*; -use rustc_data_structures::sync::Lrc; +use rustc_data_structures::sync::{FreezeLock, Lrc}; fn init_source_map() -> SourceMap { let sm = SourceMap::new(FilePathMapping::empty()); @@ -50,6 +50,7 @@ impl SourceMap { fn bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos { let idx = self.lookup_source_file_idx(bpos); let sf = &(*self.files.borrow().source_files)[idx]; + let bpos = sf.relative_position(bpos); sf.bytepos_to_file_charpos(bpos) } } @@ -230,8 +231,7 @@ fn t10() { let SourceFile { name, src_hash, - start_pos, - end_pos, + source_len, lines, multibyte_chars, non_narrow_chars, @@ -244,13 +244,12 @@ fn t10() { name, src_hash, name_hash, - (end_pos - start_pos).to_usize(), + source_len.to_u32(), CrateNum::new(0), - lines, + FreezeLock::new(lines.read().clone()), multibyte_chars, non_narrow_chars, normalized_pos, - start_pos, 0, ); @@ -568,3 +567,30 @@ fn test_next_point() { assert_eq!(span.hi().0, 6); assert!(sm.span_to_snippet(span).is_err()); } + +#[cfg(target_os = "linux")] +#[test] +fn read_binary_file_handles_lying_stat() { + // read_binary_file tries to read the contents of a file into an Lrc<[u8]> while + // never having two copies of the data in memory at once. This is an optimization + // to support include_bytes! with large files. But since Rust allocators are + // sensitive to alignment, our implementation can't be bootstrapped off calling + // std::fs::read. So we test that we have the same behavior even on files where + // fs::metadata lies. + + // stat always says that /proc/self/cmdline is length 0, but it isn't. + let cmdline = Path::new("/proc/self/cmdline"); + let len = std::fs::metadata(cmdline).unwrap().len() as usize; + let real = std::fs::read(cmdline).unwrap(); + assert!(len < real.len()); + let bin = RealFileLoader.read_binary_file(cmdline).unwrap(); + assert_eq!(&real[..], &bin[..]); + + // stat always says that /sys/devices/system/cpu/kernel_max is the size of a block. + let kernel_max = Path::new("/sys/devices/system/cpu/kernel_max"); + let len = std::fs::metadata(kernel_max).unwrap().len() as usize; + let real = std::fs::read(kernel_max).unwrap(); + assert!(len > real.len()); + let bin = RealFileLoader.read_binary_file(kernel_max).unwrap(); + assert_eq!(&real[..], &bin[..]); +} diff --git a/compiler/rustc_span/src/span_encoding.rs b/compiler/rustc_span/src/span_encoding.rs index 1eea0f63c..93ab15460 100644 --- a/compiler/rustc_span/src/span_encoding.rs +++ b/compiler/rustc_span/src/span_encoding.rs @@ -1,9 +1,3 @@ -// Spans are encoded using 1-bit tag and 2 different encoding formats (one for each tag value). -// One format is used for keeping span data inline, -// another contains index into an out-of-line span interner. -// The encoding format for inline spans were obtained by optimizing over crates in rustc/libstd. -// See https://internals.rust-lang.org/t/rfc-compiler-refactoring-spans/1357/28 - use crate::def_id::{DefIndex, LocalDefId}; use crate::hygiene::SyntaxContext; use crate::SPAN_TRACK; @@ -13,59 +7,69 @@ use rustc_data_structures::fx::FxIndexSet; /// A compressed span. /// -/// Whereas [`SpanData`] is 16 bytes, which is a bit too big to stick everywhere, `Span` -/// is a form that only takes up 8 bytes, with less space for the length, parent and -/// context. The vast majority (99.9%+) of `SpanData` instances will fit within -/// those 8 bytes; any `SpanData` whose fields don't fit into a `Span` are +/// [`SpanData`] is 16 bytes, which is too big to stick everywhere. `Span` only +/// takes up 8 bytes, with less space for the length, parent and context. The +/// vast majority (99.9%+) of `SpanData` instances can be made to fit within +/// those 8 bytes. Any `SpanData` whose fields don't fit into a `Span` are /// stored in a separate interner table, and the `Span` will index into that /// table. Interning is rare enough that the cost is low, but common enough /// that the code is exercised regularly. /// /// An earlier version of this code used only 4 bytes for `Span`, but that was /// slower because only 80--90% of spans could be stored inline (even less in -/// very large crates) and so the interner was used a lot more. +/// very large crates) and so the interner was used a lot more. That version of +/// the code also predated the storage of parents. +/// +/// There are four different span forms. /// -/// Inline (compressed) format with no parent: -/// - `span.base_or_index == span_data.lo` -/// - `span.len_or_tag == len == span_data.hi - span_data.lo` (must be `<= MAX_LEN`) -/// - `span.ctxt_or_tag == span_data.ctxt` (must be `<= MAX_CTXT`) +/// Inline-context format (requires non-huge length, non-huge context, and no parent): +/// - `span.lo_or_index == span_data.lo` +/// - `span.len_with_tag_or_marker == len == span_data.hi - span_data.lo` (must be `<= MAX_LEN`) +/// - `span.ctxt_or_parent_or_marker == span_data.ctxt` (must be `<= MAX_CTXT`) /// -/// Interned format with inline `SyntaxContext`: -/// - `span.base_or_index == index` (indexes into the interner table) -/// - `span.len_or_tag == LEN_TAG` (high bit set, all other bits are zero) -/// - `span.ctxt_or_tag == span_data.ctxt` (must be `<= MAX_CTXT`) +/// Inline-parent format (requires non-huge length, root context, and non-huge parent): +/// - `span.lo_or_index == span_data.lo` +/// - `span.len_with_tag_or_marker & !PARENT_TAG == len == span_data.hi - span_data.lo` +/// (must be `<= MAX_LEN`) +/// - `span.len_with_tag_or_marker` has top bit (`PARENT_TAG`) set +/// - `span.ctxt_or_parent_or_marker == span_data.parent` (must be `<= MAX_CTXT`) /// -/// Inline (compressed) format with root context: -/// - `span.base_or_index == span_data.lo` -/// - `span.len_or_tag == len == span_data.hi - span_data.lo` (must be `<= MAX_LEN`) -/// - `span.len_or_tag` has top bit (`PARENT_MASK`) set -/// - `span.ctxt == span_data.parent` (must be `<= MAX_CTXT`) +/// Partially-interned format (requires non-huge context): +/// - `span.lo_or_index == index` (indexes into the interner table) +/// - `span.len_with_tag_or_marker == BASE_LEN_INTERNED_MARKER` +/// - `span.ctxt_or_parent_or_marker == span_data.ctxt` (must be `<= MAX_CTXT`) /// -/// Interned format: -/// - `span.base_or_index == index` (indexes into the interner table) -/// - `span.len_or_tag == LEN_TAG` (high bit set, all other bits are zero) -/// - `span.ctxt_or_tag == CTXT_TAG` +/// Fully-interned format (all cases not covered above): +/// - `span.lo_or_index == index` (indexes into the interner table) +/// - `span.len_with_tag_or_marker == BASE_LEN_INTERNED_MARKER` +/// - `span.ctxt_or_parent_or_marker == CTXT_INTERNED_MARKER` /// -/// The inline form uses 0 for the tag value (rather than 1) so that we don't -/// need to mask out the tag bit when getting the length, and so that the -/// dummy span can be all zeroes. +/// The partially-interned form requires looking in the interning table for +/// lo and length, but the context is stored inline as well as interned. +/// This is useful because context lookups are often done in isolation, and +/// inline lookups are quicker. /// /// Notes about the choice of field sizes: -/// - `base` is 32 bits in both `Span` and `SpanData`, which means that `base` -/// values never cause interning. The number of bits needed for `base` +/// - `lo` is 32 bits in both `Span` and `SpanData`, which means that `lo` +/// values never cause interning. The number of bits needed for `lo` /// depends on the crate size. 32 bits allows up to 4 GiB of code in a crate. -/// - `len` is 15 bits in `Span` (a u16, minus 1 bit for the tag) and 32 bits -/// in `SpanData`, which means that large `len` values will cause interning. -/// The number of bits needed for `len` does not depend on the crate size. -/// The most common numbers of bits for `len` are from 0 to 7, with a peak usually -/// at 3 or 4, and then it drops off quickly from 8 onwards. 15 bits is enough -/// for 99.99%+ of cases, but larger values (sometimes 20+ bits) might occur -/// dozens of times in a typical crate. -/// - `ctxt_or_tag` is 16 bits in `Span` and 32 bits in `SpanData`, which means that -/// large `ctxt` values will cause interning. The number of bits needed for -/// `ctxt` values depend partly on the crate size and partly on the form of -/// the code. No crates in `rustc-perf` need more than 15 bits for `ctxt_or_tag`, -/// but larger crates might need more than 16 bits. +/// Having no compression on this field means there is no performance cliff +/// if a crate exceeds a particular size. +/// - `len` is ~15 bits in `Span` (a u16, minus 1 bit for PARENT_TAG) and 32 +/// bits in `SpanData`, which means that large `len` values will cause +/// interning. The number of bits needed for `len` does not depend on the +/// crate size. The most common numbers of bits for `len` are from 0 to 7, +/// with a peak usually at 3 or 4, and then it drops off quickly from 8 +/// onwards. 15 bits is enough for 99.99%+ of cases, but larger values +/// (sometimes 20+ bits) might occur dozens of times in a typical crate. +/// - `ctxt_or_parent_or_marker` is 16 bits in `Span` and two 32 bit fields in +/// `SpanData`, which means intering will happen if `ctxt` is large, if +/// `parent` is large, or if both values are non-zero. The number of bits +/// needed for `ctxt` values depend partly on the crate size and partly on +/// the form of the code. No crates in `rustc-perf` need more than 15 bits +/// for `ctxt_or_parent_or_marker`, but larger crates might need more than 16 +/// bits. The number of bits needed for `parent` hasn't been measured, +/// because `parent` isn't currently used by default. /// /// In order to reliably use parented spans in incremental compilation, /// the dependency to the parent definition's span. This is performed @@ -74,19 +78,22 @@ use rustc_data_structures::fx::FxIndexSet; #[derive(Clone, Copy, Eq, PartialEq, Hash)] #[rustc_pass_by_value] pub struct Span { - base_or_index: u32, - len_or_tag: u16, - ctxt_or_tag: u16, + lo_or_index: u32, + len_with_tag_or_marker: u16, + ctxt_or_parent_or_marker: u16, } -const LEN_TAG: u16 = 0b1111_1111_1111_1111; -const PARENT_MASK: u16 = 0b1000_0000_0000_0000; -const MAX_LEN: u32 = 0b0111_1111_1111_1111; -const CTXT_TAG: u32 = 0b1111_1111_1111_1111; -const MAX_CTXT: u32 = CTXT_TAG - 1; +// `MAX_LEN` is chosen so that `PARENT_TAG | MAX_LEN` is distinct from +// `BASE_LEN_INTERNED_MARKER`. (If `MAX_LEN` was 1 higher, this wouldn't be true.) +const MAX_LEN: u32 = 0b0111_1111_1111_1110; +const MAX_CTXT: u32 = 0b0111_1111_1111_1110; +const PARENT_TAG: u16 = 0b1000_0000_0000_0000; +const BASE_LEN_INTERNED_MARKER: u16 = 0b1111_1111_1111_1111; +const CTXT_INTERNED_MARKER: u16 = 0b1111_1111_1111_1111; -/// Dummy span, both position and length are zero, syntax context is zero as well. -pub const DUMMY_SP: Span = Span { base_or_index: 0, len_or_tag: 0, ctxt_or_tag: 0 }; +/// The dummy span has zero position, length, and context, and no parent. +pub const DUMMY_SP: Span = + Span { lo_or_index: 0, len_with_tag_or_marker: 0, ctxt_or_parent_or_marker: 0 }; impl Span { #[inline] @@ -100,39 +107,43 @@ impl Span { std::mem::swap(&mut lo, &mut hi); } - let (base, len, ctxt2) = (lo.0, hi.0 - lo.0, ctxt.as_u32()); - - if len <= MAX_LEN && ctxt2 <= MAX_CTXT { - let len_or_tag = len as u16; - debug_assert_eq!(len_or_tag & PARENT_MASK, 0); + let (lo2, len, ctxt2) = (lo.0, hi.0 - lo.0, ctxt.as_u32()); - if let Some(parent) = parent { - // Inline format with parent. - let len_or_tag = len_or_tag | PARENT_MASK; - let parent2 = parent.local_def_index.as_u32(); - if ctxt2 == SyntaxContext::root().as_u32() - && parent2 <= MAX_CTXT - && len_or_tag < LEN_TAG - { - debug_assert_ne!(len_or_tag, LEN_TAG); - return Span { base_or_index: base, len_or_tag, ctxt_or_tag: parent2 as u16 }; - } - } else { - // Inline format with ctxt. - debug_assert_ne!(len_or_tag, LEN_TAG); + if len <= MAX_LEN { + if ctxt2 <= MAX_CTXT && parent.is_none() { + // Inline-context format. return Span { - base_or_index: base, - len_or_tag: len as u16, - ctxt_or_tag: ctxt2 as u16, + lo_or_index: lo2, + len_with_tag_or_marker: len as u16, + ctxt_or_parent_or_marker: ctxt2 as u16, + }; + } else if ctxt2 == SyntaxContext::root().as_u32() + && let Some(parent) = parent + && let parent2 = parent.local_def_index.as_u32() + && parent2 <= MAX_CTXT + { + // Inline-parent format. + return Span { + lo_or_index: lo2, + len_with_tag_or_marker: PARENT_TAG | len as u16, + ctxt_or_parent_or_marker: parent2 as u16 }; } } - // Interned format. + // Partially-interned or fully-interned format. let index = with_span_interner(|interner| interner.intern(&SpanData { lo, hi, ctxt, parent })); - let ctxt_or_tag = if ctxt2 <= MAX_CTXT { ctxt2 } else { CTXT_TAG } as u16; - Span { base_or_index: index, len_or_tag: LEN_TAG, ctxt_or_tag } + let ctxt_or_parent_or_marker = if ctxt2 <= MAX_CTXT { + ctxt2 as u16 // partially-interned + } else { + CTXT_INTERNED_MARKER // fully-interned + }; + Span { + lo_or_index: index, + len_with_tag_or_marker: BASE_LEN_INTERNED_MARKER, + ctxt_or_parent_or_marker, + } } #[inline] @@ -148,56 +159,80 @@ impl Span { /// This function must not be used outside the incremental engine. #[inline] pub fn data_untracked(self) -> SpanData { - if self.len_or_tag != LEN_TAG { - // Inline format. - if self.len_or_tag & PARENT_MASK == 0 { - debug_assert!(self.len_or_tag as u32 <= MAX_LEN); + if self.len_with_tag_or_marker != BASE_LEN_INTERNED_MARKER { + if self.len_with_tag_or_marker & PARENT_TAG == 0 { + // Inline-context format. + let len = self.len_with_tag_or_marker as u32; + debug_assert!(len <= MAX_LEN); SpanData { - lo: BytePos(self.base_or_index), - hi: BytePos(self.base_or_index + self.len_or_tag as u32), - ctxt: SyntaxContext::from_u32(self.ctxt_or_tag as u32), + lo: BytePos(self.lo_or_index), + hi: BytePos(self.lo_or_index + len), + ctxt: SyntaxContext::from_u32(self.ctxt_or_parent_or_marker as u32), parent: None, } } else { - let len = self.len_or_tag & !PARENT_MASK; - debug_assert!(len as u32 <= MAX_LEN); - let parent = - LocalDefId { local_def_index: DefIndex::from_u32(self.ctxt_or_tag as u32) }; + // Inline-parent format. + let len = (self.len_with_tag_or_marker & !PARENT_TAG) as u32; + debug_assert!(len <= MAX_LEN); + let parent = LocalDefId { + local_def_index: DefIndex::from_u32(self.ctxt_or_parent_or_marker as u32), + }; SpanData { - lo: BytePos(self.base_or_index), - hi: BytePos(self.base_or_index + len as u32), + lo: BytePos(self.lo_or_index), + hi: BytePos(self.lo_or_index + len), ctxt: SyntaxContext::root(), parent: Some(parent), } } } else { - // Interned format. - let index = self.base_or_index; + // Fully-interned or partially-interned format. In either case, + // the interned value contains all the data, so we don't need to + // distinguish them. + let index = self.lo_or_index; with_span_interner(|interner| interner.spans[index as usize]) } } + /// Returns `true` if this is a dummy span with any hygienic context. + #[inline] + pub fn is_dummy(self) -> bool { + if self.len_with_tag_or_marker != BASE_LEN_INTERNED_MARKER { + // Inline-context or inline-parent format. + let lo = self.lo_or_index; + let len = (self.len_with_tag_or_marker & !PARENT_TAG) as u32; + debug_assert!(len <= MAX_LEN); + lo == 0 && len == 0 + } else { + // Fully-interned or partially-interned format. + let index = self.lo_or_index; + let data = with_span_interner(|interner| interner.spans[index as usize]); + data.lo == BytePos(0) && data.hi == BytePos(0) + } + } + /// This function is used as a fast path when decoding the full `SpanData` is not necessary. + /// It's a cut-down version of `data_untracked`. #[inline] pub fn ctxt(self) -> SyntaxContext { - let ctxt_or_tag = self.ctxt_or_tag as u32; - // Check for interned format. - if self.len_or_tag == LEN_TAG { - if ctxt_or_tag == CTXT_TAG { - // Fully interned format. - let index = self.base_or_index; - with_span_interner(|interner| interner.spans[index as usize].ctxt) + if self.len_with_tag_or_marker != BASE_LEN_INTERNED_MARKER { + if self.len_with_tag_or_marker & PARENT_TAG == 0 { + // Inline-context format. + SyntaxContext::from_u32(self.ctxt_or_parent_or_marker as u32) } else { - // Interned format with inline ctxt. - SyntaxContext::from_u32(ctxt_or_tag) + // Inline-parent format. We know that the SyntaxContext is root. + SyntaxContext::root() } - } else if self.len_or_tag & PARENT_MASK == 0 { - // Inline format with inline ctxt. - SyntaxContext::from_u32(ctxt_or_tag) } else { - // Inline format with inline parent. - // We know that the SyntaxContext is root. - SyntaxContext::root() + if self.ctxt_or_parent_or_marker != CTXT_INTERNED_MARKER { + // Partially-interned format. This path avoids looking up the + // interned value, and is the whole point of the + // partially-interned format. + SyntaxContext::from_u32(self.ctxt_or_parent_or_marker as u32) + } else { + // Fully-interned format. + let index = self.lo_or_index; + with_span_interner(|interner| interner.spans[index as usize].ctxt) + } } } } diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index 28a2dfebc..4f4625662 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -232,11 +232,13 @@ symbols! { NonZeroI32, NonZeroI64, NonZeroI8, + NonZeroIsize, NonZeroU128, NonZeroU16, NonZeroU32, NonZeroU64, NonZeroU8, + NonZeroUsize, None, Ok, Option, @@ -278,6 +280,7 @@ symbols! { RwLock, RwLockReadGuard, RwLockWriteGuard, + Saturating, Send, SeqCst, SliceIndex, @@ -305,6 +308,7 @@ symbols! { Vec, VecDeque, Wrapper, + Wrapping, Yield, _DECLS, _Self, @@ -383,6 +387,7 @@ symbols! { asm_sym, asm_unwind, assert, + assert_eq, assert_eq_macro, assert_inhabited, assert_macro, @@ -569,6 +574,8 @@ symbols! { cosf32, cosf64, count, + coverage, + coverage_attribute, cr, crate_id, crate_in_paths, @@ -585,6 +592,7 @@ symbols! { cttz, cttz_nonzero, custom_attribute, + custom_code_classes_in_docs, custom_derive, custom_inner_attributes, custom_mir, @@ -1064,6 +1072,7 @@ symbols! { note, object_safe_for_dispatch, of, + off, offset, offset_of, omit_gdb_pretty_printer_section, @@ -1100,6 +1109,7 @@ symbols! { panic_handler, panic_impl, panic_implementation, + panic_in_cleanup, panic_info, panic_location, panic_misaligned_pointer_dereference, @@ -1169,7 +1179,6 @@ symbols! { ptr_cast_const, ptr_cast_mut, ptr_const_is_null, - ptr_from_mut, ptr_from_ref, ptr_guaranteed_cmp, ptr_is_null, @@ -1179,6 +1188,9 @@ symbols! { ptr_offset_from, ptr_offset_from_unsigned, ptr_unique, + ptr_write, + ptr_write_unaligned, + ptr_write_volatile, pub_macro_rules, pub_restricted, public, @@ -1273,6 +1285,7 @@ symbols! { rust_eh_catch_typeinfo, rust_eh_personality, rustc, + rustc_abi, rustc_allocator, rustc_allocator_zeroed, rustc_allow_const_fn_unstable, @@ -1324,6 +1337,7 @@ symbols! { rustc_main, rustc_mir, rustc_must_implement_one_of, + rustc_never_returns_null_ptr, rustc_nonnull_optimization_guaranteed, rustc_nounwind, rustc_object_lifetime_default, @@ -1357,6 +1371,7 @@ symbols! { rustc_trivial_field_reads, rustc_unsafe_specialization_marker, rustc_variance, + rustc_variance_of_opaques, rustdoc, rustdoc_internals, rustdoc_missing_doc_code_examples, @@ -1370,6 +1385,7 @@ symbols! { sanitizer_cfi_normalize_integers, sanitizer_runtime, saturating_add, + saturating_div, saturating_sub, self_in_typedefs, self_struct_ctor, @@ -1449,6 +1465,7 @@ symbols! { simd_shl, simd_shr, simd_shuffle, + simd_shuffle_generic, simd_sub, simd_trunc, simd_xor, @@ -1615,6 +1632,7 @@ symbols! { unix_sigpipe, unlikely, unmarked_api, + unnamed_fields, unpin, unreachable, unreachable_2015, @@ -1627,6 +1645,7 @@ symbols! { unsafe_block_in_unsafe_fn, unsafe_cell, unsafe_cell_from_mut, + unsafe_cell_raw_get, unsafe_no_drop_flag, unsafe_pin_internals, unsize, @@ -1687,7 +1706,10 @@ symbols! { windows_subsystem, with_negative_coherence, wrapping_add, + wrapping_div, wrapping_mul, + wrapping_rem, + wrapping_rem_euclid, wrapping_sub, wreg, write_bytes, diff --git a/compiler/rustc_span/src/tests.rs b/compiler/rustc_span/src/tests.rs index a242ad6d1..cb88fa890 100644 --- a/compiler/rustc_span/src/tests.rs +++ b/compiler/rustc_span/src/tests.rs @@ -3,24 +3,21 @@ use super::*; #[test] fn test_lookup_line() { let source = "abcdefghijklm\nabcdefghij\n...".to_owned(); - let sf = SourceFile::new( - FileName::Anon(Hash64::ZERO), - source, - BytePos(3), - SourceFileHashAlgorithm::Sha256, - ); - sf.lines(|lines| assert_eq!(lines, &[BytePos(3), BytePos(17), BytePos(28)])); + let mut sf = + SourceFile::new(FileName::Anon(Hash64::ZERO), source, SourceFileHashAlgorithm::Sha256) + .unwrap(); + sf.start_pos = BytePos(3); + assert_eq!(sf.lines(), &[RelativeBytePos(0), RelativeBytePos(14), RelativeBytePos(25)]); - assert_eq!(sf.lookup_line(BytePos(0)), None); - assert_eq!(sf.lookup_line(BytePos(3)), Some(0)); - assert_eq!(sf.lookup_line(BytePos(4)), Some(0)); + assert_eq!(sf.lookup_line(RelativeBytePos(0)), Some(0)); + assert_eq!(sf.lookup_line(RelativeBytePos(1)), Some(0)); - assert_eq!(sf.lookup_line(BytePos(16)), Some(0)); - assert_eq!(sf.lookup_line(BytePos(17)), Some(1)); - assert_eq!(sf.lookup_line(BytePos(18)), Some(1)); + assert_eq!(sf.lookup_line(RelativeBytePos(13)), Some(0)); + assert_eq!(sf.lookup_line(RelativeBytePos(14)), Some(1)); + assert_eq!(sf.lookup_line(RelativeBytePos(15)), Some(1)); - assert_eq!(sf.lookup_line(BytePos(28)), Some(2)); - assert_eq!(sf.lookup_line(BytePos(29)), Some(2)); + assert_eq!(sf.lookup_line(RelativeBytePos(25)), Some(2)); + assert_eq!(sf.lookup_line(RelativeBytePos(26)), Some(2)); } #[test] -- cgit v1.2.3