summaryrefslogtreecommitdiffstats
path: root/compiler/rustc_span
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-30 18:31:36 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-30 18:31:36 +0000
commite02c5b5930c2c9ba3e5423fe12e2ef0155017297 (patch)
treefd60ebbbb5299e16e5fca8c773ddb74f764760db /compiler/rustc_span
parentAdding debian version 1.73.0+dfsg1-1. (diff)
downloadrustc-e02c5b5930c2c9ba3e5423fe12e2ef0155017297.tar.xz
rustc-e02c5b5930c2c9ba3e5423fe12e2ef0155017297.zip
Merging upstream version 1.74.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'compiler/rustc_span')
-rw-r--r--compiler/rustc_span/src/analyze_source_file.rs48
-rw-r--r--compiler/rustc_span/src/analyze_source_file/tests.rs29
-rw-r--r--compiler/rustc_span/src/caching_source_map_view.rs15
-rw-r--r--compiler/rustc_span/src/hygiene.rs126
-rw-r--r--compiler/rustc_span/src/lib.rs512
-rw-r--r--compiler/rustc_span/src/source_map.rs217
-rw-r--r--compiler/rustc_span/src/source_map/tests.rs38
-rw-r--r--compiler/rustc_span/src/span_encoding.rs255
-rw-r--r--compiler/rustc_span/src/symbol.rs24
-rw-r--r--compiler/rustc_span/src/tests.rs27
10 files changed, 701 insertions, 590 deletions
diff --git a/compiler/rustc_span/src/analyze_source_file.rs b/compiler/rustc_span/src/analyze_source_file.rs
index 26cd54210..450d5455f 100644
--- a/compiler/rustc_span/src/analyze_source_file.rs
+++ b/compiler/rustc_span/src/analyze_source_file.rs
@@ -11,26 +11,19 @@ mod tests;
/// is detected at runtime.
pub fn analyze_source_file(
src: &str,
- source_file_start_pos: BytePos,
-) -> (Vec<BytePos>, Vec<MultiByteChar>, Vec<NonNarrowChar>) {
- let mut lines = vec![source_file_start_pos];
+) -> (Vec<RelativeBytePos>, Vec<MultiByteChar>, Vec<NonNarrowChar>) {
+ let mut lines = vec![RelativeBytePos::from_u32(0)];
let mut multi_byte_chars = vec![];
let mut non_narrow_chars = vec![];
// Calls the right implementation, depending on hardware support available.
- analyze_source_file_dispatch(
- src,
- source_file_start_pos,
- &mut lines,
- &mut multi_byte_chars,
- &mut non_narrow_chars,
- );
+ analyze_source_file_dispatch(src, &mut lines, &mut multi_byte_chars, &mut non_narrow_chars);
// The code above optimistically registers a new line *after* each \n
// it encounters. If that point is already outside the source_file, remove
// it again.
if let Some(&last_line_start) = lines.last() {
- let source_file_end = source_file_start_pos + BytePos::from_usize(src.len());
+ let source_file_end = RelativeBytePos::from_usize(src.len());
assert!(source_file_end >= last_line_start);
if last_line_start == source_file_end {
lines.pop();
@@ -43,14 +36,12 @@ pub fn analyze_source_file(
cfg_if::cfg_if! {
if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
fn analyze_source_file_dispatch(src: &str,
- source_file_start_pos: BytePos,
- lines: &mut Vec<BytePos>,
+ lines: &mut Vec<RelativeBytePos>,
multi_byte_chars: &mut Vec<MultiByteChar>,
non_narrow_chars: &mut Vec<NonNarrowChar>) {
if is_x86_feature_detected!("sse2") {
unsafe {
analyze_source_file_sse2(src,
- source_file_start_pos,
lines,
multi_byte_chars,
non_narrow_chars);
@@ -58,7 +49,7 @@ cfg_if::cfg_if! {
} else {
analyze_source_file_generic(src,
src.len(),
- source_file_start_pos,
+ RelativeBytePos::from_u32(0),
lines,
multi_byte_chars,
non_narrow_chars);
@@ -72,8 +63,7 @@ cfg_if::cfg_if! {
/// SSE2 intrinsics to quickly find all newlines.
#[target_feature(enable = "sse2")]
unsafe fn analyze_source_file_sse2(src: &str,
- output_offset: BytePos,
- lines: &mut Vec<BytePos>,
+ lines: &mut Vec<RelativeBytePos>,
multi_byte_chars: &mut Vec<MultiByteChar>,
non_narrow_chars: &mut Vec<NonNarrowChar>) {
#[cfg(target_arch = "x86")]
@@ -129,8 +119,7 @@ cfg_if::cfg_if! {
if control_char_mask == newlines_mask {
// All control characters are newlines, record them
let mut newlines_mask = 0xFFFF0000 | newlines_mask as u32;
- let output_offset = output_offset +
- BytePos::from_usize(chunk_index * CHUNK_SIZE + 1);
+ let output_offset = RelativeBytePos::from_usize(chunk_index * CHUNK_SIZE + 1);
loop {
let index = newlines_mask.trailing_zeros();
@@ -140,7 +129,7 @@ cfg_if::cfg_if! {
break
}
- lines.push(BytePos(index) + output_offset);
+ lines.push(RelativeBytePos(index) + output_offset);
// Clear the bit, so we can find the next one.
newlines_mask &= (!1) << index;
@@ -165,7 +154,7 @@ cfg_if::cfg_if! {
intra_chunk_offset = analyze_source_file_generic(
&src[scan_start .. ],
CHUNK_SIZE - intra_chunk_offset,
- BytePos::from_usize(scan_start) + output_offset,
+ RelativeBytePos::from_usize(scan_start),
lines,
multi_byte_chars,
non_narrow_chars
@@ -177,7 +166,7 @@ cfg_if::cfg_if! {
if tail_start < src.len() {
analyze_source_file_generic(&src[tail_start ..],
src.len() - tail_start,
- output_offset + BytePos::from_usize(tail_start),
+ RelativeBytePos::from_usize(tail_start),
lines,
multi_byte_chars,
non_narrow_chars);
@@ -187,13 +176,12 @@ cfg_if::cfg_if! {
// The target (or compiler version) does not support SSE2 ...
fn analyze_source_file_dispatch(src: &str,
- source_file_start_pos: BytePos,
- lines: &mut Vec<BytePos>,
+ lines: &mut Vec<RelativeBytePos>,
multi_byte_chars: &mut Vec<MultiByteChar>,
non_narrow_chars: &mut Vec<NonNarrowChar>) {
analyze_source_file_generic(src,
src.len(),
- source_file_start_pos,
+ RelativeBytePos::from_u32(0),
lines,
multi_byte_chars,
non_narrow_chars);
@@ -207,8 +195,8 @@ cfg_if::cfg_if! {
fn analyze_source_file_generic(
src: &str,
scan_len: usize,
- output_offset: BytePos,
- lines: &mut Vec<BytePos>,
+ output_offset: RelativeBytePos,
+ lines: &mut Vec<RelativeBytePos>,
multi_byte_chars: &mut Vec<MultiByteChar>,
non_narrow_chars: &mut Vec<NonNarrowChar>,
) -> usize {
@@ -230,11 +218,11 @@ fn analyze_source_file_generic(
// This is an ASCII control character, it could be one of the cases
// that are interesting to us.
- let pos = BytePos::from_usize(i) + output_offset;
+ let pos = RelativeBytePos::from_usize(i) + output_offset;
match byte {
b'\n' => {
- lines.push(pos + BytePos(1));
+ lines.push(pos + RelativeBytePos(1));
}
b'\t' => {
non_narrow_chars.push(NonNarrowChar::Tab(pos));
@@ -250,7 +238,7 @@ fn analyze_source_file_generic(
let c = src[i..].chars().next().unwrap();
char_len = c.len_utf8();
- let pos = BytePos::from_usize(i) + output_offset;
+ let pos = RelativeBytePos::from_usize(i) + output_offset;
if char_len > 1 {
assert!((2..=4).contains(&char_len));
diff --git a/compiler/rustc_span/src/analyze_source_file/tests.rs b/compiler/rustc_span/src/analyze_source_file/tests.rs
index 66aefc9a7..0c77d080c 100644
--- a/compiler/rustc_span/src/analyze_source_file/tests.rs
+++ b/compiler/rustc_span/src/analyze_source_file/tests.rs
@@ -3,29 +3,28 @@ use super::*;
macro_rules! test {
(case: $test_name:ident,
text: $text:expr,
- source_file_start_pos: $source_file_start_pos:expr,
lines: $lines:expr,
multi_byte_chars: $multi_byte_chars:expr,
non_narrow_chars: $non_narrow_chars:expr,) => {
#[test]
fn $test_name() {
- let (lines, multi_byte_chars, non_narrow_chars) =
- analyze_source_file($text, BytePos($source_file_start_pos));
+ let (lines, multi_byte_chars, non_narrow_chars) = analyze_source_file($text);
- let expected_lines: Vec<BytePos> = $lines.into_iter().map(BytePos).collect();
+ let expected_lines: Vec<RelativeBytePos> =
+ $lines.into_iter().map(RelativeBytePos).collect();
assert_eq!(lines, expected_lines);
let expected_mbcs: Vec<MultiByteChar> = $multi_byte_chars
.into_iter()
- .map(|(pos, bytes)| MultiByteChar { pos: BytePos(pos), bytes })
+ .map(|(pos, bytes)| MultiByteChar { pos: RelativeBytePos(pos), bytes })
.collect();
assert_eq!(multi_byte_chars, expected_mbcs);
let expected_nncs: Vec<NonNarrowChar> = $non_narrow_chars
.into_iter()
- .map(|(pos, width)| NonNarrowChar::new(BytePos(pos), width))
+ .map(|(pos, width)| NonNarrowChar::new(RelativeBytePos(pos), width))
.collect();
assert_eq!(non_narrow_chars, expected_nncs);
@@ -36,7 +35,6 @@ macro_rules! test {
test!(
case: empty_text,
text: "",
- source_file_start_pos: 0,
lines: vec![],
multi_byte_chars: vec![],
non_narrow_chars: vec![],
@@ -45,7 +43,6 @@ test!(
test!(
case: newlines_short,
text: "a\nc",
- source_file_start_pos: 0,
lines: vec![0, 2],
multi_byte_chars: vec![],
non_narrow_chars: vec![],
@@ -54,7 +51,6 @@ test!(
test!(
case: newlines_long,
text: "012345678\nabcdef012345678\na",
- source_file_start_pos: 0,
lines: vec![0, 10, 26],
multi_byte_chars: vec![],
non_narrow_chars: vec![],
@@ -63,7 +59,6 @@ test!(
test!(
case: newline_and_multi_byte_char_in_same_chunk,
text: "01234β789\nbcdef0123456789abcdef",
- source_file_start_pos: 0,
lines: vec![0, 11],
multi_byte_chars: vec![(5, 2)],
non_narrow_chars: vec![],
@@ -72,7 +67,6 @@ test!(
test!(
case: newline_and_control_char_in_same_chunk,
text: "01234\u{07}6789\nbcdef0123456789abcdef",
- source_file_start_pos: 0,
lines: vec![0, 11],
multi_byte_chars: vec![],
non_narrow_chars: vec![(5, 0)],
@@ -81,7 +75,6 @@ test!(
test!(
case: multi_byte_char_short,
text: "aβc",
- source_file_start_pos: 0,
lines: vec![0],
multi_byte_chars: vec![(1, 2)],
non_narrow_chars: vec![],
@@ -90,7 +83,6 @@ test!(
test!(
case: multi_byte_char_long,
text: "0123456789abcΔf012345β",
- source_file_start_pos: 0,
lines: vec![0],
multi_byte_chars: vec![(13, 2), (22, 2)],
non_narrow_chars: vec![],
@@ -99,7 +91,6 @@ test!(
test!(
case: multi_byte_char_across_chunk_boundary,
text: "0123456789abcdeΔ123456789abcdef01234",
- source_file_start_pos: 0,
lines: vec![0],
multi_byte_chars: vec![(15, 2)],
non_narrow_chars: vec![],
@@ -108,7 +99,6 @@ test!(
test!(
case: multi_byte_char_across_chunk_boundary_tail,
text: "0123456789abcdeΔ....",
- source_file_start_pos: 0,
lines: vec![0],
multi_byte_chars: vec![(15, 2)],
non_narrow_chars: vec![],
@@ -117,7 +107,6 @@ test!(
test!(
case: non_narrow_short,
text: "0\t2",
- source_file_start_pos: 0,
lines: vec![0],
multi_byte_chars: vec![],
non_narrow_chars: vec![(1, 4)],
@@ -126,7 +115,6 @@ test!(
test!(
case: non_narrow_long,
text: "01\t3456789abcdef01234567\u{07}9",
- source_file_start_pos: 0,
lines: vec![0],
multi_byte_chars: vec![],
non_narrow_chars: vec![(2, 4), (24, 0)],
@@ -135,8 +123,7 @@ test!(
test!(
case: output_offset_all,
text: "01\t345\n789abcΔf01234567\u{07}9\nbcΔf",
- source_file_start_pos: 1000,
- lines: vec![0 + 1000, 7 + 1000, 27 + 1000],
- multi_byte_chars: vec![(13 + 1000, 2), (29 + 1000, 2)],
- non_narrow_chars: vec![(2 + 1000, 4), (24 + 1000, 0)],
+ lines: vec![0, 7, 27],
+ multi_byte_chars: vec![(13, 2), (29, 2)],
+ non_narrow_chars: vec![(2, 4), (24, 0)],
);
diff --git a/compiler/rustc_span/src/caching_source_map_view.rs b/compiler/rustc_span/src/caching_source_map_view.rs
index 886112769..fbfc5c22f 100644
--- a/compiler/rustc_span/src/caching_source_map_view.rs
+++ b/compiler/rustc_span/src/caching_source_map_view.rs
@@ -1,5 +1,5 @@
use crate::source_map::SourceMap;
-use crate::{BytePos, SourceFile, SpanData};
+use crate::{BytePos, Pos, RelativeBytePos, SourceFile, SpanData};
use rustc_data_structures::sync::Lrc;
use std::ops::Range;
@@ -37,6 +37,7 @@ impl CacheEntry {
self.file_index = file_idx;
}
+ let pos = self.file.relative_position(pos);
let line_index = self.file.lookup_line(pos).unwrap();
let line_bounds = self.file.line_bounds(line_index);
self.line_number = line_index + 1;
@@ -79,7 +80,7 @@ impl<'sm> CachingSourceMapView<'sm> {
pub fn byte_pos_to_line_and_col(
&mut self,
pos: BytePos,
- ) -> Option<(Lrc<SourceFile>, usize, BytePos)> {
+ ) -> Option<(Lrc<SourceFile>, usize, RelativeBytePos)> {
self.time_stamp += 1;
// Check if the position is in one of the cached lines
@@ -88,11 +89,8 @@ impl<'sm> CachingSourceMapView<'sm> {
let cache_entry = &mut self.line_cache[cache_idx as usize];
cache_entry.touch(self.time_stamp);
- return Some((
- cache_entry.file.clone(),
- cache_entry.line_number,
- pos - cache_entry.line.start,
- ));
+ let col = RelativeBytePos(pos.to_u32() - cache_entry.line.start.to_u32());
+ return Some((cache_entry.file.clone(), cache_entry.line_number, col));
}
// No cache hit ...
@@ -108,7 +106,8 @@ impl<'sm> CachingSourceMapView<'sm> {
let cache_entry = &mut self.line_cache[oldest];
cache_entry.update(new_file_and_idx, pos, self.time_stamp);
- Some((cache_entry.file.clone(), cache_entry.line_number, pos - cache_entry.line.start))
+ let col = RelativeBytePos(pos.to_u32() - cache_entry.line.start.to_u32());
+ Some((cache_entry.file.clone(), cache_entry.line_number, col))
}
pub fn span_data_to_lines_and_cols(
diff --git a/compiler/rustc_span/src/hygiene.rs b/compiler/rustc_span/src/hygiene.rs
index 9f2ff4378..88081700c 100644
--- a/compiler/rustc_span/src/hygiene.rs
+++ b/compiler/rustc_span/src/hygiene.rs
@@ -34,11 +34,13 @@ use rustc_data_structures::fingerprint::Fingerprint;
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_data_structures::stable_hasher::HashingControls;
use rustc_data_structures::stable_hasher::{Hash64, HashStable, StableHasher};
-use rustc_data_structures::sync::{Lock, Lrc};
+use rustc_data_structures::sync::{Lock, Lrc, WorkerLocal};
use rustc_data_structures::unhash::UnhashMap;
use rustc_index::IndexVec;
use rustc_macros::HashStable_Generic;
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
+use std::cell::RefCell;
+use std::collections::hash_map::Entry;
use std::fmt;
use std::hash::Hash;
@@ -1241,13 +1243,25 @@ impl HygieneEncodeContext {
#[derive(Default)]
/// Additional information used to assist in decoding hygiene data
-pub struct HygieneDecodeContext {
+struct HygieneDecodeContextInner {
// Maps serialized `SyntaxContext` ids to a `SyntaxContext` in the current
// global `HygieneData`. When we deserialize a `SyntaxContext`, we need to create
// a new id in the global `HygieneData`. This map tracks the ID we end up picking,
// so that multiple occurrences of the same serialized id are decoded to the same
- // `SyntaxContext`
- remapped_ctxts: Lock<Vec<Option<SyntaxContext>>>,
+ // `SyntaxContext`. This only stores `SyntaxContext`s which are completly decoded.
+ remapped_ctxts: Vec<Option<SyntaxContext>>,
+
+ /// Maps serialized `SyntaxContext` ids that are currently being decoded to a `SyntaxContext`.
+ decoding: FxHashMap<u32, SyntaxContext>,
+}
+
+#[derive(Default)]
+/// Additional information used to assist in decoding hygiene data
+pub struct HygieneDecodeContext {
+ inner: Lock<HygieneDecodeContextInner>,
+
+ /// A set of serialized `SyntaxContext` ids that are currently being decoded on each thread.
+ local_in_progress: WorkerLocal<RefCell<FxHashMap<u32, ()>>>,
}
/// Register an expansion which has been decoded from the on-disk-cache for the local crate.
@@ -1277,11 +1291,11 @@ pub fn register_expn_id(
let expn_id = ExpnId { krate, local_id };
HygieneData::with(|hygiene_data| {
let _old_data = hygiene_data.foreign_expn_data.insert(expn_id, data);
- debug_assert!(_old_data.is_none());
+ debug_assert!(_old_data.is_none() || cfg!(parallel_compiler));
let _old_hash = hygiene_data.foreign_expn_hashes.insert(expn_id, hash);
- debug_assert!(_old_hash.is_none());
+ debug_assert!(_old_hash.is_none() || _old_hash == Some(hash));
let _old_id = hygiene_data.expn_hash_to_expn_id.insert(hash, expn_id);
- debug_assert!(_old_id.is_none());
+ debug_assert!(_old_id.is_none() || _old_id == Some(expn_id));
});
expn_id
}
@@ -1331,38 +1345,56 @@ pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContext
return SyntaxContext::root();
}
- let outer_ctxts = &context.remapped_ctxts;
+ let ctxt = {
+ let mut inner = context.inner.lock();
- // Ensure that the lock() temporary is dropped early
- {
- if let Some(ctxt) = outer_ctxts.lock().get(raw_id as usize).copied().flatten() {
+ if let Some(ctxt) = inner.remapped_ctxts.get(raw_id as usize).copied().flatten() {
+ // This has already beeen decoded.
return ctxt;
}
- }
- // Allocate and store SyntaxContext id *before* calling the decoder function,
- // as the SyntaxContextData may reference itself.
- let new_ctxt = HygieneData::with(|hygiene_data| {
- let new_ctxt = SyntaxContext(hygiene_data.syntax_context_data.len() as u32);
- // Push a dummy SyntaxContextData to ensure that nobody else can get the
- // same ID as us. This will be overwritten after call `decode_Data`
- hygiene_data.syntax_context_data.push(SyntaxContextData {
- outer_expn: ExpnId::root(),
- outer_transparency: Transparency::Transparent,
- parent: SyntaxContext::root(),
- opaque: SyntaxContext::root(),
- opaque_and_semitransparent: SyntaxContext::root(),
- dollar_crate_name: kw::Empty,
- });
- let mut ctxts = outer_ctxts.lock();
- let new_len = raw_id as usize + 1;
- if ctxts.len() < new_len {
- ctxts.resize(new_len, None);
+ match inner.decoding.entry(raw_id) {
+ Entry::Occupied(ctxt_entry) => {
+ match context.local_in_progress.borrow_mut().entry(raw_id) {
+ Entry::Occupied(..) => {
+ // We're decoding this already on the current thread. Return here
+ // and let the function higher up the stack finish decoding to handle
+ // recursive cases.
+ return *ctxt_entry.get();
+ }
+ Entry::Vacant(entry) => {
+ entry.insert(());
+
+ // Some other thread is current decoding this. Race with it.
+ *ctxt_entry.get()
+ }
+ }
+ }
+ Entry::Vacant(entry) => {
+ // We are the first thread to start decoding. Mark the current thread as being progress.
+ context.local_in_progress.borrow_mut().insert(raw_id, ());
+
+ // Allocate and store SyntaxContext id *before* calling the decoder function,
+ // as the SyntaxContextData may reference itself.
+ let new_ctxt = HygieneData::with(|hygiene_data| {
+ let new_ctxt = SyntaxContext(hygiene_data.syntax_context_data.len() as u32);
+ // Push a dummy SyntaxContextData to ensure that nobody else can get the
+ // same ID as us. This will be overwritten after call `decode_Data`
+ hygiene_data.syntax_context_data.push(SyntaxContextData {
+ outer_expn: ExpnId::root(),
+ outer_transparency: Transparency::Transparent,
+ parent: SyntaxContext::root(),
+ opaque: SyntaxContext::root(),
+ opaque_and_semitransparent: SyntaxContext::root(),
+ dollar_crate_name: kw::Empty,
+ });
+ new_ctxt
+ });
+ entry.insert(new_ctxt);
+ new_ctxt
+ }
}
- ctxts[raw_id as usize] = Some(new_ctxt);
- drop(ctxts);
- new_ctxt
- });
+ };
// Don't try to decode data while holding the lock, since we need to
// be able to recursively decode a SyntaxContext
@@ -1375,14 +1407,32 @@ pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContext
// Overwrite the dummy data with our decoded SyntaxContextData
HygieneData::with(|hygiene_data| {
let dummy = std::mem::replace(
- &mut hygiene_data.syntax_context_data[new_ctxt.as_u32() as usize],
+ &mut hygiene_data.syntax_context_data[ctxt.as_u32() as usize],
ctxt_data,
);
- // Make sure nothing weird happening while `decode_data` was running
- assert_eq!(dummy.dollar_crate_name, kw::Empty);
+ if cfg!(not(parallel_compiler)) {
+ // Make sure nothing weird happened while `decode_data` was running.
+ // We used `kw::Empty` for the dummy value and we expect nothing to be
+ // modifying the dummy entry.
+ // This does not hold for the parallel compiler as another thread may
+ // have inserted the fully decoded data.
+ assert_eq!(dummy.dollar_crate_name, kw::Empty);
+ }
});
- new_ctxt
+ // Mark the context as completed
+
+ context.local_in_progress.borrow_mut().remove(&raw_id);
+
+ let mut inner = context.inner.lock();
+ let new_len = raw_id as usize + 1;
+ if inner.remapped_ctxts.len() < new_len {
+ inner.remapped_ctxts.resize(new_len, None);
+ }
+ inner.remapped_ctxts[raw_id as usize] = Some(ctxt);
+ inner.decoding.remove(&raw_id);
+
+ ctxt
}
fn for_all_ctxts_in<F: FnMut(u32, SyntaxContext, &SyntaxContextData)>(
diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs
index c24b8d9ec..772e09291 100644
--- a/compiler/rustc_span/src/lib.rs
+++ b/compiler/rustc_span/src/lib.rs
@@ -21,9 +21,11 @@
#![feature(rustc_attrs)]
#![feature(let_chains)]
#![feature(round_char_boundary)]
+#![feature(read_buf)]
+#![feature(new_uninit)]
#![deny(rustc::untranslatable_diagnostic)]
#![deny(rustc::diagnostic_outside_of_impl)]
-#![cfg_attr(not(bootstrap), allow(internal_features))]
+#![allow(internal_features)]
#[macro_use]
extern crate rustc_macros;
@@ -31,7 +33,7 @@ extern crate rustc_macros;
#[macro_use]
extern crate tracing;
-use rustc_data_structures::AtomicRef;
+use rustc_data_structures::{outline, AtomicRef};
use rustc_macros::HashStable_Generic;
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
@@ -62,7 +64,7 @@ pub mod fatal_error;
pub mod profiling;
use rustc_data_structures::stable_hasher::{Hash128, Hash64, HashStable, StableHasher};
-use rustc_data_structures::sync::{Lock, Lrc};
+use rustc_data_structures::sync::{FreezeLock, FreezeWriteGuard, Lock, Lrc};
use std::borrow::Cow;
use std::cmp::{self, Ordering};
@@ -508,10 +510,6 @@ impl SpanData {
pub fn is_dummy(self) -> bool {
self.lo.0 == 0 && self.hi.0 == 0
}
- #[inline]
- pub fn is_visible(self, sm: &SourceMap) -> bool {
- !self.is_dummy() && sm.is_span_accessible(self.span())
- }
/// Returns `true` if `self` fully encloses `other`.
pub fn contains(self, other: Self) -> bool {
self.lo <= other.lo && other.hi <= self.hi
@@ -571,15 +569,9 @@ impl Span {
self.data().with_parent(ctxt)
}
- /// Returns `true` if this is a dummy span with any hygienic context.
- #[inline]
- pub fn is_dummy(self) -> bool {
- self.data_untracked().is_dummy()
- }
-
#[inline]
pub fn is_visible(self, sm: &SourceMap) -> bool {
- self.data_untracked().is_visible(sm)
+ !self.is_dummy() && sm.is_span_accessible(self)
}
/// Returns `true` if this span comes from any kind of macro, desugaring or inlining.
@@ -1105,27 +1097,27 @@ impl fmt::Debug for SpanData {
}
/// Identifies an offset of a multi-byte character in a `SourceFile`.
-#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug)]
+#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug, HashStable_Generic)]
pub struct MultiByteChar {
- /// The absolute offset of the character in the `SourceMap`.
- pub pos: BytePos,
+ /// The relative offset of the character in the `SourceFile`.
+ pub pos: RelativeBytePos,
/// The number of bytes, `>= 2`.
pub bytes: u8,
}
/// Identifies an offset of a non-narrow character in a `SourceFile`.
-#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug)]
+#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug, HashStable_Generic)]
pub enum NonNarrowChar {
/// Represents a zero-width character.
- ZeroWidth(BytePos),
+ ZeroWidth(RelativeBytePos),
/// Represents a wide (full-width) character.
- Wide(BytePos),
+ Wide(RelativeBytePos),
/// Represents a tab character, represented visually with a width of 4 characters.
- Tab(BytePos),
+ Tab(RelativeBytePos),
}
impl NonNarrowChar {
- fn new(pos: BytePos, width: usize) -> Self {
+ fn new(pos: RelativeBytePos, width: usize) -> Self {
match width {
0 => NonNarrowChar::ZeroWidth(pos),
2 => NonNarrowChar::Wide(pos),
@@ -1134,8 +1126,8 @@ impl NonNarrowChar {
}
}
- /// Returns the absolute offset of the character in the `SourceMap`.
- pub fn pos(&self) -> BytePos {
+ /// Returns the relative offset of the character in the `SourceFile`.
+ pub fn pos(&self) -> RelativeBytePos {
match *self {
NonNarrowChar::ZeroWidth(p) | NonNarrowChar::Wide(p) | NonNarrowChar::Tab(p) => p,
}
@@ -1151,10 +1143,10 @@ impl NonNarrowChar {
}
}
-impl Add<BytePos> for NonNarrowChar {
+impl Add<RelativeBytePos> for NonNarrowChar {
type Output = Self;
- fn add(self, rhs: BytePos) -> Self {
+ fn add(self, rhs: RelativeBytePos) -> Self {
match self {
NonNarrowChar::ZeroWidth(pos) => NonNarrowChar::ZeroWidth(pos + rhs),
NonNarrowChar::Wide(pos) => NonNarrowChar::Wide(pos + rhs),
@@ -1163,10 +1155,10 @@ impl Add<BytePos> for NonNarrowChar {
}
}
-impl Sub<BytePos> for NonNarrowChar {
+impl Sub<RelativeBytePos> for NonNarrowChar {
type Output = Self;
- fn sub(self, rhs: BytePos) -> Self {
+ fn sub(self, rhs: RelativeBytePos) -> Self {
match self {
NonNarrowChar::ZeroWidth(pos) => NonNarrowChar::ZeroWidth(pos - rhs),
NonNarrowChar::Wide(pos) => NonNarrowChar::Wide(pos - rhs),
@@ -1176,10 +1168,10 @@ impl Sub<BytePos> for NonNarrowChar {
}
/// Identifies an offset of a character that was normalized away from `SourceFile`.
-#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug)]
+#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug, HashStable_Generic)]
pub struct NormalizedPos {
- /// The absolute offset of the character in the `SourceMap`.
- pub pos: BytePos,
+ /// The relative offset of the character in the `SourceFile`.
+ pub pos: RelativeBytePos,
/// The difference between original and normalized string at position.
pub diff: u32,
}
@@ -1204,7 +1196,6 @@ pub enum ExternalSourceKind {
AbsentOk,
/// A failed attempt has been made to load the external source.
AbsentErr,
- Unneeded,
}
impl ExternalSource {
@@ -1291,7 +1282,7 @@ impl SourceFileHash {
#[derive(Clone)]
pub enum SourceFileLines {
/// The source file lines, in decoded (random-access) form.
- Lines(Vec<BytePos>),
+ Lines(Vec<RelativeBytePos>),
/// The source file lines, in undecoded difference list form.
Diffs(SourceFileDiffs),
@@ -1312,11 +1303,6 @@ impl SourceFileLines {
/// small crates where very little of `std`'s metadata is used.
#[derive(Clone)]
pub struct SourceFileDiffs {
- /// Position of the first line. Note that this is always encoded as a
- /// `BytePos` because it is often much larger than any of the
- /// differences.
- line_start: BytePos,
-
/// Always 1, 2, or 4. Always as small as possible, while being big
/// enough to hold the length of the longest line in the source file.
/// The 1 case is by far the most common.
@@ -1346,13 +1332,13 @@ pub struct SourceFile {
pub src_hash: SourceFileHash,
/// The external source code (used for external crates, which will have a `None`
/// value as `self.src`.
- pub external_src: Lock<ExternalSource>,
+ pub external_src: FreezeLock<ExternalSource>,
/// The start position of this source in the `SourceMap`.
pub start_pos: BytePos,
- /// The end position of this source in the `SourceMap`.
- pub end_pos: BytePos,
+ /// The byte length of this source.
+ pub source_len: RelativeBytePos,
/// Locations of lines beginnings in the source code.
- pub lines: Lock<SourceFileLines>,
+ pub lines: FreezeLock<SourceFileLines>,
/// Locations of multi-byte characters in the source code.
pub multibyte_chars: Vec<MultiByteChar>,
/// Width of characters that are not narrow in the source code.
@@ -1371,10 +1357,10 @@ impl Clone for SourceFile {
name: self.name.clone(),
src: self.src.clone(),
src_hash: self.src_hash,
- external_src: Lock::new(self.external_src.borrow().clone()),
+ external_src: self.external_src.clone(),
start_pos: self.start_pos,
- end_pos: self.end_pos,
- lines: Lock::new(self.lines.borrow().clone()),
+ source_len: self.source_len,
+ lines: self.lines.clone(),
multibyte_chars: self.multibyte_chars.clone(),
non_narrow_chars: self.non_narrow_chars.clone(),
normalized_pos: self.normalized_pos.clone(),
@@ -1388,68 +1374,67 @@ impl<S: Encoder> Encodable<S> for SourceFile {
fn encode(&self, s: &mut S) {
self.name.encode(s);
self.src_hash.encode(s);
- self.start_pos.encode(s);
- self.end_pos.encode(s);
+ // Do not encode `start_pos` as it's global state for this session.
+ self.source_len.encode(s);
// We are always in `Lines` form by the time we reach here.
- assert!(self.lines.borrow().is_lines());
- self.lines(|lines| {
- // Store the length.
- s.emit_u32(lines.len() as u32);
-
- // Compute and store the difference list.
- if lines.len() != 0 {
- let max_line_length = if lines.len() == 1 {
- 0
- } else {
- lines
- .array_windows()
- .map(|&[fst, snd]| snd - fst)
- .map(|bp| bp.to_usize())
- .max()
- .unwrap()
- };
-
- let bytes_per_diff: usize = match max_line_length {
- 0..=0xFF => 1,
- 0x100..=0xFFFF => 2,
- _ => 4,
- };
-
- // Encode the number of bytes used per diff.
- s.emit_u8(bytes_per_diff as u8);
-
- // Encode the first element.
- lines[0].encode(s);
-
- // Encode the difference list.
- let diff_iter = lines.array_windows().map(|&[fst, snd]| snd - fst);
- let num_diffs = lines.len() - 1;
- let mut raw_diffs;
- match bytes_per_diff {
- 1 => {
- raw_diffs = Vec::with_capacity(num_diffs);
- for diff in diff_iter {
- raw_diffs.push(diff.0 as u8);
- }
+ assert!(self.lines.read().is_lines());
+ let lines = self.lines();
+ // Store the length.
+ s.emit_u32(lines.len() as u32);
+
+ // Compute and store the difference list.
+ if lines.len() != 0 {
+ let max_line_length = if lines.len() == 1 {
+ 0
+ } else {
+ lines
+ .array_windows()
+ .map(|&[fst, snd]| snd - fst)
+ .map(|bp| bp.to_usize())
+ .max()
+ .unwrap()
+ };
+
+ let bytes_per_diff: usize = match max_line_length {
+ 0..=0xFF => 1,
+ 0x100..=0xFFFF => 2,
+ _ => 4,
+ };
+
+ // Encode the number of bytes used per diff.
+ s.emit_u8(bytes_per_diff as u8);
+
+ // Encode the first element.
+ assert_eq!(lines[0], RelativeBytePos(0));
+
+ // Encode the difference list.
+ let diff_iter = lines.array_windows().map(|&[fst, snd]| snd - fst);
+ let num_diffs = lines.len() - 1;
+ let mut raw_diffs;
+ match bytes_per_diff {
+ 1 => {
+ raw_diffs = Vec::with_capacity(num_diffs);
+ for diff in diff_iter {
+ raw_diffs.push(diff.0 as u8);
}
- 2 => {
- raw_diffs = Vec::with_capacity(bytes_per_diff * num_diffs);
- for diff in diff_iter {
- raw_diffs.extend_from_slice(&(diff.0 as u16).to_le_bytes());
- }
+ }
+ 2 => {
+ raw_diffs = Vec::with_capacity(bytes_per_diff * num_diffs);
+ for diff in diff_iter {
+ raw_diffs.extend_from_slice(&(diff.0 as u16).to_le_bytes());
}
- 4 => {
- raw_diffs = Vec::with_capacity(bytes_per_diff * num_diffs);
- for diff in diff_iter {
- raw_diffs.extend_from_slice(&(diff.0).to_le_bytes());
- }
+ }
+ 4 => {
+ raw_diffs = Vec::with_capacity(bytes_per_diff * num_diffs);
+ for diff in diff_iter {
+ raw_diffs.extend_from_slice(&(diff.0).to_le_bytes());
}
- _ => unreachable!(),
}
- s.emit_raw_bytes(&raw_diffs);
+ _ => unreachable!(),
}
- });
+ s.emit_raw_bytes(&raw_diffs);
+ }
self.multibyte_chars.encode(s);
self.non_narrow_chars.encode(s);
@@ -1463,26 +1448,17 @@ impl<D: Decoder> Decodable<D> for SourceFile {
fn decode(d: &mut D) -> SourceFile {
let name: FileName = Decodable::decode(d);
let src_hash: SourceFileHash = Decodable::decode(d);
- let start_pos: BytePos = Decodable::decode(d);
- let end_pos: BytePos = Decodable::decode(d);
+ let source_len: RelativeBytePos = Decodable::decode(d);
let lines = {
let num_lines: u32 = Decodable::decode(d);
if num_lines > 0 {
// Read the number of bytes used per diff.
let bytes_per_diff = d.read_u8() as usize;
- // Read the first element.
- let line_start: BytePos = Decodable::decode(d);
-
// Read the difference list.
let num_diffs = num_lines as usize - 1;
let raw_diffs = d.read_raw_bytes(bytes_per_diff * num_diffs).to_vec();
- SourceFileLines::Diffs(SourceFileDiffs {
- line_start,
- bytes_per_diff,
- num_diffs,
- raw_diffs,
- })
+ SourceFileLines::Diffs(SourceFileDiffs { bytes_per_diff, num_diffs, raw_diffs })
} else {
SourceFileLines::Lines(vec![])
}
@@ -1494,14 +1470,14 @@ impl<D: Decoder> Decodable<D> for SourceFile {
let cnum: CrateNum = Decodable::decode(d);
SourceFile {
name,
- start_pos,
- end_pos,
+ start_pos: BytePos::from_u32(0),
+ source_len,
src: None,
src_hash,
// Unused - the metadata decoder will construct
// a new SourceFile, filling in `external_src` properly
- external_src: Lock::new(ExternalSource::Unneeded),
- lines: Lock::new(lines),
+ external_src: FreezeLock::frozen(ExternalSource::Unneeded),
+ lines: FreezeLock::new(lines),
multibyte_chars,
non_narrow_chars,
normalized_pos,
@@ -1521,102 +1497,116 @@ impl SourceFile {
pub fn new(
name: FileName,
mut src: String,
- start_pos: BytePos,
hash_kind: SourceFileHashAlgorithm,
- ) -> Self {
+ ) -> Result<Self, OffsetOverflowError> {
// Compute the file hash before any normalization.
let src_hash = SourceFileHash::new(hash_kind, &src);
- let normalized_pos = normalize_src(&mut src, start_pos);
+ let normalized_pos = normalize_src(&mut src);
let name_hash = {
let mut hasher: StableHasher = StableHasher::new();
name.hash(&mut hasher);
hasher.finish()
};
- let end_pos = start_pos.to_usize() + src.len();
- assert!(end_pos <= u32::MAX as usize);
+ let source_len = src.len();
+ let source_len = u32::try_from(source_len).map_err(|_| OffsetOverflowError)?;
let (lines, multibyte_chars, non_narrow_chars) =
- analyze_source_file::analyze_source_file(&src, start_pos);
+ analyze_source_file::analyze_source_file(&src);
- SourceFile {
+ Ok(SourceFile {
name,
src: Some(Lrc::new(src)),
src_hash,
- external_src: Lock::new(ExternalSource::Unneeded),
- start_pos,
- end_pos: Pos::from_usize(end_pos),
- lines: Lock::new(SourceFileLines::Lines(lines)),
+ external_src: FreezeLock::frozen(ExternalSource::Unneeded),
+ start_pos: BytePos::from_u32(0),
+ source_len: RelativeBytePos::from_u32(source_len),
+ lines: FreezeLock::frozen(SourceFileLines::Lines(lines)),
multibyte_chars,
non_narrow_chars,
normalized_pos,
name_hash,
cnum: LOCAL_CRATE,
- }
+ })
}
- pub fn lines<F, R>(&self, f: F) -> R
- where
- F: FnOnce(&[BytePos]) -> R,
- {
- let mut guard = self.lines.borrow_mut();
- match &*guard {
- SourceFileLines::Lines(lines) => f(lines),
- SourceFileLines::Diffs(SourceFileDiffs {
- mut line_start,
- bytes_per_diff,
- num_diffs,
- raw_diffs,
- }) => {
- // Convert from "diffs" form to "lines" form.
- let num_lines = num_diffs + 1;
- let mut lines = Vec::with_capacity(num_lines);
- lines.push(line_start);
-
- assert_eq!(*num_diffs, raw_diffs.len() / bytes_per_diff);
- match bytes_per_diff {
- 1 => {
- lines.extend(raw_diffs.into_iter().map(|&diff| {
- line_start = line_start + BytePos(diff as u32);
- line_start
- }));
- }
- 2 => {
- lines.extend((0..*num_diffs).map(|i| {
- let pos = bytes_per_diff * i;
- let bytes = [raw_diffs[pos], raw_diffs[pos + 1]];
- let diff = u16::from_le_bytes(bytes);
- line_start = line_start + BytePos(diff as u32);
- line_start
- }));
- }
- 4 => {
- lines.extend((0..*num_diffs).map(|i| {
- let pos = bytes_per_diff * i;
- let bytes = [
- raw_diffs[pos],
- raw_diffs[pos + 1],
- raw_diffs[pos + 2],
- raw_diffs[pos + 3],
- ];
- let diff = u32::from_le_bytes(bytes);
- line_start = line_start + BytePos(diff);
- line_start
- }));
- }
- _ => unreachable!(),
- }
- let res = f(&lines);
- *guard = SourceFileLines::Lines(lines);
- res
+ /// This converts the `lines` field to contain `SourceFileLines::Lines` if needed and freezes it.
+ fn convert_diffs_to_lines_frozen(&self) {
+ let mut guard = if let Some(guard) = self.lines.try_write() { guard } else { return };
+
+ let SourceFileDiffs { bytes_per_diff, num_diffs, raw_diffs } = match &*guard {
+ SourceFileLines::Diffs(diffs) => diffs,
+ SourceFileLines::Lines(..) => {
+ FreezeWriteGuard::freeze(guard);
+ return;
+ }
+ };
+
+ // Convert from "diffs" form to "lines" form.
+ let num_lines = num_diffs + 1;
+ let mut lines = Vec::with_capacity(num_lines);
+ let mut line_start = RelativeBytePos(0);
+ lines.push(line_start);
+
+ assert_eq!(*num_diffs, raw_diffs.len() / bytes_per_diff);
+ match bytes_per_diff {
+ 1 => {
+ lines.extend(raw_diffs.into_iter().map(|&diff| {
+ line_start = line_start + RelativeBytePos(diff as u32);
+ line_start
+ }));
+ }
+ 2 => {
+ lines.extend((0..*num_diffs).map(|i| {
+ let pos = bytes_per_diff * i;
+ let bytes = [raw_diffs[pos], raw_diffs[pos + 1]];
+ let diff = u16::from_le_bytes(bytes);
+ line_start = line_start + RelativeBytePos(diff as u32);
+ line_start
+ }));
}
+ 4 => {
+ lines.extend((0..*num_diffs).map(|i| {
+ let pos = bytes_per_diff * i;
+ let bytes = [
+ raw_diffs[pos],
+ raw_diffs[pos + 1],
+ raw_diffs[pos + 2],
+ raw_diffs[pos + 3],
+ ];
+ let diff = u32::from_le_bytes(bytes);
+ line_start = line_start + RelativeBytePos(diff);
+ line_start
+ }));
+ }
+ _ => unreachable!(),
}
+
+ *guard = SourceFileLines::Lines(lines);
+
+ FreezeWriteGuard::freeze(guard);
+ }
+
+ pub fn lines(&self) -> &[RelativeBytePos] {
+ if let Some(SourceFileLines::Lines(lines)) = self.lines.get() {
+ return &lines[..];
+ }
+
+ outline(|| {
+ self.convert_diffs_to_lines_frozen();
+ if let Some(SourceFileLines::Lines(lines)) = self.lines.get() {
+ return &lines[..];
+ }
+ unreachable!()
+ })
}
/// Returns the `BytePos` of the beginning of the current line.
pub fn line_begin_pos(&self, pos: BytePos) -> BytePos {
+ let pos = self.relative_position(pos);
let line_index = self.lookup_line(pos).unwrap();
- self.lines(|lines| lines[line_index])
+ let line_start_pos = self.lines()[line_index];
+ self.absolute_position(line_start_pos)
}
/// Add externally loaded source.
@@ -1627,35 +1617,37 @@ impl SourceFile {
where
F: FnOnce() -> Option<String>,
{
- if matches!(
- *self.external_src.borrow(),
- ExternalSource::Foreign { kind: ExternalSourceKind::AbsentOk, .. }
- ) {
+ if !self.external_src.is_frozen() {
let src = get_src();
- let mut external_src = self.external_src.borrow_mut();
- // Check that no-one else have provided the source while we were getting it
- if let ExternalSource::Foreign {
- kind: src_kind @ ExternalSourceKind::AbsentOk, ..
- } = &mut *external_src
- {
- if let Some(mut src) = src {
- // The src_hash needs to be computed on the pre-normalized src.
- if self.src_hash.matches(&src) {
- normalize_src(&mut src, BytePos::from_usize(0));
- *src_kind = ExternalSourceKind::Present(Lrc::new(src));
- return true;
- }
+ let src = src.and_then(|mut src| {
+ // The src_hash needs to be computed on the pre-normalized src.
+ self.src_hash.matches(&src).then(|| {
+ normalize_src(&mut src);
+ src
+ })
+ });
+
+ self.external_src.try_write().map(|mut external_src| {
+ if let ExternalSource::Foreign {
+ kind: src_kind @ ExternalSourceKind::AbsentOk,
+ ..
+ } = &mut *external_src
+ {
+ *src_kind = if let Some(src) = src {
+ ExternalSourceKind::Present(Lrc::new(src))
+ } else {
+ ExternalSourceKind::AbsentErr
+ };
} else {
- *src_kind = ExternalSourceKind::AbsentErr;
+ panic!("unexpected state {:?}", *external_src)
}
- false
- } else {
- self.src.is_some() || external_src.get_source().is_some()
- }
- } else {
- self.src.is_some() || self.external_src.borrow().get_source().is_some()
+ // Freeze this so we don't try to load the source again.
+ FreezeWriteGuard::freeze(external_src)
+ });
}
+
+ self.src.is_some() || self.external_src.read().get_source().is_some()
}
/// Gets a line from the list of pre-computed line-beginnings.
@@ -1673,9 +1665,8 @@ impl SourceFile {
}
let begin = {
- let line = self.lines(|lines| lines.get(line_number).copied())?;
- let begin: BytePos = line - self.start_pos;
- begin.to_usize()
+ let line = self.lines().get(line_number).copied()?;
+ line.to_usize()
};
if let Some(ref src) = self.src {
@@ -1698,30 +1689,44 @@ impl SourceFile {
}
pub fn count_lines(&self) -> usize {
- self.lines(|lines| lines.len())
+ self.lines().len()
+ }
+
+ #[inline]
+ pub fn absolute_position(&self, pos: RelativeBytePos) -> BytePos {
+ BytePos::from_u32(pos.to_u32() + self.start_pos.to_u32())
+ }
+
+ #[inline]
+ pub fn relative_position(&self, pos: BytePos) -> RelativeBytePos {
+ RelativeBytePos::from_u32(pos.to_u32() - self.start_pos.to_u32())
+ }
+
+ #[inline]
+ pub fn end_position(&self) -> BytePos {
+ self.absolute_position(self.source_len)
}
/// Finds the line containing the given position. The return value is the
/// index into the `lines` array of this `SourceFile`, not the 1-based line
/// number. If the source_file is empty or the position is located before the
/// first line, `None` is returned.
- pub fn lookup_line(&self, pos: BytePos) -> Option<usize> {
- self.lines(|lines| lines.partition_point(|x| x <= &pos).checked_sub(1))
+ pub fn lookup_line(&self, pos: RelativeBytePos) -> Option<usize> {
+ self.lines().partition_point(|x| x <= &pos).checked_sub(1)
}
pub fn line_bounds(&self, line_index: usize) -> Range<BytePos> {
if self.is_empty() {
- return self.start_pos..self.end_pos;
+ return self.start_pos..self.start_pos;
}
- self.lines(|lines| {
- assert!(line_index < lines.len());
- if line_index == (lines.len() - 1) {
- lines[line_index]..self.end_pos
- } else {
- lines[line_index]..lines[line_index + 1]
- }
- })
+ let lines = self.lines();
+ assert!(line_index < lines.len());
+ if line_index == (lines.len() - 1) {
+ self.absolute_position(lines[line_index])..self.end_position()
+ } else {
+ self.absolute_position(lines[line_index])..self.absolute_position(lines[line_index + 1])
+ }
}
/// Returns whether or not the file contains the given `SourceMap` byte
@@ -1730,27 +1735,29 @@ impl SourceFile {
/// returns true still contain one byte position according to this function.
#[inline]
pub fn contains(&self, byte_pos: BytePos) -> bool {
- byte_pos >= self.start_pos && byte_pos <= self.end_pos
+ byte_pos >= self.start_pos && byte_pos <= self.end_position()
}
#[inline]
pub fn is_empty(&self) -> bool {
- self.start_pos == self.end_pos
+ self.source_len.to_u32() == 0
}
/// Calculates the original byte position relative to the start of the file
/// based on the given byte position.
- pub fn original_relative_byte_pos(&self, pos: BytePos) -> BytePos {
+ pub fn original_relative_byte_pos(&self, pos: BytePos) -> RelativeBytePos {
+ let pos = self.relative_position(pos);
+
// Diff before any records is 0. Otherwise use the previously recorded
// diff as that applies to the following characters until a new diff
// is recorded.
let diff = match self.normalized_pos.binary_search_by(|np| np.pos.cmp(&pos)) {
Ok(i) => self.normalized_pos[i].diff,
- Err(i) if i == 0 => 0,
+ Err(0) => 0,
Err(i) => self.normalized_pos[i - 1].diff,
};
- BytePos::from_u32(pos.0 - self.start_pos.0 + diff)
+ RelativeBytePos::from_u32(pos.0 + diff)
}
/// Calculates a normalized byte position from a byte offset relative to the
@@ -1768,15 +1775,15 @@ impl SourceFile {
.binary_search_by(|np| (np.pos.0 + np.diff).cmp(&(self.start_pos.0 + offset)))
{
Ok(i) => self.normalized_pos[i].diff,
- Err(i) if i == 0 => 0,
+ Err(0) => 0,
Err(i) => self.normalized_pos[i - 1].diff,
};
BytePos::from_u32(self.start_pos.0 + offset - diff)
}
- /// Converts an absolute `BytePos` to a `CharPos` relative to the `SourceFile`.
- pub fn bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos {
+ /// Converts a relative `RelativeBytePos` to a `CharPos` relative to the `SourceFile`.
+ fn bytepos_to_file_charpos(&self, bpos: RelativeBytePos) -> CharPos {
// The number of extra bytes due to multibyte chars in the `SourceFile`.
let mut total_extra_bytes = 0;
@@ -1794,18 +1801,18 @@ impl SourceFile {
}
}
- assert!(self.start_pos.to_u32() + total_extra_bytes <= bpos.to_u32());
- CharPos(bpos.to_usize() - self.start_pos.to_usize() - total_extra_bytes as usize)
+ assert!(total_extra_bytes <= bpos.to_u32());
+ CharPos(bpos.to_usize() - total_extra_bytes as usize)
}
/// Looks up the file's (1-based) line number and (0-based `CharPos`) column offset, for a
- /// given `BytePos`.
- pub fn lookup_file_pos(&self, pos: BytePos) -> (usize, CharPos) {
+ /// given `RelativeBytePos`.
+ fn lookup_file_pos(&self, pos: RelativeBytePos) -> (usize, CharPos) {
let chpos = self.bytepos_to_file_charpos(pos);
match self.lookup_line(pos) {
Some(a) => {
let line = a + 1; // Line numbers start at 1
- let linebpos = self.lines(|lines| lines[a]);
+ let linebpos = self.lines()[a];
let linechpos = self.bytepos_to_file_charpos(linebpos);
let col = chpos - linechpos;
debug!("byte pos {:?} is on the line at byte pos {:?}", pos, linebpos);
@@ -1821,10 +1828,11 @@ impl SourceFile {
/// Looks up the file's (1-based) line number, (0-based `CharPos`) column offset, and (0-based)
/// column offset when displayed, for a given `BytePos`.
pub fn lookup_file_pos_with_col_display(&self, pos: BytePos) -> (usize, CharPos, usize) {
+ let pos = self.relative_position(pos);
let (line, col_or_chpos) = self.lookup_file_pos(pos);
if line > 0 {
let col = col_or_chpos;
- let linebpos = self.lines(|lines| lines[line - 1]);
+ let linebpos = self.lines()[line - 1];
let col_display = {
let start_width_idx = self
.non_narrow_chars
@@ -1859,16 +1867,10 @@ impl SourceFile {
}
/// Normalizes the source code and records the normalizations.
-fn normalize_src(src: &mut String, start_pos: BytePos) -> Vec<NormalizedPos> {
+fn normalize_src(src: &mut String) -> Vec<NormalizedPos> {
let mut normalized_pos = vec![];
remove_bom(src, &mut normalized_pos);
normalize_newlines(src, &mut normalized_pos);
-
- // Offset all the positions by start_pos to match the final file positions.
- for np in &mut normalized_pos {
- np.pos.0 += start_pos.0;
- }
-
normalized_pos
}
@@ -1876,7 +1878,7 @@ fn normalize_src(src: &mut String, start_pos: BytePos) -> Vec<NormalizedPos> {
fn remove_bom(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>) {
if src.starts_with('\u{feff}') {
src.drain(..3);
- normalized_pos.push(NormalizedPos { pos: BytePos(0), diff: 3 });
+ normalized_pos.push(NormalizedPos { pos: RelativeBytePos(0), diff: 3 });
}
}
@@ -1911,7 +1913,7 @@ fn normalize_newlines(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>)
cursor += idx - gap_len;
gap_len += 1;
normalized_pos.push(NormalizedPos {
- pos: BytePos::from_usize(cursor + 1),
+ pos: RelativeBytePos::from_usize(cursor + 1),
diff: original_gap + gap_len as u32,
});
}
@@ -2013,6 +2015,10 @@ impl_pos! {
#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
pub struct BytePos(pub u32);
+ /// A byte offset relative to the beginning of the file.
+ #[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+ pub struct RelativeBytePos(pub u32);
+
/// A character offset.
///
/// Because of multibyte UTF-8 characters, a byte offset
@@ -2034,6 +2040,24 @@ impl<D: Decoder> Decodable<D> for BytePos {
}
}
+impl<H: HashStableContext> HashStable<H> for RelativeBytePos {
+ fn hash_stable(&self, hcx: &mut H, hasher: &mut StableHasher) {
+ self.0.hash_stable(hcx, hasher);
+ }
+}
+
+impl<S: Encoder> Encodable<S> for RelativeBytePos {
+ fn encode(&self, s: &mut S) {
+ s.emit_u32(self.0);
+ }
+}
+
+impl<D: Decoder> Decodable<D> for RelativeBytePos {
+ fn decode(d: &mut D) -> RelativeBytePos {
+ RelativeBytePos(d.read_u32())
+ }
+}
+
// _____________________________________________________________________________
// Loc, SourceFileAndLine, SourceFileAndBytePos
//
diff --git a/compiler/rustc_span/src/source_map.rs b/compiler/rustc_span/src/source_map.rs
index 983b2ab04..0b575c13a 100644
--- a/compiler/rustc_span/src/source_map.rs
+++ b/compiler/rustc_span/src/source_map.rs
@@ -14,16 +14,15 @@ pub use crate::*;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::stable_hasher::{Hash128, Hash64, StableHasher};
-use rustc_data_structures::sync::{
- AtomicU32, IntoDynSyncSend, Lrc, MappedReadGuard, ReadGuard, RwLock,
-};
+use rustc_data_structures::sync::{IntoDynSyncSend, Lrc, MappedReadGuard, ReadGuard, RwLock};
use std::cmp;
use std::hash::Hash;
use std::path::{self, Path, PathBuf};
-use std::sync::atomic::Ordering;
use std::fs;
use std::io;
+use std::io::BorrowedBuf;
+use std::io::Read;
#[cfg(test)]
mod tests;
@@ -101,10 +100,13 @@ pub trait FileLoader {
fn file_exists(&self, path: &Path) -> bool;
/// Read the contents of a UTF-8 file into memory.
+ /// This function must return a String because we normalize
+ /// source files, which may require resizing.
fn read_file(&self, path: &Path) -> io::Result<String>;
/// Read the contents of a potentially non-UTF-8 file into memory.
- fn read_binary_file(&self, path: &Path) -> io::Result<Vec<u8>>;
+ /// We don't normalize binary files, so we can start in an Lrc.
+ fn read_binary_file(&self, path: &Path) -> io::Result<Lrc<[u8]>>;
}
/// A FileLoader that uses std::fs to load real files.
@@ -119,8 +121,45 @@ impl FileLoader for RealFileLoader {
fs::read_to_string(path)
}
- fn read_binary_file(&self, path: &Path) -> io::Result<Vec<u8>> {
- fs::read(path)
+ fn read_binary_file(&self, path: &Path) -> io::Result<Lrc<[u8]>> {
+ let mut file = fs::File::open(path)?;
+ let len = file.metadata()?.len();
+
+ let mut bytes = Lrc::new_uninit_slice(len as usize);
+ let mut buf = BorrowedBuf::from(Lrc::get_mut(&mut bytes).unwrap());
+ match file.read_buf_exact(buf.unfilled()) {
+ Ok(()) => {}
+ Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => {
+ drop(bytes);
+ return fs::read(path).map(Vec::into);
+ }
+ Err(e) => return Err(e),
+ }
+ // SAFETY: If the read_buf_exact call returns Ok(()), then we have
+ // read len bytes and initialized the buffer.
+ let bytes = unsafe { bytes.assume_init() };
+
+ // At this point, we've read all the bytes that filesystem metadata reported exist.
+ // But we are not guaranteed to be at the end of the file, because we did not attempt to do
+ // a read with a non-zero-sized buffer and get Ok(0).
+ // So we do a small read to a fixed-size buffer. If the read returns no bytes then we're
+ // already done, and we just return the Lrc we built above.
+ // If the read returns bytes however, we just fall back to reading into a Vec then turning
+ // that into an Lrc, losing our nice peak memory behavior. This fallback code path should
+ // be rarely exercised.
+
+ let mut probe = [0u8; 32];
+ let n = loop {
+ match file.read(&mut probe) {
+ Ok(0) => return Ok(bytes),
+ Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
+ Err(e) => return Err(e),
+ Ok(n) => break n,
+ }
+ };
+ let mut bytes: Vec<u8> = bytes.iter().copied().chain(probe[..n].iter().copied()).collect();
+ file.read_to_end(&mut bytes)?;
+ Ok(bytes.into())
}
}
@@ -174,9 +213,6 @@ pub(super) struct SourceMapFiles {
}
pub struct SourceMap {
- /// The address space below this value is currently used by the files in the source map.
- used_address_space: AtomicU32,
-
files: RwLock<SourceMapFiles>,
file_loader: IntoDynSyncSend<Box<dyn FileLoader + Sync + Send>>,
// This is used to apply the file path remapping as specified via
@@ -202,7 +238,6 @@ impl SourceMap {
hash_kind: SourceFileHashAlgorithm,
) -> SourceMap {
SourceMap {
- used_address_space: AtomicU32::new(0),
files: Default::default(),
file_loader: IntoDynSyncSend(file_loader),
path_mapping,
@@ -228,7 +263,7 @@ impl SourceMap {
///
/// Unlike `load_file`, guarantees that no normalization like BOM-removal
/// takes place.
- pub fn load_binary_file(&self, path: &Path) -> io::Result<Vec<u8>> {
+ pub fn load_binary_file(&self, path: &Path) -> io::Result<Lrc<[u8]>> {
let bytes = self.file_loader.read_binary_file(path)?;
// We need to add file to the `SourceMap`, so that it is present
@@ -254,26 +289,26 @@ impl SourceMap {
self.files.borrow().stable_id_to_source_file.get(&stable_id).cloned()
}
- fn allocate_address_space(&self, size: usize) -> Result<usize, OffsetOverflowError> {
- let size = u32::try_from(size).map_err(|_| OffsetOverflowError)?;
-
- loop {
- let current = self.used_address_space.load(Ordering::Relaxed);
- let next = current
- .checked_add(size)
- // Add one so there is some space between files. This lets us distinguish
- // positions in the `SourceMap`, even in the presence of zero-length files.
- .and_then(|next| next.checked_add(1))
- .ok_or(OffsetOverflowError)?;
-
- if self
- .used_address_space
- .compare_exchange(current, next, Ordering::Relaxed, Ordering::Relaxed)
- .is_ok()
- {
- return Ok(usize::try_from(current).unwrap());
- }
- }
+ fn register_source_file(
+ &self,
+ file_id: StableSourceFileId,
+ mut file: SourceFile,
+ ) -> Result<Lrc<SourceFile>, OffsetOverflowError> {
+ let mut files = self.files.borrow_mut();
+
+ file.start_pos = BytePos(if let Some(last_file) = files.source_files.last() {
+ // Add one so there is some space between files. This lets us distinguish
+ // positions in the `SourceMap`, even in the presence of zero-length files.
+ last_file.end_position().0.checked_add(1).ok_or(OffsetOverflowError)?
+ } else {
+ 0
+ });
+
+ let file = Lrc::new(file);
+ files.source_files.push(file.clone());
+ files.stable_id_to_source_file.insert(file_id, file.clone());
+
+ Ok(file)
}
/// Creates a new `SourceFile`.
@@ -297,32 +332,18 @@ impl SourceMap {
let (filename, _) = self.path_mapping.map_filename_prefix(&filename);
let file_id = StableSourceFileId::new_from_name(&filename, LOCAL_CRATE);
-
- let lrc_sf = match self.source_file_by_stable_id(file_id) {
- Some(lrc_sf) => lrc_sf,
+ match self.source_file_by_stable_id(file_id) {
+ Some(lrc_sf) => Ok(lrc_sf),
None => {
- let start_pos = self.allocate_address_space(src.len())?;
-
- let source_file = Lrc::new(SourceFile::new(
- filename,
- src,
- Pos::from_usize(start_pos),
- self.hash_kind,
- ));
+ let source_file = SourceFile::new(filename, src, self.hash_kind)?;
// Let's make sure the file_id we generated above actually matches
// the ID we generate for the SourceFile we just created.
debug_assert_eq!(StableSourceFileId::new(&source_file), file_id);
- let mut files = self.files.borrow_mut();
-
- files.source_files.push(source_file.clone());
- files.stable_id_to_source_file.insert(file_id, source_file.clone());
-
- source_file
+ self.register_source_file(file_id, source_file)
}
- };
- Ok(lrc_sf)
+ }
}
/// Allocates a new `SourceFile` representing a source file from an external
@@ -334,78 +355,37 @@ impl SourceMap {
filename: FileName,
src_hash: SourceFileHash,
name_hash: Hash128,
- source_len: usize,
+ source_len: u32,
cnum: CrateNum,
- file_local_lines: Lock<SourceFileLines>,
- mut file_local_multibyte_chars: Vec<MultiByteChar>,
- mut file_local_non_narrow_chars: Vec<NonNarrowChar>,
- mut file_local_normalized_pos: Vec<NormalizedPos>,
- original_start_pos: BytePos,
+ file_local_lines: FreezeLock<SourceFileLines>,
+ multibyte_chars: Vec<MultiByteChar>,
+ non_narrow_chars: Vec<NonNarrowChar>,
+ normalized_pos: Vec<NormalizedPos>,
metadata_index: u32,
) -> Lrc<SourceFile> {
- let start_pos = self
- .allocate_address_space(source_len)
- .expect("not enough address space for imported source file");
-
- let end_pos = Pos::from_usize(start_pos + source_len);
- let start_pos = Pos::from_usize(start_pos);
-
- // Translate these positions into the new global frame of reference,
- // now that the offset of the SourceFile is known.
- //
- // These are all unsigned values. `original_start_pos` may be larger or
- // smaller than `start_pos`, but `pos` is always larger than both.
- // Therefore, `(pos - original_start_pos) + start_pos` won't overflow
- // but `start_pos - original_start_pos` might. So we use the former
- // form rather than pre-computing the offset into a local variable. The
- // compiler backend can optimize away the repeated computations in a
- // way that won't trigger overflow checks.
- match &mut *file_local_lines.borrow_mut() {
- SourceFileLines::Lines(lines) => {
- for pos in lines {
- *pos = (*pos - original_start_pos) + start_pos;
- }
- }
- SourceFileLines::Diffs(SourceFileDiffs { line_start, .. }) => {
- *line_start = (*line_start - original_start_pos) + start_pos;
- }
- }
- for mbc in &mut file_local_multibyte_chars {
- mbc.pos = (mbc.pos - original_start_pos) + start_pos;
- }
- for swc in &mut file_local_non_narrow_chars {
- *swc = (*swc - original_start_pos) + start_pos;
- }
- for nc in &mut file_local_normalized_pos {
- nc.pos = (nc.pos - original_start_pos) + start_pos;
- }
+ let source_len = RelativeBytePos::from_u32(source_len);
- let source_file = Lrc::new(SourceFile {
+ let source_file = SourceFile {
name: filename,
src: None,
src_hash,
- external_src: Lock::new(ExternalSource::Foreign {
+ external_src: FreezeLock::new(ExternalSource::Foreign {
kind: ExternalSourceKind::AbsentOk,
metadata_index,
}),
- start_pos,
- end_pos,
+ start_pos: BytePos(0),
+ source_len,
lines: file_local_lines,
- multibyte_chars: file_local_multibyte_chars,
- non_narrow_chars: file_local_non_narrow_chars,
- normalized_pos: file_local_normalized_pos,
+ multibyte_chars,
+ non_narrow_chars,
+ normalized_pos,
name_hash,
cnum,
- });
-
- let mut files = self.files.borrow_mut();
-
- files.source_files.push(source_file.clone());
- files
- .stable_id_to_source_file
- .insert(StableSourceFileId::new(&source_file), source_file.clone());
+ };
- source_file
+ let file_id = StableSourceFileId::new(&source_file);
+ self.register_source_file(file_id, source_file)
+ .expect("not enough address space for imported source file")
}
/// If there is a doctest offset, applies it to the line.
@@ -439,6 +419,7 @@ impl SourceMap {
pub fn lookup_line(&self, pos: BytePos) -> Result<SourceFileAndLine, Lrc<SourceFile>> {
let f = self.lookup_source_file(pos);
+ let pos = f.relative_position(pos);
match f.lookup_line(pos) {
Some(line) => Ok(SourceFileAndLine { sf: f, line }),
None => Err(f),
@@ -534,7 +515,9 @@ impl SourceMap {
return true;
}
let f = (*self.files.borrow().source_files)[lo].clone();
- f.lookup_line(sp.lo()) != f.lookup_line(sp.hi())
+ let lo = f.relative_position(sp.lo());
+ let hi = f.relative_position(sp.hi());
+ f.lookup_line(lo) != f.lookup_line(hi)
}
#[instrument(skip(self), level = "trace")]
@@ -610,11 +593,11 @@ impl SourceMap {
end: (local_end.sf.name.clone(), local_end.sf.start_pos),
})))
} else {
- self.ensure_source_file_source_present(local_begin.sf.clone());
+ self.ensure_source_file_source_present(&local_begin.sf);
let start_index = local_begin.pos.to_usize();
let end_index = local_end.pos.to_usize();
- let source_len = (local_begin.sf.end_pos - local_begin.sf.start_pos).to_usize();
+ let source_len = local_begin.sf.source_len.to_usize();
if start_index > end_index || end_index > source_len {
return Err(SpanSnippetError::MalformedForSourcemap(MalformedSourceMapPositions {
@@ -627,7 +610,7 @@ impl SourceMap {
if let Some(ref src) = local_begin.sf.src {
extract_source(src, start_index, end_index)
- } else if let Some(src) = local_begin.sf.external_src.borrow().get_source() {
+ } else if let Some(src) = local_begin.sf.external_src.read().get_source() {
extract_source(src, start_index, end_index)
} else {
Err(SpanSnippetError::SourceNotAvailable { filename: local_begin.sf.name.clone() })
@@ -919,7 +902,7 @@ impl SourceMap {
let sp = sp.data();
let local_begin = self.lookup_byte_offset(sp.lo);
let start_index = local_begin.pos.to_usize();
- let src = local_begin.sf.external_src.borrow();
+ let src = local_begin.sf.external_src.read();
let snippet = if let Some(ref src) = local_begin.sf.src {
Some(&src[start_index..])
@@ -1021,7 +1004,7 @@ impl SourceMap {
return 1;
}
- let source_len = (local_begin.sf.end_pos - local_begin.sf.start_pos).to_usize();
+ let source_len = local_begin.sf.source_len.to_usize();
debug!("source_len=`{:?}`", source_len);
// Ensure indexes are also not malformed.
if start_index > end_index || end_index > source_len - 1 {
@@ -1029,7 +1012,7 @@ impl SourceMap {
return 1;
}
- let src = local_begin.sf.external_src.borrow();
+ let src = local_begin.sf.external_src.read();
let snippet = if let Some(src) = &local_begin.sf.src {
src
@@ -1076,7 +1059,7 @@ impl SourceMap {
self.files().iter().fold(0, |a, f| a + f.count_lines())
}
- pub fn ensure_source_file_source_present(&self, source_file: Lrc<SourceFile>) -> bool {
+ pub fn ensure_source_file_source_present(&self, source_file: &SourceFile) -> bool {
source_file.add_external_src(|| {
let FileName::Real(ref name) = source_file.name else {
return None;
diff --git a/compiler/rustc_span/src/source_map/tests.rs b/compiler/rustc_span/src/source_map/tests.rs
index 686b3b00d..a12f50c87 100644
--- a/compiler/rustc_span/src/source_map/tests.rs
+++ b/compiler/rustc_span/src/source_map/tests.rs
@@ -1,6 +1,6 @@
use super::*;
-use rustc_data_structures::sync::Lrc;
+use rustc_data_structures::sync::{FreezeLock, Lrc};
fn init_source_map() -> SourceMap {
let sm = SourceMap::new(FilePathMapping::empty());
@@ -50,6 +50,7 @@ impl SourceMap {
fn bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos {
let idx = self.lookup_source_file_idx(bpos);
let sf = &(*self.files.borrow().source_files)[idx];
+ let bpos = sf.relative_position(bpos);
sf.bytepos_to_file_charpos(bpos)
}
}
@@ -230,8 +231,7 @@ fn t10() {
let SourceFile {
name,
src_hash,
- start_pos,
- end_pos,
+ source_len,
lines,
multibyte_chars,
non_narrow_chars,
@@ -244,13 +244,12 @@ fn t10() {
name,
src_hash,
name_hash,
- (end_pos - start_pos).to_usize(),
+ source_len.to_u32(),
CrateNum::new(0),
- lines,
+ FreezeLock::new(lines.read().clone()),
multibyte_chars,
non_narrow_chars,
normalized_pos,
- start_pos,
0,
);
@@ -568,3 +567,30 @@ fn test_next_point() {
assert_eq!(span.hi().0, 6);
assert!(sm.span_to_snippet(span).is_err());
}
+
+#[cfg(target_os = "linux")]
+#[test]
+fn read_binary_file_handles_lying_stat() {
+ // read_binary_file tries to read the contents of a file into an Lrc<[u8]> while
+ // never having two copies of the data in memory at once. This is an optimization
+ // to support include_bytes! with large files. But since Rust allocators are
+ // sensitive to alignment, our implementation can't be bootstrapped off calling
+ // std::fs::read. So we test that we have the same behavior even on files where
+ // fs::metadata lies.
+
+ // stat always says that /proc/self/cmdline is length 0, but it isn't.
+ let cmdline = Path::new("/proc/self/cmdline");
+ let len = std::fs::metadata(cmdline).unwrap().len() as usize;
+ let real = std::fs::read(cmdline).unwrap();
+ assert!(len < real.len());
+ let bin = RealFileLoader.read_binary_file(cmdline).unwrap();
+ assert_eq!(&real[..], &bin[..]);
+
+ // stat always says that /sys/devices/system/cpu/kernel_max is the size of a block.
+ let kernel_max = Path::new("/sys/devices/system/cpu/kernel_max");
+ let len = std::fs::metadata(kernel_max).unwrap().len() as usize;
+ let real = std::fs::read(kernel_max).unwrap();
+ assert!(len > real.len());
+ let bin = RealFileLoader.read_binary_file(kernel_max).unwrap();
+ assert_eq!(&real[..], &bin[..]);
+}
diff --git a/compiler/rustc_span/src/span_encoding.rs b/compiler/rustc_span/src/span_encoding.rs
index 1eea0f63c..93ab15460 100644
--- a/compiler/rustc_span/src/span_encoding.rs
+++ b/compiler/rustc_span/src/span_encoding.rs
@@ -1,9 +1,3 @@
-// Spans are encoded using 1-bit tag and 2 different encoding formats (one for each tag value).
-// One format is used for keeping span data inline,
-// another contains index into an out-of-line span interner.
-// The encoding format for inline spans were obtained by optimizing over crates in rustc/libstd.
-// See https://internals.rust-lang.org/t/rfc-compiler-refactoring-spans/1357/28
-
use crate::def_id::{DefIndex, LocalDefId};
use crate::hygiene::SyntaxContext;
use crate::SPAN_TRACK;
@@ -13,59 +7,69 @@ use rustc_data_structures::fx::FxIndexSet;
/// A compressed span.
///
-/// Whereas [`SpanData`] is 16 bytes, which is a bit too big to stick everywhere, `Span`
-/// is a form that only takes up 8 bytes, with less space for the length, parent and
-/// context. The vast majority (99.9%+) of `SpanData` instances will fit within
-/// those 8 bytes; any `SpanData` whose fields don't fit into a `Span` are
+/// [`SpanData`] is 16 bytes, which is too big to stick everywhere. `Span` only
+/// takes up 8 bytes, with less space for the length, parent and context. The
+/// vast majority (99.9%+) of `SpanData` instances can be made to fit within
+/// those 8 bytes. Any `SpanData` whose fields don't fit into a `Span` are
/// stored in a separate interner table, and the `Span` will index into that
/// table. Interning is rare enough that the cost is low, but common enough
/// that the code is exercised regularly.
///
/// An earlier version of this code used only 4 bytes for `Span`, but that was
/// slower because only 80--90% of spans could be stored inline (even less in
-/// very large crates) and so the interner was used a lot more.
+/// very large crates) and so the interner was used a lot more. That version of
+/// the code also predated the storage of parents.
+///
+/// There are four different span forms.
///
-/// Inline (compressed) format with no parent:
-/// - `span.base_or_index == span_data.lo`
-/// - `span.len_or_tag == len == span_data.hi - span_data.lo` (must be `<= MAX_LEN`)
-/// - `span.ctxt_or_tag == span_data.ctxt` (must be `<= MAX_CTXT`)
+/// Inline-context format (requires non-huge length, non-huge context, and no parent):
+/// - `span.lo_or_index == span_data.lo`
+/// - `span.len_with_tag_or_marker == len == span_data.hi - span_data.lo` (must be `<= MAX_LEN`)
+/// - `span.ctxt_or_parent_or_marker == span_data.ctxt` (must be `<= MAX_CTXT`)
///
-/// Interned format with inline `SyntaxContext`:
-/// - `span.base_or_index == index` (indexes into the interner table)
-/// - `span.len_or_tag == LEN_TAG` (high bit set, all other bits are zero)
-/// - `span.ctxt_or_tag == span_data.ctxt` (must be `<= MAX_CTXT`)
+/// Inline-parent format (requires non-huge length, root context, and non-huge parent):
+/// - `span.lo_or_index == span_data.lo`
+/// - `span.len_with_tag_or_marker & !PARENT_TAG == len == span_data.hi - span_data.lo`
+/// (must be `<= MAX_LEN`)
+/// - `span.len_with_tag_or_marker` has top bit (`PARENT_TAG`) set
+/// - `span.ctxt_or_parent_or_marker == span_data.parent` (must be `<= MAX_CTXT`)
///
-/// Inline (compressed) format with root context:
-/// - `span.base_or_index == span_data.lo`
-/// - `span.len_or_tag == len == span_data.hi - span_data.lo` (must be `<= MAX_LEN`)
-/// - `span.len_or_tag` has top bit (`PARENT_MASK`) set
-/// - `span.ctxt == span_data.parent` (must be `<= MAX_CTXT`)
+/// Partially-interned format (requires non-huge context):
+/// - `span.lo_or_index == index` (indexes into the interner table)
+/// - `span.len_with_tag_or_marker == BASE_LEN_INTERNED_MARKER`
+/// - `span.ctxt_or_parent_or_marker == span_data.ctxt` (must be `<= MAX_CTXT`)
///
-/// Interned format:
-/// - `span.base_or_index == index` (indexes into the interner table)
-/// - `span.len_or_tag == LEN_TAG` (high bit set, all other bits are zero)
-/// - `span.ctxt_or_tag == CTXT_TAG`
+/// Fully-interned format (all cases not covered above):
+/// - `span.lo_or_index == index` (indexes into the interner table)
+/// - `span.len_with_tag_or_marker == BASE_LEN_INTERNED_MARKER`
+/// - `span.ctxt_or_parent_or_marker == CTXT_INTERNED_MARKER`
///
-/// The inline form uses 0 for the tag value (rather than 1) so that we don't
-/// need to mask out the tag bit when getting the length, and so that the
-/// dummy span can be all zeroes.
+/// The partially-interned form requires looking in the interning table for
+/// lo and length, but the context is stored inline as well as interned.
+/// This is useful because context lookups are often done in isolation, and
+/// inline lookups are quicker.
///
/// Notes about the choice of field sizes:
-/// - `base` is 32 bits in both `Span` and `SpanData`, which means that `base`
-/// values never cause interning. The number of bits needed for `base`
+/// - `lo` is 32 bits in both `Span` and `SpanData`, which means that `lo`
+/// values never cause interning. The number of bits needed for `lo`
/// depends on the crate size. 32 bits allows up to 4 GiB of code in a crate.
-/// - `len` is 15 bits in `Span` (a u16, minus 1 bit for the tag) and 32 bits
-/// in `SpanData`, which means that large `len` values will cause interning.
-/// The number of bits needed for `len` does not depend on the crate size.
-/// The most common numbers of bits for `len` are from 0 to 7, with a peak usually
-/// at 3 or 4, and then it drops off quickly from 8 onwards. 15 bits is enough
-/// for 99.99%+ of cases, but larger values (sometimes 20+ bits) might occur
-/// dozens of times in a typical crate.
-/// - `ctxt_or_tag` is 16 bits in `Span` and 32 bits in `SpanData`, which means that
-/// large `ctxt` values will cause interning. The number of bits needed for
-/// `ctxt` values depend partly on the crate size and partly on the form of
-/// the code. No crates in `rustc-perf` need more than 15 bits for `ctxt_or_tag`,
-/// but larger crates might need more than 16 bits.
+/// Having no compression on this field means there is no performance cliff
+/// if a crate exceeds a particular size.
+/// - `len` is ~15 bits in `Span` (a u16, minus 1 bit for PARENT_TAG) and 32
+/// bits in `SpanData`, which means that large `len` values will cause
+/// interning. The number of bits needed for `len` does not depend on the
+/// crate size. The most common numbers of bits for `len` are from 0 to 7,
+/// with a peak usually at 3 or 4, and then it drops off quickly from 8
+/// onwards. 15 bits is enough for 99.99%+ of cases, but larger values
+/// (sometimes 20+ bits) might occur dozens of times in a typical crate.
+/// - `ctxt_or_parent_or_marker` is 16 bits in `Span` and two 32 bit fields in
+/// `SpanData`, which means interning will happen if `ctxt` is large, if
+/// `parent` is large, or if both values are non-zero. The number of bits
+/// needed for `ctxt` values depend partly on the crate size and partly on
+/// the form of the code. No crates in `rustc-perf` need more than 15 bits
+/// for `ctxt_or_parent_or_marker`, but larger crates might need more than 16
+/// bits. The number of bits needed for `parent` hasn't been measured,
+/// because `parent` isn't currently used by default.
///
/// In order to reliably use parented spans in incremental compilation,
/// the dependency to the parent definition's span. This is performed
@@ -74,19 +78,22 @@ use rustc_data_structures::fx::FxIndexSet;
#[derive(Clone, Copy, Eq, PartialEq, Hash)]
#[rustc_pass_by_value]
pub struct Span {
- base_or_index: u32,
- len_or_tag: u16,
- ctxt_or_tag: u16,
+ lo_or_index: u32,
+ len_with_tag_or_marker: u16,
+ ctxt_or_parent_or_marker: u16,
}
-const LEN_TAG: u16 = 0b1111_1111_1111_1111;
-const PARENT_MASK: u16 = 0b1000_0000_0000_0000;
-const MAX_LEN: u32 = 0b0111_1111_1111_1111;
-const CTXT_TAG: u32 = 0b1111_1111_1111_1111;
-const MAX_CTXT: u32 = CTXT_TAG - 1;
+// `MAX_LEN` is chosen so that `PARENT_TAG | MAX_LEN` is distinct from
+// `BASE_LEN_INTERNED_MARKER`. (If `MAX_LEN` was 1 higher, this wouldn't be true.)
+const MAX_LEN: u32 = 0b0111_1111_1111_1110;
+const MAX_CTXT: u32 = 0b0111_1111_1111_1110;
+const PARENT_TAG: u16 = 0b1000_0000_0000_0000;
+const BASE_LEN_INTERNED_MARKER: u16 = 0b1111_1111_1111_1111;
+const CTXT_INTERNED_MARKER: u16 = 0b1111_1111_1111_1111;
-/// Dummy span, both position and length are zero, syntax context is zero as well.
-pub const DUMMY_SP: Span = Span { base_or_index: 0, len_or_tag: 0, ctxt_or_tag: 0 };
+/// The dummy span has zero position, length, and context, and no parent.
+pub const DUMMY_SP: Span =
+ Span { lo_or_index: 0, len_with_tag_or_marker: 0, ctxt_or_parent_or_marker: 0 };
impl Span {
#[inline]
@@ -100,39 +107,43 @@ impl Span {
std::mem::swap(&mut lo, &mut hi);
}
- let (base, len, ctxt2) = (lo.0, hi.0 - lo.0, ctxt.as_u32());
-
- if len <= MAX_LEN && ctxt2 <= MAX_CTXT {
- let len_or_tag = len as u16;
- debug_assert_eq!(len_or_tag & PARENT_MASK, 0);
+ let (lo2, len, ctxt2) = (lo.0, hi.0 - lo.0, ctxt.as_u32());
- if let Some(parent) = parent {
- // Inline format with parent.
- let len_or_tag = len_or_tag | PARENT_MASK;
- let parent2 = parent.local_def_index.as_u32();
- if ctxt2 == SyntaxContext::root().as_u32()
- && parent2 <= MAX_CTXT
- && len_or_tag < LEN_TAG
- {
- debug_assert_ne!(len_or_tag, LEN_TAG);
- return Span { base_or_index: base, len_or_tag, ctxt_or_tag: parent2 as u16 };
- }
- } else {
- // Inline format with ctxt.
- debug_assert_ne!(len_or_tag, LEN_TAG);
+ if len <= MAX_LEN {
+ if ctxt2 <= MAX_CTXT && parent.is_none() {
+ // Inline-context format.
return Span {
- base_or_index: base,
- len_or_tag: len as u16,
- ctxt_or_tag: ctxt2 as u16,
+ lo_or_index: lo2,
+ len_with_tag_or_marker: len as u16,
+ ctxt_or_parent_or_marker: ctxt2 as u16,
+ };
+ } else if ctxt2 == SyntaxContext::root().as_u32()
+ && let Some(parent) = parent
+ && let parent2 = parent.local_def_index.as_u32()
+ && parent2 <= MAX_CTXT
+ {
+ // Inline-parent format.
+ return Span {
+ lo_or_index: lo2,
+ len_with_tag_or_marker: PARENT_TAG | len as u16,
+ ctxt_or_parent_or_marker: parent2 as u16
};
}
}
- // Interned format.
+ // Partially-interned or fully-interned format.
let index =
with_span_interner(|interner| interner.intern(&SpanData { lo, hi, ctxt, parent }));
- let ctxt_or_tag = if ctxt2 <= MAX_CTXT { ctxt2 } else { CTXT_TAG } as u16;
- Span { base_or_index: index, len_or_tag: LEN_TAG, ctxt_or_tag }
+ let ctxt_or_parent_or_marker = if ctxt2 <= MAX_CTXT {
+ ctxt2 as u16 // partially-interned
+ } else {
+ CTXT_INTERNED_MARKER // fully-interned
+ };
+ Span {
+ lo_or_index: index,
+ len_with_tag_or_marker: BASE_LEN_INTERNED_MARKER,
+ ctxt_or_parent_or_marker,
+ }
}
#[inline]
@@ -148,56 +159,80 @@ impl Span {
/// This function must not be used outside the incremental engine.
#[inline]
pub fn data_untracked(self) -> SpanData {
- if self.len_or_tag != LEN_TAG {
- // Inline format.
- if self.len_or_tag & PARENT_MASK == 0 {
- debug_assert!(self.len_or_tag as u32 <= MAX_LEN);
+ if self.len_with_tag_or_marker != BASE_LEN_INTERNED_MARKER {
+ if self.len_with_tag_or_marker & PARENT_TAG == 0 {
+ // Inline-context format.
+ let len = self.len_with_tag_or_marker as u32;
+ debug_assert!(len <= MAX_LEN);
SpanData {
- lo: BytePos(self.base_or_index),
- hi: BytePos(self.base_or_index + self.len_or_tag as u32),
- ctxt: SyntaxContext::from_u32(self.ctxt_or_tag as u32),
+ lo: BytePos(self.lo_or_index),
+ hi: BytePos(self.lo_or_index + len),
+ ctxt: SyntaxContext::from_u32(self.ctxt_or_parent_or_marker as u32),
parent: None,
}
} else {
- let len = self.len_or_tag & !PARENT_MASK;
- debug_assert!(len as u32 <= MAX_LEN);
- let parent =
- LocalDefId { local_def_index: DefIndex::from_u32(self.ctxt_or_tag as u32) };
+ // Inline-parent format.
+ let len = (self.len_with_tag_or_marker & !PARENT_TAG) as u32;
+ debug_assert!(len <= MAX_LEN);
+ let parent = LocalDefId {
+ local_def_index: DefIndex::from_u32(self.ctxt_or_parent_or_marker as u32),
+ };
SpanData {
- lo: BytePos(self.base_or_index),
- hi: BytePos(self.base_or_index + len as u32),
+ lo: BytePos(self.lo_or_index),
+ hi: BytePos(self.lo_or_index + len),
ctxt: SyntaxContext::root(),
parent: Some(parent),
}
}
} else {
- // Interned format.
- let index = self.base_or_index;
+ // Fully-interned or partially-interned format. In either case,
+ // the interned value contains all the data, so we don't need to
+ // distinguish them.
+ let index = self.lo_or_index;
with_span_interner(|interner| interner.spans[index as usize])
}
}
+ /// Returns `true` if this is a dummy span with any hygienic context.
+ #[inline]
+ pub fn is_dummy(self) -> bool {
+ if self.len_with_tag_or_marker != BASE_LEN_INTERNED_MARKER {
+ // Inline-context or inline-parent format.
+ let lo = self.lo_or_index;
+ let len = (self.len_with_tag_or_marker & !PARENT_TAG) as u32;
+ debug_assert!(len <= MAX_LEN);
+ lo == 0 && len == 0
+ } else {
+ // Fully-interned or partially-interned format.
+ let index = self.lo_or_index;
+ let data = with_span_interner(|interner| interner.spans[index as usize]);
+ data.lo == BytePos(0) && data.hi == BytePos(0)
+ }
+ }
+
/// This function is used as a fast path when decoding the full `SpanData` is not necessary.
+ /// It's a cut-down version of `data_untracked`.
#[inline]
pub fn ctxt(self) -> SyntaxContext {
- let ctxt_or_tag = self.ctxt_or_tag as u32;
- // Check for interned format.
- if self.len_or_tag == LEN_TAG {
- if ctxt_or_tag == CTXT_TAG {
- // Fully interned format.
- let index = self.base_or_index;
- with_span_interner(|interner| interner.spans[index as usize].ctxt)
+ if self.len_with_tag_or_marker != BASE_LEN_INTERNED_MARKER {
+ if self.len_with_tag_or_marker & PARENT_TAG == 0 {
+ // Inline-context format.
+ SyntaxContext::from_u32(self.ctxt_or_parent_or_marker as u32)
} else {
- // Interned format with inline ctxt.
- SyntaxContext::from_u32(ctxt_or_tag)
+ // Inline-parent format. We know that the SyntaxContext is root.
+ SyntaxContext::root()
}
- } else if self.len_or_tag & PARENT_MASK == 0 {
- // Inline format with inline ctxt.
- SyntaxContext::from_u32(ctxt_or_tag)
} else {
- // Inline format with inline parent.
- // We know that the SyntaxContext is root.
- SyntaxContext::root()
+ if self.ctxt_or_parent_or_marker != CTXT_INTERNED_MARKER {
+ // Partially-interned format. This path avoids looking up the
+ // interned value, and is the whole point of the
+ // partially-interned format.
+ SyntaxContext::from_u32(self.ctxt_or_parent_or_marker as u32)
+ } else {
+ // Fully-interned format.
+ let index = self.lo_or_index;
+ with_span_interner(|interner| interner.spans[index as usize].ctxt)
+ }
}
}
}
diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs
index 28a2dfebc..4f4625662 100644
--- a/compiler/rustc_span/src/symbol.rs
+++ b/compiler/rustc_span/src/symbol.rs
@@ -232,11 +232,13 @@ symbols! {
NonZeroI32,
NonZeroI64,
NonZeroI8,
+ NonZeroIsize,
NonZeroU128,
NonZeroU16,
NonZeroU32,
NonZeroU64,
NonZeroU8,
+ NonZeroUsize,
None,
Ok,
Option,
@@ -278,6 +280,7 @@ symbols! {
RwLock,
RwLockReadGuard,
RwLockWriteGuard,
+ Saturating,
Send,
SeqCst,
SliceIndex,
@@ -305,6 +308,7 @@ symbols! {
Vec,
VecDeque,
Wrapper,
+ Wrapping,
Yield,
_DECLS,
_Self,
@@ -383,6 +387,7 @@ symbols! {
asm_sym,
asm_unwind,
assert,
+ assert_eq,
assert_eq_macro,
assert_inhabited,
assert_macro,
@@ -569,6 +574,8 @@ symbols! {
cosf32,
cosf64,
count,
+ coverage,
+ coverage_attribute,
cr,
crate_id,
crate_in_paths,
@@ -585,6 +592,7 @@ symbols! {
cttz,
cttz_nonzero,
custom_attribute,
+ custom_code_classes_in_docs,
custom_derive,
custom_inner_attributes,
custom_mir,
@@ -1064,6 +1072,7 @@ symbols! {
note,
object_safe_for_dispatch,
of,
+ off,
offset,
offset_of,
omit_gdb_pretty_printer_section,
@@ -1100,6 +1109,7 @@ symbols! {
panic_handler,
panic_impl,
panic_implementation,
+ panic_in_cleanup,
panic_info,
panic_location,
panic_misaligned_pointer_dereference,
@@ -1169,7 +1179,6 @@ symbols! {
ptr_cast_const,
ptr_cast_mut,
ptr_const_is_null,
- ptr_from_mut,
ptr_from_ref,
ptr_guaranteed_cmp,
ptr_is_null,
@@ -1179,6 +1188,9 @@ symbols! {
ptr_offset_from,
ptr_offset_from_unsigned,
ptr_unique,
+ ptr_write,
+ ptr_write_unaligned,
+ ptr_write_volatile,
pub_macro_rules,
pub_restricted,
public,
@@ -1273,6 +1285,7 @@ symbols! {
rust_eh_catch_typeinfo,
rust_eh_personality,
rustc,
+ rustc_abi,
rustc_allocator,
rustc_allocator_zeroed,
rustc_allow_const_fn_unstable,
@@ -1324,6 +1337,7 @@ symbols! {
rustc_main,
rustc_mir,
rustc_must_implement_one_of,
+ rustc_never_returns_null_ptr,
rustc_nonnull_optimization_guaranteed,
rustc_nounwind,
rustc_object_lifetime_default,
@@ -1357,6 +1371,7 @@ symbols! {
rustc_trivial_field_reads,
rustc_unsafe_specialization_marker,
rustc_variance,
+ rustc_variance_of_opaques,
rustdoc,
rustdoc_internals,
rustdoc_missing_doc_code_examples,
@@ -1370,6 +1385,7 @@ symbols! {
sanitizer_cfi_normalize_integers,
sanitizer_runtime,
saturating_add,
+ saturating_div,
saturating_sub,
self_in_typedefs,
self_struct_ctor,
@@ -1449,6 +1465,7 @@ symbols! {
simd_shl,
simd_shr,
simd_shuffle,
+ simd_shuffle_generic,
simd_sub,
simd_trunc,
simd_xor,
@@ -1615,6 +1632,7 @@ symbols! {
unix_sigpipe,
unlikely,
unmarked_api,
+ unnamed_fields,
unpin,
unreachable,
unreachable_2015,
@@ -1627,6 +1645,7 @@ symbols! {
unsafe_block_in_unsafe_fn,
unsafe_cell,
unsafe_cell_from_mut,
+ unsafe_cell_raw_get,
unsafe_no_drop_flag,
unsafe_pin_internals,
unsize,
@@ -1687,7 +1706,10 @@ symbols! {
windows_subsystem,
with_negative_coherence,
wrapping_add,
+ wrapping_div,
wrapping_mul,
+ wrapping_rem,
+ wrapping_rem_euclid,
wrapping_sub,
wreg,
write_bytes,
diff --git a/compiler/rustc_span/src/tests.rs b/compiler/rustc_span/src/tests.rs
index a242ad6d1..cb88fa890 100644
--- a/compiler/rustc_span/src/tests.rs
+++ b/compiler/rustc_span/src/tests.rs
@@ -3,24 +3,21 @@ use super::*;
#[test]
fn test_lookup_line() {
let source = "abcdefghijklm\nabcdefghij\n...".to_owned();
- let sf = SourceFile::new(
- FileName::Anon(Hash64::ZERO),
- source,
- BytePos(3),
- SourceFileHashAlgorithm::Sha256,
- );
- sf.lines(|lines| assert_eq!(lines, &[BytePos(3), BytePos(17), BytePos(28)]));
+ let mut sf =
+ SourceFile::new(FileName::Anon(Hash64::ZERO), source, SourceFileHashAlgorithm::Sha256)
+ .unwrap();
+ sf.start_pos = BytePos(3);
+ assert_eq!(sf.lines(), &[RelativeBytePos(0), RelativeBytePos(14), RelativeBytePos(25)]);
- assert_eq!(sf.lookup_line(BytePos(0)), None);
- assert_eq!(sf.lookup_line(BytePos(3)), Some(0));
- assert_eq!(sf.lookup_line(BytePos(4)), Some(0));
+ assert_eq!(sf.lookup_line(RelativeBytePos(0)), Some(0));
+ assert_eq!(sf.lookup_line(RelativeBytePos(1)), Some(0));
- assert_eq!(sf.lookup_line(BytePos(16)), Some(0));
- assert_eq!(sf.lookup_line(BytePos(17)), Some(1));
- assert_eq!(sf.lookup_line(BytePos(18)), Some(1));
+ assert_eq!(sf.lookup_line(RelativeBytePos(13)), Some(0));
+ assert_eq!(sf.lookup_line(RelativeBytePos(14)), Some(1));
+ assert_eq!(sf.lookup_line(RelativeBytePos(15)), Some(1));
- assert_eq!(sf.lookup_line(BytePos(28)), Some(2));
- assert_eq!(sf.lookup_line(BytePos(29)), Some(2));
+ assert_eq!(sf.lookup_line(RelativeBytePos(25)), Some(2));
+ assert_eq!(sf.lookup_line(RelativeBytePos(26)), Some(2));
}
#[test]