summaryrefslogtreecommitdiffstats
path: root/compiler/rustc_span
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--compiler/rustc_span/Cargo.toml21
-rw-r--r--compiler/rustc_span/src/analyze_source_file.rs274
-rw-r--r--compiler/rustc_span/src/analyze_source_file/tests.rs142
-rw-r--r--compiler/rustc_span/src/caching_source_map_view.rs293
-rw-r--r--compiler/rustc_span/src/def_id.rs444
-rw-r--r--compiler/rustc_span/src/edition.rs110
-rw-r--r--compiler/rustc_span/src/fatal_error.rs26
-rw-r--r--compiler/rustc_span/src/hygiene.rs1528
-rw-r--r--compiler/rustc_span/src/lev_distance.rs177
-rw-r--r--compiler/rustc_span/src/lev_distance/tests.rs71
-rw-r--r--compiler/rustc_span/src/lib.rs2116
-rw-r--r--compiler/rustc_span/src/profiling.rs35
-rw-r--r--compiler/rustc_span/src/source_map.rs1281
-rw-r--r--compiler/rustc_span/src/source_map/tests.rs481
-rw-r--r--compiler/rustc_span/src/span_encoding.rs150
-rw-r--r--compiler/rustc_span/src/symbol.rs2067
-rw-r--r--compiler/rustc_span/src/symbol/tests.rs25
-rw-r--r--compiler/rustc_span/src/tests.rs43
18 files changed, 9284 insertions, 0 deletions
diff --git a/compiler/rustc_span/Cargo.toml b/compiler/rustc_span/Cargo.toml
new file mode 100644
index 000000000..7227b193f
--- /dev/null
+++ b/compiler/rustc_span/Cargo.toml
@@ -0,0 +1,21 @@
+[package]
+name = "rustc_span"
+version = "0.0.0"
+edition = "2021"
+
+[lib]
+doctest = false
+
+[dependencies]
+rustc_serialize = { path = "../rustc_serialize" }
+rustc_macros = { path = "../rustc_macros" }
+rustc_data_structures = { path = "../rustc_data_structures" }
+rustc_index = { path = "../rustc_index" }
+rustc_arena = { path = "../rustc_arena" }
+scoped-tls = "1.0"
+unicode-width = "0.1.4"
+cfg-if = "0.1.2"
+tracing = "0.1"
+sha1 = { package = "sha-1", version = "0.10.0" }
+sha2 = "0.10.1"
+md5 = { package = "md-5", version = "0.10.0" }
diff --git a/compiler/rustc_span/src/analyze_source_file.rs b/compiler/rustc_span/src/analyze_source_file.rs
new file mode 100644
index 000000000..5987fb2a1
--- /dev/null
+++ b/compiler/rustc_span/src/analyze_source_file.rs
@@ -0,0 +1,274 @@
+use super::*;
+use unicode_width::UnicodeWidthChar;
+
+#[cfg(test)]
+mod tests;
+
+/// Scans `src` and returns `(line starts, multi-byte chars, non-narrow chars)`, with every
+/// recorded position offset by `source_file_start_pos`.
+///
+/// On x86/x86_64 an SSE2 enhanced implementation is used if hardware support
+/// is detected at runtime; otherwise the generic byte-by-byte scanner is used.
+pub fn analyze_source_file(
+ src: &str,
+ source_file_start_pos: BytePos,
+) -> (Vec<BytePos>, Vec<MultiByteChar>, Vec<NonNarrowChar>) {
+ let mut lines = vec![source_file_start_pos]; // the first line starts at the file start
+ let mut multi_byte_chars = vec![];
+ let mut non_narrow_chars = vec![];
+
+ // Calls the right implementation, depending on hardware support available.
+ analyze_source_file_dispatch(
+ src,
+ source_file_start_pos,
+ &mut lines,
+ &mut multi_byte_chars,
+ &mut non_narrow_chars,
+ );
+
+ // The code above optimistically registers a new line *after* each \n
+ // it encounters. If that point is already outside the source_file (the text
+ // ends in \n, or is empty so the initial entry equals the end), remove it again.
+ if let Some(&last_line_start) = lines.last() {
+ let source_file_end = source_file_start_pos + BytePos::from_usize(src.len());
+ assert!(source_file_end >= last_line_start);
+ if last_line_start == source_file_end {
+ lines.pop();
+ }
+ }
+
+ (lines, multi_byte_chars, non_narrow_chars)
+}
+
+cfg_if::cfg_if! {
+ if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64")))] {
+ fn analyze_source_file_dispatch(src: &str, // x86/x86_64 variant: picks SSE2 or generic at runtime
+ source_file_start_pos: BytePos,
+ lines: &mut Vec<BytePos>,
+ multi_byte_chars: &mut Vec<MultiByteChar>,
+ non_narrow_chars: &mut Vec<NonNarrowChar>) {
+ if is_x86_feature_detected!("sse2") {
+ unsafe { // SAFETY: SSE2 support was verified by the runtime check just above.
+ analyze_source_file_sse2(src,
+ source_file_start_pos,
+ lines,
+ multi_byte_chars,
+ non_narrow_chars);
+ }
+ } else { // No SSE2: scan all `src.len()` bytes with the generic implementation.
+ analyze_source_file_generic(src,
+ src.len(),
+ source_file_start_pos,
+ lines,
+ multi_byte_chars,
+ non_narrow_chars);
+
+ }
+ }
+
+ /// Checks 16 byte chunks of text at a time. If the chunk contains
+ /// something other than printable ASCII characters and newlines, the
+ /// function falls back to the generic implementation for that chunk. Otherwise it uses
+ /// SSE2 intrinsics to quickly find all newlines. Safety: the CPU must support SSE2.
+ #[target_feature(enable = "sse2")]
+ unsafe fn analyze_source_file_sse2(src: &str,
+ output_offset: BytePos,
+ lines: &mut Vec<BytePos>,
+ multi_byte_chars: &mut Vec<MultiByteChar>,
+ non_narrow_chars: &mut Vec<NonNarrowChar>) {
+ #[cfg(target_arch = "x86")]
+ use std::arch::x86::*;
+ #[cfg(target_arch = "x86_64")]
+ use std::arch::x86_64::*;
+
+ const CHUNK_SIZE: usize = 16;
+
+ let src_bytes = src.as_bytes();
+
+ let chunk_count = src.len() / CHUNK_SIZE;
+
+ // This variable keeps track of where we should start decoding a
+ // chunk. If a multi-byte character spans across chunk boundaries,
+ // we need to skip that part in the next chunk because we already
+ // handled it.
+ let mut intra_chunk_offset = 0;
+
+ for chunk_index in 0 .. chunk_count {
+ let ptr = src_bytes.as_ptr() as *const __m128i;
+ // We don't know if the pointer is aligned to 16 bytes, so we
+ // use `loadu`, which supports unaligned loading.
+ let chunk = _mm_loadu_si128(ptr.add(chunk_index));
+
+ // For each byte in the chunk, see if its value (as a signed i8) is < 0,
+ // which indicates that it's part of a multi-byte UTF-8 char.
+ let multibyte_test = _mm_cmplt_epi8(chunk, _mm_set1_epi8(0));
+ // Create a bit mask from the comparison results.
+ let multibyte_mask = _mm_movemask_epi8(multibyte_test);
+
+ // If the bit mask is all zero, we only have ASCII chars here:
+ if multibyte_mask == 0 {
+ assert!(intra_chunk_offset == 0);
+
+ // Check if there are any control characters in the chunk. All
+ // control characters that we can encounter at this point have a
+ // byte value less than 32 or ...
+ let control_char_test0 = _mm_cmplt_epi8(chunk, _mm_set1_epi8(32));
+ let control_char_mask0 = _mm_movemask_epi8(control_char_test0);
+
+ // ... it's the ASCII 'DEL' character with a value of 127.
+ let control_char_test1 = _mm_cmpeq_epi8(chunk, _mm_set1_epi8(127));
+ let control_char_mask1 = _mm_movemask_epi8(control_char_test1);
+
+ let control_char_mask = control_char_mask0 | control_char_mask1;
+
+ if control_char_mask != 0 {
+ // Check for newlines in the chunk
+ let newlines_test = _mm_cmpeq_epi8(chunk, _mm_set1_epi8(b'\n' as i8));
+ let newlines_mask = _mm_movemask_epi8(newlines_test);
+
+ if control_char_mask == newlines_mask {
+ // All control characters are newlines, record them. The high 16 bits are set as a sentinel so the loop below terminates.
+ let mut newlines_mask = 0xFFFF0000 | newlines_mask as u32;
+ let output_offset = output_offset +
+ BytePos::from_usize(chunk_index * CHUNK_SIZE + 1);
+
+ loop {
+ let index = newlines_mask.trailing_zeros();
+
+ if index >= CHUNK_SIZE as u32 {
+ // We have arrived at the end of the chunk (only sentinel bits are left).
+ break
+ }
+
+ lines.push(BytePos(index) + output_offset);
+
+ // Clear the bit (keep only bits above `index`), so we can find the next one.
+ newlines_mask &= (!1) << index;
+ }
+
+ // We are done for this chunk. All control characters were
+ // newlines and we took care of those.
+ continue
+ } else {
+ // Some of the control characters are not newlines,
+ // fall through to the slow path below.
+ }
+ } else {
+ // No control characters, nothing to record for this chunk
+ continue
+ }
+ }
+
+ // The slow path.
+ // There are multi-byte chars or non-newline control chars in here, fall back to generic decoding.
+ let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset;
+ intra_chunk_offset = analyze_source_file_generic(
+ &src[scan_start .. ],
+ CHUNK_SIZE - intra_chunk_offset,
+ BytePos::from_usize(scan_start) + output_offset,
+ lines,
+ multi_byte_chars,
+ non_narrow_chars
+ );
+ }
+
+ // There might still be a tail left to analyze (bytes after the last full chunk, minus any already consumed by a straddling char)
+ let tail_start = chunk_count * CHUNK_SIZE + intra_chunk_offset;
+ if tail_start < src.len() {
+ analyze_source_file_generic(&src[tail_start as usize ..],
+ src.len() - tail_start,
+ output_offset + BytePos::from_usize(tail_start),
+ lines,
+ multi_byte_chars,
+ non_narrow_chars);
+ }
+ }
+ } else {
+
+ // Not an x86/x86_64 target, so the SSE2 path is not compiled in: always use the generic implementation.
+ fn analyze_source_file_dispatch(src: &str,
+ source_file_start_pos: BytePos,
+ lines: &mut Vec<BytePos>,
+ multi_byte_chars: &mut Vec<MultiByteChar>,
+ non_narrow_chars: &mut Vec<NonNarrowChar>) {
+ analyze_source_file_generic(src,
+ src.len(),
+ source_file_start_pos,
+ lines,
+ multi_byte_chars,
+ non_narrow_chars);
+ }
+ }
+}
+
+// `scan_len` determines the number of bytes in `src` to scan. Note that the
+// function can read past `scan_len` if a multi-byte character starts within the
+// range but extends past it. The overflow (in bytes) is returned by the function.
+fn analyze_source_file_generic(
+ src: &str,
+ scan_len: usize,
+ output_offset: BytePos,
+ lines: &mut Vec<BytePos>,
+ multi_byte_chars: &mut Vec<MultiByteChar>,
+ non_narrow_chars: &mut Vec<NonNarrowChar>,
+) -> usize {
+ assert!(src.len() >= scan_len);
+ let mut i = 0;
+ let src_bytes = src.as_bytes();
+
+ while i < scan_len {
+ let byte = unsafe {
+ // SAFETY: the loop condition gives i < scan_len, and the assert above gives scan_len <= src.len()
+ *src_bytes.get_unchecked(i as usize)
+ };
+
+ // How much to advance in order to get to the next UTF-8 char in the
+ // string.
+ let mut char_len = 1;
+
+ if byte < 32 {
+ // This is an ASCII control character, it could be one of the cases
+ // that are interesting to us.
+
+ let pos = BytePos::from_usize(i) + output_offset;
+
+ match byte {
+ b'\n' => {
+ lines.push(pos + BytePos(1)); // the next line starts right after the newline
+ }
+ b'\t' => {
+ non_narrow_chars.push(NonNarrowChar::Tab(pos));
+ }
+ _ => {
+ non_narrow_chars.push(NonNarrowChar::ZeroWidth(pos));
+ }
+ }
+ } else if byte >= 127 {
+ // The slow path:
+ // This is either ASCII control character "DEL" or the beginning of
+ // a multibyte char. Just decode to `char`.
+ let c = (&src[i..]).chars().next().unwrap();
+ char_len = c.len_utf8();
+
+ let pos = BytePos::from_usize(i) + output_offset;
+
+ if char_len > 1 {
+ assert!((2..=4).contains(&char_len));
+ let mbc = MultiByteChar { pos, bytes: char_len as u8 };
+ multi_byte_chars.push(mbc);
+ }
+
+ // Assume control characters are zero width (`width` returning `None` is mapped to 0).
+ // FIXME: How can we decide between `width` and `width_cjk`?
+ let char_width = UnicodeWidthChar::width(c).unwrap_or(0);
+
+ if char_width != 1 {
+ non_narrow_chars.push(NonNarrowChar::new(pos, char_width));
+ }
+ }
+
+ i += char_len;
+ }
+
+ i - scan_len
+}
diff --git a/compiler/rustc_span/src/analyze_source_file/tests.rs b/compiler/rustc_span/src/analyze_source_file/tests.rs
new file mode 100644
index 000000000..66aefc9a7
--- /dev/null
+++ b/compiler/rustc_span/src/analyze_source_file/tests.rs
@@ -0,0 +1,142 @@
+use super::*;
+
+macro_rules! test { // Generates one `#[test]` fn that runs `analyze_source_file` and checks all three outputs.
+ (case: $test_name:ident,
+ text: $text:expr,
+ source_file_start_pos: $source_file_start_pos:expr,
+ lines: $lines:expr,
+ multi_byte_chars: $multi_byte_chars:expr,
+ non_narrow_chars: $non_narrow_chars:expr,) => {
+ #[test]
+ fn $test_name() {
+ let (lines, multi_byte_chars, non_narrow_chars) =
+ analyze_source_file($text, BytePos($source_file_start_pos));
+
+ let expected_lines: Vec<BytePos> = $lines.into_iter().map(BytePos).collect();
+
+ assert_eq!(lines, expected_lines);
+
+ let expected_mbcs: Vec<MultiByteChar> = $multi_byte_chars
+ .into_iter()
+ .map(|(pos, bytes)| MultiByteChar { pos: BytePos(pos), bytes }) // expected entries are (pos, byte length)
+ .collect();
+
+ assert_eq!(multi_byte_chars, expected_mbcs);
+
+ let expected_nncs: Vec<NonNarrowChar> = $non_narrow_chars
+ .into_iter()
+ .map(|(pos, width)| NonNarrowChar::new(BytePos(pos), width)) // expected entries are (pos, display width)
+ .collect();
+
+ assert_eq!(non_narrow_chars, expected_nncs);
+ }
+ };
+}
+
+test!(
+ case: empty_text,
+ text: "",
+ source_file_start_pos: 0,
+ lines: vec![],
+ multi_byte_chars: vec![],
+ non_narrow_chars: vec![],
+);
+
+test!(
+ case: newlines_short,
+ text: "a\nc",
+ source_file_start_pos: 0,
+ lines: vec![0, 2],
+ multi_byte_chars: vec![],
+ non_narrow_chars: vec![],
+);
+
+test!(
+ case: newlines_long,
+ text: "012345678\nabcdef012345678\na",
+ source_file_start_pos: 0,
+ lines: vec![0, 10, 26],
+ multi_byte_chars: vec![],
+ non_narrow_chars: vec![],
+);
+
+test!(
+ case: newline_and_multi_byte_char_in_same_chunk,
+ text: "01234β789\nbcdef0123456789abcdef",
+ source_file_start_pos: 0,
+ lines: vec![0, 11],
+ multi_byte_chars: vec![(5, 2)],
+ non_narrow_chars: vec![],
+);
+
+test!(
+ case: newline_and_control_char_in_same_chunk,
+ text: "01234\u{07}6789\nbcdef0123456789abcdef",
+ source_file_start_pos: 0,
+ lines: vec![0, 11],
+ multi_byte_chars: vec![],
+ non_narrow_chars: vec![(5, 0)],
+);
+
+test!(
+ case: multi_byte_char_short,
+ text: "aβc",
+ source_file_start_pos: 0,
+ lines: vec![0],
+ multi_byte_chars: vec![(1, 2)],
+ non_narrow_chars: vec![],
+);
+
+test!(
+ case: multi_byte_char_long,
+ text: "0123456789abcΔf012345β",
+ source_file_start_pos: 0,
+ lines: vec![0],
+ multi_byte_chars: vec![(13, 2), (22, 2)],
+ non_narrow_chars: vec![],
+);
+
+test!(
+ case: multi_byte_char_across_chunk_boundary,
+ text: "0123456789abcdeΔ123456789abcdef01234",
+ source_file_start_pos: 0,
+ lines: vec![0],
+ multi_byte_chars: vec![(15, 2)],
+ non_narrow_chars: vec![],
+);
+
+test!(
+ case: multi_byte_char_across_chunk_boundary_tail,
+ text: "0123456789abcdeΔ....",
+ source_file_start_pos: 0,
+ lines: vec![0],
+ multi_byte_chars: vec![(15, 2)],
+ non_narrow_chars: vec![],
+);
+
+test!(
+ case: non_narrow_short,
+ text: "0\t2",
+ source_file_start_pos: 0,
+ lines: vec![0],
+ multi_byte_chars: vec![],
+ non_narrow_chars: vec![(1, 4)],
+);
+
+test!(
+ case: non_narrow_long,
+ text: "01\t3456789abcdef01234567\u{07}9",
+ source_file_start_pos: 0,
+ lines: vec![0],
+ multi_byte_chars: vec![],
+ non_narrow_chars: vec![(2, 4), (24, 0)],
+);
+
+test!(
+ case: output_offset_all,
+ text: "01\t345\n789abcΔf01234567\u{07}9\nbcΔf",
+ source_file_start_pos: 1000,
+ lines: vec![0 + 1000, 7 + 1000, 27 + 1000],
+ multi_byte_chars: vec![(13 + 1000, 2), (29 + 1000, 2)],
+ non_narrow_chars: vec![(2 + 1000, 4), (24 + 1000, 0)],
+);
diff --git a/compiler/rustc_span/src/caching_source_map_view.rs b/compiler/rustc_span/src/caching_source_map_view.rs
new file mode 100644
index 000000000..fdabf404a
--- /dev/null
+++ b/compiler/rustc_span/src/caching_source_map_view.rs
@@ -0,0 +1,293 @@
+use crate::source_map::SourceMap;
+use crate::{BytePos, SourceFile, SpanData};
+use rustc_data_structures::sync::Lrc;
+use std::ops::Range;
+
+#[derive(Clone)]
+struct CacheEntry {
+ time_stamp: usize, // set by `touch`; the entry with the lowest value is evicted first
+ line_number: usize, // 1-based: `update` stores the line index + 1
+ // The line's byte position range in the `SourceMap`. This range will fail to contain a valid
+ // position in certain edge cases. Spans often start/end one past something, and when that
+ // something is the last character of a file (this can happen when a file doesn't end in a
+ // newline, for example), we'd still like for the position to be considered within the last
+ // line. However, it isn't according to the exclusive upper bound of this range. We cannot
+ // change the upper bound to be inclusive, because for most lines, the upper bound is the same
+ // as the lower bound of the next line, so there would be an ambiguity.
+ //
+ // Since the containment aspect of this range is only used to see whether or not the cache
+ // entry contains a position, the only ramification of the above is that we will get cache
+ // misses for these rare positions. A line lookup for the position via `SourceMap::lookup_line`
+ // after a cache miss will produce the last line number, as desired.
+ line: Range<BytePos>,
+ file: Lrc<SourceFile>,
+ file_index: usize, // index of `file` within `SourceMap::files()`
+}
+
+impl CacheEntry {
+ #[inline]
+ fn update( // Re-points this entry at the line containing `pos`, optionally switching files first.
+ &mut self,
+ new_file_and_idx: Option<(Lrc<SourceFile>, usize)>,
+ pos: BytePos,
+ time_stamp: usize,
+ ) {
+ if let Some((file, file_idx)) = new_file_and_idx {
+ self.file = file;
+ self.file_index = file_idx;
+ }
+
+ let line_index = self.file.lookup_line(pos).unwrap(); // panics if the file has no line for `pos`
+ let line_bounds = self.file.line_bounds(line_index);
+ self.line_number = line_index + 1;
+ self.line = line_bounds;
+ self.touch(time_stamp);
+ }
+
+ #[inline]
+ fn touch(&mut self, time_stamp: usize) { // Marks the entry as used at `time_stamp`.
+ self.time_stamp = time_stamp;
+ }
+}
+
+#[derive(Clone)]
+pub struct CachingSourceMapView<'sm> { // A `SourceMap` view that caches the most recently looked-up lines.
+ source_map: &'sm SourceMap,
+ line_cache: [CacheEntry; 3], // on a miss, the entry with the lowest time stamp is replaced
+ time_stamp: usize, // incremented at the start of every lookup
+}
+
+impl<'sm> CachingSourceMapView<'sm> {
+ pub fn new(source_map: &'sm SourceMap) -> CachingSourceMapView<'sm> { // Panics if `source_map` has no files (`files[0]`).
+ let files = source_map.files();
+ let first_file = files[0].clone();
+ let entry = CacheEntry { // placeholder entry: its empty `line` range can never produce a cache hit
+ time_stamp: 0,
+ line_number: 0,
+ line: BytePos(0)..BytePos(0),
+ file: first_file,
+ file_index: 0,
+ };
+
+ CachingSourceMapView {
+ source_map,
+ line_cache: [entry.clone(), entry.clone(), entry],
+ time_stamp: 0,
+ }
+ }
+
+ pub fn byte_pos_to_line_and_col( // Returns (file, 1-based line number, byte offset of `pos` from the line start).
+ &mut self,
+ pos: BytePos,
+ ) -> Option<(Lrc<SourceFile>, usize, BytePos)> {
+ self.time_stamp += 1;
+
+ // Check if the position is in one of the cached lines (-1 means "not cached")
+ let cache_idx = self.cache_entry_index(pos);
+ if cache_idx != -1 {
+ let cache_entry = &mut self.line_cache[cache_idx as usize];
+ cache_entry.touch(self.time_stamp);
+
+ return Some((
+ cache_entry.file.clone(),
+ cache_entry.line_number,
+ pos - cache_entry.line.start,
+ ));
+ }
+
+ // No cache hit: the least recently used entry gets replaced.
+ let oldest = self.oldest_cache_entry_index();
+
+ // If the entry doesn't point to the correct file, get the new file and index (returns `None` if no file contains `pos`).
+ let new_file_and_idx = if !file_contains(&self.line_cache[oldest].file, pos) {
+ Some(self.file_for_position(pos)?)
+ } else {
+ None
+ };
+
+ let cache_entry = &mut self.line_cache[oldest];
+ cache_entry.update(new_file_and_idx, pos, self.time_stamp);
+
+ Some((cache_entry.file.clone(), cache_entry.line_number, pos - cache_entry.line.start))
+ }
+
+ pub fn span_data_to_lines_and_cols( // Returns (file, lo line, lo offset in line, hi line, hi offset in line), or `None` if lo/hi are not in one file.
+ &mut self,
+ span_data: &SpanData,
+ ) -> Option<(Lrc<SourceFile>, usize, BytePos, usize, BytePos)> {
+ self.time_stamp += 1;
+
+ // Check if lo and hi are in the cached lines (-1 means "not cached").
+ let lo_cache_idx = self.cache_entry_index(span_data.lo);
+ let hi_cache_idx = self.cache_entry_index(span_data.hi);
+
+ if lo_cache_idx != -1 && hi_cache_idx != -1 {
+ // Cache hit for span lo and hi. Check if they belong to the same file.
+ let result = {
+ let lo = &self.line_cache[lo_cache_idx as usize];
+ let hi = &self.line_cache[hi_cache_idx as usize];
+
+ if lo.file_index != hi.file_index {
+ return None;
+ }
+
+ (
+ lo.file.clone(),
+ lo.line_number,
+ span_data.lo - lo.line.start,
+ hi.line_number,
+ span_data.hi - hi.line.start,
+ )
+ };
+
+ self.line_cache[lo_cache_idx as usize].touch(self.time_stamp);
+ self.line_cache[hi_cache_idx as usize].touch(self.time_stamp);
+
+ return Some(result);
+ }
+
+ // No cache hit or cache hit for only one of span lo and hi. Pick the entry to replace, never the one that did hit.
+ let oldest = if lo_cache_idx != -1 || hi_cache_idx != -1 {
+ let avoid_idx = if lo_cache_idx != -1 { lo_cache_idx } else { hi_cache_idx };
+ self.oldest_cache_entry_index_avoid(avoid_idx as usize)
+ } else {
+ self.oldest_cache_entry_index()
+ };
+
+ // If the entry doesn't point to the correct file, get the new file and index.
+ // Return early if the file containing beginning of span doesn't contain end of span.
+ let new_file_and_idx = if !file_contains(&self.line_cache[oldest].file, span_data.lo) {
+ let new_file_and_idx = self.file_for_position(span_data.lo)?;
+ if !file_contains(&new_file_and_idx.0, span_data.hi) {
+ return None;
+ }
+
+ Some(new_file_and_idx)
+ } else {
+ let file = &self.line_cache[oldest].file;
+ if !file_contains(&file, span_data.hi) {
+ return None;
+ }
+
+ None
+ };
+
+ // Update the cache entries.
+ let (lo_idx, hi_idx) = match (lo_cache_idx, hi_cache_idx) {
+ // Neither end is cached: the oldest entry takes the span_data.lo line; hi shares it or takes the next-oldest entry.
+ (-1, -1) => {
+ let lo = &mut self.line_cache[oldest];
+ lo.update(new_file_and_idx, span_data.lo, self.time_stamp);
+
+ if !lo.line.contains(&span_data.hi) {
+ let new_file_and_idx = Some((lo.file.clone(), lo.file_index));
+ let next_oldest = self.oldest_cache_entry_index_avoid(oldest);
+ let hi = &mut self.line_cache[next_oldest];
+ hi.update(new_file_and_idx, span_data.hi, self.time_stamp);
+ (oldest, next_oldest)
+ } else {
+ (oldest, oldest)
+ }
+ }
+ // Only hi is cached: the oldest cache entry is reused for the span_data.lo line.
+ (-1, _) => {
+ let lo = &mut self.line_cache[oldest];
+ lo.update(new_file_and_idx, span_data.lo, self.time_stamp);
+ let hi = &mut self.line_cache[hi_cache_idx as usize];
+ hi.touch(self.time_stamp);
+ (oldest, hi_cache_idx as usize)
+ }
+ // Only lo is cached: the oldest cache entry is reused for the span_data.hi line.
+ (_, -1) => {
+ let hi = &mut self.line_cache[oldest];
+ hi.update(new_file_and_idx, span_data.hi, self.time_stamp);
+ let lo = &mut self.line_cache[lo_cache_idx as usize];
+ lo.touch(self.time_stamp);
+ (lo_cache_idx as usize, oldest)
+ }
+ _ => {
+ panic!(); // both cached: already handled by the early return above
+ }
+ };
+
+ let lo = &self.line_cache[lo_idx];
+ let hi = &self.line_cache[hi_idx];
+
+ // Span lo and hi may equal line end when last line doesn't
+ // end in newline, hence the inclusive upper bounds below.
+ assert!(span_data.lo >= lo.line.start);
+ assert!(span_data.lo <= lo.line.end);
+ assert!(span_data.hi >= hi.line.start);
+ assert!(span_data.hi <= hi.line.end);
+ assert!(lo.file.contains(span_data.lo));
+ assert!(lo.file.contains(span_data.hi));
+ assert_eq!(lo.file_index, hi.file_index);
+
+ Some((
+ lo.file.clone(),
+ lo.line_number,
+ span_data.lo - lo.line.start,
+ hi.line_number,
+ span_data.hi - hi.line.start,
+ ))
+ }
+
+ fn cache_entry_index(&self, pos: BytePos) -> isize { // Index of the first cached line containing `pos`, or -1 if none does.
+ for (idx, cache_entry) in self.line_cache.iter().enumerate() {
+ if cache_entry.line.contains(&pos) {
+ return idx as isize;
+ }
+ }
+
+ -1
+ }
+
+ fn oldest_cache_entry_index(&self) -> usize { // Index of the entry with the lowest time stamp (first one wins on ties).
+ let mut oldest = 0;
+
+ for idx in 1..self.line_cache.len() {
+ if self.line_cache[idx].time_stamp < self.line_cache[oldest].time_stamp {
+ oldest = idx;
+ }
+ }
+
+ oldest
+ }
+
+ fn oldest_cache_entry_index_avoid(&self, avoid_idx: usize) -> usize { // Like `oldest_cache_entry_index`, but never returns `avoid_idx`.
+ let mut oldest = if avoid_idx != 0 { 0 } else { 1 }; // start from any index other than `avoid_idx`
+
+ for idx in 0..self.line_cache.len() {
+ if idx != avoid_idx
+ && self.line_cache[idx].time_stamp < self.line_cache[oldest].time_stamp
+ {
+ oldest = idx;
+ }
+ }
+
+ oldest
+ }
+
+ fn file_for_position(&self, pos: BytePos) -> Option<(Lrc<SourceFile>, usize)> { // The non-empty file containing `pos` and its index, if any.
+ if !self.source_map.files().is_empty() {
+ let file_idx = self.source_map.lookup_source_file_idx(pos);
+ let file = &self.source_map.files()[file_idx];
+
+ if file_contains(file, pos) { // the lookup result is re-checked; empty files are rejected here
+ return Some((file.clone(), file_idx));
+ }
+ }
+
+ None
+ }
+}
+
+#[inline]
+fn file_contains(file: &SourceFile, pos: BytePos) -> bool {
+ // Like `SourceFile::contains`, except that an empty file never contains any position.
+ // `SourceMap::lookup_source_file_idx` and `SourceFile::contains` both consider the position
+ // one past the end of a file to belong to it. Normally, that's what we want. But we can't
+ // come up with a line and column number for a position in an empty file, because an empty
+ // file doesn't contain any lines. So here we don't consider empty files to contain any byte position.
+ file.contains(pos) && !file.is_empty()
+}
diff --git a/compiler/rustc_span/src/def_id.rs b/compiler/rustc_span/src/def_id.rs
new file mode 100644
index 000000000..a1533fe46
--- /dev/null
+++ b/compiler/rustc_span/src/def_id.rs
@@ -0,0 +1,444 @@
+use crate::HashStableContext;
+use rustc_data_structures::fingerprint::Fingerprint;
+use rustc_data_structures::stable_hasher::{HashStable, StableHasher, ToStableHashKey};
+use rustc_data_structures::AtomicRef;
+use rustc_index::vec::Idx;
+use rustc_macros::HashStable_Generic;
+use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
+use std::borrow::Borrow;
+use std::fmt;
+use std::hash::{Hash, Hasher};
+
+rustc_index::newtype_index! {
+ pub struct CrateNum {
+ ENCODABLE = custom
+ DEBUG_FORMAT = "crate{}"
+ }
+}
+
+/// Item definitions in the currently-compiled crate would have the `CrateNum`
+/// `LOCAL_CRATE` in their `DefId`.
+pub const LOCAL_CRATE: CrateNum = CrateNum::from_u32(0);
+
+impl CrateNum {
+ #[inline]
+ pub fn new(x: usize) -> CrateNum { // Thin wrapper around `CrateNum::from_usize`.
+ CrateNum::from_usize(x)
+ }
+
+ #[inline]
+ pub fn as_def_id(self) -> DefId { // The `DefId` with this crate and `CRATE_DEF_INDEX` (the crate root).
+ DefId { krate: self, index: CRATE_DEF_INDEX }
+ }
+}
+
+impl fmt::Display for CrateNum {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fmt::Display::fmt(&self.private, f)
+ }
+}
+
+/// As a local identifier, a `CrateNum` is only meaningful within its context, e.g. within a tcx.
+/// Therefore, make sure to include the context when encoding a `CrateNum`.
+impl<E: Encoder> Encodable<E> for CrateNum {
+ default fn encode(&self, s: &mut E) { // default impl: writes the raw `u32` value
+ s.emit_u32(self.as_u32());
+ }
+}
+
+impl<D: Decoder> Decodable<D> for CrateNum {
+ default fn decode(d: &mut D) -> CrateNum { // default impl: reads back the raw `u32` written by `encode`
+ CrateNum::from_u32(d.read_u32())
+ }
+}
+
+/// A `DefPathHash` is a fixed-size representation of a `DefPath` that is
+/// stable across crate and compilation session boundaries. It consists of two
+/// separate 64-bit hashes. The first uniquely identifies the crate this
+/// `DefPathHash` originates from (see [StableCrateId]), and the second
+/// uniquely identifies the corresponding `DefPath` within that crate. Together
+/// they form a unique identifier within an entire crate graph.
+///
+/// There is a very small chance of hash collisions, which would mean that two
+/// different `DefPath`s map to the same `DefPathHash`. Proceeding compilation
+/// with such a hash collision would very probably lead to an ICE, and in the
+/// worst case lead to a silent mis-compilation. The compiler therefore actively
+/// and exhaustively checks for such hash collisions and aborts compilation if
+/// it finds one.
+///
+/// `DefPathHash` uses 64-bit hashes for both the crate-id part and the
+/// crate-internal part, even though it is likely that there are many more
+/// `LocalDefId`s in a single crate than there are individual crates in a crate
+/// graph. Since we use the same number of bits in both cases, the collision
+/// probability for the crate-local part will be quite a bit higher (though
+/// still very small).
+///
+/// This imbalance is not by accident: A hash collision in the
+/// crate-local part of a `DefPathHash` will be detected and reported while
+/// compiling the crate in question. Such a collision does not depend on
+/// outside factors and can be easily fixed by the crate maintainer (e.g. by
+/// renaming the item in question or by bumping the crate version in a harmless
+/// way).
+///
+/// A collision between crate-id hashes on the other hand is harder to fix
+/// because it depends on the set of crates in the entire crate graph of a
+/// compilation session. Again, using the same crate with a different version
+/// number would fix the issue with a high probability -- but that might be
+/// easier said than done if the crates in question are dependencies of
+/// third-party crates.
+///
+/// That being said, given a high quality hash function, the collision
+/// probabilities in question are very small. For example, for a big crate like
+/// `rustc_middle` (with ~50000 `LocalDefId`s as of the time of writing) there
+/// is a probability of roughly 1 in 14,750,000,000 of a crate-internal
+/// collision occurring. For a big crate graph with 1000 crates in it, there is
+/// a probability of 1 in 36,890,000,000,000 of a `StableCrateId` collision.
+#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Debug)]
+#[derive(HashStable_Generic, Encodable, Decodable)]
+pub struct DefPathHash(pub Fingerprint);
+
+impl DefPathHash {
+ /// Returns the [StableCrateId] identifying the crate this [DefPathHash]
+ /// originates from (the first half of the fingerprint).
+ #[inline]
+ pub fn stable_crate_id(&self) -> StableCrateId {
+ StableCrateId(self.0.as_value().0)
+ }
+
+ /// Returns the crate-local part of the [DefPathHash] (the second half of the fingerprint).
+ ///
+ /// Used for tests.
+ #[inline]
+ pub fn local_hash(&self) -> u64 {
+ self.0.as_value().1
+ }
+
+ /// Builds a new [DefPathHash] with the given [StableCrateId] and
+ /// `local_hash`, where `local_hash` must be unique within its crate (not checked here).
+ pub fn new(stable_crate_id: StableCrateId, local_hash: u64) -> DefPathHash {
+ DefPathHash(Fingerprint::new(stable_crate_id.0, local_hash))
+ }
+}
+
+impl Borrow<Fingerprint> for DefPathHash { // Exposes the wrapped `Fingerprint` by reference.
+ #[inline]
+ fn borrow(&self) -> &Fingerprint {
+ &self.0
+ }
+}
+
+/// A [`StableCrateId`] is a 64-bit hash of a crate name, together with all
+/// `-Cmetadata` arguments, and some other data. It is to [`CrateNum`] what [`DefPathHash`] is to
+/// [`DefId`]. It is stable across compilation sessions.
+///
+/// Since the ID is a hash value, there is a small chance that two crates
+/// end up with the same [`StableCrateId`]. The compiler will check for such
+/// collisions when loading crates and abort compilation in order to avoid
+/// further trouble.
+///
+/// For more information on the possibility of hash collisions in rustc,
+/// see the discussion in [`DefPathHash`].
+#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Debug)]
+#[derive(HashStable_Generic, Encodable, Decodable)]
+pub struct StableCrateId(pub(crate) u64);
+
+impl StableCrateId {
+ pub fn to_u64(self) -> u64 { // Returns the raw 64-bit hash value.
+ self.0
+ }
+
+ /// Computes the stable ID from the crate name, the sorted and deduplicated
+ /// `-Cmetadata` arguments, the crate type (`is_exe`), and the rustc version.
+ pub fn new(crate_name: &str, is_exe: bool, mut metadata: Vec<String>) -> StableCrateId {
+ let mut hasher = StableHasher::new();
+ crate_name.hash(&mut hasher);
+
+ // We don't want the stable crate ID to depend on the order of
+ // -C metadata arguments, so sort them:
+ metadata.sort();
+ // Every distinct -C metadata value is only incorporated once:
+ metadata.dedup();
+
+ hasher.write(b"metadata");
+ for s in &metadata {
+ // Also incorporate the length of a metadata string, so that we generate
+ // different values for `-Cmetadata=ab -Cmetadata=c` and
+ // `-Cmetadata=a -Cmetadata=bc`
+ hasher.write_usize(s.len());
+ hasher.write(s.as_bytes());
+ }
+
+ // Also incorporate crate type, so that we don't get symbol conflicts when
+ // linking against a library of the same name, if this is an executable.
+ hasher.write(if is_exe { b"exe" } else { b"lib" });
+
+ // Also incorporate the rustc version. Otherwise, with -Zsymbol-mangling-version=v0
+ // and no -Cmetadata, symbols from the same crate compiled with different versions of
+ // rustc are named the same.
+ //
+ // The RUSTC_FORCE_RUSTC_VERSION environment variable, when set, replaces the
+ // compile-time CFG_VERSION; it is used to inject rustc version information during testing.
+ if let Some(val) = std::env::var_os("RUSTC_FORCE_RUSTC_VERSION") {
+ hasher.write(val.to_string_lossy().into_owned().as_bytes())
+ } else {
+ hasher.write(option_env!("CFG_VERSION").unwrap_or("unknown version").as_bytes());
+ }
+
+ StableCrateId(hasher.finish())
+ }
+}
+
+rustc_index::newtype_index! {
+ /// A DefIndex is an index into the hir-map for a crate, identifying a
+ /// particular definition. It should really be considered an interned
+ /// shorthand for a particular DefPath.
+ pub struct DefIndex {
+ ENCODABLE = custom // (only encodable in metadata)
+
+ DEBUG_FORMAT = "DefIndex({})",
+ /// The crate root is always assigned index 0 by the AST Map code,
+ /// thanks to `NodeCollector::new`.
+ const CRATE_DEF_INDEX = 0,
+ }
+}
+
+impl<E: Encoder> Encodable<E> for DefIndex {
+ default fn encode(&self, _: &mut E) { // default impl always panics, naming the encoder type
+ panic!("cannot encode `DefIndex` with `{}`", std::any::type_name::<E>());
+ }
+}
+
+impl<D: Decoder> Decodable<D> for DefIndex {
+ default fn decode(_: &mut D) -> DefIndex { // default impl always panics, naming the decoder type
+ panic!("cannot decode `DefIndex` with `{}`", std::any::type_name::<D>());
+ }
+}
+
+/// A `DefId` identifies a particular *definition*, by combining a crate
+/// index and a def index.
+///
+/// You can create a `DefId` from a `LocalDefId` using `local_def_id.to_def_id()`.
+#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Copy)]
+// On below-64 bit systems we can simply use the derived `Hash` impl
+#[cfg_attr(not(target_pointer_width = "64"), derive(Hash))]
+#[repr(C)]
+#[rustc_pass_by_value]
+// We guarantee field order. Note that the order is essential here; the comment on the manual `Hash` impl explains why.
+pub struct DefId {
+ // cfg-ing the order of fields so that the `DefIndex` which is high entropy always ends up in
+ // the lower bits no matter the endianness. This allows the compiler to turn that `Hash` impl
+ // into a direct call to `u64::hash(_)`.
+ #[cfg(not(all(target_pointer_width = "64", target_endian = "big")))]
+ pub index: DefIndex,
+ pub krate: CrateNum,
+ #[cfg(all(target_pointer_width = "64", target_endian = "big"))]
+ pub index: DefIndex,
+}
+
+// On 64-bit systems, we can hash the whole `DefId` as one `u64` instead of two `u32`s. This
+// improves performance without impairing `FxHash` quality. So the below code gets compiled to a
+// noop on little endian systems because the memory layout of `DefId` is as follows:
+//
+// ```
+// +-0--------------31-+-32-------------63-+
+// ! index ! krate !
+// +-------------------+-------------------+
+// ```
+//
+// The order here has direct impact on `FxHash` quality because we have far more `DefIndex` per
+// crate than we have `Crate`s within one compilation. Or in other words, this arrangement puts
+// more entropy in the low bits than the high bits. The reason this matters is that `FxHash`, which
+// is used throughout rustc, has problems distributing the entropy from the high bits, so reversing
+// the order would lead to a large number of collisions and thus far worse performance.
+//
+// On 64-bit big-endian systems, this compiles to a 64-bit rotation by 32 bits, which is still
+// faster than another `FxHash` round.
+#[cfg(target_pointer_width = "64")]
+impl Hash for DefId {
+    /// Hashes the ID as a single `u64`: `krate` in the upper 32 bits, `index` in the lower 32.
+    fn hash<H: Hasher>(&self, h: &mut H) {
+        let krate_bits = u64::from(self.krate.as_u32()) << 32;
+        let index_bits = u64::from(self.index.as_u32());
+        (krate_bits | index_bits).hash(h)
+    }
+}
+
+impl DefId {
+    /// Builds the `DefId` for `index` inside the crate currently being compiled.
+    #[inline]
+    pub fn local(index: DefIndex) -> DefId {
+        DefId { index, krate: LOCAL_CRATE }
+    }
+
+    /// Tells whether this definition belongs to the crate currently being compiled.
+    #[inline]
+    pub fn is_local(self) -> bool {
+        LOCAL_CRATE == self.krate
+    }
+
+    /// Converts to a `LocalDefId`, or `None` when the definition is from another crate.
+    #[inline]
+    pub fn as_local(self) -> Option<LocalDefId> {
+        self.is_local().then(|| LocalDefId { local_def_index: self.index })
+    }
+
+    /// Converts to a `LocalDefId`, panicking when the definition is from another crate.
+    #[inline]
+    #[track_caller]
+    pub fn expect_local(self) -> LocalDefId {
+        // NOTE: the panic must stay outside of any closure so that
+        // `#[track_caller]` reports the caller's location.
+        if let Some(local_def_id) = self.as_local() {
+            local_def_id
+        } else {
+            panic!("DefId::expect_local: `{:?}` isn't local", self)
+        }
+    }
+
+    /// Tells whether this ID uses the crate-root index (`CRATE_DEF_INDEX`) of its crate.
+    #[inline]
+    pub fn is_crate_root(self) -> bool {
+        CRATE_DEF_INDEX == self.index
+    }
+
+    /// Returns the owning crate when this ID is a crate root, `None` otherwise.
+    #[inline]
+    pub fn as_crate_root(self) -> Option<CrateNum> {
+        self.is_crate_root().then(|| self.krate)
+    }
+
+    /// Tells whether this ID is the root of the crate currently being compiled.
+    #[inline]
+    pub fn is_top_level_module(self) -> bool {
+        self.is_crate_root() && self.is_local()
+    }
+}
+
+impl<E: Encoder> Encodable<E> for DefId {
+    /// Default (specializable) encoding: the crate number first, then the def index.
+    default fn encode(&self, s: &mut E) {
+        let DefId { krate, index, .. } = *self;
+        krate.encode(s);
+        index.encode(s);
+    }
+}
+
+impl<D: Decoder> Decodable<D> for DefId {
+    /// Default (specializable) decoding: reads the crate number first, then the def index.
+    default fn decode(d: &mut D) -> DefId {
+        let krate: CrateNum = Decodable::decode(d);
+        let index: DefIndex = Decodable::decode(d);
+        DefId { krate, index }
+    }
+}
+
+/// Fallback `Debug` formatter for `DefId`: prints the `krate` and `index` fields as a struct.
+pub fn default_def_id_debug(def_id: DefId, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+    let mut builder = f.debug_struct("DefId");
+    builder.field("krate", &def_id.krate);
+    builder.field("index", &def_id.index);
+    builder.finish()
+}
+
+/// The function that `impl fmt::Debug for DefId` dispatches to. It is initialised to
+/// `default_def_id_debug`.
+// NOTE(review): being an `AtomicRef`, this is presumably meant to be swapped for a richer
+// formatter by a downstream crate — confirm where it is overwritten.
+pub static DEF_ID_DEBUG: AtomicRef<fn(DefId, &mut fmt::Formatter<'_>) -> fmt::Result> =
+ AtomicRef::new(&(default_def_id_debug as fn(_, &mut fmt::Formatter<'_>) -> _));
+
+impl fmt::Debug for DefId {
+    /// Formats through whichever function `DEF_ID_DEBUG` currently refers to.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let debug_fn = *DEF_ID_DEBUG;
+        debug_fn(*self, f)
+    }
+}
+
+// NOTE(review): presumably declares the `DefIdMap` and `DefIdSet` collection types keyed by
+// `DefId` — confirm against the definition of `define_id_collections!`.
+rustc_data_structures::define_id_collections!(DefIdMap, DefIdSet, DefId);
+
+/// A `LocalDefId` is equivalent to a `DefId` with `krate == LOCAL_CRATE`. Since
+/// we encode this information in the type, we can ensure at compile time that
+/// no `DefId`s from upstream crates get thrown into the mix. There are quite a
+/// few cases where we know that only `DefId`s from the local crate are expected;
+/// a `DefId` from a different crate would signify a bug somewhere. This
+/// is when `LocalDefId` comes in handy.
+#[derive(Clone, Copy, PartialEq, Eq, Hash)]
+pub struct LocalDefId {
+ /// The index of the definition within the local crate; `to_def_id()` pairs it with
+ /// `LOCAL_CRATE` to form the full `DefId`.
+ pub local_def_index: DefIndex,
+}
+
+// To ensure correctness of incremental compilation,
+// `LocalDefId` must not implement `Ord` or `PartialOrd`.
+// See https://github.com/rust-lang/rust/issues/90317.
+impl !Ord for LocalDefId {}
+impl !PartialOrd for LocalDefId {}
+
+/// The `LocalDefId` of the local crate's root, i.e. the one wrapping `CRATE_DEF_INDEX`.
+pub const CRATE_DEF_ID: LocalDefId = LocalDefId { local_def_index: CRATE_DEF_INDEX };
+
+impl Idx for LocalDefId {
+    /// Wraps the `DefIndex` built from `idx`.
+    #[inline]
+    fn new(idx: usize) -> Self {
+        let local_def_index = <DefIndex as Idx>::new(idx);
+        LocalDefId { local_def_index }
+    }
+    /// Returns the wrapped `DefIndex` as a `usize`.
+    #[inline]
+    fn index(self) -> usize {
+        let LocalDefId { local_def_index } = self;
+        local_def_index.index()
+    }
+}
+
+impl LocalDefId {
+    /// Widens this ID to a `DefId` whose crate is `LOCAL_CRATE`.
+    #[inline]
+    pub fn to_def_id(self) -> DefId {
+        DefId { index: self.local_def_index, krate: LOCAL_CRATE }
+    }
+
+    /// Tells whether this is the root of the local crate (`CRATE_DEF_ID`).
+    #[inline]
+    pub fn is_top_level_module(self) -> bool {
+        CRATE_DEF_ID == self
+    }
+}
+
+impl fmt::Debug for LocalDefId {
+    /// Formats exactly like the corresponding `DefId`.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let def_id = self.to_def_id();
+        fmt::Debug::fmt(&def_id, f)
+    }
+}
+
+impl<E: Encoder> Encodable<E> for LocalDefId {
+    /// Encodes the ID in its full `DefId` form.
+    fn encode(&self, s: &mut E) {
+        let def_id = self.to_def_id();
+        def_id.encode(s);
+    }
+}
+
+impl<D: Decoder> Decodable<D> for LocalDefId {
+    /// Decodes a full `DefId` and panics (via `expect_local`) if it is not local.
+    fn decode(d: &mut D) -> LocalDefId {
+        let def_id = DefId::decode(d);
+        def_id.expect_local()
+    }
+}
+
+// NOTE(review): presumably declares the `LocalDefIdMap` and `LocalDefIdSet` collection types
+// keyed by `LocalDefId` — confirm against the definition of `define_id_collections!`.
+rustc_data_structures::define_id_collections!(LocalDefIdMap, LocalDefIdSet, LocalDefId);
+
+impl<CTX: HashStableContext> HashStable<CTX> for DefId {
+    /// Hashes the stable key (`DefPathHash`) of this ID instead of its raw indices.
+    #[inline]
+    fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) {
+        let stable_key = self.to_stable_hash_key(hcx);
+        stable_key.hash_stable(hcx, hasher);
+    }
+}
+
+impl<CTX: HashStableContext> HashStable<CTX> for LocalDefId {
+    /// Hashes the stable key (`DefPathHash`) of this ID instead of its raw index.
+    #[inline]
+    fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) {
+        let stable_key = self.to_stable_hash_key(hcx);
+        stable_key.hash_stable(hcx, hasher);
+    }
+}
+
+impl<CTX: HashStableContext> HashStable<CTX> for CrateNum {
+    /// Hashes the stable key (`DefPathHash`) of this crate instead of its raw number.
+    #[inline]
+    fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) {
+        let stable_key = self.to_stable_hash_key(hcx);
+        stable_key.hash_stable(hcx, hasher);
+    }
+}
+
+impl<CTX: HashStableContext> ToStableHashKey<CTX> for DefId {
+    type KeyType = DefPathHash;
+
+    /// Asks the hashing context for the `DefPathHash` of this definition.
+    #[inline]
+    fn to_stable_hash_key(&self, hcx: &CTX) -> DefPathHash {
+        let def_id = *self;
+        hcx.def_path_hash(def_id)
+    }
+}
+
+impl<CTX: HashStableContext> ToStableHashKey<CTX> for LocalDefId {
+    type KeyType = DefPathHash;
+
+    /// Asks the hashing context for the `DefPathHash` of the corresponding `DefId`.
+    #[inline]
+    fn to_stable_hash_key(&self, hcx: &CTX) -> DefPathHash {
+        let def_id = self.to_def_id();
+        hcx.def_path_hash(def_id)
+    }
+}
+
+impl<CTX: HashStableContext> ToStableHashKey<CTX> for CrateNum {
+    type KeyType = DefPathHash;
+
+    /// Uses the stable key of the `DefId` returned by `as_def_id()`.
+    #[inline]
+    fn to_stable_hash_key(&self, hcx: &CTX) -> DefPathHash {
+        let crate_def_id = self.as_def_id();
+        crate_def_id.to_stable_hash_key(hcx)
+    }
+}
diff --git a/compiler/rustc_span/src/edition.rs b/compiler/rustc_span/src/edition.rs
new file mode 100644
index 000000000..065d3660e
--- /dev/null
+++ b/compiler/rustc_span/src/edition.rs
@@ -0,0 +1,110 @@
+use crate::symbol::{sym, Symbol};
+use std::fmt;
+use std::str::FromStr;
+
+use rustc_macros::HashStable_Generic;
+
+/// The edition of the compiler. (See [RFC 2052](https://github.com/rust-lang/rfcs/blob/master/text/2052-epochs.md).)
+#[derive(Clone, Copy, Hash, PartialEq, PartialOrd, Debug, Encodable, Decodable, Eq)]
+#[derive(HashStable_Generic)]
+pub enum Edition {
+ // When adding new editions, be sure to do the following:
+ //
+ // - update the `ALL_EDITIONS` const
+ // - update the `EDITION_NAME_LIST` const
+ // - add a `rust_####()` function to the session
+ // - update the enum in Cargo's sources as well
+ //
+ // Editions *must* be kept in order, oldest to newest.
+ // The derived `PartialOrd` compares variants by declaration order, and the
+ // `rust_2018()`/`rust_2021()`/`rust_2024()` helpers rely on it through `>=`.
+ /// The 2015 edition
+ Edition2015,
+ /// The 2018 edition
+ Edition2018,
+ /// The 2021 edition
+ Edition2021,
+ /// The 2024 edition
+ Edition2024,
+}
+
+/// Every `Edition` variant.
+// Must be in order from oldest to newest.
+pub const ALL_EDITIONS: &[Edition] =
+ &[Edition::Edition2015, Edition::Edition2018, Edition::Edition2021, Edition::Edition2024];
+
+/// The edition names accepted by `Edition::from_str`, separated by `|`.
+pub const EDITION_NAME_LIST: &str = "2015|2018|2021|2024";
+
+/// The default edition.
+// NOTE(review): presumably the edition used when none is requested — confirm at the use sites.
+pub const DEFAULT_EDITION: Edition = Edition::Edition2015;
+
+/// The newest edition for which `Edition::is_stable` returns `true`.
+pub const LATEST_STABLE_EDITION: Edition = Edition::Edition2021;
+
+impl fmt::Display for Edition {
+    /// Writes the four-digit year of the edition.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(match self {
+            Self::Edition2015 => "2015",
+            Self::Edition2018 => "2018",
+            Self::Edition2021 => "2021",
+            Self::Edition2024 => "2024",
+        })
+    }
+}
+
+impl Edition {
+    /// Name of the compatibility lint group associated with this edition.
+    pub fn lint_name(&self) -> &'static str {
+        match self {
+            Self::Edition2015 => "rust_2015_compatibility",
+            Self::Edition2018 => "rust_2018_compatibility",
+            Self::Edition2021 => "rust_2021_compatibility",
+            Self::Edition2024 => "rust_2024_compatibility",
+        }
+    }
+
+    /// Symbol of the preview feature associated with this edition.
+    pub fn feature_name(&self) -> Symbol {
+        match self {
+            Self::Edition2015 => sym::rust_2015_preview,
+            Self::Edition2018 => sym::rust_2018_preview,
+            Self::Edition2021 => sym::rust_2021_preview,
+            Self::Edition2024 => sym::rust_2024_preview,
+        }
+    }
+
+    /// Whether this edition is stable.
+    pub fn is_stable(&self) -> bool {
+        match self {
+            Self::Edition2015 | Self::Edition2018 | Self::Edition2021 => true,
+            Self::Edition2024 => false,
+        }
+    }
+
+    /// Is this exactly the Rust 2015 edition?
+    pub fn rust_2015(&self) -> bool {
+        Edition::Edition2015 == *self
+    }
+
+    /// Are we allowed to use features from the Rust 2018 edition?
+    pub fn rust_2018(&self) -> bool {
+        Edition::Edition2018 <= *self
+    }
+
+    /// Are we allowed to use features from the Rust 2021 edition?
+    pub fn rust_2021(&self) -> bool {
+        Edition::Edition2021 <= *self
+    }
+
+    /// Are we allowed to use features from the Rust 2024 edition?
+    pub fn rust_2024(&self) -> bool {
+        Edition::Edition2024 <= *self
+    }
+}
+
+impl FromStr for Edition {
+    type Err = ();
+    /// Parses a four-digit edition year; any other input yields `Err(())`.
+    fn from_str(s: &str) -> Result<Self, ()> {
+        let edition = match s {
+            "2015" => Edition::Edition2015,
+            "2018" => Edition::Edition2018,
+            "2021" => Edition::Edition2021,
+            "2024" => Edition::Edition2024,
+            _ => return Err(()),
+        };
+        Ok(edition)
+    }
+}
diff --git a/compiler/rustc_span/src/fatal_error.rs b/compiler/rustc_span/src/fatal_error.rs
new file mode 100644
index 000000000..fa84c486d
--- /dev/null
+++ b/compiler/rustc_span/src/fatal_error.rs
@@ -0,0 +1,26 @@
+/// Used as a return value to signify a fatal error occurred. (It is also
+/// used as the argument to panic at the moment, but that will eventually
+/// not be true.)
+#[derive(Copy, Clone, Debug)]
+#[must_use]
+pub struct FatalError;
+
+/// The payload that `FatalError::raise` hands to `std::panic::resume_unwind`.
+pub struct FatalErrorMarker;
+
+// Don't implement Send on FatalError. This makes it impossible to panic!(FatalError).
+// We don't want to invoke the panic handler and print a backtrace for fatal errors.
+impl !Send for FatalError {}
+
+impl FatalError {
+    /// Starts unwinding with a boxed `FatalErrorMarker` as the payload. Never returns.
+    pub fn raise(self) -> ! {
+        let payload = Box::new(FatalErrorMarker);
+        std::panic::resume_unwind(payload)
+    }
+}
+
+impl std::fmt::Display for FatalError {
+    /// Always writes the fixed text `fatal error`.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str("fatal error")
+    }
+}
+
+impl std::error::Error for FatalError {}
diff --git a/compiler/rustc_span/src/hygiene.rs b/compiler/rustc_span/src/hygiene.rs
new file mode 100644
index 000000000..e169d3c7c
--- /dev/null
+++ b/compiler/rustc_span/src/hygiene.rs
@@ -0,0 +1,1528 @@
+//! Machinery for hygienic macros.
+//!
+//! Inspired by Matthew Flatt et al., “Macros That Work Together: Compile-Time Bindings, Partial
+//! Expansion, and Definition Contexts,” *Journal of Functional Programming* 22, no. 2
+//! (March 1, 2012): 181–216, <https://doi.org/10.1017/S0956796812000093>.
+
+// Hygiene data is stored in a global variable and accessed via TLS, which
+// means that accesses are somewhat expensive. (`HygieneData::with`
+// encapsulates a single access.) Therefore, on hot code paths it is worth
+// ensuring that multiple HygieneData accesses are combined into a single
+// `HygieneData::with`.
+//
+// This explains why `HygieneData`, `SyntaxContext` and `ExpnId` have interfaces
+// with a certain amount of redundancy in them. For example,
+// `SyntaxContext::outer_expn_data` combines `SyntaxContext::outer` and
+// `ExpnId::expn_data` so that two `HygieneData` accesses can be performed within
+// a single `HygieneData::with` call.
+//
+// It also explains why many functions appear in `HygieneData` and again in
+// `SyntaxContext` or `ExpnId`. For example, `HygieneData::outer` and
+// `SyntaxContext::outer` do the same thing, but the former is for use within a
+// `HygieneData::with` call while the latter is for use outside such a call.
+// When modifying this file it is important to understand this distinction,
+// because getting it wrong can lead to nested `HygieneData::with` calls that
+// trigger runtime aborts. (Fortunately these are obvious and easy to fix.)
+
+use crate::edition::Edition;
+use crate::symbol::{kw, sym, Symbol};
+use crate::with_session_globals;
+use crate::{HashStableContext, Span, DUMMY_SP};
+
+use crate::def_id::{CrateNum, DefId, StableCrateId, CRATE_DEF_ID, LOCAL_CRATE};
+use rustc_data_structures::fingerprint::Fingerprint;
+use rustc_data_structures::fx::{FxHashMap, FxHashSet};
+use rustc_data_structures::stable_hasher::HashingControls;
+use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
+use rustc_data_structures::sync::{Lock, Lrc};
+use rustc_data_structures::unhash::UnhashMap;
+use rustc_index::vec::IndexVec;
+use rustc_macros::HashStable_Generic;
+use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
+use std::fmt;
+use std::hash::Hash;
+use tracing::*;
+
+/// A `SyntaxContext` represents a chain of pairs `(ExpnId, Transparency)` named "marks".
+///
+/// The wrapped `u32` is used as an index into `HygieneData::syntax_context_data`;
+/// index 0 is the root context.
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct SyntaxContext(u32);
+
+/// The data stored for one `SyntaxContext` in `HygieneData::syntax_context_data`.
+#[derive(Debug, Encodable, Decodable, Clone)]
+pub struct SyntaxContextData {
+ /// Expansion of the outermost mark of this context.
+ outer_expn: ExpnId,
+ /// Transparency of the outermost mark of this context.
+ outer_transparency: Transparency,
+ /// The context that remains once the outermost mark is removed.
+ parent: SyntaxContext,
+ /// This context, but with all transparent and semi-transparent expansions filtered away.
+ opaque: SyntaxContext,
+ /// This context, but with all transparent expansions filtered away.
+ opaque_and_semitransparent: SyntaxContext,
+ /// Name of the crate to which `$crate` with this context would resolve.
+ /// Holds the placeholder `kw::DollarCrate` until `update_dollar_crate_names` fills it in.
+ dollar_crate_name: Symbol,
+}
+
+rustc_index::newtype_index! {
+ /// A unique ID associated with a macro invocation and expansion.
+ ///
+ /// This is the per-crate index stored in `ExpnId::local_id`.
+ pub struct ExpnIndex {
+ ENCODABLE = custom
+ }
+}
+
+/// A unique ID associated with a macro invocation and expansion.
+#[derive(Clone, Copy, PartialEq, Eq, Hash)]
+pub struct ExpnId {
+ /// The crate the expansion belongs to; `LOCAL_CRATE` for expansions convertible with
+ /// `as_local`.
+ pub krate: CrateNum,
+ /// The index of the expansion within `krate`.
+ pub local_id: ExpnIndex,
+}
+
+impl fmt::Debug for ExpnId {
+    /// Prints the crate followed by the expansion index wrapped in double braces.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let ExpnId { krate, local_id } = self;
+        // Output has the shape `crate_::{{expn_}}`.
+        write!(f, "{:?}::{{{{expn{}}}}}", krate, local_id.private)
+    }
+}
+
+rustc_index::newtype_index! {
+ /// A unique ID associated with a macro invocation and expansion.
+ ///
+ /// This is the local-crate counterpart of `ExpnId`; `to_expn_id` pairs it with `LOCAL_CRATE`.
+ pub struct LocalExpnId {
+ ENCODABLE = custom
+ ORD_IMPL = custom
+ DEBUG_FORMAT = "expn{}"
+ }
+}
+
+// To ensure correctness of incremental compilation,
+// `LocalExpnId` must not implement `Ord` or `PartialOrd`.
+// See https://github.com/rust-lang/rust/issues/90317.
+impl !Ord for LocalExpnId {}
+impl !PartialOrd for LocalExpnId {}
+
+/// Checks that `ctx` uses the 'default' `HashingControls` and panics otherwise.
+///
+/// Callers are expected to have bailed out earlier when a non-default mode is active.
+/// Thanks to this check there is no need to keep one version of the `ExpnData` hashes
+/// per permutation of `HashingControls` settings. `msg` names what was being hashed and
+/// only appears in the panic message.
+fn assert_default_hashing_controls<CTX: HashStableContext>(ctx: &CTX, msg: &str) {
+    let controls = ctx.hashing_controls();
+    // `hash_spans` has to follow the global `-Z incremental-ignore-spans` option.
+    // That option is normally disabled, in which case this function must always be
+    // called with `Span` hashing enabled.
+    let expected_hash_spans = !ctx.unstable_opts_incremental_ignore_spans();
+    if controls.hash_spans != expected_hash_spans {
+        panic!("Attempted hashing of {msg} with non-default HashingControls: {:?}", controls);
+    }
+}
+
+/// A unique hash value associated to an expansion.
+#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug, Encodable, Decodable, HashStable_Generic)]
+pub struct ExpnHash(Fingerprint);
+
+impl ExpnHash {
+    /// Returns the [StableCrateId] identifying the crate this [ExpnHash]
+    /// originates from (the first half of the fingerprint).
+    #[inline]
+    pub fn stable_crate_id(self) -> StableCrateId {
+        let crate_part = self.0.as_value().0;
+        StableCrateId(crate_part)
+    }
+
+    /// Returns the crate-local part of the [ExpnHash] (the second half of the fingerprint).
+    ///
+    /// Used for tests.
+    #[inline]
+    pub fn local_hash(self) -> u64 {
+        let local_part = self.0.as_value().1;
+        local_part
+    }
+
+    /// Tells whether this is the all-zero hash, which is the one used for the root expansion.
+    #[inline]
+    pub fn is_root(self) -> bool {
+        Fingerprint::ZERO == self.0
+    }
+
+    /// Builds a new [ExpnHash] with the given [StableCrateId] and
+    /// `local_hash`, where `local_hash` must be unique within its crate.
+    fn new(stable_crate_id: StableCrateId, local_hash: u64) -> ExpnHash {
+        let fingerprint = Fingerprint::new(stable_crate_id.0, local_hash);
+        ExpnHash(fingerprint)
+    }
+}
+
+/// A property of a macro expansion that determines how identifiers
+/// produced by that expansion are resolved.
+// NOTE: the derived `PartialOrd` orders the variants by declaration order
+// (`Transparent < SemiTransparent < Opaque`). `HygieneData::apply_mark_internal`
+// compares transparencies with `>=`, so the variants must not be reordered.
+#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Hash, Debug, Encodable, Decodable)]
+#[derive(HashStable_Generic)]
+pub enum Transparency {
+ /// Identifier produced by a transparent expansion is always resolved at call-site.
+ /// Call-site spans in procedural macros, hygiene opt-out in `macro` should use this.
+ Transparent,
+ /// Identifier produced by a semi-transparent expansion may be resolved
+ /// either at call-site or at definition-site.
+ /// If it's a local variable, label or `$crate` then it's resolved at def-site.
+ /// Otherwise it's resolved at call-site.
+ /// `macro_rules` macros behave like this, built-in macros currently behave like this too,
+ /// but that's an implementation detail.
+ SemiTransparent,
+ /// Identifier produced by an opaque expansion is always resolved at definition-site.
+ /// Def-site spans in procedural macros, identifiers from `macro` by default use this.
+ Opaque,
+}
+
+impl LocalExpnId {
+ /// The ID of the theoretical expansion that generates freshly parsed, unexpanded AST.
+ pub const ROOT: LocalExpnId = LocalExpnId::from_u32(0);
+
+ /// Builds the `LocalExpnId` that has the same `u32` value as `idx`.
+ #[inline]
+ pub fn from_raw(idx: ExpnIndex) -> LocalExpnId {
+ LocalExpnId::from_u32(idx.as_u32())
+ }
+
+ /// Returns the `ExpnIndex` that has the same `u32` value as this ID.
+ #[inline]
+ pub fn as_raw(self) -> ExpnIndex {
+ ExpnIndex::from_u32(self.as_u32())
+ }
+
+ /// Allocates a new expansion ID that has no `ExpnData` yet (`None`) and a zero hash.
+ /// The data can be supplied later with `set_expn_data`.
+ pub fn fresh_empty() -> LocalExpnId {
+ HygieneData::with(|data| {
+ let expn_id = data.local_expn_data.push(None);
+ let _eid = data.local_expn_hashes.push(ExpnHash(Fingerprint::ZERO));
+ // Both vectors are indexed by `LocalExpnId` and must grow in lockstep.
+ debug_assert_eq!(expn_id, _eid);
+ expn_id
+ })
+ }
+
+ /// Allocates a new expansion ID for `expn_data`.
+ ///
+ /// The hash is obtained from `update_disambiguator` (which receives `expn_data` mutably)
+ /// before the hygiene data is borrowed; the data, the hash and the hash-to-ID mapping
+ /// are then recorded together.
+ pub fn fresh(mut expn_data: ExpnData, ctx: impl HashStableContext) -> LocalExpnId {
+ debug_assert_eq!(expn_data.parent.krate, LOCAL_CRATE);
+ let expn_hash = update_disambiguator(&mut expn_data, ctx);
+ HygieneData::with(|data| {
+ let expn_id = data.local_expn_data.push(Some(expn_data));
+ let _eid = data.local_expn_hashes.push(expn_hash);
+ // Both vectors are indexed by `LocalExpnId` and must grow in lockstep.
+ debug_assert_eq!(expn_id, _eid);
+ let _old_id = data.expn_hash_to_expn_id.insert(expn_hash, expn_id.to_expn_id());
+ // The hash must not already be mapped to another expansion.
+ debug_assert!(_old_id.is_none());
+ expn_id
+ })
+ }
+
+ /// Returns the hash recorded for this expansion.
+ #[inline]
+ pub fn expn_hash(self) -> ExpnHash {
+ HygieneData::with(|data| data.local_expn_hash(self))
+ }
+
+ /// Returns a clone of the data recorded for this expansion.
+ /// Panics if no data has been set for this ID yet.
+ #[inline]
+ pub fn expn_data(self) -> ExpnData {
+ HygieneData::with(|data| data.local_expn_data(self).clone())
+ }
+
+ /// Widens this ID to an `ExpnId` whose crate is `LOCAL_CRATE`.
+ #[inline]
+ pub fn to_expn_id(self) -> ExpnId {
+ ExpnId { krate: LOCAL_CRATE, local_id: self.as_raw() }
+ }
+
+ /// Supplies the data of an ID that does not have any yet (see `fresh_empty`).
+ ///
+ /// Panics if this ID already has expansion data.
+ #[inline]
+ pub fn set_expn_data(self, mut expn_data: ExpnData, ctx: impl HashStableContext) {
+ debug_assert_eq!(expn_data.parent.krate, LOCAL_CRATE);
+ let expn_hash = update_disambiguator(&mut expn_data, ctx);
+ HygieneData::with(|data| {
+ let old_expn_data = &mut data.local_expn_data[self];
+ assert!(old_expn_data.is_none(), "expansion data is reset for an expansion ID");
+ *old_expn_data = Some(expn_data);
+ // An ID without data is expected to still carry the zero placeholder hash.
+ debug_assert_eq!(data.local_expn_hashes[self].0, Fingerprint::ZERO);
+ data.local_expn_hashes[self] = expn_hash;
+ let _old_id = data.expn_hash_to_expn_id.insert(expn_hash, self.to_expn_id());
+ debug_assert!(_old_id.is_none());
+ });
+ }
+
+ /// Same as `ExpnId::is_descendant_of`, for two local IDs.
+ #[inline]
+ pub fn is_descendant_of(self, ancestor: LocalExpnId) -> bool {
+ self.to_expn_id().is_descendant_of(ancestor.to_expn_id())
+ }
+
+ /// `expn_id.outer_expn_is_descendant_of(ctxt)` is equivalent to but faster than
+ /// `expn_id.is_descendant_of(ctxt.outer_expn())`.
+ #[inline]
+ pub fn outer_expn_is_descendant_of(self, ctxt: SyntaxContext) -> bool {
+ self.to_expn_id().outer_expn_is_descendant_of(ctxt)
+ }
+
+ /// Returns span for the macro which originally caused this expansion to happen.
+ ///
+ /// Stops backtracing at include! boundary.
+ #[inline]
+ pub fn expansion_cause(self) -> Option<Span> {
+ self.to_expn_id().expansion_cause()
+ }
+
+ /// Returns the parent expansion recorded in this expansion's data.
+ ///
+ /// Panics if this ID has no data or if the parent is not a local expansion.
+ #[inline]
+ #[track_caller]
+ pub fn parent(self) -> LocalExpnId {
+ self.expn_data().parent.as_local().unwrap()
+ }
+}
+
+impl ExpnId {
+    /// The ID of the theoretical expansion that generates freshly parsed, unexpanded AST.
+    /// Invariant: we do not create any ExpnId with local_id == 0 and krate != 0.
+    pub const fn root() -> ExpnId {
+        ExpnId { krate: LOCAL_CRATE, local_id: ExpnIndex::from_u32(0) }
+    }
+
+    /// Returns the hash recorded for this expansion.
+    #[inline]
+    pub fn expn_hash(self) -> ExpnHash {
+        HygieneData::with(|data| data.expn_hash(self))
+    }
+
+    /// Looks up the expansion registered under `hash`, if any.
+    #[inline]
+    pub fn from_hash(hash: ExpnHash) -> Option<ExpnId> {
+        HygieneData::with(|data| data.expn_hash_to_expn_id.get(&hash).copied())
+    }
+
+    /// Converts to a `LocalExpnId`, or `None` when the expansion is from another crate.
+    #[inline]
+    pub fn as_local(self) -> Option<LocalExpnId> {
+        if self.krate == LOCAL_CRATE { Some(LocalExpnId::from_raw(self.local_id)) } else { None }
+    }
+
+    /// Converts to a `LocalExpnId`, panicking when the expansion is from another crate.
+    ///
+    /// The panic message names the offending ID, mirroring `DefId::expect_local`.
+    #[inline]
+    #[track_caller]
+    pub fn expect_local(self) -> LocalExpnId {
+        // NOTE: `match` below is required to apply `#[track_caller]`,
+        // i.e. don't use closures.
+        match self.as_local() {
+            Some(local_expn_id) => local_expn_id,
+            None => panic!("ExpnId::expect_local: `{:?}` isn't local", self),
+        }
+    }
+
+    /// Returns a clone of the data recorded for this expansion.
+    #[inline]
+    pub fn expn_data(self) -> ExpnData {
+        HygieneData::with(|data| data.expn_data(self).clone())
+    }
+
+    /// Tells whether `ancestor` is this expansion itself or is reachable from it by
+    /// following `parent` links. Everything is a descendant of the root expansion.
+    #[inline]
+    pub fn is_descendant_of(self, ancestor: ExpnId) -> bool {
+        // a few "fast path" cases to avoid locking HygieneData
+        if ancestor == ExpnId::root() || ancestor == self {
+            return true;
+        }
+        if ancestor.krate != self.krate {
+            return false;
+        }
+        HygieneData::with(|data| data.is_descendant_of(self, ancestor))
+    }
+
+    /// `expn_id.outer_expn_is_descendant_of(ctxt)` is equivalent to but faster than
+    /// `expn_id.is_descendant_of(ctxt.outer_expn())`.
+    pub fn outer_expn_is_descendant_of(self, ctxt: SyntaxContext) -> bool {
+        HygieneData::with(|data| data.is_descendant_of(self, data.outer_expn(ctxt)))
+    }
+
+    /// Returns span for the macro which originally caused this expansion to happen.
+    ///
+    /// Stops backtracing at include! boundary.
+    pub fn expansion_cause(mut self) -> Option<Span> {
+        let mut last_macro = None;
+        loop {
+            let expn_data = self.expn_data();
+            // Stop going up the backtrace once include! is encountered
+            if expn_data.is_root()
+                || expn_data.kind == ExpnKind::Macro(MacroKind::Bang, sym::include)
+            {
+                break;
+            }
+            // Step to the expansion that produced the call site, remembering the call site.
+            self = expn_data.call_site.ctxt().outer_expn();
+            last_macro = Some(expn_data.call_site);
+        }
+        last_macro
+    }
+}
+
+/// All hygiene state of a session: expansion data and hashes (local and foreign) plus the
+/// table of syntax contexts. Accessed through `HygieneData::with`.
+#[derive(Debug)]
+pub struct HygieneData {
+ /// Each expansion should have an associated expansion data, but sometimes there's a delay
+ /// between creation of an expansion ID and obtaining its data (e.g. macros are collected
+ /// first and then resolved later), so we use an `Option` here.
+ local_expn_data: IndexVec<LocalExpnId, Option<ExpnData>>,
+ /// Hash of each local expansion; kept the same length as `local_expn_data`.
+ local_expn_hashes: IndexVec<LocalExpnId, ExpnHash>,
+ /// Data and hash information from external crates. We may eventually want to remove these
+ /// maps, and fetch the information directly from the other crate's metadata like DefIds do.
+ foreign_expn_data: FxHashMap<ExpnId, ExpnData>,
+ foreign_expn_hashes: FxHashMap<ExpnId, ExpnHash>,
+ /// Reverse mapping from an expansion hash to the expansion it belongs to
+ /// (used by `ExpnId::from_hash`).
+ expn_hash_to_expn_id: UnhashMap<ExpnHash, ExpnId>,
+ /// Data of every syntax context, indexed by the `u32` inside `SyntaxContext`.
+ syntax_context_data: Vec<SyntaxContextData>,
+ /// Interning table: maps `(parent context, expansion, transparency)` to the context that
+ /// was created for that combination, so that it is created only once.
+ syntax_context_map: FxHashMap<(SyntaxContext, ExpnId, Transparency), SyntaxContext>,
+ /// Maps the `local_hash` of an `ExpnData` to the next disambiguator value.
+ /// This is used by `update_disambiguator` to keep track of which `ExpnData`s
+ /// would have collisions without a disambiguator.
+ /// The keys of this map are always computed with `ExpnData.disambiguator`
+ /// set to 0.
+ expn_data_disambiguators: FxHashMap<u64, u32>,
+}
+
+impl HygieneData {
+ /// Creates the initial hygiene state: one root expansion (index 0, zero hash) and one
+ /// root syntax context (index 0) whose `parent`, `opaque` and
+ /// `opaque_and_semitransparent` all refer to itself.
+ pub(crate) fn new(edition: Edition) -> Self {
+ let root_data = ExpnData::default(
+ ExpnKind::Root,
+ DUMMY_SP,
+ edition,
+ Some(CRATE_DEF_ID.to_def_id()),
+ None,
+ );
+
+ HygieneData {
+ local_expn_data: IndexVec::from_elem_n(Some(root_data), 1),
+ local_expn_hashes: IndexVec::from_elem_n(ExpnHash(Fingerprint::ZERO), 1),
+ foreign_expn_data: FxHashMap::default(),
+ foreign_expn_hashes: FxHashMap::default(),
+ expn_hash_to_expn_id: std::iter::once((ExpnHash(Fingerprint::ZERO), ExpnId::root()))
+ .collect(),
+ syntax_context_data: vec![SyntaxContextData {
+ outer_expn: ExpnId::root(),
+ outer_transparency: Transparency::Opaque,
+ parent: SyntaxContext(0),
+ opaque: SyntaxContext(0),
+ opaque_and_semitransparent: SyntaxContext(0),
+ dollar_crate_name: kw::DollarCrate,
+ }],
+ syntax_context_map: FxHashMap::default(),
+ expn_data_disambiguators: FxHashMap::default(),
+ }
+ }
+
+ /// Runs `f` with mutable access to the session's `HygieneData`.
+ ///
+ /// The data is obtained with `borrow_mut`, so calls must not be nested (see the note at
+ /// the top of this file).
+ pub fn with<T, F: FnOnce(&mut HygieneData) -> T>(f: F) -> T {
+ with_session_globals(|session_globals| f(&mut *session_globals.hygiene_data.borrow_mut()))
+ }
+
+ /// Returns the hash recorded for a local expansion.
+ #[inline]
+ fn local_expn_hash(&self, expn_id: LocalExpnId) -> ExpnHash {
+ self.local_expn_hashes[expn_id]
+ }
+
+ /// Returns the hash recorded for a local or foreign expansion.
+ /// Panics if a foreign expansion has no recorded hash.
+ #[inline]
+ fn expn_hash(&self, expn_id: ExpnId) -> ExpnHash {
+ match expn_id.as_local() {
+ Some(expn_id) => self.local_expn_hashes[expn_id],
+ None => self.foreign_expn_hashes[&expn_id],
+ }
+ }
+
+ /// Returns the data of a local expansion. Panics if the data has not been set yet.
+ fn local_expn_data(&self, expn_id: LocalExpnId) -> &ExpnData {
+ self.local_expn_data[expn_id].as_ref().expect("no expansion data for an expansion ID")
+ }
+
+ /// Returns the data of a local or foreign expansion.
+ /// Panics if a local expansion has no data yet or a foreign one is unknown.
+ fn expn_data(&self, expn_id: ExpnId) -> &ExpnData {
+ if let Some(expn_id) = expn_id.as_local() {
+ self.local_expn_data[expn_id].as_ref().expect("no expansion data for an expansion ID")
+ } else {
+ &self.foreign_expn_data[&expn_id]
+ }
+ }
+
+ /// Tells whether `ancestor` is `expn_id` itself or is reachable from it by following
+ /// `parent` links. Everything is a descendant of the root expansion.
+ fn is_descendant_of(&self, mut expn_id: ExpnId, ancestor: ExpnId) -> bool {
+ // a couple "fast path" cases to avoid traversing parents in the loop below
+ if ancestor == ExpnId::root() {
+ return true;
+ }
+ if expn_id.krate != ancestor.krate {
+ return false;
+ }
+ loop {
+ if expn_id == ancestor {
+ return true;
+ }
+ if expn_id == ExpnId::root() {
+ return false;
+ }
+ expn_id = self.expn_data(expn_id).parent;
+ }
+ }
+
+ /// Returns the `opaque` context recorded for `ctxt`.
+ fn normalize_to_macros_2_0(&self, ctxt: SyntaxContext) -> SyntaxContext {
+ self.syntax_context_data[ctxt.0 as usize].opaque
+ }
+
+ /// Returns the `opaque_and_semitransparent` context recorded for `ctxt`.
+ fn normalize_to_macro_rules(&self, ctxt: SyntaxContext) -> SyntaxContext {
+ self.syntax_context_data[ctxt.0 as usize].opaque_and_semitransparent
+ }
+
+ /// Returns the expansion of the outermost mark of `ctxt`.
+ fn outer_expn(&self, ctxt: SyntaxContext) -> ExpnId {
+ self.syntax_context_data[ctxt.0 as usize].outer_expn
+ }
+
+ /// Returns the outermost mark of `ctxt` as an `(expansion, transparency)` pair.
+ fn outer_mark(&self, ctxt: SyntaxContext) -> (ExpnId, Transparency) {
+ let data = &self.syntax_context_data[ctxt.0 as usize];
+ (data.outer_expn, data.outer_transparency)
+ }
+
+ /// Returns the context that remains once the outermost mark of `ctxt` is removed.
+ fn parent_ctxt(&self, ctxt: SyntaxContext) -> SyntaxContext {
+ self.syntax_context_data[ctxt.0 as usize].parent
+ }
+
+ /// Replaces `*ctxt` with its parent context and returns the mark that was removed.
+ fn remove_mark(&self, ctxt: &mut SyntaxContext) -> (ExpnId, Transparency) {
+ let outer_mark = self.outer_mark(*ctxt);
+ *ctxt = self.parent_ctxt(*ctxt);
+ outer_mark
+ }
+
+ /// Returns all marks of `ctxt`, ordered from the one closest to the root context to the
+ /// outermost one.
+ fn marks(&self, mut ctxt: SyntaxContext) -> Vec<(ExpnId, Transparency)> {
+ let mut marks = Vec::new();
+ while ctxt != SyntaxContext::root() {
+ debug!("marks: getting parent of {:?}", ctxt);
+ marks.push(self.outer_mark(ctxt));
+ ctxt = self.parent_ctxt(ctxt);
+ }
+ // Marks were collected outermost-first; flip them.
+ marks.reverse();
+ marks
+ }
+
+ /// Repeatedly replaces `span` with the call site of its outer expansion until the span
+ /// no longer comes from an expansion or its context equals `to`.
+ fn walk_chain(&self, mut span: Span, to: SyntaxContext) -> Span {
+ debug!("walk_chain({:?}, {:?})", span, to);
+ debug!("walk_chain: span ctxt = {:?}", span.ctxt());
+ while span.from_expansion() && span.ctxt() != to {
+ let outer_expn = self.outer_expn(span.ctxt());
+ debug!("walk_chain({:?}): outer_expn={:?}", span, outer_expn);
+ let expn_data = self.expn_data(outer_expn);
+ debug!("walk_chain({:?}): expn_data={:?}", span, expn_data);
+ span = expn_data.call_site;
+ }
+ span
+ }
+
+ /// Removes marks from `*ctxt` until `expn_id` is a descendant of the outer expansion of
+ /// `*ctxt`. Returns the expansion of the last mark removed, or `None` if none was removed.
+ fn adjust(&self, ctxt: &mut SyntaxContext, expn_id: ExpnId) -> Option<ExpnId> {
+ let mut scope = None;
+ while !self.is_descendant_of(expn_id, self.outer_expn(*ctxt)) {
+ scope = Some(self.remove_mark(ctxt).0);
+ }
+ scope
+ }
+
+ /// Returns the context obtained by adding the mark `(expn_id, transparency)` to `ctxt`.
+ ///
+ /// Panics if `expn_id` is the root expansion.
+ fn apply_mark(
+ &mut self,
+ ctxt: SyntaxContext,
+ expn_id: ExpnId,
+ transparency: Transparency,
+ ) -> SyntaxContext {
+ assert_ne!(expn_id, ExpnId::root());
+ if transparency == Transparency::Opaque {
+ return self.apply_mark_internal(ctxt, expn_id, transparency);
+ }
+
+ let call_site_ctxt = self.expn_data(expn_id).call_site.ctxt();
+ let mut call_site_ctxt = if transparency == Transparency::SemiTransparent {
+ self.normalize_to_macros_2_0(call_site_ctxt)
+ } else {
+ self.normalize_to_macro_rules(call_site_ctxt)
+ };
+
+ if call_site_ctxt == SyntaxContext::root() {
+ return self.apply_mark_internal(ctxt, expn_id, transparency);
+ }
+
+ // Otherwise, `expn_id` is a macros 1.0 definition and the call site is in a
+ // macros 2.0 expansion, i.e., a macros 1.0 invocation is in a macros 2.0 definition.
+ //
+ // In this case, the tokens from the macros 1.0 definition inherit the hygiene
+ // at their invocation. That is, we pretend that the macros 1.0 definition
+ // was defined at its invocation (i.e., inside the macros 2.0 definition)
+ // so that the macros 2.0 definition remains hygienic.
+ //
+ // See the example at `test/ui/hygiene/legacy_interaction.rs`.
+ for (expn_id, transparency) in self.marks(ctxt) {
+ call_site_ctxt = self.apply_mark_internal(call_site_ctxt, expn_id, transparency);
+ }
+ self.apply_mark_internal(call_site_ctxt, expn_id, transparency)
+ }
+
+ /// Returns the context for `ctxt` plus the mark `(expn_id, transparency)`, creating it
+ /// (and, as needed, its `opaque` / `opaque_and_semitransparent` counterparts) on first use.
+ ///
+ /// Contexts are interned in `syntax_context_map` under the key
+ /// `(parent, expn_id, transparency)`; new ones are appended to `syntax_context_data`.
+ fn apply_mark_internal(
+ &mut self,
+ ctxt: SyntaxContext,
+ expn_id: ExpnId,
+ transparency: Transparency,
+ ) -> SyntaxContext {
+ // Borrow the vector on its own so the closures below can push to it while
+ // `self.syntax_context_map` is borrowed for the `entry` call.
+ let syntax_context_data = &mut self.syntax_context_data;
+ let mut opaque = syntax_context_data[ctxt.0 as usize].opaque;
+ let mut opaque_and_semitransparent =
+ syntax_context_data[ctxt.0 as usize].opaque_and_semitransparent;
+
+ // Only an opaque mark extends the `opaque` chain.
+ if transparency >= Transparency::Opaque {
+ let parent = opaque;
+ opaque = *self
+ .syntax_context_map
+ .entry((parent, expn_id, transparency))
+ .or_insert_with(|| {
+ let new_opaque = SyntaxContext(syntax_context_data.len() as u32);
+ syntax_context_data.push(SyntaxContextData {
+ outer_expn: expn_id,
+ outer_transparency: transparency,
+ parent,
+ opaque: new_opaque,
+ opaque_and_semitransparent: new_opaque,
+ dollar_crate_name: kw::DollarCrate,
+ });
+ new_opaque
+ });
+ }
+
+ // Semi-transparent and opaque marks extend the `opaque_and_semitransparent` chain.
+ if transparency >= Transparency::SemiTransparent {
+ let parent = opaque_and_semitransparent;
+ opaque_and_semitransparent = *self
+ .syntax_context_map
+ .entry((parent, expn_id, transparency))
+ .or_insert_with(|| {
+ let new_opaque_and_semitransparent =
+ SyntaxContext(syntax_context_data.len() as u32);
+ syntax_context_data.push(SyntaxContextData {
+ outer_expn: expn_id,
+ outer_transparency: transparency,
+ parent,
+ opaque,
+ opaque_and_semitransparent: new_opaque_and_semitransparent,
+ dollar_crate_name: kw::DollarCrate,
+ });
+ new_opaque_and_semitransparent
+ });
+ }
+
+ // Finally intern the full context, whose parent is `ctxt` itself.
+ let parent = ctxt;
+ *self.syntax_context_map.entry((parent, expn_id, transparency)).or_insert_with(|| {
+ let new_opaque_and_semitransparent_and_transparent =
+ SyntaxContext(syntax_context_data.len() as u32);
+ syntax_context_data.push(SyntaxContextData {
+ outer_expn: expn_id,
+ outer_transparency: transparency,
+ parent,
+ opaque,
+ opaque_and_semitransparent,
+ dollar_crate_name: kw::DollarCrate,
+ });
+ new_opaque_and_semitransparent_and_transparent
+ })
+ }
+}
+
+/// Replaces the syntax-context interning table (`syntax_context_map`) with an empty map.
+/// The contexts already stored in `syntax_context_data` are left untouched.
+pub fn clear_syntax_context_map() {
+ HygieneData::with(|data| data.syntax_context_map = FxHashMap::default());
+}
+
+/// Repeatedly replaces `span` with the call site of its outer expansion until the span no
+/// longer comes from an expansion or its context equals `to`.
+///
+/// Borrows the hygiene data for the duration of the call.
+pub fn walk_chain(span: Span, to: SyntaxContext) -> Span {
+ HygieneData::with(|data| data.walk_chain(span, to))
+}
+
+/// Fills in `dollar_crate_name` for the trailing syntax contexts that still carry the
+/// `kw::DollarCrate` placeholder, asking `get_name` for the name of each of them.
+pub fn update_dollar_crate_names(mut get_name: impl FnMut(SyntaxContext) -> Symbol) {
+    // The new contexts that need updating are at the end of the list and have `$crate` as a name.
+    let (len, to_update) = HygieneData::with(|data| {
+        let contexts = &data.syntax_context_data;
+        let pending = contexts
+            .iter()
+            .rev()
+            .take_while(|scdata| scdata.dollar_crate_name == kw::DollarCrate)
+            .count();
+        (contexts.len(), pending)
+    });
+    // The callback must be called from outside of the `HygieneData` lock,
+    // since it will try to acquire it too.
+    let range_to_update = len - to_update..len;
+    let mut names = Vec::with_capacity(to_update);
+    for idx in range_to_update.clone() {
+        names.push(get_name(SyntaxContext::from_u32(idx as u32)));
+    }
+    HygieneData::with(|data| {
+        for (idx, name) in range_to_update.zip(names) {
+            data.syntax_context_data[idx].dollar_crate_name = name;
+        }
+    })
+}
+
+/// Renders the global hygiene data as text for debugging.
+///
+/// With `verbose` set, this is the pretty-printed `Debug` output of the whole `HygieneData`.
+/// Otherwise it is a compact listing: one line per expansion (local ones first, then foreign
+/// ones sorted by crate and local id) followed by one line per syntax context.
+pub fn debug_hygiene_data(verbose: bool) -> String {
+    HygieneData::with(|data| {
+        if verbose {
+            return format!("{:#?}", data);
+        }
+
+        let mut out = String::from("Expansions:");
+        let mut push_expn = |(id, expn_data): (&ExpnId, &ExpnData)| {
+            out.push_str(&format!(
+                "\n{:?}: parent: {:?}, call_site_ctxt: {:?}, def_site_ctxt: {:?}, kind: {:?}",
+                id,
+                expn_data.parent,
+                expn_data.call_site.ctxt(),
+                expn_data.def_site.ctxt(),
+                expn_data.kind,
+            ))
+        };
+
+        for (id, expn_data) in data.local_expn_data.iter_enumerated() {
+            let expn_data = expn_data.as_ref().expect("no expansion data for an expansion ID");
+            push_expn((&id.to_expn_id(), expn_data));
+        }
+
+        // Sort the hash map for more reproducible output.
+        // Because of this, it is fine to rely on the unstable iteration order of the map.
+        #[allow(rustc::potential_query_instability)]
+        let mut foreign: Vec<_> = data.foreign_expn_data.iter().collect();
+        foreign.sort_by_key(|(id, _)| (id.krate, id.local_id));
+        for entry in foreign {
+            push_expn(entry);
+        }
+
+        out.push_str("\n\nSyntaxContexts:");
+        for (id, ctxt) in data.syntax_context_data.iter().enumerate() {
+            out.push_str(&format!(
+                "\n#{}: parent: {:?}, outer_mark: ({:?}, {:?})",
+                id, ctxt.parent, ctxt.outer_expn, ctxt.outer_transparency,
+            ));
+        }
+        out
+    })
+}
+
+impl SyntaxContext {
+    /// The root syntax context, i.e. the one whose raw value is 0.
+    #[inline]
+    pub const fn root() -> Self {
+        Self(0)
+    }
+
+    /// The raw `u32` wrapped by this context.
+    #[inline]
+    pub(crate) fn as_u32(self) -> u32 {
+        let Self(raw) = self;
+        raw
+    }
+
+    /// Wraps a raw `u32` into a `SyntaxContext` without any validation.
+    #[inline]
+    pub(crate) fn from_u32(raw: u32) -> SyntaxContext {
+        Self(raw)
+    }
+
+    /// Extend a syntax context with a given expansion and transparency.
+    pub(crate) fn apply_mark(self, expn_id: ExpnId, transparency: Transparency) -> SyntaxContext {
+        HygieneData::with(|hygiene| hygiene.apply_mark(self, expn_id, transparency))
+    }
+
+    /// Pulls a single mark off of the syntax context. This effectively moves the
+    /// context up one macro definition level. That is, if we have a nested macro
+    /// definition as follows:
+    ///
+    /// ```ignore (illustrative)
+    /// macro_rules! f {
+    ///    macro_rules! g {
+    ///        ...
+    ///    }
+    /// }
+    /// ```
+    ///
+    /// and we have a SyntaxContext that is referring to something declared by an invocation
+    /// of g (call it g1), calling remove_mark will result in the SyntaxContext for the
+    /// invocation of f that created g1.
+    /// Returns the mark that was removed.
+    pub fn remove_mark(&mut self) -> ExpnId {
+        HygieneData::with(|hygiene| {
+            let (removed, _transparency) = hygiene.remove_mark(self);
+            removed
+        })
+    }
+
+    /// Returns the `(expansion, transparency)` marks of this context as computed by
+    /// `HygieneData::marks`.
+    pub fn marks(self) -> Vec<(ExpnId, Transparency)> {
+        HygieneData::with(|hygiene| hygiene.marks(self))
+    }
+
+    /// Adjust this context for resolution in a scope created by the given expansion.
+    /// For example, consider the following three resolutions of `f`:
+    ///
+    /// ```rust
+    /// #![feature(decl_macro)]
+    /// mod foo { pub fn f() {} } // `f`'s `SyntaxContext` is empty.
+    /// m!(f);
+    /// macro m($f:ident) {
+    ///     mod bar {
+    ///         pub fn f() {} // `f`'s `SyntaxContext` has a single `ExpnId` from `m`.
+    ///         pub fn $f() {} // `$f`'s `SyntaxContext` is empty.
+    ///     }
+    ///     foo::f(); // `f`'s `SyntaxContext` has a single `ExpnId` from `m`
+    ///     //^ Since `mod foo` is outside this expansion, `adjust` removes the mark from `f`,
+    ///     //| and it resolves to `::foo::f`.
+    ///     bar::f(); // `f`'s `SyntaxContext` has a single `ExpnId` from `m`
+    ///     //^ Since `mod bar` is not outside this expansion, `adjust` does not change `f`,
+    ///     //| and it resolves to `::bar::f`.
+    ///     bar::$f(); // `f`'s `SyntaxContext` is empty.
+    ///     //^ Since `mod bar` is not outside this expansion, `adjust` does not change `$f`,
+    ///     //| and it resolves to `::bar::$f`.
+    /// }
+    /// ```
+    /// This returns the expansion whose definition scope we use to privacy check the resolution,
+    /// or `None` if we privacy check as usual (i.e., not w.r.t. a macro definition scope).
+    pub fn adjust(&mut self, expn_id: ExpnId) -> Option<ExpnId> {
+        HygieneData::with(|hygiene| hygiene.adjust(self, expn_id))
+    }
+
+    /// Like `SyntaxContext::adjust`, but also normalizes `self` to macros 2.0.
+    pub fn normalize_to_macros_2_0_and_adjust(&mut self, expn_id: ExpnId) -> Option<ExpnId> {
+        HygieneData::with(|hygiene| {
+            let normalized = hygiene.normalize_to_macros_2_0(*self);
+            *self = normalized;
+            hygiene.adjust(self, expn_id)
+        })
+    }
+
+    /// Adjust this context for resolution in a scope created by the given expansion
+    /// via a glob import with the given `SyntaxContext`.
+    /// For example:
+    ///
+    /// ```compile_fail,E0425
+    /// #![feature(decl_macro)]
+    /// m!(f);
+    /// macro m($i:ident) {
+    ///     mod foo {
+    ///         pub fn f() {} // `f`'s `SyntaxContext` has a single `ExpnId` from `m`.
+    ///         pub fn $i() {} // `$i`'s `SyntaxContext` is empty.
+    ///     }
+    ///     n!(f);
+    ///     macro n($j:ident) {
+    ///         use foo::*;
+    ///         f(); // `f`'s `SyntaxContext` has a mark from `m` and a mark from `n`
+    ///         //^ `glob_adjust` removes the mark from `n`, so this resolves to `foo::f`.
+    ///         $i(); // `$i`'s `SyntaxContext` has a mark from `n`
+    ///         //^ `glob_adjust` removes the mark from `n`, so this resolves to `foo::$i`.
+    ///         $j(); // `$j`'s `SyntaxContext` has a mark from `m`
+    ///         //^ This cannot be glob-adjusted, so this is a resolution error.
+    ///     }
+    /// }
+    /// ```
+    /// This returns `None` if the context cannot be glob-adjusted.
+    /// Otherwise, it returns the scope to use when privacy checking (see `adjust` for details).
+    pub fn glob_adjust(&mut self, expn_id: ExpnId, glob_span: Span) -> Option<Option<ExpnId>> {
+        HygieneData::with(|hygiene| {
+            let mut scope = None;
+            let mut glob_ctxt = hygiene.normalize_to_macros_2_0(glob_span.ctxt());
+            loop {
+                let glob_outer = hygiene.outer_expn(glob_ctxt);
+                if hygiene.is_descendant_of(expn_id, glob_outer) {
+                    break;
+                }
+                // Peel one mark off the glob's context and one off ours; they have to agree.
+                let (glob_mark, _) = hygiene.remove_mark(&mut glob_ctxt);
+                scope = Some(glob_mark);
+                let (own_mark, _) = hygiene.remove_mark(self);
+                if own_mark != glob_mark {
+                    return None;
+                }
+            }
+            match hygiene.adjust(self, expn_id) {
+                Some(_) => None,
+                None => Some(scope),
+            }
+        })
+    }
+
+    /// Undo `glob_adjust` if possible:
+    ///
+    /// ```ignore (illustrative)
+    /// if let Some(privacy_checking_scope) = self.reverse_glob_adjust(expansion, glob_ctxt) {
+    ///     assert!(self.glob_adjust(expansion, glob_ctxt) == Some(privacy_checking_scope));
+    /// }
+    /// ```
+    pub fn reverse_glob_adjust(
+        &mut self,
+        expn_id: ExpnId,
+        glob_span: Span,
+    ) -> Option<Option<ExpnId>> {
+        HygieneData::with(|hygiene| {
+            if hygiene.adjust(self, expn_id).is_some() {
+                return None;
+            }
+
+            // Collect the marks that `glob_adjust` would strip from the glob's context.
+            let mut glob_ctxt = hygiene.normalize_to_macros_2_0(glob_span.ctxt());
+            let mut removed = Vec::new();
+            loop {
+                let glob_outer = hygiene.outer_expn(glob_ctxt);
+                if hygiene.is_descendant_of(expn_id, glob_outer) {
+                    break;
+                }
+                removed.push(hygiene.remove_mark(&mut glob_ctxt));
+            }
+
+            // Re-apply them to `self`, last removed first.
+            let scope = removed.last().map(|&(mark, _)| mark);
+            for (mark, transparency) in removed.into_iter().rev() {
+                *self = hygiene.apply_mark(*self, mark, transparency);
+            }
+            Some(scope)
+        })
+    }
+
+    /// Compares `self`, normalized to macros 2.0 and adjusted for `expn_id`, against `other`
+    /// normalized to macros 2.0.
+    pub fn hygienic_eq(self, other: SyntaxContext, expn_id: ExpnId) -> bool {
+        HygieneData::with(|hygiene| {
+            let mut lhs = hygiene.normalize_to_macros_2_0(self);
+            hygiene.adjust(&mut lhs, expn_id);
+            let rhs = hygiene.normalize_to_macros_2_0(other);
+            lhs == rhs
+        })
+    }
+
+    /// Shorthand for `HygieneData::normalize_to_macros_2_0` on the global hygiene data.
+    #[inline]
+    pub fn normalize_to_macros_2_0(self) -> SyntaxContext {
+        HygieneData::with(|hygiene| hygiene.normalize_to_macros_2_0(self))
+    }
+
+    /// Shorthand for `HygieneData::normalize_to_macro_rules` on the global hygiene data.
+    #[inline]
+    pub fn normalize_to_macro_rules(self) -> SyntaxContext {
+        HygieneData::with(|hygiene| hygiene.normalize_to_macro_rules(self))
+    }
+
+    /// Shorthand for `HygieneData::outer_expn` on the global hygiene data.
+    #[inline]
+    pub fn outer_expn(self) -> ExpnId {
+        HygieneData::with(|hygiene| hygiene.outer_expn(self))
+    }
+
+    /// `ctxt.outer_expn_data()` is equivalent to but faster than
+    /// `ctxt.outer_expn().expn_data()`.
+    #[inline]
+    pub fn outer_expn_data(self) -> ExpnData {
+        HygieneData::with(|hygiene| {
+            let outer = hygiene.outer_expn(self);
+            hygiene.expn_data(outer).clone()
+        })
+    }
+
+    /// Shorthand for `HygieneData::outer_mark` on the global hygiene data.
+    #[inline]
+    pub fn outer_mark(self) -> (ExpnId, Transparency) {
+        HygieneData::with(|hygiene| hygiene.outer_mark(self))
+    }
+
+    /// The `dollar_crate_name` currently stored for this context.
+    pub fn dollar_crate_name(self) -> Symbol {
+        let idx = self.0 as usize;
+        HygieneData::with(|hygiene| hygiene.syntax_context_data[idx].dollar_crate_name)
+    }
+
+    /// The `edition` recorded in the `ExpnData` of this context's outer expansion.
+    pub fn edition(self) -> Edition {
+        HygieneData::with(|hygiene| {
+            let outer = hygiene.outer_expn(self);
+            hygiene.expn_data(outer).edition
+        })
+    }
+}
+
+impl fmt::Debug for SyntaxContext {
+    /// Formats the context as `#` followed by its raw index, e.g. `#0` for the root.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let SyntaxContext(index) = *self;
+        f.write_fmt(format_args!("#{}", index))
+    }
+}
+
+impl Span {
+    /// Creates a fresh expansion with given properties.
+    /// Expansions are normally created by macros, but in some cases expansions are created for
+    /// other compiler-generated code to set per-span properties like allowed unstable features.
+    /// The returned span belongs to the created expansion and has the new properties,
+    /// but its location is inherited from the current span.
+    pub fn fresh_expansion(self, expn_id: LocalExpnId) -> Span {
+        HygieneData::with(|hygiene| {
+            // The mark is applied on top of the root context, with full transparency.
+            let ctxt = hygiene.apply_mark(
+                SyntaxContext::root(),
+                expn_id.to_expn_id(),
+                Transparency::Transparent,
+            );
+            self.with_ctxt(ctxt)
+        })
+    }
+
+    /// Reuses the span but adds information like the kind of the desugaring and features that are
+    /// allowed inside this span.
+    pub fn mark_with_reason(
+        self,
+        allow_internal_unstable: Option<Lrc<[Symbol]>>,
+        reason: DesugaringKind,
+        edition: Edition,
+        ctx: impl HashStableContext,
+    ) -> Span {
+        let mut expn_data =
+            ExpnData::default(ExpnKind::Desugaring(reason), self, edition, None, None);
+        expn_data.allow_internal_unstable = allow_internal_unstable;
+        let expn_id = LocalExpnId::fresh(expn_data, ctx);
+        self.fresh_expansion(expn_id)
+    }
+}
+
+/// A subset of properties from both macro definition and macro call available through global data.
+/// Avoid using this if you have access to the original definition or call structures.
+#[derive(Clone, Debug, Encodable, Decodable, HashStable_Generic)]
+pub struct ExpnData {
+ // --- The part unique to each expansion.
+ /// The kind of this expansion - macro or compiler desugaring.
+ pub kind: ExpnKind,
+ /// The expansion that produced this expansion.
+ pub parent: ExpnId,
+ /// The location of the actual macro invocation or syntax sugar, e.g.
+ /// `let x = foo!();` or `if let Some(y) = x {}`
+ ///
+ /// This may recursively refer to other macro invocations, e.g., if
+ /// `foo!()` invoked `bar!()` internally, and there was an
+ /// expression inside `bar!`; the call_site of the expression in
+ /// the expansion would point to the `bar!` invocation; that
+ /// call_site span would have its own ExpnData, with the call_site
+ /// pointing to the `foo!` invocation.
+ pub call_site: Span,
+ /// Used to force two `ExpnData`s to have different `Fingerprint`s.
+ /// Due to macro expansion, it's possible to end up with two `ExpnId`s
+ /// that have identical `ExpnData`s. This violates the contract of `HashStable`:
+ /// the two `ExpnId`s are not equal, but their `Fingerprint`s are equal
+ /// (since the numerical `ExpnId` value is not considered by the `HashStable`
+ /// implementation).
+ ///
+ /// The `disambiguator` field is set by `update_disambiguator` when two distinct
+ /// `ExpnId`s would end up with the same `Fingerprint`. Since `ExpnData` includes
+ /// a `krate` field, this value only needs to be unique within a single crate.
+ disambiguator: u32,
+
+ // --- The part specific to the macro/desugaring definition.
+ // --- It may be reasonable to share this part between expansions with the same definition,
+ // --- but such sharing is known to bring some minor inconveniences without also bringing
+ // --- noticeable perf improvements (PR #62898).
+ /// The span of the macro definition (possibly dummy).
+ /// This span serves only informational purpose and is not used for resolution.
+ pub def_site: Span,
+ /// List of `#[unstable]`/feature-gated features that the macro is allowed to use
+ /// internally without forcing the whole crate to opt-in
+ /// to them.
+ pub allow_internal_unstable: Option<Lrc<[Symbol]>>,
+ /// Whether the macro is allowed to use `unsafe` internally
+ /// even if the user crate has `#![forbid(unsafe_code)]`.
+ pub allow_internal_unsafe: bool,
+ /// Enables the macro helper hack (`ident!(...)` -> `$crate::ident!(...)`)
+ /// for a given macro.
+ pub local_inner_macros: bool,
+ /// Edition of the crate in which the macro is defined.
+ pub edition: Edition,
+ /// The `DefId` of the macro being invoked,
+ /// if this `ExpnData` corresponds to a macro invocation.
+ pub macro_def_id: Option<DefId>,
+ /// The normal module (`mod`) in which the expanded macro was defined.
+ pub parent_module: Option<DefId>,
+}
+// Negative impls: `ExpnData` explicitly opts out of `PartialEq` and `Hash`.
+impl !PartialEq for ExpnData {}
+impl !Hash for ExpnData {}
+
+impl ExpnData {
+    /// Builds an `ExpnData` from explicitly supplied values for every public field.
+    /// The private `disambiguator` always starts out as 0.
+    pub fn new(
+        kind: ExpnKind,
+        parent: ExpnId,
+        call_site: Span,
+        def_site: Span,
+        allow_internal_unstable: Option<Lrc<[Symbol]>>,
+        allow_internal_unsafe: bool,
+        local_inner_macros: bool,
+        edition: Edition,
+        macro_def_id: Option<DefId>,
+        parent_module: Option<DefId>,
+    ) -> ExpnData {
+        Self {
+            disambiguator: 0,
+            kind,
+            parent,
+            call_site,
+            def_site,
+            edition,
+            macro_def_id,
+            parent_module,
+            allow_internal_unstable,
+            allow_internal_unsafe,
+            local_inner_macros,
+        }
+    }
+
+    /// Constructs expansion data with default properties: the root expansion as parent, a dummy
+    /// definition span, no internally allowed unstable features, and both
+    /// `allow_internal_unsafe` and `local_inner_macros` switched off.
+    pub fn default(
+        kind: ExpnKind,
+        call_site: Span,
+        edition: Edition,
+        macro_def_id: Option<DefId>,
+        parent_module: Option<DefId>,
+    ) -> ExpnData {
+        Self::new(
+            kind,
+            ExpnId::root(),
+            call_site,
+            DUMMY_SP,
+            None,
+            false,
+            false,
+            edition,
+            macro_def_id,
+            parent_module,
+        )
+    }
+
+    /// Same as `ExpnData::default`, except that `allow_internal_unstable` is set to the given
+    /// list of features.
+    pub fn allow_unstable(
+        kind: ExpnKind,
+        call_site: Span,
+        edition: Edition,
+        allow_internal_unstable: Lrc<[Symbol]>,
+        macro_def_id: Option<DefId>,
+        parent_module: Option<DefId>,
+    ) -> ExpnData {
+        let mut expn_data = ExpnData::default(kind, call_site, edition, macro_def_id, parent_module);
+        expn_data.allow_internal_unstable = Some(allow_internal_unstable);
+        expn_data
+    }
+
+    /// Whether this is the data of the root expansion, i.e. its kind is `ExpnKind::Root`.
+    #[inline]
+    pub fn is_root(&self) -> bool {
+        self.kind == ExpnKind::Root
+    }
+
+    /// Feeds `self` through `HashStable` into a fresh `StableHasher` and returns the result.
+    #[inline]
+    fn hash_expn(&self, ctx: &mut impl HashStableContext) -> u64 {
+        let mut state = StableHasher::new();
+        self.hash_stable(ctx, &mut state);
+        state.finish()
+    }
+}
+
+/// Expansion kind.
+#[derive(Clone, Debug, PartialEq, Encodable, Decodable, HashStable_Generic)]
+pub enum ExpnKind {
+    /// No expansion, aka root expansion. Only `ExpnId::root()` has this kind.
+    Root,
+    /// Expansion produced by a macro.
+    Macro(MacroKind, Symbol),
+    /// Transform done by the compiler on the AST.
+    AstPass(AstPass),
+    /// Desugaring done by the compiler during HIR lowering.
+    Desugaring(DesugaringKind),
+    /// MIR inlining.
+    Inlined,
+}
+
+impl ExpnKind {
+    /// Human-readable description of this expansion kind.
+    ///
+    /// Macros are rendered the way they are invoked (`name!`, `#[name]`, `#[derive(name)]`);
+    /// AST passes and desugarings defer to the `descr` of their respective kind.
+    pub fn descr(&self) -> String {
+        match *self {
+            Self::Root => kw::PathRoot.to_string(),
+            Self::Macro(MacroKind::Bang, name) => format!("{}!", name),
+            Self::Macro(MacroKind::Attr, name) => format!("#[{}]", name),
+            Self::Macro(MacroKind::Derive, name) => format!("#[derive({})]", name),
+            Self::AstPass(pass) => String::from(pass.descr()),
+            Self::Desugaring(sugar) => format!("desugaring of {}", sugar.descr()),
+            Self::Inlined => String::from("inlined source"),
+        }
+    }
+}
+
+/// The kind of macro invocation or definition.
+#[derive(Clone, Copy, PartialEq, Eq, Encodable, Decodable, Hash, Debug)]
+#[derive(HashStable_Generic)]
+pub enum MacroKind {
+    /// A bang macro `foo!()`.
+    Bang,
+    /// An attribute macro `#[foo]`.
+    Attr,
+    /// A derive macro `#[derive(Foo)]`.
+    Derive,
+}
+
+impl MacroKind {
+    /// Noun phrase naming this kind of macro.
+    pub fn descr(self) -> &'static str {
+        match self {
+            Self::Derive => "derive macro",
+            Self::Attr => "attribute macro",
+            Self::Bang => "macro",
+        }
+    }
+
+    /// Like `descr`, except that attribute macros are called just "attribute".
+    pub fn descr_expected(self) -> &'static str {
+        if let Self::Attr = self { "attribute" } else { self.descr() }
+    }
+
+    /// The indefinite article matching `descr`: "an" for attribute macros, "a" otherwise.
+    pub fn article(self) -> &'static str {
+        if let Self::Attr = self { "an" } else { "a" }
+    }
+}
+
+/// The kind of AST transform.
+#[derive(Clone, Copy, Debug, PartialEq, Encodable, Decodable, HashStable_Generic)]
+pub enum AstPass {
+    /// Standard library imports.
+    StdImports,
+    /// The test harness.
+    TestHarness,
+    /// The proc macro harness.
+    ProcMacroHarness,
+}
+
+impl AstPass {
+    /// Short human-readable name of the pass.
+    pub fn descr(self) -> &'static str {
+        match self {
+            Self::ProcMacroHarness => "proc macro harness",
+            Self::TestHarness => "test harness",
+            Self::StdImports => "standard library imports",
+        }
+    }
+}
+
+/// The kind of compiler desugaring.
+#[derive(Clone, Copy, PartialEq, Debug, Encodable, Decodable, HashStable_Generic)]
+pub enum DesugaringKind {
+    /// We desugar `if c { i } else { e }` to `match $ExprKind::Use(c) { true => i, _ => e }`.
+    /// However, we do not want to blame `c` for unreachability but rather say that `i`
+    /// is unreachable. This desugaring kind allows us to avoid blaming `c`.
+    /// This also applies to `while` loops.
+    CondTemporary,
+    /// Desugaring of the `?` operator.
+    QuestionMark,
+    /// Desugaring of a `try` block.
+    TryBlock,
+    /// Desugaring of a `do yeet` expression.
+    YeetExpr,
+    /// Desugaring of an `impl Trait` in return type position
+    /// to an `type Foo = impl Trait;` and replacing the
+    /// `impl Trait` with `Foo`.
+    OpaqueTy,
+    /// Desugaring of an `async` block or function.
+    Async,
+    /// Desugaring of an `await` expression.
+    Await,
+    /// Desugaring of a `for` loop.
+    ForLoop,
+    /// Desugaring of a `while` loop.
+    WhileLoop,
+}
+
+impl DesugaringKind {
+    /// The description wording should combine well with "desugaring of {}".
+    pub fn descr(self) -> &'static str {
+        // Arms follow the declaration order of the variants.
+        match self {
+            Self::CondTemporary => "`if` or `while` condition",
+            Self::QuestionMark => "operator `?`",
+            Self::TryBlock => "`try` block",
+            Self::YeetExpr => "`do yeet` expression",
+            Self::OpaqueTy => "`impl Trait`",
+            Self::Async => "`async` block or function",
+            Self::Await => "`await` expression",
+            Self::ForLoop => "`for` loop",
+            Self::WhileLoop => "`while` loop",
+        }
+    }
+}
+
+#[derive(Default)]
+pub struct HygieneEncodeContext {
+    /// All `SyntaxContexts` whose `SyntaxContextData` has already been passed to the
+    /// `encode_ctxt` callback of `encode`.
+    serialized_ctxts: Lock<FxHashSet<SyntaxContext>>,
+    /// The `SyntaxContexts` that we have serialized (e.g. as a result of encoding `Spans`)
+    /// in the most recent 'round' of serializing. Serializing `SyntaxContextData`
+    /// may cause us to serialize more `SyntaxContext`s, so serialize in a loop
+    /// until we reach a fixed point.
+    latest_ctxts: Lock<FxHashSet<SyntaxContext>>,
+
+    /// All `ExpnId`s whose `ExpnData` has already been passed to the `encode_expn` callback
+    /// of `encode`.
+    serialized_expns: Lock<FxHashSet<ExpnId>>,
+
+    /// The `ExpnId`s scheduled for serialization that the next round of `encode` will pick up.
+    latest_expns: Lock<FxHashSet<ExpnId>>,
+}
+
+impl HygieneEncodeContext {
+    /// Record the fact that we need to serialize the corresponding `ExpnData`.
+    pub fn schedule_expn_data_for_encoding(&self, expn: ExpnId) {
+        let already_serialized = self.serialized_expns.lock().contains(&expn);
+        if already_serialized {
+            return;
+        }
+        self.latest_expns.lock().insert(expn);
+    }
+
+    /// Serializes all scheduled `SyntaxContextData` and `ExpnData` through the given callbacks.
+    ///
+    /// Works in rounds: each round takes the currently scheduled contexts and expansions and
+    /// invokes `encode_ctxt` / `encode_expn` for every one that has not been serialized before.
+    /// Rounds repeat until nothing new has been scheduled.
+    pub fn encode<T>(
+        &self,
+        encoder: &mut T,
+        mut encode_ctxt: impl FnMut(&mut T, u32, &SyntaxContextData),
+        mut encode_expn: impl FnMut(&mut T, ExpnId, &ExpnData, ExpnHash),
+    ) {
+        // When we serialize a `SyntaxContextData`, we may end up serializing
+        // a `SyntaxContext` that we haven't seen before
+        while !self.latest_ctxts.lock().is_empty() || !self.latest_expns.lock().is_empty() {
+            debug!(
+                "encode_hygiene: Serializing a round of {:?} SyntaxContextDatas: {:?}",
+                self.latest_ctxts.lock().len(),
+                self.latest_ctxts
+            );
+
+            // Consume the current round of SyntaxContexts. The `lock()` temporary is dropped
+            // at the end of this statement, before any callback runs.
+            let round_ctxts = std::mem::take(&mut *self.latest_ctxts.lock());
+
+            // It's fine to iterate over a hash set, because the serialization
+            // of the table that we insert data into doesn't depend on insertion
+            // order
+            #[allow(rustc::potential_query_instability)]
+            for_all_ctxts_in(round_ctxts.into_iter(), |index, ctxt, data| {
+                let first_time = self.serialized_ctxts.lock().insert(ctxt);
+                if first_time {
+                    encode_ctxt(encoder, index, data);
+                }
+            });
+
+            let round_expns = std::mem::take(&mut *self.latest_expns.lock());
+
+            // Same as above, this is fine as we are inserting into an order-independent hashset
+            #[allow(rustc::potential_query_instability)]
+            for_all_expns_in(round_expns.into_iter(), |expn, data, hash| {
+                let first_time = self.serialized_expns.lock().insert(expn);
+                if first_time {
+                    encode_expn(encoder, expn, data, hash);
+                }
+            });
+        }
+        debug!("encode_hygiene: Done serializing SyntaxContextData");
+    }
+}
+
+#[derive(Default)]
+/// Additional information used to assist in decoding hygiene data.
+pub struct HygieneDecodeContext {
+ /// Maps serialized `SyntaxContext` ids to a `SyntaxContext` in the current
+ /// global `HygieneData`. When we deserialize a `SyntaxContext`, we need to create
+ /// a new id in the global `HygieneData`. This map tracks the ID we end up picking,
+ /// so that multiple occurrences of the same serialized id are decoded to the same
+ /// `SyntaxContext`.
+ remapped_ctxts: Lock<Vec<Option<SyntaxContext>>>,
+}
+
+/// Register an expansion which has been decoded from the on-disk-cache for the local crate.
+///
+/// Appends `data` and `hash` to the local expansion tables, records the hash-to-id mapping and
+/// returns the newly allocated id.
+pub fn register_local_expn_id(data: ExpnData, hash: ExpnHash) -> ExpnId {
+    HygieneData::with(|hygiene| {
+        // Both tables are pushed in lock-step, so they must hand out the same index.
+        let local_id = hygiene.local_expn_data.push(Some(data));
+        let _hash_id = hygiene.local_expn_hashes.push(hash);
+        debug_assert_eq!(local_id, _hash_id);
+
+        let expn_id = local_id.to_expn_id();
+
+        let _previous = hygiene.expn_hash_to_expn_id.insert(hash, expn_id);
+        debug_assert!(_previous.is_none());
+        expn_id
+    })
+}
+
+/// Register an expansion which has been decoded from the metadata of a foreign crate.
+///
+/// Stores `data` and `hash` under the id `(krate, local_id)` and records the hash-to-id
+/// mapping. Debug builds assert that none of these entries existed before.
+pub fn register_expn_id(
+    krate: CrateNum,
+    local_id: ExpnIndex,
+    data: ExpnData,
+    hash: ExpnHash,
+) -> ExpnId {
+    debug_assert!(data.parent == ExpnId::root() || krate == data.parent.krate);
+    let expn_id = ExpnId { krate, local_id };
+    HygieneData::with(|hygiene| {
+        let _prev_data = hygiene.foreign_expn_data.insert(expn_id, data);
+        debug_assert!(_prev_data.is_none());
+        let _prev_hash = hygiene.foreign_expn_hashes.insert(expn_id, hash);
+        debug_assert!(_prev_hash.is_none());
+        let _prev_id = hygiene.expn_hash_to_expn_id.insert(hash, expn_id);
+        debug_assert!(_prev_id.is_none());
+    });
+    expn_id
+}
+
+/// Decode an expansion from the metadata of a foreign crate.
+///
+/// Index 0 denotes the root expansion. For any other index, `decode_data` is only invoked if
+/// the expansion has not been registered yet; its result is then registered via
+/// `register_expn_id`.
+pub fn decode_expn_id(
+    krate: CrateNum,
+    index: u32,
+    decode_data: impl FnOnce(ExpnId) -> (ExpnData, ExpnHash),
+) -> ExpnId {
+    if index == 0 {
+        debug!("decode_expn_id: deserialized root");
+        return ExpnId::root();
+    }
+
+    let local_id = ExpnIndex::from_u32(index);
+
+    // This function is used to decode metadata, so it cannot decode information about LOCAL_CRATE.
+    debug_assert_ne!(krate, LOCAL_CRATE);
+    let expn_id = ExpnId { krate, local_id };
+
+    // Fast path if the expansion has already been decoded.
+    let already_decoded =
+        HygieneData::with(|hygiene| hygiene.foreign_expn_data.contains_key(&expn_id));
+    if already_decoded {
+        return expn_id;
+    }
+
+    // Don't decode the data inside `HygieneData::with`, since we need to recursively decode
+    // other ExpnIds
+    let (expn_data, hash) = decode_data(expn_id);
+
+    register_expn_id(krate, local_id, expn_data, hash)
+}
+
+/// Decodes `SyntaxContext`, using the provided `HygieneDecodeContext`
+/// to track which `SyntaxContext`s we have already decoded.
+/// The provided closure will be invoked to deserialize a `SyntaxContextData`
+/// if we haven't already seen the id of the `SyntaxContext` we are deserializing.
+pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContextData>(
+    d: &mut D,
+    context: &HygieneDecodeContext,
+    decode_data: F,
+) -> SyntaxContext {
+    let raw_id: u32 = Decodable::decode(d);
+    if raw_id == 0 {
+        debug!("decode_syntax_context: deserialized root");
+        // The root is special
+        return SyntaxContext::root();
+    }
+
+    let outer_ctxts = &context.remapped_ctxts;
+    let slot = raw_id as usize;
+
+    // Binding the lookup result first ensures that the `lock()` temporary is dropped early.
+    let remapped = outer_ctxts.lock().get(slot).copied().flatten();
+    if let Some(ctxt) = remapped {
+        return ctxt;
+    }
+
+    // Allocate and store SyntaxContext id *before* calling the decoder function,
+    // as the SyntaxContextData may reference itself.
+    let new_ctxt = HygieneData::with(|hygiene_data| {
+        let fresh = SyntaxContext(hygiene_data.syntax_context_data.len() as u32);
+        // Push a dummy SyntaxContextData to ensure that nobody else can get the
+        // same ID as us. This will be overwritten after calling `decode_data`.
+        hygiene_data.syntax_context_data.push(SyntaxContextData {
+            outer_expn: ExpnId::root(),
+            outer_transparency: Transparency::Transparent,
+            parent: SyntaxContext::root(),
+            opaque: SyntaxContext::root(),
+            opaque_and_semitransparent: SyntaxContext::root(),
+            dollar_crate_name: kw::Empty,
+        });
+        {
+            // Grow the remapping table on demand, then record the id we picked.
+            let mut table = outer_ctxts.lock();
+            if table.len() <= slot {
+                table.resize(slot + 1, None);
+            }
+            table[slot] = Some(fresh);
+        }
+        fresh
+    });
+
+    // Don't try to decode data while holding the lock, since we need to
+    // be able to recursively decode a SyntaxContext
+    let mut ctxt_data = decode_data(d, raw_id);
+    // Reset `dollar_crate_name` so that it will be updated by `update_dollar_crate_names`
+    // We don't care what the encoding crate set this to - we want to resolve it
+    // from the perspective of the current compilation session
+    ctxt_data.dollar_crate_name = kw::DollarCrate;
+
+    // Overwrite the dummy data with our decoded SyntaxContextData
+    HygieneData::with(|hygiene_data| {
+        let placeholder = std::mem::replace(
+            &mut hygiene_data.syntax_context_data[new_ctxt.as_u32() as usize],
+            ctxt_data,
+        );
+        // Make sure nothing weird happened while `decode_data` was running
+        assert_eq!(placeholder.dollar_crate_name, kw::Empty);
+    });
+
+    new_ctxt
+}
+
+/// Calls `f(raw_index, ctxt, data)` for every context yielded by `ctxts`.
+///
+/// The `SyntaxContextData` of all contexts is cloned in one go inside `HygieneData::with`;
+/// `f` itself is only invoked after that closure has returned.
+fn for_all_ctxts_in<F: FnMut(u32, SyntaxContext, &SyntaxContextData)>(
+    ctxts: impl Iterator<Item = SyntaxContext>,
+    mut f: F,
+) {
+    let snapshot = HygieneData::with(|hygiene| {
+        let mut pairs = Vec::new();
+        for ctxt in ctxts {
+            pairs.push((ctxt, hygiene.syntax_context_data[ctxt.0 as usize].clone()));
+        }
+        pairs
+    });
+    snapshot.into_iter().for_each(|(ctxt, data)| f(ctxt.0, ctxt, &data));
+}
+
+/// Calls `f(expn, data, hash)` for every expansion yielded by `expns`.
+///
+/// The `ExpnData` clones and `ExpnHash`es are gathered in one go inside `HygieneData::with`;
+/// `f` itself is only invoked after that closure has returned.
+fn for_all_expns_in(
+    expns: impl Iterator<Item = ExpnId>,
+    mut f: impl FnMut(ExpnId, &ExpnData, ExpnHash),
+) {
+    let snapshot = HygieneData::with(|hygiene| {
+        let mut triples = Vec::new();
+        for expn in expns {
+            triples.push((expn, hygiene.expn_data(expn).clone(), hygiene.expn_hash(expn)));
+        }
+        triples
+    });
+    snapshot.into_iter().for_each(|(expn, data, hash)| f(expn, &data, hash));
+}
+
+impl<E: Encoder> Encodable<E> for LocalExpnId {
+    /// Encodes the id through its `ExpnId` form.
+    fn encode(&self, e: &mut E) {
+        let expn_id = self.to_expn_id();
+        expn_id.encode(e);
+    }
+}
+
+impl<E: Encoder> Encodable<E> for ExpnId {
+    /// Fallback implementation: panics, naming the encoder type that was used.
+    default fn encode(&self, _: &mut E) {
+        let encoder = std::any::type_name::<E>();
+        panic!("cannot encode `ExpnId` with `{}`", encoder);
+    }
+}
+
+impl<D: Decoder> Decodable<D> for LocalExpnId {
+    /// Decodes an `ExpnId` and converts it with `expect_local`.
+    fn decode(d: &mut D) -> Self {
+        let expn_id = ExpnId::decode(d);
+        expn_id.expect_local()
+    }
+}
+
+impl<D: Decoder> Decodable<D> for ExpnId {
+    /// Fallback implementation: panics, naming the decoder type that was used.
+    default fn decode(_: &mut D) -> Self {
+        let decoder = std::any::type_name::<D>();
+        panic!("cannot decode `ExpnId` with `{}`", decoder);
+    }
+}
+
+/// Writes the raw `u32` of `ctxt` to `e` and, unless `ctxt` has already been serialized,
+/// records it in `context` for the next round of `HygieneEncodeContext::encode`.
+pub fn raw_encode_syntax_context<E: Encoder>(
+    ctxt: SyntaxContext,
+    context: &HygieneEncodeContext,
+    e: &mut E,
+) {
+    let already_serialized = context.serialized_ctxts.lock().contains(&ctxt);
+    if !already_serialized {
+        context.latest_ctxts.lock().insert(ctxt);
+    }
+    ctxt.as_u32().encode(e);
+}
+
+impl<E: Encoder> Encodable<E> for SyntaxContext {
+    /// Fallback implementation: panics, naming the encoder type that was used.
+    default fn encode(&self, _: &mut E) {
+        let encoder = std::any::type_name::<E>();
+        panic!("cannot encode `SyntaxContext` with `{}`", encoder);
+    }
+}
+
+impl<D: Decoder> Decodable<D> for SyntaxContext {
+    /// Fallback implementation: panics, naming the decoder type that was used.
+    default fn decode(_: &mut D) -> Self {
+        let decoder = std::any::type_name::<D>();
+        panic!("cannot decode `SyntaxContext` with `{}`", decoder);
+    }
+}
+
+/// Updates the `disambiguator` field of the corresponding `ExpnData`
+/// such that the `Fingerprint` of the `ExpnData` does not collide with
+/// any other `ExpnIds`.
+///
+/// This method is called only when an `ExpnData` is first associated
+/// with an `ExpnId` (when the `ExpnId` is initially constructed, or via
+/// `set_expn_data`). It is *not* called for foreign `ExpnId`s deserialized
+/// from another crate's metadata - since `ExpnHash` includes the stable crate id,
+/// collisions are only possible between `ExpnId`s within the same crate.
+fn update_disambiguator(expn_data: &mut ExpnData, mut ctx: impl HashStableContext) -> ExpnHash {
+    // This disambiguator should not have been set yet.
+    assert_eq!(
+        expn_data.disambiguator, 0,
+        "Already set disambiguator for ExpnData: {:?}",
+        expn_data
+    );
+    assert_default_hashing_controls(&ctx, "ExpnData (disambiguator)");
+    let mut expn_hash = expn_data.hash_expn(&mut ctx);
+
+    // Fetch the number of `ExpnData`s seen so far with this hash and bump the counter.
+    // If this is the first one, we keep our disambiguator at 0 (the default u32 value).
+    let disambiguator = HygieneData::with(|data| {
+        let counter = data.expn_data_disambiguators.entry(expn_hash).or_default();
+        let seen_before = *counter;
+        *counter = seen_before + 1;
+        seen_before
+    });
+
+    if disambiguator != 0 {
+        debug!("Set disambiguator for expn_data={:?} expn_hash={:?}", expn_data, expn_hash);
+
+        // Re-hash with the disambiguator in place; this is the hash that gets returned.
+        expn_data.disambiguator = disambiguator;
+        expn_hash = expn_data.hash_expn(&mut ctx);
+
+        // Verify that the new disambiguator makes the hash unique
+        #[cfg(debug_assertions)]
+        HygieneData::with(|data| {
+            assert_eq!(
+                data.expn_data_disambiguators.get(&expn_hash),
+                None,
+                "Hash collision after disambiguator update!",
+            );
+        });
+    }
+
+    let stable_crate_id = ctx.def_path_hash(LOCAL_CRATE.as_def_id()).stable_crate_id();
+    ExpnHash::new(stable_crate_id, expn_hash)
+}
+
+impl<CTX: HashStableContext> HashStable<CTX> for SyntaxContext {
+    /// Hashes a one-byte tag followed, for non-root contexts, by the outer mark
+    /// (expansion id, then transparency).
+    fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) {
+        const TAG_EXPANSION: u8 = 0;
+        const TAG_NO_EXPANSION: u8 = 1;
+
+        // The root context carries no expansion information: the tag is all there is.
+        if *self == SyntaxContext::root() {
+            TAG_NO_EXPANSION.hash_stable(ctx, hasher);
+            return;
+        }
+
+        TAG_EXPANSION.hash_stable(ctx, hasher);
+        let (outer_expn, outer_transparency) = self.outer_mark();
+        outer_expn.hash_stable(ctx, hasher);
+        outer_transparency.hash_stable(ctx, hasher);
+    }
+}
+
+impl<CTX: HashStableContext> HashStable<CTX> for ExpnId {
+    /// Hashes the fingerprint of this id's `ExpnHash`; the root id hashes as `Fingerprint::ZERO`.
+    fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) {
+        assert_default_hashing_controls(ctx, "ExpnId");
+        let fingerprint = if *self != ExpnId::root() {
+            self.expn_hash().0
+        } else {
+            // Avoid fetching TLS storage for a trivial often-used value.
+            Fingerprint::ZERO
+        };
+
+        fingerprint.hash_stable(ctx, hasher);
+    }
+}
diff --git a/compiler/rustc_span/src/lev_distance.rs b/compiler/rustc_span/src/lev_distance.rs
new file mode 100644
index 000000000..61e4b98a8
--- /dev/null
+++ b/compiler/rustc_span/src/lev_distance.rs
@@ -0,0 +1,177 @@
+//! Levenshtein distances.
+//!
+//! The [Levenshtein distance] is a metric for measuring the difference between two strings.
+//!
+//! [Levenshtein distance]: https://en.wikipedia.org/wiki/Levenshtein_distance
+
+use crate::symbol::Symbol;
+use std::cmp;
+
+#[cfg(test)]
+mod tests;
+
+/// Computes the Levenshtein (edit) distance between `a` and `b`, counted in `char`s.
+///
+/// Returns `None` when the distance is greater than `limit`.
+pub fn lev_distance(a: &str, b: &str, limit: usize) -> Option<usize> {
+    let a_len = a.chars().count();
+    let b_len = b.chars().count();
+    // The difference in length is a lower bound for the edit distance.
+    let lower_bound = cmp::max(a_len, b_len) - cmp::min(a_len, b_len);
+
+    if lower_bound > limit {
+        return None;
+    }
+    if a_len == 0 || b_len == 0 {
+        // With one side empty the distance is exactly the length of the other side,
+        // and it is already known to be within the limit.
+        return Some(lower_bound);
+    }
+
+    // Single-row dynamic programming: `row[j]` is the distance between the part of `a`
+    // consumed so far and the first `j` chars of `b`.
+    let mut row: Vec<usize> = (0..=b_len).collect();
+
+    for (i, a_ch) in a.chars().enumerate() {
+        // `diagonal` tracks the previous row's value one column to the left.
+        let mut diagonal = i;
+        row[0] = i + 1;
+
+        for (j, b_ch) in b.chars().enumerate() {
+            let above = row[j + 1];
+            row[j + 1] = if a_ch == b_ch {
+                diagonal
+            } else {
+                cmp::min(cmp::min(diagonal, above), row[j]) + 1
+            };
+            diagonal = above;
+        }
+    }
+
+    let distance = row[b_len];
+    (distance <= limit).then_some(distance)
+}
+
+/// Scores how similar two words are, treating substring matches as more meaningful than a
+/// plain Levenshtein distance does. The lower the score, the closer the match; `0` is an
+/// identical match.
+///
+/// The score starts from the Levenshtein distance with the cost of the length difference
+/// removed. If that leaves 0 while the lengths differ, the pair is treated as a substring
+/// match and scored `1`. To avoid rewarding meaningless substrings, e.g. "in" in "shrink",
+/// the length difference is added back in full when one word is more than twice as long as
+/// the other; for words of similar length only half of it (rounded up) is added back.
+///
+/// Returns `None` if the score exceeds `limit`.
+pub fn lev_distance_with_substrings(a: &str, b: &str, limit: usize) -> Option<usize> {
+    let n = a.chars().count();
+    let m = b.chars().count();
+
+    // Check one isn't less than half the length of the other. If this is true then there is a
+    // big difference in length.
+    let big_len_diff = (n * 2) < m || (m * 2) < n;
+    let len_diff = if n < m { m - n } else { n - m };
+    // The limit is widened by the length difference, which is subtracted again below.
+    // Saturate so that a "no limit" value such as `usize::MAX` cannot overflow.
+    let lev = lev_distance(a, b, limit.saturating_add(len_diff))?;
+
+    // This is the crux, subtracting length difference means exact substring matches will now be 0.
+    // The edit distance is never smaller than the length difference, so this cannot underflow.
+    let score = lev - len_diff;
+
+    // If the score is 0 but the words have different lengths then it's a substring match not a full
+    // word match
+    let score = if score == 0 && len_diff > 0 && !big_len_diff {
+        1 // Exact substring match, but not a total word match so return non-zero
+    } else if !big_len_diff {
+        // Not a big difference in length, discount cost of length difference
+        score + (len_diff + 1) / 2
+    } else {
+        // A big difference in length, add back the difference in length to the score
+        score + len_diff
+    };
+
+    (score <= limit).then_some(score)
+}
+
+/// Finds the best match for the given word among `candidates`, where substrings are meaningful.
+///
+/// A version of [`find_best_match_for_name`] that uses [`lev_distance_with_substrings`] as the score
+/// for word similarity. `dist` is an optional distance limit; when it is `None` the limit defaults
+/// to one-third of the byte length of `lookup`, and never less than 1.
+///
+/// Besides the modified Levenshtein, we use case insensitive comparison to improve accuracy
+/// on an edge case with a lower(upper)case letters mismatch.
+pub fn find_best_match_for_name_with_substrings(
+ candidates: &[Symbol],
+ lookup: Symbol,
+ dist: Option<usize>,
+) -> Option<Symbol> {
+ // `true` selects the substring-aware score in the shared implementation.
+ find_best_match_for_name_impl(true, candidates, lookup, dist)
+}
+
+/// Finds the best match for a given word among `candidates`.
+///
+/// As a loose rule to avoid the obviously incorrect suggestions, it takes
+/// an optional limit for the maximum allowable edit distance (`dist`). When `dist`
+/// is `None` the limit defaults to one-third of the byte length of `lookup`, and
+/// never less than 1.
+///
+/// Besides Levenshtein, we use case insensitive comparison to improve accuracy
+/// on an edge case with a lower(upper)case letters mismatch.
+pub fn find_best_match_for_name(
+ candidates: &[Symbol],
+ lookup: Symbol,
+ dist: Option<usize>,
+) -> Option<Symbol> {
+ // `false` selects the plain Levenshtein score in the shared implementation.
+ find_best_match_for_name_impl(false, candidates, lookup, dist)
+}
+
+/// Shared implementation of the `find_best_match_for_name*` functions.
+///
+/// Matches are tried in this order:
+/// 1. an exact case-insensitive match,
+/// 2. the closest match by (optionally substring-aware) Levenshtein distance,
+/// 3. a match after sorting the `_`-separated words of both names.
+#[cold]
+fn find_best_match_for_name_impl(
+    use_substring_score: bool,
+    candidates: &[Symbol],
+    lookup: Symbol,
+    dist: Option<usize>,
+) -> Option<Symbol> {
+    let lookup = lookup.as_str();
+    let lookup_uppercase = lookup.to_uppercase();
+
+    let exact = candidates.iter().find(|c| c.as_str().to_uppercase() == lookup_uppercase);
+    if let Some(&symbol) = exact {
+        return Some(symbol);
+    }
+
+    // Without an explicit limit, allow one edit per three bytes of the lookup, at least one.
+    let mut max_dist = dist.unwrap_or_else(|| cmp::max(lookup.len(), 3) / 3);
+    let mut closest = None;
+    for &candidate in candidates {
+        let name = candidate.as_str();
+        let score = if use_substring_score {
+            lev_distance_with_substrings(lookup, name, max_dist)
+        } else {
+            lev_distance(lookup, name, max_dist)
+        };
+        match score {
+            None => {}
+            Some(0) => return Some(candidate),
+            Some(found) => {
+                // Later candidates must be strictly closer to replace this one.
+                max_dist = found - 1;
+                closest = Some(candidate);
+            }
+        }
+    }
+
+    closest.or_else(|| find_match_by_sorted_words(candidates, lookup))
+}
+
+/// Returns the last candidate whose `_`-separated words, once sorted, equal the sorted words
+/// of `lookup`, or `None` if there is no such candidate.
+fn find_match_by_sorted_words(iter_names: &[Symbol], lookup: &str) -> Option<Symbol> {
+    // The normalized lookup does not depend on the candidate, so build it only once.
+    let sorted_lookup = sort_by_words(lookup);
+    // Searching from the back keeps the "last match wins" result while stopping at the
+    // first hit instead of visiting every candidate.
+    iter_names
+        .iter()
+        .rev()
+        .find(|candidate| sort_by_words(candidate.as_str()) == sorted_lookup)
+        .copied()
+}
+
+/// Splits `name` on `_`, sorts the pieces and joins them with `_` again.
+fn sort_by_words(name: &str) -> String {
+    let mut words = name.split('_').collect::<Vec<&str>>();
+    // Equal `&str`s are indistinguishable, so an unstable sort gives the same result.
+    words.sort_unstable();
+    words.join("_")
+}
diff --git a/compiler/rustc_span/src/lev_distance/tests.rs b/compiler/rustc_span/src/lev_distance/tests.rs
new file mode 100644
index 000000000..b17d6588c
--- /dev/null
+++ b/compiler/rustc_span/src/lev_distance/tests.rs
@@ -0,0 +1,71 @@
+use super::*;
+
+#[test]
+fn test_lev_distance() {
+    use std::char::{from_u32, MAX};
+    // A single-char string is at distance 0 from itself whatever its UTF-8 byte length.
+    for s in (0..MAX as u32).filter_map(from_u32).map(|ch| ch.to_string()) {
+        assert_eq!(lev_distance(&s[..], &s[..], usize::MAX), Some(0));
+    }
+
+    let a = "\nMäry häd ä little lämb\n\nLittle lämb\n";
+    let b = "\nMary häd ä little lämb\n\nLittle lämb\n";
+    let c = "Mary häd ä little lämb\n\nLittle lämb\n";
+    // Each pair is checked in both argument orders.
+    for (x, y, expected) in [(a, b, 1), (a, c, 2), (b, c, 1)] {
+        assert_eq!(lev_distance(x, y, usize::MAX), Some(expected));
+        assert_eq!(lev_distance(y, x, usize::MAX), Some(expected));
+    }
+}
+
+#[test]
+fn test_lev_distance_limit() {
+    // (a, b, limit, expected)
+    let cases = [
+        ("abc", "abcd", 1, Some(1)),
+        ("abc", "abcd", 0, None),
+        ("abc", "xyz", 3, Some(3)),
+        ("abc", "xyz", 2, None),
+    ];
+    for (a, b, limit, expected) in cases {
+        assert_eq!(lev_distance(a, b, limit), expected);
+    }
+}
+
+#[test]
+fn test_method_name_similarity_score() {
+    // (a, b, limit, expected)
+    let cases = [
+        ("empty", "is_empty", 1, Some(1)),
+        ("shrunk", "rchunks", 2, None),
+        ("abc", "abcd", 1, Some(1)),
+        ("a", "abcd", 1, None),
+        ("edf", "eq", 1, None),
+        ("abc", "xyz", 3, Some(3)),
+        ("abcdef", "abcdef", 2, Some(0)),
+    ];
+    for (a, b, limit, expected) in cases {
+        assert_eq!(lev_distance_with_substrings(a, b, limit), expected);
+    }
+}
+
+#[test]
+fn test_find_best_match_for_name() {
+    use crate::create_default_session_globals_then;
+    create_default_session_globals_then(|| {
+        let sym = |s: &str| Symbol::intern(s);
+
+        // Closest candidate by edit distance; nothing close enough yields `None`.
+        let candidates = [sym("aaab"), sym("aaabc")];
+        assert_eq!(find_best_match_for_name(&candidates, sym("aaaa"), None), Some(sym("aaab")));
+        assert_eq!(find_best_match_for_name(&candidates, sym("1111111111"), None), None);
+
+        // Case-insensitive exact match, with the default limit.
+        let candidates = [sym("AAAA")];
+        assert_eq!(find_best_match_for_name(&candidates, sym("aaaa"), None), Some(sym("AAAA")));
+
+        // Case-insensitive exact match, with an explicit limit.
+        let candidates = [sym("AAAA")];
+        assert_eq!(find_best_match_for_name(&candidates, sym("aaaa"), Some(4)), Some(sym("AAAA")));
+
+        // Same words in a different order.
+        let candidates = [sym("a_longer_variable_name")];
+        assert_eq!(
+            find_best_match_for_name(&candidates, sym("a_variable_longer_name"), None),
+            Some(sym("a_longer_variable_name"))
+        );
+    })
+}
diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs
new file mode 100644
index 000000000..cf3069281
--- /dev/null
+++ b/compiler/rustc_span/src/lib.rs
@@ -0,0 +1,2116 @@
+//! Source positions and related helper functions.
+//!
+//! Important concepts in this module include:
+//!
+//! - the *span*, represented by [`SpanData`] and related types;
+//! - source code as represented by a [`SourceMap`]; and
+//! - interned strings, represented by [`Symbol`]s, with some common symbols available statically in the [`sym`] module.
+//!
+//! Unlike most compilers, the span contains not only the position in the source code, but also various other metadata,
+//! such as the edition and macro hygiene. This metadata is stored in [`SyntaxContext`] and [`ExpnData`].
+//!
+//! ## Note
+//!
+//! This API is completely unstable and subject to change.
+
+#![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")]
+#![feature(array_windows)]
+#![feature(let_else)]
+#![feature(if_let_guard)]
+#![feature(negative_impls)]
+#![feature(min_specialization)]
+#![feature(rustc_attrs)]
+
+#[macro_use]
+extern crate rustc_macros;
+
+#[macro_use]
+extern crate tracing;
+
+use rustc_data_structures::AtomicRef;
+use rustc_macros::HashStable_Generic;
+use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
+
+mod caching_source_map_view;
+pub mod source_map;
+pub use self::caching_source_map_view::CachingSourceMapView;
+use source_map::SourceMap;
+
+pub mod edition;
+use edition::Edition;
+pub mod hygiene;
+use hygiene::Transparency;
+pub use hygiene::{DesugaringKind, ExpnKind, MacroKind};
+pub use hygiene::{ExpnData, ExpnHash, ExpnId, LocalExpnId, SyntaxContext};
+use rustc_data_structures::stable_hasher::HashingControls;
+pub mod def_id;
+use def_id::{CrateNum, DefId, DefPathHash, LocalDefId, LOCAL_CRATE};
+pub mod lev_distance;
+mod span_encoding;
+pub use span_encoding::{Span, DUMMY_SP};
+
+pub mod symbol;
+pub use symbol::{sym, Symbol};
+
+mod analyze_source_file;
+pub mod fatal_error;
+
+pub mod profiling;
+
+use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
+use rustc_data_structures::sync::{Lock, Lrc};
+
+use std::borrow::Cow;
+use std::cmp::{self, Ordering};
+use std::fmt;
+use std::hash::Hash;
+use std::ops::{Add, Range, Sub};
+use std::path::{Path, PathBuf};
+use std::str::FromStr;
+use std::sync::Arc;
+
+use md5::Digest;
+use md5::Md5;
+use sha1::Sha1;
+use sha2::Sha256;
+
+use tracing::debug;
+
+#[cfg(test)]
+mod tests;
+
+// Per-session global variables: this struct is stored in thread-local storage
+// in such a way that it is accessible without any kind of handle to all
+// threads within the compilation session, but is not accessible outside the
+// session.
+pub struct SessionGlobals {
+ // Symbol interner; `new` starts it from `symbol::Interner::fresh()`.
+ symbol_interner: symbol::Interner,
+ // Span interner behind a lock; `new` starts it from `SpanInterner::default()`.
+ span_interner: Lock<span_encoding::SpanInterner>,
+ // Hygiene data behind a lock; `new` builds it from the session edition.
+ hygiene_data: Lock<hygiene::HygieneData>,
+ // Optional source map behind a lock; `new` leaves it as `None`.
+ source_map: Lock<Option<Lrc<SourceMap>>>,
+}
+
+impl SessionGlobals {
+    /// Creates a fresh set of session globals whose hygiene data is initialized for `edition`.
+    pub fn new(edition: Edition) -> SessionGlobals {
+        let symbol_interner = symbol::Interner::fresh();
+        let span_interner = Lock::new(span_encoding::SpanInterner::default());
+        let hygiene_data = Lock::new(hygiene::HygieneData::new(edition));
+        // No source map is attached at construction time.
+        let source_map = Lock::new(None);
+
+        SessionGlobals { symbol_interner, span_interner, hygiene_data, source_map }
+    }
+}
+
+/// Runs `f` with a freshly created [`SessionGlobals`] for `edition` installed in
+/// `SESSION_GLOBALS`, and returns what `f` returns.
+///
+/// # Panics
+///
+/// Panics if `SESSION_GLOBALS` is already set.
+#[inline]
+pub fn create_session_globals_then<R>(edition: Edition, f: impl FnOnce() -> R) -> R {
+    let already_set = SESSION_GLOBALS.is_set();
+    assert!(
+        !already_set,
+        "SESSION_GLOBALS should never be overwritten! \
+         Use another thread if you need another SessionGlobals"
+    );
+    let globals = SessionGlobals::new(edition);
+    SESSION_GLOBALS.set(&globals, f)
+}
+
+/// Runs `f` with the caller-provided `session_globals` installed in `SESSION_GLOBALS`,
+/// and returns what `f` returns.
+///
+/// # Panics
+///
+/// Panics if `SESSION_GLOBALS` is already set.
+#[inline]
+pub fn set_session_globals_then<R>(session_globals: &SessionGlobals, f: impl FnOnce() -> R) -> R {
+    let already_set = SESSION_GLOBALS.is_set();
+    assert!(
+        !already_set,
+        "SESSION_GLOBALS should never be overwritten! \
+         Use another thread if you need another SessionGlobals"
+    );
+    SESSION_GLOBALS.set(session_globals, f)
+}
+
+/// Same as `create_session_if_not_set_then`, using `edition::DEFAULT_EDITION` as the edition.
+#[inline]
+pub fn create_default_session_if_not_set_then<R, F>(f: F) -> R
+where
+ F: FnOnce(&SessionGlobals) -> R,
+{
+ create_session_if_not_set_then(edition::DEFAULT_EDITION, f)
+}
+
+/// Runs `f` against the current session globals, creating them first when none are set.
+///
+/// If `SESSION_GLOBALS` is already set, `f` receives the existing globals and `edition` is
+/// not used. Otherwise a new [`SessionGlobals`] for `edition` is installed for the duration
+/// of the call.
+#[inline]
+pub fn create_session_if_not_set_then<R, F>(edition: Edition, f: F) -> R
+where
+    F: FnOnce(&SessionGlobals) -> R,
+{
+    if SESSION_GLOBALS.is_set() {
+        return SESSION_GLOBALS.with(f);
+    }
+
+    let fresh_globals = SessionGlobals::new(edition);
+    SESSION_GLOBALS.set(&fresh_globals, || SESSION_GLOBALS.with(f))
+}
+
+/// Runs `f` with a reference to the value currently stored in `SESSION_GLOBALS`.
+// NOTE(review): presumably this panics when no session globals are set (scoped-tls `with`) — confirm.
+#[inline]
+pub fn with_session_globals<R, F>(f: F) -> R
+where
+ F: FnOnce(&SessionGlobals) -> R,
+{
+ SESSION_GLOBALS.with(f)
+}
+
+/// Same as `create_session_globals_then`, using `edition::DEFAULT_EDITION` as the edition.
+#[inline]
+pub fn create_default_session_globals_then<R>(f: impl FnOnce() -> R) -> R {
+ create_session_globals_then(edition::DEFAULT_EDITION, f)
+}
+
+// Scoped thread-local slot holding the `SessionGlobals` of the running session.
+//
+// If this ever becomes non thread-local, `decode_syntax_context`
+// and `decode_expn_id` will need to be updated to handle concurrent
+// deserialization.
+scoped_tls::scoped_thread_local!(static SESSION_GLOBALS: SessionGlobals);
+
+// FIXME: We should use this enum or something like it to get rid of the
+// use of magic `/rust/1.x/...` paths across the board.
+/// The name of a file given as a path: either a plain local path or a path that has been
+/// remapped to a stable virtual name.
+#[derive(Debug, Eq, PartialEq, Clone, Ord, PartialOrd)]
+#[derive(Decodable)]
+pub enum RealFileName {
+ /// A path that has not been remapped.
+ LocalPath(PathBuf),
+ /// For remapped paths (namely paths into libstd that have been mapped
+ /// to the appropriate spot on the local host's file system, and local file
+ /// system paths that have been remapped with `FilePathMapping`).
+ Remapped {
+ /// `local_path` is the (host-dependent) local path to the file. This is
+ /// None if the file was imported from another crate
+ local_path: Option<PathBuf>,
+ /// `virtual_name` is the stable path rustc will store internally within
+ /// build artifacts.
+ virtual_name: PathBuf,
+ },
+}
+
+impl Hash for RealFileName {
+    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+        // Only the remapped (virtualized) path is hashed when there is one, so that #70924
+        // cannot happen again: virtualized paths to sysroot crates (/rust/$hash or
+        // /rust/$version) stay stable even when the corresponding local path changes.
+        let stable_path = self.remapped_path_if_available();
+        stable_path.hash(state)
+    }
+}
+
+// Equivalent to `#[derive(Encodable)]` except for one added assertion.
+impl<S: Encoder> Encodable<S> for RealFileName {
+    fn encode(&self, encoder: &mut S) {
+        match self {
+            RealFileName::LocalPath(local_path) => {
+                encoder.emit_enum_variant(0, |encoder| local_path.encode(encoder))
+            }
+            RealFileName::Remapped { local_path, virtual_name } => {
+                encoder.emit_enum_variant(1, |encoder| {
+                    // For privacy and build reproducibility, a host-dependent path must not be
+                    // embedded in artifacts once it has been remapped by --remap-path-prefix.
+                    assert!(local_path.is_none());
+                    local_path.encode(encoder);
+                    virtual_name.encode(encoder);
+                })
+            }
+        }
+    }
+}
+
+impl RealFileName {
+    /// Returns the path to read from the local file system, if one is known.
+    ///
+    /// Do not embed this in build artifacts; use `remapped_path_if_available()` for that.
+    pub fn local_path(&self) -> Option<&Path> {
+        match self {
+            RealFileName::LocalPath(path) => Some(path.as_path()),
+            RealFileName::Remapped { local_path, .. } => local_path.as_deref(),
+        }
+    }
+
+    /// Consuming variant of `local_path()`: returns the owned local path, if one is known.
+    ///
+    /// Do not embed this in build artifacts; use `remapped_path_if_available()` for that.
+    pub fn into_local_path(self) -> Option<PathBuf> {
+        match self {
+            RealFileName::LocalPath(path) => Some(path),
+            RealFileName::Remapped { local_path, .. } => local_path,
+        }
+    }
+
+    /// Returns the path suitable for embedding into build artifacts.
+    ///
+    /// This is the virtual name for a remapped file and the local path otherwise. A remapped
+    /// path will not correspond to a valid file system path: see `local_path_if_available()`
+    /// for something that is more likely to point into the local host file system.
+    pub fn remapped_path_if_available(&self) -> &Path {
+        match self {
+            RealFileName::LocalPath(path) => path.as_path(),
+            RealFileName::Remapped { virtual_name, .. } => virtual_name.as_path(),
+        }
+    }
+
+    /// Returns the local path when one is known and the remapped name otherwise.
+    ///
+    /// Do not embed this in build artifacts; use `remapped_path_if_available()` for that.
+    pub fn local_path_if_available(&self) -> &Path {
+        match self {
+            RealFileName::LocalPath(path) => path.as_path(),
+            RealFileName::Remapped { local_path: Some(path), .. } => path.as_path(),
+            RealFileName::Remapped { local_path: None, virtual_name } => virtual_name.as_path(),
+        }
+    }
+
+    /// Renders the path selected by `display_pref`, replacing invalid Unicode lossily.
+    pub fn to_string_lossy(&self, display_pref: FileNameDisplayPreference) -> Cow<'_, str> {
+        let chosen = match display_pref {
+            FileNameDisplayPreference::Local => self.local_path_if_available(),
+            FileNameDisplayPreference::Remapped => self.remapped_path_if_available(),
+        };
+        chosen.to_string_lossy()
+    }
+}
+
+/// Differentiates between real files and common virtual files.
+///
+/// The `u64` payloads of the virtual variants are produced by the `*_source_code`
+/// constructors on `FileName`, which hash the source text.
+#[derive(Debug, Eq, PartialEq, Clone, Ord, PartialOrd, Hash)]
+#[derive(Decodable, Encodable)]
+pub enum FileName {
+ /// A file named by a (possibly remapped) path.
+ Real(RealFileName),
+ /// Call to `quote!`.
+ QuoteExpansion(u64),
+ /// Command line.
+ Anon(u64),
+ /// Hack in `src/librustc_ast/parse.rs`.
+ // FIXME(jseyfried)
+ MacroExpansion(u64),
+ /// Displayed as `<proc-macro source code>`; presumably source text supplied by a proc macro.
+ ProcMacroSourceCode(u64),
+ /// Strings provided as `--cfg [cfgspec]` stored in a `crate_cfg`.
+ CfgSpec(u64),
+ /// Strings provided as crate attributes in the CLI.
+ CliCrateAttr(u64),
+ /// Custom sources for explicit parser calls from plugins and drivers.
+ Custom(String),
+ /// The path and `line` value passed to `FileName::doc_test_source_code`.
+ DocTest(PathBuf, isize),
+ /// Post-substitution inline assembly from LLVM.
+ InlineAsm(u64),
+}
+
+impl From<PathBuf> for FileName {
+    /// Wraps `p` as a real, non-remapped file name.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the lossy string form of `p` ends with `>`.
+    fn from(p: PathBuf) -> Self {
+        assert!(!p.to_string_lossy().ends_with('>'));
+        let real_name = RealFileName::LocalPath(p);
+        FileName::Real(real_name)
+    }
+}
+
+/// Selects which path of a `RealFileName` is used when it is rendered as a string.
+#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
+pub enum FileNameDisplayPreference {
+ /// Render `RealFileName::remapped_path_if_available()`.
+ Remapped,
+ /// Render `RealFileName::local_path_if_available()`.
+ Local,
+}
+
+/// Display adapter that pairs a borrowed `FileName` with a display preference.
+///
+/// Values are created by `FileName::prefer_remapped`, `FileName::prefer_local` and
+/// `FileName::display`.
+pub struct FileNameDisplay<'a> {
+ // The file name being displayed.
+ inner: &'a FileName,
+ // Which path to show when `inner` is a `FileName::Real`.
+ display_pref: FileNameDisplayPreference,
+}
+
+impl fmt::Display for FileNameDisplay<'_> {
+    /// Writes real files and doctests as paths, and every virtual source as a label in
+    /// angle brackets.
+    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        use FileName::*;
+        // Variants that carry text are written directly; the rest map to a fixed label.
+        let label = match self.inner {
+            Real(name) => return write!(fmt, "{}", name.to_string_lossy(self.display_pref)),
+            Custom(s) => return write!(fmt, "<{}>", s),
+            DocTest(path, _) => return write!(fmt, "{}", path.display()),
+            QuoteExpansion(_) => "<quote expansion>",
+            MacroExpansion(_) => "<macro expansion>",
+            Anon(_) => "<anon>",
+            ProcMacroSourceCode(_) => "<proc-macro source code>",
+            CfgSpec(_) => "<cfgspec>",
+            CliCrateAttr(_) => "<crate attribute>",
+            InlineAsm(_) => "<inline asm>",
+        };
+        fmt.write_str(label)
+    }
+}
+
+impl<'a> FileNameDisplay<'a> {
+    /// Returns the displayed name as a string.
+    ///
+    /// Real files go through `RealFileName::to_string_lossy` with the stored preference;
+    /// every other kind is formatted through `Display` into an owned string.
+    pub fn to_string_lossy(&self) -> Cow<'a, str> {
+        if let FileName::Real(real) = self.inner {
+            real.to_string_lossy(self.display_pref)
+        } else {
+            Cow::from(self.to_string())
+        }
+    }
+}
+
+impl FileName {
+    /// Returns `true` only for [`FileName::Real`]; every virtual source kind yields `false`.
+    pub fn is_real(&self) -> bool {
+        use FileName::*;
+        // Every variant is listed (no `_` arm) so that adding one forces a decision here.
+        match *self {
+            Real(_) => true,
+            Anon(_)
+            | MacroExpansion(_)
+            | ProcMacroSourceCode(_)
+            | CfgSpec(_)
+            | CliCrateAttr(_)
+            | Custom(_)
+            | QuoteExpansion(_)
+            | DocTest(_, _)
+            | InlineAsm(_) => false,
+        }
+    }
+
+    /// Returns a display adapter that shows the remapped path of a real file.
+    pub fn prefer_remapped(&self) -> FileNameDisplay<'_> {
+        FileNameDisplay { inner: self, display_pref: FileNameDisplayPreference::Remapped }
+    }
+
+    /// Returns a display adapter that shows the local path of a real file when one is known.
+    // This may include transient local filesystem information.
+    // Must not be embedded in build outputs.
+    pub fn prefer_local(&self) -> FileNameDisplay<'_> {
+        FileNameDisplay { inner: self, display_pref: FileNameDisplayPreference::Local }
+    }
+
+    /// Returns a display adapter using the given display preference.
+    pub fn display(&self, display_pref: FileNameDisplayPreference) -> FileNameDisplay<'_> {
+        FileNameDisplay { inner: self, display_pref }
+    }
+
+    /// Builds a [`FileName::MacroExpansion`] named by the hash of `src`.
+    pub fn macro_expansion_source_code(src: &str) -> FileName {
+        let mut hasher = StableHasher::new();
+        src.hash(&mut hasher);
+        FileName::MacroExpansion(hasher.finish())
+    }
+
+    /// Builds a [`FileName::Anon`] named by the hash of `src`.
+    pub fn anon_source_code(src: &str) -> FileName {
+        let mut hasher = StableHasher::new();
+        src.hash(&mut hasher);
+        FileName::Anon(hasher.finish())
+    }
+
+    /// Builds a [`FileName::ProcMacroSourceCode`] named by the hash of `src`.
+    pub fn proc_macro_source_code(src: &str) -> FileName {
+        let mut hasher = StableHasher::new();
+        src.hash(&mut hasher);
+        FileName::ProcMacroSourceCode(hasher.finish())
+    }
+
+    /// Builds a [`FileName::CfgSpec`] named by the hash of `src`.
+    pub fn cfg_spec_source_code(src: &str) -> FileName {
+        let mut hasher = StableHasher::new();
+        src.hash(&mut hasher);
+        // Must be `CfgSpec`, not `QuoteExpansion`: the variant decides how the name is
+        // displayed (`<cfgspec>`) and keeps cfg specs distinct from quote expansions.
+        FileName::CfgSpec(hasher.finish())
+    }
+
+    /// Builds a [`FileName::CliCrateAttr`] named by the hash of `src`.
+    pub fn cli_crate_attr_source_code(src: &str) -> FileName {
+        let mut hasher = StableHasher::new();
+        src.hash(&mut hasher);
+        FileName::CliCrateAttr(hasher.finish())
+    }
+
+    /// Builds a [`FileName::DocTest`] from the given path and line.
+    pub fn doc_test_source_code(path: PathBuf, line: isize) -> FileName {
+        FileName::DocTest(path, line)
+    }
+
+    /// Builds a [`FileName::InlineAsm`] named by the hash of `src`.
+    pub fn inline_asm_source_code(src: &str) -> FileName {
+        let mut hasher = StableHasher::new();
+        src.hash(&mut hasher);
+        FileName::InlineAsm(hasher.finish())
+    }
+}
+
+/// Represents a span.
+///
+/// Spans represent a region of code, used for error reporting. Positions in spans
+/// are *absolute* positions from the beginning of the [`SourceMap`], not positions
+/// relative to [`SourceFile`]s. Methods on the `SourceMap` can be used to relate spans back
+/// to the original source.
+///
+/// You must be careful if the span crosses more than one file, since you will not be
+/// able to use many of the functions on spans in source_map and you cannot assume
+/// that the length of the span is equal to `span.hi - span.lo`; there may be space in the
+/// [`BytePos`] range between files.
+///
+/// `SpanData` is public because `Span` uses a thread-local interner and can't be
+/// sent to other threads, but some pieces of performance infra run in a separate thread.
+/// Using `Span` is generally preferred.
+#[derive(Clone, Copy, Hash, PartialEq, Eq)]
+pub struct SpanData {
+ /// Start position of the span.
+ pub lo: BytePos,
+ /// End position of the span; equal to `lo` for an empty span.
+ pub hi: BytePos,
+ /// Information about where the macro came from, if this piece of
+ /// code was created by a macro expansion.
+ pub ctxt: SyntaxContext,
+ // NOTE(review): presumably the local definition this span belongs to — confirm.
+ // Ignored by the `Ord` implementation.
+ pub parent: Option<LocalDefId>,
+}
+
+// Order spans by position in the file.
+impl Ord for SpanData {
+    /// Compares by `lo`, then `hi`, then `ctxt`.
+    fn cmp(&self, other: &Self) -> Ordering {
+        // Both sides are destructured in full so that a new field cannot be forgotten here.
+        // `parent` is skipped on purpose: `LocalDefId` does not implement `Ord`, and the
+        // other fields are enough to determine in-file order.
+        let SpanData { lo, hi, ctxt, parent: _ } = self;
+        let SpanData { lo: other_lo, hi: other_hi, ctxt: other_ctxt, parent: _ } = other;
+
+        lo.cmp(other_lo).then_with(|| hi.cmp(other_hi)).then_with(|| ctxt.cmp(other_ctxt))
+    }
+}
+
+impl PartialOrd for SpanData {
+ // Always `Some`: delegates to the `Ord` implementation.
+ fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+ Some(self.cmp(other))
+ }
+}
+
+impl SpanData {
+    /// Builds the `Span` for this data.
+    #[inline]
+    pub fn span(&self) -> Span {
+        let SpanData { lo, hi, ctxt, parent } = *self;
+        Span::new(lo, hi, ctxt, parent)
+    }
+    /// Builds a `Span` from this data with `lo` replaced.
+    #[inline]
+    pub fn with_lo(&self, lo: BytePos) -> Span {
+        SpanData { lo, ..*self }.span()
+    }
+    /// Builds a `Span` from this data with `hi` replaced.
+    #[inline]
+    pub fn with_hi(&self, hi: BytePos) -> Span {
+        SpanData { hi, ..*self }.span()
+    }
+    /// Builds a `Span` from this data with the syntax context replaced.
+    #[inline]
+    pub fn with_ctxt(&self, ctxt: SyntaxContext) -> Span {
+        SpanData { ctxt, ..*self }.span()
+    }
+    /// Builds a `Span` from this data with the parent replaced.
+    #[inline]
+    pub fn with_parent(&self, parent: Option<LocalDefId>) -> Span {
+        SpanData { parent, ..*self }.span()
+    }
+    /// Returns `true` if this is a dummy span with any hygienic context,
+    /// i.e. both positions are zero.
+    #[inline]
+    pub fn is_dummy(self) -> bool {
+        (self.lo.0, self.hi.0) == (0, 0)
+    }
+    /// Returns `true` if `self` fully encloses `other`.
+    pub fn contains(self, other: Self) -> bool {
+        let starts_inside = other.lo >= self.lo;
+        let ends_inside = other.hi <= self.hi;
+        starts_inside && ends_inside
+    }
+}
+
+// The interner is pointed to by a thread-local value which is only set on the main thread
+// when parallelization is disabled. So we don't allow `Span` to transfer between threads
+// to avoid panics and other errors, even though it would be memory safe to do so.
+#[cfg(not(parallel_compiler))]
+impl !Send for Span {}
+#[cfg(not(parallel_compiler))]
+impl !Sync for Span {}
+
+impl PartialOrd for Span {
+    /// Compares the `SpanData` of both spans.
+    fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
+        let lhs_data = self.data();
+        let rhs_data = rhs.data();
+        lhs_data.partial_cmp(&rhs_data)
+    }
+}
+impl Ord for Span {
+    /// Compares the `SpanData` of both spans.
+    fn cmp(&self, rhs: &Self) -> Ordering {
+        let lhs_data = self.data();
+        let rhs_data = rhs.data();
+        lhs_data.cmp(&rhs_data)
+    }
+}
+
+impl Span {
+ #[inline]
+ pub fn lo(self) -> BytePos {
+ self.data().lo
+ }
+ #[inline]
+ pub fn with_lo(self, lo: BytePos) -> Span {
+ self.data().with_lo(lo)
+ }
+ #[inline]
+ pub fn hi(self) -> BytePos {
+ self.data().hi
+ }
+ #[inline]
+ pub fn with_hi(self, hi: BytePos) -> Span {
+ self.data().with_hi(hi)
+ }
+ #[inline]
+ pub fn ctxt(self) -> SyntaxContext {
+ self.data_untracked().ctxt
+ }
+ pub fn eq_ctxt(self, other: Span) -> bool {
+ self.data_untracked().ctxt == other.data_untracked().ctxt
+ }
+ #[inline]
+ pub fn with_ctxt(self, ctxt: SyntaxContext) -> Span {
+ self.data_untracked().with_ctxt(ctxt)
+ }
+ #[inline]
+ pub fn parent(self) -> Option<LocalDefId> {
+ self.data().parent
+ }
+ #[inline]
+ pub fn with_parent(self, ctxt: Option<LocalDefId>) -> Span {
+ self.data().with_parent(ctxt)
+ }
+
+ /// Returns `true` if this is a dummy span with any hygienic context.
+ #[inline]
+ pub fn is_dummy(self) -> bool {
+ self.data_untracked().is_dummy()
+ }
+
+ /// Returns `true` if this span comes from a macro or desugaring.
+ #[inline]
+ pub fn from_expansion(self) -> bool {
+ self.ctxt() != SyntaxContext::root()
+ }
+
+ /// Returns `true` if `span` originates in a derive-macro's expansion.
+ pub fn in_derive_expansion(self) -> bool {
+ matches!(self.ctxt().outer_expn_data().kind, ExpnKind::Macro(MacroKind::Derive, _))
+ }
+
+ /// Gate suggestions that would not be appropriate in a context the user didn't write.
+ pub fn can_be_used_for_suggestions(self) -> bool {
+ !self.from_expansion()
+ // FIXME: If this span comes from a `derive` macro but it points at code the user wrote,
+ // the callsite span and the span will be pointing at different places. It also means that
+ // we can safely provide suggestions on this span.
+ || (matches!(self.ctxt().outer_expn_data().kind, ExpnKind::Macro(MacroKind::Derive, _))
+ && self.parent_callsite().map(|p| (p.lo(), p.hi())) != Some((self.lo(), self.hi())))
+ }
+
+ #[inline]
+ pub fn with_root_ctxt(lo: BytePos, hi: BytePos) -> Span {
+ Span::new(lo, hi, SyntaxContext::root(), None)
+ }
+
+ /// Returns a new span representing an empty span at the beginning of this span.
+ #[inline]
+ pub fn shrink_to_lo(self) -> Span {
+ let span = self.data_untracked();
+ span.with_hi(span.lo)
+ }
+ /// Returns a new span representing an empty span at the end of this span.
+ #[inline]
+ pub fn shrink_to_hi(self) -> Span {
+ let span = self.data_untracked();
+ span.with_lo(span.hi)
+ }
+
+ #[inline]
+ /// Returns `true` if `hi == lo`.
+ pub fn is_empty(self) -> bool {
+ let span = self.data_untracked();
+ span.hi == span.lo
+ }
+
+ /// Returns `self` if `self` is not the dummy span, and `other` otherwise.
+ pub fn substitute_dummy(self, other: Span) -> Span {
+ if self.is_dummy() { other } else { self }
+ }
+
+ /// Returns `true` if `self` fully encloses `other`.
+ pub fn contains(self, other: Span) -> bool {
+ let span = self.data();
+ let other = other.data();
+ span.contains(other)
+ }
+
+ /// Returns `true` if `self` touches `other`.
+ pub fn overlaps(self, other: Span) -> bool {
+ let span = self.data();
+ let other = other.data();
+ span.lo < other.hi && other.lo < span.hi
+ }
+
+ /// Returns `true` if the spans are equal with regards to the source text.
+ ///
+ /// Use this instead of `==` when either span could be generated code,
+ /// and you only care that they point to the same bytes of source text.
+ pub fn source_equal(self, other: Span) -> bool {
+ let span = self.data();
+ let other = other.data();
+ span.lo == other.lo && span.hi == other.hi
+ }
+
+ /// Returns `Some(span)`, where the start is trimmed by the end of `other`.
+ pub fn trim_start(self, other: Span) -> Option<Span> {
+ let span = self.data();
+ let other = other.data();
+ if span.hi > other.hi { Some(span.with_lo(cmp::max(span.lo, other.hi))) } else { None }
+ }
+
+ /// Returns the source span -- this is either the supplied span, or the span for
+ /// the macro callsite that expanded to it.
+ pub fn source_callsite(self) -> Span {
+ let expn_data = self.ctxt().outer_expn_data();
+ if !expn_data.is_root() { expn_data.call_site.source_callsite() } else { self }
+ }
+
+ /// The `Span` for the tokens in the previous macro expansion from which `self` was generated,
+ /// if any.
+ pub fn parent_callsite(self) -> Option<Span> {
+ let expn_data = self.ctxt().outer_expn_data();
+ if !expn_data.is_root() { Some(expn_data.call_site) } else { None }
+ }
+
+ /// Walk down the expansion ancestors to find a span that's contained within `outer`.
+ pub fn find_ancestor_inside(mut self, outer: Span) -> Option<Span> {
+ while !outer.contains(self) {
+ self = self.parent_callsite()?;
+ }
+ Some(self)
+ }
+
+ /// Edition of the crate from which this span came.
+ pub fn edition(self) -> edition::Edition {
+ self.ctxt().edition()
+ }
+
+ #[inline]
+ pub fn rust_2015(self) -> bool {
+ self.edition() == edition::Edition::Edition2015
+ }
+
+ #[inline]
+ pub fn rust_2018(self) -> bool {
+ self.edition() >= edition::Edition::Edition2018
+ }
+
+ #[inline]
+ pub fn rust_2021(self) -> bool {
+ self.edition() >= edition::Edition::Edition2021
+ }
+
+ #[inline]
+ pub fn rust_2024(self) -> bool {
+ self.edition() >= edition::Edition::Edition2024
+ }
+
+ /// Returns the source callee.
+ ///
+ /// Returns `None` if the supplied span has no expansion trace,
+ /// else returns the `ExpnData` for the macro definition
+ /// corresponding to the source callsite.
+ pub fn source_callee(self) -> Option<ExpnData> {
+ fn source_callee(expn_data: ExpnData) -> ExpnData {
+ let next_expn_data = expn_data.call_site.ctxt().outer_expn_data();
+ if !next_expn_data.is_root() { source_callee(next_expn_data) } else { expn_data }
+ }
+ let expn_data = self.ctxt().outer_expn_data();
+ if !expn_data.is_root() { Some(source_callee(expn_data)) } else { None }
+ }
+
+ /// Checks if a span is "internal" to a macro in which `#[unstable]`
+ /// items can be used (that is, a macro marked with
+ /// `#[allow_internal_unstable]`).
+ pub fn allows_unstable(self, feature: Symbol) -> bool {
+ self.ctxt()
+ .outer_expn_data()
+ .allow_internal_unstable
+ .map_or(false, |features| features.iter().any(|&f| f == feature))
+ }
+
+ /// Checks if this span arises from a compiler desugaring of kind `kind`.
+ pub fn is_desugaring(self, kind: DesugaringKind) -> bool {
+ match self.ctxt().outer_expn_data().kind {
+ ExpnKind::Desugaring(k) => k == kind,
+ _ => false,
+ }
+ }
+
+ /// Returns the compiler desugaring that created this span, or `None`
+ /// if this span is not from a desugaring.
+ pub fn desugaring_kind(self) -> Option<DesugaringKind> {
+ match self.ctxt().outer_expn_data().kind {
+ ExpnKind::Desugaring(k) => Some(k),
+ _ => None,
+ }
+ }
+
+ /// Checks if a span is "internal" to a macro in which `unsafe`
+ /// can be used without triggering the `unsafe_code` lint
+ /// (that is, a macro marked with `#[allow_internal_unsafe]`).
+ pub fn allows_unsafe(self) -> bool {
+     let expn_data = self.ctxt().outer_expn_data();
+     expn_data.allow_internal_unsafe
+ }
+
+ /// Returns an iterator over the `ExpnData` of the expansions this span
+ /// came from, starting with the span's own outer expansion and following
+ /// each `call_site` outward until the root expansion is reached.
+ ///
+ /// An expansion whose call site is `source_equal` to the span examined on
+ /// the previous step is treated as recursive and is not yielded.
+ pub fn macro_backtrace(mut self) -> impl Iterator<Item = ExpnData> {
+ let mut prev_span = DUMMY_SP;
+ std::iter::from_fn(move || {
+ loop {
+ let expn_data = self.ctxt().outer_expn_data();
+ if expn_data.is_root() {
+ return None;
+ }
+
+ let is_recursive = expn_data.call_site.source_equal(prev_span);
+
+ // Step outward: remember the span just examined and continue from
+ // the call site of its expansion.
+ prev_span = self;
+ self = expn_data.call_site;
+
+ // Don't print recursive invocations.
+ if !is_recursive {
+ return Some(expn_data);
+ }
+ }
+ })
+ }
+
+ /// Returns a `Span` that would enclose both `self` and `end`.
+ ///
+ /// ```text
+ ///     ____             ___
+ ///     self lorem ipsum end
+ ///     ^^^^^^^^^^^^^^^^^^^^
+ /// ```
+ ///
+ /// If the two spans have different syntax contexts and exactly one of them
+ /// is the root context, the other span is returned unchanged.
+ pub fn to(self, end: Span) -> Span {
+     let lhs = self.data();
+     let rhs = end.data();
+     let root = SyntaxContext::root();
+     // FIXME(jseyfried): `self.ctxt` should always equal `end.ctxt` here (cf. issue #23480).
+     // Return the macro span on its own to avoid weird diagnostic output. It is preferable to
+     // have an incomplete span than a completely nonsensical one.
+     if lhs.ctxt != rhs.ctxt {
+         if lhs.ctxt == root {
+             return end;
+         }
+         if rhs.ctxt == root {
+             return self;
+         }
+         // Both spans fall within a macro.
+         // FIXME(estebank): check if it is the *same* macro.
+     }
+     let lo = cmp::min(lhs.lo, rhs.lo);
+     let hi = cmp::max(lhs.hi, rhs.hi);
+     let ctxt = if lhs.ctxt == root { rhs.ctxt } else { lhs.ctxt };
+     // The parent is kept only when both spans agree on it.
+     let parent = if lhs.parent == rhs.parent { lhs.parent } else { None };
+     Span::new(lo, hi, ctxt, parent)
+ }
+
+ /// Returns a `Span` between the end of `self` to the beginning of `end`.
+ ///
+ /// ```text
+ ///     ____             ___
+ ///     self lorem ipsum end
+ ///         ^^^^^^^^^^^^^
+ /// ```
+ pub fn between(self, end: Span) -> Span {
+     let lhs = self.data();
+     let rhs = end.data();
+     let ctxt = if rhs.ctxt == SyntaxContext::root() { rhs.ctxt } else { lhs.ctxt };
+     // The parent is kept only when both spans agree on it.
+     let parent = if lhs.parent == rhs.parent { lhs.parent } else { None };
+     Span::new(lhs.hi, rhs.lo, ctxt, parent)
+ }
+
+ /// Returns a `Span` from the beginning of `self` until the beginning of `end`.
+ ///
+ /// ```text
+ ///     ____             ___
+ ///     self lorem ipsum end
+ ///     ^^^^^^^^^^^^^^^^^
+ /// ```
+ ///
+ /// If the two spans have different syntax contexts and exactly one of them
+ /// is the root context, the other span is returned unchanged.
+ pub fn until(self, end: Span) -> Span {
+     // Most of this function's body is copied from `to`.
+     // We can't just do `self.to(end.shrink_to_lo())`,
+     // because to also does some magic where it uses min/max so
+     // it can handle overlapping spans. Some advanced mis-use of
+     // `until` with different ctxts makes this visible.
+     let lhs = self.data();
+     let rhs = end.data();
+     let root = SyntaxContext::root();
+     // FIXME(jseyfried): `self.ctxt` should always equal `end.ctxt` here (cf. issue #23480).
+     // Return the macro span on its own to avoid weird diagnostic output. It is preferable to
+     // have an incomplete span than a completely nonsensical one.
+     if lhs.ctxt != rhs.ctxt {
+         if lhs.ctxt == root {
+             return end;
+         }
+         if rhs.ctxt == root {
+             return self;
+         }
+         // Both spans fall within a macro.
+         // FIXME(estebank): check if it is the *same* macro.
+     }
+     let ctxt = if rhs.ctxt == root { rhs.ctxt } else { lhs.ctxt };
+     let parent = if lhs.parent == rhs.parent { lhs.parent } else { None };
+     Span::new(lhs.lo, rhs.lo, ctxt, parent)
+ }
+
+ /// Builds a sub-span whose bounds are `inner.start` and `inner.end`, both
+ /// taken as byte offsets from the start (`lo`) of `self`. The syntax
+ /// context and parent of `self` are kept.
+ pub fn from_inner(self, inner: InnerSpan) -> Span {
+     let outer = self.data();
+     let lo = outer.lo + BytePos::from_usize(inner.start);
+     let hi = outer.lo + BytePos::from_usize(inner.end);
+     Span::new(lo, hi, outer.ctxt, outer.parent)
+ }
+
+ /// Equivalent of `Span::def_site` from the proc macro API,
+ /// except that the location is taken from the `self` span.
+ ///
+ /// Uses `Transparency::Opaque` for the applied mark.
+ pub fn with_def_site_ctxt(self, expn_id: ExpnId) -> Span {
+     let transparency = Transparency::Opaque;
+     self.with_ctxt_from_mark(expn_id, transparency)
+ }
+
+ /// Equivalent of `Span::call_site` from the proc macro API,
+ /// except that the location is taken from the `self` span.
+ ///
+ /// Uses `Transparency::Transparent` for the applied mark.
+ pub fn with_call_site_ctxt(self, expn_id: ExpnId) -> Span {
+     let transparency = Transparency::Transparent;
+     self.with_ctxt_from_mark(expn_id, transparency)
+ }
+
+ /// Equivalent of `Span::mixed_site` from the proc macro API,
+ /// except that the location is taken from the `self` span.
+ ///
+ /// Uses `Transparency::SemiTransparent` for the applied mark.
+ pub fn with_mixed_site_ctxt(self, expn_id: ExpnId) -> Span {
+     let transparency = Transparency::SemiTransparent;
+     self.with_ctxt_from_mark(expn_id, transparency)
+ }
+
+ /// Produces a span with the same location as `self` and context produced by a macro with the
+ /// given ID and transparency, assuming that macro was defined directly and not produced by
+ /// some other macro (which is the case for built-in and procedural macros).
+ pub fn with_ctxt_from_mark(self, expn_id: ExpnId, transparency: Transparency) -> Span {
+     // The mark is applied to the root context, not to the context of `self`.
+     let marked_ctxt = SyntaxContext::root().apply_mark(expn_id, transparency);
+     self.with_ctxt(marked_ctxt)
+ }
+
+ /// Returns a copy of this span whose syntax context is the current context
+ /// with the mark (`expn_id`, `transparency`) applied to it.
+ #[inline]
+ pub fn apply_mark(self, expn_id: ExpnId, transparency: Transparency) -> Span {
+     let data = self.data();
+     let marked_ctxt = data.ctxt.apply_mark(expn_id, transparency);
+     data.with_ctxt(marked_ctxt)
+ }
+
+ /// Calls `SyntaxContext::remove_mark` on this span's context, writes the
+ /// updated context back into `self` (bounds and parent unchanged), and
+ /// returns the `ExpnId` that call produced.
+ #[inline]
+ pub fn remove_mark(&mut self) -> ExpnId {
+ let mut span = self.data();
+ let mark = span.ctxt.remove_mark();
+ *self = Span::new(span.lo, span.hi, span.ctxt, span.parent);
+ mark
+ }
+
+ /// Calls `SyntaxContext::adjust` with `expn_id` on this span's context,
+ /// writes the updated context back into `self` (bounds and parent
+ /// unchanged), and returns that call's result.
+ #[inline]
+ pub fn adjust(&mut self, expn_id: ExpnId) -> Option<ExpnId> {
+ let mut span = self.data();
+ let mark = span.ctxt.adjust(expn_id);
+ *self = Span::new(span.lo, span.hi, span.ctxt, span.parent);
+ mark
+ }
+
+ /// Calls `SyntaxContext::normalize_to_macros_2_0_and_adjust` with `expn_id`
+ /// on this span's context, writes the updated context back into `self`
+ /// (bounds and parent unchanged), and returns that call's result.
+ #[inline]
+ pub fn normalize_to_macros_2_0_and_adjust(&mut self, expn_id: ExpnId) -> Option<ExpnId> {
+ let mut span = self.data();
+ let mark = span.ctxt.normalize_to_macros_2_0_and_adjust(expn_id);
+ *self = Span::new(span.lo, span.hi, span.ctxt, span.parent);
+ mark
+ }
+
+ /// Calls `SyntaxContext::glob_adjust` with `expn_id` and `glob_span` on
+ /// this span's context, writes the updated context back into `self`
+ /// (bounds and parent unchanged), and returns that call's result.
+ #[inline]
+ pub fn glob_adjust(&mut self, expn_id: ExpnId, glob_span: Span) -> Option<Option<ExpnId>> {
+ let mut span = self.data();
+ let mark = span.ctxt.glob_adjust(expn_id, glob_span);
+ *self = Span::new(span.lo, span.hi, span.ctxt, span.parent);
+ mark
+ }
+
+ /// Calls `SyntaxContext::reverse_glob_adjust` with `expn_id` and
+ /// `glob_span` on this span's context, writes the updated context back
+ /// into `self` (bounds and parent unchanged), and returns that call's
+ /// result.
+ #[inline]
+ pub fn reverse_glob_adjust(
+ &mut self,
+ expn_id: ExpnId,
+ glob_span: Span,
+ ) -> Option<Option<ExpnId>> {
+ let mut span = self.data();
+ let mark = span.ctxt.reverse_glob_adjust(expn_id, glob_span);
+ *self = Span::new(span.lo, span.hi, span.ctxt, span.parent);
+ mark
+ }
+
+ /// Returns a copy of this span whose syntax context has been passed
+ /// through `SyntaxContext::normalize_to_macros_2_0`.
+ #[inline]
+ pub fn normalize_to_macros_2_0(self) -> Span {
+     let data = self.data();
+     let normalized = data.ctxt.normalize_to_macros_2_0();
+     data.with_ctxt(normalized)
+ }
+
+ /// Returns a copy of this span whose syntax context has been passed
+ /// through `SyntaxContext::normalize_to_macro_rules`.
+ #[inline]
+ pub fn normalize_to_macro_rules(self) -> Span {
+     let data = self.data();
+     let normalized = data.ctxt.normalize_to_macro_rules();
+     data.with_ctxt(normalized)
+ }
+}
+
+/// The default `Span` is `DUMMY_SP`.
+impl Default for Span {
+ fn default() -> Self {
+ DUMMY_SP
+ }
+}
+
+/// Overridable (`default fn`) encoding of a `Span`: writes `lo` followed by
+/// `hi`. The syntax context and parent are not written.
+impl<E: Encoder> Encodable<E> for Span {
+ default fn encode(&self, s: &mut E) {
+ let span = self.data();
+ span.lo.encode(s);
+ span.hi.encode(s);
+ }
+}
+/// Overridable (`default fn`) decoding of a `Span`: reads `lo` then `hi` and
+/// builds a span with the root syntax context and no parent.
+impl<D: Decoder> Decodable<D> for Span {
+ default fn decode(s: &mut D) -> Span {
+ let lo = Decodable::decode(s);
+ let hi = Decodable::decode(s);
+
+ Span::new(lo, hi, SyntaxContext::root(), None)
+ }
+}
+
+/// Calls the provided closure, using the provided `SourceMap` to format
+/// any spans that are debug-printed during the closure's execution.
+///
+/// Normally, the global `TyCtxt` is used to retrieve the `SourceMap`
+/// (see `rustc_interface::callbacks::span_debug1`). However, some parts
+/// of the compiler (e.g. `rustc_parse`) may debug-print `Span`s before
+/// a `TyCtxt` is available. In this case, we fall back to
+/// the `SourceMap` provided to this function. If that is not available,
+/// we fall back to printing the raw `Span` field values.
+///
+/// The source map is stored in the session globals for the duration of `f`
+/// and cleared again afterwards.
+pub fn with_source_map<T, F: FnOnce() -> T>(source_map: Lrc<SourceMap>, f: F) -> T {
+ with_session_globals(|session_globals| {
+ *session_globals.source_map.borrow_mut() = Some(source_map);
+ });
+ // Drop guard: clears the stored source map when it goes out of scope,
+ // whether `f` returns normally or unwinds.
+ struct ClearSourceMap;
+ impl Drop for ClearSourceMap {
+ fn drop(&mut self) {
+ with_session_globals(|session_globals| {
+ session_globals.source_map.borrow_mut().take();
+ });
+ }
+ }
+
+ let _guard = ClearSourceMap;
+ f()
+}
+
+/// Debug output for `Span`.
+///
+/// When the session globals hold a `SourceMap`, prints the span's diagnostic
+/// string followed by its syntax context in parentheses. Otherwise prints the
+/// raw `lo`, `hi` and `ctxt` values.
+impl fmt::Debug for Span {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ with_session_globals(|session_globals| {
+ if let Some(source_map) = &*session_globals.source_map.borrow() {
+ write!(f, "{} ({:?})", source_map.span_to_diagnostic_string(*self), self.ctxt())
+ } else {
+ // No source map available: fall back to the raw field values.
+ f.debug_struct("Span")
+ .field("lo", &self.lo())
+ .field("hi", &self.hi())
+ .field("ctxt", &self.ctxt())
+ .finish()
+ }
+ })
+ }
+}
+
+/// Debug output for `SpanData`: identical to the `Debug` output of the `Span`
+/// built from the same `lo`, `hi`, `ctxt` and `parent`.
+impl fmt::Debug for SpanData {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let as_span = Span::new(self.lo, self.hi, self.ctxt, self.parent);
+        fmt::Debug::fmt(&as_span, f)
+    }
+}
+
+/// Identifies an offset of a multi-byte character in a `SourceFile`.
+#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug)]
+pub struct MultiByteChar {
+ /// The absolute offset of the character in the `SourceMap`.
+ pub pos: BytePos,
+ /// The number of bytes the character occupies; always `>= 2`.
+ pub bytes: u8,
+}
+
+/// Identifies an offset of a non-narrow character in a `SourceFile`.
+///
+/// Each variant carries the position of the character.
+#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug)]
+pub enum NonNarrowChar {
+ /// Represents a zero-width character.
+ ZeroWidth(BytePos),
+ /// Represents a wide (full-width) character.
+ Wide(BytePos),
+ /// Represents a tab character, represented visually with a width of 4 characters.
+ Tab(BytePos),
+}
+
+impl NonNarrowChar {
+ /// Builds the variant that corresponds to a display `width` of 0, 2 or 4.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `width` is any other value.
+ fn new(pos: BytePos, width: usize) -> Self {
+ match width {
+ 0 => NonNarrowChar::ZeroWidth(pos),
+ 2 => NonNarrowChar::Wide(pos),
+ 4 => NonNarrowChar::Tab(pos),
+ _ => panic!("width {} given for non-narrow character", width),
+ }
+ }
+
+ /// Returns the absolute offset of the character in the `SourceMap`.
+ pub fn pos(&self) -> BytePos {
+ match *self {
+ NonNarrowChar::ZeroWidth(p) | NonNarrowChar::Wide(p) | NonNarrowChar::Tab(p) => p,
+ }
+ }
+
+ /// Returns the width of the character: 0 (zero-width), 2 (wide) or 4 (tab).
+ pub fn width(&self) -> usize {
+ match *self {
+ NonNarrowChar::ZeroWidth(_) => 0,
+ NonNarrowChar::Wide(_) => 2,
+ NonNarrowChar::Tab(_) => 4,
+ }
+ }
+}
+
+/// Shifts the stored position forward by `rhs`, keeping the variant.
+impl Add<BytePos> for NonNarrowChar {
+    type Output = Self;
+
+    fn add(self, rhs: BytePos) -> Self {
+        use NonNarrowChar::{Tab, Wide, ZeroWidth};
+        match self {
+            ZeroWidth(p) => ZeroWidth(p + rhs),
+            Wide(p) => Wide(p + rhs),
+            Tab(p) => Tab(p + rhs),
+        }
+    }
+}
+
+/// Shifts the stored position backward by `rhs`, keeping the variant.
+impl Sub<BytePos> for NonNarrowChar {
+    type Output = Self;
+
+    fn sub(self, rhs: BytePos) -> Self {
+        use NonNarrowChar::{Tab, Wide, ZeroWidth};
+        match self {
+            ZeroWidth(p) => ZeroWidth(p - rhs),
+            Wide(p) => Wide(p - rhs),
+            Tab(p) => Tab(p - rhs),
+        }
+    }
+}
+
+/// Identifies an offset of a character that was normalized away from `SourceFile`.
+#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug)]
+pub struct NormalizedPos {
+ /// The absolute offset of the character in the `SourceMap`.
+ pub pos: BytePos,
+ /// The difference between original and normalized string at position,
+ /// i.e. the number of bytes removed so far.
+ pub diff: u32,
+}
+
+/// Where the source text of a `SourceFile` comes from when it is not held in
+/// `SourceFile::src`.
+#[derive(PartialEq, Eq, Clone, Debug)]
+pub enum ExternalSource {
+ /// No external source has to be loaded, since the `SourceFile` represents a local crate.
+ Unneeded,
+ /// The source belongs to another crate; `kind` tracks whether it has been loaded.
+ Foreign {
+ kind: ExternalSourceKind,
+ /// This SourceFile's byte-offset within the source_map of its original crate.
+ original_start_pos: BytePos,
+ /// The end of this SourceFile within the source_map of its original crate.
+ original_end_pos: BytePos,
+ },
+}
+
+/// The state of the lazy external source loading mechanism of a `SourceFile`.
+#[derive(PartialEq, Eq, Clone, Debug)]
+pub enum ExternalSourceKind {
+ /// The external source has been loaded already.
+ Present(Lrc<String>),
+ /// No attempt has been made to load the external source.
+ AbsentOk,
+ /// A failed attempt has been made to load the external source.
+ AbsentErr,
+ // NOTE(review): undocumented upstream; presumably marks a source that never
+ // needs loading (cf. `ExternalSource::Unneeded`) — confirm against users.
+ Unneeded,
+}
+
+impl ExternalSource {
+    /// Returns the loaded source text, which exists only for a `Foreign`
+    /// source whose kind is `Present`; returns `None` in every other state.
+    pub fn get_source(&self) -> Option<&Lrc<String>> {
+        if let ExternalSource::Foreign { kind: ExternalSourceKind::Present(src), .. } = self {
+            Some(src)
+        } else {
+            None
+        }
+    }
+}
+
+/// Unit error type carrying no data.
+// NOTE(review): presumably reported when a source offset does not fit the
+// position type — confirm at the use sites, which are outside this excerpt.
+#[derive(Debug)]
+pub struct OffsetOverflowError;
+
+/// The hash algorithms that can be used for a `SourceFileHash`.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Encodable, Decodable)]
+#[derive(HashStable_Generic)]
+pub enum SourceFileHashAlgorithm {
+ /// MD5; produces a 16-byte hash.
+ Md5,
+ /// SHA-1; produces a 20-byte hash.
+ Sha1,
+ /// SHA-256; produces a 32-byte hash.
+ Sha256,
+}
+
+/// Parses the exact lowercase names `"md5"`, `"sha1"` and `"sha256"`.
+/// Any other input yields `Err(())`.
+impl FromStr for SourceFileHashAlgorithm {
+    type Err = ();
+
+    fn from_str(s: &str) -> Result<SourceFileHashAlgorithm, ()> {
+        let algorithm = match s {
+            "md5" => SourceFileHashAlgorithm::Md5,
+            "sha1" => SourceFileHashAlgorithm::Sha1,
+            "sha256" => SourceFileHashAlgorithm::Sha256,
+            _ => return Err(()),
+        };
+        Ok(algorithm)
+    }
+}
+
+/// The hash of the on-disk source file used for debug info.
+#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
+#[derive(HashStable_Generic, Encodable, Decodable)]
+pub struct SourceFileHash {
+ /// The algorithm that produced `value`.
+ pub kind: SourceFileHashAlgorithm,
+ /// Fixed 32-byte buffer. Only the leading bytes are written by the hash
+ /// (16 for MD5, 20 for SHA-1, 32 for SHA-256); the rest stay zero.
+ value: [u8; 32],
+}
+
+impl SourceFileHash {
+    /// Hashes `src` with the algorithm `kind`.
+    ///
+    /// The digest is written to the front of a zeroed 32-byte buffer; bytes
+    /// past the digest length stay zero.
+    pub fn new(kind: SourceFileHashAlgorithm, src: &str) -> SourceFileHash {
+        let mut result = SourceFileHash { kind, value: [0; 32] };
+        let digest_len = result.hash_len();
+        let out = &mut result.value[..digest_len];
+        let bytes = src.as_bytes();
+        match kind {
+            SourceFileHashAlgorithm::Md5 => out.copy_from_slice(&Md5::digest(bytes)),
+            SourceFileHashAlgorithm::Sha1 => out.copy_from_slice(&Sha1::digest(bytes)),
+            SourceFileHashAlgorithm::Sha256 => out.copy_from_slice(&Sha256::digest(bytes)),
+        }
+        result
+    }
+
+    /// Check if the stored hash matches the hash of the string.
+    ///
+    /// `src` is hashed with the same algorithm as `self`.
+    pub fn matches(&self, src: &str) -> bool {
+        let recomputed = Self::new(self.kind, src);
+        recomputed == *self
+    }
+
+    /// The bytes of the hash, without the unused tail of the buffer.
+    pub fn hash_bytes(&self) -> &[u8] {
+        &self.value[..self.hash_len()]
+    }
+
+    /// Digest length in bytes for this hash's algorithm.
+    fn hash_len(&self) -> usize {
+        match self.kind {
+            SourceFileHashAlgorithm::Md5 => 16,
+            SourceFileHashAlgorithm::Sha1 => 20,
+            SourceFileHashAlgorithm::Sha256 => 32,
+        }
+    }
+}
+
+/// The kind of debugger a visualizer file targets.
+#[derive(HashStable_Generic)]
+#[derive(Copy, PartialEq, PartialOrd, Clone, Ord, Eq, Hash, Debug, Encodable, Decodable)]
+pub enum DebuggerVisualizerType {
+ /// A Natvis visualizer.
+ Natvis,
+ /// A GDB pretty printer.
+ GdbPrettyPrinter,
+}
+
+/// A single debugger visualizer file.
+#[derive(HashStable_Generic)]
+#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Encodable, Decodable)]
+pub struct DebuggerVisualizerFile {
+ /// The complete debugger visualizer source, as raw bytes.
+ pub src: Arc<[u8]>,
+ /// Indicates which visualizer type this targets.
+ pub visualizer_type: DebuggerVisualizerType,
+}
+
+impl DebuggerVisualizerFile {
+    /// Creates a visualizer file from its source bytes and target type.
+    pub fn new(src: Arc<[u8]>, visualizer_type: DebuggerVisualizerType) -> Self {
+        Self { src, visualizer_type }
+    }
+}
+
+/// The line-start table of a `SourceFile`, in one of two representations.
+#[derive(Clone)]
+pub enum SourceFileLines {
+ /// The source file lines, in decoded (random-access) form.
+ Lines(Vec<BytePos>),
+
+ /// The source file lines, in undecoded difference list form.
+ Diffs(SourceFileDiffs),
+}
+
+impl SourceFileLines {
+    /// Returns `true` if the table is in the decoded `Lines` form.
+    pub fn is_lines(&self) -> bool {
+        match self {
+            SourceFileLines::Lines(_) => true,
+            SourceFileLines::Diffs(_) => false,
+        }
+    }
+}
+
+/// The source file lines in difference list form. This matches the form
+/// used within metadata, which saves space by exploiting the fact that the
+/// lines list is sorted and individual lines are usually not that long.
+///
+/// We read it directly from metadata and only decode it into `Lines` form
+/// when necessary. This is a significant performance win, especially for
+/// small crates where very little of `std`'s metadata is used.
+#[derive(Clone)]
+pub struct SourceFileDiffs {
+ /// Position of the first line. Note that this is always encoded as a
+ /// `BytePos` because it is often much larger than any of the
+ /// differences.
+ line_start: BytePos,
+
+ /// Always 1, 2, or 4. Always as small as possible, while being big
+ /// enough to hold the length of the longest line in the source file.
+ /// The 1 case is by far the most common.
+ bytes_per_diff: usize,
+
+ /// The number of diffs encoded in `raw_diffs`. Always one less than
+ /// the number of lines in the source file.
+ num_diffs: usize,
+
+ /// The diffs in "raw" form. Each segment of `bytes_per_diff` length
+ /// encodes one little-endian diff, so the total length is
+ /// `bytes_per_diff * num_diffs`. Note that they aren't LEB128
+ /// encoded. This makes for much faster decoding. Besides, the
+ /// bytes_per_diff==1 case is by far the most common, and LEB128
+ /// encoding has no effect on that case.
+ raw_diffs: Vec<u8>,
+}
+
+/// A single source in the [`SourceMap`].
+#[derive(Clone)]
+pub struct SourceFile {
+ /// The name of the file that the source came from. Source that doesn't
+ /// originate from files has names between angle brackets by convention
+ /// (e.g., `<anon>`).
+ pub name: FileName,
+ /// The complete source code.
+ pub src: Option<Lrc<String>>,
+ /// The source code's hash.
+ pub src_hash: SourceFileHash,
+ /// The external source code (used for external crates, which will have a `None`
+ /// value as `self.src`).
+ pub external_src: Lock<ExternalSource>,
+ /// The start position of this source in the `SourceMap`.
+ pub start_pos: BytePos,
+ /// The end position of this source in the `SourceMap`.
+ pub end_pos: BytePos,
+ /// Locations of line beginnings in the source code.
+ pub lines: Lock<SourceFileLines>,
+ /// Locations of multi-byte characters in the source code.
+ pub multibyte_chars: Vec<MultiByteChar>,
+ /// Width of characters that are not narrow in the source code.
+ pub non_narrow_chars: Vec<NonNarrowChar>,
+ /// Locations of characters removed during normalization.
+ pub normalized_pos: Vec<NormalizedPos>,
+ /// A hash of the filename, used for speeding up hashing in incremental compilation.
+ pub name_hash: u128,
+ /// Indicates which crate this `SourceFile` was imported from.
+ pub cnum: CrateNum,
+}
+
+/// Serializes a `SourceFile`.
+///
+/// Field order on the wire: `name`, `src_hash`, `start_pos`, `end_pos`, the
+/// line table (count, then for a non-empty table: bytes-per-diff, first line
+/// start, raw little-endian diffs), `multibyte_chars`, `non_narrow_chars`,
+/// `name_hash`, `normalized_pos`, `cnum`. The source text itself and
+/// `external_src` are not written.
+impl<S: Encoder> Encodable<S> for SourceFile {
+ fn encode(&self, s: &mut S) {
+ self.name.encode(s);
+ self.src_hash.encode(s);
+ self.start_pos.encode(s);
+ self.end_pos.encode(s);
+
+ // We are always in `Lines` form by the time we reach here.
+ assert!(self.lines.borrow().is_lines());
+ self.lines(|lines| {
+ // Store the length.
+ s.emit_u32(lines.len() as u32);
+
+ // Compute and store the difference list.
+ if lines.len() != 0 {
+ // The largest gap between consecutive line starts decides how
+ // many bytes each diff needs.
+ let max_line_length = if lines.len() == 1 {
+ 0
+ } else {
+ lines
+ .array_windows()
+ .map(|&[fst, snd]| snd - fst)
+ .map(|bp| bp.to_usize())
+ .max()
+ .unwrap()
+ };
+
+ let bytes_per_diff: usize = match max_line_length {
+ 0..=0xFF => 1,
+ 0x100..=0xFFFF => 2,
+ _ => 4,
+ };
+
+ // Encode the number of bytes used per diff.
+ s.emit_u8(bytes_per_diff as u8);
+
+ // Encode the first element.
+ lines[0].encode(s);
+
+ // Encode the difference list.
+ let diff_iter = lines.array_windows().map(|&[fst, snd]| snd - fst);
+ let num_diffs = lines.len() - 1;
+ let mut raw_diffs;
+ match bytes_per_diff {
+ 1 => {
+ raw_diffs = Vec::with_capacity(num_diffs);
+ for diff in diff_iter {
+ raw_diffs.push(diff.0 as u8);
+ }
+ }
+ 2 => {
+ raw_diffs = Vec::with_capacity(bytes_per_diff * num_diffs);
+ for diff in diff_iter {
+ raw_diffs.extend_from_slice(&(diff.0 as u16).to_le_bytes());
+ }
+ }
+ 4 => {
+ raw_diffs = Vec::with_capacity(bytes_per_diff * num_diffs);
+ for diff in diff_iter {
+ raw_diffs.extend_from_slice(&(diff.0 as u32).to_le_bytes());
+ }
+ }
+ // `bytes_per_diff` is only ever assigned 1, 2 or 4 above.
+ _ => unreachable!(),
+ }
+ s.emit_raw_bytes(&raw_diffs);
+ }
+ });
+
+ self.multibyte_chars.encode(s);
+ self.non_narrow_chars.encode(s);
+ self.name_hash.encode(s);
+ self.normalized_pos.encode(s);
+ self.cnum.encode(s);
+ }
+}
+
+/// Deserializes a `SourceFile`, reading the fields in the order the
+/// `Encodable` impl writes them.
+///
+/// A non-empty line table is kept in its undecoded `Diffs` form. The result
+/// has `src: None` and `external_src` set to `ExternalSource::Unneeded`.
+impl<D: Decoder> Decodable<D> for SourceFile {
+ fn decode(d: &mut D) -> SourceFile {
+ let name: FileName = Decodable::decode(d);
+ let src_hash: SourceFileHash = Decodable::decode(d);
+ let start_pos: BytePos = Decodable::decode(d);
+ let end_pos: BytePos = Decodable::decode(d);
+ let lines = {
+ let num_lines: u32 = Decodable::decode(d);
+ if num_lines > 0 {
+ // Read the number of bytes used per diff.
+ let bytes_per_diff = d.read_u8() as usize;
+
+ // Read the first element.
+ let line_start: BytePos = Decodable::decode(d);
+
+ // Read the difference list.
+ let num_diffs = num_lines as usize - 1;
+ let raw_diffs = d.read_raw_bytes(bytes_per_diff * num_diffs).to_vec();
+ SourceFileLines::Diffs(SourceFileDiffs {
+ line_start,
+ bytes_per_diff,
+ num_diffs,
+ raw_diffs,
+ })
+ } else {
+ // An empty table has nothing after the count.
+ SourceFileLines::Lines(vec![])
+ }
+ };
+ let multibyte_chars: Vec<MultiByteChar> = Decodable::decode(d);
+ let non_narrow_chars: Vec<NonNarrowChar> = Decodable::decode(d);
+ let name_hash: u128 = Decodable::decode(d);
+ let normalized_pos: Vec<NormalizedPos> = Decodable::decode(d);
+ let cnum: CrateNum = Decodable::decode(d);
+ SourceFile {
+ name,
+ start_pos,
+ end_pos,
+ src: None,
+ src_hash,
+ // Unused - the metadata decoder will construct
+ // a new SourceFile, filling in `external_src` properly
+ external_src: Lock::new(ExternalSource::Unneeded),
+ lines: Lock::new(lines),
+ multibyte_chars,
+ non_narrow_chars,
+ normalized_pos,
+ name_hash,
+ cnum,
+ }
+ }
+}
+
+/// Debug output for `SourceFile`: only the file name is shown.
+impl fmt::Debug for SourceFile {
+    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let name = &self.name;
+        write!(fmt, "SourceFile({:?})", name)
+    }
+}
+
+impl SourceFile {
+ /// Creates a `SourceFile` for the local crate from its name and source text.
+ ///
+ /// The content hash is taken from `src` as given; `src` is then normalized
+ /// and analyzed for line starts, multi-byte characters and non-narrow
+ /// characters. `start_pos` is the file's start position.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the end position (`start_pos` plus the normalized length)
+ /// exceeds `u32::MAX`.
+ pub fn new(
+ name: FileName,
+ mut src: String,
+ start_pos: BytePos,
+ hash_kind: SourceFileHashAlgorithm,
+ ) -> Self {
+ // Compute the file hash before any normalization.
+ let src_hash = SourceFileHash::new(hash_kind, &src);
+ let normalized_pos = normalize_src(&mut src, start_pos);
+
+ let name_hash = {
+ let mut hasher: StableHasher = StableHasher::new();
+ name.hash(&mut hasher);
+ hasher.finish::<u128>()
+ };
+ // `src.len()` is the length after normalization.
+ let end_pos = start_pos.to_usize() + src.len();
+ assert!(end_pos <= u32::MAX as usize);
+
+ let (lines, multibyte_chars, non_narrow_chars) =
+ analyze_source_file::analyze_source_file(&src, start_pos);
+
+ SourceFile {
+ name,
+ src: Some(Lrc::new(src)),
+ src_hash,
+ external_src: Lock::new(ExternalSource::Unneeded),
+ start_pos,
+ end_pos: Pos::from_usize(end_pos),
+ lines: Lock::new(SourceFileLines::Lines(lines)),
+ multibyte_chars,
+ non_narrow_chars,
+ normalized_pos,
+ name_hash,
+ cnum: LOCAL_CRATE,
+ }
+ }
+
+ /// Runs `f` on the table of line start positions and returns its result.
+ ///
+ /// If the table is still in `Diffs` form it is decoded first, and the
+ /// decoded `Lines` form is stored back so later calls skip the decoding.
+ ///
+ /// The `lines` lock is held mutably while `f` runs.
+ pub fn lines<F, R>(&self, f: F) -> R
+ where
+ F: FnOnce(&[BytePos]) -> R,
+ {
+ let mut guard = self.lines.borrow_mut();
+ match &*guard {
+ SourceFileLines::Lines(lines) => f(lines),
+ SourceFileLines::Diffs(SourceFileDiffs {
+ mut line_start,
+ bytes_per_diff,
+ num_diffs,
+ raw_diffs,
+ }) => {
+ // Convert from "diffs" form to "lines" form.
+ let num_lines = num_diffs + 1;
+ let mut lines = Vec::with_capacity(num_lines);
+ lines.push(line_start);
+
+ assert_eq!(*num_diffs, raw_diffs.len() / bytes_per_diff);
+ // Each diff is added to the running `line_start` to get the next
+ // line's start position.
+ match bytes_per_diff {
+ 1 => {
+ lines.extend(raw_diffs.into_iter().map(|&diff| {
+ line_start = line_start + BytePos(diff as u32);
+ line_start
+ }));
+ }
+ 2 => {
+ lines.extend((0..*num_diffs).map(|i| {
+ let pos = bytes_per_diff * i;
+ let bytes = [raw_diffs[pos], raw_diffs[pos + 1]];
+ let diff = u16::from_le_bytes(bytes);
+ line_start = line_start + BytePos(diff as u32);
+ line_start
+ }));
+ }
+ 4 => {
+ lines.extend((0..*num_diffs).map(|i| {
+ let pos = bytes_per_diff * i;
+ let bytes = [
+ raw_diffs[pos],
+ raw_diffs[pos + 1],
+ raw_diffs[pos + 2],
+ raw_diffs[pos + 3],
+ ];
+ let diff = u32::from_le_bytes(bytes);
+ line_start = line_start + BytePos(diff);
+ line_start
+ }));
+ }
+ _ => unreachable!(),
+ }
+ let res = f(&lines);
+ // Cache the decoded form.
+ *guard = SourceFileLines::Lines(lines);
+ res
+ }
+ }
+ }
+
+ /// Returns the `BytePos` of the beginning of the current line.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `lookup_line(pos)` finds no line, i.e. the file has no lines
+ /// or `pos` lies before the first line start.
+ pub fn line_begin_pos(&self, pos: BytePos) -> BytePos {
+ let line_index = self.lookup_line(pos).unwrap();
+ self.lines(|lines| lines[line_index])
+ }
+
+ /// Add externally loaded source.
+ ///
+ /// `get_src` is called only while the external source is `Foreign` and
+ /// still in the `AbsentOk` state. If it returns `None`, or returns text
+ /// whose hash does not match `src_hash`, this is treated as an error and
+ /// the state becomes `AbsentErr`, so the load is not attempted again.
+ /// On success the normalized text is stored as `Present`.
+ ///
+ /// The return value signifies whether some kind of source is present.
+ pub fn add_external_src<F>(&self, get_src: F) -> bool
+ where
+     F: FnOnce() -> Option<String>,
+ {
+     if matches!(
+         *self.external_src.borrow(),
+         ExternalSource::Foreign { kind: ExternalSourceKind::AbsentOk, .. }
+     ) {
+         // The lock is not held while `get_src` runs.
+         let src = get_src();
+         let mut external_src = self.external_src.borrow_mut();
+         // Check that no one else has provided the source while we were getting it.
+         if let ExternalSource::Foreign {
+             kind: src_kind @ ExternalSourceKind::AbsentOk, ..
+         } = &mut *external_src
+         {
+             match src {
+                 // The src_hash needs to be computed on the pre-normalized src.
+                 Some(mut src) if self.src_hash.matches(&src) => {
+                     normalize_src(&mut src, BytePos::from_usize(0));
+                     *src_kind = ExternalSourceKind::Present(Lrc::new(src));
+                     true
+                 }
+                 // Nothing was supplied, or the supplied text has the wrong
+                 // hash: record the failure so that later calls do not keep
+                 // re-running `get_src`.
+                 _ => {
+                     *src_kind = ExternalSourceKind::AbsentErr;
+                     false
+                 }
+             }
+         } else {
+             self.src.is_some() || external_src.get_source().is_some()
+         }
+     } else {
+         self.src.is_some() || self.external_src.borrow().get_source().is_some()
+     }
+ }
+
+ /// Gets a line from the list of pre-computed line-beginnings.
+ /// The line number here is 0-based.
+ ///
+ /// The returned text runs from the line start up to, but not including,
+ /// the next `'\n'` (or to the end of the source). Returns `None` if
+ /// `line_number` is out of range or no source text is available.
+ pub fn get_line(&self, line_number: usize) -> Option<Cow<'_, str>> {
+ fn get_until_newline(src: &str, begin: usize) -> &str {
+ // We can't use `lines.get(line_number+1)` because we might
+ // be parsing when we call this function and thus the current
+ // line is the last one we have line info for.
+ let slice = &src[begin..];
+ match slice.find('\n') {
+ Some(e) => &slice[..e],
+ None => slice,
+ }
+ }
+
+ // Byte offset of the line start relative to the start of this file.
+ let begin = {
+ let line = self.lines(|lines| lines.get(line_number).copied())?;
+ let begin: BytePos = line - self.start_pos;
+ begin.to_usize()
+ };
+
+ if let Some(ref src) = self.src {
+ Some(Cow::from(get_until_newline(src, begin)))
+ } else if let Some(src) = self.external_src.borrow().get_source() {
+ // The external source sits behind a lock, so the line is copied out.
+ Some(Cow::Owned(String::from(get_until_newline(src, begin))))
+ } else {
+ None
+ }
+ }
+
+ /// Returns whether this file's name is a real one, as reported by
+ /// `FileName::is_real`.
+ pub fn is_real_file(&self) -> bool {
+ self.name.is_real()
+ }
+
+ /// Returns `true` if this file holds no source text of its own
+ /// (`src` is `None`).
+ #[inline]
+ pub fn is_imported(&self) -> bool {
+     matches!(self.src, None)
+ }
+
+ /// Returns the number of entries in the line-start table.
+ pub fn count_lines(&self) -> usize {
+     self.lines(<[BytePos]>::len)
+ }
+
+ /// Finds the line containing the given position. The return value is the
+ /// index into the `lines` array of this `SourceFile`, not the 1-based line
+ /// number. If the source_file is empty or the position is located before the
+ /// first line, `None` is returned.
+ pub fn lookup_line(&self, pos: BytePos) -> Option<usize> {
+     self.lines(|lines| {
+         let found = lines.binary_search(&pos);
+         match found {
+             // `pos` is exactly a line start.
+             Ok(idx) => Some(idx),
+             // `pos` would be inserted at `idx`, so it belongs to the line
+             // before that; an insertion point of 0 means no line.
+             Err(idx) => idx.checked_sub(1),
+         }
+     })
+ }
+
+ /// Returns the byte range of the line at `line_index` (0-based): from its
+ /// start to the start of the next line, or to `end_pos` for the last line.
+ ///
+ /// For an empty file the range `start_pos..end_pos` is returned without
+ /// looking at `line_index`.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the file is not empty and `line_index` is out of range.
+ pub fn line_bounds(&self, line_index: usize) -> Range<BytePos> {
+     if self.is_empty() {
+         return self.start_pos..self.end_pos;
+     }
+
+     self.lines(|lines| {
+         assert!(line_index < lines.len());
+         let line_end = lines.get(line_index + 1).copied().unwrap_or(self.end_pos);
+         lines[line_index]..line_end
+     })
+ }
+
+ /// Returns whether or not the file contains the given `SourceMap` byte
+ /// position. The position one past the end of the file is considered to be
+ /// contained by the file. This implies that files for which `is_empty`
+ /// returns true still contain one byte position according to this function.
+ #[inline]
+ pub fn contains(&self, byte_pos: BytePos) -> bool {
+     (self.start_pos..=self.end_pos).contains(&byte_pos)
+ }
+
+ /// Returns `true` if the file spans no bytes, i.e. its start and end
+ /// positions coincide.
+ #[inline]
+ pub fn is_empty(&self) -> bool {
+     self.end_pos == self.start_pos
+ }
+
+ /// Calculates the original byte position relative to the start of the file
+ /// based on the given byte position.
+ pub fn original_relative_byte_pos(&self, pos: BytePos) -> BytePos {
+     // Diff before any records is 0. Otherwise use the previously recorded
+     // diff as that applies to the following characters until a new diff
+     // is recorded.
+     let lookup = self.normalized_pos.binary_search_by(|np| np.pos.cmp(&pos));
+     let diff = match lookup {
+         Ok(i) => self.normalized_pos[i].diff,
+         Err(0) => 0,
+         Err(i) => self.normalized_pos[i - 1].diff,
+     };
+
+     BytePos::from_u32(pos.0 - self.start_pos.0 + diff)
+ }
+
+ /// Converts an absolute `BytePos` to a `CharPos` relative to the `SourceFile`.
+ ///
+ /// The result is the byte offset from `start_pos` minus the extra bytes of
+ /// every recorded multi-byte character located before `bpos`.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `bpos` falls inside a recorded multi-byte character, or if
+ /// `bpos` is smaller than `start_pos` plus the extra bytes counted.
+ pub fn bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos {
+ // The number of extra bytes due to multibyte chars in the `SourceFile`.
+ let mut total_extra_bytes = 0;
+
+ // The scan stops at the first entry that is not before `bpos`.
+ for mbc in self.multibyte_chars.iter() {
+ debug!("{}-byte char at {:?}", mbc.bytes, mbc.pos);
+ if mbc.pos < bpos {
+ // Every character is at least one byte, so we only
+ // count the actual extra bytes.
+ total_extra_bytes += mbc.bytes as u32 - 1;
+ // We should never see a byte position in the middle of a
+ // character.
+ assert!(bpos.to_u32() >= mbc.pos.to_u32() + mbc.bytes as u32);
+ } else {
+ break;
+ }
+ }
+
+ assert!(self.start_pos.to_u32() + total_extra_bytes <= bpos.to_u32());
+ CharPos(bpos.to_usize() - self.start_pos.to_usize() - total_extra_bytes as usize)
+ }
+
+ /// Looks up the file's (1-based) line number and (0-based `CharPos`) column offset, for a
+ /// given `BytePos`.
+ ///
+ /// If no line contains `pos`, the line number is `0` and the second
+ /// element is the character position relative to the start of the file
+ /// rather than a column.
+ pub fn lookup_file_pos(&self, pos: BytePos) -> (usize, CharPos) {
+ let chpos = self.bytepos_to_file_charpos(pos);
+ match self.lookup_line(pos) {
+ Some(a) => {
+ let line = a + 1; // Line numbers start at 1
+ let linebpos = self.lines(|lines| lines[a]);
+ let linechpos = self.bytepos_to_file_charpos(linebpos);
+ // Column = character position minus the character position of
+ // the line start.
+ let col = chpos - linechpos;
+ debug!("byte pos {:?} is on the line at byte pos {:?}", pos, linebpos);
+ debug!("char pos {:?} is on the line at char pos {:?}", chpos, linechpos);
+ debug!("byte is on line: {}", line);
+ assert!(chpos >= linechpos);
+ (line, col)
+ }
+ None => (0, chpos),
+ }
+ }
+
+ /// Looks up the file's (1-based) line number, (0-based `CharPos`) column offset, and (0-based)
+ /// column offset when displayed, for a given `BytePos`.
+ ///
+ /// The display column replaces each non-narrow character before `pos`
+ /// (counted from the line start, or from the start of the file when no
+ /// line was found) by its recorded width instead of counting it as 1.
+ pub fn lookup_file_pos_with_col_display(&self, pos: BytePos) -> (usize, CharPos, usize) {
+ let (line, col_or_chpos) = self.lookup_file_pos(pos);
+ if line > 0 {
+ let col = col_or_chpos;
+ let linebpos = self.lines(|lines| lines[line - 1]);
+ let col_display = {
+ // Index range of the non-narrow characters in `linebpos..pos`.
+ let start_width_idx = self
+ .non_narrow_chars
+ .binary_search_by_key(&linebpos, |x| x.pos())
+ .unwrap_or_else(|x| x);
+ let end_width_idx = self
+ .non_narrow_chars
+ .binary_search_by_key(&pos, |x| x.pos())
+ .unwrap_or_else(|x| x);
+ let special_chars = end_width_idx - start_width_idx;
+ let non_narrow: usize = self.non_narrow_chars[start_width_idx..end_width_idx]
+ .iter()
+ .map(|x| x.width())
+ .sum();
+ col.0 - special_chars + non_narrow
+ };
+ (line, col, col_display)
+ } else {
+ // No line was found: count from the start of the file instead.
+ let chpos = col_or_chpos;
+ let col_display = {
+ let end_width_idx = self
+ .non_narrow_chars
+ .binary_search_by_key(&pos, |x| x.pos())
+ .unwrap_or_else(|x| x);
+ let non_narrow: usize =
+ self.non_narrow_chars[0..end_width_idx].iter().map(|x| x.width()).sum();
+ chpos.0 - end_width_idx + non_narrow
+ };
+ (0, chpos, col_display)
+ }
+ }
+}
+
+/// Normalizes the source code and records the normalizations.
+///
+/// Removes a leading BOM and rewrites `\r\n` as `\n` in place. The returned
+/// records have their positions shifted by `start_pos`.
+fn normalize_src(src: &mut String, start_pos: BytePos) -> Vec<NormalizedPos> {
+    let mut records = Vec::new();
+    remove_bom(src, &mut records);
+    normalize_newlines(src, &mut records);
+
+    // Offset all the positions by start_pos to match the final file positions.
+    records.iter_mut().for_each(|np| np.pos.0 += start_pos.0);
+
+    records
+}
+
+/// Removes UTF-8 BOM, if any.
+///
+/// When a BOM is removed, a record at position 0 with a diff of 3 (the
+/// number of bytes dropped) is appended to `normalized_pos`.
+fn remove_bom(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>) {
+    if !src.starts_with('\u{feff}') {
+        return;
+    }
+    src.drain(..3);
+    let record = NormalizedPos { pos: BytePos(0), diff: 3 };
+    normalized_pos.push(record);
+}
+
+/// Replaces `\r\n` with `\n` in-place in `src`.
+///
+/// A `\r` that is not immediately followed by `\n` is left untouched. For
+/// every removed `\r` a record is appended to `normalized_pos`; its `diff`
+/// continues from the `diff` of the last record already present.
+fn normalize_newlines(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>) {
+ // Fast path: without any `\r` there is nothing to do.
+ if !src.as_bytes().contains(&b'\r') {
+ return;
+ }
+
+ // We replace `\r\n` with `\n` in-place, which doesn't break utf-8 encoding.
+ // While we *can* call `as_mut_vec` and do surgery on the live string
+ // directly, let's rather steal the contents of `src`. This makes the code
+ // safe even if a panic occurs.
+
+ let mut buf = std::mem::replace(src, String::new()).into_bytes();
+ // Number of `\r` bytes dropped so far; the kept bytes are shifted left by
+ // this amount.
+ let mut gap_len = 0;
+ let mut tail = buf.as_mut_slice();
+ let mut cursor = 0;
+ let original_gap = normalized_pos.last().map_or(0, |l| l.diff);
+ loop {
+ let idx = match find_crlf(&tail[gap_len..]) {
+ None => tail.len(),
+ Some(idx) => idx + gap_len,
+ };
+ // Close the gap by moving the bytes before the next `\r\n` (or the
+ // rest of the buffer) to the front of `tail`.
+ tail.copy_within(gap_len..idx, 0);
+ tail = &mut tail[idx - gap_len..];
+ if tail.len() == gap_len {
+ break;
+ }
+ cursor += idx - gap_len;
+ gap_len += 1;
+ normalized_pos.push(NormalizedPos {
+ pos: BytePos::from_usize(cursor + 1),
+ diff: original_gap + gap_len as u32,
+ });
+ }
+
+ // Account for removed `\r`.
+ // After `set_len`, `buf` is guaranteed to contain utf-8 again.
+ let new_len = buf.len() - gap_len;
+ // SAFETY: `new_len` is not larger than the current length, so `set_len`
+ // only shrinks the vector. The kept prefix is the original UTF-8 text with
+ // some `\r` bytes removed, which is still valid UTF-8.
+ unsafe {
+ buf.set_len(new_len);
+ *src = String::from_utf8_unchecked(buf);
+ }
+
+ // Returns the index of the first `\r` that is directly followed by `\n`.
+ fn find_crlf(src: &[u8]) -> Option<usize> {
+ let mut search_idx = 0;
+ while let Some(idx) = find_cr(&src[search_idx..]) {
+ if src[search_idx..].get(idx + 1) != Some(&b'\n') {
+ // Lone `\r`: skip it and keep searching.
+ search_idx += idx + 1;
+ continue;
+ }
+ return Some(search_idx + idx);
+ }
+ None
+ }
+
+ // Returns the index of the first `\r`, if any.
+ fn find_cr(src: &[u8]) -> Option<usize> {
+ src.iter().position(|&b| b == b'\r')
+ }
+}
+
+// _____________________________________________________________________________
+// Pos, BytePos, CharPos
+//
+
+/// Conversions between a position type and the primitive integers `usize` and `u32`.
+pub trait Pos {
+ /// Builds a position from a `usize`.
+ fn from_usize(n: usize) -> Self;
+ /// Returns the position as a `usize`.
+ fn to_usize(&self) -> usize;
+ /// Builds a position from a `u32`.
+ fn from_u32(n: u32) -> Self;
+ /// Returns the position as a `u32`.
+ fn to_u32(&self) -> u32;
+}
+
+// Declares each listed single-field tuple struct (attributes and visibility
+// are passed through) and implements `Pos`, `Add` and `Sub` for it.
+macro_rules! impl_pos {
+ (
+ $(
+ $(#[$attr:meta])*
+ $vis:vis struct $ident:ident($inner_vis:vis $inner_ty:ty);
+ )*
+ ) => {
+ $(
+ $(#[$attr])*
+ $vis struct $ident($inner_vis $inner_ty);
+
+ // All conversions below are plain `as` casts, so a value that does
+ // not fit the target integer type is truncated rather than rejected.
+ impl Pos for $ident {
+ #[inline(always)]
+ fn from_usize(n: usize) -> $ident {
+ $ident(n as $inner_ty)
+ }
+
+ #[inline(always)]
+ fn to_usize(&self) -> usize {
+ self.0 as usize
+ }
+
+ #[inline(always)]
+ fn from_u32(n: u32) -> $ident {
+ $ident(n as $inner_ty)
+ }
+
+ #[inline(always)]
+ fn to_u32(&self) -> u32 {
+ self.0 as u32
+ }
+ }
+
+ // Adds the wrapped integers.
+ impl Add for $ident {
+ type Output = $ident;
+
+ #[inline(always)]
+ fn add(self, rhs: $ident) -> $ident {
+ $ident(self.0 + rhs.0)
+ }
+ }
+
+ // Subtracts the wrapped integers.
+ impl Sub for $ident {
+ type Output = $ident;
+
+ #[inline(always)]
+ fn sub(self, rhs: $ident) -> $ident {
+ $ident(self.0 - rhs.0)
+ }
+ }
+ )*
+ };
+}
+
+// Both position types are declared through `impl_pos!`, which also gives them
+// their `Pos`, `Add` and `Sub` implementations.
+impl_pos! {
+ /// A byte offset.
+ ///
+ /// Keep this small (currently 32-bits), as AST contains a lot of them.
+ #[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+ pub struct BytePos(pub u32);
+
+ /// A character offset.
+ ///
+ /// Because of multibyte UTF-8 characters, a byte offset
+ /// is not equivalent to a character offset. The [`SourceMap`] will convert [`BytePos`]
+ /// values to `CharPos` values as necessary.
+ #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
+ pub struct CharPos(pub usize);
+}
+
+impl<S: Encoder> Encodable<S> for BytePos {
+    /// Writes the raw byte offset as a single `u32`.
+    fn encode(&self, s: &mut S) {
+        let BytePos(raw) = *self;
+        s.emit_u32(raw);
+    }
+}
+
+impl<D: Decoder> Decodable<D> for BytePos {
+    /// Reads a single `u32` and wraps it as a byte offset.
+    fn decode(d: &mut D) -> BytePos {
+        let raw = d.read_u32();
+        BytePos(raw)
+    }
+}
+
+// _____________________________________________________________________________
+// Loc, SourceFileAndLine, SourceFileAndBytePos
+//
+
+/// A source code location used for error reporting.
+#[derive(Debug, Clone)]
+pub struct Loc {
+ /// Information about the original source.
+ pub file: Lrc<SourceFile>,
+ /// The (1-based) line number.
+ pub line: usize,
+ /// The (0-based) column offset.
+ pub col: CharPos,
+ /// The (0-based) column offset when displayed, i.e. with the widths of
+ /// non-narrow characters taken into account.
+ pub col_display: usize,
+}
+
+// Used to be structural records.
+/// A source file paired with the index of one of its lines.
+#[derive(Debug)]
+pub struct SourceFileAndLine {
+ pub sf: Lrc<SourceFile>,
+ /// Index of line, starting from 0.
+ pub line: usize,
+}
+/// A source file paired with a byte position.
+#[derive(Debug)]
+pub struct SourceFileAndBytePos {
+ pub sf: Lrc<SourceFile>,
+ /// `SourceMap::span_to_source` uses this value as an index into the file's
+ /// source text.
+ pub pos: BytePos,
+}
+
+/// The part of a single line that a span covers.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub struct LineInfo {
+ /// Index of line, starting from 0.
+ pub line_index: usize,
+
+ /// Column in line where span begins, starting from 0.
+ pub start_col: CharPos,
+
+ /// Column in line where span ends, starting from 0, exclusive.
+ pub end_col: CharPos,
+}
+
+/// A source file together with per-line coverage information, as produced by
+/// `SourceMap::span_to_lines`.
+pub struct FileLines {
+ /// The file the lines belong to.
+ pub file: Lrc<SourceFile>,
+ /// One entry per covered line.
+ pub lines: Vec<LineInfo>,
+}
+
+/// A callback taking a `LocalDefId`, stored in an `AtomicRef`. The initial
+/// value is a closure that does nothing.
+// NOTE(review): who installs and who invokes this hook is not visible here — confirm at the call sites.
+pub static SPAN_TRACK: AtomicRef<fn(LocalDefId)> = AtomicRef::new(&((|_| {}) as fn(_)));
+
+// _____________________________________________________________________________
+// SpanLinesError, SpanSnippetError, DistinctSources, MalformedSourceMapPositions
+//
+
+/// Result of `SourceMap::span_to_lines`.
+pub type FileLinesResult = Result<FileLines, SpanLinesError>;
+
+/// Error produced when a span cannot be broken into lines.
+#[derive(Clone, PartialEq, Eq, Debug)]
+pub enum SpanLinesError {
+ /// The two ends of the span resolve to different source files.
+ DistinctSources(DistinctSources),
+}
+
+/// Error produced when the source text for a span cannot be extracted.
+#[derive(Clone, PartialEq, Eq, Debug)]
+pub enum SpanSnippetError {
+ /// The span's byte range could not be sliced out of the source text.
+ IllFormedSpan(Span),
+ /// The two ends of the span resolve to different source files.
+ DistinctSources(DistinctSources),
+ /// The span's start lies after its end, or its end lies past the end of the file.
+ MalformedForSourcemap(MalformedSourceMapPositions),
+ /// The file is known, but its source text is not available.
+ SourceNotAvailable { filename: FileName },
+}
+
+/// Describes a span whose two ends lie in different source files.
+#[derive(Clone, PartialEq, Eq, Debug)]
+pub struct DistinctSources {
+ /// Name and start position of the file containing the start of the span.
+ pub begin: (FileName, BytePos),
+ /// Name and start position of the file containing the end of the span.
+ pub end: (FileName, BytePos),
+}
+
+/// Describes a span whose file-relative positions are out of order or out of
+/// bounds for the file.
+#[derive(Clone, PartialEq, Eq, Debug)]
+pub struct MalformedSourceMapPositions {
+ /// Name of the file the span starts in.
+ pub name: FileName,
+ /// Length of that file (`end_pos - start_pos`).
+ pub source_len: usize,
+ /// File-relative position of the start of the span.
+ pub begin_pos: BytePos,
+ /// File-relative position of the end of the span.
+ pub end_pos: BytePos,
+}
+
+/// Range inside of a `Span` used for diagnostics when we only have access to relative positions.
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub struct InnerSpan {
+ /// Relative position at which the range starts.
+ pub start: usize,
+ /// Relative position at which the range ends.
+ pub end: usize,
+}
+
+impl InnerSpan {
+    /// Creates a range from `start` to `end`; no validation is performed.
+    pub fn new(start: usize, end: usize) -> InnerSpan {
+        Self { start, end }
+    }
+}
+
+/// Requirements for a `StableHashingContext` to be used in this crate.
+///
+/// This is a hack to allow using the [`HashStable_Generic`] derive macro
+/// instead of implementing everything in rustc_middle.
+pub trait HashStableContext {
+ /// Returns the `DefPathHash` for `def_id`.
+ fn def_path_hash(&self, def_id: DefId) -> DefPathHash;
+ /// Whether spans should be hashed at all; when this is `false`,
+ /// `Span::hash_stable` feeds nothing into the hasher.
+ fn hash_spans(&self) -> bool;
+ /// Accesses `sess.opts.unstable_opts.incremental_ignore_spans` since
+ /// we don't have easy access to a `Session`
+ fn unstable_opts_incremental_ignore_spans(&self) -> bool;
+ /// Returns the span of the definition `def_id`.
+ fn def_span(&self, def_id: LocalDefId) -> Span;
+ /// Resolves a span to `(file, line_lo, col_lo, line_hi, col_hi)`, or `None`
+ /// when that is not possible (the span is then hashed as invalid).
+ fn span_data_to_lines_and_cols(
+ &mut self,
+ span: &SpanData,
+ ) -> Option<(Lrc<SourceFile>, usize, BytePos, usize, BytePos)>;
+ /// Returns the `HashingControls` currently in effect.
+ fn hashing_controls(&self) -> HashingControls;
+}
+
+impl<CTX> HashStable<CTX> for Span
+where
+    CTX: HashStableContext,
+{
+    /// Hashes a span in a stable way. We can't directly hash the span's `BytePos`
+    /// fields (that would be similar to hashing pointers, since those are just
+    /// offsets into the `SourceMap`). Instead, we hash the (file name, line, column)
+    /// triple, which stays the same even if the containing `SourceFile` has moved
+    /// within the `SourceMap`.
+    ///
+    /// Also note that we are hashing byte offsets for the column, not unicode
+    /// codepoint offsets. For the purpose of the hash that's sufficient.
+    /// Also, hashing filenames is expensive so we avoid doing it twice when the
+    /// span starts and ends in the same file, which is almost always the case.
+    ///
+    /// Nothing is hashed when `ctx.hash_spans()` is `false`. Otherwise the
+    /// syntax context and parent are hashed first, followed by one of three
+    /// tagged encodings: invalid (dummy or unresolvable span), relative (offsets
+    /// from the start of the enclosing definition), or valid (file name hash,
+    /// packed line/column data and length).
+    fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) {
+        const TAG_VALID_SPAN: u8 = 0;
+        const TAG_INVALID_SPAN: u8 = 1;
+        const TAG_RELATIVE_SPAN: u8 = 2;
+
+        if !ctx.hash_spans() {
+            return;
+        }
+
+        let span = self.data_untracked();
+        span.ctxt.hash_stable(ctx, hasher);
+        span.parent.hash_stable(ctx, hasher);
+
+        if span.is_dummy() {
+            Hash::hash(&TAG_INVALID_SPAN, hasher);
+            return;
+        }
+
+        if let Some(parent) = span.parent {
+            let def_span = ctx.def_span(parent).data_untracked();
+            if def_span.contains(span) {
+                // This span is enclosed in a definition: only hash the relative position.
+                Hash::hash(&TAG_RELATIVE_SPAN, hasher);
+                (span.lo - def_span.lo).to_u32().hash_stable(ctx, hasher);
+                (span.hi - def_span.lo).to_u32().hash_stable(ctx, hasher);
+                return;
+            }
+        }
+
+        // If this is not an empty or invalid span, we want to hash the last
+        // position that belongs to it, as opposed to hashing the first
+        // position past it.
+        let Some((file, line_lo, col_lo, line_hi, col_hi)) = ctx.span_data_to_lines_and_cols(&span) else {
+            Hash::hash(&TAG_INVALID_SPAN, hasher);
+            return;
+        };
+
+        Hash::hash(&TAG_VALID_SPAN, hasher);
+        // We truncate the stable ID hash and line and column numbers. The chances
+        // of causing a collision this way should be minimal.
+        Hash::hash(&(file.name_hash as u64), hasher);
+
+        // Hash both the length and the end location (line/column) of a span. If we
+        // hash only the length, for example, then two otherwise equal spans with
+        // different end locations will have the same hash. This can cause a problem
+        // during incremental compilation wherein a previous result for a query that
+        // depends on the end location of a span will be incorrectly reused when the
+        // end location of the span it depends on has changed (see issue #74890). A
+        // similar analysis applies if some query depends specifically on the length
+        // of the span, but we only hash the end location. So hash both.
+
+        // Layout of the packed word: bits 0..8 = start column, bits 8..32 = start
+        // line, bits 32..40 = end column, bits 40..64 = end line.
+        let col_lo_trunc = (col_lo.0 as u64) & 0xFF;
+        let line_lo_trunc = ((line_lo as u64) & 0xFF_FF_FF) << 8;
+        // The mask must be parenthesized: `<<` binds tighter than `&`, so
+        // `x & 0xFF << 32` would mask a 32-bit column with `0xFF << 32` and
+        // always yield zero, silently dropping the end column from the hash.
+        let col_hi_trunc = ((col_hi.0 as u64) & 0xFF) << 32;
+        let line_hi_trunc = ((line_hi as u64) & 0xFF_FF_FF) << 40;
+        let col_line = col_lo_trunc | line_lo_trunc | col_hi_trunc | line_hi_trunc;
+        let len = (span.hi - span.lo).0;
+        Hash::hash(&col_line, hasher);
+        Hash::hash(&len, hasher);
+    }
+}
diff --git a/compiler/rustc_span/src/profiling.rs b/compiler/rustc_span/src/profiling.rs
new file mode 100644
index 000000000..f169007fa
--- /dev/null
+++ b/compiler/rustc_span/src/profiling.rs
@@ -0,0 +1,35 @@
+use std::borrow::Borrow;
+
+use rustc_data_structures::profiling::EventArgRecorder;
+
+/// Extension trait for self-profiling purposes: allows recording spans within a generic
+/// activity's event arguments.
+pub trait SpannedEventArgRecorder {
+ /// Records the following event arguments within the current generic activity being profiled:
+ /// - the provided `event_arg`
+ /// - a string representation of the provided `span`
+ ///
+ /// Note: when self-profiling with costly event arguments, at least one argument
+ /// needs to be recorded. A panic will be triggered if that doesn't happen.
+ fn record_arg_with_span<A>(&mut self, event_arg: A, span: crate::Span)
+ where
+ A: Borrow<str> + Into<String>;
+}
+
+impl SpannedEventArgRecorder for EventArgRecorder<'_> {
+    /// Records `event_arg`, then a textual form of `span`: the embeddable
+    /// location string when a source map is installed in the session globals,
+    /// and the span's `Debug` output otherwise.
+    fn record_arg_with_span<A>(&mut self, event_arg: A, span: crate::Span)
+    where
+        A: Borrow<str> + Into<String>,
+    {
+        self.record_arg(event_arg);
+
+        let span_arg = crate::with_session_globals(|session_globals| {
+            match &*session_globals.source_map.borrow() {
+                Some(source_map) => source_map.span_to_embeddable_string(span),
+                None => format!("{:?}", span),
+            }
+        });
+        self.record_arg(span_arg);
+    }
+}
diff --git a/compiler/rustc_span/src/source_map.rs b/compiler/rustc_span/src/source_map.rs
new file mode 100644
index 000000000..28381157d
--- /dev/null
+++ b/compiler/rustc_span/src/source_map.rs
@@ -0,0 +1,1281 @@
+//! Types for tracking pieces of source code within a crate.
+//!
+//! The [`SourceMap`] tracks all the source code used within a single crate, mapping
+//! from integer byte positions to the original source code location. Each bit
+//! of source parsed during crate parsing (typically files, in-memory strings,
+//! or various bits of macro expansion) covers a contiguous range of bytes in the
+//! `SourceMap` and is represented by a [`SourceFile`]. Byte positions are stored in
+//! [`Span`] and used pervasively in the compiler. They are absolute positions
+//! within the `SourceMap`, which upon request can be converted to line and column
+//! information, source code snippets, etc.
+
+pub use crate::hygiene::{ExpnData, ExpnKind};
+pub use crate::*;
+
+use rustc_data_structures::fx::FxHashMap;
+use rustc_data_structures::stable_hasher::StableHasher;
+use rustc_data_structures::sync::{AtomicU32, Lrc, MappedReadGuard, ReadGuard, RwLock};
+use std::hash::Hash;
+use std::path::{Path, PathBuf};
+use std::sync::atomic::Ordering;
+use std::{clone::Clone, cmp};
+use std::{convert::TryFrom, unreachable};
+
+use std::fs;
+use std::io;
+use tracing::debug;
+
+#[cfg(test)]
+mod tests;
+
+/// Walks from `sp` outward through the call sites recorded in the `expn_data`
+/// chain and returns the first span that either does not come from a macro
+/// expansion or shares its call site with the (non-root) expansion of
+/// `enclosing_sp`.
+pub fn original_sp(sp: Span, enclosing_sp: Span) -> Span {
+    let sp_expn = sp.ctxt().outer_expn_data();
+    let enclosing_expn = enclosing_sp.ctxt().outer_expn_data();
+
+    let shares_call_site =
+        !enclosing_expn.is_root() && sp_expn.call_site == enclosing_expn.call_site;
+    if sp_expn.is_root() || shares_call_site {
+        return sp;
+    }
+
+    original_sp(sp_expn.call_site, enclosing_sp)
+}
+
+pub mod monotonic {
+    use std::ops::{Deref, DerefMut};
+
+    /// A `Vec` wrapper that can only grow: elements may be pushed, but never
+    /// removed or swapped, so any index into a `MonotonicVec` stays valid.
+    // This is declared in its own module to ensure that the private
+    // field is inaccessible
+    pub struct MonotonicVec<T>(Vec<T>);
+
+    impl<T> MonotonicVec<T> {
+        /// Wraps an existing vector.
+        pub fn new(val: Vec<T>) -> MonotonicVec<T> {
+            Self(val)
+        }
+
+        /// Appends `val` at the end.
+        pub fn push(&mut self, val: T) {
+            let Self(inner) = self;
+            inner.push(val);
+        }
+    }
+
+    impl<T> Default for MonotonicVec<T> {
+        fn default() -> Self {
+            Self::new(Vec::new())
+        }
+    }
+
+    // Read-only access to the underlying vector.
+    impl<T> Deref for MonotonicVec<T> {
+        type Target = Vec<T>;
+        fn deref(&self) -> &Self::Target {
+            let Self(inner) = self;
+            inner
+        }
+    }
+
+    // Explicitly opt out of mutable access to the underlying vector.
+    impl<T> !DerefMut for MonotonicVec<T> {}
+}
+
+/// A value of type `T` paired with a `Span`.
+#[derive(Clone, Encodable, Decodable, Debug, Copy, HashStable_Generic)]
+pub struct Spanned<T> {
+ /// The wrapped value.
+ pub node: T,
+ /// The span attached to the value.
+ pub span: Span,
+}
+
+/// Pairs `t` with the span `sp`.
+pub fn respan<T>(sp: Span, t: T) -> Spanned<T> {
+    let (node, span) = (t, sp);
+    Spanned { node, span }
+}
+
+/// Pairs `t` with `DUMMY_SP`.
+pub fn dummy_spanned<T>(t: T) -> Spanned<T> {
+    Spanned { node: t, span: DUMMY_SP }
+}
+
+// _____________________________________________________________________________
+// SourceFile, MultiByteChar, FileName, FileLines
+//
+
+/// An abstraction over the fs operations used by the Parser.
+pub trait FileLoader {
+ /// Query the existence of a file.
+ fn file_exists(&self, path: &Path) -> bool;
+
+ /// Read the contents of a UTF-8 file into memory.
+ ///
+ /// Returns the `io::Error` reported by the underlying read on failure.
+ fn read_file(&self, path: &Path) -> io::Result<String>;
+}
+
+/// A `FileLoader` backed by the real file system through `std::fs` and
+/// `std::path`.
+pub struct RealFileLoader;
+
+impl FileLoader for RealFileLoader {
+    fn file_exists(&self, path: &Path) -> bool {
+        Path::exists(path)
+    }
+
+    fn read_file(&self, path: &Path) -> io::Result<String> {
+        let contents = fs::read_to_string(path)?;
+        Ok(contents)
+    }
+}
+
+/// This is a [SourceFile] identifier that is used to correlate source files between
+/// subsequent compilation sessions (which is something we need to do during
+/// incremental compilation).
+///
+/// The [StableSourceFileId] also contains the CrateNum of the crate the source
+/// file was originally parsed for. This way we get two separate entries in
+/// the [SourceMap] if the same file is part of both the local and an upstream
+/// crate. Trying to only have one entry for both cases is problematic because
+/// at the point where we discover that there's a local use of the file in
+/// addition to the upstream one, we might already have made decisions based on
+/// the assumption that it's an upstream file. Treating the two files as
+/// different has no real downsides.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, Encodable, Decodable, Debug)]
+pub struct StableSourceFileId {
+ // A hash of the source file's FileName. This is hashed so that its size
+ // is more predictable than if we included the actual FileName value.
+ pub file_name_hash: u64,
+
+ // The CrateNum of the crate this source file was originally parsed for.
+ // We cannot include this information in the hash because at the time
+ // of hashing we don't have the context to map from the CrateNum's numeric
+ // value to a StableCrateId.
+ pub cnum: CrateNum,
+}
+
+// FIXME: we need a more globally consistent approach to the problem solved by
+// StableSourceFileId, perhaps built atop source_file.name_hash.
+impl StableSourceFileId {
+    /// Computes the id of `source_file` from its name and crate number.
+    pub fn new(source_file: &SourceFile) -> StableSourceFileId {
+        Self::new_from_name(&source_file.name, source_file.cnum)
+    }
+
+    /// Hashes `name` with a `StableHasher` and pairs the result with `cnum`.
+    fn new_from_name(name: &FileName, cnum: CrateNum) -> StableSourceFileId {
+        let mut hasher = StableHasher::new();
+        name.hash(&mut hasher);
+        let file_name_hash: u64 = hasher.finish();
+        StableSourceFileId { file_name_hash, cnum }
+    }
+}
+
+// _____________________________________________________________________________
+// SourceMap
+//
+
+/// The file tables of a `SourceMap`; both are filled together whenever a file
+/// is added.
+#[derive(Default)]
+pub(super) struct SourceMapFiles {
+ /// All files, in insertion order.
+ source_files: monotonic::MonotonicVec<Lrc<SourceFile>>,
+ /// The same files, keyed by their `StableSourceFileId`.
+ stable_id_to_source_file: FxHashMap<StableSourceFileId, Lrc<SourceFile>>,
+}
+
+/// Tracks the source files of a crate and hands each one its own range of
+/// byte positions.
+pub struct SourceMap {
+ /// The address space below this value is currently used by the files in the source map.
+ used_address_space: AtomicU32,
+
+ /// The registered files, behind a lock.
+ files: RwLock<SourceMapFiles>,
+ /// Used to check for and read source files.
+ file_loader: Box<dyn FileLoader + Sync + Send>,
+ // This is used to apply the file path remapping as specified via
+ // `--remap-path-prefix` to all `SourceFile`s allocated within this `SourceMap`.
+ path_mapping: FilePathMapping,
+
+ /// The algorithm used for hashing the contents of each source file.
+ hash_kind: SourceFileHashAlgorithm,
+}
+
+impl SourceMap {
+ /// Builds a `SourceMap` that reads through `RealFileLoader` and hashes file
+ /// contents with MD5.
+ pub fn new(path_mapping: FilePathMapping) -> SourceMap {
+     let file_loader = Box::new(RealFileLoader);
+     let hash_kind = SourceFileHashAlgorithm::Md5;
+     Self::with_file_loader_and_hash_kind(file_loader, path_mapping, hash_kind)
+ }
+
+ /// Builds an empty `SourceMap` (no files, no address space in use) from the
+ /// given loader, path remapping and content hash algorithm.
+ pub fn with_file_loader_and_hash_kind(
+     file_loader: Box<dyn FileLoader + Sync + Send>,
+     path_mapping: FilePathMapping,
+     hash_kind: SourceFileHashAlgorithm,
+ ) -> SourceMap {
+     let used_address_space = AtomicU32::new(0);
+     let files = RwLock::default();
+     SourceMap { used_address_space, files, file_loader, path_mapping, hash_kind }
+ }
+
+ /// Returns the path remapping this `SourceMap` was created with.
+ pub fn path_mapping(&self) -> &FilePathMapping {
+ &self.path_mapping
+ }
+
+ /// Asks the configured `FileLoader` whether `path` exists.
+ pub fn file_exists(&self, path: &Path) -> bool {
+ self.file_loader.file_exists(path)
+ }
+
+ /// Reads `path` through the configured `FileLoader` and registers its
+ /// contents as a source file. Read errors are returned to the caller.
+ pub fn load_file(&self, path: &Path) -> io::Result<Lrc<SourceFile>> {
+     self.file_loader
+         .read_file(path)
+         .map(|src| self.new_source_file(path.to_owned().into(), src))
+ }
+
+ /// Loads source file as a binary blob.
+ ///
+ /// Unlike `load_file`, guarantees that no normalization like BOM-removal
+ /// takes place on the returned bytes.
+ pub fn load_binary_file(&self, path: &Path) -> io::Result<Vec<u8>> {
+     // Ideally, this should use `self.file_loader`, but it can't
+     // deal with binary files yet.
+     let bytes = fs::read(path)?;
+
+     // We need to add file to the `SourceMap`, so that it is present
+     // in dep-info. There's also an edge case that file might be both
+     // loaded as a binary via `include_bytes!` and as proper `SourceFile`
+     // via `mod`, so we try to use real file contents and not just an
+     // empty string.
+     let text = match std::str::from_utf8(&bytes) {
+         Ok(valid) => valid.to_string(),
+         Err(_) => String::new(),
+     };
+     self.new_source_file(path.to_owned().into(), text);
+
+     Ok(bytes)
+ }
+
+ /// Returns a read guard over the list of registered source files.
+ // By returning a `MonotonicVec`, we ensure that consumers cannot invalidate
+ // any existing indices pointing into `files`.
+ pub fn files(&self) -> MappedReadGuard<'_, monotonic::MonotonicVec<Lrc<SourceFile>>> {
+ ReadGuard::map(self.files.borrow(), |files| &files.source_files)
+ }
+
+ /// Returns the registered source file with the given stable id, if any.
+ pub fn source_file_by_stable_id(
+     &self,
+     stable_id: StableSourceFileId,
+ ) -> Option<Lrc<SourceFile>> {
+     let files = self.files.borrow();
+     let found = files.stable_id_to_source_file.get(&stable_id).cloned();
+     found
+ }
+
+ /// Reserves `size + 1` positions of address space and returns the first
+ /// reserved position.
+ ///
+ /// Fails with `OffsetOverflowError` if `size` does not fit in a `u32` or if
+ /// the reservation would overflow the `u32` address space.
+ fn allocate_address_space(&self, size: usize) -> Result<usize, OffsetOverflowError> {
+ let size = u32::try_from(size).map_err(|_| OffsetOverflowError)?;
+
+ // Compare-and-swap loop: retry whenever `used_address_space` changed
+ // between the load and the exchange.
+ loop {
+ let current = self.used_address_space.load(Ordering::Relaxed);
+ let next = current
+ .checked_add(size)
+ // Add one so there is some space between files. This lets us distinguish
+ // positions in the `SourceMap`, even in the presence of zero-length files.
+ .and_then(|next| next.checked_add(1))
+ .ok_or(OffsetOverflowError)?;
+
+ if self
+ .used_address_space
+ .compare_exchange(current, next, Ordering::Relaxed, Ordering::Relaxed)
+ .is_ok()
+ {
+ return Ok(usize::try_from(current).unwrap());
+ }
+ }
+ }
+
+ /// Creates a new `SourceFile`.
+ /// If a file already exists in the `SourceMap` with the same ID, that file is returned
+ /// unmodified.
+ ///
+ /// Running out of address space is fatal: a message is printed to stderr
+ /// and a `FatalError` is raised.
+ pub fn new_source_file(&self, filename: FileName, src: String) -> Lrc<SourceFile> {
+     match self.try_new_source_file(filename, src) {
+         Ok(source_file) => source_file,
+         Err(OffsetOverflowError) => {
+             eprintln!("fatal error: rustc does not support files larger than 4GB");
+             crate::fatal_error::FatalError.raise()
+         }
+     }
+ }
+
+ /// Registers `src` under `filename` (after path remapping) for the local
+ /// crate, or returns the file already registered under the same stable id.
+ ///
+ /// Fails with `OffsetOverflowError` when no address space can be allocated
+ /// for the new file.
+ fn try_new_source_file(
+     &self,
+     filename: FileName,
+     src: String,
+ ) -> Result<Lrc<SourceFile>, OffsetOverflowError> {
+     // Note that filename may not be a valid path, eg it may be `<anon>` etc,
+     // but this is okay because the directory determined by `path.pop()` will
+     // be empty, so the working directory will be used.
+     let (filename, _) = self.path_mapping.map_filename_prefix(&filename);
+
+     let file_id = StableSourceFileId::new_from_name(&filename, LOCAL_CRATE);
+
+     if let Some(existing) = self.source_file_by_stable_id(file_id) {
+         return Ok(existing);
+     }
+
+     let start_pos = self.allocate_address_space(src.len())?;
+     let source_file =
+         Lrc::new(SourceFile::new(filename, src, Pos::from_usize(start_pos), self.hash_kind));
+
+     // Let's make sure the file_id we generated above actually matches
+     // the ID we generate for the SourceFile we just created.
+     debug_assert_eq!(StableSourceFileId::new(&source_file), file_id);
+
+     let mut files = self.files.borrow_mut();
+     files.source_files.push(source_file.clone());
+     files.stable_id_to_source_file.insert(file_id, source_file.clone());
+
+     Ok(source_file)
+ }
+
+ /// Allocates a new `SourceFile` representing a source file from an external
+ /// crate. The source code of such an "imported `SourceFile`" is not available,
+ /// but we still know enough to generate accurate debuginfo location
+ /// information for things inlined from other crates.
+ ///
+ /// The `file_local_*` tables are expected to hold positions in the frame of
+ /// reference that starts at `original_start_pos`; they are rebased onto the
+ /// address range allocated here before the file is registered.
+ ///
+ /// Panics if no address space can be allocated for `source_len` bytes.
+ pub fn new_imported_source_file(
+ &self,
+ filename: FileName,
+ src_hash: SourceFileHash,
+ name_hash: u128,
+ source_len: usize,
+ cnum: CrateNum,
+ file_local_lines: Lock<SourceFileLines>,
+ mut file_local_multibyte_chars: Vec<MultiByteChar>,
+ mut file_local_non_narrow_chars: Vec<NonNarrowChar>,
+ mut file_local_normalized_pos: Vec<NormalizedPos>,
+ original_start_pos: BytePos,
+ original_end_pos: BytePos,
+ ) -> Lrc<SourceFile> {
+ let start_pos = self
+ .allocate_address_space(source_len)
+ .expect("not enough address space for imported source file");
+
+ let end_pos = Pos::from_usize(start_pos + source_len);
+ let start_pos = Pos::from_usize(start_pos);
+
+ // Translate these positions into the new global frame of reference,
+ // now that the offset of the SourceFile is known.
+ //
+ // These are all unsigned values. `original_start_pos` may be larger or
+ // smaller than `start_pos`, but `pos` is always larger than both.
+ // Therefore, `(pos - original_start_pos) + start_pos` won't overflow
+ // but `start_pos - original_start_pos` might. So we use the former
+ // form rather than pre-computing the offset into a local variable. The
+ // compiler backend can optimize away the repeated computations in a
+ // way that won't trigger overflow checks.
+ match &mut *file_local_lines.borrow_mut() {
+ SourceFileLines::Lines(lines) => {
+ for pos in lines {
+ *pos = (*pos - original_start_pos) + start_pos;
+ }
+ }
+ // In the diff representation only `line_start` is rebased here.
+ SourceFileLines::Diffs(SourceFileDiffs { line_start, .. }) => {
+ *line_start = (*line_start - original_start_pos) + start_pos;
+ }
+ }
+ for mbc in &mut file_local_multibyte_chars {
+ mbc.pos = (mbc.pos - original_start_pos) + start_pos;
+ }
+ for swc in &mut file_local_non_narrow_chars {
+ *swc = (*swc - original_start_pos) + start_pos;
+ }
+ for nc in &mut file_local_normalized_pos {
+ nc.pos = (nc.pos - original_start_pos) + start_pos;
+ }
+
+ // No source text is attached (`src: None`); the external source starts
+ // out as `Foreign` with kind `AbsentOk`.
+ let source_file = Lrc::new(SourceFile {
+ name: filename,
+ src: None,
+ src_hash,
+ external_src: Lock::new(ExternalSource::Foreign {
+ kind: ExternalSourceKind::AbsentOk,
+ original_start_pos,
+ original_end_pos,
+ }),
+ start_pos,
+ end_pos,
+ lines: file_local_lines,
+ multibyte_chars: file_local_multibyte_chars,
+ non_narrow_chars: file_local_non_narrow_chars,
+ normalized_pos: file_local_normalized_pos,
+ name_hash,
+ cnum,
+ });
+
+ let mut files = self.files.borrow_mut();
+
+ // Register the file in both tables.
+ files.source_files.push(source_file.clone());
+ files
+ .stable_id_to_source_file
+ .insert(StableSourceFileId::new(&source_file), source_file.clone());
+
+ source_file
+ }
+
+ // Applies the doctest line offset stored in `file`, if there is one, to the
+ // line number `orig`; any other kind of file name leaves `orig` unchanged.
+ pub fn doctest_offset_line(&self, file: &FileName, orig: usize) -> usize {
+     match file {
+         FileName::DocTest(_, offset) if *offset < 0 => orig - (-(*offset)) as usize,
+         FileName::DocTest(_, offset) => orig + *offset as usize,
+         _ => orig,
+     }
+ }
+
+ /// Returns the `SourceFile` that contains the given `BytePos`.
+ pub fn lookup_source_file(&self, pos: BytePos) -> Lrc<SourceFile> {
+     let idx = self.lookup_source_file_idx(pos);
+     let files = self.files.borrow();
+     let file = (*files.source_files)[idx].clone();
+     file
+ }
+
+ /// Resolves a `BytePos` to its file, line, column and display column.
+ pub fn lookup_char_pos(&self, pos: BytePos) -> Loc {
+     let file = self.lookup_source_file(pos);
+     let (line, col, col_display) = file.lookup_file_pos_with_col_display(pos);
+     Loc { file, line, col, col_display }
+ }
+
+ // Looks up the file containing `pos` and the line within it. If the
+ // corresponding `SourceFile` is empty, does not return a line number; the
+ // file itself is handed back as the error value instead.
+ pub fn lookup_line(&self, pos: BytePos) -> Result<SourceFileAndLine, Lrc<SourceFile>> {
+     let file = self.lookup_source_file(pos);
+
+     if let Some(line) = file.lookup_line(pos) {
+         Ok(SourceFileAndLine { sf: file, line })
+     } else {
+         Err(file)
+     }
+ }
+
+ /// Formats `sp` as `file:line:col: line:col` with 1-based columns, naming the
+ /// file according to `filename_display_pref`. Yields `"no-location"` when
+ /// the map holds no files or `sp` is a dummy span.
+ fn span_to_string(&self, sp: Span, filename_display_pref: FileNameDisplayPreference) -> String {
+     let no_files = self.files.borrow().source_files.is_empty();
+     if no_files || sp.is_dummy() {
+         return "no-location".to_string();
+     }
+
+     let lo = self.lookup_char_pos(sp.lo());
+     let hi = self.lookup_char_pos(sp.hi());
+     let file_name = lo.file.name.display(filename_display_pref);
+     let (lo_col, hi_col) = (lo.col.to_usize() + 1, hi.col.to_usize() + 1);
+     format!("{}:{}:{}: {}:{}", file_name, lo.line, lo_col, hi.line, hi_col)
+ }
+
+ /// Format the span location suitable for embedding in build artifacts.
+ ///
+ /// File names are displayed using `FileNameDisplayPreference::Remapped`.
+ pub fn span_to_embeddable_string(&self, sp: Span) -> String {
+ self.span_to_string(sp, FileNameDisplayPreference::Remapped)
+ }
+
+ /// Formats the span location for pretty-printing annotations with line
+ /// numbers relative to the start of `relative_to`.
+ ///
+ /// Yields `"no-location"` when the map holds no files or either span is a
+ /// dummy, and falls back to the embeddable string when the two spans start
+ /// in files with different names.
+ pub fn span_to_relative_line_string(&self, sp: Span, relative_to: Span) -> String {
+     let no_files = self.files.borrow().source_files.is_empty();
+     if no_files || sp.is_dummy() || relative_to.is_dummy() {
+         return "no-location".to_string();
+     }
+
+     let lo = self.lookup_char_pos(sp.lo());
+     let hi = self.lookup_char_pos(sp.hi());
+     let base = self.lookup_char_pos(relative_to.lo());
+
+     if lo.file.name != base.file.name {
+         return self.span_to_embeddable_string(sp);
+     }
+
+     let file_name = lo.file.name.display(FileNameDisplayPreference::Remapped);
+     let lo_line = lo.line.saturating_sub(base.line);
+     let hi_line = hi.line.saturating_sub(base.line);
+     let (lo_col, hi_col) = (lo.col.to_usize() + 1, hi.col.to_usize() + 1);
+
+     format!("{}:+{}:{}: +{}:{}", file_name, lo_line, lo_col, hi_line, hi_col)
+ }
+
+ /// Format the span location to be printed in diagnostics. Must not be emitted
+ /// to build artifacts as this may leak local file paths. Use `span_to_embeddable_string`
+ /// for a string suitable for embedding.
+ pub fn span_to_diagnostic_string(&self, sp: Span) -> String {
+ self.span_to_string(sp, self.path_mapping.filename_display_for_diagnostics)
+ }
+
+ /// Returns the name of the file that contains the start of `sp`.
+ pub fn span_to_filename(&self, sp: Span) -> FileName {
+ self.lookup_char_pos(sp.lo()).file.name.clone()
+ }
+
+ /// Wraps `filename` for display using the diagnostics display preference of
+ /// this map's path mapping.
+ pub fn filename_for_diagnostics<'a>(&self, filename: &'a FileName) -> FileNameDisplay<'a> {
+ filename.display(self.path_mapping.filename_display_for_diagnostics)
+ }
+
+ /// Returns `true` when the two ends of `sp` lie in different files or on
+ /// different lines of the same file.
+ pub fn is_multiline(&self, sp: Span) -> bool {
+     let lo_file_idx = self.lookup_source_file_idx(sp.lo());
+     let hi_file_idx = self.lookup_source_file_idx(sp.hi());
+     if lo_file_idx != hi_file_idx {
+         return true;
+     }
+
+     let file = (*self.files.borrow().source_files)[lo_file_idx].clone();
+     let lo_line = file.lookup_line(sp.lo());
+     let hi_line = file.lookup_line(sp.hi());
+     lo_line != hi_line
+ }
+
+ /// Resolves both ends of `sp` and returns them as `(lo, hi)`.
+ ///
+ /// Fails with `SpanLinesError::DistinctSources` when the two ends resolve to
+ /// files with different start positions.
+ #[instrument(skip(self), level = "trace")]
+ pub fn is_valid_span(&self, sp: Span) -> Result<(Loc, Loc), SpanLinesError> {
+ let lo = self.lookup_char_pos(sp.lo());
+ trace!(?lo);
+ let hi = self.lookup_char_pos(sp.hi());
+ trace!(?hi);
+ // Files are told apart by their start position.
+ if lo.file.start_pos != hi.file.start_pos {
+ return Err(SpanLinesError::DistinctSources(DistinctSources {
+ begin: (lo.file.name.clone(), lo.file.start_pos),
+ end: (hi.file.name.clone(), hi.file.start_pos),
+ }));
+ }
+ Ok((lo, hi))
+ }
+
+ /// Returns `true` when only whitespace sits between the start of `sp` and the
+ /// preceding newline (or the start of the file). Returns `false` when the
+ /// preceding source cannot be retrieved.
+ pub fn is_line_before_span_empty(&self, sp: Span) -> bool {
+     let Ok(prev) = self.span_to_prev_source(sp) else {
+         return false;
+     };
+     let last_line = prev.rsplit_once('\n').map_or(prev.as_str(), |(_, tail)| tail);
+     last_line.trim_start().is_empty()
+ }
+
+ /// Breaks `sp` into one `LineInfo` per line it touches.
+ ///
+ /// Fails if the two ends of the span are in different files. A dummy span
+ /// yields an empty list of lines. Panics if the end line precedes the start line.
+ pub fn span_to_lines(&self, sp: Span) -> FileLinesResult {
+ debug!("span_to_lines(sp={:?})", sp);
+ let (lo, hi) = self.is_valid_span(sp)?;
+ assert!(hi.line >= lo.line);
+
+ if sp.is_dummy() {
+ return Ok(FileLines { file: lo.file, lines: Vec::new() });
+ }
+
+ let mut lines = Vec::with_capacity(hi.line - lo.line + 1);
+
+ // The span starts partway through the first line,
+ // but after that it starts from offset 0.
+ let mut start_col = lo.col;
+
+ // For every line but the last, it extends from `start_col`
+ // and to the end of the line. Be careful because the line
+ // numbers in Loc are 1-based, so we subtract 1 to get 0-based
+ // lines.
+ //
+ // FIXME: now that we handle DUMMY_SP up above, we should consider
+ // asserting that the line numbers here are all indeed 1-based.
+ let hi_line = hi.line.saturating_sub(1);
+ for line_index in lo.line.saturating_sub(1)..hi_line {
+ // Line length in characters; a line that cannot be fetched counts as empty.
+ let line_len = lo.file.get_line(line_index).map_or(0, |s| s.chars().count());
+ lines.push(LineInfo { line_index, start_col, end_col: CharPos::from_usize(line_len) });
+ start_col = CharPos::from_usize(0);
+ }
+
+ // For the last line, it extends from `start_col` to `hi.col`:
+ lines.push(LineInfo { line_index: hi_line, start_col, end_col: hi.col });
+
+ Ok(FileLines { file: lo.file, lines })
+ }
+
+ /// Extracts the source surrounding the given `Span` using the `extract_source` function. The
+ /// extract function takes three arguments: a string slice containing the source, an index in
+ /// the slice for the beginning of the span and an index in the slice for the end of the span.
+ ///
+ /// Errors:
+ /// - `DistinctSources` if the two ends of the span are in different files;
+ /// - `MalformedForSourcemap` if the start lies after the end or the end lies
+ ///   past the end of the file;
+ /// - `SourceNotAvailable` if neither the file's own source nor an external
+ ///   source is present;
+ /// - whatever `extract_source` itself returns.
+ fn span_to_source<F, T>(&self, sp: Span, extract_source: F) -> Result<T, SpanSnippetError>
+ where
+ F: Fn(&str, usize, usize) -> Result<T, SpanSnippetError>,
+ {
+ let local_begin = self.lookup_byte_offset(sp.lo());
+ let local_end = self.lookup_byte_offset(sp.hi());
+
+ // Files are told apart by their start position.
+ if local_begin.sf.start_pos != local_end.sf.start_pos {
+ Err(SpanSnippetError::DistinctSources(DistinctSources {
+ begin: (local_begin.sf.name.clone(), local_begin.sf.start_pos),
+ end: (local_end.sf.name.clone(), local_end.sf.start_pos),
+ }))
+ } else {
+ self.ensure_source_file_source_present(local_begin.sf.clone());
+
+ let start_index = local_begin.pos.to_usize();
+ let end_index = local_end.pos.to_usize();
+ let source_len = (local_begin.sf.end_pos - local_begin.sf.start_pos).to_usize();
+
+ if start_index > end_index || end_index > source_len {
+ return Err(SpanSnippetError::MalformedForSourcemap(MalformedSourceMapPositions {
+ name: local_begin.sf.name.clone(),
+ source_len,
+ begin_pos: local_begin.pos,
+ end_pos: local_end.pos,
+ }));
+ }
+
+ // Prefer the file's own source text, then fall back to the external source.
+ if let Some(ref src) = local_begin.sf.src {
+ extract_source(src, start_index, end_index)
+ } else if let Some(src) = local_begin.sf.external_src.borrow().get_source() {
+ extract_source(src, start_index, end_index)
+ } else {
+ Err(SpanSnippetError::SourceNotAvailable { filename: local_begin.sf.name.clone() })
+ }
+ }
+ }
+
+ /// Returns `true` when the source text for `sp` is available and the span's
+ /// byte range can be sliced out of it.
+ pub fn is_span_accessible(&self, sp: Span) -> bool {
+     let sliceable = self.span_to_source(sp, |src, start_index, end_index| {
+         Ok(src.get(start_index..end_index).is_some())
+     });
+     sliceable.unwrap_or(false)
+ }
+
+ /// Returns the source text covered by `sp` as an owned `String`.
+ ///
+ /// Fails with `IllFormedSpan` when the span's byte range cannot be sliced
+ /// out of the source, in addition to the errors of `span_to_source`.
+ pub fn span_to_snippet(&self, sp: Span) -> Result<String, SpanSnippetError> {
+     self.span_to_source(sp, |src, start_index, end_index| {
+         match src.get(start_index..end_index) {
+             Some(snippet) => Ok(snippet.to_string()),
+             None => Err(SpanSnippetError::IllFormedSpan(sp)),
+         }
+     })
+ }
+
+    /// Returns the length in bytes of the indentation found by `indentation_before`, or `None`
+    /// if that indentation could not be determined.
+    pub fn span_to_margin(&self, sp: Span) -> Option<usize> {
+        self.indentation_before(sp).map(|indent| indent.len())
+    }
+
+    /// Returns the leading whitespace of the line on which `sp` starts, looking only at the text
+    /// that precedes the span. Returns `None` if the source could not be retrieved.
+    pub fn indentation_before(&self, sp: Span) -> Option<String> {
+        self.span_to_source(sp, |src, start_index, _| {
+            let before = &src[..start_index];
+            // Everything after the last newline (or the whole prefix if there is none).
+            let last_line = match before.rfind('\n') {
+                Some(newline) => &before[newline + 1..],
+                None => before,
+            };
+            // The indentation ends at the first non-whitespace character, if any.
+            let indent_len =
+                last_line.find(|c: char| !c.is_whitespace()).unwrap_or(last_line.len());
+            Ok(last_line[..indent_len].to_string())
+        })
+        .ok()
+    }
+
+    /// Returns, as an owned `String`, all source text of the file that precedes `sp`.
+    ///
+    /// Fails with `IllFormedSpan` when the start of the span is not a valid slice position.
+    pub fn span_to_prev_source(&self, sp: Span) -> Result<String, SpanSnippetError> {
+        self.span_to_source(sp, |src, start_index, _| match src.get(..start_index) {
+            Some(prefix) => Ok(prefix.to_string()),
+            None => Err(SpanSnippetError::IllFormedSpan(sp)),
+        })
+    }
+
+ /// Extends the given `Span` backwards to just after the previous occurrence of `c`.
+ ///
+ /// If `c` does not occur before the span, the text considered is everything from the start of
+ /// the source up to the span, so the span is extended back to the start of the source. The
+ /// original span is returned unchanged if that text is empty, if it contains a newline while
+ /// `accept_newlines` is `false`, or if an error occurred while retrieving the code snippet.
+ pub fn span_extend_to_prev_char(&self, sp: Span, c: char, accept_newlines: bool) -> Span {
+ if let Ok(prev_source) = self.span_to_prev_source(sp) {
+ // `rsplit` always yields at least one item: the text after the last `c`, or the whole
+ // string when `c` is absent.
+ let prev_source = prev_source.rsplit(c).next().unwrap_or("");
+ if !prev_source.is_empty() && (accept_newlines || !prev_source.contains('\n')) {
+ return sp.with_lo(BytePos(sp.lo().0 - prev_source.len() as u32));
+ }
+ }
+
+ sp
+ }
+
+ /// Extends the given `Span` to just after the previous occurrence of `pat` when surrounded by
+ /// whitespace. Returns None if the pattern could not be found or if an error occurred while
+ /// retrieving the code snippet.
+ ///
+ /// `pat` only matches when it is immediately followed by a space, a tab or a newline; those
+ /// three delimiters are tried in that order and the first acceptable match wins. When
+ /// `include_whitespace` is `true`, the whitespace following `pat` is skipped so that the
+ /// returned span starts at the first non-whitespace character after it. When `accept_newlines`
+ /// is `false`, a match is rejected if the text between it and the span (ignoring leading
+ /// whitespace) contains a newline.
+ pub fn span_extend_to_prev_str(
+ &self,
+ sp: Span,
+ pat: &str,
+ accept_newlines: bool,
+ include_whitespace: bool,
+ ) -> Option<Span> {
+ // assure that the pattern is delimited, to avoid the following
+ //     fn my_fn()
+ //           ^^^^ returned span without the check
+ //     ---------- correct span
+ let prev_source = self.span_to_prev_source(sp).ok()?;
+ for ws in &[" ", "\t", "\n"] {
+ let pat = pat.to_owned() + ws;
+ if let Some(pat_pos) = prev_source.rfind(&pat) {
+ // `pat` now ends with the one-byte delimiter, so this is the index of that delimiter,
+ // i.e. the first byte after the original pattern.
+ let just_after_pat_pos = pat_pos + pat.len() - 1;
+ let just_after_pat_plus_ws = if include_whitespace {
+ // Skip to the first non-whitespace character; if only whitespace follows, the
+ // position is left at the delimiter.
+ just_after_pat_pos
+ + prev_source[just_after_pat_pos..]
+ .find(|c: char| !c.is_whitespace())
+ .unwrap_or(0)
+ } else {
+ just_after_pat_pos
+ };
+ // Number of bytes between the chosen position and the start of `sp`.
+ let len = prev_source.len() - just_after_pat_plus_ws;
+ let prev_source = &prev_source[just_after_pat_plus_ws..];
+ if accept_newlines || !prev_source.trim_start().contains('\n') {
+ return Some(sp.with_lo(BytePos(sp.lo().0 - len as u32)));
+ }
+ }
+ }
+
+ None
+ }
+
+    /// Returns, as an owned `String`, all source text of the file that follows `sp`.
+    ///
+    /// Fails with `IllFormedSpan` when the end of the span is not a valid slice position.
+    pub fn span_to_next_source(&self, sp: Span) -> Result<String, SpanSnippetError> {
+        self.span_to_source(sp, |src, _, end_index| match src.get(end_index..) {
+            Some(suffix) => Ok(suffix.to_string()),
+            None => Err(SpanSnippetError::IllFormedSpan(sp)),
+        })
+    }
+
+    /// Grows `span` forwards over every following character for which `f` returns `true`,
+    /// stopping at the first character that fails the predicate or at the end of the source.
+    pub fn span_extend_while(
+        &self,
+        span: Span,
+        f: impl Fn(char) -> bool,
+    ) -> Result<Span, SpanSnippetError> {
+        self.span_to_source(span, |s, _start, end| {
+            let rest = &s[end..];
+            // Byte offset of the first rejected character, or the whole remainder if none.
+            let n = rest.find(|c: char| !f(c)).unwrap_or(rest.len());
+            Ok(span.with_hi(span.hi() + BytePos(n as u32)))
+        })
+    }
+
+ /// Extends the given `Span` forwards up to, but not including, the next occurrence of `c`.
+ ///
+ /// If `c` does not occur after the span, the text considered is everything from the span to the
+ /// end of the source. The original span is returned unchanged if that text is empty, if it
+ /// contains a newline while `accept_newlines` is `false`, or if an error occurred while
+ /// retrieving the code snippet.
+ pub fn span_extend_to_next_char(&self, sp: Span, c: char, accept_newlines: bool) -> Span {
+ if let Ok(next_source) = self.span_to_next_source(sp) {
+ // `split` always yields at least one item: the text before the first `c`, or the whole
+ // string when `c` is absent.
+ let next_source = next_source.split(c).next().unwrap_or("");
+ if !next_source.is_empty() && (accept_newlines || !next_source.contains('\n')) {
+ return sp.with_hi(BytePos(sp.hi().0 + next_source.len() as u32));
+ }
+ }
+
+ sp
+ }
+
+    /// Widens `sp` in both directions so that it covers the whole line (or lines) it touches:
+    /// first forwards towards the next `'\n'`, then backwards towards the previous one.
+    pub fn span_extend_to_line(&self, sp: Span) -> Span {
+        let to_line_end = self.span_extend_to_next_char(sp, '\n', true);
+        self.span_extend_to_prev_char(to_line_end, '\n', true)
+    }
+
+    /// Tries to shorten `sp` so that it ends before the first occurrence of `c`, with trailing
+    /// whitespace trimmed off.
+    ///
+    /// The original span is returned if its snippet cannot be retrieved, or if the shortened
+    /// text would be empty or would contain a newline.
+    pub fn span_until_char(&self, sp: Span, c: char) -> Span {
+        if let Ok(snippet) = self.span_to_snippet(sp) {
+            let head = snippet.split(c).next().unwrap_or("").trim_end();
+            if !(head.is_empty() || head.contains('\n')) {
+                return sp.with_hi(BytePos(sp.lo().0 + head.len() as u32));
+            }
+        }
+
+        sp
+    }
+
+    /// Tries to shorten `sp` so that it ends immediately after the first occurrence of `c`.
+    ///
+    /// The original span is returned if its snippet cannot be retrieved or does not contain `c`.
+    pub fn span_through_char(&self, sp: Span, c: char) -> Span {
+        match self.span_to_snippet(sp).ok().and_then(|snippet| snippet.find(c)) {
+            Some(offset) => sp.with_hi(BytePos(sp.lo().0 + (offset + c.len_utf8()) as u32)),
+            None => sp,
+        }
+    }
+
+    /// Returns a span covering the first token of `sp` plus all whitespace that directly follows
+    /// it, or the original `Span` if the snippet cannot be retrieved.
+    ///
+    /// If `sp` points to `"let mut x"`, then a span pointing at `"let "` will be returned.
+    pub fn span_until_non_whitespace(&self, sp: Span) -> Span {
+        let mut seen_whitespace = false;
+
+        self.span_take_while(sp, |c| {
+            let is_ws = c.is_whitespace();
+            seen_whitespace |= is_ws;
+
+            // Keep going until the first non-whitespace character after some whitespace.
+            is_ws || !seen_whitespace
+        })
+    }
+
+    /// Returns a span covering the first token of `sp`, stopping before the first whitespace
+    /// character, or the original `Span` if the snippet cannot be retrieved.
+    ///
+    /// If `sp` points to `"let mut x"`, then a span pointing at `"let"` will be returned.
+    pub fn span_until_whitespace(&self, sp: Span) -> Span {
+        let not_whitespace = |c: &char| !c.is_whitespace();
+        self.span_take_while(sp, not_whitespace)
+    }
+
+    /// Shortens `sp` to the leading run of characters for which `predicate` returns `true`.
+    /// The original span is returned if its snippet cannot be retrieved.
+    pub fn span_take_while<P>(&self, sp: Span, predicate: P) -> Span
+    where
+        P: for<'r> FnMut(&'r char) -> bool,
+    {
+        match self.span_to_snippet(sp) {
+            Ok(snippet) => {
+                // Total UTF-8 length of the accepted prefix.
+                let taken: usize =
+                    snippet.chars().take_while(predicate).map(char::len_utf8).sum();
+
+                sp.with_hi(BytePos(sp.lo().0 + (taken as u32)))
+            }
+            Err(_) => sp,
+        }
+    }
+
+    /// Approximates the "head" of an item by cutting `sp` off before its first `{`. This is
+    /// useful when a `Span` encloses a whole item but only its head (usually the first line)
+    /// should be pointed at.
+    ///
+    /// *Only suitable for diagnostics.*
+    pub fn guess_head_span(&self, sp: Span) -> Span {
+        // FIXME: extend the AST items to have a head span, or replace callers with pointing at
+        // the item's ident when appropriate.
+        let body_start = '{';
+        self.span_until_char(sp, body_start)
+    }
+
+    /// Returns a span that starts where `sp` starts and covers only its first character.
+    ///
+    /// The width defaults to one byte when the source text is unavailable or when there is no
+    /// character at the start position.
+    pub fn start_point(&self, sp: Span) -> Span {
+        let first_char_width = {
+            let data = sp.data();
+            let begin = self.lookup_byte_offset(data.lo);
+            let start_index = begin.pos.to_usize();
+            let external = begin.sf.external_src.borrow();
+
+            // Source text from the start of the span onwards, if any source is available.
+            let tail = if let Some(ref src) = begin.sf.src {
+                Some(&src[start_index..])
+            } else if let Some(src) = external.get_source() {
+                Some(&src[start_index..])
+            } else {
+                None
+            };
+
+            tail.and_then(|text| text.chars().next()).map_or(1, |c| c.len_utf8())
+        };
+
+        sp.with_hi(BytePos(sp.lo().0 + first_char_width as u32))
+    }
+
+    /// Returns a span that ends where `sp` ends and covers only its last character. The new
+    /// start never moves before the start of `sp`.
+    pub fn end_point(&self, sp: Span) -> Span {
+        let hi = sp.hi().0;
+        let last_char_width = self.find_width_of_character_at_span(sp, false);
+
+        // Step back by one character; keep `hi` itself if that would underflow.
+        let stepped_back = hi.checked_sub(last_char_width).unwrap_or(hi);
+        let new_lo = stepped_back.max(sp.lo().0);
+
+        sp.with_lo(BytePos(new_lo))
+    }
+
+ /// Returns a new span representing the next character after the end-point of this span.
+ ///
+ /// Dummy spans are returned unchanged. Otherwise the result starts at `sp.hi()`, keeps the
+ /// syntax context of `sp`, and ends at whichever is larger: `sp.lo() + 1`, or `sp.hi()` plus the
+ /// width found for the following character minus one.
+ pub fn next_point(&self, sp: Span) -> Span {
+ if sp.is_dummy() {
+ return sp;
+ }
+ let start_of_next_point = sp.hi().0;
+
+ // NOTE(review): `shrink_to_hi()` presumably yields an empty span at `sp.hi()`; if so,
+ // `find_width_of_character_at_span` takes its empty-span early return and the width is
+ // always 1 here -- confirm.
+ let width = self.find_width_of_character_at_span(sp.shrink_to_hi(), true);
+ // If the width is 1, then the next span should point to the same `lo` and `hi`. However,
+ // in the case of a multibyte character, where the width != 1, the next span should
+ // span multiple bytes to include the whole character.
+ let end_of_next_point =
+ start_of_next_point.checked_add(width - 1).unwrap_or(start_of_next_point);
+
+ // Never end before `sp.lo() + 1`.
+ let end_of_next_point = BytePos(cmp::max(sp.lo().0 + 1, end_of_next_point));
+ Span::new(BytePos(start_of_next_point), end_of_next_point, sp.ctxt(), None)
+ }
+
+ /// Finds the width of the character, either before or after the end of provided span,
+ /// depending on the `forwards` parameter.
+ ///
+ /// Returns the width in bytes. A width of 1 is returned as a fallback when the span is empty,
+ /// when its ends lie in different source files, when the indices are malformed, or when no
+ /// source text is available.
+ fn find_width_of_character_at_span(&self, sp: Span, forwards: bool) -> u32 {
+ let sp = sp.data();
+ if sp.lo == sp.hi {
+ debug!("find_width_of_character_at_span: early return empty span");
+ return 1;
+ }
+
+ let local_begin = self.lookup_byte_offset(sp.lo);
+ let local_end = self.lookup_byte_offset(sp.hi);
+ debug!(
+ "find_width_of_character_at_span: local_begin=`{:?}`, local_end=`{:?}`",
+ local_begin, local_end
+ );
+
+ if local_begin.sf.start_pos != local_end.sf.start_pos {
+ debug!("find_width_of_character_at_span: begin and end are in different files");
+ return 1;
+ }
+
+ // Byte indices relative to the start of the containing source file.
+ let start_index = local_begin.pos.to_usize();
+ let end_index = local_end.pos.to_usize();
+ debug!(
+ "find_width_of_character_at_span: start_index=`{:?}`, end_index=`{:?}`",
+ start_index, end_index
+ );
+
+ // Disregard indexes that are at the start or end of their spans, they can't fit bigger
+ // characters.
+ // This also keeps the `end_index - 1` below from underflowing when searching backwards.
+ if (!forwards && end_index == usize::MIN) || (forwards && start_index == usize::MAX) {
+ debug!("find_width_of_character_at_span: start or end of span, cannot be multibyte");
+ return 1;
+ }
+
+ let source_len = (local_begin.sf.end_pos - local_begin.sf.start_pos).to_usize();
+ debug!("find_width_of_character_at_span: source_len=`{:?}`", source_len);
+ // Ensure indexes are also not malformed.
+ if start_index > end_index || end_index > source_len {
+ debug!("find_width_of_character_at_span: source indexes are malformed");
+ return 1;
+ }
+
+ let src = local_begin.sf.external_src.borrow();
+
+ // We need to extend the snippet to the end of the src rather than to end_index so when
+ // searching forwards for boundaries we've got somewhere to search.
+ let snippet = if let Some(ref src) = local_begin.sf.src {
+ &src[start_index..]
+ } else if let Some(src) = src.get_source() {
+ &src[start_index..]
+ } else {
+ return 1;
+ };
+ debug!("find_width_of_character_at_span: snippet=`{:?}`", snippet);
+
+ // `target` is a file-relative index; the first candidate is one byte past (or before)
+ // `end_index`.
+ let mut target = if forwards { end_index + 1 } else { end_index - 1 };
+ debug!("find_width_of_character_at_span: initial target=`{:?}`", target);
+
+ // `snippet` starts at `start_index`, hence the subtraction to get a snippet-relative index.
+ // NOTE(review): when searching backwards this subtraction assumes `target` never drops
+ // below `start_index`, i.e. that `start_index` itself is a char boundary -- confirm.
+ while !snippet.is_char_boundary(target - start_index) && target < source_len {
+ target = if forwards {
+ target + 1
+ } else {
+ match target.checked_sub(1) {
+ Some(target) => target,
+ None => {
+ break;
+ }
+ }
+ };
+ debug!("find_width_of_character_at_span: target=`{:?}`", target);
+ }
+ debug!("find_width_of_character_at_span: final target=`{:?}`", target);
+
+ // The width is the distance between `end_index` and the position that was reached.
+ if forwards { (target - end_index) as u32 } else { (end_index - target) as u32 }
+ }
+
+    /// Looks up an already registered `SourceFile` by name.
+    ///
+    /// `filename` is passed through the path mapping first, and the first file whose name equals
+    /// the remapped name is returned.
+    pub fn get_source_file(&self, filename: &FileName) -> Option<Lrc<SourceFile>> {
+        // Remap filename before lookup
+        let remapped = self.path_mapping().map_filename_prefix(filename).0;
+        self.files.borrow().source_files.iter().find(|sf| remapped == sf.name).cloned()
+    }
+
+    /// Resolves a global `BytePos` to the `SourceFile` that contains it, together with the
+    /// position's offset from the start of that file.
+    pub fn lookup_byte_offset(&self, bpos: BytePos) -> SourceFileAndBytePos {
+        let file_idx = self.lookup_source_file_idx(bpos);
+        let sf = (*self.files.borrow().source_files)[file_idx].clone();
+        SourceFileAndBytePos { pos: bpos - sf.start_pos, sf }
+    }
+
+    // Returns the index of the `SourceFile` (in `self.files`) that contains `pos`, found by a
+    // binary search over the files' start positions.
+    // This index is guaranteed to be valid for the lifetime of this `SourceMap`,
+    // since `source_files` is a `MonotonicVec`
+    pub fn lookup_source_file_idx(&self, pos: BytePos) -> usize {
+        let files = self.files.borrow();
+        match files.source_files.binary_search_by_key(&pos, |sf| sf.start_pos) {
+            // `pos` is exactly the start of a file.
+            Ok(exact) => exact,
+            // Otherwise `pos` belongs to the file just before the insertion point.
+            Err(insertion_point) => insertion_point - 1,
+        }
+    }
+
+    /// Returns the sum of `count_lines()` over all source files in this map.
+    pub fn count_lines(&self) -> usize {
+        self.files().iter().map(|f| f.count_lines()).sum()
+    }
+
+    /// Tries to produce a span covering just the name of the function that `span` belongs to.
+    ///
+    /// The span is first extended back to the closest preceding `fn` keyword (whitespace after
+    /// the keyword is skipped); the name is then the leading run of alphanumeric characters and
+    /// underscores of the resulting snippet.
+    ///
+    /// Returns `None` if no `fn` keyword is found, if the snippet cannot be retrieved or is
+    /// empty, or if the snippet contains no character that terminates the name. This is a
+    /// text-based heuristic, so the last case is reported as "no result" rather than as a panic.
+    pub fn generate_fn_name_span(&self, span: Span) -> Option<Span> {
+        let prev_span = self.span_extend_to_prev_str(span, "fn", true, true)?;
+        let snippet = self.span_to_snippet(prev_span).ok()?;
+        debug!(
+            "generate_fn_name_span: span={:?}, prev_span={:?}, snippet={:?}",
+            span, prev_span, snippet
+        );
+
+        if snippet.is_empty() {
+            return None;
+        }
+
+        // Length of the identifier: everything up to the first non-identifier character.
+        let len = snippet.find(|c: char| !c.is_alphanumeric() && c != '_')?;
+        Some(prev_span.with_hi(BytePos(prev_span.lo().0 + len as u32)))
+    }
+
+    /// Takes the span of a type parameter in a function signature and try to generate a span for
+    /// the function name (with generics) and a new snippet for this span with the pointed type
+    /// parameter as a new local type parameter.
+    ///
+    /// For instance:
+    /// ```rust,ignore (pseudo-Rust)
+    /// // Given span
+    /// fn my_function(param: T)
+    /// //                    ^ Original span
+    ///
+    /// // Result
+    /// fn my_function(param: T)
+    /// // ^^^^^^^^^^^ Generated span with snippet `my_function<T>`
+    /// ```
+    ///
+    /// Returns `None` if no preceding `fn` keyword is found on the same line, if a snippet cannot
+    /// be retrieved, or if the text after `fn` contains no character that terminates the function
+    /// name.
+    ///
+    /// Attention: The method used is very fragile since it essentially duplicates the work of the
+    /// parser. If you need to use this function or something similar, please consider updating the
+    /// `SourceMap` functions and this function to something more robust.
+    pub fn generate_local_type_param_snippet(&self, span: Span) -> Option<(Span, String)> {
+        // Try to extend the span to the previous "fn" keyword to retrieve the function
+        // signature.
+        let sugg_span = self.span_extend_to_prev_str(span, "fn", false, true)?;
+        let snippet = self.span_to_snippet(sugg_span).ok()?;
+
+        // Consume the function name. Being a text-based heuristic, give up (instead of
+        // panicking) when nothing terminates the name.
+        let mut offset = snippet.find(|c: char| !c.is_alphanumeric() && c != '_')?;
+
+        // Consume the generics part of the function signature.
+        let mut bracket_counter = 0;
+        let mut last_char = None;
+        for c in snippet[offset..].chars() {
+            match c {
+                '<' => bracket_counter += 1,
+                '>' => bracket_counter -= 1,
+                '(' => {
+                    // The parameter list starts at the first `(` outside of any `<...>`.
+                    if bracket_counter == 0 {
+                        break;
+                    }
+                }
+                _ => {}
+            }
+            offset += c.len_utf8();
+            last_char = Some(c);
+        }
+
+        // Adjust the suggestion span to encompass the function name with its generics.
+        let sugg_span = sugg_span.with_hi(BytePos(sugg_span.lo().0 + offset as u32));
+
+        // Prepare the new suggested snippet to append the type parameter that triggered
+        // the error in the generics of the function signature.
+        let mut new_snippet = if last_char == Some('>') {
+            // Existing generics: drop the closing `>` and continue the list.
+            format!("{}, ", &snippet[..(offset - '>'.len_utf8())])
+        } else {
+            format!("{}<", &snippet[..offset])
+        };
+        new_snippet.push_str(&self.span_to_snippet(span).unwrap_or_else(|_| "T".to_string()));
+        new_snippet.push('>');
+
+        Some((sugg_span, new_snippet))
+    }
+    /// Asks `source_file` to add its external source, supplying a loader that reads the file
+    /// through `self.file_loader` when the file has a real name with a local path, and yields
+    /// nothing otherwise. The result of `add_external_src` is returned as-is.
+    pub fn ensure_source_file_source_present(&self, source_file: Lrc<SourceFile>) -> bool {
+        source_file.add_external_src(|| {
+            if let FileName::Real(ref name) = source_file.name {
+                if let Some(local_path) = name.local_path() {
+                    return self.file_loader.read_file(local_path).ok();
+                }
+            }
+            None
+        })
+    }
+
+    /// Returns whether the source file containing the start of `sp` reports itself as imported.
+    pub fn is_imported(&self, sp: Span) -> bool {
+        let idx = self.lookup_source_file_idx(sp.lo());
+        self.files()[idx].is_imported()
+    }
+
+    /// Gets the span of a statement. A statement that does not come from a macro expansion is
+    /// returned as-is; otherwise the span in the context of `block_span` is used, extended to
+    /// include the trailing semicolon on a best-effort basis.
+    pub fn stmt_span(&self, stmt_span: Span, block_span: Span) -> Span {
+        if stmt_span.from_expansion() {
+            let mac_call = original_sp(stmt_span, block_span);
+            match self.mac_call_stmt_semi_span(mac_call) {
+                Some(semi) => mac_call.with_hi(semi.hi()),
+                None => mac_call,
+            }
+        } else {
+            stmt_span
+        }
+    }
+
+    /// Tries to find the span of the semicolon of a macro call statement.
+    /// The input must be the *call site* span of a statement from macro expansion.
+    /// ```ignore (illustrative)
+    /// //       v output
+    ///    mac!();
+    /// // ^^^^^^ input
+    /// ```
+    ///
+    /// Whitespace after the call is skipped; `None` is returned if the next byte is not `;`.
+    pub fn mac_call_stmt_semi_span(&self, mac_call: Span) -> Option<Span> {
+        let past_whitespace = self.span_extend_while(mac_call, char::is_whitespace).ok()?;
+        let semi_hi = BytePos(past_whitespace.hi().0.checked_add(1)?);
+        let candidate = past_whitespace.shrink_to_hi().with_hi(semi_hi);
+
+        match self.span_to_snippet(candidate).as_deref() {
+            Ok(";") => Some(candidate),
+            _ => None,
+        }
+    }
+}
+
+/// A list of path-prefix substitutions, together with the filename display preference that is
+/// derived from it when the mapping is constructed.
+#[derive(Clone)]
+pub struct FilePathMapping {
+ // `(from, to)` pairs: a path starting with `from` has that prefix replaced by `to`.
+ mapping: Vec<(PathBuf, PathBuf)>,
+ // Set by `new`: `Local` when `mapping` is empty, `Remapped` otherwise.
+ filename_display_for_diagnostics: FileNameDisplayPreference,
+}
+
+impl FilePathMapping {
+    /// Creates a mapping that contains no substitutions.
+    pub fn empty() -> FilePathMapping {
+        Self::new(vec![])
+    }
+
+    /// Creates a mapping from `(from, to)` prefix pairs. Filenames are displayed as local paths
+    /// in diagnostics when there are no substitutions, and as remapped paths otherwise.
+    pub fn new(mapping: Vec<(PathBuf, PathBuf)>) -> FilePathMapping {
+        FilePathMapping {
+            filename_display_for_diagnostics: if mapping.is_empty() {
+                FileNameDisplayPreference::Local
+            } else {
+                FileNameDisplayPreference::Remapped
+            },
+            mapping,
+        }
+    }
+
+ /// Applies any path prefix substitution as defined by the mapping.
+ /// The return value is the remapped path and a boolean indicating whether
+ /// the path was affected by the mapping.
+ ///
+ /// Only the first matching entry is applied; entries are tried from last to first. An empty
+ /// path is returned unchanged.
+ pub fn map_prefix(&self, path: PathBuf) -> (PathBuf, bool) {
+ if path.as_os_str().is_empty() {
+ // Exit early if the path is empty and therefore there's nothing to remap.
+ // This is mostly to reduce spam for `RUSTC_LOG=[remap_path_prefix]`.
+ return (path, false);
+ }
+
+ return remap_path_prefix(&self.mapping, path);
+
+ // The actual work lives in a nested function so that it can carry its own `instrument`
+ // attribute.
+ #[instrument(level = "debug", skip(mapping))]
+ fn remap_path_prefix(mapping: &[(PathBuf, PathBuf)], path: PathBuf) -> (PathBuf, bool) {
+ // NOTE: We are iterating over the mapping entries from last to first
+ // because entries specified later on the command line should
+ // take precedence.
+ for &(ref from, ref to) in mapping.iter().rev() {
+ debug!("Trying to apply {:?} => {:?}", from, to);
+
+ if let Ok(rest) = path.strip_prefix(from) {
+ let remapped = if rest.as_os_str().is_empty() {
+ // This is subtle, joining an empty path onto e.g. `foo/bar` will
+ // result in `foo/bar/`, that is, there'll be an additional directory
+ // separator at the end. This can lead to duplicated directory separators
+ // in remapped paths down the line.
+ // So, if we have an exact match, we just return that without a call
+ // to `Path::join()`.
+ to.clone()
+ } else {
+ to.join(rest)
+ };
+ debug!("Match - remapped {:?} => {:?}", path, remapped);
+
+ return (remapped, true);
+ } else {
+ debug!("No match - prefix {:?} does not match {:?}", from, path);
+ }
+ }
+
+ debug!("Path {:?} was not remapped", path);
+ (path, false)
+ }
+ }
+
+    /// Applies the prefix mapping to a `FileName`, returning the resulting name and whether it
+    /// was changed.
+    ///
+    /// Only real, not yet remapped file names are subject to remapping; a remapped name keeps
+    /// its original local path alongside the new virtual name. Non-real file names are returned
+    /// unchanged. Passing an already remapped real file name is a bug and panics.
+    fn map_filename_prefix(&self, file: &FileName) -> (FileName, bool) {
+        match file {
+            FileName::Real(realfile) => match realfile {
+                RealFileName::LocalPath(local_path) => {
+                    let (virtual_name, mapped) = self.map_prefix(local_path.to_path_buf());
+                    let new_name = if mapped {
+                        RealFileName::Remapped {
+                            local_path: Some(local_path.clone()),
+                            virtual_name,
+                        }
+                    } else {
+                        realfile.clone()
+                    };
+                    (FileName::Real(new_name), mapped)
+                }
+                _ => unreachable!("attempted to remap an already remapped filename"),
+            },
+            other => (other.clone(), false),
+        }
+    }
+
+ /// Expand a relative path to an absolute path with remapping taken into account.
+ /// Use this when absolute paths are required (e.g. debuginfo or crate metadata).
+ ///
+ /// The resulting `RealFileName` will have its `local_path` portion erased if
+ /// possible (i.e. if there's also a remapped path).
+ ///
+ /// * `file_path` - the path to expand; an already remapped name only has its `local_path`
+ ///   erased.
+ /// * `working_directory` - the directory a relative `file_path` is joined onto; it may itself
+ ///   already be remapped.
+ pub fn to_embeddable_absolute_path(
+ &self,
+ file_path: RealFileName,
+ working_directory: &RealFileName,
+ ) -> RealFileName {
+ match file_path {
+ // Anything that's already remapped we don't modify, except for erasing
+ // the `local_path` portion.
+ RealFileName::Remapped { local_path: _, virtual_name } => {
+ RealFileName::Remapped {
+ // We do not want any local path to be exported into metadata
+ local_path: None,
+ // We use the remapped name verbatim, even if it looks like a relative
+ // path. The assumption is that the user doesn't want us to further
+ // process paths that have gone through remapping.
+ virtual_name,
+ }
+ }
+
+ RealFileName::LocalPath(unmapped_file_path) => {
+ // If no remapping has been applied yet, try to do so
+ let (new_path, was_remapped) = self.map_prefix(unmapped_file_path);
+ if was_remapped {
+ // It was remapped, so don't modify further
+ return RealFileName::Remapped { local_path: None, virtual_name: new_path };
+ }
+
+ if new_path.is_absolute() {
+ // No remapping has applied to this path and it is absolute,
+ // so the working directory cannot influence it either, so
+ // we are done.
+ return RealFileName::LocalPath(new_path);
+ }
+
+ debug_assert!(new_path.is_relative());
+ let unmapped_file_path_rel = new_path;
+
+ // From here on the path is relative and unmapped, so it has to be resolved against
+ // the working directory.
+ match working_directory {
+ RealFileName::LocalPath(unmapped_working_dir_abs) => {
+ let file_path_abs = unmapped_working_dir_abs.join(unmapped_file_path_rel);
+
+ // Although neither `working_directory` nor the file name were subject
+ // to path remapping, the concatenation between the two may be. Hence
+ // we need to do a remapping here.
+ let (file_path_abs, was_remapped) = self.map_prefix(file_path_abs);
+ if was_remapped {
+ RealFileName::Remapped {
+ // Erase the actual path
+ local_path: None,
+ virtual_name: file_path_abs,
+ }
+ } else {
+ // No kind of remapping applied to this path, so
+ // we leave it as it is.
+ RealFileName::LocalPath(file_path_abs)
+ }
+ }
+ RealFileName::Remapped {
+ local_path: _,
+ virtual_name: remapped_working_dir_abs,
+ } => {
+ // If working_directory has been remapped, then we emit
+ // Remapped variant as the expanded path won't be valid
+ RealFileName::Remapped {
+ local_path: None,
+ virtual_name: Path::new(remapped_working_dir_abs)
+ .join(unmapped_file_path_rel),
+ }
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/compiler/rustc_span/src/source_map/tests.rs b/compiler/rustc_span/src/source_map/tests.rs
new file mode 100644
index 000000000..be827cea8
--- /dev/null
+++ b/compiler/rustc_span/src/source_map/tests.rs
@@ -0,0 +1,481 @@
+use super::*;
+
+use rustc_data_structures::sync::Lrc;
+
+/// Builds a `SourceMap` with three files: two identical two-line files with an empty file
+/// registered between them.
+fn init_source_map() -> SourceMap {
+    let sm = SourceMap::new(FilePathMapping::empty());
+    let files = [
+        ("blork.rs", "first line.\nsecond line"),
+        ("empty.rs", ""),
+        ("blork2.rs", "first line.\nsecond line"),
+    ];
+    for (name, text) in files {
+        sm.new_source_file(PathBuf::from(name).into(), text.to_string());
+    }
+    sm
+}
+
+impl SourceMap {
+    /// Returns `Some(span)`, a union of the LHS and RHS span. The LHS must precede the RHS. If
+    /// there are gaps between LHS and RHS, the resulting union will cross these gaps.
+    /// For this to work,
+    ///
+    /// * the syntax contexts of both spans must match,
+    /// * the LHS span needs to end on the same line the RHS span begins,
+    /// * the LHS span must start at or before the RHS span and must not overlap it.
+    fn merge_spans(&self, sp_lhs: Span, sp_rhs: Span) -> Option<Span> {
+        // Ensure we're at the same expansion ID.
+        if sp_lhs.ctxt() != sp_rhs.ctxt() {
+            return None;
+        }
+
+        let lhs_end = self.lookup_line(sp_lhs.hi()).ok()?;
+        let rhs_begin = self.lookup_line(sp_rhs.lo()).ok()?;
+
+        // If we must cross lines to merge, don't merge.
+        if lhs_end.line != rhs_begin.line {
+            return None;
+        }
+
+        // Ensure these follow the expected order and that we don't overlap.
+        let ordered = (sp_lhs.lo() <= sp_rhs.lo()) && (sp_lhs.hi() <= sp_rhs.lo());
+        ordered.then(|| sp_lhs.to(sp_rhs))
+    }
+
+    /// Converts an absolute `BytePos` to a `CharPos` relative to the `SourceFile`.
+    fn bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos {
+        let file_idx = self.lookup_source_file_idx(bpos);
+        let files = self.files.borrow();
+        (*files.source_files)[file_idx].bytepos_to_file_charpos(bpos)
+    }
+}
+
+/// Tests `lookup_byte_offset`, including positions around the empty file.
+#[test]
+fn t3() {
+    let sm = init_source_map();
+
+    // (global position, expected file, expected position within that file)
+    let cases = [(23, "blork.rs", 23), (24, "empty.rs", 0), (25, "blork2.rs", 0)];
+    for (global, file_name, local) in cases {
+        let found = sm.lookup_byte_offset(BytePos(global));
+        assert_eq!(found.sf.name, PathBuf::from(file_name).into());
+        assert_eq!(found.pos, BytePos(local));
+    }
+}
+
+/// Tests `bytepos_to_file_charpos` on ASCII-only sources.
+#[test]
+fn t4() {
+    let sm = init_source_map();
+
+    // (global byte position, expected char position within its file)
+    for (byte_pos, char_pos) in [(22, 22), (25, 0)] {
+        assert_eq!(sm.bytepos_to_file_charpos(BytePos(byte_pos)), CharPos(char_pos));
+    }
+}
+
+/// Tests `lookup_char_pos` in the presence of a zero-length `SourceFile`.
+#[test]
+fn t5() {
+    let sm = init_source_map();
+
+    // (global byte position, expected file, expected line, expected column)
+    let cases = [(22, "blork.rs", 2, 10), (25, "blork2.rs", 1, 0)];
+    for (byte_pos, file_name, line, col) in cases {
+        let loc = sm.lookup_char_pos(BytePos(byte_pos));
+        assert_eq!(loc.file.name, PathBuf::from(file_name).into());
+        assert_eq!(loc.line, line);
+        assert_eq!(loc.col, CharPos(col));
+    }
+}
+
+/// Builds a `SourceMap` with two files whose text contains multi-byte characters.
+fn init_source_map_mbc() -> SourceMap {
+    let sm = SourceMap::new(FilePathMapping::empty());
+    // "€" is a three-byte UTF8 char.
+    let files = [
+        ("blork.rs", "fir€st €€€€ line.\nsecond line"),
+        ("blork2.rs", "first line€€.\n€ second line"),
+    ];
+    for (name, text) in files {
+        sm.new_source_file(PathBuf::from(name).into(), text.to_string());
+    }
+    sm
+}
+
+/// Tests `bytepos_to_file_charpos` on sources that contain multi-byte chars.
+#[test]
+fn t6() {
+    let sm = init_source_map_mbc();
+
+    // (global byte position, expected char position within its file)
+    for (byte_pos, char_pos) in [(3, 3), (6, 4), (56, 12), (61, 15)] {
+        assert_eq!(sm.bytepos_to_file_charpos(BytePos(byte_pos)), CharPos(char_pos));
+    }
+}
+
+/// Tests `span_to_lines` for a span that ends at the end of a `SourceFile`.
+#[test]
+fn t7() {
+    let sm = init_source_map();
+    let second_line = Span::with_root_ctxt(BytePos(12), BytePos(23));
+    let result = sm.span_to_lines(second_line).unwrap();
+
+    assert_eq!(result.file.name, PathBuf::from("blork.rs").into());
+    assert_eq!(result.lines.len(), 1);
+    assert_eq!(result.lines[0].line_index, 1);
+}
+
+/// Given a string like "   ~~~~~~~~~~~~  ", produces a span covering the range marked by the
+/// tildes. The selection string must have the same length as the input, so that the positions
+/// of the first and last `~` are the byte positions of the span. Note that the range can span
+/// lines and so on.
+fn span_from_selection(input: &str, selection: &str) -> Span {
+    assert_eq!(input.len(), selection.len());
+    let first_tilde = selection.find('~').unwrap() as u32;
+    let last_tilde = selection.rfind('~').map_or(first_tilde, |x| x as u32);
+    // The span is half-open, so it ends one past the last `~`.
+    Span::with_root_ctxt(BytePos(first_tilde), BytePos(last_tilde + 1))
+}
+
+/// Tests `span_to_snippet` and `span_to_lines` for a span covering 3
+/// lines in the middle of a file.
+#[test]
+fn span_to_snippet_and_lines_spanning_multiple_lines() {
+    let sm = SourceMap::new(FilePathMapping::empty());
+    let inputtext = "aaaaa\nbbbbBB\nCCC\nDDDDDddddd\neee\n";
+    // The selection must be exactly as long as `inputtext` (`span_from_selection` asserts
+    // this), so every unselected byte is padded with a space.
+    let selection = "     \n    ~~\n~~~\n~~~~~     \n   \n";
+    sm.new_source_file(Path::new("blork.rs").to_owned().into(), inputtext.to_string());
+    let span = span_from_selection(inputtext, selection);
+
+    // Check that we are extracting the text we thought we were extracting.
+    assert_eq!(&sm.span_to_snippet(span).unwrap(), "BB\nCCC\nDDDDD");
+
+    // Check that span_to_lines gives us the complete result with the lines/cols we expected.
+    let lines = sm.span_to_lines(span).unwrap();
+    let expected = vec![
+        LineInfo { line_index: 1, start_col: CharPos(4), end_col: CharPos(6) },
+        LineInfo { line_index: 2, start_col: CharPos(0), end_col: CharPos(3) },
+        LineInfo { line_index: 3, start_col: CharPos(0), end_col: CharPos(5) },
+    ];
+    assert_eq!(lines.lines, expected);
+}
+
+/// Tests `span_to_snippet` for a span that ends at the end of a `SourceFile`.
+#[test]
+fn t8() {
+    let sm = init_source_map();
+    let second_line = Span::with_root_ctxt(BytePos(12), BytePos(23));
+
+    assert_eq!(sm.span_to_snippet(second_line), Ok(String::from("second line")));
+}
+
+/// Tests `span_to_diagnostic_string` for a span that ends at the end of a `SourceFile`.
+#[test]
+fn t9() {
+    let sm = init_source_map();
+    let second_line = Span::with_root_ctxt(BytePos(12), BytePos(23));
+    let rendered = sm.span_to_diagnostic_string(second_line);
+
+    assert_eq!(rendered, "blork.rs:2:1: 2:12");
+}
+
+/// Tests failing to merge two spans on different lines.
+#[test]
+fn span_merging_fail() {
+    let sm = SourceMap::new(FilePathMapping::empty());
+    let inputtext = "bbbb BB\ncc CCC\n";
+    // Each selection must be exactly as long as `inputtext` (`span_from_selection` asserts
+    // this): the first marks `BB` on line one, the second marks `CCC` on line two.
+    let selection1 = "     ~~\n      \n";
+    let selection2 = "       \n   ~~~\n";
+    sm.new_source_file(Path::new("blork.rs").to_owned().into(), inputtext.to_owned());
+    let span1 = span_from_selection(inputtext, selection1);
+    let span2 = span_from_selection(inputtext, selection2);
+
+    assert!(sm.merge_spans(span1, span2).is_none());
+}
+
+/// Tests that the source of an imported file is normalized when it is loaded externally, and
+/// that the source hash is computed from the text before normalization.
+#[test]
+fn t10() {
+    let sm = SourceMap::new(FilePathMapping::empty());
+    let raw_text = "first line.\r\nsecond line";
+    let normalized_text = "first line.\nsecond line";
+
+    let local_file = sm.new_source_file(PathBuf::from("blork.rs").into(), raw_text.to_string());
+
+    assert_eq!(local_file.src.as_ref().unwrap().as_ref(), normalized_text);
+    assert!(
+        local_file.src_hash.matches(raw_text),
+        "src_hash should use the source before normalization"
+    );
+
+    // Take the local file apart so that its metadata can be re-registered as an import.
+    let SourceFile {
+        name,
+        src_hash,
+        start_pos,
+        end_pos,
+        lines,
+        multibyte_chars,
+        non_narrow_chars,
+        normalized_pos,
+        name_hash,
+        ..
+    } = (*local_file).clone();
+
+    let source_len = (end_pos - start_pos).to_usize();
+    let imported_file = sm.new_imported_source_file(
+        name,
+        src_hash,
+        name_hash,
+        source_len,
+        CrateNum::new(0),
+        lines,
+        multibyte_chars,
+        non_narrow_chars,
+        normalized_pos,
+        start_pos,
+        end_pos,
+    );
+
+    assert!(
+        imported_file.external_src.borrow().get_source().is_none(),
+        "imported source file should not have source yet"
+    );
+    imported_file.add_external_src(|| Some(raw_text.to_string()));
+    assert_eq!(
+        imported_file.external_src.borrow().get_source().unwrap().as_ref(),
+        normalized_text,
+        "imported source file should be normalized"
+    );
+}
+
+/// Test helper for locating substrings of a source file as spans.
+trait SourceMapExtension {
+ /// Returns the span corresponding to the `n`th occurrence of `substring` in `source_text`.
+ ///
+ /// In the implementation for `SourceMap`, `n` is zero-based, occurrences are counted without
+ /// overlap, and the byte offsets are shifted by `file.start_pos` to obtain global positions.
+ fn span_substr(
+ &self,
+ file: &Lrc<SourceFile>,
+ source_text: &str,
+ substring: &str,
+ n: usize,
+ ) -> Span;
+}
+
+impl SourceMapExtension for SourceMap {
+    /// Scans `source_text` for non-overlapping occurrences of `substring` and returns the span
+    /// of occurrence number `n` (counting from zero), shifted by the start position of `file`.
+    ///
+    /// Panics if there are not enough occurrences, or if the snippet at the computed span does
+    /// not equal `substring`.
+    fn span_substr(
+        &self,
+        file: &Lrc<SourceFile>,
+        source_text: &str,
+        substring: &str,
+        n: usize,
+    ) -> Span {
+        eprintln!(
+            "span_substr(file={:?}/{:?}, substring={:?}, n={})",
+            file.name, file.start_pos, substring, n
+        );
+        let mut occurrences_seen = 0;
+        let mut search_from = 0;
+        loop {
+            let offset = source_text[search_from..].find(substring).unwrap_or_else(|| {
+                panic!(
+                    "source_text `{}` does not have {} occurrences of `{}`, only {}",
+                    source_text, n, substring, occurrences_seen
+                );
+            });
+            let lo = search_from + offset;
+            let hi = lo + substring.len();
+            if occurrences_seen == n {
+                let base = file.start_pos.0;
+                let span = Span::with_root_ctxt(BytePos(lo as u32 + base), BytePos(hi as u32 + base));
+                assert_eq!(&self.span_to_snippet(span).unwrap()[..], substring);
+                return span;
+            }
+            // Continue the search right after this occurrence.
+            search_from = hi;
+            occurrences_seen += 1;
+        }
+    }
+}
+
+// Converts a unix-style path string into a platform specific `PathBuf` (see `path_str`).
+fn path(p: &str) -> PathBuf {
+    PathBuf::from(path_str(p))
+}
+
+// Converts a unix-style path string into a platform specific path string. The input is
+// returned as-is except on Windows, where `/` becomes `\` and a leading separator is replaced
+// by the drive prefix `X:\`.
+fn path_str(p: &str) -> String {
+    #[cfg(not(windows))]
+    let converted = p.to_string();
+
+    #[cfg(windows)]
+    let converted = {
+        let backslashed = p.replace('/', "\\");
+        match backslashed.strip_prefix('\\') {
+            Some(rest) => ["X:\\", rest].concat(),
+            None => backslashed,
+        }
+    };
+
+    converted
+}
+
+// Converts the unix-style path `p` with `path`, runs it through
+// `mapping.map_prefix`, and returns the first element of the result as a
+// (lossily converted) string.
+fn map_path_prefix(mapping: &FilePathMapping, p: &str) -> String {
+ // It's important that we convert to a string here because that's what
+ // later stages do too (e.g. in the backend), and comparing `Path` values
+ // won't catch some differences at the string level, e.g. "abc" and "abc/"
+ // compare as equal.
+ mapping.map_prefix(path(p)).0.to_string_lossy().to_string()
+}
+
+// Checks prefix remapping for every combination of relative/absolute source
+// prefix and relative/absolute target prefix. Each case maps both a file below
+// the prefix and the prefix itself.
+#[test]
+fn path_prefix_remapping() {
+ // Relative to relative
+ {
+ let mapping = &FilePathMapping::new(vec![(path("abc/def"), path("foo"))]);
+
+ assert_eq!(map_path_prefix(mapping, "abc/def/src/main.rs"), path_str("foo/src/main.rs"));
+ assert_eq!(map_path_prefix(mapping, "abc/def"), path_str("foo"));
+ }
+
+ // Relative to absolute
+ {
+ let mapping = &FilePathMapping::new(vec![(path("abc/def"), path("/foo"))]);
+
+ assert_eq!(map_path_prefix(mapping, "abc/def/src/main.rs"), path_str("/foo/src/main.rs"));
+ assert_eq!(map_path_prefix(mapping, "abc/def"), path_str("/foo"));
+ }
+
+ // Absolute to relative
+ {
+ let mapping = &FilePathMapping::new(vec![(path("/abc/def"), path("foo"))]);
+
+ assert_eq!(map_path_prefix(mapping, "/abc/def/src/main.rs"), path_str("foo/src/main.rs"));
+ assert_eq!(map_path_prefix(mapping, "/abc/def"), path_str("foo"));
+ }
+
+ // Absolute to absolute
+ {
+ let mapping = &FilePathMapping::new(vec![(path("/abc/def"), path("/foo"))]);
+
+ assert_eq!(map_path_prefix(mapping, "/abc/def/src/main.rs"), path_str("/foo/src/main.rs"));
+ assert_eq!(map_path_prefix(mapping, "/abc/def"), path_str("/foo"));
+ }
+}
+
+// Exercises `FilePathMapping::to_embeddable_absolute_path` with a mapping of
+// `/foo` -> `FOO` and `/bar` -> `BAR`. The working directory is `/foo`, itself
+// remapped to `FOO`, except for one case that uses the unmapped `/abc`. Inputs
+// cover local and already-remapped file names, absolute and relative.
+#[test]
+fn path_prefix_remapping_expand_to_absolute() {
+ // "virtual" working directory is relative path
+ let mapping =
+ &FilePathMapping::new(vec![(path("/foo"), path("FOO")), (path("/bar"), path("BAR"))]);
+ let working_directory = path("/foo");
+ let working_directory = RealFileName::Remapped {
+ local_path: Some(working_directory.clone()),
+ virtual_name: mapping.map_prefix(working_directory).0,
+ };
+
+ // The remapped form of the working directory is the relative path `FOO`.
+ assert_eq!(working_directory.remapped_path_if_available(), path("FOO"));
+
+ // Unmapped absolute path
+ assert_eq!(
+ mapping.to_embeddable_absolute_path(
+ RealFileName::LocalPath(path("/foo/src/main.rs")),
+ &working_directory
+ ),
+ RealFileName::Remapped { local_path: None, virtual_name: path("FOO/src/main.rs") }
+ );
+
+ // Unmapped absolute path with unrelated working directory
+ assert_eq!(
+ mapping.to_embeddable_absolute_path(
+ RealFileName::LocalPath(path("/bar/src/main.rs")),
+ &working_directory
+ ),
+ RealFileName::Remapped { local_path: None, virtual_name: path("BAR/src/main.rs") }
+ );
+
+ // Unmapped absolute path that does not match any prefix
+ // (expected to come back unchanged as a local path)
+ assert_eq!(
+ mapping.to_embeddable_absolute_path(
+ RealFileName::LocalPath(path("/quux/src/main.rs")),
+ &working_directory
+ ),
+ RealFileName::LocalPath(path("/quux/src/main.rs")),
+ );
+
+ // Unmapped relative path
+ assert_eq!(
+ mapping.to_embeddable_absolute_path(
+ RealFileName::LocalPath(path("src/main.rs")),
+ &working_directory
+ ),
+ RealFileName::Remapped { local_path: None, virtual_name: path("FOO/src/main.rs") }
+ );
+
+ // Unmapped relative path with `./`
+ assert_eq!(
+ mapping.to_embeddable_absolute_path(
+ RealFileName::LocalPath(path("./src/main.rs")),
+ &working_directory
+ ),
+ RealFileName::Remapped { local_path: None, virtual_name: path("FOO/src/main.rs") }
+ );
+
+ // Unmapped relative path that does not match any prefix
+ // (uses the unmapped working directory `/abc`; the expected result is the joined local path)
+ assert_eq!(
+ mapping.to_embeddable_absolute_path(
+ RealFileName::LocalPath(path("quux/src/main.rs")),
+ &RealFileName::LocalPath(path("/abc")),
+ ),
+ RealFileName::LocalPath(path("/abc/quux/src/main.rs")),
+ );
+
+ // Already remapped absolute path
+ // (the expected result keeps the virtual name and has no `local_path`)
+ assert_eq!(
+ mapping.to_embeddable_absolute_path(
+ RealFileName::Remapped {
+ local_path: Some(path("/foo/src/main.rs")),
+ virtual_name: path("FOO/src/main.rs"),
+ },
+ &working_directory
+ ),
+ RealFileName::Remapped { local_path: None, virtual_name: path("FOO/src/main.rs") }
+ );
+
+ // Already remapped absolute path, with unrelated working directory
+ assert_eq!(
+ mapping.to_embeddable_absolute_path(
+ RealFileName::Remapped {
+ local_path: Some(path("/bar/src/main.rs")),
+ virtual_name: path("BAR/src/main.rs"),
+ },
+ &working_directory
+ ),
+ RealFileName::Remapped { local_path: None, virtual_name: path("BAR/src/main.rs") }
+ );
+
+ // Already remapped relative path
+ // (the expected result is identical to the input)
+ assert_eq!(
+ mapping.to_embeddable_absolute_path(
+ RealFileName::Remapped { local_path: None, virtual_name: path("XYZ/src/main.rs") },
+ &working_directory
+ ),
+ RealFileName::Remapped { local_path: None, virtual_name: path("XYZ/src/main.rs") }
+ );
+}
diff --git a/compiler/rustc_span/src/span_encoding.rs b/compiler/rustc_span/src/span_encoding.rs
new file mode 100644
index 000000000..3ee329e97
--- /dev/null
+++ b/compiler/rustc_span/src/span_encoding.rs
@@ -0,0 +1,150 @@
+// Spans are encoded using 1-bit tag and 2 different encoding formats (one for each tag value).
+// One format is used for keeping span data inline,
+// another contains index into an out-of-line span interner.
+// The encoding format for inline spans was obtained by optimizing over crates in rustc/libstd.
+// See https://internals.rust-lang.org/t/rfc-compiler-refactoring-spans/1357/28
+
+use crate::def_id::LocalDefId;
+use crate::hygiene::SyntaxContext;
+use crate::SPAN_TRACK;
+use crate::{BytePos, SpanData};
+
+use rustc_data_structures::fx::FxIndexSet;
+
+/// A compressed span.
+///
+/// Whereas [`SpanData`] is 12 bytes, which is a bit too big to stick everywhere, `Span`
+/// is a form that only takes up 8 bytes, with less space for the length and
+/// context. The vast majority (99.9%+) of `SpanData` instances will fit within
+/// those 8 bytes; any `SpanData` whose fields don't fit into a `Span` are
+/// stored in a separate interner table, and the `Span` will index into that
+/// table. Interning is rare enough that the cost is low, but common enough
+/// that the code is exercised regularly.
+///
+/// An earlier version of this code used only 4 bytes for `Span`, but that was
+/// slower because only 80--90% of spans could be stored inline (even less in
+/// very large crates) and so the interner was used a lot more.
+///
+/// Inline (compressed) format:
+/// - `span.base_or_index == span_data.lo`
+/// - `span.len_or_tag == len == span_data.hi - span_data.lo` (must be `<= MAX_LEN`)
+/// - `span.ctxt == span_data.ctxt` (must be `<= MAX_CTXT`)
+///
+/// Interned format:
+/// - `span.base_or_index == index` (indexes into the interner table)
+/// - `span.len_or_tag == LEN_TAG` (high bit set, all other bits are zero)
+/// - `span.ctxt == 0`
+///
+/// The inline form uses 0 for the tag value (rather than 1) so that we don't
+/// need to mask out the tag bit when getting the length, and so that the
+/// dummy span can be all zeroes.
+///
+/// Notes about the choice of field sizes:
+/// - `base` is 32 bits in both `Span` and `SpanData`, which means that `base`
+/// values never cause interning. The number of bits needed for `base`
+/// depends on the crate size. 32 bits allows up to 4 GiB of code in a crate.
+/// - `len` is 15 bits in `Span` (a u16, minus 1 bit for the tag) and 32 bits
+/// in `SpanData`, which means that large `len` values will cause interning.
+/// The number of bits needed for `len` does not depend on the crate size.
+/// The most common numbers of bits for `len` are from 0 to 7, with a peak usually
+/// at 3 or 4, and then it drops off quickly from 8 onwards. 15 bits is enough
+/// for 99.99%+ of cases, but larger values (sometimes 20+ bits) might occur
+/// dozens of times in a typical crate.
+/// - `ctxt` is 16 bits in `Span` and 32 bits in `SpanData`, which means that
+/// large `ctxt` values will cause interning. The number of bits needed for
+/// `ctxt` values depend partly on the crate size and partly on the form of
+/// the code. No crates in `rustc-perf` need more than 15 bits for `ctxt`,
+/// but larger crates might need more than 16 bits.
+///
+/// In order to reliably use parented spans in incremental compilation,
+/// the dependency to the parent definition's span must be tracked. This is performed
+/// using the callback `SPAN_TRACK` to access the query engine.
+///
+#[derive(Clone, Copy, Eq, PartialEq, Hash)]
+#[rustc_pass_by_value]
+pub struct Span {
+ // Inline format: `span_data.lo`. Interned format: index into the interner table.
+ base_or_index: u32,
+ // Inline format: the length `hi - lo` (at most `MAX_LEN`). Interned format: `LEN_TAG`.
+ len_or_tag: u16,
+ // Inline format: the syntax context (at most `MAX_CTXT`). Interned format: zero.
+ ctxt_or_zero: u16,
+}
+
+// Value stored in `len_or_tag` to mark a span as interned: only the high bit is set.
+const LEN_TAG: u16 = 0b1000_0000_0000_0000;
+// Largest length that fits the inline format (15 bits).
+const MAX_LEN: u32 = 0b0111_1111_1111_1111;
+// Largest syntax context value that fits the inline format (16 bits).
+const MAX_CTXT: u32 = 0b1111_1111_1111_1111;
+
+/// Dummy span, both position and length are zero, syntax context is zero as well.
+pub const DUMMY_SP: Span = Span { base_or_index: 0, len_or_tag: 0, ctxt_or_zero: 0 };
+
+impl Span {
+ /// Builds a compressed span from its expanded components.
+ ///
+ /// If `lo > hi` the two positions are swapped first. The inline format is used when
+ /// the length is at most `MAX_LEN`, the context is at most `MAX_CTXT` and there is no
+ /// `parent`; otherwise the data is stored in the span interner and the span holds
+ /// the resulting index.
+ #[inline]
+ pub fn new(
+ mut lo: BytePos,
+ mut hi: BytePos,
+ ctxt: SyntaxContext,
+ parent: Option<LocalDefId>,
+ ) -> Self {
+ // Normalize the order so that `hi.0 - lo.0` below cannot underflow.
+ if lo > hi {
+ std::mem::swap(&mut lo, &mut hi);
+ }
+
+ let (base, len, ctxt2) = (lo.0, hi.0 - lo.0, ctxt.as_u32());
+
+ if len <= MAX_LEN && ctxt2 <= MAX_CTXT && parent.is_none() {
+ // Inline format.
+ // The bounds checked above guarantee that both `as u16` casts are lossless.
+ Span { base_or_index: base, len_or_tag: len as u16, ctxt_or_zero: ctxt2 as u16 }
+ } else {
+ // Interned format.
+ let index =
+ with_span_interner(|interner| interner.intern(&SpanData { lo, hi, ctxt, parent }));
+ Span { base_or_index: index, len_or_tag: LEN_TAG, ctxt_or_zero: 0 }
+ }
+ }
+
+ /// Returns the expanded representation of this span.
+ ///
+ /// When the span has a parent, the `SPAN_TRACK` callback is invoked with that parent
+ /// before the data is returned.
+ #[inline]
+ pub fn data(self) -> SpanData {
+ let data = self.data_untracked();
+ if let Some(parent) = data.parent {
+ (*SPAN_TRACK)(parent);
+ }
+ data
+ }
+
+ /// Internal function to translate between an encoded span and the expanded representation.
+ /// This function must not be used outside the incremental engine.
+ #[inline]
+ pub fn data_untracked(self) -> SpanData {
+ // Any `len_or_tag` value other than `LEN_TAG` is an inline length.
+ if self.len_or_tag != LEN_TAG {
+ // Inline format.
+ debug_assert!(self.len_or_tag as u32 <= MAX_LEN);
+ SpanData {
+ lo: BytePos(self.base_or_index),
+ hi: BytePos(self.base_or_index + self.len_or_tag as u32),
+ ctxt: SyntaxContext::from_u32(self.ctxt_or_zero as u32),
+ parent: None,
+ }
+ } else {
+ // Interned format.
+ debug_assert!(self.ctxt_or_zero == 0);
+ // `base_or_index` is the position of the data in the interner table.
+ let index = self.base_or_index;
+ with_span_interner(|interner| interner.spans[index as usize])
+ }
+ }
+}
+
+/// Out-of-line table for span data that does not use the inline `Span` format.
+/// A `Span` in interned format stores an index into `spans`.
+#[derive(Default)]
+pub struct SpanInterner {
+ spans: FxIndexSet<SpanData>,
+}
+
+impl SpanInterner {
+ // Inserts `span_data` into the table and returns its index, converted to `u32`
+ // with an unchecked `as` cast.
+ fn intern(&mut self, span_data: &SpanData) -> u32 {
+ // Only the index from `insert_full` is used; the second tuple element is ignored.
+ let (index, _) = self.spans.insert_full(*span_data);
+ index as u32
+ }
+}
+
+// Runs `f` with mutable access to the span interner stored in the session globals.
+// The interner is reached through `crate::with_session_globals` and locked for the
+// duration of the call; the result of `f` is returned.
+#[inline]
+fn with_span_interner<T, F: FnOnce(&mut SpanInterner) -> T>(f: F) -> T {
+ crate::with_session_globals(|session_globals| f(&mut *session_globals.span_interner.lock()))
+}
diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs
new file mode 100644
index 000000000..791160ff6
--- /dev/null
+++ b/compiler/rustc_span/src/symbol.rs
@@ -0,0 +1,2067 @@
+//! An "interner" is a data structure that associates values with usize tags and
+//! allows bidirectional lookup; i.e., given a value, one can easily find the
+//! tag, and vice versa.
+
+use rustc_arena::DroplessArena;
+use rustc_data_structures::fx::FxHashMap;
+use rustc_data_structures::stable_hasher::{HashStable, StableHasher, ToStableHashKey};
+use rustc_data_structures::sync::Lock;
+use rustc_macros::HashStable_Generic;
+use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
+
+use std::cmp::{Ord, PartialEq, PartialOrd};
+use std::fmt;
+use std::hash::{Hash, Hasher};
+use std::str;
+
+use crate::{with_session_globals, Edition, Span, DUMMY_SP};
+
+#[cfg(test)]
+mod tests;
+
+// The proc macro code for this is in `compiler/rustc_macros/src/symbols.rs`.
+symbols! {
+ // After modifying this list adjust `is_special`, `is_used_keyword`/`is_unused_keyword`,
+ // this should be rarely necessary though if the keywords are kept in alphabetic order.
+ Keywords {
+ // Special reserved identifiers used internally for elided lifetimes,
+ // unnamed method parameters, crate root module, error recovery etc.
+ Empty: "",
+ PathRoot: "{{root}}",
+ DollarCrate: "$crate",
+ Underscore: "_",
+
+ // Keywords that are used in stable Rust.
+ As: "as",
+ Break: "break",
+ Const: "const",
+ Continue: "continue",
+ Crate: "crate",
+ Else: "else",
+ Enum: "enum",
+ Extern: "extern",
+ False: "false",
+ Fn: "fn",
+ For: "for",
+ If: "if",
+ Impl: "impl",
+ In: "in",
+ Let: "let",
+ Loop: "loop",
+ Match: "match",
+ Mod: "mod",
+ Move: "move",
+ Mut: "mut",
+ Pub: "pub",
+ Ref: "ref",
+ Return: "return",
+ SelfLower: "self",
+ SelfUpper: "Self",
+ Static: "static",
+ Struct: "struct",
+ Super: "super",
+ Trait: "trait",
+ True: "true",
+ Type: "type",
+ Unsafe: "unsafe",
+ Use: "use",
+ Where: "where",
+ While: "while",
+
+ // Keywords that are used in unstable Rust or reserved for future use.
+ Abstract: "abstract",
+ Become: "become",
+ Box: "box",
+ Do: "do",
+ Final: "final",
+ Macro: "macro",
+ Override: "override",
+ Priv: "priv",
+ Typeof: "typeof",
+ Unsized: "unsized",
+ Virtual: "virtual",
+ Yield: "yield",
+
+ // Edition-specific keywords that are used in stable Rust.
+ Async: "async", // >= 2018 Edition only
+ Await: "await", // >= 2018 Edition only
+ Dyn: "dyn", // >= 2018 Edition only
+
+ // Edition-specific keywords that are used in unstable Rust or reserved for future use.
+ Try: "try", // >= 2018 Edition only
+
+ // Special lifetime names
+ UnderscoreLifetime: "'_",
+ StaticLifetime: "'static",
+
+ // Weak keywords, have special meaning only in specific contexts.
+ Auto: "auto",
+ Catch: "catch",
+ Default: "default",
+ MacroRules: "macro_rules",
+ Raw: "raw",
+ Union: "union",
+ Yeet: "yeet",
+ }
+
+ // Pre-interned symbols that can be referred to with `rustc_span::sym::*`.
+ //
+ // The symbol is the stringified identifier unless otherwise specified, in
+ // which case the name should mention the non-identifier punctuation.
+ // E.g. `sym::proc_dash_macro` represents "proc-macro", and it shouldn't be
+ // called `sym::proc_macro` because then it's easy to mistakenly think it
+ // represents "proc_macro".
+ //
+ // As well as the symbols listed, there are symbols for the strings
+ // "0", "1", ..., "9", which are accessible via `sym::integer`.
+ //
+ // The proc macro will abort if symbols are not in alphabetical order (as
+ // defined by `impl Ord for str`) or if any symbols are duplicated. Vim
+ // users can sort the list by selecting it and executing the command
+ // `:'<,'>!LC_ALL=C sort`.
+ //
+ // There is currently no checking that all symbols are used; that would be
+ // nice to have.
+ Symbols {
+ AcqRel,
+ Acquire,
+ AddSubdiagnostic,
+ Alignment,
+ Any,
+ Arc,
+ Argument,
+ ArgumentV1,
+ ArgumentV1Methods,
+ Arguments,
+ AsMut,
+ AsRef,
+ AssertParamIsClone,
+ AssertParamIsCopy,
+ AssertParamIsEq,
+ AtomicBool,
+ AtomicI128,
+ AtomicI16,
+ AtomicI32,
+ AtomicI64,
+ AtomicI8,
+ AtomicIsize,
+ AtomicPtr,
+ AtomicU128,
+ AtomicU16,
+ AtomicU32,
+ AtomicU64,
+ AtomicU8,
+ AtomicUsize,
+ BTreeEntry,
+ BTreeMap,
+ BTreeSet,
+ BinaryHeap,
+ Borrow,
+ Break,
+ C,
+ CStr,
+ CString,
+ Capture,
+ Center,
+ Clone,
+ Continue,
+ Copy,
+ Count,
+ Cow,
+ Debug,
+ DebugStruct,
+ DebugTuple,
+ Decodable,
+ Decoder,
+ DecorateLint,
+ Default,
+ Deref,
+ DiagnosticMessage,
+ DirBuilder,
+ Display,
+ DoubleEndedIterator,
+ Duration,
+ Encodable,
+ Encoder,
+ Eq,
+ Equal,
+ Err,
+ Error,
+ File,
+ FileType,
+ Fn,
+ FnMut,
+ FnOnce,
+ FormatSpec,
+ Formatter,
+ From,
+ FromIterator,
+ FromResidual,
+ Future,
+ FxHashMap,
+ FxHashSet,
+ GlobalAlloc,
+ Hash,
+ HashMap,
+ HashMapEntry,
+ HashSet,
+ Hasher,
+ Implied,
+ Input,
+ Into,
+ IntoFuture,
+ IntoIterator,
+ IoRead,
+ IoWrite,
+ IrTyKind,
+ Is,
+ ItemContext,
+ Iterator,
+ Layout,
+ Left,
+ LinkedList,
+ LintPass,
+ Mutex,
+ N,
+ NonZeroI128,
+ NonZeroI16,
+ NonZeroI32,
+ NonZeroI64,
+ NonZeroI8,
+ NonZeroU128,
+ NonZeroU16,
+ NonZeroU32,
+ NonZeroU64,
+ NonZeroU8,
+ None,
+ Ok,
+ Option,
+ Ord,
+ Ordering,
+ OsStr,
+ OsString,
+ Output,
+ Param,
+ PartialEq,
+ PartialOrd,
+ Path,
+ PathBuf,
+ Pending,
+ Pin,
+ Pointer,
+ Poll,
+ ProcMacro,
+ ProcMacroHack,
+ ProceduralMasqueradeDummyType,
+ Range,
+ RangeFrom,
+ RangeFull,
+ RangeInclusive,
+ RangeTo,
+ RangeToInclusive,
+ Rc,
+ Ready,
+ Receiver,
+ Relaxed,
+ Release,
+ Result,
+ Return,
+ Right,
+ Rust,
+ RustcDecodable,
+ RustcEncodable,
+ Send,
+ SeqCst,
+ SessionDiagnostic,
+ SliceIndex,
+ Some,
+ String,
+ StructuralEq,
+ StructuralPartialEq,
+ SubdiagnosticMessage,
+ Sync,
+ Target,
+ ToOwned,
+ ToString,
+ Try,
+ TryCaptureGeneric,
+ TryCapturePrintable,
+ TryFrom,
+ TryInto,
+ Ty,
+ TyCtxt,
+ TyKind,
+ Unknown,
+ UnsafeArg,
+ Vec,
+ VecDeque,
+ Wrapper,
+ Yield,
+ _DECLS,
+ _Self,
+ __D,
+ __H,
+ __S,
+ __awaitee,
+ __try_var,
+ _d,
+ _e,
+ _task_context,
+ a32,
+ aarch64_target_feature,
+ aarch64_ver_target_feature,
+ abi,
+ abi_amdgpu_kernel,
+ abi_avr_interrupt,
+ abi_c_cmse_nonsecure_call,
+ abi_efiapi,
+ abi_msp430_interrupt,
+ abi_ptx,
+ abi_sysv64,
+ abi_thiscall,
+ abi_unadjusted,
+ abi_vectorcall,
+ abi_x86_interrupt,
+ abort,
+ aborts,
+ add,
+ add_assign,
+ add_with_overflow,
+ address,
+ adt_const_params,
+ advanced_slice_patterns,
+ adx_target_feature,
+ alias,
+ align,
+ align_offset,
+ alignstack,
+ all,
+ alloc,
+ alloc_error_handler,
+ alloc_layout,
+ alloc_zeroed,
+ allocator,
+ allocator_api,
+ allocator_internals,
+ allow,
+ allow_fail,
+ allow_internal_unsafe,
+ allow_internal_unstable,
+ allowed,
+ alu32,
+ always,
+ and,
+ and_then,
+ anonymous_lifetime_in_impl_trait,
+ any,
+ append_const_msg,
+ arbitrary_enum_discriminant,
+ arbitrary_self_types,
+ args,
+ arith_offset,
+ arm,
+ arm_target_feature,
+ array,
+ arrays,
+ as_ptr,
+ as_ref,
+ as_str,
+ asm,
+ asm_const,
+ asm_experimental_arch,
+ asm_sym,
+ asm_unwind,
+ assert,
+ assert_eq_macro,
+ assert_inhabited,
+ assert_macro,
+ assert_ne_macro,
+ assert_receiver_is_total_eq,
+ assert_uninit_valid,
+ assert_zero_valid,
+ asserting,
+ associated_const_equality,
+ associated_consts,
+ associated_type_bounds,
+ associated_type_defaults,
+ associated_types,
+ assume,
+ assume_init,
+ async_await,
+ async_closure,
+ atomic,
+ atomic_mod,
+ atomics,
+ att_syntax,
+ attr,
+ attr_literals,
+ attributes,
+ augmented_assignments,
+ auto_traits,
+ automatically_derived,
+ avx,
+ avx512_target_feature,
+ avx512bw,
+ avx512f,
+ await_macro,
+ bang,
+ begin_panic,
+ bench,
+ bin,
+ bind_by_move_pattern_guards,
+ bindings_after_at,
+ bitand,
+ bitand_assign,
+ bitor,
+ bitor_assign,
+ bitreverse,
+ bitxor,
+ bitxor_assign,
+ black_box,
+ block,
+ bool,
+ borrowck_graphviz_format,
+ borrowck_graphviz_postflow,
+ borrowck_graphviz_preflow,
+ box_free,
+ box_patterns,
+ box_syntax,
+ bpf_target_feature,
+ braced_empty_structs,
+ branch,
+ breakpoint,
+ bridge,
+ bswap,
+ c_str,
+ c_unwind,
+ c_variadic,
+ call,
+ call_mut,
+ call_once,
+ caller_location,
+ capture_disjoint_fields,
+ cdylib,
+ ceilf32,
+ ceilf64,
+ cfg,
+ cfg_accessible,
+ cfg_attr,
+ cfg_attr_multi,
+ cfg_doctest,
+ cfg_eval,
+ cfg_hide,
+ cfg_macro,
+ cfg_panic,
+ cfg_sanitize,
+ cfg_target_abi,
+ cfg_target_compact,
+ cfg_target_feature,
+ cfg_target_has_atomic,
+ cfg_target_has_atomic_equal_alignment,
+ cfg_target_has_atomic_load_store,
+ cfg_target_thread_local,
+ cfg_target_vendor,
+ cfg_version,
+ cfi,
+ char,
+ client,
+ clippy,
+ clobber_abi,
+ clone,
+ clone_closures,
+ clone_from,
+ closure,
+ closure_lifetime_binder,
+ closure_to_fn_coercion,
+ closure_track_caller,
+ cmp,
+ cmp_max,
+ cmp_min,
+ cmpxchg16b_target_feature,
+ cmse_nonsecure_entry,
+ coerce_unsized,
+ cold,
+ column,
+ column_macro,
+ compare_and_swap,
+ compare_exchange,
+ compare_exchange_weak,
+ compile_error,
+ compile_error_macro,
+ compiler,
+ compiler_builtins,
+ compiler_fence,
+ concat,
+ concat_bytes,
+ concat_idents,
+ concat_macro,
+ conservative_impl_trait,
+ console,
+ const_allocate,
+ const_async_blocks,
+ const_compare_raw_pointers,
+ const_constructor,
+ const_deallocate,
+ const_eval_limit,
+ const_eval_select,
+ const_eval_select_ct,
+ const_evaluatable_checked,
+ const_extern_fn,
+ const_fn,
+ const_fn_floating_point_arithmetic,
+ const_fn_fn_ptr_basics,
+ const_fn_trait_bound,
+ const_fn_transmute,
+ const_fn_union,
+ const_fn_unsize,
+ const_for,
+ const_format_args,
+ const_generic_defaults,
+ const_generics,
+ const_generics_defaults,
+ const_if_match,
+ const_impl_trait,
+ const_in_array_repeat_expressions,
+ const_indexing,
+ const_let,
+ const_loop,
+ const_mut_refs,
+ const_panic,
+ const_panic_fmt,
+ const_precise_live_drops,
+ const_raw_ptr_deref,
+ const_raw_ptr_to_usize_cast,
+ const_refs_to_cell,
+ const_trait,
+ const_trait_bound_opt_out,
+ const_trait_impl,
+ const_transmute,
+ const_try,
+ constant,
+ constructor,
+ contents,
+ context,
+ convert,
+ copy,
+ copy_closures,
+ copy_nonoverlapping,
+ copysignf32,
+ copysignf64,
+ core,
+ core_intrinsics,
+ core_panic,
+ core_panic_2015_macro,
+ core_panic_macro,
+ cosf32,
+ cosf64,
+ count,
+ cr,
+ crate_id,
+ crate_in_paths,
+ crate_local,
+ crate_name,
+ crate_type,
+ crate_visibility_modifier,
+ crt_dash_static: "crt-static",
+ cstring_type,
+ ctlz,
+ ctlz_nonzero,
+ ctpop,
+ cttz,
+ cttz_nonzero,
+ custom_attribute,
+ custom_derive,
+ custom_inner_attributes,
+ custom_test_frameworks,
+ d,
+ d32,
+ dbg_macro,
+ dead_code,
+ dealloc,
+ debug,
+ debug_assert_eq_macro,
+ debug_assert_macro,
+ debug_assert_ne_macro,
+ debug_assertions,
+ debug_struct,
+ debug_struct_fields_finish,
+ debug_trait_builder,
+ debug_tuple,
+ debug_tuple_fields_finish,
+ debugger_visualizer,
+ decl_macro,
+ declare_lint_pass,
+ decode,
+ default_alloc_error_handler,
+ default_lib_allocator,
+ default_method_body_is_const,
+ default_type_parameter_fallback,
+ default_type_params,
+ delay_span_bug_from_inside_query,
+ deny,
+ deprecated,
+ deprecated_safe,
+ deprecated_suggestion,
+ deref,
+ deref_method,
+ deref_mut,
+ deref_target,
+ derive,
+ derive_default_enum,
+ destruct,
+ destructuring_assignment,
+ diagnostic,
+ direct,
+ discriminant_kind,
+ discriminant_type,
+ discriminant_value,
+ dispatch_from_dyn,
+ display_trait,
+ div,
+ div_assign,
+ doc,
+ doc_alias,
+ doc_auto_cfg,
+ doc_cfg,
+ doc_cfg_hide,
+ doc_keyword,
+ doc_masked,
+ doc_notable_trait,
+ doc_primitive,
+ doc_spotlight,
+ doctest,
+ document_private_items,
+ dotdot: "..",
+ dotdot_in_tuple_patterns,
+ dotdoteq_in_patterns,
+ dreg,
+ dreg_low16,
+ dreg_low8,
+ drop,
+ drop_in_place,
+ drop_types_in_const,
+ dropck_eyepatch,
+ dropck_parametricity,
+ dylib,
+ dyn_metadata,
+ dyn_trait,
+ e,
+ edition_macro_pats,
+ edition_panic,
+ eh_catch_typeinfo,
+ eh_personality,
+ emit_enum,
+ emit_enum_variant,
+ emit_enum_variant_arg,
+ emit_struct,
+ emit_struct_field,
+ enable,
+ enclosing_scope,
+ encode,
+ end,
+ env,
+ env_macro,
+ eprint_macro,
+ eprintln_macro,
+ eq,
+ ermsb_target_feature,
+ exact_div,
+ except,
+ exchange_malloc,
+ exclusive_range_pattern,
+ exhaustive_integer_patterns,
+ exhaustive_patterns,
+ existential_type,
+ exp2f32,
+ exp2f64,
+ expect,
+ expected,
+ expf32,
+ expf64,
+ explicit_generic_args_with_impl_trait,
+ export_name,
+ expr,
+ extended_key_value_attributes,
+ extern_absolute_paths,
+ extern_crate_item_prelude,
+ extern_crate_self,
+ extern_in_paths,
+ extern_prelude,
+ extern_types,
+ external_doc,
+ f,
+ f16c_target_feature,
+ f32,
+ f64,
+ fabsf32,
+ fabsf64,
+ fadd_fast,
+ fake_variadic,
+ fdiv_fast,
+ feature,
+ fence,
+ ferris: "🦀",
+ fetch_update,
+ ffi,
+ ffi_const,
+ ffi_pure,
+ ffi_returns_twice,
+ field,
+ field_init_shorthand,
+ file,
+ file_macro,
+ fill,
+ finish,
+ flags,
+ float,
+ float_to_int_unchecked,
+ floorf32,
+ floorf64,
+ fmaf32,
+ fmaf64,
+ fmt,
+ fmt_as_str,
+ fmt_internals,
+ fmul_fast,
+ fn_align,
+ fn_must_use,
+ fn_mut,
+ fn_once,
+ fn_once_output,
+ forbid,
+ forget,
+ format,
+ format_args,
+ format_args_capture,
+ format_args_macro,
+ format_args_nl,
+ format_macro,
+ fp,
+ freeze,
+ freg,
+ frem_fast,
+ from,
+ from_desugaring,
+ from_generator,
+ from_iter,
+ from_method,
+ from_output,
+ from_residual,
+ from_size_align_unchecked,
+ from_usize,
+ from_yeet,
+ fsub_fast,
+ fundamental,
+ future,
+ future_trait,
+ gdb_script_file,
+ ge,
+ gen_future,
+ gen_kill,
+ generator,
+ generator_return,
+ generator_state,
+ generators,
+ generic_arg_infer,
+ generic_assert,
+ generic_associated_types,
+ generic_associated_types_extended,
+ generic_const_exprs,
+ generic_param_attrs,
+ get_context,
+ global_allocator,
+ global_asm,
+ globs,
+ gt,
+ half_open_range_patterns,
+ hash,
+ hexagon_target_feature,
+ hidden,
+ homogeneous_aggregate,
+ html_favicon_url,
+ html_logo_url,
+ html_no_source,
+ html_playground_url,
+ html_root_url,
+ hwaddress,
+ i,
+ i128,
+ i128_type,
+ i16,
+ i32,
+ i64,
+ i8,
+ ident,
+ if_let,
+ if_let_guard,
+ if_while_or_patterns,
+ ignore,
+ impl_header_lifetime_elision,
+ impl_lint_pass,
+ impl_macros,
+ impl_trait_in_bindings,
+ implied_by,
+ import,
+ import_shadowing,
+ imported_main,
+ in_band_lifetimes,
+ include,
+ include_bytes,
+ include_bytes_macro,
+ include_macro,
+ include_str,
+ include_str_macro,
+ inclusive_range_syntax,
+ index,
+ index_mut,
+ infer_outlives_requirements,
+ infer_static_outlives_requirements,
+ inherent_associated_types,
+ inlateout,
+ inline,
+ inline_const,
+ inline_const_pat,
+ inout,
+ instruction_set,
+ integer_: "integer",
+ integral,
+ intel,
+ into_future,
+ into_iter,
+ intra_doc_pointers,
+ intrinsics,
+ irrefutable_let_patterns,
+ isa_attribute,
+ isize,
+ issue,
+ issue_5723_bootstrap,
+ issue_tracker_base_url,
+ item,
+ item_like_imports,
+ iter,
+ iter_repeat,
+ keyword,
+ kind,
+ kreg,
+ kreg0,
+ label,
+ label_break_value,
+ lang,
+ lang_items,
+ large_assignments,
+ lateout,
+ lazy_normalization_consts,
+ le,
+ len,
+ let_chains,
+ let_else,
+ lhs,
+ lib,
+ libc,
+ lifetime,
+ likely,
+ line,
+ line_macro,
+ link,
+ link_args,
+ link_cfg,
+ link_llvm_intrinsics,
+ link_name,
+ link_ordinal,
+ link_section,
+ linkage,
+ linker,
+ lint_reasons,
+ literal,
+ load,
+ loaded_from_disk,
+ local,
+ local_inner_macros,
+ log10f32,
+ log10f64,
+ log2f32,
+ log2f64,
+ log_syntax,
+ logf32,
+ logf64,
+ loop_break_value,
+ lt,
+ macro_at_most_once_rep,
+ macro_attributes_in_derive_output,
+ macro_escape,
+ macro_export,
+ macro_lifetime_matcher,
+ macro_literal_matcher,
+ macro_metavar_expr,
+ macro_reexport,
+ macro_use,
+ macro_vis_matcher,
+ macros_in_extern,
+ main,
+ managed_boxes,
+ manually_drop,
+ map,
+ marker,
+ marker_trait_attr,
+ masked,
+ match_beginning_vert,
+ match_default_bindings,
+ matches_macro,
+ maxnumf32,
+ maxnumf64,
+ may_dangle,
+ may_unwind,
+ maybe_uninit,
+ maybe_uninit_uninit,
+ maybe_uninit_zeroed,
+ mem_discriminant,
+ mem_drop,
+ mem_forget,
+ mem_replace,
+ mem_size_of,
+ mem_size_of_val,
+ mem_uninitialized,
+ mem_variant_count,
+ mem_zeroed,
+ member_constraints,
+ memory,
+ memtag,
+ message,
+ meta,
+ metadata_type,
+ min_align_of,
+ min_align_of_val,
+ min_const_fn,
+ min_const_generics,
+ min_const_unsafe_fn,
+ min_specialization,
+ min_type_alias_impl_trait,
+ minnumf32,
+ minnumf64,
+ mips_target_feature,
+ miri,
+ misc,
+ mmx_reg,
+ modifiers,
+ module,
+ module_path,
+ module_path_macro,
+ more_qualified_paths,
+ more_struct_aliases,
+ movbe_target_feature,
+ move_ref_pattern,
+ move_size_limit,
+ mul,
+ mul_assign,
+ mul_with_overflow,
+ must_not_suspend,
+ must_use,
+ naked,
+ naked_functions,
+ name,
+ names,
+ native_link_modifiers,
+ native_link_modifiers_as_needed,
+ native_link_modifiers_bundle,
+ native_link_modifiers_verbatim,
+ native_link_modifiers_whole_archive,
+ natvis_file,
+ ne,
+ nearbyintf32,
+ nearbyintf64,
+ needs_allocator,
+ needs_drop,
+ needs_panic_runtime,
+ neg,
+ negate_unsigned,
+ negative_impls,
+ neon,
+ never,
+ never_type,
+ never_type_fallback,
+ new,
+ new_unchecked,
+ next,
+ nll,
+ no,
+ no_builtins,
+ no_core,
+ no_coverage,
+ no_crate_inject,
+ no_debug,
+ no_default_passes,
+ no_implicit_prelude,
+ no_inline,
+ no_link,
+ no_main,
+ no_mangle,
+ no_sanitize,
+ no_stack_check,
+ no_start,
+ no_std,
+ nomem,
+ non_ascii_idents,
+ non_exhaustive,
+ non_exhaustive_omitted_patterns_lint,
+ non_modrs_mods,
+ none_error,
+ nontemporal_store,
+ noop_method_borrow,
+ noop_method_clone,
+ noop_method_deref,
+ noreturn,
+ nostack,
+ not,
+ notable_trait,
+ note,
+ object_safe_for_dispatch,
+ of,
+ offset,
+ omit_gdb_pretty_printer_section,
+ on,
+ on_unimplemented,
+ oom,
+ opaque,
+ ops,
+ opt_out_copy,
+ optimize,
+ optimize_attribute,
+ optin_builtin_traits,
+ option,
+ option_env,
+ option_env_macro,
+ options,
+ or,
+ or_patterns,
+ other,
+ out,
+ overlapping_marker_traits,
+ owned_box,
+ packed,
+ panic,
+ panic_2015,
+ panic_2021,
+ panic_abort,
+ panic_bounds_check,
+ panic_display,
+ panic_fmt,
+ panic_handler,
+ panic_impl,
+ panic_implementation,
+ panic_info,
+ panic_location,
+ panic_no_unwind,
+ panic_runtime,
+ panic_str,
+ panic_unwind,
+ panicking,
+ param_attrs,
+ partial_cmp,
+ partial_ord,
+ passes,
+ pat,
+ pat_param,
+ path,
+ pattern_parentheses,
+ phantom_data,
+ pin,
+ platform_intrinsics,
+ plugin,
+ plugin_registrar,
+ plugins,
+ pointee_trait,
+ pointer,
+ pointer_trait_fmt,
+ poll,
+ position,
+ post_dash_lto: "post-lto",
+ powerpc_target_feature,
+ powf32,
+ powf64,
+ powif32,
+ powif64,
+ pre_dash_lto: "pre-lto",
+ precise_pointer_size_matching,
+ precision,
+ pref_align_of,
+ prefetch_read_data,
+ prefetch_read_instruction,
+ prefetch_write_data,
+ prefetch_write_instruction,
+ preg,
+ prelude,
+ prelude_import,
+ preserves_flags,
+ primitive,
+ print_macro,
+ println_macro,
+ proc_dash_macro: "proc-macro",
+ proc_macro,
+ proc_macro_attribute,
+ proc_macro_def_site,
+ proc_macro_derive,
+ proc_macro_expr,
+ proc_macro_gen,
+ proc_macro_hygiene,
+ proc_macro_internals,
+ proc_macro_mod,
+ proc_macro_non_items,
+ proc_macro_path_invoc,
+ profiler_builtins,
+ profiler_runtime,
+ ptr,
+ ptr_guaranteed_eq,
+ ptr_guaranteed_ne,
+ ptr_null,
+ ptr_null_mut,
+ ptr_offset_from,
+ ptr_offset_from_unsigned,
+ pub_macro_rules,
+ pub_restricted,
+ pure,
+ pushpop_unsafe,
+ qreg,
+ qreg_low4,
+ qreg_low8,
+ quad_precision_float,
+ question_mark,
+ quote,
+ range_inclusive_new,
+ raw_dylib,
+ raw_eq,
+ raw_identifiers,
+ raw_ref_op,
+ re_rebalance_coherence,
+ read_enum,
+ read_enum_variant,
+ read_enum_variant_arg,
+ read_struct,
+ read_struct_field,
+ readonly,
+ realloc,
+ reason,
+ receiver,
+ recursion_limit,
+ reexport_test_harness_main,
+ ref_unwind_safe_trait,
+ reference,
+ reflect,
+ reg,
+ reg16,
+ reg32,
+ reg64,
+ reg_abcd,
+ reg_byte,
+ reg_iw,
+ reg_nonzero,
+ reg_pair,
+ reg_ptr,
+ reg_upper,
+ register_attr,
+ register_tool,
+ relaxed_adts,
+ relaxed_struct_unsize,
+ rem,
+ rem_assign,
+ repr,
+ repr128,
+ repr_align,
+ repr_align_enum,
+ repr_packed,
+ repr_simd,
+ repr_transparent,
+ residual,
+ result,
+ rhs,
+ rintf32,
+ rintf64,
+ riscv_target_feature,
+ rlib,
+ rotate_left,
+ rotate_right,
+ roundf32,
+ roundf64,
+ rt,
+ rtm_target_feature,
+ rust,
+ rust_2015,
+ rust_2015_preview,
+ rust_2018,
+ rust_2018_preview,
+ rust_2021,
+ rust_2021_preview,
+ rust_2024,
+ rust_2024_preview,
+ rust_begin_unwind,
+ rust_cold_cc,
+ rust_eh_catch_typeinfo,
+ rust_eh_personality,
+ rust_eh_register_frames,
+ rust_eh_unregister_frames,
+ rust_oom,
+ rustc,
+ rustc_allocator,
+ rustc_allocator_nounwind,
+ rustc_allocator_zeroed,
+ rustc_allow_const_fn_unstable,
+ rustc_allow_incoherent_impl,
+ rustc_allowed_through_unstable_modules,
+ rustc_attrs,
+ rustc_box,
+ rustc_builtin_macro,
+ rustc_capture_analysis,
+ rustc_clean,
+ rustc_coherence_is_core,
+ rustc_const_stable,
+ rustc_const_unstable,
+ rustc_conversion_suggestion,
+ rustc_deallocator,
+ rustc_def_path,
+ rustc_diagnostic_item,
+ rustc_diagnostic_macros,
+ rustc_dirty,
+ rustc_do_not_const_check,
+ rustc_dummy,
+ rustc_dump_env_program_clauses,
+ rustc_dump_program_clauses,
+ rustc_dump_user_substs,
+ rustc_dump_vtable,
+ rustc_error,
+ rustc_evaluate_where_clauses,
+ rustc_expected_cgu_reuse,
+ rustc_has_incoherent_inherent_impls,
+ rustc_if_this_changed,
+ rustc_inherit_overflow_checks,
+ rustc_insignificant_dtor,
+ rustc_layout,
+ rustc_layout_scalar_valid_range_end,
+ rustc_layout_scalar_valid_range_start,
+ rustc_legacy_const_generics,
+ rustc_lint_diagnostics,
+ rustc_lint_opt_deny_field_access,
+ rustc_lint_opt_ty,
+ rustc_lint_query_instability,
+ rustc_macro_transparency,
+ rustc_main,
+ rustc_mir,
+ rustc_must_implement_one_of,
+ rustc_nonnull_optimization_guaranteed,
+ rustc_object_lifetime_default,
+ rustc_on_unimplemented,
+ rustc_outlives,
+ rustc_paren_sugar,
+ rustc_partition_codegened,
+ rustc_partition_reused,
+ rustc_pass_by_value,
+ rustc_peek,
+ rustc_peek_definite_init,
+ rustc_peek_liveness,
+ rustc_peek_maybe_init,
+ rustc_peek_maybe_uninit,
+ rustc_polymorphize_error,
+ rustc_private,
+ rustc_proc_macro_decls,
+ rustc_promotable,
+ rustc_reallocator,
+ rustc_regions,
+ rustc_reservation_impl,
+ rustc_serialize,
+ rustc_skip_array_during_method_dispatch,
+ rustc_specialization_trait,
+ rustc_stable,
+ rustc_std_internal_symbol,
+ rustc_strict_coherence,
+ rustc_symbol_name,
+ rustc_test_marker,
+ rustc_then_this_would_need,
+ rustc_trivial_field_reads,
+ rustc_unsafe_specialization_marker,
+ rustc_variance,
+ rustdoc,
+ rustdoc_internals,
+ rustfmt,
+ rvalue_static_promotion,
+ s,
+ sanitize,
+ sanitizer_runtime,
+ saturating_add,
+ saturating_sub,
+ self_in_typedefs,
+ self_struct_ctor,
+ semitransparent,
+ shadow_call_stack,
+ shl,
+ shl_assign,
+ should_panic,
+ shr,
+ shr_assign,
+ simd,
+ simd_add,
+ simd_and,
+ simd_arith_offset,
+ simd_as,
+ simd_bitmask,
+ simd_cast,
+ simd_ceil,
+ simd_div,
+ simd_eq,
+ simd_extract,
+ simd_fabs,
+ simd_fcos,
+ simd_fexp,
+ simd_fexp2,
+ simd_ffi,
+ simd_flog,
+ simd_flog10,
+ simd_flog2,
+ simd_floor,
+ simd_fma,
+ simd_fmax,
+ simd_fmin,
+ simd_fpow,
+ simd_fpowi,
+ simd_fsin,
+ simd_fsqrt,
+ simd_gather,
+ simd_ge,
+ simd_gt,
+ simd_insert,
+ simd_le,
+ simd_lt,
+ simd_mul,
+ simd_ne,
+ simd_neg,
+ simd_or,
+ simd_reduce_add_ordered,
+ simd_reduce_add_unordered,
+ simd_reduce_all,
+ simd_reduce_and,
+ simd_reduce_any,
+ simd_reduce_max,
+ simd_reduce_max_nanless,
+ simd_reduce_min,
+ simd_reduce_min_nanless,
+ simd_reduce_mul_ordered,
+ simd_reduce_mul_unordered,
+ simd_reduce_or,
+ simd_reduce_xor,
+ simd_rem,
+ simd_round,
+ simd_saturating_add,
+ simd_saturating_sub,
+ simd_scatter,
+ simd_select,
+ simd_select_bitmask,
+ simd_shl,
+ simd_shr,
+ simd_shuffle,
+ simd_sub,
+ simd_trunc,
+ simd_xor,
+ since,
+ sinf32,
+ sinf64,
+ size,
+ size_of,
+ size_of_val,
+ sized,
+ skip,
+ slice,
+ slice_len_fn,
+ slice_patterns,
+ slicing_syntax,
+ soft,
+ specialization,
+ speed,
+ spotlight,
+ sqrtf32,
+ sqrtf64,
+ sreg,
+ sreg_low16,
+ sse,
+ sse4a_target_feature,
+ stable,
+ staged_api,
+ start,
+ state,
+ static_in_const,
+ static_nobundle,
+ static_recursion,
+ staticlib,
+ std,
+ std_inject,
+ std_panic,
+ std_panic_2015_macro,
+ std_panic_macro,
+ stmt,
+ stmt_expr_attributes,
+ stop_after_dataflow,
+ store,
+ str,
+ str_split_whitespace,
+ str_trim,
+ str_trim_end,
+ str_trim_start,
+ strict_provenance,
+ stringify,
+ stringify_macro,
+ struct_field_attributes,
+ struct_inherit,
+ struct_variant,
+ structural_match,
+ structural_peq,
+ structural_teq,
+ sty,
+ sub,
+ sub_assign,
+ sub_with_overflow,
+ suggestion,
+ sym,
+ sync,
+ t32,
+ target,
+ target_abi,
+ target_arch,
+ target_endian,
+ target_env,
+ target_family,
+ target_feature,
+ target_feature_11,
+ target_has_atomic,
+ target_has_atomic_equal_alignment,
+ target_has_atomic_load_store,
+ target_os,
+ target_pointer_width,
+ target_target_vendor,
+ target_thread_local,
+ target_vendor,
+ task,
+ tbm_target_feature,
+ termination,
+ termination_trait,
+ termination_trait_test,
+ test,
+ test_2018_feature,
+ test_accepted_feature,
+ test_case,
+ test_removed_feature,
+ test_runner,
+ test_unstable_lint,
+ then_with,
+ thread,
+ thread_local,
+ thread_local_macro,
+ thumb2,
+ thumb_mode: "thumb-mode",
+ tmm_reg,
+ to_string,
+ to_vec,
+ todo_macro,
+ tool_attributes,
+ tool_lints,
+ trace_macros,
+ track_caller,
+ trait_alias,
+ trait_upcasting,
+ transmute,
+ transmute_trait,
+ transparent,
+ transparent_enums,
+ transparent_unions,
+ trivial_bounds,
+ truncf32,
+ truncf64,
+ try_blocks,
+ try_capture,
+ try_from,
+ try_into,
+ try_trait_v2,
+ tt,
+ tuple,
+ tuple_from_req,
+ tuple_indexing,
+ two_phase,
+ ty,
+ type_alias_enum_variants,
+ type_alias_impl_trait,
+ type_ascription,
+ type_changing_struct_update,
+ type_id,
+ type_length_limit,
+ type_macros,
+ type_name,
+ u128,
+ u16,
+ u32,
+ u64,
+ u8,
+ unaligned_volatile_load,
+ unaligned_volatile_store,
+ unboxed_closures,
+ unchecked_add,
+ unchecked_div,
+ unchecked_mul,
+ unchecked_rem,
+ unchecked_shl,
+ unchecked_shr,
+ unchecked_sub,
+ underscore_const_names,
+ underscore_imports,
+ underscore_lifetimes,
+ uniform_paths,
+ unimplemented_macro,
+ unit,
+ universal_impl_trait,
+ unix,
+ unlikely,
+ unmarked_api,
+ unpin,
+ unreachable,
+ unreachable_2015,
+ unreachable_2015_macro,
+ unreachable_2021,
+ unreachable_2021_macro,
+ unreachable_code,
+ unreachable_display,
+ unreachable_macro,
+ unrestricted_attribute_tokens,
+ unsafe_block_in_unsafe_fn,
+ unsafe_cell,
+ unsafe_no_drop_flag,
+ unsafe_pin_internals,
+ unsize,
+ unsized_fn_params,
+ unsized_locals,
+ unsized_tuple_coercion,
+ unstable,
+ unstable_location_reason_default: "this crate is being loaded from the sysroot, an \
+ unstable location; did you mean to load this crate \
+ from crates.io via `Cargo.toml` instead?",
+ untagged_unions,
+ unused_imports,
+ unused_qualifications,
+ unwind,
+ unwind_attributes,
+ unwind_safe_trait,
+ unwrap,
+ unwrap_or,
+ use_extern_macros,
+ use_nested_groups,
+ used,
+ used_with_arg,
+ using,
+ usize,
+ v1,
+ va_arg,
+ va_copy,
+ va_end,
+ va_list,
+ va_start,
+ val,
+ values,
+ var,
+ variant_count,
+ vec,
+ vec_macro,
+ version,
+ vfp2,
+ vis,
+ visible_private_types,
+ volatile,
+ volatile_copy_memory,
+ volatile_copy_nonoverlapping_memory,
+ volatile_load,
+ volatile_set_memory,
+ volatile_store,
+ vreg,
+ vreg_low16,
+ vtable_align,
+ vtable_size,
+ warn,
+ wasm_abi,
+ wasm_import_module,
+ wasm_target_feature,
+ while_let,
+ width,
+ windows,
+ windows_subsystem,
+ with_negative_coherence,
+ wrapping_add,
+ wrapping_mul,
+ wrapping_sub,
+ wreg,
+ write_bytes,
+ write_macro,
+ write_str,
+ writeln_macro,
+ x87_reg,
+ xer,
+ xmm_reg,
+ yeet_desugar_details,
+ yeet_expr,
+ ymm_reg,
+ zmm_reg,
+ }
+}
+
+ /// An identifier: an interned name together with the span it is attached to.
+ ///
+ /// `PartialEq` and `Hash` are implemented by hand for this type; both look at
+ /// `name` and at the syntax context of `span` only.
+ #[derive(Copy, Clone, Eq, HashStable_Generic, Encodable, Decodable)]
+ pub struct Ident {
+ /// The interned text of the identifier.
+ pub name: Symbol,
+ /// The span associated with the identifier; its syntax context takes part in
+ /// equality and hashing.
+ pub span: Span,
+ }
+
+ impl Ident {
+     /// Builds an identifier out of an interned name and the span it occupies.
+     #[inline]
+     pub const fn new(name: Symbol, span: Span) -> Ident {
+         Self { name, span }
+     }
+
+     /// Builds an identifier for `name` whose span is `DUMMY_SP`.
+     #[inline]
+     pub const fn with_dummy_span(name: Symbol) -> Ident {
+         Self { name, span: DUMMY_SP }
+     }
+
+     /// Returns the identifier for `kw::Empty` with a dummy span.
+     #[inline]
+     pub fn empty() -> Ident {
+         Self::with_dummy_span(kw::Empty)
+     }
+
+     /// Interns `string` and wraps the resulting symbol in an identifier with a dummy span.
+     pub fn from_str(string: &str) -> Ident {
+         let name = Symbol::intern(string);
+         Self::with_dummy_span(name)
+     }
+
+     /// Interns `string` and pairs the resulting symbol with `span`.
+     pub fn from_str_and_span(string: &str, span: Span) -> Ident {
+         let name = Symbol::intern(string);
+         Self::new(name, span)
+     }
+
+     /// Takes `lo` and `hi` from `span` while keeping this identifier's hygiene context.
+     pub fn with_span_pos(self, span: Span) -> Ident {
+         let ctxt = self.span.ctxt();
+         Self::new(self.name, span.with_ctxt(ctxt))
+     }
+
+     /// Returns an identifier with the same span whose name has every leading `'` stripped.
+     pub fn without_first_quote(self) -> Ident {
+         let unquoted = self.as_str().trim_start_matches('\'');
+         Self::new(Symbol::intern(unquoted), self.span)
+     }
+
+     /// Normalizes the identifier for comparisons that follow "item hygiene".
+     ///
+     /// After normalization, identifiers with the same text compare equal when they originate
+     /// from the same macro 2.0 macro (a `macro` item, not a `macro_rules` item) and remain
+     /// distinct when they originate from different macro 2.0 macros.
+     /// Technically, every non-opaque mark is stripped from the identifier's syntactic context.
+     pub fn normalize_to_macros_2_0(self) -> Ident {
+         let span = self.span.normalize_to_macros_2_0();
+         Self::new(self.name, span)
+     }
+
+     /// Normalizes the identifier for comparisons that follow "local variable hygiene".
+     ///
+     /// After normalization, identifiers with the same text compare equal when they originate
+     /// from the same non-transparent macro (a `macro` or `macro_rules!` item) and remain
+     /// distinct when they originate from different non-transparent macros.
+     /// Technically, every transparent mark is stripped from the identifier's syntactic context.
+     pub fn normalize_to_macro_rules(self) -> Ident {
+         let span = self.span.normalize_to_macro_rules();
+         Self::new(self.name, span)
+     }
+
+     /// Returns the identifier's text. This is slowish because the symbol interner
+     /// has to be locked.
+     ///
+     /// The lifetime of the returned string is a lie; see `Symbol::as_str()` for details.
+     pub fn as_str(&self) -> &str {
+         Symbol::as_str(&self.name)
+     }
+ }
+
+ impl PartialEq for Ident {
+     /// Identifiers are equal when their names match and their spans share a syntax
+     /// context; nothing else about the spans is compared.
+     fn eq(&self, rhs: &Self) -> bool {
+         if self.name != rhs.name {
+             return false;
+         }
+         self.span.eq_ctxt(rhs.span)
+     }
+ }
+
+ impl Hash for Ident {
+     /// Feeds the name and then the span's syntax context into `state`, i.e. the same
+     /// two components that the `PartialEq` impl compares.
+     fn hash<H: Hasher>(&self, state: &mut H) {
+         let Ident { name, span } = *self;
+         name.hash(state);
+         span.ctxt().hash(state);
+     }
+ }
+
+ impl fmt::Debug for Ident {
+     /// Writes the `Display` form of the identifier immediately followed by the `Debug`
+     /// form of its syntax context.
+     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+         fmt::Display::fmt(self, f)?;
+         let ctxt = self.span.ctxt();
+         fmt::Debug::fmt(&ctxt, f)
+     }
+ }
+
+/// This implementation is supposed to be used in error messages, so it's expected to be identical
+/// to printing the original identifier token written in source code (`token_to_string`),
+/// except that AST identifiers don't keep the rawness flag, so we have to guess it.
+impl fmt::Display for Ident {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fmt::Display::fmt(&IdentPrinter::new(self.name, self.is_raw_guess(), None), f)
+ }
+}
+
+ /// This is the most general way to print identifiers.
+ /// AST pretty-printer is used as a fallback for turning AST structures into token streams for
+ /// proc macros. Additionally, proc macros may stringify their input and expect it to survive the
+ /// stringification (especially true for proc macro derives written between Rust 1.15 and 1.30).
+ /// So we need to somehow pretty-print `$crate` in a way preserving at least some of its
+ /// hygiene data, most importantly name of the crate it refers to.
+ /// As a result we print `$crate` as `crate` if it refers to the local crate
+ /// and as `::other_crate_name` if it refers to some other crate.
+ /// Note, that this is only done if the ident token is printed from inside of AST pretty-printing,
+ /// but not otherwise. Pretty-printing is the only way for proc macros to discover token contents,
+ /// so we should not perform this lossy conversion if the top level call to the pretty-printer was
+ /// done for a token stream or a single token.
+ pub struct IdentPrinter {
+ /// The symbol to print.
+ symbol: Symbol,
+ /// Whether the `r#` prefix is written before the symbol.
+ is_raw: bool,
+ /// Span used for retrieving the name of the crate to which `$crate` refers,
+ /// if this field is `None` then the `$crate` conversion doesn't happen.
+ convert_dollar_crate: Option<Span>,
+ }
+
+ impl IdentPrinter {
+     /// The fully general constructor; every field is supplied by the caller.
+     /// Do not use this.
+     pub fn new(symbol: Symbol, is_raw: bool, convert_dollar_crate: Option<Span>) -> IdentPrinter {
+         Self { symbol, is_raw, convert_dollar_crate }
+     }
+
+     /// Constructor intended for identifiers printed as part of pretty-printing a larger
+     /// piece of AST; the identifier's own span is used for the `$crate` conversion.
+     /// Do not use this either.
+     pub fn for_ast_ident(ident: Ident, is_raw: bool) -> IdentPrinter {
+         let Ident { name, span } = ident;
+         Self::new(name, is_raw, Some(span))
+     }
+ }
+
+ impl fmt::Display for IdentPrinter {
+     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+         // Raw identifiers: `r#` prefix, then the symbol itself, never a `$crate` conversion.
+         if self.is_raw {
+             f.write_str("r#")?;
+             return fmt::Display::fmt(&self.symbol, f);
+         }
+
+         // `$crate` is replaced by the crate name it refers to, but only when a span was given.
+         if self.symbol == kw::DollarCrate {
+             if let Some(span) = self.convert_dollar_crate {
+                 let converted = span.ctxt().dollar_crate_name();
+                 // Names that are not path-segment keywords get a leading `::`.
+                 if !converted.is_path_segment_keyword() {
+                     f.write_str("::")?;
+                 }
+                 return fmt::Display::fmt(&converted, f);
+             }
+         }
+
+         fmt::Display::fmt(&self.symbol, f)
+     }
+ }
+
+ /// A newtype around `Ident` that calls [Ident::normalize_to_macro_rules] on
+ /// construction.
+ // FIXME(matthewj, petrochenkov) Use this more often, add a similar
+ // `ModernIdent` struct and use that as well.
+ #[derive(Copy, Clone, Eq, PartialEq, Hash)]
+ pub struct MacroRulesNormalizedIdent(Ident);
+
+ impl MacroRulesNormalizedIdent {
+     /// Normalizes `ident` with `Ident::normalize_to_macro_rules` and wraps the result.
+     pub fn new(ident: Ident) -> Self {
+         let normalized = ident.normalize_to_macro_rules();
+         MacroRulesNormalizedIdent(normalized)
+     }
+ }
+
+ impl fmt::Debug for MacroRulesNormalizedIdent {
+     /// Delegates to the `Debug` impl of the wrapped `Ident`.
+     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+         let Self(ident) = self;
+         fmt::Debug::fmt(ident, f)
+     }
+ }
+
+ impl fmt::Display for MacroRulesNormalizedIdent {
+     /// Delegates to the `Display` impl of the wrapped `Ident`.
+     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+         let Self(ident) = self;
+         fmt::Display::fmt(ident, f)
+     }
+ }
+
+ /// An interned string.
+ ///
+ /// Internally, a `Symbol` is implemented as an index, and all operations
+ /// (including hashing, equality, and ordering) operate on that index. The use
+ /// of `rustc_index::newtype_index!` means that `Option<Symbol>` only takes up 4 bytes,
+ /// because `rustc_index::newtype_index!` reserves the last 256 values for tagging purposes.
+ ///
+ /// Note that `Symbol` cannot directly be a `rustc_index::newtype_index!` because it
+ /// implements `fmt::Debug`, `Encodable`, and `Decodable` in special ways.
+ ///
+ /// Use `Symbol::intern` to obtain a symbol for a string and `Symbol::as_str` to get
+ /// the string back.
+ #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+ pub struct Symbol(SymbolIndex);
+
+ // The index type wrapped by `Symbol`, generated by `rustc_index::newtype_index!`.
+ rustc_index::newtype_index! {
+ struct SymbolIndex { .. }
+ }
+
+ impl Symbol {
+     /// Wraps the raw index `n` in a `Symbol` without consulting the interner.
+     const fn new(n: u32) -> Self {
+         let index = SymbolIndex::from_u32(n);
+         Symbol(index)
+     }
+
+     /// Returns the interned representation of `string`, using the session-global interner.
+     pub fn intern(string: &str) -> Self {
+         with_session_globals(|globals| {
+             let interner = &globals.symbol_interner;
+             interner.intern(string)
+         })
+     }
+
+     /// Returns the string this symbol stands for. This is slowish because the symbol
+     /// interner has to be locked.
+     ///
+     /// The lifetime of the returned string is a lie: it is not really that of `&self`
+     /// but is tied to the lifetime of the underlying interner. Interners are long-lived
+     /// and few in number, and this function is typically used for short-lived things,
+     /// so in practice it works out ok.
+     pub fn as_str(&self) -> &str {
+         with_session_globals(|globals| {
+             let interned = globals.symbol_interner.get(*self);
+             // The transmute only changes the lifetime, detaching the string from the
+             // borrow of the session globals (see the doc comment above).
+             unsafe { std::mem::transmute::<&str, &str>(interned) }
+         })
+     }
+
+     /// Returns the underlying index as a `u32`.
+     pub fn as_u32(self) -> u32 {
+         let Symbol(index) = self;
+         index.as_u32()
+     }
+
+     /// Returns `true` if this is `kw::Empty`.
+     pub fn is_empty(self) -> bool {
+         kw::Empty == self
+     }
+
+     /// Meant for error messages: the result should match what printing the original
+     /// identifier token from the source code (`token_to_string`, `Ident::to_string`) would
+     /// give. Symbols keep neither the rawness flag nor the edition, so rawness is guessed
+     /// using the global edition.
+     pub fn to_ident_string(self) -> String {
+         let ident = Ident::with_dummy_span(self);
+         ident.to_string()
+     }
+ }
+
+ impl fmt::Debug for Symbol {
+     /// Formats the symbol's string with the `Debug` impl of `str`.
+     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+         let text = self.as_str();
+         fmt::Debug::fmt(text, f)
+     }
+ }
+
+ impl fmt::Display for Symbol {
+     /// Formats the symbol's string with the `Display` impl of `str`.
+     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+         let text = self.as_str();
+         fmt::Display::fmt(text, f)
+     }
+ }
+
+ impl<S: Encoder> Encodable<S> for Symbol {
+     /// Encodes the symbol as its string, not as its index.
+     fn encode(&self, s: &mut S) {
+         let text = self.as_str();
+         s.emit_str(text);
+     }
+ }
+
+ impl<D: Decoder> Decodable<D> for Symbol {
+     /// Reads a string from the decoder and interns it.
+     #[inline]
+     fn decode(d: &mut D) -> Symbol {
+         let text = d.read_str();
+         Symbol::intern(&text)
+     }
+ }
+
+ impl<CTX> HashStable<CTX> for Symbol {
+     /// Stable-hashes the symbol's string rather than its index.
+     #[inline]
+     fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) {
+         let text = self.as_str();
+         text.hash_stable(hcx, hasher);
+     }
+ }
+
+ impl<CTX> ToStableHashKey<CTX> for Symbol {
+     type KeyType = String;
+
+     /// Uses an owned copy of the symbol's string as the stable key.
+     #[inline]
+     fn to_stable_hash_key(&self, _: &CTX) -> String {
+         String::from(self.as_str())
+     }
+ }
+
+ /// The symbol interner: an `InternerInner` behind a `Lock`.
+ #[derive(Default)]
+ pub(crate) struct Interner(Lock<InternerInner>);
+
+ // The `&'static str`s in this type actually point into the arena.
+ //
+ // The `FxHashMap`+`Vec` pair could be replaced by `FxIndexSet`, but #75278
+ // found that to regress performance up to 2% in some cases. This might be
+ // revisited after further improvements to `indexmap`.
+ //
+ // This type is private to prevent accidentally constructing more than one
+ // `Interner` on the same thread, which makes it easy to mix up `Symbol`s
+ // between `Interner`s.
+ #[derive(Default)]
+ struct InternerInner {
+ // Backing storage for the bytes of newly interned strings.
+ arena: DroplessArena,
+ // Maps each interned string to its symbol.
+ names: FxHashMap<&'static str, Symbol>,
+ // Maps each symbol (by index) back to its string.
+ strings: Vec<&'static str>,
+ }
+
+ impl Interner {
+ // Builds an interner that already contains the strings of `init`: the string
+ // at position `i` is assigned `Symbol::new(i)`. The prefilled strings are
+ // already `'static`, so they are stored as-is and the arena is left at its
+ // default.
+ fn prefill(init: &[&'static str]) -> Self {
+ Interner(Lock::new(InternerInner {
+ strings: init.into(),
+ names: init.iter().copied().zip((0..).map(Symbol::new)).collect(),
+ ..Default::default()
+ }))
+ }
+
+ // Returns the symbol for `string`. A string seen before yields its existing
+ // symbol; otherwise the string is copied into the arena and assigned the
+ // next free index.
+ #[inline]
+ fn intern(&self, string: &str) -> Symbol {
+ let mut inner = self.0.lock();
+ if let Some(&name) = inner.names.get(string) {
+ return name;
+ }
+
+ // The new symbol's index is the position the string is about to take in
+ // `strings`, so this must be computed before the push below.
+ let name = Symbol::new(inner.strings.len() as u32);
+
+ // SAFETY: we convert from `&str` to `&[u8]`, clone it into the arena,
+ // and immediately convert the clone back to `&str`, all because there
+ // is no `inner.arena.alloc_str()` method. This is clearly safe.
+ let string: &str =
+ unsafe { str::from_utf8_unchecked(inner.arena.alloc_slice(string.as_bytes())) };
+
+ // SAFETY: we can extend the arena allocation to `'static` because we
+ // only access these while the arena is still alive.
+ let string: &'static str = unsafe { &*(string as *const str) };
+ inner.strings.push(string);
+
+ // This second hash table lookup can be avoided by using `RawEntryMut`,
+ // but this code path isn't hot enough for it to be worth it. See
+ // #91445 for details.
+ inner.names.insert(string, name);
+ name
+ }
+
+ // Get the symbol as a string. `Symbol::as_str()` should be used in
+ // preference to this function.
+ //
+ // Indexes `strings` directly, so this panics if `symbol`'s index is out of
+ // range for this interner.
+ fn get(&self, symbol: Symbol) -> &str {
+ self.0.lock().strings[symbol.0.as_usize()]
+ }
+ }
+
+ // This module has a very short name because it's used a lot.
+ /// This module contains all the defined keyword `Symbol`s.
+ ///
+ /// Given that `kw` is imported, use them like `kw::keyword_name`.
+ /// For example `kw::Loop` or `kw::Break`.
+ pub mod kw {
+ // Everything in here is a re-export of the `kw_generated` module.
+ pub use super::kw_generated::*;
+ }
+
+ // This module has a very short name because it's used a lot.
+ /// Home of every predefined `Symbol` that is not a keyword.
+ ///
+ /// With `sym` imported, refer to them as `sym::symbol_name`,
+ /// for example `sym::rustfmt` or `sym::u8`.
+ pub mod sym {
+     use super::Symbol;
+     use std::convert::TryInto;
+
+     #[doc(inline)]
+     pub use super::sym_generated::*;
+
+     // Used from a macro in `librustc_feature/accepted.rs`
+     pub use super::kw::MacroRules as macro_rules;
+
+     /// Returns the symbol for an integer.
+     ///
+     /// Values 0 through 9 map straight to preallocated symbols starting at
+     /// `SYMBOL_DIGITS_BASE` and are therefore fast. Anything else, including values that
+     /// do not convert to `usize`, is interned from `n.to_string()`.
+     pub fn integer<N: TryInto<usize> + Copy + ToString>(n: N) -> Symbol {
+         // `Result::Ok` is spelled out in full, as in the original code: the glob import
+         // above brings the generated symbol names into this module's scope.
+         match n.try_into() {
+             Result::Ok(idx) if idx < 10 => Symbol::new(super::SYMBOL_DIGITS_BASE + idx as u32),
+             _ => Symbol::intern(&n.to_string()),
+         }
+     }
+ }
+
+ impl Symbol {
+     /// `true` for every symbol ordered at or before `kw::Underscore`.
+     fn is_special(self) -> bool {
+         kw::Underscore >= self
+     }
+
+     /// `true` for symbols in the inclusive range `kw::As ..= kw::While`.
+     fn is_used_keyword_always(self) -> bool {
+         (kw::As..=kw::While).contains(&self)
+     }
+
+     /// `true` for symbols in `kw::Async ..= kw::Dyn` when the edition is 2018 or later.
+     /// `edition` is only invoked if the symbol is inside that range.
+     fn is_used_keyword_conditional(self, edition: impl FnOnce() -> Edition) -> bool {
+         (kw::Async..=kw::Dyn).contains(&self) && edition() >= Edition::Edition2018
+     }
+
+     /// `true` for symbols in the inclusive range `kw::Abstract ..= kw::Yield`.
+     fn is_unused_keyword_always(self) -> bool {
+         (kw::Abstract..=kw::Yield).contains(&self)
+     }
+
+     /// `true` for `kw::Try` when the edition is 2018 or later.
+     /// `edition` is only invoked if the symbol is `kw::Try`.
+     fn is_unused_keyword_conditional(self, edition: impl FnOnce() -> Edition) -> bool {
+         self == kw::Try && edition() >= Edition::Edition2018
+     }
+
+     /// `true` if the symbol is special or is a used or unused keyword, with the
+     /// edition-dependent checks running last.
+     pub fn is_reserved(self, edition: impl Copy + FnOnce() -> Edition) -> bool {
+         // The edition-independent checks come first so that `edition` is not called
+         // when one of them already answers the question.
+         if self.is_special() || self.is_used_keyword_always() || self.is_unused_keyword_always() {
+             return true;
+         }
+         self.is_used_keyword_conditional(edition) || self.is_unused_keyword_conditional(edition)
+     }
+
+     /// A keyword or reserved identifier that can be used as a path segment.
+     pub fn is_path_segment_keyword(self) -> bool {
+         let path_segment_keywords =
+             [kw::Super, kw::SelfLower, kw::SelfUpper, kw::Crate, kw::PathRoot, kw::DollarCrate];
+         path_segment_keywords.contains(&self)
+     }
+
+     /// Returns `true` if the symbol is `true` or `false`.
+     pub fn is_bool_lit(self) -> bool {
+         [kw::True, kw::False].contains(&self)
+     }
+
+     /// Returns `true` if this symbol can be a raw identifier, i.e. it is neither empty,
+     /// nor `_`, nor a path-segment keyword.
+     pub fn can_be_raw(self) -> bool {
+         !(self == kw::Empty || self == kw::Underscore || self.is_path_segment_keyword())
+     }
+ }
+
+ impl Ident {
+     /// Returns `true` for reserved identifiers used internally for elided lifetimes,
+     /// unnamed method parameters, crate root module, error recovery etc.
+     pub fn is_special(self) -> bool {
+         let Ident { name, .. } = self;
+         name.is_special()
+     }
+
+     /// Returns `true` if the token is a keyword used in the language.
+     pub fn is_used_keyword(self) -> bool {
+         let Ident { name, span } = self;
+         // `span.edition()` is relatively expensive, so it sits behind a closure and is
+         // only evaluated when the edition-independent check has not already succeeded.
+         name.is_used_keyword_always() || name.is_used_keyword_conditional(|| span.edition())
+     }
+
+     /// Returns `true` if the token is a keyword reserved for possible future use.
+     pub fn is_unused_keyword(self) -> bool {
+         let Ident { name, span } = self;
+         // `span.edition()` is relatively expensive, so it sits behind a closure and is
+         // only evaluated when the edition-independent check has not already succeeded.
+         name.is_unused_keyword_always() || name.is_unused_keyword_conditional(|| span.edition())
+     }
+
+     /// Returns `true` if the token is either a special identifier or a keyword.
+     pub fn is_reserved(self) -> bool {
+         let Ident { name, span } = self;
+         // `span.edition()` is relatively expensive; the closure lets the callee decide
+         // whether it is needed at all.
+         name.is_reserved(|| span.edition())
+     }
+
+     /// A keyword or reserved identifier that can be used as a path segment.
+     pub fn is_path_segment_keyword(self) -> bool {
+         let Ident { name, .. } = self;
+         name.is_path_segment_keyword()
+     }
+
+     /// Guesses whether an identifier seen in a normal identifier position (a variable
+     /// name, a type, ...) was originally written in raw form: it is assumed raw when its
+     /// name can be raw and it is reserved.
+     pub fn is_raw_guess(self) -> bool {
+         if !self.name.can_be_raw() {
+             return false;
+         }
+         self.is_reserved()
+     }
+ }
diff --git a/compiler/rustc_span/src/symbol/tests.rs b/compiler/rustc_span/src/symbol/tests.rs
new file mode 100644
index 000000000..0958fce5f
--- /dev/null
+++ b/compiler/rustc_span/src/symbol/tests.rs
@@ -0,0 +1,25 @@
+use super::*;
+
+use crate::create_default_session_globals_then;
+
+ #[test]
+ fn interner_tests() {
+     let i = Interner::default();
+     // (string to intern, expected symbol index), checked in order: the first string gets
+     // index zero, re-interning returns the same entry, a different string gets a different
+     // index, and "dog" is still at zero afterwards.
+     let expectations = [("dog", 0), ("dog", 0), ("cat", 1), ("cat", 1), ("dog", 0)];
+     for (string, index) in expectations {
+         assert_eq!(i.intern(string), Symbol::new(index));
+     }
+ }
+
+ #[test]
+ fn without_first_quote_test() {
+     create_default_session_globals_then(|| {
+         // Stripping the leading quote from `'break` must yield the `break` keyword symbol.
+         let quoted = Ident::from_str("'break");
+         let unquoted = quoted.without_first_quote();
+         assert_eq!(unquoted.name, kw::Break);
+     });
+ }
diff --git a/compiler/rustc_span/src/tests.rs b/compiler/rustc_span/src/tests.rs
new file mode 100644
index 000000000..5b3915c33
--- /dev/null
+++ b/compiler/rustc_span/src/tests.rs
@@ -0,0 +1,43 @@
+use super::*;
+
+ #[test]
+ fn test_lookup_line() {
+     let source = String::from("abcdefghijklm\nabcdefghij\n...");
+     let sf =
+         SourceFile::new(FileName::Anon(0), source, BytePos(3), SourceFileHashAlgorithm::Sha256);
+     sf.lines(|lines| assert_eq!(lines, &[BytePos(3), BytePos(17), BytePos(28)]));
+
+     // (byte position, expected line index). The file starts at `BytePos(3)`, so position 0
+     // lies before it and has no line.
+     let expectations = [
+         (0, None),
+         (3, Some(0)),
+         (4, Some(0)),
+         (16, Some(0)),
+         (17, Some(1)),
+         (18, Some(1)),
+         (28, Some(2)),
+         (29, Some(2)),
+     ];
+     for (pos, line) in expectations {
+         assert_eq!(sf.lookup_line(BytePos(pos)), line);
+     }
+ }
+
+ #[test]
+ fn test_normalize_newlines() {
+     // Runs `normalize_newlines` on `before` and checks both the resulting text and the
+     // positions recorded for the normalized characters.
+     fn check(before: &str, after: &str, expected_positions: &[u32]) {
+         let mut text = String::from(before);
+         let mut normalized = Vec::new();
+         normalize_newlines(&mut text, &mut normalized);
+         let positions = normalized.into_iter().map(|nc| nc.pos.0).collect::<Vec<_>>();
+         assert_eq!(text.as_str(), after);
+         assert_eq!(positions, expected_positions);
+     }
+
+     // (input, expected output, expected recorded positions)
+     let cases: &[(&str, &str, &[u32])] = &[
+         ("", "", &[]),
+         ("\n", "\n", &[]),
+         ("\r", "\r", &[]),
+         ("\r\r", "\r\r", &[]),
+         ("\r\n", "\n", &[1]),
+         ("hello world", "hello world", &[]),
+         ("hello\nworld", "hello\nworld", &[]),
+         ("hello\r\nworld", "hello\nworld", &[6]),
+         ("\r\nhello\r\nworld\r\n", "\nhello\nworld\n", &[1, 7, 13]),
+         ("\r\r\n", "\r\n", &[2]),
+         ("hello\rworld", "hello\rworld", &[]),
+     ];
+     for &(before, after, expected_positions) in cases {
+         check(before, after, expected_positions);
+     }
+ }