diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
commit | 698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch) | |
tree | 173a775858bd501c378080a10dca74132f05bc50 /compiler/rustc_span | |
parent | Initial commit. (diff) | |
download | rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip |
Adding upstream version 1.64.0+dfsg1.upstream/1.64.0+dfsg1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | compiler/rustc_span/Cargo.toml | 21 | ||||
-rw-r--r-- | compiler/rustc_span/src/analyze_source_file.rs | 274 | ||||
-rw-r--r-- | compiler/rustc_span/src/analyze_source_file/tests.rs | 142 | ||||
-rw-r--r-- | compiler/rustc_span/src/caching_source_map_view.rs | 293 | ||||
-rw-r--r-- | compiler/rustc_span/src/def_id.rs | 444 | ||||
-rw-r--r-- | compiler/rustc_span/src/edition.rs | 110 | ||||
-rw-r--r-- | compiler/rustc_span/src/fatal_error.rs | 26 | ||||
-rw-r--r-- | compiler/rustc_span/src/hygiene.rs | 1528 | ||||
-rw-r--r-- | compiler/rustc_span/src/lev_distance.rs | 177 | ||||
-rw-r--r-- | compiler/rustc_span/src/lev_distance/tests.rs | 71 | ||||
-rw-r--r-- | compiler/rustc_span/src/lib.rs | 2116 | ||||
-rw-r--r-- | compiler/rustc_span/src/profiling.rs | 35 | ||||
-rw-r--r-- | compiler/rustc_span/src/source_map.rs | 1281 | ||||
-rw-r--r-- | compiler/rustc_span/src/source_map/tests.rs | 481 | ||||
-rw-r--r-- | compiler/rustc_span/src/span_encoding.rs | 150 | ||||
-rw-r--r-- | compiler/rustc_span/src/symbol.rs | 2067 | ||||
-rw-r--r-- | compiler/rustc_span/src/symbol/tests.rs | 25 | ||||
-rw-r--r-- | compiler/rustc_span/src/tests.rs | 43 |
18 files changed, 9284 insertions, 0 deletions
diff --git a/compiler/rustc_span/Cargo.toml b/compiler/rustc_span/Cargo.toml new file mode 100644 index 000000000..7227b193f --- /dev/null +++ b/compiler/rustc_span/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "rustc_span" +version = "0.0.0" +edition = "2021" + +[lib] +doctest = false + +[dependencies] +rustc_serialize = { path = "../rustc_serialize" } +rustc_macros = { path = "../rustc_macros" } +rustc_data_structures = { path = "../rustc_data_structures" } +rustc_index = { path = "../rustc_index" } +rustc_arena = { path = "../rustc_arena" } +scoped-tls = "1.0" +unicode-width = "0.1.4" +cfg-if = "0.1.2" +tracing = "0.1" +sha1 = { package = "sha-1", version = "0.10.0" } +sha2 = "0.10.1" +md5 = { package = "md-5", version = "0.10.0" } diff --git a/compiler/rustc_span/src/analyze_source_file.rs b/compiler/rustc_span/src/analyze_source_file.rs new file mode 100644 index 000000000..5987fb2a1 --- /dev/null +++ b/compiler/rustc_span/src/analyze_source_file.rs @@ -0,0 +1,274 @@ +use super::*; +use unicode_width::UnicodeWidthChar; + +#[cfg(test)] +mod tests; + +/// Finds all newlines, multi-byte characters, and non-narrow characters in a +/// SourceFile. +/// +/// This function will use an SSE2 enhanced implementation if hardware support +/// is detected at runtime. +pub fn analyze_source_file( + src: &str, + source_file_start_pos: BytePos, +) -> (Vec<BytePos>, Vec<MultiByteChar>, Vec<NonNarrowChar>) { + let mut lines = vec![source_file_start_pos]; + let mut multi_byte_chars = vec![]; + let mut non_narrow_chars = vec![]; + + // Calls the right implementation, depending on hardware support available. + analyze_source_file_dispatch( + src, + source_file_start_pos, + &mut lines, + &mut multi_byte_chars, + &mut non_narrow_chars, + ); + + // The code above optimistically registers a new line *after* each \n + // it encounters. If that point is already outside the source_file, remove + // it again. 
+ if let Some(&last_line_start) = lines.last() { + let source_file_end = source_file_start_pos + BytePos::from_usize(src.len()); + assert!(source_file_end >= last_line_start); + if last_line_start == source_file_end { + lines.pop(); + } + } + + (lines, multi_byte_chars, non_narrow_chars) +} + +cfg_if::cfg_if! { + if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64")))] { + fn analyze_source_file_dispatch(src: &str, + source_file_start_pos: BytePos, + lines: &mut Vec<BytePos>, + multi_byte_chars: &mut Vec<MultiByteChar>, + non_narrow_chars: &mut Vec<NonNarrowChar>) { + if is_x86_feature_detected!("sse2") { + unsafe { + analyze_source_file_sse2(src, + source_file_start_pos, + lines, + multi_byte_chars, + non_narrow_chars); + } + } else { + analyze_source_file_generic(src, + src.len(), + source_file_start_pos, + lines, + multi_byte_chars, + non_narrow_chars); + + } + } + + /// Checks 16 byte chunks of text at a time. If the chunk contains + /// something other than printable ASCII characters and newlines, the + /// function falls back to the generic implementation. Otherwise it uses + /// SSE2 intrinsics to quickly find all newlines. + #[target_feature(enable = "sse2")] + unsafe fn analyze_source_file_sse2(src: &str, + output_offset: BytePos, + lines: &mut Vec<BytePos>, + multi_byte_chars: &mut Vec<MultiByteChar>, + non_narrow_chars: &mut Vec<NonNarrowChar>) { + #[cfg(target_arch = "x86")] + use std::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use std::arch::x86_64::*; + + const CHUNK_SIZE: usize = 16; + + let src_bytes = src.as_bytes(); + + let chunk_count = src.len() / CHUNK_SIZE; + + // This variable keeps track of where we should start decoding a + // chunk. If a multi-byte character spans across chunk boundaries, + // we need to skip that part in the next chunk because we already + // handled it. + let mut intra_chunk_offset = 0; + + for chunk_index in 0 .. 
chunk_count { + let ptr = src_bytes.as_ptr() as *const __m128i; + // We don't know if the pointer is aligned to 16 bytes, so we + // use `loadu`, which supports unaligned loading. + let chunk = _mm_loadu_si128(ptr.add(chunk_index)); + + // For character in the chunk, see if its byte value is < 0, which + // indicates that it's part of a UTF-8 char. + let multibyte_test = _mm_cmplt_epi8(chunk, _mm_set1_epi8(0)); + // Create a bit mask from the comparison results. + let multibyte_mask = _mm_movemask_epi8(multibyte_test); + + // If the bit mask is all zero, we only have ASCII chars here: + if multibyte_mask == 0 { + assert!(intra_chunk_offset == 0); + + // Check if there are any control characters in the chunk. All + // control characters that we can encounter at this point have a + // byte value less than 32 or ... + let control_char_test0 = _mm_cmplt_epi8(chunk, _mm_set1_epi8(32)); + let control_char_mask0 = _mm_movemask_epi8(control_char_test0); + + // ... it's the ASCII 'DEL' character with a value of 127. + let control_char_test1 = _mm_cmpeq_epi8(chunk, _mm_set1_epi8(127)); + let control_char_mask1 = _mm_movemask_epi8(control_char_test1); + + let control_char_mask = control_char_mask0 | control_char_mask1; + + if control_char_mask != 0 { + // Check for newlines in the chunk + let newlines_test = _mm_cmpeq_epi8(chunk, _mm_set1_epi8(b'\n' as i8)); + let newlines_mask = _mm_movemask_epi8(newlines_test); + + if control_char_mask == newlines_mask { + // All control characters are newlines, record them + let mut newlines_mask = 0xFFFF0000 | newlines_mask as u32; + let output_offset = output_offset + + BytePos::from_usize(chunk_index * CHUNK_SIZE + 1); + + loop { + let index = newlines_mask.trailing_zeros(); + + if index >= CHUNK_SIZE as u32 { + // We have arrived at the end of the chunk. + break + } + + lines.push(BytePos(index) + output_offset); + + // Clear the bit, so we can find the next one. + newlines_mask &= (!1) << index; + } + + // We are done for this chunk. 
All control characters were + // newlines and we took care of those. + continue + } else { + // Some of the control characters are not newlines, + // fall through to the slow path below. + } + } else { + // No control characters, nothing to record for this chunk + continue + } + } + + // The slow path. + // There are control chars in here, fallback to generic decoding. + let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset; + intra_chunk_offset = analyze_source_file_generic( + &src[scan_start .. ], + CHUNK_SIZE - intra_chunk_offset, + BytePos::from_usize(scan_start) + output_offset, + lines, + multi_byte_chars, + non_narrow_chars + ); + } + + // There might still be a tail left to analyze + let tail_start = chunk_count * CHUNK_SIZE + intra_chunk_offset; + if tail_start < src.len() { + analyze_source_file_generic(&src[tail_start as usize ..], + src.len() - tail_start, + output_offset + BytePos::from_usize(tail_start), + lines, + multi_byte_chars, + non_narrow_chars); + } + } + } else { + + // The target (or compiler version) does not support SSE2 ... + fn analyze_source_file_dispatch(src: &str, + source_file_start_pos: BytePos, + lines: &mut Vec<BytePos>, + multi_byte_chars: &mut Vec<MultiByteChar>, + non_narrow_chars: &mut Vec<NonNarrowChar>) { + analyze_source_file_generic(src, + src.len(), + source_file_start_pos, + lines, + multi_byte_chars, + non_narrow_chars); + } + } +} + +// `scan_len` determines the number of bytes in `src` to scan. Note that the +// function can read past `scan_len` if a multi-byte character start within the +// range but extends past it. The overflow is returned by the function. 
+fn analyze_source_file_generic( + src: &str, + scan_len: usize, + output_offset: BytePos, + lines: &mut Vec<BytePos>, + multi_byte_chars: &mut Vec<MultiByteChar>, + non_narrow_chars: &mut Vec<NonNarrowChar>, +) -> usize { + assert!(src.len() >= scan_len); + let mut i = 0; + let src_bytes = src.as_bytes(); + + while i < scan_len { + let byte = unsafe { + // We verified that i < scan_len <= src.len() + *src_bytes.get_unchecked(i as usize) + }; + + // How much to advance in order to get to the next UTF-8 char in the + // string. + let mut char_len = 1; + + if byte < 32 { + // This is an ASCII control character, it could be one of the cases + // that are interesting to us. + + let pos = BytePos::from_usize(i) + output_offset; + + match byte { + b'\n' => { + lines.push(pos + BytePos(1)); + } + b'\t' => { + non_narrow_chars.push(NonNarrowChar::Tab(pos)); + } + _ => { + non_narrow_chars.push(NonNarrowChar::ZeroWidth(pos)); + } + } + } else if byte >= 127 { + // The slow path: + // This is either ASCII control character "DEL" or the beginning of + // a multibyte char. Just decode to `char`. + let c = (&src[i..]).chars().next().unwrap(); + char_len = c.len_utf8(); + + let pos = BytePos::from_usize(i) + output_offset; + + if char_len > 1 { + assert!((2..=4).contains(&char_len)); + let mbc = MultiByteChar { pos, bytes: char_len as u8 }; + multi_byte_chars.push(mbc); + } + + // Assume control characters are zero width. + // FIXME: How can we decide between `width` and `width_cjk`? + let char_width = UnicodeWidthChar::width(c).unwrap_or(0); + + if char_width != 1 { + non_narrow_chars.push(NonNarrowChar::new(pos, char_width)); + } + } + + i += char_len; + } + + i - scan_len +} diff --git a/compiler/rustc_span/src/analyze_source_file/tests.rs b/compiler/rustc_span/src/analyze_source_file/tests.rs new file mode 100644 index 000000000..66aefc9a7 --- /dev/null +++ b/compiler/rustc_span/src/analyze_source_file/tests.rs @@ -0,0 +1,142 @@ +use super::*; + +macro_rules! 
test { + (case: $test_name:ident, + text: $text:expr, + source_file_start_pos: $source_file_start_pos:expr, + lines: $lines:expr, + multi_byte_chars: $multi_byte_chars:expr, + non_narrow_chars: $non_narrow_chars:expr,) => { + #[test] + fn $test_name() { + let (lines, multi_byte_chars, non_narrow_chars) = + analyze_source_file($text, BytePos($source_file_start_pos)); + + let expected_lines: Vec<BytePos> = $lines.into_iter().map(BytePos).collect(); + + assert_eq!(lines, expected_lines); + + let expected_mbcs: Vec<MultiByteChar> = $multi_byte_chars + .into_iter() + .map(|(pos, bytes)| MultiByteChar { pos: BytePos(pos), bytes }) + .collect(); + + assert_eq!(multi_byte_chars, expected_mbcs); + + let expected_nncs: Vec<NonNarrowChar> = $non_narrow_chars + .into_iter() + .map(|(pos, width)| NonNarrowChar::new(BytePos(pos), width)) + .collect(); + + assert_eq!(non_narrow_chars, expected_nncs); + } + }; +} + +test!( + case: empty_text, + text: "", + source_file_start_pos: 0, + lines: vec![], + multi_byte_chars: vec![], + non_narrow_chars: vec![], +); + +test!( + case: newlines_short, + text: "a\nc", + source_file_start_pos: 0, + lines: vec![0, 2], + multi_byte_chars: vec![], + non_narrow_chars: vec![], +); + +test!( + case: newlines_long, + text: "012345678\nabcdef012345678\na", + source_file_start_pos: 0, + lines: vec![0, 10, 26], + multi_byte_chars: vec![], + non_narrow_chars: vec![], +); + +test!( + case: newline_and_multi_byte_char_in_same_chunk, + text: "01234β789\nbcdef0123456789abcdef", + source_file_start_pos: 0, + lines: vec![0, 11], + multi_byte_chars: vec![(5, 2)], + non_narrow_chars: vec![], +); + +test!( + case: newline_and_control_char_in_same_chunk, + text: "01234\u{07}6789\nbcdef0123456789abcdef", + source_file_start_pos: 0, + lines: vec![0, 11], + multi_byte_chars: vec![], + non_narrow_chars: vec![(5, 0)], +); + +test!( + case: multi_byte_char_short, + text: "aβc", + source_file_start_pos: 0, + lines: vec![0], + multi_byte_chars: vec![(1, 2)], + 
non_narrow_chars: vec![], +); + +test!( + case: multi_byte_char_long, + text: "0123456789abcΔf012345β", + source_file_start_pos: 0, + lines: vec![0], + multi_byte_chars: vec![(13, 2), (22, 2)], + non_narrow_chars: vec![], +); + +test!( + case: multi_byte_char_across_chunk_boundary, + text: "0123456789abcdeΔ123456789abcdef01234", + source_file_start_pos: 0, + lines: vec![0], + multi_byte_chars: vec![(15, 2)], + non_narrow_chars: vec![], +); + +test!( + case: multi_byte_char_across_chunk_boundary_tail, + text: "0123456789abcdeΔ....", + source_file_start_pos: 0, + lines: vec![0], + multi_byte_chars: vec![(15, 2)], + non_narrow_chars: vec![], +); + +test!( + case: non_narrow_short, + text: "0\t2", + source_file_start_pos: 0, + lines: vec![0], + multi_byte_chars: vec![], + non_narrow_chars: vec![(1, 4)], +); + +test!( + case: non_narrow_long, + text: "01\t3456789abcdef01234567\u{07}9", + source_file_start_pos: 0, + lines: vec![0], + multi_byte_chars: vec![], + non_narrow_chars: vec![(2, 4), (24, 0)], +); + +test!( + case: output_offset_all, + text: "01\t345\n789abcΔf01234567\u{07}9\nbcΔf", + source_file_start_pos: 1000, + lines: vec![0 + 1000, 7 + 1000, 27 + 1000], + multi_byte_chars: vec![(13 + 1000, 2), (29 + 1000, 2)], + non_narrow_chars: vec![(2 + 1000, 4), (24 + 1000, 0)], +); diff --git a/compiler/rustc_span/src/caching_source_map_view.rs b/compiler/rustc_span/src/caching_source_map_view.rs new file mode 100644 index 000000000..fdabf404a --- /dev/null +++ b/compiler/rustc_span/src/caching_source_map_view.rs @@ -0,0 +1,293 @@ +use crate::source_map::SourceMap; +use crate::{BytePos, SourceFile, SpanData}; +use rustc_data_structures::sync::Lrc; +use std::ops::Range; + +#[derive(Clone)] +struct CacheEntry { + time_stamp: usize, + line_number: usize, + // The line's byte position range in the `SourceMap`. This range will fail to contain a valid + // position in certain edge cases. 
Spans often start/end one past something, and when that + // something is the last character of a file (this can happen when a file doesn't end in a + // newline, for example), we'd still like for the position to be considered within the last + // line. However, it isn't according to the exclusive upper bound of this range. We cannot + // change the upper bound to be inclusive, because for most lines, the upper bound is the same + // as the lower bound of the next line, so there would be an ambiguity. + // + // Since the containment aspect of this range is only used to see whether or not the cache + // entry contains a position, the only ramification of the above is that we will get cache + // misses for these rare positions. A line lookup for the position via `SourceMap::lookup_line` + // after a cache miss will produce the last line number, as desired. + line: Range<BytePos>, + file: Lrc<SourceFile>, + file_index: usize, +} + +impl CacheEntry { + #[inline] + fn update( + &mut self, + new_file_and_idx: Option<(Lrc<SourceFile>, usize)>, + pos: BytePos, + time_stamp: usize, + ) { + if let Some((file, file_idx)) = new_file_and_idx { + self.file = file; + self.file_index = file_idx; + } + + let line_index = self.file.lookup_line(pos).unwrap(); + let line_bounds = self.file.line_bounds(line_index); + self.line_number = line_index + 1; + self.line = line_bounds; + self.touch(time_stamp); + } + + #[inline] + fn touch(&mut self, time_stamp: usize) { + self.time_stamp = time_stamp; + } +} + +#[derive(Clone)] +pub struct CachingSourceMapView<'sm> { + source_map: &'sm SourceMap, + line_cache: [CacheEntry; 3], + time_stamp: usize, +} + +impl<'sm> CachingSourceMapView<'sm> { + pub fn new(source_map: &'sm SourceMap) -> CachingSourceMapView<'sm> { + let files = source_map.files(); + let first_file = files[0].clone(); + let entry = CacheEntry { + time_stamp: 0, + line_number: 0, + line: BytePos(0)..BytePos(0), + file: first_file, + file_index: 0, + }; + + CachingSourceMapView { + 
source_map, + line_cache: [entry.clone(), entry.clone(), entry], + time_stamp: 0, + } + } + + pub fn byte_pos_to_line_and_col( + &mut self, + pos: BytePos, + ) -> Option<(Lrc<SourceFile>, usize, BytePos)> { + self.time_stamp += 1; + + // Check if the position is in one of the cached lines + let cache_idx = self.cache_entry_index(pos); + if cache_idx != -1 { + let cache_entry = &mut self.line_cache[cache_idx as usize]; + cache_entry.touch(self.time_stamp); + + return Some(( + cache_entry.file.clone(), + cache_entry.line_number, + pos - cache_entry.line.start, + )); + } + + // No cache hit ... + let oldest = self.oldest_cache_entry_index(); + + // If the entry doesn't point to the correct file, get the new file and index. + let new_file_and_idx = if !file_contains(&self.line_cache[oldest].file, pos) { + Some(self.file_for_position(pos)?) + } else { + None + }; + + let cache_entry = &mut self.line_cache[oldest]; + cache_entry.update(new_file_and_idx, pos, self.time_stamp); + + Some((cache_entry.file.clone(), cache_entry.line_number, pos - cache_entry.line.start)) + } + + pub fn span_data_to_lines_and_cols( + &mut self, + span_data: &SpanData, + ) -> Option<(Lrc<SourceFile>, usize, BytePos, usize, BytePos)> { + self.time_stamp += 1; + + // Check if lo and hi are in the cached lines. + let lo_cache_idx = self.cache_entry_index(span_data.lo); + let hi_cache_idx = self.cache_entry_index(span_data.hi); + + if lo_cache_idx != -1 && hi_cache_idx != -1 { + // Cache hit for span lo and hi. Check if they belong to the same file. 
+ let result = { + let lo = &self.line_cache[lo_cache_idx as usize]; + let hi = &self.line_cache[hi_cache_idx as usize]; + + if lo.file_index != hi.file_index { + return None; + } + + ( + lo.file.clone(), + lo.line_number, + span_data.lo - lo.line.start, + hi.line_number, + span_data.hi - hi.line.start, + ) + }; + + self.line_cache[lo_cache_idx as usize].touch(self.time_stamp); + self.line_cache[hi_cache_idx as usize].touch(self.time_stamp); + + return Some(result); + } + + // No cache hit or cache hit for only one of span lo and hi. + let oldest = if lo_cache_idx != -1 || hi_cache_idx != -1 { + let avoid_idx = if lo_cache_idx != -1 { lo_cache_idx } else { hi_cache_idx }; + self.oldest_cache_entry_index_avoid(avoid_idx as usize) + } else { + self.oldest_cache_entry_index() + }; + + // If the entry doesn't point to the correct file, get the new file and index. + // Return early if the file containing beginning of span doesn't contain end of span. + let new_file_and_idx = if !file_contains(&self.line_cache[oldest].file, span_data.lo) { + let new_file_and_idx = self.file_for_position(span_data.lo)?; + if !file_contains(&new_file_and_idx.0, span_data.hi) { + return None; + } + + Some(new_file_and_idx) + } else { + let file = &self.line_cache[oldest].file; + if !file_contains(&file, span_data.hi) { + return None; + } + + None + }; + + // Update the cache entries. + let (lo_idx, hi_idx) = match (lo_cache_idx, hi_cache_idx) { + // Oldest cache entry is for span_data.lo line. 
+ (-1, -1) => { + let lo = &mut self.line_cache[oldest]; + lo.update(new_file_and_idx, span_data.lo, self.time_stamp); + + if !lo.line.contains(&span_data.hi) { + let new_file_and_idx = Some((lo.file.clone(), lo.file_index)); + let next_oldest = self.oldest_cache_entry_index_avoid(oldest); + let hi = &mut self.line_cache[next_oldest]; + hi.update(new_file_and_idx, span_data.hi, self.time_stamp); + (oldest, next_oldest) + } else { + (oldest, oldest) + } + } + // Oldest cache entry is for span_data.lo line. + (-1, _) => { + let lo = &mut self.line_cache[oldest]; + lo.update(new_file_and_idx, span_data.lo, self.time_stamp); + let hi = &mut self.line_cache[hi_cache_idx as usize]; + hi.touch(self.time_stamp); + (oldest, hi_cache_idx as usize) + } + // Oldest cache entry is for span_data.hi line. + (_, -1) => { + let hi = &mut self.line_cache[oldest]; + hi.update(new_file_and_idx, span_data.hi, self.time_stamp); + let lo = &mut self.line_cache[lo_cache_idx as usize]; + lo.touch(self.time_stamp); + (lo_cache_idx as usize, oldest) + } + _ => { + panic!(); + } + }; + + let lo = &self.line_cache[lo_idx]; + let hi = &self.line_cache[hi_idx]; + + // Span lo and hi may equal line end when last line doesn't + // end in newline, hence the inclusive upper bounds below. 
+ assert!(span_data.lo >= lo.line.start); + assert!(span_data.lo <= lo.line.end); + assert!(span_data.hi >= hi.line.start); + assert!(span_data.hi <= hi.line.end); + assert!(lo.file.contains(span_data.lo)); + assert!(lo.file.contains(span_data.hi)); + assert_eq!(lo.file_index, hi.file_index); + + Some(( + lo.file.clone(), + lo.line_number, + span_data.lo - lo.line.start, + hi.line_number, + span_data.hi - hi.line.start, + )) + } + + fn cache_entry_index(&self, pos: BytePos) -> isize { + for (idx, cache_entry) in self.line_cache.iter().enumerate() { + if cache_entry.line.contains(&pos) { + return idx as isize; + } + } + + -1 + } + + fn oldest_cache_entry_index(&self) -> usize { + let mut oldest = 0; + + for idx in 1..self.line_cache.len() { + if self.line_cache[idx].time_stamp < self.line_cache[oldest].time_stamp { + oldest = idx; + } + } + + oldest + } + + fn oldest_cache_entry_index_avoid(&self, avoid_idx: usize) -> usize { + let mut oldest = if avoid_idx != 0 { 0 } else { 1 }; + + for idx in 0..self.line_cache.len() { + if idx != avoid_idx + && self.line_cache[idx].time_stamp < self.line_cache[oldest].time_stamp + { + oldest = idx; + } + } + + oldest + } + + fn file_for_position(&self, pos: BytePos) -> Option<(Lrc<SourceFile>, usize)> { + if !self.source_map.files().is_empty() { + let file_idx = self.source_map.lookup_source_file_idx(pos); + let file = &self.source_map.files()[file_idx]; + + if file_contains(file, pos) { + return Some((file.clone(), file_idx)); + } + } + + None + } +} + +#[inline] +fn file_contains(file: &SourceFile, pos: BytePos) -> bool { + // `SourceMap::lookup_source_file_idx` and `SourceFile::contains` both consider the position + // one past the end of a file to belong to it. Normally, that's what we want. But for the + // purposes of converting a byte position to a line and column number, we can't come up with a + // line and column number if the file is empty, because an empty file doesn't contain any + // lines. 
So for our purposes, we don't consider empty files to contain any byte position. + file.contains(pos) && !file.is_empty() +} diff --git a/compiler/rustc_span/src/def_id.rs b/compiler/rustc_span/src/def_id.rs new file mode 100644 index 000000000..a1533fe46 --- /dev/null +++ b/compiler/rustc_span/src/def_id.rs @@ -0,0 +1,444 @@ +use crate::HashStableContext; +use rustc_data_structures::fingerprint::Fingerprint; +use rustc_data_structures::stable_hasher::{HashStable, StableHasher, ToStableHashKey}; +use rustc_data_structures::AtomicRef; +use rustc_index::vec::Idx; +use rustc_macros::HashStable_Generic; +use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; +use std::borrow::Borrow; +use std::fmt; +use std::hash::{Hash, Hasher}; + +rustc_index::newtype_index! { + pub struct CrateNum { + ENCODABLE = custom + DEBUG_FORMAT = "crate{}" + } +} + +/// Item definitions in the currently-compiled crate would have the `CrateNum` +/// `LOCAL_CRATE` in their `DefId`. +pub const LOCAL_CRATE: CrateNum = CrateNum::from_u32(0); + +impl CrateNum { + #[inline] + pub fn new(x: usize) -> CrateNum { + CrateNum::from_usize(x) + } + + #[inline] + pub fn as_def_id(self) -> DefId { + DefId { krate: self, index: CRATE_DEF_INDEX } + } +} + +impl fmt::Display for CrateNum { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&self.private, f) + } +} + +/// As a local identifier, a `CrateNum` is only meaningful within its context, e.g. within a tcx. +/// Therefore, make sure to include the context when encode a `CrateNum`. +impl<E: Encoder> Encodable<E> for CrateNum { + default fn encode(&self, s: &mut E) { + s.emit_u32(self.as_u32()); + } +} + +impl<D: Decoder> Decodable<D> for CrateNum { + default fn decode(d: &mut D) -> CrateNum { + CrateNum::from_u32(d.read_u32()) + } +} + +/// A `DefPathHash` is a fixed-size representation of a `DefPath` that is +/// stable across crate and compilation session boundaries. It consists of two +/// separate 64-bit hashes. 
The first uniquely identifies the crate this +/// `DefPathHash` originates from (see [StableCrateId]), and the second +/// uniquely identifies the corresponding `DefPath` within that crate. Together +/// they form a unique identifier within an entire crate graph. +/// +/// There is a very small chance of hash collisions, which would mean that two +/// different `DefPath`s map to the same `DefPathHash`. Proceeding compilation +/// with such a hash collision would very probably lead to an ICE, and in the +/// worst case lead to a silent mis-compilation. The compiler therefore actively +/// and exhaustively checks for such hash collisions and aborts compilation if +/// it finds one. +/// +/// `DefPathHash` uses 64-bit hashes for both the crate-id part and the +/// crate-internal part, even though it is likely that there are many more +/// `LocalDefId`s in a single crate than there are individual crates in a crate +/// graph. Since we use the same number of bits in both cases, the collision +/// probability for the crate-local part will be quite a bit higher (though +/// still very small). +/// +/// This imbalance is not by accident: A hash collision in the +/// crate-local part of a `DefPathHash` will be detected and reported while +/// compiling the crate in question. Such a collision does not depend on +/// outside factors and can be easily fixed by the crate maintainer (e.g. by +/// renaming the item in question or by bumping the crate version in a harmless +/// way). +/// +/// A collision between crate-id hashes on the other hand is harder to fix +/// because it depends on the set of crates in the entire crate graph of a +/// compilation session. Again, using the same crate with a different version +/// number would fix the issue with a high probability -- but that might be +/// easier said than done if the crates in question are dependencies of +/// third-party crates. 
+/// +/// That being said, given a high quality hash function, the collision +/// probabilities in question are very small. For example, for a big crate like +/// `rustc_middle` (with ~50000 `LocalDefId`s as of the time of writing) there +/// is a probability of roughly 1 in 14,750,000,000 of a crate-internal +/// collision occurring. For a big crate graph with 1000 crates in it, there is +/// a probability of 1 in 36,890,000,000,000 of a `StableCrateId` collision. +#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Debug)] +#[derive(HashStable_Generic, Encodable, Decodable)] +pub struct DefPathHash(pub Fingerprint); + +impl DefPathHash { + /// Returns the [StableCrateId] identifying the crate this [DefPathHash] + /// originates from. + #[inline] + pub fn stable_crate_id(&self) -> StableCrateId { + StableCrateId(self.0.as_value().0) + } + + /// Returns the crate-local part of the [DefPathHash]. + /// + /// Used for tests. + #[inline] + pub fn local_hash(&self) -> u64 { + self.0.as_value().1 + } + + /// Builds a new [DefPathHash] with the given [StableCrateId] and + /// `local_hash`, where `local_hash` must be unique within its crate. + pub fn new(stable_crate_id: StableCrateId, local_hash: u64) -> DefPathHash { + DefPathHash(Fingerprint::new(stable_crate_id.0, local_hash)) + } +} + +impl Borrow<Fingerprint> for DefPathHash { + #[inline] + fn borrow(&self) -> &Fingerprint { + &self.0 + } +} + +/// A [`StableCrateId`] is a 64-bit hash of a crate name, together with all +/// `-Cmetadata` arguments, and some other data. It is to [`CrateNum`] what [`DefPathHash`] is to +/// [`DefId`]. It is stable across compilation sessions. +/// +/// Since the ID is a hash value, there is a small chance that two crates +/// end up with the same [`StableCrateId`]. The compiler will check for such +/// collisions when loading crates and abort compilation in order to avoid +/// further trouble. 
+/// +/// For more information on the possibility of hash collisions in rustc, +/// see the discussion in [`DefId`]. +#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Debug)] +#[derive(HashStable_Generic, Encodable, Decodable)] +pub struct StableCrateId(pub(crate) u64); + +impl StableCrateId { + pub fn to_u64(self) -> u64 { + self.0 + } + + /// Computes the stable ID for a crate with the given name and + /// `-Cmetadata` arguments. + pub fn new(crate_name: &str, is_exe: bool, mut metadata: Vec<String>) -> StableCrateId { + let mut hasher = StableHasher::new(); + crate_name.hash(&mut hasher); + + // We don't want the stable crate ID to depend on the order of + // -C metadata arguments, so sort them: + metadata.sort(); + // Every distinct -C metadata value is only incorporated once: + metadata.dedup(); + + hasher.write(b"metadata"); + for s in &metadata { + // Also incorporate the length of a metadata string, so that we generate + // different values for `-Cmetadata=ab -Cmetadata=c` and + // `-Cmetadata=a -Cmetadata=bc` + hasher.write_usize(s.len()); + hasher.write(s.as_bytes()); + } + + // Also incorporate crate type, so that we don't get symbol conflicts when + // linking against a library of the same name, if this is an executable. + hasher.write(if is_exe { b"exe" } else { b"lib" }); + + // Also incorporate the rustc version. Otherwise, with -Zsymbol-mangling-version=v0 + // and no -Cmetadata, symbols from the same crate compiled with different versions of + // rustc are named the same. + // + // RUSTC_FORCE_RUSTC_VERSION is used to inject rustc version information + // during testing. + if let Some(val) = std::env::var_os("RUSTC_FORCE_RUSTC_VERSION") { + hasher.write(val.to_string_lossy().into_owned().as_bytes()) + } else { + hasher.write(option_env!("CFG_VERSION").unwrap_or("unknown version").as_bytes()); + } + + StableCrateId(hasher.finish()) + } +} + +rustc_index::newtype_index! 
{ + /// A DefIndex is an index into the hir-map for a crate, identifying a + /// particular definition. It should really be considered an interned + /// shorthand for a particular DefPath. + pub struct DefIndex { + ENCODABLE = custom // (only encodable in metadata) + + DEBUG_FORMAT = "DefIndex({})", + /// The crate root is always assigned index 0 by the AST Map code, + /// thanks to `NodeCollector::new`. + const CRATE_DEF_INDEX = 0, + } +} + +impl<E: Encoder> Encodable<E> for DefIndex { + default fn encode(&self, _: &mut E) { + panic!("cannot encode `DefIndex` with `{}`", std::any::type_name::<E>()); + } +} + +impl<D: Decoder> Decodable<D> for DefIndex { + default fn decode(_: &mut D) -> DefIndex { + panic!("cannot decode `DefIndex` with `{}`", std::any::type_name::<D>()); + } +} + +/// A `DefId` identifies a particular *definition*, by combining a crate +/// index and a def index. +/// +/// You can create a `DefId` from a `LocalDefId` using `local_def_id.to_def_id()`. +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Copy)] +// On below-64 bit systems we can simply use the derived `Hash` impl +#[cfg_attr(not(target_pointer_width = "64"), derive(Hash))] +#[repr(C)] +#[rustc_pass_by_value] +// We guarantee field order. Note that the order is essential here, see below why. +pub struct DefId { + // cfg-ing the order of fields so that the `DefIndex` which is high entropy always ends up in + // the lower bits no matter the endianness. This allows the compiler to turn that `Hash` impl + // into a direct call to 'u64::hash(_)`. + #[cfg(not(all(target_pointer_width = "64", target_endian = "big")))] + pub index: DefIndex, + pub krate: CrateNum, + #[cfg(all(target_pointer_width = "64", target_endian = "big"))] + pub index: DefIndex, +} + +// On 64-bit systems, we can hash the whole `DefId` as one `u64` instead of two `u32`s. This +// improves performance without impairing `FxHash` quality. 
So the below code gets compiled to a +// noop on little endian systems because the memory layout of `DefId` is as follows: +// +// ``` +// +-1--------------31-+-32-------------63-+ +// ! index ! krate ! +// +-------------------+-------------------+ +// ``` +// +// The order here has direct impact on `FxHash` quality because we have far more `DefIndex` per +// crate than we have `Crate`s within one compilation. Or in other words, this arrangement puts +// more entropy in the low bits than the high bits. The reason this matters is that `FxHash`, which +// is used throughout rustc, has problems distributing the entropy from the high bits, so reversing +// the order would lead to a large number of collisions and thus far worse performance. +// +// On 64-bit big-endian systems, this compiles to a 64-bit rotation by 32 bits, which is still +// faster than another `FxHash` round. +#[cfg(target_pointer_width = "64")] +impl Hash for DefId { + fn hash<H: Hasher>(&self, h: &mut H) { + (((self.krate.as_u32() as u64) << 32) | (self.index.as_u32() as u64)).hash(h) + } +} + +impl DefId { + /// Makes a local `DefId` from the given `DefIndex`. + #[inline] + pub fn local(index: DefIndex) -> DefId { + DefId { krate: LOCAL_CRATE, index } + } + + /// Returns whether the item is defined in the crate currently being compiled. + #[inline] + pub fn is_local(self) -> bool { + self.krate == LOCAL_CRATE + } + + #[inline] + pub fn as_local(self) -> Option<LocalDefId> { + if self.is_local() { Some(LocalDefId { local_def_index: self.index }) } else { None } + } + + #[inline] + #[track_caller] + pub fn expect_local(self) -> LocalDefId { + // NOTE: `match` below is required to apply `#[track_caller]`, + // i.e. don't use closures. 
+ match self.as_local() { + Some(local_def_id) => local_def_id, + None => panic!("DefId::expect_local: `{:?}` isn't local", self), + } + } + + #[inline] + pub fn is_crate_root(self) -> bool { + self.index == CRATE_DEF_INDEX + } + + #[inline] + pub fn as_crate_root(self) -> Option<CrateNum> { + if self.is_crate_root() { Some(self.krate) } else { None } + } + + #[inline] + pub fn is_top_level_module(self) -> bool { + self.is_local() && self.is_crate_root() + } +} + +impl<E: Encoder> Encodable<E> for DefId { + default fn encode(&self, s: &mut E) { + self.krate.encode(s); + self.index.encode(s); + } +} + +impl<D: Decoder> Decodable<D> for DefId { + default fn decode(d: &mut D) -> DefId { + DefId { krate: Decodable::decode(d), index: Decodable::decode(d) } + } +} + +pub fn default_def_id_debug(def_id: DefId, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("DefId").field("krate", &def_id.krate).field("index", &def_id.index).finish() +} + +pub static DEF_ID_DEBUG: AtomicRef<fn(DefId, &mut fmt::Formatter<'_>) -> fmt::Result> = + AtomicRef::new(&(default_def_id_debug as fn(_, &mut fmt::Formatter<'_>) -> _)); + +impl fmt::Debug for DefId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + (*DEF_ID_DEBUG)(*self, f) + } +} + +rustc_data_structures::define_id_collections!(DefIdMap, DefIdSet, DefId); + +/// A `LocalDefId` is equivalent to a `DefId` with `krate == LOCAL_CRATE`. Since +/// we encode this information in the type, we can ensure at compile time that +/// no `DefId`s from upstream crates get thrown into the mix. There are quite a +/// few cases where we know that only `DefId`s from the local crate are expected; +/// a `DefId` from a different crate would signify a bug somewhere. This +/// is when `LocalDefId` comes in handy. +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub struct LocalDefId { + pub local_def_index: DefIndex, +} + +// To ensure correctness of incremental compilation, +// `LocalDefId` must not implement `Ord` or `PartialOrd`. 
+// See https://github.com/rust-lang/rust/issues/90317. +impl !Ord for LocalDefId {} +impl !PartialOrd for LocalDefId {} + +pub const CRATE_DEF_ID: LocalDefId = LocalDefId { local_def_index: CRATE_DEF_INDEX }; + +impl Idx for LocalDefId { + #[inline] + fn new(idx: usize) -> Self { + LocalDefId { local_def_index: Idx::new(idx) } + } + #[inline] + fn index(self) -> usize { + self.local_def_index.index() + } +} + +impl LocalDefId { + #[inline] + pub fn to_def_id(self) -> DefId { + DefId { krate: LOCAL_CRATE, index: self.local_def_index } + } + + #[inline] + pub fn is_top_level_module(self) -> bool { + self == CRATE_DEF_ID + } +} + +impl fmt::Debug for LocalDefId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.to_def_id().fmt(f) + } +} + +impl<E: Encoder> Encodable<E> for LocalDefId { + fn encode(&self, s: &mut E) { + self.to_def_id().encode(s); + } +} + +impl<D: Decoder> Decodable<D> for LocalDefId { + fn decode(d: &mut D) -> LocalDefId { + DefId::decode(d).expect_local() + } +} + +rustc_data_structures::define_id_collections!(LocalDefIdMap, LocalDefIdSet, LocalDefId); + +impl<CTX: HashStableContext> HashStable<CTX> for DefId { + #[inline] + fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) { + self.to_stable_hash_key(hcx).hash_stable(hcx, hasher); + } +} + +impl<CTX: HashStableContext> HashStable<CTX> for LocalDefId { + #[inline] + fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) { + self.to_stable_hash_key(hcx).hash_stable(hcx, hasher); + } +} + +impl<CTX: HashStableContext> HashStable<CTX> for CrateNum { + #[inline] + fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) { + self.to_stable_hash_key(hcx).hash_stable(hcx, hasher); + } +} + +impl<CTX: HashStableContext> ToStableHashKey<CTX> for DefId { + type KeyType = DefPathHash; + + #[inline] + fn to_stable_hash_key(&self, hcx: &CTX) -> DefPathHash { + hcx.def_path_hash(*self) + } +} + +impl<CTX: HashStableContext> ToStableHashKey<CTX> for LocalDefId { + 
type KeyType = DefPathHash; + + #[inline] + fn to_stable_hash_key(&self, hcx: &CTX) -> DefPathHash { + hcx.def_path_hash(self.to_def_id()) + } +} + +impl<CTX: HashStableContext> ToStableHashKey<CTX> for CrateNum { + type KeyType = DefPathHash; + + #[inline] + fn to_stable_hash_key(&self, hcx: &CTX) -> DefPathHash { + self.as_def_id().to_stable_hash_key(hcx) + } +} diff --git a/compiler/rustc_span/src/edition.rs b/compiler/rustc_span/src/edition.rs new file mode 100644 index 000000000..065d3660e --- /dev/null +++ b/compiler/rustc_span/src/edition.rs @@ -0,0 +1,110 @@ +use crate::symbol::{sym, Symbol}; +use std::fmt; +use std::str::FromStr; + +use rustc_macros::HashStable_Generic; + +/// The edition of the compiler. (See [RFC 2052](https://github.com/rust-lang/rfcs/blob/master/text/2052-epochs.md).) +#[derive(Clone, Copy, Hash, PartialEq, PartialOrd, Debug, Encodable, Decodable, Eq)] +#[derive(HashStable_Generic)] +pub enum Edition { + // When adding new editions, be sure to do the following: + // + // - update the `ALL_EDITIONS` const + // - update the `EDITION_NAME_LIST` const + // - add a `rust_####()` function to the session + // - update the enum in Cargo's sources as well + // + // Editions *must* be kept in order, oldest to newest. + /// The 2015 edition + Edition2015, + /// The 2018 edition + Edition2018, + /// The 2021 edition + Edition2021, + /// The 2024 edition + Edition2024, +} + +// Must be in order from oldest to newest. 
+pub const ALL_EDITIONS: &[Edition] = + &[Edition::Edition2015, Edition::Edition2018, Edition::Edition2021, Edition::Edition2024]; + +pub const EDITION_NAME_LIST: &str = "2015|2018|2021|2024"; + +pub const DEFAULT_EDITION: Edition = Edition::Edition2015; + +pub const LATEST_STABLE_EDITION: Edition = Edition::Edition2021; + +impl fmt::Display for Edition { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let s = match *self { + Edition::Edition2015 => "2015", + Edition::Edition2018 => "2018", + Edition::Edition2021 => "2021", + Edition::Edition2024 => "2024", + }; + write!(f, "{}", s) + } +} + +impl Edition { + pub fn lint_name(&self) -> &'static str { + match *self { + Edition::Edition2015 => "rust_2015_compatibility", + Edition::Edition2018 => "rust_2018_compatibility", + Edition::Edition2021 => "rust_2021_compatibility", + Edition::Edition2024 => "rust_2024_compatibility", + } + } + + pub fn feature_name(&self) -> Symbol { + match *self { + Edition::Edition2015 => sym::rust_2015_preview, + Edition::Edition2018 => sym::rust_2018_preview, + Edition::Edition2021 => sym::rust_2021_preview, + Edition::Edition2024 => sym::rust_2024_preview, + } + } + + pub fn is_stable(&self) -> bool { + match *self { + Edition::Edition2015 => true, + Edition::Edition2018 => true, + Edition::Edition2021 => true, + Edition::Edition2024 => false, + } + } + + pub fn rust_2015(&self) -> bool { + *self == Edition::Edition2015 + } + + /// Are we allowed to use features from the Rust 2018 edition? + pub fn rust_2018(&self) -> bool { + *self >= Edition::Edition2018 + } + + /// Are we allowed to use features from the Rust 2021 edition? + pub fn rust_2021(&self) -> bool { + *self >= Edition::Edition2021 + } + + /// Are we allowed to use features from the Rust 2024 edition? 
+ pub fn rust_2024(&self) -> bool { + *self >= Edition::Edition2024 + } +} + +impl FromStr for Edition { + type Err = (); + fn from_str(s: &str) -> Result<Self, ()> { + match s { + "2015" => Ok(Edition::Edition2015), + "2018" => Ok(Edition::Edition2018), + "2021" => Ok(Edition::Edition2021), + "2024" => Ok(Edition::Edition2024), + _ => Err(()), + } + } +} diff --git a/compiler/rustc_span/src/fatal_error.rs b/compiler/rustc_span/src/fatal_error.rs new file mode 100644 index 000000000..fa84c486d --- /dev/null +++ b/compiler/rustc_span/src/fatal_error.rs @@ -0,0 +1,26 @@ +/// Used as a return value to signify a fatal error occurred. (It is also +/// used as the argument to panic at the moment, but that will eventually +/// not be true.) +#[derive(Copy, Clone, Debug)] +#[must_use] +pub struct FatalError; + +pub struct FatalErrorMarker; + +// Don't implement Send on FatalError. This makes it impossible to panic!(FatalError). +// We don't want to invoke the panic handler and print a backtrace for fatal errors. +impl !Send for FatalError {} + +impl FatalError { + pub fn raise(self) -> ! { + std::panic::resume_unwind(Box::new(FatalErrorMarker)) + } +} + +impl std::fmt::Display for FatalError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "fatal error") + } +} + +impl std::error::Error for FatalError {} diff --git a/compiler/rustc_span/src/hygiene.rs b/compiler/rustc_span/src/hygiene.rs new file mode 100644 index 000000000..e169d3c7c --- /dev/null +++ b/compiler/rustc_span/src/hygiene.rs @@ -0,0 +1,1528 @@ +//! Machinery for hygienic macros. +//! +//! Inspired by Matthew Flatt et al., “Macros That Work Together: Compile-Time Bindings, Partial +//! Expansion, and Definition Contexts,” *Journal of Functional Programming* 22, no. 2 +//! (March 1, 2012): 181–216, <https://doi.org/10.1017/S0956796812000093>. + +// Hygiene data is stored in a global variable and accessed via TLS, which +// means that accesses are somewhat expensive. 
(`HygieneData::with` +// encapsulates a single access.) Therefore, on hot code paths it is worth +// ensuring that multiple HygieneData accesses are combined into a single +// `HygieneData::with`. +// +// This explains why `HygieneData`, `SyntaxContext` and `ExpnId` have interfaces +// with a certain amount of redundancy in them. For example, +// `SyntaxContext::outer_expn_data` combines `SyntaxContext::outer` and +// `ExpnId::expn_data` so that two `HygieneData` accesses can be performed within +// a single `HygieneData::with` call. +// +// It also explains why many functions appear in `HygieneData` and again in +// `SyntaxContext` or `ExpnId`. For example, `HygieneData::outer` and +// `SyntaxContext::outer` do the same thing, but the former is for use within a +// `HygieneData::with` call while the latter is for use outside such a call. +// When modifying this file it is important to understand this distinction, +// because getting it wrong can lead to nested `HygieneData::with` calls that +// trigger runtime aborts. (Fortunately these are obvious and easy to fix.) + +use crate::edition::Edition; +use crate::symbol::{kw, sym, Symbol}; +use crate::with_session_globals; +use crate::{HashStableContext, Span, DUMMY_SP}; + +use crate::def_id::{CrateNum, DefId, StableCrateId, CRATE_DEF_ID, LOCAL_CRATE}; +use rustc_data_structures::fingerprint::Fingerprint; +use rustc_data_structures::fx::{FxHashMap, FxHashSet}; +use rustc_data_structures::stable_hasher::HashingControls; +use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; +use rustc_data_structures::sync::{Lock, Lrc}; +use rustc_data_structures::unhash::UnhashMap; +use rustc_index::vec::IndexVec; +use rustc_macros::HashStable_Generic; +use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; +use std::fmt; +use std::hash::Hash; +use tracing::*; + +/// A `SyntaxContext` represents a chain of pairs `(ExpnId, Transparency)` named "marks". 
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct SyntaxContext(u32); + +#[derive(Debug, Encodable, Decodable, Clone)] +pub struct SyntaxContextData { + outer_expn: ExpnId, + outer_transparency: Transparency, + parent: SyntaxContext, + /// This context, but with all transparent and semi-transparent expansions filtered away. + opaque: SyntaxContext, + /// This context, but with all transparent expansions filtered away. + opaque_and_semitransparent: SyntaxContext, + /// Name of the crate to which `$crate` with this context would resolve. + dollar_crate_name: Symbol, +} + +rustc_index::newtype_index! { + /// A unique ID associated with a macro invocation and expansion. + pub struct ExpnIndex { + ENCODABLE = custom + } +} + +/// A unique ID associated with a macro invocation and expansion. +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub struct ExpnId { + pub krate: CrateNum, + pub local_id: ExpnIndex, +} + +impl fmt::Debug for ExpnId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Generate crate_::{{expn_}}. + write!(f, "{:?}::{{{{expn{}}}}}", self.krate, self.local_id.private) + } +} + +rustc_index::newtype_index! { + /// A unique ID associated with a macro invocation and expansion. + pub struct LocalExpnId { + ENCODABLE = custom + ORD_IMPL = custom + DEBUG_FORMAT = "expn{}" + } +} + +// To ensure correctness of incremental compilation, +// `LocalExpnId` must not implement `Ord` or `PartialOrd`. +// See https://github.com/rust-lang/rust/issues/90317. +impl !Ord for LocalExpnId {} +impl !PartialOrd for LocalExpnId {} + +/// Assert that the provided `HashStableContext` is configured with the 'default' +/// `HashingControls`. We should always have bailed out before getting to here +/// with a non-default mode. With this check in place, we can avoid the need +/// to maintain separate versions of `ExpnData` hashes for each permutation +/// of `HashingControls` settings. 
+fn assert_default_hashing_controls<CTX: HashStableContext>(ctx: &CTX, msg: &str) { + match ctx.hashing_controls() { + // Note that we require that `hash_spans` be set according to the global + // `-Z incremental-ignore-spans` option. Normally, this option is disabled, + // which will cause us to require that this method always be called with `Span` hashing + // enabled. + HashingControls { hash_spans } + if hash_spans == !ctx.unstable_opts_incremental_ignore_spans() => {} + other => panic!("Attempted hashing of {msg} with non-default HashingControls: {:?}", other), + } +} + +/// A unique hash value associated to an expansion. +#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug, Encodable, Decodable, HashStable_Generic)] +pub struct ExpnHash(Fingerprint); + +impl ExpnHash { + /// Returns the [StableCrateId] identifying the crate this [ExpnHash] + /// originates from. + #[inline] + pub fn stable_crate_id(self) -> StableCrateId { + StableCrateId(self.0.as_value().0) + } + + /// Returns the crate-local part of the [ExpnHash]. + /// + /// Used for tests. + #[inline] + pub fn local_hash(self) -> u64 { + self.0.as_value().1 + } + + #[inline] + pub fn is_root(self) -> bool { + self.0 == Fingerprint::ZERO + } + + /// Builds a new [ExpnHash] with the given [StableCrateId] and + /// `local_hash`, where `local_hash` must be unique within its crate. + fn new(stable_crate_id: StableCrateId, local_hash: u64) -> ExpnHash { + ExpnHash(Fingerprint::new(stable_crate_id.0, local_hash)) + } +} + +/// A property of a macro expansion that determines how identifiers +/// produced by that expansion are resolved. +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Hash, Debug, Encodable, Decodable)] +#[derive(HashStable_Generic)] +pub enum Transparency { + /// Identifier produced by a transparent expansion is always resolved at call-site. + /// Call-site spans in procedural macros, hygiene opt-out in `macro` should use this. 
+ Transparent, + /// Identifier produced by a semi-transparent expansion may be resolved + /// either at call-site or at definition-site. + /// If it's a local variable, label or `$crate` then it's resolved at def-site. + /// Otherwise it's resolved at call-site. + /// `macro_rules` macros behave like this, built-in macros currently behave like this too, + /// but that's an implementation detail. + SemiTransparent, + /// Identifier produced by an opaque expansion is always resolved at definition-site. + /// Def-site spans in procedural macros, identifiers from `macro` by default use this. + Opaque, +} + +impl LocalExpnId { + /// The ID of the theoretical expansion that generates freshly parsed, unexpanded AST. + pub const ROOT: LocalExpnId = LocalExpnId::from_u32(0); + + #[inline] + pub fn from_raw(idx: ExpnIndex) -> LocalExpnId { + LocalExpnId::from_u32(idx.as_u32()) + } + + #[inline] + pub fn as_raw(self) -> ExpnIndex { + ExpnIndex::from_u32(self.as_u32()) + } + + pub fn fresh_empty() -> LocalExpnId { + HygieneData::with(|data| { + let expn_id = data.local_expn_data.push(None); + let _eid = data.local_expn_hashes.push(ExpnHash(Fingerprint::ZERO)); + debug_assert_eq!(expn_id, _eid); + expn_id + }) + } + + pub fn fresh(mut expn_data: ExpnData, ctx: impl HashStableContext) -> LocalExpnId { + debug_assert_eq!(expn_data.parent.krate, LOCAL_CRATE); + let expn_hash = update_disambiguator(&mut expn_data, ctx); + HygieneData::with(|data| { + let expn_id = data.local_expn_data.push(Some(expn_data)); + let _eid = data.local_expn_hashes.push(expn_hash); + debug_assert_eq!(expn_id, _eid); + let _old_id = data.expn_hash_to_expn_id.insert(expn_hash, expn_id.to_expn_id()); + debug_assert!(_old_id.is_none()); + expn_id + }) + } + + #[inline] + pub fn expn_hash(self) -> ExpnHash { + HygieneData::with(|data| data.local_expn_hash(self)) + } + + #[inline] + pub fn expn_data(self) -> ExpnData { + HygieneData::with(|data| data.local_expn_data(self).clone()) + } + + #[inline] + pub fn 
to_expn_id(self) -> ExpnId { + ExpnId { krate: LOCAL_CRATE, local_id: self.as_raw() } + } + + #[inline] + pub fn set_expn_data(self, mut expn_data: ExpnData, ctx: impl HashStableContext) { + debug_assert_eq!(expn_data.parent.krate, LOCAL_CRATE); + let expn_hash = update_disambiguator(&mut expn_data, ctx); + HygieneData::with(|data| { + let old_expn_data = &mut data.local_expn_data[self]; + assert!(old_expn_data.is_none(), "expansion data is reset for an expansion ID"); + *old_expn_data = Some(expn_data); + debug_assert_eq!(data.local_expn_hashes[self].0, Fingerprint::ZERO); + data.local_expn_hashes[self] = expn_hash; + let _old_id = data.expn_hash_to_expn_id.insert(expn_hash, self.to_expn_id()); + debug_assert!(_old_id.is_none()); + }); + } + + #[inline] + pub fn is_descendant_of(self, ancestor: LocalExpnId) -> bool { + self.to_expn_id().is_descendant_of(ancestor.to_expn_id()) + } + + /// `expn_id.outer_expn_is_descendant_of(ctxt)` is equivalent to but faster than + /// `expn_id.is_descendant_of(ctxt.outer_expn())`. + #[inline] + pub fn outer_expn_is_descendant_of(self, ctxt: SyntaxContext) -> bool { + self.to_expn_id().outer_expn_is_descendant_of(ctxt) + } + + /// Returns span for the macro which originally caused this expansion to happen. + /// + /// Stops backtracing at include! boundary. + #[inline] + pub fn expansion_cause(self) -> Option<Span> { + self.to_expn_id().expansion_cause() + } + + #[inline] + #[track_caller] + pub fn parent(self) -> LocalExpnId { + self.expn_data().parent.as_local().unwrap() + } +} + +impl ExpnId { + /// The ID of the theoretical expansion that generates freshly parsed, unexpanded AST. + /// Invariant: we do not create any ExpnId with local_id == 0 and krate != 0. 
+ pub const fn root() -> ExpnId { + ExpnId { krate: LOCAL_CRATE, local_id: ExpnIndex::from_u32(0) } + } + + #[inline] + pub fn expn_hash(self) -> ExpnHash { + HygieneData::with(|data| data.expn_hash(self)) + } + + #[inline] + pub fn from_hash(hash: ExpnHash) -> Option<ExpnId> { + HygieneData::with(|data| data.expn_hash_to_expn_id.get(&hash).copied()) + } + + #[inline] + pub fn as_local(self) -> Option<LocalExpnId> { + if self.krate == LOCAL_CRATE { Some(LocalExpnId::from_raw(self.local_id)) } else { None } + } + + #[inline] + #[track_caller] + pub fn expect_local(self) -> LocalExpnId { + self.as_local().unwrap() + } + + #[inline] + pub fn expn_data(self) -> ExpnData { + HygieneData::with(|data| data.expn_data(self).clone()) + } + + #[inline] + pub fn is_descendant_of(self, ancestor: ExpnId) -> bool { + // a few "fast path" cases to avoid locking HygieneData + if ancestor == ExpnId::root() || ancestor == self { + return true; + } + if ancestor.krate != self.krate { + return false; + } + HygieneData::with(|data| data.is_descendant_of(self, ancestor)) + } + + /// `expn_id.outer_expn_is_descendant_of(ctxt)` is equivalent to but faster than + /// `expn_id.is_descendant_of(ctxt.outer_expn())`. + pub fn outer_expn_is_descendant_of(self, ctxt: SyntaxContext) -> bool { + HygieneData::with(|data| data.is_descendant_of(self, data.outer_expn(ctxt))) + } + + /// Returns span for the macro which originally caused this expansion to happen. + /// + /// Stops backtracing at include! boundary. + pub fn expansion_cause(mut self) -> Option<Span> { + let mut last_macro = None; + loop { + let expn_data = self.expn_data(); + // Stop going up the backtrace once include! 
is encountered + if expn_data.is_root() + || expn_data.kind == ExpnKind::Macro(MacroKind::Bang, sym::include) + { + break; + } + self = expn_data.call_site.ctxt().outer_expn(); + last_macro = Some(expn_data.call_site); + } + last_macro + } +} + +#[derive(Debug)] +pub struct HygieneData { + /// Each expansion should have an associated expansion data, but sometimes there's a delay + /// between creation of an expansion ID and obtaining its data (e.g. macros are collected + /// first and then resolved later), so we use an `Option` here. + local_expn_data: IndexVec<LocalExpnId, Option<ExpnData>>, + local_expn_hashes: IndexVec<LocalExpnId, ExpnHash>, + /// Data and hash information from external crates. We may eventually want to remove these + /// maps, and fetch the information directly from the other crate's metadata like DefIds do. + foreign_expn_data: FxHashMap<ExpnId, ExpnData>, + foreign_expn_hashes: FxHashMap<ExpnId, ExpnHash>, + expn_hash_to_expn_id: UnhashMap<ExpnHash, ExpnId>, + syntax_context_data: Vec<SyntaxContextData>, + syntax_context_map: FxHashMap<(SyntaxContext, ExpnId, Transparency), SyntaxContext>, + /// Maps the `local_hash` of an `ExpnData` to the next disambiguator value. + /// This is used by `update_disambiguator` to keep track of which `ExpnData`s + /// would have collisions without a disambiguator. + /// The keys of this map are always computed with `ExpnData.disambiguator` + /// set to 0. 
+ expn_data_disambiguators: FxHashMap<u64, u32>, +} + +impl HygieneData { + pub(crate) fn new(edition: Edition) -> Self { + let root_data = ExpnData::default( + ExpnKind::Root, + DUMMY_SP, + edition, + Some(CRATE_DEF_ID.to_def_id()), + None, + ); + + HygieneData { + local_expn_data: IndexVec::from_elem_n(Some(root_data), 1), + local_expn_hashes: IndexVec::from_elem_n(ExpnHash(Fingerprint::ZERO), 1), + foreign_expn_data: FxHashMap::default(), + foreign_expn_hashes: FxHashMap::default(), + expn_hash_to_expn_id: std::iter::once((ExpnHash(Fingerprint::ZERO), ExpnId::root())) + .collect(), + syntax_context_data: vec![SyntaxContextData { + outer_expn: ExpnId::root(), + outer_transparency: Transparency::Opaque, + parent: SyntaxContext(0), + opaque: SyntaxContext(0), + opaque_and_semitransparent: SyntaxContext(0), + dollar_crate_name: kw::DollarCrate, + }], + syntax_context_map: FxHashMap::default(), + expn_data_disambiguators: FxHashMap::default(), + } + } + + pub fn with<T, F: FnOnce(&mut HygieneData) -> T>(f: F) -> T { + with_session_globals(|session_globals| f(&mut *session_globals.hygiene_data.borrow_mut())) + } + + #[inline] + fn local_expn_hash(&self, expn_id: LocalExpnId) -> ExpnHash { + self.local_expn_hashes[expn_id] + } + + #[inline] + fn expn_hash(&self, expn_id: ExpnId) -> ExpnHash { + match expn_id.as_local() { + Some(expn_id) => self.local_expn_hashes[expn_id], + None => self.foreign_expn_hashes[&expn_id], + } + } + + fn local_expn_data(&self, expn_id: LocalExpnId) -> &ExpnData { + self.local_expn_data[expn_id].as_ref().expect("no expansion data for an expansion ID") + } + + fn expn_data(&self, expn_id: ExpnId) -> &ExpnData { + if let Some(expn_id) = expn_id.as_local() { + self.local_expn_data[expn_id].as_ref().expect("no expansion data for an expansion ID") + } else { + &self.foreign_expn_data[&expn_id] + } + } + + fn is_descendant_of(&self, mut expn_id: ExpnId, ancestor: ExpnId) -> bool { + // a couple "fast path" cases to avoid traversing parents in the 
loop below + if ancestor == ExpnId::root() { + return true; + } + if expn_id.krate != ancestor.krate { + return false; + } + loop { + if expn_id == ancestor { + return true; + } + if expn_id == ExpnId::root() { + return false; + } + expn_id = self.expn_data(expn_id).parent; + } + } + + fn normalize_to_macros_2_0(&self, ctxt: SyntaxContext) -> SyntaxContext { + self.syntax_context_data[ctxt.0 as usize].opaque + } + + fn normalize_to_macro_rules(&self, ctxt: SyntaxContext) -> SyntaxContext { + self.syntax_context_data[ctxt.0 as usize].opaque_and_semitransparent + } + + fn outer_expn(&self, ctxt: SyntaxContext) -> ExpnId { + self.syntax_context_data[ctxt.0 as usize].outer_expn + } + + fn outer_mark(&self, ctxt: SyntaxContext) -> (ExpnId, Transparency) { + let data = &self.syntax_context_data[ctxt.0 as usize]; + (data.outer_expn, data.outer_transparency) + } + + fn parent_ctxt(&self, ctxt: SyntaxContext) -> SyntaxContext { + self.syntax_context_data[ctxt.0 as usize].parent + } + + fn remove_mark(&self, ctxt: &mut SyntaxContext) -> (ExpnId, Transparency) { + let outer_mark = self.outer_mark(*ctxt); + *ctxt = self.parent_ctxt(*ctxt); + outer_mark + } + + fn marks(&self, mut ctxt: SyntaxContext) -> Vec<(ExpnId, Transparency)> { + let mut marks = Vec::new(); + while ctxt != SyntaxContext::root() { + debug!("marks: getting parent of {:?}", ctxt); + marks.push(self.outer_mark(ctxt)); + ctxt = self.parent_ctxt(ctxt); + } + marks.reverse(); + marks + } + + fn walk_chain(&self, mut span: Span, to: SyntaxContext) -> Span { + debug!("walk_chain({:?}, {:?})", span, to); + debug!("walk_chain: span ctxt = {:?}", span.ctxt()); + while span.from_expansion() && span.ctxt() != to { + let outer_expn = self.outer_expn(span.ctxt()); + debug!("walk_chain({:?}): outer_expn={:?}", span, outer_expn); + let expn_data = self.expn_data(outer_expn); + debug!("walk_chain({:?}): expn_data={:?}", span, expn_data); + span = expn_data.call_site; + } + span + } + + fn adjust(&self, ctxt: &mut 
SyntaxContext, expn_id: ExpnId) -> Option<ExpnId> { + let mut scope = None; + while !self.is_descendant_of(expn_id, self.outer_expn(*ctxt)) { + scope = Some(self.remove_mark(ctxt).0); + } + scope + } + + fn apply_mark( + &mut self, + ctxt: SyntaxContext, + expn_id: ExpnId, + transparency: Transparency, + ) -> SyntaxContext { + assert_ne!(expn_id, ExpnId::root()); + if transparency == Transparency::Opaque { + return self.apply_mark_internal(ctxt, expn_id, transparency); + } + + let call_site_ctxt = self.expn_data(expn_id).call_site.ctxt(); + let mut call_site_ctxt = if transparency == Transparency::SemiTransparent { + self.normalize_to_macros_2_0(call_site_ctxt) + } else { + self.normalize_to_macro_rules(call_site_ctxt) + }; + + if call_site_ctxt == SyntaxContext::root() { + return self.apply_mark_internal(ctxt, expn_id, transparency); + } + + // Otherwise, `expn_id` is a macros 1.0 definition and the call site is in a + // macros 2.0 expansion, i.e., a macros 1.0 invocation is in a macros 2.0 definition. + // + // In this case, the tokens from the macros 1.0 definition inherit the hygiene + // at their invocation. That is, we pretend that the macros 1.0 definition + // was defined at its invocation (i.e., inside the macros 2.0 definition) + // so that the macros 2.0 definition remains hygienic. + // + // See the example at `test/ui/hygiene/legacy_interaction.rs`. 
+ for (expn_id, transparency) in self.marks(ctxt) { + call_site_ctxt = self.apply_mark_internal(call_site_ctxt, expn_id, transparency); + } + self.apply_mark_internal(call_site_ctxt, expn_id, transparency) + } + + fn apply_mark_internal( + &mut self, + ctxt: SyntaxContext, + expn_id: ExpnId, + transparency: Transparency, + ) -> SyntaxContext { + let syntax_context_data = &mut self.syntax_context_data; + let mut opaque = syntax_context_data[ctxt.0 as usize].opaque; + let mut opaque_and_semitransparent = + syntax_context_data[ctxt.0 as usize].opaque_and_semitransparent; + + if transparency >= Transparency::Opaque { + let parent = opaque; + opaque = *self + .syntax_context_map + .entry((parent, expn_id, transparency)) + .or_insert_with(|| { + let new_opaque = SyntaxContext(syntax_context_data.len() as u32); + syntax_context_data.push(SyntaxContextData { + outer_expn: expn_id, + outer_transparency: transparency, + parent, + opaque: new_opaque, + opaque_and_semitransparent: new_opaque, + dollar_crate_name: kw::DollarCrate, + }); + new_opaque + }); + } + + if transparency >= Transparency::SemiTransparent { + let parent = opaque_and_semitransparent; + opaque_and_semitransparent = *self + .syntax_context_map + .entry((parent, expn_id, transparency)) + .or_insert_with(|| { + let new_opaque_and_semitransparent = + SyntaxContext(syntax_context_data.len() as u32); + syntax_context_data.push(SyntaxContextData { + outer_expn: expn_id, + outer_transparency: transparency, + parent, + opaque, + opaque_and_semitransparent: new_opaque_and_semitransparent, + dollar_crate_name: kw::DollarCrate, + }); + new_opaque_and_semitransparent + }); + } + + let parent = ctxt; + *self.syntax_context_map.entry((parent, expn_id, transparency)).or_insert_with(|| { + let new_opaque_and_semitransparent_and_transparent = + SyntaxContext(syntax_context_data.len() as u32); + syntax_context_data.push(SyntaxContextData { + outer_expn: expn_id, + outer_transparency: transparency, + parent, + opaque, + 
opaque_and_semitransparent, + dollar_crate_name: kw::DollarCrate, + }); + new_opaque_and_semitransparent_and_transparent + }) + } +} + +pub fn clear_syntax_context_map() { + HygieneData::with(|data| data.syntax_context_map = FxHashMap::default()); +} + +pub fn walk_chain(span: Span, to: SyntaxContext) -> Span { + HygieneData::with(|data| data.walk_chain(span, to)) +} + +pub fn update_dollar_crate_names(mut get_name: impl FnMut(SyntaxContext) -> Symbol) { + // The new contexts that need updating are at the end of the list and have `$crate` as a name. + let (len, to_update) = HygieneData::with(|data| { + ( + data.syntax_context_data.len(), + data.syntax_context_data + .iter() + .rev() + .take_while(|scdata| scdata.dollar_crate_name == kw::DollarCrate) + .count(), + ) + }); + // The callback must be called from outside of the `HygieneData` lock, + // since it will try to acquire it too. + let range_to_update = len - to_update..len; + let names: Vec<_> = + range_to_update.clone().map(|idx| get_name(SyntaxContext::from_u32(idx as u32))).collect(); + HygieneData::with(|data| { + range_to_update.zip(names).for_each(|(idx, name)| { + data.syntax_context_data[idx].dollar_crate_name = name; + }) + }) +} + +pub fn debug_hygiene_data(verbose: bool) -> String { + HygieneData::with(|data| { + if verbose { + format!("{:#?}", data) + } else { + let mut s = String::from("Expansions:"); + let mut debug_expn_data = |(id, expn_data): (&ExpnId, &ExpnData)| { + s.push_str(&format!( + "\n{:?}: parent: {:?}, call_site_ctxt: {:?}, def_site_ctxt: {:?}, kind: {:?}", + id, + expn_data.parent, + expn_data.call_site.ctxt(), + expn_data.def_site.ctxt(), + expn_data.kind, + )) + }; + data.local_expn_data.iter_enumerated().for_each(|(id, expn_data)| { + let expn_data = expn_data.as_ref().expect("no expansion data for an expansion ID"); + debug_expn_data((&id.to_expn_id(), expn_data)) + }); + + // Sort the hash map for more reproducible output. 
+ // Because of this, it is fine to rely on the unstable iteration order of the map. + #[allow(rustc::potential_query_instability)] + let mut foreign_expn_data: Vec<_> = data.foreign_expn_data.iter().collect(); + foreign_expn_data.sort_by_key(|(id, _)| (id.krate, id.local_id)); + foreign_expn_data.into_iter().for_each(debug_expn_data); + s.push_str("\n\nSyntaxContexts:"); + data.syntax_context_data.iter().enumerate().for_each(|(id, ctxt)| { + s.push_str(&format!( + "\n#{}: parent: {:?}, outer_mark: ({:?}, {:?})", + id, ctxt.parent, ctxt.outer_expn, ctxt.outer_transparency, + )); + }); + s + } + }) +} + +impl SyntaxContext { + #[inline] + pub const fn root() -> Self { + SyntaxContext(0) + } + + #[inline] + pub(crate) fn as_u32(self) -> u32 { + self.0 + } + + #[inline] + pub(crate) fn from_u32(raw: u32) -> SyntaxContext { + SyntaxContext(raw) + } + + /// Extend a syntax context with a given expansion and transparency. + pub(crate) fn apply_mark(self, expn_id: ExpnId, transparency: Transparency) -> SyntaxContext { + HygieneData::with(|data| data.apply_mark(self, expn_id, transparency)) + } + + /// Pulls a single mark off of the syntax context. This effectively moves the + /// context up one macro definition level. That is, if we have a nested macro + /// definition as follows: + /// + /// ```ignore (illustrative) + /// macro_rules! f { + /// macro_rules! g { + /// ... + /// } + /// } + /// ``` + /// + /// and we have a SyntaxContext that is referring to something declared by an invocation + /// of g (call it g1), calling remove_mark will result in the SyntaxContext for the + /// invocation of f that created g1. + /// Returns the mark that was removed. + pub fn remove_mark(&mut self) -> ExpnId { + HygieneData::with(|data| data.remove_mark(self).0) + } + + pub fn marks(self) -> Vec<(ExpnId, Transparency)> { + HygieneData::with(|data| data.marks(self)) + } + + /// Adjust this context for resolution in a scope created by the given expansion. 
+ /// For example, consider the following three resolutions of `f`: + /// + /// ```rust + /// #![feature(decl_macro)] + /// mod foo { pub fn f() {} } // `f`'s `SyntaxContext` is empty. + /// m!(f); + /// macro m($f:ident) { + /// mod bar { + /// pub fn f() {} // `f`'s `SyntaxContext` has a single `ExpnId` from `m`. + /// pub fn $f() {} // `$f`'s `SyntaxContext` is empty. + /// } + /// foo::f(); // `f`'s `SyntaxContext` has a single `ExpnId` from `m` + /// //^ Since `mod foo` is outside this expansion, `adjust` removes the mark from `f`, + /// //| and it resolves to `::foo::f`. + /// bar::f(); // `f`'s `SyntaxContext` has a single `ExpnId` from `m` + /// //^ Since `mod bar` not outside this expansion, `adjust` does not change `f`, + /// //| and it resolves to `::bar::f`. + /// bar::$f(); // `f`'s `SyntaxContext` is empty. + /// //^ Since `mod bar` is not outside this expansion, `adjust` does not change `$f`, + /// //| and it resolves to `::bar::$f`. + /// } + /// ``` + /// This returns the expansion whose definition scope we use to privacy check the resolution, + /// or `None` if we privacy check as usual (i.e., not w.r.t. a macro definition scope). + pub fn adjust(&mut self, expn_id: ExpnId) -> Option<ExpnId> { + HygieneData::with(|data| data.adjust(self, expn_id)) + } + + /// Like `SyntaxContext::adjust`, but also normalizes `self` to macros 2.0. + pub fn normalize_to_macros_2_0_and_adjust(&mut self, expn_id: ExpnId) -> Option<ExpnId> { + HygieneData::with(|data| { + *self = data.normalize_to_macros_2_0(*self); + data.adjust(self, expn_id) + }) + } + + /// Adjust this context for resolution in a scope created by the given expansion + /// via a glob import with the given `SyntaxContext`. + /// For example: + /// + /// ```compile_fail,E0425 + /// #![feature(decl_macro)] + /// m!(f); + /// macro m($i:ident) { + /// mod foo { + /// pub fn f() {} // `f`'s `SyntaxContext` has a single `ExpnId` from `m`. + /// pub fn $i() {} // `$i`'s `SyntaxContext` is empty. 
+ /// } + /// n!(f); + /// macro n($j:ident) { + /// use foo::*; + /// f(); // `f`'s `SyntaxContext` has a mark from `m` and a mark from `n` + /// //^ `glob_adjust` removes the mark from `n`, so this resolves to `foo::f`. + /// $i(); // `$i`'s `SyntaxContext` has a mark from `n` + /// //^ `glob_adjust` removes the mark from `n`, so this resolves to `foo::$i`. + /// $j(); // `$j`'s `SyntaxContext` has a mark from `m` + /// //^ This cannot be glob-adjusted, so this is a resolution error. + /// } + /// } + /// ``` + /// This returns `None` if the context cannot be glob-adjusted. + /// Otherwise, it returns the scope to use when privacy checking (see `adjust` for details). + pub fn glob_adjust(&mut self, expn_id: ExpnId, glob_span: Span) -> Option<Option<ExpnId>> { + HygieneData::with(|data| { + let mut scope = None; + let mut glob_ctxt = data.normalize_to_macros_2_0(glob_span.ctxt()); + while !data.is_descendant_of(expn_id, data.outer_expn(glob_ctxt)) { + scope = Some(data.remove_mark(&mut glob_ctxt).0); + if data.remove_mark(self).0 != scope.unwrap() { + return None; + } + } + if data.adjust(self, expn_id).is_some() { + return None; + } + Some(scope) + }) + } + + /// Undo `glob_adjust` if possible: + /// + /// ```ignore (illustrative) + /// if let Some(privacy_checking_scope) = self.reverse_glob_adjust(expansion, glob_ctxt) { + /// assert!(self.glob_adjust(expansion, glob_ctxt) == Some(privacy_checking_scope)); + /// } + /// ``` + pub fn reverse_glob_adjust( + &mut self, + expn_id: ExpnId, + glob_span: Span, + ) -> Option<Option<ExpnId>> { + HygieneData::with(|data| { + if data.adjust(self, expn_id).is_some() { + return None; + } + + let mut glob_ctxt = data.normalize_to_macros_2_0(glob_span.ctxt()); + let mut marks = Vec::new(); + while !data.is_descendant_of(expn_id, data.outer_expn(glob_ctxt)) { + marks.push(data.remove_mark(&mut glob_ctxt)); + } + + let scope = marks.last().map(|mark| mark.0); + while let Some((expn_id, transparency)) = marks.pop() { + *self = 
data.apply_mark(*self, expn_id, transparency); + } + Some(scope) + }) + } + + pub fn hygienic_eq(self, other: SyntaxContext, expn_id: ExpnId) -> bool { + HygieneData::with(|data| { + let mut self_normalized = data.normalize_to_macros_2_0(self); + data.adjust(&mut self_normalized, expn_id); + self_normalized == data.normalize_to_macros_2_0(other) + }) + } + + #[inline] + pub fn normalize_to_macros_2_0(self) -> SyntaxContext { + HygieneData::with(|data| data.normalize_to_macros_2_0(self)) + } + + #[inline] + pub fn normalize_to_macro_rules(self) -> SyntaxContext { + HygieneData::with(|data| data.normalize_to_macro_rules(self)) + } + + #[inline] + pub fn outer_expn(self) -> ExpnId { + HygieneData::with(|data| data.outer_expn(self)) + } + + /// `ctxt.outer_expn_data()` is equivalent to but faster than + /// `ctxt.outer_expn().expn_data()`. + #[inline] + pub fn outer_expn_data(self) -> ExpnData { + HygieneData::with(|data| data.expn_data(data.outer_expn(self)).clone()) + } + + #[inline] + pub fn outer_mark(self) -> (ExpnId, Transparency) { + HygieneData::with(|data| data.outer_mark(self)) + } + + pub fn dollar_crate_name(self) -> Symbol { + HygieneData::with(|data| data.syntax_context_data[self.0 as usize].dollar_crate_name) + } + + pub fn edition(self) -> Edition { + HygieneData::with(|data| data.expn_data(data.outer_expn(self)).edition) + } +} + +impl fmt::Debug for SyntaxContext { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "#{}", self.0) + } +} + +impl Span { + /// Creates a fresh expansion with given properties. + /// Expansions are normally created by macros, but in some cases expansions are created for + /// other compiler-generated code to set per-span properties like allowed unstable features. + /// The returned span belongs to the created expansion and has the new properties, + /// but its location is inherited from the current span. 
+ pub fn fresh_expansion(self, expn_id: LocalExpnId) -> Span { + HygieneData::with(|data| { + self.with_ctxt(data.apply_mark( + SyntaxContext::root(), + expn_id.to_expn_id(), + Transparency::Transparent, + )) + }) + } + + /// Reuses the span but adds information like the kind of the desugaring and features that are + /// allowed inside this span. + pub fn mark_with_reason( + self, + allow_internal_unstable: Option<Lrc<[Symbol]>>, + reason: DesugaringKind, + edition: Edition, + ctx: impl HashStableContext, + ) -> Span { + let expn_data = ExpnData { + allow_internal_unstable, + ..ExpnData::default(ExpnKind::Desugaring(reason), self, edition, None, None) + }; + let expn_id = LocalExpnId::fresh(expn_data, ctx); + self.fresh_expansion(expn_id) + } +} + +/// A subset of properties from both macro definition and macro call available through global data. +/// Avoid using this if you have access to the original definition or call structures. +#[derive(Clone, Debug, Encodable, Decodable, HashStable_Generic)] +pub struct ExpnData { + // --- The part unique to each expansion. + /// The kind of this expansion - macro or compiler desugaring. + pub kind: ExpnKind, + /// The expansion that produced this expansion. + pub parent: ExpnId, + /// The location of the actual macro invocation or syntax sugar , e.g. + /// `let x = foo!();` or `if let Some(y) = x {}` + /// + /// This may recursively refer to other macro invocations, e.g., if + /// `foo!()` invoked `bar!()` internally, and there was an + /// expression inside `bar!`; the call_site of the expression in + /// the expansion would point to the `bar!` invocation; that + /// call_site span would have its own ExpnData, with the call_site + /// pointing to the `foo!` invocation. + pub call_site: Span, + /// Used to force two `ExpnData`s to have different `Fingerprint`s. + /// Due to macro expansion, it's possible to end up with two `ExpnId`s + /// that have identical `ExpnData`s. 
This violates the contract of `HashStable` + /// - the two `ExpnId`s are not equal, but their `Fingerprint`s are equal + /// (since the numerical `ExpnId` value is not considered by the `HashStable` + /// implementation). + /// + /// The `disambiguator` field is set by `update_disambiguator` when two distinct + /// `ExpnId`s would end up with the same `Fingerprint`. Since `ExpnData` includes + /// a `krate` field, this value only needs to be unique within a single crate. + disambiguator: u32, + + // --- The part specific to the macro/desugaring definition. + // --- It may be reasonable to share this part between expansions with the same definition, + // --- but such sharing is known to bring some minor inconveniences without also bringing + // --- noticeable perf improvements (PR #62898). + /// The span of the macro definition (possibly dummy). + /// This span serves only informational purpose and is not used for resolution. + pub def_site: Span, + /// List of `#[unstable]`/feature-gated features that the macro is allowed to use + /// internally without forcing the whole crate to opt-in + /// to them. + pub allow_internal_unstable: Option<Lrc<[Symbol]>>, + /// Whether the macro is allowed to use `unsafe` internally + /// even if the user crate has `#![forbid(unsafe_code)]`. + pub allow_internal_unsafe: bool, + /// Enables the macro helper hack (`ident!(...)` -> `$crate::ident!(...)`) + /// for a given macro. + pub local_inner_macros: bool, + /// Edition of the crate in which the macro is defined. + pub edition: Edition, + /// The `DefId` of the macro being invoked, + /// if this `ExpnData` corresponds to a macro invocation + pub macro_def_id: Option<DefId>, + /// The normal module (`mod`) in which the expanded macro was defined. 
+ pub parent_module: Option<DefId>, +} + +impl !PartialEq for ExpnData {} +impl !Hash for ExpnData {} + +impl ExpnData { + pub fn new( + kind: ExpnKind, + parent: ExpnId, + call_site: Span, + def_site: Span, + allow_internal_unstable: Option<Lrc<[Symbol]>>, + allow_internal_unsafe: bool, + local_inner_macros: bool, + edition: Edition, + macro_def_id: Option<DefId>, + parent_module: Option<DefId>, + ) -> ExpnData { + ExpnData { + kind, + parent, + call_site, + def_site, + allow_internal_unstable, + allow_internal_unsafe, + local_inner_macros, + edition, + macro_def_id, + parent_module, + disambiguator: 0, + } + } + + /// Constructs expansion data with default properties. + pub fn default( + kind: ExpnKind, + call_site: Span, + edition: Edition, + macro_def_id: Option<DefId>, + parent_module: Option<DefId>, + ) -> ExpnData { + ExpnData { + kind, + parent: ExpnId::root(), + call_site, + def_site: DUMMY_SP, + allow_internal_unstable: None, + allow_internal_unsafe: false, + local_inner_macros: false, + edition, + macro_def_id, + parent_module, + disambiguator: 0, + } + } + + pub fn allow_unstable( + kind: ExpnKind, + call_site: Span, + edition: Edition, + allow_internal_unstable: Lrc<[Symbol]>, + macro_def_id: Option<DefId>, + parent_module: Option<DefId>, + ) -> ExpnData { + ExpnData { + allow_internal_unstable: Some(allow_internal_unstable), + ..ExpnData::default(kind, call_site, edition, macro_def_id, parent_module) + } + } + + #[inline] + pub fn is_root(&self) -> bool { + matches!(self.kind, ExpnKind::Root) + } + + #[inline] + fn hash_expn(&self, ctx: &mut impl HashStableContext) -> u64 { + let mut hasher = StableHasher::new(); + self.hash_stable(ctx, &mut hasher); + hasher.finish() + } +} + +/// Expansion kind. +#[derive(Clone, Debug, PartialEq, Encodable, Decodable, HashStable_Generic)] +pub enum ExpnKind { + /// No expansion, aka root expansion. Only `ExpnId::root()` has this kind. + Root, + /// Expansion produced by a macro. 
+ Macro(MacroKind, Symbol), + /// Transform done by the compiler on the AST. + AstPass(AstPass), + /// Desugaring done by the compiler during HIR lowering. + Desugaring(DesugaringKind), + /// MIR inlining + Inlined, +} + +impl ExpnKind { + pub fn descr(&self) -> String { + match *self { + ExpnKind::Root => kw::PathRoot.to_string(), + ExpnKind::Macro(macro_kind, name) => match macro_kind { + MacroKind::Bang => format!("{}!", name), + MacroKind::Attr => format!("#[{}]", name), + MacroKind::Derive => format!("#[derive({})]", name), + }, + ExpnKind::AstPass(kind) => kind.descr().to_string(), + ExpnKind::Desugaring(kind) => format!("desugaring of {}", kind.descr()), + ExpnKind::Inlined => "inlined source".to_string(), + } + } +} + +/// The kind of macro invocation or definition. +#[derive(Clone, Copy, PartialEq, Eq, Encodable, Decodable, Hash, Debug)] +#[derive(HashStable_Generic)] +pub enum MacroKind { + /// A bang macro `foo!()`. + Bang, + /// An attribute macro `#[foo]`. + Attr, + /// A derive macro `#[derive(Foo)]` + Derive, +} + +impl MacroKind { + pub fn descr(self) -> &'static str { + match self { + MacroKind::Bang => "macro", + MacroKind::Attr => "attribute macro", + MacroKind::Derive => "derive macro", + } + } + + pub fn descr_expected(self) -> &'static str { + match self { + MacroKind::Attr => "attribute", + _ => self.descr(), + } + } + + pub fn article(self) -> &'static str { + match self { + MacroKind::Attr => "an", + _ => "a", + } + } +} + +/// The kind of AST transform. +#[derive(Clone, Copy, Debug, PartialEq, Encodable, Decodable, HashStable_Generic)] +pub enum AstPass { + StdImports, + TestHarness, + ProcMacroHarness, +} + +impl AstPass { + pub fn descr(self) -> &'static str { + match self { + AstPass::StdImports => "standard library imports", + AstPass::TestHarness => "test harness", + AstPass::ProcMacroHarness => "proc macro harness", + } + } +} + +/// The kind of compiler desugaring. 
+#[derive(Clone, Copy, PartialEq, Debug, Encodable, Decodable, HashStable_Generic)] +pub enum DesugaringKind { + /// We desugar `if c { i } else { e }` to `match $ExprKind::Use(c) { true => i, _ => e }`. + /// However, we do not want to blame `c` for unreachability but rather say that `i` + /// is unreachable. This desugaring kind allows us to avoid blaming `c`. + /// This also applies to `while` loops. + CondTemporary, + QuestionMark, + TryBlock, + YeetExpr, + /// Desugaring of an `impl Trait` in return type position + /// to an `type Foo = impl Trait;` and replacing the + /// `impl Trait` with `Foo`. + OpaqueTy, + Async, + Await, + ForLoop, + WhileLoop, +} + +impl DesugaringKind { + /// The description wording should combine well with "desugaring of {}". + pub fn descr(self) -> &'static str { + match self { + DesugaringKind::CondTemporary => "`if` or `while` condition", + DesugaringKind::Async => "`async` block or function", + DesugaringKind::Await => "`await` expression", + DesugaringKind::QuestionMark => "operator `?`", + DesugaringKind::TryBlock => "`try` block", + DesugaringKind::YeetExpr => "`do yeet` expression", + DesugaringKind::OpaqueTy => "`impl Trait`", + DesugaringKind::ForLoop => "`for` loop", + DesugaringKind::WhileLoop => "`while` loop", + } + } +} + +#[derive(Default)] +pub struct HygieneEncodeContext { + /// All `SyntaxContexts` for which we have written `SyntaxContextData` into crate metadata. + /// This is `None` after we finish encoding `SyntaxContexts`, to ensure + /// that we don't accidentally try to encode any more `SyntaxContexts` + serialized_ctxts: Lock<FxHashSet<SyntaxContext>>, + /// The `SyntaxContexts` that we have serialized (e.g. as a result of encoding `Spans`) + /// in the most recent 'round' of serializing. Serializing `SyntaxContextData` + /// may cause us to serialize more `SyntaxContext`s, so serialize in a loop + /// until we reach a fixed point. 
+ latest_ctxts: Lock<FxHashSet<SyntaxContext>>, + + serialized_expns: Lock<FxHashSet<ExpnId>>, + + latest_expns: Lock<FxHashSet<ExpnId>>, +} + +impl HygieneEncodeContext { + /// Record the fact that we need to serialize the corresponding `ExpnData`. + pub fn schedule_expn_data_for_encoding(&self, expn: ExpnId) { + if !self.serialized_expns.lock().contains(&expn) { + self.latest_expns.lock().insert(expn); + } + } + + pub fn encode<T>( + &self, + encoder: &mut T, + mut encode_ctxt: impl FnMut(&mut T, u32, &SyntaxContextData), + mut encode_expn: impl FnMut(&mut T, ExpnId, &ExpnData, ExpnHash), + ) { + // When we serialize a `SyntaxContextData`, we may end up serializing + // a `SyntaxContext` that we haven't seen before + while !self.latest_ctxts.lock().is_empty() || !self.latest_expns.lock().is_empty() { + debug!( + "encode_hygiene: Serializing a round of {:?} SyntaxContextDatas: {:?}", + self.latest_ctxts.lock().len(), + self.latest_ctxts + ); + + // Consume the current round of SyntaxContexts. 
+ // Drop the lock() temporary early + let latest_ctxts = { std::mem::take(&mut *self.latest_ctxts.lock()) }; + + // It's fine to iterate over a HashMap, because the serialization + // of the table that we insert data into doesn't depend on insertion + // order + #[allow(rustc::potential_query_instability)] + for_all_ctxts_in(latest_ctxts.into_iter(), |index, ctxt, data| { + if self.serialized_ctxts.lock().insert(ctxt) { + encode_ctxt(encoder, index, data); + } + }); + + let latest_expns = { std::mem::take(&mut *self.latest_expns.lock()) }; + + // Same as above, this is fine as we are inserting into a order-independent hashset + #[allow(rustc::potential_query_instability)] + for_all_expns_in(latest_expns.into_iter(), |expn, data, hash| { + if self.serialized_expns.lock().insert(expn) { + encode_expn(encoder, expn, data, hash); + } + }); + } + debug!("encode_hygiene: Done serializing SyntaxContextData"); + } +} + +#[derive(Default)] +/// Additional information used to assist in decoding hygiene data +pub struct HygieneDecodeContext { + // Maps serialized `SyntaxContext` ids to a `SyntaxContext` in the current + // global `HygieneData`. When we deserialize a `SyntaxContext`, we need to create + // a new id in the global `HygieneData`. This map tracks the ID we end up picking, + // so that multiple occurrences of the same serialized id are decoded to the same + // `SyntaxContext` + remapped_ctxts: Lock<Vec<Option<SyntaxContext>>>, +} + +/// Register an expansion which has been decoded from the on-disk-cache for the local crate. 
+pub fn register_local_expn_id(data: ExpnData, hash: ExpnHash) -> ExpnId { + HygieneData::with(|hygiene_data| { + let expn_id = hygiene_data.local_expn_data.next_index(); + hygiene_data.local_expn_data.push(Some(data)); + let _eid = hygiene_data.local_expn_hashes.push(hash); + debug_assert_eq!(expn_id, _eid); + + let expn_id = expn_id.to_expn_id(); + + let _old_id = hygiene_data.expn_hash_to_expn_id.insert(hash, expn_id); + debug_assert!(_old_id.is_none()); + expn_id + }) +} + +/// Register an expansion which has been decoded from the metadata of a foreign crate. +pub fn register_expn_id( + krate: CrateNum, + local_id: ExpnIndex, + data: ExpnData, + hash: ExpnHash, +) -> ExpnId { + debug_assert!(data.parent == ExpnId::root() || krate == data.parent.krate); + let expn_id = ExpnId { krate, local_id }; + HygieneData::with(|hygiene_data| { + let _old_data = hygiene_data.foreign_expn_data.insert(expn_id, data); + debug_assert!(_old_data.is_none()); + let _old_hash = hygiene_data.foreign_expn_hashes.insert(expn_id, hash); + debug_assert!(_old_hash.is_none()); + let _old_id = hygiene_data.expn_hash_to_expn_id.insert(hash, expn_id); + debug_assert!(_old_id.is_none()); + }); + expn_id +} + +/// Decode an expansion from the metadata of a foreign crate. +pub fn decode_expn_id( + krate: CrateNum, + index: u32, + decode_data: impl FnOnce(ExpnId) -> (ExpnData, ExpnHash), +) -> ExpnId { + if index == 0 { + debug!("decode_expn_id: deserialized root"); + return ExpnId::root(); + } + + let index = ExpnIndex::from_u32(index); + + // This function is used to decode metadata, so it cannot decode information about LOCAL_CRATE. + debug_assert_ne!(krate, LOCAL_CRATE); + let expn_id = ExpnId { krate, local_id: index }; + + // Fast path if the expansion has already been decoded. 
+ if HygieneData::with(|hygiene_data| hygiene_data.foreign_expn_data.contains_key(&expn_id)) { + return expn_id; + } + + // Don't decode the data inside `HygieneData::with`, since we need to recursively decode + // other ExpnIds + let (expn_data, hash) = decode_data(expn_id); + + register_expn_id(krate, index, expn_data, hash) +} + +// Decodes `SyntaxContext`, using the provided `HygieneDecodeContext` +// to track which `SyntaxContext`s we have already decoded. +// The provided closure will be invoked to deserialize a `SyntaxContextData` +// if we haven't already seen the id of the `SyntaxContext` we are deserializing. +pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContextData>( + d: &mut D, + context: &HygieneDecodeContext, + decode_data: F, +) -> SyntaxContext { + let raw_id: u32 = Decodable::decode(d); + if raw_id == 0 { + debug!("decode_syntax_context: deserialized root"); + // The root is special + return SyntaxContext::root(); + } + + let outer_ctxts = &context.remapped_ctxts; + + // Ensure that the lock() temporary is dropped early + { + if let Some(ctxt) = outer_ctxts.lock().get(raw_id as usize).copied().flatten() { + return ctxt; + } + } + + // Allocate and store SyntaxContext id *before* calling the decoder function, + // as the SyntaxContextData may reference itself. + let new_ctxt = HygieneData::with(|hygiene_data| { + let new_ctxt = SyntaxContext(hygiene_data.syntax_context_data.len() as u32); + // Push a dummy SyntaxContextData to ensure that nobody else can get the + // same ID as us. 
This will be overwritten after call `decode_Data` + hygiene_data.syntax_context_data.push(SyntaxContextData { + outer_expn: ExpnId::root(), + outer_transparency: Transparency::Transparent, + parent: SyntaxContext::root(), + opaque: SyntaxContext::root(), + opaque_and_semitransparent: SyntaxContext::root(), + dollar_crate_name: kw::Empty, + }); + let mut ctxts = outer_ctxts.lock(); + let new_len = raw_id as usize + 1; + if ctxts.len() < new_len { + ctxts.resize(new_len, None); + } + ctxts[raw_id as usize] = Some(new_ctxt); + drop(ctxts); + new_ctxt + }); + + // Don't try to decode data while holding the lock, since we need to + // be able to recursively decode a SyntaxContext + let mut ctxt_data = decode_data(d, raw_id); + // Reset `dollar_crate_name` so that it will be updated by `update_dollar_crate_names` + // We don't care what the encoding crate set this to - we want to resolve it + // from the perspective of the current compilation session + ctxt_data.dollar_crate_name = kw::DollarCrate; + + // Overwrite the dummy data with our decoded SyntaxContextData + HygieneData::with(|hygiene_data| { + let dummy = std::mem::replace( + &mut hygiene_data.syntax_context_data[new_ctxt.as_u32() as usize], + ctxt_data, + ); + // Make sure nothing weird happening while `decode_data` was running + assert_eq!(dummy.dollar_crate_name, kw::Empty); + }); + + new_ctxt +} + +fn for_all_ctxts_in<F: FnMut(u32, SyntaxContext, &SyntaxContextData)>( + ctxts: impl Iterator<Item = SyntaxContext>, + mut f: F, +) { + let all_data: Vec<_> = HygieneData::with(|data| { + ctxts.map(|ctxt| (ctxt, data.syntax_context_data[ctxt.0 as usize].clone())).collect() + }); + for (ctxt, data) in all_data.into_iter() { + f(ctxt.0, ctxt, &data); + } +} + +fn for_all_expns_in( + expns: impl Iterator<Item = ExpnId>, + mut f: impl FnMut(ExpnId, &ExpnData, ExpnHash), +) { + let all_data: Vec<_> = HygieneData::with(|data| { + expns.map(|expn| (expn, data.expn_data(expn).clone(), data.expn_hash(expn))).collect() + 
}); + for (expn, data, hash) in all_data.into_iter() { + f(expn, &data, hash); + } +} + +impl<E: Encoder> Encodable<E> for LocalExpnId { + fn encode(&self, e: &mut E) { + self.to_expn_id().encode(e); + } +} + +impl<E: Encoder> Encodable<E> for ExpnId { + default fn encode(&self, _: &mut E) { + panic!("cannot encode `ExpnId` with `{}`", std::any::type_name::<E>()); + } +} + +impl<D: Decoder> Decodable<D> for LocalExpnId { + fn decode(d: &mut D) -> Self { + ExpnId::expect_local(ExpnId::decode(d)) + } +} + +impl<D: Decoder> Decodable<D> for ExpnId { + default fn decode(_: &mut D) -> Self { + panic!("cannot decode `ExpnId` with `{}`", std::any::type_name::<D>()); + } +} + +pub fn raw_encode_syntax_context<E: Encoder>( + ctxt: SyntaxContext, + context: &HygieneEncodeContext, + e: &mut E, +) { + if !context.serialized_ctxts.lock().contains(&ctxt) { + context.latest_ctxts.lock().insert(ctxt); + } + ctxt.0.encode(e); +} + +impl<E: Encoder> Encodable<E> for SyntaxContext { + default fn encode(&self, _: &mut E) { + panic!("cannot encode `SyntaxContext` with `{}`", std::any::type_name::<E>()); + } +} + +impl<D: Decoder> Decodable<D> for SyntaxContext { + default fn decode(_: &mut D) -> Self { + panic!("cannot decode `SyntaxContext` with `{}`", std::any::type_name::<D>()); + } +} + +/// Updates the `disambiguator` field of the corresponding `ExpnData` +/// such that the `Fingerprint` of the `ExpnData` does not collide with +/// any other `ExpnIds`. +/// +/// This method is called only when an `ExpnData` is first associated +/// with an `ExpnId` (when the `ExpnId` is initially constructed, or via +/// `set_expn_data`). It is *not* called for foreign `ExpnId`s deserialized +/// from another crate's metadata - since `ExpnHash` includes the stable crate id, +/// collisions are only possible between `ExpnId`s within the same crate. +fn update_disambiguator(expn_data: &mut ExpnData, mut ctx: impl HashStableContext) -> ExpnHash { + // This disambiguator should not have been set yet. 
+ assert_eq!( + expn_data.disambiguator, 0, + "Already set disambiguator for ExpnData: {:?}", + expn_data + ); + assert_default_hashing_controls(&ctx, "ExpnData (disambiguator)"); + let mut expn_hash = expn_data.hash_expn(&mut ctx); + + let disambiguator = HygieneData::with(|data| { + // If this is the first ExpnData with a given hash, then keep our + // disambiguator at 0 (the default u32 value) + let disambig = data.expn_data_disambiguators.entry(expn_hash).or_default(); + let disambiguator = *disambig; + *disambig += 1; + disambiguator + }); + + if disambiguator != 0 { + debug!("Set disambiguator for expn_data={:?} expn_hash={:?}", expn_data, expn_hash); + + expn_data.disambiguator = disambiguator; + expn_hash = expn_data.hash_expn(&mut ctx); + + // Verify that the new disambiguator makes the hash unique + #[cfg(debug_assertions)] + HygieneData::with(|data| { + assert_eq!( + data.expn_data_disambiguators.get(&expn_hash), + None, + "Hash collision after disambiguator update!", + ); + }); + } + + ExpnHash::new(ctx.def_path_hash(LOCAL_CRATE.as_def_id()).stable_crate_id(), expn_hash) +} + +impl<CTX: HashStableContext> HashStable<CTX> for SyntaxContext { + fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { + const TAG_EXPANSION: u8 = 0; + const TAG_NO_EXPANSION: u8 = 1; + + if *self == SyntaxContext::root() { + TAG_NO_EXPANSION.hash_stable(ctx, hasher); + } else { + TAG_EXPANSION.hash_stable(ctx, hasher); + let (expn_id, transparency) = self.outer_mark(); + expn_id.hash_stable(ctx, hasher); + transparency.hash_stable(ctx, hasher); + } + } +} + +impl<CTX: HashStableContext> HashStable<CTX> for ExpnId { + fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { + assert_default_hashing_controls(ctx, "ExpnId"); + let hash = if *self == ExpnId::root() { + // Avoid fetching TLS storage for a trivial often-used value. 
+ Fingerprint::ZERO + } else { + self.expn_hash().0 + }; + + hash.hash_stable(ctx, hasher); + } +} diff --git a/compiler/rustc_span/src/lev_distance.rs b/compiler/rustc_span/src/lev_distance.rs new file mode 100644 index 000000000..61e4b98a8 --- /dev/null +++ b/compiler/rustc_span/src/lev_distance.rs @@ -0,0 +1,177 @@ +//! Levenshtein distances. +//! +//! The [Levenshtein distance] is a metric for measuring the difference between two strings. +//! +//! [Levenshtein distance]: https://en.wikipedia.org/wiki/Levenshtein_distance + +use crate::symbol::Symbol; +use std::cmp; + +#[cfg(test)] +mod tests; + +/// Finds the Levenshtein distance between two strings. +/// +/// Returns None if the distance exceeds the limit. +pub fn lev_distance(a: &str, b: &str, limit: usize) -> Option<usize> { + let n = a.chars().count(); + let m = b.chars().count(); + let min_dist = if n < m { m - n } else { n - m }; + + if min_dist > limit { + return None; + } + if n == 0 || m == 0 { + return (min_dist <= limit).then_some(min_dist); + } + + let mut dcol: Vec<_> = (0..=m).collect(); + + for (i, sc) in a.chars().enumerate() { + let mut current = i; + dcol[0] = current + 1; + + for (j, tc) in b.chars().enumerate() { + let next = dcol[j + 1]; + if sc == tc { + dcol[j + 1] = current; + } else { + dcol[j + 1] = cmp::min(current, next); + dcol[j + 1] = cmp::min(dcol[j + 1], dcol[j]) + 1; + } + current = next; + } + } + + (dcol[m] <= limit).then_some(dcol[m]) +} + +/// Provides a word similarity score between two words that accounts for substrings being more +/// meaningful than a typical Levenshtein distance. The lower the score, the closer the match. +/// 0 is an identical match. +/// +/// Uses the Levenshtein distance between the two strings and removes the cost of the length +/// difference. If this is 0 then it is either a substring match or a full word match, in the +/// substring match case we detect this and return `1`. To prevent finding meaningless substrings, +/// eg. 
"in" in "shrink", we only perform this subtraction of length difference if one of the words +/// is not greater than twice the length of the other. For cases where the words are close in size +/// but not an exact substring then the cost of the length difference is discounted by half. +/// +/// Returns `None` if the distance exceeds the limit. +pub fn lev_distance_with_substrings(a: &str, b: &str, limit: usize) -> Option<usize> { + let n = a.chars().count(); + let m = b.chars().count(); + + // Check one isn't less than half the length of the other. If this is true then there is a + // big difference in length. + let big_len_diff = (n * 2) < m || (m * 2) < n; + let len_diff = if n < m { m - n } else { n - m }; + let lev = lev_distance(a, b, limit + len_diff)?; + + // This is the crux, subtracting length difference means exact substring matches will now be 0 + let score = lev - len_diff; + + // If the score is 0 but the words have different lengths then it's a substring match not a full + // word match + let score = if score == 0 && len_diff > 0 && !big_len_diff { + 1 // Exact substring match, but not a total word match so return non-zero + } else if !big_len_diff { + // Not a big difference in length, discount cost of length difference + score + (len_diff + 1) / 2 + } else { + // A big difference in length, add back the difference in length to the score + score + len_diff + }; + + (score <= limit).then_some(score) +} + +/// Finds the best match for given word in the given iterator where substrings are meaningful. +/// +/// A version of [`find_best_match_for_name`] that uses [`lev_distance_with_substrings`] as the score +/// for word similarity. This takes an optional distance limit which defaults to one-third of the +/// given word. +/// +/// Besides the modified Levenshtein, we use case insensitive comparison to improve accuracy +/// on an edge case with a lower(upper)case letters mismatch. 
pub fn find_best_match_for_name_with_substrings(
    candidates: &[Symbol],
    lookup: Symbol,
    dist: Option<usize>,
) -> Option<Symbol> {
    find_best_match_for_name_impl(true, candidates, lookup, dist)
}

/// Finds the best match for a given word in the given iterator.
///
/// As a loose rule to avoid the obviously incorrect suggestions, it takes
/// an optional limit for the maximum allowable edit distance, which defaults
/// to one-third of the given word.
///
/// Besides Levenshtein, we use case insensitive comparison to improve accuracy
/// on an edge case with a lower(upper)case letters mismatch.
pub fn find_best_match_for_name(
    candidates: &[Symbol],
    lookup: Symbol,
    dist: Option<usize>,
) -> Option<Symbol> {
    find_best_match_for_name_impl(false, candidates, lookup, dist)
}

// Shared implementation of the two public entry points above. The only
// difference between them is which distance function is used for scoring.
// `#[cold]`: suggestion lookup only happens on the diagnostic (error) path.
#[cold]
fn find_best_match_for_name_impl(
    use_substring_score: bool,
    candidates: &[Symbol],
    lookup: Symbol,
    dist: Option<usize>,
) -> Option<Symbol> {
    let lookup = lookup.as_str();
    let lookup_uppercase = lookup.to_uppercase();

    // Priority of matches:
    // 1. Exact case insensitive match
    // 2. Levenshtein distance match
    // 3. Sorted word match
    if let Some(c) = candidates.iter().find(|c| c.as_str().to_uppercase() == lookup_uppercase) {
        return Some(*c);
    }

    // Default limit: one third of the lookup's length (but at least 1).
    let mut dist = dist.unwrap_or_else(|| cmp::max(lookup.len(), 3) / 3);
    let mut best = None;
    for c in candidates {
        match if use_substring_score {
            lev_distance_with_substrings(lookup, c.as_str(), dist)
        } else {
            lev_distance(lookup, c.as_str(), dist)
        } {
            // A perfect score short-circuits the search.
            Some(0) => return Some(*c),
            Some(d) => {
                // Tighten the limit so that any later candidate must beat the
                // current best *strictly*; ties keep the earlier candidate.
                dist = d - 1;
                best = Some(*c);
            }
            None => {}
        }
    }
    if best.is_some() {
        return best;
    }

    find_match_by_sorted_words(candidates, lookup)
}

// Fallback matcher: treats `_`-separated identifiers as bags of words, so
// `a_longer_variable_name` matches `a_variable_longer_name`. Later candidates
// win ties because `fold` keeps overwriting the accumulator on a match.
fn find_match_by_sorted_words(iter_names: &[Symbol], lookup: &str) -> Option<Symbol> {
    iter_names.iter().fold(None, |result, candidate| {
        if sort_by_words(candidate.as_str()) == sort_by_words(lookup) {
            Some(*candidate)
        } else {
            result
        }
    })
}

// Canonicalizes an identifier by sorting its `_`-separated words.
fn sort_by_words(name: &str) -> String {
    let mut split_words: Vec<&str> = name.split('_').collect();
    // We are sorting primitive &strs and can use unstable sort here.
+ split_words.sort_unstable(); + split_words.join("_") +} diff --git a/compiler/rustc_span/src/lev_distance/tests.rs b/compiler/rustc_span/src/lev_distance/tests.rs new file mode 100644 index 000000000..b17d6588c --- /dev/null +++ b/compiler/rustc_span/src/lev_distance/tests.rs @@ -0,0 +1,71 @@ +use super::*; + +#[test] +fn test_lev_distance() { + use std::char::{from_u32, MAX}; + // Test bytelength agnosticity + for c in (0..MAX as u32).filter_map(from_u32).map(|i| i.to_string()) { + assert_eq!(lev_distance(&c[..], &c[..], usize::MAX), Some(0)); + } + + let a = "\nMäry häd ä little lämb\n\nLittle lämb\n"; + let b = "\nMary häd ä little lämb\n\nLittle lämb\n"; + let c = "Mary häd ä little lämb\n\nLittle lämb\n"; + assert_eq!(lev_distance(a, b, usize::MAX), Some(1)); + assert_eq!(lev_distance(b, a, usize::MAX), Some(1)); + assert_eq!(lev_distance(a, c, usize::MAX), Some(2)); + assert_eq!(lev_distance(c, a, usize::MAX), Some(2)); + assert_eq!(lev_distance(b, c, usize::MAX), Some(1)); + assert_eq!(lev_distance(c, b, usize::MAX), Some(1)); +} + +#[test] +fn test_lev_distance_limit() { + assert_eq!(lev_distance("abc", "abcd", 1), Some(1)); + assert_eq!(lev_distance("abc", "abcd", 0), None); + assert_eq!(lev_distance("abc", "xyz", 3), Some(3)); + assert_eq!(lev_distance("abc", "xyz", 2), None); +} + +#[test] +fn test_method_name_similarity_score() { + assert_eq!(lev_distance_with_substrings("empty", "is_empty", 1), Some(1)); + assert_eq!(lev_distance_with_substrings("shrunk", "rchunks", 2), None); + assert_eq!(lev_distance_with_substrings("abc", "abcd", 1), Some(1)); + assert_eq!(lev_distance_with_substrings("a", "abcd", 1), None); + assert_eq!(lev_distance_with_substrings("edf", "eq", 1), None); + assert_eq!(lev_distance_with_substrings("abc", "xyz", 3), Some(3)); + assert_eq!(lev_distance_with_substrings("abcdef", "abcdef", 2), Some(0)); +} + +#[test] +fn test_find_best_match_for_name() { + use crate::create_default_session_globals_then; + 
create_default_session_globals_then(|| { + let input = vec![Symbol::intern("aaab"), Symbol::intern("aaabc")]; + assert_eq!( + find_best_match_for_name(&input, Symbol::intern("aaaa"), None), + Some(Symbol::intern("aaab")) + ); + + assert_eq!(find_best_match_for_name(&input, Symbol::intern("1111111111"), None), None); + + let input = vec![Symbol::intern("AAAA")]; + assert_eq!( + find_best_match_for_name(&input, Symbol::intern("aaaa"), None), + Some(Symbol::intern("AAAA")) + ); + + let input = vec![Symbol::intern("AAAA")]; + assert_eq!( + find_best_match_for_name(&input, Symbol::intern("aaaa"), Some(4)), + Some(Symbol::intern("AAAA")) + ); + + let input = vec![Symbol::intern("a_longer_variable_name")]; + assert_eq!( + find_best_match_for_name(&input, Symbol::intern("a_variable_longer_name"), None), + Some(Symbol::intern("a_longer_variable_name")) + ); + }) +} diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs new file mode 100644 index 000000000..cf3069281 --- /dev/null +++ b/compiler/rustc_span/src/lib.rs @@ -0,0 +1,2116 @@ +//! Source positions and related helper functions. +//! +//! Important concepts in this module include: +//! +//! - the *span*, represented by [`SpanData`] and related types; +//! - source code as represented by a [`SourceMap`]; and +//! - interned strings, represented by [`Symbol`]s, with some common symbols available statically in the [`sym`] module. +//! +//! Unlike most compilers, the span contains not only the position in the source code, but also various other metadata, +//! such as the edition and macro hygiene. This metadata is stored in [`SyntaxContext`] and [`ExpnData`]. +//! +//! ## Note +//! +//! This API is completely unstable and subject to change. 
+ +#![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")] +#![feature(array_windows)] +#![feature(let_else)] +#![feature(if_let_guard)] +#![feature(negative_impls)] +#![feature(min_specialization)] +#![feature(rustc_attrs)] + +#[macro_use] +extern crate rustc_macros; + +#[macro_use] +extern crate tracing; + +use rustc_data_structures::AtomicRef; +use rustc_macros::HashStable_Generic; +use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; + +mod caching_source_map_view; +pub mod source_map; +pub use self::caching_source_map_view::CachingSourceMapView; +use source_map::SourceMap; + +pub mod edition; +use edition::Edition; +pub mod hygiene; +use hygiene::Transparency; +pub use hygiene::{DesugaringKind, ExpnKind, MacroKind}; +pub use hygiene::{ExpnData, ExpnHash, ExpnId, LocalExpnId, SyntaxContext}; +use rustc_data_structures::stable_hasher::HashingControls; +pub mod def_id; +use def_id::{CrateNum, DefId, DefPathHash, LocalDefId, LOCAL_CRATE}; +pub mod lev_distance; +mod span_encoding; +pub use span_encoding::{Span, DUMMY_SP}; + +pub mod symbol; +pub use symbol::{sym, Symbol}; + +mod analyze_source_file; +pub mod fatal_error; + +pub mod profiling; + +use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; +use rustc_data_structures::sync::{Lock, Lrc}; + +use std::borrow::Cow; +use std::cmp::{self, Ordering}; +use std::fmt; +use std::hash::Hash; +use std::ops::{Add, Range, Sub}; +use std::path::{Path, PathBuf}; +use std::str::FromStr; +use std::sync::Arc; + +use md5::Digest; +use md5::Md5; +use sha1::Sha1; +use sha2::Sha256; + +use tracing::debug; + +#[cfg(test)] +mod tests; + +// Per-session global variables: this struct is stored in thread-local storage +// in such a way that it is accessible without any kind of handle to all +// threads within the compilation session, but is not accessible outside the +// session. 
/// Per-session global state, stored in thread-local storage (see the
/// `SESSION_GLOBALS` scoped TLS key below) so it is reachable from anywhere
/// in the session without threading a handle through every call.
pub struct SessionGlobals {
    // Interner backing `Symbol`s for this session.
    symbol_interner: symbol::Interner,
    // Interner backing the compact `Span` representation.
    span_interner: Lock<span_encoding::SpanInterner>,
    // Macro-hygiene tables (expansion and syntax-context data).
    hygiene_data: Lock<hygiene::HygieneData>,
    // Lazily-installed source map; `None` until a `SourceMap` is provided
    // (e.g. via `with_source_map`), used when debug-printing spans.
    source_map: Lock<Option<Lrc<SourceMap>>>,
}

impl SessionGlobals {
    /// Creates a fresh set of session globals for a crate compiled under
    /// `edition`. The source map starts out unset.
    pub fn new(edition: Edition) -> SessionGlobals {
        SessionGlobals {
            symbol_interner: symbol::Interner::fresh(),
            span_interner: Lock::new(span_encoding::SpanInterner::default()),
            hygiene_data: Lock::new(hygiene::HygieneData::new(edition)),
            source_map: Lock::new(None),
        }
    }
}

/// Creates new session globals for `edition` and runs `f` with them installed
/// in this thread's `SESSION_GLOBALS` slot.
///
/// Panics if session globals are already installed on this thread: they must
/// never be silently replaced mid-session.
#[inline]
pub fn create_session_globals_then<R>(edition: Edition, f: impl FnOnce() -> R) -> R {
    assert!(
        !SESSION_GLOBALS.is_set(),
        "SESSION_GLOBALS should never be overwritten! \
         Use another thread if you need another SessionGlobals"
    );
    let session_globals = SessionGlobals::new(edition);
    SESSION_GLOBALS.set(&session_globals, f)
}

/// Installs an existing `SessionGlobals` for the duration of `f`.
/// Panics (same invariant as above) if globals are already set on this thread.
#[inline]
pub fn set_session_globals_then<R>(session_globals: &SessionGlobals, f: impl FnOnce() -> R) -> R {
    assert!(
        !SESSION_GLOBALS.is_set(),
        "SESSION_GLOBALS should never be overwritten!
\ + Use another thread if you need another SessionGlobals" + ); + SESSION_GLOBALS.set(session_globals, f) +} + +#[inline] +pub fn create_default_session_if_not_set_then<R, F>(f: F) -> R +where + F: FnOnce(&SessionGlobals) -> R, +{ + create_session_if_not_set_then(edition::DEFAULT_EDITION, f) +} + +#[inline] +pub fn create_session_if_not_set_then<R, F>(edition: Edition, f: F) -> R +where + F: FnOnce(&SessionGlobals) -> R, +{ + if !SESSION_GLOBALS.is_set() { + let session_globals = SessionGlobals::new(edition); + SESSION_GLOBALS.set(&session_globals, || SESSION_GLOBALS.with(f)) + } else { + SESSION_GLOBALS.with(f) + } +} + +#[inline] +pub fn with_session_globals<R, F>(f: F) -> R +where + F: FnOnce(&SessionGlobals) -> R, +{ + SESSION_GLOBALS.with(f) +} + +#[inline] +pub fn create_default_session_globals_then<R>(f: impl FnOnce() -> R) -> R { + create_session_globals_then(edition::DEFAULT_EDITION, f) +} + +// If this ever becomes non thread-local, `decode_syntax_context` +// and `decode_expn_id` will need to be updated to handle concurrent +// deserialization. +scoped_tls::scoped_thread_local!(static SESSION_GLOBALS: SessionGlobals); + +// FIXME: We should use this enum or something like it to get rid of the +// use of magic `/rust/1.x/...` paths across the board. +#[derive(Debug, Eq, PartialEq, Clone, Ord, PartialOrd)] +#[derive(Decodable)] +pub enum RealFileName { + LocalPath(PathBuf), + /// For remapped paths (namely paths into libstd that have been mapped + /// to the appropriate spot on the local host's file system, and local file + /// system paths that have been remapped with `FilePathMapping`), + Remapped { + /// `local_path` is the (host-dependent) local path to the file. This is + /// None if the file was imported from another crate + local_path: Option<PathBuf>, + /// `virtual_name` is the stable path rustc will store internally within + /// build artifacts. 
+ virtual_name: PathBuf, + }, +} + +impl Hash for RealFileName { + fn hash<H: std::hash::Hasher>(&self, state: &mut H) { + // To prevent #70924 from happening again we should only hash the + // remapped (virtualized) path if that exists. This is because + // virtualized paths to sysroot crates (/rust/$hash or /rust/$version) + // remain stable even if the corresponding local_path changes + self.remapped_path_if_available().hash(state) + } +} + +// This is functionally identical to #[derive(Encodable)], with the exception of +// an added assert statement +impl<S: Encoder> Encodable<S> for RealFileName { + fn encode(&self, encoder: &mut S) { + match *self { + RealFileName::LocalPath(ref local_path) => encoder.emit_enum_variant(0, |encoder| { + local_path.encode(encoder); + }), + + RealFileName::Remapped { ref local_path, ref virtual_name } => encoder + .emit_enum_variant(1, |encoder| { + // For privacy and build reproducibility, we must not embed host-dependant path in artifacts + // if they have been remapped by --remap-path-prefix + assert!(local_path.is_none()); + local_path.encode(encoder); + virtual_name.encode(encoder); + }), + } + } +} + +impl RealFileName { + /// Returns the path suitable for reading from the file system on the local host, + /// if this information exists. + /// Avoid embedding this in build artifacts; see `remapped_path_if_available()` for that. + pub fn local_path(&self) -> Option<&Path> { + match self { + RealFileName::LocalPath(p) => Some(p), + RealFileName::Remapped { local_path: p, virtual_name: _ } => { + p.as_ref().map(PathBuf::as_path) + } + } + } + + /// Returns the path suitable for reading from the file system on the local host, + /// if this information exists. + /// Avoid embedding this in build artifacts; see `remapped_path_if_available()` for that. 
+ pub fn into_local_path(self) -> Option<PathBuf> { + match self { + RealFileName::LocalPath(p) => Some(p), + RealFileName::Remapped { local_path: p, virtual_name: _ } => p, + } + } + + /// Returns the path suitable for embedding into build artifacts. This would still + /// be a local path if it has not been remapped. A remapped path will not correspond + /// to a valid file system path: see `local_path_if_available()` for something that + /// is more likely to return paths into the local host file system. + pub fn remapped_path_if_available(&self) -> &Path { + match self { + RealFileName::LocalPath(p) + | RealFileName::Remapped { local_path: _, virtual_name: p } => &p, + } + } + + /// Returns the path suitable for reading from the file system on the local host, + /// if this information exists. Otherwise returns the remapped name. + /// Avoid embedding this in build artifacts; see `remapped_path_if_available()` for that. + pub fn local_path_if_available(&self) -> &Path { + match self { + RealFileName::LocalPath(path) + | RealFileName::Remapped { local_path: None, virtual_name: path } + | RealFileName::Remapped { local_path: Some(path), virtual_name: _ } => path, + } + } + + pub fn to_string_lossy(&self, display_pref: FileNameDisplayPreference) -> Cow<'_, str> { + match display_pref { + FileNameDisplayPreference::Local => self.local_path_if_available().to_string_lossy(), + FileNameDisplayPreference::Remapped => { + self.remapped_path_if_available().to_string_lossy() + } + } + } +} + +/// Differentiates between real files and common virtual files. +#[derive(Debug, Eq, PartialEq, Clone, Ord, PartialOrd, Hash)] +#[derive(Decodable, Encodable)] +pub enum FileName { + Real(RealFileName), + /// Call to `quote!`. + QuoteExpansion(u64), + /// Command line. + Anon(u64), + /// Hack in `src/librustc_ast/parse.rs`. + // FIXME(jseyfried) + MacroExpansion(u64), + ProcMacroSourceCode(u64), + /// Strings provided as `--cfg [cfgspec]` stored in a `crate_cfg`. 
+ CfgSpec(u64), + /// Strings provided as crate attributes in the CLI. + CliCrateAttr(u64), + /// Custom sources for explicit parser calls from plugins and drivers. + Custom(String), + DocTest(PathBuf, isize), + /// Post-substitution inline assembly from LLVM. + InlineAsm(u64), +} + +impl From<PathBuf> for FileName { + fn from(p: PathBuf) -> Self { + assert!(!p.to_string_lossy().ends_with('>')); + FileName::Real(RealFileName::LocalPath(p)) + } +} + +#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)] +pub enum FileNameDisplayPreference { + Remapped, + Local, +} + +pub struct FileNameDisplay<'a> { + inner: &'a FileName, + display_pref: FileNameDisplayPreference, +} + +impl fmt::Display for FileNameDisplay<'_> { + fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use FileName::*; + match *self.inner { + Real(ref name) => { + write!(fmt, "{}", name.to_string_lossy(self.display_pref)) + } + QuoteExpansion(_) => write!(fmt, "<quote expansion>"), + MacroExpansion(_) => write!(fmt, "<macro expansion>"), + Anon(_) => write!(fmt, "<anon>"), + ProcMacroSourceCode(_) => write!(fmt, "<proc-macro source code>"), + CfgSpec(_) => write!(fmt, "<cfgspec>"), + CliCrateAttr(_) => write!(fmt, "<crate attribute>"), + Custom(ref s) => write!(fmt, "<{}>", s), + DocTest(ref path, _) => write!(fmt, "{}", path.display()), + InlineAsm(_) => write!(fmt, "<inline asm>"), + } + } +} + +impl<'a> FileNameDisplay<'a> { + pub fn to_string_lossy(&self) -> Cow<'a, str> { + match self.inner { + FileName::Real(ref inner) => inner.to_string_lossy(self.display_pref), + _ => Cow::from(self.to_string()), + } + } +} + +impl FileName { + pub fn is_real(&self) -> bool { + use FileName::*; + match *self { + Real(_) => true, + Anon(_) + | MacroExpansion(_) + | ProcMacroSourceCode(_) + | CfgSpec(_) + | CliCrateAttr(_) + | Custom(_) + | QuoteExpansion(_) + | DocTest(_, _) + | InlineAsm(_) => false, + } + } + + pub fn prefer_remapped(&self) -> FileNameDisplay<'_> { + FileNameDisplay { inner: 
self, display_pref: FileNameDisplayPreference::Remapped } + } + + // This may include transient local filesystem information. + // Must not be embedded in build outputs. + pub fn prefer_local(&self) -> FileNameDisplay<'_> { + FileNameDisplay { inner: self, display_pref: FileNameDisplayPreference::Local } + } + + pub fn display(&self, display_pref: FileNameDisplayPreference) -> FileNameDisplay<'_> { + FileNameDisplay { inner: self, display_pref } + } + + pub fn macro_expansion_source_code(src: &str) -> FileName { + let mut hasher = StableHasher::new(); + src.hash(&mut hasher); + FileName::MacroExpansion(hasher.finish()) + } + + pub fn anon_source_code(src: &str) -> FileName { + let mut hasher = StableHasher::new(); + src.hash(&mut hasher); + FileName::Anon(hasher.finish()) + } + + pub fn proc_macro_source_code(src: &str) -> FileName { + let mut hasher = StableHasher::new(); + src.hash(&mut hasher); + FileName::ProcMacroSourceCode(hasher.finish()) + } + + pub fn cfg_spec_source_code(src: &str) -> FileName { + let mut hasher = StableHasher::new(); + src.hash(&mut hasher); + FileName::QuoteExpansion(hasher.finish()) + } + + pub fn cli_crate_attr_source_code(src: &str) -> FileName { + let mut hasher = StableHasher::new(); + src.hash(&mut hasher); + FileName::CliCrateAttr(hasher.finish()) + } + + pub fn doc_test_source_code(path: PathBuf, line: isize) -> FileName { + FileName::DocTest(path, line) + } + + pub fn inline_asm_source_code(src: &str) -> FileName { + let mut hasher = StableHasher::new(); + src.hash(&mut hasher); + FileName::InlineAsm(hasher.finish()) + } +} + +/// Represents a span. +/// +/// Spans represent a region of code, used for error reporting. Positions in spans +/// are *absolute* positions from the beginning of the [`SourceMap`], not positions +/// relative to [`SourceFile`]s. Methods on the `SourceMap` can be used to relate spans back +/// to the original source. 
+/// +/// You must be careful if the span crosses more than one file, since you will not be +/// able to use many of the functions on spans in source_map and you cannot assume +/// that the length of the span is equal to `span.hi - span.lo`; there may be space in the +/// [`BytePos`] range between files. +/// +/// `SpanData` is public because `Span` uses a thread-local interner and can't be +/// sent to other threads, but some pieces of performance infra run in a separate thread. +/// Using `Span` is generally preferred. +#[derive(Clone, Copy, Hash, PartialEq, Eq)] +pub struct SpanData { + pub lo: BytePos, + pub hi: BytePos, + /// Information about where the macro came from, if this piece of + /// code was created by a macro expansion. + pub ctxt: SyntaxContext, + pub parent: Option<LocalDefId>, +} + +// Order spans by position in the file. +impl Ord for SpanData { + fn cmp(&self, other: &Self) -> Ordering { + let SpanData { + lo: s_lo, + hi: s_hi, + ctxt: s_ctxt, + // `LocalDefId` does not implement `Ord`. + // The other fields are enough to determine in-file order. + parent: _, + } = self; + let SpanData { + lo: o_lo, + hi: o_hi, + ctxt: o_ctxt, + // `LocalDefId` does not implement `Ord`. + // The other fields are enough to determine in-file order. 
+ parent: _, + } = other; + + (s_lo, s_hi, s_ctxt).cmp(&(o_lo, o_hi, o_ctxt)) + } +} + +impl PartialOrd for SpanData { + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + Some(self.cmp(other)) + } +} + +impl SpanData { + #[inline] + pub fn span(&self) -> Span { + Span::new(self.lo, self.hi, self.ctxt, self.parent) + } + #[inline] + pub fn with_lo(&self, lo: BytePos) -> Span { + Span::new(lo, self.hi, self.ctxt, self.parent) + } + #[inline] + pub fn with_hi(&self, hi: BytePos) -> Span { + Span::new(self.lo, hi, self.ctxt, self.parent) + } + #[inline] + pub fn with_ctxt(&self, ctxt: SyntaxContext) -> Span { + Span::new(self.lo, self.hi, ctxt, self.parent) + } + #[inline] + pub fn with_parent(&self, parent: Option<LocalDefId>) -> Span { + Span::new(self.lo, self.hi, self.ctxt, parent) + } + /// Returns `true` if this is a dummy span with any hygienic context. + #[inline] + pub fn is_dummy(self) -> bool { + self.lo.0 == 0 && self.hi.0 == 0 + } + /// Returns `true` if `self` fully encloses `other`. + pub fn contains(self, other: Self) -> bool { + self.lo <= other.lo && other.hi <= self.hi + } +} + +// The interner is pointed to by a thread local value which is only set on the main thread +// with parallelization is disabled. So we don't allow `Span` to transfer between threads +// to avoid panics and other errors, even though it would be memory safe to do so. 
+#[cfg(not(parallel_compiler))] +impl !Send for Span {} +#[cfg(not(parallel_compiler))] +impl !Sync for Span {} + +impl PartialOrd for Span { + fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> { + PartialOrd::partial_cmp(&self.data(), &rhs.data()) + } +} +impl Ord for Span { + fn cmp(&self, rhs: &Self) -> Ordering { + Ord::cmp(&self.data(), &rhs.data()) + } +} + +impl Span { + #[inline] + pub fn lo(self) -> BytePos { + self.data().lo + } + #[inline] + pub fn with_lo(self, lo: BytePos) -> Span { + self.data().with_lo(lo) + } + #[inline] + pub fn hi(self) -> BytePos { + self.data().hi + } + #[inline] + pub fn with_hi(self, hi: BytePos) -> Span { + self.data().with_hi(hi) + } + #[inline] + pub fn ctxt(self) -> SyntaxContext { + self.data_untracked().ctxt + } + pub fn eq_ctxt(self, other: Span) -> bool { + self.data_untracked().ctxt == other.data_untracked().ctxt + } + #[inline] + pub fn with_ctxt(self, ctxt: SyntaxContext) -> Span { + self.data_untracked().with_ctxt(ctxt) + } + #[inline] + pub fn parent(self) -> Option<LocalDefId> { + self.data().parent + } + #[inline] + pub fn with_parent(self, ctxt: Option<LocalDefId>) -> Span { + self.data().with_parent(ctxt) + } + + /// Returns `true` if this is a dummy span with any hygienic context. + #[inline] + pub fn is_dummy(self) -> bool { + self.data_untracked().is_dummy() + } + + /// Returns `true` if this span comes from a macro or desugaring. + #[inline] + pub fn from_expansion(self) -> bool { + self.ctxt() != SyntaxContext::root() + } + + /// Returns `true` if `span` originates in a derive-macro's expansion. + pub fn in_derive_expansion(self) -> bool { + matches!(self.ctxt().outer_expn_data().kind, ExpnKind::Macro(MacroKind::Derive, _)) + } + + /// Gate suggestions that would not be appropriate in a context the user didn't write. 
+ pub fn can_be_used_for_suggestions(self) -> bool { + !self.from_expansion() + // FIXME: If this span comes from a `derive` macro but it points at code the user wrote, + // the callsite span and the span will be pointing at different places. It also means that + // we can safely provide suggestions on this span. + || (matches!(self.ctxt().outer_expn_data().kind, ExpnKind::Macro(MacroKind::Derive, _)) + && self.parent_callsite().map(|p| (p.lo(), p.hi())) != Some((self.lo(), self.hi()))) + } + + #[inline] + pub fn with_root_ctxt(lo: BytePos, hi: BytePos) -> Span { + Span::new(lo, hi, SyntaxContext::root(), None) + } + + /// Returns a new span representing an empty span at the beginning of this span. + #[inline] + pub fn shrink_to_lo(self) -> Span { + let span = self.data_untracked(); + span.with_hi(span.lo) + } + /// Returns a new span representing an empty span at the end of this span. + #[inline] + pub fn shrink_to_hi(self) -> Span { + let span = self.data_untracked(); + span.with_lo(span.hi) + } + + #[inline] + /// Returns `true` if `hi == lo`. + pub fn is_empty(self) -> bool { + let span = self.data_untracked(); + span.hi == span.lo + } + + /// Returns `self` if `self` is not the dummy span, and `other` otherwise. + pub fn substitute_dummy(self, other: Span) -> Span { + if self.is_dummy() { other } else { self } + } + + /// Returns `true` if `self` fully encloses `other`. + pub fn contains(self, other: Span) -> bool { + let span = self.data(); + let other = other.data(); + span.contains(other) + } + + /// Returns `true` if `self` touches `other`. + pub fn overlaps(self, other: Span) -> bool { + let span = self.data(); + let other = other.data(); + span.lo < other.hi && other.lo < span.hi + } + + /// Returns `true` if the spans are equal with regards to the source text. + /// + /// Use this instead of `==` when either span could be generated code, + /// and you only care that they point to the same bytes of source text. 
+ pub fn source_equal(self, other: Span) -> bool { + let span = self.data(); + let other = other.data(); + span.lo == other.lo && span.hi == other.hi + } + + /// Returns `Some(span)`, where the start is trimmed by the end of `other`. + pub fn trim_start(self, other: Span) -> Option<Span> { + let span = self.data(); + let other = other.data(); + if span.hi > other.hi { Some(span.with_lo(cmp::max(span.lo, other.hi))) } else { None } + } + + /// Returns the source span -- this is either the supplied span, or the span for + /// the macro callsite that expanded to it. + pub fn source_callsite(self) -> Span { + let expn_data = self.ctxt().outer_expn_data(); + if !expn_data.is_root() { expn_data.call_site.source_callsite() } else { self } + } + + /// The `Span` for the tokens in the previous macro expansion from which `self` was generated, + /// if any. + pub fn parent_callsite(self) -> Option<Span> { + let expn_data = self.ctxt().outer_expn_data(); + if !expn_data.is_root() { Some(expn_data.call_site) } else { None } + } + + /// Walk down the expansion ancestors to find a span that's contained within `outer`. + pub fn find_ancestor_inside(mut self, outer: Span) -> Option<Span> { + while !outer.contains(self) { + self = self.parent_callsite()?; + } + Some(self) + } + + /// Edition of the crate from which this span came. + pub fn edition(self) -> edition::Edition { + self.ctxt().edition() + } + + #[inline] + pub fn rust_2015(self) -> bool { + self.edition() == edition::Edition::Edition2015 + } + + #[inline] + pub fn rust_2018(self) -> bool { + self.edition() >= edition::Edition::Edition2018 + } + + #[inline] + pub fn rust_2021(self) -> bool { + self.edition() >= edition::Edition::Edition2021 + } + + #[inline] + pub fn rust_2024(self) -> bool { + self.edition() >= edition::Edition::Edition2024 + } + + /// Returns the source callee. 
+ /// + /// Returns `None` if the supplied span has no expansion trace, + /// else returns the `ExpnData` for the macro definition + /// corresponding to the source callsite. + pub fn source_callee(self) -> Option<ExpnData> { + fn source_callee(expn_data: ExpnData) -> ExpnData { + let next_expn_data = expn_data.call_site.ctxt().outer_expn_data(); + if !next_expn_data.is_root() { source_callee(next_expn_data) } else { expn_data } + } + let expn_data = self.ctxt().outer_expn_data(); + if !expn_data.is_root() { Some(source_callee(expn_data)) } else { None } + } + + /// Checks if a span is "internal" to a macro in which `#[unstable]` + /// items can be used (that is, a macro marked with + /// `#[allow_internal_unstable]`). + pub fn allows_unstable(self, feature: Symbol) -> bool { + self.ctxt() + .outer_expn_data() + .allow_internal_unstable + .map_or(false, |features| features.iter().any(|&f| f == feature)) + } + + /// Checks if this span arises from a compiler desugaring of kind `kind`. + pub fn is_desugaring(self, kind: DesugaringKind) -> bool { + match self.ctxt().outer_expn_data().kind { + ExpnKind::Desugaring(k) => k == kind, + _ => false, + } + } + + /// Returns the compiler desugaring that created this span, or `None` + /// if this span is not from a desugaring. + pub fn desugaring_kind(self) -> Option<DesugaringKind> { + match self.ctxt().outer_expn_data().kind { + ExpnKind::Desugaring(k) => Some(k), + _ => None, + } + } + + /// Checks if a span is "internal" to a macro in which `unsafe` + /// can be used without triggering the `unsafe_code` lint. + // (that is, a macro marked with `#[allow_internal_unsafe]`). 
+ pub fn allows_unsafe(self) -> bool { + self.ctxt().outer_expn_data().allow_internal_unsafe + } + + pub fn macro_backtrace(mut self) -> impl Iterator<Item = ExpnData> { + let mut prev_span = DUMMY_SP; + std::iter::from_fn(move || { + loop { + let expn_data = self.ctxt().outer_expn_data(); + if expn_data.is_root() { + return None; + } + + let is_recursive = expn_data.call_site.source_equal(prev_span); + + prev_span = self; + self = expn_data.call_site; + + // Don't print recursive invocations. + if !is_recursive { + return Some(expn_data); + } + } + }) + } + + /// Returns a `Span` that would enclose both `self` and `end`. + /// + /// ```text + /// ____ ___ + /// self lorem ipsum end + /// ^^^^^^^^^^^^^^^^^^^^ + /// ``` + pub fn to(self, end: Span) -> Span { + let span_data = self.data(); + let end_data = end.data(); + // FIXME(jseyfried): `self.ctxt` should always equal `end.ctxt` here (cf. issue #23480). + // Return the macro span on its own to avoid weird diagnostic output. It is preferable to + // have an incomplete span than a completely nonsensical one. + if span_data.ctxt != end_data.ctxt { + if span_data.ctxt == SyntaxContext::root() { + return end; + } else if end_data.ctxt == SyntaxContext::root() { + return self; + } + // Both spans fall within a macro. + // FIXME(estebank): check if it is the *same* macro. + } + Span::new( + cmp::min(span_data.lo, end_data.lo), + cmp::max(span_data.hi, end_data.hi), + if span_data.ctxt == SyntaxContext::root() { end_data.ctxt } else { span_data.ctxt }, + if span_data.parent == end_data.parent { span_data.parent } else { None }, + ) + } + + /// Returns a `Span` between the end of `self` to the beginning of `end`. 
+ /// + /// ```text + /// ____ ___ + /// self lorem ipsum end + /// ^^^^^^^^^^^^^ + /// ``` + pub fn between(self, end: Span) -> Span { + let span = self.data(); + let end = end.data(); + Span::new( + span.hi, + end.lo, + if end.ctxt == SyntaxContext::root() { end.ctxt } else { span.ctxt }, + if span.parent == end.parent { span.parent } else { None }, + ) + } + + /// Returns a `Span` from the beginning of `self` until the beginning of `end`. + /// + /// ```text + /// ____ ___ + /// self lorem ipsum end + /// ^^^^^^^^^^^^^^^^^ + /// ``` + pub fn until(self, end: Span) -> Span { + // Most of this function's body is copied from `to`. + // We can't just do `self.to(end.shrink_to_lo())`, + // because to also does some magic where it uses min/max so + // it can handle overlapping spans. Some advanced mis-use of + // `until` with different ctxts makes this visible. + let span_data = self.data(); + let end_data = end.data(); + // FIXME(jseyfried): `self.ctxt` should always equal `end.ctxt` here (cf. issue #23480). + // Return the macro span on its own to avoid weird diagnostic output. It is preferable to + // have an incomplete span than a completely nonsensical one. + if span_data.ctxt != end_data.ctxt { + if span_data.ctxt == SyntaxContext::root() { + return end; + } else if end_data.ctxt == SyntaxContext::root() { + return self; + } + // Both spans fall within a macro. + // FIXME(estebank): check if it is the *same* macro. 
+ } + Span::new( + span_data.lo, + end_data.lo, + if end_data.ctxt == SyntaxContext::root() { end_data.ctxt } else { span_data.ctxt }, + if span_data.parent == end_data.parent { span_data.parent } else { None }, + ) + } + + pub fn from_inner(self, inner: InnerSpan) -> Span { + let span = self.data(); + Span::new( + span.lo + BytePos::from_usize(inner.start), + span.lo + BytePos::from_usize(inner.end), + span.ctxt, + span.parent, + ) + } + + /// Equivalent of `Span::def_site` from the proc macro API, + /// except that the location is taken from the `self` span. + pub fn with_def_site_ctxt(self, expn_id: ExpnId) -> Span { + self.with_ctxt_from_mark(expn_id, Transparency::Opaque) + } + + /// Equivalent of `Span::call_site` from the proc macro API, + /// except that the location is taken from the `self` span. + pub fn with_call_site_ctxt(self, expn_id: ExpnId) -> Span { + self.with_ctxt_from_mark(expn_id, Transparency::Transparent) + } + + /// Equivalent of `Span::mixed_site` from the proc macro API, + /// except that the location is taken from the `self` span. + pub fn with_mixed_site_ctxt(self, expn_id: ExpnId) -> Span { + self.with_ctxt_from_mark(expn_id, Transparency::SemiTransparent) + } + + /// Produces a span with the same location as `self` and context produced by a macro with the + /// given ID and transparency, assuming that macro was defined directly and not produced by + /// some other macro (which is the case for built-in and procedural macros). 
    pub fn with_ctxt_from_mark(self, expn_id: ExpnId, transparency: Transparency) -> Span {
        // Start from the root context so the resulting context contains only
        // the single mark `(expn_id, transparency)`.
        self.with_ctxt(SyntaxContext::root().apply_mark(expn_id, transparency))
    }

    /// Adds the mark `(expn_id, transparency)` on top of this span's existing
    /// hygiene context (delegates to `SyntaxContext::apply_mark`).
    #[inline]
    pub fn apply_mark(self, expn_id: ExpnId, transparency: Transparency) -> Span {
        let span = self.data();
        span.with_ctxt(span.ctxt.apply_mark(expn_id, transparency))
    }

    /// Removes the outermost mark from this span's hygiene context in place,
    /// returning the removed `ExpnId` (delegates to `SyntaxContext::remove_mark`).
    #[inline]
    pub fn remove_mark(&mut self) -> ExpnId {
        let mut span = self.data();
        let mark = span.ctxt.remove_mark();
        *self = Span::new(span.lo, span.hi, span.ctxt, span.parent);
        mark
    }

    /// Adjusts this span's hygiene context in place relative to `expn_id`
    /// (delegates to `SyntaxContext::adjust`; see that method for semantics).
    #[inline]
    pub fn adjust(&mut self, expn_id: ExpnId) -> Option<ExpnId> {
        let mut span = self.data();
        let mark = span.ctxt.adjust(expn_id);
        *self = Span::new(span.lo, span.hi, span.ctxt, span.parent);
        mark
    }

    /// Like `adjust`, but normalizes the context to macros-2.0 hygiene first
    /// (delegates to `SyntaxContext::normalize_to_macros_2_0_and_adjust`).
    #[inline]
    pub fn normalize_to_macros_2_0_and_adjust(&mut self, expn_id: ExpnId) -> Option<ExpnId> {
        let mut span = self.data();
        let mark = span.ctxt.normalize_to_macros_2_0_and_adjust(expn_id);
        *self = Span::new(span.lo, span.hi, span.ctxt, span.parent);
        mark
    }

    /// Adjusts this span's context for a glob import with span `glob_span`
    /// (delegates to `SyntaxContext::glob_adjust`).
    #[inline]
    pub fn glob_adjust(&mut self, expn_id: ExpnId, glob_span: Span) -> Option<Option<ExpnId>> {
        let mut span = self.data();
        let mark = span.ctxt.glob_adjust(expn_id, glob_span);
        *self = Span::new(span.lo, span.hi, span.ctxt, span.parent);
        mark
    }

    /// Undoes a previous `glob_adjust` on this span's context
    /// (delegates to `SyntaxContext::reverse_glob_adjust`).
    #[inline]
    pub fn reverse_glob_adjust(
        &mut self,
        expn_id: ExpnId,
        glob_span: Span,
    ) -> Option<Option<ExpnId>> {
        let mut span = self.data();
        let mark = span.ctxt.reverse_glob_adjust(expn_id, glob_span);
        *self = Span::new(span.lo, span.hi, span.ctxt, span.parent);
        mark
    }

    /// Returns this span with its context normalized to macros-2.0 hygiene
    /// (delegates to `SyntaxContext::normalize_to_macros_2_0`).
    #[inline]
    pub fn normalize_to_macros_2_0(self) -> Span {
        let span = self.data();
        span.with_ctxt(span.ctxt.normalize_to_macros_2_0())
    }

    /// Returns this span with its context normalized to `macro_rules` hygiene
    /// (delegates to `SyntaxContext::normalize_to_macro_rules`).
    #[inline]
    pub fn normalize_to_macro_rules(self) -> Span {
        let span = self.data();
        span.with_ctxt(span.ctxt.normalize_to_macro_rules())
    }
}

impl Default for Span {
    /// The default span is the dummy span: zero-length, root context.
    fn default() -> Self {
        DUMMY_SP
    }
}

impl<E:
Encoder> Encodable<E> for Span {
    // `default fn`: this is a specializable default impl; more specific
    // encoders may override it. Only `lo`/`hi` are serialized — the hygiene
    // context and parent are intentionally dropped.
    default fn encode(&self, s: &mut E) {
        let span = self.data();
        span.lo.encode(s);
        span.hi.encode(s);
    }
}
impl<D: Decoder> Decodable<D> for Span {
    // Mirror of the encoder above: `lo`/`hi` are read back, and the context
    // and parent are reset to root/`None` since they were not serialized.
    default fn decode(s: &mut D) -> Span {
        let lo = Decodable::decode(s);
        let hi = Decodable::decode(s);

        Span::new(lo, hi, SyntaxContext::root(), None)
    }
}

/// Calls the provided closure, using the provided `SourceMap` to format
/// any spans that are debug-printed during the closure's execution.
///
/// Normally, the global `TyCtxt` is used to retrieve the `SourceMap`
/// (see `rustc_interface::callbacks::span_debug1`). However, some parts
/// of the compiler (e.g. `rustc_parse`) may debug-print `Span`s before
/// a `TyCtxt` is available. In this case, we fall back to
/// the `SourceMap` provided to this function. If that is not available,
/// we fall back to printing the raw `Span` field values.
pub fn with_source_map<T, F: FnOnce() -> T>(source_map: Lrc<SourceMap>, f: F) -> T {
    with_session_globals(|session_globals| {
        *session_globals.source_map.borrow_mut() = Some(source_map);
    });
    // Drop guard: clears the session-global source map again even if `f`
    // panics, so a stale map never outlives this call.
    struct ClearSourceMap;
    impl Drop for ClearSourceMap {
        fn drop(&mut self) {
            with_session_globals(|session_globals| {
                session_globals.source_map.borrow_mut().take();
            });
        }
    }

    let _guard = ClearSourceMap;
    f()
}

impl fmt::Debug for Span {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        with_session_globals(|session_globals| {
            if let Some(source_map) = &*session_globals.source_map.borrow() {
                // With a source map available, render a human-readable
                // file:line:col string plus the hygiene context.
                write!(f, "{} ({:?})", source_map.span_to_diagnostic_string(*self), self.ctxt())
            } else {
                // Fallback: raw field values.
                f.debug_struct("Span")
                    .field("lo", &self.lo())
                    .field("hi", &self.hi())
                    .field("ctxt", &self.ctxt())
                    .finish()
            }
        })
    }
}

impl fmt::Debug for SpanData {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Reuse the interned `Span` formatting above.
        fmt::Debug::fmt(&Span::new(self.lo, self.hi, self.ctxt, self.parent), f)
    }
}

/// Identifies an offset of a multi-byte
character in a `SourceFile`.
#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug)]
pub struct MultiByteChar {
    /// The absolute offset of the character in the `SourceMap`.
    pub pos: BytePos,
    /// The number of bytes, `>= 2`.
    pub bytes: u8,
}

/// Identifies an offset of a non-narrow character in a `SourceFile`.
#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug)]
pub enum NonNarrowChar {
    /// Represents a zero-width character.
    ZeroWidth(BytePos),
    /// Represents a wide (full-width) character.
    Wide(BytePos),
    /// Represents a tab character, represented visually with a width of 4 characters.
    Tab(BytePos),
}

impl NonNarrowChar {
    /// Builds the variant corresponding to a display `width` of 0, 2, or 4.
    ///
    /// Panics on any other width; callers are expected to only classify
    /// characters into these three categories.
    fn new(pos: BytePos, width: usize) -> Self {
        match width {
            0 => NonNarrowChar::ZeroWidth(pos),
            2 => NonNarrowChar::Wide(pos),
            4 => NonNarrowChar::Tab(pos),
            _ => panic!("width {} given for non-narrow character", width),
        }
    }

    /// Returns the absolute offset of the character in the `SourceMap`.
    pub fn pos(&self) -> BytePos {
        match *self {
            NonNarrowChar::ZeroWidth(p) | NonNarrowChar::Wide(p) | NonNarrowChar::Tab(p) => p,
        }
    }

    /// Returns the width of the character: 0 (zero-width), 2 (wide), or 4 (tab).
    pub fn width(&self) -> usize {
        match *self {
            NonNarrowChar::ZeroWidth(_) => 0,
            NonNarrowChar::Wide(_) => 2,
            NonNarrowChar::Tab(_) => 4,
        }
    }
}

impl Add<BytePos> for NonNarrowChar {
    type Output = Self;

    // Shifts the recorded position forward by `rhs`, preserving the variant.
    fn add(self, rhs: BytePos) -> Self {
        match self {
            NonNarrowChar::ZeroWidth(pos) => NonNarrowChar::ZeroWidth(pos + rhs),
            NonNarrowChar::Wide(pos) => NonNarrowChar::Wide(pos + rhs),
            NonNarrowChar::Tab(pos) => NonNarrowChar::Tab(pos + rhs),
        }
    }
}

impl Sub<BytePos> for NonNarrowChar {
    type Output = Self;

    // Shifts the recorded position backward by `rhs`, preserving the variant.
    fn sub(self, rhs: BytePos) -> Self {
        match self {
            NonNarrowChar::ZeroWidth(pos) => NonNarrowChar::ZeroWidth(pos - rhs),
            NonNarrowChar::Wide(pos) => NonNarrowChar::Wide(pos - rhs),
            NonNarrowChar::Tab(pos) => NonNarrowChar::Tab(pos - rhs),
        }
    }
}

/// Identifies an offset of a character that was normalized away from `SourceFile`.
#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug)]
pub struct NormalizedPos {
    /// The absolute offset of the character in the `SourceMap`.
    pub pos: BytePos,
    /// The difference between original and normalized string at position.
    pub diff: u32,
}

#[derive(PartialEq, Eq, Clone, Debug)]
pub enum ExternalSource {
    /// No external source has to be loaded, since the `SourceFile` represents a local crate.
    Unneeded,
    Foreign {
        kind: ExternalSourceKind,
        /// This SourceFile's byte-offset within the source_map of its original crate.
        original_start_pos: BytePos,
        /// The end of this SourceFile within the source_map of its original crate.
        original_end_pos: BytePos,
    },
}

/// The state of the lazy external source loading mechanism of a `SourceFile`.
#[derive(PartialEq, Eq, Clone, Debug)]
pub enum ExternalSourceKind {
    /// The external source has been loaded already.
    Present(Lrc<String>),
    /// No attempt has been made to load the external source.
    AbsentOk,
    /// A failed attempt has been made to load the external source.
+ AbsentErr, + Unneeded, +} + +impl ExternalSource { + pub fn get_source(&self) -> Option<&Lrc<String>> { + match self { + ExternalSource::Foreign { kind: ExternalSourceKind::Present(ref src), .. } => Some(src), + _ => None, + } + } +} + +#[derive(Debug)] +pub struct OffsetOverflowError; + +#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Encodable, Decodable)] +#[derive(HashStable_Generic)] +pub enum SourceFileHashAlgorithm { + Md5, + Sha1, + Sha256, +} + +impl FromStr for SourceFileHashAlgorithm { + type Err = (); + + fn from_str(s: &str) -> Result<SourceFileHashAlgorithm, ()> { + match s { + "md5" => Ok(SourceFileHashAlgorithm::Md5), + "sha1" => Ok(SourceFileHashAlgorithm::Sha1), + "sha256" => Ok(SourceFileHashAlgorithm::Sha256), + _ => Err(()), + } + } +} + +/// The hash of the on-disk source file used for debug info. +#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)] +#[derive(HashStable_Generic, Encodable, Decodable)] +pub struct SourceFileHash { + pub kind: SourceFileHashAlgorithm, + value: [u8; 32], +} + +impl SourceFileHash { + pub fn new(kind: SourceFileHashAlgorithm, src: &str) -> SourceFileHash { + let mut hash = SourceFileHash { kind, value: Default::default() }; + let len = hash.hash_len(); + let value = &mut hash.value[..len]; + let data = src.as_bytes(); + match kind { + SourceFileHashAlgorithm::Md5 => { + value.copy_from_slice(&Md5::digest(data)); + } + SourceFileHashAlgorithm::Sha1 => { + value.copy_from_slice(&Sha1::digest(data)); + } + SourceFileHashAlgorithm::Sha256 => { + value.copy_from_slice(&Sha256::digest(data)); + } + } + hash + } + + /// Check if the stored hash matches the hash of the string. + pub fn matches(&self, src: &str) -> bool { + Self::new(self.kind, src) == *self + } + + /// The bytes of the hash. 
+ pub fn hash_bytes(&self) -> &[u8] { + let len = self.hash_len(); + &self.value[..len] + } + + fn hash_len(&self) -> usize { + match self.kind { + SourceFileHashAlgorithm::Md5 => 16, + SourceFileHashAlgorithm::Sha1 => 20, + SourceFileHashAlgorithm::Sha256 => 32, + } + } +} + +#[derive(HashStable_Generic)] +#[derive(Copy, PartialEq, PartialOrd, Clone, Ord, Eq, Hash, Debug, Encodable, Decodable)] +pub enum DebuggerVisualizerType { + Natvis, + GdbPrettyPrinter, +} + +/// A single debugger visualizer file. +#[derive(HashStable_Generic)] +#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Encodable, Decodable)] +pub struct DebuggerVisualizerFile { + /// The complete debugger visualizer source. + pub src: Arc<[u8]>, + /// Indicates which visualizer type this targets. + pub visualizer_type: DebuggerVisualizerType, +} + +impl DebuggerVisualizerFile { + pub fn new(src: Arc<[u8]>, visualizer_type: DebuggerVisualizerType) -> Self { + DebuggerVisualizerFile { src, visualizer_type } + } +} + +#[derive(Clone)] +pub enum SourceFileLines { + /// The source file lines, in decoded (random-access) form. + Lines(Vec<BytePos>), + + /// The source file lines, in undecoded difference list form. + Diffs(SourceFileDiffs), +} + +impl SourceFileLines { + pub fn is_lines(&self) -> bool { + matches!(self, SourceFileLines::Lines(_)) + } +} + +/// The source file lines in difference list form. This matches the form +/// used within metadata, which saves space by exploiting the fact that the +/// lines list is sorted and individual lines are usually not that long. +/// +/// We read it directly from metadata and only decode it into `Lines` form +/// when necessary. This is a significant performance win, especially for +/// small crates where very little of `std`'s metadata is used. +#[derive(Clone)] +pub struct SourceFileDiffs { + /// Position of the first line. Note that this is always encoded as a + /// `BytePos` because it is often much larger than any of the + /// differences. 
+ line_start: BytePos, + + /// Always 1, 2, or 4. Always as small as possible, while being big + /// enough to hold the length of the longest line in the source file. + /// The 1 case is by far the most common. + bytes_per_diff: usize, + + /// The number of diffs encoded in `raw_diffs`. Always one less than + /// the number of lines in the source file. + num_diffs: usize, + + /// The diffs in "raw" form. Each segment of `bytes_per_diff` length + /// encodes one little-endian diff. Note that they aren't LEB128 + /// encoded. This makes for much faster decoding. Besides, the + /// bytes_per_diff==1 case is by far the most common, and LEB128 + /// encoding has no effect on that case. + raw_diffs: Vec<u8>, +} + +/// A single source in the [`SourceMap`]. +#[derive(Clone)] +pub struct SourceFile { + /// The name of the file that the source came from. Source that doesn't + /// originate from files has names between angle brackets by convention + /// (e.g., `<anon>`). + pub name: FileName, + /// The complete source code. + pub src: Option<Lrc<String>>, + /// The source code's hash. + pub src_hash: SourceFileHash, + /// The external source code (used for external crates, which will have a `None` + /// value as `self.src`. + pub external_src: Lock<ExternalSource>, + /// The start position of this source in the `SourceMap`. + pub start_pos: BytePos, + /// The end position of this source in the `SourceMap`. + pub end_pos: BytePos, + /// Locations of lines beginnings in the source code. + pub lines: Lock<SourceFileLines>, + /// Locations of multi-byte characters in the source code. + pub multibyte_chars: Vec<MultiByteChar>, + /// Width of characters that are not narrow in the source code. + pub non_narrow_chars: Vec<NonNarrowChar>, + /// Locations of characters removed during normalization. + pub normalized_pos: Vec<NormalizedPos>, + /// A hash of the filename, used for speeding up hashing in incremental compilation. 
+ pub name_hash: u128, + /// Indicates which crate this `SourceFile` was imported from. + pub cnum: CrateNum, +} + +impl<S: Encoder> Encodable<S> for SourceFile { + fn encode(&self, s: &mut S) { + self.name.encode(s); + self.src_hash.encode(s); + self.start_pos.encode(s); + self.end_pos.encode(s); + + // We are always in `Lines` form by the time we reach here. + assert!(self.lines.borrow().is_lines()); + self.lines(|lines| { + // Store the length. + s.emit_u32(lines.len() as u32); + + // Compute and store the difference list. + if lines.len() != 0 { + let max_line_length = if lines.len() == 1 { + 0 + } else { + lines + .array_windows() + .map(|&[fst, snd]| snd - fst) + .map(|bp| bp.to_usize()) + .max() + .unwrap() + }; + + let bytes_per_diff: usize = match max_line_length { + 0..=0xFF => 1, + 0x100..=0xFFFF => 2, + _ => 4, + }; + + // Encode the number of bytes used per diff. + s.emit_u8(bytes_per_diff as u8); + + // Encode the first element. + lines[0].encode(s); + + // Encode the difference list. 
+ let diff_iter = lines.array_windows().map(|&[fst, snd]| snd - fst); + let num_diffs = lines.len() - 1; + let mut raw_diffs; + match bytes_per_diff { + 1 => { + raw_diffs = Vec::with_capacity(num_diffs); + for diff in diff_iter { + raw_diffs.push(diff.0 as u8); + } + } + 2 => { + raw_diffs = Vec::with_capacity(bytes_per_diff * num_diffs); + for diff in diff_iter { + raw_diffs.extend_from_slice(&(diff.0 as u16).to_le_bytes()); + } + } + 4 => { + raw_diffs = Vec::with_capacity(bytes_per_diff * num_diffs); + for diff in diff_iter { + raw_diffs.extend_from_slice(&(diff.0 as u32).to_le_bytes()); + } + } + _ => unreachable!(), + } + s.emit_raw_bytes(&raw_diffs); + } + }); + + self.multibyte_chars.encode(s); + self.non_narrow_chars.encode(s); + self.name_hash.encode(s); + self.normalized_pos.encode(s); + self.cnum.encode(s); + } +} + +impl<D: Decoder> Decodable<D> for SourceFile { + fn decode(d: &mut D) -> SourceFile { + let name: FileName = Decodable::decode(d); + let src_hash: SourceFileHash = Decodable::decode(d); + let start_pos: BytePos = Decodable::decode(d); + let end_pos: BytePos = Decodable::decode(d); + let lines = { + let num_lines: u32 = Decodable::decode(d); + if num_lines > 0 { + // Read the number of bytes used per diff. + let bytes_per_diff = d.read_u8() as usize; + + // Read the first element. + let line_start: BytePos = Decodable::decode(d); + + // Read the difference list. 
+ let num_diffs = num_lines as usize - 1; + let raw_diffs = d.read_raw_bytes(bytes_per_diff * num_diffs).to_vec(); + SourceFileLines::Diffs(SourceFileDiffs { + line_start, + bytes_per_diff, + num_diffs, + raw_diffs, + }) + } else { + SourceFileLines::Lines(vec![]) + } + }; + let multibyte_chars: Vec<MultiByteChar> = Decodable::decode(d); + let non_narrow_chars: Vec<NonNarrowChar> = Decodable::decode(d); + let name_hash: u128 = Decodable::decode(d); + let normalized_pos: Vec<NormalizedPos> = Decodable::decode(d); + let cnum: CrateNum = Decodable::decode(d); + SourceFile { + name, + start_pos, + end_pos, + src: None, + src_hash, + // Unused - the metadata decoder will construct + // a new SourceFile, filling in `external_src` properly + external_src: Lock::new(ExternalSource::Unneeded), + lines: Lock::new(lines), + multibyte_chars, + non_narrow_chars, + normalized_pos, + name_hash, + cnum, + } + } +} + +impl fmt::Debug for SourceFile { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(fmt, "SourceFile({:?})", self.name) + } +} + +impl SourceFile { + pub fn new( + name: FileName, + mut src: String, + start_pos: BytePos, + hash_kind: SourceFileHashAlgorithm, + ) -> Self { + // Compute the file hash before any normalization. 
+ let src_hash = SourceFileHash::new(hash_kind, &src); + let normalized_pos = normalize_src(&mut src, start_pos); + + let name_hash = { + let mut hasher: StableHasher = StableHasher::new(); + name.hash(&mut hasher); + hasher.finish::<u128>() + }; + let end_pos = start_pos.to_usize() + src.len(); + assert!(end_pos <= u32::MAX as usize); + + let (lines, multibyte_chars, non_narrow_chars) = + analyze_source_file::analyze_source_file(&src, start_pos); + + SourceFile { + name, + src: Some(Lrc::new(src)), + src_hash, + external_src: Lock::new(ExternalSource::Unneeded), + start_pos, + end_pos: Pos::from_usize(end_pos), + lines: Lock::new(SourceFileLines::Lines(lines)), + multibyte_chars, + non_narrow_chars, + normalized_pos, + name_hash, + cnum: LOCAL_CRATE, + } + } + + pub fn lines<F, R>(&self, f: F) -> R + where + F: FnOnce(&[BytePos]) -> R, + { + let mut guard = self.lines.borrow_mut(); + match &*guard { + SourceFileLines::Lines(lines) => f(lines), + SourceFileLines::Diffs(SourceFileDiffs { + mut line_start, + bytes_per_diff, + num_diffs, + raw_diffs, + }) => { + // Convert from "diffs" form to "lines" form. 
+ let num_lines = num_diffs + 1; + let mut lines = Vec::with_capacity(num_lines); + lines.push(line_start); + + assert_eq!(*num_diffs, raw_diffs.len() / bytes_per_diff); + match bytes_per_diff { + 1 => { + lines.extend(raw_diffs.into_iter().map(|&diff| { + line_start = line_start + BytePos(diff as u32); + line_start + })); + } + 2 => { + lines.extend((0..*num_diffs).map(|i| { + let pos = bytes_per_diff * i; + let bytes = [raw_diffs[pos], raw_diffs[pos + 1]]; + let diff = u16::from_le_bytes(bytes); + line_start = line_start + BytePos(diff as u32); + line_start + })); + } + 4 => { + lines.extend((0..*num_diffs).map(|i| { + let pos = bytes_per_diff * i; + let bytes = [ + raw_diffs[pos], + raw_diffs[pos + 1], + raw_diffs[pos + 2], + raw_diffs[pos + 3], + ]; + let diff = u32::from_le_bytes(bytes); + line_start = line_start + BytePos(diff); + line_start + })); + } + _ => unreachable!(), + } + let res = f(&lines); + *guard = SourceFileLines::Lines(lines); + res + } + } + } + + /// Returns the `BytePos` of the beginning of the current line. + pub fn line_begin_pos(&self, pos: BytePos) -> BytePos { + let line_index = self.lookup_line(pos).unwrap(); + self.lines(|lines| lines[line_index]) + } + + /// Add externally loaded source. + /// If the hash of the input doesn't match or no input is supplied via None, + /// it is interpreted as an error and the corresponding enum variant is set. + /// The return value signifies whether some kind of source is present. + pub fn add_external_src<F>(&self, get_src: F) -> bool + where + F: FnOnce() -> Option<String>, + { + if matches!( + *self.external_src.borrow(), + ExternalSource::Foreign { kind: ExternalSourceKind::AbsentOk, .. } + ) { + let src = get_src(); + let mut external_src = self.external_src.borrow_mut(); + // Check that no-one else have provided the source while we were getting it + if let ExternalSource::Foreign { + kind: src_kind @ ExternalSourceKind::AbsentOk, .. 
+ } = &mut *external_src + { + if let Some(mut src) = src { + // The src_hash needs to be computed on the pre-normalized src. + if self.src_hash.matches(&src) { + normalize_src(&mut src, BytePos::from_usize(0)); + *src_kind = ExternalSourceKind::Present(Lrc::new(src)); + return true; + } + } else { + *src_kind = ExternalSourceKind::AbsentErr; + } + + false + } else { + self.src.is_some() || external_src.get_source().is_some() + } + } else { + self.src.is_some() || self.external_src.borrow().get_source().is_some() + } + } + + /// Gets a line from the list of pre-computed line-beginnings. + /// The line number here is 0-based. + pub fn get_line(&self, line_number: usize) -> Option<Cow<'_, str>> { + fn get_until_newline(src: &str, begin: usize) -> &str { + // We can't use `lines.get(line_number+1)` because we might + // be parsing when we call this function and thus the current + // line is the last one we have line info for. + let slice = &src[begin..]; + match slice.find('\n') { + Some(e) => &slice[..e], + None => slice, + } + } + + let begin = { + let line = self.lines(|lines| lines.get(line_number).copied())?; + let begin: BytePos = line - self.start_pos; + begin.to_usize() + }; + + if let Some(ref src) = self.src { + Some(Cow::from(get_until_newline(src, begin))) + } else if let Some(src) = self.external_src.borrow().get_source() { + Some(Cow::Owned(String::from(get_until_newline(src, begin)))) + } else { + None + } + } + + pub fn is_real_file(&self) -> bool { + self.name.is_real() + } + + #[inline] + pub fn is_imported(&self) -> bool { + self.src.is_none() + } + + pub fn count_lines(&self) -> usize { + self.lines(|lines| lines.len()) + } + + /// Finds the line containing the given position. The return value is the + /// index into the `lines` array of this `SourceFile`, not the 1-based line + /// number. If the source_file is empty or the position is located before the + /// first line, `None` is returned. 
    pub fn lookup_line(&self, pos: BytePos) -> Option<usize> {
        self.lines(|lines| match lines.binary_search(&pos) {
            // `pos` is exactly at a line start.
            Ok(idx) => Some(idx),
            // `pos` precedes the first line start: no containing line.
            Err(0) => None,
            // `pos` falls inside the line beginning at `lines[idx - 1]`.
            Err(idx) => Some(idx - 1),
        })
    }

    /// Returns the byte range `[start, end)` of line `line_index` (0-based).
    /// For an empty file, returns the file's full (empty) range. Panics if
    /// `line_index` is out of bounds for a non-empty file.
    pub fn line_bounds(&self, line_index: usize) -> Range<BytePos> {
        if self.is_empty() {
            return self.start_pos..self.end_pos;
        }

        self.lines(|lines| {
            assert!(line_index < lines.len());
            if line_index == (lines.len() - 1) {
                // Last line extends to the end of the file.
                lines[line_index]..self.end_pos
            } else {
                lines[line_index]..lines[line_index + 1]
            }
        })
    }

    /// Returns whether or not the file contains the given `SourceMap` byte
    /// position. The position one past the end of the file is considered to be
    /// contained by the file. This implies that files for which `is_empty`
    /// returns true still contain one byte position according to this function.
    #[inline]
    pub fn contains(&self, byte_pos: BytePos) -> bool {
        byte_pos >= self.start_pos && byte_pos <= self.end_pos
    }

    /// Whether this file spans zero bytes in the `SourceMap`.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.start_pos == self.end_pos
    }

    /// Calculates the original byte position relative to the start of the file
    /// based on the given byte position.
    pub fn original_relative_byte_pos(&self, pos: BytePos) -> BytePos {
        // Diff before any records is 0. Otherwise use the previously recorded
        // diff as that applies to the following characters until a new diff
        // is recorded.
        let diff = match self.normalized_pos.binary_search_by(|np| np.pos.cmp(&pos)) {
            Ok(i) => self.normalized_pos[i].diff,
            Err(i) if i == 0 => 0,
            Err(i) => self.normalized_pos[i - 1].diff,
        };

        BytePos::from_u32(pos.0 - self.start_pos.0 + diff)
    }

    /// Converts an absolute `BytePos` to a `CharPos` relative to the `SourceFile`.
    pub fn bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos {
        // The number of extra bytes due to multibyte chars in the `SourceFile`.
        let mut total_extra_bytes = 0;

        // `multibyte_chars` is sorted by position, so we can stop at the
        // first entry at or beyond `bpos`.
        for mbc in self.multibyte_chars.iter() {
            debug!("{}-byte char at {:?}", mbc.bytes, mbc.pos);
            if mbc.pos < bpos {
                // Every character is at least one byte, so we only
                // count the actual extra bytes.
                total_extra_bytes += mbc.bytes as u32 - 1;
                // We should never see a byte position in the middle of a
                // character.
                assert!(bpos.to_u32() >= mbc.pos.to_u32() + mbc.bytes as u32);
            } else {
                break;
            }
        }

        assert!(self.start_pos.to_u32() + total_extra_bytes <= bpos.to_u32());
        CharPos(bpos.to_usize() - self.start_pos.to_usize() - total_extra_bytes as usize)
    }

    /// Looks up the file's (1-based) line number and (0-based `CharPos`) column offset, for a
    /// given `BytePos`.
    pub fn lookup_file_pos(&self, pos: BytePos) -> (usize, CharPos) {
        let chpos = self.bytepos_to_file_charpos(pos);
        match self.lookup_line(pos) {
            Some(a) => {
                let line = a + 1; // Line numbers start at 1
                let linebpos = self.lines(|lines| lines[a]);
                let linechpos = self.bytepos_to_file_charpos(linebpos);
                // Column = char offset of `pos` minus char offset of its line start.
                let col = chpos - linechpos;
                debug!("byte pos {:?} is on the line at byte pos {:?}", pos, linebpos);
                debug!("char pos {:?} is on the line at char pos {:?}", chpos, linechpos);
                debug!("byte is on line: {}", line);
                assert!(chpos >= linechpos);
                (line, col)
            }
            // `pos` precedes the first line: report line 0 with the raw char pos.
            None => (0, chpos),
        }
    }

    /// Looks up the file's (1-based) line number, (0-based `CharPos`) column offset, and (0-based)
    /// column offset when displayed, for a given `BytePos`.
    pub fn lookup_file_pos_with_col_display(&self, pos: BytePos) -> (usize, CharPos, usize) {
        let (line, col_or_chpos) = self.lookup_file_pos(pos);
        if line > 0 {
            let col = col_or_chpos;
            let linebpos = self.lines(|lines| lines[line - 1]);
            let col_display = {
                // Find the slice of `non_narrow_chars` between the line start
                // and `pos`; `binary_search_by_key` returns the insertion
                // point when the exact position is absent.
                let start_width_idx = self
                    .non_narrow_chars
                    .binary_search_by_key(&linebpos, |x| x.pos())
                    .unwrap_or_else(|x| x);
                let end_width_idx = self
                    .non_narrow_chars
                    .binary_search_by_key(&pos, |x| x.pos())
                    .unwrap_or_else(|x| x);
                let special_chars = end_width_idx - start_width_idx;
                let non_narrow: usize = self.non_narrow_chars[start_width_idx..end_width_idx]
                    .iter()
                    .map(|x| x.width())
                    .sum();
                // Each non-narrow char was counted as width 1 in `col`;
                // subtract those and add their true display widths.
                col.0 - special_chars + non_narrow
            };
            (line, col, col_display)
        } else {
            // Position precedes the first line: same width correction, but
            // measured from the start of the file.
            let chpos = col_or_chpos;
            let col_display = {
                let end_width_idx = self
                    .non_narrow_chars
                    .binary_search_by_key(&pos, |x| x.pos())
                    .unwrap_or_else(|x| x);
                let non_narrow: usize =
                    self.non_narrow_chars[0..end_width_idx].iter().map(|x| x.width()).sum();
                chpos.0 - end_width_idx + non_narrow
            };
            (0, chpos, col_display)
        }
    }
}

/// Normalizes the source code and records the normalizations.
fn normalize_src(src: &mut String, start_pos: BytePos) -> Vec<NormalizedPos> {
    let mut normalized_pos = vec![];
    remove_bom(src, &mut normalized_pos);
    normalize_newlines(src, &mut normalized_pos);

    // Offset all the positions by start_pos to match the final file positions.
    for np in &mut normalized_pos {
        np.pos.0 += start_pos.0;
    }

    normalized_pos
}

/// Removes UTF-8 BOM, if any.
fn remove_bom(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>) {
    if src.starts_with('\u{feff}') {
        // U+FEFF is 3 bytes in UTF-8; record the 3-byte shift at offset 0.
        src.drain(..3);
        normalized_pos.push(NormalizedPos { pos: BytePos(0), diff: 3 });
    }
}

/// Replaces `\r\n` with `\n` in-place in `src`.
///
/// Lone `\r` bytes (not followed by `\n`) are left unchanged.
fn normalize_newlines(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>) {
    if !src.as_bytes().contains(&b'\r') {
        return;
    }

    // We replace `\r\n` with `\n` in-place, which doesn't break utf-8 encoding.
    // While we *can* call `as_mut_vec` and do surgery on the live string
    // directly, let's rather steal the contents of `src`. This makes the code
    // safe even if a panic occurs.

    let mut buf = std::mem::replace(src, String::new()).into_bytes();
    // `gap_len` = number of `\r` bytes removed so far; the live data is
    // compacted leftwards over that gap as we scan.
    let mut gap_len = 0;
    let mut tail = buf.as_mut_slice();
    let mut cursor = 0;
    // Continue numbering diffs after any already recorded (e.g. a BOM removal).
    let original_gap = normalized_pos.last().map_or(0, |l| l.diff);
    loop {
        let idx = match find_crlf(&tail[gap_len..]) {
            None => tail.len(),
            Some(idx) => idx + gap_len,
        };
        // Slide the chunk before the `\r\n` left over the accumulated gap.
        tail.copy_within(gap_len..idx, 0);
        tail = &mut tail[idx - gap_len..];
        if tail.len() == gap_len {
            break;
        }
        cursor += idx - gap_len;
        gap_len += 1;
        // Record the cumulative byte shift that applies after this `\n`.
        normalized_pos.push(NormalizedPos {
            pos: BytePos::from_usize(cursor + 1),
            diff: original_gap + gap_len as u32,
        });
    }

    // Account for removed `\r`.
    // After `set_len`, `buf` is guaranteed to contain utf-8 again.
    let new_len = buf.len() - gap_len;
    unsafe {
        buf.set_len(new_len);
        *src = String::from_utf8_unchecked(buf);
    }

    // Finds the first `\r` that is immediately followed by `\n`;
    // lone `\r` bytes are skipped over.
    fn find_crlf(src: &[u8]) -> Option<usize> {
        let mut search_idx = 0;
        while let Some(idx) = find_cr(&src[search_idx..]) {
            if src[search_idx..].get(idx + 1) != Some(&b'\n') {
                search_idx += idx + 1;
                continue;
            }
            return Some(search_idx + idx);
        }
        None
    }

    // Finds the first `\r` byte, if any.
    fn find_cr(src: &[u8]) -> Option<usize> {
        src.iter().position(|&b| b == b'\r')
    }
}

// _____________________________________________________________________________
// Pos, BytePos, CharPos
//

/// Common conversions for the position newtypes (`BytePos`, `CharPos`)
/// generated by `impl_pos!` below.
pub trait Pos {
    fn from_usize(n: usize) -> Self;
    fn to_usize(&self) -> usize;
    fn from_u32(n: u32) -> Self;
    fn to_u32(&self) -> u32;
}

macro_rules!
impl_pos { + ( + $( + $(#[$attr:meta])* + $vis:vis struct $ident:ident($inner_vis:vis $inner_ty:ty); + )* + ) => { + $( + $(#[$attr])* + $vis struct $ident($inner_vis $inner_ty); + + impl Pos for $ident { + #[inline(always)] + fn from_usize(n: usize) -> $ident { + $ident(n as $inner_ty) + } + + #[inline(always)] + fn to_usize(&self) -> usize { + self.0 as usize + } + + #[inline(always)] + fn from_u32(n: u32) -> $ident { + $ident(n as $inner_ty) + } + + #[inline(always)] + fn to_u32(&self) -> u32 { + self.0 as u32 + } + } + + impl Add for $ident { + type Output = $ident; + + #[inline(always)] + fn add(self, rhs: $ident) -> $ident { + $ident(self.0 + rhs.0) + } + } + + impl Sub for $ident { + type Output = $ident; + + #[inline(always)] + fn sub(self, rhs: $ident) -> $ident { + $ident(self.0 - rhs.0) + } + } + )* + }; +} + +impl_pos! { + /// A byte offset. + /// + /// Keep this small (currently 32-bits), as AST contains a lot of them. + #[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)] + pub struct BytePos(pub u32); + + /// A character offset. + /// + /// Because of multibyte UTF-8 characters, a byte offset + /// is not equivalent to a character offset. The [`SourceMap`] will convert [`BytePos`] + /// values to `CharPos` values as necessary. + #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] + pub struct CharPos(pub usize); +} + +impl<S: Encoder> Encodable<S> for BytePos { + fn encode(&self, s: &mut S) { + s.emit_u32(self.0); + } +} + +impl<D: Decoder> Decodable<D> for BytePos { + fn decode(d: &mut D) -> BytePos { + BytePos(d.read_u32()) + } +} + +// _____________________________________________________________________________ +// Loc, SourceFileAndLine, SourceFileAndBytePos +// + +/// A source code location used for error reporting. +#[derive(Debug, Clone)] +pub struct Loc { + /// Information about the original source. + pub file: Lrc<SourceFile>, + /// The (1-based) line number. + pub line: usize, + /// The (0-based) column offset. 
+ pub col: CharPos, + /// The (0-based) column offset when displayed. + pub col_display: usize, +} + +// Used to be structural records. +#[derive(Debug)] +pub struct SourceFileAndLine { + pub sf: Lrc<SourceFile>, + /// Index of line, starting from 0. + pub line: usize, +} +#[derive(Debug)] +pub struct SourceFileAndBytePos { + pub sf: Lrc<SourceFile>, + pub pos: BytePos, +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct LineInfo { + /// Index of line, starting from 0. + pub line_index: usize, + + /// Column in line where span begins, starting from 0. + pub start_col: CharPos, + + /// Column in line where span ends, starting from 0, exclusive. + pub end_col: CharPos, +} + +pub struct FileLines { + pub file: Lrc<SourceFile>, + pub lines: Vec<LineInfo>, +} + +pub static SPAN_TRACK: AtomicRef<fn(LocalDefId)> = AtomicRef::new(&((|_| {}) as fn(_))); + +// _____________________________________________________________________________ +// SpanLinesError, SpanSnippetError, DistinctSources, MalformedSourceMapPositions +// + +pub type FileLinesResult = Result<FileLines, SpanLinesError>; + +#[derive(Clone, PartialEq, Eq, Debug)] +pub enum SpanLinesError { + DistinctSources(DistinctSources), +} + +#[derive(Clone, PartialEq, Eq, Debug)] +pub enum SpanSnippetError { + IllFormedSpan(Span), + DistinctSources(DistinctSources), + MalformedForSourcemap(MalformedSourceMapPositions), + SourceNotAvailable { filename: FileName }, +} + +#[derive(Clone, PartialEq, Eq, Debug)] +pub struct DistinctSources { + pub begin: (FileName, BytePos), + pub end: (FileName, BytePos), +} + +#[derive(Clone, PartialEq, Eq, Debug)] +pub struct MalformedSourceMapPositions { + pub name: FileName, + pub source_len: usize, + pub begin_pos: BytePos, + pub end_pos: BytePos, +} + +/// Range inside of a `Span` used for diagnostics when we only have access to relative positions. 
+#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub struct InnerSpan { + pub start: usize, + pub end: usize, +} + +impl InnerSpan { + pub fn new(start: usize, end: usize) -> InnerSpan { + InnerSpan { start, end } + } +} + +/// Requirements for a `StableHashingContext` to be used in this crate. +/// +/// This is a hack to allow using the [`HashStable_Generic`] derive macro +/// instead of implementing everything in rustc_middle. +pub trait HashStableContext { + fn def_path_hash(&self, def_id: DefId) -> DefPathHash; + fn hash_spans(&self) -> bool; + /// Accesses `sess.opts.unstable_opts.incremental_ignore_spans` since + /// we don't have easy access to a `Session` + fn unstable_opts_incremental_ignore_spans(&self) -> bool; + fn def_span(&self, def_id: LocalDefId) -> Span; + fn span_data_to_lines_and_cols( + &mut self, + span: &SpanData, + ) -> Option<(Lrc<SourceFile>, usize, BytePos, usize, BytePos)>; + fn hashing_controls(&self) -> HashingControls; +} + +impl<CTX> HashStable<CTX> for Span +where + CTX: HashStableContext, +{ + /// Hashes a span in a stable way. We can't directly hash the span's `BytePos` + /// fields (that would be similar to hashing pointers, since those are just + /// offsets into the `SourceMap`). Instead, we hash the (file name, line, column) + /// triple, which stays the same even if the containing `SourceFile` has moved + /// within the `SourceMap`. + /// + /// Also note that we are hashing byte offsets for the column, not unicode + /// codepoint offsets. For the purpose of the hash that's sufficient. + /// Also, hashing filenames is expensive so we avoid doing it twice when the + /// span starts and ends in the same file, which is almost always the case. 
+ fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { + const TAG_VALID_SPAN: u8 = 0; + const TAG_INVALID_SPAN: u8 = 1; + const TAG_RELATIVE_SPAN: u8 = 2; + + if !ctx.hash_spans() { + return; + } + + let span = self.data_untracked(); + span.ctxt.hash_stable(ctx, hasher); + span.parent.hash_stable(ctx, hasher); + + if span.is_dummy() { + Hash::hash(&TAG_INVALID_SPAN, hasher); + return; + } + + if let Some(parent) = span.parent { + let def_span = ctx.def_span(parent).data_untracked(); + if def_span.contains(span) { + // This span is enclosed in a definition: only hash the relative position. + Hash::hash(&TAG_RELATIVE_SPAN, hasher); + (span.lo - def_span.lo).to_u32().hash_stable(ctx, hasher); + (span.hi - def_span.lo).to_u32().hash_stable(ctx, hasher); + return; + } + } + + // If this is not an empty or invalid span, we want to hash the last + // position that belongs to it, as opposed to hashing the first + // position past it. + let Some((file, line_lo, col_lo, line_hi, col_hi)) = ctx.span_data_to_lines_and_cols(&span) else { + Hash::hash(&TAG_INVALID_SPAN, hasher); + return; + }; + + Hash::hash(&TAG_VALID_SPAN, hasher); + // We truncate the stable ID hash and line and column numbers. The chances + // of causing a collision this way should be minimal. + Hash::hash(&(file.name_hash as u64), hasher); + + // Hash both the length and the end location (line/column) of a span. If we + // hash only the length, for example, then two otherwise equal spans with + // different end locations will have the same hash. This can cause a problem + // during incremental compilation wherein a previous result for a query that + // depends on the end location of a span will be incorrectly reused when the + // end location of the span it depends on has changed (see issue #74890). A + // similar analysis applies if some query depends specifically on the length + // of the span, but we only hash the end location. So hash both. 
+
+        let col_lo_trunc = (col_lo.0 as u64) & 0xFF;
+        let line_lo_trunc = ((line_lo as u64) & 0xFF_FF_FF) << 8;
+        // NOTE: the mask must be applied *before* the shift. `<<` binds tighter
+        // than `&` in Rust, so the unparenthesized `(col_hi.0 as u64) & 0xFF << 32`
+        // would mask with `0xFF << 32` — always 0 for a `u32` widened to `u64` —
+        // and the end column would silently never contribute to the hash.
+        let col_hi_trunc = ((col_hi.0 as u64) & 0xFF) << 32;
+        let line_hi_trunc = ((line_hi as u64) & 0xFF_FF_FF) << 40;
+        let col_line = col_lo_trunc | line_lo_trunc | col_hi_trunc | line_hi_trunc;
+        let len = (span.hi - span.lo).0;
+        Hash::hash(&col_line, hasher);
+        Hash::hash(&len, hasher);
+    }
+}
diff --git a/compiler/rustc_span/src/profiling.rs b/compiler/rustc_span/src/profiling.rs
new file mode 100644
index 000000000..f169007fa
--- /dev/null
+++ b/compiler/rustc_span/src/profiling.rs
@@ -0,0 +1,35 @@
+use std::borrow::Borrow;
+
+use rustc_data_structures::profiling::EventArgRecorder;
+
+/// Extension trait for self-profiling purposes: allows to record spans within a generic activity's
+/// event arguments.
+pub trait SpannedEventArgRecorder {
+    /// Records the following event arguments within the current generic activity being profiled:
+    /// - the provided `event_arg`
+    /// - a string representation of the provided `span`
+    ///
+    /// Note: when self-profiling with costly event arguments, at least one argument
+    /// needs to be recorded. A panic will be triggered if that doesn't happen.
+ fn record_arg_with_span<A>(&mut self, event_arg: A, span: crate::Span) + where + A: Borrow<str> + Into<String>; +} + +impl SpannedEventArgRecorder for EventArgRecorder<'_> { + fn record_arg_with_span<A>(&mut self, event_arg: A, span: crate::Span) + where + A: Borrow<str> + Into<String>, + { + self.record_arg(event_arg); + + let span_arg = crate::with_session_globals(|session_globals| { + if let Some(source_map) = &*session_globals.source_map.borrow() { + source_map.span_to_embeddable_string(span) + } else { + format!("{:?}", span) + } + }); + self.record_arg(span_arg); + } +} diff --git a/compiler/rustc_span/src/source_map.rs b/compiler/rustc_span/src/source_map.rs new file mode 100644 index 000000000..28381157d --- /dev/null +++ b/compiler/rustc_span/src/source_map.rs @@ -0,0 +1,1281 @@ +//! Types for tracking pieces of source code within a crate. +//! +//! The [`SourceMap`] tracks all the source code used within a single crate, mapping +//! from integer byte positions to the original source code location. Each bit +//! of source parsed during crate parsing (typically files, in-memory strings, +//! or various bits of macro expansion) cover a continuous range of bytes in the +//! `SourceMap` and are represented by [`SourceFile`]s. Byte positions are stored in +//! [`Span`] and used pervasively in the compiler. They are absolute positions +//! within the `SourceMap`, which upon request can be converted to line and column +//! information, source code snippets, etc. 
+ +pub use crate::hygiene::{ExpnData, ExpnKind}; +pub use crate::*; + +use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::stable_hasher::StableHasher; +use rustc_data_structures::sync::{AtomicU32, Lrc, MappedReadGuard, ReadGuard, RwLock}; +use std::hash::Hash; +use std::path::{Path, PathBuf}; +use std::sync::atomic::Ordering; +use std::{clone::Clone, cmp}; +use std::{convert::TryFrom, unreachable}; + +use std::fs; +use std::io; +use tracing::debug; + +#[cfg(test)] +mod tests; + +/// Returns the span itself if it doesn't come from a macro expansion, +/// otherwise return the call site span up to the `enclosing_sp` by +/// following the `expn_data` chain. +pub fn original_sp(sp: Span, enclosing_sp: Span) -> Span { + let expn_data1 = sp.ctxt().outer_expn_data(); + let expn_data2 = enclosing_sp.ctxt().outer_expn_data(); + if expn_data1.is_root() || !expn_data2.is_root() && expn_data1.call_site == expn_data2.call_site + { + sp + } else { + original_sp(expn_data1.call_site, enclosing_sp) + } +} + +pub mod monotonic { + use std::ops::{Deref, DerefMut}; + + /// A `MonotonicVec` is a `Vec` which can only be grown. 
+ /// Once inserted, an element can never be removed or swapped, + /// guaranteeing that any indices into a `MonotonicVec` are stable + // This is declared in its own module to ensure that the private + // field is inaccessible + pub struct MonotonicVec<T>(Vec<T>); + impl<T> MonotonicVec<T> { + pub fn new(val: Vec<T>) -> MonotonicVec<T> { + MonotonicVec(val) + } + + pub fn push(&mut self, val: T) { + self.0.push(val); + } + } + + impl<T> Default for MonotonicVec<T> { + fn default() -> Self { + MonotonicVec::new(vec![]) + } + } + + impl<T> Deref for MonotonicVec<T> { + type Target = Vec<T>; + fn deref(&self) -> &Self::Target { + &self.0 + } + } + + impl<T> !DerefMut for MonotonicVec<T> {} +} + +#[derive(Clone, Encodable, Decodable, Debug, Copy, HashStable_Generic)] +pub struct Spanned<T> { + pub node: T, + pub span: Span, +} + +pub fn respan<T>(sp: Span, t: T) -> Spanned<T> { + Spanned { node: t, span: sp } +} + +pub fn dummy_spanned<T>(t: T) -> Spanned<T> { + respan(DUMMY_SP, t) +} + +// _____________________________________________________________________________ +// SourceFile, MultiByteChar, FileName, FileLines +// + +/// An abstraction over the fs operations used by the Parser. +pub trait FileLoader { + /// Query the existence of a file. + fn file_exists(&self, path: &Path) -> bool; + + /// Read the contents of a UTF-8 file into memory. + fn read_file(&self, path: &Path) -> io::Result<String>; +} + +/// A FileLoader that uses std::fs to load real files. +pub struct RealFileLoader; + +impl FileLoader for RealFileLoader { + fn file_exists(&self, path: &Path) -> bool { + path.exists() + } + + fn read_file(&self, path: &Path) -> io::Result<String> { + fs::read_to_string(path) + } +} + +/// This is a [SourceFile] identifier that is used to correlate source files between +/// subsequent compilation sessions (which is something we need to do during +/// incremental compilation). 
+///
+/// The [StableSourceFileId] also contains the CrateNum of the crate the source
+/// file was originally parsed for. This way we get two separate entries in
+/// the [SourceMap] if the same file is part of both the local and an upstream
+/// crate. Trying to only have one entry for both cases is problematic because
+/// at the point where we discover that there's a local use of the file in
+/// addition to the upstream one, we might already have made decisions based on
+/// the assumption that it's an upstream file. Treating the two files as
+/// different has no real downsides.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, Encodable, Decodable, Debug)]
+pub struct StableSourceFileId {
+    // A hash of the source file's FileName. This is a hash so that its size
+    // is more predictable than if we included the actual FileName value.
+    pub file_name_hash: u64,
+
+    // The CrateNum of the crate this source file was originally parsed for.
+    // We cannot include this information in the hash because at the time
+    // of hashing we don't have the context to map from the CrateNum's numeric
+    // value to a StableCrateId.
+    pub cnum: CrateNum,
+}
+
+// FIXME: we need a more globally consistent approach to the problem solved by
+// StableSourceFileId, perhaps built atop source_file.name_hash.
+impl StableSourceFileId { + pub fn new(source_file: &SourceFile) -> StableSourceFileId { + StableSourceFileId::new_from_name(&source_file.name, source_file.cnum) + } + + fn new_from_name(name: &FileName, cnum: CrateNum) -> StableSourceFileId { + let mut hasher = StableHasher::new(); + name.hash(&mut hasher); + StableSourceFileId { file_name_hash: hasher.finish(), cnum } + } +} + +// _____________________________________________________________________________ +// SourceMap +// + +#[derive(Default)] +pub(super) struct SourceMapFiles { + source_files: monotonic::MonotonicVec<Lrc<SourceFile>>, + stable_id_to_source_file: FxHashMap<StableSourceFileId, Lrc<SourceFile>>, +} + +pub struct SourceMap { + /// The address space below this value is currently used by the files in the source map. + used_address_space: AtomicU32, + + files: RwLock<SourceMapFiles>, + file_loader: Box<dyn FileLoader + Sync + Send>, + // This is used to apply the file path remapping as specified via + // `--remap-path-prefix` to all `SourceFile`s allocated within this `SourceMap`. + path_mapping: FilePathMapping, + + /// The algorithm used for hashing the contents of each source file. 
+ hash_kind: SourceFileHashAlgorithm, +} + +impl SourceMap { + pub fn new(path_mapping: FilePathMapping) -> SourceMap { + Self::with_file_loader_and_hash_kind( + Box::new(RealFileLoader), + path_mapping, + SourceFileHashAlgorithm::Md5, + ) + } + + pub fn with_file_loader_and_hash_kind( + file_loader: Box<dyn FileLoader + Sync + Send>, + path_mapping: FilePathMapping, + hash_kind: SourceFileHashAlgorithm, + ) -> SourceMap { + SourceMap { + used_address_space: AtomicU32::new(0), + files: Default::default(), + file_loader, + path_mapping, + hash_kind, + } + } + + pub fn path_mapping(&self) -> &FilePathMapping { + &self.path_mapping + } + + pub fn file_exists(&self, path: &Path) -> bool { + self.file_loader.file_exists(path) + } + + pub fn load_file(&self, path: &Path) -> io::Result<Lrc<SourceFile>> { + let src = self.file_loader.read_file(path)?; + let filename = path.to_owned().into(); + Ok(self.new_source_file(filename, src)) + } + + /// Loads source file as a binary blob. + /// + /// Unlike `load_file`, guarantees that no normalization like BOM-removal + /// takes place. + pub fn load_binary_file(&self, path: &Path) -> io::Result<Vec<u8>> { + // Ideally, this should use `self.file_loader`, but it can't + // deal with binary files yet. + let bytes = fs::read(path)?; + + // We need to add file to the `SourceMap`, so that it is present + // in dep-info. There's also an edge case that file might be both + // loaded as a binary via `include_bytes!` and as proper `SourceFile` + // via `mod`, so we try to use real file contents and not just an + // empty string. + let text = std::str::from_utf8(&bytes).unwrap_or("").to_string(); + self.new_source_file(path.to_owned().into(), text); + Ok(bytes) + } + + // By returning a `MonotonicVec`, we ensure that consumers cannot invalidate + // any existing indices pointing into `files`. 
+ pub fn files(&self) -> MappedReadGuard<'_, monotonic::MonotonicVec<Lrc<SourceFile>>> { + ReadGuard::map(self.files.borrow(), |files| &files.source_files) + } + + pub fn source_file_by_stable_id( + &self, + stable_id: StableSourceFileId, + ) -> Option<Lrc<SourceFile>> { + self.files.borrow().stable_id_to_source_file.get(&stable_id).cloned() + } + + fn allocate_address_space(&self, size: usize) -> Result<usize, OffsetOverflowError> { + let size = u32::try_from(size).map_err(|_| OffsetOverflowError)?; + + loop { + let current = self.used_address_space.load(Ordering::Relaxed); + let next = current + .checked_add(size) + // Add one so there is some space between files. This lets us distinguish + // positions in the `SourceMap`, even in the presence of zero-length files. + .and_then(|next| next.checked_add(1)) + .ok_or(OffsetOverflowError)?; + + if self + .used_address_space + .compare_exchange(current, next, Ordering::Relaxed, Ordering::Relaxed) + .is_ok() + { + return Ok(usize::try_from(current).unwrap()); + } + } + } + + /// Creates a new `SourceFile`. + /// If a file already exists in the `SourceMap` with the same ID, that file is returned + /// unmodified. + pub fn new_source_file(&self, filename: FileName, src: String) -> Lrc<SourceFile> { + self.try_new_source_file(filename, src).unwrap_or_else(|OffsetOverflowError| { + eprintln!("fatal error: rustc does not support files larger than 4GB"); + crate::fatal_error::FatalError.raise() + }) + } + + fn try_new_source_file( + &self, + filename: FileName, + src: String, + ) -> Result<Lrc<SourceFile>, OffsetOverflowError> { + // Note that filename may not be a valid path, eg it may be `<anon>` etc, + // but this is okay because the directory determined by `path.pop()` will + // be empty, so the working directory will be used. 
+ let (filename, _) = self.path_mapping.map_filename_prefix(&filename); + + let file_id = StableSourceFileId::new_from_name(&filename, LOCAL_CRATE); + + let lrc_sf = match self.source_file_by_stable_id(file_id) { + Some(lrc_sf) => lrc_sf, + None => { + let start_pos = self.allocate_address_space(src.len())?; + + let source_file = Lrc::new(SourceFile::new( + filename, + src, + Pos::from_usize(start_pos), + self.hash_kind, + )); + + // Let's make sure the file_id we generated above actually matches + // the ID we generate for the SourceFile we just created. + debug_assert_eq!(StableSourceFileId::new(&source_file), file_id); + + let mut files = self.files.borrow_mut(); + + files.source_files.push(source_file.clone()); + files.stable_id_to_source_file.insert(file_id, source_file.clone()); + + source_file + } + }; + Ok(lrc_sf) + } + + /// Allocates a new `SourceFile` representing a source file from an external + /// crate. The source code of such an "imported `SourceFile`" is not available, + /// but we still know enough to generate accurate debuginfo location + /// information for things inlined from other crates. + pub fn new_imported_source_file( + &self, + filename: FileName, + src_hash: SourceFileHash, + name_hash: u128, + source_len: usize, + cnum: CrateNum, + file_local_lines: Lock<SourceFileLines>, + mut file_local_multibyte_chars: Vec<MultiByteChar>, + mut file_local_non_narrow_chars: Vec<NonNarrowChar>, + mut file_local_normalized_pos: Vec<NormalizedPos>, + original_start_pos: BytePos, + original_end_pos: BytePos, + ) -> Lrc<SourceFile> { + let start_pos = self + .allocate_address_space(source_len) + .expect("not enough address space for imported source file"); + + let end_pos = Pos::from_usize(start_pos + source_len); + let start_pos = Pos::from_usize(start_pos); + + // Translate these positions into the new global frame of reference, + // now that the offset of the SourceFile is known. + // + // These are all unsigned values. 
`original_start_pos` may be larger or + // smaller than `start_pos`, but `pos` is always larger than both. + // Therefore, `(pos - original_start_pos) + start_pos` won't overflow + // but `start_pos - original_start_pos` might. So we use the former + // form rather than pre-computing the offset into a local variable. The + // compiler backend can optimize away the repeated computations in a + // way that won't trigger overflow checks. + match &mut *file_local_lines.borrow_mut() { + SourceFileLines::Lines(lines) => { + for pos in lines { + *pos = (*pos - original_start_pos) + start_pos; + } + } + SourceFileLines::Diffs(SourceFileDiffs { line_start, .. }) => { + *line_start = (*line_start - original_start_pos) + start_pos; + } + } + for mbc in &mut file_local_multibyte_chars { + mbc.pos = (mbc.pos - original_start_pos) + start_pos; + } + for swc in &mut file_local_non_narrow_chars { + *swc = (*swc - original_start_pos) + start_pos; + } + for nc in &mut file_local_normalized_pos { + nc.pos = (nc.pos - original_start_pos) + start_pos; + } + + let source_file = Lrc::new(SourceFile { + name: filename, + src: None, + src_hash, + external_src: Lock::new(ExternalSource::Foreign { + kind: ExternalSourceKind::AbsentOk, + original_start_pos, + original_end_pos, + }), + start_pos, + end_pos, + lines: file_local_lines, + multibyte_chars: file_local_multibyte_chars, + non_narrow_chars: file_local_non_narrow_chars, + normalized_pos: file_local_normalized_pos, + name_hash, + cnum, + }); + + let mut files = self.files.borrow_mut(); + + files.source_files.push(source_file.clone()); + files + .stable_id_to_source_file + .insert(StableSourceFileId::new(&source_file), source_file.clone()); + + source_file + } + + // If there is a doctest offset, applies it to the line. 
+ pub fn doctest_offset_line(&self, file: &FileName, orig: usize) -> usize { + match file { + FileName::DocTest(_, offset) => { + if *offset < 0 { + orig - (-(*offset)) as usize + } else { + orig + *offset as usize + } + } + _ => orig, + } + } + + /// Return the SourceFile that contains the given `BytePos` + pub fn lookup_source_file(&self, pos: BytePos) -> Lrc<SourceFile> { + let idx = self.lookup_source_file_idx(pos); + (*self.files.borrow().source_files)[idx].clone() + } + + /// Looks up source information about a `BytePos`. + pub fn lookup_char_pos(&self, pos: BytePos) -> Loc { + let sf = self.lookup_source_file(pos); + let (line, col, col_display) = sf.lookup_file_pos_with_col_display(pos); + Loc { file: sf, line, col, col_display } + } + + // If the corresponding `SourceFile` is empty, does not return a line number. + pub fn lookup_line(&self, pos: BytePos) -> Result<SourceFileAndLine, Lrc<SourceFile>> { + let f = self.lookup_source_file(pos); + + match f.lookup_line(pos) { + Some(line) => Ok(SourceFileAndLine { sf: f, line }), + None => Err(f), + } + } + + fn span_to_string(&self, sp: Span, filename_display_pref: FileNameDisplayPreference) -> String { + if self.files.borrow().source_files.is_empty() || sp.is_dummy() { + return "no-location".to_string(); + } + + let lo = self.lookup_char_pos(sp.lo()); + let hi = self.lookup_char_pos(sp.hi()); + format!( + "{}:{}:{}: {}:{}", + lo.file.name.display(filename_display_pref), + lo.line, + lo.col.to_usize() + 1, + hi.line, + hi.col.to_usize() + 1, + ) + } + + /// Format the span location suitable for embedding in build artifacts + pub fn span_to_embeddable_string(&self, sp: Span) -> String { + self.span_to_string(sp, FileNameDisplayPreference::Remapped) + } + + /// Format the span location suitable for pretty printing anotations with relative line numbers + pub fn span_to_relative_line_string(&self, sp: Span, relative_to: Span) -> String { + if self.files.borrow().source_files.is_empty() || sp.is_dummy() || 
relative_to.is_dummy() { + return "no-location".to_string(); + } + + let lo = self.lookup_char_pos(sp.lo()); + let hi = self.lookup_char_pos(sp.hi()); + let offset = self.lookup_char_pos(relative_to.lo()); + + if lo.file.name != offset.file.name { + return self.span_to_embeddable_string(sp); + } + + let lo_line = lo.line.saturating_sub(offset.line); + let hi_line = hi.line.saturating_sub(offset.line); + + format!( + "{}:+{}:{}: +{}:{}", + lo.file.name.display(FileNameDisplayPreference::Remapped), + lo_line, + lo.col.to_usize() + 1, + hi_line, + hi.col.to_usize() + 1, + ) + } + + /// Format the span location to be printed in diagnostics. Must not be emitted + /// to build artifacts as this may leak local file paths. Use span_to_embeddable_string + /// for string suitable for embedding. + pub fn span_to_diagnostic_string(&self, sp: Span) -> String { + self.span_to_string(sp, self.path_mapping.filename_display_for_diagnostics) + } + + pub fn span_to_filename(&self, sp: Span) -> FileName { + self.lookup_char_pos(sp.lo()).file.name.clone() + } + + pub fn filename_for_diagnostics<'a>(&self, filename: &'a FileName) -> FileNameDisplay<'a> { + filename.display(self.path_mapping.filename_display_for_diagnostics) + } + + pub fn is_multiline(&self, sp: Span) -> bool { + let lo = self.lookup_source_file_idx(sp.lo()); + let hi = self.lookup_source_file_idx(sp.hi()); + if lo != hi { + return true; + } + let f = (*self.files.borrow().source_files)[lo].clone(); + f.lookup_line(sp.lo()) != f.lookup_line(sp.hi()) + } + + #[instrument(skip(self), level = "trace")] + pub fn is_valid_span(&self, sp: Span) -> Result<(Loc, Loc), SpanLinesError> { + let lo = self.lookup_char_pos(sp.lo()); + trace!(?lo); + let hi = self.lookup_char_pos(sp.hi()); + trace!(?hi); + if lo.file.start_pos != hi.file.start_pos { + return Err(SpanLinesError::DistinctSources(DistinctSources { + begin: (lo.file.name.clone(), lo.file.start_pos), + end: (hi.file.name.clone(), hi.file.start_pos), + })); + } + Ok((lo, 
hi)) + } + + pub fn is_line_before_span_empty(&self, sp: Span) -> bool { + match self.span_to_prev_source(sp) { + Ok(s) => s.rsplit_once('\n').unwrap_or(("", &s)).1.trim_start().is_empty(), + Err(_) => false, + } + } + + pub fn span_to_lines(&self, sp: Span) -> FileLinesResult { + debug!("span_to_lines(sp={:?})", sp); + let (lo, hi) = self.is_valid_span(sp)?; + assert!(hi.line >= lo.line); + + if sp.is_dummy() { + return Ok(FileLines { file: lo.file, lines: Vec::new() }); + } + + let mut lines = Vec::with_capacity(hi.line - lo.line + 1); + + // The span starts partway through the first line, + // but after that it starts from offset 0. + let mut start_col = lo.col; + + // For every line but the last, it extends from `start_col` + // and to the end of the line. Be careful because the line + // numbers in Loc are 1-based, so we subtract 1 to get 0-based + // lines. + // + // FIXME: now that we handle DUMMY_SP up above, we should consider + // asserting that the line numbers here are all indeed 1-based. + let hi_line = hi.line.saturating_sub(1); + for line_index in lo.line.saturating_sub(1)..hi_line { + let line_len = lo.file.get_line(line_index).map_or(0, |s| s.chars().count()); + lines.push(LineInfo { line_index, start_col, end_col: CharPos::from_usize(line_len) }); + start_col = CharPos::from_usize(0); + } + + // For the last line, it extends from `start_col` to `hi.col`: + lines.push(LineInfo { line_index: hi_line, start_col, end_col: hi.col }); + + Ok(FileLines { file: lo.file, lines }) + } + + /// Extracts the source surrounding the given `Span` using the `extract_source` function. The + /// extract function takes three arguments: a string slice containing the source, an index in + /// the slice for the beginning of the span and an index in the slice for the end of the span. 
+ fn span_to_source<F, T>(&self, sp: Span, extract_source: F) -> Result<T, SpanSnippetError> + where + F: Fn(&str, usize, usize) -> Result<T, SpanSnippetError>, + { + let local_begin = self.lookup_byte_offset(sp.lo()); + let local_end = self.lookup_byte_offset(sp.hi()); + + if local_begin.sf.start_pos != local_end.sf.start_pos { + Err(SpanSnippetError::DistinctSources(DistinctSources { + begin: (local_begin.sf.name.clone(), local_begin.sf.start_pos), + end: (local_end.sf.name.clone(), local_end.sf.start_pos), + })) + } else { + self.ensure_source_file_source_present(local_begin.sf.clone()); + + let start_index = local_begin.pos.to_usize(); + let end_index = local_end.pos.to_usize(); + let source_len = (local_begin.sf.end_pos - local_begin.sf.start_pos).to_usize(); + + if start_index > end_index || end_index > source_len { + return Err(SpanSnippetError::MalformedForSourcemap(MalformedSourceMapPositions { + name: local_begin.sf.name.clone(), + source_len, + begin_pos: local_begin.pos, + end_pos: local_end.pos, + })); + } + + if let Some(ref src) = local_begin.sf.src { + extract_source(src, start_index, end_index) + } else if let Some(src) = local_begin.sf.external_src.borrow().get_source() { + extract_source(src, start_index, end_index) + } else { + Err(SpanSnippetError::SourceNotAvailable { filename: local_begin.sf.name.clone() }) + } + } + } + + pub fn is_span_accessible(&self, sp: Span) -> bool { + self.span_to_source(sp, |src, start_index, end_index| { + Ok(src.get(start_index..end_index).is_some()) + }) + .map_or(false, |is_accessible| is_accessible) + } + + /// Returns the source snippet as `String` corresponding to the given `Span`. 
+ pub fn span_to_snippet(&self, sp: Span) -> Result<String, SpanSnippetError> { + self.span_to_source(sp, |src, start_index, end_index| { + src.get(start_index..end_index) + .map(|s| s.to_string()) + .ok_or(SpanSnippetError::IllFormedSpan(sp)) + }) + } + + pub fn span_to_margin(&self, sp: Span) -> Option<usize> { + Some(self.indentation_before(sp)?.len()) + } + + pub fn indentation_before(&self, sp: Span) -> Option<String> { + self.span_to_source(sp, |src, start_index, _| { + let before = &src[..start_index]; + let last_line = before.rsplit_once('\n').map_or(before, |(_, last)| last); + Ok(last_line + .split_once(|c: char| !c.is_whitespace()) + .map_or(last_line, |(indent, _)| indent) + .to_string()) + }) + .ok() + } + + /// Returns the source snippet as `String` before the given `Span`. + pub fn span_to_prev_source(&self, sp: Span) -> Result<String, SpanSnippetError> { + self.span_to_source(sp, |src, start_index, _| { + src.get(..start_index).map(|s| s.to_string()).ok_or(SpanSnippetError::IllFormedSpan(sp)) + }) + } + + /// Extends the given `Span` to just after the previous occurrence of `c`. Return the same span + /// if no character could be found or if an error occurred while retrieving the code snippet. + pub fn span_extend_to_prev_char(&self, sp: Span, c: char, accept_newlines: bool) -> Span { + if let Ok(prev_source) = self.span_to_prev_source(sp) { + let prev_source = prev_source.rsplit(c).next().unwrap_or(""); + if !prev_source.is_empty() && (accept_newlines || !prev_source.contains('\n')) { + return sp.with_lo(BytePos(sp.lo().0 - prev_source.len() as u32)); + } + } + + sp + } + + /// Extends the given `Span` to just after the previous occurrence of `pat` when surrounded by + /// whitespace. Returns None if the pattern could not be found or if an error occurred while + /// retrieving the code snippet. 
+ pub fn span_extend_to_prev_str( + &self, + sp: Span, + pat: &str, + accept_newlines: bool, + include_whitespace: bool, + ) -> Option<Span> { + // assure that the pattern is delimited, to avoid the following + // fn my_fn() + // ^^^^ returned span without the check + // ---------- correct span + let prev_source = self.span_to_prev_source(sp).ok()?; + for ws in &[" ", "\t", "\n"] { + let pat = pat.to_owned() + ws; + if let Some(pat_pos) = prev_source.rfind(&pat) { + let just_after_pat_pos = pat_pos + pat.len() - 1; + let just_after_pat_plus_ws = if include_whitespace { + just_after_pat_pos + + prev_source[just_after_pat_pos..] + .find(|c: char| !c.is_whitespace()) + .unwrap_or(0) + } else { + just_after_pat_pos + }; + let len = prev_source.len() - just_after_pat_plus_ws; + let prev_source = &prev_source[just_after_pat_plus_ws..]; + if accept_newlines || !prev_source.trim_start().contains('\n') { + return Some(sp.with_lo(BytePos(sp.lo().0 - len as u32))); + } + } + } + + None + } + + /// Returns the source snippet as `String` after the given `Span`. + pub fn span_to_next_source(&self, sp: Span) -> Result<String, SpanSnippetError> { + self.span_to_source(sp, |src, _, end_index| { + src.get(end_index..).map(|s| s.to_string()).ok_or(SpanSnippetError::IllFormedSpan(sp)) + }) + } + + /// Extends the given `Span` while the next character matches the predicate + pub fn span_extend_while( + &self, + span: Span, + f: impl Fn(char) -> bool, + ) -> Result<Span, SpanSnippetError> { + self.span_to_source(span, |s, _start, end| { + let n = s[end..].char_indices().find(|&(_, c)| !f(c)).map_or(s.len() - end, |(i, _)| i); + Ok(span.with_hi(span.hi() + BytePos(n as u32))) + }) + } + + /// Extends the given `Span` to just after the next occurrence of `c`. 
+ pub fn span_extend_to_next_char(&self, sp: Span, c: char, accept_newlines: bool) -> Span { + if let Ok(next_source) = self.span_to_next_source(sp) { + let next_source = next_source.split(c).next().unwrap_or(""); + if !next_source.is_empty() && (accept_newlines || !next_source.contains('\n')) { + return sp.with_hi(BytePos(sp.hi().0 + next_source.len() as u32)); + } + } + + sp + } + + /// Extends the given `Span` to contain the entire line it is on. + pub fn span_extend_to_line(&self, sp: Span) -> Span { + self.span_extend_to_prev_char(self.span_extend_to_next_char(sp, '\n', true), '\n', true) + } + + /// Given a `Span`, tries to get a shorter span ending before the first occurrence of `char` + /// `c`. + pub fn span_until_char(&self, sp: Span, c: char) -> Span { + match self.span_to_snippet(sp) { + Ok(snippet) => { + let snippet = snippet.split(c).next().unwrap_or("").trim_end(); + if !snippet.is_empty() && !snippet.contains('\n') { + sp.with_hi(BytePos(sp.lo().0 + snippet.len() as u32)) + } else { + sp + } + } + _ => sp, + } + } + + /// Given a `Span`, tries to get a shorter span ending just after the first occurrence of `char` + /// `c`. + pub fn span_through_char(&self, sp: Span, c: char) -> Span { + if let Ok(snippet) = self.span_to_snippet(sp) { + if let Some(offset) = snippet.find(c) { + return sp.with_hi(BytePos(sp.lo().0 + (offset + c.len_utf8()) as u32)); + } + } + sp + } + + /// Given a `Span`, gets a new `Span` covering the first token and all its trailing whitespace + /// or the original `Span`. + /// + /// If `sp` points to `"let mut x"`, then a span pointing at `"let "` will be returned. 
+ pub fn span_until_non_whitespace(&self, sp: Span) -> Span { + let mut whitespace_found = false; + + self.span_take_while(sp, |c| { + if !whitespace_found && c.is_whitespace() { + whitespace_found = true; + } + + !whitespace_found || c.is_whitespace() + }) + } + + /// Given a `Span`, gets a new `Span` covering the first token without its trailing whitespace + /// or the original `Span` in case of error. + /// + /// If `sp` points to `"let mut x"`, then a span pointing at `"let"` will be returned. + pub fn span_until_whitespace(&self, sp: Span) -> Span { + self.span_take_while(sp, |c| !c.is_whitespace()) + } + + /// Given a `Span`, gets a shorter one until `predicate` yields `false`. + pub fn span_take_while<P>(&self, sp: Span, predicate: P) -> Span + where + P: for<'r> FnMut(&'r char) -> bool, + { + if let Ok(snippet) = self.span_to_snippet(sp) { + let offset = snippet.chars().take_while(predicate).map(|c| c.len_utf8()).sum::<usize>(); + + sp.with_hi(BytePos(sp.lo().0 + (offset as u32))) + } else { + sp + } + } + + /// Given a `Span`, return a span ending in the closest `{`. This is useful when you have a + /// `Span` enclosing a whole item but we need to point at only the head (usually the first + /// line) of that item. + /// + /// *Only suitable for diagnostics.* + pub fn guess_head_span(&self, sp: Span) -> Span { + // FIXME: extend the AST items to have a head span, or replace callers with pointing at + // the item's ident when appropriate. + self.span_until_char(sp, '{') + } + + /// Returns a new span representing just the first character of the given span. 
// NOTE(review): the methods below belong to `impl SourceMap` (header is outside this view).

    pub fn start_point(&self, sp: Span) -> Span {
        // Width in bytes of the first character of the span; defaults to 1
        // when the source text is unavailable or the span is empty.
        let width = {
            let sp = sp.data();
            let local_begin = self.lookup_byte_offset(sp.lo);
            let start_index = local_begin.pos.to_usize();
            let src = local_begin.sf.external_src.borrow();

            let snippet = if let Some(ref src) = local_begin.sf.src {
                Some(&src[start_index..])
            } else if let Some(src) = src.get_source() {
                Some(&src[start_index..])
            } else {
                None
            };

            match snippet {
                None => 1,
                Some(snippet) => match snippet.chars().next() {
                    None => 1,
                    Some(c) => c.len_utf8(),
                },
            }
        };

        sp.with_hi(BytePos(sp.lo().0 + width as u32))
    }

    /// Returns a new span representing just the last character of this span.
    pub fn end_point(&self, sp: Span) -> Span {
        let pos = sp.hi().0;

        // Step back over the final character (may be multi-byte).
        let width = self.find_width_of_character_at_span(sp, false);
        let corrected_end_position = pos.checked_sub(width).unwrap_or(pos);

        // Never move the start of the resulting span before `sp.lo()`.
        let end_point = BytePos(cmp::max(corrected_end_position, sp.lo().0));
        sp.with_lo(end_point)
    }

    /// Returns a new span representing the next character after the end-point of this span.
    pub fn next_point(&self, sp: Span) -> Span {
        if sp.is_dummy() {
            return sp;
        }
        let start_of_next_point = sp.hi().0;

        let width = self.find_width_of_character_at_span(sp.shrink_to_hi(), true);
        // If the width is 1, then the next span should point to the same `lo` and `hi`. However,
        // in the case of a multibyte character, where the width != 1, the next span should
        // span multiple bytes to include the whole character.
        let end_of_next_point =
            start_of_next_point.checked_add(width - 1).unwrap_or(start_of_next_point);

        let end_of_next_point = BytePos(cmp::max(sp.lo().0 + 1, end_of_next_point));
        Span::new(BytePos(start_of_next_point), end_of_next_point, sp.ctxt(), None)
    }

    /// Finds the width of the character, either before or after the end of provided span,
    /// depending on the `forwards` parameter.
// NOTE(review): the methods below belong to `impl SourceMap` (header is outside this view).

    /// Returns the byte width of the character adjacent to `sp.hi()`
    /// (after it when `forwards`, before it otherwise). Falls back to `1`
    /// on any malformed input or missing source.
    fn find_width_of_character_at_span(&self, sp: Span, forwards: bool) -> u32 {
        let sp = sp.data();
        if sp.lo == sp.hi {
            debug!("find_width_of_character_at_span: early return empty span");
            return 1;
        }

        let local_begin = self.lookup_byte_offset(sp.lo);
        let local_end = self.lookup_byte_offset(sp.hi);
        debug!(
            "find_width_of_character_at_span: local_begin=`{:?}`, local_end=`{:?}`",
            local_begin, local_end
        );

        if local_begin.sf.start_pos != local_end.sf.start_pos {
            debug!("find_width_of_character_at_span: begin and end are in different files");
            return 1;
        }

        let start_index = local_begin.pos.to_usize();
        let end_index = local_end.pos.to_usize();
        debug!(
            "find_width_of_character_at_span: start_index=`{:?}`, end_index=`{:?}`",
            start_index, end_index
        );

        // Disregard indexes that are at the start or end of their spans, they can't fit bigger
        // characters.
        if (!forwards && end_index == usize::MIN) || (forwards && start_index == usize::MAX) {
            debug!("find_width_of_character_at_span: start or end of span, cannot be multibyte");
            return 1;
        }

        let source_len = (local_begin.sf.end_pos - local_begin.sf.start_pos).to_usize();
        debug!("find_width_of_character_at_span: source_len=`{:?}`", source_len);
        // Ensure indexes are also not malformed.
        if start_index > end_index || end_index > source_len {
            debug!("find_width_of_character_at_span: source indexes are malformed");
            return 1;
        }

        let src = local_begin.sf.external_src.borrow();

        // We need to extend the snippet to the end of the src rather than to end_index so when
        // searching forwards for boundaries we've got somewhere to search.
        let snippet = if let Some(ref src) = local_begin.sf.src {
            &src[start_index..]
        } else if let Some(src) = src.get_source() {
            &src[start_index..]
        } else {
            return 1;
        };
        debug!("find_width_of_character_at_span: snippet=`{:?}`", snippet);

        let mut target = if forwards { end_index + 1 } else { end_index - 1 };
        debug!("find_width_of_character_at_span: initial target=`{:?}`", target);

        // Walk until `target` lands on a UTF-8 character boundary.
        while !snippet.is_char_boundary(target - start_index) && target < source_len {
            target = if forwards {
                target + 1
            } else {
                match target.checked_sub(1) {
                    Some(target) => target,
                    None => {
                        break;
                    }
                }
            };
            debug!("find_width_of_character_at_span: target=`{:?}`", target);
        }
        debug!("find_width_of_character_at_span: final target=`{:?}`", target);

        if forwards { (target - end_index) as u32 } else { (end_index - target) as u32 }
    }

    /// Looks up a `SourceFile` by (remapped) name; returns `None` if no file matches.
    pub fn get_source_file(&self, filename: &FileName) -> Option<Lrc<SourceFile>> {
        // Remap filename before lookup
        let filename = self.path_mapping().map_filename_prefix(filename).0;
        for sf in self.files.borrow().source_files.iter() {
            if filename == sf.name {
                return Some(sf.clone());
            }
        }
        None
    }

    /// For a global `BytePos`, computes the local offset within the containing `SourceFile`.
    pub fn lookup_byte_offset(&self, bpos: BytePos) -> SourceFileAndBytePos {
        let idx = self.lookup_source_file_idx(bpos);
        let sf = (*self.files.borrow().source_files)[idx].clone();
        let offset = bpos - sf.start_pos;
        SourceFileAndBytePos { sf, pos: offset }
    }

    // Returns the index of the `SourceFile` (in `self.files`) that contains `pos`.
// NOTE(review): the methods below belong to `impl SourceMap` (header is outside this view).

    // This index is guaranteed to be valid for the lifetime of this `SourceMap`,
    // since `source_files` is a `MonotonicVec`
    pub fn lookup_source_file_idx(&self, pos: BytePos) -> usize {
        // Binary search on `start_pos`; on a miss, `p - 1` is the file whose
        // range covers `pos`.
        self.files
            .borrow()
            .source_files
            .binary_search_by_key(&pos, |key| key.start_pos)
            .unwrap_or_else(|p| p - 1)
    }

    /// Total number of source lines across all files in this map.
    pub fn count_lines(&self) -> usize {
        self.files().iter().fold(0, |a, f| a + f.count_lines())
    }

    /// Tries to produce a span covering the name of the function that
    /// encloses `span`, by extending backwards to the previous `fn` keyword.
    pub fn generate_fn_name_span(&self, span: Span) -> Option<Span> {
        let prev_span = self.span_extend_to_prev_str(span, "fn", true, true)?;
        if let Ok(snippet) = self.span_to_snippet(prev_span) {
            debug!(
                "generate_fn_name_span: span={:?}, prev_span={:?}, snippet={:?}",
                span, prev_span, snippet
            );

            if snippet.is_empty() {
                return None;
            };

            // The function name is the identifier run after "fn ".
            let len = snippet
                .find(|c: char| !c.is_alphanumeric() && c != '_')
                .expect("no label after fn");
            Some(prev_span.with_hi(BytePos(prev_span.lo().0 + len as u32)))
        } else {
            None
        }
    }

    /// Takes the span of a type parameter in a function signature and try to generate a span for
    /// the function name (with generics) and a new snippet for this span with the pointed type
    /// parameter as a new local type parameter.
    ///
    /// For instance:
    /// ```rust,ignore (pseudo-Rust)
    /// // Given span
    /// fn my_function(param: T)
    /// //                    ^ Original span
    ///
    /// // Result
    /// fn my_function(param: T)
    /// // ^^^^^^^^^^^ Generated span with snippet `my_function<T>`
    /// ```
    ///
    /// Attention: The method used is very fragile since it essentially duplicates the work of the
    /// parser. If you need to use this function or something similar, please consider updating the
    /// `SourceMap` functions and this function to something more robust.
    pub fn generate_local_type_param_snippet(&self, span: Span) -> Option<(Span, String)> {
        // Try to extend the span to the previous "fn" keyword to retrieve the function
        // signature.
        if let Some(sugg_span) = self.span_extend_to_prev_str(span, "fn", false, true) {
            if let Ok(snippet) = self.span_to_snippet(sugg_span) {
                // Consume the function name.
                let mut offset = snippet
                    .find(|c: char| !c.is_alphanumeric() && c != '_')
                    .expect("no label after fn");

                // Consume the generics part of the function signature.
                let mut bracket_counter = 0;
                let mut last_char = None;
                for c in snippet[offset..].chars() {
                    match c {
                        '<' => bracket_counter += 1,
                        '>' => bracket_counter -= 1,
                        '(' => {
                            // Parameter list starts only once all `<...>` are closed.
                            if bracket_counter == 0 {
                                break;
                            }
                        }
                        _ => {}
                    }
                    offset += c.len_utf8();
                    last_char = Some(c);
                }

                // Adjust the suggestion span to encompass the function name with its generics.
                let sugg_span = sugg_span.with_hi(BytePos(sugg_span.lo().0 + offset as u32));

                // Prepare the new suggested snippet to append the type parameter that triggered
                // the error in the generics of the function signature.
                let mut new_snippet = if last_char == Some('>') {
                    format!("{}, ", &snippet[..(offset - '>'.len_utf8())])
                } else {
                    format!("{}<", &snippet[..offset])
                };
                new_snippet
                    .push_str(&self.span_to_snippet(span).unwrap_or_else(|_| "T".to_string()));
                new_snippet.push('>');

                return Some((sugg_span, new_snippet));
            }
        }

        None
    }

    /// Lazily loads external source for `source_file` (via the file loader)
    /// and reports whether source text is now available.
    pub fn ensure_source_file_source_present(&self, source_file: Lrc<SourceFile>) -> bool {
        source_file.add_external_src(|| {
            match source_file.name {
                // NOTE: `if let` match guard (unstable `if_let_guard` feature,
                // available inside the compiler itself).
                FileName::Real(ref name) if let Some(local_path) = name.local_path() => {
                    self.file_loader.read_file(local_path).ok()
                }
                _ => None,
            }
        })
    }

    /// Whether the file containing `sp` was imported from another crate's metadata.
    pub fn is_imported(&self, sp: Span) -> bool {
        let source_file_index = self.lookup_source_file_idx(sp.lo());
        let source_file = &self.files()[source_file_index];
        source_file.is_imported()
    }

    /// Gets the span of a statement. If the statement is a macro expansion, the
    /// span in the context of the block span is found. The trailing semicolon is included
    /// on a best-effort basis.
// NOTE(review): the first two methods below belong to `impl SourceMap` (header is outside this view).

    pub fn stmt_span(&self, stmt_span: Span, block_span: Span) -> Span {
        if !stmt_span.from_expansion() {
            return stmt_span;
        }
        // Map back to the macro call site within the enclosing block.
        let mac_call = original_sp(stmt_span, block_span);
        self.mac_call_stmt_semi_span(mac_call).map_or(mac_call, |s| mac_call.with_hi(s.hi()))
    }

    /// Tries to find the span of the semicolon of a macro call statement.
    /// The input must be the *call site* span of a statement from macro expansion.
    /// ```ignore (illustrative)
    /// //       v output
    ///    mac!();
    /// // ^^^^^^ input
    /// ```
    pub fn mac_call_stmt_semi_span(&self, mac_call: Span) -> Option<Span> {
        // Skip trailing whitespace, then check that the very next byte is `;`.
        let span = self.span_extend_while(mac_call, char::is_whitespace).ok()?;
        let span = span.shrink_to_hi().with_hi(BytePos(span.hi().0.checked_add(1)?));
        if self.span_to_snippet(span).as_deref() != Ok(";") {
            return None;
        }
        Some(span)
    }
}

/// Maps path prefixes for `--remap-path-prefix`-style renaming of file names
/// in diagnostics and metadata.
#[derive(Clone)]
pub struct FilePathMapping {
    // (from, to) prefix pairs; later entries take precedence.
    mapping: Vec<(PathBuf, PathBuf)>,
    filename_display_for_diagnostics: FileNameDisplayPreference,
}

impl FilePathMapping {
    pub fn empty() -> FilePathMapping {
        FilePathMapping::new(Vec::new())
    }

    pub fn new(mapping: Vec<(PathBuf, PathBuf)>) -> FilePathMapping {
        // With any mapping present, prefer showing remapped paths in diagnostics.
        let filename_display_for_diagnostics = if mapping.is_empty() {
            FileNameDisplayPreference::Local
        } else {
            FileNameDisplayPreference::Remapped
        };

        FilePathMapping { mapping, filename_display_for_diagnostics }
    }

    /// Applies any path prefix substitution as defined by the mapping.
    /// The return value is the remapped path and a boolean indicating whether
    /// the path was affected by the mapping.
    pub fn map_prefix(&self, path: PathBuf) -> (PathBuf, bool) {
        if path.as_os_str().is_empty() {
            // Exit early if the path is empty and therefore there's nothing to remap.
            // This is mostly to reduce spam for `RUSTC_LOG=[remap_path_prefix]`.
            return (path, false);
        }

        return remap_path_prefix(&self.mapping, path);

        #[instrument(level = "debug", skip(mapping))]
        fn remap_path_prefix(mapping: &[(PathBuf, PathBuf)], path: PathBuf) -> (PathBuf, bool) {
            // NOTE: We are iterating over the mapping entries from last to first
            //       because entries specified later on the command line should
            //       take precedence.
            for &(ref from, ref to) in mapping.iter().rev() {
                debug!("Trying to apply {:?} => {:?}", from, to);

                if let Ok(rest) = path.strip_prefix(from) {
                    let remapped = if rest.as_os_str().is_empty() {
                        // This is subtle, joining an empty path onto e.g. `foo/bar` will
                        // result in `foo/bar/`, that is, there'll be an additional directory
                        // separator at the end. This can lead to duplicated directory separators
                        // in remapped paths down the line.
                        // So, if we have an exact match, we just return that without a call
                        // to `Path::join()`.
                        to.clone()
                    } else {
                        to.join(rest)
                    };
                    debug!("Match - remapped {:?} => {:?}", path, remapped);

                    return (remapped, true);
                } else {
                    debug!("No match - prefix {:?} does not match {:?}", from, path);
                }
            }

            debug!("Path {:?} was not remapped", path);
            (path, false)
        }
    }

    /// Applies `map_prefix` to a `FileName`, wrapping a successful remap in
    /// `RealFileName::Remapped` (keeping the local path for reference).
    fn map_filename_prefix(&self, file: &FileName) -> (FileName, bool) {
        match file {
            FileName::Real(realfile) if let RealFileName::LocalPath(local_path) = realfile => {
                let (mapped_path, mapped) = self.map_prefix(local_path.to_path_buf());
                let realfile = if mapped {
                    RealFileName::Remapped {
                        local_path: Some(local_path.clone()),
                        virtual_name: mapped_path,
                    }
                } else {
                    realfile.clone()
                };
                (FileName::Real(realfile), mapped)
            }
            FileName::Real(_) => unreachable!("attempted to remap an already remapped filename"),
            other => (other.clone(), false),
        }
    }

    /// Expand a relative path to an absolute path with remapping taken into account.
    /// Use this when absolute paths are required (e.g. debuginfo or crate metadata).
    ///
    /// The resulting `RealFileName` will have its `local_path` portion erased if
    /// possible (i.e. if there's also a remapped path).
    pub fn to_embeddable_absolute_path(
        &self,
        file_path: RealFileName,
        working_directory: &RealFileName,
    ) -> RealFileName {
        match file_path {
            // Anything that's already remapped we don't modify, except for erasing
            // the `local_path` portion.
            RealFileName::Remapped { local_path: _, virtual_name } => {
                RealFileName::Remapped {
                    // We do not want any local path to be exported into metadata
                    local_path: None,
                    // We use the remapped name verbatim, even if it looks like a relative
                    // path. The assumption is that the user doesn't want us to further
                    // process paths that have gone through remapping.
                    virtual_name,
                }
            }

            RealFileName::LocalPath(unmapped_file_path) => {
                // If no remapping has been applied yet, try to do so
                let (new_path, was_remapped) = self.map_prefix(unmapped_file_path);
                if was_remapped {
                    // It was remapped, so don't modify further
                    return RealFileName::Remapped { local_path: None, virtual_name: new_path };
                }

                if new_path.is_absolute() {
                    // No remapping has applied to this path and it is absolute,
                    // so the working directory cannot influence it either, so
                    // we are done.
                    return RealFileName::LocalPath(new_path);
                }

                debug_assert!(new_path.is_relative());
                let unmapped_file_path_rel = new_path;

                match working_directory {
                    RealFileName::LocalPath(unmapped_working_dir_abs) => {
                        let file_path_abs = unmapped_working_dir_abs.join(unmapped_file_path_rel);

                        // Although neither `working_directory` nor the file name were subject
                        // to path remapping, the concatenation between the two may be. Hence
                        // we need to do a remapping here.
                        let (file_path_abs, was_remapped) = self.map_prefix(file_path_abs);
                        if was_remapped {
                            RealFileName::Remapped {
                                // Erase the actual path
                                local_path: None,
                                virtual_name: file_path_abs,
                            }
                        } else {
                            // No kind of remapping applied to this path, so
                            // we leave it as it is.
                            RealFileName::LocalPath(file_path_abs)
                        }
                    }
                    RealFileName::Remapped {
                        local_path: _,
                        virtual_name: remapped_working_dir_abs,
                    } => {
                        // If working_directory has been remapped, then we emit
                        // Remapped variant as the expanded path won't be valid
                        RealFileName::Remapped {
                            local_path: None,
                            virtual_name: Path::new(remapped_working_dir_abs)
                                .join(unmapped_file_path_rel),
                        }
                    }
                }
            }
        }
    }
}

// ==== new file: compiler/rustc_span/src/source_map/tests.rs ====

use super::*;

use rustc_data_structures::sync::Lrc;

/// Builds a `SourceMap` with three fixed files (one of them empty) used by
/// the lookup tests below.
fn init_source_map() -> SourceMap {
    let sm = SourceMap::new(FilePathMapping::empty());
    sm.new_source_file(PathBuf::from("blork.rs").into(), "first line.\nsecond line".to_string());
    sm.new_source_file(PathBuf::from("empty.rs").into(), String::new());
    sm.new_source_file(PathBuf::from("blork2.rs").into(), "first line.\nsecond line".to_string());
    sm
}

// Test-only helpers on `SourceMap`.
impl SourceMap {
    /// Returns `Some(span)`, a union of the LHS and RHS span. The LHS must precede the RHS. If
    /// there are gaps between LHS and RHS, the resulting union will cross these gaps.
    /// For this to work,
    ///
    /// * the syntax contexts of both spans much match,
    /// * the LHS span needs to end on the same line the RHS span begins,
    /// * the LHS span must start at or before the RHS span.
    fn merge_spans(&self, sp_lhs: Span, sp_rhs: Span) -> Option<Span> {
        // Ensure we're at the same expansion ID.
        if sp_lhs.ctxt() != sp_rhs.ctxt() {
            return None;
        }

        let lhs_end = match self.lookup_line(sp_lhs.hi()) {
            Ok(x) => x,
            Err(_) => return None,
        };
        let rhs_begin = match self.lookup_line(sp_rhs.lo()) {
            Ok(x) => x,
            Err(_) => return None,
        };

        // If we must cross lines to merge, don't merge.
        if lhs_end.line != rhs_begin.line {
            return None;
        }

        // Ensure these follow the expected order and that we don't overlap.
        if (sp_lhs.lo() <= sp_rhs.lo()) && (sp_lhs.hi() <= sp_rhs.lo()) {
            Some(sp_lhs.to(sp_rhs))
        } else {
            None
        }
    }

    /// Converts an absolute `BytePos` to a `CharPos` relative to the `SourceFile`.
    fn bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos {
        let idx = self.lookup_source_file_idx(bpos);
        let sf = &(*self.files.borrow().source_files)[idx];
        sf.bytepos_to_file_charpos(bpos)
    }
}

/// Tests `lookup_byte_offset`.
#[test]
fn t3() {
    let sm = init_source_map();

    let srcfbp1 = sm.lookup_byte_offset(BytePos(23));
    assert_eq!(srcfbp1.sf.name, PathBuf::from("blork.rs").into());
    assert_eq!(srcfbp1.pos, BytePos(23));

    let srcfbp1 = sm.lookup_byte_offset(BytePos(24));
    assert_eq!(srcfbp1.sf.name, PathBuf::from("empty.rs").into());
    assert_eq!(srcfbp1.pos, BytePos(0));

    let srcfbp2 = sm.lookup_byte_offset(BytePos(25));
    assert_eq!(srcfbp2.sf.name, PathBuf::from("blork2.rs").into());
    assert_eq!(srcfbp2.pos, BytePos(0));
}

/// Tests `bytepos_to_file_charpos`.
#[test]
fn t4() {
    let sm = init_source_map();

    let cp1 = sm.bytepos_to_file_charpos(BytePos(22));
    assert_eq!(cp1, CharPos(22));

    let cp2 = sm.bytepos_to_file_charpos(BytePos(25));
    assert_eq!(cp2, CharPos(0));
}

/// Tests zero-length `SourceFile`s.
#[test]
fn t5() {
    let sm = init_source_map();

    // Position 22 is on the second line of blork.rs ("second line", col 10).
    let loc1 = sm.lookup_char_pos(BytePos(22));
    assert_eq!(loc1.file.name, PathBuf::from("blork.rs").into());
    assert_eq!(loc1.line, 2);
    assert_eq!(loc1.col, CharPos(10));

    // Position 25 falls past the zero-length empty.rs, into blork2.rs.
    let loc2 = sm.lookup_char_pos(BytePos(25));
    assert_eq!(loc2.file.name, PathBuf::from("blork2.rs").into());
    assert_eq!(loc2.line, 1);
    assert_eq!(loc2.col, CharPos(0));
}

/// Builds a `SourceMap` containing files with multi-byte characters.
fn init_source_map_mbc() -> SourceMap {
    let sm = SourceMap::new(FilePathMapping::empty());
    // "€" is a three-byte UTF8 char.
    sm.new_source_file(
        PathBuf::from("blork.rs").into(),
        "fir€st €€€€ line.\nsecond line".to_string(),
    );
    sm.new_source_file(
        PathBuf::from("blork2.rs").into(),
        "first line€€.\n€ second line".to_string(),
    );
    sm
}

/// Tests `bytepos_to_file_charpos` in the presence of multi-byte chars.
#[test]
fn t6() {
    let sm = init_source_map_mbc();

    let cp1 = sm.bytepos_to_file_charpos(BytePos(3));
    assert_eq!(cp1, CharPos(3));

    let cp2 = sm.bytepos_to_file_charpos(BytePos(6));
    assert_eq!(cp2, CharPos(4));

    let cp3 = sm.bytepos_to_file_charpos(BytePos(56));
    assert_eq!(cp3, CharPos(12));

    let cp4 = sm.bytepos_to_file_charpos(BytePos(61));
    assert_eq!(cp4, CharPos(15));
}

/// Test `span_to_lines` for a span ending at the end of a `SourceFile`.
#[test]
fn t7() {
    let sm = init_source_map();
    let span = Span::with_root_ctxt(BytePos(12), BytePos(23));
    let file_lines = sm.span_to_lines(span).unwrap();

    assert_eq!(file_lines.file.name, PathBuf::from("blork.rs").into());
    assert_eq!(file_lines.lines.len(), 1);
    assert_eq!(file_lines.lines[0].line_index, 1);
}

/// Given a string like "    ~~~~~~~~~~~~ ", produces a span
/// converting that range. The idea is that the string has the same
/// length as the input, and we uncover the byte positions. Note
/// that this can span lines and so on.
fn span_from_selection(input: &str, selection: &str) -> Span {
    assert_eq!(input.len(), selection.len());
    // The span covers from the first to just past the last '~'.
    let left_index = selection.find('~').unwrap() as u32;
    let right_index = selection.rfind('~').map_or(left_index, |x| x as u32);
    Span::with_root_ctxt(BytePos(left_index), BytePos(right_index + 1))
}

/// Tests `span_to_snippet` and `span_to_lines` for a span converting 3
/// lines in the middle of a file.
#[test]
fn span_to_snippet_and_lines_spanning_multiple_lines() {
    let sm = SourceMap::new(FilePathMapping::empty());
    let inputtext = "aaaaa\nbbbbBB\nCCC\nDDDDDddddd\neee\n";
    let selection = "     \n    ~~\n~~~\n~~~~~     \n   \n";
    sm.new_source_file(Path::new("blork.rs").to_owned().into(), inputtext.to_string());
    let span = span_from_selection(inputtext, selection);

    // Check that we are extracting the text we thought we were extracting.
    assert_eq!(&sm.span_to_snippet(span).unwrap(), "BB\nCCC\nDDDDD");

    // Check that span_to_lines gives us the complete result with the lines/cols we expected.
    let lines = sm.span_to_lines(span).unwrap();
    let expected = vec![
        LineInfo { line_index: 1, start_col: CharPos(4), end_col: CharPos(6) },
        LineInfo { line_index: 2, start_col: CharPos(0), end_col: CharPos(3) },
        LineInfo { line_index: 3, start_col: CharPos(0), end_col: CharPos(5) },
    ];
    assert_eq!(lines.lines, expected);
}

/// Test span_to_snippet for a span ending at the end of a `SourceFile`.
#[test]
fn t8() {
    let sm = init_source_map();
    let span = Span::with_root_ctxt(BytePos(12), BytePos(23));
    let snippet = sm.span_to_snippet(span);

    assert_eq!(snippet, Ok("second line".to_string()));
}

/// Test `span_to_str` for a span ending at the end of a `SourceFile`.
#[test]
fn t9() {
    let sm = init_source_map();
    let span = Span::with_root_ctxt(BytePos(12), BytePos(23));
    let sstr = sm.span_to_diagnostic_string(span);

    assert_eq!(sstr, "blork.rs:2:1: 2:12");
}

/// Tests failing to merge two spans on different lines.
#[test]
fn span_merging_fail() {
    let sm = SourceMap::new(FilePathMapping::empty());
    let inputtext = "bbbb BB\ncc CCC\n";
    let selection1 = "     ~~\n      \n";
    let selection2 = "       \n   ~~~\n";
    sm.new_source_file(Path::new("blork.rs").to_owned().into(), inputtext.to_owned());
    let span1 = span_from_selection(inputtext, selection1);
    let span2 = span_from_selection(inputtext, selection2);

    // The two spans are on different lines, so merging must fail.
    assert!(sm.merge_spans(span1, span2).is_none());
}

/// Tests loading an external source file that requires normalization.
#[test]
fn t10() {
    let sm = SourceMap::new(FilePathMapping::empty());
    // CRLF line endings are normalized to LF on load.
    let unnormalized = "first line.\r\nsecond line";
    let normalized = "first line.\nsecond line";

    let src_file = sm.new_source_file(PathBuf::from("blork.rs").into(), unnormalized.to_string());

    assert_eq!(src_file.src.as_ref().unwrap().as_ref(), normalized);
    assert!(
        src_file.src_hash.matches(unnormalized),
        "src_hash should use the source before normalization"
    );

    let SourceFile {
        name,
        src_hash,
        start_pos,
        end_pos,
        lines,
        multibyte_chars,
        non_narrow_chars,
        normalized_pos,
        name_hash,
        ..
    } = (*src_file).clone();

    // Re-import the same file as if it came from another crate's metadata.
    let imported_src_file = sm.new_imported_source_file(
        name,
        src_hash,
        name_hash,
        (end_pos - start_pos).to_usize(),
        CrateNum::new(0),
        lines,
        multibyte_chars,
        non_narrow_chars,
        normalized_pos,
        start_pos,
        end_pos,
    );

    assert!(
        imported_src_file.external_src.borrow().get_source().is_none(),
        "imported source file should not have source yet"
    );
    imported_src_file.add_external_src(|| Some(unnormalized.to_string()));
    assert_eq!(
        imported_src_file.external_src.borrow().get_source().unwrap().as_ref(),
        normalized,
        "imported source file should be normalized"
    );
}

/// Returns the span corresponding to the `n`th occurrence of `substring` in `source_text`.
trait SourceMapExtension {
    fn span_substr(
        &self,
        file: &Lrc<SourceFile>,
        source_text: &str,
        substring: &str,
        n: usize,
    ) -> Span;
}

impl SourceMapExtension for SourceMap {
    fn span_substr(
        &self,
        file: &Lrc<SourceFile>,
        source_text: &str,
        substring: &str,
        n: usize,
    ) -> Span {
        eprintln!(
            "span_substr(file={:?}/{:?}, substring={:?}, n={})",
            file.name, file.start_pos, substring, n
        );
        let mut i = 0;
        let mut hi = 0;
        // Scan forward through successive occurrences until the n-th (0-based).
        loop {
            let offset = source_text[hi..].find(substring).unwrap_or_else(|| {
                panic!(
                    "source_text `{}` does not have {} occurrences of `{}`, only {}",
                    source_text, n, substring, i
                );
            });
            let lo = hi + offset;
            hi = lo + substring.len();
            if i == n {
                // Offsets are file-local; shift by the file's global start position.
                let span = Span::with_root_ctxt(
                    BytePos(lo as u32 + file.start_pos.0),
                    BytePos(hi as u32 + file.start_pos.0),
                );
                assert_eq!(&self.span_to_snippet(span).unwrap()[..], substring);
                return span;
            }
            i += 1;
        }
    }
}

// Takes a unix-style path and returns a platform specific path.
fn path(p: &str) -> PathBuf {
    path_str(p).into()
}

// Takes a unix-style path and returns a platform specific path.
fn path_str(p: &str) -> String {
    #[cfg(not(windows))]
    {
        return p.into();
    }

    #[cfg(windows)]
    {
        let mut path = p.replace('/', "\\");
        if let Some(rest) = path.strip_prefix('\\') {
            path = ["X:\\", rest].concat();
        }

        path
    }
}

fn map_path_prefix(mapping: &FilePathMapping, p: &str) -> String {
    // It's important that we convert to a string here because that's what
    // later stages do too (e.g. in the backend), and comparing `Path` values
    // won't catch some differences at the string level, e.g. "abc" and "abc/"
    // compare as equal.
    mapping.map_prefix(path(p)).0.to_string_lossy().to_string()
}

#[test]
fn path_prefix_remapping() {
    // Relative to relative
    {
        let mapping = &FilePathMapping::new(vec![(path("abc/def"), path("foo"))]);

        assert_eq!(map_path_prefix(mapping, "abc/def/src/main.rs"), path_str("foo/src/main.rs"));
        assert_eq!(map_path_prefix(mapping, "abc/def"), path_str("foo"));
    }

    // Relative to absolute
    {
        let mapping = &FilePathMapping::new(vec![(path("abc/def"), path("/foo"))]);

        assert_eq!(map_path_prefix(mapping, "abc/def/src/main.rs"), path_str("/foo/src/main.rs"));
        assert_eq!(map_path_prefix(mapping, "abc/def"), path_str("/foo"));
    }

    // Absolute to relative
    {
        let mapping = &FilePathMapping::new(vec![(path("/abc/def"), path("foo"))]);

        assert_eq!(map_path_prefix(mapping, "/abc/def/src/main.rs"), path_str("foo/src/main.rs"));
        assert_eq!(map_path_prefix(mapping, "/abc/def"), path_str("foo"));
    }

    // Absolute to absolute
    {
        let mapping = &FilePathMapping::new(vec![(path("/abc/def"), path("/foo"))]);

        assert_eq!(map_path_prefix(mapping, "/abc/def/src/main.rs"), path_str("/foo/src/main.rs"));
        assert_eq!(map_path_prefix(mapping, "/abc/def"), path_str("/foo"));
    }
}

#[test]
fn path_prefix_remapping_expand_to_absolute() {
    // "virtual" working directory is relative path
    let mapping =
        &FilePathMapping::new(vec![(path("/foo"), path("FOO")), (path("/bar"), path("BAR"))]);
    let working_directory = path("/foo");
    let working_directory = RealFileName::Remapped {
        local_path: Some(working_directory.clone()),
        virtual_name: mapping.map_prefix(working_directory).0,
    };

    assert_eq!(working_directory.remapped_path_if_available(), path("FOO"));

    // Unmapped absolute path
    assert_eq!(
        mapping.to_embeddable_absolute_path(
            RealFileName::LocalPath(path("/foo/src/main.rs")),
            &working_directory
        ),
        RealFileName::Remapped { local_path: None, virtual_name: path("FOO/src/main.rs") }
    );

    // Unmapped absolute path with unrelated working directory
    assert_eq!(
        mapping.to_embeddable_absolute_path(
            RealFileName::LocalPath(path("/bar/src/main.rs")),
            &working_directory
        ),
        RealFileName::Remapped { local_path: None, virtual_name: path("BAR/src/main.rs") }
    );

    // Unmapped absolute path that does not match any prefix
    assert_eq!(
        mapping.to_embeddable_absolute_path(
            RealFileName::LocalPath(path("/quux/src/main.rs")),
            &working_directory
        ),
        RealFileName::LocalPath(path("/quux/src/main.rs")),
    );

    // Unmapped relative path
    assert_eq!(
        mapping.to_embeddable_absolute_path(
            RealFileName::LocalPath(path("src/main.rs")),
            &working_directory
        ),
        RealFileName::Remapped { local_path: None, virtual_name: path("FOO/src/main.rs") }
    );

    // Unmapped relative path with `./`
    assert_eq!(
        mapping.to_embeddable_absolute_path(
            RealFileName::LocalPath(path("./src/main.rs")),
            &working_directory
        ),
        RealFileName::Remapped { local_path: None, virtual_name: path("FOO/src/main.rs") }
    );

    // Unmapped relative path that does not match any prefix
    assert_eq!(
        mapping.to_embeddable_absolute_path(
            RealFileName::LocalPath(path("quux/src/main.rs")),
            &RealFileName::LocalPath(path("/abc")),
        ),
        RealFileName::LocalPath(path("/abc/quux/src/main.rs")),
    );

    // Already remapped absolute path
    assert_eq!(
        mapping.to_embeddable_absolute_path(
            RealFileName::Remapped {
                local_path: Some(path("/foo/src/main.rs")),
                virtual_name: path("FOO/src/main.rs"),
            },
            &working_directory
        ),
        RealFileName::Remapped { local_path: None, virtual_name: path("FOO/src/main.rs") }
    );

    // Already remapped absolute path, with unrelated working directory
    assert_eq!(
        mapping.to_embeddable_absolute_path(
            RealFileName::Remapped {
                local_path: Some(path("/bar/src/main.rs")),
                virtual_name: path("BAR/src/main.rs"),
            },
            &working_directory
        ),
        RealFileName::Remapped { local_path: None, virtual_name: path("BAR/src/main.rs") }
    );

    // Already remapped relative path
    assert_eq!(
        mapping.to_embeddable_absolute_path(
            RealFileName::Remapped { local_path: None, virtual_name: path("XYZ/src/main.rs") },
            &working_directory
        ),
        RealFileName::Remapped { local_path: None, virtual_name: path("XYZ/src/main.rs") }
    );
}

// ==== new file: compiler/rustc_span/src/span_encoding.rs ====

// Spans are encoded using 1-bit tag and 2 different encoding formats (one for each tag value).
// One format is used for keeping span data inline,
// another contains index into an out-of-line span interner.
// The encoding format for inline spans were obtained by optimizing over crates in rustc/libstd.
// See https://internals.rust-lang.org/t/rfc-compiler-refactoring-spans/1357/28

use crate::def_id::LocalDefId;
use crate::hygiene::SyntaxContext;
use crate::SPAN_TRACK;
use crate::{BytePos, SpanData};

use rustc_data_structures::fx::FxIndexSet;

/// A compressed span.
///
/// Whereas [`SpanData`] is 12 bytes, which is a bit too big to stick everywhere, `Span`
/// is a form that only takes up 8 bytes, with less space for the length and
/// context. The vast majority (99.9%+) of `SpanData` instances will fit within
/// those 8 bytes; any `SpanData` whose fields don't fit into a `Span` are
/// stored in a separate interner table, and the `Span` will index into that
/// table. Interning is rare enough that the cost is low, but common enough
/// that the code is exercised regularly.
///
/// An earlier version of this code used only 4 bytes for `Span`, but that was
/// slower because only 80--90% of spans could be stored inline (even less in
/// very large crates) and so the interner was used a lot more.
///
/// Inline (compressed) format:
/// - `span.base_or_index == span_data.lo`
/// - `span.len_or_tag == len == span_data.hi - span_data.lo` (must be `<= MAX_LEN`)
/// - `span.ctxt == span_data.ctxt` (must be `<= MAX_CTXT`)
///
/// Interned format:
/// - `span.base_or_index == index` (indexes into the interner table)
/// - `span.len_or_tag == LEN_TAG` (high bit set, all other bits are zero)
/// - `span.ctxt == 0`
///
/// The inline form uses 0 for the tag value (rather than 1) so that we don't
/// need to mask out the tag bit when getting the length, and so that the
/// dummy span can be all zeroes.
///
/// Notes about the choice of field sizes:
/// - `base` is 32 bits in both `Span` and `SpanData`, which means that `base`
///   values never cause interning. The number of bits needed for `base`
///   depends on the crate size. 32 bits allows up to 4 GiB of code in a crate.
/// - `len` is 15 bits in `Span` (a u16, minus 1 bit for the tag) and 32 bits
///   in `SpanData`, which means that large `len` values will cause interning.
///   The number of bits needed for `len` does not depend on the crate size.
///   The most common numbers of bits for `len` are from 0 to 7, with a peak usually
///   at 3 or 4, and then it drops off quickly from 8 onwards. 15 bits is enough
///   for 99.99%+ of cases, but larger values (sometimes 20+ bits) might occur
///   dozens of times in a typical crate.
/// - `ctxt` is 16 bits in `Span` and 32 bits in `SpanData`, which means that
///   large `ctxt` values will cause interning. The number of bits needed for
///   `ctxt` values depend partly on the crate size and partly on the form of
///   the code. No crates in `rustc-perf` need more than 15 bits for `ctxt`,
///   but larger crates might need more than 16 bits.
///
/// In order to reliably use parented spans in incremental compilation, a
/// dependency to the parent definition's span must be recorded. This is
/// performed using the callback `SPAN_TRACK` to access the query engine.
+/// +#[derive(Clone, Copy, Eq, PartialEq, Hash)] +#[rustc_pass_by_value] +pub struct Span { + base_or_index: u32, + len_or_tag: u16, + ctxt_or_zero: u16, +} + +const LEN_TAG: u16 = 0b1000_0000_0000_0000; +const MAX_LEN: u32 = 0b0111_1111_1111_1111; +const MAX_CTXT: u32 = 0b1111_1111_1111_1111; + +/// Dummy span, both position and length are zero, syntax context is zero as well. +pub const DUMMY_SP: Span = Span { base_or_index: 0, len_or_tag: 0, ctxt_or_zero: 0 }; + +impl Span { + #[inline] + pub fn new( + mut lo: BytePos, + mut hi: BytePos, + ctxt: SyntaxContext, + parent: Option<LocalDefId>, + ) -> Self { + if lo > hi { + std::mem::swap(&mut lo, &mut hi); + } + + let (base, len, ctxt2) = (lo.0, hi.0 - lo.0, ctxt.as_u32()); + + if len <= MAX_LEN && ctxt2 <= MAX_CTXT && parent.is_none() { + // Inline format. + Span { base_or_index: base, len_or_tag: len as u16, ctxt_or_zero: ctxt2 as u16 } + } else { + // Interned format. + let index = + with_span_interner(|interner| interner.intern(&SpanData { lo, hi, ctxt, parent })); + Span { base_or_index: index, len_or_tag: LEN_TAG, ctxt_or_zero: 0 } + } + } + + #[inline] + pub fn data(self) -> SpanData { + let data = self.data_untracked(); + if let Some(parent) = data.parent { + (*SPAN_TRACK)(parent); + } + data + } + + /// Internal function to translate between an encoded span and the expanded representation. + /// This function must not be used outside the incremental engine. + #[inline] + pub fn data_untracked(self) -> SpanData { + if self.len_or_tag != LEN_TAG { + // Inline format. + debug_assert!(self.len_or_tag as u32 <= MAX_LEN); + SpanData { + lo: BytePos(self.base_or_index), + hi: BytePos(self.base_or_index + self.len_or_tag as u32), + ctxt: SyntaxContext::from_u32(self.ctxt_or_zero as u32), + parent: None, + } + } else { + // Interned format. 
+ debug_assert!(self.ctxt_or_zero == 0); + let index = self.base_or_index; + with_span_interner(|interner| interner.spans[index as usize]) + } + } +} + +#[derive(Default)] +pub struct SpanInterner { + spans: FxIndexSet<SpanData>, +} + +impl SpanInterner { + fn intern(&mut self, span_data: &SpanData) -> u32 { + let (index, _) = self.spans.insert_full(*span_data); + index as u32 + } +} + +// If an interner exists, return it. Otherwise, prepare a fresh one. +#[inline] +fn with_span_interner<T, F: FnOnce(&mut SpanInterner) -> T>(f: F) -> T { + crate::with_session_globals(|session_globals| f(&mut *session_globals.span_interner.lock())) +} diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs new file mode 100644 index 000000000..791160ff6 --- /dev/null +++ b/compiler/rustc_span/src/symbol.rs @@ -0,0 +1,2067 @@ +//! An "interner" is a data structure that associates values with usize tags and +//! allows bidirectional lookup; i.e., given a value, one can easily find the +//! type, and vice versa. + +use rustc_arena::DroplessArena; +use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::stable_hasher::{HashStable, StableHasher, ToStableHashKey}; +use rustc_data_structures::sync::Lock; +use rustc_macros::HashStable_Generic; +use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; + +use std::cmp::{Ord, PartialEq, PartialOrd}; +use std::fmt; +use std::hash::{Hash, Hasher}; +use std::str; + +use crate::{with_session_globals, Edition, Span, DUMMY_SP}; + +#[cfg(test)] +mod tests; + +// The proc macro code for this is in `compiler/rustc_macros/src/symbols.rs`. +symbols! { + // After modifying this list adjust `is_special`, `is_used_keyword`/`is_unused_keyword`, + // this should be rarely necessary though if the keywords are kept in alphabetic order. + Keywords { + // Special reserved identifiers used internally for elided lifetimes, + // unnamed method parameters, crate root module, error recovery etc. 
+ Empty: "", + PathRoot: "{{root}}", + DollarCrate: "$crate", + Underscore: "_", + + // Keywords that are used in stable Rust. + As: "as", + Break: "break", + Const: "const", + Continue: "continue", + Crate: "crate", + Else: "else", + Enum: "enum", + Extern: "extern", + False: "false", + Fn: "fn", + For: "for", + If: "if", + Impl: "impl", + In: "in", + Let: "let", + Loop: "loop", + Match: "match", + Mod: "mod", + Move: "move", + Mut: "mut", + Pub: "pub", + Ref: "ref", + Return: "return", + SelfLower: "self", + SelfUpper: "Self", + Static: "static", + Struct: "struct", + Super: "super", + Trait: "trait", + True: "true", + Type: "type", + Unsafe: "unsafe", + Use: "use", + Where: "where", + While: "while", + + // Keywords that are used in unstable Rust or reserved for future use. + Abstract: "abstract", + Become: "become", + Box: "box", + Do: "do", + Final: "final", + Macro: "macro", + Override: "override", + Priv: "priv", + Typeof: "typeof", + Unsized: "unsized", + Virtual: "virtual", + Yield: "yield", + + // Edition-specific keywords that are used in stable Rust. + Async: "async", // >= 2018 Edition only + Await: "await", // >= 2018 Edition only + Dyn: "dyn", // >= 2018 Edition only + + // Edition-specific keywords that are used in unstable Rust or reserved for future use. + Try: "try", // >= 2018 Edition only + + // Special lifetime names + UnderscoreLifetime: "'_", + StaticLifetime: "'static", + + // Weak keywords, have special meaning only in specific contexts. + Auto: "auto", + Catch: "catch", + Default: "default", + MacroRules: "macro_rules", + Raw: "raw", + Union: "union", + Yeet: "yeet", + } + + // Pre-interned symbols that can be referred to with `rustc_span::sym::*`. + // + // The symbol is the stringified identifier unless otherwise specified, in + // which case the name should mention the non-identifier punctuation. + // E.g. 
`sym::proc_dash_macro` represents "proc-macro", and it shouldn't be + // called `sym::proc_macro` because then it's easy to mistakenly think it + // represents "proc_macro". + // + // As well as the symbols listed, there are symbols for the strings + // "0", "1", ..., "9", which are accessible via `sym::integer`. + // + // The proc macro will abort if symbols are not in alphabetical order (as + // defined by `impl Ord for str`) or if any symbols are duplicated. Vim + // users can sort the list by selecting it and executing the command + // `:'<,'>!LC_ALL=C sort`. + // + // There is currently no checking that all symbols are used; that would be + // nice to have. + Symbols { + AcqRel, + Acquire, + AddSubdiagnostic, + Alignment, + Any, + Arc, + Argument, + ArgumentV1, + ArgumentV1Methods, + Arguments, + AsMut, + AsRef, + AssertParamIsClone, + AssertParamIsCopy, + AssertParamIsEq, + AtomicBool, + AtomicI128, + AtomicI16, + AtomicI32, + AtomicI64, + AtomicI8, + AtomicIsize, + AtomicPtr, + AtomicU128, + AtomicU16, + AtomicU32, + AtomicU64, + AtomicU8, + AtomicUsize, + BTreeEntry, + BTreeMap, + BTreeSet, + BinaryHeap, + Borrow, + Break, + C, + CStr, + CString, + Capture, + Center, + Clone, + Continue, + Copy, + Count, + Cow, + Debug, + DebugStruct, + DebugTuple, + Decodable, + Decoder, + DecorateLint, + Default, + Deref, + DiagnosticMessage, + DirBuilder, + Display, + DoubleEndedIterator, + Duration, + Encodable, + Encoder, + Eq, + Equal, + Err, + Error, + File, + FileType, + Fn, + FnMut, + FnOnce, + FormatSpec, + Formatter, + From, + FromIterator, + FromResidual, + Future, + FxHashMap, + FxHashSet, + GlobalAlloc, + Hash, + HashMap, + HashMapEntry, + HashSet, + Hasher, + Implied, + Input, + Into, + IntoFuture, + IntoIterator, + IoRead, + IoWrite, + IrTyKind, + Is, + ItemContext, + Iterator, + Layout, + Left, + LinkedList, + LintPass, + Mutex, + N, + NonZeroI128, + NonZeroI16, + NonZeroI32, + NonZeroI64, + NonZeroI8, + NonZeroU128, + NonZeroU16, + NonZeroU32, + 
NonZeroU64, + NonZeroU8, + None, + Ok, + Option, + Ord, + Ordering, + OsStr, + OsString, + Output, + Param, + PartialEq, + PartialOrd, + Path, + PathBuf, + Pending, + Pin, + Pointer, + Poll, + ProcMacro, + ProcMacroHack, + ProceduralMasqueradeDummyType, + Range, + RangeFrom, + RangeFull, + RangeInclusive, + RangeTo, + RangeToInclusive, + Rc, + Ready, + Receiver, + Relaxed, + Release, + Result, + Return, + Right, + Rust, + RustcDecodable, + RustcEncodable, + Send, + SeqCst, + SessionDiagnostic, + SliceIndex, + Some, + String, + StructuralEq, + StructuralPartialEq, + SubdiagnosticMessage, + Sync, + Target, + ToOwned, + ToString, + Try, + TryCaptureGeneric, + TryCapturePrintable, + TryFrom, + TryInto, + Ty, + TyCtxt, + TyKind, + Unknown, + UnsafeArg, + Vec, + VecDeque, + Wrapper, + Yield, + _DECLS, + _Self, + __D, + __H, + __S, + __awaitee, + __try_var, + _d, + _e, + _task_context, + a32, + aarch64_target_feature, + aarch64_ver_target_feature, + abi, + abi_amdgpu_kernel, + abi_avr_interrupt, + abi_c_cmse_nonsecure_call, + abi_efiapi, + abi_msp430_interrupt, + abi_ptx, + abi_sysv64, + abi_thiscall, + abi_unadjusted, + abi_vectorcall, + abi_x86_interrupt, + abort, + aborts, + add, + add_assign, + add_with_overflow, + address, + adt_const_params, + advanced_slice_patterns, + adx_target_feature, + alias, + align, + align_offset, + alignstack, + all, + alloc, + alloc_error_handler, + alloc_layout, + alloc_zeroed, + allocator, + allocator_api, + allocator_internals, + allow, + allow_fail, + allow_internal_unsafe, + allow_internal_unstable, + allowed, + alu32, + always, + and, + and_then, + anonymous_lifetime_in_impl_trait, + any, + append_const_msg, + arbitrary_enum_discriminant, + arbitrary_self_types, + args, + arith_offset, + arm, + arm_target_feature, + array, + arrays, + as_ptr, + as_ref, + as_str, + asm, + asm_const, + asm_experimental_arch, + asm_sym, + asm_unwind, + assert, + assert_eq_macro, + assert_inhabited, + assert_macro, + assert_ne_macro, + 
assert_receiver_is_total_eq, + assert_uninit_valid, + assert_zero_valid, + asserting, + associated_const_equality, + associated_consts, + associated_type_bounds, + associated_type_defaults, + associated_types, + assume, + assume_init, + async_await, + async_closure, + atomic, + atomic_mod, + atomics, + att_syntax, + attr, + attr_literals, + attributes, + augmented_assignments, + auto_traits, + automatically_derived, + avx, + avx512_target_feature, + avx512bw, + avx512f, + await_macro, + bang, + begin_panic, + bench, + bin, + bind_by_move_pattern_guards, + bindings_after_at, + bitand, + bitand_assign, + bitor, + bitor_assign, + bitreverse, + bitxor, + bitxor_assign, + black_box, + block, + bool, + borrowck_graphviz_format, + borrowck_graphviz_postflow, + borrowck_graphviz_preflow, + box_free, + box_patterns, + box_syntax, + bpf_target_feature, + braced_empty_structs, + branch, + breakpoint, + bridge, + bswap, + c_str, + c_unwind, + c_variadic, + call, + call_mut, + call_once, + caller_location, + capture_disjoint_fields, + cdylib, + ceilf32, + ceilf64, + cfg, + cfg_accessible, + cfg_attr, + cfg_attr_multi, + cfg_doctest, + cfg_eval, + cfg_hide, + cfg_macro, + cfg_panic, + cfg_sanitize, + cfg_target_abi, + cfg_target_compact, + cfg_target_feature, + cfg_target_has_atomic, + cfg_target_has_atomic_equal_alignment, + cfg_target_has_atomic_load_store, + cfg_target_thread_local, + cfg_target_vendor, + cfg_version, + cfi, + char, + client, + clippy, + clobber_abi, + clone, + clone_closures, + clone_from, + closure, + closure_lifetime_binder, + closure_to_fn_coercion, + closure_track_caller, + cmp, + cmp_max, + cmp_min, + cmpxchg16b_target_feature, + cmse_nonsecure_entry, + coerce_unsized, + cold, + column, + column_macro, + compare_and_swap, + compare_exchange, + compare_exchange_weak, + compile_error, + compile_error_macro, + compiler, + compiler_builtins, + compiler_fence, + concat, + concat_bytes, + concat_idents, + concat_macro, + conservative_impl_trait, + console, + 
const_allocate, + const_async_blocks, + const_compare_raw_pointers, + const_constructor, + const_deallocate, + const_eval_limit, + const_eval_select, + const_eval_select_ct, + const_evaluatable_checked, + const_extern_fn, + const_fn, + const_fn_floating_point_arithmetic, + const_fn_fn_ptr_basics, + const_fn_trait_bound, + const_fn_transmute, + const_fn_union, + const_fn_unsize, + const_for, + const_format_args, + const_generic_defaults, + const_generics, + const_generics_defaults, + const_if_match, + const_impl_trait, + const_in_array_repeat_expressions, + const_indexing, + const_let, + const_loop, + const_mut_refs, + const_panic, + const_panic_fmt, + const_precise_live_drops, + const_raw_ptr_deref, + const_raw_ptr_to_usize_cast, + const_refs_to_cell, + const_trait, + const_trait_bound_opt_out, + const_trait_impl, + const_transmute, + const_try, + constant, + constructor, + contents, + context, + convert, + copy, + copy_closures, + copy_nonoverlapping, + copysignf32, + copysignf64, + core, + core_intrinsics, + core_panic, + core_panic_2015_macro, + core_panic_macro, + cosf32, + cosf64, + count, + cr, + crate_id, + crate_in_paths, + crate_local, + crate_name, + crate_type, + crate_visibility_modifier, + crt_dash_static: "crt-static", + cstring_type, + ctlz, + ctlz_nonzero, + ctpop, + cttz, + cttz_nonzero, + custom_attribute, + custom_derive, + custom_inner_attributes, + custom_test_frameworks, + d, + d32, + dbg_macro, + dead_code, + dealloc, + debug, + debug_assert_eq_macro, + debug_assert_macro, + debug_assert_ne_macro, + debug_assertions, + debug_struct, + debug_struct_fields_finish, + debug_trait_builder, + debug_tuple, + debug_tuple_fields_finish, + debugger_visualizer, + decl_macro, + declare_lint_pass, + decode, + default_alloc_error_handler, + default_lib_allocator, + default_method_body_is_const, + default_type_parameter_fallback, + default_type_params, + delay_span_bug_from_inside_query, + deny, + deprecated, + deprecated_safe, + deprecated_suggestion, + 
deref, + deref_method, + deref_mut, + deref_target, + derive, + derive_default_enum, + destruct, + destructuring_assignment, + diagnostic, + direct, + discriminant_kind, + discriminant_type, + discriminant_value, + dispatch_from_dyn, + display_trait, + div, + div_assign, + doc, + doc_alias, + doc_auto_cfg, + doc_cfg, + doc_cfg_hide, + doc_keyword, + doc_masked, + doc_notable_trait, + doc_primitive, + doc_spotlight, + doctest, + document_private_items, + dotdot: "..", + dotdot_in_tuple_patterns, + dotdoteq_in_patterns, + dreg, + dreg_low16, + dreg_low8, + drop, + drop_in_place, + drop_types_in_const, + dropck_eyepatch, + dropck_parametricity, + dylib, + dyn_metadata, + dyn_trait, + e, + edition_macro_pats, + edition_panic, + eh_catch_typeinfo, + eh_personality, + emit_enum, + emit_enum_variant, + emit_enum_variant_arg, + emit_struct, + emit_struct_field, + enable, + enclosing_scope, + encode, + end, + env, + env_macro, + eprint_macro, + eprintln_macro, + eq, + ermsb_target_feature, + exact_div, + except, + exchange_malloc, + exclusive_range_pattern, + exhaustive_integer_patterns, + exhaustive_patterns, + existential_type, + exp2f32, + exp2f64, + expect, + expected, + expf32, + expf64, + explicit_generic_args_with_impl_trait, + export_name, + expr, + extended_key_value_attributes, + extern_absolute_paths, + extern_crate_item_prelude, + extern_crate_self, + extern_in_paths, + extern_prelude, + extern_types, + external_doc, + f, + f16c_target_feature, + f32, + f64, + fabsf32, + fabsf64, + fadd_fast, + fake_variadic, + fdiv_fast, + feature, + fence, + ferris: "🦀", + fetch_update, + ffi, + ffi_const, + ffi_pure, + ffi_returns_twice, + field, + field_init_shorthand, + file, + file_macro, + fill, + finish, + flags, + float, + float_to_int_unchecked, + floorf32, + floorf64, + fmaf32, + fmaf64, + fmt, + fmt_as_str, + fmt_internals, + fmul_fast, + fn_align, + fn_must_use, + fn_mut, + fn_once, + fn_once_output, + forbid, + forget, + format, + format_args, + 
format_args_capture, + format_args_macro, + format_args_nl, + format_macro, + fp, + freeze, + freg, + frem_fast, + from, + from_desugaring, + from_generator, + from_iter, + from_method, + from_output, + from_residual, + from_size_align_unchecked, + from_usize, + from_yeet, + fsub_fast, + fundamental, + future, + future_trait, + gdb_script_file, + ge, + gen_future, + gen_kill, + generator, + generator_return, + generator_state, + generators, + generic_arg_infer, + generic_assert, + generic_associated_types, + generic_associated_types_extended, + generic_const_exprs, + generic_param_attrs, + get_context, + global_allocator, + global_asm, + globs, + gt, + half_open_range_patterns, + hash, + hexagon_target_feature, + hidden, + homogeneous_aggregate, + html_favicon_url, + html_logo_url, + html_no_source, + html_playground_url, + html_root_url, + hwaddress, + i, + i128, + i128_type, + i16, + i32, + i64, + i8, + ident, + if_let, + if_let_guard, + if_while_or_patterns, + ignore, + impl_header_lifetime_elision, + impl_lint_pass, + impl_macros, + impl_trait_in_bindings, + implied_by, + import, + import_shadowing, + imported_main, + in_band_lifetimes, + include, + include_bytes, + include_bytes_macro, + include_macro, + include_str, + include_str_macro, + inclusive_range_syntax, + index, + index_mut, + infer_outlives_requirements, + infer_static_outlives_requirements, + inherent_associated_types, + inlateout, + inline, + inline_const, + inline_const_pat, + inout, + instruction_set, + integer_: "integer", + integral, + intel, + into_future, + into_iter, + intra_doc_pointers, + intrinsics, + irrefutable_let_patterns, + isa_attribute, + isize, + issue, + issue_5723_bootstrap, + issue_tracker_base_url, + item, + item_like_imports, + iter, + iter_repeat, + keyword, + kind, + kreg, + kreg0, + label, + label_break_value, + lang, + lang_items, + large_assignments, + lateout, + lazy_normalization_consts, + le, + len, + let_chains, + let_else, + lhs, + lib, + libc, + lifetime, + 
likely, + line, + line_macro, + link, + link_args, + link_cfg, + link_llvm_intrinsics, + link_name, + link_ordinal, + link_section, + linkage, + linker, + lint_reasons, + literal, + load, + loaded_from_disk, + local, + local_inner_macros, + log10f32, + log10f64, + log2f32, + log2f64, + log_syntax, + logf32, + logf64, + loop_break_value, + lt, + macro_at_most_once_rep, + macro_attributes_in_derive_output, + macro_escape, + macro_export, + macro_lifetime_matcher, + macro_literal_matcher, + macro_metavar_expr, + macro_reexport, + macro_use, + macro_vis_matcher, + macros_in_extern, + main, + managed_boxes, + manually_drop, + map, + marker, + marker_trait_attr, + masked, + match_beginning_vert, + match_default_bindings, + matches_macro, + maxnumf32, + maxnumf64, + may_dangle, + may_unwind, + maybe_uninit, + maybe_uninit_uninit, + maybe_uninit_zeroed, + mem_discriminant, + mem_drop, + mem_forget, + mem_replace, + mem_size_of, + mem_size_of_val, + mem_uninitialized, + mem_variant_count, + mem_zeroed, + member_constraints, + memory, + memtag, + message, + meta, + metadata_type, + min_align_of, + min_align_of_val, + min_const_fn, + min_const_generics, + min_const_unsafe_fn, + min_specialization, + min_type_alias_impl_trait, + minnumf32, + minnumf64, + mips_target_feature, + miri, + misc, + mmx_reg, + modifiers, + module, + module_path, + module_path_macro, + more_qualified_paths, + more_struct_aliases, + movbe_target_feature, + move_ref_pattern, + move_size_limit, + mul, + mul_assign, + mul_with_overflow, + must_not_suspend, + must_use, + naked, + naked_functions, + name, + names, + native_link_modifiers, + native_link_modifiers_as_needed, + native_link_modifiers_bundle, + native_link_modifiers_verbatim, + native_link_modifiers_whole_archive, + natvis_file, + ne, + nearbyintf32, + nearbyintf64, + needs_allocator, + needs_drop, + needs_panic_runtime, + neg, + negate_unsigned, + negative_impls, + neon, + never, + never_type, + never_type_fallback, + new, + new_unchecked, + 
next, + nll, + no, + no_builtins, + no_core, + no_coverage, + no_crate_inject, + no_debug, + no_default_passes, + no_implicit_prelude, + no_inline, + no_link, + no_main, + no_mangle, + no_sanitize, + no_stack_check, + no_start, + no_std, + nomem, + non_ascii_idents, + non_exhaustive, + non_exhaustive_omitted_patterns_lint, + non_modrs_mods, + none_error, + nontemporal_store, + noop_method_borrow, + noop_method_clone, + noop_method_deref, + noreturn, + nostack, + not, + notable_trait, + note, + object_safe_for_dispatch, + of, + offset, + omit_gdb_pretty_printer_section, + on, + on_unimplemented, + oom, + opaque, + ops, + opt_out_copy, + optimize, + optimize_attribute, + optin_builtin_traits, + option, + option_env, + option_env_macro, + options, + or, + or_patterns, + other, + out, + overlapping_marker_traits, + owned_box, + packed, + panic, + panic_2015, + panic_2021, + panic_abort, + panic_bounds_check, + panic_display, + panic_fmt, + panic_handler, + panic_impl, + panic_implementation, + panic_info, + panic_location, + panic_no_unwind, + panic_runtime, + panic_str, + panic_unwind, + panicking, + param_attrs, + partial_cmp, + partial_ord, + passes, + pat, + pat_param, + path, + pattern_parentheses, + phantom_data, + pin, + platform_intrinsics, + plugin, + plugin_registrar, + plugins, + pointee_trait, + pointer, + pointer_trait_fmt, + poll, + position, + post_dash_lto: "post-lto", + powerpc_target_feature, + powf32, + powf64, + powif32, + powif64, + pre_dash_lto: "pre-lto", + precise_pointer_size_matching, + precision, + pref_align_of, + prefetch_read_data, + prefetch_read_instruction, + prefetch_write_data, + prefetch_write_instruction, + preg, + prelude, + prelude_import, + preserves_flags, + primitive, + print_macro, + println_macro, + proc_dash_macro: "proc-macro", + proc_macro, + proc_macro_attribute, + proc_macro_def_site, + proc_macro_derive, + proc_macro_expr, + proc_macro_gen, + proc_macro_hygiene, + proc_macro_internals, + proc_macro_mod, + 
proc_macro_non_items, + proc_macro_path_invoc, + profiler_builtins, + profiler_runtime, + ptr, + ptr_guaranteed_eq, + ptr_guaranteed_ne, + ptr_null, + ptr_null_mut, + ptr_offset_from, + ptr_offset_from_unsigned, + pub_macro_rules, + pub_restricted, + pure, + pushpop_unsafe, + qreg, + qreg_low4, + qreg_low8, + quad_precision_float, + question_mark, + quote, + range_inclusive_new, + raw_dylib, + raw_eq, + raw_identifiers, + raw_ref_op, + re_rebalance_coherence, + read_enum, + read_enum_variant, + read_enum_variant_arg, + read_struct, + read_struct_field, + readonly, + realloc, + reason, + receiver, + recursion_limit, + reexport_test_harness_main, + ref_unwind_safe_trait, + reference, + reflect, + reg, + reg16, + reg32, + reg64, + reg_abcd, + reg_byte, + reg_iw, + reg_nonzero, + reg_pair, + reg_ptr, + reg_upper, + register_attr, + register_tool, + relaxed_adts, + relaxed_struct_unsize, + rem, + rem_assign, + repr, + repr128, + repr_align, + repr_align_enum, + repr_packed, + repr_simd, + repr_transparent, + residual, + result, + rhs, + rintf32, + rintf64, + riscv_target_feature, + rlib, + rotate_left, + rotate_right, + roundf32, + roundf64, + rt, + rtm_target_feature, + rust, + rust_2015, + rust_2015_preview, + rust_2018, + rust_2018_preview, + rust_2021, + rust_2021_preview, + rust_2024, + rust_2024_preview, + rust_begin_unwind, + rust_cold_cc, + rust_eh_catch_typeinfo, + rust_eh_personality, + rust_eh_register_frames, + rust_eh_unregister_frames, + rust_oom, + rustc, + rustc_allocator, + rustc_allocator_nounwind, + rustc_allocator_zeroed, + rustc_allow_const_fn_unstable, + rustc_allow_incoherent_impl, + rustc_allowed_through_unstable_modules, + rustc_attrs, + rustc_box, + rustc_builtin_macro, + rustc_capture_analysis, + rustc_clean, + rustc_coherence_is_core, + rustc_const_stable, + rustc_const_unstable, + rustc_conversion_suggestion, + rustc_deallocator, + rustc_def_path, + rustc_diagnostic_item, + rustc_diagnostic_macros, + rustc_dirty, + rustc_do_not_const_check, 
+ rustc_dummy, + rustc_dump_env_program_clauses, + rustc_dump_program_clauses, + rustc_dump_user_substs, + rustc_dump_vtable, + rustc_error, + rustc_evaluate_where_clauses, + rustc_expected_cgu_reuse, + rustc_has_incoherent_inherent_impls, + rustc_if_this_changed, + rustc_inherit_overflow_checks, + rustc_insignificant_dtor, + rustc_layout, + rustc_layout_scalar_valid_range_end, + rustc_layout_scalar_valid_range_start, + rustc_legacy_const_generics, + rustc_lint_diagnostics, + rustc_lint_opt_deny_field_access, + rustc_lint_opt_ty, + rustc_lint_query_instability, + rustc_macro_transparency, + rustc_main, + rustc_mir, + rustc_must_implement_one_of, + rustc_nonnull_optimization_guaranteed, + rustc_object_lifetime_default, + rustc_on_unimplemented, + rustc_outlives, + rustc_paren_sugar, + rustc_partition_codegened, + rustc_partition_reused, + rustc_pass_by_value, + rustc_peek, + rustc_peek_definite_init, + rustc_peek_liveness, + rustc_peek_maybe_init, + rustc_peek_maybe_uninit, + rustc_polymorphize_error, + rustc_private, + rustc_proc_macro_decls, + rustc_promotable, + rustc_reallocator, + rustc_regions, + rustc_reservation_impl, + rustc_serialize, + rustc_skip_array_during_method_dispatch, + rustc_specialization_trait, + rustc_stable, + rustc_std_internal_symbol, + rustc_strict_coherence, + rustc_symbol_name, + rustc_test_marker, + rustc_then_this_would_need, + rustc_trivial_field_reads, + rustc_unsafe_specialization_marker, + rustc_variance, + rustdoc, + rustdoc_internals, + rustfmt, + rvalue_static_promotion, + s, + sanitize, + sanitizer_runtime, + saturating_add, + saturating_sub, + self_in_typedefs, + self_struct_ctor, + semitransparent, + shadow_call_stack, + shl, + shl_assign, + should_panic, + shr, + shr_assign, + simd, + simd_add, + simd_and, + simd_arith_offset, + simd_as, + simd_bitmask, + simd_cast, + simd_ceil, + simd_div, + simd_eq, + simd_extract, + simd_fabs, + simd_fcos, + simd_fexp, + simd_fexp2, + simd_ffi, + simd_flog, + simd_flog10, + simd_flog2, + 
simd_floor, + simd_fma, + simd_fmax, + simd_fmin, + simd_fpow, + simd_fpowi, + simd_fsin, + simd_fsqrt, + simd_gather, + simd_ge, + simd_gt, + simd_insert, + simd_le, + simd_lt, + simd_mul, + simd_ne, + simd_neg, + simd_or, + simd_reduce_add_ordered, + simd_reduce_add_unordered, + simd_reduce_all, + simd_reduce_and, + simd_reduce_any, + simd_reduce_max, + simd_reduce_max_nanless, + simd_reduce_min, + simd_reduce_min_nanless, + simd_reduce_mul_ordered, + simd_reduce_mul_unordered, + simd_reduce_or, + simd_reduce_xor, + simd_rem, + simd_round, + simd_saturating_add, + simd_saturating_sub, + simd_scatter, + simd_select, + simd_select_bitmask, + simd_shl, + simd_shr, + simd_shuffle, + simd_sub, + simd_trunc, + simd_xor, + since, + sinf32, + sinf64, + size, + size_of, + size_of_val, + sized, + skip, + slice, + slice_len_fn, + slice_patterns, + slicing_syntax, + soft, + specialization, + speed, + spotlight, + sqrtf32, + sqrtf64, + sreg, + sreg_low16, + sse, + sse4a_target_feature, + stable, + staged_api, + start, + state, + static_in_const, + static_nobundle, + static_recursion, + staticlib, + std, + std_inject, + std_panic, + std_panic_2015_macro, + std_panic_macro, + stmt, + stmt_expr_attributes, + stop_after_dataflow, + store, + str, + str_split_whitespace, + str_trim, + str_trim_end, + str_trim_start, + strict_provenance, + stringify, + stringify_macro, + struct_field_attributes, + struct_inherit, + struct_variant, + structural_match, + structural_peq, + structural_teq, + sty, + sub, + sub_assign, + sub_with_overflow, + suggestion, + sym, + sync, + t32, + target, + target_abi, + target_arch, + target_endian, + target_env, + target_family, + target_feature, + target_feature_11, + target_has_atomic, + target_has_atomic_equal_alignment, + target_has_atomic_load_store, + target_os, + target_pointer_width, + target_target_vendor, + target_thread_local, + target_vendor, + task, + tbm_target_feature, + termination, + termination_trait, + termination_trait_test, + test, + 
test_2018_feature, + test_accepted_feature, + test_case, + test_removed_feature, + test_runner, + test_unstable_lint, + then_with, + thread, + thread_local, + thread_local_macro, + thumb2, + thumb_mode: "thumb-mode", + tmm_reg, + to_string, + to_vec, + todo_macro, + tool_attributes, + tool_lints, + trace_macros, + track_caller, + trait_alias, + trait_upcasting, + transmute, + transmute_trait, + transparent, + transparent_enums, + transparent_unions, + trivial_bounds, + truncf32, + truncf64, + try_blocks, + try_capture, + try_from, + try_into, + try_trait_v2, + tt, + tuple, + tuple_from_req, + tuple_indexing, + two_phase, + ty, + type_alias_enum_variants, + type_alias_impl_trait, + type_ascription, + type_changing_struct_update, + type_id, + type_length_limit, + type_macros, + type_name, + u128, + u16, + u32, + u64, + u8, + unaligned_volatile_load, + unaligned_volatile_store, + unboxed_closures, + unchecked_add, + unchecked_div, + unchecked_mul, + unchecked_rem, + unchecked_shl, + unchecked_shr, + unchecked_sub, + underscore_const_names, + underscore_imports, + underscore_lifetimes, + uniform_paths, + unimplemented_macro, + unit, + universal_impl_trait, + unix, + unlikely, + unmarked_api, + unpin, + unreachable, + unreachable_2015, + unreachable_2015_macro, + unreachable_2021, + unreachable_2021_macro, + unreachable_code, + unreachable_display, + unreachable_macro, + unrestricted_attribute_tokens, + unsafe_block_in_unsafe_fn, + unsafe_cell, + unsafe_no_drop_flag, + unsafe_pin_internals, + unsize, + unsized_fn_params, + unsized_locals, + unsized_tuple_coercion, + unstable, + unstable_location_reason_default: "this crate is being loaded from the sysroot, an \ + unstable location; did you mean to load this crate \ + from crates.io via `Cargo.toml` instead?", + untagged_unions, + unused_imports, + unused_qualifications, + unwind, + unwind_attributes, + unwind_safe_trait, + unwrap, + unwrap_or, + use_extern_macros, + use_nested_groups, + used, + used_with_arg, + using, + 
usize, + v1, + va_arg, + va_copy, + va_end, + va_list, + va_start, + val, + values, + var, + variant_count, + vec, + vec_macro, + version, + vfp2, + vis, + visible_private_types, + volatile, + volatile_copy_memory, + volatile_copy_nonoverlapping_memory, + volatile_load, + volatile_set_memory, + volatile_store, + vreg, + vreg_low16, + vtable_align, + vtable_size, + warn, + wasm_abi, + wasm_import_module, + wasm_target_feature, + while_let, + width, + windows, + windows_subsystem, + with_negative_coherence, + wrapping_add, + wrapping_mul, + wrapping_sub, + wreg, + write_bytes, + write_macro, + write_str, + writeln_macro, + x87_reg, + xer, + xmm_reg, + yeet_desugar_details, + yeet_expr, + ymm_reg, + zmm_reg, + } +} + +#[derive(Copy, Clone, Eq, HashStable_Generic, Encodable, Decodable)] +pub struct Ident { + pub name: Symbol, + pub span: Span, +} + +impl Ident { + #[inline] + /// Constructs a new identifier from a symbol and a span. + pub const fn new(name: Symbol, span: Span) -> Ident { + Ident { name, span } + } + + /// Constructs a new identifier with a dummy span. + #[inline] + pub const fn with_dummy_span(name: Symbol) -> Ident { + Ident::new(name, DUMMY_SP) + } + + #[inline] + pub fn empty() -> Ident { + Ident::with_dummy_span(kw::Empty) + } + + /// Maps a string to an identifier with a dummy span. + pub fn from_str(string: &str) -> Ident { + Ident::with_dummy_span(Symbol::intern(string)) + } + + /// Maps a string and a span to an identifier. + pub fn from_str_and_span(string: &str, span: Span) -> Ident { + Ident::new(Symbol::intern(string), span) + } + + /// Replaces `lo` and `hi` with those from `span`, but keep hygiene context. + pub fn with_span_pos(self, span: Span) -> Ident { + Ident::new(self.name, span.with_ctxt(self.span.ctxt())) + } + + pub fn without_first_quote(self) -> Ident { + Ident::new(Symbol::intern(self.as_str().trim_start_matches('\'')), self.span) + } + + /// "Normalize" ident for use in comparisons using "item hygiene". 
+    /// Identifiers with the same string value become the same if they came from the same macro 2.0 macro
+    /// (e.g., `macro` item, but not `macro_rules` item) and stay different if they came from
+    /// different macro 2.0 macros.
+    /// Technically, this operation strips all non-opaque marks from ident's syntactic context.
+    pub fn normalize_to_macros_2_0(self) -> Ident {
+        Ident::new(self.name, self.span.normalize_to_macros_2_0())
+    }
+
+    /// "Normalize" ident for use in comparisons using "local variable hygiene".
+    /// Identifiers with the same string value become the same if they came from the same non-transparent
+    /// macro (e.g., `macro` or `macro_rules!` items) and stay different if they came from different
+    /// non-transparent macros.
+    /// Technically, this operation strips all transparent marks from ident's syntactic context.
+    pub fn normalize_to_macro_rules(self) -> Ident {
+        Ident::new(self.name, self.span.normalize_to_macro_rules())
+    }
+
+    /// Access the underlying string. This is a slowish operation because it
+    /// requires locking the symbol interner.
+    ///
+    /// Note that the lifetime of the return value is a lie. See
+    /// `Symbol::as_str()` for details.
+    pub fn as_str(&self) -> &str {
+        self.name.as_str()
+    }
+}
+
+impl PartialEq for Ident {
+    fn eq(&self, rhs: &Self) -> bool {
+        self.name == rhs.name && self.span.eq_ctxt(rhs.span)
+    }
+}
+
+impl Hash for Ident {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        self.name.hash(state);
+        self.span.ctxt().hash(state);
+    }
+}
+
+impl fmt::Debug for Ident {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::Display::fmt(self, f)?;
+        fmt::Debug::fmt(&self.span.ctxt(), f)
+    }
+}
+
+/// This implementation is supposed to be used in error messages, so it's expected to be identical
+/// to printing the original identifier token written in source code (`token_to_string`),
+/// except that AST identifiers don't keep the rawness flag, so we have to guess it.
+impl fmt::Display for Ident { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&IdentPrinter::new(self.name, self.is_raw_guess(), None), f) + } +} + +/// This is the most general way to print identifiers. +/// AST pretty-printer is used as a fallback for turning AST structures into token streams for +/// proc macros. Additionally, proc macros may stringify their input and expect it survive the +/// stringification (especially true for proc macro derives written between Rust 1.15 and 1.30). +/// So we need to somehow pretty-print `$crate` in a way preserving at least some of its +/// hygiene data, most importantly name of the crate it refers to. +/// As a result we print `$crate` as `crate` if it refers to the local crate +/// and as `::other_crate_name` if it refers to some other crate. +/// Note, that this is only done if the ident token is printed from inside of AST pretty-printing, +/// but not otherwise. Pretty-printing is the only way for proc macros to discover token contents, +/// so we should not perform this lossy conversion if the top level call to the pretty-printer was +/// done for a token stream or a single token. +pub struct IdentPrinter { + symbol: Symbol, + is_raw: bool, + /// Span used for retrieving the crate name to which `$crate` refers to, + /// if this field is `None` then the `$crate` conversion doesn't happen. + convert_dollar_crate: Option<Span>, +} + +impl IdentPrinter { + /// The most general `IdentPrinter` constructor. Do not use this. + pub fn new(symbol: Symbol, is_raw: bool, convert_dollar_crate: Option<Span>) -> IdentPrinter { + IdentPrinter { symbol, is_raw, convert_dollar_crate } + } + + /// This implementation is supposed to be used when printing identifiers + /// as a part of pretty-printing for larger AST pieces. + /// Do not use this either. 
+ pub fn for_ast_ident(ident: Ident, is_raw: bool) -> IdentPrinter { + IdentPrinter::new(ident.name, is_raw, Some(ident.span)) + } +} + +impl fmt::Display for IdentPrinter { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.is_raw { + f.write_str("r#")?; + } else if self.symbol == kw::DollarCrate { + if let Some(span) = self.convert_dollar_crate { + let converted = span.ctxt().dollar_crate_name(); + if !converted.is_path_segment_keyword() { + f.write_str("::")?; + } + return fmt::Display::fmt(&converted, f); + } + } + fmt::Display::fmt(&self.symbol, f) + } +} + +/// An newtype around `Ident` that calls [Ident::normalize_to_macro_rules] on +/// construction. +// FIXME(matthewj, petrochenkov) Use this more often, add a similar +// `ModernIdent` struct and use that as well. +#[derive(Copy, Clone, Eq, PartialEq, Hash)] +pub struct MacroRulesNormalizedIdent(Ident); + +impl MacroRulesNormalizedIdent { + pub fn new(ident: Ident) -> Self { + Self(ident.normalize_to_macro_rules()) + } +} + +impl fmt::Debug for MacroRulesNormalizedIdent { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&self.0, f) + } +} + +impl fmt::Display for MacroRulesNormalizedIdent { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&self.0, f) + } +} + +/// An interned string. +/// +/// Internally, a `Symbol` is implemented as an index, and all operations +/// (including hashing, equality, and ordering) operate on that index. The use +/// of `rustc_index::newtype_index!` means that `Option<Symbol>` only takes up 4 bytes, +/// because `rustc_index::newtype_index!` reserves the last 256 values for tagging purposes. +/// +/// Note that `Symbol` cannot directly be a `rustc_index::newtype_index!` because it +/// implements `fmt::Debug`, `Encodable`, and `Decodable` in special ways. +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Symbol(SymbolIndex); + +rustc_index::newtype_index! 
{
    struct SymbolIndex { .. }
}

impl Symbol {
    const fn new(n: u32) -> Self {
        Symbol(SymbolIndex::from_u32(n))
    }

    /// Maps a string to its interned representation.
    pub fn intern(string: &str) -> Self {
        with_session_globals(|session_globals| session_globals.symbol_interner.intern(string))
    }

    /// Access the underlying string. This is a slowish operation because it
    /// requires locking the symbol interner.
    ///
    /// Note that the lifetime of the return value is a lie. It's not the same
    /// as `&self`, but actually tied to the lifetime of the underlying
    /// interner. Interners are long-lived, and there are very few of them, and
    /// this function is typically used for short-lived things, so in practice
    /// it works out ok.
    pub fn as_str(&self) -> &str {
        with_session_globals(|session_globals| unsafe {
            // SAFETY(review): the transmute only widens the borrow out of the
            // `with_session_globals` closure; per the doc comment above this
            // relies on the interner outliving every practical use of the
            // returned `&str`.
            std::mem::transmute::<&str, &str>(session_globals.symbol_interner.get(*self))
        })
    }

    pub fn as_u32(self) -> u32 {
        self.0.as_u32()
    }

    pub fn is_empty(self) -> bool {
        self == kw::Empty
    }

    /// This method is supposed to be used in error messages, so it's expected to be
    /// identical to printing the original identifier token written in source code
    /// (`token_to_string`, `Ident::to_string`), except that symbols don't keep the rawness flag
    /// or edition, so we have to guess the rawness using the global edition.
    pub fn to_ident_string(self) -> String {
        Ident::with_dummy_span(self).to_string()
    }
}

impl fmt::Debug for Symbol {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Debug::fmt(self.as_str(), f)
    }
}

impl fmt::Display for Symbol {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(self.as_str(), f)
    }
}

// Symbols are serialized as their string contents, not as interner indices.
impl<S: Encoder> Encodable<S> for Symbol {
    fn encode(&self, s: &mut S) {
        s.emit_str(self.as_str());
    }
}

// Decoding re-interns the string in the current session's interner.
impl<D: Decoder> Decodable<D> for Symbol {
    #[inline]
    fn decode(d: &mut D) -> Symbol {
        Symbol::intern(&d.read_str())
    }
}

// Stable hashing goes through the string contents rather than the index.
impl<CTX> HashStable<CTX> for Symbol {
    #[inline]
    fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) {
        self.as_str().hash_stable(hcx, hasher);
    }
}

impl<CTX> ToStableHashKey<CTX> for Symbol {
    type KeyType = String;
    #[inline]
    fn to_stable_hash_key(&self, _: &CTX) -> String {
        self.as_str().to_string()
    }
}

#[derive(Default)]
pub(crate) struct Interner(Lock<InternerInner>);

// The `&'static str`s in this type actually point into the arena.
//
// The `FxHashMap`+`Vec` pair could be replaced by `FxIndexSet`, but #75278
// found that to regress performance up to 2% in some cases. This might be
// revisited after further improvements to `indexmap`.
//
// This type is private to prevent accidentally constructing more than one
// `Interner` on the same thread, which makes it easy to mix up `Symbol`s
// between `Interner`s.
#[derive(Default)]
struct InternerInner {
    arena: DroplessArena,
    names: FxHashMap<&'static str, Symbol>,
    strings: Vec<&'static str>,
}

impl Interner {
    fn prefill(init: &[&'static str]) -> Self {
        // Symbols are handed out in insertion order, so the n-th prefilled
        // string gets index n.
        Interner(Lock::new(InternerInner {
            strings: init.into(),
            names: init.iter().copied().zip((0..).map(Symbol::new)).collect(),
            ..Default::default()
        }))
    }

    #[inline]
    fn intern(&self, string: &str) -> Symbol {
        let mut inner = self.0.lock();
        if let Some(&name) = inner.names.get(string) {
            return name;
        }

        let name = Symbol::new(inner.strings.len() as u32);

        // SAFETY: we convert from `&str` to `&[u8]`, clone it into the arena,
        // and immediately convert the clone back to `&str`, all because there
        // is no `inner.arena.alloc_str()` method. This is clearly safe.
        let string: &str =
            unsafe { str::from_utf8_unchecked(inner.arena.alloc_slice(string.as_bytes())) };

        // SAFETY: we can extend the arena allocation to `'static` because we
        // only access these while the arena is still alive.
        let string: &'static str = unsafe { &*(string as *const str) };
        inner.strings.push(string);

        // This second hash table lookup can be avoided by using `RawEntryMut`,
        // but this code path isn't hot enough for it to be worth it. See
        // #91445 for details.
        inner.names.insert(string, name);
        name
    }

    // Get the symbol as a string. `Symbol::as_str()` should be used in
    // preference to this function.
    fn get(&self, symbol: Symbol) -> &str {
        self.0.lock().strings[symbol.0.as_usize()]
    }
}

// This module has a very short name because it's used a lot.
/// This module contains all the defined keyword `Symbol`s.
///
/// Given that `kw` is imported, use them like `kw::keyword_name`.
/// For example `kw::Loop` or `kw::Break`.
pub mod kw {
    pub use super::kw_generated::*;
}

// This module has a very short name because it's used a lot.
/// This module contains all the defined non-keyword `Symbol`s.
///
/// Given that `sym` is imported, use them like `sym::symbol_name`.
/// For example `sym::rustfmt` or `sym::u8`.
pub mod sym {
    use super::Symbol;
    use std::convert::TryInto;

    #[doc(inline)]
    pub use super::sym_generated::*;

    // Used from a macro in `librustc_feature/accepted.rs`
    pub use super::kw::MacroRules as macro_rules;

    /// Get the symbol for an integer.
    ///
    /// The first few non-negative integers each have a static symbol and therefore
    /// are fast.
    pub fn integer<N: TryInto<usize> + Copy + ToString>(n: N) -> Symbol {
        if let Result::Ok(idx) = n.try_into() {
            if idx < 10 {
                // NOTE(review): presumably the digit symbols "0".."9" are
                // pre-interned contiguously starting at SYMBOL_DIGITS_BASE —
                // confirm against the prefilled symbol list.
                return Symbol::new(super::SYMBOL_DIGITS_BASE + idx as u32);
            }
        }
        Symbol::intern(&n.to_string())
    }
}

impl Symbol {
    // The range checks below rely on the relative order in which the keyword
    // symbols are pre-interned (see the `symbols!` invocation in this file).
    fn is_special(self) -> bool {
        self <= kw::Underscore
    }

    fn is_used_keyword_always(self) -> bool {
        self >= kw::As && self <= kw::While
    }

    fn is_used_keyword_conditional(self, edition: impl FnOnce() -> Edition) -> bool {
        (self >= kw::Async && self <= kw::Dyn) && edition() >= Edition::Edition2018
    }

    fn is_unused_keyword_always(self) -> bool {
        self >= kw::Abstract && self <= kw::Yield
    }

    fn is_unused_keyword_conditional(self, edition: impl FnOnce() -> Edition) -> bool {
        self == kw::Try && edition() >= Edition::Edition2018
    }

    pub fn is_reserved(self, edition: impl Copy + FnOnce() -> Edition) -> bool {
        self.is_special()
            || self.is_used_keyword_always()
            || self.is_unused_keyword_always()
            || self.is_used_keyword_conditional(edition)
            || self.is_unused_keyword_conditional(edition)
    }

    /// A keyword or reserved identifier that can be used as a path segment.
    pub fn is_path_segment_keyword(self) -> bool {
        self == kw::Super
            || self == kw::SelfLower
            || self == kw::SelfUpper
            || self == kw::Crate
            || self == kw::PathRoot
            || self == kw::DollarCrate
    }

    /// Returns `true` if the symbol is `true` or `false`.
    pub fn is_bool_lit(self) -> bool {
        self == kw::True || self == kw::False
    }

    /// Returns `true` if this symbol can be a raw identifier.
    pub fn can_be_raw(self) -> bool {
        self != kw::Empty && self != kw::Underscore && !self.is_path_segment_keyword()
    }
}

impl Ident {
    /// Returns `true` for reserved identifiers used internally for elided lifetimes,
    /// unnamed method parameters, crate root module, error recovery etc.
    pub fn is_special(self) -> bool {
        self.name.is_special()
    }

    /// Returns `true` if the token is a keyword used in the language.
    pub fn is_used_keyword(self) -> bool {
        // Note: `span.edition()` is relatively expensive, don't call it unless necessary.
        self.name.is_used_keyword_always()
            || self.name.is_used_keyword_conditional(|| self.span.edition())
    }

    /// Returns `true` if the token is a keyword reserved for possible future use.
    pub fn is_unused_keyword(self) -> bool {
        // Note: `span.edition()` is relatively expensive, don't call it unless necessary.
        self.name.is_unused_keyword_always()
            || self.name.is_unused_keyword_conditional(|| self.span.edition())
    }

    /// Returns `true` if the token is either a special identifier or a keyword.
    pub fn is_reserved(self) -> bool {
        // Note: `span.edition()` is relatively expensive, don't call it unless necessary.
        self.name.is_reserved(|| self.span.edition())
    }

    /// A keyword or reserved identifier that can be used as a path segment.
    pub fn is_path_segment_keyword(self) -> bool {
        self.name.is_path_segment_keyword()
    }

    /// We see this identifier in a normal identifier position, like variable name or a type.
    /// How was it written originally? Did it use the raw form? Let's try to guess.
    pub fn is_raw_guess(self) -> bool {
        self.name.can_be_raw() && self.is_reserved()
    }
}
diff --git a/compiler/rustc_span/src/symbol/tests.rs b/compiler/rustc_span/src/symbol/tests.rs
new file mode 100644
index 000000000..0958fce5f
--- /dev/null
+++ b/compiler/rustc_span/src/symbol/tests.rs
@@ -0,0 +1,25 @@
use super::*;

use crate::create_default_session_globals_then;

#[test]
fn interner_tests() {
    // Uses a fresh local interner, so no session globals are needed.
    let i = Interner::default();
    // first one is zero:
    assert_eq!(i.intern("dog"), Symbol::new(0));
    // re-use gets the same entry:
    assert_eq!(i.intern("dog"), Symbol::new(0));
    // different string gets a different #:
    assert_eq!(i.intern("cat"), Symbol::new(1));
    assert_eq!(i.intern("cat"), Symbol::new(1));
    // dog is still at zero
    assert_eq!(i.intern("dog"), Symbol::new(0));
}

#[test]
fn without_first_quote_test() {
    create_default_session_globals_then(|| {
        let i = Ident::from_str("'break");
        assert_eq!(i.without_first_quote().name, kw::Break);
    });
}
diff --git a/compiler/rustc_span/src/tests.rs b/compiler/rustc_span/src/tests.rs
new file mode 100644
index 000000000..5b3915c33
--- /dev/null
+++ b/compiler/rustc_span/src/tests.rs
@@ -0,0 +1,43 @@
use super::*;

#[test]
fn test_lookup_line() {
    let source = "abcdefghijklm\nabcdefghij\n...".to_owned();
    // The file starts at BytePos(3) rather than 0, so the test also exercises
    // the handling of a non-zero file start offset.
    let sf =
        SourceFile::new(FileName::Anon(0), source, BytePos(3), SourceFileHashAlgorithm::Sha256);
    sf.lines(|lines| assert_eq!(lines, &[BytePos(3), BytePos(17), BytePos(28)]));

    // BytePos(0) precedes the file's start pos, so it maps to no line.
    assert_eq!(sf.lookup_line(BytePos(0)), None);
    assert_eq!(sf.lookup_line(BytePos(3)), Some(0));
    assert_eq!(sf.lookup_line(BytePos(4)), Some(0));

    assert_eq!(sf.lookup_line(BytePos(16)), Some(0));
    assert_eq!(sf.lookup_line(BytePos(17)), Some(1));
    assert_eq!(sf.lookup_line(BytePos(18)), Some(1));

    assert_eq!(sf.lookup_line(BytePos(28)), Some(2));
    assert_eq!(sf.lookup_line(BytePos(29)), Some(2));
}

#[test]
fn test_normalize_newlines() {
    // `expected_positions` are the positions of the normalized characters
    // reported by `normalize_newlines` (offsets into the normalized string).
    fn check(before: &str, after: &str, expected_positions: &[u32]) {
        let mut actual = before.to_string();
        let mut actual_positions = vec![];
        normalize_newlines(&mut actual, &mut actual_positions);
        let actual_positions: Vec<_> = actual_positions.into_iter().map(|nc| nc.pos.0).collect();
        assert_eq!(actual.as_str(), after);
        assert_eq!(actual_positions, expected_positions);
    }
    check("", "", &[]);
    check("\n", "\n", &[]);
    check("\r", "\r", &[]);
    check("\r\r", "\r\r", &[]);
    check("\r\n", "\n", &[1]);
    check("hello world", "hello world", &[]);
    check("hello\nworld", "hello\nworld", &[]);
    check("hello\r\nworld", "hello\nworld", &[6]);
    check("\r\nhello\r\nworld\r\n", "\nhello\nworld\n", &[1, 7, 13]);
    check("\r\r\n", "\r\n", &[2]);
    check("hello\rworld", "hello\rworld", &[]);
}