diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:19:41 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:19:41 +0000 |
commit | 4f9fe856a25ab29345b90e7725509e9ee38a37be (patch) | |
tree | e4ffd8a9374cae7b21f7cbfb352927e0e074aff6 /compiler/rustc_span | |
parent | Adding upstream version 1.68.2+dfsg1. (diff) | |
download | rustc-4f9fe856a25ab29345b90e7725509e9ee38a37be.tar.xz rustc-4f9fe856a25ab29345b90e7725509e9ee38a37be.zip |
Adding upstream version 1.69.0+dfsg1. (upstream/1.69.0+dfsg1)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'compiler/rustc_span')
-rw-r--r-- | compiler/rustc_span/src/def_id.rs | 12 | ||||
-rw-r--r-- | compiler/rustc_span/src/edit_distance.rs | 229 | ||||
-rw-r--r-- | compiler/rustc_span/src/edit_distance/tests.rs | 80 | ||||
-rw-r--r-- | compiler/rustc_span/src/edition.rs | 29 | ||||
-rw-r--r-- | compiler/rustc_span/src/lev_distance.rs | 177 | ||||
-rw-r--r-- | compiler/rustc_span/src/lev_distance/tests.rs | 71 | ||||
-rw-r--r-- | compiler/rustc_span/src/lib.rs | 27 | ||||
-rw-r--r-- | compiler/rustc_span/src/span_encoding.rs | 7 | ||||
-rw-r--r-- | compiler/rustc_span/src/symbol.rs | 16 |
9 files changed, 374 insertions, 274 deletions
diff --git a/compiler/rustc_span/src/def_id.rs b/compiler/rustc_span/src/def_id.rs index 7c5e1427d..162c15574 100644 --- a/compiler/rustc_span/src/def_id.rs +++ b/compiler/rustc_span/src/def_id.rs @@ -119,6 +119,12 @@ impl DefPathHash { } } +impl Default for DefPathHash { + fn default() -> Self { + DefPathHash(Fingerprint::ZERO) + } +} + impl Borrow<Fingerprint> for DefPathHash { #[inline] fn borrow(&self) -> &Fingerprint { @@ -229,7 +235,7 @@ impl<D: Decoder> Decodable<D> for DefIndex { pub struct DefId { // cfg-ing the order of fields so that the `DefIndex` which is high entropy always ends up in // the lower bits no matter the endianness. This allows the compiler to turn that `Hash` impl - // into a direct call to 'u64::hash(_)`. + // into a direct call to `u64::hash(_)`. #[cfg(not(all(target_pointer_width = "64", target_endian = "big")))] pub index: DefIndex, pub krate: CrateNum, @@ -293,7 +299,7 @@ impl DefId { #[inline] pub fn as_local(self) -> Option<LocalDefId> { - if self.is_local() { Some(LocalDefId { local_def_index: self.index }) } else { None } + self.is_local().then(|| LocalDefId { local_def_index: self.index }) } #[inline] @@ -314,7 +320,7 @@ impl DefId { #[inline] pub fn as_crate_root(self) -> Option<CrateNum> { - if self.is_crate_root() { Some(self.krate) } else { None } + self.is_crate_root().then_some(self.krate) } #[inline] diff --git a/compiler/rustc_span/src/edit_distance.rs b/compiler/rustc_span/src/edit_distance.rs new file mode 100644 index 000000000..89f0386e3 --- /dev/null +++ b/compiler/rustc_span/src/edit_distance.rs @@ -0,0 +1,229 @@ +//! Edit distances. +//! +//! The [edit distance] is a metric for measuring the difference between two strings. +//! +//! [edit distance]: https://en.wikipedia.org/wiki/Edit_distance + +// The current implementation is the restricted Damerau-Levenshtein algorithm. It is restricted +// because it does not permit modifying characters that have already been transposed. 
The specific +// algorithm should not matter to the caller of the methods, which is why it is not noted in the +// documentation. + +use crate::symbol::Symbol; +use std::{cmp, mem}; + +#[cfg(test)] +mod tests; + +/// Finds the [edit distance] between two strings. +/// +/// Returns `None` if the distance exceeds the limit. +/// +/// [edit distance]: https://en.wikipedia.org/wiki/Edit_distance +pub fn edit_distance(a: &str, b: &str, limit: usize) -> Option<usize> { + let mut a = &a.chars().collect::<Vec<_>>()[..]; + let mut b = &b.chars().collect::<Vec<_>>()[..]; + + // Ensure that `b` is the shorter string, minimizing memory use. + if a.len() < b.len() { + mem::swap(&mut a, &mut b); + } + + let min_dist = a.len() - b.len(); + // If we know the limit will be exceeded, we can return early. + if min_dist > limit { + return None; + } + + // Strip common prefix. + while let Some(((b_char, b_rest), (a_char, a_rest))) = b.split_first().zip(a.split_first()) + && a_char == b_char + { + a = a_rest; + b = b_rest; + } + // Strip common suffix. + while let Some(((b_char, b_rest), (a_char, a_rest))) = b.split_last().zip(a.split_last()) + && a_char == b_char + { + a = a_rest; + b = b_rest; + } + + // If either string is empty, the distance is the length of the other. + // We know that `b` is the shorter string, so we don't need to check `a`. + if b.len() == 0 { + return Some(min_dist); + } + + let mut prev_prev = vec![usize::MAX; b.len() + 1]; + let mut prev = (0..=b.len()).collect::<Vec<_>>(); + let mut current = vec![0; b.len() + 1]; + + // row by row + for i in 1..=a.len() { + current[0] = i; + let a_idx = i - 1; + + // column by column + for j in 1..=b.len() { + let b_idx = j - 1; + + // There is no cost to substitute a character with itself. 
+ let substitution_cost = if a[a_idx] == b[b_idx] { 0 } else { 1 }; + + current[j] = cmp::min( + // deletion + prev[j] + 1, + cmp::min( + // insertion + current[j - 1] + 1, + // substitution + prev[j - 1] + substitution_cost, + ), + ); + + if (i > 1) && (j > 1) && (a[a_idx] == b[b_idx - 1]) && (a[a_idx - 1] == b[b_idx]) { + // transposition + current[j] = cmp::min(current[j], prev_prev[j - 2] + 1); + } + } + + // Rotate the buffers, reusing the memory. + [prev_prev, prev, current] = [prev, current, prev_prev]; + } + + // `prev` because we already rotated the buffers. + let distance = prev[b.len()]; + (distance <= limit).then_some(distance) +} + +/// Provides a word similarity score between two words that accounts for substrings being more +/// meaningful than a typical edit distance. The lower the score, the closer the match. 0 is an +/// identical match. +/// +/// Uses the edit distance between the two strings and removes the cost of the length difference. +/// If this is 0 then it is either a substring match or a full word match, in the substring match +/// case we detect this and return `1`. To prevent finding meaningless substrings, eg. "in" in +/// "shrink", we only perform this subtraction of length difference if one of the words is not +/// greater than twice the length of the other. For cases where the words are close in size but not +/// an exact substring then the cost of the length difference is discounted by half. +/// +/// Returns `None` if the distance exceeds the limit. +pub fn edit_distance_with_substrings(a: &str, b: &str, limit: usize) -> Option<usize> { + let n = a.chars().count(); + let m = b.chars().count(); + + // Check one isn't less than half the length of the other. If this is true then there is a + // big difference in length. 
+ let big_len_diff = (n * 2) < m || (m * 2) < n; + let len_diff = if n < m { m - n } else { n - m }; + let distance = edit_distance(a, b, limit + len_diff)?; + + // This is the crux, subtracting length difference means exact substring matches will now be 0 + let score = distance - len_diff; + + // If the score is 0 but the words have different lengths then it's a substring match not a full + // word match + let score = if score == 0 && len_diff > 0 && !big_len_diff { + 1 // Exact substring match, but not a total word match so return non-zero + } else if !big_len_diff { + // Not a big difference in length, discount cost of length difference + score + (len_diff + 1) / 2 + } else { + // A big difference in length, add back the difference in length to the score + score + len_diff + }; + + (score <= limit).then_some(score) +} + +/// Finds the best match for given word in the given iterator where substrings are meaningful. +/// +/// A version of [`find_best_match_for_name`] that uses [`edit_distance_with_substrings`] as the +/// score for word similarity. This takes an optional distance limit which defaults to one-third of +/// the given word. +/// +/// We use case insensitive comparison to improve accuracy on an edge case with a lower(upper)case +/// letters mismatch. +pub fn find_best_match_for_name_with_substrings( + candidates: &[Symbol], + lookup: Symbol, + dist: Option<usize>, +) -> Option<Symbol> { + find_best_match_for_name_impl(true, candidates, lookup, dist) +} + +/// Finds the best match for a given word in the given iterator. +/// +/// As a loose rule to avoid the obviously incorrect suggestions, it takes +/// an optional limit for the maximum allowable edit distance, which defaults +/// to one-third of the given word. +/// +/// We use case insensitive comparison to improve accuracy on an edge case with a lower(upper)case +/// letters mismatch. 
+pub fn find_best_match_for_name( + candidates: &[Symbol], + lookup: Symbol, + dist: Option<usize>, +) -> Option<Symbol> { + find_best_match_for_name_impl(false, candidates, lookup, dist) +} + +#[cold] +fn find_best_match_for_name_impl( + use_substring_score: bool, + candidates: &[Symbol], + lookup: Symbol, + dist: Option<usize>, +) -> Option<Symbol> { + let lookup = lookup.as_str(); + let lookup_uppercase = lookup.to_uppercase(); + + // Priority of matches: + // 1. Exact case insensitive match + // 2. Edit distance match + // 3. Sorted word match + if let Some(c) = candidates.iter().find(|c| c.as_str().to_uppercase() == lookup_uppercase) { + return Some(*c); + } + + let mut dist = dist.unwrap_or_else(|| cmp::max(lookup.len(), 3) / 3); + let mut best = None; + for c in candidates { + match if use_substring_score { + edit_distance_with_substrings(lookup, c.as_str(), dist) + } else { + edit_distance(lookup, c.as_str(), dist) + } { + Some(0) => return Some(*c), + Some(d) => { + dist = d - 1; + best = Some(*c); + } + None => {} + } + } + if best.is_some() { + return best; + } + + find_match_by_sorted_words(candidates, lookup) +} + +fn find_match_by_sorted_words(iter_names: &[Symbol], lookup: &str) -> Option<Symbol> { + iter_names.iter().fold(None, |result, candidate| { + if sort_by_words(candidate.as_str()) == sort_by_words(lookup) { + Some(*candidate) + } else { + result + } + }) +} + +fn sort_by_words(name: &str) -> String { + let mut split_words: Vec<&str> = name.split('_').collect(); + // We are sorting primitive &strs and can use unstable sort here. 
+ split_words.sort_unstable(); + split_words.join("_") +} diff --git a/compiler/rustc_span/src/edit_distance/tests.rs b/compiler/rustc_span/src/edit_distance/tests.rs new file mode 100644 index 000000000..c9c7a1f1b --- /dev/null +++ b/compiler/rustc_span/src/edit_distance/tests.rs @@ -0,0 +1,80 @@ +use super::*; + +#[test] +fn test_edit_distance() { + // Test bytelength agnosticity + for c in (0..char::MAX as u32).filter_map(char::from_u32).map(|i| i.to_string()) { + assert_eq!(edit_distance(&c[..], &c[..], usize::MAX), Some(0)); + } + + let a = "\nMäry häd ä little lämb\n\nLittle lämb\n"; + let b = "\nMary häd ä little lämb\n\nLittle lämb\n"; + let c = "Mary häd ä little lämb\n\nLittle lämb\n"; + assert_eq!(edit_distance(a, b, usize::MAX), Some(1)); + assert_eq!(edit_distance(b, a, usize::MAX), Some(1)); + assert_eq!(edit_distance(a, c, usize::MAX), Some(2)); + assert_eq!(edit_distance(c, a, usize::MAX), Some(2)); + assert_eq!(edit_distance(b, c, usize::MAX), Some(1)); + assert_eq!(edit_distance(c, b, usize::MAX), Some(1)); +} + +#[test] +fn test_edit_distance_limit() { + assert_eq!(edit_distance("abc", "abcd", 1), Some(1)); + assert_eq!(edit_distance("abc", "abcd", 0), None); + assert_eq!(edit_distance("abc", "xyz", 3), Some(3)); + assert_eq!(edit_distance("abc", "xyz", 2), None); +} + +#[test] +fn test_method_name_similarity_score() { + assert_eq!(edit_distance_with_substrings("empty", "is_empty", 1), Some(1)); + assert_eq!(edit_distance_with_substrings("shrunk", "rchunks", 2), None); + assert_eq!(edit_distance_with_substrings("abc", "abcd", 1), Some(1)); + assert_eq!(edit_distance_with_substrings("a", "abcd", 1), None); + assert_eq!(edit_distance_with_substrings("edf", "eq", 1), None); + assert_eq!(edit_distance_with_substrings("abc", "xyz", 3), Some(3)); + assert_eq!(edit_distance_with_substrings("abcdef", "abcdef", 2), Some(0)); +} + +#[test] +fn test_find_best_match_for_name() { + use crate::create_default_session_globals_then; + 
create_default_session_globals_then(|| { + let input = vec![Symbol::intern("aaab"), Symbol::intern("aaabc")]; + assert_eq!( + find_best_match_for_name(&input, Symbol::intern("aaaa"), None), + Some(Symbol::intern("aaab")) + ); + + assert_eq!(find_best_match_for_name(&input, Symbol::intern("1111111111"), None), None); + + let input = vec![Symbol::intern("AAAA")]; + assert_eq!( + find_best_match_for_name(&input, Symbol::intern("aaaa"), None), + Some(Symbol::intern("AAAA")) + ); + + let input = vec![Symbol::intern("AAAA")]; + assert_eq!( + find_best_match_for_name(&input, Symbol::intern("aaaa"), Some(4)), + Some(Symbol::intern("AAAA")) + ); + + let input = vec![Symbol::intern("a_longer_variable_name")]; + assert_eq!( + find_best_match_for_name(&input, Symbol::intern("a_variable_longer_name"), None), + Some(Symbol::intern("a_longer_variable_name")) + ); + }) +} + +#[test] +fn test_precise_algorithm() { + // Not Levenshtein distance. + assert_ne!(edit_distance("ab", "ba", usize::MAX), Some(2)); + // Not unrestricted Damerau-Levenshtein distance. + assert_ne!(edit_distance("abde", "bcaed", usize::MAX), Some(3)); + // The current implementation is a restricted Damerau-Levenshtein distance. 
+ assert_eq!(edit_distance("abde", "bcaed", usize::MAX), Some(4)); +} diff --git a/compiler/rustc_span/src/edition.rs b/compiler/rustc_span/src/edition.rs index b43183916..f16db69aa 100644 --- a/compiler/rustc_span/src/edition.rs +++ b/compiler/rustc_span/src/edition.rs @@ -49,8 +49,8 @@ impl fmt::Display for Edition { } impl Edition { - pub fn lint_name(&self) -> &'static str { - match *self { + pub fn lint_name(self) -> &'static str { + match self { Edition::Edition2015 => "rust_2015_compatibility", Edition::Edition2018 => "rust_2018_compatibility", Edition::Edition2021 => "rust_2021_compatibility", @@ -58,8 +58,8 @@ impl Edition { } } - pub fn feature_name(&self) -> Symbol { - match *self { + pub fn feature_name(self) -> Symbol { + match self { Edition::Edition2015 => sym::rust_2015_preview, Edition::Edition2018 => sym::rust_2018_preview, Edition::Edition2021 => sym::rust_2021_preview, @@ -67,8 +67,8 @@ impl Edition { } } - pub fn is_stable(&self) -> bool { - match *self { + pub fn is_stable(self) -> bool { + match self { Edition::Edition2015 => true, Edition::Edition2018 => true, Edition::Edition2021 => true, @@ -76,23 +76,24 @@ impl Edition { } } - pub fn rust_2015(&self) -> bool { - *self == Edition::Edition2015 + /// Is this edition 2015? + pub fn is_rust_2015(self) -> bool { + self == Edition::Edition2015 } /// Are we allowed to use features from the Rust 2018 edition? - pub fn rust_2018(&self) -> bool { - *self >= Edition::Edition2018 + pub fn rust_2018(self) -> bool { + self >= Edition::Edition2018 } /// Are we allowed to use features from the Rust 2021 edition? - pub fn rust_2021(&self) -> bool { - *self >= Edition::Edition2021 + pub fn rust_2021(self) -> bool { + self >= Edition::Edition2021 } /// Are we allowed to use features from the Rust 2024 edition? 
- pub fn rust_2024(&self) -> bool { - *self >= Edition::Edition2024 + pub fn rust_2024(self) -> bool { + self >= Edition::Edition2024 } } diff --git a/compiler/rustc_span/src/lev_distance.rs b/compiler/rustc_span/src/lev_distance.rs deleted file mode 100644 index 61e4b98a8..000000000 --- a/compiler/rustc_span/src/lev_distance.rs +++ /dev/null @@ -1,177 +0,0 @@ -//! Levenshtein distances. -//! -//! The [Levenshtein distance] is a metric for measuring the difference between two strings. -//! -//! [Levenshtein distance]: https://en.wikipedia.org/wiki/Levenshtein_distance - -use crate::symbol::Symbol; -use std::cmp; - -#[cfg(test)] -mod tests; - -/// Finds the Levenshtein distance between two strings. -/// -/// Returns None if the distance exceeds the limit. -pub fn lev_distance(a: &str, b: &str, limit: usize) -> Option<usize> { - let n = a.chars().count(); - let m = b.chars().count(); - let min_dist = if n < m { m - n } else { n - m }; - - if min_dist > limit { - return None; - } - if n == 0 || m == 0 { - return (min_dist <= limit).then_some(min_dist); - } - - let mut dcol: Vec<_> = (0..=m).collect(); - - for (i, sc) in a.chars().enumerate() { - let mut current = i; - dcol[0] = current + 1; - - for (j, tc) in b.chars().enumerate() { - let next = dcol[j + 1]; - if sc == tc { - dcol[j + 1] = current; - } else { - dcol[j + 1] = cmp::min(current, next); - dcol[j + 1] = cmp::min(dcol[j + 1], dcol[j]) + 1; - } - current = next; - } - } - - (dcol[m] <= limit).then_some(dcol[m]) -} - -/// Provides a word similarity score between two words that accounts for substrings being more -/// meaningful than a typical Levenshtein distance. The lower the score, the closer the match. -/// 0 is an identical match. -/// -/// Uses the Levenshtein distance between the two strings and removes the cost of the length -/// difference. If this is 0 then it is either a substring match or a full word match, in the -/// substring match case we detect this and return `1`. 
To prevent finding meaningless substrings, -/// eg. "in" in "shrink", we only perform this subtraction of length difference if one of the words -/// is not greater than twice the length of the other. For cases where the words are close in size -/// but not an exact substring then the cost of the length difference is discounted by half. -/// -/// Returns `None` if the distance exceeds the limit. -pub fn lev_distance_with_substrings(a: &str, b: &str, limit: usize) -> Option<usize> { - let n = a.chars().count(); - let m = b.chars().count(); - - // Check one isn't less than half the length of the other. If this is true then there is a - // big difference in length. - let big_len_diff = (n * 2) < m || (m * 2) < n; - let len_diff = if n < m { m - n } else { n - m }; - let lev = lev_distance(a, b, limit + len_diff)?; - - // This is the crux, subtracting length difference means exact substring matches will now be 0 - let score = lev - len_diff; - - // If the score is 0 but the words have different lengths then it's a substring match not a full - // word match - let score = if score == 0 && len_diff > 0 && !big_len_diff { - 1 // Exact substring match, but not a total word match so return non-zero - } else if !big_len_diff { - // Not a big difference in length, discount cost of length difference - score + (len_diff + 1) / 2 - } else { - // A big difference in length, add back the difference in length to the score - score + len_diff - }; - - (score <= limit).then_some(score) -} - -/// Finds the best match for given word in the given iterator where substrings are meaningful. -/// -/// A version of [`find_best_match_for_name`] that uses [`lev_distance_with_substrings`] as the score -/// for word similarity. This takes an optional distance limit which defaults to one-third of the -/// given word. -/// -/// Besides the modified Levenshtein, we use case insensitive comparison to improve accuracy -/// on an edge case with a lower(upper)case letters mismatch. 
-pub fn find_best_match_for_name_with_substrings( - candidates: &[Symbol], - lookup: Symbol, - dist: Option<usize>, -) -> Option<Symbol> { - find_best_match_for_name_impl(true, candidates, lookup, dist) -} - -/// Finds the best match for a given word in the given iterator. -/// -/// As a loose rule to avoid the obviously incorrect suggestions, it takes -/// an optional limit for the maximum allowable edit distance, which defaults -/// to one-third of the given word. -/// -/// Besides Levenshtein, we use case insensitive comparison to improve accuracy -/// on an edge case with a lower(upper)case letters mismatch. -pub fn find_best_match_for_name( - candidates: &[Symbol], - lookup: Symbol, - dist: Option<usize>, -) -> Option<Symbol> { - find_best_match_for_name_impl(false, candidates, lookup, dist) -} - -#[cold] -fn find_best_match_for_name_impl( - use_substring_score: bool, - candidates: &[Symbol], - lookup: Symbol, - dist: Option<usize>, -) -> Option<Symbol> { - let lookup = lookup.as_str(); - let lookup_uppercase = lookup.to_uppercase(); - - // Priority of matches: - // 1. Exact case insensitive match - // 2. Levenshtein distance match - // 3. 
Sorted word match - if let Some(c) = candidates.iter().find(|c| c.as_str().to_uppercase() == lookup_uppercase) { - return Some(*c); - } - - let mut dist = dist.unwrap_or_else(|| cmp::max(lookup.len(), 3) / 3); - let mut best = None; - for c in candidates { - match if use_substring_score { - lev_distance_with_substrings(lookup, c.as_str(), dist) - } else { - lev_distance(lookup, c.as_str(), dist) - } { - Some(0) => return Some(*c), - Some(d) => { - dist = d - 1; - best = Some(*c); - } - None => {} - } - } - if best.is_some() { - return best; - } - - find_match_by_sorted_words(candidates, lookup) -} - -fn find_match_by_sorted_words(iter_names: &[Symbol], lookup: &str) -> Option<Symbol> { - iter_names.iter().fold(None, |result, candidate| { - if sort_by_words(candidate.as_str()) == sort_by_words(lookup) { - Some(*candidate) - } else { - result - } - }) -} - -fn sort_by_words(name: &str) -> String { - let mut split_words: Vec<&str> = name.split('_').collect(); - // We are sorting primitive &strs and can use unstable sort here. 
- split_words.sort_unstable(); - split_words.join("_") -} diff --git a/compiler/rustc_span/src/lev_distance/tests.rs b/compiler/rustc_span/src/lev_distance/tests.rs deleted file mode 100644 index b17d6588c..000000000 --- a/compiler/rustc_span/src/lev_distance/tests.rs +++ /dev/null @@ -1,71 +0,0 @@ -use super::*; - -#[test] -fn test_lev_distance() { - use std::char::{from_u32, MAX}; - // Test bytelength agnosticity - for c in (0..MAX as u32).filter_map(from_u32).map(|i| i.to_string()) { - assert_eq!(lev_distance(&c[..], &c[..], usize::MAX), Some(0)); - } - - let a = "\nMäry häd ä little lämb\n\nLittle lämb\n"; - let b = "\nMary häd ä little lämb\n\nLittle lämb\n"; - let c = "Mary häd ä little lämb\n\nLittle lämb\n"; - assert_eq!(lev_distance(a, b, usize::MAX), Some(1)); - assert_eq!(lev_distance(b, a, usize::MAX), Some(1)); - assert_eq!(lev_distance(a, c, usize::MAX), Some(2)); - assert_eq!(lev_distance(c, a, usize::MAX), Some(2)); - assert_eq!(lev_distance(b, c, usize::MAX), Some(1)); - assert_eq!(lev_distance(c, b, usize::MAX), Some(1)); -} - -#[test] -fn test_lev_distance_limit() { - assert_eq!(lev_distance("abc", "abcd", 1), Some(1)); - assert_eq!(lev_distance("abc", "abcd", 0), None); - assert_eq!(lev_distance("abc", "xyz", 3), Some(3)); - assert_eq!(lev_distance("abc", "xyz", 2), None); -} - -#[test] -fn test_method_name_similarity_score() { - assert_eq!(lev_distance_with_substrings("empty", "is_empty", 1), Some(1)); - assert_eq!(lev_distance_with_substrings("shrunk", "rchunks", 2), None); - assert_eq!(lev_distance_with_substrings("abc", "abcd", 1), Some(1)); - assert_eq!(lev_distance_with_substrings("a", "abcd", 1), None); - assert_eq!(lev_distance_with_substrings("edf", "eq", 1), None); - assert_eq!(lev_distance_with_substrings("abc", "xyz", 3), Some(3)); - assert_eq!(lev_distance_with_substrings("abcdef", "abcdef", 2), Some(0)); -} - -#[test] -fn test_find_best_match_for_name() { - use crate::create_default_session_globals_then; - 
create_default_session_globals_then(|| { - let input = vec![Symbol::intern("aaab"), Symbol::intern("aaabc")]; - assert_eq!( - find_best_match_for_name(&input, Symbol::intern("aaaa"), None), - Some(Symbol::intern("aaab")) - ); - - assert_eq!(find_best_match_for_name(&input, Symbol::intern("1111111111"), None), None); - - let input = vec![Symbol::intern("AAAA")]; - assert_eq!( - find_best_match_for_name(&input, Symbol::intern("aaaa"), None), - Some(Symbol::intern("AAAA")) - ); - - let input = vec![Symbol::intern("AAAA")]; - assert_eq!( - find_best_match_for_name(&input, Symbol::intern("aaaa"), Some(4)), - Some(Symbol::intern("AAAA")) - ); - - let input = vec![Symbol::intern("a_longer_variable_name")]; - assert_eq!( - find_best_match_for_name(&input, Symbol::intern("a_variable_longer_name"), None), - Some(Symbol::intern("a_longer_variable_name")) - ); - }) -} diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs index 7e61f2f9f..873cd33f6 100644 --- a/compiler/rustc_span/src/lib.rs +++ b/compiler/rustc_span/src/lib.rs @@ -19,6 +19,7 @@ #![feature(negative_impls)] #![feature(min_specialization)] #![feature(rustc_attrs)] +#![feature(let_chains)] #![deny(rustc::untranslatable_diagnostic)] #![deny(rustc::diagnostic_outside_of_impl)] @@ -46,7 +47,7 @@ pub use hygiene::{ExpnData, ExpnHash, ExpnId, LocalExpnId, SyntaxContext}; use rustc_data_structures::stable_hasher::HashingControls; pub mod def_id; use def_id::{CrateNum, DefId, DefPathHash, LocalDefId, LOCAL_CRATE}; -pub mod lev_distance; +pub mod edit_distance; mod span_encoding; pub use span_encoding::{Span, DUMMY_SP}; @@ -705,23 +706,23 @@ impl Span { } #[inline] - pub fn rust_2015(self) -> bool { - self.edition() == edition::Edition::Edition2015 + pub fn is_rust_2015(self) -> bool { + self.edition().is_rust_2015() } #[inline] pub fn rust_2018(self) -> bool { - self.edition() >= edition::Edition::Edition2018 + self.edition().rust_2018() } #[inline] pub fn rust_2021(self) -> bool { - self.edition() 
>= edition::Edition::Edition2021 + self.edition().rust_2021() } #[inline] pub fn rust_2024(self) -> bool { - self.edition() >= edition::Edition::Edition2024 + self.edition().rust_2024() } /// Returns the source callee. @@ -2148,3 +2149,17 @@ where Hash::hash(&len, hasher); } } + +/// Useful type to use with `Result<>` indicate that an error has already +/// been reported to the user, so no need to continue checking. +#[derive(Clone, Copy, Debug, Encodable, Decodable, Hash, PartialEq, Eq, PartialOrd, Ord)] +#[derive(HashStable_Generic)] +pub struct ErrorGuaranteed(()); + +impl ErrorGuaranteed { + /// To be used only if you really know what you are doing... ideally, we would find a way to + /// eliminate all calls to this method. + pub fn unchecked_claim_error_was_emitted() -> Self { + ErrorGuaranteed(()) + } +} diff --git a/compiler/rustc_span/src/span_encoding.rs b/compiler/rustc_span/src/span_encoding.rs index d48c4f7e5..c600298c5 100644 --- a/compiler/rustc_span/src/span_encoding.rs +++ b/compiler/rustc_span/src/span_encoding.rs @@ -110,11 +110,16 @@ impl Span { // Inline format with parent. let len_or_tag = len_or_tag | PARENT_MASK; let parent2 = parent.local_def_index.as_u32(); - if ctxt2 == SyntaxContext::root().as_u32() && parent2 <= MAX_CTXT { + if ctxt2 == SyntaxContext::root().as_u32() + && parent2 <= MAX_CTXT + && len_or_tag < LEN_TAG + { + debug_assert_ne!(len_or_tag, LEN_TAG); return Span { base_or_index: base, len_or_tag, ctxt_or_tag: parent2 as u16 }; } } else { // Inline format with ctxt. + debug_assert_ne!(len_or_tag, LEN_TAG); return Span { base_or_index: base, len_or_tag: len as u16, diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index 7597b8d12..6272bf7f2 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -288,6 +288,7 @@ symbols! { Target, ToOwned, ToString, + TokenStream, Try, TryCaptureGeneric, TryCapturePrintable, @@ -723,16 +724,23 @@ symbols! 
{ forbid, forget, format, + format_alignment, format_args, format_args_capture, format_args_macro, format_args_nl, + format_argument, + format_arguments, + format_count, format_macro, + format_placeholder, + format_unsafe_arg, freeze, freg, frem_fast, from, from_desugaring, + from_fn, from_iter, from_method, from_output, @@ -948,6 +956,7 @@ symbols! { mul, mul_assign, mul_with_overflow, + multiple_supertrait_upcastable, must_not_suspend, must_use, naked, @@ -1008,6 +1017,7 @@ symbols! { non_ascii_idents, non_exhaustive, non_exhaustive_omitted_patterns_lint, + non_lifetime_binders, non_modrs_mods, nontemporal_store, noop_method_borrow, @@ -1041,6 +1051,7 @@ symbols! { overlapping_marker_traits, owned_box, packed, + packed_bundled_libs, panic, panic_2015, panic_2021, @@ -1076,7 +1087,7 @@ symbols! { plugins, pointee_trait, pointer, - pointer_sized, + pointer_like, poll, position, post_dash_lto: "post-lto", @@ -1215,6 +1226,7 @@ symbols! { rustc_capture_analysis, rustc_clean, rustc_coherence_is_core, + rustc_coinductive, rustc_const_stable, rustc_const_unstable, rustc_conversion_suggestion, @@ -1942,7 +1954,7 @@ impl Interner { let name = Symbol::new(inner.strings.len() as u32); // SAFETY: we convert from `&str` to `&[u8]`, clone it into the arena, - // and immediately convert the clone back to `&[u8], all because there + // and immediately convert the clone back to `&[u8]`, all because there // is no `inner.arena.alloc_str()` method. This is clearly safe. let string: &str = unsafe { str::from_utf8_unchecked(inner.arena.alloc_slice(string.as_bytes())) }; |