diff options
Diffstat (limited to 'vendor/imara-diff/src/myers/preprocess.rs')
-rw-r--r-- | vendor/imara-diff/src/myers/preprocess.rs | 173 |
1 files changed, 0 insertions, 173 deletions
diff --git a/vendor/imara-diff/src/myers/preprocess.rs b/vendor/imara-diff/src/myers/preprocess.rs deleted file mode 100644 index 905d331b4..000000000 --- a/vendor/imara-diff/src/myers/preprocess.rs +++ /dev/null @@ -1,173 +0,0 @@ -use crate::intern::Token; -use crate::myers::sqrt; -use crate::util::{strip_common_postfix, strip_common_prefix}; - -pub fn preprocess( - mut file1: &[Token], - mut file2: &[Token], -) -> (PreprocessedFile, PreprocessedFile) { - let common_prefix = strip_common_prefix(&mut file1, &mut file2); - strip_common_postfix(&mut file1, &mut file2); - let (hdiff1, hdiff2) = token_occurrences(file1, file2); - let file1 = PreprocessedFile::new(common_prefix, &hdiff1, file1); - let file2 = PreprocessedFile::new(common_prefix, &hdiff2, file2); - (file1, file2) -} - -/// computes how -fn token_occurrences(file1: &[Token], file2: &[Token]) -> (Vec<Occurances>, Vec<Occurances>) { - const MAX_EQLIMIT: u32 = 1024; - - // compute the limit after which tokens are treated as `Occurances::COMMON` - let eqlimit1 = sqrt(file1.len()).min(MAX_EQLIMIT); - let eqlimit2 = sqrt(file2.len()).min(MAX_EQLIMIT); - - // first collect how often each token occurs in a file - let mut occurances1 = Vec::new(); - for token in file1 { - let bucket = token.0 as usize; - if bucket >= occurances1.len() { - occurances1.resize(bucket + 1, 0u32); - } - occurances1[bucket] += 1; - } - - // do the same thing for - let mut occurances2 = Vec::new(); - let token_occurances2: Vec<_> = file2 - .iter() - .map(|token| { - let bucket = token.0 as usize; - if bucket >= occurances2.len() { - occurances2.resize(bucket + 1, 0); - } - occurances2[bucket] += 1; - let occurances1 = *occurances1.get(bucket).unwrap_or(&0); - Occurances::from_occurances(occurances1, eqlimit2) - }) - .collect(); - - let token_occurances1: Vec<_> = file1 - .iter() - .map(|token| { - let bucket = token.0 as usize; - let occurances2 = *occurances2.get(bucket).unwrap_or(&0); - Occurances::from_occurances(occurances2, eqlimit1) - }) - .collect(); - - (token_occurances1, token_occurances2) -} - -#[derive(Clone, Copy, Debug)] -enum Occurances { - /// Token does not occur in this file - None, - /// Token occurs at least once - Some, - /// Token occurs very frequently (exact number depends on file size). - /// Such a tokens are usually empty lines or braces and are often not meaningful to a diff - Common, -} - -impl Occurances { - pub fn from_occurances(occurances: u32, eqlimit: u32) -> Occurances { - if occurances == 0 { - Occurances::None - } else if occurances >= eqlimit { - Occurances::Common - } else { - Occurances::Some - } - } -} - -#[derive(Debug)] -pub struct PreprocessedFile { - pub offset: u32, - pub is_changed: Vec<bool>, - pub indices: Vec<u32>, - pub tokens: Vec<Token>, -} - -impl PreprocessedFile { - fn new(offset: u32, token_diff: &[Occurances], tokens: &[Token]) -> PreprocessedFile { - let mut changed = vec![false; tokens.len()]; - let (tokens, indices) = prune_unmatched_tokens(tokens, token_diff, &mut changed); - PreprocessedFile { offset, is_changed: changed, indices, tokens } - } -} - -fn prune_unmatched_tokens( - file: &[Token], - token_status: &[Occurances], - changed: &mut [bool], -) -> (Vec<Token>, Vec<u32>) { - assert_eq!(token_status.len(), file.len()); - file.iter() - .zip(token_status) - .enumerate() - .filter_map(|(i, (&token, &status))| { - let prune = match status { - Occurances::None => true, - Occurances::Some => false, - Occurances::Common => should_prune_common_line(token_status, i), - }; - if prune { - changed[i] = true; - None - } else { - Some((token, i as u32)) - } - }) - .unzip() -} - -// TODO do not unnecessarily rescan lines -fn should_prune_common_line(token_status: &[Occurances], pos: usize) -> bool { - const WINDOW_SIZE: usize = 100; - - let mut unmatched_before = 0; - let mut common_before = 0; - - let start = if pos > WINDOW_SIZE { WINDOW_SIZE } else { 0 }; - for status in token_status[start..pos].iter().rev() { - match status { - Occurances::None => { - unmatched_before += 1; - } - Occurances::Common => { - common_before += 1; - } - Occurances::Some => break, - } - } - - if unmatched_before == 0 { - return false; - } - - let end = token_status.len().min(pos + WINDOW_SIZE); - let mut unmatched_after = 0; - let mut common_after = 0; - for status in token_status[pos..end].iter() { - match status { - Occurances::None => { - unmatched_after += 1; - } - Occurances::Common => { - common_after += 1; - } - Occurances::Some => break, - } - } - - if unmatched_after == 0 { - return false; - } - - let common = common_before + common_after; - let unmatched = unmatched_before + unmatched_after; - - unmatched > 3 * common -} |