Diffstat (limited to 'vendor/imara-diff/src')
-rw-r--r--	vendor/imara-diff/src/histogram.rs	| 133
-rw-r--r--	vendor/imara-diff/src/histogram/lcs.rs	| 127
-rw-r--r--	vendor/imara-diff/src/histogram/list_pool.rs	| 258
-rw-r--r--	vendor/imara-diff/src/intern.rs	| 172
-rw-r--r--	vendor/imara-diff/src/lib.rs	| 267
-rw-r--r--	vendor/imara-diff/src/myers.rs	| 271
-rw-r--r--	vendor/imara-diff/src/myers/middle_snake.rs	| 246
-rw-r--r--	vendor/imara-diff/src/myers/preprocess.rs	| 173
-rw-r--r--	vendor/imara-diff/src/myers/slice.rs	| 65
-rw-r--r--	vendor/imara-diff/src/sink.rs	| 110
-rw-r--r--	vendor/imara-diff/src/sources.rs	| 151
-rw-r--r--	vendor/imara-diff/src/tests.rs	| 152
-rw-r--r--	vendor/imara-diff/src/unified_diff.rs	| 136
-rw-r--r--	vendor/imara-diff/src/util.rs	| 48
14 files changed, 0 insertions(+), 2309 deletions(-)
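The listing below removes the vendored imara-diff crate in full. For orientation, its public entry point is the `diff` function over interned inputs, documented in the `lib.rs` hunk further down. A minimal usage sketch (not part of the vendored sources; it assumes the crate is importable as `imara_diff` with the `unified_diff` feature enabled):

```
use imara_diff::intern::InternedInput;
use imara_diff::{diff, Algorithm, UnifiedDiffBuilder};

fn main() {
    // Interning maps each distinct line to a small integer token,
    // so the diff algorithms never touch the line data directly.
    let input = InternedInput::new("a\nb\nc\n", "a\nB\nc\n");
    let unified = diff(Algorithm::Histogram, &input, UnifiedDiffBuilder::new(&input));
    print!("{unified}");
}
```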
diff --git a/vendor/imara-diff/src/histogram.rs b/vendor/imara-diff/src/histogram.rs
deleted file mode 100644
index f6c7e4f16..000000000
--- a/vendor/imara-diff/src/histogram.rs
+++ /dev/null
@@ -1,133 +0,0 @@
-use std::ops::Range;
-
-use crate::histogram::lcs::find_lcs;
-use crate::histogram::list_pool::{ListHandle, ListPool};
-use crate::intern::Token;
-use crate::util::{strip_common_postfix, strip_common_prefix};
-use crate::{myers, Sink};
-
-mod lcs;
-mod list_pool;
-
-const MAX_CHAIN_LEN: u32 = 63;
-
-struct Histogram {
-    token_occurances: Vec<ListHandle>,
-    pool: ListPool,
-}
-
-pub fn diff<S: Sink>(
-    mut before: &[Token],
-    mut after: &[Token],
-    num_tokens: u32,
-    mut sink: S,
-) -> S::Out {
-    let mut histogram = Histogram::new(num_tokens);
-    let prefix = strip_common_prefix(&mut before, &mut after);
-    strip_common_postfix(&mut before, &mut after);
-    histogram.run(before, prefix, after, prefix, &mut sink);
-    sink.finish()
-}
-
-impl Histogram {
-    fn new(num_buckets: u32) -> Histogram {
-        Histogram {
-            token_occurances: vec![ListHandle::default(); num_buckets as usize],
-            pool: ListPool::new(2 * num_buckets),
-        }
-    }
-
-    fn clear(&mut self) {
-        self.pool.clear();
-    }
-
-    fn token_occurances(&self, token: Token) -> &[u32] {
-        self.token_occurances[token.0 as usize].as_slice(&self.pool)
-    }
-
-    fn num_token_occurances(&self, token: Token) -> u32 {
-        self.token_occurances[token.0 as usize].len(&self.pool) as u32
-    }
-
-    fn populate(&mut self, file: &[Token]) {
-        for (i, &token) in file.iter().enumerate() {
-            self.token_occurances[token.0 as usize].push(i as u32, &mut self.pool);
-        }
-    }
-
-    fn run(
-        &mut self,
-        mut before: &[Token],
-        mut before_off: u32,
-        mut after: &[Token],
-        mut after_off: u32,
-        sink: &mut impl Sink,
-    ) {
-        loop {
-            if before.is_empty() {
-                if !after.is_empty() {
-                    sink.process_change(
-                        before_off..before_off,
-                        after_off..after_off + after.len() as u32,
-                    );
-                }
-                return;
-            } else if after.is_empty() {
-                sink.process_change(
-                    before_off..before_off + before.len() as u32,
-                    after_off..after_off,
-                );
-                return;
-            }
-
-            self.populate(before);
-            match find_lcs(before, after, self) {
-                // an LCS of length 0 means that file1 and file2 have nothing in common
-                Some(lcs) if lcs.len == 0 => {
-                    sink.process_change(
-                        before_off..before_off + before.len() as u32,
-                        after_off..after_off + after.len() as u32,
-                    );
-                    return;
-                }
-                Some(lcs) => {
-                    self.run(
-                        &before[..lcs.before_start as usize],
-                        before_off,
-                        &after[..lcs.after_start as usize],
-                        after_off,
-                        sink,
-                    );
-
-                    // this is equivalent to (tail) recursion but implemented as a loop for efficiency reasons
-                    let before_end = lcs.before_start + lcs.len;
-                    before = &before[before_end as usize..];
-                    before_off += before_end;
-
-                    let after_end = lcs.after_start + lcs.len;
-                    after = &after[after_end as usize..];
-                    after_off += after_end;
-                }
-                None => {
-                    // we are diffing two extremely large repetitive files
-                    // this is a worst case for histogram diff with O(N^2) performance
-                    // fall back to myers to maintain linear time complexity
-                    myers::diff(
-                        before,
-                        after,
-                        0, // not used by myers
-                        |mut before: Range<u32>, mut after: Range<u32>| {
-                            before.start += before_off;
-                            before.end += before_off;
-                            after.start += after_off;
-                            after.end += after_off;
-                            sink.process_change(before, after)
-                        },
-                        false,
-                    );
-                    return;
-                }
-            }
-        }
-    }
-}
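`Histogram::run` above anchors the diff on an LCS, recurses on the region before it, and loops on the region after it. A standalone sketch of just that control flow, with a deliberately naive longest-common-substring search standing in for `find_lcs` and a plain `Vec` standing in for a `Sink` (illustrative only, not the vendored implementation):

```
type Change = (std::ops::Range<u32>, std::ops::Range<u32>);

// Brute-force stand-in for `find_lcs`: longest common substring.
fn naive_lcs(before: &[u8], after: &[u8]) -> (usize, usize, usize) {
    let (mut bs, mut as_, mut len) = (0, 0, 0);
    for i in 0..before.len() {
        for j in 0..after.len() {
            let mut k = 0;
            while i + k < before.len() && j + k < after.len() && before[i + k] == after[j + k] {
                k += 1;
            }
            if k > len {
                (bs, as_, len) = (i, j, k);
            }
        }
    }
    (bs, as_, len)
}

fn run(before: &[u8], boff: u32, after: &[u8], aoff: u32, out: &mut Vec<Change>) {
    // leaves: one side empty => pure insertion or removal
    if before.is_empty() {
        if !after.is_empty() {
            out.push((boff..boff, aoff..aoff + after.len() as u32));
        }
        return;
    }
    if after.is_empty() {
        out.push((boff..boff + before.len() as u32, aoff..aoff));
        return;
    }
    let (bs, as_, len) = naive_lcs(before, after);
    if len == 0 {
        // nothing in common: replace everything
        out.push((boff..boff + before.len() as u32, aoff..aoff + after.len() as u32));
        return;
    }
    // recurse on both sides of the anchor (the real code turns the
    // second call into a loop, as the comment in `run` above notes)
    run(&before[..bs], boff, &after[..as_], aoff, out);
    run(
        &before[bs + len..],
        boff + (bs + len) as u32,
        &after[as_ + len..],
        aoff + (as_ + len) as u32,
        out,
    );
}

fn main() {
    let mut changes = Vec::new();
    run(b"abc", 0, b"aXc", 0, &mut changes);
    assert_eq!(changes, vec![(1..2, 1..2)]);
}
```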
diff --git a/vendor/imara-diff/src/histogram/lcs.rs b/vendor/imara-diff/src/histogram/lcs.rs
deleted file mode 100644
index f8d8c2930..000000000
--- a/vendor/imara-diff/src/histogram/lcs.rs
+++ /dev/null
@@ -1,127 +0,0 @@
-use crate::histogram::{Histogram, MAX_CHAIN_LEN};
-use crate::intern::Token;
-
-pub(super) fn find_lcs(
-    before: &[Token],
-    after: &[Token],
-    histogram: &mut Histogram,
-) -> Option<Lcs> {
-    let mut search =
-        LcsSearch { lcs: Lcs::default(), min_occurances: MAX_CHAIN_LEN + 1, found_cs: false };
-    search.run(before, after, histogram);
-    if search.success() {
-        Some(search.lcs)
-    } else {
-        None
-    }
-}
-
-#[derive(Default, Debug)]
-pub struct Lcs {
-    pub before_start: u32,
-    pub after_start: u32,
-    pub len: u32,
-}
-
-pub struct LcsSearch {
-    lcs: Lcs,
-    min_occurances: u32,
-    found_cs: bool,
-}
-
-impl LcsSearch {
-    fn run(&mut self, before: &[Token], after: &[Token], histogram: &mut Histogram) {
-        let mut pos = 0;
-        while let Some(&token) = after.get(pos as usize) {
-            if histogram.num_token_occurances(token) != 0 {
-                self.found_cs = true;
-                if histogram.num_token_occurances(token) <= self.min_occurances {
-                    pos = self.update_lcs(pos, token, histogram, before, after);
-                    continue;
-                }
-            }
-
-            pos += 1;
-        }
-
-        histogram.clear();
-    }
-
-    fn success(&mut self) -> bool {
-        !self.found_cs || self.min_occurances <= MAX_CHAIN_LEN
-    }
-
-    fn update_lcs(
-        &mut self,
-        after_pos: u32,
-        token: Token,
-        histogram: &Histogram,
-        before: &[Token],
-        after: &[Token],
-    ) -> u32 {
-        let mut next_token_idx2 = after_pos + 1;
-        let mut occurances_iter = histogram.token_occurances(token).iter().copied();
-        let mut token_idx1 = occurances_iter.next().unwrap();
-
-        'occurances_iter: loop {
-            let mut occurances = histogram.num_token_occurances(token);
-            let mut start1 = token_idx1;
-            let mut start2 = after_pos;
-            loop {
-                if start1 == 0 || start2 == 0 {
-                    break;
-                }
-                let token1 = before.get(start1 as usize - 1);
-                let token2 = after.get(start2 as usize - 1);
-                if matches!((token1, token2), (Some(token1), Some(token2)) if token1 == token2) {
-                    start1 -= 1;
-                    start2 -= 1;
-                    let new_occurances = histogram.num_token_occurances(before[start1 as usize]);
-                    occurances = occurances.min(new_occurances);
-                } else {
-                    break;
-                }
-            }
-
-            let mut end1 = token_idx1 + 1;
-            let mut end2 = after_pos + 1;
-
-            loop {
-                let token1 = before.get(end1 as usize);
-                let token2 = after.get(end2 as usize);
-                if matches!((token1, token2), (Some(token1), Some(token2)) if token1 == token2) {
-                    let new_occurances = histogram.num_token_occurances(before[end1 as usize]);
-                    occurances = occurances.min(new_occurances);
-                    end1 += 1;
-                    end2 += 1;
-                } else {
-                    break;
-                }
-            }
-
-            if next_token_idx2 < end2 {
-                next_token_idx2 = end2;
-            }
-
-            let len = end2 - start2;
-            debug_assert_eq!(len, end1 - start1);
-            if self.lcs.len < len || self.min_occurances > occurances {
-                self.min_occurances = occurances;
-                self.lcs = Lcs { before_start: start1, after_start: start2, len };
-            }
-
-            loop {
-                if let Some(next_token_idx) = occurances_iter.next() {
-                    if next_token_idx > end2 {
-                        token_idx1 = next_token_idx;
-                        break;
-                    }
-                } else {
-                    break 'occurances_iter;
-                }
-            }
-        }
-
-        next_token_idx2
-    }
-}
diff --git a/vendor/imara-diff/src/histogram/list_pool.rs b/vendor/imara-diff/src/histogram/list_pool.rs
deleted file mode 100644
index af257aa5b..000000000
--- a/vendor/imara-diff/src/histogram/list_pool.rs
+++ /dev/null
@@ -1,258 +0,0 @@
-use crate::histogram::MAX_CHAIN_LEN;
-
-/// A small list of entity references allocated from a pool.
-///
-/// A `ListHandle` type provides similar functionality to `Vec`, but with some important
-/// differences in the implementation:
-///
-/// 1. Memory is allocated from a `ListPool` instead of the global heap.
-/// 2. The footprint of an entity list is 12 bytes (index, generation, and length), compared with
-///    the 24 bytes for `Vec`.
-/// 3. An entity list doesn't implement `Drop`, leaving it to the pool to manage memory.
-///
-/// The list pool is intended to be used as a LIFO allocator. After building up a larger data
-/// structure with many list references, the whole thing can be discarded quickly by clearing the
-/// pool.
-///
-/// # Safety
-///
-/// Entity lists are not as safe to use as `Vec`, but they never jeopardize Rust's memory safety
-/// guarantees. These are the problems to be aware of:
-///
-/// - If you lose track of an entity list, its memory won't be recycled until the pool is cleared.
-///   This can cause the pool to grow very large with leaked lists.
-/// - If entity lists are used after their pool is cleared, they may contain garbage data, and
-///   modifying them may corrupt other lists in the pool.
-/// - If an entity list is used with two different pool instances, both pools are likely to become
-///   corrupted.
-///
-/// Entity lists can be cloned, but that operation should only be used as part of cloning the whole
-/// function they belong to. *Cloning an entity list does not allocate new memory for the clone*.
-/// It creates an alias of the same memory.
-///
-/// Entity lists cannot be hashed and compared for equality because it's not possible to compare the
-/// contents of the list without the pool reference.
-///
-/// # Implementation
-///
-/// The `ListHandle` itself is designed to have the smallest possible footprint. This is important
-/// because it is used inside very compact data structures like `InstructionData`. The list
-/// contains a 32-bit index into the pool's memory vector, pointing to the first element of
-/// the list, plus a generation counter and the list length.
-///
-/// The pool is just a single `Vec` containing all of the allocated lists. Each list is
-/// represented as three contiguous parts:
-///
-/// 1. The number of elements in the list.
-/// 2. The list elements.
-/// 3. Excess capacity elements.
-///
-/// The total size of the three parts is always a power of two, and the excess capacity is always
-/// as small as possible. This means that shrinking a list may cause the excess capacity to shrink
-/// if a smaller power-of-two size becomes available.
-///
-/// Both growing and shrinking a list may cause it to be reallocated in the pool vector.
-///
-/// The index stored in a `ListHandle` points to part 2, the list elements. The value 0 is
-/// reserved for the empty list which isn't allocated in the vector.
-#[derive(Clone, Debug, PartialEq, Eq)]
-pub struct ListHandle {
-    index: u32,
-    generation: u32,
-    len: u32,
-}
-
-/// Create an empty list.
-impl Default for ListHandle {
-    fn default() -> Self {
-        Self { index: 0, generation: 0, len: 0 }
-    }
-}
-
-const MAX_SIZE_CLAS: SizeClass = sclass_for_length(super::MAX_CHAIN_LEN - 1);
-const NUM_SIZE_CLASS: usize = MAX_SIZE_CLAS as usize + 1;
-
-/// A memory pool for storing lists of `u32`.
-#[derive(Clone, Debug)]
-pub struct ListPool {
-    // The main array containing the lists.
-    data: Vec<u32>,
-
-    // Heads of the free lists, one for each size class.
-    free: [u32; NUM_SIZE_CLASS],
-
-    generation: u32,
-}
-
-/// Lists are allocated in sizes that are powers of two, starting from 4.
-/// Each power of two is assigned a size class number, so the size is `4 << SizeClass`.
-type SizeClass = u8;
-
-/// Get the size of a given size class. The size includes the length field, so the maximum list
-/// length is one less than the class size.
-#[inline]
-const fn sclass_size(sclass: SizeClass) -> usize {
-    4 << sclass
-}
-
-/// Get the size class to use for a given list length.
-/// This always leaves room for the length element in addition to the list elements.
-#[inline]
-const fn sclass_for_length(len: u32) -> SizeClass {
-    30 - (len | 3).leading_zeros() as SizeClass
-}
-
-/// Is `len` the minimum length in its size class?
-#[inline]
-fn is_sclass_max_length(len: u32) -> bool {
-    len > 3 && len.is_power_of_two()
-}
-
-impl ListPool {
-    /// Create a new list pool.
-    pub fn new(capacity: u32) -> Self {
-        Self {
-            data: Vec::with_capacity(capacity as usize),
-            free: [u32::MAX; NUM_SIZE_CLASS],
-            generation: 1,
-        }
-    }
-
-    /// Clear the pool, forgetting about all lists that use it.
-    ///
-    /// This invalidates any existing entity lists that used this pool to allocate memory.
-    ///
-    /// The pool's memory is not released to the operating system, but kept around for faster
-    /// allocation in the future.
-    pub fn clear(&mut self) {
-        self.data.clear();
-        self.free.fill(u32::MAX);
-        self.generation += 1;
-    }
-
-    /// Allocate a storage block with a size given by `sclass`.
-    ///
-    /// Returns the first index of an available segment of `self.data` containing
-    /// `sclass_size(sclass)` elements. The allocated memory is filled with reserved
-    /// values.
-    fn alloc(&mut self, sclass: SizeClass) -> usize {
-        let freelist_head = self.free[sclass as usize];
-        // First try the free list for this size class.
-        if freelist_head == u32::MAX {
-            // Nothing on the free list. Allocate more memory.
-            let offset = self.data.len();
-            self.data.resize(offset + sclass_size(sclass), u32::MAX);
-            offset
-        } else {
-            // take an allocation off the free list (a singly linked list)
-            self.free[sclass as usize] = self.data[freelist_head as usize];
-            freelist_head as usize
-        }
-    }
-
-    /// Free a storage block with a size given by `sclass`.
-    ///
-    /// This must be a block that was previously allocated by `alloc()` with the same size class.
-    fn free(&mut self, block: usize, sclass: SizeClass) {
-        let sclass = sclass as usize;
-        // Insert the block on the free list, which is a singly linked list.
-        self.data[block] = self.free[sclass] as u32;
-        self.free[sclass] = block as u32
-    }
-
-    /// Returns two mutable slices representing the two requested blocks.
-    ///
-    /// The two returned slices can be longer than the blocks. Each block is located at the front
-    /// of the respective slice.
-    fn mut_slices(&mut self, block0: usize, block1: usize) -> (&mut [u32], &mut [u32]) {
-        if block0 < block1 {
-            let (s0, s1) = self.data.split_at_mut(block1);
-            (&mut s0[block0..], s1)
-        } else {
-            let (s1, s0) = self.data.split_at_mut(block0);
-            (s0, &mut s1[block1..])
-        }
-    }
-
-    /// Reallocate a block to a different size class.
-    ///
-    /// Copy `elems_to_copy` elements from the old to the new block.
-    fn realloc(
-        &mut self,
-        block: usize,
-        from_sclass: SizeClass,
-        to_sclass: SizeClass,
-        elems_to_copy: usize,
-    ) -> usize {
-        debug_assert!(elems_to_copy <= sclass_size(from_sclass));
-        debug_assert!(elems_to_copy <= sclass_size(to_sclass));
-        let new_block = self.alloc(to_sclass);
-
-        let (old, new) = self.mut_slices(block, new_block);
-        new[0..elems_to_copy].copy_from_slice(&old[0..elems_to_copy]);
-
-        self.free(block, from_sclass);
-        new_block
-    }
-}
-
-impl ListHandle {
-    /// Get the number of elements in the list.
-    #[allow(clippy::len_without_is_empty)]
-    pub fn len(&self, pool: &ListPool) -> u32 {
-        if self.generation == pool.generation {
-            self.len
-        } else {
-            0
-        }
-    }
-
-    /// Get the list as a slice.
-    pub fn as_slice<'a>(&'a self, pool: &'a ListPool) -> &'a [u32] {
-        let idx = self.index as usize;
-        match self.len(pool) {
-            0 => &[],
-            1 => std::slice::from_ref(&self.index),
-            len => &pool.data[idx..idx + len as usize],
-        }
-    }
-
-    /// Appends an element to the back of the list.
-    pub fn push(&mut self, element: u32, pool: &mut ListPool) {
-        let len = self.len(pool);
-        match len {
-            0 => {
-                self.generation = pool.generation;
-                self.index = element;
-                self.len = 1;
-            }
-            1 => {
-                // The single element was stored inline in the handle.
-                // Allocate a block, move it there and set length=2.
-                let block = pool.alloc(0);
-                pool.data[block] = self.index;
-                pool.data[block + 1] = element;
-                self.index = block as u32;
-                self.len = 2;
-            }
-            2..=MAX_CHAIN_LEN => {
-                // Do we need to reallocate?
-                let block;
-                let idx = self.index as usize;
-                if is_sclass_max_length(len) {
-                    // Reallocate, preserving all old elements.
-                    let sclass = sclass_for_length(len);
-                    block = pool.realloc(idx, sclass - 1, sclass, len as usize);
-                    self.index = block as u32;
-                } else {
-                    block = idx;
-                }
-                pool.data[block + len as usize] = element;
-                self.len += 1;
-            }
-
-            // ignore lists longer than MAX_CHAIN_LEN
-            // these are rarely relevant and if they are we fall back to myers
-            _ => (),
-        }
-    }
-}
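The size-class arithmetic above packs every list into a power-of-two block of at least four slots. A self-contained check of how lengths map onto classes; the two `const fn`s are copied verbatim from the file above, and the asserted values are worked out from them:

```
type SizeClass = u8;

const fn sclass_size(sclass: SizeClass) -> usize {
    4 << sclass
}

const fn sclass_for_length(len: u32) -> SizeClass {
    30 - (len | 3).leading_zeros() as SizeClass
}

fn main() {
    // lengths 0..=3 share the smallest class, 4..=7 the next one, ...
    assert_eq!(sclass_for_length(0), 0);
    assert_eq!(sclass_for_length(3), 0);
    assert_eq!(sclass_for_length(4), 1);
    assert_eq!(sclass_for_length(7), 1);
    assert_eq!(sclass_for_length(8), 2);

    // ... and class sizes double: 4, 8, 16 slots
    assert_eq!(sclass_size(0), 4);
    assert_eq!(sclass_size(1), 8);
    assert_eq!(sclass_size(2), 16);
}
```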
diff --git a/vendor/imara-diff/src/intern.rs b/vendor/imara-diff/src/intern.rs
deleted file mode 100644
index 7d06522c8..000000000
--- a/vendor/imara-diff/src/intern.rs
+++ /dev/null
@@ -1,172 +0,0 @@
-use std::hash::Hash;
-use std::ops::Index;
-
-use ahash::RandomState;
-use hashbrown::raw::RawTable;
-
-/// A token represented as an interned integer.
-///
-/// A token represents the smallest possible unit of change during a diff.
-/// For text this is usually a line, a word or a single character.
-/// All [algorithms](crate::Algorithm) operate on interned tokens instead
-/// of using the token data directly.
-/// This allows for much better performance by amortizing the cost of hashing/equality.
-///
-/// While you can intern tokens yourself it is strongly recommended to use the [`InternedInput`](crate::intern::InternedInput) type.
-#[derive(PartialEq, Eq, Hash, Clone, Copy, Debug)]
-#[repr(transparent)]
-pub struct Token(pub u32);
-
-impl From<u32> for Token {
-    fn from(token: u32) -> Self {
-        Token(token)
-    }
-}
-
-impl From<Token> for u32 {
-    fn from(token: Token) -> Self {
-        token.0
-    }
-}
-
-pub trait TokenSource {
-    type Token: Hash + Eq;
-    type Tokenizer: Iterator<Item = Self::Token>;
-    fn tokenize(&self) -> Self::Tokenizer;
-    fn estimate_tokens(&self) -> u32;
-}
-
-/// Two lists of interned [tokens](crate::intern::Token) that can be compared with the [`diff`](crate::diff) function.
-///
-/// A token represents the smallest possible unit of change during a diff.
-/// For text this is usually a line, a word or a single character.
-/// All [algorithms](crate::Algorithm) operate on interned tokens instead
-/// of using the token data directly.
-/// This allows for much better performance by amortizing the cost of hashing/equality.
-///
-/// While you can intern tokens yourself it is strongly recommended to use the [`InternedInput`](crate::intern::InternedInput) type.
-#[derive(Default)]
-pub struct InternedInput<T: Eq + Hash> {
-    pub before: Vec<Token>,
-    pub after: Vec<Token>,
-    pub interner: Interner<T>,
-}
-
-impl<T: Eq + Hash> InternedInput<T> {
-    pub fn new<I: TokenSource<Token = T>>(before: I, after: I) -> Self {
-        let token_estimate_before = before.estimate_tokens() as usize;
-        let token_estimate_after = after.estimate_tokens() as usize;
-        let mut res = Self {
-            before: Vec::with_capacity(token_estimate_before),
-            after: Vec::with_capacity(token_estimate_after),
-            interner: Interner::new(token_estimate_before + token_estimate_after),
-        };
-        res.update_before(before.tokenize());
-        res.update_after(after.tokenize());
-        res
-    }
-
-    /// Replaces `self.before` with the interned tokens yielded by `input`.
-    /// Note that this does not erase any tokens from the interner and might therefore be considered
-    /// a memory leak. If this function is called often over a long-running process
-    /// consider clearing the interner with [`clear`](crate::intern::Interner::clear).
-    pub fn update_before(&mut self, input: impl Iterator<Item = T>) {
-        self.before.clear();
-        self.before.extend(input.map(|token| self.interner.intern(token)));
-    }
-
-    /// Replaces `self.after` with the interned tokens yielded by `input`.
-    /// Note that this does not erase any tokens from the interner and might therefore be considered
-    /// a memory leak. If this function is called often over a long-running process
-    /// consider clearing the interner with [`clear`](crate::intern::Interner::clear) or
-    /// [`erase_tokens_after`](crate::intern::Interner::erase_tokens_after).
-    pub fn update_after(&mut self, input: impl Iterator<Item = T>) {
-        self.after.clear();
-        self.after.extend(input.map(|token| self.interner.intern(token)));
-    }
-
-    pub fn clear(&mut self) {
-        self.before.clear();
-        self.after.clear();
-        self.interner.clear();
-    }
-}
-
-/// A hashtable-based interner that maps tokens to unique integer ids.
-#[derive(Default)]
-pub struct Interner<T: Hash + Eq> {
-    tokens: Vec<T>,
-    table: RawTable<Token>,
-    hasher: RandomState,
-}
-
-impl<T: Hash + Eq> Interner<T> {
-    /// Create an Interner with an initial capacity calculated by calling the
-    /// [`estimate_tokens`](crate::intern::TokenSource::estimate_tokens) methods of `before` and `after`.
-    pub fn new_for_token_source<S: TokenSource<Token = T>>(before: &S, after: &S) -> Self {
-        Self::new(before.estimate_tokens() as usize + after.estimate_tokens() as usize)
-    }
-
-    /// Create an Interner with initial capacity `capacity`.
-    pub fn new(capacity: usize) -> Interner<T> {
-        Interner {
-            tokens: Vec::with_capacity(capacity),
-            table: RawTable::with_capacity(capacity),
-            hasher: RandomState::new(),
-        }
-    }
-
-    /// Remove all interned tokens.
-    pub fn clear(&mut self) {
-        self.table.clear_no_drop();
-        self.tokens.clear();
-    }
-
-    /// Intern `token` and return the interned integer.
-    pub fn intern(&mut self, token: T) -> Token {
-        let hash = self.hasher.hash_one(&token);
-        if let Some(&token) = self.table.get(hash, |&it| self.tokens[it.0 as usize] == token) {
-            token
-        } else {
-            let interned = Token(self.tokens.len() as u32);
-            self.table.insert(hash, interned, |&token| {
-                self.hasher.hash_one(&self.tokens[token.0 as usize])
-            });
-            self.tokens.push(token);
-            interned
-        }
-    }
-
-    /// Returns the total number of **distinct** tokens currently interned.
-    pub fn num_tokens(&self) -> u32 {
-        self.tokens.len() as u32
-    }
-
-    /// Erases `first_erased_token` and any tokens interned afterwards from the interner.
-    pub fn erase_tokens_after(&mut self, first_erased_token: Token) {
-        assert!(first_erased_token.0 <= self.tokens.len() as u32);
-        let retained = first_erased_token.0 as usize;
-        let erased = self.tokens.len() - retained;
-        if retained <= erased {
-            self.table.clear_no_drop();
-            // safety: we assert that `retained` is smaller than the table size so the table will never have to grow
-            unsafe {
-                for (i, token) in self.tokens[0..retained].iter().enumerate() {
-                    self.table.insert_no_grow(self.hasher.hash_one(token), Token(i as u32));
-                }
-            }
-        } else {
-            for (i, token) in self.tokens[retained..].iter().enumerate() {
-                self.table.erase_entry(self.hasher.hash_one(token), |token| token.0 == (retained + i) as u32);
-            }
-        }
-        self.tokens.truncate(first_erased_token.0 as usize);
-    }
-}
-
-impl<T: Hash + Eq> Index<Token> for Interner<T> {
-    type Output = T;
-    fn index(&self, index: Token) -> &Self::Output {
-        &self.tokens[index.0 as usize]
-    }
-}
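That closes intern.rs. A short usage sketch of the interner in isolation (again not part of the sources; it assumes the crate is importable as `imara_diff`):

```
use imara_diff::intern::Interner;

fn main() {
    let mut interner = Interner::new(4);
    let a = interner.intern("foo");
    let b = interner.intern("bar");
    let c = interner.intern("foo"); // already known: the same token comes back

    assert_eq!(a, c);
    assert_ne!(a, b);
    assert_eq!(interner.num_tokens(), 2); // only distinct tokens are counted
    assert_eq!(interner[b], "bar"); // tokens index back into the interner
}
```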
diff --git a/vendor/imara-diff/src/lib.rs b/vendor/imara-diff/src/lib.rs
deleted file mode 100644
index c20b750ca..000000000
--- a/vendor/imara-diff/src/lib.rs
+++ /dev/null
@@ -1,267 +0,0 @@
-//! Imara-diff is a solid (imara in Swahili) diff library for Rust.
-//! Solid refers to the fact that imara-diff provides very good runtime performance even
-//! in pathological cases so that your application never appears to freeze while waiting on a diff.
-//! The performance improvements are achieved using battle-tested heuristics used in gnu-diff and git
-//! that are known to yield fast runtimes.
-//!
-//! Imara-diff is also designed to be flexible so that it can be used with arbitrary collections and
-//! not just lists and strings and even allows reusing large parts of the computation when
-//! comparing the same file to multiple different files.
-//!
-//! Imara-diff provides two diff algorithms:
-//!
-//! * The linear-space variant of the well-known [**Myers** algorithm](http://www.xmailserver.org/diff2.pdf)
-//! * The **Histogram** algorithm, which is a variant of the patience diff algorithm.
-//!
-//! The Myers algorithm has been enhanced with preprocessing and multiple heuristics that avoid quadratic
-//! time complexity and ensure fast runtimes in pathological cases; its behaviour closely matches gnu-diff and git.
-//! The Histogram algorithm was originally ported from git but has been heavily optimized.
-//! The **Histogram algorithm outperforms Myers diff** by 10% - 100% across a **wide variety of workloads**.
-//!
-//! Imara-diff's algorithms have been benchmarked over a wide variety of real-world code.
-//! For example, while comparing multiple different Linux kernel versions it performs up to 30 times better than the `similar` crate:
-#![cfg_attr(doc, doc=concat!("<img width=\"600\" class=\"figure\" src=\"data:image/svg+xml;base64,", include_str!("../plots/linux_comparison.svg.base64"), "\"></img>"))]
-//!
-//! # API Overview
-//!
-//! Imara-diff provides the [`UnifiedDiffBuilder`](crate::UnifiedDiffBuilder) for building
-//! a human-readable diff similar to the output of `git diff` or `diff -u`.
-//! This makes building a tool similar to gnu diff easy:
-//!
-//! ```
-//! use imara_diff::intern::InternedInput;
-//! use imara_diff::{diff, Algorithm, UnifiedDiffBuilder};
-//!
-//! let before = r#"fn foo() -> Bar {
-//!     let mut foo = 2;
-//!     foo *= 50;
-//!     println!("hello world")
-//! }"#;
-//!
-//! let after = r#"// lorem ipsum
-//! fn foo() -> Bar {
-//!     let mut foo = 2;
-//!     foo *= 50;
-//!     println!("hello world");
-//!     println!("{foo}");
-//! }
-//! // foo
-//! "#;
-//!
-//! let input = InternedInput::new(before, after);
-//! let diff = diff(Algorithm::Histogram, &input, UnifiedDiffBuilder::new(&input));
-//! assert_eq!(
-//!     diff,
-//!     r#"@@ -1,5 +1,8 @@
-//! +// lorem ipsum
-//!  fn foo() -> Bar {
-//!      let mut foo = 2;
-//!      foo *= 50;
-//! -    println!("hello world")
-//! +    println!("hello world");
-//! +    println!("{foo}");
-//!  }
-//! +// foo
-//! "#
-//! );
-//! ```
-//!
-//! If you want to process the diff in some way you can provide your own implementation of [`Sink`](crate::sink::Sink).
-//! For closures [`Sink`](crate::sink::Sink) is already implemented, so simple [`Sink`]s can be easily added:
-//!
-//! ```
-//! use std::ops::Range;
-//!
-//! use imara_diff::intern::InternedInput;
-//! use imara_diff::{diff, Algorithm, UnifiedDiffBuilder};
-//!
-//! let before = r#"fn foo() -> Bar {
-//!     let mut foo = 2;
-//!     foo *= 50;
-//!     println!("hello world")
-//! }"#;
-//!
-//! let after = r#"// lorem ipsum
-//! fn foo() -> Bar {
-//!     let mut foo = 2;
-//!     foo *= 50;
-//!     println!("hello world");
-//!     println!("{foo}");
-//! }
-//! // foo
-//! "#;
-//!
-//! let mut insertions = Vec::new();
-//! let mut removals = Vec::new();
-//! let mut replacements = Vec::new();
-//!
-//! let input = InternedInput::new(before, after);
-//! let sink = |before: Range<u32>, after: Range<u32>| {
-//!     let hunk_before: Vec<_> = input.before[before.start as usize..before.end as usize]
-//!         .iter()
-//!         .map(|&line| input.interner[line])
-//!         .collect();
-//!     let hunk_after: Vec<_> = input.after[after.start as usize..after.end as usize]
-//!         .iter()
-//!         .map(|&line| input.interner[line])
-//!         .collect();
-//!     if hunk_after.is_empty() {
-//!         removals.push(hunk_before)
-//!     } else if hunk_before.is_empty() {
-//!         insertions.push(hunk_after)
-//!     } else {
-//!         replacements.push((hunk_before, hunk_after))
-//!     }
-//! };
-//! let diff = diff(Algorithm::Histogram, &input, sink);
-//! assert_eq!(&insertions, &[vec!["// lorem ipsum"], vec!["// foo"]]);
-//! assert!(removals.is_empty());
-//! assert_eq!(
-//!     &replacements,
-//!     &[(
-//!         vec!["    println!(\"hello world\")"],
-//!         vec!["    println!(\"hello world\");", "    println!(\"{foo}\");"]
-//!     )]
-//! );
-//! ```
-//!
-//! For `&str` and `&[u8]` imara-diff will compute a line diff by default.
-//! To perform diffs of different tokenizations and collections you can implement the [`TokenSource`](crate::intern::TokenSource) trait.
-//! For example, imara-diff provides an alternative tokenizer for line-diffs that includes the line terminator in the line:
-//!
-//! ```
-//! use imara_diff::intern::InternedInput;
-//! use imara_diff::sink::Counter;
-//! use imara_diff::sources::lines_with_terminator;
-//! use imara_diff::{diff, Algorithm, UnifiedDiffBuilder};
-//!
-//! let before = "foo";
-//! let after = "foo\n";
-//!
-//! let input = InternedInput::new(before, after);
-//! let changes = diff(Algorithm::Histogram, &input, Counter::default());
-//! assert_eq!(changes.insertions, 0);
-//! assert_eq!(changes.removals, 0);
-//!
-//! let input = InternedInput::new(lines_with_terminator(before), lines_with_terminator(after));
-//! let changes = diff(Algorithm::Histogram, &input, Counter::default());
-//! assert_eq!(changes.insertions, 1);
-//! assert_eq!(changes.removals, 1);
-//! ```
-
-use std::hash::Hash;
-
-#[cfg(feature = "unified_diff")]
-pub use unified_diff::UnifiedDiffBuilder;
-
-use crate::intern::{InternedInput, Token, TokenSource};
-pub use crate::sink::Sink;
-mod histogram;
-pub mod intern;
-mod myers;
-pub mod sink;
-pub mod sources;
-#[cfg(feature = "unified_diff")]
-mod unified_diff;
-mod util;
-
-#[cfg(test)]
-mod tests;
-
-/// `imara-diff` supports multiple different algorithms
-/// for computing an edit sequence.
-/// These algorithms have different performance and all produce different output.
-#[derive(Debug, PartialEq, Eq, Clone, Copy)]
-pub enum Algorithm {
-    /// A variation of the [`patience` diff algorithm described by Bram Cohen's blog post](https://bramcohen.livejournal.com/73318.html)
-    /// that uses a histogram to find the least common LCS.
-    /// Just like the `patience` diff algorithm, this algorithm usually produces
-    /// more human-readable output than the Myers algorithm.
-    /// However compared to the `patience` diff algorithm (which is slower than the Myers algorithm),
-    /// the Histogram algorithm performs much better.
-    ///
-    /// The implementation here was originally ported from `git` but has been significantly
-    /// modified to improve performance.
-    /// As a result it consistently **performs better than the Myers algorithm** (5%-100%) over
-    /// a wide variety of test data. For example, a benchmark of diffing Linux kernel commits is shown below:
-    #[cfg_attr(doc, doc=concat!("<img width=\"600\" class=\"figure\" src=\"data:image/svg+xml;base64,", include_str!("../plots/linux_speedup.svg.base64"), "\"></img>"))]
-    ///
-    /// For pathological subsequences that only contain highly repeating tokens (64+ occurrences)
-    /// the algorithm falls back on Myers algorithm (with heuristics) to avoid quadratic behavior.
-    ///
-    /// Compared to Myers algorithm, the Histogram diff algorithm is more focused on providing
-    /// human-readable diffs instead of minimal diffs. In practice this means that the edit sequences
-    /// produced by the histogram diff are often longer than those produced by the Myers algorithm.
-    ///
-    /// The heuristic used by the histogram diff does not work well for inputs with small (often repeated)
-    /// tokens. For example **character diffs do not work well** as most (English) text is made up of
-    /// a fairly small set of characters. The `Histogram` algorithm will automatically detect these cases and
-    /// fall back to the Myers algorithm. However this detection has a nontrivial overhead, so
-    /// if it's known upfront that the set of distinct tokens is very small, the `Myers` algorithm should
-    /// be used instead.
-    Histogram,
-    /// An implementation of the linear space variant of
-    /// [Myers `O((N+M)D)` algorithm](http://www.xmailserver.org/diff2.pdf).
-    /// The algorithm is enhanced with preprocessing that removes
-    /// tokens that don't occur in the other file at all.
-    /// Furthermore, two heuristics for the middle snake search are implemented
-    /// that ensure reasonable runtime (mostly linear time complexity) even for large files.
-    ///
-    /// Due to the divide and conquer nature of the algorithm
-    /// the edit sequences produced are still fairly small even when the middle snake
-    /// search is aborted by a heuristic.
-    /// However, the produced edit sequences are not guaranteed to be fully minimal.
-    /// If that property is vital to you, use the `MyersMinimal` algorithm instead.
-    ///
-    /// The implementation (including the preprocessing) is mostly
-    /// ported from `git` and `gnu-diff`, where the Myers algorithm is used
-    /// as the default diff algorithm.
-    /// The heuristics used have therefore been heavily battle-tested and
-    /// are known to behave well over a large variety of inputs.
-    Myers,
-    /// Same as `Myers` but the early abort heuristics are disabled to guarantee
-    /// a minimal edit sequence.
-    /// This can mean significant slowdown in pathological cases.
-    MyersMinimal,
-}
-
-impl Algorithm {
-    #[cfg(test)]
-    const ALL: [Self; 2] = [Algorithm::Histogram, Algorithm::Myers];
-}
-
-impl Default for Algorithm {
-    fn default() -> Self {
-        Algorithm::Histogram
-    }
-}
-
-/// Computes an edit-script that transforms `input.before` into `input.after` using
-/// the specified `algorithm`.
-/// The edit-script is passed to `sink.process_change` while it is produced.
-pub fn diff<S: Sink, T: Eq + Hash>(
-    algorithm: Algorithm,
-    input: &InternedInput<T>,
-    sink: S,
-) -> S::Out {
-    diff_with_tokens(algorithm, &input.before, &input.after, input.interner.num_tokens(), sink)
-}
-
-/// Computes an edit-script that transforms `before` into `after` using
-/// the specified `algorithm`.
-/// The edit-script is passed to `sink.process_change` while it is produced.
-pub fn diff_with_tokens<S: Sink>(
-    algorithm: Algorithm,
-    before: &[Token],
-    after: &[Token],
-    num_tokens: u32,
-    sink: S,
-) -> S::Out {
-    assert!(before.len() < i32::MAX as usize, "imara-diff only supports up to {} tokens", i32::MAX);
-    assert!(after.len() < i32::MAX as usize, "imara-diff only supports up to {} tokens", i32::MAX);
-    match algorithm {
-        Algorithm::Histogram => histogram::diff(before, after, num_tokens, sink),
-        Algorithm::Myers => myers::diff(before, after, num_tokens, sink, false),
-        Algorithm::MyersMinimal => myers::diff(before, after, num_tokens, sink, true),
-    }
-}
diff --git a/vendor/imara-diff/src/myers.rs b/vendor/imara-diff/src/myers.rs
deleted file mode 100644
index efebec031..000000000
--- a/vendor/imara-diff/src/myers.rs
+++ /dev/null
@@ -1,271 +0,0 @@
-use std::ptr::NonNull;
-
-use crate::intern::Token;
-use crate::myers::middle_snake::{MiddleSnakeSearch, SearchResult};
-use crate::myers::preprocess::PreprocessedFile;
-use crate::myers::slice::FileSlice;
-use crate::util::sqrt;
-use crate::Sink;
-
-mod middle_snake;
-mod preprocess;
-mod slice;
-
-pub struct Myers {
-    kvec: NonNull<[i32]>,
-    kforward: NonNull<i32>,
-    kbackward: NonNull<i32>,
-    max_cost: u32,
-}
-
-pub fn diff<S: Sink>(
-    before: &[Token],
-    after: &[Token],
-    _num_tokens: u32,
-    mut sink: S,
-    minimal: bool,
-) -> S::Out {
-    // preprocess the files by removing parts of the file that are not contained in the other file at all
-    // this process remaps the token indices and therefore requires us to track changed tokens in a `bool` array
-    // PERF use a bitset?
-    let (mut before, mut after) = preprocess::preprocess(before, after);
-
-    // Perform the actual diff
-    Myers::new(before.tokens.len(), after.tokens.len()).run(
-        FileSlice::new(&mut before),
-        FileSlice::new(&mut after),
-        minimal,
-    );
-
-    process_changes_with_sink(&before, &after, &mut sink);
-    sink.finish()
-}
-
-const HEUR_MIN_COST: u32 = 256;
-const MAX_COST_MIN: u32 = 256;
-
-impl Drop for Myers {
-    fn drop(&mut self) {
-        unsafe { drop(Box::from_raw(self.kvec.as_ptr())) }
-    }
-}
-
-impl Myers {
-    fn new(len1: usize, len2: usize) -> Self {
-        let ndiags = len1 + len2 as usize + 3;
-        let kvec = Box::leak(vec![0; 2 * ndiags + 2].into_boxed_slice());
-        let kforward = NonNull::from(&mut kvec[len2 + 1]);
-        let kbackward = NonNull::from(&mut kvec[ndiags + len2 + 1]);
-        Self { kvec: kvec.into(), kforward, kbackward, max_cost: sqrt(ndiags).max(MAX_COST_MIN) }
-    }
-
-    fn run<'f>(&mut self, mut file1: FileSlice<'f>, mut file2: FileSlice<'f>, mut need_min: bool) {
-        loop {
-            file1.strip_common(&mut file2);
-
-            if file1.is_empty() {
-                file2.mark_changed();
-                return;
-            } else if file2.is_empty() {
-                file1.mark_changed();
-                return;
-            }
-
-            let split = self.split(&file1, &file2, need_min);
-            self.run(
-                file1.borrow().slice(..split.token_idx1 as u32),
-                file2.borrow().slice(..split.token_idx2 as u32),
-                split.minimized_lo,
-            );
-
-            file1 = file1.slice(split.token_idx1 as u32..);
-            file2 = file2.slice(split.token_idx2 as u32..);
-            need_min = split.minimized_hi
-        }
-    }
-
-    /// See "An O(ND) Difference Algorithm and its Variations", by Eugene Myers.
-    /// Basically considers a "box" (off1, off2, lim1, lim2) and scans from both
-    /// the forward diagonal starting from (off1, off2) and the backward diagonal
-    /// starting from (lim1, lim2). If the K values on the same diagonal cross,
-    /// returns the furthest point of reach. We might encounter expensive edge cases
-    /// using this algorithm, so a little bit of heuristic is needed to cut the
-    /// search and to return a suboptimal point.
-    fn split(&mut self, file1: &FileSlice, file2: &FileSlice, need_min: bool) -> Split {
-        let mut forward_search =
-            unsafe { MiddleSnakeSearch::<false>::new(self.kforward, file1, file2) };
-        let mut backwards_search =
-            unsafe { MiddleSnakeSearch::<true>::new(self.kbackward, file1, file2) };
-        // the searches meet in the forward pass iff the diagonal delta is odd;
-        // (len1 - len2) and (len1 + len2) always have the same parity
-        let is_odd = (file1.len() + file2.len()) & 1 != 0;
-
-        let mut ec = 0;
-
-        while ec <= self.max_cost {
-            let mut found_snake = false;
-            forward_search.next_d();
-            if is_odd {
-                if let Some(res) = forward_search.run(file1, file2, |k, token_idx1| {
-                    backwards_search.contains(k)
-                        && backwards_search.x_pos_at_diagonal(k) <= token_idx1
-                }) {
-                    match res {
-                        SearchResult::Snake => found_snake = true,
-                        SearchResult::Found { token_idx1, token_idx2 } => {
-                            return Split {
-                                token_idx1,
-                                token_idx2,
-                                minimized_lo: true,
-                                minimized_hi: true,
-                            };
-                        }
-                    }
-                }
-            } else {
-                found_snake |= forward_search.run(file1, file2, |_, _| false).is_some()
-            };
-
-            backwards_search.next_d();
-            if !is_odd {
-                if let Some(res) = backwards_search.run(file1, file2, |k, token_idx1| {
-                    forward_search.contains(k) && token_idx1 <= forward_search.x_pos_at_diagonal(k)
-                }) {
-                    match res {
-                        SearchResult::Snake => found_snake = true,
-                        SearchResult::Found { token_idx1, token_idx2 } => {
-                            return Split {
-                                token_idx1,
-                                token_idx2,
-                                minimized_lo: true,
-                                minimized_hi: true,
-                            };
-                        }
-                    }
-                }
-            } else {
-                found_snake |= backwards_search.run(file1, file2, |_, _| false).is_some()
-            };
-
-            if need_min {
-                continue;
-            }
-
-            // If the edit cost is above the heuristic trigger and if
-            // we got a good snake, we sample current diagonals to see
-            // if some of them have reached an "interesting" path. Our
-            // measure is a function of the distance from the diagonal
-            // corner (i1 + i2) penalized with the distance from the
-            // mid diagonal itself. If this value is above the current
-            // edit cost times a magic factor (XDL_K_HEUR) we consider
-            // it interesting.
-            if found_snake && ec > HEUR_MIN_COST {
-                if let Some((token_idx1, token_idx2)) = forward_search.found_snake(ec, file1, file2)
-                {
-                    return Split {
-                        token_idx1,
-                        token_idx2,
-                        minimized_lo: true,
-                        minimized_hi: false,
-                    };
-                }
-
-                if let Some((token_idx1, token_idx2)) =
-                    backwards_search.found_snake(ec, file1, file2)
-                {
-                    return Split {
-                        token_idx1,
-                        token_idx2,
-                        minimized_lo: false,
-                        minimized_hi: true,
-                    };
-                }
-            }
-
-            ec += 1;
-        }
-
-        let (distance_forward, token_idx1_forward) = forward_search.best_position(file1, file2);
-        let (distance_backwards, token_idx1_backwards) =
-            backwards_search.best_position(file1, file2);
-        if distance_forward > file1.len() as isize + file2.len() as isize - distance_backwards {
-            Split {
-                token_idx1: token_idx1_forward,
-                token_idx2: (distance_forward - token_idx1_forward as isize) as i32,
-                minimized_lo: true,
-                minimized_hi: false,
-            }
-        } else {
-            Split {
-                token_idx1: token_idx1_backwards,
-                token_idx2: (distance_backwards - token_idx1_backwards as isize) as i32,
-                minimized_lo: false,
-                minimized_hi: true,
-            }
-        }
-    }
-}
-
-#[derive(Debug)]
-struct Split {
-    token_idx1: i32,
-    token_idx2: i32,
-    minimized_lo: bool,
-    minimized_hi: bool,
-}
-
-/// The mapping performed during preprocessing makes it impossible to directly call
-/// the `sink` during the diff itself. Instead `is_changed` is set to `true` for all
-/// tokens that are changed; below, these arrays are used to call the sink function.
-fn process_changes_with_sink(
-    before: &PreprocessedFile,
-    after: &PreprocessedFile,
-    sink: &mut impl Sink,
-) {
-    let before_end = before.is_changed.len() as u32 + before.offset;
-    let after_end = after.is_changed.len() as u32 + after.offset;
-
-    let mut before = before
-        .is_changed
-        .iter()
-        .enumerate()
-        .map(|(i, removed)| (i as u32 + before.offset, *removed));
-
-    let mut after = after
-        .is_changed
-        .iter()
-        .enumerate()
-        .map(|(i, inserted)| (i as u32 + after.offset, *inserted));
-
-    let mut next1 = before.next();
-    let mut next2 = after.next();
-
-    while let (Some((before_pos, removed)), Some((after_pos, inserted))) = (next1, next2) {
-        if !(removed | inserted) {
-            next1 = before.next();
-            next2 = after.next();
-            continue;
-        }
-
-        let mut hunk_before = before_pos..before_pos;
-        let mut hunk_after = after_pos..after_pos;
-        if removed {
-            let end = before.find(|(_, changed)| !changed);
-            next1 = end.map(|(end, _)| (end, false));
-            hunk_before.end = end.map_or(before_end, |(end, _)| end);
-        };
-
-        if inserted {
-            let end = after.find(|(_, changed)| !changed);
-            next2 = end.map(|(end, _)| (end, false));
-            hunk_after.end = end.map_or(after_end, |(end, _)| end);
-        }
-
-        sink.process_change(hunk_before, hunk_after);
-    }
-
-    if let Some((before_pos, _)) = next1 {
-        sink.process_change(before_pos..before_end, after_end..after_end);
-    } else if let Some((after_pos, _)) = next2 {
-        sink.process_change(before_end..before_end, after_pos..after_end);
-    }
-}
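That is the end of myers.rs. The hunk-merging idea in `process_changes_with_sink` — walk both change-flag arrays in lockstep and fuse adjacent runs of removals and insertions into one paired change — can be shown in a simplified standalone form (offsets and the preprocessing mapping omitted; illustrative only):

```
use std::ops::Range;

/// Collect (before, after) hunks from two change-flag arrays.
fn hunks(before: &[bool], after: &[bool]) -> Vec<(Range<u32>, Range<u32>)> {
    let mut out = Vec::new();
    let (mut i, mut j) = (0usize, 0usize);
    while i < before.len() && j < after.len() {
        if !before[i] && !after[j] {
            // unchanged on both sides: advance in lockstep
            i += 1;
            j += 1;
            continue;
        }
        let (b0, a0) = (i as u32, j as u32);
        // consume the run of removed tokens, then the run of inserted ones
        while i < before.len() && before[i] {
            i += 1;
        }
        while j < after.len() && after[j] {
            j += 1;
        }
        out.push((b0..i as u32, a0..j as u32));
    }
    // trailing removals or insertions once the other side is exhausted
    while i < before.len() && !before[i] {
        i += 1;
    }
    while j < after.len() && !after[j] {
        j += 1;
    }
    if i < before.len() {
        out.push((i as u32..before.len() as u32, after.len() as u32..after.len() as u32));
    } else if j < after.len() {
        out.push((before.len() as u32..before.len() as u32, j as u32..after.len() as u32));
    }
    out
}

fn main() {
    // "abc" -> "aXc": token 1 replaced on both sides
    let h = hunks(&[false, true, false], &[false, true, false]);
    assert_eq!(h, vec![(1..2, 1..2)]);
}
```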
diff --git a/vendor/imara-diff/src/myers/middle_snake.rs b/vendor/imara-diff/src/myers/middle_snake.rs
deleted file mode 100644
index 7be333761..000000000
--- a/vendor/imara-diff/src/myers/middle_snake.rs
+++ /dev/null
@@ -1,246 +0,0 @@
-use std::ptr::NonNull;
-
-use crate::myers::slice::FileSlice;
-use crate::util::{common_postfix, common_prefix};
-
-const SNAKE_CNT: u32 = 20;
-const K_HEUR: u32 = 4;
-
-pub struct MiddleSnakeSearch<const BACK: bool> {
-    kvec: NonNull<i32>,
-    kmin: i32,
-    kmax: i32,
-    dmin: i32,
-    dmax: i32,
-}
-
-impl<const BACK: bool> MiddleSnakeSearch<BACK> {
-    /// # Safety
-    /// `data` must be valid for reads between `-file1.len()` and `file2.len()`
-    pub unsafe fn new(data: NonNull<i32>, file1: &FileSlice, file2: &FileSlice) -> Self {
-        let dmin = -(file2.len() as i32);
-        let dmax = file1.len() as i32;
-        let kmid = if BACK { dmin + dmax } else { 0 };
-        let mut res = Self { kvec: data, kmin: kmid, kmax: kmid, dmin, dmax };
-        let init = if BACK { file1.len() as i32 } else { 0 };
-        res.write_xpos_at_diagonal(kmid, init);
-        res
-    }
-
-    pub fn contains(&self, k: i32) -> bool {
-        (self.kmin..=self.kmax).contains(&k)
-    }
-
-    pub fn bounds_check(&self, k: i32) {
-        debug_assert!((self.dmin - 1..=self.dmax + 1).contains(&k));
-    }
-
-    fn write_xpos_at_diagonal(&mut self, k: i32, token_idx1: i32) {
-        self.bounds_check(k);
-        unsafe { self.kvec.as_ptr().offset(k as isize).write(token_idx1) }
-    }
-
-    pub fn x_pos_at_diagonal(&self, diagonal: i32) -> i32 {
-        self.bounds_check(diagonal);
-        unsafe { self.kvec.as_ptr().offset(diagonal as isize).read() }
-    }
-
-    pub fn pos_at_diagonal(&self, diagonal: i32) -> (i32, i32) {
-        self.bounds_check(diagonal);
-        let token_idx1 = unsafe { self.kvec.as_ptr().offset(diagonal as isize).read() };
-        let token_idx2 = (token_idx1 as i32 - diagonal) as i32;
-        (token_idx1, token_idx2)
-    }
-
-    /// We need to extend the diagonal "domain" by one. If the next
-    /// value exits the box boundaries we need to change it in the
-    /// opposite direction because (max - min) must be a power of
-    /// two.
-    ///
-    /// Also we initialize the external K value to -1 so that we can
-    /// avoid extra conditions in the check inside the core loop.
-    pub fn next_d(&mut self) {
-        let init_val = if BACK {
-            // value should always be larger than the bounds
-            i32::MAX
-        } else {
-            // value should always be smaller than the bounds
-            i32::MIN
-        };
-
-        if self.kmin > self.dmin {
-            self.kmin -= 1;
-            self.write_xpos_at_diagonal(self.kmin - 1, init_val);
-        } else {
-            self.kmin += 1;
-        }
-
-        if self.kmax < self.dmax {
-            self.kmax += 1;
-            self.write_xpos_at_diagonal(self.kmax + 1, init_val);
-        } else {
-            self.kmax -= 1;
-        }
-    }
-
-    pub fn run(
-        &mut self,
-        file1: &FileSlice,
-        file2: &FileSlice,
-        mut f: impl FnMut(i32, i32) -> bool,
-    ) -> Option<SearchResult> {
-        let mut res = None;
-        let mut k = self.kmax;
-        while k >= self.kmin {
-            let mut token_idx1 = if BACK {
-                if self.x_pos_at_diagonal(k - 1) < self.x_pos_at_diagonal(k + 1) {
-                    self.x_pos_at_diagonal(k - 1)
-                } else {
-                    self.x_pos_at_diagonal(k + 1) - 1
-                }
-            } else if self.x_pos_at_diagonal(k - 1) >= self.x_pos_at_diagonal(k + 1) {
-                self.x_pos_at_diagonal(k - 1) + 1
-            } else {
-                self.x_pos_at_diagonal(k + 1)
-            };
-
-            let mut token_idx2 = (token_idx1 as i32 - k) as i32;
-            let off = if BACK {
-                if token_idx1 > 0 && token_idx2 > 0 {
-                    let tokens1 = &file1.tokens[..token_idx1 as usize];
-                    let tokens2 = &file2.tokens[..token_idx2 as usize];
-                    common_postfix(tokens1, tokens2)
-                } else {
-                    0
-                }
-            } else if token_idx1 < file1.len() as i32 && token_idx2 < file2.len() as i32 {
-                let tokens1 = &file1.tokens[token_idx1 as usize..];
-                let tokens2 = &file2.tokens[token_idx2 as usize..];
-                common_prefix(tokens1, tokens2)
-            } else {
-                0
-            };
-
-            if off > SNAKE_CNT {
-                res = Some(SearchResult::Snake)
-            }
-
-            if BACK {
-                token_idx1 -= off as i32;
-                token_idx2 -= off as i32;
-            } else {
-                token_idx1 += off as i32;
-                token_idx2 += off as i32;
-            }
-            self.write_xpos_at_diagonal(k, token_idx1);
-
-            if f(k, token_idx1) {
-                return Some(SearchResult::Found { token_idx1, token_idx2 });
-            }
-
-            k -= 2;
-        }
-
-        res
-    }
-
-    pub fn best_position(&self, file1: &FileSlice, file2: &FileSlice) -> (isize, i32) {
-        let mut best_distance: isize = if BACK { isize::MAX } else { -1 };
-        let mut best_token_idx1 = if BACK { i32::MAX } else { -1 };
-        let mut k = self.kmax;
-        while k >= self.kmin {
-            let mut token_idx1 = self.x_pos_at_diagonal(k);
-            if BACK {
-                token_idx1 = token_idx1.max(0);
-            } else {
-                token_idx1 = token_idx1.min(file1.len() as i32);
-            }
-            let mut token_idx2 = token_idx1 - k;
-            if BACK {
-                if token_idx2 < 0 {
-                    token_idx1 = k;
-                    token_idx2 = 0;
-                }
-            } else if token_idx2 > file2.len() as i32 {
-                token_idx1 = file2.len() as i32 + k;
-                token_idx2 = file2.len() as i32;
-            }
-
-            let distance = token_idx1 as isize + token_idx2 as isize;
-            if BACK && distance < best_distance || !BACK && distance > best_distance {
-                best_distance = distance;
-                best_token_idx1 = token_idx1;
-            }
-
-            k -= 2;
-        }
-        (best_distance, best_token_idx1)
-    }
-
-    pub fn found_snake(&self, ec: u32, file1: &FileSlice, file2: &FileSlice) -> Option<(i32, i32)> {
-        let mut best_score = 0;
-        let mut best_token_idx1 = 0;
-        let mut best_token_idx2 = 0;
-        let mut k = self.kmax;
-        while k >= self.kmin {
-            let (token_idx1, token_idx2) = self.pos_at_diagonal(k);
-            if BACK {
-                if !(0..file1.len() as i32 - SNAKE_CNT as i32).contains(&token_idx1) {
-                    k -= 2;
-                    continue;
-                }
-                if !(0..file2.len() as i32 - SNAKE_CNT as i32).contains(&token_idx2) {
-                    k -= 2;
-                    continue;
-                }
-            } else {
-                if !(SNAKE_CNT as i32..file1.len() as i32).contains(&token_idx1) {
-                    k -= 2;
-                    continue;
-                }
-                if !(SNAKE_CNT as i32..file2.len() as i32).contains(&token_idx2) {
-                    k -= 2;
-                    continue;
-                }
-            }
-
-            let main_diagonal_distance = k.unsigned_abs() as usize;
-            let distance = if BACK {
-                (file1.len() - token_idx1 as u32) + (file2.len() - token_idx2 as u32)
-            } else {
-                token_idx1 as u32 + token_idx2 as u32
-            };
-            let score = distance as usize + main_diagonal_distance;
-            if score > (K_HEUR * ec) as usize && score > best_score {
-                let is_snake = if BACK {
-                    file1.tokens[token_idx1 as usize..]
-                        .iter()
-                        .zip(&file2.tokens[token_idx2 as usize..])
-                        .take(SNAKE_CNT as usize)
-                        .all(|(token1, token2)| token1 == token2)
-                } else {
-                    file1.tokens[..token_idx1 as usize]
-                        .iter()
-                        .zip(&file2.tokens[..token_idx2 as usize])
-                        .rev()
-                        .take(SNAKE_CNT as usize)
-                        .all(|(token1, token2)| token1 == token2)
-                };
-                if is_snake {
-                    best_token_idx1 = token_idx1;
-                    best_token_idx2 = token_idx2;
-                    best_score = score
-                }
-            }
-
-            k -= 2;
-        }
-
-        (best_score > 0).then(|| (best_token_idx1, best_token_idx2))
-    }
-}
-
-pub enum SearchResult {
-    Snake,
-    Found { token_idx1: i32, token_idx2: i32 },
-}
diff --git a/vendor/imara-diff/src/myers/preprocess.rs b/vendor/imara-diff/src/myers/preprocess.rs
deleted file mode 100644
index 905d331b4..000000000
--- a/vendor/imara-diff/src/myers/preprocess.rs
+++ /dev/null
@@ -1,173 +0,0 @@
-use crate::intern::Token;
-use crate::myers::sqrt;
-use crate::util::{strip_common_postfix, strip_common_prefix};
-
-pub fn preprocess(
-    mut file1: &[Token],
-    mut file2: &[Token],
-) -> (PreprocessedFile, PreprocessedFile) {
-    let common_prefix = strip_common_prefix(&mut file1, &mut file2);
-    strip_common_postfix(&mut file1, &mut file2);
-    let (hdiff1, hdiff2) = token_occurrences(file1, file2);
-    let file1 = PreprocessedFile::new(common_prefix, &hdiff1, file1);
-    let file2 = PreprocessedFile::new(common_prefix, &hdiff2, file2);
-    (file1, file2)
-}
-
-/// Computes how often each token of one file occurs in the other file and classifies it accordingly.
-fn token_occurrences(file1: &[Token], file2: &[Token]) -> (Vec<Occurances>, Vec<Occurances>) {
-    const MAX_EQLIMIT: u32 = 1024;
-
-    // compute the limit after which tokens are treated as `Occurances::Common`
-    let eqlimit1 = sqrt(file1.len()).min(MAX_EQLIMIT);
-    let eqlimit2 = sqrt(file2.len()).min(MAX_EQLIMIT);
-
-    // first collect how often each token occurs in a file
-    let mut occurances1 = Vec::new();
-    for token in file1 {
-        let bucket = token.0 as usize;
-        if bucket >= occurances1.len() {
-            occurances1.resize(bucket + 1, 0u32);
-        }
-        occurances1[bucket] += 1;
-    }
-
-    // do the same thing for file2 while simultaneously classifying its tokens
-    let mut occurances2 = Vec::new();
-    let token_occurances2: Vec<_> = file2
-        .iter()
-        .map(|token| {
-            let bucket = token.0 as usize;
-            if bucket >= occurances2.len() {
-                occurances2.resize(bucket + 1, 0);
-            }
-            occurances2[bucket] += 1;
-            let occurances1 = *occurances1.get(bucket).unwrap_or(&0);
-            Occurances::from_occurances(occurances1, eqlimit2)
-        })
-        .collect();
-
-    let token_occurances1: Vec<_> = file1
-        .iter()
-        .map(|token| {
-            let bucket = token.0 as usize;
-            let occurances2 = *occurances2.get(bucket).unwrap_or(&0);
-            Occurances::from_occurances(occurances2, eqlimit1)
-        })
-        .collect();
-
-    (token_occurances1, token_occurances2)
-}
-
-#[derive(Clone, Copy, Debug)]
-enum Occurances {
-    /// Token does not occur in this file
-    None,
-    /// Token occurs at least once
-    Some,
-    /// Token occurs very frequently (exact number depends on file size).
-    /// Such tokens are usually empty lines or braces and are often not meaningful to a diff
-    Common,
-}
-
-impl Occurances {
-    pub fn from_occurances(occurances: u32, eqlimit: u32) -> Occurances {
-        if occurances == 0 {
-            Occurances::None
-        } else if occurances >= eqlimit {
-            Occurances::Common
-        } else {
-            Occurances::Some
-        }
-    }
-}
-
-#[derive(Debug)]
-pub struct PreprocessedFile {
-    pub offset: u32,
-    pub is_changed: Vec<bool>,
-    pub indices: Vec<u32>,
-    pub tokens: Vec<Token>,
-}
-
-impl PreprocessedFile {
-    fn new(offset: u32, token_diff: &[Occurances], tokens: &[Token]) -> PreprocessedFile {
-        let mut changed = vec![false; tokens.len()];
-        let (tokens, indices) = prune_unmatched_tokens(tokens, token_diff, &mut changed);
-        PreprocessedFile { offset, is_changed: changed, indices, tokens }
-    }
-}
-
-fn prune_unmatched_tokens(
-    file: &[Token],
-    token_status: &[Occurances],
-    changed: &mut [bool],
-) -> (Vec<Token>, Vec<u32>) {
-    assert_eq!(token_status.len(), file.len());
-    file.iter()
-        .zip(token_status)
-        .enumerate()
-        .filter_map(|(i, (&token, &status))| {
-            let prune = match status {
-                Occurances::None => true,
-                Occurances::Some => false,
-                Occurances::Common => should_prune_common_line(token_status, i),
-            };
-            if prune {
-                changed[i] = true;
-                None
-            } else {
-                Some((token, i as u32))
-            }
-        })
-        .unzip()
-}
-
-// TODO do not unnecessarily rescan lines
-fn should_prune_common_line(token_status: &[Occurances], pos: usize) -> bool {
-    const WINDOW_SIZE: usize = 100;
-
-    let mut unmatched_before = 0;
-    let mut common_before = 0;
-
-    // only scan the `WINDOW_SIZE` tokens directly before `pos`
-    let start = if pos > WINDOW_SIZE { pos - WINDOW_SIZE } else { 0 };
-    for status in token_status[start..pos].iter().rev() {
-        match status {
-            Occurances::None => {
-                unmatched_before += 1;
-            }
-            Occurances::Common => {
-                common_before += 1;
-            }
-            Occurances::Some => break,
-        }
-    }
-
-    if unmatched_before == 0 {
-        return false;
-    }
-
-    let end = token_status.len().min(pos + WINDOW_SIZE);
-    let mut unmatched_after = 0;
-    let mut common_after = 0;
-    for status in token_status[pos..end].iter() {
-        match status {
-            Occurances::None => {
-                unmatched_after += 1;
-            }
-            Occurances::Common => {
-                common_after += 1;
-            }
-            Occurances::Some => break,
-        }
-    }
-
-    if unmatched_after == 0 {
-        return false;
-    }
-
-    let common = common_before + common_after;
-    let unmatched = unmatched_before + unmatched_after;
-
-    unmatched > 3 * common
-}
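preprocess.rs ends here. Its three-way classification is simple enough to restate numerically: with the `Common` cutoff at the square root of the file size (capped at 1024), a 10,000-token file treats any token occurring 100 or more times as `Common`. A toy restatement of `Occurances::from_occurances` (spelling kept from the source):

```
#[derive(Clone, Copy, Debug, PartialEq)]
enum Occurances {
    None,   // never occurs in the other file: prune it
    Some,   // a useful anchor for the diff
    Common, // e.g. empty lines or lone braces
}

fn classify(occurances: u32, eqlimit: u32) -> Occurances {
    // mirrors Occurances::from_occurances above
    if occurances == 0 {
        Occurances::None
    } else if occurances >= eqlimit {
        Occurances::Common
    } else {
        Occurances::Some
    }
}

fn main() {
    let eqlimit = (10_000f64).sqrt() as u32; // = 100
    assert_eq!(classify(0, eqlimit), Occurances::None);
    assert_eq!(classify(7, eqlimit), Occurances::Some);
    assert_eq!(classify(2_500, eqlimit), Occurances::Common);
}
```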
diff --git a/vendor/imara-diff/src/myers/slice.rs b/vendor/imara-diff/src/myers/slice.rs
deleted file mode 100644
index 0f99121b4..000000000
--- a/vendor/imara-diff/src/myers/slice.rs
+++ /dev/null
@@ -1,65 +0,0 @@
-use std::mem::take;
-use std::ops::RangeBounds;
-
-use crate::intern::Token;
-use crate::myers::preprocess::PreprocessedFile;
-use crate::util::common_edges;
-
-#[derive(Default)]
-pub struct FileSlice<'a> {
-    pub tokens: &'a [Token],
-    indices: &'a [u32],
-    changed: &'a mut [bool],
-}
-
-impl<'a> FileSlice<'a> {
-    pub fn new(file: &'a mut PreprocessedFile) -> Self {
-        Self { tokens: &file.tokens, indices: &file.indices, changed: &mut file.is_changed }
-    }
-
-    pub fn mark_changed(&mut self) {
-        for &i in self.indices {
-            self.changed[i as usize] = true;
-        }
-    }
-
-    pub fn borrow(&mut self) -> FileSlice {
-        FileSlice { tokens: self.tokens, changed: self.changed, indices: self.indices }
-    }
-
-    pub fn slice<R: RangeBounds<u32>>(self, range: R) -> Self {
-        let start = match range.start_bound() {
-            std::ops::Bound::Included(&start) => start,
-            std::ops::Bound::Excluded(&start) => start + 1,
-            std::ops::Bound::Unbounded => 0,
-        };
-
-        let end = match range.end_bound() {
-            std::ops::Bound::Included(&end) => end + 1,
-            std::ops::Bound::Excluded(&end) => end,
-            std::ops::Bound::Unbounded => self.len(),
-        };
-
-        Self {
-            tokens: &self.tokens[start as usize..end as usize],
-            changed: self.changed,
-            indices: &self.indices[start as usize..end as usize],
-        }
-    }
-
-    pub fn strip_common(&mut self, other: &mut Self) {
-        let (start, common_postfix) = common_edges(self.tokens, other.tokens);
-        let end = self.len() - common_postfix;
-        *self = take(self).slice(start..end);
-        let end = other.len() - common_postfix;
-        *other = take(other).slice(start..end)
-    }
-
-    pub fn len(&self) -> u32 {
-        self.tokens.len() as u32
-    }
-
-    pub fn is_empty(&self) -> bool {
-        self.tokens.is_empty()
-    }
-}
-pub struct Counter<T> {
-    /// Total number of recorded removed [`tokens`](crate::intern::Token).
-    /// Computed by summing the lengths of the `before` subsequences passed to [`process_change`](crate::Sink::process_change).
-    pub removals: u32,
-    /// Total number of recorded inserted [`tokens`](crate::intern::Token).
-    /// Computed by summing the lengths of the `after` subsequences passed to [`process_change`](crate::Sink::process_change).
-    pub insertions: u32,
-    /// The [`Sink`](crate::Sink) for which the counter records [`tokens`](crate::intern::Token).
-    /// All calls to [`process_change`](crate::Sink::process_change) are forwarded to the `sink` by the counter.
-    /// After [`finish`](crate::Sink::finish) is called, this field contains the output returned by the [`finish`](crate::Sink::finish)
-    /// method of the wrapped [`Sink`](crate::Sink).
-    pub wrapped: T,
-}
-
-impl<S: Sink> Counter<S> {
-    pub fn new(sink: S) -> Self {
-        Self { insertions: 0, removals: 0, wrapped: sink }
-    }
-}
-
-impl<S: Sink> Sink for Counter<S> {
-    type Out = Counter<S::Out>;
-    fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
-        self.removals += before.end - before.start;
-        self.insertions += after.end - after.start;
-        self.wrapped.process_change(before, after)
-    }
-
-    fn finish(self) -> Self::Out {
-        Counter {
-            removals: self.removals,
-            insertions: self.insertions,
-            wrapped: self.wrapped.finish(),
-        }
-    }
-}
-
-impl<T> Counter<T> {
-    pub fn total(&self) -> usize {
-        self.insertions as usize + self.removals as usize
-    }
-}
-
-impl Default for Counter<()> {
-    fn default() -> Self {
-        Counter::new(())
-    }
-}
diff --git a/vendor/imara-diff/src/sources.rs b/vendor/imara-diff/src/sources.rs
deleted file mode 100644
index 1c75bd026..000000000
--- a/vendor/imara-diff/src/sources.rs
+++ /dev/null
@@ -1,151 +0,0 @@
-use std::mem::take;
-use std::str::from_utf8_unchecked;
-
-use crate::TokenSource;
-
-/// Returns a [`TokenSource`](crate::intern::TokenSource) that uses
-/// the lines in `data` as tokens. The newline separator (`\r\n` or `\n`) is
-/// not included in the emitted tokens.
-/// This means that changing the newline separator from `\r\n` to `\n`
-/// (or omitting it fully on the last line) is not detected by [`diff`](crate::diff).
-pub fn lines(data: &str) -> Lines<'_, false> {
-    Lines(ByteLines(data.as_bytes()))
-}
-
-/// Returns a [`TokenSource`](crate::intern::TokenSource) that uses
-/// the lines in `data` as tokens. The newline separator (`\r\n` or `\n`) is
-/// included in the emitted tokens.
-/// This means that changing the newline separator from `\r\n` to `\n`
-/// (or omitting it fully on the last line) is detected by [`diff`](crate::diff).
-pub fn lines_with_terminator(data: &str) -> Lines<'_, true> {
-    Lines(ByteLines(data.as_bytes()))
-}
-
-/// Returns a [`TokenSource`](crate::intern::TokenSource) that uses
-/// the lines in `data` as tokens. A line is a continuous subslice of
-/// `data` which does not contain `\n` (or `\r\n`).
-/// The newline separator (`\r\n` or `\n`) is included in the emitted tokens.
-/// This means that changing the newline separator from `\r\n` to `\n`
-/// (or omitting it fully on the last line) is detected by [`diff`](crate::diff).
-pub fn byte_lines_with_terminator(data: &[u8]) -> ByteLines<'_, true> {
-    ByteLines(data)
-}
-
-/// Returns a [`TokenSource`](crate::intern::TokenSource) that uses
-/// the lines in `data` as tokens. The newline separator (`\r\n` or `\n`) is
-/// not included in the emitted tokens.
-/// This means that changing the newline separator from `\r\n` to `\n`
-/// (or omitting it fully on the last line) is not detected by [`diff`](crate::diff).
-pub fn byte_lines(data: &[u8]) -> ByteLines<'_, false> {
-    ByteLines(data)
-}
-
-/// By default a line diff is produced for a string
-impl<'a> TokenSource for &'a str {
-    type Token = &'a str;
-
-    type Tokenizer = Lines<'a, false>;
-
-    fn tokenize(&self) -> Self::Tokenizer {
-        lines(self)
-    }
-
-    fn estimate_tokens(&self) -> u32 {
-        lines_with_terminator(self).estimate_tokens()
-    }
-}
-
-/// By default a line diff is produced for a byte slice
-impl<'a> TokenSource for &'a [u8] {
-    type Token = Self;
-    type Tokenizer = ByteLines<'a, false>;
-
-    fn tokenize(&self) -> Self::Tokenizer {
-        byte_lines(self)
-    }
-
-    fn estimate_tokens(&self) -> u32 {
-        byte_lines(self).estimate_tokens()
-    }
-}
-
-/// A [`TokenSource`](crate::intern::TokenSource) that returns the lines of a `str` as tokens.
-/// See [`lines`](crate::sources::lines) and [`lines_with_terminator`](crate::sources::lines_with_terminator) for details
-#[derive(Clone, Copy, PartialEq, Eq)]
-pub struct Lines<'a, const INCLUDE_LINE_TERMINATOR: bool>(ByteLines<'a, INCLUDE_LINE_TERMINATOR>);
-
-impl<'a, const INCLUDE_LINE_TERMINATOR: bool> Iterator for Lines<'a, INCLUDE_LINE_TERMINATOR> {
-    type Item = &'a str;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        // safety invariant: this struct may only contain valid utf8
-        // splitting valid utf8 at ascii characters always produces valid utf8
-        self.0.next().map(|it| unsafe { from_utf8_unchecked(it) })
-    }
-}
-
-/// A [`Lines`] tokenizer is its own [`TokenSource`](crate::intern::TokenSource)
-impl<'a, const INCLUDE_LINE_TERMINATOR: bool> TokenSource for Lines<'a, INCLUDE_LINE_TERMINATOR> {
-    type Token = &'a str;
-
-    type Tokenizer = Self;
-
-    fn tokenize(&self) -> Self::Tokenizer {
-        *self
-    }
-
-    fn estimate_tokens(&self) -> u32 {
-        self.0.estimate_tokens()
-    }
-}
-
-/// A [`TokenSource`](crate::intern::TokenSource) that returns the lines of a byte slice as tokens.
-/// See [`byte_lines`](crate::sources::byte_lines) and [`byte_lines_with_terminator`](crate::sources::byte_lines_with_terminator) for details
-#[derive(Clone, Copy, PartialEq, Eq)]
-pub struct ByteLines<'a, const INCLUDE_LINE_TERMINATOR: bool>(&'a [u8]);
-
-impl<'a, const INCLUDE_LINE_TERMINATOR: bool> Iterator for ByteLines<'a, INCLUDE_LINE_TERMINATOR> {
-    type Item = &'a [u8];
-
-    fn next(&mut self) -> Option<Self::Item> {
-        let mut saw_carriage_return = false;
-        let mut iter = self.0.iter().enumerate();
-        let line_len = loop {
-            match iter.next() {
-                Some((i, b'\n')) => break i + 1,
-                None => {
-                    return (!self.0.is_empty()).then(|| take(&mut self.0));
-                }
-                Some((_, &it)) => saw_carriage_return = it == b'\r',
-            }
-        };
-        let (mut line, rem) = self.0.split_at(line_len);
-        self.0 = rem;
-        if !INCLUDE_LINE_TERMINATOR {
-            line = &line[..line_len - 1 - saw_carriage_return as usize];
-        }
-        Some(line)
-    }
-}
-
-/// A [`ByteLines`] tokenizer is its own [`TokenSource`](crate::intern::TokenSource)
-impl<'a, const INCLUDE_LINE_TERMINATOR: bool> TokenSource
-    for ByteLines<'a, INCLUDE_LINE_TERMINATOR>
-{
-    type Token = &'a [u8];
-
-    type Tokenizer = Self;
-
-    fn tokenize(&self) -> Self::Tokenizer {
-        *self
-    }
-
-    fn estimate_tokens(&self) -> u32 {
-        let len: usize = self.take(20).map(|line| line.len()).sum();
-        if len == 0 {
-            100
-        } else {
-            (self.0.len() * 20 / len) as u32
-        }
-    }
-}
diff --git a/vendor/imara-diff/src/tests.rs b/vendor/imara-diff/src/tests.rs
deleted file mode 100644
index b0df76820..000000000
--- a/vendor/imara-diff/src/tests.rs
+++ /dev/null
@@ -1,152 +0,0 @@
-use std::fs::read_to_string;
-use std::mem::swap;
-use std::path::PathBuf;
-
-use expect_test::{expect, expect_file};
-
-use crate::intern::InternedInput;
-use crate::sink::Counter;
-use crate::{diff, Algorithm, UnifiedDiffBuilder};
-
-#[test]
-fn replace() {
-    let before = r#"fn foo() -> Bar{
-    let mut foo = 2.0;
-    foo *= 100 / 2;
-    println!("hello world")
-}"#;
-
-    let after = r#"const TEST: i32 = 0;
-fn foo() -> Bar{
-    let mut foo = 2.0;
-    foo *= 100 / 2;
-    println!("hello world");
-    println!("hello foo {TEST}");
-}
-
-"#;
-    let input = InternedInput::new(before, after);
-    for algorithm in Algorithm::ALL {
-        println!("{algorithm:?}");
-        let diff = diff(algorithm, &input, UnifiedDiffBuilder::new(&input));
-        expect![[r#"
-            @@ -1,5 +1,8 @@
-            +const TEST: i32 = 0;
-             fn foo() -> Bar{
-                 let mut foo = 2.0;
-                 foo *= 100 / 2;
-            -    println!("hello world")
-            +    println!("hello world");
-            +    println!("hello foo {TEST}");
-             }
-            +
-        "#]]
-        .assert_eq(&diff);
-    }
-}
-
-#[test]
-fn identical_files() {
-    let file = r#"fn foo() -> Bar{
-    let mut foo = 2.0;
-    foo *= 100 / 2;
-}"#;
-
-    for algorithm in Algorithm::ALL {
-        println!("{algorithm:?}");
-        let input = InternedInput::new(file, file);
-        let diff = diff(algorithm, &input, UnifiedDiffBuilder::new(&input));
-        assert_eq!(diff, "");
-    }
-}
-
-#[test]
-fn simple_insert() {
-    let before = r#"fn foo() -> Bar{
-    let mut foo = 2.0;
-    foo *= 100 / 2;
-}"#;
-
-    let after = r#"fn foo() -> Bar{
-    let mut foo = 2.0;
-    foo *= 100 / 2;
-    println("hello world")
-}"#;
-
-    let mut input = InternedInput::new(before, after);
-    for algorithm in Algorithm::ALL {
-        println!("{algorithm:?}");
-        let res = diff(algorithm, &input, UnifiedDiffBuilder::new(&input));
-        expect![[r#"
-            @@ -1,4 +1,5 @@
-             fn foo() -> Bar{
-                 let mut foo = 2.0;
-                 foo *= 100 / 2;
-            +    println("hello world")
-             }
-        "#]]
-        .assert_eq(&res);
-
-        swap(&mut input.before, &mut input.after);
-
-        let res = diff(algorithm, &input, UnifiedDiffBuilder::new(&input));
-        expect![[r#"
-            @@ -1,5 +1,4 @@
-             fn foo() -> Bar{
-                 let mut foo = 2.0;
-                 foo *= 100 / 2;
-            -    println("hello world")
-             }
-        "#]]
-        .assert_eq(&res);
-
-        swap(&mut input.before, &mut input.after);
-    }
-}
-
-pub fn project_root() -> PathBuf {
-    let dir = env!("CARGO_MANIFEST_DIR");
-    let mut res = PathBuf::from(dir);
-    while !res.join("README.md").exists() {
-        res = res.parent().expect("reached fs root without finding project root").to_owned()
-    }
-    res
-}
-
-#[test]
-fn hand_checked_udiffs() {
-    for algorithm in Algorithm::ALL {
-        println!("{algorithm:?}");
-        let test_dir = project_root().join("tests");
-        for file in ["helix_syntax.rs", "package.txt"] {
-            let path_before = test_dir.join(format!("{file}.before"));
-            let path_after = test_dir.join(format!("{file}.after"));
-            let path_diff = test_dir.join(format!("{file}.{algorithm:?}.diff"));
-            let before = read_to_string(path_before).unwrap();
-            let after = read_to_string(path_after).unwrap();
-            let input = InternedInput::new(&*before, &*after);
-            let diff = diff(algorithm, &input, UnifiedDiffBuilder::new(&input));
-            expect_file![path_diff].assert_eq(&diff);
-        }
-    }
-}
-
-#[test]
-fn complex_diffs() {
-    for algorithm in Algorithm::ALL {
-        println!("{algorithm:?}");
-        let test_dir = project_root().join("tests");
-        for (file1, file2) in [
-            ("test1.json", "test2.json"),
-            ("helix_syntax.rs.Histogram.diff", "helix_syntax.rs.after"),
-        ] {
-            let path_before = test_dir.join(file1);
-            let path_after = test_dir.join(file2);
-            let before = read_to_string(path_before).unwrap();
-            let after = read_to_string(path_after).unwrap();
-            let input = InternedInput::new(&*before, &*after);
-            let res = diff(algorithm, &input, Counter::default());
-            println!("{}", res.total())
-        }
-    }
-}
diff --git a/vendor/imara-diff/src/unified_diff.rs b/vendor/imara-diff/src/unified_diff.rs
deleted file mode 100644
index 9942d9510..000000000
--- a/vendor/imara-diff/src/unified_diff.rs
+++ /dev/null
@@ -1,136 +0,0 @@
-use std::fmt::{Display, Write};
-use std::hash::Hash;
-use std::ops::Range;
-
-use crate::intern::{InternedInput, Interner, Token};
-use crate::Sink;
-
-/// A [`Sink`](crate::sink::Sink) that creates a textual diff
-/// in the format typically output by git or GNU diff when the `-u` option is used
-pub struct UnifiedDiffBuilder<'a, W, T>
-where
-    W: Write,
-    T: Hash + Eq + Display,
-{
-    before: &'a [Token],
-    after: &'a [Token],
-    interner: &'a Interner<T>,
-
-    pos: u32,
-    before_hunk_start: u32,
-    after_hunk_start: u32,
-    before_hunk_len: u32,
-    after_hunk_len: u32,
-
-    buffer: String,
-    dst: W,
-}
-
-impl<'a, T> UnifiedDiffBuilder<'a, String, T>
-where
-    T: Hash + Eq + Display,
-{
-    /// Create a new `UnifiedDiffBuilder` for the given `input`,
-    /// that will return a [`String`](std::string::String).
-    pub fn new(input: &'a InternedInput<T>) -> Self {
-        Self {
-            before_hunk_start: 0,
-            after_hunk_start: 0,
-            before_hunk_len: 0,
-            after_hunk_len: 0,
-            buffer: String::with_capacity(8),
-            dst: String::new(),
-            interner: &input.interner,
-            before: &input.before,
-            after: &input.after,
-            pos: 0,
-        }
-    }
-}
-
-impl<'a, W, T> UnifiedDiffBuilder<'a, W, T>
-where
-    W: Write,
-    T: Hash + Eq + Display,
-{
-    /// Create a new `UnifiedDiffBuilder` for the given `input`,
-    /// that will write its output to the provided implementation of [`Write`](std::fmt::Write).
-    pub fn with_writer(input: &'a InternedInput<T>, writer: W) -> Self {
-        Self {
-            before_hunk_start: 0,
-            after_hunk_start: 0,
-            before_hunk_len: 0,
-            after_hunk_len: 0,
-            buffer: String::with_capacity(8),
-            dst: writer,
-            interner: &input.interner,
-            before: &input.before,
-            after: &input.after,
-            pos: 0,
-        }
-    }
-
-    fn print_tokens(&mut self, tokens: &[Token], prefix: char) {
-        for &token in tokens {
-            writeln!(&mut self.buffer, "{prefix}{}", self.interner[token]).unwrap();
-        }
-    }
-
-    fn flush(&mut self) {
-        if self.before_hunk_len == 0 && self.after_hunk_len == 0 {
-            return;
-        }
-
-        let end = (self.pos + 3).min(self.before.len() as u32);
-        self.update_pos(end, end);
-
-        writeln!(
-            &mut self.dst,
-            "@@ -{},{} +{},{} @@",
-            self.before_hunk_start + 1,
-            self.before_hunk_len,
-            self.after_hunk_start + 1,
-            self.after_hunk_len,
-        )
-        .unwrap();
-        write!(&mut self.dst, "{}", &self.buffer).unwrap();
-        self.buffer.clear();
-        self.before_hunk_len = 0;
-        self.after_hunk_len = 0
-    }
-
-    fn update_pos(&mut self, print_to: u32, move_to: u32) {
-        self.print_tokens(&self.before[self.pos as usize..print_to as usize], ' ');
-        let len = print_to - self.pos;
-        self.pos = move_to;
-        self.before_hunk_len += len;
-        self.after_hunk_len += len;
-    }
-}
-
-impl<W, T> Sink for UnifiedDiffBuilder<'_, W, T>
-where
-    W: Write,
-    T: Hash + Eq + Display,
-{
-    type Out = W;
-
-    fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
-        if before.start - self.pos > 6 {
-            self.flush();
-            self.pos = before.start - 3;
-            self.before_hunk_start = self.pos;
-            self.after_hunk_start = after.start - 3;
-        }
-        self.update_pos(before.start, before.end);
-        self.before_hunk_len += before.end - before.start;
-        self.after_hunk_len += after.end - after.start;
-        self.print_tokens(&self.before[before.start as usize..before.end as usize], '-');
-        self.print_tokens(&self.after[after.start as usize..after.end as usize], '+');
-    }
-
-    fn finish(mut self) -> Self::Out {
-        self.flush();
-        self.dst
-    }
-}
diff --git a/vendor/imara-diff/src/util.rs b/vendor/imara-diff/src/util.rs
deleted file mode 100644
index 503078d8c..000000000
--- a/vendor/imara-diff/src/util.rs
+++ /dev/null
@@ -1,48 +0,0 @@
-use crate::intern::Token;
-
-pub fn common_prefix(file1: &[Token], file2: &[Token]) -> u32 {
-    let mut off = 0;
-    for (token1, token2) in file1.iter().zip(file2) {
-        if token1 != token2 {
-            break;
-        }
-        off += 1;
-    }
-    off
-}
-
-pub fn common_postfix(file1: &[Token], file2: &[Token]) -> u32 {
-    let mut off = 0;
-    for (token1, token2) in file1.iter().rev().zip(file2.iter().rev()) {
-        if token1 != token2 {
-            break;
-        }
-        off += 1;
-    }
-    off
-}
-
-pub fn common_edges(file1: &[Token], file2: &[Token]) -> (u32, u32) {
-    let prefix = common_prefix(file1, file2);
-    let postfix = common_postfix(&file1[prefix as usize..], &file2[prefix as usize..]);
-    (prefix, postfix)
-}
-
-pub fn strip_common_prefix(file1: &mut &[Token], file2: &mut &[Token]) -> u32 {
-    let off = common_prefix(file1, file2);
-    *file1 = &file1[off as usize..];
-    *file2 = &file2[off as usize..];
-    off
-}
-
-pub fn strip_common_postfix(file1: &mut &[Token], file2: &mut &[Token]) -> u32 {
-    let off = common_postfix(file1, file2);
-    *file1 = &file1[..file1.len() - off as usize];
-    *file2 = &file2[..file2.len() - off as usize];
-    off
-}
-
-pub fn sqrt(val: usize) -> u32 {
-    let nbits = (usize::BITS as u32 - val.leading_zeros()) / 2;
-    1 << nbits
-}
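
The pruning heuristic in the deleted myers/preprocess.rs boils down to a 3:1 rule. Since that module is crate-private, the following standalone sketch mirrors the final decision rather than calling it; `prune_common` is a hypothetical name, and the window scanning that produces the four counts is elided.

fn prune_common(
    unmatched_before: u32,
    common_before: u32,
    unmatched_after: u32,
    common_after: u32,
) -> bool {
    // a common token is only pruned if unmatched tokens surround it on both sides ...
    if unmatched_before == 0 || unmatched_after == 0 {
        return false;
    }
    // ... and the unmatched neighbours outnumber the common ones more than 3:1
    let common = common_before + common_after;
    let unmatched = unmatched_before + unmatched_after;
    unmatched > 3 * common
}

fn main() {
    // a blank line inside a heavily rewritten region: 8 > 3 * 1, so it is pruned
    assert!(prune_common(4, 1, 4, 0));
    // a blank line in a mostly stable region: 4 > 12 is false, so it is kept
    assert!(!prune_common(2, 2, 2, 2));
}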
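
The `Sink` docs in sink.rs above guarantee strictly monotonically increasing calls to `process_change`. A minimal sketch of a custom sink that leans on that guarantee, assuming the crate-root re-exports that tests.rs above itself imports (`diff`, `Algorithm`, `Sink`, `intern::InternedInput`); `ChangeCollector` is a hypothetical name, not part of the deleted sources.

use std::ops::Range;

use imara_diff::intern::InternedInput;
use imara_diff::{diff, Algorithm, Sink};

struct ChangeCollector {
    changes: Vec<(Range<u32>, Range<u32>)>,
}

impl Sink for ChangeCollector {
    type Out = Vec<(Range<u32>, Range<u32>)>;

    fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
        // calls arrive in strictly increasing order, so pushing keeps
        // `changes` sorted with no extra work
        self.changes.push((before, after));
    }

    fn finish(self) -> Self::Out {
        self.changes
    }
}

fn main() {
    let input = InternedInput::new("a\nb\nc\n", "a\nx\nc\n");
    let changes = diff(
        Algorithm::Histogram,
        &input,
        ChangeCollector { changes: Vec::new() },
    );
    // one replacement hunk: token 1 of `before` replaced by token 1 of `after`
    assert_eq!(changes, vec![(1..2, 1..2)]);
}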
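
`Counter` can also serve as the whole sink, as `complex_diffs` above does: it counts removed and inserted tokens without building any textual output. A short sketch under the same API assumptions:

use imara_diff::intern::InternedInput;
use imara_diff::sink::Counter;
use imara_diff::{diff, Algorithm};

fn main() {
    let input = InternedInput::new("a\nb\nc\n", "a\nx\nc\n");
    let counts = diff(Algorithm::Histogram, &input, Counter::default());
    assert_eq!((counts.removals, counts.insertions), (1, 1));
    assert_eq!(counts.total(), 2);
    // `counts.wrapped` holds the wrapped sink's output, here `()`
}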
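
The two `str` tokenizers in sources.rs differ only in whether the terminator stays in the token, which decides whether a `\r\n` to `\n` change is visible to `diff`. A sketch contrasting them, assuming `sources` is a public module as the doc links above suggest:

use imara_diff::sources::{lines, lines_with_terminator};

fn main() {
    let text = "foo\r\nbar";
    // terminator stripped: a later change from `\r\n` to `\n` goes unnoticed
    let stripped: Vec<&str> = lines(text).collect();
    assert_eq!(stripped, ["foo", "bar"]);
    // terminator kept: the same change now yields a differing token
    let kept: Vec<&str> = lines_with_terminator(text).collect();
    assert_eq!(kept, ["foo\r\n", "bar"]);
}

Note also how `ByteLines::estimate_tokens` above extrapolates: it sums the lengths of the first 20 lines and computes `total_len * 20 / sampled_len`, falling back to 100 for empty input.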
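
`UnifiedDiffBuilder::with_writer` streams the hunks into any `std::fmt::Write` and `finish` hands the writer back. A sketch under the same API assumptions; the `--- before`/`+++ after` header is our own illustrative prefix, not something the builder emits:

use imara_diff::intern::InternedInput;
use imara_diff::{diff, Algorithm, UnifiedDiffBuilder};

fn main() {
    let input = InternedInput::new("a\nb\nc\n", "a\nx\nc\n");
    let header = String::from("--- before\n+++ after\n");
    // the builder appends to the provided writer and returns it from `finish`
    let out = diff(
        Algorithm::Histogram,
        &input,
        UnifiedDiffBuilder::with_writer(&input, header),
    );
    assert!(out.starts_with("--- before\n+++ after\n@@ -1,3 +1,3 @@"));
    assert!(out.contains("-b\n+x"));
}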
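
Finally, `util::sqrt` above is not an exact integer square root: it returns a power of two with half as many significant bits as its input, which can be off by up to a factor of roughly sqrt(2) in either direction. Since `util` is crate-internal, this standalone sketch mirrors it under the hypothetical name `approx_sqrt`:

fn approx_sqrt(val: usize) -> u32 {
    // half the significant bit count gives the exponent of the result
    let nbits = (usize::BITS - val.leading_zeros()) / 2;
    1 << nbits
}

fn main() {
    assert_eq!(approx_sqrt(64), 8); // exact for even powers of two
    assert_eq!(approx_sqrt(100), 8); // 100 has 7 significant bits -> 1 << 3
    assert_eq!(approx_sqrt(1 << 20), 1 << 10);
}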