Diffstat (limited to 'vendor/imara-diff/src')
-rw-r--r--  vendor/imara-diff/src/histogram.rs           | 133 -
-rw-r--r--  vendor/imara-diff/src/histogram/lcs.rs       | 127 -
-rw-r--r--  vendor/imara-diff/src/histogram/list_pool.rs | 258 -
-rw-r--r--  vendor/imara-diff/src/intern.rs              | 172 -
-rw-r--r--  vendor/imara-diff/src/lib.rs                 | 267 -
-rw-r--r--  vendor/imara-diff/src/myers.rs               | 271 -
-rw-r--r--  vendor/imara-diff/src/myers/middle_snake.rs  | 246 -
-rw-r--r--  vendor/imara-diff/src/myers/preprocess.rs    | 173 -
-rw-r--r--  vendor/imara-diff/src/myers/slice.rs         |  65 -
-rw-r--r--  vendor/imara-diff/src/sink.rs                | 110 -
-rw-r--r--  vendor/imara-diff/src/sources.rs             | 151 -
-rw-r--r--  vendor/imara-diff/src/tests.rs               | 152 -
-rw-r--r--  vendor/imara-diff/src/unified_diff.rs        | 136 -
-rw-r--r--  vendor/imara-diff/src/util.rs                |  48 -
14 files changed, 0 insertions(+), 2309 deletions(-)
diff --git a/vendor/imara-diff/src/histogram.rs b/vendor/imara-diff/src/histogram.rs
deleted file mode 100644
index f6c7e4f16..000000000
--- a/vendor/imara-diff/src/histogram.rs
+++ /dev/null
@@ -1,133 +0,0 @@
-use std::ops::Range;
-
-use crate::histogram::lcs::find_lcs;
-use crate::histogram::list_pool::{ListHandle, ListPool};
-use crate::intern::Token;
-use crate::util::{strip_common_postfix, strip_common_prefix};
-use crate::{myers, Sink};
-
-mod lcs;
-mod list_pool;
-
-const MAX_CHAIN_LEN: u32 = 63;
-
-struct Histogram {
- token_occurances: Vec<ListHandle>,
- pool: ListPool,
-}
-
-pub fn diff<S: Sink>(
- mut before: &[Token],
- mut after: &[Token],
- num_tokens: u32,
- mut sink: S,
-) -> S::Out {
- let mut histogram = Histogram::new(num_tokens);
- let prefix = strip_common_prefix(&mut before, &mut after);
- strip_common_postfix(&mut before, &mut after);
- histogram.run(before, prefix, after, prefix, &mut sink);
- sink.finish()
-}
-
-impl Histogram {
- fn new(num_buckets: u32) -> Histogram {
- Histogram {
- token_occurances: vec![ListHandle::default(); num_buckets as usize],
- pool: ListPool::new(2 * num_buckets),
- }
- }
-
- fn clear(&mut self) {
- self.pool.clear();
- }
-
- fn token_occurances(&self, token: Token) -> &[u32] {
- self.token_occurances[token.0 as usize].as_slice(&self.pool)
- }
-
- fn num_token_occurances(&self, token: Token) -> u32 {
- self.token_occurances[token.0 as usize].len(&self.pool) as u32
- }
-
- fn populate(&mut self, file: &[Token]) {
- for (i, &token) in file.iter().enumerate() {
- self.token_occurances[token.0 as usize].push(i as u32, &mut self.pool);
- }
- }
-
- fn run(
- &mut self,
- mut before: &[Token],
- mut before_off: u32,
- mut after: &[Token],
- mut after_off: u32,
- sink: &mut impl Sink,
- ) {
- loop {
- if before.is_empty() {
- if !after.is_empty() {
- sink.process_change(
- before_off..before_off,
- after_off..after_off + after.len() as u32,
- );
- }
- return;
- } else if after.is_empty() {
- sink.process_change(
- before_off..before_off + before.len() as u32,
- after_off..after_off,
- );
- return;
- }
-
- self.populate(before);
- match find_lcs(before, after, self) {
-                // an empty lcs was found, which means that file1 and file2 have nothing in common
- Some(lcs) if lcs.len == 0 => {
- sink.process_change(
- before_off..before_off + before.len() as u32,
- after_off..after_off + after.len() as u32,
- );
- return;
- }
- Some(lcs) => {
- self.run(
- &before[..lcs.before_start as usize],
- before_off,
- &after[..lcs.after_start as usize],
- after_off,
- sink,
- );
-
-                    // this is equivalent to (tail) recursion but implemented as a loop for efficiency reasons
- let before_end = lcs.before_start + lcs.len;
- before = &before[before_end as usize..];
- before_off += before_end;
-
- let after_end = lcs.after_start + lcs.len;
- after = &after[after_end as usize..];
- after_off += after_end;
- }
- None => {
-                    // we are diffing two extremely large repetitive files;
-                    // this is a worst case for histogram diff with O(N^2) performance,
-                    // so fall back to myers to maintain linear time complexity
- myers::diff(
- before,
- after,
- 0, // not used by myers
- |mut before: Range<u32>, mut after: Range<u32>| {
- before.start += before_off;
- before.end += before_off;
- after.start += after_off;
- after.end += after_off;
- sink.process_change(before, after)
- },
- false,
- );
- return;
- }
- }
- }
- }
-}
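
The `histogram` module above is private; it is driven through the crate's public `diff` entry point. A minimal usage sketch (not part of the vendored sources, assuming the default line tokenizer for `&str`) showing how the offset-adjusted ranges produced by `Histogram::run` reach the caller through a closure sink:

```rust
use std::ops::Range;

use imara_diff::intern::InternedInput;
use imara_diff::{diff, Algorithm};

fn main() {
    let input = InternedInput::new("a\nb\nc\n", "a\nx\nc\n");
    let sink = |before: Range<u32>, after: Range<u32>| {
        // The common prefix ("a") and postfix ("c") are stripped before the
        // histogram runs; the offsets put the ranges back into file coordinates.
        println!("-{before:?} +{after:?}"); // prints: -1..2 +1..2
    };
    diff(Algorithm::Histogram, &input, sink);
}
```
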
diff --git a/vendor/imara-diff/src/histogram/lcs.rs b/vendor/imara-diff/src/histogram/lcs.rs
deleted file mode 100644
index f8d8c2930..000000000
--- a/vendor/imara-diff/src/histogram/lcs.rs
+++ /dev/null
@@ -1,127 +0,0 @@
-use crate::histogram::{Histogram, MAX_CHAIN_LEN};
-use crate::intern::Token;
-
-pub(super) fn find_lcs(
- before: &[Token],
- after: &[Token],
- histogram: &mut Histogram,
-) -> Option<Lcs> {
- let mut search =
- LcsSearch { lcs: Lcs::default(), min_occurances: MAX_CHAIN_LEN + 1, found_cs: false };
- search.run(before, after, histogram);
- if search.success() {
- Some(search.lcs)
- } else {
- None
- }
-}
-
-#[derive(Default, Debug)]
-pub struct Lcs {
- pub before_start: u32,
- pub after_start: u32,
- pub len: u32,
-}
-
-pub struct LcsSearch {
- lcs: Lcs,
- min_occurances: u32,
- found_cs: bool,
-}
-
-impl LcsSearch {
- fn run(&mut self, before: &[Token], after: &[Token], histogram: &mut Histogram) {
- let mut pos = 0;
- while let Some(&token) = after.get(pos as usize) {
- if histogram.num_token_occurances(token) != 0 {
- self.found_cs = true;
- if histogram.num_token_occurances(token) <= self.min_occurances {
- pos = self.update_lcs(pos, token, histogram, before, after);
- continue;
- }
- }
-
- pos += 1;
- }
-
- histogram.clear();
- }
-
- fn success(&mut self) -> bool {
- !self.found_cs || self.min_occurances <= MAX_CHAIN_LEN
- }
-
- fn update_lcs(
- &mut self,
- after_pos: u32,
- token: Token,
- histogram: &Histogram,
- before: &[Token],
- after: &[Token],
- ) -> u32 {
- let mut next_token_idx2 = after_pos + 1;
- let mut occurances_iter = histogram.token_occurances(token).iter().copied();
- let mut token_idx1 = occurances_iter.next().unwrap();
-
- 'occurances_iter: loop {
- let mut occurances = histogram.num_token_occurances(token);
- let mut start1 = token_idx1;
- let mut start2 = after_pos;
- loop {
- if start1 == 0 || start2 == 0 {
- break;
- }
- let token1 = before.get(start1 as usize - 1);
- let token2 = after.get(start2 as usize - 1);
- if matches!((token1, token2), (Some(token1), Some(token2)) if token1 == token2) {
- start1 -= 1;
- start2 -= 1;
- let new_occurances = histogram.num_token_occurances(before[start1 as usize]);
- occurances = occurances.min(new_occurances);
- } else {
- break;
- }
- }
-
- let mut end1 = token_idx1 + 1;
- let mut end2 = after_pos + 1;
-
- loop {
- let token1 = before.get(end1 as usize);
- let token2 = after.get(end2 as usize);
- if matches!((token1, token2), (Some(token1), Some(token2)) if token1 == token2) {
- let new_occurances = histogram.num_token_occurances(before[end1 as usize]);
- occurances = occurances.min(new_occurances);
- end1 += 1;
- end2 += 1;
- } else {
- break;
- }
- }
-
- if next_token_idx2 < end2 {
- next_token_idx2 = end2;
- }
-
- let len = end2 - start2;
- debug_assert_eq!(len, end1 - start1);
- if self.lcs.len < len || self.min_occurances > occurances {
- self.min_occurances = occurances;
- self.lcs = Lcs { before_start: start1, after_start: start2, len };
- }
-
- loop {
- if let Some(next_token_idx) = occurances_iter.next() {
- if next_token_idx > end2 {
- token_idx1 = next_token_idx;
- break;
- }
- } else {
- break 'occurances_iter;
- }
- }
- }
-
- next_token_idx2
- }
-}
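
The search above walks `after` looking for anchors built from the least common tokens, then extends each candidate match in both directions. A toy sketch of the anchor-selection idea (hypothetical code, not the vendored implementation, which works on interned tokens and pooled occurrence lists):

```rust
use std::collections::HashMap;

// Count how often each token occurs in `before`, then pick the common token
// with the fewest occurrences, mirroring the `min_occurances` bookkeeping above.
fn rarest_common<'a>(before: &[&'a str], after: &[&'a str]) -> Option<&'a str> {
    let mut counts: HashMap<&str, u32> = HashMap::new();
    for &tok in before {
        *counts.entry(tok).or_insert(0) += 1;
    }
    after
        .iter()
        .filter_map(|&tok| counts.get(tok).map(|&n| (tok, n)))
        .min_by_key(|&(_, n)| n)
        .map(|(tok, _)| tok)
}

fn main() {
    // "a" and "c" occur once in `before`, so either beats the frequent "x" as
    // an anchor; the real search would then extend the match outwards.
    assert_eq!(rarest_common(&["a", "x", "b", "x", "c"], &["a", "x", "c"]), Some("a"));
}
```
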
diff --git a/vendor/imara-diff/src/histogram/list_pool.rs b/vendor/imara-diff/src/histogram/list_pool.rs
deleted file mode 100644
index af257aa5b..000000000
--- a/vendor/imara-diff/src/histogram/list_pool.rs
+++ /dev/null
@@ -1,258 +0,0 @@
-use crate::histogram::MAX_CHAIN_LEN;
-
-/// A small list of entity references allocated from a pool.
-///
-/// A `ListHandle` type provides similar functionality to `Vec`, but with some important
-/// differences in the implementation:
-///
-/// 1. Memory is allocated from a `ListPool` instead of the global heap.
-/// 2. The footprint of an entity list is 12 bytes (three `u32`s), compared with the 24 bytes for `Vec`.
-/// 3. An entity list doesn't implement `Drop`, leaving it to the pool to manage memory.
-///
-/// The list pool is intended to be used as a LIFO allocator. After building up a larger data
-/// structure with many list references, the whole thing can be discarded quickly by clearing the
-/// pool.
-///
-/// # Safety
-///
-/// Entity lists are not as safe to use as `Vec`, but they never jeopardize Rust's memory safety
-/// guarantees. These are the problems to be aware of:
-///
-/// - If you lose track of an entity list, its memory won't be recycled until the pool is cleared.
-/// This can cause the pool to grow very large with leaked lists.
-/// - If entity lists are used after their pool is cleared, the generation counter makes them
-///   read as empty lists rather than exposing garbage data from the recycled pool memory.
-/// - If an entity list is used with two different pool instances, both pools are likely to become
-/// corrupted.
-///
-/// Entity lists can be cloned, but that operation should only be used as part of cloning the whole
-/// data structure they belong to. *Cloning an entity list does not allocate new memory for the clone*.
-/// It creates an alias of the same memory.
-///
-/// Entity lists cannot be hashed and compared for equality because it's not possible to compare the
-/// contents of the list without the pool reference.
-///
-/// # Implementation
-///
-/// The `ListHandle` itself is designed to have a small footprint, since one handle is stored per
-/// interned token. It contains a 32-bit index into the pool's memory vector pointing to the first
-/// element of the list, the list's length, and a generation counter used to detect handles that
-/// outlived a clearing of the pool.
-///
-/// The pool is just a single `Vec<u32>` containing all of the allocated lists. Each list is
-/// represented as two contiguous parts:
-///
-/// 1. The list elements.
-/// 2. Excess capacity elements.
-///
-/// The total size of the two parts is always a power of two, and the excess capacity is always
-/// as small as possible.
-///
-/// Growing a list past the capacity of its block causes it to be reallocated in the pool vector.
-///
-/// The list length lives in the handle rather than in the pool. The index stored in a
-/// `ListHandle` points to the first list element; empty and single-element lists are not
-/// allocated in the vector at all, as a single element is stored inline in the handle
-/// instead of an index.
-#[derive(Clone, Debug, PartialEq, Eq)]
-pub struct ListHandle {
- index: u32,
- generation: u32,
- len: u32,
-}
-
-/// Create an empty list.
-impl Default for ListHandle {
- fn default() -> Self {
- Self { index: 0, generation: 0, len: 0 }
- }
-}
-
-const MAX_SIZE_CLAS: SizeClass = sclass_for_length(super::MAX_CHAIN_LEN - 1);
-const NUM_SIZE_CLASS: usize = MAX_SIZE_CLAS as usize + 1;
-
-/// A memory pool for storing lists of `T`.
-#[derive(Clone, Debug)]
-pub struct ListPool {
- // The main array containing the lists.
- data: Vec<u32>,
-
- // Heads of the free lists, one for each size class.
- free: [u32; NUM_SIZE_CLASS],
-
- generation: u32,
-}
-
-/// Lists are allocated in sizes that are powers of two, starting from 4.
-/// Each power of two is assigned a size class number, so the size is `4 << SizeClass`.
-type SizeClass = u8;
-
-/// Get the size of a given size class, i.e. the number of `u32` element slots in a block of
-/// that class.
-#[inline]
-const fn sclass_size(sclass: SizeClass) -> usize {
- 4 << sclass
-}
-
-/// Get the size class to use for a given list length.
-/// This always leaves room for at least one more element beyond `len`.
-#[inline]
-const fn sclass_for_length(len: u32) -> SizeClass {
- 30 - (len | 3).leading_zeros() as SizeClass
-}
-
-/// Has `len` reached the capacity of its current block, so that pushing another element requires reallocating?
-#[inline]
-fn is_sclass_max_length(len: u32) -> bool {
- len > 3 && len.is_power_of_two()
-}
-
-impl ListPool {
- /// Create a new list pool.
- pub fn new(capacity: u32) -> Self {
- Self {
- data: Vec::with_capacity(capacity as usize),
- free: [u32::MAX; NUM_SIZE_CLASS],
- generation: 1,
- }
- }
-
- /// Clear the pool, forgetting about all lists that use it.
- ///
- /// This invalidates any existing entity lists that used this pool to allocate memory.
- ///
- /// The pool's memory is not released to the operating system, but kept around for faster
- /// allocation in the future.
- pub fn clear(&mut self) {
- self.data.clear();
- self.free.fill(u32::MAX);
- self.generation += 1;
- }
-
- /// Allocate a storage block with a size given by `sclass`.
- ///
- /// Returns the first index of an available segment of `self.data` containing
- /// `sclass_size(sclass)` elements. The allocated memory is filled with reserved
- /// values.
- fn alloc(&mut self, sclass: SizeClass) -> usize {
- let freelist_head = self.free[sclass as usize];
- // First try the free list for this size class.
- if freelist_head == u32::MAX {
- // Nothing on the free list. Allocate more memory.
- let offset = self.data.len();
- self.data.resize(offset + sclass_size(sclass), u32::MAX);
- offset
- } else {
-            // Take an allocation off the free list (a singly linked list).
- self.free[sclass as usize] = self.data[freelist_head as usize];
- freelist_head as usize
- }
- }
-
- /// Free a storage block with a size given by `sclass`.
- ///
- /// This must be a block that was previously allocated by `alloc()` with the same size class.
- fn free(&mut self, block: usize, sclass: SizeClass) {
- let sclass = sclass as usize;
-        // Insert the block on the free list, which is a singly linked list.
- self.data[block] = self.free[sclass] as u32;
- self.free[sclass] = block as u32
- }
-
- /// Returns two mutable slices representing the two requested blocks.
- ///
- /// The two returned slices can be longer than the blocks. Each block is located at the front
- /// of the respective slice.
- fn mut_slices(&mut self, block0: usize, block1: usize) -> (&mut [u32], &mut [u32]) {
- if block0 < block1 {
- let (s0, s1) = self.data.split_at_mut(block1);
- (&mut s0[block0..], s1)
- } else {
- let (s1, s0) = self.data.split_at_mut(block0);
- (s0, &mut s1[block1..])
- }
- }
-
- /// Reallocate a block to a different size class.
- ///
- /// Copy `elems_to_copy` elements from the old to the new block.
- fn realloc(
- &mut self,
- block: usize,
- from_sclass: SizeClass,
- to_sclass: SizeClass,
- elems_to_copy: usize,
- ) -> usize {
- debug_assert!(elems_to_copy <= sclass_size(from_sclass));
- debug_assert!(elems_to_copy <= sclass_size(to_sclass));
- let new_block = self.alloc(to_sclass);
-
- let (old, new) = self.mut_slices(block, new_block);
- new[0..elems_to_copy].copy_from_slice(&old[0..elems_to_copy]);
-
- self.free(block, from_sclass);
- new_block
- }
-}
-
-impl ListHandle {
- /// Get the number of elements in the list.
- #[allow(clippy::len_without_is_empty)]
- pub fn len(&self, pool: &ListPool) -> u32 {
- if self.generation == pool.generation {
- self.len
- } else {
- 0
- }
- }
-
- /// Get the list as a slice.
- pub fn as_slice<'a>(&'a self, pool: &'a ListPool) -> &'a [u32] {
- let idx = self.index as usize;
- match self.len(pool) {
- 0 => &[],
- 1 => std::slice::from_ref(&self.index),
- len => &pool.data[idx..idx + len as usize],
- }
- }
-
- /// Appends an element to the back of the list.
-    /// Elements pushed beyond `MAX_CHAIN_LEN` are silently ignored.
- pub fn push(&mut self, element: u32, pool: &mut ListPool) {
- let len = self.len(pool);
- match len {
- 0 => {
- self.generation = pool.generation;
- self.index = element;
- self.len = 1;
- }
- 1 => {
-                // The list previously stored its single element inline. Allocate a block and move both elements into it.
- let block = pool.alloc(0);
- pool.data[block] = self.index;
- pool.data[block + 1] = element;
- self.index = block as u32;
- self.len = 2;
- }
- 2..=MAX_CHAIN_LEN => {
- // Do we need to reallocate?
- let block;
- let idx = self.index as usize;
- if is_sclass_max_length(len) {
-                    // Reallocate into the next size class, preserving all old elements.
- let sclass = sclass_for_length(len);
- block = pool.realloc(idx, sclass - 1, sclass, len as usize);
- self.index = block as u32;
- } else {
- block = idx;
- }
- pool.data[block + len as usize] = element;
- self.len += 1;
- }
-
-            // ignore elements once the list is longer than MAX_CHAIN_LEN
-            // these are rarely relevant and if they are we fall back to myers
- _ => (),
- }
- }
-}
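
A quick, self-contained check of the size-class arithmetic above (the two functions are adapted verbatim from this file, with `SizeClass` spelled as `u8`):

```rust
const fn sclass_size(sclass: u8) -> usize {
    4 << sclass
}

const fn sclass_for_length(len: u32) -> u8 {
    (30 - (len | 3).leading_zeros()) as u8
}

fn main() {
    // Lengths 0..=3 map to class 0, a 4-slot block.
    assert_eq!(sclass_for_length(3), 0);
    // Length 4 maps to class 1 (8 slots): exactly the
    // `is_sclass_max_length(4)` reallocation case in `push`.
    assert_eq!(sclass_for_length(4), 1);
    assert_eq!(sclass_size(1), 8);
    // A list of length 7 still sits in an 8-slot block,
    // so the next push fits without reallocating.
    assert_eq!(sclass_for_length(7), 1);
}
```
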
diff --git a/vendor/imara-diff/src/intern.rs b/vendor/imara-diff/src/intern.rs
deleted file mode 100644
index 7d06522c8..000000000
--- a/vendor/imara-diff/src/intern.rs
+++ /dev/null
@@ -1,172 +0,0 @@
-use std::hash::Hash;
-use std::ops::Index;
-
-use ahash::RandomState;
-use hashbrown::raw::RawTable;
-
-/// A token represented as an interned integer.
-///
-/// A token represents the smallest possible unit of change during a diff.
-/// For text this is usually a line, a word or a single character.
-/// All [algorithms](crate::Algorithm) operate on interned tokens instead
-/// of using the token data directly.
-/// This allows for much better performance by amortizing the cost of hashing/equality.
-///
-/// While you can intern tokens yourself it is strongly recommended to use the [`InternedInput`](crate::intern::InternedInput) type.
-#[derive(PartialEq, Eq, Hash, Clone, Copy, Debug)]
-#[repr(transparent)]
-pub struct Token(pub u32);
-
-impl From<u32> for Token {
- fn from(token: u32) -> Self {
- Token(token)
- }
-}
-
-impl From<Token> for u32 {
- fn from(token: Token) -> Self {
- token.0
- }
-}
-
-pub trait TokenSource {
- type Token: Hash + Eq;
- type Tokenizer: Iterator<Item = Self::Token>;
- fn tokenize(&self) -> Self::Tokenizer;
- fn estimate_tokens(&self) -> u32;
-}
-
-/// Two lists of interned [tokens](crate::intern::Token) that can be compared with the [`diff`](crate::diff) function.
-///
-/// A token represents the smallest possible unit of change during a diff.
-/// For text this is usually a line, a word or a single character.
-/// All [algorithms](crate::Algorithm) operate on interned tokens instead
-/// of using the token data directly.
-/// This allows for much better performance by amortizing the cost of hashing/equality.
-///
-/// While you can intern tokens yourself it is strongly recommended to use the [`InternedInput`](crate::intern::InternedInput) type.
-#[derive(Default)]
-pub struct InternedInput<T: Eq + Hash> {
- pub before: Vec<Token>,
- pub after: Vec<Token>,
- pub interner: Interner<T>,
-}
-
-impl<T: Eq + Hash> InternedInput<T> {
- pub fn new<I: TokenSource<Token = T>>(before: I, after: I) -> Self {
- let token_estimate_before = before.estimate_tokens() as usize;
- let token_estimate_after = after.estimate_tokens() as usize;
- let mut res = Self {
- before: Vec::with_capacity(token_estimate_before),
- after: Vec::with_capacity(token_estimate_after),
- interner: Interner::new(token_estimate_before + token_estimate_after),
- };
- res.update_before(before.tokenize());
- res.update_after(after.tokenize());
- res
- }
-
-    /// Replaces `self.before` with the interned tokens yielded by `input`.
-    /// Note that this does not erase any tokens from the interner and might therefore be considered
-    /// a memory leak. If this function is called often over a long-running process,
- /// consider clearing the interner with [`clear`](crate::intern::Interner::clear).
- pub fn update_before(&mut self, input: impl Iterator<Item = T>) {
- self.before.clear();
- self.before.extend(input.map(|token| self.interner.intern(token)));
- }
-
-    /// Replaces `self.after` with the interned tokens yielded by `input`.
-    /// Note that this does not erase any tokens from the interner and might therefore be considered
-    /// a memory leak. If this function is called often over a long-running process,
- /// consider clearing the interner with [`clear`](crate::intern::Interner::clear) or
- /// [`erase_tokens_after`](crate::intern::Interner::erase_tokens_after).
- pub fn update_after(&mut self, input: impl Iterator<Item = T>) {
- self.after.clear();
- self.after.extend(input.map(|token| self.interner.intern(token)));
- }
-
- pub fn clear(&mut self) {
- self.before.clear();
- self.after.clear();
- self.interner.clear();
- }
-}
-
-/// A hashtable-based interner that maps each distinct token to a unique integer ([`Token`](crate::intern::Token)).
-#[derive(Default)]
-pub struct Interner<T: Hash + Eq> {
- tokens: Vec<T>,
- table: RawTable<Token>,
- hasher: RandomState,
-}
-
-impl<T: Hash + Eq> Interner<T> {
-    /// Create an Interner with an initial capacity calculated by calling the
-    /// [`estimate_tokens`](crate::intern::TokenSource::estimate_tokens) method of `before` and `after`.
- pub fn new_for_token_source<S: TokenSource<Token = T>>(before: &S, after: &S) -> Self {
- Self::new(before.estimate_tokens() as usize + after.estimate_tokens() as usize)
- }
-
-    /// Create an Interner with initial capacity `capacity`.
- pub fn new(capacity: usize) -> Interner<T> {
- Interner {
- tokens: Vec::with_capacity(capacity),
- table: RawTable::with_capacity(capacity),
- hasher: RandomState::new(),
- }
- }
-
- /// Remove all interned tokens
- pub fn clear(&mut self) {
- self.table.clear_no_drop();
- self.tokens.clear();
- }
-
-    /// Interns `token` and returns the interned integer.
- pub fn intern(&mut self, token: T) -> Token {
- let hash = self.hasher.hash_one(&token);
- if let Some(&token) = self.table.get(hash, |&it| self.tokens[it.0 as usize] == token) {
- token
- } else {
- let interned = Token(self.tokens.len() as u32);
- self.table.insert(hash, interned, |&token| {
- self.hasher.hash_one(&self.tokens[token.0 as usize])
- });
- self.tokens.push(token);
- interned
- }
- }
-
-    /// Returns the total number of **distinct** tokens currently interned.
- pub fn num_tokens(&self) -> u32 {
- self.tokens.len() as u32
- }
-
- /// Erases `first_erased_token` and any tokens interned afterwards from the interner.
- pub fn erase_tokens_after(&mut self, first_erased_token: Token) {
- assert!(first_erased_token.0 <= self.tokens.len() as u32);
- let retained = first_erased_token.0 as usize;
- let erased = self.tokens.len() - retained;
- if retained <= erased {
- self.table.clear_no_drop();
-            // SAFETY: we know `retained` is smaller than the table capacity, so the table will never have to grow
- unsafe {
- for (i, token) in self.tokens[0..retained].iter().enumerate() {
- self.table.insert_no_grow(self.hasher.hash_one(token), Token(i as u32));
- }
- }
- } else {
- for (i, token) in self.tokens[retained..].iter().enumerate() {
- self.table.erase_entry(self.hasher.hash_one(token), |token| token.0 == (retained + i) as u32);
- }
- }
- self.tokens.truncate(first_erased_token.0 as usize);
- }
-}
-
-impl<T: Hash + Eq> Index<Token> for Interner<T> {
- type Output = T;
- fn index(&self, index: Token) -> &Self::Output {
- &self.tokens[index.0 as usize]
- }
-}
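
A small usage sketch of the interning types above (assuming the crate's default line tokenizer for `&str`, provided in `sources.rs`):

```rust
use imara_diff::intern::InternedInput;

fn main() {
    // "a" occurs in both files but is interned only once.
    let input = InternedInput::new("a\nb\n", "a\nc\n");
    assert_eq!(input.before.len(), 2);
    assert_eq!(input.after.len(), 2);
    assert_eq!(input.interner.num_tokens(), 3); // "a", "b", "c"
    // `Index<Token>` recovers the original token data.
    assert_eq!(input.interner[input.before[0]], "a");
}
```
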
diff --git a/vendor/imara-diff/src/lib.rs b/vendor/imara-diff/src/lib.rs
deleted file mode 100644
index c20b750ca..000000000
--- a/vendor/imara-diff/src/lib.rs
+++ /dev/null
@@ -1,267 +0,0 @@
-//! Imara-diff is a solid (imara in Swahili) diff library for Rust.
-//! Solid refers to the fact that imara-diff provides very good runtime performance even
-//! in pathological cases, so that your application never appears to freeze while waiting on a diff.
-//! The performance improvements are achieved using battle-tested heuristics from gnu-diff and git
-//! that are known to yield fast runtimes in practice.
-//!
-//! Imara-diff is also designed to be flexible, so that it can be used with arbitrary collections,
-//! not just lists and strings, and even allows reusing large parts of the computation when
-//! comparing the same file to multiple different files.
-//!
-//! Imara-diff provides two diff algorithms:
-//!
-//! * The linear-space variant of the well-known [**Myers** algorithm](http://www.xmailserver.org/diff2.pdf)
-//! * The **Histogram** algorithm, a variant of the patience diff algorithm.
-//!
-//! The Myers algorithm has been enhanced with preprocessing and multiple heuristics that avoid quadratic
-//! time complexity in pathological cases, and it closely matches the behaviour of gnu-diff and git.
-//! The Histogram algorithm was originally ported from git but has been heavily optimized.
-//! The **Histogram algorithm outperforms Myers diff** by 10% - 100% across a **wide variety of workloads**.
-//!
-//! Imara-diff's algorithms have been benchmarked over a wide variety of real-world code.
-//! For example, while comparing multiple different Linux kernel versions it performs up to 30 times better than the `similar` crate:
-#![cfg_attr(doc, doc=concat!("<img width=\"600\" class=\"figure\" src=\"data:image/svg+xml;base64,", include_str!("../plots/linux_comparison.svg.base64"), "\"></img>"))]
-//!
-//! # Api Overview
-//!
-//! Imara-diff provides the [`UnifiedDiffBuilder`](crate::UnifiedDiffBuilder) for building
-//! a human-redable diff similar to the output of `git diff` or `diff -u`.
-//! This makes building a tool similar to gnu diff easy:
-//!
-//! ```
-//! use imara_diff::intern::InternedInput;
-//! use imara_diff::{diff, Algorithm, UnifiedDiffBuilder};
-//!
-//! let before = r#"fn foo() -> Bar {
-//! let mut foo = 2;
-//! foo *= 50;
-//! println!("hello world")
-//! }"#;
-//!
-//! let after = r#"// lorem ipsum
-//! fn foo() -> Bar {
-//! let mut foo = 2;
-//! foo *= 50;
-//! println!("hello world");
-//! println!("{foo}");
-//! }
-//! // foo
-//! "#;
-//!
-//! let input = InternedInput::new(before, after);
-//! let diff = diff(Algorithm::Histogram, &input, UnifiedDiffBuilder::new(&input));
-//! assert_eq!(
-//! diff,
-//! r#"@@ -1,5 +1,8 @@
-//! +// lorem ipsum
-//! fn foo() -> Bar {
-//! let mut foo = 2;
-//! foo *= 50;
-//! - println!("hello world")
-//! + println!("hello world");
-//! + println!("{foo}");
-//! }
-//! +// foo
-//! "#
-//! );
-//! ```
-//!
-//! If you want to process the diff in some way you can provide your own implementation of [`Sink`](crate::sink::Sink).
-//! For closures [`Sink`](crate::sink::Sink) is already implemented, so simple [`Sink`]s can be easily added:
-//!
-//! ```
-//! use std::ops::Range;
-//!
-//! use imara_diff::intern::InternedInput;
-//! use imara_diff::{diff, Algorithm, UnifiedDiffBuilder};
-//!
-//! let before = r#"fn foo() -> Bar {
-//! let mut foo = 2;
-//! foo *= 50;
-//! println!("hello world")
-//! }"#;
-//!
-//! let after = r#"// lorem ipsum
-//! fn foo() -> Bar {
-//! let mut foo = 2;
-//! foo *= 50;
-//! println!("hello world");
-//! println!("{foo}");
-//! }
-//! // foo
-//! "#;
-//!
-//! let mut insertions = Vec::new();
-//! let mut removals = Vec::new();
-//! let mut replacements = Vec::new();
-//!
-//! let input = InternedInput::new(before, after);
-//! let sink = |before: Range<u32>, after: Range<u32>| {
-//! let hunk_before: Vec<_> = input.before[before.start as usize..before.end as usize]
-//! .iter()
-//! .map(|&line| input.interner[line])
-//! .collect();
-//! let hunk_after: Vec<_> = input.after[after.start as usize..after.end as usize]
-//! .iter()
-//! .map(|&line| input.interner[line])
-//! .collect();
-//! if hunk_after.is_empty() {
-//! removals.push(hunk_before)
-//! } else if hunk_before.is_empty() {
-//! insertions.push(hunk_after)
-//! } else {
-//! replacements.push((hunk_before, hunk_after))
-//! }
-//! };
-//! let diff = diff(Algorithm::Histogram, &input, sink);
-//! assert_eq!(&insertions, &[vec!["// lorem ipsum"], vec!["// foo"]]);
-//! assert!(removals.is_empty());
-//! assert_eq!(
-//! &replacements,
-//! &[(
-//! vec![" println!(\"hello world\")"],
-//! vec![" println!(\"hello world\");", " println!(\"{foo}\");"]
-//! )]
-//! );
-//! ```
-//!
-//! For `&str` and `&[u8]` imara-diff will compute a line diff by default.
-//! To perform diffs of different tokenizations and collections you can implement the [`TokenSource`](crate::intern::TokenSource) trait.
-//! For example, imara-diff provides an alternative tokenizer for line-diffs that includes the line terminator in the line:
-//!
-//! ```
-//! use imara_diff::intern::InternedInput;
-//! use imara_diff::sink::Counter;
-//! use imara_diff::sources::lines_with_terminator;
-//! use imara_diff::{diff, Algorithm, UnifiedDiffBuilder};
-//!
-//! let before = "foo";
-//! let after = "foo\n";
-//!
-//! let input = InternedInput::new(before, after);
-//! let changes = diff(Algorithm::Histogram, &input, Counter::default());
-//! assert_eq!(changes.insertions, 0);
-//! assert_eq!(changes.removals, 0);
-//!
-//! let input = InternedInput::new(lines_with_terminator(before), lines_with_terminator(after));
-//! let changes = diff(Algorithm::Histogram, &input, Counter::default());
-//! assert_eq!(changes.insertions, 1);
-//! assert_eq!(changes.removals, 1);
-//! ```
-
-use std::hash::Hash;
-
-#[cfg(feature = "unified_diff")]
-pub use unified_diff::UnifiedDiffBuilder;
-
-use crate::intern::{InternedInput, Token, TokenSource};
-pub use crate::sink::Sink;
-mod histogram;
-pub mod intern;
-mod myers;
-pub mod sink;
-pub mod sources;
-#[cfg(feature = "unified_diff")]
-mod unified_diff;
-mod util;
-
-#[cfg(test)]
-mod tests;
-
-/// `imara-diff` supports multiple different algorithms
-/// for computing an edit sequence.
-/// These algorithms have different performance characteristics and produce different output.
-#[derive(Debug, PartialEq, Eq, Clone, Copy)]
-pub enum Algorithm {
- /// A variation of the [`patience` diff algorithm described by Bram Cohen's blog post](https://bramcohen.livejournal.com/73318.html)
-    /// that uses a histogram to find an LCS anchored at the least common tokens.
- /// Just like the `patience` diff algorithm, this algorithm usually produces
-    /// more human-readable output than the myers algorithm.
-    /// However, compared to the `patience` diff algorithm (which is slower than the myers algorithm),
- /// the Histogram algorithm performs much better.
- ///
- /// The implementation here was originally ported from `git` but has been significantly
- /// modified to improve performance.
-    /// As a result it consistently **performs better than the myers algorithm** (5%-100%) over
-    /// a wide variety of test data. For example, a benchmark of diffing Linux kernel commits is shown below:
- #[cfg_attr(doc, doc=concat!("<img width=\"600\" class=\"figure\" src=\"data:image/svg+xml;base64,", include_str!("../plots/linux_speedup.svg.base64"), "\"></img>"))]
- ///
- /// For pathological subsequences that only contain highly repeating tokens (64+ occurrences)
- /// the algorithm falls back on Myers algorithm (with heuristics) to avoid quadratic behavior.
- ///
- /// Compared to Myers algorithm, the Histogram diff algorithm is more focused on providing
-    /// human-readable diffs instead of minimal diffs. In practice this means that the edit-sequences
-    /// produced by the histogram diff are often longer than those produced by Myers algorithm.
- ///
- /// The heuristic used by the histogram diff does not work well for inputs with small (often repeated)
-    /// tokens. For example **character diffs do not work well** as most (English) text is made up of
-    /// a fairly small set of characters. The `Histogram` algorithm will automatically detect these cases and
-    /// fall back to Myers algorithm. However this detection has a nontrivial overhead, so
-    /// if it's known upfront that the set of distinct tokens is very small, `Myers` algorithm should
- /// be used instead.
- Histogram,
- /// An implementation of the linear space variant of
- /// [Myers `O((N+M)D)` algorithm](http://www.xmailserver.org/diff2.pdf).
- /// The algorithm is enhanced with preprocessing that removes
- /// tokens that don't occur in the other file at all.
-    /// Furthermore, two heuristics are applied to the middle-snake search
- /// that ensure reasonable runtime (mostly linear time complexity) even for large files.
- ///
- /// Due to the divide and conquer nature of the algorithm
-    /// the edit sequences produced are still fairly small even when the middle snake
- /// search is aborted by a heuristic.
- /// However, the produced edit sequences are not guaranteed to be fully minimal.
- /// If that property is vital to you, use the `MyersMinimal` algorithm instead.
- ///
-    /// The implementation (including the preprocessing) is mostly
- /// ported from `git` and `gnu-diff` where Myers algorithm is used
- /// as the default diff algorithm.
-    /// Therefore the heuristics used have been heavily battle-tested and
-    /// are known to behave well over a large variety of inputs.
- Myers,
- /// Same as `Myers` but the early abort heuristics are disabled to guarantee
- /// a minimal edit sequence.
- /// This can mean significant slowdown in pathological cases.
- MyersMinimal,
-}
-
-impl Algorithm {
- #[cfg(test)]
- const ALL: [Self; 2] = [Algorithm::Histogram, Algorithm::Myers];
-}
-
-impl Default for Algorithm {
- fn default() -> Self {
- Algorithm::Histogram
- }
-}
-
-/// Computes an edit-script that transforms `input.before` into `input.after` using
-/// the specified `algorithm`.
-/// The edit-script is passed to `sink.process_change` while it is produced.
-pub fn diff<S: Sink, T: Eq + Hash>(
- algorithm: Algorithm,
- input: &InternedInput<T>,
- sink: S,
-) -> S::Out {
- diff_with_tokens(algorithm, &input.before, &input.after, input.interner.num_tokens(), sink)
-}
-
-/// Computes an edit-script that transforms `before` into `after` using
-/// the specified `algorithm`.
-/// The edit-script is passed to `sink.process_change` while it is produced.
-pub fn diff_with_tokens<S: Sink>(
- algorithm: Algorithm,
- before: &[Token],
- after: &[Token],
- num_tokens: u32,
- sink: S,
-) -> S::Out {
- assert!(before.len() < i32::MAX as usize, "imara-diff only supports up to {} tokens", i32::MAX);
- assert!(after.len() < i32::MAX as usize, "imara-diff only supports up to {} tokens", i32::MAX);
- match algorithm {
- Algorithm::Histogram => histogram::diff(before, after, num_tokens, sink),
- Algorithm::Myers => myers::diff(before, after, num_tokens, sink, false),
- Algorithm::MyersMinimal => myers::diff(before, after, num_tokens, sink, true),
- }
-}
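
A usage sketch for the lower-level `diff_with_tokens` entry point, complementing the `diff` examples in the module docs above (assumption: the default line tokenizer for `&str`):

```rust
use imara_diff::intern::InternedInput;
use imara_diff::sink::Counter;
use imara_diff::{diff_with_tokens, Algorithm};

fn main() {
    let input = InternedInput::new("a\nb\nc\n", "a\nc\n");
    // Equivalent to `diff(..)`, but with the token slices passed explicitly;
    // useful when the interned tokens are managed by the caller.
    let changes = diff_with_tokens(
        Algorithm::Myers,
        &input.before,
        &input.after,
        input.interner.num_tokens(),
        Counter::default(),
    );
    assert_eq!(changes.removals, 1); // only "b" was removed
    assert_eq!(changes.insertions, 0);
}
```
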
diff --git a/vendor/imara-diff/src/myers.rs b/vendor/imara-diff/src/myers.rs
deleted file mode 100644
index efebec031..000000000
--- a/vendor/imara-diff/src/myers.rs
+++ /dev/null
@@ -1,271 +0,0 @@
-use std::ptr::NonNull;
-
-use crate::intern::Token;
-use crate::myers::middle_snake::{MiddleSnakeSearch, SearchResult};
-use crate::myers::preprocess::PreprocessedFile;
-use crate::myers::slice::FileSlice;
-use crate::util::sqrt;
-use crate::Sink;
-
-mod middle_snake;
-mod preprocess;
-mod slice;
-
-pub struct Myers {
- kvec: NonNull<[i32]>,
- kforward: NonNull<i32>,
- kbackward: NonNull<i32>,
- max_cost: u32,
-}
-
-pub fn diff<S: Sink>(
- before: &[Token],
- after: &[Token],
- _num_tokens: u32,
- mut sink: S,
- minimal: bool,
-) -> S::Out {
-    // preprocess the files by removing tokens that are not contained in the other file at all
-    // this process remaps the token indices and therefore requires us to track changed tokens in a bool array
- // PERF use a bitset?
- let (mut before, mut after) = preprocess::preprocess(before, after);
-
- // Perform the actual diff
- Myers::new(before.tokens.len(), after.tokens.len()).run(
- FileSlice::new(&mut before),
- FileSlice::new(&mut after),
- minimal,
- );
-
- process_changes_with_sink(&before, &after, &mut sink);
- sink.finish()
-}
-
-const HEUR_MIN_COST: u32 = 256;
-const MAX_COST_MIN: u32 = 256;
-
-impl Drop for Myers {
- fn drop(&mut self) {
- unsafe { drop(Box::from_raw(self.kvec.as_ptr())) }
- }
-}
-
-impl Myers {
- fn new(len1: usize, len2: usize) -> Self {
- let ndiags = len1 + len2 as usize + 3;
- let kvec = Box::leak(vec![0; 2 * ndiags + 2].into_boxed_slice());
- let kforward = NonNull::from(&mut kvec[len2 + 1]);
- let kbackward = NonNull::from(&mut kvec[ndiags + len2 + 1]);
- Self { kvec: kvec.into(), kforward, kbackward, max_cost: sqrt(ndiags).max(MAX_COST_MIN) }
- }
-
- fn run<'f>(&mut self, mut file1: FileSlice<'f>, mut file2: FileSlice<'f>, mut need_min: bool) {
- loop {
- file1.strip_common(&mut file2);
-
- if file1.is_empty() {
- file2.mark_changed();
- return;
- } else if file2.is_empty() {
- file1.mark_changed();
- return;
- }
-
- let split = self.split(&file1, &file2, need_min);
- self.run(
- file1.borrow().slice(..split.token_idx1 as u32),
- file2.borrow().slice(..split.token_idx2 as u32),
- split.minimized_lo,
- );
-
- file1 = file1.slice(split.token_idx1 as u32..);
- file2 = file2.slice(split.token_idx2 as u32..);
- need_min = split.minimized_hi
- }
- }
-
- /// See "An O(ND) Difference Algorithm and its Variations", by Eugene Myers.
-    /// Basically considers a "box" (off1, off2, lim1, lim2) and scans from both
-    /// the forward diagonal starting from (off1, off2) and the backward diagonal
-    /// starting from (lim1, lim2). If the K values on the same diagonal cross, it
-    /// returns the furthest reaching point. We might encounter expensive edge cases
- /// using this algorithm, so a little bit of heuristic is needed to cut the
- /// search and to return a suboptimal point.
- fn split(&mut self, file1: &FileSlice, file2: &FileSlice, need_min: bool) -> Split {
- let mut forward_search =
- unsafe { MiddleSnakeSearch::<false>::new(self.kforward, file1, file2) };
- let mut backwards_search =
- unsafe { MiddleSnakeSearch::<true>::new(self.kbackward, file1, file2) };
-        let is_odd = (file1.len() as i32 - file2.len() as i32) & 1 != 0;
-
- let mut ec = 0;
-
- while ec <= self.max_cost {
- let mut found_snake = false;
- forward_search.next_d();
- if is_odd {
- if let Some(res) = forward_search.run(file1, file2, |k, token_idx1| {
- backwards_search.contains(k)
- && backwards_search.x_pos_at_diagonal(k) <= token_idx1
- }) {
- match res {
- SearchResult::Snake => found_snake = true,
- SearchResult::Found { token_idx1, token_idx2 } => {
- return Split {
- token_idx1,
- token_idx2,
- minimized_lo: true,
- minimized_hi: true,
- };
- }
- }
- }
- } else {
- found_snake |= forward_search.run(file1, file2, |_, _| false).is_some()
- };
-
- backwards_search.next_d();
- if !is_odd {
- if let Some(res) = backwards_search.run(file1, file2, |k, token_idx1| {
- forward_search.contains(k) && token_idx1 <= forward_search.x_pos_at_diagonal(k)
- }) {
- match res {
- SearchResult::Snake => found_snake = true,
- SearchResult::Found { token_idx1, token_idx2 } => {
- return Split {
- token_idx1,
- token_idx2,
- minimized_lo: true,
- minimized_hi: true,
- };
- }
- }
- }
- } else {
- found_snake |= backwards_search.run(file1, file2, |_, _| false).is_some()
- };
-
- if need_min {
- continue;
- }
-
- // If the edit cost is above the heuristic trigger and if
- // we got a good snake, we sample current diagonals to see
- // if some of them have reached an "interesting" path. Our
- // measure is a function of the distance from the diagonal
- // corner (i1 + i2) penalized with the distance from the
- // mid diagonal itself. If this value is above the current
- // edit cost times a magic factor (XDL_K_HEUR) we consider
- // it interesting.
- if found_snake && ec > HEUR_MIN_COST {
- if let Some((token_idx1, token_idx2)) = forward_search.found_snake(ec, file1, file2)
- {
- return Split {
- token_idx1,
- token_idx2,
- minimized_lo: true,
- minimized_hi: false,
- };
- }
-
- if let Some((token_idx1, token_idx2)) =
- backwards_search.found_snake(ec, file1, file2)
- {
- return Split {
- token_idx1,
- token_idx2,
- minimized_lo: false,
- minimized_hi: true,
- };
- }
- }
-
- ec += 1;
- }
-
- let (distance_forward, token_idx1_forward) = forward_search.best_position(file1, file2);
- let (distance_backwards, token_idx1_backwards) =
- backwards_search.best_position(file1, file2);
- if distance_forward > file1.len() as isize + file2.len() as isize - distance_backwards {
- Split {
- token_idx1: token_idx1_forward,
- token_idx2: (distance_forward - token_idx1_forward as isize) as i32,
- minimized_lo: true,
- minimized_hi: false,
- }
- } else {
- Split {
- token_idx1: token_idx1_backwards,
- token_idx2: (distance_backwards - token_idx1_backwards as isize) as i32,
- minimized_lo: false,
- minimized_hi: true,
- }
- }
- }
-}
-
-#[derive(Debug)]
-struct Split {
- token_idx1: i32,
- token_idx2: i32,
- minimized_lo: bool,
- minimized_hi: bool,
-}
-
-/// The remapping performed during preprocessing makes it impossible to directly call
-/// the `sink` during the diff itself. Instead `is_changed` is set to true for all
-/// tokens that are changed.
-/// Below, these arrays are used to drive the calls to the sink.
-fn process_changes_with_sink(
- before: &PreprocessedFile,
- after: &PreprocessedFile,
- sink: &mut impl Sink,
-) {
- let before_end = before.is_changed.len() as u32 + before.offset;
- let after_end = after.is_changed.len() as u32 + after.offset;
-
- let mut before = before
- .is_changed
- .iter()
- .enumerate()
- .map(|(i, removed)| (i as u32 + before.offset, *removed));
-
- let mut after = after
- .is_changed
- .iter()
- .enumerate()
- .map(|(i, inserted)| (i as u32 + after.offset, *inserted));
-
- let mut next1 = before.next();
- let mut next2 = after.next();
-
- while let (Some((before_pos, removed)), Some((after_pos, inserted))) = (next1, next2) {
- if !(removed | inserted) {
- next1 = before.next();
- next2 = after.next();
- continue;
- }
-
- let mut hunk_before = before_pos..before_pos;
- let mut hunk_after = after_pos..after_pos;
- if removed {
- let end = before.find(|(_, changed)| !changed);
- next1 = end.map(|(end, _)| (end, false));
- hunk_before.end = end.map_or(before_end, |(end, _)| end);
- };
-
- if inserted {
- let end = after.find(|(_, changed)| !changed);
- next2 = end.map(|(end, _)| (end, false));
- hunk_after.end = end.map_or(after_end, |(end, _)| end);
- }
-
- sink.process_change(hunk_before, hunk_after);
- }
-
- if let Some((before_pos, _)) = next1 {
- sink.process_change(before_pos..before_end, after_end..after_end);
- } else if let Some((after_pos, _)) = next2 {
- sink.process_change(before_end..before_end, after_pos..after_end);
- }
-}
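
A simplified sketch of the idea behind `process_changes_with_sink` (hypothetical code: the real function additionally applies the preprocessing offsets and handles the trailing change explicitly):

```rust
use std::ops::Range;

// Walk the two `is_changed` maps in lockstep: unchanged tokens advance both
// cursors one step (they pair up 1:1), while runs of changed tokens on either
// side are reported together as a single hunk.
fn report_hunks(
    before: &[bool],
    after: &[bool],
    mut sink: impl FnMut(Range<usize>, Range<usize>),
) {
    let (mut i, mut j) = (0, 0);
    while i < before.len() || j < after.len() {
        let start = (i, j);
        while i < before.len() && before[i] {
            i += 1;
        }
        while j < after.len() && after[j] {
            j += 1;
        }
        if (i, j) != start {
            sink(start.0..i, start.1..j);
        } else {
            // neither side changed here: skip the matching token pair
            i += 1;
            j += 1;
        }
    }
}

fn main() {
    // before = [a, B, c], after = [a, X, Y, c]; uppercase marks changed tokens.
    let before = [false, true, false];
    let after = [false, true, true, false];
    report_hunks(&before, &after, |b, a| println!("-{b:?} +{a:?}"));
    // prints: -1..2 +1..3
}
```
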
diff --git a/vendor/imara-diff/src/myers/middle_snake.rs b/vendor/imara-diff/src/myers/middle_snake.rs
deleted file mode 100644
index 7be333761..000000000
--- a/vendor/imara-diff/src/myers/middle_snake.rs
+++ /dev/null
@@ -1,246 +0,0 @@
-use std::ptr::NonNull;
-
-use crate::myers::slice::FileSlice;
-use crate::util::{common_postfix, common_prefix};
-
-const SNAKE_CNT: u32 = 20;
-const K_HEUR: u32 = 4;
-
-pub struct MiddleSnakeSearch<const BACK: bool> {
- kvec: NonNull<i32>,
- kmin: i32,
- kmax: i32,
- dmin: i32,
- dmax: i32,
-}
-
-impl<const BACK: bool> MiddleSnakeSearch<BACK> {
- /// # Safety
- /// `data` must be valid for reads between `-file1.len()` and `file2.len()`
- pub unsafe fn new(data: NonNull<i32>, file1: &FileSlice, file2: &FileSlice) -> Self {
- let dmin = -(file2.len() as i32);
- let dmax = file1.len() as i32;
- let kmid = if BACK { dmin + dmax } else { 0 };
- let mut res = Self { kvec: data, kmin: kmid, kmax: kmid, dmin, dmax };
- let init = if BACK { file1.len() as i32 } else { 0 };
- res.write_xpos_at_diagonal(kmid, init);
- res
- }
-
- pub fn contains(&self, k: i32) -> bool {
- (self.kmin..=self.kmax).contains(&k)
- }
-
- pub fn bounds_check(&self, k: i32) {
- debug_assert!((self.dmin - 1..=self.dmax + 1).contains(&k));
- }
-
- fn write_xpos_at_diagonal(&mut self, k: i32, token_idx1: i32) {
- self.bounds_check(k);
- unsafe { self.kvec.as_ptr().offset(k as isize).write(token_idx1) }
- }
-
- pub fn x_pos_at_diagonal(&self, diagonal: i32) -> i32 {
- self.bounds_check(diagonal);
- unsafe { self.kvec.as_ptr().offset(diagonal as isize).read() }
- }
-
- pub fn pos_at_diagonal(&self, diagonal: i32) -> (i32, i32) {
- self.bounds_check(diagonal);
- let token_idx1 = unsafe { self.kvec.as_ptr().offset(diagonal as isize).read() };
- let token_idx2 = (token_idx1 as i32 - diagonal) as i32;
- (token_idx1, token_idx2)
- }
-
- /// We need to extend the diagonal "domain" by one. If the next
-    /// values exit the box boundaries we need to change them in the
- /// opposite direction because (max - min) must be a power of
- /// two.
- ///
-    /// Also we initialize the external K value to a sentinel so that we can
- /// avoid extra conditions in the check inside the core loop.
- pub fn next_d(&mut self) {
- let init_val = if BACK {
-            // value should always be larger than bounds
- i32::MAX
- } else {
-            // value should always be smaller than bounds
- i32::MIN
- };
-
- if self.kmin > self.dmin {
- self.kmin -= 1;
- self.write_xpos_at_diagonal(self.kmin - 1, init_val);
- } else {
- self.kmin += 1;
- }
-
- if self.kmax < self.dmax {
- self.kmax += 1;
- self.write_xpos_at_diagonal(self.kmax + 1, init_val);
- } else {
- self.kmax -= 1;
- }
- }
-
- pub fn run(
- &mut self,
- file1: &FileSlice,
- file2: &FileSlice,
- mut f: impl FnMut(i32, i32) -> bool,
- ) -> Option<SearchResult> {
- let mut res = None;
- let mut k = self.kmax;
- while k >= self.kmin {
- let mut token_idx1 = if BACK {
- if self.x_pos_at_diagonal(k - 1) < self.x_pos_at_diagonal(k + 1) {
- self.x_pos_at_diagonal(k - 1)
- } else {
- self.x_pos_at_diagonal(k + 1) - 1
- }
- } else if self.x_pos_at_diagonal(k - 1) >= self.x_pos_at_diagonal(k + 1) {
- self.x_pos_at_diagonal(k - 1) + 1
- } else {
- self.x_pos_at_diagonal(k + 1)
- };
-
- let mut token_idx2 = (token_idx1 as i32 - k) as i32;
- let off = if BACK {
- if token_idx1 > 0 && token_idx2 > 0 {
- let tokens1 = &file1.tokens[..token_idx1 as usize];
- let tokens2 = &file2.tokens[..token_idx2 as usize];
- common_postfix(tokens1, tokens2)
- } else {
- 0
- }
- } else if token_idx1 < file1.len() as i32 && token_idx2 < file2.len() as i32 {
- let tokens1 = &file1.tokens[token_idx1 as usize..];
- let tokens2 = &file2.tokens[token_idx2 as usize..];
- common_prefix(tokens1, tokens2)
- } else {
- 0
- };
-
- if off > SNAKE_CNT {
- res = Some(SearchResult::Snake)
- }
-
- if BACK {
- token_idx1 -= off as i32;
- token_idx2 -= off as i32;
- } else {
- token_idx1 += off as i32;
- token_idx2 += off as i32;
- }
- self.write_xpos_at_diagonal(k, token_idx1);
-
- if f(k, token_idx1) {
- return Some(SearchResult::Found { token_idx1, token_idx2 });
- }
-
- k -= 2;
- }
-
- res
- }
-
- pub fn best_position(&self, file1: &FileSlice, file2: &FileSlice) -> (isize, i32) {
- let mut best_distance: isize = if BACK { isize::MAX } else { -1 };
- let mut best_token_idx1 = if BACK { i32::MAX } else { -1 };
- let mut k = self.kmax;
- while k >= self.kmin {
- let mut token_idx1 = self.x_pos_at_diagonal(k);
- if BACK {
- token_idx1 = token_idx1.max(0);
- } else {
- token_idx1 = token_idx1.min(file1.len() as i32);
- }
- let mut token_idx2 = token_idx1 - k;
- if BACK {
- if token_idx2 < 0 {
- token_idx1 = k;
- token_idx2 = 0;
- }
- } else if token_idx2 > file2.len() as i32 {
- token_idx1 = file2.len() as i32 + k;
- token_idx2 = file2.len() as i32;
- }
-
- let distance = token_idx1 as isize + token_idx2 as isize;
- if BACK && distance < best_distance || !BACK && distance > best_distance {
- best_distance = distance;
- best_token_idx1 = token_idx1;
- }
-
- k -= 2;
- }
- (best_distance, best_token_idx1)
- }
-
- pub fn found_snake(&self, ec: u32, file1: &FileSlice, file2: &FileSlice) -> Option<(i32, i32)> {
- let mut best_score = 0;
- let mut best_token_idx1 = 0;
- let mut best_token_idx2 = 0;
- let mut k = self.kmax;
- while k >= self.kmin {
- let (token_idx1, token_idx2) = self.pos_at_diagonal(k);
- if BACK {
- if !(0..file1.len() as i32 - SNAKE_CNT as i32).contains(&token_idx1) {
- k -= 2;
- continue;
- }
- if !(0..file2.len() as i32 - SNAKE_CNT as i32).contains(&token_idx2) {
- k -= 2;
- continue;
- }
- } else {
- if !(SNAKE_CNT as i32..file1.len() as i32).contains(&token_idx1) {
- k -= 2;
- continue;
- }
- if !(SNAKE_CNT as i32..file2.len() as i32).contains(&token_idx2) {
- k -= 2;
- continue;
- }
- }
-
- let main_diagonal_distance = k.unsigned_abs() as usize;
- let distance = if BACK {
- (file1.len() - token_idx1 as u32) + (file2.len() - token_idx2 as u32)
- } else {
- token_idx1 as u32 + token_idx2 as u32
- };
- let score = distance as usize + main_diagonal_distance;
- if score > (K_HEUR * ec) as usize && score > best_score {
- let is_snake = if BACK {
- file1.tokens[token_idx1 as usize..]
- .iter()
- .zip(&file2.tokens[token_idx2 as usize..])
- .take(SNAKE_CNT as usize)
- .all(|(token1, token2)| token1 == token2)
- } else {
- file1.tokens[..token_idx1 as usize]
- .iter()
- .zip(&file2.tokens[..token_idx2 as usize])
- .rev()
- .take(SNAKE_CNT as usize)
- .all(|(token1, token2)| token1 == token2)
- };
- if is_snake {
- best_token_idx1 = token_idx1;
- best_token_idx2 = token_idx2;
- best_score = score
- }
- }
-
- k -= 2;
- }
-
- (best_score > 0).then(|| (best_token_idx1, best_token_idx2))
- }
-}
-
-pub enum SearchResult {
- Snake,
- Found { token_idx1: i32, token_idx2: i32 },
-}
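
The "snake" that `run` slides along is a run of equal tokens on one diagonal, measured with the `common_prefix`/`common_postfix` helpers from `crate::util` (whose bodies are not shown in this excerpt). A hypothetical standalone equivalent of the forward helper:

```rust
// Length of the run of equal tokens at the start of both slices: how far the
// forward search can slide down a diagonal "for free".
fn common_prefix<T: Eq>(a: &[T], b: &[T]) -> u32 {
    a.iter().zip(b).take_while(|(x, y)| x == y).count() as u32
}

fn main() {
    assert_eq!(common_prefix(b"abcx", b"abcy"), 3);
    assert_eq!(common_prefix(b"ab", b"xy"), 0);
    // `run` only reports SearchResult::Snake when such a slide
    // exceeds SNAKE_CNT (20) tokens.
}
```
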
diff --git a/vendor/imara-diff/src/myers/preprocess.rs b/vendor/imara-diff/src/myers/preprocess.rs
deleted file mode 100644
index 905d331b4..000000000
--- a/vendor/imara-diff/src/myers/preprocess.rs
+++ /dev/null
@@ -1,173 +0,0 @@
-use crate::intern::Token;
-use crate::myers::sqrt;
-use crate::util::{strip_common_postfix, strip_common_prefix};
-
-pub fn preprocess(
- mut file1: &[Token],
- mut file2: &[Token],
-) -> (PreprocessedFile, PreprocessedFile) {
- let common_prefix = strip_common_prefix(&mut file1, &mut file2);
- strip_common_postfix(&mut file1, &mut file2);
- let (hdiff1, hdiff2) = token_occurrences(file1, file2);
- let file1 = PreprocessedFile::new(common_prefix, &hdiff1, file1);
- let file2 = PreprocessedFile::new(common_prefix, &hdiff2, file2);
- (file1, file2)
-}
-
-/// Computes, for each token of either file, how often it occurs in the other file (see [`Occurances`]).
-fn token_occurrences(file1: &[Token], file2: &[Token]) -> (Vec<Occurances>, Vec<Occurances>) {
- const MAX_EQLIMIT: u32 = 1024;
-
-    // compute the limit after which tokens are treated as `Occurances::Common`
- let eqlimit1 = sqrt(file1.len()).min(MAX_EQLIMIT);
- let eqlimit2 = sqrt(file2.len()).min(MAX_EQLIMIT);
-
- // first collect how often each token occurs in a file
- let mut occurances1 = Vec::new();
- for token in file1 {
- let bucket = token.0 as usize;
- if bucket >= occurances1.len() {
- occurances1.resize(bucket + 1, 0u32);
- }
- occurances1[bucket] += 1;
- }
-
-    // do the same thing for file2, classifying its tokens against file1's counts on the fly
- let mut occurances2 = Vec::new();
- let token_occurances2: Vec<_> = file2
- .iter()
- .map(|token| {
- let bucket = token.0 as usize;
- if bucket >= occurances2.len() {
- occurances2.resize(bucket + 1, 0);
- }
- occurances2[bucket] += 1;
- let occurances1 = *occurances1.get(bucket).unwrap_or(&0);
- Occurances::from_occurances(occurances1, eqlimit2)
- })
- .collect();
-
- let token_occurances1: Vec<_> = file1
- .iter()
- .map(|token| {
- let bucket = token.0 as usize;
- let occurances2 = *occurances2.get(bucket).unwrap_or(&0);
- Occurances::from_occurances(occurances2, eqlimit1)
- })
- .collect();
-
- (token_occurances1, token_occurances2)
-}
-
-#[derive(Clone, Copy, Debug)]
-enum Occurances {
- /// Token does not occur in this file
- None,
- /// Token occurs at least once
- Some,
- /// Token occurs very frequently (exact number depends on file size).
-    /// Such tokens are usually empty lines or braces and are often not meaningful to a diff.
- Common,
-}
-
-impl Occurances {
- pub fn from_occurances(occurances: u32, eqlimit: u32) -> Occurances {
- if occurances == 0 {
- Occurances::None
- } else if occurances >= eqlimit {
- Occurances::Common
- } else {
- Occurances::Some
- }
- }
-}
-
-#[derive(Debug)]
-pub struct PreprocessedFile {
- pub offset: u32,
- pub is_changed: Vec<bool>,
- pub indices: Vec<u32>,
- pub tokens: Vec<Token>,
-}
-
-impl PreprocessedFile {
- fn new(offset: u32, token_diff: &[Occurances], tokens: &[Token]) -> PreprocessedFile {
- let mut changed = vec![false; tokens.len()];
- let (tokens, indices) = prune_unmatched_tokens(tokens, token_diff, &mut changed);
- PreprocessedFile { offset, is_changed: changed, indices, tokens }
- }
-}
-
-fn prune_unmatched_tokens(
- file: &[Token],
- token_status: &[Occurances],
- changed: &mut [bool],
-) -> (Vec<Token>, Vec<u32>) {
- assert_eq!(token_status.len(), file.len());
- file.iter()
- .zip(token_status)
- .enumerate()
- .filter_map(|(i, (&token, &status))| {
- let prune = match status {
- Occurances::None => true,
- Occurances::Some => false,
- Occurances::Common => should_prune_common_line(token_status, i),
- };
- if prune {
- changed[i] = true;
- None
- } else {
- Some((token, i as u32))
- }
- })
- .unzip()
-}
-
-// TODO do not unnecessarily rescan lines
-fn should_prune_common_line(token_status: &[Occurances], pos: usize) -> bool {
- const WINDOW_SIZE: usize = 100;
-
- let mut unmatched_before = 0;
- let mut common_before = 0;
-
-    let start = if pos > WINDOW_SIZE { pos - WINDOW_SIZE } else { 0 };
- for status in token_status[start..pos].iter().rev() {
- match status {
- Occurances::None => {
- unmatched_before += 1;
- }
- Occurances::Common => {
- common_before += 1;
- }
- Occurances::Some => break,
- }
- }
-
- if unmatched_before == 0 {
- return false;
- }
-
- let end = token_status.len().min(pos + WINDOW_SIZE);
- let mut unmatched_after = 0;
- let mut common_after = 0;
- for status in token_status[pos..end].iter() {
- match status {
- Occurances::None => {
- unmatched_after += 1;
- }
- Occurances::Common => {
- common_after += 1;
- }
- Occurances::Some => break,
- }
- }
-
- if unmatched_after == 0 {
- return false;
- }
-
- let common = common_before + common_after;
- let unmatched = unmatched_before + unmatched_after;
-
- unmatched > 3 * common
-}
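
A compact numeric check of the pruning rule above. This is a simplification that collapses the before/after windows into single counts; the real code additionally requires unmatched tokens on both sides of the candidate:

```rust
// A `Common` token is pruned (treated as changed) only when its surrounding
// window is dominated by unmatched tokens: more than three unmatched tokens
// per common token.
fn should_prune(unmatched: u32, common: u32) -> bool {
    unmatched > 0 && unmatched > 3 * common
}

fn main() {
    assert!(should_prune(7, 2)); // 7 > 3 * 2: prune
    assert!(!should_prune(6, 2)); // 6 == 3 * 2: keep
    assert!(!should_prune(0, 0)); // no unmatched neighbours: keep
}
```
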
diff --git a/vendor/imara-diff/src/myers/slice.rs b/vendor/imara-diff/src/myers/slice.rs
deleted file mode 100644
index 0f99121b4..000000000
--- a/vendor/imara-diff/src/myers/slice.rs
+++ /dev/null
@@ -1,65 +0,0 @@
-use std::mem::take;
-use std::ops::RangeBounds;
-
-use crate::intern::Token;
-use crate::myers::preprocess::PreprocessedFile;
-use crate::util::common_edges;
-
-#[derive(Default)]
-pub struct FileSlice<'a> {
- pub tokens: &'a [Token],
- indices: &'a [u32],
- changed: &'a mut [bool],
-}
-
-impl<'a> FileSlice<'a> {
- pub fn new(file: &'a mut PreprocessedFile) -> Self {
- Self { tokens: &file.tokens, indices: &file.indices, changed: &mut file.is_changed }
- }
-
- pub fn mark_changed(&mut self) {
- for &i in self.indices {
- self.changed[i as usize] = true;
- }
- }
-
- pub fn borrow(&mut self) -> FileSlice {
- FileSlice { tokens: self.tokens, changed: self.changed, indices: self.indices }
- }
-
- pub fn slice<R: RangeBounds<u32>>(self, range: R) -> Self {
- let start = match range.start_bound() {
- std::ops::Bound::Included(&start) => start,
- std::ops::Bound::Excluded(&start) => start + 1,
- std::ops::Bound::Unbounded => 0,
- };
-
- let end = match range.end_bound() {
- std::ops::Bound::Included(&end) => end + 1,
- std::ops::Bound::Excluded(&end) => end,
- std::ops::Bound::Unbounded => self.len(),
- };
-
- Self {
- tokens: &self.tokens[start as usize..end as usize],
- changed: self.changed,
- indices: &self.indices[start as usize..end as usize],
- }
- }
-
- pub fn strip_common(&mut self, other: &mut Self) {
- let (start, common_postfix) = common_edges(self.tokens, other.tokens);
- let end = self.len() - common_postfix;
- *self = take(self).slice(start..end);
- let end = other.len() - common_postfix;
- *other = take(other).slice(start..end)
- }
-
- pub fn len(&self) -> u32 {
- self.tokens.len() as u32
- }
-
- pub fn is_empty(&self) -> bool {
- self.tokens.is_empty()
- }
-}
diff --git a/vendor/imara-diff/src/sink.rs b/vendor/imara-diff/src/sink.rs
deleted file mode 100644
index 6bcf20f8a..000000000
--- a/vendor/imara-diff/src/sink.rs
+++ /dev/null
@@ -1,110 +0,0 @@
-use std::ops::Range;
-
-/// Trait for processing the edit-scripts computed with [`diff`](crate::diff)
-pub trait Sink: Sized {
- type Out;
-
- /// This method is called whenever a diff [`algorithm`](crate::Algorithm)
-    /// finds a change between the two processed input files.
-    /// A change is a contiguous subsequence of [tokens](crate::intern::Token) `before` that needs
-    /// to be replaced by a different contiguous subsequence of tokens `after` to construct the second file from the first.
-    ///
-    /// These token subsequences are passed to this function in **strictly monotonically increasing order**.
-    /// That means that for two subsequent calls `process_change(before1, after1)` and `process_change(before2, after2)`
- /// the following always holds:
- ///
- /// ``` no_compile
- /// assert!(before1.end < before2.start);
- /// assert!(after1.end < after2.start);
- /// ```
- ///
-    /// # Parameters
-    /// - **`before`** - the **position** of the removed token subsequence in the original file.
- /// - **`after`** - the **position** of the inserted token subsequence in the destination file.
- ///
- /// # Notes
-    ///
- /// A `Sink` has no function to indicate that a section of a file remains unchanged.
-    /// However, due to the monotonically increasing calls, implementations can easily determine
-    /// which subsequences remain unchanged by saving `before.end`/`after.end`.
-    /// The range between `before.start`/`after.start` and the previous `before.end`/`after.end`
- /// is always unchanged.
- fn process_change(&mut self, before: Range<u32>, after: Range<u32>);
-
- /// This function is called after all calls to `process_change` are complete
- /// to obtain the final diff result
- fn finish(self) -> Self::Out;
-
- /// Utility method that constructs a [`Counter`](crate::sink::Counter) that tracks the total number
- /// of inserted and removed tokens in the changes passed to [`process_change`](crate::Sink::process_change).
- fn with_counter(self) -> Counter<Self> {
- Counter::new(self)
- }
-}
-
-impl<T: FnMut(Range<u32>, Range<u32>)> Sink for T {
- type Out = ();
-
- fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
- self(before, after)
- }
-
- fn finish(self) -> Self::Out {}
-}
-
-impl Sink for () {
- type Out = ();
- fn process_change(&mut self, _before: Range<u32>, _after: Range<u32>) {}
- fn finish(self) -> Self::Out {}
-}
-
-/// A [`Sink`](crate::Sink) which wraps a different sink
-/// and counts the number of `removed` and `inserted` [tokens](crate::intern::Token).
-pub struct Counter<T> {
-    /// Total number of recorded removed [`tokens`](crate::intern::Token).
-    /// Computed by summing the lengths of the `before` subsequences passed to [`process_change`](crate::Sink::process_change).
-    pub removals: u32,
-    /// Total number of recorded inserted [`tokens`](crate::intern::Token).
-    /// Computed by summing the lengths of the `after` subsequences passed to [`process_change`](crate::Sink::process_change).
-    pub insertions: u32,
- /// The [`Sink`](crate::Sink) for which the counter records [`tokens`](crate::intern::Token).
- /// All calls to [`process_change`](crate::Sink::process_change) are forwarded to the `sink` by the counter.
- /// After [`finish`](crate::Sink::finish) is called, this field contains the output returned by the [`finish`](crate::Sink::finish)
- /// method of the wrapped [`Sink`](crate::Sink)
- pub wrapped: T,
-}
-
-impl<S: Sink> Counter<S> {
- pub fn new(sink: S) -> Self {
- Self { insertions: 0, removals: 0, wrapped: sink }
- }
-}
-
-impl<S: Sink> Sink for Counter<S> {
- type Out = Counter<S::Out>;
- fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
- self.removals += before.end - before.start;
- self.insertions += after.end - after.start;
- self.wrapped.process_change(before, after)
- }
-
- fn finish(self) -> Self::Out {
- Counter {
- removals: self.removals,
- insertions: self.insertions,
- wrapped: self.wrapped.finish(),
- }
- }
-}
-
-impl<T> Counter<T> {
- pub fn total(&self) -> usize {
- self.insertions as usize + self.removals as usize
- }
-}
-
-impl Default for Counter<()> {
- fn default() -> Self {
- Counter::new(())
- }
-}
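Editorial note: because of the blanket `FnMut` impl above, a plain closure already works as a `Sink`, and `with_counter` wraps it in a `Counter` that tallies change sizes. A small usage sketch, assuming the crate-root re-exports (`diff`, `Algorithm`, `Sink`) that the tests in this vendor tree also use:

```rust
use std::ops::Range;

use imara_diff::intern::InternedInput;
use imara_diff::{diff, Algorithm, Sink};

fn main() {
    let input = InternedInput::new("a\nb\nc\n", "a\nx\nc\n");
    // A closure is a valid `Sink` thanks to the blanket impl;
    // `with_counter` additionally records removals and insertions.
    let counter = diff(
        Algorithm::Histogram,
        &input,
        (|before: Range<u32>, after: Range<u32>| println!("-{before:?} +{after:?}"))
            .with_counter(),
    );
    // One line was replaced: one token removed, one inserted.
    assert_eq!((counter.removals, counter.insertions), (1, 1));
}
```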
diff --git a/vendor/imara-diff/src/sources.rs b/vendor/imara-diff/src/sources.rs
deleted file mode 100644
index 1c75bd026..000000000
--- a/vendor/imara-diff/src/sources.rs
+++ /dev/null
@@ -1,151 +0,0 @@
-use std::mem::take;
-use std::str::from_utf8_unchecked;
-
-use crate::TokenSource;
-
-/// Returns a [`TokenSource`](crate::intern::TokenSource) that uses
-/// the lines in `data` as Tokens. The newline separator (`\r\n` or `\n`) is
-/// not included in the emitted tokens.
-/// This means that changing the newline separator from `\r\n` to `\n`
-/// (or omitting it fully on the last line) is not detected by [`diff`](crate::diff).
-pub fn lines(data: &str) -> Lines<'_, false> {
- Lines(ByteLines(data.as_bytes()))
-}
-
-/// Returns a [`TokenSource`](crate::intern::TokenSource) that uses
-/// the lines in `data` as Tokens. The newline separator (`\r\n` or `\n`) is
-/// included in the emitted tokens.
-/// This means that changing the newline separator from `\r\n` to `\n`
-/// (or omitting it fully on the last line) is detected by [`diff`](crate::diff).
-pub fn lines_with_terminator(data: &str) -> Lines<'_, true> {
- Lines(ByteLines(data.as_bytes()))
-}
-
-/// Returns a [`TokenSource`](crate::intern::TokenSource) that uses
-/// the lines in `data` as Tokens. A line is a contiguous subslice of
-/// `data` terminated by `\n` (or `\r\n`).
-/// The newline separator (`\r\n` or `\n`) is included in the emitted tokens.
-/// This means that changing the newline separator from `\r\n` to `\n`
-/// (or omitting it fully on the last line) is detected by [`diff`](crate::diff).
-pub fn byte_lines_with_terminator(data: &[u8]) -> ByteLines<'_, true> {
- ByteLines(data)
-}
-
-/// Returns a [`TokenSource`](crate::intern::TokenSource) that uses
-/// the lines in `data` as Tokens. The newline separator (`\r\n` or `\n`) is
-/// not included in the emitted tokens.
-/// This means that changing the newline separator from `\r\n` to `\n`
-/// (or omitting it fully on the last line) is not detected by [`diff`](crate::diff).
-pub fn byte_lines(data: &[u8]) -> ByteLines<'_, false> {
- ByteLines(data)
-}
-
-/// By default a line diff is produced for a string
-impl<'a> TokenSource for &'a str {
- type Token = &'a str;
-
- type Tokenizer = Lines<'a, false>;
-
- fn tokenize(&self) -> Self::Tokenizer {
- lines(self)
- }
-
- fn estimate_tokens(&self) -> u32 {
- lines_with_terminator(self).estimate_tokens()
- }
-}
-
-/// By default a line diff is produced for byte slices
-impl<'a> TokenSource for &'a [u8] {
- type Token = Self;
- type Tokenizer = ByteLines<'a, false>;
-
- fn tokenize(&self) -> Self::Tokenizer {
- byte_lines(self)
- }
-
- fn estimate_tokens(&self) -> u32 {
- byte_lines(self).estimate_tokens()
- }
-}
-
-/// A [`TokenSource`](crate::intern::TokenSource) that returns the lines of a `str` as tokens.
-/// See [`lines`](crate::sources::lines) and [`lines_with_terminator`](crate::sources::lines_with_terminator) for details
-#[derive(Clone, Copy, PartialEq, Eq)]
-pub struct Lines<'a, const INCLUDE_LINE_TERMINATOR: bool>(ByteLines<'a, INCLUDE_LINE_TERMINATOR>);
-
-impl<'a, const INCLUDE_LINE_TERMINATOR: bool> Iterator for Lines<'a, INCLUDE_LINE_TERMINATOR> {
- type Item = &'a str;
-
- fn next(&mut self) -> Option<Self::Item> {
- // safety invariant: this struct may only contain valid utf8
-        // splitting valid utf8 at ascii characters always produces valid utf8
- self.0.next().map(|it| unsafe { from_utf8_unchecked(it) })
- }
-}
-
-/// A `Lines` tokenizer is its own [`TokenSource`](crate::intern::TokenSource)
-impl<'a, const INCLUDE_LINE_TERMINATOR: bool> TokenSource for Lines<'a, INCLUDE_LINE_TERMINATOR> {
- type Token = &'a str;
-
- type Tokenizer = Self;
-
- fn tokenize(&self) -> Self::Tokenizer {
- *self
- }
-
- fn estimate_tokens(&self) -> u32 {
- self.0.estimate_tokens()
- }
-}
-
-/// A [`TokenSource`](crate::intern::TokenSource) that returns the lines of a byte slice as tokens.
-/// See [`byte_lines`](crate::sources::byte_lines) and [`byte_lines_with_terminator`](crate::sources::byte_lines_with_terminator) for details
-#[derive(Clone, Copy, PartialEq, Eq)]
-pub struct ByteLines<'a, const INCLUDE_LINE_TERMINATOR: bool>(&'a [u8]);
-
-impl<'a, const INCLUDE_LINE_TERMINATOR: bool> Iterator for ByteLines<'a, INCLUDE_LINE_TERMINATOR> {
- type Item = &'a [u8];
-
- fn next(&mut self) -> Option<Self::Item> {
- let mut saw_carriage_return = false;
- let mut iter = self.0.iter().enumerate();
- let line_len = loop {
- match iter.next() {
- Some((i, b'\n')) => break i + 1,
- None => {
- return (!self.0.is_empty()).then(|| take(&mut self.0));
- }
- Some((_, &it)) => saw_carriage_return = it == b'\r',
- }
- };
- let (mut line, rem) = self.0.split_at(line_len);
- self.0 = rem;
- if !INCLUDE_LINE_TERMINATOR {
- line = &line[..line_len - 1 - saw_carriage_return as usize];
- }
- Some(line)
- }
-}
-
-/// A `ByteLines` tokenizer is its own [`TokenSource`](crate::intern::TokenSource)
-impl<'a, const INCLUDE_LINE_TERMINATOR: bool> TokenSource
- for ByteLines<'a, INCLUDE_LINE_TERMINATOR>
-{
- type Token = &'a [u8];
-
- type Tokenizer = Self;
-
- fn tokenize(&self) -> Self::Tokenizer {
- *self
- }
-
- fn estimate_tokens(&self) -> u32 {
- let len: usize = self.take(20).map(|line| line.len()).sum();
- if len == 0 {
- 100
- } else {
- (self.0.len() * 20 / len) as u32
- }
- }
-}
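Editorial note: to make the terminator distinction above concrete, here is a small sketch that simply exercises the two constructors on the same input (no names beyond the module's own are assumed):

```rust
use imara_diff::sources::{lines, lines_with_terminator};

fn main() {
    let text = "foo\r\nbar";
    // Without terminators the `\r\n` is stripped from each token ...
    assert_eq!(lines(text).collect::<Vec<_>>(), ["foo", "bar"]);
    // ... with terminators it is kept, so a `\r\n` -> `\n` change is visible to `diff`.
    assert_eq!(lines_with_terminator(text).collect::<Vec<_>>(), ["foo\r\n", "bar"]);
}
```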
diff --git a/vendor/imara-diff/src/tests.rs b/vendor/imara-diff/src/tests.rs
deleted file mode 100644
index b0df76820..000000000
--- a/vendor/imara-diff/src/tests.rs
+++ /dev/null
@@ -1,152 +0,0 @@
-use std::fs::read_to_string;
-use std::mem::swap;
-use std::path::PathBuf;
-
-use expect_test::{expect, expect_file};
-
-use crate::intern::InternedInput;
-use crate::sink::Counter;
-use crate::{diff, Algorithm, UnifiedDiffBuilder};
-
-#[test]
-fn replace() {
- let before = r#"fn foo() -> Bar{
- let mut foo = 2.0;
- foo *= 100 / 2;
- println!("hello world")
-}"#;
-
- let after = r#"const TEST: i32 = 0;
-fn foo() -> Bar{
- let mut foo = 2.0;
- foo *= 100 / 2;
- println!("hello world");
- println!("hello foo {TEST}");
-}
-
-"#;
- let input = InternedInput::new(before, after);
- for algorithm in Algorithm::ALL {
- println!("{algorithm:?}");
- let diff = diff(algorithm, &input, UnifiedDiffBuilder::new(&input));
- expect![[r#"
- @@ -1,5 +1,8 @@
- +const TEST: i32 = 0;
- fn foo() -> Bar{
- let mut foo = 2.0;
- foo *= 100 / 2;
- - println!("hello world")
- + println!("hello world");
- + println!("hello foo {TEST}");
- }
- +
- "#]]
- .assert_eq(&diff);
- }
-}
-
-#[test]
-fn identical_files() {
- let file = r#"fn foo() -> Bar{
- let mut foo = 2.0;
- foo *= 100 / 2;
-}"#;
-
- for algorithm in Algorithm::ALL {
- println!("{algorithm:?}");
- let input = InternedInput::new(file, file);
- let diff = diff(algorithm, &input, UnifiedDiffBuilder::new(&input));
- assert_eq!(diff, "");
- }
-}
-
-#[test]
-fn simple_insert() {
- let before = r#"fn foo() -> Bar{
- let mut foo = 2.0;
- foo *= 100 / 2;
-}"#;
-
- let after = r#"fn foo() -> Bar{
- let mut foo = 2.0;
- foo *= 100 / 2;
- println("hello world")
-}"#;
-
- let mut input = InternedInput::new(before, after);
- for algorithm in Algorithm::ALL {
- println!("{algorithm:?}");
- let res = diff(algorithm, &input, UnifiedDiffBuilder::new(&input));
- expect![[r#"
- @@ -1,4 +1,5 @@
- fn foo() -> Bar{
- let mut foo = 2.0;
- foo *= 100 / 2;
- + println("hello world")
- }
- "#]]
- .assert_eq(&res);
-
- swap(&mut input.before, &mut input.after);
-
- let res = diff(algorithm, &input, UnifiedDiffBuilder::new(&input));
- expect![[r#"
- @@ -1,5 +1,4 @@
- fn foo() -> Bar{
- let mut foo = 2.0;
- foo *= 100 / 2;
- - println("hello world")
- }
- "#]]
- .assert_eq(&res);
-
- swap(&mut input.before, &mut input.after);
- }
-}
-
-pub fn project_root() -> PathBuf {
- let dir = env!("CARGO_MANIFEST_DIR");
- let mut res = PathBuf::from(dir);
- while !res.join("README.md").exists() {
- res = res.parent().expect("reached fs root without finding project root").to_owned()
- }
- res
-}
-
-#[test]
-fn hand_checked_udiffs() {
- for algorithm in Algorithm::ALL {
- println!("{algorithm:?}");
- let test_dir = project_root().join("tests");
- for file in ["helix_syntax.rs", "package.txt"] {
- let path_before = test_dir.join(format!("{file}.before"));
- let path_after = test_dir.join(format!("{file}.after"));
- let path_diff = test_dir.join(format!("{file}.{algorithm:?}.diff"));
- let before = read_to_string(path_before).unwrap();
- let after = read_to_string(path_after).unwrap();
- let input = InternedInput::new(&*before, &*after);
- let diff = diff(algorithm, &input, UnifiedDiffBuilder::new(&input));
- expect_file![path_diff].assert_eq(&diff);
- }
- }
-}
-
-#[test]
-fn complex_diffs() {
- for algorithm in Algorithm::ALL {
- println!("{algorithm:?}");
- let test_dir = project_root().join("tests");
- for (file1, file2) in [
- ("test1.json", "test2.json"),
- ("helix_syntax.rs.Histogram.diff", "helix_syntax.rs.after"),
- ] {
- let path_before = test_dir.join(file1);
- let path_diff = test_dir.join(file2);
- let before = read_to_string(path_before).unwrap();
- let after = read_to_string(path_diff).unwrap();
- let input = InternedInput::new(&*before, &*after);
- let res = diff(algorithm, &input, Counter::default());
- println!("{}", res.total())
- }
- }
-}
diff --git a/vendor/imara-diff/src/unified_diff.rs b/vendor/imara-diff/src/unified_diff.rs
deleted file mode 100644
index 9942d9510..000000000
--- a/vendor/imara-diff/src/unified_diff.rs
+++ /dev/null
@@ -1,136 +0,0 @@
-use std::fmt::{Display, Write};
-use std::hash::Hash;
-use std::ops::Range;
-
-use crate::intern::{InternedInput, Interner, Token};
-use crate::Sink;
-
-/// A [`Sink`](crate::sink::Sink) that creates a textual diff
-/// in the format typically output by git, or by GNU diff when the `-u` option is used
-pub struct UnifiedDiffBuilder<'a, W, T>
-where
- W: Write,
- T: Hash + Eq + Display,
-{
- before: &'a [Token],
- after: &'a [Token],
- interner: &'a Interner<T>,
-
- pos: u32,
- before_hunk_start: u32,
- after_hunk_start: u32,
- before_hunk_len: u32,
- after_hunk_len: u32,
-
- buffer: String,
- dst: W,
-}
-
-impl<'a, T> UnifiedDiffBuilder<'a, String, T>
-where
- T: Hash + Eq + Display,
-{
-    /// Create a new `UnifiedDiffBuilder` for the given `input`
-    /// that returns a [`String`](std::string::String).
- pub fn new(input: &'a InternedInput<T>) -> Self {
- Self {
- before_hunk_start: 0,
- after_hunk_start: 0,
- before_hunk_len: 0,
- after_hunk_len: 0,
- buffer: String::with_capacity(8),
- dst: String::new(),
- interner: &input.interner,
- before: &input.before,
- after: &input.after,
- pos: 0,
- }
- }
-}
-
-impl<'a, W, T> UnifiedDiffBuilder<'a, W, T>
-where
- W: Write,
- T: Hash + Eq + Display,
-{
-    /// Create a new `UnifiedDiffBuilder` for the given `input`
-    /// that writes its output to the provided implementation of [`Write`](std::fmt::Write).
- pub fn with_writer(input: &'a InternedInput<T>, writer: W) -> Self {
- Self {
- before_hunk_start: 0,
- after_hunk_start: 0,
- before_hunk_len: 0,
- after_hunk_len: 0,
- buffer: String::with_capacity(8),
- dst: writer,
- interner: &input.interner,
- before: &input.before,
- after: &input.after,
- pos: 0,
- }
- }
-
- fn print_tokens(&mut self, tokens: &[Token], prefix: char) {
- for &token in tokens {
- writeln!(&mut self.buffer, "{prefix}{}", self.interner[token]).unwrap();
- }
- }
-
- fn flush(&mut self) {
- if self.before_hunk_len == 0 && self.after_hunk_len == 0 {
- return;
- }
-
- let end = (self.pos + 3).min(self.before.len() as u32);
- self.update_pos(end, end);
-
- writeln!(
- &mut self.dst,
- "@@ -{},{} +{},{} @@",
- self.before_hunk_start + 1,
- self.before_hunk_len,
- self.after_hunk_start + 1,
- self.after_hunk_len,
- )
- .unwrap();
- write!(&mut self.dst, "{}", &self.buffer).unwrap();
- self.buffer.clear();
- self.before_hunk_len = 0;
- self.after_hunk_len = 0
- }
-
- fn update_pos(&mut self, print_to: u32, move_to: u32) {
- self.print_tokens(&self.before[self.pos as usize..print_to as usize], ' ');
- let len = print_to - self.pos;
- self.pos = move_to;
- self.before_hunk_len += len;
- self.after_hunk_len += len;
- }
-}
-
-impl<W, T> Sink for UnifiedDiffBuilder<'_, W, T>
-where
- W: Write,
- T: Hash + Eq + Display,
-{
- type Out = W;
-
- fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
- if before.start - self.pos > 6 {
- self.flush();
- self.pos = before.start - 3;
- self.before_hunk_start = self.pos;
- self.after_hunk_start = after.start - 3;
- }
- self.update_pos(before.start, before.end);
- self.before_hunk_len += before.end - before.start;
- self.after_hunk_len += after.end - after.start;
- self.print_tokens(&self.before[before.start as usize..before.end as usize], '-');
- self.print_tokens(&self.after[after.start as usize..after.end as usize], '+');
- }
-
- fn finish(mut self) -> Self::Out {
- self.flush();
- self.dst
- }
-}
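Editorial note: a usage sketch for the builder above, relying on the `UnifiedDiffBuilder` re-export that the tests in this tree import from the crate root:

```rust
use imara_diff::intern::InternedInput;
use imara_diff::{diff, Algorithm, UnifiedDiffBuilder};

fn main() {
    let before = "a\nb\nc\n";
    let after = "a\nB\nc\n";
    let input = InternedInput::new(before, after);
    // `Out = String` for the plain `new` constructor; `with_writer`
    // would return the wrapped `Write` implementation instead.
    let udiff = diff(Algorithm::Histogram, &input, UnifiedDiffBuilder::new(&input));
    print!("{udiff}");
}
```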
diff --git a/vendor/imara-diff/src/util.rs b/vendor/imara-diff/src/util.rs
deleted file mode 100644
index 503078d8c..000000000
--- a/vendor/imara-diff/src/util.rs
+++ /dev/null
@@ -1,48 +0,0 @@
-use crate::intern::Token;
-
-pub fn common_prefix(file1: &[Token], file2: &[Token]) -> u32 {
- let mut off = 0;
- for (token1, token2) in file1.iter().zip(file2) {
- if token1 != token2 {
- break;
- }
- off += 1;
- }
- off
-}
-
-pub fn common_postfix(file1: &[Token], file2: &[Token]) -> u32 {
- let mut off = 0;
- for (token1, token2) in file1.iter().rev().zip(file2.iter().rev()) {
- if token1 != token2 {
- break;
- }
- off += 1;
- }
- off
-}
-
-pub fn common_edges(file1: &[Token], file2: &[Token]) -> (u32, u32) {
- let prefix = common_prefix(file1, file2);
- let postfix = common_postfix(&file1[prefix as usize..], &file2[prefix as usize..]);
- (prefix, postfix)
-}
-
-pub fn strip_common_prefix(file1: &mut &[Token], file2: &mut &[Token]) -> u32 {
- let off = common_prefix(file1, file2);
- *file1 = &file1[off as usize..];
- *file2 = &file2[off as usize..];
- off
-}
-
-pub fn strip_common_postfix(file1: &mut &[Token], file2: &mut &[Token]) -> u32 {
- let off = common_postfix(file1, file2);
- *file1 = &file1[..file1.len() - off as usize];
- *file2 = &file2[..file2.len() - off as usize];
- off
-}
-
-pub fn sqrt(val: usize) -> u32 {
- let nbits = (usize::BITS as u32 - val.leading_zeros()) / 2;
- 1 << nbits
-}
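Editorial note: `sqrt` above is not an exact integer square root; for a `val` of bit length `n` it returns `1 << (n / 2)`, a power of two within a factor of two of the true root, which is cheap and sufficient as a heuristic bound. A standalone check (the function is copied here for illustration only):

```rust
// Standalone copy of the approximation above, for illustration only.
fn sqrt(val: usize) -> u32 {
    let nbits = (usize::BITS - val.leading_zeros()) / 2;
    1 << nbits
}

fn main() {
    assert_eq!(sqrt(100), 8); // exact root: 10
    assert_eq!(sqrt(1_000_000), 1024); // exact root: 1000
}
```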