From 10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 4 May 2024 14:41:41 +0200 Subject: Merging upstream version 1.70.0+dfsg2. Signed-off-by: Daniel Baumann --- vendor/similar/src/algorithms/capture.rs | 117 ++++ vendor/similar/src/algorithms/compact.rs | 355 ++++++++++ vendor/similar/src/algorithms/hook.rs | 178 +++++ vendor/similar/src/algorithms/lcs.rs | 220 ++++++ vendor/similar/src/algorithms/mod.rs | 134 ++++ vendor/similar/src/algorithms/myers.rs | 415 +++++++++++ vendor/similar/src/algorithms/patience.rs | 169 +++++ vendor/similar/src/algorithms/replace.rs | 221 ++++++ ...gorithms__capture__capture_hook_grouping-2.snap | 60 ++ ...algorithms__capture__capture_hook_grouping.snap | 64 ++ .../similar__algorithms__lcs__contiguous.snap | 28 + .../snapshots/similar__algorithms__lcs__diff.snap | 22 + .../snapshots/similar__algorithms__lcs__pat.snap | 31 + .../similar__algorithms__myers__contiguous.snap | 28 + ...milar__algorithms__myers__deadline_reached.snap | 22 + .../similar__algorithms__myers__diff.snap | 22 + .../snapshots/similar__algorithms__myers__pat.snap | 31 + .../similar__algorithms__patience__patience.snap | 45 ++ ...thms__patience__patience_out_of_bounds_bug.snap | 16 + vendor/similar/src/algorithms/utils.rs | 379 ++++++++++ vendor/similar/src/common.rs | 185 +++++ vendor/similar/src/iter.rs | 195 ++++++ vendor/similar/src/lib.rs | 163 +++++ .../snapshots/similar__udiff__unified_diff.snap | 25 + ...imilar__udiff__unified_diff_newline_hint-2.snap | 10 + .../similar__udiff__unified_diff_newline_hint.snap | 11 + vendor/similar/src/text/abstraction.rs | 450 ++++++++++++ vendor/similar/src/text/inline.rs | 337 +++++++++ vendor/similar/src/text/mod.rs | 770 +++++++++++++++++++++ .../snapshots/similar__text__captured_ops.snap | 22 + .../similar__text__captured_word_ops.snap | 202 ++++++ .../text/snapshots/similar__text__char_diff.snap | 39 ++ .../similar__text__inline__line_ops_inline.snap | 126 ++++ .../snapshots/similar__text__inline__serde.snap | 107 +++ .../similar__text__lifetimes_on_iter.snap | 42 ++ .../text/snapshots/similar__text__line_ops.snap | 42 ++ .../src/text/snapshots/similar__text__serde.snap | 55 ++ .../text/snapshots/similar__text__serde_ops.snap | 38 + .../snapshots/similar__text__unified_diff.snap | 12 + .../snapshots/similar__text__virtual_newlines.snap | 32 + vendor/similar/src/text/utils.rs | 55 ++ vendor/similar/src/types.rs | 489 +++++++++++++ vendor/similar/src/udiff.rs | 359 ++++++++++ vendor/similar/src/utils.rs | 415 +++++++++++ 44 files changed, 6738 insertions(+) create mode 100644 vendor/similar/src/algorithms/capture.rs create mode 100644 vendor/similar/src/algorithms/compact.rs create mode 100644 vendor/similar/src/algorithms/hook.rs create mode 100644 vendor/similar/src/algorithms/lcs.rs create mode 100644 vendor/similar/src/algorithms/mod.rs create mode 100644 vendor/similar/src/algorithms/myers.rs create mode 100644 vendor/similar/src/algorithms/patience.rs create mode 100644 vendor/similar/src/algorithms/replace.rs create mode 100644 vendor/similar/src/algorithms/snapshots/similar__algorithms__capture__capture_hook_grouping-2.snap create mode 100644 vendor/similar/src/algorithms/snapshots/similar__algorithms__capture__capture_hook_grouping.snap create mode 100644 vendor/similar/src/algorithms/snapshots/similar__algorithms__lcs__contiguous.snap create mode 100644 vendor/similar/src/algorithms/snapshots/similar__algorithms__lcs__diff.snap create mode 100644 vendor/similar/src/algorithms/snapshots/similar__algorithms__lcs__pat.snap create mode 100644 vendor/similar/src/algorithms/snapshots/similar__algorithms__myers__contiguous.snap create mode 100644 vendor/similar/src/algorithms/snapshots/similar__algorithms__myers__deadline_reached.snap create mode 100644 vendor/similar/src/algorithms/snapshots/similar__algorithms__myers__diff.snap create mode 100644 vendor/similar/src/algorithms/snapshots/similar__algorithms__myers__pat.snap create mode 100644 vendor/similar/src/algorithms/snapshots/similar__algorithms__patience__patience.snap create mode 100644 vendor/similar/src/algorithms/snapshots/similar__algorithms__patience__patience_out_of_bounds_bug.snap create mode 100644 vendor/similar/src/algorithms/utils.rs create mode 100644 vendor/similar/src/common.rs create mode 100644 vendor/similar/src/iter.rs create mode 100644 vendor/similar/src/lib.rs create mode 100644 vendor/similar/src/snapshots/similar__udiff__unified_diff.snap create mode 100644 vendor/similar/src/snapshots/similar__udiff__unified_diff_newline_hint-2.snap create mode 100644 vendor/similar/src/snapshots/similar__udiff__unified_diff_newline_hint.snap create mode 100644 vendor/similar/src/text/abstraction.rs create mode 100644 vendor/similar/src/text/inline.rs create mode 100644 vendor/similar/src/text/mod.rs create mode 100644 vendor/similar/src/text/snapshots/similar__text__captured_ops.snap create mode 100644 vendor/similar/src/text/snapshots/similar__text__captured_word_ops.snap create mode 100644 vendor/similar/src/text/snapshots/similar__text__char_diff.snap create mode 100644 vendor/similar/src/text/snapshots/similar__text__inline__line_ops_inline.snap create mode 100644 vendor/similar/src/text/snapshots/similar__text__inline__serde.snap create mode 100644 vendor/similar/src/text/snapshots/similar__text__lifetimes_on_iter.snap create mode 100644 vendor/similar/src/text/snapshots/similar__text__line_ops.snap create mode 100644 vendor/similar/src/text/snapshots/similar__text__serde.snap create mode 100644 vendor/similar/src/text/snapshots/similar__text__serde_ops.snap create mode 100644 vendor/similar/src/text/snapshots/similar__text__unified_diff.snap create mode 100644 vendor/similar/src/text/snapshots/similar__text__virtual_newlines.snap create mode 100644 vendor/similar/src/text/utils.rs create mode 100644 vendor/similar/src/types.rs create mode 100644 vendor/similar/src/udiff.rs create mode 100644 vendor/similar/src/utils.rs (limited to 'vendor/similar/src') diff --git a/vendor/similar/src/algorithms/capture.rs b/vendor/similar/src/algorithms/capture.rs new file mode 100644 index 000000000..4f4990d93 --- /dev/null +++ b/vendor/similar/src/algorithms/capture.rs @@ -0,0 +1,117 @@ +use std::convert::Infallible; + +use crate::algorithms::DiffHook; +use crate::{group_diff_ops, DiffOp}; + +/// A [`DiffHook`] that captures all diff operations. +#[derive(Default, Clone)] +pub struct Capture(Vec); + +impl Capture { + /// Creates a new capture hook. + pub fn new() -> Capture { + Capture::default() + } + + /// Converts the capture hook into a vector of ops. + pub fn into_ops(self) -> Vec { + self.0 + } + + /// Isolate change clusters by eliminating ranges with no changes. + /// + /// This is equivalent to calling [`group_diff_ops`] on [`Capture::into_ops`]. + pub fn into_grouped_ops(self, n: usize) -> Vec> { + group_diff_ops(self.into_ops(), n) + } + + /// Accesses the captured operations. + pub fn ops(&self) -> &[DiffOp] { + &self.0 + } +} + +impl DiffHook for Capture { + type Error = Infallible; + + #[inline(always)] + fn equal(&mut self, old_index: usize, new_index: usize, len: usize) -> Result<(), Self::Error> { + self.0.push(DiffOp::Equal { + old_index, + new_index, + len, + }); + Ok(()) + } + + #[inline(always)] + fn delete( + &mut self, + old_index: usize, + old_len: usize, + new_index: usize, + ) -> Result<(), Self::Error> { + self.0.push(DiffOp::Delete { + old_index, + old_len, + new_index, + }); + Ok(()) + } + + #[inline(always)] + fn insert( + &mut self, + old_index: usize, + new_index: usize, + new_len: usize, + ) -> Result<(), Self::Error> { + self.0.push(DiffOp::Insert { + old_index, + new_index, + new_len, + }); + Ok(()) + } + + #[inline(always)] + fn replace( + &mut self, + old_index: usize, + old_len: usize, + new_index: usize, + new_len: usize, + ) -> Result<(), Self::Error> { + self.0.push(DiffOp::Replace { + old_index, + old_len, + new_index, + new_len, + }); + Ok(()) + } +} + +#[test] +fn test_capture_hook_grouping() { + use crate::algorithms::{diff_slices, Algorithm, Replace}; + + let rng = (1..100).collect::>(); + let mut rng_new = rng.clone(); + rng_new[10] = 1000; + rng_new[13] = 1000; + rng_new[16] = 1000; + rng_new[34] = 1000; + + let mut d = Replace::new(Capture::new()); + diff_slices(Algorithm::Myers, &mut d, &rng, &rng_new).unwrap(); + + let ops = d.into_inner().into_grouped_ops(3); + let tags = ops + .iter() + .map(|group| group.iter().map(|x| x.as_tag_tuple()).collect::>()) + .collect::>(); + + insta::assert_debug_snapshot!(ops); + insta::assert_debug_snapshot!(tags); +} diff --git a/vendor/similar/src/algorithms/compact.rs b/vendor/similar/src/algorithms/compact.rs new file mode 100644 index 000000000..bd5c6c4a2 --- /dev/null +++ b/vendor/similar/src/algorithms/compact.rs @@ -0,0 +1,355 @@ +//! Implements basic compacting. This is based on the compaction logic from +//! diffy by Brandon Williams. +use std::ops::Index; + +use crate::{DiffOp, DiffTag}; + +use super::utils::{common_prefix_len, common_suffix_len}; +use super::DiffHook; + +/// Performs semantic cleanup operations on a diff. +/// +/// This merges similar ops together but also tries to move hunks up and +/// down the diff with the desire to connect as many hunks as possible. +/// It still needs to be combined with [`Replace`](crate::algorithms::Replace) +/// to get actual replace diff ops out. +#[derive(Debug)] +pub struct Compact<'old, 'new, Old: ?Sized, New: ?Sized, D> { + d: D, + ops: Vec, + old: &'old Old, + new: &'new New, +} + +impl<'old, 'new, Old, New, D> Compact<'old, 'new, Old, New, D> +where + D: DiffHook, + Old: Index + ?Sized + 'old, + New: Index + ?Sized + 'new, + New::Output: PartialEq, +{ + /// Creates a new compact hook wrapping another hook. + pub fn new(d: D, old: &'old Old, new: &'new New) -> Self { + Compact { + d, + ops: Vec::new(), + old, + new, + } + } + + /// Extracts the inner hook. + pub fn into_inner(self) -> D { + self.d + } +} + +impl<'old, 'new, Old: ?Sized, New: ?Sized, D: DiffHook> AsRef + for Compact<'old, 'new, Old, New, D> +{ + fn as_ref(&self) -> &D { + &self.d + } +} + +impl<'old, 'new, Old: ?Sized, New: ?Sized, D: DiffHook> AsMut + for Compact<'old, 'new, Old, New, D> +{ + fn as_mut(&mut self) -> &mut D { + &mut self.d + } +} + +impl<'old, 'new, Old, New, D> DiffHook for Compact<'old, 'new, Old, New, D> +where + D: DiffHook, + Old: Index + ?Sized + 'old, + New: Index + ?Sized + 'new, + New::Output: PartialEq, +{ + type Error = D::Error; + + #[inline(always)] + fn equal(&mut self, old_index: usize, new_index: usize, len: usize) -> Result<(), Self::Error> { + self.ops.push(DiffOp::Equal { + old_index, + new_index, + len, + }); + Ok(()) + } + + #[inline(always)] + fn delete( + &mut self, + old_index: usize, + old_len: usize, + new_index: usize, + ) -> Result<(), Self::Error> { + self.ops.push(DiffOp::Delete { + old_index, + old_len, + new_index, + }); + Ok(()) + } + + #[inline(always)] + fn insert( + &mut self, + old_index: usize, + new_index: usize, + new_len: usize, + ) -> Result<(), Self::Error> { + self.ops.push(DiffOp::Insert { + old_index, + new_index, + new_len, + }); + Ok(()) + } + + fn finish(&mut self) -> Result<(), Self::Error> { + cleanup_diff_ops(self.old, self.new, &mut self.ops); + for op in &self.ops { + op.apply_to_hook(&mut self.d)?; + } + self.d.finish() + } +} + +// Walks through all edits and shifts them up and then down, trying to see if +// they run into similar edits which can be merged. +pub fn cleanup_diff_ops(old: &Old, new: &New, ops: &mut Vec) +where + Old: Index + ?Sized, + New: Index + ?Sized, + New::Output: PartialEq, +{ + // First attempt to compact all Deletions + let mut pointer = 0; + while let Some(&op) = ops.get(pointer) { + if let DiffTag::Delete = op.tag() { + pointer = shift_diff_ops_up(ops, old, new, pointer); + pointer = shift_diff_ops_down(ops, old, new, pointer); + } + pointer += 1; + } + + // Then attempt to compact all Insertions + let mut pointer = 0; + while let Some(&op) = ops.get(pointer) { + if let DiffTag::Insert = op.tag() { + pointer = shift_diff_ops_up(ops, old, new, pointer); + pointer = shift_diff_ops_down(ops, old, new, pointer); + } + pointer += 1; + } +} + +fn shift_diff_ops_up( + ops: &mut Vec, + old: &Old, + new: &New, + mut pointer: usize, +) -> usize +where + Old: Index + ?Sized, + New: Index + ?Sized, + New::Output: PartialEq, +{ + while let Some(&prev_op) = pointer.checked_sub(1).and_then(|idx| ops.get(idx)) { + let this_op = ops[pointer]; + match (this_op.tag(), prev_op.tag()) { + // Shift Inserts Upwards + (DiffTag::Insert, DiffTag::Equal) => { + let suffix_len = + common_suffix_len(old, prev_op.old_range(), new, this_op.new_range()); + if suffix_len > 0 { + if let Some(DiffTag::Equal) = ops.get(pointer + 1).map(|x| x.tag()) { + ops[pointer + 1].grow_left(suffix_len); + } else { + ops.insert( + pointer + 1, + DiffOp::Equal { + old_index: prev_op.old_range().end - suffix_len, + new_index: this_op.new_range().end - suffix_len, + len: suffix_len, + }, + ); + } + ops[pointer].shift_left(suffix_len); + ops[pointer - 1].shrink_left(suffix_len); + + if ops[pointer - 1].is_empty() { + ops.remove(pointer - 1); + pointer -= 1; + } + } else if ops[pointer - 1].is_empty() { + ops.remove(pointer - 1); + pointer -= 1; + } else { + // We can't shift upwards anymore + break; + } + } + // Shift Deletions Upwards + (DiffTag::Delete, DiffTag::Equal) => { + // check common suffix for the amount we can shift + let suffix_len = + common_suffix_len(old, prev_op.old_range(), new, this_op.new_range()); + if suffix_len != 0 { + if let Some(DiffTag::Equal) = ops.get(pointer + 1).map(|x| x.tag()) { + ops[pointer + 1].grow_left(suffix_len); + } else { + let old_range = prev_op.old_range(); + ops.insert( + pointer + 1, + DiffOp::Equal { + old_index: old_range.end - suffix_len, + new_index: this_op.new_range().end - suffix_len, + len: old_range.len() - suffix_len, + }, + ); + } + ops[pointer].shift_left(suffix_len); + ops[pointer - 1].shrink_left(suffix_len); + + if ops[pointer - 1].is_empty() { + ops.remove(pointer - 1); + pointer -= 1; + } + } else if ops[pointer - 1].is_empty() { + ops.remove(pointer - 1); + pointer -= 1; + } else { + // We can't shift upwards anymore + break; + } + } + // Swap the Delete and Insert + (DiffTag::Insert, DiffTag::Delete) | (DiffTag::Delete, DiffTag::Insert) => { + ops.swap(pointer - 1, pointer); + pointer -= 1; + } + // Merge the two ranges + (DiffTag::Insert, DiffTag::Insert) => { + ops[pointer - 1].grow_right(this_op.new_range().len()); + ops.remove(pointer); + pointer -= 1; + } + (DiffTag::Delete, DiffTag::Delete) => { + ops[pointer - 1].grow_right(this_op.old_range().len()); + ops.remove(pointer); + pointer -= 1; + } + _ => unreachable!("unexpected tag"), + } + } + pointer +} + +fn shift_diff_ops_down( + ops: &mut Vec, + old: &Old, + new: &New, + mut pointer: usize, +) -> usize +where + Old: Index + ?Sized, + New: Index + ?Sized, + New::Output: PartialEq, +{ + while let Some(&next_op) = pointer.checked_add(1).and_then(|idx| ops.get(idx)) { + let this_op = ops[pointer]; + match (this_op.tag(), next_op.tag()) { + // Shift Inserts Downwards + (DiffTag::Insert, DiffTag::Equal) => { + let prefix_len = + common_prefix_len(old, next_op.old_range(), new, this_op.new_range()); + if prefix_len > 0 { + if let Some(DiffTag::Equal) = pointer + .checked_sub(1) + .and_then(|x| ops.get(x)) + .map(|x| x.tag()) + { + ops[pointer - 1].grow_right(prefix_len); + } else { + ops.insert( + pointer, + DiffOp::Equal { + old_index: next_op.old_range().start, + new_index: this_op.new_range().start, + len: prefix_len, + }, + ); + pointer += 1; + } + ops[pointer].shift_right(prefix_len); + ops[pointer + 1].shrink_right(prefix_len); + + if ops[pointer + 1].is_empty() { + ops.remove(pointer + 1); + } + } else if ops[pointer + 1].is_empty() { + ops.remove(pointer + 1); + } else { + // We can't shift upwards anymore + break; + } + } + // Shift Deletions Downwards + (DiffTag::Delete, DiffTag::Equal) => { + // check common suffix for the amount we can shift + let prefix_len = + common_prefix_len(old, next_op.old_range(), new, this_op.new_range()); + if prefix_len > 0 { + if let Some(DiffTag::Equal) = pointer + .checked_sub(1) + .and_then(|x| ops.get(x)) + .map(|x| x.tag()) + { + ops[pointer - 1].grow_right(prefix_len); + } else { + ops.insert( + pointer, + DiffOp::Equal { + old_index: next_op.old_range().start, + new_index: this_op.new_range().start, + len: prefix_len, + }, + ); + pointer += 1; + } + ops[pointer].shift_right(prefix_len); + ops[pointer + 1].shrink_right(prefix_len); + + if ops[pointer + 1].is_empty() { + ops.remove(pointer + 1); + } + } else if ops[pointer + 1].is_empty() { + ops.remove(pointer + 1); + } else { + // We can't shift downwards anymore + break; + } + } + // Swap the Delete and Insert + (DiffTag::Insert, DiffTag::Delete) | (DiffTag::Delete, DiffTag::Insert) => { + ops.swap(pointer, pointer + 1); + pointer += 1; + } + // Merge the two ranges + (DiffTag::Insert, DiffTag::Insert) => { + ops[pointer].grow_right(next_op.new_range().len()); + ops.remove(pointer + 1); + } + (DiffTag::Delete, DiffTag::Delete) => { + ops[pointer].grow_right(next_op.old_range().len()); + ops.remove(pointer + 1); + } + _ => unreachable!("unexpected tag"), + } + } + pointer +} diff --git a/vendor/similar/src/algorithms/hook.rs b/vendor/similar/src/algorithms/hook.rs new file mode 100644 index 000000000..23dcb7fd2 --- /dev/null +++ b/vendor/similar/src/algorithms/hook.rs @@ -0,0 +1,178 @@ +/// A trait for reacting to an edit script from the "old" version to +/// the "new" version. +pub trait DiffHook: Sized { + /// The error produced from the hook methods. + type Error; + + /// Called when lines with indices `old_index` (in the old version) and + /// `new_index` (in the new version) start an section equal in both + /// versions, of length `len`. + fn equal(&mut self, old_index: usize, new_index: usize, len: usize) -> Result<(), Self::Error> { + let _ = old_index; + let _ = new_index; + let _ = len; + Ok(()) + } + + /// Called when a section of length `old_len`, starting at `old_index`, + /// needs to be deleted from the old version. + fn delete( + &mut self, + old_index: usize, + old_len: usize, + new_index: usize, + ) -> Result<(), Self::Error> { + let _ = old_index; + let _ = old_len; + let _ = new_index; + Ok(()) + } + + /// Called when a section of the new version, of length `new_len` + /// and starting at `new_index`, needs to be inserted at position `old_index'. + fn insert( + &mut self, + old_index: usize, + new_index: usize, + new_len: usize, + ) -> Result<(), Self::Error> { + let _ = old_index; + let _ = new_index; + let _ = new_len; + Ok(()) + } + + /// Called when a section of the old version, starting at index + /// `old_index` and of length `old_len`, needs to be replaced with a + /// section of length `new_len`, starting at `new_index`, of the new + /// version. + /// + /// The default implementations invokes `delete` and `insert`. + /// + /// You can use the [`Replace`](crate::algorithms::Replace) hook to + /// automatically generate these. + #[inline(always)] + fn replace( + &mut self, + old_index: usize, + old_len: usize, + new_index: usize, + new_len: usize, + ) -> Result<(), Self::Error> { + self.delete(old_index, old_len, new_index)?; + self.insert(old_index, new_index, new_len) + } + + /// Always called at the end of the algorithm. + #[inline(always)] + fn finish(&mut self) -> Result<(), Self::Error> { + Ok(()) + } +} + +impl<'a, D: DiffHook + 'a> DiffHook for &'a mut D { + type Error = D::Error; + + #[inline(always)] + fn equal(&mut self, old_index: usize, new_index: usize, len: usize) -> Result<(), Self::Error> { + (*self).equal(old_index, new_index, len) + } + + #[inline(always)] + fn delete( + &mut self, + old_index: usize, + old_len: usize, + new_index: usize, + ) -> Result<(), Self::Error> { + (*self).delete(old_index, old_len, new_index) + } + + #[inline(always)] + fn insert( + &mut self, + old_index: usize, + new_index: usize, + new_len: usize, + ) -> Result<(), Self::Error> { + (*self).insert(old_index, new_index, new_len) + } + + #[inline(always)] + fn replace( + &mut self, + old: usize, + old_len: usize, + new: usize, + new_len: usize, + ) -> Result<(), Self::Error> { + (*self).replace(old, old_len, new, new_len) + } + + #[inline(always)] + fn finish(&mut self) -> Result<(), Self::Error> { + (*self).finish() + } +} + +/// Wrapper [`DiffHook`] that prevents calls to [`DiffHook::finish`]. +/// +/// This hook is useful in situations where diff hooks are composed but you +/// want to prevent that the finish hook method is called. +pub struct NoFinishHook(D); + +impl NoFinishHook { + /// Wraps another hook. + pub fn new(d: D) -> NoFinishHook { + NoFinishHook(d) + } + + /// Extracts the inner hook. + pub fn into_inner(self) -> D { + self.0 + } +} + +impl DiffHook for NoFinishHook { + type Error = D::Error; + + #[inline(always)] + fn equal(&mut self, old_index: usize, new_index: usize, len: usize) -> Result<(), Self::Error> { + self.0.equal(old_index, new_index, len) + } + + #[inline(always)] + fn delete( + &mut self, + old_index: usize, + old_len: usize, + new_index: usize, + ) -> Result<(), Self::Error> { + self.0.delete(old_index, old_len, new_index) + } + + #[inline(always)] + fn insert( + &mut self, + old_index: usize, + new_index: usize, + new_len: usize, + ) -> Result<(), Self::Error> { + self.0.insert(old_index, new_index, new_len) + } + + #[inline(always)] + fn replace( + &mut self, + old_index: usize, + old_len: usize, + new_index: usize, + new_len: usize, + ) -> Result<(), Self::Error> { + self.0.replace(old_index, old_len, new_index, new_len) + } + + fn finish(&mut self) -> Result<(), Self::Error> { + Ok(()) + } +} diff --git a/vendor/similar/src/algorithms/lcs.rs b/vendor/similar/src/algorithms/lcs.rs new file mode 100644 index 000000000..6d1af687e --- /dev/null +++ b/vendor/similar/src/algorithms/lcs.rs @@ -0,0 +1,220 @@ +//! Hunt–McIlroy / Hunt–Szymanski LCS diff algorithm. +//! +//! * time: `O((NM)D log (M)D)` +//! * space `O(MN)` +use std::collections::BTreeMap; +use std::ops::{Index, Range}; +use std::time::Instant; + +use crate::algorithms::utils::{common_prefix_len, common_suffix_len, is_empty_range}; +use crate::algorithms::DiffHook; + +/// Hunt–McIlroy / Hunt–Szymanski LCS diff algorithm. +/// +/// Diff `old`, between indices `old_range` and `new` between indices `new_range`. +/// +/// This diff is done with an optional deadline that defines the maximal +/// execution time permitted before it bails and falls back to an very bad +/// approximation. Deadlines with LCS do not make a lot of sense and should +/// not be used. +pub fn diff( + d: &mut D, + old: &Old, + old_range: Range, + new: &New, + new_range: Range, +) -> Result<(), D::Error> +where + Old: Index + ?Sized, + New: Index + ?Sized, + D: DiffHook, + New::Output: PartialEq, +{ + diff_deadline(d, old, old_range, new, new_range, None) +} + +/// Hunt–McIlroy / Hunt–Szymanski LCS diff algorithm. +/// +/// Diff `old`, between indices `old_range` and `new` between indices `new_range`. +/// +/// This diff is done with an optional deadline that defines the maximal +/// execution time permitted before it bails and falls back to an approximation. +pub fn diff_deadline( + d: &mut D, + old: &Old, + old_range: Range, + new: &New, + new_range: Range, + deadline: Option, +) -> Result<(), D::Error> +where + Old: Index + ?Sized, + New: Index + ?Sized, + D: DiffHook, + New::Output: PartialEq, +{ + if is_empty_range(&new_range) { + d.delete(old_range.start, old_range.len(), new_range.start)?; + return Ok(()); + } else if is_empty_range(&old_range) { + d.insert(old_range.start, new_range.start, new_range.len())?; + return Ok(()); + } + + let common_prefix_len = common_prefix_len(old, old_range.clone(), new, new_range.clone()); + let common_suffix_len = common_suffix_len(old, old_range.clone(), new, new_range.clone()); + + let maybe_table = make_table( + old, + common_prefix_len..(old_range.len() - common_suffix_len), + new, + common_prefix_len..(new_range.len() - common_suffix_len), + deadline, + ); + let mut old_idx = 0; + let mut new_idx = 0; + let new_len = new_range.len() - common_prefix_len - common_suffix_len; + let old_len = old_range.len() - common_prefix_len - common_suffix_len; + + if common_prefix_len > 0 { + d.equal(old_range.start, new_range.start, common_prefix_len)?; + } + + if let Some(table) = maybe_table { + while new_idx < new_len && old_idx < old_len { + let old_orig_idx = old_range.start + common_prefix_len + old_idx; + let new_orig_idx = new_range.start + common_prefix_len + new_idx; + + if new[new_orig_idx] == old[old_orig_idx] { + d.equal(old_orig_idx, new_orig_idx, 1)?; + old_idx += 1; + new_idx += 1; + } else if table.get(&(new_idx, old_idx + 1)).map_or(0, |&x| x) + >= table.get(&(new_idx + 1, old_idx)).map_or(0, |&x| x) + { + d.delete(old_orig_idx, 1, new_orig_idx)?; + old_idx += 1; + } else { + d.insert(old_orig_idx, new_orig_idx, 1)?; + new_idx += 1; + } + } + } else { + let old_orig_idx = old_range.start + common_prefix_len + old_idx; + let new_orig_idx = new_range.start + common_prefix_len + new_idx; + d.delete(old_orig_idx, old_len, new_orig_idx)?; + d.insert(old_orig_idx, new_orig_idx, new_len)?; + } + + if old_idx < old_len { + d.delete( + old_range.start + common_prefix_len + old_idx, + old_len - old_idx, + new_range.start + common_prefix_len + new_idx, + )?; + old_idx += old_len - old_idx; + } + + if new_idx < new_len { + d.insert( + old_range.start + common_prefix_len + old_idx, + new_range.start + common_prefix_len + new_idx, + new_len - new_idx, + )?; + } + + if common_suffix_len > 0 { + d.equal( + old_range.start + old_len + common_prefix_len, + new_range.start + new_len + common_prefix_len, + common_suffix_len, + )?; + } + + d.finish() +} + +fn make_table( + old: &Old, + old_range: Range, + new: &New, + new_range: Range, + deadline: Option, +) -> Option> +where + Old: Index + ?Sized, + New: Index + ?Sized, + New::Output: PartialEq, +{ + let old_len = old_range.len(); + let new_len = new_range.len(); + let mut table = BTreeMap::new(); + + for i in (0..new_len).rev() { + // are we running for too long? give up on the table + if let Some(deadline) = deadline { + if Instant::now() > deadline { + return None; + } + } + + for j in (0..old_len).rev() { + let val = if new[i] == old[j] { + table.get(&(i + 1, j + 1)).map_or(0, |&x| x) + 1 + } else { + table + .get(&(i + 1, j)) + .map_or(0, |&x| x) + .max(table.get(&(i, j + 1)).map_or(0, |&x| x)) + }; + if val > 0 { + table.insert((i, j), val); + } + } + } + + Some(table) +} + +#[test] +fn test_table() { + let table = make_table(&vec![2, 3], 0..2, &vec![0, 1, 2], 0..3, None).unwrap(); + let expected = { + let mut m = BTreeMap::new(); + m.insert((1, 0), 1); + m.insert((0, 0), 1); + m.insert((2, 0), 1); + m + }; + assert_eq!(table, expected); +} + +#[test] +fn test_diff() { + let a: &[usize] = &[0, 1, 2, 3, 4]; + let b: &[usize] = &[0, 1, 2, 9, 4]; + + let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new()); + diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap(); + insta::assert_debug_snapshot!(d.into_inner().ops()); +} + +#[test] +fn test_contiguous() { + let a: &[usize] = &[0, 1, 2, 3, 4, 4, 4, 5]; + let b: &[usize] = &[0, 1, 2, 8, 9, 4, 4, 7]; + + let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new()); + diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap(); + insta::assert_debug_snapshot!(d.into_inner().ops()); +} + +#[test] +fn test_pat() { + let a: &[usize] = &[0, 1, 3, 4, 5]; + let b: &[usize] = &[0, 1, 4, 5, 8, 9]; + + let mut d = crate::algorithms::Capture::new(); + diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap(); + insta::assert_debug_snapshot!(d.ops()); +} diff --git a/vendor/similar/src/algorithms/mod.rs b/vendor/similar/src/algorithms/mod.rs new file mode 100644 index 000000000..40127b2f2 --- /dev/null +++ b/vendor/similar/src/algorithms/mod.rs @@ -0,0 +1,134 @@ +//! Various diff (longest common subsequence) algorithms. +//! +//! The implementations of the algorithms in this module are relatively low +//! level and expose the most generic bounds possible for the algorithm. To +//! use them you would typically use the higher level API if possible but +//! direct access to these algorithms can be useful in some cases. +//! +//! All these algorithms provide a `diff` function which takes two indexable +//! objects (for instance slices) and a [`DiffHook`]. As the +//! diff is generated the diff hook is invoked. Note that the diff hook does +//! not get access to the actual values but only the indexes. This is why the +//! diff hook is not used outside of the raw algorithm implementations as for +//! most situations access to the values is useful of required. +//! +//! The algorithms module really is the most low-level module in similar and +//! generally not the place to start. +//! +//! # Example +//! +//! This is a simple example that shows how you can calculate the difference +//! between two sequences and capture the ops into a vector. +//! +//! ```rust +//! use similar::algorithms::{Algorithm, Replace, Capture, diff_slices}; +//! +//! let a = vec![1, 2, 3, 4, 5]; +//! let b = vec![1, 2, 3, 4, 7]; +//! let mut d = Replace::new(Capture::new()); +//! diff_slices(Algorithm::Myers, &mut d, &a, &b).unwrap(); +//! let ops = d.into_inner().into_ops(); +//! ``` +//! +//! The above example is equivalent to using +//! [`capture_diff_slices`](crate::capture_diff_slices). + +mod capture; +mod compact; +mod hook; +mod replace; +pub(crate) mod utils; + +use std::hash::Hash; +use std::ops::{Index, Range}; +use std::time::Instant; + +pub use capture::Capture; +pub use compact::Compact; +pub use hook::{DiffHook, NoFinishHook}; +pub use replace::Replace; +pub use utils::IdentifyDistinct; + +#[doc(no_inline)] +pub use crate::Algorithm; + +pub mod lcs; +pub mod myers; +pub mod patience; + +/// Creates a diff between old and new with the given algorithm. +/// +/// Diffs `old`, between indices `old_range` and `new` between indices `new_range`. +pub fn diff( + alg: Algorithm, + d: &mut D, + old: &Old, + old_range: Range, + new: &New, + new_range: Range, +) -> Result<(), D::Error> +where + Old: Index + ?Sized, + New: Index + ?Sized, + D: DiffHook, + Old::Output: Hash + Eq + Ord, + New::Output: PartialEq + Hash + Eq + Ord, +{ + diff_deadline(alg, d, old, old_range, new, new_range, None) +} + +/// Creates a diff between old and new with the given algorithm with deadline. +/// +/// Diffs `old`, between indices `old_range` and `new` between indices `new_range`. +/// +/// This diff is done with an optional deadline that defines the maximal +/// execution time permitted before it bails and falls back to an approximation. +/// Note that not all algorithms behave well if they reach the deadline (LCS +/// for instance produces a very simplistic diff when the deadline is reached +/// in all cases). +pub fn diff_deadline( + alg: Algorithm, + d: &mut D, + old: &Old, + old_range: Range, + new: &New, + new_range: Range, + deadline: Option, +) -> Result<(), D::Error> +where + Old: Index + ?Sized, + New: Index + ?Sized, + D: DiffHook, + Old::Output: Hash + Eq + Ord, + New::Output: PartialEq + Hash + Eq + Ord, +{ + match alg { + Algorithm::Myers => myers::diff_deadline(d, old, old_range, new, new_range, deadline), + Algorithm::Patience => patience::diff_deadline(d, old, old_range, new, new_range, deadline), + Algorithm::Lcs => lcs::diff_deadline(d, old, old_range, new, new_range, deadline), + } +} + +/// Shortcut for diffing slices with a specific algorithm. +pub fn diff_slices(alg: Algorithm, d: &mut D, old: &[T], new: &[T]) -> Result<(), D::Error> +where + D: DiffHook, + T: Eq + Hash + Ord, +{ + diff(alg, d, old, 0..old.len(), new, 0..new.len()) +} + +/// Shortcut for diffing slices with a specific algorithm. +pub fn diff_slices_deadline( + alg: Algorithm, + d: &mut D, + old: &[T], + new: &[T], + deadline: Option, +) -> Result<(), D::Error> +where + D: DiffHook, + T: Eq + Hash + Ord, +{ + diff_deadline(alg, d, old, 0..old.len(), new, 0..new.len(), deadline) +} diff --git a/vendor/similar/src/algorithms/myers.rs b/vendor/similar/src/algorithms/myers.rs new file mode 100644 index 000000000..542c7ed75 --- /dev/null +++ b/vendor/similar/src/algorithms/myers.rs @@ -0,0 +1,415 @@ +//! Myers' diff algorithm. +//! +//! * time: `O((N+M)D)` +//! * space `O(N+M)` +//! +//! See [the original article by Eugene W. Myers](http://www.xmailserver.org/diff2.pdf) +//! describing it. +//! +//! The implementation of this algorithm is based on the implementation by +//! Brandon Williams. +//! +//! # Heuristics +//! +//! At present this implementation of Myers' does not implement any more advanced +//! heuristics that would solve some pathological cases. For instane passing two +//! large and completely distinct sequences to the algorithm will make it spin +//! without making reasonable progress. Currently the only protection in the +//! library against this is to pass a deadline to the diffing algorithm. +//! +//! For potential improvements here see [similar#15](https://github.com/mitsuhiko/similar/issues/15). + +use std::ops::{Index, IndexMut, Range}; +use std::time::Instant; + +use crate::algorithms::utils::{common_prefix_len, common_suffix_len, is_empty_range}; +use crate::algorithms::DiffHook; + +/// Myers' diff algorithm. +/// +/// Diff `old`, between indices `old_range` and `new` between indices `new_range`. +pub fn diff( + d: &mut D, + old: &Old, + old_range: Range, + new: &New, + new_range: Range, +) -> Result<(), D::Error> +where + Old: Index + ?Sized, + New: Index + ?Sized, + D: DiffHook, + New::Output: PartialEq, +{ + diff_deadline(d, old, old_range, new, new_range, None) +} + +/// Myers' diff algorithm with deadline. +/// +/// Diff `old`, between indices `old_range` and `new` between indices `new_range`. +/// +/// This diff is done with an optional deadline that defines the maximal +/// execution time permitted before it bails and falls back to an approximation. +pub fn diff_deadline( + d: &mut D, + old: &Old, + old_range: Range, + new: &New, + new_range: Range, + deadline: Option, +) -> Result<(), D::Error> +where + Old: Index + ?Sized, + New: Index + ?Sized, + D: DiffHook, + New::Output: PartialEq, +{ + let max_d = max_d(old_range.len(), new_range.len()); + let mut vb = V::new(max_d); + let mut vf = V::new(max_d); + conquer( + d, old, old_range, new, new_range, &mut vf, &mut vb, deadline, + )?; + d.finish() +} + +// A D-path is a path which starts at (0,0) that has exactly D non-diagonal +// edges. All D-paths consist of a (D - 1)-path followed by a non-diagonal edge +// and then a possibly empty sequence of diagonal edges called a snake. + +/// `V` contains the endpoints of the furthest reaching `D-paths`. For each +/// recorded endpoint `(x,y)` in diagonal `k`, we only need to retain `x` because +/// `y` can be computed from `x - k`. In other words, `V` is an array of integers +/// where `V[k]` contains the row index of the endpoint of the furthest reaching +/// path in diagonal `k`. +/// +/// We can't use a traditional Vec to represent `V` since we use `k` as an index +/// and it can take on negative values. So instead `V` is represented as a +/// light-weight wrapper around a Vec plus an `offset` which is the maximum value +/// `k` can take on in order to map negative `k`'s back to a value >= 0. +#[derive(Debug)] +struct V { + offset: isize, + v: Vec, // Look into initializing this to -1 and storing isize +} + +impl V { + fn new(max_d: usize) -> Self { + Self { + offset: max_d as isize, + v: vec![0; 2 * max_d], + } + } + + fn len(&self) -> usize { + self.v.len() + } +} + +impl Index for V { + type Output = usize; + + fn index(&self, index: isize) -> &Self::Output { + &self.v[(index + self.offset) as usize] + } +} + +impl IndexMut for V { + fn index_mut(&mut self, index: isize) -> &mut Self::Output { + &mut self.v[(index + self.offset) as usize] + } +} + +fn max_d(len1: usize, len2: usize) -> usize { + // XXX look into reducing the need to have the additional '+ 1' + (len1 + len2 + 1) / 2 + 1 +} + +#[inline(always)] +fn split_at(range: Range, at: usize) -> (Range, Range) { + (range.start..at, at..range.end) +} + +/// A `Snake` is a sequence of diagonal edges in the edit graph. Normally +/// a snake has a start end end point (and it is possible for a snake to have +/// a length of zero, meaning the start and end points are the same) however +/// we do not need the end point which is why it's not implemented here. +/// +/// The divide part of a divide-and-conquer strategy. A D-path has D+1 snakes +/// some of which may be empty. The divide step requires finding the ceil(D/2) + +/// 1 or middle snake of an optimal D-path. The idea for doing so is to +/// simultaneously run the basic algorithm in both the forward and reverse +/// directions until furthest reaching forward and reverse paths starting at +/// opposing corners 'overlap'. +fn find_middle_snake( + old: &Old, + old_range: Range, + new: &New, + new_range: Range, + vf: &mut V, + vb: &mut V, + deadline: Option, +) -> Option<(usize, usize)> +where + Old: Index + ?Sized, + New: Index + ?Sized, + New::Output: PartialEq, +{ + let n = old_range.len(); + let m = new_range.len(); + + // By Lemma 1 in the paper, the optimal edit script length is odd or even as + // `delta` is odd or even. + let delta = n as isize - m as isize; + let odd = delta & 1 == 1; + + // The initial point at (0, -1) + vf[1] = 0; + // The initial point at (N, M+1) + vb[1] = 0; + + // We only need to explore ceil(D/2) + 1 + let d_max = max_d(n, m); + assert!(vf.len() >= d_max); + assert!(vb.len() >= d_max); + + for d in 0..d_max as isize { + // are we running for too long? + if let Some(deadline) = deadline { + if Instant::now() > deadline { + break; + } + } + + // Forward path + for k in (-d..=d).rev().step_by(2) { + let mut x = if k == -d || (k != d && vf[k - 1] < vf[k + 1]) { + vf[k + 1] + } else { + vf[k - 1] + 1 + }; + let y = (x as isize - k) as usize; + + // The coordinate of the start of a snake + let (x0, y0) = (x, y); + // While these sequences are identical, keep moving through the + // graph with no cost + if x < old_range.len() && y < new_range.len() { + let advance = common_prefix_len( + old, + old_range.start + x..old_range.end, + new, + new_range.start + y..new_range.end, + ); + x += advance; + } + + // This is the new best x value + vf[k] = x; + + // Only check for connections from the forward search when N - M is + // odd and when there is a reciprocal k line coming from the other + // direction. + if odd && (k - delta).abs() <= (d - 1) { + // TODO optimize this so we don't have to compare against n + if vf[k] + vb[-(k - delta)] >= n { + // Return the snake + return Some((x0 + old_range.start, y0 + new_range.start)); + } + } + } + + // Backward path + for k in (-d..=d).rev().step_by(2) { + let mut x = if k == -d || (k != d && vb[k - 1] < vb[k + 1]) { + vb[k + 1] + } else { + vb[k - 1] + 1 + }; + let mut y = (x as isize - k) as usize; + + // The coordinate of the start of a snake + if x < n && y < m { + let advance = common_suffix_len( + old, + old_range.start..old_range.start + n - x, + new, + new_range.start..new_range.start + m - y, + ); + x += advance; + y += advance; + } + + // This is the new best x value + vb[k] = x; + + if !odd && (k - delta).abs() <= d { + // TODO optimize this so we don't have to compare against n + if vb[k] + vf[-(k - delta)] >= n { + // Return the snake + return Some((n - x + old_range.start, m - y + new_range.start)); + } + } + } + + // TODO: Maybe there's an opportunity to optimize and bail early? + } + + // deadline reached + None +} + +#[allow(clippy::too_many_arguments)] +fn conquer( + d: &mut D, + old: &Old, + mut old_range: Range, + new: &New, + mut new_range: Range, + vf: &mut V, + vb: &mut V, + deadline: Option, +) -> Result<(), D::Error> +where + Old: Index + ?Sized, + New: Index + ?Sized, + D: DiffHook, + New::Output: PartialEq, +{ + // Check for common prefix + let common_prefix_len = common_prefix_len(old, old_range.clone(), new, new_range.clone()); + if common_prefix_len > 0 { + d.equal(old_range.start, new_range.start, common_prefix_len)?; + } + old_range.start += common_prefix_len; + new_range.start += common_prefix_len; + + // Check for common suffix + let common_suffix_len = common_suffix_len(old, old_range.clone(), new, new_range.clone()); + let common_suffix = ( + old_range.end - common_suffix_len, + new_range.end - common_suffix_len, + ); + old_range.end -= common_suffix_len; + new_range.end -= common_suffix_len; + + if is_empty_range(&old_range) && is_empty_range(&new_range) { + // Do nothing + } else if is_empty_range(&new_range) { + d.delete(old_range.start, old_range.len(), new_range.start)?; + } else if is_empty_range(&old_range) { + d.insert(old_range.start, new_range.start, new_range.len())?; + } else if let Some((x_start, y_start)) = find_middle_snake( + old, + old_range.clone(), + new, + new_range.clone(), + vf, + vb, + deadline, + ) { + let (old_a, old_b) = split_at(old_range, x_start); + let (new_a, new_b) = split_at(new_range, y_start); + conquer(d, old, old_a, new, new_a, vf, vb, deadline)?; + conquer(d, old, old_b, new, new_b, vf, vb, deadline)?; + } else { + d.delete( + old_range.start, + old_range.end - old_range.start, + new_range.start, + )?; + d.insert( + old_range.start, + new_range.start, + new_range.end - new_range.start, + )?; + } + + if common_suffix_len > 0 { + d.equal(common_suffix.0, common_suffix.1, common_suffix_len)?; + } + + Ok(()) +} + +#[test] +fn test_find_middle_snake() { + let a = &b"ABCABBA"[..]; + let b = &b"CBABAC"[..]; + let max_d = max_d(a.len(), b.len()); + let mut vf = V::new(max_d); + let mut vb = V::new(max_d); + let (x_start, y_start) = + find_middle_snake(a, 0..a.len(), b, 0..b.len(), &mut vf, &mut vb, None).unwrap(); + assert_eq!(x_start, 4); + assert_eq!(y_start, 1); +} + +#[test] +fn test_diff() { + let a: &[usize] = &[0, 1, 2, 3, 4]; + let b: &[usize] = &[0, 1, 2, 9, 4]; + + let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new()); + diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap(); + insta::assert_debug_snapshot!(d.into_inner().ops()); +} + +#[test] +fn test_contiguous() { + let a: &[usize] = &[0, 1, 2, 3, 4, 4, 4, 5]; + let b: &[usize] = &[0, 1, 2, 8, 9, 4, 4, 7]; + + let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new()); + diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap(); + insta::assert_debug_snapshot!(d.into_inner().ops()); +} + +#[test] +fn test_pat() { + let a: &[usize] = &[0, 1, 3, 4, 5]; + let b: &[usize] = &[0, 1, 4, 5, 8, 9]; + + let mut d = crate::algorithms::Capture::new(); + diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap(); + insta::assert_debug_snapshot!(d.ops()); +} + +#[test] +fn test_deadline_reached() { + use std::ops::Index; + use std::time::Duration; + + let a = (0..100).collect::>(); + let mut b = (0..100).collect::>(); + b[10] = 99; + b[50] = 99; + b[25] = 99; + + struct SlowIndex<'a>(&'a [usize]); + + impl<'a> Index for SlowIndex<'a> { + type Output = usize; + + fn index(&self, index: usize) -> &Self::Output { + std::thread::sleep(Duration::from_millis(1)); + &self.0[index] + } + } + + let slow_a = SlowIndex(&a); + let slow_b = SlowIndex(&b); + + // don't give it enough time to do anything interesting + let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new()); + diff_deadline( + &mut d, + &slow_a, + 0..a.len(), + &slow_b, + 0..b.len(), + Some(Instant::now() + Duration::from_millis(50)), + ) + .unwrap(); + insta::assert_debug_snapshot!(d.into_inner().ops()); +} diff --git a/vendor/similar/src/algorithms/patience.rs b/vendor/similar/src/algorithms/patience.rs new file mode 100644 index 000000000..2b2faa27c --- /dev/null +++ b/vendor/similar/src/algorithms/patience.rs @@ -0,0 +1,169 @@ +//! Patience diff algorithm. +//! +//! * time: `O(N log N + M log M + (N+M)D)` +//! * space: `O(N+M)` +//! +//! Tends to give more human-readable outputs. See [Bram Cohen's blog +//! post](https://bramcohen.livejournal.com/73318.html) describing it. +//! +//! This is based on the patience implementation of [pijul](https://pijul.org/) +//! by Pierre-Étienne Meunier. +use std::hash::Hash; +use std::ops::{Index, Range}; +use std::time::Instant; + +use crate::algorithms::{myers, DiffHook, NoFinishHook, Replace}; + +use super::utils::{unique, UniqueItem}; + +/// Patience diff algorithm. +/// +/// Diff `old`, between indices `old_range` and `new` between indices `new_range`. +pub fn diff( + d: &mut D, + old: &Old, + old_range: Range, + new: &New, + new_range: Range, +) -> Result<(), D::Error> +where + Old: Index + ?Sized, + New: Index + ?Sized, + Old::Output: Hash + Eq, + New::Output: PartialEq + Hash + Eq, + D: DiffHook, +{ + diff_deadline(d, old, old_range, new, new_range, None) +} + +/// Patience diff algorithm with deadline. +/// +/// Diff `old`, between indices `old_range` and `new` between indices `new_range`. +/// +/// This diff is done with an optional deadline that defines the maximal +/// execution time permitted before it bails and falls back to an approximation. +pub fn diff_deadline( + d: &mut D, + old: &Old, + old_range: Range, + new: &New, + new_range: Range, + deadline: Option, +) -> Result<(), D::Error> +where + Old: Index + ?Sized, + New: Index + ?Sized, + Old::Output: Hash + Eq, + New::Output: PartialEq + Hash + Eq, + D: DiffHook, +{ + let old_indexes = unique(old, old_range.clone()); + let new_indexes = unique(new, new_range.clone()); + + let mut d = Replace::new(Patience { + d, + old, + old_current: old_range.start, + old_end: old_range.end, + old_indexes: &old_indexes, + new, + new_current: new_range.start, + new_end: new_range.end, + new_indexes: &new_indexes, + deadline, + }); + myers::diff_deadline( + &mut d, + &old_indexes, + 0..old_indexes.len(), + &new_indexes, + 0..new_indexes.len(), + deadline, + )?; + Ok(()) +} + +struct Patience<'old, 'new, 'd, Old: ?Sized, New: ?Sized, D> { + d: &'d mut D, + old: &'old Old, + old_current: usize, + old_end: usize, + old_indexes: &'old [UniqueItem<'old, Old>], + new: &'new New, + new_current: usize, + new_end: usize, + new_indexes: &'new [UniqueItem<'new, New>], + deadline: Option, +} + +impl<'old, 'new, 'd, Old, New, D> DiffHook for Patience<'old, 'new, 'd, Old, New, D> +where + D: DiffHook + 'd, + Old: Index + ?Sized + 'old, + New: Index + ?Sized + 'new, + New::Output: PartialEq, +{ + type Error = D::Error; + fn equal(&mut self, old: usize, new: usize, len: usize) -> Result<(), D::Error> { + for (old, new) in (old..old + len).zip(new..new + len) { + let a0 = self.old_current; + let b0 = self.new_current; + while self.old_current < self.old_indexes[old].original_index() + && self.new_current < self.new_indexes[new].original_index() + && self.new[self.new_current] == self.old[self.old_current] + { + self.old_current += 1; + self.new_current += 1; + } + if self.old_current > a0 { + self.d.equal(a0, b0, self.old_current - a0)?; + } + let mut no_finish_d = NoFinishHook::new(&mut self.d); + myers::diff_deadline( + &mut no_finish_d, + self.old, + self.old_current..self.old_indexes[old].original_index(), + self.new, + self.new_current..self.new_indexes[new].original_index(), + self.deadline, + )?; + self.old_current = self.old_indexes[old].original_index(); + self.new_current = self.new_indexes[new].original_index(); + } + Ok(()) + } + + fn finish(&mut self) -> Result<(), D::Error> { + myers::diff_deadline( + self.d, + self.old, + self.old_current..self.old_end, + self.new, + self.new_current..self.new_end, + self.deadline, + ) + } +} + +#[test] +fn test_patience() { + let a: &[usize] = &[11, 1, 2, 2, 3, 4, 4, 4, 5, 47, 19]; + let b: &[usize] = &[10, 1, 2, 2, 8, 9, 4, 4, 7, 47, 18]; + + let mut d = Replace::new(crate::algorithms::Capture::new()); + diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap(); + + insta::assert_debug_snapshot!(d.into_inner().ops()); +} + +#[test] +fn test_patience_out_of_bounds_bug() { + // this used to be a bug + let a: &[usize] = &[1, 2, 3, 4]; + let b: &[usize] = &[1, 2, 3]; + + let mut d = Replace::new(crate::algorithms::Capture::new()); + diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap(); + + insta::assert_debug_snapshot!(d.into_inner().ops()); +} diff --git a/vendor/similar/src/algorithms/replace.rs b/vendor/similar/src/algorithms/replace.rs new file mode 100644 index 000000000..309ba05be --- /dev/null +++ b/vendor/similar/src/algorithms/replace.rs @@ -0,0 +1,221 @@ +use crate::algorithms::DiffHook; + +/// A [`DiffHook`] that combines deletions and insertions to give blocks +/// of maximal length, and replacements when appropriate. +/// +/// It will replace [`DiffHook::insert`] and [`DiffHook::delete`] events when +/// possible with [`DiffHook::replace`] events. Note that even though the +/// text processing in the crate does not use replace events and always resolves +/// then back to delete and insert, it's useful to always use the replacer to +/// ensure a consistent order of inserts and deletes. This is why for instance +/// the text diffing automatically uses this hook internally. +pub struct Replace { + d: D, + del: Option<(usize, usize, usize)>, + ins: Option<(usize, usize, usize)>, + eq: Option<(usize, usize, usize)>, +} + +impl Replace { + /// Creates a new replace hook wrapping another hook. + pub fn new(d: D) -> Self { + Replace { + d, + del: None, + ins: None, + eq: None, + } + } + + /// Extracts the inner hook. + pub fn into_inner(self) -> D { + self.d + } + + fn flush_eq(&mut self) -> Result<(), D::Error> { + if let Some((eq_old_index, eq_new_index, eq_len)) = self.eq.take() { + self.d.equal(eq_old_index, eq_new_index, eq_len)? + } + Ok(()) + } + + fn flush_del_ins(&mut self) -> Result<(), D::Error> { + if let Some((del_old_index, del_old_len, del_new_index)) = self.del.take() { + if let Some((_, ins_new_index, ins_new_len)) = self.ins.take() { + self.d + .replace(del_old_index, del_old_len, ins_new_index, ins_new_len)?; + } else { + self.d.delete(del_old_index, del_old_len, del_new_index)?; + } + } else if let Some((ins_old_index, ins_new_index, ins_new_len)) = self.ins.take() { + self.d.insert(ins_old_index, ins_new_index, ins_new_len)?; + } + Ok(()) + } +} + +impl AsRef for Replace { + fn as_ref(&self) -> &D { + &self.d + } +} + +impl AsMut for Replace { + fn as_mut(&mut self) -> &mut D { + &mut self.d + } +} + +impl DiffHook for Replace { + type Error = D::Error; + + fn equal(&mut self, old_index: usize, new_index: usize, len: usize) -> Result<(), D::Error> { + self.flush_del_ins()?; + + self.eq = if let Some((eq_old_index, eq_new_index, eq_len)) = self.eq.take() { + Some((eq_old_index, eq_new_index, eq_len + len)) + } else { + Some((old_index, new_index, len)) + }; + + Ok(()) + } + + fn delete( + &mut self, + old_index: usize, + old_len: usize, + new_index: usize, + ) -> Result<(), D::Error> { + self.flush_eq()?; + if let Some((del_old_index, del_old_len, del_new_index)) = self.del.take() { + debug_assert_eq!(old_index, del_old_index + del_old_len); + self.del = Some((del_old_index, del_old_len + old_len, del_new_index)); + } else { + self.del = Some((old_index, old_len, new_index)); + } + Ok(()) + } + + fn insert( + &mut self, + old_index: usize, + new_index: usize, + new_len: usize, + ) -> Result<(), D::Error> { + self.flush_eq()?; + self.ins = if let Some((ins_old_index, ins_new_index, ins_new_len)) = self.ins.take() { + debug_assert_eq!(ins_new_index + ins_new_len, new_index); + Some((ins_old_index, ins_new_index, new_len + ins_new_len)) + } else { + Some((old_index, new_index, new_len)) + }; + + Ok(()) + } + + fn replace( + &mut self, + old_index: usize, + old_len: usize, + new_index: usize, + new_len: usize, + ) -> Result<(), D::Error> { + self.flush_eq()?; + self.d.replace(old_index, old_len, new_index, new_len) + } + + fn finish(&mut self) -> Result<(), D::Error> { + self.flush_eq()?; + self.flush_del_ins()?; + self.d.finish() + } +} + +#[test] +fn test_mayers_replace() { + use crate::algorithms::{diff_slices, Algorithm}; + let a: &[&str] = &[ + ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n", + "a\n", + "b\n", + "c\n", + "================================\n", + "d\n", + "e\n", + "f\n", + "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n", + ]; + let b: &[&str] = &[ + ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n", + "x\n", + "b\n", + "c\n", + "================================\n", + "y\n", + "e\n", + "f\n", + "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n", + ]; + + let mut d = Replace::new(crate::algorithms::Capture::new()); + diff_slices(Algorithm::Myers, &mut d, a, b).unwrap(); + + insta::assert_debug_snapshot!(&d.into_inner().ops(), @r###" + [ + Equal { + old_index: 0, + new_index: 0, + len: 1, + }, + Replace { + old_index: 1, + old_len: 1, + new_index: 1, + new_len: 1, + }, + Equal { + old_index: 2, + new_index: 2, + len: 3, + }, + Replace { + old_index: 5, + old_len: 1, + new_index: 5, + new_len: 1, + }, + Equal { + old_index: 6, + new_index: 6, + len: 3, + }, + ] + "###); +} + +#[test] +fn test_replace() { + use crate::algorithms::{diff_slices, Algorithm}; + + let a: &[usize] = &[0, 1, 2, 3, 4]; + let b: &[usize] = &[0, 1, 2, 7, 8, 9]; + + let mut d = Replace::new(crate::algorithms::Capture::new()); + diff_slices(Algorithm::Myers, &mut d, a, b).unwrap(); + insta::assert_debug_snapshot!(d.into_inner().ops(), @r###" + [ + Equal { + old_index: 0, + new_index: 0, + len: 3, + }, + Replace { + old_index: 3, + old_len: 2, + new_index: 3, + new_len: 3, + }, + ] + "###); +} diff --git a/vendor/similar/src/algorithms/snapshots/similar__algorithms__capture__capture_hook_grouping-2.snap b/vendor/similar/src/algorithms/snapshots/similar__algorithms__capture__capture_hook_grouping-2.snap new file mode 100644 index 000000000..d00464988 --- /dev/null +++ b/vendor/similar/src/algorithms/snapshots/similar__algorithms__capture__capture_hook_grouping-2.snap @@ -0,0 +1,60 @@ +--- +source: src/algorithms/capture.rs +expression: tags +--- +[ + [ + ( + Equal, + 7..10, + 7..10, + ), + ( + Replace, + 10..11, + 10..11, + ), + ( + Equal, + 11..13, + 11..13, + ), + ( + Replace, + 13..14, + 13..14, + ), + ( + Equal, + 14..16, + 14..16, + ), + ( + Replace, + 16..17, + 16..17, + ), + ( + Equal, + 17..20, + 17..20, + ), + ], + [ + ( + Equal, + 31..34, + 31..34, + ), + ( + Replace, + 34..35, + 34..35, + ), + ( + Equal, + 35..38, + 35..38, + ), + ], +] diff --git a/vendor/similar/src/algorithms/snapshots/similar__algorithms__capture__capture_hook_grouping.snap b/vendor/similar/src/algorithms/snapshots/similar__algorithms__capture__capture_hook_grouping.snap new file mode 100644 index 000000000..fbea3c2bf --- /dev/null +++ b/vendor/similar/src/algorithms/snapshots/similar__algorithms__capture__capture_hook_grouping.snap @@ -0,0 +1,64 @@ +--- +source: src/algorithms/capture.rs +expression: ops +--- +[ + [ + Equal { + old_index: 7, + new_index: 7, + len: 3, + }, + Replace { + old_index: 10, + old_len: 1, + new_index: 10, + new_len: 1, + }, + Equal { + old_index: 11, + new_index: 11, + len: 2, + }, + Replace { + old_index: 13, + old_len: 1, + new_index: 13, + new_len: 1, + }, + Equal { + old_index: 14, + new_index: 14, + len: 2, + }, + Replace { + old_index: 16, + old_len: 1, + new_index: 16, + new_len: 1, + }, + Equal { + old_index: 17, + new_index: 17, + len: 3, + }, + ], + [ + Equal { + old_index: 31, + new_index: 31, + len: 3, + }, + Replace { + old_index: 34, + old_len: 1, + new_index: 34, + new_len: 1, + }, + Equal { + old_index: 35, + new_index: 35, + len: 3, + }, + ], +] diff --git a/vendor/similar/src/algorithms/snapshots/similar__algorithms__lcs__contiguous.snap b/vendor/similar/src/algorithms/snapshots/similar__algorithms__lcs__contiguous.snap new file mode 100644 index 000000000..77e5afa6d --- /dev/null +++ b/vendor/similar/src/algorithms/snapshots/similar__algorithms__lcs__contiguous.snap @@ -0,0 +1,28 @@ +--- +source: src/algorithms/lcs.rs +expression: d.into_inner().ops() +--- +[ + Equal { + old_index: 0, + new_index: 0, + len: 3, + }, + Replace { + old_index: 3, + old_len: 2, + new_index: 3, + new_len: 2, + }, + Equal { + old_index: 5, + new_index: 5, + len: 2, + }, + Replace { + old_index: 7, + old_len: 1, + new_index: 7, + new_len: 1, + }, +] diff --git a/vendor/similar/src/algorithms/snapshots/similar__algorithms__lcs__diff.snap b/vendor/similar/src/algorithms/snapshots/similar__algorithms__lcs__diff.snap new file mode 100644 index 000000000..d43706ded --- /dev/null +++ b/vendor/similar/src/algorithms/snapshots/similar__algorithms__lcs__diff.snap @@ -0,0 +1,22 @@ +--- +source: src/algorithms/lcs.rs +expression: d.into_inner().ops() +--- +[ + Equal { + old_index: 0, + new_index: 0, + len: 3, + }, + Replace { + old_index: 3, + old_len: 1, + new_index: 3, + new_len: 1, + }, + Equal { + old_index: 4, + new_index: 4, + len: 1, + }, +] diff --git a/vendor/similar/src/algorithms/snapshots/similar__algorithms__lcs__pat.snap b/vendor/similar/src/algorithms/snapshots/similar__algorithms__lcs__pat.snap new file mode 100644 index 000000000..9ed25ae4c --- /dev/null +++ b/vendor/similar/src/algorithms/snapshots/similar__algorithms__lcs__pat.snap @@ -0,0 +1,31 @@ +--- +source: src/algorithms/lcs.rs +expression: d.ops() +--- +[ + Equal { + old_index: 0, + new_index: 0, + len: 2, + }, + Delete { + old_index: 2, + old_len: 1, + new_index: 2, + }, + Equal { + old_index: 3, + new_index: 2, + len: 1, + }, + Equal { + old_index: 4, + new_index: 3, + len: 1, + }, + Insert { + old_index: 5, + new_index: 4, + new_len: 2, + }, +] diff --git a/vendor/similar/src/algorithms/snapshots/similar__algorithms__myers__contiguous.snap b/vendor/similar/src/algorithms/snapshots/similar__algorithms__myers__contiguous.snap new file mode 100644 index 000000000..38cca884e --- /dev/null +++ b/vendor/similar/src/algorithms/snapshots/similar__algorithms__myers__contiguous.snap @@ -0,0 +1,28 @@ +--- +source: src/algorithms/myers.rs +expression: d.into_inner().ops() +--- +[ + Equal { + old_index: 0, + new_index: 0, + len: 3, + }, + Replace { + old_index: 3, + old_len: 1, + new_index: 3, + new_len: 2, + }, + Equal { + old_index: 4, + new_index: 5, + len: 2, + }, + Replace { + old_index: 6, + old_len: 2, + new_index: 7, + new_len: 1, + }, +] diff --git a/vendor/similar/src/algorithms/snapshots/similar__algorithms__myers__deadline_reached.snap b/vendor/similar/src/algorithms/snapshots/similar__algorithms__myers__deadline_reached.snap new file mode 100644 index 000000000..f972cca8a --- /dev/null +++ b/vendor/similar/src/algorithms/snapshots/similar__algorithms__myers__deadline_reached.snap @@ -0,0 +1,22 @@ +--- +source: src/algorithms/myers.rs +expression: d.into_inner().ops() +--- +[ + Equal { + old_index: 0, + new_index: 0, + len: 10, + }, + Replace { + old_index: 10, + old_len: 41, + new_index: 10, + new_len: 41, + }, + Equal { + old_index: 51, + new_index: 51, + len: 49, + }, +] diff --git a/vendor/similar/src/algorithms/snapshots/similar__algorithms__myers__diff.snap b/vendor/similar/src/algorithms/snapshots/similar__algorithms__myers__diff.snap new file mode 100644 index 000000000..aa3556d40 --- /dev/null +++ b/vendor/similar/src/algorithms/snapshots/similar__algorithms__myers__diff.snap @@ -0,0 +1,22 @@ +--- +source: src/algorithms/myers.rs +expression: d.into_inner().ops() +--- +[ + Equal { + old_index: 0, + new_index: 0, + len: 3, + }, + Replace { + old_index: 3, + old_len: 1, + new_index: 3, + new_len: 1, + }, + Equal { + old_index: 4, + new_index: 4, + len: 1, + }, +] diff --git a/vendor/similar/src/algorithms/snapshots/similar__algorithms__myers__pat.snap b/vendor/similar/src/algorithms/snapshots/similar__algorithms__myers__pat.snap new file mode 100644 index 000000000..469a1fc81 --- /dev/null +++ b/vendor/similar/src/algorithms/snapshots/similar__algorithms__myers__pat.snap @@ -0,0 +1,31 @@ +--- +source: src/algorithms/myers.rs +expression: d.ops() +--- +[ + Equal { + old_index: 0, + new_index: 0, + len: 2, + }, + Delete { + old_index: 2, + old_len: 1, + new_index: 2, + }, + Equal { + old_index: 3, + new_index: 2, + len: 2, + }, + Insert { + old_index: 5, + new_index: 4, + new_len: 1, + }, + Insert { + old_index: 5, + new_index: 5, + new_len: 1, + }, +] diff --git a/vendor/similar/src/algorithms/snapshots/similar__algorithms__patience__patience.snap b/vendor/similar/src/algorithms/snapshots/similar__algorithms__patience__patience.snap new file mode 100644 index 000000000..3ebd78a11 --- /dev/null +++ b/vendor/similar/src/algorithms/snapshots/similar__algorithms__patience__patience.snap @@ -0,0 +1,45 @@ +--- +source: src/algorithms/patience.rs +expression: d.into_inner().ops() +--- +[ + Replace { + old_index: 0, + old_len: 1, + new_index: 0, + new_len: 1, + }, + Equal { + old_index: 1, + new_index: 1, + len: 3, + }, + Replace { + old_index: 4, + old_len: 1, + new_index: 4, + new_len: 2, + }, + Equal { + old_index: 5, + new_index: 6, + len: 2, + }, + Replace { + old_index: 7, + old_len: 2, + new_index: 8, + new_len: 1, + }, + Equal { + old_index: 9, + new_index: 9, + len: 1, + }, + Replace { + old_index: 10, + old_len: 1, + new_index: 10, + new_len: 1, + }, +] diff --git a/vendor/similar/src/algorithms/snapshots/similar__algorithms__patience__patience_out_of_bounds_bug.snap b/vendor/similar/src/algorithms/snapshots/similar__algorithms__patience__patience_out_of_bounds_bug.snap new file mode 100644 index 000000000..051c59659 --- /dev/null +++ b/vendor/similar/src/algorithms/snapshots/similar__algorithms__patience__patience_out_of_bounds_bug.snap @@ -0,0 +1,16 @@ +--- +source: src/algorithms/patience.rs +expression: d.into_inner().ops() +--- +[ + Equal { + old_index: 0, + new_index: 0, + len: 3, + }, + Delete { + old_index: 3, + old_len: 1, + new_index: 3, + }, +] diff --git a/vendor/similar/src/algorithms/utils.rs b/vendor/similar/src/algorithms/utils.rs new file mode 100644 index 000000000..0d14ac799 --- /dev/null +++ b/vendor/similar/src/algorithms/utils.rs @@ -0,0 +1,379 @@ +use std::collections::hash_map::Entry; +use std::collections::HashMap; +use std::fmt::Debug; +use std::hash::{Hash, Hasher}; +use std::ops::{Add, Index, Range}; + +/// Utility function to check if a range is empty that works on older rust versions +#[inline(always)] +#[allow(clippy::neg_cmp_op_on_partial_ord)] +pub fn is_empty_range>(range: &Range) -> bool { + !(range.start < range.end) +} + +/// Represents an item in the vector returend by [`unique`]. +/// +/// It compares like the underlying item does it was created from but +/// carries the index it was originally created from. +pub struct UniqueItem<'a, Idx: ?Sized> { + lookup: &'a Idx, + index: usize, +} + +impl<'a, Idx: ?Sized> UniqueItem<'a, Idx> +where + Idx: Index, +{ + /// Returns the value. + #[inline(always)] + pub fn value(&self) -> &Idx::Output { + &self.lookup[self.index] + } + + /// Returns the original index. + #[inline(always)] + pub fn original_index(&self) -> usize { + self.index + } +} + +impl<'a, Idx: Index + 'a> Debug for UniqueItem<'a, Idx> +where + Idx::Output: Debug, +{ + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + f.debug_struct("UniqueItem") + .field("value", &self.value()) + .field("original_index", &self.original_index()) + .finish() + } +} + +impl<'a, 'b, A, B> PartialEq> for UniqueItem<'b, B> +where + A: Index + 'b + ?Sized, + B: Index + 'b + ?Sized, + B::Output: PartialEq, +{ + #[inline(always)] + fn eq(&self, other: &UniqueItem<'a, A>) -> bool { + self.value() == other.value() + } +} + +/// Returns only unique items in the sequence as vector. +/// +/// Each item is wrapped in a [`UniqueItem`] so that both the value and the +/// index can be extracted. +pub fn unique(lookup: &Idx, range: Range) -> Vec> +where + Idx: Index + ?Sized, + Idx::Output: Hash + Eq, +{ + let mut by_item = HashMap::new(); + for index in range { + match by_item.entry(&lookup[index]) { + Entry::Vacant(entry) => { + entry.insert(Some(index)); + } + Entry::Occupied(mut entry) => { + let entry = entry.get_mut(); + if entry.is_some() { + *entry = None + } + } + } + } + let mut rv = by_item + .into_iter() + .filter_map(|(_, x)| x) + .map(|index| UniqueItem { lookup, index }) + .collect::>(); + rv.sort_by_key(|a| a.original_index()); + rv +} + +/// Given two lookups and ranges calculates the length of the common prefix. +pub fn common_prefix_len( + old: &Old, + old_range: Range, + new: &New, + new_range: Range, +) -> usize +where + Old: Index + ?Sized, + New: Index + ?Sized, + New::Output: PartialEq, +{ + if is_empty_range(&old_range) || is_empty_range(&new_range) { + return 0; + } + new_range + .zip(old_range) + .take_while( + #[inline(always)] + |x| new[x.0] == old[x.1], + ) + .count() +} + +/// Given two lookups and ranges calculates the length of common suffix. +pub fn common_suffix_len( + old: &Old, + old_range: Range, + new: &New, + new_range: Range, +) -> usize +where + Old: Index + ?Sized, + New: Index + ?Sized, + New::Output: PartialEq, +{ + if is_empty_range(&old_range) || is_empty_range(&new_range) { + return 0; + } + new_range + .rev() + .zip(old_range.rev()) + .take_while( + #[inline(always)] + |x| new[x.0] == old[x.1], + ) + .count() +} + +struct OffsetLookup { + offset: usize, + vec: Vec, +} + +impl Index for OffsetLookup { + type Output = Int; + + #[inline(always)] + fn index(&self, index: usize) -> &Self::Output { + &self.vec[index - self.offset] + } +} + +/// A utility struct to convert distinct items to unique integers. +/// +/// This can be helpful on larger inputs to speed up the comparisons +/// performed by doing a first pass where the data set gets reduced +/// to (small) integers. +/// +/// The idea is that instead of passing two sequences to a diffling algorithm +/// you first pass it via [`IdentifyDistinct`]: +/// +/// ```rust +/// use similar::capture_diff; +/// use similar::algorithms::{Algorithm, IdentifyDistinct}; +/// +/// let old = &["foo", "bar", "baz"][..]; +/// let new = &["foo", "blah", "baz"][..]; +/// let h = IdentifyDistinct::::new(old, 0..old.len(), new, 0..new.len()); +/// let ops = capture_diff( +/// Algorithm::Myers, +/// h.old_lookup(), +/// h.old_range(), +/// h.new_lookup(), +/// h.new_range(), +/// ); +/// ``` +/// +/// The indexes are the same as with the passed source ranges. +pub struct IdentifyDistinct { + old: OffsetLookup, + new: OffsetLookup, +} + +impl IdentifyDistinct +where + Int: Add + From + Default + Copy, +{ + /// Creates an int hasher for two sequences. + pub fn new( + old: &Old, + old_range: Range, + new: &New, + new_range: Range, + ) -> Self + where + Old: Index + ?Sized, + Old::Output: Eq + Hash, + New: Index + ?Sized, + New::Output: Eq + Hash + PartialEq, + { + enum Key<'old, 'new, Old: ?Sized, New: ?Sized> { + Old(&'old Old), + New(&'new New), + } + + impl<'old, 'new, Old, New> Hash for Key<'old, 'new, Old, New> + where + Old: Hash + ?Sized, + New: Hash + ?Sized, + { + fn hash(&self, state: &mut H) { + match *self { + Key::Old(val) => val.hash(state), + Key::New(val) => val.hash(state), + } + } + } + + impl<'old, 'new, Old, New> PartialEq for Key<'old, 'new, Old, New> + where + Old: Eq + ?Sized, + New: Eq + PartialEq + ?Sized, + { + #[inline(always)] + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Key::Old(a), Key::Old(b)) => a == b, + (Key::New(a), Key::New(b)) => a == b, + (Key::Old(a), Key::New(b)) | (Key::New(b), Key::Old(a)) => b == a, + } + } + } + + impl<'old, 'new, Old, New> Eq for Key<'old, 'new, Old, New> + where + Old: Eq + ?Sized, + New: Eq + PartialEq + ?Sized, + { + } + + let mut map = HashMap::new(); + let mut old_seq = Vec::new(); + let mut new_seq = Vec::new(); + let mut next_id = Int::default(); + let step = Int::from(1); + let old_start = old_range.start; + let new_start = new_range.start; + + for idx in old_range { + let item = Key::Old(&old[idx]); + let id = match map.entry(item) { + Entry::Occupied(o) => *o.get(), + Entry::Vacant(v) => { + let id = next_id; + next_id = next_id + step; + *v.insert(id) + } + }; + old_seq.push(id); + } + + for idx in new_range { + let item = Key::New(&new[idx]); + let id = match map.entry(item) { + Entry::Occupied(o) => *o.get(), + Entry::Vacant(v) => { + let id = next_id; + next_id = next_id + step; + *v.insert(id) + } + }; + new_seq.push(id); + } + + IdentifyDistinct { + old: OffsetLookup { + offset: old_start, + vec: old_seq, + }, + new: OffsetLookup { + offset: new_start, + vec: new_seq, + }, + } + } + + /// Returns a lookup for the old side. + pub fn old_lookup(&self) -> &impl Index { + &self.old + } + + /// Returns a lookup for the new side. + pub fn new_lookup(&self) -> &impl Index { + &self.new + } + + /// Convenience method to get back the old range. + pub fn old_range(&self) -> Range { + self.old.offset..self.old.offset + self.old.vec.len() + } + + /// Convenience method to get back the new range. + pub fn new_range(&self) -> Range { + self.new.offset..self.new.offset + self.new.vec.len() + } +} + +#[test] +fn test_unique() { + let u = unique(&vec!['a', 'b', 'c', 'd', 'd', 'b'], 0..6) + .into_iter() + .map(|x| (*x.value(), x.original_index())) + .collect::>(); + assert_eq!(u, vec![('a', 0), ('c', 2)]); +} + +#[test] +fn test_int_hasher() { + let ih = IdentifyDistinct::::new( + &["", "foo", "bar", "baz"][..], + 1..4, + &["", "foo", "blah", "baz"][..], + 1..4, + ); + assert_eq!(ih.old_lookup()[1], 0); + assert_eq!(ih.old_lookup()[2], 1); + assert_eq!(ih.old_lookup()[3], 2); + assert_eq!(ih.new_lookup()[1], 0); + assert_eq!(ih.new_lookup()[2], 3); + assert_eq!(ih.new_lookup()[3], 2); + assert_eq!(ih.old_range(), 1..4); + assert_eq!(ih.new_range(), 1..4); +} + +#[test] +fn test_common_prefix_len() { + assert_eq!( + common_prefix_len("".as_bytes(), 0..0, "".as_bytes(), 0..0), + 0 + ); + assert_eq!( + common_prefix_len("foobarbaz".as_bytes(), 0..9, "foobarblah".as_bytes(), 0..10), + 7 + ); + assert_eq!( + common_prefix_len("foobarbaz".as_bytes(), 0..9, "blablabla".as_bytes(), 0..9), + 0 + ); + assert_eq!( + common_prefix_len("foobarbaz".as_bytes(), 3..9, "foobarblah".as_bytes(), 3..10), + 4 + ); +} + +#[test] +fn test_common_suffix_len() { + assert_eq!( + common_suffix_len("".as_bytes(), 0..0, "".as_bytes(), 0..0), + 0 + ); + assert_eq!( + common_suffix_len("1234".as_bytes(), 0..4, "X0001234".as_bytes(), 0..8), + 4 + ); + assert_eq!( + common_suffix_len("1234".as_bytes(), 0..4, "Xxxx".as_bytes(), 0..4), + 0 + ); + assert_eq!( + common_suffix_len("1234".as_bytes(), 2..4, "01234".as_bytes(), 2..5), + 2 + ); +} diff --git a/vendor/similar/src/common.rs b/vendor/similar/src/common.rs new file mode 100644 index 000000000..40d1ae7ac --- /dev/null +++ b/vendor/similar/src/common.rs @@ -0,0 +1,185 @@ +use std::hash::Hash; +use std::ops::{Index, Range}; +use std::time::Instant; + +use crate::algorithms::{diff_deadline, Capture, Compact, Replace}; +use crate::{Algorithm, DiffOp}; + +/// Creates a diff between old and new with the given algorithm capturing the ops. +/// +/// This is like [`diff`](crate::algorithms::diff) but instead of using an +/// arbitrary hook this will always use [`Compact`] + [`Replace`] + [`Capture`] +/// and return the captured [`DiffOp`]s. +pub fn capture_diff( + alg: Algorithm, + old: &Old, + old_range: Range, + new: &New, + new_range: Range, +) -> Vec +where + Old: Index + ?Sized, + New: Index + ?Sized, + Old::Output: Hash + Eq + Ord, + New::Output: PartialEq + Hash + Eq + Ord, +{ + capture_diff_deadline(alg, old, old_range, new, new_range, None) +} + +/// Creates a diff between old and new with the given algorithm capturing the ops. +/// +/// Works like [`capture_diff`] but with an optional deadline. +pub fn capture_diff_deadline( + alg: Algorithm, + old: &Old, + old_range: Range, + new: &New, + new_range: Range, + deadline: Option, +) -> Vec +where + Old: Index + ?Sized, + New: Index + ?Sized, + Old::Output: Hash + Eq + Ord, + New::Output: PartialEq + Hash + Eq + Ord, +{ + let mut d = Compact::new(Replace::new(Capture::new()), old, new); + diff_deadline(alg, &mut d, old, old_range, new, new_range, deadline).unwrap(); + d.into_inner().into_inner().into_ops() +} + +/// Creates a diff between old and new with the given algorithm capturing the ops. +pub fn capture_diff_slices(alg: Algorithm, old: &[T], new: &[T]) -> Vec +where + T: Eq + Hash + Ord, +{ + capture_diff_slices_deadline(alg, old, new, None) +} + +/// Creates a diff between old and new with the given algorithm capturing the ops. +/// +/// Works like [`capture_diff_slices`] but with an optional deadline. +pub fn capture_diff_slices_deadline( + alg: Algorithm, + old: &[T], + new: &[T], + deadline: Option, +) -> Vec +where + T: Eq + Hash + Ord, +{ + capture_diff_deadline(alg, old, 0..old.len(), new, 0..new.len(), deadline) +} + +/// Return a measure of similarity in the range `0..=1`. +/// +/// A ratio of `1.0` means the two sequences are a complete match, a +/// ratio of `0.0` would indicate completely distinct sequences. The input +/// is the sequence of diff operations and the length of the old and new +/// sequence. +pub fn get_diff_ratio(ops: &[DiffOp], old_len: usize, new_len: usize) -> f32 { + let matches = ops + .iter() + .map(|op| { + if let DiffOp::Equal { len, .. } = *op { + len + } else { + 0 + } + }) + .sum::(); + let len = old_len + new_len; + if len == 0 { + 1.0 + } else { + 2.0 * matches as f32 / len as f32 + } +} + +/// Isolate change clusters by eliminating ranges with no changes. +/// +/// This will leave holes behind in long periods of equal ranges so that +/// you can build things like unified diffs. +pub fn group_diff_ops(mut ops: Vec, n: usize) -> Vec> { + if ops.is_empty() { + return vec![]; + } + + let mut pending_group = Vec::new(); + let mut rv = Vec::new(); + + if let Some(DiffOp::Equal { + old_index, + new_index, + len, + }) = ops.first_mut() + { + let offset = (*len).saturating_sub(n); + *old_index += offset; + *new_index += offset; + *len -= offset; + } + + if let Some(DiffOp::Equal { len, .. }) = ops.last_mut() { + *len -= (*len).saturating_sub(n); + } + + for op in ops.into_iter() { + if let DiffOp::Equal { + old_index, + new_index, + len, + } = op + { + // End the current group and start a new one whenever + // there is a large range with no changes. + if len > n * 2 { + pending_group.push(DiffOp::Equal { + old_index, + new_index, + len: n, + }); + rv.push(pending_group); + let offset = len.saturating_sub(n); + pending_group = vec![DiffOp::Equal { + old_index: old_index + offset, + new_index: new_index + offset, + len: len - offset, + }]; + continue; + } + } + pending_group.push(op); + } + + match &pending_group[..] { + &[] | &[DiffOp::Equal { .. }] => {} + _ => rv.push(pending_group), + } + + rv +} + +#[test] +fn test_non_string_iter_change() { + use crate::ChangeTag; + + let old = vec![1, 2, 3]; + let new = vec![1, 2, 4]; + let ops = capture_diff_slices(Algorithm::Myers, &old, &new); + let changes: Vec<_> = ops + .iter() + .flat_map(|x| x.iter_changes(&old, &new)) + .map(|x| (x.tag(), x.value())) + .collect(); + + assert_eq!( + changes, + vec![ + (ChangeTag::Equal, 1), + (ChangeTag::Equal, 2), + (ChangeTag::Delete, 3), + (ChangeTag::Insert, 4), + ] + ); +} diff --git a/vendor/similar/src/iter.rs b/vendor/similar/src/iter.rs new file mode 100644 index 000000000..0a1ae5366 --- /dev/null +++ b/vendor/similar/src/iter.rs @@ -0,0 +1,195 @@ +//! The various iterators this crate provides. +//! +//! These iterators are not a very stable interface and you really should +//! avoid considering them to be concrete types. A lot of the iterators in +//! this crate use `impl Iterator` for this reason but restrictions in the +//! language don't allow this to be used in all places on the versions of +//! rust this crate wants to compile for. +use std::marker::PhantomData; +use std::ops::{Index, Range}; + +use crate::{Change, ChangeTag, DiffOp, DiffTag}; + +/// Iterator for [`DiffOp::iter_changes`]. +pub struct ChangesIter<'lookup, Old: ?Sized, New: ?Sized, T> { + old: &'lookup Old, + new: &'lookup New, + old_range: Range, + new_range: Range, + old_index: usize, + new_index: usize, + old_i: usize, + new_i: usize, + tag: DiffTag, + _marker: PhantomData, +} + +impl<'lookup, Old, New, T> ChangesIter<'lookup, Old, New, T> +where + Old: Index + ?Sized, + New: Index + ?Sized, +{ + pub(crate) fn new(old: &'lookup Old, new: &'lookup New, op: DiffOp) -> Self { + let (tag, old_range, new_range) = op.as_tag_tuple(); + let old_index = old_range.start; + let new_index = new_range.start; + let old_i = old_range.start; + let new_i = new_range.start; + ChangesIter { + old, + new, + old_range, + new_range, + old_index, + new_index, + old_i, + new_i, + tag, + _marker: PhantomData, + } + } +} + +impl<'lookup, Old, New, T> Iterator for ChangesIter<'lookup, Old, New, T> +where + Old: Index + ?Sized, + New: Index + ?Sized, + T: Clone, +{ + type Item = Change; + + fn next(&mut self) -> Option { + match self.tag { + DiffTag::Equal => { + if self.old_i < self.old_range.end { + let value = self.old[self.old_i].clone(); + self.old_i += 1; + self.old_index += 1; + self.new_index += 1; + Some(Change { + tag: ChangeTag::Equal, + old_index: Some(self.old_index - 1), + new_index: Some(self.new_index - 1), + value, + }) + } else { + None + } + } + DiffTag::Delete => { + if self.old_i < self.old_range.end { + let value = self.old[self.old_i].clone(); + self.old_i += 1; + self.old_index += 1; + Some(Change { + tag: ChangeTag::Delete, + old_index: Some(self.old_index - 1), + new_index: None, + value, + }) + } else { + None + } + } + DiffTag::Insert => { + if self.new_i < self.new_range.end { + let value = self.new[self.new_i].clone(); + self.new_i += 1; + self.new_index += 1; + Some(Change { + tag: ChangeTag::Insert, + old_index: None, + new_index: Some(self.new_index - 1), + value, + }) + } else { + None + } + } + DiffTag::Replace => { + if self.old_i < self.old_range.end { + let value = self.old[self.old_i].clone(); + self.old_i += 1; + self.old_index += 1; + Some(Change { + tag: ChangeTag::Delete, + old_index: Some(self.old_index - 1), + new_index: None, + value, + }) + } else if self.new_i < self.new_range.end { + let value = self.new[self.new_i].clone(); + self.new_i += 1; + self.new_index += 1; + Some(Change { + tag: ChangeTag::Insert, + old_index: None, + new_index: Some(self.new_index - 1), + value, + }) + } else { + None + } + } + } + } +} + +#[cfg(feature = "text")] +mod text { + use super::*; + + /// Iterator for [`TextDiff::iter_all_changes`](crate::TextDiff::iter_all_changes). + pub struct AllChangesIter<'slf, 'data, T: ?Sized> { + old: &'slf [&'data T], + new: &'slf [&'data T], + ops: &'slf [DiffOp], + current_iter: Option>, + } + + impl<'slf, 'data, T> AllChangesIter<'slf, 'data, T> + where + T: 'data + ?Sized + PartialEq, + { + pub(crate) fn new( + old: &'slf [&'data T], + new: &'slf [&'data T], + ops: &'slf [DiffOp], + ) -> Self { + AllChangesIter { + old, + new, + ops, + current_iter: None, + } + } + } + + impl<'slf, 'data, T> Iterator for AllChangesIter<'slf, 'data, T> + where + T: PartialEq + 'data + ?Sized, + 'data: 'slf, + { + type Item = Change<&'data T>; + + fn next(&mut self) -> Option { + loop { + if let Some(ref mut iter) = self.current_iter { + if let Some(rv) = iter.next() { + return Some(rv); + } + self.current_iter.take(); + } + if let Some((&first, rest)) = self.ops.split_first() { + self.current_iter = Some(ChangesIter::new(self.old, self.new, first)); + self.ops = rest; + } else { + return None; + } + } + } + } +} + +#[cfg(feature = "text")] +pub use self::text::*; diff --git a/vendor/similar/src/lib.rs b/vendor/similar/src/lib.rs new file mode 100644 index 000000000..8741285f6 --- /dev/null +++ b/vendor/similar/src/lib.rs @@ -0,0 +1,163 @@ +//! This crate implements diffing utilities. It attempts to provide an abstraction +//! interface over different types of diffing algorithms. The design of the +//! library is inspired by pijul's diff library by Pierre-Étienne Meunier and +//! also inherits the patience diff algorithm from there. +//! +//! The API of the crate is split into high and low level functionality. Most +//! of what you probably want to use is available top level. Additionally the +//! following sub modules exist: +//! +//! * [`algorithms`]: This implements the different types of diffing algorithms. +//! It provides both low level access to the algorithms with the minimal +//! trait bounds necessary, as well as a generic interface. +//! * [`udiff`]: Unified diff functionality. +//! * [`utils`]: utilities for common diff related operations. This module +//! provides additional diffing functions for working with text diffs. +//! +//! # Sequence Diffing +//! +//! If you want to diff sequences generally indexable things you can use the +//! [`capture_diff`] and [`capture_diff_slices`] functions. They will directly +//! diff an indexable object or slice and return a vector of [`DiffOp`] objects. +//! +//! ```rust +//! use similar::{Algorithm, capture_diff_slices}; +//! +//! let a = vec![1, 2, 3, 4, 5]; +//! let b = vec![1, 2, 3, 4, 7]; +//! let ops = capture_diff_slices(Algorithm::Myers, &a, &b); +//! ``` +//! +//! # Text Diffing +//! +//! Similar provides helpful utilities for text (and more specifically line) diff +//! operations. The main type you want to work with is [`TextDiff`] which +//! uses the underlying diff algorithms to expose a convenient API to work with +//! texts: +//! +//! ```rust +//! # #[cfg(feature = "text")] { +//! use similar::{ChangeTag, TextDiff}; +//! +//! let diff = TextDiff::from_lines( +//! "Hello World\nThis is the second line.\nThis is the third.", +//! "Hallo Welt\nThis is the second line.\nThis is life.\nMoar and more", +//! ); +//! +//! for change in diff.iter_all_changes() { +//! let sign = match change.tag() { +//! ChangeTag::Delete => "-", +//! ChangeTag::Insert => "+", +//! ChangeTag::Equal => " ", +//! }; +//! print!("{}{}", sign, change); +//! } +//! # } +//! ``` +//! +//! ## Trailing Newlines +//! +//! When working with line diffs (and unified diffs in general) there are two +//! "philosophies" to look at lines. One is to diff lines without their newline +//! character, the other is to diff with the newline character. Typically the +//! latter is done because text files do not _have_ to end in a newline character. +//! As a result there is a difference between `foo\n` and `foo` as far as diffs +//! are concerned. +//! +//! In similar this is handled on the [`Change`] or [`InlineChange`] level. If +//! a diff was created via [`TextDiff::from_lines`] the text diffing system is +//! instructed to check if there are missing newlines encountered +//! ([`TextDiff::newline_terminated`] returns true). +//! +//! In any case the [`Change`] object has a convenience method called +//! [`Change::missing_newline`] which returns `true` if the change is missing +//! a trailing newline. Armed with that information the caller knows to handle +//! this by either rendering a virtual newline at that position or to indicate +//! it in different ways. For instance the unified diff code will render the +//! special `\ No newline at end of file` marker. +//! +//! ## Bytes vs Unicode +//! +//! Similar module concerns itself with a loser definition of "text" than you would +//! normally see in Rust. While by default it can only operate on [`str`] types +//! by enabling the `bytes` feature it gains support for byte slices with some +//! caveats. +//! +//! A lot of text diff functionality assumes that what is being diffed constitutes +//! text, but in the real world it can often be challenging to ensure that this is +//! all valid utf-8. Because of this the crate is built so that most functionality +//! also still works with bytes for as long as they are roughly ASCII compatible. +//! +//! This means you will be successful in creating a unified diff from latin1 +//! encoded bytes but if you try to do the same with EBCDIC encoded bytes you +//! will only get garbage. +//! +//! # Ops vs Changes +//! +//! Because very commonly two compared sequences will largely match this module +//! splits it's functionality into two layers: +//! +//! Changes are encoded as [diff operations](crate::DiffOp). These are +//! ranges of the differences by index in the source sequence. Because this +//! can be cumbersome to work with a separate method [`DiffOp::iter_changes`] +//! (and [`TextDiff::iter_changes`] when working with text diffs) is provided +//! which expands all the changes on an item by item level encoded in an operation. +//! +//! As the [`TextDiff::grouped_ops`] method can isolate clusters of changes +//! this even works for very long files if paired with this method. +//! +//! # Deadlines and Performance +//! +//! For large and very distinct inputs the algorithms as implemented can take +//! a very, very long time to execute. Too long to make sense in practice. +//! To work around this issue all diffing algorithms also provide a version +//! that accepts a deadline which is the point in time as defined by an +//! [`Instant`](std::time::Instant) after which the algorithm should give up. +//! What giving up means depends on the algorithm. For instance due to the +//! recursive, divide and conquer nature of Myer's diff you will still get a +//! pretty decent diff in many cases when a deadline is reached. Whereas on the +//! other hand the LCS diff is unlikely to give any decent results in such a +//! situation. +//! +//! The [`TextDiff`] type also lets you configure a deadline and/or timeout +//! when performing a text diff. +//! +//! # Feature Flags +//! +//! The crate by default does not have any dependencies however for some use +//! cases it's useful to pull in extra functionality. Likewise you can turn +//! off some functionality. +//! +//! * `text`: this feature is enabled by default and enables the text based +//! diffing types such as [`TextDiff`]. +//! If the crate is used without default features it's removed. +//! * `unicode`: when this feature is enabled the text diffing functionality +//! gains the ability to diff on a grapheme instead of character level. This +//! is particularly useful when working with text containing emojis. This +//! pulls in some relatively complex dependencies for working with the unicode +//! database. +//! * `bytes`: this feature adds support for working with byte slices in text +//! APIs in addition to unicode strings. This pulls in the +//! [`bstr`] dependency. +//! * `inline`: this feature gives access to additional functionality of the +//! text diffing to provide inline information about which values changed +//! in a line diff. This currently also enables the `unicode` feature. +//! * `serde`: this feature enables serialization to some types in this +//! crate. For enums without payload deserialization is then also supported. +#![warn(missing_docs)] +pub mod algorithms; +pub mod iter; +#[cfg(feature = "text")] +pub mod udiff; +#[cfg(feature = "text")] +pub mod utils; + +mod common; +#[cfg(feature = "text")] +mod text; +mod types; + +pub use self::common::*; +#[cfg(feature = "text")] +pub use self::text::*; +pub use self::types::*; diff --git a/vendor/similar/src/snapshots/similar__udiff__unified_diff.snap b/vendor/similar/src/snapshots/similar__udiff__unified_diff.snap new file mode 100644 index 000000000..5345b0b9a --- /dev/null +++ b/vendor/similar/src/snapshots/similar__udiff__unified_diff.snap @@ -0,0 +1,25 @@ +--- +source: src/udiff.rs +expression: "&diff.unified_diff().header(\"a.txt\", \"b.txt\").to_string()" +--- +--- a.txt ++++ b.txt +@@ -16,7 +16,7 @@ + p + q + r +-s ++S + t + u + v +@@ -38,7 +38,7 @@ + L + M + N +-O ++o + P + Q + R + diff --git a/vendor/similar/src/snapshots/similar__udiff__unified_diff_newline_hint-2.snap b/vendor/similar/src/snapshots/similar__udiff__unified_diff_newline_hint-2.snap new file mode 100644 index 000000000..9a5d92221 --- /dev/null +++ b/vendor/similar/src/snapshots/similar__udiff__unified_diff_newline_hint-2.snap @@ -0,0 +1,10 @@ +--- +source: src/udiff.rs +expression: "&diff.unified_diff().missing_newline_hint(false).header(\"a.txt\",\n \"b.txt\").to_string()" +--- +--- a.txt ++++ b.txt +@@ -1 +1 @@ +-a ++b + diff --git a/vendor/similar/src/snapshots/similar__udiff__unified_diff_newline_hint.snap b/vendor/similar/src/snapshots/similar__udiff__unified_diff_newline_hint.snap new file mode 100644 index 000000000..d181b2828 --- /dev/null +++ b/vendor/similar/src/snapshots/similar__udiff__unified_diff_newline_hint.snap @@ -0,0 +1,11 @@ +--- +source: src/udiff.rs +expression: "&diff.unified_diff().header(\"a.txt\", \"b.txt\").to_string()" +--- +--- a.txt ++++ b.txt +@@ -1 +1 @@ +-a ++b +\ No newline at end of file + diff --git a/vendor/similar/src/text/abstraction.rs b/vendor/similar/src/text/abstraction.rs new file mode 100644 index 000000000..99678ff1c --- /dev/null +++ b/vendor/similar/src/text/abstraction.rs @@ -0,0 +1,450 @@ +use std::borrow::Cow; +use std::hash::Hash; +use std::ops::Range; + +/// Reference to a [`DiffableStr`]. +/// +/// This type exists because while the library only really provides ways to +/// work with `&str` and `&[u8]` there are types that deref into those string +/// slices such as `String` and `Vec`. +/// +/// This trait is used in the library whenever it's nice to be able to pass +/// strings of different types in. +/// +/// Requires the `text` feature. +pub trait DiffableStrRef { + /// The type of the resolved [`DiffableStr`]. + type Output: DiffableStr + ?Sized; + + /// Resolves the reference. + fn as_diffable_str(&self) -> &Self::Output; +} + +impl DiffableStrRef for T { + type Output = T; + + fn as_diffable_str(&self) -> &T { + self + } +} + +impl DiffableStrRef for String { + type Output = str; + + fn as_diffable_str(&self) -> &str { + self.as_str() + } +} + +impl<'a, T: DiffableStr + ?Sized> DiffableStrRef for Cow<'a, T> { + type Output = T; + + fn as_diffable_str(&self) -> &T { + self + } +} + +/// All supported diffable strings. +/// +/// The text module can work with different types of strings depending +/// on how the crate is compiled. Out of the box `&str` is always supported +/// but with the `bytes` feature one can also work with `[u8]` slices for +/// as long as they are ASCII compatible. +/// +/// Requires the `text` feature. +pub trait DiffableStr: Hash + PartialEq + PartialOrd + Ord + Eq + ToOwned { + /// Splits the value into newlines with newlines attached. + fn tokenize_lines(&self) -> Vec<&Self>; + + /// Splits the value into newlines with newlines separated. + fn tokenize_lines_and_newlines(&self) -> Vec<&Self>; + + /// Tokenizes into words. + fn tokenize_words(&self) -> Vec<&Self>; + + /// Tokenizes the input into characters. + fn tokenize_chars(&self) -> Vec<&Self>; + + /// Tokenizes into unicode words. + #[cfg(feature = "unicode")] + fn tokenize_unicode_words(&self) -> Vec<&Self>; + + /// Tokenizes into unicode graphemes. + #[cfg(feature = "unicode")] + fn tokenize_graphemes(&self) -> Vec<&Self>; + + /// Decodes the string (potentially) lossy. + fn as_str(&self) -> Option<&str>; + + /// Decodes the string (potentially) lossy. + fn to_string_lossy(&self) -> Cow<'_, str>; + + /// Checks if the string ends in a newline. + fn ends_with_newline(&self) -> bool; + + /// The length of the string. + fn len(&self) -> usize; + + /// Slices the string. + fn slice(&self, rng: Range) -> &Self; + + /// Returns the string as slice of raw bytes. + fn as_bytes(&self) -> &[u8]; + + /// Checks if the string is empty. + fn is_empty(&self) -> bool { + self.len() == 0 + } +} + +impl DiffableStr for str { + fn tokenize_lines(&self) -> Vec<&Self> { + let mut iter = self.char_indices().peekable(); + let mut last_pos = 0; + let mut lines = vec![]; + + while let Some((idx, c)) = iter.next() { + if c == '\r' { + if iter.peek().map_or(false, |x| x.1 == '\n') { + lines.push(&self[last_pos..=idx + 1]); + iter.next(); + last_pos = idx + 2; + } else { + lines.push(&self[last_pos..=idx]); + last_pos = idx + 1; + } + } else if c == '\n' { + lines.push(&self[last_pos..=idx]); + last_pos = idx + 1; + } + } + + if last_pos < self.len() { + lines.push(&self[last_pos..]); + } + + lines + } + + fn tokenize_lines_and_newlines(&self) -> Vec<&Self> { + let mut rv = vec![]; + let mut iter = self.char_indices().peekable(); + + while let Some((idx, c)) = iter.next() { + let is_newline = c == '\r' || c == '\n'; + let start = idx; + let mut end = idx + c.len_utf8(); + while let Some(&(_, next_char)) = iter.peek() { + if (next_char == '\r' || next_char == '\n') != is_newline { + break; + } + iter.next(); + end += next_char.len_utf8(); + } + rv.push(&self[start..end]); + } + + rv + } + + fn tokenize_words(&self) -> Vec<&Self> { + let mut iter = self.char_indices().peekable(); + let mut rv = vec![]; + + while let Some((idx, c)) = iter.next() { + let is_whitespace = c.is_whitespace(); + let start = idx; + let mut end = idx + c.len_utf8(); + while let Some(&(_, next_char)) = iter.peek() { + if next_char.is_whitespace() != is_whitespace { + break; + } + iter.next(); + end += next_char.len_utf8(); + } + rv.push(&self[start..end]); + } + + rv + } + + fn tokenize_chars(&self) -> Vec<&Self> { + self.char_indices() + .map(move |(i, c)| &self[i..i + c.len_utf8()]) + .collect() + } + + #[cfg(feature = "unicode")] + fn tokenize_unicode_words(&self) -> Vec<&Self> { + unicode_segmentation::UnicodeSegmentation::split_word_bounds(self).collect() + } + + #[cfg(feature = "unicode")] + fn tokenize_graphemes(&self) -> Vec<&Self> { + unicode_segmentation::UnicodeSegmentation::graphemes(self, true).collect() + } + + fn as_str(&self) -> Option<&str> { + Some(self) + } + + fn to_string_lossy(&self) -> Cow<'_, str> { + Cow::Borrowed(self) + } + + fn ends_with_newline(&self) -> bool { + self.ends_with(&['\r', '\n'][..]) + } + + fn len(&self) -> usize { + str::len(self) + } + + fn slice(&self, rng: Range) -> &Self { + &self[rng] + } + + fn as_bytes(&self) -> &[u8] { + str::as_bytes(self) + } +} + +#[cfg(feature = "bytes")] +mod bytes_support { + use super::*; + + use bstr::ByteSlice; + + impl DiffableStrRef for Vec { + type Output = [u8]; + + fn as_diffable_str(&self) -> &[u8] { + self.as_slice() + } + } + + /// Allows viewing ASCII compatible byte slices as strings. + /// + /// Requires the `bytes` feature. + impl DiffableStr for [u8] { + fn tokenize_lines(&self) -> Vec<&Self> { + let mut iter = self.char_indices().peekable(); + let mut last_pos = 0; + let mut lines = vec![]; + + while let Some((_, end, c)) = iter.next() { + if c == '\r' { + if iter.peek().map_or(false, |x| x.2 == '\n') { + lines.push(&self[last_pos..end + 1]); + iter.next(); + last_pos = end + 1; + } else { + lines.push(&self[last_pos..end]); + last_pos = end; + } + } else if c == '\n' { + lines.push(&self[last_pos..end]); + last_pos = end; + } + } + + if last_pos < self.len() { + lines.push(&self[last_pos..]); + } + + lines + } + + fn tokenize_lines_and_newlines(&self) -> Vec<&Self> { + let mut rv = vec![]; + let mut iter = self.char_indices().peekable(); + + while let Some((start, mut end, c)) = iter.next() { + let is_newline = c == '\r' || c == '\n'; + while let Some(&(_, new_end, next_char)) = iter.peek() { + if (next_char == '\r' || next_char == '\n') != is_newline { + break; + } + iter.next(); + end = new_end; + } + rv.push(&self[start..end]); + } + + rv + } + + fn tokenize_words(&self) -> Vec<&Self> { + let mut iter = self.char_indices().peekable(); + let mut rv = vec![]; + + while let Some((start, mut end, c)) = iter.next() { + let is_whitespace = c.is_whitespace(); + while let Some(&(_, new_end, next_char)) = iter.peek() { + if next_char.is_whitespace() != is_whitespace { + break; + } + iter.next(); + end = new_end; + } + rv.push(&self[start..end]); + } + + rv + } + + #[cfg(feature = "unicode")] + fn tokenize_unicode_words(&self) -> Vec<&Self> { + self.words_with_breaks().map(|x| x.as_bytes()).collect() + } + + #[cfg(feature = "unicode")] + fn tokenize_graphemes(&self) -> Vec<&Self> { + self.graphemes().map(|x| x.as_bytes()).collect() + } + + fn tokenize_chars(&self) -> Vec<&Self> { + self.char_indices() + .map(move |(start, end, _)| &self[start..end]) + .collect() + } + + fn as_str(&self) -> Option<&str> { + std::str::from_utf8(self).ok() + } + + fn to_string_lossy(&self) -> Cow<'_, str> { + String::from_utf8_lossy(self) + } + + fn ends_with_newline(&self) -> bool { + if let Some(b'\r') | Some(b'\n') = self.last_byte() { + true + } else { + false + } + } + + fn len(&self) -> usize { + <[u8]>::len(self) + } + + fn slice(&self, rng: Range) -> &Self { + &self[rng] + } + + fn as_bytes(&self) -> &[u8] { + self + } + } +} + +#[test] +fn test_split_lines() { + assert_eq!( + DiffableStr::tokenize_lines("first\nsecond\rthird\r\nfourth\nlast"), + vec!["first\n", "second\r", "third\r\n", "fourth\n", "last"] + ); + assert_eq!(DiffableStr::tokenize_lines("\n\n"), vec!["\n", "\n"]); + assert_eq!(DiffableStr::tokenize_lines("\n"), vec!["\n"]); + assert!(DiffableStr::tokenize_lines("").is_empty()); +} + +#[test] +fn test_split_words() { + assert_eq!( + DiffableStr::tokenize_words("foo bar baz\n\n aha"), + ["foo", " ", "bar", " ", "baz", "\n\n ", "aha"] + ); +} + +#[test] +fn test_split_chars() { + assert_eq!( + DiffableStr::tokenize_chars("abcfö❄️"), + vec!["a", "b", "c", "f", "ö", "❄", "\u{fe0f}"] + ); +} + +#[test] +#[cfg(feature = "unicode")] +fn test_split_graphemes() { + assert_eq!( + DiffableStr::tokenize_graphemes("abcfö❄️"), + vec!["a", "b", "c", "f", "ö", "❄️"] + ); +} + +#[test] +#[cfg(feature = "bytes")] +fn test_split_lines_bytes() { + assert_eq!( + DiffableStr::tokenize_lines("first\nsecond\rthird\r\nfourth\nlast".as_bytes()), + vec![ + "first\n".as_bytes(), + "second\r".as_bytes(), + "third\r\n".as_bytes(), + "fourth\n".as_bytes(), + "last".as_bytes() + ] + ); + assert_eq!( + DiffableStr::tokenize_lines("\n\n".as_bytes()), + vec!["\n".as_bytes(), "\n".as_bytes()] + ); + assert_eq!( + DiffableStr::tokenize_lines("\n".as_bytes()), + vec!["\n".as_bytes()] + ); + assert!(DiffableStr::tokenize_lines("".as_bytes()).is_empty()); +} + +#[test] +#[cfg(feature = "bytes")] +fn test_split_words_bytes() { + assert_eq!( + DiffableStr::tokenize_words("foo bar baz\n\n aha".as_bytes()), + [ + &b"foo"[..], + &b" "[..], + &b"bar"[..], + &b" "[..], + &b"baz"[..], + &b"\n\n "[..], + &b"aha"[..] + ] + ); +} + +#[test] +#[cfg(feature = "bytes")] +fn test_split_chars_bytes() { + assert_eq!( + DiffableStr::tokenize_chars("abcfö❄️".as_bytes()), + vec![ + &b"a"[..], + &b"b"[..], + &b"c"[..], + &b"f"[..], + "ö".as_bytes(), + "❄".as_bytes(), + "\u{fe0f}".as_bytes() + ] + ); +} + +#[test] +#[cfg(all(feature = "bytes", feature = "unicode"))] +fn test_split_graphemes_bytes() { + assert_eq!( + DiffableStr::tokenize_graphemes("abcfö❄️".as_bytes()), + vec![ + &b"a"[..], + &b"b"[..], + &b"c"[..], + &b"f"[..], + "ö".as_bytes(), + "❄️".as_bytes() + ] + ); +} diff --git a/vendor/similar/src/text/inline.rs b/vendor/similar/src/text/inline.rs new file mode 100644 index 000000000..79d6c8238 --- /dev/null +++ b/vendor/similar/src/text/inline.rs @@ -0,0 +1,337 @@ +#![cfg(feature = "inline")] +use std::borrow::Cow; +use std::fmt; + +use crate::text::{DiffableStr, TextDiff}; +use crate::types::{Algorithm, Change, ChangeTag, DiffOp, DiffTag}; +use crate::{capture_diff_deadline, get_diff_ratio}; + +use std::ops::Index; +use std::time::{Duration, Instant}; + +use super::utils::upper_seq_ratio; + +struct MultiLookup<'bufs, 's, T: DiffableStr + ?Sized> { + strings: &'bufs [&'s T], + seqs: Vec<(&'s T, usize, usize)>, +} + +impl<'bufs, 's, T: DiffableStr + ?Sized> MultiLookup<'bufs, 's, T> { + fn new(strings: &'bufs [&'s T]) -> MultiLookup<'bufs, 's, T> { + let mut seqs = Vec::new(); + for (string_idx, string) in strings.iter().enumerate() { + let mut offset = 0; + let iter = { + #[cfg(feature = "unicode")] + { + string.tokenize_unicode_words() + } + #[cfg(not(feature = "unicode"))] + { + string.tokenize_words() + } + }; + for word in iter { + seqs.push((word, string_idx, offset)); + offset += word.len(); + } + } + MultiLookup { strings, seqs } + } + + pub fn len(&self) -> usize { + self.seqs.len() + } + + fn get_original_slices(&self, idx: usize, len: usize) -> Vec<(usize, &'s T)> { + let mut last = None; + let mut rv = Vec::new(); + + for offset in 0..len { + let (s, str_idx, char_idx) = self.seqs[idx + offset]; + last = match last { + None => Some((str_idx, char_idx, s.len())), + Some((last_str_idx, start_char_idx, last_len)) => { + if last_str_idx == str_idx { + Some((str_idx, start_char_idx, last_len + s.len())) + } else { + rv.push(( + last_str_idx, + self.strings[last_str_idx] + .slice(start_char_idx..start_char_idx + last_len), + )); + Some((str_idx, char_idx, s.len())) + } + } + }; + } + + if let Some((str_idx, start_char_idx, len)) = last { + rv.push(( + str_idx, + self.strings[str_idx].slice(start_char_idx..start_char_idx + len), + )); + } + + rv + } +} + +impl<'bufs, 's, T: DiffableStr + ?Sized> Index for MultiLookup<'bufs, 's, T> { + type Output = T; + + fn index(&self, index: usize) -> &Self::Output { + self.seqs[index].0 + } +} + +fn push_values<'s, T: DiffableStr + ?Sized>( + v: &mut Vec>, + idx: usize, + emphasized: bool, + s: &'s T, +) { + v.resize_with(v.len().max(idx + 1), Vec::new); + // newlines cause all kinds of wacky stuff if they end up highlighted. + // because of this we want to unemphasize all newlines we encounter. + if emphasized { + for seg in s.tokenize_lines_and_newlines() { + v[idx].push((!seg.ends_with_newline(), seg)); + } + } else { + v[idx].push((false, s)); + } +} + +/// Represents the expanded textual change with inline highlights. +/// +/// This is like [`Change`] but with inline highlight info. +#[derive(Debug, PartialEq, Eq, Hash, Clone, Ord, PartialOrd)] +#[cfg_attr(feature = "serde", derive(serde::Serialize))] +pub struct InlineChange<'s, T: DiffableStr + ?Sized> { + tag: ChangeTag, + old_index: Option, + new_index: Option, + values: Vec<(bool, &'s T)>, +} + +impl<'s, T: DiffableStr + ?Sized> InlineChange<'s, T> { + /// Returns the change tag. + pub fn tag(&self) -> ChangeTag { + self.tag + } + + /// Returns the old index if available. + pub fn old_index(&self) -> Option { + self.old_index + } + + /// Returns the new index if available. + pub fn new_index(&self) -> Option { + self.new_index + } + + /// Returns the changed values. + /// + /// Each item is a tuple in the form `(emphasized, value)` where `emphasized` + /// is true if it should be highlighted as an inline diff. + /// + /// Depending on the type of the underlying [`DiffableStr`] this value is + /// more or less useful. If you always want to have a utf-8 string it's + /// better to use the [`InlineChange::iter_strings_lossy`] method. + pub fn values(&self) -> &[(bool, &'s T)] { + &self.values + } + + /// Iterates over all (potentially lossy) utf-8 decoded values. + /// + /// Each item is a tuple in the form `(emphasized, value)` where `emphasized` + /// is true if it should be highlighted as an inline diff. + pub fn iter_strings_lossy(&self) -> impl Iterator)> { + self.values() + .iter() + .map(|(emphasized, raw_value)| (*emphasized, raw_value.to_string_lossy())) + } + + /// Returns `true` if this change does not end in a newline and must be + /// followed up by one if line based diffs are used. + pub fn missing_newline(&self) -> bool { + !self.values.last().map_or(true, |x| x.1.ends_with_newline()) + } +} + +impl<'s, T: DiffableStr + ?Sized> From> for InlineChange<'s, T> { + fn from(change: Change<&'s T>) -> InlineChange<'s, T> { + InlineChange { + tag: change.tag(), + old_index: change.old_index(), + new_index: change.new_index(), + values: vec![(false, change.value())], + } + } +} + +impl<'s, T: DiffableStr + ?Sized> fmt::Display for InlineChange<'s, T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for (emphasized, value) in self.iter_strings_lossy() { + let marker = match (emphasized, self.tag) { + (false, _) | (true, ChangeTag::Equal) => "", + (true, ChangeTag::Delete) => "-", + (true, ChangeTag::Insert) => "+", + }; + write!(f, "{}{}{}", marker, value, marker)?; + } + if self.missing_newline() { + writeln!(f)?; + } + Ok(()) + } +} + +const MIN_RATIO: f32 = 0.5; +const TIMEOUT_MS: u64 = 500; + +pub(crate) fn iter_inline_changes<'x, 'diff, 'old, 'new, 'bufs, T>( + diff: &'diff TextDiff<'old, 'new, 'bufs, T>, + op: &DiffOp, +) -> impl Iterator> + 'diff +where + T: DiffableStr + ?Sized, + 'x: 'diff, + 'old: 'x, + 'new: 'x, +{ + let (tag, old_range, new_range) = op.as_tag_tuple(); + + if let DiffTag::Equal | DiffTag::Insert | DiffTag::Delete = tag { + return Box::new(diff.iter_changes(op).map(|x| x.into())) as Box>; + } + + let mut old_index = old_range.start; + let mut new_index = new_range.start; + let old_slices = &diff.old_slices()[old_range]; + let new_slices = &diff.new_slices()[new_range]; + + if upper_seq_ratio(old_slices, new_slices) < MIN_RATIO { + return Box::new(diff.iter_changes(op).map(|x| x.into())) as Box>; + } + + let old_lookup = MultiLookup::new(old_slices); + let new_lookup = MultiLookup::new(new_slices); + + let ops = capture_diff_deadline( + Algorithm::Patience, + &old_lookup, + 0..old_lookup.len(), + &new_lookup, + 0..new_lookup.len(), + Some(Instant::now() + Duration::from_millis(TIMEOUT_MS)), + ); + + if get_diff_ratio(&ops, old_lookup.len(), new_lookup.len()) < MIN_RATIO { + return Box::new(diff.iter_changes(op).map(|x| x.into())) as Box>; + } + + let mut old_values = Vec::>::new(); + let mut new_values = Vec::>::new(); + + for op in ops { + match op { + DiffOp::Equal { + old_index, + len, + new_index, + } => { + for (idx, slice) in old_lookup.get_original_slices(old_index, len) { + push_values(&mut old_values, idx, false, slice); + } + for (idx, slice) in new_lookup.get_original_slices(new_index, len) { + push_values(&mut new_values, idx, false, slice); + } + } + DiffOp::Delete { + old_index, old_len, .. + } => { + for (idx, slice) in old_lookup.get_original_slices(old_index, old_len) { + push_values(&mut old_values, idx, true, slice); + } + } + DiffOp::Insert { + new_index, new_len, .. + } => { + for (idx, slice) in new_lookup.get_original_slices(new_index, new_len) { + push_values(&mut new_values, idx, true, slice); + } + } + DiffOp::Replace { + old_index, + old_len, + new_index, + new_len, + } => { + for (idx, slice) in old_lookup.get_original_slices(old_index, old_len) { + push_values(&mut old_values, idx, true, slice); + } + for (idx, slice) in new_lookup.get_original_slices(new_index, new_len) { + push_values(&mut new_values, idx, true, slice); + } + } + } + } + + let mut rv = Vec::new(); + + for values in old_values { + rv.push(InlineChange { + tag: ChangeTag::Delete, + old_index: Some(old_index), + new_index: None, + values, + }); + old_index += 1; + } + + for values in new_values { + rv.push(InlineChange { + tag: ChangeTag::Insert, + old_index: None, + new_index: Some(new_index), + values, + }); + new_index += 1; + } + + Box::new(rv.into_iter()) as Box> +} + +#[test] +fn test_line_ops_inline() { + let diff = TextDiff::from_lines( + "Hello World\nsome stuff here\nsome more stuff here\n\nAha stuff here\nand more stuff", + "Stuff\nHello World\nsome amazing stuff here\nsome more stuff here\n", + ); + assert_eq!(diff.newline_terminated(), true); + let changes = diff + .ops() + .iter() + .flat_map(|op| diff.iter_inline_changes(op)) + .collect::>(); + insta::assert_debug_snapshot!(&changes); +} + +#[test] +#[cfg(feature = "serde")] +fn test_serde() { + let diff = TextDiff::from_lines( + "Hello World\nsome stuff here\nsome more stuff here\n\nAha stuff here\nand more stuff", + "Stuff\nHello World\nsome amazing stuff here\nsome more stuff here\n", + ); + assert_eq!(diff.newline_terminated(), true); + let changes = diff + .ops() + .iter() + .flat_map(|op| diff.iter_inline_changes(op)) + .collect::>(); + let json = serde_json::to_string_pretty(&changes).unwrap(); + insta::assert_snapshot!(&json); +} diff --git a/vendor/similar/src/text/mod.rs b/vendor/similar/src/text/mod.rs new file mode 100644 index 000000000..b1a45f3c3 --- /dev/null +++ b/vendor/similar/src/text/mod.rs @@ -0,0 +1,770 @@ +//! Text diffing utilities. +use std::borrow::Cow; +use std::cmp::Reverse; +use std::collections::BinaryHeap; +use std::time::{Duration, Instant}; + +mod abstraction; +#[cfg(feature = "inline")] +mod inline; +mod utils; + +pub use self::abstraction::{DiffableStr, DiffableStrRef}; +#[cfg(feature = "inline")] +pub use self::inline::InlineChange; + +use self::utils::{upper_seq_ratio, QuickSeqRatio}; +use crate::algorithms::IdentifyDistinct; +use crate::iter::{AllChangesIter, ChangesIter}; +use crate::udiff::UnifiedDiff; +use crate::{capture_diff_deadline, get_diff_ratio, group_diff_ops, Algorithm, DiffOp}; + +#[derive(Debug, Clone, Copy)] +enum Deadline { + Absolute(Instant), + Relative(Duration), +} + +impl Deadline { + fn into_instant(self) -> Instant { + match self { + Deadline::Absolute(instant) => instant, + Deadline::Relative(duration) => Instant::now() + duration, + } + } +} + +/// A builder type config for more complex uses of [`TextDiff`]. +/// +/// Requires the `text` feature. +#[derive(Clone, Debug)] +pub struct TextDiffConfig { + algorithm: Algorithm, + newline_terminated: Option, + deadline: Option, +} + +impl Default for TextDiffConfig { + fn default() -> TextDiffConfig { + TextDiffConfig { + algorithm: Algorithm::default(), + newline_terminated: None, + deadline: None, + } + } +} + +impl TextDiffConfig { + /// Changes the algorithm. + /// + /// The default algorithm is [`Algorithm::Myers`]. + pub fn algorithm(&mut self, alg: Algorithm) -> &mut Self { + self.algorithm = alg; + self + } + + /// Sets a deadline for the diff operation. + /// + /// By default a diff will take as long as it takes. For certain diff + /// algorthms like Myer's and Patience a maximum running time can be + /// defined after which the algorithm gives up and approximates. + pub fn deadline(&mut self, deadline: Instant) -> &mut Self { + self.deadline = Some(Deadline::Absolute(deadline)); + self + } + + /// Sets a timeout for thediff operation. + /// + /// This is like [`deadline`](Self::deadline) but accepts a duration. + pub fn timeout(&mut self, timeout: Duration) -> &mut Self { + self.deadline = Some(Deadline::Relative(timeout)); + self + } + + /// Changes the newline termination flag. + /// + /// The default is automatic based on input. This flag controls the + /// behavior of [`TextDiff::iter_changes`] and unified diff generation + /// with regards to newlines. When the flag is set to `false` (which + /// is the default) then newlines are added. Otherwise the newlines + /// from the source sequences are reused. + pub fn newline_terminated(&mut self, yes: bool) -> &mut Self { + self.newline_terminated = Some(yes); + self + } + + /// Creates a diff of lines. + /// + /// This splits the text `old` and `new` into lines preserving newlines + /// in the input. Line diffs are very common and because of that enjoy + /// special handling in similar. When a line diff is created with this + /// method the `newline_terminated` flag is flipped to `true` and will + /// influence the behavior of unified diff generation. + /// + /// ```rust + /// use similar::{TextDiff, ChangeTag}; + /// + /// let diff = TextDiff::configure().diff_lines("a\nb\nc", "a\nb\nC"); + /// let changes: Vec<_> = diff + /// .iter_all_changes() + /// .map(|x| (x.tag(), x.value())) + /// .collect(); + /// + /// assert_eq!(changes, vec![ + /// (ChangeTag::Equal, "a\n"), + /// (ChangeTag::Equal, "b\n"), + /// (ChangeTag::Delete, "c"), + /// (ChangeTag::Insert, "C"), + /// ]); + /// ``` + pub fn diff_lines<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>( + &self, + old: &'old T, + new: &'new T, + ) -> TextDiff<'old, 'new, 'bufs, T::Output> { + self.diff( + Cow::Owned(old.as_diffable_str().tokenize_lines()), + Cow::Owned(new.as_diffable_str().tokenize_lines()), + true, + ) + } + + /// Creates a diff of words. + /// + /// This splits the text into words and whitespace. + /// + /// Note on word diffs: because the text differ will tokenize the strings + /// into small segments it can be inconvenient to work with the results + /// depending on the use case. You might also want to combine word level + /// diffs with the [`TextDiffRemapper`](crate::utils::TextDiffRemapper) + /// which lets you remap the diffs back to the original input strings. + /// + /// ```rust + /// use similar::{TextDiff, ChangeTag}; + /// + /// let diff = TextDiff::configure().diff_words("foo bar baz", "foo BAR baz"); + /// let changes: Vec<_> = diff + /// .iter_all_changes() + /// .map(|x| (x.tag(), x.value())) + /// .collect(); + /// + /// assert_eq!(changes, vec![ + /// (ChangeTag::Equal, "foo"), + /// (ChangeTag::Equal, " "), + /// (ChangeTag::Delete, "bar"), + /// (ChangeTag::Insert, "BAR"), + /// (ChangeTag::Equal, " "), + /// (ChangeTag::Equal, "baz"), + /// ]); + /// ``` + pub fn diff_words<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>( + &self, + old: &'old T, + new: &'new T, + ) -> TextDiff<'old, 'new, 'bufs, T::Output> { + self.diff( + Cow::Owned(old.as_diffable_str().tokenize_words()), + Cow::Owned(new.as_diffable_str().tokenize_words()), + false, + ) + } + + /// Creates a diff of characters. + /// + /// Note on character diffs: because the text differ will tokenize the strings + /// into small segments it can be inconvenient to work with the results + /// depending on the use case. You might also want to combine word level + /// diffs with the [`TextDiffRemapper`](crate::utils::TextDiffRemapper) + /// which lets you remap the diffs back to the original input strings. + /// + /// ```rust + /// use similar::{TextDiff, ChangeTag}; + /// + /// let diff = TextDiff::configure().diff_chars("abcdef", "abcDDf"); + /// let changes: Vec<_> = diff + /// .iter_all_changes() + /// .map(|x| (x.tag(), x.value())) + /// .collect(); + /// + /// assert_eq!(changes, vec![ + /// (ChangeTag::Equal, "a"), + /// (ChangeTag::Equal, "b"), + /// (ChangeTag::Equal, "c"), + /// (ChangeTag::Delete, "d"), + /// (ChangeTag::Delete, "e"), + /// (ChangeTag::Insert, "D"), + /// (ChangeTag::Insert, "D"), + /// (ChangeTag::Equal, "f"), + /// ]); + /// ``` + pub fn diff_chars<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>( + &self, + old: &'old T, + new: &'new T, + ) -> TextDiff<'old, 'new, 'bufs, T::Output> { + self.diff( + Cow::Owned(old.as_diffable_str().tokenize_chars()), + Cow::Owned(new.as_diffable_str().tokenize_chars()), + false, + ) + } + + /// Creates a diff of unicode words. + /// + /// This splits the text into words according to unicode rules. This is + /// generally recommended over [`TextDiffConfig::diff_words`] but + /// requires a dependency. + /// + /// This requires the `unicode` feature. + /// + /// Note on word diffs: because the text differ will tokenize the strings + /// into small segments it can be inconvenient to work with the results + /// depending on the use case. You might also want to combine word level + /// diffs with the [`TextDiffRemapper`](crate::utils::TextDiffRemapper) + /// which lets you remap the diffs back to the original input strings. + /// + /// ```rust + /// use similar::{TextDiff, ChangeTag}; + /// + /// let diff = TextDiff::configure().diff_unicode_words("ah(be)ce", "ah(ah)ce"); + /// let changes: Vec<_> = diff + /// .iter_all_changes() + /// .map(|x| (x.tag(), x.value())) + /// .collect(); + /// + /// assert_eq!(changes, vec![ + /// (ChangeTag::Equal, "ah"), + /// (ChangeTag::Equal, "("), + /// (ChangeTag::Delete, "be"), + /// (ChangeTag::Insert, "ah"), + /// (ChangeTag::Equal, ")"), + /// (ChangeTag::Equal, "ce"), + /// ]); + /// ``` + #[cfg(feature = "unicode")] + pub fn diff_unicode_words<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>( + &self, + old: &'old T, + new: &'new T, + ) -> TextDiff<'old, 'new, 'bufs, T::Output> { + self.diff( + Cow::Owned(old.as_diffable_str().tokenize_unicode_words()), + Cow::Owned(new.as_diffable_str().tokenize_unicode_words()), + false, + ) + } + + /// Creates a diff of graphemes. + /// + /// This requires the `unicode` feature. + /// + /// Note on grapheme diffs: because the text differ will tokenize the strings + /// into small segments it can be inconvenient to work with the results + /// depending on the use case. You might also want to combine word level + /// diffs with the [`TextDiffRemapper`](crate::utils::TextDiffRemapper) + /// which lets you remap the diffs back to the original input strings. + /// + /// ```rust + /// use similar::{TextDiff, ChangeTag}; + /// + /// let diff = TextDiff::configure().diff_graphemes("💩🇦🇹🦠", "💩🇦🇱❄️"); + /// let changes: Vec<_> = diff + /// .iter_all_changes() + /// .map(|x| (x.tag(), x.value())) + /// .collect(); + /// + /// assert_eq!(changes, vec![ + /// (ChangeTag::Equal, "💩"), + /// (ChangeTag::Delete, "🇦🇹"), + /// (ChangeTag::Delete, "🦠"), + /// (ChangeTag::Insert, "🇦🇱"), + /// (ChangeTag::Insert, "❄️"), + /// ]); + /// ``` + #[cfg(feature = "unicode")] + pub fn diff_graphemes<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>( + &self, + old: &'old T, + new: &'new T, + ) -> TextDiff<'old, 'new, 'bufs, T::Output> { + self.diff( + Cow::Owned(old.as_diffable_str().tokenize_graphemes()), + Cow::Owned(new.as_diffable_str().tokenize_graphemes()), + false, + ) + } + + /// Creates a diff of arbitrary slices. + /// + /// ```rust + /// use similar::{TextDiff, ChangeTag}; + /// + /// let old = &["foo", "bar", "baz"]; + /// let new = &["foo", "BAR", "baz"]; + /// let diff = TextDiff::configure().diff_slices(old, new); + /// let changes: Vec<_> = diff + /// .iter_all_changes() + /// .map(|x| (x.tag(), x.value())) + /// .collect(); + /// + /// assert_eq!(changes, vec![ + /// (ChangeTag::Equal, "foo"), + /// (ChangeTag::Delete, "bar"), + /// (ChangeTag::Insert, "BAR"), + /// (ChangeTag::Equal, "baz"), + /// ]); + /// ``` + pub fn diff_slices<'old, 'new, 'bufs, T: DiffableStr + ?Sized>( + &self, + old: &'bufs [&'old T], + new: &'bufs [&'new T], + ) -> TextDiff<'old, 'new, 'bufs, T> { + self.diff(Cow::Borrowed(old), Cow::Borrowed(new), false) + } + + fn diff<'old, 'new, 'bufs, T: DiffableStr + ?Sized>( + &self, + old: Cow<'bufs, [&'old T]>, + new: Cow<'bufs, [&'new T]>, + newline_terminated: bool, + ) -> TextDiff<'old, 'new, 'bufs, T> { + let deadline = self.deadline.map(|x| x.into_instant()); + let ops = if old.len() > 100 || new.len() > 100 { + let ih = IdentifyDistinct::::new(&old[..], 0..old.len(), &new[..], 0..new.len()); + capture_diff_deadline( + self.algorithm, + ih.old_lookup(), + ih.old_range(), + ih.new_lookup(), + ih.new_range(), + deadline, + ) + } else { + capture_diff_deadline( + self.algorithm, + &old[..], + 0..old.len(), + &new[..], + 0..new.len(), + deadline, + ) + }; + TextDiff { + old, + new, + ops, + newline_terminated: self.newline_terminated.unwrap_or(newline_terminated), + algorithm: self.algorithm, + } + } +} + +/// Captures diff op codes for textual diffs. +/// +/// The exact diff behavior is depending on the underlying [`DiffableStr`]. +/// For instance diffs on bytes and strings are slightly different. You can +/// create a text diff from constructors such as [`TextDiff::from_lines`] or +/// the [`TextDiffConfig`] created by [`TextDiff::configure`]. +/// +/// Requires the `text` feature. +pub struct TextDiff<'old, 'new, 'bufs, T: DiffableStr + ?Sized> { + old: Cow<'bufs, [&'old T]>, + new: Cow<'bufs, [&'new T]>, + ops: Vec, + newline_terminated: bool, + algorithm: Algorithm, +} + +impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs, str> { + /// Configures a text differ before diffing. + pub fn configure() -> TextDiffConfig { + TextDiffConfig::default() + } + + /// Creates a diff of lines. + /// + /// For more information see [`TextDiffConfig::diff_lines`]. + pub fn from_lines( + old: &'old T, + new: &'new T, + ) -> TextDiff<'old, 'new, 'bufs, T::Output> { + TextDiff::configure().diff_lines(old, new) + } + + /// Creates a diff of words. + /// + /// For more information see [`TextDiffConfig::diff_words`]. + pub fn from_words( + old: &'old T, + new: &'new T, + ) -> TextDiff<'old, 'new, 'bufs, T::Output> { + TextDiff::configure().diff_words(old, new) + } + + /// Creates a diff of chars. + /// + /// For more information see [`TextDiffConfig::diff_chars`]. + pub fn from_chars( + old: &'old T, + new: &'new T, + ) -> TextDiff<'old, 'new, 'bufs, T::Output> { + TextDiff::configure().diff_chars(old, new) + } + + /// Creates a diff of unicode words. + /// + /// For more information see [`TextDiffConfig::diff_unicode_words`]. + /// + /// This requires the `unicode` feature. + #[cfg(feature = "unicode")] + pub fn from_unicode_words( + old: &'old T, + new: &'new T, + ) -> TextDiff<'old, 'new, 'bufs, T::Output> { + TextDiff::configure().diff_unicode_words(old, new) + } + + /// Creates a diff of graphemes. + /// + /// For more information see [`TextDiffConfig::diff_graphemes`]. + /// + /// This requires the `unicode` feature. + #[cfg(feature = "unicode")] + pub fn from_graphemes( + old: &'old T, + new: &'new T, + ) -> TextDiff<'old, 'new, 'bufs, T::Output> { + TextDiff::configure().diff_graphemes(old, new) + } +} + +impl<'old, 'new, 'bufs, T: DiffableStr + ?Sized + 'old + 'new> TextDiff<'old, 'new, 'bufs, T> { + /// Creates a diff of arbitrary slices. + /// + /// For more information see [`TextDiffConfig::diff_slices`]. + pub fn from_slices( + old: &'bufs [&'old T], + new: &'bufs [&'new T], + ) -> TextDiff<'old, 'new, 'bufs, T> { + TextDiff::configure().diff_slices(old, new) + } + + /// The name of the algorithm that created the diff. + pub fn algorithm(&self) -> Algorithm { + self.algorithm + } + + /// Returns `true` if items in the slice are newline terminated. + /// + /// This flag is used by the unified diff writer to determine if extra + /// newlines have to be added. + pub fn newline_terminated(&self) -> bool { + self.newline_terminated + } + + /// Returns all old slices. + pub fn old_slices(&self) -> &[&'old T] { + &self.old + } + + /// Returns all new slices. + pub fn new_slices(&self) -> &[&'new T] { + &self.new + } + + /// Return a measure of the sequences' similarity in the range `0..=1`. + /// + /// A ratio of `1.0` means the two sequences are a complete match, a + /// ratio of `0.0` would indicate completely distinct sequences. + /// + /// ```rust + /// # use similar::TextDiff; + /// let diff = TextDiff::from_chars("abcd", "bcde"); + /// assert_eq!(diff.ratio(), 0.75); + /// ``` + pub fn ratio(&self) -> f32 { + get_diff_ratio(self.ops(), self.old.len(), self.new.len()) + } + + /// Iterates over the changes the op expands to. + /// + /// This method is a convenient way to automatically resolve the different + /// ways in which a change could be encoded (insert/delete vs replace), look + /// up the value from the appropriate slice and also handle correct index + /// handling. + pub fn iter_changes<'x, 'slf>( + &'slf self, + op: &DiffOp, + ) -> ChangesIter<'slf, [&'x T], [&'x T], &'x T> + where + 'x: 'slf, + 'old: 'x, + 'new: 'x, + { + op.iter_changes(self.old_slices(), self.new_slices()) + } + + /// Returns the captured diff ops. + pub fn ops(&self) -> &[DiffOp] { + &self.ops + } + + /// Isolate change clusters by eliminating ranges with no changes. + /// + /// This is equivalent to calling [`group_diff_ops`] on [`TextDiff::ops`]. + pub fn grouped_ops(&self, n: usize) -> Vec> { + group_diff_ops(self.ops().to_vec(), n) + } + + /// Flattens out the diff into all changes. + /// + /// This is a shortcut for combining [`TextDiff::ops`] with + /// [`TextDiff::iter_changes`]. + pub fn iter_all_changes<'x, 'slf>(&'slf self) -> AllChangesIter<'slf, 'x, T> + where + 'x: 'slf + 'old + 'new, + 'old: 'x, + 'new: 'x, + { + AllChangesIter::new(&self.old[..], &self.new[..], self.ops()) + } + + /// Utility to return a unified diff formatter. + pub fn unified_diff<'diff>(&'diff self) -> UnifiedDiff<'diff, 'old, 'new, 'bufs, T> { + UnifiedDiff::from_text_diff(self) + } + + /// Iterates over the changes the op expands to with inline emphasis. + /// + /// This is very similar to [`TextDiff::iter_changes`] but it performs a second + /// level diff on adjacent line replacements. The exact behavior of + /// this function with regards to how it detects those inline changes + /// is currently not defined and will likely change over time. + /// + /// As of similar 1.2.0 the behavior of this function changes depending on + /// if the `unicode` feature is enabled or not. It will prefer unicode word + /// splitting over word splitting depending on the feature flag. + /// + /// Requires the `inline` feature. + #[cfg(feature = "inline")] + pub fn iter_inline_changes<'slf>( + &'slf self, + op: &DiffOp, + ) -> impl Iterator> + '_ + where + 'slf: 'old + 'new, + { + inline::iter_inline_changes(self, op) + } +} + +/// Use the text differ to find `n` close matches. +/// +/// `cutoff` defines the threshold which needs to be reached for a word +/// to be considered similar. See [`TextDiff::ratio`] for more information. +/// +/// ``` +/// # use similar::get_close_matches; +/// let matches = get_close_matches( +/// "appel", +/// &["ape", "apple", "peach", "puppy"][..], +/// 3, +/// 0.6 +/// ); +/// assert_eq!(matches, vec!["apple", "ape"]); +/// ``` +/// +/// Requires the `text` feature. +pub fn get_close_matches<'a, T: DiffableStr + ?Sized>( + word: &T, + possibilities: &[&'a T], + n: usize, + cutoff: f32, +) -> Vec<&'a T> { + let mut matches = BinaryHeap::new(); + let seq1 = word.tokenize_chars(); + let quick_ratio = QuickSeqRatio::new(&seq1); + + for &possibility in possibilities { + let seq2 = possibility.tokenize_chars(); + + if upper_seq_ratio(&seq1, &seq2) < cutoff || quick_ratio.calc(&seq2) < cutoff { + continue; + } + + let diff = TextDiff::from_slices(&seq1, &seq2); + let ratio = diff.ratio(); + if ratio >= cutoff { + // we're putting the word itself in reverse in so that matches with + // the same ratio are ordered lexicographically. + matches.push(((ratio * std::u32::MAX as f32) as u32, Reverse(possibility))); + } + } + + let mut rv = vec![]; + for _ in 0..n { + if let Some((_, elt)) = matches.pop() { + rv.push(elt.0); + } else { + break; + } + } + + rv +} + +#[test] +fn test_captured_ops() { + let diff = TextDiff::from_lines( + "Hello World\nsome stuff here\nsome more stuff here\n", + "Hello World\nsome amazing stuff here\nsome more stuff here\n", + ); + insta::assert_debug_snapshot!(&diff.ops()); +} + +#[test] +fn test_captured_word_ops() { + let diff = TextDiff::from_words( + "Hello World\nsome stuff here\nsome more stuff here\n", + "Hello World\nsome amazing stuff here\nsome more stuff here\n", + ); + let changes = diff + .ops() + .iter() + .flat_map(|op| diff.iter_changes(op)) + .collect::>(); + insta::assert_debug_snapshot!(&changes); +} + +#[test] +fn test_unified_diff() { + let diff = TextDiff::from_lines( + "Hello World\nsome stuff here\nsome more stuff here\n", + "Hello World\nsome amazing stuff here\nsome more stuff here\n", + ); + assert_eq!(diff.newline_terminated(), true); + insta::assert_snapshot!(&diff + .unified_diff() + .context_radius(3) + .header("old", "new") + .to_string()); +} + +#[test] +fn test_line_ops() { + let a = "Hello World\nsome stuff here\nsome more stuff here\n"; + let b = "Hello World\nsome amazing stuff here\nsome more stuff here\n"; + let diff = TextDiff::from_lines(a, b); + assert_eq!(diff.newline_terminated(), true); + let changes = diff + .ops() + .iter() + .flat_map(|op| diff.iter_changes(op)) + .collect::>(); + insta::assert_debug_snapshot!(&changes); + + #[cfg(feature = "bytes")] + { + let byte_diff = TextDiff::from_lines(a.as_bytes(), b.as_bytes()); + let byte_changes = byte_diff + .ops() + .iter() + .flat_map(|op| byte_diff.iter_changes(op)) + .collect::>(); + for (change, byte_change) in changes.iter().zip(byte_changes.iter()) { + assert_eq!(change.to_string_lossy(), byte_change.to_string_lossy()); + } + } +} + +#[test] +fn test_virtual_newlines() { + let diff = TextDiff::from_lines("a\nb", "a\nc\n"); + assert_eq!(diff.newline_terminated(), true); + let changes = diff + .ops() + .iter() + .flat_map(|op| diff.iter_changes(op)) + .collect::>(); + insta::assert_debug_snapshot!(&changes); +} + +#[test] +fn test_char_diff() { + let diff = TextDiff::from_chars("Hello World", "Hallo Welt"); + insta::assert_debug_snapshot!(diff.ops()); + + #[cfg(feature = "bytes")] + { + let byte_diff = TextDiff::from_chars("Hello World".as_bytes(), "Hallo Welt".as_bytes()); + assert_eq!(diff.ops(), byte_diff.ops()); + } +} + +#[test] +fn test_ratio() { + let diff = TextDiff::from_chars("abcd", "bcde"); + assert_eq!(diff.ratio(), 0.75); + let diff = TextDiff::from_chars("", ""); + assert_eq!(diff.ratio(), 1.0); +} + +#[test] +fn test_get_close_matches() { + let matches = get_close_matches("appel", &["ape", "apple", "peach", "puppy"][..], 3, 0.6); + assert_eq!(matches, vec!["apple", "ape"]); + let matches = get_close_matches( + "hulo", + &[ + "hi", "hulu", "hali", "hoho", "amaz", "zulo", "blah", "hopp", "uulo", "aulo", + ][..], + 5, + 0.7, + ); + assert_eq!(matches, vec!["aulo", "hulu", "uulo", "zulo"]); +} + +#[test] +fn test_lifetimes_on_iter() { + use crate::Change; + + fn diff_lines<'x, T>(old: &'x T, new: &'x T) -> Vec> + where + T: DiffableStrRef + ?Sized, + { + TextDiff::from_lines(old, new).iter_all_changes().collect() + } + + let a = "1\n2\n3\n".to_string(); + let b = "1\n99\n3\n".to_string(); + let changes = diff_lines(&a, &b); + insta::assert_debug_snapshot!(&changes); +} + +#[test] +#[cfg(feature = "serde")] +fn test_serde() { + let diff = TextDiff::from_lines( + "Hello World\nsome stuff here\nsome more stuff here\n\nAha stuff here\nand more stuff", + "Stuff\nHello World\nsome amazing stuff here\nsome more stuff here\n", + ); + let changes = diff + .ops() + .iter() + .flat_map(|op| diff.iter_changes(op)) + .collect::>(); + let json = serde_json::to_string_pretty(&changes).unwrap(); + insta::assert_snapshot!(&json); +} + +#[test] +#[cfg(feature = "serde")] +fn test_serde_ops() { + let diff = TextDiff::from_lines( + "Hello World\nsome stuff here\nsome more stuff here\n\nAha stuff here\nand more stuff", + "Stuff\nHello World\nsome amazing stuff here\nsome more stuff here\n", + ); + let changes = diff.ops(); + let json = serde_json::to_string_pretty(&changes).unwrap(); + insta::assert_snapshot!(&json); +} diff --git a/vendor/similar/src/text/snapshots/similar__text__captured_ops.snap b/vendor/similar/src/text/snapshots/similar__text__captured_ops.snap new file mode 100644 index 000000000..cce4066ed --- /dev/null +++ b/vendor/similar/src/text/snapshots/similar__text__captured_ops.snap @@ -0,0 +1,22 @@ +--- +source: src/text/mod.rs +expression: "&diff.ops()" +--- +[ + Equal { + old_index: 0, + new_index: 0, + len: 1, + }, + Replace { + old_index: 1, + old_len: 1, + new_index: 1, + new_len: 1, + }, + Equal { + old_index: 2, + new_index: 2, + len: 1, + }, +] diff --git a/vendor/similar/src/text/snapshots/similar__text__captured_word_ops.snap b/vendor/similar/src/text/snapshots/similar__text__captured_word_ops.snap new file mode 100644 index 000000000..9232c8d26 --- /dev/null +++ b/vendor/similar/src/text/snapshots/similar__text__captured_word_ops.snap @@ -0,0 +1,202 @@ +--- +source: src/text/mod.rs +expression: "&changes" +--- +[ + Change { + tag: Equal, + old_index: Some( + 0, + ), + new_index: Some( + 0, + ), + value: "Hello", + }, + Change { + tag: Equal, + old_index: Some( + 1, + ), + new_index: Some( + 1, + ), + value: " ", + }, + Change { + tag: Equal, + old_index: Some( + 2, + ), + new_index: Some( + 2, + ), + value: "World", + }, + Change { + tag: Equal, + old_index: Some( + 3, + ), + new_index: Some( + 3, + ), + value: "\n", + }, + Change { + tag: Equal, + old_index: Some( + 4, + ), + new_index: Some( + 4, + ), + value: "some", + }, + Change { + tag: Equal, + old_index: Some( + 5, + ), + new_index: Some( + 5, + ), + value: " ", + }, + Change { + tag: Insert, + old_index: None, + new_index: Some( + 6, + ), + value: "amazing", + }, + Change { + tag: Insert, + old_index: None, + new_index: Some( + 7, + ), + value: " ", + }, + Change { + tag: Equal, + old_index: Some( + 6, + ), + new_index: Some( + 8, + ), + value: "stuff", + }, + Change { + tag: Equal, + old_index: Some( + 7, + ), + new_index: Some( + 9, + ), + value: " ", + }, + Change { + tag: Equal, + old_index: Some( + 8, + ), + new_index: Some( + 10, + ), + value: "here", + }, + Change { + tag: Equal, + old_index: Some( + 9, + ), + new_index: Some( + 11, + ), + value: "\n", + }, + Change { + tag: Equal, + old_index: Some( + 10, + ), + new_index: Some( + 12, + ), + value: "some", + }, + Change { + tag: Equal, + old_index: Some( + 11, + ), + new_index: Some( + 13, + ), + value: " ", + }, + Change { + tag: Equal, + old_index: Some( + 12, + ), + new_index: Some( + 14, + ), + value: "more", + }, + Change { + tag: Equal, + old_index: Some( + 13, + ), + new_index: Some( + 15, + ), + value: " ", + }, + Change { + tag: Equal, + old_index: Some( + 14, + ), + new_index: Some( + 16, + ), + value: "stuff", + }, + Change { + tag: Equal, + old_index: Some( + 15, + ), + new_index: Some( + 17, + ), + value: " ", + }, + Change { + tag: Equal, + old_index: Some( + 16, + ), + new_index: Some( + 18, + ), + value: "here", + }, + Change { + tag: Equal, + old_index: Some( + 17, + ), + new_index: Some( + 19, + ), + value: "\n", + }, +] diff --git a/vendor/similar/src/text/snapshots/similar__text__char_diff.snap b/vendor/similar/src/text/snapshots/similar__text__char_diff.snap new file mode 100644 index 000000000..b32f29ac7 --- /dev/null +++ b/vendor/similar/src/text/snapshots/similar__text__char_diff.snap @@ -0,0 +1,39 @@ +--- +source: src/text/mod.rs +expression: diff.ops() +--- +[ + Equal { + old_index: 0, + new_index: 0, + len: 1, + }, + Replace { + old_index: 1, + old_len: 1, + new_index: 1, + new_len: 1, + }, + Equal { + old_index: 2, + new_index: 2, + len: 5, + }, + Replace { + old_index: 7, + old_len: 2, + new_index: 7, + new_len: 1, + }, + Equal { + old_index: 9, + new_index: 8, + len: 1, + }, + Replace { + old_index: 10, + old_len: 1, + new_index: 9, + new_len: 1, + }, +] diff --git a/vendor/similar/src/text/snapshots/similar__text__inline__line_ops_inline.snap b/vendor/similar/src/text/snapshots/similar__text__inline__line_ops_inline.snap new file mode 100644 index 000000000..21334601e --- /dev/null +++ b/vendor/similar/src/text/snapshots/similar__text__inline__line_ops_inline.snap @@ -0,0 +1,126 @@ +--- +source: src/text/inline.rs +expression: "&changes" +--- +[ + InlineChange { + tag: Insert, + old_index: None, + new_index: Some( + 0, + ), + values: [ + ( + false, + "Stuff\n", + ), + ], + }, + InlineChange { + tag: Equal, + old_index: Some( + 0, + ), + new_index: Some( + 1, + ), + values: [ + ( + false, + "Hello World\n", + ), + ], + }, + InlineChange { + tag: Delete, + old_index: Some( + 1, + ), + new_index: None, + values: [ + ( + false, + "some ", + ), + ( + false, + "stuff here\n", + ), + ], + }, + InlineChange { + tag: Insert, + old_index: None, + new_index: Some( + 2, + ), + values: [ + ( + false, + "some ", + ), + ( + true, + "amazing ", + ), + ( + false, + "stuff here\n", + ), + ], + }, + InlineChange { + tag: Equal, + old_index: Some( + 2, + ), + new_index: Some( + 3, + ), + values: [ + ( + false, + "some more stuff here\n", + ), + ], + }, + InlineChange { + tag: Delete, + old_index: Some( + 3, + ), + new_index: None, + values: [ + ( + false, + "\n", + ), + ], + }, + InlineChange { + tag: Delete, + old_index: Some( + 4, + ), + new_index: None, + values: [ + ( + false, + "Aha stuff here\n", + ), + ], + }, + InlineChange { + tag: Delete, + old_index: Some( + 5, + ), + new_index: None, + values: [ + ( + false, + "and more stuff", + ), + ], + }, +] diff --git a/vendor/similar/src/text/snapshots/similar__text__inline__serde.snap b/vendor/similar/src/text/snapshots/similar__text__inline__serde.snap new file mode 100644 index 000000000..44ab829aa --- /dev/null +++ b/vendor/similar/src/text/snapshots/similar__text__inline__serde.snap @@ -0,0 +1,107 @@ +--- +source: src/text/inline.rs +expression: "&json" + +--- +[ + { + "tag": "insert", + "old_index": null, + "new_index": 0, + "values": [ + [ + false, + "Stuff\n" + ] + ] + }, + { + "tag": "equal", + "old_index": 0, + "new_index": 1, + "values": [ + [ + false, + "Hello World\n" + ] + ] + }, + { + "tag": "delete", + "old_index": 1, + "new_index": null, + "values": [ + [ + false, + "some " + ], + [ + false, + "stuff here\n" + ] + ] + }, + { + "tag": "insert", + "old_index": null, + "new_index": 2, + "values": [ + [ + false, + "some " + ], + [ + true, + "amazing " + ], + [ + false, + "stuff here\n" + ] + ] + }, + { + "tag": "equal", + "old_index": 2, + "new_index": 3, + "values": [ + [ + false, + "some more stuff here\n" + ] + ] + }, + { + "tag": "delete", + "old_index": 3, + "new_index": null, + "values": [ + [ + false, + "\n" + ] + ] + }, + { + "tag": "delete", + "old_index": 4, + "new_index": null, + "values": [ + [ + false, + "Aha stuff here\n" + ] + ] + }, + { + "tag": "delete", + "old_index": 5, + "new_index": null, + "values": [ + [ + false, + "and more stuff" + ] + ] + } +] diff --git a/vendor/similar/src/text/snapshots/similar__text__lifetimes_on_iter.snap b/vendor/similar/src/text/snapshots/similar__text__lifetimes_on_iter.snap new file mode 100644 index 000000000..4bb626dc2 --- /dev/null +++ b/vendor/similar/src/text/snapshots/similar__text__lifetimes_on_iter.snap @@ -0,0 +1,42 @@ +--- +source: src/text/mod.rs +expression: "&changes" +--- +[ + Change { + tag: Equal, + old_index: Some( + 0, + ), + new_index: Some( + 0, + ), + value: "1\n", + }, + Change { + tag: Delete, + old_index: Some( + 1, + ), + new_index: None, + value: "2\n", + }, + Change { + tag: Insert, + old_index: None, + new_index: Some( + 1, + ), + value: "99\n", + }, + Change { + tag: Equal, + old_index: Some( + 2, + ), + new_index: Some( + 2, + ), + value: "3\n", + }, +] diff --git a/vendor/similar/src/text/snapshots/similar__text__line_ops.snap b/vendor/similar/src/text/snapshots/similar__text__line_ops.snap new file mode 100644 index 000000000..f18725919 --- /dev/null +++ b/vendor/similar/src/text/snapshots/similar__text__line_ops.snap @@ -0,0 +1,42 @@ +--- +source: src/text/mod.rs +expression: "&changes" +--- +[ + Change { + tag: Equal, + old_index: Some( + 0, + ), + new_index: Some( + 0, + ), + value: "Hello World\n", + }, + Change { + tag: Delete, + old_index: Some( + 1, + ), + new_index: None, + value: "some stuff here\n", + }, + Change { + tag: Insert, + old_index: None, + new_index: Some( + 1, + ), + value: "some amazing stuff here\n", + }, + Change { + tag: Equal, + old_index: Some( + 2, + ), + new_index: Some( + 2, + ), + value: "some more stuff here\n", + }, +] diff --git a/vendor/similar/src/text/snapshots/similar__text__serde.snap b/vendor/similar/src/text/snapshots/similar__text__serde.snap new file mode 100644 index 000000000..13418a6f4 --- /dev/null +++ b/vendor/similar/src/text/snapshots/similar__text__serde.snap @@ -0,0 +1,55 @@ +--- +source: src/text/mod.rs +expression: "&json" + +--- +[ + { + "tag": "insert", + "old_index": null, + "new_index": 0, + "value": "Stuff\n" + }, + { + "tag": "equal", + "old_index": 0, + "new_index": 1, + "value": "Hello World\n" + }, + { + "tag": "delete", + "old_index": 1, + "new_index": null, + "value": "some stuff here\n" + }, + { + "tag": "insert", + "old_index": null, + "new_index": 2, + "value": "some amazing stuff here\n" + }, + { + "tag": "equal", + "old_index": 2, + "new_index": 3, + "value": "some more stuff here\n" + }, + { + "tag": "delete", + "old_index": 3, + "new_index": null, + "value": "\n" + }, + { + "tag": "delete", + "old_index": 4, + "new_index": null, + "value": "Aha stuff here\n" + }, + { + "tag": "delete", + "old_index": 5, + "new_index": null, + "value": "and more stuff" + } +] diff --git a/vendor/similar/src/text/snapshots/similar__text__serde_ops.snap b/vendor/similar/src/text/snapshots/similar__text__serde_ops.snap new file mode 100644 index 000000000..040fe978b --- /dev/null +++ b/vendor/similar/src/text/snapshots/similar__text__serde_ops.snap @@ -0,0 +1,38 @@ +--- +source: src/text/mod.rs +expression: "&json" + +--- +[ + { + "op": "insert", + "old_index": 0, + "new_index": 0, + "new_len": 1 + }, + { + "op": "equal", + "old_index": 0, + "new_index": 1, + "len": 1 + }, + { + "op": "replace", + "old_index": 1, + "old_len": 1, + "new_index": 2, + "new_len": 1 + }, + { + "op": "equal", + "old_index": 2, + "new_index": 3, + "len": 1 + }, + { + "op": "delete", + "old_index": 3, + "old_len": 3, + "new_index": 4 + } +] diff --git a/vendor/similar/src/text/snapshots/similar__text__unified_diff.snap b/vendor/similar/src/text/snapshots/similar__text__unified_diff.snap new file mode 100644 index 000000000..77f409a69 --- /dev/null +++ b/vendor/similar/src/text/snapshots/similar__text__unified_diff.snap @@ -0,0 +1,12 @@ +--- +source: src/text/mod.rs +expression: "&diff.unified_diff().context_radius(3).header(\"old\", \"new\").to_string()" +--- +--- old ++++ new +@@ -1,3 +1,3 @@ + Hello World +-some stuff here ++some amazing stuff here + some more stuff here + diff --git a/vendor/similar/src/text/snapshots/similar__text__virtual_newlines.snap b/vendor/similar/src/text/snapshots/similar__text__virtual_newlines.snap new file mode 100644 index 000000000..a3915a84e --- /dev/null +++ b/vendor/similar/src/text/snapshots/similar__text__virtual_newlines.snap @@ -0,0 +1,32 @@ +--- +source: src/text/mod.rs +expression: "&changes" +--- +[ + Change { + tag: Equal, + old_index: Some( + 0, + ), + new_index: Some( + 0, + ), + value: "a\n", + }, + Change { + tag: Delete, + old_index: Some( + 1, + ), + new_index: None, + value: "b", + }, + Change { + tag: Insert, + old_index: None, + new_index: Some( + 1, + ), + value: "c\n", + }, +] diff --git a/vendor/similar/src/text/utils.rs b/vendor/similar/src/text/utils.rs new file mode 100644 index 000000000..d4a440f65 --- /dev/null +++ b/vendor/similar/src/text/utils.rs @@ -0,0 +1,55 @@ +use std::collections::HashMap; +use std::hash::Hash; + +use super::DiffableStrRef; + +// quick and dirty way to get an upper sequence ratio. +pub fn upper_seq_ratio(seq1: &[T], seq2: &[T]) -> f32 { + let n = seq1.len() + seq2.len(); + if n == 0 { + 1.0 + } else { + 2.0 * seq1.len().min(seq2.len()) as f32 / n as f32 + } +} + +/// Internal utility to calculate an upper bound for a ratio for +/// [`get_close_matches`]. This is based on Python's difflib approach +/// of considering the two sets to be multisets. +/// +/// It counts the number of matches without regard to order, which is an +/// obvious upper bound. +pub struct QuickSeqRatio<'a, T: DiffableStrRef + ?Sized>(HashMap<&'a T, i32>); + +impl<'a, T: DiffableStrRef + Hash + Eq + ?Sized> QuickSeqRatio<'a, T> { + pub fn new(seq: &[&'a T]) -> QuickSeqRatio<'a, T> { + let mut counts = HashMap::new(); + for &word in seq { + *counts.entry(word).or_insert(0) += 1; + } + QuickSeqRatio(counts) + } + + pub fn calc(&self, seq: &[&T]) -> f32 { + let n = self.0.len() + seq.len(); + if n == 0 { + return 1.0; + } + + let mut available = HashMap::new(); + let mut matches = 0; + for &word in seq { + let x = if let Some(count) = available.get(&word) { + *count + } else { + self.0.get(&word).copied().unwrap_or(0) + }; + available.insert(word, x - 1); + if x > 0 { + matches += 1; + } + } + + 2.0 * matches as f32 / n as f32 + } +} diff --git a/vendor/similar/src/types.rs b/vendor/similar/src/types.rs new file mode 100644 index 000000000..89676761a --- /dev/null +++ b/vendor/similar/src/types.rs @@ -0,0 +1,489 @@ +use std::fmt; +use std::ops::{Index, Range}; + +use crate::algorithms::utils::is_empty_range; +use crate::algorithms::DiffHook; +use crate::iter::ChangesIter; + +/// An enum representing a diffing algorithm. +#[derive(Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord, Debug)] +#[cfg_attr( + feature = "serde", + derive(serde::Serialize, serde::Deserialize), + serde(rename_all = "snake_case") +)] +pub enum Algorithm { + /// Picks the myers algorithm from [`crate::algorithms::myers`] + Myers, + /// Picks the patience algorithm from [`crate::algorithms::patience`] + Patience, + /// Picks the LCS algorithm from [`crate::algorithms::lcs`] + Lcs, +} + +impl Default for Algorithm { + /// Returns the default algorithm ([`Algorithm::Myers`]). + fn default() -> Algorithm { + Algorithm::Myers + } +} + +/// The tag of a change. +#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Ord, PartialOrd)] +#[cfg_attr( + feature = "serde", + derive(serde::Serialize, serde::Deserialize), + serde(rename_all = "snake_case") +)] +pub enum ChangeTag { + /// The change indicates equality (not a change) + Equal, + /// The change indicates deleted text. + Delete, + /// The change indicates inserted text. + Insert, +} + +impl fmt::Display for ChangeTag { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "{}", + match &self { + ChangeTag::Equal => ' ', + ChangeTag::Delete => '-', + ChangeTag::Insert => '+', + } + ) + } +} + +/// Represents the expanded [`DiffOp`] change. +/// +/// This type is returned from [`DiffOp::iter_changes`] and +/// [`TextDiff::iter_changes`](crate::text::TextDiff::iter_changes). +/// +/// It exists so that it's more convenient to work with textual differences as +/// the underlying [`DiffOp`] encodes a group of changes. +/// +/// This type has additional methods that are only available for types +/// implementing [`DiffableStr`](crate::text::DiffableStr). +#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Ord, PartialOrd)] +#[cfg_attr(feature = "serde", derive(serde::Serialize))] +pub struct Change { + pub(crate) tag: ChangeTag, + pub(crate) old_index: Option, + pub(crate) new_index: Option, + pub(crate) value: T, +} + +/// These methods are available for all change types. +impl Change { + /// Returns the change tag. + pub fn tag(&self) -> ChangeTag { + self.tag + } + + /// Returns the old index if available. + pub fn old_index(&self) -> Option { + self.old_index + } + + /// Returns the new index if available. + pub fn new_index(&self) -> Option { + self.new_index + } + + /// Returns the underlying changed value. + /// + /// Depending on the type of the underlying [`crate::text::DiffableStr`] + /// this value is more or less useful. If you always want to have a utf-8 + /// string it's best to use the [`Change::as_str`] and + /// [`Change::to_string_lossy`] methods. + pub fn value(&self) -> T { + self.value.clone() + } +} + +/// Utility enum to capture a diff operation. +/// +/// This is used by [`Capture`](crate::algorithms::Capture). +#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)] +#[cfg_attr( + feature = "serde", + derive(serde::Serialize, serde::Deserialize), + serde(rename_all = "snake_case", tag = "op") +)] +pub enum DiffOp { + /// A segment is equal (see [`DiffHook::equal`]) + Equal { + /// The starting index in the old sequence. + old_index: usize, + /// The starting index in the new sequence. + new_index: usize, + /// The length of the segment. + len: usize, + }, + /// A segment was deleted (see [`DiffHook::delete`]) + Delete { + /// The starting index in the old sequence. + old_index: usize, + /// The length of the old segment. + old_len: usize, + /// The starting index in the new sequence. + new_index: usize, + }, + /// A segment was inserted (see [`DiffHook::insert`]) + Insert { + /// The starting index in the old sequence. + old_index: usize, + /// The starting index in the new sequence. + new_index: usize, + /// The length of the new segment. + new_len: usize, + }, + /// A segment was replaced (see [`DiffHook::replace`]) + Replace { + /// The starting index in the old sequence. + old_index: usize, + /// The length of the old segment. + old_len: usize, + /// The starting index in the new sequence. + new_index: usize, + /// The length of the new segment. + new_len: usize, + }, +} + +/// The tag of a diff operation. +#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Ord, PartialOrd)] +#[cfg_attr( + feature = "serde", + derive(serde::Serialize, serde::Deserialize), + serde(rename_all = "snake_case") +)] +pub enum DiffTag { + /// The diff op encodes an equal segment. + Equal, + /// The diff op encodes a deleted segment. + Delete, + /// The diff op encodes an inserted segment. + Insert, + /// The diff op encodes a replaced segment. + Replace, +} + +impl DiffOp { + /// Returns the tag of the operation. + pub fn tag(self) -> DiffTag { + self.as_tag_tuple().0 + } + + /// Returns the old range. + pub fn old_range(&self) -> Range { + self.as_tag_tuple().1 + } + + /// Returns the new range. + pub fn new_range(&self) -> Range { + self.as_tag_tuple().2 + } + + /// Transform the op into a tuple of diff tag and ranges. + /// + /// This is useful when operating on slices. The returned format is + /// `(tag, i1..i2, j1..j2)`: + /// + /// * `Replace`: `a[i1..i2]` should be replaced by `b[j1..j2]` + /// * `Delete`: `a[i1..i2]` should be deleted (`j1 == j2` in this case). + /// * `Insert`: `b[j1..j2]` should be inserted at `a[i1..i2]` (`i1 == i2` in this case). + /// * `Equal`: `a[i1..i2]` is equal to `b[j1..j2]`. + pub fn as_tag_tuple(&self) -> (DiffTag, Range, Range) { + match *self { + DiffOp::Equal { + old_index, + new_index, + len, + } => ( + DiffTag::Equal, + old_index..old_index + len, + new_index..new_index + len, + ), + DiffOp::Delete { + old_index, + new_index, + old_len, + } => ( + DiffTag::Delete, + old_index..old_index + old_len, + new_index..new_index, + ), + DiffOp::Insert { + old_index, + new_index, + new_len, + } => ( + DiffTag::Insert, + old_index..old_index, + new_index..new_index + new_len, + ), + DiffOp::Replace { + old_index, + old_len, + new_index, + new_len, + } => ( + DiffTag::Replace, + old_index..old_index + old_len, + new_index..new_index + new_len, + ), + } + } + + /// Apply this operation to a diff hook. + pub fn apply_to_hook(&self, d: &mut D) -> Result<(), D::Error> { + match *self { + DiffOp::Equal { + old_index, + new_index, + len, + } => d.equal(old_index, new_index, len), + DiffOp::Delete { + old_index, + old_len, + new_index, + } => d.delete(old_index, old_len, new_index), + DiffOp::Insert { + old_index, + new_index, + new_len, + } => d.insert(old_index, new_index, new_len), + DiffOp::Replace { + old_index, + old_len, + new_index, + new_len, + } => d.replace(old_index, old_len, new_index, new_len), + } + } + + /// Iterates over all changes encoded in the diff op against old and new + /// sequences. + /// + /// `old` and `new` are two indexable objects like the types you pass to + /// the diffing algorithm functions. + /// + /// ```rust + /// use similar::{ChangeTag, Algorithm}; + /// use similar::capture_diff_slices; + /// let old = vec!["foo", "bar", "baz"]; + /// let new = vec!["foo", "bar", "blah"]; + /// let ops = capture_diff_slices(Algorithm::Myers, &old, &new); + /// let changes: Vec<_> = ops + /// .iter() + /// .flat_map(|x| x.iter_changes(&old, &new)) + /// .map(|x| (x.tag(), x.value())) + /// .collect(); + /// assert_eq!(changes, vec![ + /// (ChangeTag::Equal, "foo"), + /// (ChangeTag::Equal, "bar"), + /// (ChangeTag::Delete, "baz"), + /// (ChangeTag::Insert, "blah"), + /// ]); + /// ``` + pub fn iter_changes<'lookup, Old, New, T>( + &self, + old: &'lookup Old, + new: &'lookup New, + ) -> ChangesIter<'lookup, Old, New, T> + where + Old: Index + ?Sized, + New: Index + ?Sized, + { + ChangesIter::new(old, new, *self) + } + + /// Given a diffop yields the changes it encodes against the given slices. + /// + /// This is similar to [`DiffOp::iter_changes`] but instead of yielding the + /// individual changes it yields consequitive changed slices. + /// + /// This will only ever yield a single tuple or two tuples in case a + /// [`DiffOp::Replace`] operation is passed. + /// + /// ```rust + /// use similar::{ChangeTag, Algorithm}; + /// use similar::capture_diff_slices; + /// let old = vec!["foo", "bar", "baz"]; + /// let new = vec!["foo", "bar", "blah"]; + /// let ops = capture_diff_slices(Algorithm::Myers, &old, &new); + /// let changes: Vec<_> = ops.iter().flat_map(|x| x.iter_slices(&old, &new)).collect(); + /// assert_eq!(changes, vec![ + /// (ChangeTag::Equal, &["foo", "bar"][..]), + /// (ChangeTag::Delete, &["baz"][..]), + /// (ChangeTag::Insert, &["blah"][..]), + /// ]); + /// ``` + /// + /// Due to lifetime restrictions it's currently impossible for the + /// returned slices to outlive the lookup. + pub fn iter_slices<'lookup, Old, New, T>( + &self, + old: &'lookup Old, + new: &'lookup New, + ) -> impl Iterator + where + T: 'lookup + ?Sized, + Old: Index, Output = T> + ?Sized, + New: Index, Output = T> + ?Sized, + { + match *self { + DiffOp::Equal { old_index, len, .. } => { + Some((ChangeTag::Equal, &old[old_index..old_index + len])) + .into_iter() + .chain(None.into_iter()) + } + DiffOp::Insert { + new_index, new_len, .. + } => Some((ChangeTag::Insert, &new[new_index..new_index + new_len])) + .into_iter() + .chain(None.into_iter()), + DiffOp::Delete { + old_index, old_len, .. + } => Some((ChangeTag::Delete, &old[old_index..old_index + old_len])) + .into_iter() + .chain(None.into_iter()), + DiffOp::Replace { + old_index, + old_len, + new_index, + new_len, + } => Some((ChangeTag::Delete, &old[old_index..old_index + old_len])) + .into_iter() + .chain(Some((ChangeTag::Insert, &new[new_index..new_index + new_len])).into_iter()), + } + } + + pub(crate) fn is_empty(&self) -> bool { + let (_, old, new) = self.as_tag_tuple(); + is_empty_range(&old) && is_empty_range(&new) + } + + pub(crate) fn shift_left(&mut self, adjust: usize) { + self.adjust((adjust, true), (0, false)); + } + + pub(crate) fn shift_right(&mut self, adjust: usize) { + self.adjust((adjust, false), (0, false)); + } + + pub(crate) fn grow_left(&mut self, adjust: usize) { + self.adjust((adjust, true), (adjust, false)); + } + + pub(crate) fn grow_right(&mut self, adjust: usize) { + self.adjust((0, false), (adjust, false)); + } + + pub(crate) fn shrink_left(&mut self, adjust: usize) { + self.adjust((0, false), (adjust, true)); + } + + pub(crate) fn shrink_right(&mut self, adjust: usize) { + self.adjust((adjust, false), (adjust, true)); + } + + fn adjust(&mut self, adjust_offset: (usize, bool), adjust_len: (usize, bool)) { + #[inline(always)] + fn modify(val: &mut usize, adj: (usize, bool)) { + if adj.1 { + *val -= adj.0; + } else { + *val += adj.0; + } + } + + match self { + DiffOp::Equal { + old_index, + new_index, + len, + } => { + modify(old_index, adjust_offset); + modify(new_index, adjust_offset); + modify(len, adjust_len); + } + DiffOp::Delete { + old_index, + old_len, + new_index, + } => { + modify(old_index, adjust_offset); + modify(old_len, adjust_len); + modify(new_index, adjust_offset); + } + DiffOp::Insert { + old_index, + new_index, + new_len, + } => { + modify(old_index, adjust_offset); + modify(new_index, adjust_offset); + modify(new_len, adjust_len); + } + DiffOp::Replace { + old_index, + old_len, + new_index, + new_len, + } => { + modify(old_index, adjust_offset); + modify(old_len, adjust_len); + modify(new_index, adjust_offset); + modify(new_len, adjust_len); + } + } + } +} + +#[cfg(feature = "text")] +mod text_additions { + use super::*; + use crate::text::DiffableStr; + use std::borrow::Cow; + + /// The text interface can produce changes over [`DiffableStr`] implementing + /// values. As those are generic interfaces for different types of strings + /// utility methods to make working with standard rust strings more enjoyable. + impl<'s, T: DiffableStr + ?Sized> Change<&'s T> { + /// Returns the value as string if it is utf-8. + pub fn as_str(&self) -> Option<&'s str> { + T::as_str(self.value) + } + + /// Returns the value (lossy) decoded as utf-8 string. + pub fn to_string_lossy(&self) -> Cow<'s, str> { + T::to_string_lossy(self.value) + } + + /// Returns `true` if this change does not end in a newline and must be + /// followed up by one if line based diffs are used. + /// + /// The [`std::fmt::Display`] implementation of [`Change`] will automatically + /// insert a newline after the value if this is true. + pub fn missing_newline(&self) -> bool { + !T::ends_with_newline(self.value) + } + } + + impl<'s, T: DiffableStr + ?Sized> fmt::Display for Change<&'s T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{}{}", + self.to_string_lossy(), + if self.missing_newline() { "\n" } else { "" } + ) + } + } +} diff --git a/vendor/similar/src/udiff.rs b/vendor/similar/src/udiff.rs new file mode 100644 index 000000000..80a30cf82 --- /dev/null +++ b/vendor/similar/src/udiff.rs @@ -0,0 +1,359 @@ +//! This module provides unified diff functionality. +//! +//! It is available for as long as the `text` feature is enabled which +//! is enabled by default: +//! +//! ```rust +//! use similar::TextDiff; +//! # let old_text = ""; +//! # let new_text = ""; +//! let text_diff = TextDiff::from_lines(old_text, new_text); +//! print!("{}", text_diff +//! .unified_diff() +//! .context_radius(10) +//! .header("old_file", "new_file")); +//! ``` +//! +//! # Unicode vs Bytes +//! +//! The [`UnifiedDiff`] type supports both unicode and byte diffs for all +//! types compatible with [`DiffableStr`]. You can pick between the two +//! versions by using the [`Display`](std::fmt::Display) implementation or +//! [`UnifiedDiff`] or [`UnifiedDiff::to_writer`]. +//! +//! The former uses [`DiffableStr::to_string_lossy`], the latter uses +//! [`DiffableStr::as_bytes`] for each line. +#[cfg(feature = "text")] +use std::{fmt, io}; + +use crate::iter::AllChangesIter; +use crate::text::{DiffableStr, TextDiff}; +use crate::types::{Algorithm, DiffOp}; + +struct MissingNewlineHint(bool); + +impl fmt::Display for MissingNewlineHint { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.0 { + write!(f, "\n\\ No newline at end of file")?; + } + Ok(()) + } +} + +#[derive(Copy, Clone, Debug)] +struct UnifiedDiffHunkRange(usize, usize); + +impl UnifiedDiffHunkRange { + fn start(&self) -> usize { + self.0 + } + + fn end(&self) -> usize { + self.1 + } +} + +impl fmt::Display for UnifiedDiffHunkRange { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut beginning = self.start() + 1; + let len = self.end() - self.start(); + if len == 1 { + write!(f, "{}", beginning) + } else { + if len == 0 { + // empty ranges begin at line just before the range + beginning -= 1; + } + write!(f, "{},{}", beginning, len) + } + } +} + +/// Unified diff hunk header formatter. +pub struct UnifiedHunkHeader { + old_range: UnifiedDiffHunkRange, + new_range: UnifiedDiffHunkRange, +} + +impl UnifiedHunkHeader { + /// Creates a hunk header from a (non empty) slice of diff ops. + pub fn new(ops: &[DiffOp]) -> UnifiedHunkHeader { + let first = ops[0]; + let last = ops[ops.len() - 1]; + let old_start = first.old_range().start; + let new_start = first.new_range().start; + let old_end = last.old_range().end; + let new_end = last.new_range().end; + UnifiedHunkHeader { + old_range: UnifiedDiffHunkRange(old_start, old_end), + new_range: UnifiedDiffHunkRange(new_start, new_end), + } + } +} + +impl fmt::Display for UnifiedHunkHeader { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "@@ -{} +{} @@", &self.old_range, &self.new_range) + } +} + +/// Unified diff formatter. +/// +/// ```rust +/// use similar::TextDiff; +/// # let old_text = ""; +/// # let new_text = ""; +/// let text_diff = TextDiff::from_lines(old_text, new_text); +/// print!("{}", text_diff +/// .unified_diff() +/// .context_radius(10) +/// .header("old_file", "new_file")); +/// ``` +/// +/// ## Unicode vs Bytes +/// +/// The [`UnifiedDiff`] type supports both unicode and byte diffs for all +/// types compatible with [`DiffableStr`]. You can pick between the two +/// versions by using [`UnifiedDiff.to_string`] or [`UnifiedDiff.to_writer`]. +/// The former uses [`DiffableStr::to_string_lossy`], the latter uses +/// [`DiffableStr::as_bytes`] for each line. +pub struct UnifiedDiff<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> { + diff: &'diff TextDiff<'old, 'new, 'bufs, T>, + context_radius: usize, + missing_newline_hint: bool, + header: Option<(String, String)>, +} + +impl<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> UnifiedDiff<'diff, 'old, 'new, 'bufs, T> { + /// Creates a formatter from a text diff object. + pub fn from_text_diff(diff: &'diff TextDiff<'old, 'new, 'bufs, T>) -> Self { + UnifiedDiff { + diff, + context_radius: 3, + missing_newline_hint: true, + header: None, + } + } + + /// Changes the context radius. + /// + /// The context radius is the number of lines between changes that should + /// be emitted. This defaults to `3`. + pub fn context_radius(&mut self, n: usize) -> &mut Self { + self.context_radius = n; + self + } + + /// Sets a header to the diff. + /// + /// `a` and `b` are the file names that are added to the top of the unified + /// file format. The names are accepted verbatim which lets you encode + /// a timestamp into it when separated by a tab (`\t`). For more information + /// see [the unified diff format specification](https://pubs.opengroup.org/onlinepubs/9699919799/utilities/diff.html#tag_20_34_10_07) + pub fn header(&mut self, a: &str, b: &str) -> &mut Self { + self.header = Some((a.to_string(), b.to_string())); + self + } + + /// Controls the missing newline hint. + /// + /// By default a special `\ No newline at end of file` marker is added to + /// the output when a file is not terminated with a final newline. This can + /// be disabled with this flag. + pub fn missing_newline_hint(&mut self, yes: bool) -> &mut Self { + self.missing_newline_hint = yes; + self + } + + /// Iterates over all hunks as configured. + pub fn iter_hunks(&self) -> impl Iterator> { + let diff = self.diff; + let missing_newline_hint = self.missing_newline_hint; + self.diff + .grouped_ops(self.context_radius) + .into_iter() + .filter(|ops| !ops.is_empty()) + .map(move |ops| UnifiedDiffHunk::new(ops, diff, missing_newline_hint)) + } + + /// Write the unified diff as bytes to the output stream. + pub fn to_writer(&self, mut w: W) -> Result<(), io::Error> + where + 'diff: 'old + 'new + 'bufs, + { + let mut header = self.header.as_ref(); + for hunk in self.iter_hunks() { + if let Some((old_file, new_file)) = header.take() { + writeln!(w, "--- {}", old_file)?; + writeln!(w, "+++ {}", new_file)?; + } + write!(w, "{}", hunk)?; + } + Ok(()) + } + + fn header_opt(&mut self, header: Option<(&str, &str)>) -> &mut Self { + if let Some((a, b)) = header { + self.header(a, b); + } + self + } +} + +/// Unified diff hunk formatter. +/// +/// The `Display` this renders out a single unified diff's hunk. +pub struct UnifiedDiffHunk<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> { + diff: &'diff TextDiff<'old, 'new, 'bufs, T>, + ops: Vec, + missing_newline_hint: bool, +} + +impl<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> + UnifiedDiffHunk<'diff, 'old, 'new, 'bufs, T> +{ + /// Creates a new hunk for some operations. + pub fn new( + ops: Vec, + diff: &'diff TextDiff<'old, 'new, 'bufs, T>, + missing_newline_hint: bool, + ) -> UnifiedDiffHunk<'diff, 'old, 'new, 'bufs, T> { + UnifiedDiffHunk { + diff, + ops, + missing_newline_hint, + } + } + + /// Returns the header for the hunk. + pub fn header(&self) -> UnifiedHunkHeader { + UnifiedHunkHeader::new(&self.ops) + } + + /// Returns all operations in the hunk. + pub fn ops(&self) -> &[DiffOp] { + &self.ops + } + + /// Returns the value of the `missing_newline_hint` flag. + pub fn missing_newline_hint(&self) -> bool { + self.missing_newline_hint + } + + /// Iterates over all changes in a hunk. + pub fn iter_changes<'x, 'slf>(&'slf self) -> AllChangesIter<'slf, 'x, T> + where + 'x: 'slf + 'old + 'new, + 'old: 'x, + 'new: 'x, + { + AllChangesIter::new(self.diff.old_slices(), self.diff.new_slices(), self.ops()) + } + + /// Write the hunk as bytes to the output stream. + pub fn to_writer(&self, mut w: W) -> Result<(), io::Error> + where + 'diff: 'old + 'new + 'bufs, + { + for (idx, change) in self.iter_changes().enumerate() { + if idx == 0 { + writeln!(w, "{}", self.header())?; + } + write!(w, "{}", change.tag())?; + w.write_all(change.value().as_bytes())?; + if !self.diff.newline_terminated() { + writeln!(w)?; + } + if self.diff.newline_terminated() && change.missing_newline() { + writeln!(w, "{}", MissingNewlineHint(self.missing_newline_hint))?; + } + } + Ok(()) + } +} + +impl<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> fmt::Display + for UnifiedDiffHunk<'diff, 'old, 'new, 'bufs, T> +where + 'diff: 'old + 'new + 'bufs, +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + for (idx, change) in self.iter_changes().enumerate() { + if idx == 0 { + writeln!(f, "{}", self.header())?; + } + write!(f, "{}{}", change.tag(), change.to_string_lossy())?; + if !self.diff.newline_terminated() { + writeln!(f)?; + } + if self.diff.newline_terminated() && change.missing_newline() { + writeln!(f, "{}", MissingNewlineHint(self.missing_newline_hint))?; + } + } + Ok(()) + } +} + +impl<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> fmt::Display + for UnifiedDiff<'diff, 'old, 'new, 'bufs, T> +where + 'diff: 'old + 'new + 'bufs, +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut header = self.header.as_ref(); + for hunk in self.iter_hunks() { + if let Some((old_file, new_file)) = header.take() { + writeln!(f, "--- {}", old_file)?; + writeln!(f, "+++ {}", new_file)?; + } + write!(f, "{}", hunk)?; + } + Ok(()) + } +} + +/// Quick way to get a unified diff as string. +/// +/// `n` configures [`UnifiedDiff::context_radius`] and +/// `header` configures [`UnifiedDiff::header`] when not `None`. +pub fn unified_diff<'old, 'new>( + alg: Algorithm, + old: &'old str, + new: &'new str, + n: usize, + header: Option<(&str, &str)>, +) -> String { + TextDiff::configure() + .algorithm(alg) + .diff_lines(old, new) + .unified_diff() + .context_radius(n) + .header_opt(header) + .to_string() +} + +#[test] +fn test_unified_diff() { + let diff = TextDiff::from_lines( + "a\nb\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\ny\nz\nA\nB\nC\nD\nE\nF\nG\nH\nI\nJ\nK\nL\nM\nN\nO\nP\nQ\nR\nS\nT\nU\nV\nW\nX\nY\nZ", + "a\nb\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\nS\nt\nu\nv\nw\nx\ny\nz\nA\nB\nC\nD\nE\nF\nG\nH\nI\nJ\nK\nL\nM\nN\no\nP\nQ\nR\nS\nT\nU\nV\nW\nX\nY\nZ", + ); + insta::assert_snapshot!(&diff.unified_diff().header("a.txt", "b.txt").to_string()); +} +#[test] +fn test_empty_unified_diff() { + let diff = TextDiff::from_lines("abc", "abc"); + assert_eq!(diff.unified_diff().header("a.txt", "b.txt").to_string(), ""); +} + +#[test] +fn test_unified_diff_newline_hint() { + let diff = TextDiff::from_lines("a\n", "b"); + insta::assert_snapshot!(&diff.unified_diff().header("a.txt", "b.txt").to_string()); + insta::assert_snapshot!(&diff + .unified_diff() + .missing_newline_hint(false) + .header("a.txt", "b.txt") + .to_string()); +} diff --git a/vendor/similar/src/utils.rs b/vendor/similar/src/utils.rs new file mode 100644 index 000000000..1f8fdc989 --- /dev/null +++ b/vendor/similar/src/utils.rs @@ -0,0 +1,415 @@ +//! Utilities for common diff related operations. +//! +//! This module provides specialized utilities and simplified diff operations +//! for common operations. It's useful when you want to work with text diffs +//! and you're interested in getting vectors of these changes directly. +//! +//! # Slice Remapping +//! +//! When working with [`TextDiff`] it's common that one takes advantage of the +//! built-in tokenization of the differ. This for instance lets you do +//! grapheme level diffs. This is implemented by the differ generating rather +//! small slices of strings and running a diff algorithm over them. +//! +//! The downside of this is that all the [`DiffOp`] objects produced by the +//! diffing algorithm encode operations on these rather small slices. For +//! a lot of use cases this is not what one wants which can make this very +//! inconvenient. This module provides a [`TextDiffRemapper`] which lets you +//! map from the ranges that the [`TextDiff`] returns to the original input +//! strings. For more information see [`TextDiffRemapper`]. +//! +//! # Simple Diff Functions +//! +//! This module provides a range of common test diff functions that will +//! produce vectors of `(change_tag, value)` tuples. They will automatically +//! optimize towards returning the most useful slice that one would expect for +//! the type of diff performed. + +use std::hash::Hash; +use std::ops::{Index, Range}; + +use crate::{ + capture_diff_slices, Algorithm, ChangeTag, DiffOp, DiffableStr, DiffableStrRef, TextDiff, +}; + +struct SliceRemapper<'x, T: ?Sized> { + source: &'x T, + indexes: Vec>, +} + +impl<'x, 'slices, T: DiffableStr + ?Sized> SliceRemapper<'x, T> { + fn new(source: &'x T, slices: &[&'x T]) -> SliceRemapper<'x, T> { + let indexes = slices + .iter() + .scan(0, |state, item| { + let start = *state; + let end = start + item.len(); + *state = end; + Some(start..end) + }) + .collect(); + SliceRemapper { source, indexes } + } + + fn slice(&self, range: Range) -> Option<&'x T> { + let start = self.indexes.get(range.start)?.start; + let end = self.indexes.get(range.end - 1)?.end; + Some(self.source.slice(start..end)) + } +} + +impl<'x, T: DiffableStr + ?Sized> Index> for SliceRemapper<'x, T> { + type Output = T; + + fn index(&self, range: Range) -> &Self::Output { + self.slice(range).expect("out of bounds") + } +} + +/// A remapper that can remap diff ops to the original slices. +/// +/// The idea here is that when a [`TextDiff`](crate::TextDiff) is created from +/// two strings and the internal tokenization is used, this remapper can take +/// a range in the tokenized sequences and remap it to the original string. +/// This is particularly useful when you want to do things like character or +/// grapheme level diffs but you want to not have to iterate over small sequences +/// but large consequitive ones from the source. +/// +/// ```rust +/// use similar::{ChangeTag, TextDiff}; +/// use similar::utils::TextDiffRemapper; +/// +/// let old = "yo! foo bar baz"; +/// let new = "yo! foo bor baz"; +/// let diff = TextDiff::from_words(old, new); +/// let remapper = TextDiffRemapper::from_text_diff(&diff, old, new); +/// let changes: Vec<_> = diff.ops() +/// .iter() +/// .flat_map(move |x| remapper.iter_slices(x)) +/// .collect(); +/// +/// assert_eq!(changes, vec![ +/// (ChangeTag::Equal, "yo! foo "), +/// (ChangeTag::Delete, "bar"), +/// (ChangeTag::Insert, "bor"), +/// (ChangeTag::Equal, " baz") +/// ]); +pub struct TextDiffRemapper<'x, T: ?Sized> { + old: SliceRemapper<'x, T>, + new: SliceRemapper<'x, T>, +} + +impl<'x, 'slices, T: DiffableStr + ?Sized> TextDiffRemapper<'x, T> { + /// Creates a new remapper from strings and slices. + pub fn new( + old_slices: &[&'x T], + new_slices: &[&'x T], + old: &'x T, + new: &'x T, + ) -> TextDiffRemapper<'x, T> { + TextDiffRemapper { + old: SliceRemapper::new(old, old_slices), + new: SliceRemapper::new(new, new_slices), + } + } + + /// Creates a new remapper from a text diff and the original strings. + pub fn from_text_diff<'old, 'new, 'bufs>( + diff: &TextDiff<'old, 'new, 'bufs, T>, + old: &'x T, + new: &'x T, + ) -> TextDiffRemapper<'x, T> + where + 'old: 'x, + 'new: 'x, + { + TextDiffRemapper { + old: SliceRemapper::new(old, diff.old_slices()), + new: SliceRemapper::new(new, diff.new_slices()), + } + } + + /// Slices into the old string. + pub fn slice_old(&self, range: Range) -> Option<&'x T> { + self.old.slice(range) + } + + /// Slices into the new string. + pub fn slice_new(&self, range: Range) -> Option<&'x T> { + self.new.slice(range) + } + + /// Given a diffop yields the changes it encodes against the original strings. + /// + /// This is the same as the [`DiffOp::iter_slices`] method. + /// + /// ## Panics + /// + /// This method can panic if the input strings passed to the constructor + /// are incompatible with the input strings passed to the diffing algorithm. + pub fn iter_slices(&self, op: &DiffOp) -> impl Iterator { + // note: this is equivalent to the code in `DiffOp::iter_slices`. It is + // a copy/paste because the slicing currently cannot be well abstracted + // because of lifetime issues caused by the `Index` trait. + match *op { + DiffOp::Equal { old_index, len, .. } => { + Some((ChangeTag::Equal, self.old.slice(old_index..old_index + len))) + .into_iter() + .chain(None.into_iter()) + } + DiffOp::Insert { + new_index, new_len, .. + } => Some(( + ChangeTag::Insert, + self.new.slice(new_index..new_index + new_len), + )) + .into_iter() + .chain(None.into_iter()), + DiffOp::Delete { + old_index, old_len, .. + } => Some(( + ChangeTag::Delete, + self.old.slice(old_index..old_index + old_len), + )) + .into_iter() + .chain(None.into_iter()), + DiffOp::Replace { + old_index, + old_len, + new_index, + new_len, + } => Some(( + ChangeTag::Delete, + self.old.slice(old_index..old_index + old_len), + )) + .into_iter() + .chain( + Some(( + ChangeTag::Insert, + self.new.slice(new_index..new_index + new_len), + )) + .into_iter(), + ), + } + .map(|(tag, opt_val)| (tag, opt_val.expect("slice out of bounds"))) + } +} + +/// Shortcut for diffing two slices. +/// +/// This function produces the diff of two slices and returns a vector +/// with the changes. +/// +/// ```rust +/// use similar::{Algorithm, ChangeTag}; +/// use similar::utils::diff_slices; +/// +/// let old = "foo\nbar\nbaz".lines().collect::>(); +/// let new = "foo\nbar\nBAZ".lines().collect::>(); +/// assert_eq!(diff_slices(Algorithm::Myers, &old, &new), vec![ +/// (ChangeTag::Equal, &["foo", "bar"][..]), +/// (ChangeTag::Delete, &["baz"][..]), +/// (ChangeTag::Insert, &["BAZ"][..]), +/// ]); +/// ``` +pub fn diff_slices<'x, T: PartialEq + Hash + Ord>( + alg: Algorithm, + old: &'x [T], + new: &'x [T], +) -> Vec<(ChangeTag, &'x [T])> { + capture_diff_slices(alg, old, new) + .iter() + .flat_map(|op| op.iter_slices(old, new)) + .collect() +} + +/// Shortcut for making a character level diff. +/// +/// This function produces the diff of two strings and returns a vector +/// with the changes. It returns connected slices into the original string +/// rather than character level slices. +/// +/// ```rust +/// use similar::{Algorithm, ChangeTag}; +/// use similar::utils::diff_chars; +/// +/// assert_eq!(diff_chars(Algorithm::Myers, "foobarbaz", "fooBARbaz"), vec![ +/// (ChangeTag::Equal, "foo"), +/// (ChangeTag::Delete, "bar"), +/// (ChangeTag::Insert, "BAR"), +/// (ChangeTag::Equal, "baz"), +/// ]); +/// ``` +pub fn diff_chars<'x, T: DiffableStrRef + ?Sized>( + alg: Algorithm, + old: &'x T, + new: &'x T, +) -> Vec<(ChangeTag, &'x T::Output)> { + let old = old.as_diffable_str(); + let new = new.as_diffable_str(); + let diff = TextDiff::configure().algorithm(alg).diff_chars(old, new); + let remapper = TextDiffRemapper::from_text_diff(&diff, old, new); + diff.ops() + .iter() + .flat_map(move |x| remapper.iter_slices(x)) + .collect() +} + +/// Shortcut for making a word level diff. +/// +/// This function produces the diff of two strings and returns a vector +/// with the changes. It returns connected slices into the original string +/// rather than word level slices. +/// +/// ```rust +/// use similar::{Algorithm, ChangeTag}; +/// use similar::utils::diff_words; +/// +/// assert_eq!(diff_words(Algorithm::Myers, "foo bar baz", "foo bor baz"), vec![ +/// (ChangeTag::Equal, "foo "), +/// (ChangeTag::Delete, "bar"), +/// (ChangeTag::Insert, "bor"), +/// (ChangeTag::Equal, " baz"), +/// ]); +/// ``` +pub fn diff_words<'x, T: DiffableStrRef + ?Sized>( + alg: Algorithm, + old: &'x T, + new: &'x T, +) -> Vec<(ChangeTag, &'x T::Output)> { + let old = old.as_diffable_str(); + let new = new.as_diffable_str(); + let diff = TextDiff::configure().algorithm(alg).diff_words(old, new); + let remapper = TextDiffRemapper::from_text_diff(&diff, old, new); + diff.ops() + .iter() + .flat_map(move |x| remapper.iter_slices(x)) + .collect() +} + +/// Shortcut for making a unicode word level diff. +/// +/// This function produces the diff of two strings and returns a vector +/// with the changes. It returns connected slices into the original string +/// rather than word level slices. +/// +/// ```rust +/// use similar::{Algorithm, ChangeTag}; +/// use similar::utils::diff_unicode_words; +/// +/// let old = "The quick (\"brown\") fox can't jump 32.3 feet, right?"; +/// let new = "The quick (\"brown\") fox can't jump 9.84 meters, right?"; +/// assert_eq!(diff_unicode_words(Algorithm::Myers, old, new), vec![ +/// (ChangeTag::Equal, "The quick (\"brown\") fox can\'t jump "), +/// (ChangeTag::Delete, "32.3"), +/// (ChangeTag::Insert, "9.84"), +/// (ChangeTag::Equal, " "), +/// (ChangeTag::Delete, "feet"), +/// (ChangeTag::Insert, "meters"), +/// (ChangeTag::Equal, ", right?") +/// ]); +/// ``` +/// +/// This requires the `unicode` feature. +#[cfg(feature = "unicode")] +pub fn diff_unicode_words<'x, T: DiffableStrRef + ?Sized>( + alg: Algorithm, + old: &'x T, + new: &'x T, +) -> Vec<(ChangeTag, &'x T::Output)> { + let old = old.as_diffable_str(); + let new = new.as_diffable_str(); + let diff = TextDiff::configure() + .algorithm(alg) + .diff_unicode_words(old, new); + let remapper = TextDiffRemapper::from_text_diff(&diff, old, new); + diff.ops() + .iter() + .flat_map(move |x| remapper.iter_slices(x)) + .collect() +} + +/// Shortcut for making a grapheme level diff. +/// +/// This function produces the diff of two strings and returns a vector +/// with the changes. It returns connected slices into the original string +/// rather than grapheme level slices. +/// +/// ```rust +/// use similar::{Algorithm, ChangeTag}; +/// use similar::utils::diff_graphemes; +/// +/// let old = "The flag of Austria is 🇦🇹"; +/// let new = "The flag of Albania is 🇦🇱"; +/// assert_eq!(diff_graphemes(Algorithm::Myers, old, new), vec![ +/// (ChangeTag::Equal, "The flag of A"), +/// (ChangeTag::Delete, "ustr"), +/// (ChangeTag::Insert, "lban"), +/// (ChangeTag::Equal, "ia is "), +/// (ChangeTag::Delete, "🇦🇹"), +/// (ChangeTag::Insert, "🇦🇱"), +/// ]); +/// ``` +/// +/// This requires the `unicode` feature. +#[cfg(feature = "unicode")] +pub fn diff_graphemes<'x, T: DiffableStrRef + ?Sized>( + alg: Algorithm, + old: &'x T, + new: &'x T, +) -> Vec<(ChangeTag, &'x T::Output)> { + let old = old.as_diffable_str(); + let new = new.as_diffable_str(); + let diff = TextDiff::configure() + .algorithm(alg) + .diff_graphemes(old, new); + let remapper = TextDiffRemapper::from_text_diff(&diff, old, new); + diff.ops() + .iter() + .flat_map(move |x| remapper.iter_slices(x)) + .collect() +} + +/// Shortcut for making a line diff. +/// +/// This function produces the diff of two slices and returns a vector +/// with the changes. Unlike [`diff_chars`] or [`diff_slices`] it returns a +/// change tag for each line. +/// +/// ```rust +/// use similar::{Algorithm, ChangeTag}; +/// use similar::utils::diff_lines; +/// +/// assert_eq!(diff_lines(Algorithm::Myers, "foo\nbar\nbaz\nblah", "foo\nbar\nbaz\nblurgh"), vec![ +/// (ChangeTag::Equal, "foo\n"), +/// (ChangeTag::Equal, "bar\n"), +/// (ChangeTag::Equal, "baz\n"), +/// (ChangeTag::Delete, "blah"), +/// (ChangeTag::Insert, "blurgh"), +/// ]); +/// ``` +pub fn diff_lines<'x, T: DiffableStrRef + ?Sized>( + alg: Algorithm, + old: &'x T, + new: &'x T, +) -> Vec<(ChangeTag, &'x T::Output)> { + TextDiff::configure() + .algorithm(alg) + .diff_lines(old, new) + .iter_all_changes() + .map(|change| (change.tag(), change.value())) + .collect() +} + +#[test] +fn test_remapper() { + let a = "foo bar baz"; + let words = a.tokenize_words(); + dbg!(&words); + let remap = SliceRemapper::new(a, &words); + assert_eq!(remap.slice(0..3), Some("foo bar")); + assert_eq!(remap.slice(1..3), Some(" bar")); + assert_eq!(remap.slice(0..1), Some("foo")); + assert_eq!(remap.slice(0..5), Some("foo bar baz")); + assert_eq!(remap.slice(0..6), None); +} -- cgit v1.2.3