diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:47:55 +0000 |
commit | 2aadc03ef15cb5ca5cc2af8a7c08e070742f0ac4 (patch) | |
tree | 033cc839730fda84ff08db877037977be94e5e3a /vendor/similar/src/text | |
parent | Initial commit. (diff) | |
download | cargo-upstream.tar.xz cargo-upstream.zip |
Adding upstream version 0.70.1+ds1. [upstream/0.70.1+ds1] [upstream]
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/similar/src/text')
15 files changed, 2330 insertions, 0 deletions
diff --git a/vendor/similar/src/text/abstraction.rs b/vendor/similar/src/text/abstraction.rs new file mode 100644 index 0000000..99678ff --- /dev/null +++ b/vendor/similar/src/text/abstraction.rs @@ -0,0 +1,450 @@ +use std::borrow::Cow; +use std::hash::Hash; +use std::ops::Range; + +/// Reference to a [`DiffableStr`]. +/// +/// This type exists because while the library only really provides ways to +/// work with `&str` and `&[u8]` there are types that deref into those string +/// slices such as `String` and `Vec<u8>`. +/// +/// This trait is used in the library whenever it's nice to be able to pass +/// strings of different types in. +/// +/// Requires the `text` feature. +pub trait DiffableStrRef { + /// The type of the resolved [`DiffableStr`]. + type Output: DiffableStr + ?Sized; + + /// Resolves the reference. + fn as_diffable_str(&self) -> &Self::Output; +} + +impl<T: DiffableStr + ?Sized> DiffableStrRef for T { + type Output = T; + + fn as_diffable_str(&self) -> &T { + self + } +} + +impl DiffableStrRef for String { + type Output = str; + + fn as_diffable_str(&self) -> &str { + self.as_str() + } +} + +impl<'a, T: DiffableStr + ?Sized> DiffableStrRef for Cow<'a, T> { + type Output = T; + + fn as_diffable_str(&self) -> &T { + self + } +} + +/// All supported diffable strings. +/// +/// The text module can work with different types of strings depending +/// on how the crate is compiled. Out of the box `&str` is always supported +/// but with the `bytes` feature one can also work with `[u8]` slices for +/// as long as they are ASCII compatible. +/// +/// Requires the `text` feature. +pub trait DiffableStr: Hash + PartialEq + PartialOrd + Ord + Eq + ToOwned { + /// Splits the value into newlines with newlines attached. + fn tokenize_lines(&self) -> Vec<&Self>; + + /// Splits the value into newlines with newlines separated. + fn tokenize_lines_and_newlines(&self) -> Vec<&Self>; + + /// Tokenizes into words. 
+ fn tokenize_words(&self) -> Vec<&Self>; + + /// Tokenizes the input into characters. + fn tokenize_chars(&self) -> Vec<&Self>; + + /// Tokenizes into unicode words. + #[cfg(feature = "unicode")] + fn tokenize_unicode_words(&self) -> Vec<&Self>; + + /// Tokenizes into unicode graphemes. + #[cfg(feature = "unicode")] + fn tokenize_graphemes(&self) -> Vec<&Self>; + + /// Returns the string as a `&str` if it is valid UTF-8, `None` otherwise. + fn as_str(&self) -> Option<&str>; + + /// Decodes the string (potentially) lossy. + fn to_string_lossy(&self) -> Cow<'_, str>; + + /// Checks if the string ends in a newline. + fn ends_with_newline(&self) -> bool; + + /// The length of the string. + fn len(&self) -> usize; + + /// Slices the string. + fn slice(&self, rng: Range<usize>) -> &Self; + + /// Returns the string as slice of raw bytes. + fn as_bytes(&self) -> &[u8]; + + /// Checks if the string is empty. + fn is_empty(&self) -> bool { + self.len() == 0 + } +} + +impl DiffableStr for str { + fn tokenize_lines(&self) -> Vec<&Self> { + let mut iter = self.char_indices().peekable(); + let mut last_pos = 0; + let mut lines = vec![]; + + while let Some((idx, c)) = iter.next() { + if c == '\r' { + if iter.peek().map_or(false, |x| x.1 == '\n') { + lines.push(&self[last_pos..=idx + 1]); + iter.next(); + last_pos = idx + 2; + } else { + lines.push(&self[last_pos..=idx]); + last_pos = idx + 1; + } + } else if c == '\n' { + lines.push(&self[last_pos..=idx]); + last_pos = idx + 1; + } + } + + if last_pos < self.len() { + lines.push(&self[last_pos..]); + } + + lines + } + + fn tokenize_lines_and_newlines(&self) -> Vec<&Self> { + let mut rv = vec![]; + let mut iter = self.char_indices().peekable(); + + while let Some((idx, c)) = iter.next() { + let is_newline = c == '\r' || c == '\n'; + let start = idx; + let mut end = idx + c.len_utf8(); + while let Some(&(_, next_char)) = iter.peek() { + if (next_char == '\r' || next_char == '\n') != is_newline { + break; + } + iter.next(); + end += next_char.len_utf8(); + } + 
rv.push(&self[start..end]); + } + + rv + } + + fn tokenize_words(&self) -> Vec<&Self> { + let mut iter = self.char_indices().peekable(); + let mut rv = vec![]; + + while let Some((idx, c)) = iter.next() { + let is_whitespace = c.is_whitespace(); + let start = idx; + let mut end = idx + c.len_utf8(); + while let Some(&(_, next_char)) = iter.peek() { + if next_char.is_whitespace() != is_whitespace { + break; + } + iter.next(); + end += next_char.len_utf8(); + } + rv.push(&self[start..end]); + } + + rv + } + + fn tokenize_chars(&self) -> Vec<&Self> { + self.char_indices() + .map(move |(i, c)| &self[i..i + c.len_utf8()]) + .collect() + } + + #[cfg(feature = "unicode")] + fn tokenize_unicode_words(&self) -> Vec<&Self> { + unicode_segmentation::UnicodeSegmentation::split_word_bounds(self).collect() + } + + #[cfg(feature = "unicode")] + fn tokenize_graphemes(&self) -> Vec<&Self> { + unicode_segmentation::UnicodeSegmentation::graphemes(self, true).collect() + } + + fn as_str(&self) -> Option<&str> { + Some(self) + } + + fn to_string_lossy(&self) -> Cow<'_, str> { + Cow::Borrowed(self) + } + + fn ends_with_newline(&self) -> bool { + self.ends_with(&['\r', '\n'][..]) + } + + fn len(&self) -> usize { + str::len(self) + } + + fn slice(&self, rng: Range<usize>) -> &Self { + &self[rng] + } + + fn as_bytes(&self) -> &[u8] { + str::as_bytes(self) + } +} + +#[cfg(feature = "bytes")] +mod bytes_support { + use super::*; + + use bstr::ByteSlice; + + impl DiffableStrRef for Vec<u8> { + type Output = [u8]; + + fn as_diffable_str(&self) -> &[u8] { + self.as_slice() + } + } + + /// Allows viewing ASCII compatible byte slices as strings. + /// + /// Requires the `bytes` feature. 
+ impl DiffableStr for [u8] { + fn tokenize_lines(&self) -> Vec<&Self> { + let mut iter = self.char_indices().peekable(); + let mut last_pos = 0; + let mut lines = vec![]; + + while let Some((_, end, c)) = iter.next() { + if c == '\r' { + if iter.peek().map_or(false, |x| x.2 == '\n') { + lines.push(&self[last_pos..end + 1]); + iter.next(); + last_pos = end + 1; + } else { + lines.push(&self[last_pos..end]); + last_pos = end; + } + } else if c == '\n' { + lines.push(&self[last_pos..end]); + last_pos = end; + } + } + + if last_pos < self.len() { + lines.push(&self[last_pos..]); + } + + lines + } + + fn tokenize_lines_and_newlines(&self) -> Vec<&Self> { + let mut rv = vec![]; + let mut iter = self.char_indices().peekable(); + + while let Some((start, mut end, c)) = iter.next() { + let is_newline = c == '\r' || c == '\n'; + while let Some(&(_, new_end, next_char)) = iter.peek() { + if (next_char == '\r' || next_char == '\n') != is_newline { + break; + } + iter.next(); + end = new_end; + } + rv.push(&self[start..end]); + } + + rv + } + + fn tokenize_words(&self) -> Vec<&Self> { + let mut iter = self.char_indices().peekable(); + let mut rv = vec![]; + + while let Some((start, mut end, c)) = iter.next() { + let is_whitespace = c.is_whitespace(); + while let Some(&(_, new_end, next_char)) = iter.peek() { + if next_char.is_whitespace() != is_whitespace { + break; + } + iter.next(); + end = new_end; + } + rv.push(&self[start..end]); + } + + rv + } + + #[cfg(feature = "unicode")] + fn tokenize_unicode_words(&self) -> Vec<&Self> { + self.words_with_breaks().map(|x| x.as_bytes()).collect() + } + + #[cfg(feature = "unicode")] + fn tokenize_graphemes(&self) -> Vec<&Self> { + self.graphemes().map(|x| x.as_bytes()).collect() + } + + fn tokenize_chars(&self) -> Vec<&Self> { + self.char_indices() + .map(move |(start, end, _)| &self[start..end]) + .collect() + } + + fn as_str(&self) -> Option<&str> { + std::str::from_utf8(self).ok() + } + + fn to_string_lossy(&self) -> Cow<'_, str> { 
+ String::from_utf8_lossy(self) + } + + fn ends_with_newline(&self) -> bool { + if let Some(b'\r') | Some(b'\n') = self.last_byte() { + true + } else { + false + } + } + + fn len(&self) -> usize { + <[u8]>::len(self) + } + + fn slice(&self, rng: Range<usize>) -> &Self { + &self[rng] + } + + fn as_bytes(&self) -> &[u8] { + self + } + } +} + +#[test] +fn test_split_lines() { + assert_eq!( + DiffableStr::tokenize_lines("first\nsecond\rthird\r\nfourth\nlast"), + vec!["first\n", "second\r", "third\r\n", "fourth\n", "last"] + ); + assert_eq!(DiffableStr::tokenize_lines("\n\n"), vec!["\n", "\n"]); + assert_eq!(DiffableStr::tokenize_lines("\n"), vec!["\n"]); + assert!(DiffableStr::tokenize_lines("").is_empty()); +} + +#[test] +fn test_split_words() { + assert_eq!( + DiffableStr::tokenize_words("foo bar baz\n\n aha"), + ["foo", " ", "bar", " ", "baz", "\n\n ", "aha"] + ); +} + +#[test] +fn test_split_chars() { + assert_eq!( + DiffableStr::tokenize_chars("abcfö❄️"), + vec!["a", "b", "c", "f", "ö", "❄", "\u{fe0f}"] + ); +} + +#[test] +#[cfg(feature = "unicode")] +fn test_split_graphemes() { + assert_eq!( + DiffableStr::tokenize_graphemes("abcfö❄️"), + vec!["a", "b", "c", "f", "ö", "❄️"] + ); +} + +#[test] +#[cfg(feature = "bytes")] +fn test_split_lines_bytes() { + assert_eq!( + DiffableStr::tokenize_lines("first\nsecond\rthird\r\nfourth\nlast".as_bytes()), + vec![ + "first\n".as_bytes(), + "second\r".as_bytes(), + "third\r\n".as_bytes(), + "fourth\n".as_bytes(), + "last".as_bytes() + ] + ); + assert_eq!( + DiffableStr::tokenize_lines("\n\n".as_bytes()), + vec!["\n".as_bytes(), "\n".as_bytes()] + ); + assert_eq!( + DiffableStr::tokenize_lines("\n".as_bytes()), + vec!["\n".as_bytes()] + ); + assert!(DiffableStr::tokenize_lines("".as_bytes()).is_empty()); +} + +#[test] +#[cfg(feature = "bytes")] +fn test_split_words_bytes() { + assert_eq!( + DiffableStr::tokenize_words("foo bar baz\n\n aha".as_bytes()), + [ + &b"foo"[..], + &b" "[..], + &b"bar"[..], + &b" "[..], + &b"baz"[..], + 
&b"\n\n "[..], + &b"aha"[..] + ] + ); +} + +#[test] +#[cfg(feature = "bytes")] +fn test_split_chars_bytes() { + assert_eq!( + DiffableStr::tokenize_chars("abcfö❄️".as_bytes()), + vec![ + &b"a"[..], + &b"b"[..], + &b"c"[..], + &b"f"[..], + "ö".as_bytes(), + "❄".as_bytes(), + "\u{fe0f}".as_bytes() + ] + ); +} + +#[test] +#[cfg(all(feature = "bytes", feature = "unicode"))] +fn test_split_graphemes_bytes() { + assert_eq!( + DiffableStr::tokenize_graphemes("abcfö❄️".as_bytes()), + vec![ + &b"a"[..], + &b"b"[..], + &b"c"[..], + &b"f"[..], + "ö".as_bytes(), + "❄️".as_bytes() + ] + ); +} diff --git a/vendor/similar/src/text/inline.rs b/vendor/similar/src/text/inline.rs new file mode 100644 index 0000000..c9f0f7f --- /dev/null +++ b/vendor/similar/src/text/inline.rs @@ -0,0 +1,337 @@ +#![cfg(feature = "inline")] +use std::borrow::Cow; +use std::fmt; + +use crate::text::{DiffableStr, TextDiff}; +use crate::types::{Algorithm, Change, ChangeTag, DiffOp, DiffTag}; +use crate::{capture_diff_deadline, get_diff_ratio}; + +use std::ops::Index; +use std::time::{Duration, Instant}; + +use super::utils::upper_seq_ratio; + +struct MultiLookup<'bufs, 's, T: DiffableStr + ?Sized> { + strings: &'bufs [&'s T], + seqs: Vec<(&'s T, usize, usize)>, +} + +impl<'bufs, 's, T: DiffableStr + ?Sized> MultiLookup<'bufs, 's, T> { + fn new(strings: &'bufs [&'s T]) -> MultiLookup<'bufs, 's, T> { + let mut seqs = Vec::new(); + for (string_idx, string) in strings.iter().enumerate() { + let mut offset = 0; + let iter = { + #[cfg(feature = "unicode")] + { + string.tokenize_unicode_words() + } + #[cfg(not(feature = "unicode"))] + { + string.tokenize_words() + } + }; + for word in iter { + seqs.push((word, string_idx, offset)); + offset += word.len(); + } + } + MultiLookup { strings, seqs } + } + + pub fn len(&self) -> usize { + self.seqs.len() + } + + fn get_original_slices(&self, idx: usize, len: usize) -> Vec<(usize, &'s T)> { + let mut last = None; + let mut rv = Vec::new(); + + for offset in 0..len { + 
let (s, str_idx, char_idx) = self.seqs[idx + offset]; + last = match last { + None => Some((str_idx, char_idx, s.len())), + Some((last_str_idx, start_char_idx, last_len)) => { + if last_str_idx == str_idx { + Some((str_idx, start_char_idx, last_len + s.len())) + } else { + rv.push(( + last_str_idx, + self.strings[last_str_idx] + .slice(start_char_idx..start_char_idx + last_len), + )); + Some((str_idx, char_idx, s.len())) + } + } + }; + } + + if let Some((str_idx, start_char_idx, len)) = last { + rv.push(( + str_idx, + self.strings[str_idx].slice(start_char_idx..start_char_idx + len), + )); + } + + rv + } +} + +impl<'bufs, 's, T: DiffableStr + ?Sized> Index<usize> for MultiLookup<'bufs, 's, T> { + type Output = T; + + fn index(&self, index: usize) -> &Self::Output { + self.seqs[index].0 + } +} + +fn push_values<'s, T: DiffableStr + ?Sized>( + v: &mut Vec<Vec<(bool, &'s T)>>, + idx: usize, + emphasized: bool, + s: &'s T, +) { + v.resize_with(v.len().max(idx + 1), Vec::new); + // newlines cause all kinds of wacky stuff if they end up highlighted. + // because of this we want to unemphasize all newlines we encounter. + if emphasized { + for seg in s.tokenize_lines_and_newlines() { + v[idx].push((!seg.ends_with_newline(), seg)); + } + } else { + v[idx].push((false, s)); + } +} + +/// Represents the expanded textual change with inline highlights. +/// +/// This is like [`Change`] but with inline highlight info. +#[derive(Debug, PartialEq, Eq, Hash, Clone, Ord, PartialOrd)] +#[cfg_attr(feature = "serde", derive(serde::Serialize))] +pub struct InlineChange<'s, T: DiffableStr + ?Sized> { + tag: ChangeTag, + old_index: Option<usize>, + new_index: Option<usize>, + values: Vec<(bool, &'s T)>, +} + +impl<'s, T: DiffableStr + ?Sized> InlineChange<'s, T> { + /// Returns the change tag. + pub fn tag(&self) -> ChangeTag { + self.tag + } + + /// Returns the old index if available. 
+ pub fn old_index(&self) -> Option<usize> { + self.old_index + } + + /// Returns the new index if available. + pub fn new_index(&self) -> Option<usize> { + self.new_index + } + + /// Returns the changed values. + /// + /// Each item is a tuple in the form `(emphasized, value)` where `emphasized` + /// is true if it should be highlighted as an inline diff. + /// + /// Depending on the type of the underlying [`DiffableStr`] this value is + /// more or less useful. If you always want to have a utf-8 string it's + /// better to use the [`InlineChange::iter_strings_lossy`] method. + pub fn values(&self) -> &[(bool, &'s T)] { + &self.values + } + + /// Iterates over all (potentially lossy) utf-8 decoded values. + /// + /// Each item is a tuple in the form `(emphasized, value)` where `emphasized` + /// is true if it should be highlighted as an inline diff. + pub fn iter_strings_lossy(&self) -> impl Iterator<Item = (bool, Cow<'_, str>)> { + self.values() + .iter() + .map(|(emphasized, raw_value)| (*emphasized, raw_value.to_string_lossy())) + } + + /// Returns `true` if this change does not end in a newline and must be + /// followed up by one if line based diffs are used. 
+ pub fn missing_newline(&self) -> bool { + !self.values.last().map_or(true, |x| x.1.ends_with_newline()) + } +} + +impl<'s, T: DiffableStr + ?Sized> From<Change<&'s T>> for InlineChange<'s, T> { + fn from(change: Change<&'s T>) -> InlineChange<'s, T> { + InlineChange { + tag: change.tag(), + old_index: change.old_index(), + new_index: change.new_index(), + values: vec![(false, change.value())], + } + } +} + +impl<'s, T: DiffableStr + ?Sized> fmt::Display for InlineChange<'s, T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for (emphasized, value) in self.iter_strings_lossy() { + let marker = match (emphasized, self.tag) { + (false, _) | (true, ChangeTag::Equal) => "", + (true, ChangeTag::Delete) => "-", + (true, ChangeTag::Insert) => "+", + }; + write!(f, "{}{}{}", marker, value, marker)?; + } + if self.missing_newline() { + writeln!(f)?; + } + Ok(()) + } +} + +const MIN_RATIO: f32 = 0.5; +const TIMEOUT_MS: u64 = 500; + +pub(crate) fn iter_inline_changes<'x, 'diff, 'old, 'new, 'bufs, T>( + diff: &'diff TextDiff<'old, 'new, 'bufs, T>, + op: &DiffOp, +) -> impl Iterator<Item = InlineChange<'x, T>> + 'diff +where + T: DiffableStr + ?Sized, + 'x: 'diff, + 'old: 'x, + 'new: 'x, +{ + let (tag, old_range, new_range) = op.as_tag_tuple(); + + if let DiffTag::Equal | DiffTag::Insert | DiffTag::Delete = tag { + return Box::new(diff.iter_changes(op).map(|x| x.into())) as Box<dyn Iterator<Item = _>>; + } + + let mut old_index = old_range.start; + let mut new_index = new_range.start; + let old_slices = &diff.old_slices()[old_range]; + let new_slices = &diff.new_slices()[new_range]; + + if upper_seq_ratio(old_slices, new_slices) < MIN_RATIO { + return Box::new(diff.iter_changes(op).map(|x| x.into())) as Box<dyn Iterator<Item = _>>; + } + + let old_lookup = MultiLookup::new(old_slices); + let new_lookup = MultiLookup::new(new_slices); + + let ops = capture_diff_deadline( + Algorithm::Patience, + &old_lookup, + 0..old_lookup.len(), + &new_lookup, + 
0..new_lookup.len(), + Some(Instant::now() + Duration::from_millis(TIMEOUT_MS)), + ); + + if get_diff_ratio(&ops, old_lookup.len(), new_lookup.len()) < MIN_RATIO { + return Box::new(diff.iter_changes(op).map(|x| x.into())) as Box<dyn Iterator<Item = _>>; + } + + let mut old_values = Vec::<Vec<_>>::new(); + let mut new_values = Vec::<Vec<_>>::new(); + + for op in ops { + match op { + DiffOp::Equal { + old_index, + len, + new_index, + } => { + for (idx, slice) in old_lookup.get_original_slices(old_index, len) { + push_values(&mut old_values, idx, false, slice); + } + for (idx, slice) in new_lookup.get_original_slices(new_index, len) { + push_values(&mut new_values, idx, false, slice); + } + } + DiffOp::Delete { + old_index, old_len, .. + } => { + for (idx, slice) in old_lookup.get_original_slices(old_index, old_len) { + push_values(&mut old_values, idx, true, slice); + } + } + DiffOp::Insert { + new_index, new_len, .. + } => { + for (idx, slice) in new_lookup.get_original_slices(new_index, new_len) { + push_values(&mut new_values, idx, true, slice); + } + } + DiffOp::Replace { + old_index, + old_len, + new_index, + new_len, + } => { + for (idx, slice) in old_lookup.get_original_slices(old_index, old_len) { + push_values(&mut old_values, idx, true, slice); + } + for (idx, slice) in new_lookup.get_original_slices(new_index, new_len) { + push_values(&mut new_values, idx, true, slice); + } + } + } + } + + let mut rv = Vec::new(); + + for values in old_values { + rv.push(InlineChange { + tag: ChangeTag::Delete, + old_index: Some(old_index), + new_index: None, + values, + }); + old_index += 1; + } + + for values in new_values { + rv.push(InlineChange { + tag: ChangeTag::Insert, + old_index: None, + new_index: Some(new_index), + values, + }); + new_index += 1; + } + + Box::new(rv.into_iter()) as Box<dyn Iterator<Item = _>> +} + +#[test] +fn test_line_ops_inline() { + let diff = TextDiff::from_lines( + "Hello World\nsome stuff here\nsome more stuff here\n\nAha stuff 
here\nand more stuff", + "Stuff\nHello World\nsome amazing stuff here\nsome more stuff here\n", + ); + assert!(diff.newline_terminated()); + let changes = diff + .ops() + .iter() + .flat_map(|op| diff.iter_inline_changes(op)) + .collect::<Vec<_>>(); + insta::assert_debug_snapshot!(&changes); +} + +#[test] +#[cfg(feature = "serde")] +fn test_serde() { + let diff = TextDiff::from_lines( + "Hello World\nsome stuff here\nsome more stuff here\n\nAha stuff here\nand more stuff", + "Stuff\nHello World\nsome amazing stuff here\nsome more stuff here\n", + ); + assert!(diff.newline_terminated()); + let changes = diff + .ops() + .iter() + .flat_map(|op| diff.iter_inline_changes(op)) + .collect::<Vec<_>>(); + let json = serde_json::to_string_pretty(&changes).unwrap(); + insta::assert_snapshot!(&json); +} diff --git a/vendor/similar/src/text/mod.rs b/vendor/similar/src/text/mod.rs new file mode 100644 index 0000000..0a441d1 --- /dev/null +++ b/vendor/similar/src/text/mod.rs @@ -0,0 +1,771 @@ +//! Text diffing utilities. +use std::borrow::Cow; +use std::cmp::Reverse; +use std::collections::BinaryHeap; +use std::time::{Duration, Instant}; + +mod abstraction; +#[cfg(feature = "inline")] +mod inline; +mod utils; + +pub use self::abstraction::{DiffableStr, DiffableStrRef}; +#[cfg(feature = "inline")] +pub use self::inline::InlineChange; + +use self::utils::{upper_seq_ratio, QuickSeqRatio}; +use crate::algorithms::IdentifyDistinct; +use crate::iter::{AllChangesIter, ChangesIter}; +use crate::udiff::UnifiedDiff; +use crate::{capture_diff_deadline, get_diff_ratio, group_diff_ops, Algorithm, DiffOp}; + +#[derive(Debug, Clone, Copy)] +enum Deadline { + Absolute(Instant), + Relative(Duration), +} + +impl Deadline { + fn into_instant(self) -> Instant { + match self { + Deadline::Absolute(instant) => instant, + Deadline::Relative(duration) => Instant::now() + duration, + } + } +} + +/// A builder type config for more complex uses of [`TextDiff`]. +/// +/// Requires the `text` feature. 
+#[derive(Clone, Debug, Default)] +pub struct TextDiffConfig { + algorithm: Algorithm, + newline_terminated: Option<bool>, + deadline: Option<Deadline>, +} + +impl TextDiffConfig { + /// Changes the algorithm. + /// + /// The default algorithm is [`Algorithm::Myers`]. + pub fn algorithm(&mut self, alg: Algorithm) -> &mut Self { + self.algorithm = alg; + self + } + + /// Sets a deadline for the diff operation. + /// + /// By default a diff will take as long as it takes. For certain diff + /// algorithms like Myers' and Patience a maximum running time can be + /// defined after which the algorithm gives up and approximates. + pub fn deadline(&mut self, deadline: Instant) -> &mut Self { + self.deadline = Some(Deadline::Absolute(deadline)); + self + } + + /// Sets a timeout for the diff operation. + /// + /// This is like [`deadline`](Self::deadline) but accepts a duration. + pub fn timeout(&mut self, timeout: Duration) -> &mut Self { + self.deadline = Some(Deadline::Relative(timeout)); + self + } + + /// Changes the newline termination flag. + /// + /// The default is automatic based on input. This flag controls the + /// behavior of [`TextDiff::iter_changes`] and unified diff generation + /// with regards to newlines. When the flag is set to `false` (which + /// is the default) then newlines are added. Otherwise the newlines + /// from the source sequences are reused. + pub fn newline_terminated(&mut self, yes: bool) -> &mut Self { + self.newline_terminated = Some(yes); + self + } + + /// Creates a diff of lines. + /// + /// This splits the text `old` and `new` into lines preserving newlines + /// in the input. Line diffs are very common and because of that enjoy + /// special handling in similar. When a line diff is created with this + /// method the `newline_terminated` flag is flipped to `true` and will + /// influence the behavior of unified diff generation. 
+ /// + /// ```rust + /// use similar::{TextDiff, ChangeTag}; + /// + /// let diff = TextDiff::configure().diff_lines("a\nb\nc", "a\nb\nC"); + /// let changes: Vec<_> = diff + /// .iter_all_changes() + /// .map(|x| (x.tag(), x.value())) + /// .collect(); + /// + /// assert_eq!(changes, vec![ + /// (ChangeTag::Equal, "a\n"), + /// (ChangeTag::Equal, "b\n"), + /// (ChangeTag::Delete, "c"), + /// (ChangeTag::Insert, "C"), + /// ]); + /// ``` + pub fn diff_lines<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>( + &self, + old: &'old T, + new: &'new T, + ) -> TextDiff<'old, 'new, 'bufs, T::Output> { + self.diff( + Cow::Owned(old.as_diffable_str().tokenize_lines()), + Cow::Owned(new.as_diffable_str().tokenize_lines()), + true, + ) + } + + /// Creates a diff of words. + /// + /// This splits the text into words and whitespace. + /// + /// Note on word diffs: because the text differ will tokenize the strings + /// into small segments it can be inconvenient to work with the results + /// depending on the use case. You might also want to combine word level + /// diffs with the [`TextDiffRemapper`](crate::utils::TextDiffRemapper) + /// which lets you remap the diffs back to the original input strings. 
+ /// + /// ```rust + /// use similar::{TextDiff, ChangeTag}; + /// + /// let diff = TextDiff::configure().diff_words("foo bar baz", "foo BAR baz"); + /// let changes: Vec<_> = diff + /// .iter_all_changes() + /// .map(|x| (x.tag(), x.value())) + /// .collect(); + /// + /// assert_eq!(changes, vec![ + /// (ChangeTag::Equal, "foo"), + /// (ChangeTag::Equal, " "), + /// (ChangeTag::Delete, "bar"), + /// (ChangeTag::Insert, "BAR"), + /// (ChangeTag::Equal, " "), + /// (ChangeTag::Equal, "baz"), + /// ]); + /// ``` + pub fn diff_words<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>( + &self, + old: &'old T, + new: &'new T, + ) -> TextDiff<'old, 'new, 'bufs, T::Output> { + self.diff( + Cow::Owned(old.as_diffable_str().tokenize_words()), + Cow::Owned(new.as_diffable_str().tokenize_words()), + false, + ) + } + + /// Creates a diff of characters. + /// + /// Note on character diffs: because the text differ will tokenize the strings + /// into small segments it can be inconvenient to work with the results + /// depending on the use case. You might also want to combine word level + /// diffs with the [`TextDiffRemapper`](crate::utils::TextDiffRemapper) + /// which lets you remap the diffs back to the original input strings. 
+ /// + /// ```rust + /// use similar::{TextDiff, ChangeTag}; + /// + /// let diff = TextDiff::configure().diff_chars("abcdef", "abcDDf"); + /// let changes: Vec<_> = diff + /// .iter_all_changes() + /// .map(|x| (x.tag(), x.value())) + /// .collect(); + /// + /// assert_eq!(changes, vec![ + /// (ChangeTag::Equal, "a"), + /// (ChangeTag::Equal, "b"), + /// (ChangeTag::Equal, "c"), + /// (ChangeTag::Delete, "d"), + /// (ChangeTag::Delete, "e"), + /// (ChangeTag::Insert, "D"), + /// (ChangeTag::Insert, "D"), + /// (ChangeTag::Equal, "f"), + /// ]); + /// ``` + pub fn diff_chars<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>( + &self, + old: &'old T, + new: &'new T, + ) -> TextDiff<'old, 'new, 'bufs, T::Output> { + self.diff( + Cow::Owned(old.as_diffable_str().tokenize_chars()), + Cow::Owned(new.as_diffable_str().tokenize_chars()), + false, + ) + } + + /// Creates a diff of unicode words. + /// + /// This splits the text into words according to unicode rules. This is + /// generally recommended over [`TextDiffConfig::diff_words`] but + /// requires a dependency. + /// + /// This requires the `unicode` feature. + /// + /// Note on word diffs: because the text differ will tokenize the strings + /// into small segments it can be inconvenient to work with the results + /// depending on the use case. You might also want to combine word level + /// diffs with the [`TextDiffRemapper`](crate::utils::TextDiffRemapper) + /// which lets you remap the diffs back to the original input strings. 
+ /// + /// ```rust + /// use similar::{TextDiff, ChangeTag}; + /// + /// let diff = TextDiff::configure().diff_unicode_words("ah(be)ce", "ah(ah)ce"); + /// let changes: Vec<_> = diff + /// .iter_all_changes() + /// .map(|x| (x.tag(), x.value())) + /// .collect(); + /// + /// assert_eq!(changes, vec![ + /// (ChangeTag::Equal, "ah"), + /// (ChangeTag::Equal, "("), + /// (ChangeTag::Delete, "be"), + /// (ChangeTag::Insert, "ah"), + /// (ChangeTag::Equal, ")"), + /// (ChangeTag::Equal, "ce"), + /// ]); + /// ``` + #[cfg(feature = "unicode")] + pub fn diff_unicode_words<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>( + &self, + old: &'old T, + new: &'new T, + ) -> TextDiff<'old, 'new, 'bufs, T::Output> { + self.diff( + Cow::Owned(old.as_diffable_str().tokenize_unicode_words()), + Cow::Owned(new.as_diffable_str().tokenize_unicode_words()), + false, + ) + } + + /// Creates a diff of graphemes. + /// + /// This requires the `unicode` feature. + /// + /// Note on grapheme diffs: because the text differ will tokenize the strings + /// into small segments it can be inconvenient to work with the results + /// depending on the use case. You might also want to combine word level + /// diffs with the [`TextDiffRemapper`](crate::utils::TextDiffRemapper) + /// which lets you remap the diffs back to the original input strings. 
+ /// + /// ```rust + /// use similar::{TextDiff, ChangeTag}; + /// + /// let diff = TextDiff::configure().diff_graphemes("💩🇦🇹🦠", "💩🇦🇱❄️"); + /// let changes: Vec<_> = diff + /// .iter_all_changes() + /// .map(|x| (x.tag(), x.value())) + /// .collect(); + /// + /// assert_eq!(changes, vec![ + /// (ChangeTag::Equal, "💩"), + /// (ChangeTag::Delete, "🇦🇹"), + /// (ChangeTag::Delete, "🦠"), + /// (ChangeTag::Insert, "🇦🇱"), + /// (ChangeTag::Insert, "❄️"), + /// ]); + /// ``` + #[cfg(feature = "unicode")] + pub fn diff_graphemes<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>( + &self, + old: &'old T, + new: &'new T, + ) -> TextDiff<'old, 'new, 'bufs, T::Output> { + self.diff( + Cow::Owned(old.as_diffable_str().tokenize_graphemes()), + Cow::Owned(new.as_diffable_str().tokenize_graphemes()), + false, + ) + } + + /// Creates a diff of arbitrary slices. + /// + /// ```rust + /// use similar::{TextDiff, ChangeTag}; + /// + /// let old = &["foo", "bar", "baz"]; + /// let new = &["foo", "BAR", "baz"]; + /// let diff = TextDiff::configure().diff_slices(old, new); + /// let changes: Vec<_> = diff + /// .iter_all_changes() + /// .map(|x| (x.tag(), x.value())) + /// .collect(); + /// + /// assert_eq!(changes, vec![ + /// (ChangeTag::Equal, "foo"), + /// (ChangeTag::Delete, "bar"), + /// (ChangeTag::Insert, "BAR"), + /// (ChangeTag::Equal, "baz"), + /// ]); + /// ``` + pub fn diff_slices<'old, 'new, 'bufs, T: DiffableStr + ?Sized>( + &self, + old: &'bufs [&'old T], + new: &'bufs [&'new T], + ) -> TextDiff<'old, 'new, 'bufs, T> { + self.diff(Cow::Borrowed(old), Cow::Borrowed(new), false) + } + + fn diff<'old, 'new, 'bufs, T: DiffableStr + ?Sized>( + &self, + old: Cow<'bufs, [&'old T]>, + new: Cow<'bufs, [&'new T]>, + newline_terminated: bool, + ) -> TextDiff<'old, 'new, 'bufs, T> { + let deadline = self.deadline.map(|x| x.into_instant()); + let ops = if old.len() > 100 || new.len() > 100 { + let ih = IdentifyDistinct::<u32>::new(&old[..], 0..old.len(), &new[..], 0..new.len()); + 
capture_diff_deadline( + self.algorithm, + ih.old_lookup(), + ih.old_range(), + ih.new_lookup(), + ih.new_range(), + deadline, + ) + } else { + capture_diff_deadline( + self.algorithm, + &old[..], + 0..old.len(), + &new[..], + 0..new.len(), + deadline, + ) + }; + TextDiff { + old, + new, + ops, + newline_terminated: self.newline_terminated.unwrap_or(newline_terminated), + algorithm: self.algorithm, + } + } +} + +/// Captures diff op codes for textual diffs. +/// +/// The exact diff behavior is depending on the underlying [`DiffableStr`]. +/// For instance diffs on bytes and strings are slightly different. You can +/// create a text diff from constructors such as [`TextDiff::from_lines`] or +/// the [`TextDiffConfig`] created by [`TextDiff::configure`]. +/// +/// Requires the `text` feature. +pub struct TextDiff<'old, 'new, 'bufs, T: DiffableStr + ?Sized> { + old: Cow<'bufs, [&'old T]>, + new: Cow<'bufs, [&'new T]>, + ops: Vec<DiffOp>, + newline_terminated: bool, + algorithm: Algorithm, +} + +impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs, str> { + /// Configures a text differ before diffing. + pub fn configure() -> TextDiffConfig { + TextDiffConfig::default() + } + + /// Creates a diff of lines. + /// + /// For more information see [`TextDiffConfig::diff_lines`]. + pub fn from_lines<T: DiffableStrRef + ?Sized>( + old: &'old T, + new: &'new T, + ) -> TextDiff<'old, 'new, 'bufs, T::Output> { + TextDiff::configure().diff_lines(old, new) + } + + /// Creates a diff of words. + /// + /// For more information see [`TextDiffConfig::diff_words`]. + pub fn from_words<T: DiffableStrRef + ?Sized>( + old: &'old T, + new: &'new T, + ) -> TextDiff<'old, 'new, 'bufs, T::Output> { + TextDiff::configure().diff_words(old, new) + } + + /// Creates a diff of chars. + /// + /// For more information see [`TextDiffConfig::diff_chars`]. 
+ pub fn from_chars<T: DiffableStrRef + ?Sized>( + old: &'old T, + new: &'new T, + ) -> TextDiff<'old, 'new, 'bufs, T::Output> { + TextDiff::configure().diff_chars(old, new) + } + + /// Creates a diff of unicode words. + /// + /// For more information see [`TextDiffConfig::diff_unicode_words`]. + /// + /// This requires the `unicode` feature. + #[cfg(feature = "unicode")] + pub fn from_unicode_words<T: DiffableStrRef + ?Sized>( + old: &'old T, + new: &'new T, + ) -> TextDiff<'old, 'new, 'bufs, T::Output> { + TextDiff::configure().diff_unicode_words(old, new) + } + + /// Creates a diff of graphemes. + /// + /// For more information see [`TextDiffConfig::diff_graphemes`]. + /// + /// This requires the `unicode` feature. + #[cfg(feature = "unicode")] + pub fn from_graphemes<T: DiffableStrRef + ?Sized>( + old: &'old T, + new: &'new T, + ) -> TextDiff<'old, 'new, 'bufs, T::Output> { + TextDiff::configure().diff_graphemes(old, new) + } +} + +impl<'old, 'new, 'bufs, T: DiffableStr + ?Sized + 'old + 'new> TextDiff<'old, 'new, 'bufs, T> { + /// Creates a diff of arbitrary slices. + /// + /// For more information see [`TextDiffConfig::diff_slices`]. + pub fn from_slices( + old: &'bufs [&'old T], + new: &'bufs [&'new T], + ) -> TextDiff<'old, 'new, 'bufs, T> { + TextDiff::configure().diff_slices(old, new) + } + + /// The name of the algorithm that created the diff. + pub fn algorithm(&self) -> Algorithm { + self.algorithm + } + + /// Returns `true` if items in the slice are newline terminated. + /// + /// This flag is used by the unified diff writer to determine if extra + /// newlines have to be added. + pub fn newline_terminated(&self) -> bool { + self.newline_terminated + } + + /// Returns all old slices. + pub fn old_slices(&self) -> &[&'old T] { + &self.old + } + + /// Returns all new slices. + pub fn new_slices(&self) -> &[&'new T] { + &self.new + } + + /// Return a measure of the sequences' similarity in the range `0..=1`. 
+    ///
+    /// A ratio of `1.0` means the two sequences are a complete match, a
+    /// ratio of `0.0` would indicate completely distinct sequences.
+    ///
+    /// ```rust
+    /// # use similar::TextDiff;
+    /// let diff = TextDiff::from_chars("abcd", "bcde");
+    /// assert_eq!(diff.ratio(), 0.75);
+    /// ```
+    pub fn ratio(&self) -> f32 {
+        let ops = self.ops();
+        get_diff_ratio(ops, self.old.len(), self.new.len())
+    }
+
+    /// Iterates over the changes the op expands to.
+    ///
+    /// This is a convenient way to resolve the different ways in which a
+    /// change can be encoded (insert/delete vs replace): the values are
+    /// looked up from the appropriate slice and the indexes are taken care
+    /// of.
+    pub fn iter_changes<'x, 'slf>(
+        &'slf self,
+        op: &DiffOp,
+    ) -> ChangesIter<'slf, [&'x T], [&'x T], &'x T>
+    where
+        'x: 'slf,
+        'old: 'x,
+        'new: 'x,
+    {
+        let (old, new) = (self.old_slices(), self.new_slices());
+        op.iter_changes(old, new)
+    }
+
+    /// Returns the captured diff ops.
+    pub fn ops(&self) -> &[DiffOp] {
+        &self.ops[..]
+    }
+
+    /// Isolate change clusters by eliminating ranges with no changes.
+    ///
+    /// This is equivalent to calling [`group_diff_ops`] on a copy of
+    /// [`TextDiff::ops`]; `n` is passed through to it unchanged.
+    pub fn grouped_ops(&self, n: usize) -> Vec<Vec<DiffOp>> {
+        let ops = self.ops().to_vec();
+        group_diff_ops(ops, n)
+    }
+
+    /// Flattens out the diff into all changes.
+    ///
+    /// This is a shortcut for combining [`TextDiff::ops`] with
+    /// [`TextDiff::iter_changes`].
+    pub fn iter_all_changes<'x, 'slf>(&'slf self) -> AllChangesIter<'slf, 'x, T>
+    where
+        'x: 'slf + 'old + 'new,
+        'old: 'x,
+        'new: 'x,
+    {
+        AllChangesIter::new(self.old_slices(), self.new_slices(), self.ops())
+    }
+
+    /// Returns a unified diff formatter that borrows this diff.
+    pub fn unified_diff<'diff>(&'diff self) -> UnifiedDiff<'diff, 'old, 'new, 'bufs, T> {
+        UnifiedDiff::from_text_diff(self)
+    }
+
+    /// Iterates over the changes the op expands to with inline emphasis.
+ /// + /// This is very similar to [`TextDiff::iter_changes`] but it performs a second + /// level diff on adjacent line replacements. The exact behavior of + /// this function with regards to how it detects those inline changes + /// is currently not defined and will likely change over time. + /// + /// As of similar 1.2.0 the behavior of this function changes depending on + /// if the `unicode` feature is enabled or not. It will prefer unicode word + /// splitting over word splitting depending on the feature flag. + /// + /// Requires the `inline` feature. + #[cfg(feature = "inline")] + pub fn iter_inline_changes<'slf>( + &'slf self, + op: &DiffOp, + ) -> impl Iterator<Item = InlineChange<'slf, T>> + '_ + where + 'slf: 'old + 'new, + { + inline::iter_inline_changes(self, op) + } +} + +/// Use the text differ to find `n` close matches. +/// +/// `cutoff` defines the threshold which needs to be reached for a word +/// to be considered similar. See [`TextDiff::ratio`] for more information. +/// +/// ``` +/// # use similar::get_close_matches; +/// let matches = get_close_matches( +/// "appel", +/// &["ape", "apple", "peach", "puppy"][..], +/// 3, +/// 0.6 +/// ); +/// assert_eq!(matches, vec!["apple", "ape"]); +/// ``` +/// +/// Requires the `text` feature. +pub fn get_close_matches<'a, T: DiffableStr + ?Sized>( + word: &T, + possibilities: &[&'a T], + n: usize, + cutoff: f32, +) -> Vec<&'a T> { + let mut matches = BinaryHeap::new(); + let seq1 = word.tokenize_chars(); + let quick_ratio = QuickSeqRatio::new(&seq1); + + for &possibility in possibilities { + let seq2 = possibility.tokenize_chars(); + + if upper_seq_ratio(&seq1, &seq2) < cutoff || quick_ratio.calc(&seq2) < cutoff { + continue; + } + + let diff = TextDiff::from_slices(&seq1, &seq2); + let ratio = diff.ratio(); + if ratio >= cutoff { + // we're putting the word itself in reverse in so that matches with + // the same ratio are ordered lexicographically. 
+            // `f32` is not `Ord`, so the ratio is scaled to a `u32` that can
+            // serve as the heap key.
+            let score = (ratio * std::u32::MAX as f32) as u32;
+            matches.push((score, Reverse(possibility)));
+        }
+    }
+
+    // Pop the best entries off the heap until `n` results were collected or
+    // the heap ran empty.
+    let mut rv = Vec::new();
+    while rv.len() < n {
+        match matches.pop() {
+            Some((_, Reverse(candidate))) => rv.push(candidate),
+            None => break,
+        }
+    }
+
+    rv
+}
+
+/// Snapshot of the ops captured for a line diff with one changed line.
+#[test]
+fn test_captured_ops() {
+    let diff = TextDiff::from_lines(
+        "Hello World\nsome stuff here\nsome more stuff here\n",
+        "Hello World\nsome amazing stuff here\nsome more stuff here\n",
+    );
+    insta::assert_debug_snapshot!(&diff.ops());
+}
+
+/// Snapshot of all changes of a word diff over the same inputs.
+#[test]
+fn test_captured_word_ops() {
+    let diff = TextDiff::from_words(
+        "Hello World\nsome stuff here\nsome more stuff here\n",
+        "Hello World\nsome amazing stuff here\nsome more stuff here\n",
+    );
+    let changes = diff
+        .ops()
+        .iter()
+        .flat_map(|op| diff.iter_changes(op))
+        .collect::<Vec<_>>();
+    insta::assert_debug_snapshot!(&changes);
+}
+
+/// Snapshot of the unified diff output (context radius 3, with header).
+#[test]
+fn test_unified_diff() {
+    let diff = TextDiff::from_lines(
+        "Hello World\nsome stuff here\nsome more stuff here\n",
+        "Hello World\nsome amazing stuff here\nsome more stuff here\n",
+    );
+    assert!(diff.newline_terminated());
+    insta::assert_snapshot!(&diff
+        .unified_diff()
+        .context_radius(3)
+        .header("old", "new")
+        .to_string());
+}
+
+/// Snapshot of the changes of a line diff; with the `bytes` feature the same
+/// diff over byte slices has to render to the same strings.
+#[test]
+fn test_line_ops() {
+    let a = "Hello World\nsome stuff here\nsome more stuff here\n";
+    let b = "Hello World\nsome amazing stuff here\nsome more stuff here\n";
+    let diff = TextDiff::from_lines(a, b);
+    assert!(diff.newline_terminated());
+    let changes = diff
+        .ops()
+        .iter()
+        .flat_map(|op| diff.iter_changes(op))
+        .collect::<Vec<_>>();
+    insta::assert_debug_snapshot!(&changes);
+
+    #[cfg(feature = "bytes")]
+    {
+        let byte_diff = TextDiff::from_lines(a.as_bytes(), b.as_bytes());
+        let byte_changes = byte_diff
+            .ops()
+            .iter()
+            .flat_map(|op| byte_diff.iter_changes(op))
+            .collect::<Vec<_>>();
+        for (change, byte_change) in changes.iter().zip(byte_changes.iter()) {
+            assert_eq!(change.to_string_lossy(), byte_change.to_string_lossy());
+        }
+    }
+}
+
+#[test]
+fn
test_virtual_newlines() {
+    // The old input does not end in a newline while the new one does; the
+    // diff is still expected to report itself as newline terminated.
+    let diff = TextDiff::from_lines("a\nb", "a\nc\n");
+    assert!(diff.newline_terminated());
+    let changes = diff
+        .ops()
+        .iter()
+        .flat_map(|op| diff.iter_changes(op))
+        .collect::<Vec<_>>();
+    insta::assert_debug_snapshot!(&changes);
+}
+
+/// Snapshot of the ops of a char diff; with the `bytes` feature the same diff
+/// over byte slices has to yield identical ops.
+#[test]
+fn test_char_diff() {
+    let diff = TextDiff::from_chars("Hello World", "Hallo Welt");
+    insta::assert_debug_snapshot!(diff.ops());
+
+    #[cfg(feature = "bytes")]
+    {
+        let byte_diff = TextDiff::from_chars("Hello World".as_bytes(), "Hallo Welt".as_bytes());
+        assert_eq!(diff.ops(), byte_diff.ops());
+    }
+}
+
+/// Pins the ratio of a partial match and of two empty inputs.
+#[test]
+fn test_ratio() {
+    let diff = TextDiff::from_chars("abcd", "bcde");
+    assert_eq!(diff.ratio(), 0.75);
+    let diff = TextDiff::from_chars("", "");
+    assert_eq!(diff.ratio(), 1.0);
+}
+
+/// Pins the selection and the ordering of `get_close_matches`, including the
+/// case where fewer than `n` candidates reach the cutoff.
+#[test]
+fn test_get_close_matches() {
+    let matches = get_close_matches("appel", &["ape", "apple", "peach", "puppy"][..], 3, 0.6);
+    assert_eq!(matches, vec!["apple", "ape"]);
+    let matches = get_close_matches(
+        "hulo",
+        &[
+            "hi", "hulu", "hali", "hoho", "amaz", "zulo", "blah", "hopp", "uulo", "aulo",
+        ][..],
+        5,
+        0.7,
+    );
+    assert_eq!(matches, vec!["aulo", "hulu", "uulo", "zulo"]);
+}
+
+/// Checks that the changes can outlive the `TextDiff` they were created
+/// from: the helper returns them after the diff itself went out of scope.
+#[test]
+fn test_lifetimes_on_iter() {
+    use crate::Change;
+
+    fn diff_lines<'x, T>(old: &'x T, new: &'x T) -> Vec<Change<&'x T::Output>>
+    where
+        T: DiffableStrRef + ?Sized,
+    {
+        TextDiff::from_lines(old, new).iter_all_changes().collect()
+    }
+
+    let a = "1\n2\n3\n".to_string();
+    let b = "1\n99\n3\n".to_string();
+    let changes = diff_lines(&a, &b);
+    insta::assert_debug_snapshot!(&changes);
+}
+
+/// Snapshot of the changes of a line diff serialized to pretty JSON.
+#[test]
+#[cfg(feature = "serde")]
+fn test_serde() {
+    let diff = TextDiff::from_lines(
+        "Hello World\nsome stuff here\nsome more stuff here\n\nAha stuff here\nand more stuff",
+        "Stuff\nHello World\nsome amazing stuff here\nsome more stuff here\n",
+    );
+    let changes = diff
+        .ops()
+        .iter()
+        .flat_map(|op| diff.iter_changes(op))
+        .collect::<Vec<_>>();
+    let json =
serde_json::to_string_pretty(&changes).unwrap();
+    insta::assert_snapshot!(&json);
+}
+
+/// Snapshot of the diff ops (rather than the changes) serialized to pretty
+/// JSON.
+#[test]
+#[cfg(feature = "serde")]
+fn test_serde_ops() {
+    let diff = TextDiff::from_lines(
+        "Hello World\nsome stuff here\nsome more stuff here\n\nAha stuff here\nand more stuff",
+        "Stuff\nHello World\nsome amazing stuff here\nsome more stuff here\n",
+    );
+    let changes = diff.ops();
+    let json = serde_json::to_string_pretty(&changes).unwrap();
+    insta::assert_snapshot!(&json);
+}
+
+/// Regression test: pins the exact unified diff output (context radius 0)
+/// for inputs made up of a control character, empty lines and a lone `\r`.
+#[test]
+fn test_regression_issue_37() {
+    let config = TextDiffConfig::default();
+    let diff = config.diff_lines("\u{18}\n\n", "\n\n\r");
+    let mut output = diff.unified_diff();
+    assert_eq!(
+        output.context_radius(0).to_string(),
+        "@@ -1 +1,0 @@\n-\u{18}\n@@ -2,0 +2,2 @@\n+\n+\r"
+    );
+}
diff --git a/vendor/similar/src/text/snapshots/similar__text__captured_ops.snap b/vendor/similar/src/text/snapshots/similar__text__captured_ops.snap new file mode 100644 index 0000000..cce4066 --- /dev/null +++ b/vendor/similar/src/text/snapshots/similar__text__captured_ops.snap @@ -0,0 +1,22 @@ +--- +source: src/text/mod.rs +expression: "&diff.ops()" +--- +[ + Equal { + old_index: 0, + new_index: 0, + len: 1, + }, + Replace { + old_index: 1, + old_len: 1, + new_index: 1, + new_len: 1, + }, + Equal { + old_index: 2, + new_index: 2, + len: 1, + }, +] diff --git a/vendor/similar/src/text/snapshots/similar__text__captured_word_ops.snap b/vendor/similar/src/text/snapshots/similar__text__captured_word_ops.snap new file mode 100644 index 0000000..9232c8d --- /dev/null +++ b/vendor/similar/src/text/snapshots/similar__text__captured_word_ops.snap @@ -0,0 +1,202 @@ +--- +source: src/text/mod.rs +expression: "&changes" +--- +[ + Change { + tag: Equal, + old_index: Some( + 0, + ), + new_index: Some( + 0, + ), + value: "Hello", + }, + Change { + tag: Equal, + old_index: Some( + 1, + ), + new_index: Some( + 1, + ), + value: " ", + }, + Change { + tag: Equal, + old_index: Some( + 2, + ), + new_index: Some( + 2, + ), +
value: "World", + }, + Change { + tag: Equal, + old_index: Some( + 3, + ), + new_index: Some( + 3, + ), + value: "\n", + }, + Change { + tag: Equal, + old_index: Some( + 4, + ), + new_index: Some( + 4, + ), + value: "some", + }, + Change { + tag: Equal, + old_index: Some( + 5, + ), + new_index: Some( + 5, + ), + value: " ", + }, + Change { + tag: Insert, + old_index: None, + new_index: Some( + 6, + ), + value: "amazing", + }, + Change { + tag: Insert, + old_index: None, + new_index: Some( + 7, + ), + value: " ", + }, + Change { + tag: Equal, + old_index: Some( + 6, + ), + new_index: Some( + 8, + ), + value: "stuff", + }, + Change { + tag: Equal, + old_index: Some( + 7, + ), + new_index: Some( + 9, + ), + value: " ", + }, + Change { + tag: Equal, + old_index: Some( + 8, + ), + new_index: Some( + 10, + ), + value: "here", + }, + Change { + tag: Equal, + old_index: Some( + 9, + ), + new_index: Some( + 11, + ), + value: "\n", + }, + Change { + tag: Equal, + old_index: Some( + 10, + ), + new_index: Some( + 12, + ), + value: "some", + }, + Change { + tag: Equal, + old_index: Some( + 11, + ), + new_index: Some( + 13, + ), + value: " ", + }, + Change { + tag: Equal, + old_index: Some( + 12, + ), + new_index: Some( + 14, + ), + value: "more", + }, + Change { + tag: Equal, + old_index: Some( + 13, + ), + new_index: Some( + 15, + ), + value: " ", + }, + Change { + tag: Equal, + old_index: Some( + 14, + ), + new_index: Some( + 16, + ), + value: "stuff", + }, + Change { + tag: Equal, + old_index: Some( + 15, + ), + new_index: Some( + 17, + ), + value: " ", + }, + Change { + tag: Equal, + old_index: Some( + 16, + ), + new_index: Some( + 18, + ), + value: "here", + }, + Change { + tag: Equal, + old_index: Some( + 17, + ), + new_index: Some( + 19, + ), + value: "\n", + }, +] diff --git a/vendor/similar/src/text/snapshots/similar__text__char_diff.snap b/vendor/similar/src/text/snapshots/similar__text__char_diff.snap new file mode 100644 index 0000000..b32f29a --- /dev/null +++ 
b/vendor/similar/src/text/snapshots/similar__text__char_diff.snap @@ -0,0 +1,39 @@ +--- +source: src/text/mod.rs +expression: diff.ops() +--- +[ + Equal { + old_index: 0, + new_index: 0, + len: 1, + }, + Replace { + old_index: 1, + old_len: 1, + new_index: 1, + new_len: 1, + }, + Equal { + old_index: 2, + new_index: 2, + len: 5, + }, + Replace { + old_index: 7, + old_len: 2, + new_index: 7, + new_len: 1, + }, + Equal { + old_index: 9, + new_index: 8, + len: 1, + }, + Replace { + old_index: 10, + old_len: 1, + new_index: 9, + new_len: 1, + }, +] diff --git a/vendor/similar/src/text/snapshots/similar__text__inline__line_ops_inline.snap b/vendor/similar/src/text/snapshots/similar__text__inline__line_ops_inline.snap new file mode 100644 index 0000000..2133460 --- /dev/null +++ b/vendor/similar/src/text/snapshots/similar__text__inline__line_ops_inline.snap @@ -0,0 +1,126 @@ +--- +source: src/text/inline.rs +expression: "&changes" +--- +[ + InlineChange { + tag: Insert, + old_index: None, + new_index: Some( + 0, + ), + values: [ + ( + false, + "Stuff\n", + ), + ], + }, + InlineChange { + tag: Equal, + old_index: Some( + 0, + ), + new_index: Some( + 1, + ), + values: [ + ( + false, + "Hello World\n", + ), + ], + }, + InlineChange { + tag: Delete, + old_index: Some( + 1, + ), + new_index: None, + values: [ + ( + false, + "some ", + ), + ( + false, + "stuff here\n", + ), + ], + }, + InlineChange { + tag: Insert, + old_index: None, + new_index: Some( + 2, + ), + values: [ + ( + false, + "some ", + ), + ( + true, + "amazing ", + ), + ( + false, + "stuff here\n", + ), + ], + }, + InlineChange { + tag: Equal, + old_index: Some( + 2, + ), + new_index: Some( + 3, + ), + values: [ + ( + false, + "some more stuff here\n", + ), + ], + }, + InlineChange { + tag: Delete, + old_index: Some( + 3, + ), + new_index: None, + values: [ + ( + false, + "\n", + ), + ], + }, + InlineChange { + tag: Delete, + old_index: Some( + 4, + ), + new_index: None, + values: [ + ( + false, + "Aha stuff 
here\n", + ), + ], + }, + InlineChange { + tag: Delete, + old_index: Some( + 5, + ), + new_index: None, + values: [ + ( + false, + "and more stuff", + ), + ], + }, +] diff --git a/vendor/similar/src/text/snapshots/similar__text__inline__serde.snap b/vendor/similar/src/text/snapshots/similar__text__inline__serde.snap new file mode 100644 index 0000000..44ab829 --- /dev/null +++ b/vendor/similar/src/text/snapshots/similar__text__inline__serde.snap @@ -0,0 +1,107 @@ +--- +source: src/text/inline.rs +expression: "&json" + +--- +[ + { + "tag": "insert", + "old_index": null, + "new_index": 0, + "values": [ + [ + false, + "Stuff\n" + ] + ] + }, + { + "tag": "equal", + "old_index": 0, + "new_index": 1, + "values": [ + [ + false, + "Hello World\n" + ] + ] + }, + { + "tag": "delete", + "old_index": 1, + "new_index": null, + "values": [ + [ + false, + "some " + ], + [ + false, + "stuff here\n" + ] + ] + }, + { + "tag": "insert", + "old_index": null, + "new_index": 2, + "values": [ + [ + false, + "some " + ], + [ + true, + "amazing " + ], + [ + false, + "stuff here\n" + ] + ] + }, + { + "tag": "equal", + "old_index": 2, + "new_index": 3, + "values": [ + [ + false, + "some more stuff here\n" + ] + ] + }, + { + "tag": "delete", + "old_index": 3, + "new_index": null, + "values": [ + [ + false, + "\n" + ] + ] + }, + { + "tag": "delete", + "old_index": 4, + "new_index": null, + "values": [ + [ + false, + "Aha stuff here\n" + ] + ] + }, + { + "tag": "delete", + "old_index": 5, + "new_index": null, + "values": [ + [ + false, + "and more stuff" + ] + ] + } +] diff --git a/vendor/similar/src/text/snapshots/similar__text__lifetimes_on_iter.snap b/vendor/similar/src/text/snapshots/similar__text__lifetimes_on_iter.snap new file mode 100644 index 0000000..4bb626d --- /dev/null +++ b/vendor/similar/src/text/snapshots/similar__text__lifetimes_on_iter.snap @@ -0,0 +1,42 @@ +--- +source: src/text/mod.rs +expression: "&changes" +--- +[ + Change { + tag: Equal, + old_index: Some( + 0, + ), + 
new_index: Some( + 0, + ), + value: "1\n", + }, + Change { + tag: Delete, + old_index: Some( + 1, + ), + new_index: None, + value: "2\n", + }, + Change { + tag: Insert, + old_index: None, + new_index: Some( + 1, + ), + value: "99\n", + }, + Change { + tag: Equal, + old_index: Some( + 2, + ), + new_index: Some( + 2, + ), + value: "3\n", + }, +] diff --git a/vendor/similar/src/text/snapshots/similar__text__line_ops.snap b/vendor/similar/src/text/snapshots/similar__text__line_ops.snap new file mode 100644 index 0000000..f187259 --- /dev/null +++ b/vendor/similar/src/text/snapshots/similar__text__line_ops.snap @@ -0,0 +1,42 @@ +--- +source: src/text/mod.rs +expression: "&changes" +--- +[ + Change { + tag: Equal, + old_index: Some( + 0, + ), + new_index: Some( + 0, + ), + value: "Hello World\n", + }, + Change { + tag: Delete, + old_index: Some( + 1, + ), + new_index: None, + value: "some stuff here\n", + }, + Change { + tag: Insert, + old_index: None, + new_index: Some( + 1, + ), + value: "some amazing stuff here\n", + }, + Change { + tag: Equal, + old_index: Some( + 2, + ), + new_index: Some( + 2, + ), + value: "some more stuff here\n", + }, +] diff --git a/vendor/similar/src/text/snapshots/similar__text__serde.snap b/vendor/similar/src/text/snapshots/similar__text__serde.snap new file mode 100644 index 0000000..13418a6 --- /dev/null +++ b/vendor/similar/src/text/snapshots/similar__text__serde.snap @@ -0,0 +1,55 @@ +--- +source: src/text/mod.rs +expression: "&json" + +--- +[ + { + "tag": "insert", + "old_index": null, + "new_index": 0, + "value": "Stuff\n" + }, + { + "tag": "equal", + "old_index": 0, + "new_index": 1, + "value": "Hello World\n" + }, + { + "tag": "delete", + "old_index": 1, + "new_index": null, + "value": "some stuff here\n" + }, + { + "tag": "insert", + "old_index": null, + "new_index": 2, + "value": "some amazing stuff here\n" + }, + { + "tag": "equal", + "old_index": 2, + "new_index": 3, + "value": "some more stuff here\n" + }, + { + "tag": "delete", 
+ "old_index": 3, + "new_index": null, + "value": "\n" + }, + { + "tag": "delete", + "old_index": 4, + "new_index": null, + "value": "Aha stuff here\n" + }, + { + "tag": "delete", + "old_index": 5, + "new_index": null, + "value": "and more stuff" + } +] diff --git a/vendor/similar/src/text/snapshots/similar__text__serde_ops.snap b/vendor/similar/src/text/snapshots/similar__text__serde_ops.snap new file mode 100644 index 0000000..040fe97 --- /dev/null +++ b/vendor/similar/src/text/snapshots/similar__text__serde_ops.snap @@ -0,0 +1,38 @@ +--- +source: src/text/mod.rs +expression: "&json" + +--- +[ + { + "op": "insert", + "old_index": 0, + "new_index": 0, + "new_len": 1 + }, + { + "op": "equal", + "old_index": 0, + "new_index": 1, + "len": 1 + }, + { + "op": "replace", + "old_index": 1, + "old_len": 1, + "new_index": 2, + "new_len": 1 + }, + { + "op": "equal", + "old_index": 2, + "new_index": 3, + "len": 1 + }, + { + "op": "delete", + "old_index": 3, + "old_len": 3, + "new_index": 4 + } +] diff --git a/vendor/similar/src/text/snapshots/similar__text__unified_diff.snap b/vendor/similar/src/text/snapshots/similar__text__unified_diff.snap new file mode 100644 index 0000000..77f409a --- /dev/null +++ b/vendor/similar/src/text/snapshots/similar__text__unified_diff.snap @@ -0,0 +1,12 @@ +--- +source: src/text/mod.rs +expression: "&diff.unified_diff().context_radius(3).header(\"old\", \"new\").to_string()" +--- +--- old ++++ new +@@ -1,3 +1,3 @@ + Hello World +-some stuff here ++some amazing stuff here + some more stuff here + diff --git a/vendor/similar/src/text/snapshots/similar__text__virtual_newlines.snap b/vendor/similar/src/text/snapshots/similar__text__virtual_newlines.snap new file mode 100644 index 0000000..a3915a8 --- /dev/null +++ b/vendor/similar/src/text/snapshots/similar__text__virtual_newlines.snap @@ -0,0 +1,32 @@ +--- +source: src/text/mod.rs +expression: "&changes" +--- +[ + Change { + tag: Equal, + old_index: Some( + 0, + ), + new_index: Some( + 0, + ), + 
value: "a\n", + }, + Change { + tag: Delete, + old_index: Some( + 1, + ), + new_index: None, + value: "b", + }, + Change { + tag: Insert, + old_index: None, + new_index: Some( + 1, + ), + value: "c\n", + }, +] diff --git a/vendor/similar/src/text/utils.rs b/vendor/similar/src/text/utils.rs new file mode 100644 index 0000000..d4a440f --- /dev/null +++ b/vendor/similar/src/text/utils.rs @@ -0,0 +1,55 @@
+use std::collections::HashMap;
+use std::hash::Hash;
+
+use super::DiffableStrRef;
+
+/// Quick and dirty way to get an upper sequence ratio.
+///
+/// Only the lengths are looked at: the shorter sequence is assumed to match
+/// completely.  Two empty sequences yield `1.0`.
+pub fn upper_seq_ratio<T: PartialEq>(seq1: &[T], seq2: &[T]) -> f32 {
+    let total = seq1.len() + seq2.len();
+    if total == 0 {
+        return 1.0;
+    }
+    let shorter = seq1.len().min(seq2.len());
+    2.0 * shorter as f32 / total as f32
+}
+
+/// Internal utility to calculate an upper bound for a ratio for
+/// [`get_close_matches`].  This is based on Python's difflib approach
+/// of considering the two sets to be multisets.
+///
+/// It counts the number of matches without regard to order, which is an
+/// obvious upper bound.
+pub struct QuickSeqRatio<'a, T: DiffableStrRef + ?Sized>(HashMap<&'a T, i32>);
+
+impl<'a, T: DiffableStrRef + Hash + Eq + ?Sized> QuickSeqRatio<'a, T> {
+    /// Records how often every token occurs in `seq`.
+    pub fn new(seq: &[&'a T]) -> QuickSeqRatio<'a, T> {
+        let mut occurrences = HashMap::new();
+        for &token in seq {
+            *occurrences.entry(token).or_insert(0) += 1;
+        }
+        QuickSeqRatio(occurrences)
+    }
+
+    /// Returns `2 * matches / n` for `seq`, where a token of `seq` counts as
+    /// a match as long as the recorded sequence still has an unused
+    /// occurrence of it.
+    pub fn calc(&self, seq: &[&T]) -> f32 {
+        // NOTE(review): `self.0.len()` is the number of *distinct* tokens that
+        // were recorded, not the length of the recorded sequence, so the
+        // result can exceed 1.0 -- confirm that this looser bound is intended.
+        let n = self.0.len() + seq.len();
+        if n == 0 {
+            return 1.0;
+        }
+
+        // Occurrences still available per token; seeded lazily from the
+        // recorded counts the first time a token is seen.
+        let mut remaining = HashMap::new();
+        let mut matches = 0;
+        for &token in seq {
+            let left = remaining
+                .entry(token)
+                .or_insert_with(|| self.0.get(&token).copied().unwrap_or(0));
+            if *left > 0 {
+                matches += 1;
+            }
+            *left -= 1;
+        }
+
+        2.0 * matches as f32 / n as f32
+    }
+} |