summaryrefslogtreecommitdiffstats
path: root/vendor/similar/src/text
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 12:47:55 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 12:47:55 +0000
commit2aadc03ef15cb5ca5cc2af8a7c08e070742f0ac4 (patch)
tree033cc839730fda84ff08db877037977be94e5e3a /vendor/similar/src/text
parentInitial commit. (diff)
downloadcargo-upstream.tar.xz
cargo-upstream.zip
Adding upstream version 0.70.1+ds1.upstream/0.70.1+ds1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/similar/src/text')
-rw-r--r--vendor/similar/src/text/abstraction.rs450
-rw-r--r--vendor/similar/src/text/inline.rs337
-rw-r--r--vendor/similar/src/text/mod.rs771
-rw-r--r--vendor/similar/src/text/snapshots/similar__text__captured_ops.snap22
-rw-r--r--vendor/similar/src/text/snapshots/similar__text__captured_word_ops.snap202
-rw-r--r--vendor/similar/src/text/snapshots/similar__text__char_diff.snap39
-rw-r--r--vendor/similar/src/text/snapshots/similar__text__inline__line_ops_inline.snap126
-rw-r--r--vendor/similar/src/text/snapshots/similar__text__inline__serde.snap107
-rw-r--r--vendor/similar/src/text/snapshots/similar__text__lifetimes_on_iter.snap42
-rw-r--r--vendor/similar/src/text/snapshots/similar__text__line_ops.snap42
-rw-r--r--vendor/similar/src/text/snapshots/similar__text__serde.snap55
-rw-r--r--vendor/similar/src/text/snapshots/similar__text__serde_ops.snap38
-rw-r--r--vendor/similar/src/text/snapshots/similar__text__unified_diff.snap12
-rw-r--r--vendor/similar/src/text/snapshots/similar__text__virtual_newlines.snap32
-rw-r--r--vendor/similar/src/text/utils.rs55
15 files changed, 2330 insertions, 0 deletions
diff --git a/vendor/similar/src/text/abstraction.rs b/vendor/similar/src/text/abstraction.rs
new file mode 100644
index 0000000..99678ff
--- /dev/null
+++ b/vendor/similar/src/text/abstraction.rs
@@ -0,0 +1,450 @@
+use std::borrow::Cow;
+use std::hash::Hash;
+use std::ops::Range;
+
+/// Reference to a [`DiffableStr`].
+///
+/// This type exists because while the library only really provides ways to
+/// work with `&str` and `&[u8]` there are types that deref into those string
+/// slices such as `String` and `Vec<u8>`.
+///
+/// This trait is used in the library whenever it's nice to be able to pass
+/// strings of different types in.
+///
+/// Requires the `text` feature.
+pub trait DiffableStrRef {
+ /// The type of the resolved [`DiffableStr`].
+ type Output: DiffableStr + ?Sized;
+
+ /// Resolves the reference.
+ fn as_diffable_str(&self) -> &Self::Output;
+}
+
+impl<T: DiffableStr + ?Sized> DiffableStrRef for T {
+ type Output = T;
+
+ fn as_diffable_str(&self) -> &T {
+ self
+ }
+}
+
+impl DiffableStrRef for String {
+ type Output = str;
+
+ fn as_diffable_str(&self) -> &str {
+ self.as_str()
+ }
+}
+
+impl<'a, T: DiffableStr + ?Sized> DiffableStrRef for Cow<'a, T> {
+ type Output = T;
+
+ fn as_diffable_str(&self) -> &T {
+ self
+ }
+}
+
+/// All supported diffable strings.
+///
+/// The text module can work with different types of strings depending
+/// on how the crate is compiled. Out of the box `&str` is always supported
+/// but with the `bytes` feature one can also work with `[u8]` slices for
+/// as long as they are ASCII compatible.
+///
+/// Requires the `text` feature.
+pub trait DiffableStr: Hash + PartialEq + PartialOrd + Ord + Eq + ToOwned {
+ /// Splits the value into newlines with newlines attached.
+ fn tokenize_lines(&self) -> Vec<&Self>;
+
+ /// Splits the value into newlines with newlines separated.
+ fn tokenize_lines_and_newlines(&self) -> Vec<&Self>;
+
+ /// Tokenizes into words.
+ fn tokenize_words(&self) -> Vec<&Self>;
+
+ /// Tokenizes the input into characters.
+ fn tokenize_chars(&self) -> Vec<&Self>;
+
+ /// Tokenizes into unicode words.
+ #[cfg(feature = "unicode")]
+ fn tokenize_unicode_words(&self) -> Vec<&Self>;
+
+ /// Tokenizes into unicode graphemes.
+ #[cfg(feature = "unicode")]
+ fn tokenize_graphemes(&self) -> Vec<&Self>;
+
+ /// Decodes the string (potentially) lossy.
+ fn as_str(&self) -> Option<&str>;
+
+ /// Decodes the string (potentially) lossy.
+ fn to_string_lossy(&self) -> Cow<'_, str>;
+
+ /// Checks if the string ends in a newline.
+ fn ends_with_newline(&self) -> bool;
+
+ /// The length of the string.
+ fn len(&self) -> usize;
+
+ /// Slices the string.
+ fn slice(&self, rng: Range<usize>) -> &Self;
+
+ /// Returns the string as slice of raw bytes.
+ fn as_bytes(&self) -> &[u8];
+
+ /// Checks if the string is empty.
+ fn is_empty(&self) -> bool {
+ self.len() == 0
+ }
+}
+
+impl DiffableStr for str {
+ fn tokenize_lines(&self) -> Vec<&Self> {
+ let mut iter = self.char_indices().peekable();
+ let mut last_pos = 0;
+ let mut lines = vec![];
+
+ while let Some((idx, c)) = iter.next() {
+ if c == '\r' {
+ if iter.peek().map_or(false, |x| x.1 == '\n') {
+ lines.push(&self[last_pos..=idx + 1]);
+ iter.next();
+ last_pos = idx + 2;
+ } else {
+ lines.push(&self[last_pos..=idx]);
+ last_pos = idx + 1;
+ }
+ } else if c == '\n' {
+ lines.push(&self[last_pos..=idx]);
+ last_pos = idx + 1;
+ }
+ }
+
+ if last_pos < self.len() {
+ lines.push(&self[last_pos..]);
+ }
+
+ lines
+ }
+
+ fn tokenize_lines_and_newlines(&self) -> Vec<&Self> {
+ let mut rv = vec![];
+ let mut iter = self.char_indices().peekable();
+
+ while let Some((idx, c)) = iter.next() {
+ let is_newline = c == '\r' || c == '\n';
+ let start = idx;
+ let mut end = idx + c.len_utf8();
+ while let Some(&(_, next_char)) = iter.peek() {
+ if (next_char == '\r' || next_char == '\n') != is_newline {
+ break;
+ }
+ iter.next();
+ end += next_char.len_utf8();
+ }
+ rv.push(&self[start..end]);
+ }
+
+ rv
+ }
+
+ fn tokenize_words(&self) -> Vec<&Self> {
+ let mut iter = self.char_indices().peekable();
+ let mut rv = vec![];
+
+ while let Some((idx, c)) = iter.next() {
+ let is_whitespace = c.is_whitespace();
+ let start = idx;
+ let mut end = idx + c.len_utf8();
+ while let Some(&(_, next_char)) = iter.peek() {
+ if next_char.is_whitespace() != is_whitespace {
+ break;
+ }
+ iter.next();
+ end += next_char.len_utf8();
+ }
+ rv.push(&self[start..end]);
+ }
+
+ rv
+ }
+
+ fn tokenize_chars(&self) -> Vec<&Self> {
+ self.char_indices()
+ .map(move |(i, c)| &self[i..i + c.len_utf8()])
+ .collect()
+ }
+
+ #[cfg(feature = "unicode")]
+ fn tokenize_unicode_words(&self) -> Vec<&Self> {
+ unicode_segmentation::UnicodeSegmentation::split_word_bounds(self).collect()
+ }
+
+ #[cfg(feature = "unicode")]
+ fn tokenize_graphemes(&self) -> Vec<&Self> {
+ unicode_segmentation::UnicodeSegmentation::graphemes(self, true).collect()
+ }
+
+ fn as_str(&self) -> Option<&str> {
+ Some(self)
+ }
+
+ fn to_string_lossy(&self) -> Cow<'_, str> {
+ Cow::Borrowed(self)
+ }
+
+ fn ends_with_newline(&self) -> bool {
+ self.ends_with(&['\r', '\n'][..])
+ }
+
+ fn len(&self) -> usize {
+ str::len(self)
+ }
+
+ fn slice(&self, rng: Range<usize>) -> &Self {
+ &self[rng]
+ }
+
+ fn as_bytes(&self) -> &[u8] {
+ str::as_bytes(self)
+ }
+}
+
+#[cfg(feature = "bytes")]
+mod bytes_support {
+ use super::*;
+
+ use bstr::ByteSlice;
+
+ impl DiffableStrRef for Vec<u8> {
+ type Output = [u8];
+
+ fn as_diffable_str(&self) -> &[u8] {
+ self.as_slice()
+ }
+ }
+
+ /// Allows viewing ASCII compatible byte slices as strings.
+ ///
+ /// Requires the `bytes` feature.
+ impl DiffableStr for [u8] {
+ fn tokenize_lines(&self) -> Vec<&Self> {
+ let mut iter = self.char_indices().peekable();
+ let mut last_pos = 0;
+ let mut lines = vec![];
+
+ while let Some((_, end, c)) = iter.next() {
+ if c == '\r' {
+ if iter.peek().map_or(false, |x| x.2 == '\n') {
+ lines.push(&self[last_pos..end + 1]);
+ iter.next();
+ last_pos = end + 1;
+ } else {
+ lines.push(&self[last_pos..end]);
+ last_pos = end;
+ }
+ } else if c == '\n' {
+ lines.push(&self[last_pos..end]);
+ last_pos = end;
+ }
+ }
+
+ if last_pos < self.len() {
+ lines.push(&self[last_pos..]);
+ }
+
+ lines
+ }
+
+ fn tokenize_lines_and_newlines(&self) -> Vec<&Self> {
+ let mut rv = vec![];
+ let mut iter = self.char_indices().peekable();
+
+ while let Some((start, mut end, c)) = iter.next() {
+ let is_newline = c == '\r' || c == '\n';
+ while let Some(&(_, new_end, next_char)) = iter.peek() {
+ if (next_char == '\r' || next_char == '\n') != is_newline {
+ break;
+ }
+ iter.next();
+ end = new_end;
+ }
+ rv.push(&self[start..end]);
+ }
+
+ rv
+ }
+
+ fn tokenize_words(&self) -> Vec<&Self> {
+ let mut iter = self.char_indices().peekable();
+ let mut rv = vec![];
+
+ while let Some((start, mut end, c)) = iter.next() {
+ let is_whitespace = c.is_whitespace();
+ while let Some(&(_, new_end, next_char)) = iter.peek() {
+ if next_char.is_whitespace() != is_whitespace {
+ break;
+ }
+ iter.next();
+ end = new_end;
+ }
+ rv.push(&self[start..end]);
+ }
+
+ rv
+ }
+
+ #[cfg(feature = "unicode")]
+ fn tokenize_unicode_words(&self) -> Vec<&Self> {
+ self.words_with_breaks().map(|x| x.as_bytes()).collect()
+ }
+
+ #[cfg(feature = "unicode")]
+ fn tokenize_graphemes(&self) -> Vec<&Self> {
+ self.graphemes().map(|x| x.as_bytes()).collect()
+ }
+
+ fn tokenize_chars(&self) -> Vec<&Self> {
+ self.char_indices()
+ .map(move |(start, end, _)| &self[start..end])
+ .collect()
+ }
+
+ fn as_str(&self) -> Option<&str> {
+ std::str::from_utf8(self).ok()
+ }
+
+ fn to_string_lossy(&self) -> Cow<'_, str> {
+ String::from_utf8_lossy(self)
+ }
+
+ fn ends_with_newline(&self) -> bool {
+ if let Some(b'\r') | Some(b'\n') = self.last_byte() {
+ true
+ } else {
+ false
+ }
+ }
+
+ fn len(&self) -> usize {
+ <[u8]>::len(self)
+ }
+
+ fn slice(&self, rng: Range<usize>) -> &Self {
+ &self[rng]
+ }
+
+ fn as_bytes(&self) -> &[u8] {
+ self
+ }
+ }
+}
+
+#[test]
+fn test_split_lines() {
+ assert_eq!(
+ DiffableStr::tokenize_lines("first\nsecond\rthird\r\nfourth\nlast"),
+ vec!["first\n", "second\r", "third\r\n", "fourth\n", "last"]
+ );
+ assert_eq!(DiffableStr::tokenize_lines("\n\n"), vec!["\n", "\n"]);
+ assert_eq!(DiffableStr::tokenize_lines("\n"), vec!["\n"]);
+ assert!(DiffableStr::tokenize_lines("").is_empty());
+}
+
+#[test]
+fn test_split_words() {
+ assert_eq!(
+ DiffableStr::tokenize_words("foo bar baz\n\n aha"),
+ ["foo", " ", "bar", " ", "baz", "\n\n ", "aha"]
+ );
+}
+
+#[test]
+fn test_split_chars() {
+ assert_eq!(
+ DiffableStr::tokenize_chars("abcfö❄️"),
+ vec!["a", "b", "c", "f", "ö", "❄", "\u{fe0f}"]
+ );
+}
+
+#[test]
+#[cfg(feature = "unicode")]
+fn test_split_graphemes() {
+ assert_eq!(
+ DiffableStr::tokenize_graphemes("abcfö❄️"),
+ vec!["a", "b", "c", "f", "ö", "❄️"]
+ );
+}
+
+#[test]
+#[cfg(feature = "bytes")]
+fn test_split_lines_bytes() {
+ assert_eq!(
+ DiffableStr::tokenize_lines("first\nsecond\rthird\r\nfourth\nlast".as_bytes()),
+ vec![
+ "first\n".as_bytes(),
+ "second\r".as_bytes(),
+ "third\r\n".as_bytes(),
+ "fourth\n".as_bytes(),
+ "last".as_bytes()
+ ]
+ );
+ assert_eq!(
+ DiffableStr::tokenize_lines("\n\n".as_bytes()),
+ vec!["\n".as_bytes(), "\n".as_bytes()]
+ );
+ assert_eq!(
+ DiffableStr::tokenize_lines("\n".as_bytes()),
+ vec!["\n".as_bytes()]
+ );
+ assert!(DiffableStr::tokenize_lines("".as_bytes()).is_empty());
+}
+
+#[test]
+#[cfg(feature = "bytes")]
+fn test_split_words_bytes() {
+ assert_eq!(
+ DiffableStr::tokenize_words("foo bar baz\n\n aha".as_bytes()),
+ [
+ &b"foo"[..],
+ &b" "[..],
+ &b"bar"[..],
+ &b" "[..],
+ &b"baz"[..],
+ &b"\n\n "[..],
+ &b"aha"[..]
+ ]
+ );
+}
+
+#[test]
+#[cfg(feature = "bytes")]
+fn test_split_chars_bytes() {
+ assert_eq!(
+ DiffableStr::tokenize_chars("abcfö❄️".as_bytes()),
+ vec![
+ &b"a"[..],
+ &b"b"[..],
+ &b"c"[..],
+ &b"f"[..],
+ "ö".as_bytes(),
+ "❄".as_bytes(),
+ "\u{fe0f}".as_bytes()
+ ]
+ );
+}
+
+#[test]
+#[cfg(all(feature = "bytes", feature = "unicode"))]
+fn test_split_graphemes_bytes() {
+ assert_eq!(
+ DiffableStr::tokenize_graphemes("abcfö❄️".as_bytes()),
+ vec![
+ &b"a"[..],
+ &b"b"[..],
+ &b"c"[..],
+ &b"f"[..],
+ "ö".as_bytes(),
+ "❄️".as_bytes()
+ ]
+ );
+}
diff --git a/vendor/similar/src/text/inline.rs b/vendor/similar/src/text/inline.rs
new file mode 100644
index 0000000..c9f0f7f
--- /dev/null
+++ b/vendor/similar/src/text/inline.rs
@@ -0,0 +1,337 @@
+#![cfg(feature = "inline")]
+use std::borrow::Cow;
+use std::fmt;
+
+use crate::text::{DiffableStr, TextDiff};
+use crate::types::{Algorithm, Change, ChangeTag, DiffOp, DiffTag};
+use crate::{capture_diff_deadline, get_diff_ratio};
+
+use std::ops::Index;
+use std::time::{Duration, Instant};
+
+use super::utils::upper_seq_ratio;
+
+struct MultiLookup<'bufs, 's, T: DiffableStr + ?Sized> {
+ strings: &'bufs [&'s T],
+ seqs: Vec<(&'s T, usize, usize)>,
+}
+
+impl<'bufs, 's, T: DiffableStr + ?Sized> MultiLookup<'bufs, 's, T> {
+ fn new(strings: &'bufs [&'s T]) -> MultiLookup<'bufs, 's, T> {
+ let mut seqs = Vec::new();
+ for (string_idx, string) in strings.iter().enumerate() {
+ let mut offset = 0;
+ let iter = {
+ #[cfg(feature = "unicode")]
+ {
+ string.tokenize_unicode_words()
+ }
+ #[cfg(not(feature = "unicode"))]
+ {
+ string.tokenize_words()
+ }
+ };
+ for word in iter {
+ seqs.push((word, string_idx, offset));
+ offset += word.len();
+ }
+ }
+ MultiLookup { strings, seqs }
+ }
+
+ pub fn len(&self) -> usize {
+ self.seqs.len()
+ }
+
+ fn get_original_slices(&self, idx: usize, len: usize) -> Vec<(usize, &'s T)> {
+ let mut last = None;
+ let mut rv = Vec::new();
+
+ for offset in 0..len {
+ let (s, str_idx, char_idx) = self.seqs[idx + offset];
+ last = match last {
+ None => Some((str_idx, char_idx, s.len())),
+ Some((last_str_idx, start_char_idx, last_len)) => {
+ if last_str_idx == str_idx {
+ Some((str_idx, start_char_idx, last_len + s.len()))
+ } else {
+ rv.push((
+ last_str_idx,
+ self.strings[last_str_idx]
+ .slice(start_char_idx..start_char_idx + last_len),
+ ));
+ Some((str_idx, char_idx, s.len()))
+ }
+ }
+ };
+ }
+
+ if let Some((str_idx, start_char_idx, len)) = last {
+ rv.push((
+ str_idx,
+ self.strings[str_idx].slice(start_char_idx..start_char_idx + len),
+ ));
+ }
+
+ rv
+ }
+}
+
+impl<'bufs, 's, T: DiffableStr + ?Sized> Index<usize> for MultiLookup<'bufs, 's, T> {
+ type Output = T;
+
+ fn index(&self, index: usize) -> &Self::Output {
+ self.seqs[index].0
+ }
+}
+
+fn push_values<'s, T: DiffableStr + ?Sized>(
+ v: &mut Vec<Vec<(bool, &'s T)>>,
+ idx: usize,
+ emphasized: bool,
+ s: &'s T,
+) {
+ v.resize_with(v.len().max(idx + 1), Vec::new);
+ // newlines cause all kinds of wacky stuff if they end up highlighted.
+ // because of this we want to unemphasize all newlines we encounter.
+ if emphasized {
+ for seg in s.tokenize_lines_and_newlines() {
+ v[idx].push((!seg.ends_with_newline(), seg));
+ }
+ } else {
+ v[idx].push((false, s));
+ }
+}
+
+/// Represents the expanded textual change with inline highlights.
+///
+/// This is like [`Change`] but with inline highlight info.
+#[derive(Debug, PartialEq, Eq, Hash, Clone, Ord, PartialOrd)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize))]
+pub struct InlineChange<'s, T: DiffableStr + ?Sized> {
+ tag: ChangeTag,
+ old_index: Option<usize>,
+ new_index: Option<usize>,
+ values: Vec<(bool, &'s T)>,
+}
+
+impl<'s, T: DiffableStr + ?Sized> InlineChange<'s, T> {
+ /// Returns the change tag.
+ pub fn tag(&self) -> ChangeTag {
+ self.tag
+ }
+
+ /// Returns the old index if available.
+ pub fn old_index(&self) -> Option<usize> {
+ self.old_index
+ }
+
+ /// Returns the new index if available.
+ pub fn new_index(&self) -> Option<usize> {
+ self.new_index
+ }
+
+ /// Returns the changed values.
+ ///
+ /// Each item is a tuple in the form `(emphasized, value)` where `emphasized`
+ /// is true if it should be highlighted as an inline diff.
+ ///
+ /// Depending on the type of the underlying [`DiffableStr`] this value is
+ /// more or less useful. If you always want to have a utf-8 string it's
+ /// better to use the [`InlineChange::iter_strings_lossy`] method.
+ pub fn values(&self) -> &[(bool, &'s T)] {
+ &self.values
+ }
+
+ /// Iterates over all (potentially lossy) utf-8 decoded values.
+ ///
+ /// Each item is a tuple in the form `(emphasized, value)` where `emphasized`
+ /// is true if it should be highlighted as an inline diff.
+ pub fn iter_strings_lossy(&self) -> impl Iterator<Item = (bool, Cow<'_, str>)> {
+ self.values()
+ .iter()
+ .map(|(emphasized, raw_value)| (*emphasized, raw_value.to_string_lossy()))
+ }
+
+ /// Returns `true` if this change does not end in a newline and must be
+ /// followed up by one if line based diffs are used.
+ pub fn missing_newline(&self) -> bool {
+ !self.values.last().map_or(true, |x| x.1.ends_with_newline())
+ }
+}
+
+impl<'s, T: DiffableStr + ?Sized> From<Change<&'s T>> for InlineChange<'s, T> {
+ fn from(change: Change<&'s T>) -> InlineChange<'s, T> {
+ InlineChange {
+ tag: change.tag(),
+ old_index: change.old_index(),
+ new_index: change.new_index(),
+ values: vec![(false, change.value())],
+ }
+ }
+}
+
+impl<'s, T: DiffableStr + ?Sized> fmt::Display for InlineChange<'s, T> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ for (emphasized, value) in self.iter_strings_lossy() {
+ let marker = match (emphasized, self.tag) {
+ (false, _) | (true, ChangeTag::Equal) => "",
+ (true, ChangeTag::Delete) => "-",
+ (true, ChangeTag::Insert) => "+",
+ };
+ write!(f, "{}{}{}", marker, value, marker)?;
+ }
+ if self.missing_newline() {
+ writeln!(f)?;
+ }
+ Ok(())
+ }
+}
+
+const MIN_RATIO: f32 = 0.5;
+const TIMEOUT_MS: u64 = 500;
+
+pub(crate) fn iter_inline_changes<'x, 'diff, 'old, 'new, 'bufs, T>(
+ diff: &'diff TextDiff<'old, 'new, 'bufs, T>,
+ op: &DiffOp,
+) -> impl Iterator<Item = InlineChange<'x, T>> + 'diff
+where
+ T: DiffableStr + ?Sized,
+ 'x: 'diff,
+ 'old: 'x,
+ 'new: 'x,
+{
+ let (tag, old_range, new_range) = op.as_tag_tuple();
+
+ if let DiffTag::Equal | DiffTag::Insert | DiffTag::Delete = tag {
+ return Box::new(diff.iter_changes(op).map(|x| x.into())) as Box<dyn Iterator<Item = _>>;
+ }
+
+ let mut old_index = old_range.start;
+ let mut new_index = new_range.start;
+ let old_slices = &diff.old_slices()[old_range];
+ let new_slices = &diff.new_slices()[new_range];
+
+ if upper_seq_ratio(old_slices, new_slices) < MIN_RATIO {
+ return Box::new(diff.iter_changes(op).map(|x| x.into())) as Box<dyn Iterator<Item = _>>;
+ }
+
+ let old_lookup = MultiLookup::new(old_slices);
+ let new_lookup = MultiLookup::new(new_slices);
+
+ let ops = capture_diff_deadline(
+ Algorithm::Patience,
+ &old_lookup,
+ 0..old_lookup.len(),
+ &new_lookup,
+ 0..new_lookup.len(),
+ Some(Instant::now() + Duration::from_millis(TIMEOUT_MS)),
+ );
+
+ if get_diff_ratio(&ops, old_lookup.len(), new_lookup.len()) < MIN_RATIO {
+ return Box::new(diff.iter_changes(op).map(|x| x.into())) as Box<dyn Iterator<Item = _>>;
+ }
+
+ let mut old_values = Vec::<Vec<_>>::new();
+ let mut new_values = Vec::<Vec<_>>::new();
+
+ for op in ops {
+ match op {
+ DiffOp::Equal {
+ old_index,
+ len,
+ new_index,
+ } => {
+ for (idx, slice) in old_lookup.get_original_slices(old_index, len) {
+ push_values(&mut old_values, idx, false, slice);
+ }
+ for (idx, slice) in new_lookup.get_original_slices(new_index, len) {
+ push_values(&mut new_values, idx, false, slice);
+ }
+ }
+ DiffOp::Delete {
+ old_index, old_len, ..
+ } => {
+ for (idx, slice) in old_lookup.get_original_slices(old_index, old_len) {
+ push_values(&mut old_values, idx, true, slice);
+ }
+ }
+ DiffOp::Insert {
+ new_index, new_len, ..
+ } => {
+ for (idx, slice) in new_lookup.get_original_slices(new_index, new_len) {
+ push_values(&mut new_values, idx, true, slice);
+ }
+ }
+ DiffOp::Replace {
+ old_index,
+ old_len,
+ new_index,
+ new_len,
+ } => {
+ for (idx, slice) in old_lookup.get_original_slices(old_index, old_len) {
+ push_values(&mut old_values, idx, true, slice);
+ }
+ for (idx, slice) in new_lookup.get_original_slices(new_index, new_len) {
+ push_values(&mut new_values, idx, true, slice);
+ }
+ }
+ }
+ }
+
+ let mut rv = Vec::new();
+
+ for values in old_values {
+ rv.push(InlineChange {
+ tag: ChangeTag::Delete,
+ old_index: Some(old_index),
+ new_index: None,
+ values,
+ });
+ old_index += 1;
+ }
+
+ for values in new_values {
+ rv.push(InlineChange {
+ tag: ChangeTag::Insert,
+ old_index: None,
+ new_index: Some(new_index),
+ values,
+ });
+ new_index += 1;
+ }
+
+ Box::new(rv.into_iter()) as Box<dyn Iterator<Item = _>>
+}
+
+#[test]
+fn test_line_ops_inline() {
+ let diff = TextDiff::from_lines(
+ "Hello World\nsome stuff here\nsome more stuff here\n\nAha stuff here\nand more stuff",
+ "Stuff\nHello World\nsome amazing stuff here\nsome more stuff here\n",
+ );
+ assert!(diff.newline_terminated());
+ let changes = diff
+ .ops()
+ .iter()
+ .flat_map(|op| diff.iter_inline_changes(op))
+ .collect::<Vec<_>>();
+ insta::assert_debug_snapshot!(&changes);
+}
+
+#[test]
+#[cfg(feature = "serde")]
+fn test_serde() {
+ let diff = TextDiff::from_lines(
+ "Hello World\nsome stuff here\nsome more stuff here\n\nAha stuff here\nand more stuff",
+ "Stuff\nHello World\nsome amazing stuff here\nsome more stuff here\n",
+ );
+ assert!(diff.newline_terminated());
+ let changes = diff
+ .ops()
+ .iter()
+ .flat_map(|op| diff.iter_inline_changes(op))
+ .collect::<Vec<_>>();
+ let json = serde_json::to_string_pretty(&changes).unwrap();
+ insta::assert_snapshot!(&json);
+}
diff --git a/vendor/similar/src/text/mod.rs b/vendor/similar/src/text/mod.rs
new file mode 100644
index 0000000..0a441d1
--- /dev/null
+++ b/vendor/similar/src/text/mod.rs
@@ -0,0 +1,771 @@
+//! Text diffing utilities.
+use std::borrow::Cow;
+use std::cmp::Reverse;
+use std::collections::BinaryHeap;
+use std::time::{Duration, Instant};
+
+mod abstraction;
+#[cfg(feature = "inline")]
+mod inline;
+mod utils;
+
+pub use self::abstraction::{DiffableStr, DiffableStrRef};
+#[cfg(feature = "inline")]
+pub use self::inline::InlineChange;
+
+use self::utils::{upper_seq_ratio, QuickSeqRatio};
+use crate::algorithms::IdentifyDistinct;
+use crate::iter::{AllChangesIter, ChangesIter};
+use crate::udiff::UnifiedDiff;
+use crate::{capture_diff_deadline, get_diff_ratio, group_diff_ops, Algorithm, DiffOp};
+
+#[derive(Debug, Clone, Copy)]
+enum Deadline {
+ Absolute(Instant),
+ Relative(Duration),
+}
+
+impl Deadline {
+ fn into_instant(self) -> Instant {
+ match self {
+ Deadline::Absolute(instant) => instant,
+ Deadline::Relative(duration) => Instant::now() + duration,
+ }
+ }
+}
+
+/// A builder type config for more complex uses of [`TextDiff`].
+///
+/// Requires the `text` feature.
+#[derive(Clone, Debug, Default)]
+pub struct TextDiffConfig {
+ algorithm: Algorithm,
+ newline_terminated: Option<bool>,
+ deadline: Option<Deadline>,
+}
+
+impl TextDiffConfig {
+ /// Changes the algorithm.
+ ///
+ /// The default algorithm is [`Algorithm::Myers`].
+ pub fn algorithm(&mut self, alg: Algorithm) -> &mut Self {
+ self.algorithm = alg;
+ self
+ }
+
+ /// Sets a deadline for the diff operation.
+ ///
+ /// By default a diff will take as long as it takes. For certain diff
+ /// algorithms like Myer's and Patience a maximum running time can be
+ /// defined after which the algorithm gives up and approximates.
+ pub fn deadline(&mut self, deadline: Instant) -> &mut Self {
+ self.deadline = Some(Deadline::Absolute(deadline));
+ self
+ }
+
+ /// Sets a timeout for thediff operation.
+ ///
+ /// This is like [`deadline`](Self::deadline) but accepts a duration.
+ pub fn timeout(&mut self, timeout: Duration) -> &mut Self {
+ self.deadline = Some(Deadline::Relative(timeout));
+ self
+ }
+
+ /// Changes the newline termination flag.
+ ///
+ /// The default is automatic based on input. This flag controls the
+ /// behavior of [`TextDiff::iter_changes`] and unified diff generation
+ /// with regards to newlines. When the flag is set to `false` (which
+ /// is the default) then newlines are added. Otherwise the newlines
+ /// from the source sequences are reused.
+ pub fn newline_terminated(&mut self, yes: bool) -> &mut Self {
+ self.newline_terminated = Some(yes);
+ self
+ }
+
+ /// Creates a diff of lines.
+ ///
+ /// This splits the text `old` and `new` into lines preserving newlines
+ /// in the input. Line diffs are very common and because of that enjoy
+ /// special handling in similar. When a line diff is created with this
+ /// method the `newline_terminated` flag is flipped to `true` and will
+ /// influence the behavior of unified diff generation.
+ ///
+ /// ```rust
+ /// use similar::{TextDiff, ChangeTag};
+ ///
+ /// let diff = TextDiff::configure().diff_lines("a\nb\nc", "a\nb\nC");
+ /// let changes: Vec<_> = diff
+ /// .iter_all_changes()
+ /// .map(|x| (x.tag(), x.value()))
+ /// .collect();
+ ///
+ /// assert_eq!(changes, vec![
+ /// (ChangeTag::Equal, "a\n"),
+ /// (ChangeTag::Equal, "b\n"),
+ /// (ChangeTag::Delete, "c"),
+ /// (ChangeTag::Insert, "C"),
+ /// ]);
+ /// ```
+ pub fn diff_lines<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
+ &self,
+ old: &'old T,
+ new: &'new T,
+ ) -> TextDiff<'old, 'new, 'bufs, T::Output> {
+ self.diff(
+ Cow::Owned(old.as_diffable_str().tokenize_lines()),
+ Cow::Owned(new.as_diffable_str().tokenize_lines()),
+ true,
+ )
+ }
+
+ /// Creates a diff of words.
+ ///
+ /// This splits the text into words and whitespace.
+ ///
+ /// Note on word diffs: because the text differ will tokenize the strings
+ /// into small segments it can be inconvenient to work with the results
+ /// depending on the use case. You might also want to combine word level
+ /// diffs with the [`TextDiffRemapper`](crate::utils::TextDiffRemapper)
+ /// which lets you remap the diffs back to the original input strings.
+ ///
+ /// ```rust
+ /// use similar::{TextDiff, ChangeTag};
+ ///
+ /// let diff = TextDiff::configure().diff_words("foo bar baz", "foo BAR baz");
+ /// let changes: Vec<_> = diff
+ /// .iter_all_changes()
+ /// .map(|x| (x.tag(), x.value()))
+ /// .collect();
+ ///
+ /// assert_eq!(changes, vec![
+ /// (ChangeTag::Equal, "foo"),
+ /// (ChangeTag::Equal, " "),
+ /// (ChangeTag::Delete, "bar"),
+ /// (ChangeTag::Insert, "BAR"),
+ /// (ChangeTag::Equal, " "),
+ /// (ChangeTag::Equal, "baz"),
+ /// ]);
+ /// ```
+ pub fn diff_words<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
+ &self,
+ old: &'old T,
+ new: &'new T,
+ ) -> TextDiff<'old, 'new, 'bufs, T::Output> {
+ self.diff(
+ Cow::Owned(old.as_diffable_str().tokenize_words()),
+ Cow::Owned(new.as_diffable_str().tokenize_words()),
+ false,
+ )
+ }
+
+ /// Creates a diff of characters.
+ ///
+ /// Note on character diffs: because the text differ will tokenize the strings
+ /// into small segments it can be inconvenient to work with the results
+ /// depending on the use case. You might also want to combine word level
+ /// diffs with the [`TextDiffRemapper`](crate::utils::TextDiffRemapper)
+ /// which lets you remap the diffs back to the original input strings.
+ ///
+ /// ```rust
+ /// use similar::{TextDiff, ChangeTag};
+ ///
+ /// let diff = TextDiff::configure().diff_chars("abcdef", "abcDDf");
+ /// let changes: Vec<_> = diff
+ /// .iter_all_changes()
+ /// .map(|x| (x.tag(), x.value()))
+ /// .collect();
+ ///
+ /// assert_eq!(changes, vec![
+ /// (ChangeTag::Equal, "a"),
+ /// (ChangeTag::Equal, "b"),
+ /// (ChangeTag::Equal, "c"),
+ /// (ChangeTag::Delete, "d"),
+ /// (ChangeTag::Delete, "e"),
+ /// (ChangeTag::Insert, "D"),
+ /// (ChangeTag::Insert, "D"),
+ /// (ChangeTag::Equal, "f"),
+ /// ]);
+ /// ```
+ pub fn diff_chars<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
+ &self,
+ old: &'old T,
+ new: &'new T,
+ ) -> TextDiff<'old, 'new, 'bufs, T::Output> {
+ self.diff(
+ Cow::Owned(old.as_diffable_str().tokenize_chars()),
+ Cow::Owned(new.as_diffable_str().tokenize_chars()),
+ false,
+ )
+ }
+
+ /// Creates a diff of unicode words.
+ ///
+ /// This splits the text into words according to unicode rules. This is
+ /// generally recommended over [`TextDiffConfig::diff_words`] but
+ /// requires a dependency.
+ ///
+ /// This requires the `unicode` feature.
+ ///
+ /// Note on word diffs: because the text differ will tokenize the strings
+ /// into small segments it can be inconvenient to work with the results
+ /// depending on the use case. You might also want to combine word level
+ /// diffs with the [`TextDiffRemapper`](crate::utils::TextDiffRemapper)
+ /// which lets you remap the diffs back to the original input strings.
+ ///
+ /// ```rust
+ /// use similar::{TextDiff, ChangeTag};
+ ///
+ /// let diff = TextDiff::configure().diff_unicode_words("ah(be)ce", "ah(ah)ce");
+ /// let changes: Vec<_> = diff
+ /// .iter_all_changes()
+ /// .map(|x| (x.tag(), x.value()))
+ /// .collect();
+ ///
+ /// assert_eq!(changes, vec![
+ /// (ChangeTag::Equal, "ah"),
+ /// (ChangeTag::Equal, "("),
+ /// (ChangeTag::Delete, "be"),
+ /// (ChangeTag::Insert, "ah"),
+ /// (ChangeTag::Equal, ")"),
+ /// (ChangeTag::Equal, "ce"),
+ /// ]);
+ /// ```
+ #[cfg(feature = "unicode")]
+ pub fn diff_unicode_words<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
+ &self,
+ old: &'old T,
+ new: &'new T,
+ ) -> TextDiff<'old, 'new, 'bufs, T::Output> {
+ self.diff(
+ Cow::Owned(old.as_diffable_str().tokenize_unicode_words()),
+ Cow::Owned(new.as_diffable_str().tokenize_unicode_words()),
+ false,
+ )
+ }
+
+ /// Creates a diff of graphemes.
+ ///
+ /// This requires the `unicode` feature.
+ ///
+ /// Note on grapheme diffs: because the text differ will tokenize the strings
+ /// into small segments it can be inconvenient to work with the results
+ /// depending on the use case. You might also want to combine word level
+ /// diffs with the [`TextDiffRemapper`](crate::utils::TextDiffRemapper)
+ /// which lets you remap the diffs back to the original input strings.
+ ///
+ /// ```rust
+ /// use similar::{TextDiff, ChangeTag};
+ ///
+ /// let diff = TextDiff::configure().diff_graphemes("💩🇦🇹🦠", "💩🇦🇱❄️");
+ /// let changes: Vec<_> = diff
+ /// .iter_all_changes()
+ /// .map(|x| (x.tag(), x.value()))
+ /// .collect();
+ ///
+ /// assert_eq!(changes, vec![
+ /// (ChangeTag::Equal, "💩"),
+ /// (ChangeTag::Delete, "🇦🇹"),
+ /// (ChangeTag::Delete, "🦠"),
+ /// (ChangeTag::Insert, "🇦🇱"),
+ /// (ChangeTag::Insert, "❄️"),
+ /// ]);
+ /// ```
+ #[cfg(feature = "unicode")]
+ pub fn diff_graphemes<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
+ &self,
+ old: &'old T,
+ new: &'new T,
+ ) -> TextDiff<'old, 'new, 'bufs, T::Output> {
+ self.diff(
+ Cow::Owned(old.as_diffable_str().tokenize_graphemes()),
+ Cow::Owned(new.as_diffable_str().tokenize_graphemes()),
+ false,
+ )
+ }
+
+ /// Creates a diff of arbitrary slices.
+ ///
+ /// ```rust
+ /// use similar::{TextDiff, ChangeTag};
+ ///
+ /// let old = &["foo", "bar", "baz"];
+ /// let new = &["foo", "BAR", "baz"];
+ /// let diff = TextDiff::configure().diff_slices(old, new);
+ /// let changes: Vec<_> = diff
+ /// .iter_all_changes()
+ /// .map(|x| (x.tag(), x.value()))
+ /// .collect();
+ ///
+ /// assert_eq!(changes, vec![
+ /// (ChangeTag::Equal, "foo"),
+ /// (ChangeTag::Delete, "bar"),
+ /// (ChangeTag::Insert, "BAR"),
+ /// (ChangeTag::Equal, "baz"),
+ /// ]);
+ /// ```
+ pub fn diff_slices<'old, 'new, 'bufs, T: DiffableStr + ?Sized>(
+ &self,
+ old: &'bufs [&'old T],
+ new: &'bufs [&'new T],
+ ) -> TextDiff<'old, 'new, 'bufs, T> {
+ self.diff(Cow::Borrowed(old), Cow::Borrowed(new), false)
+ }
+
+ fn diff<'old, 'new, 'bufs, T: DiffableStr + ?Sized>(
+ &self,
+ old: Cow<'bufs, [&'old T]>,
+ new: Cow<'bufs, [&'new T]>,
+ newline_terminated: bool,
+ ) -> TextDiff<'old, 'new, 'bufs, T> {
+ let deadline = self.deadline.map(|x| x.into_instant());
+ let ops = if old.len() > 100 || new.len() > 100 {
+ let ih = IdentifyDistinct::<u32>::new(&old[..], 0..old.len(), &new[..], 0..new.len());
+ capture_diff_deadline(
+ self.algorithm,
+ ih.old_lookup(),
+ ih.old_range(),
+ ih.new_lookup(),
+ ih.new_range(),
+ deadline,
+ )
+ } else {
+ capture_diff_deadline(
+ self.algorithm,
+ &old[..],
+ 0..old.len(),
+ &new[..],
+ 0..new.len(),
+ deadline,
+ )
+ };
+ TextDiff {
+ old,
+ new,
+ ops,
+ newline_terminated: self.newline_terminated.unwrap_or(newline_terminated),
+ algorithm: self.algorithm,
+ }
+ }
+}
+
+/// Captures diff op codes for textual diffs.
+///
+/// The exact diff behavior is depending on the underlying [`DiffableStr`].
+/// For instance diffs on bytes and strings are slightly different. You can
+/// create a text diff from constructors such as [`TextDiff::from_lines`] or
+/// the [`TextDiffConfig`] created by [`TextDiff::configure`].
+///
+/// Requires the `text` feature.
+pub struct TextDiff<'old, 'new, 'bufs, T: DiffableStr + ?Sized> {
+ old: Cow<'bufs, [&'old T]>,
+ new: Cow<'bufs, [&'new T]>,
+ ops: Vec<DiffOp>,
+ newline_terminated: bool,
+ algorithm: Algorithm,
+}
+
+impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs, str> {
+ /// Configures a text differ before diffing.
+ pub fn configure() -> TextDiffConfig {
+ TextDiffConfig::default()
+ }
+
+ /// Creates a diff of lines.
+ ///
+ /// For more information see [`TextDiffConfig::diff_lines`].
+ pub fn from_lines<T: DiffableStrRef + ?Sized>(
+ old: &'old T,
+ new: &'new T,
+ ) -> TextDiff<'old, 'new, 'bufs, T::Output> {
+ TextDiff::configure().diff_lines(old, new)
+ }
+
+ /// Creates a diff of words.
+ ///
+ /// For more information see [`TextDiffConfig::diff_words`].
+ pub fn from_words<T: DiffableStrRef + ?Sized>(
+ old: &'old T,
+ new: &'new T,
+ ) -> TextDiff<'old, 'new, 'bufs, T::Output> {
+ TextDiff::configure().diff_words(old, new)
+ }
+
+ /// Creates a diff of chars.
+ ///
+ /// For more information see [`TextDiffConfig::diff_chars`].
+ pub fn from_chars<T: DiffableStrRef + ?Sized>(
+ old: &'old T,
+ new: &'new T,
+ ) -> TextDiff<'old, 'new, 'bufs, T::Output> {
+ TextDiff::configure().diff_chars(old, new)
+ }
+
+ /// Creates a diff of unicode words.
+ ///
+ /// For more information see [`TextDiffConfig::diff_unicode_words`].
+ ///
+ /// This requires the `unicode` feature.
+ #[cfg(feature = "unicode")]
+ pub fn from_unicode_words<T: DiffableStrRef + ?Sized>(
+ old: &'old T,
+ new: &'new T,
+ ) -> TextDiff<'old, 'new, 'bufs, T::Output> {
+ TextDiff::configure().diff_unicode_words(old, new)
+ }
+
+ /// Creates a diff of graphemes.
+ ///
+ /// For more information see [`TextDiffConfig::diff_graphemes`].
+ ///
+ /// This requires the `unicode` feature.
+ #[cfg(feature = "unicode")]
+ pub fn from_graphemes<T: DiffableStrRef + ?Sized>(
+ old: &'old T,
+ new: &'new T,
+ ) -> TextDiff<'old, 'new, 'bufs, T::Output> {
+ TextDiff::configure().diff_graphemes(old, new)
+ }
+}
+
+impl<'old, 'new, 'bufs, T: DiffableStr + ?Sized + 'old + 'new> TextDiff<'old, 'new, 'bufs, T> {
+ /// Creates a diff of arbitrary slices.
+ ///
+ /// For more information see [`TextDiffConfig::diff_slices`].
+ pub fn from_slices(
+ old: &'bufs [&'old T],
+ new: &'bufs [&'new T],
+ ) -> TextDiff<'old, 'new, 'bufs, T> {
+ TextDiff::configure().diff_slices(old, new)
+ }
+
+ /// The name of the algorithm that created the diff.
+ pub fn algorithm(&self) -> Algorithm {
+ self.algorithm
+ }
+
+ /// Returns `true` if items in the slice are newline terminated.
+ ///
+ /// This flag is used by the unified diff writer to determine if extra
+ /// newlines have to be added.
+ pub fn newline_terminated(&self) -> bool {
+ self.newline_terminated
+ }
+
+ /// Returns all old slices.
+ pub fn old_slices(&self) -> &[&'old T] {
+ &self.old
+ }
+
+ /// Returns all new slices.
+ pub fn new_slices(&self) -> &[&'new T] {
+ &self.new
+ }
+
+ /// Return a measure of the sequences' similarity in the range `0..=1`.
+ ///
+ /// A ratio of `1.0` means the two sequences are a complete match, a
+ /// ratio of `0.0` would indicate completely distinct sequences.
+ ///
+ /// ```rust
+ /// # use similar::TextDiff;
+ /// let diff = TextDiff::from_chars("abcd", "bcde");
+ /// assert_eq!(diff.ratio(), 0.75);
+ /// ```
+ pub fn ratio(&self) -> f32 {
+ get_diff_ratio(self.ops(), self.old.len(), self.new.len())
+ }
+
+ /// Iterates over the changes the op expands to.
+ ///
+ /// This method is a convenient way to automatically resolve the different
+ /// ways in which a change could be encoded (insert/delete vs replace), look
+ /// up the value from the appropriate slice and also handle correct index
+ /// handling.
+ pub fn iter_changes<'x, 'slf>(
+ &'slf self,
+ op: &DiffOp,
+ ) -> ChangesIter<'slf, [&'x T], [&'x T], &'x T>
+ where
+ 'x: 'slf,
+ 'old: 'x,
+ 'new: 'x,
+ {
+ op.iter_changes(self.old_slices(), self.new_slices())
+ }
+
+ /// Returns the captured diff ops.
+ pub fn ops(&self) -> &[DiffOp] {
+ &self.ops
+ }
+
+ /// Isolate change clusters by eliminating ranges with no changes.
+ ///
+ /// This is equivalent to calling [`group_diff_ops`] on [`TextDiff::ops`].
+ pub fn grouped_ops(&self, n: usize) -> Vec<Vec<DiffOp>> {
+ group_diff_ops(self.ops().to_vec(), n)
+ }
+
+ /// Flattens out the diff into all changes.
+ ///
+ /// This is a shortcut for combining [`TextDiff::ops`] with
+ /// [`TextDiff::iter_changes`].
+ pub fn iter_all_changes<'x, 'slf>(&'slf self) -> AllChangesIter<'slf, 'x, T>
+ where
+ 'x: 'slf + 'old + 'new,
+ 'old: 'x,
+ 'new: 'x,
+ {
+ AllChangesIter::new(&self.old[..], &self.new[..], self.ops())
+ }
+
+ /// Utility to return a unified diff formatter.
+ pub fn unified_diff<'diff>(&'diff self) -> UnifiedDiff<'diff, 'old, 'new, 'bufs, T> {
+ UnifiedDiff::from_text_diff(self)
+ }
+
+ /// Iterates over the changes the op expands to with inline emphasis.
+ ///
+ /// This is very similar to [`TextDiff::iter_changes`] but it performs a second
+ /// level diff on adjacent line replacements. The exact behavior of
+ /// this function with regards to how it detects those inline changes
+ /// is currently not defined and will likely change over time.
+ ///
+ /// As of similar 1.2.0 the behavior of this function changes depending on
+ /// if the `unicode` feature is enabled or not. It will prefer unicode word
+ /// splitting over word splitting depending on the feature flag.
+ ///
+ /// Requires the `inline` feature.
+ #[cfg(feature = "inline")]
+ pub fn iter_inline_changes<'slf>(
+ &'slf self,
+ op: &DiffOp,
+ ) -> impl Iterator<Item = InlineChange<'slf, T>> + '_
+ where
+ 'slf: 'old + 'new,
+ {
+ inline::iter_inline_changes(self, op)
+ }
+}
+
+/// Use the text differ to find `n` close matches.
+///
+/// `cutoff` defines the threshold which needs to be reached for a word
+/// to be considered similar. See [`TextDiff::ratio`] for more information.
+///
+/// ```
+/// # use similar::get_close_matches;
+/// let matches = get_close_matches(
+/// "appel",
+/// &["ape", "apple", "peach", "puppy"][..],
+/// 3,
+/// 0.6
+/// );
+/// assert_eq!(matches, vec!["apple", "ape"]);
+/// ```
+///
+/// Requires the `text` feature.
+pub fn get_close_matches<'a, T: DiffableStr + ?Sized>(
+ word: &T,
+ possibilities: &[&'a T],
+ n: usize,
+ cutoff: f32,
+) -> Vec<&'a T> {
+ let mut matches = BinaryHeap::new();
+ let seq1 = word.tokenize_chars();
+ let quick_ratio = QuickSeqRatio::new(&seq1);
+
+ for &possibility in possibilities {
+ let seq2 = possibility.tokenize_chars();
+
+ if upper_seq_ratio(&seq1, &seq2) < cutoff || quick_ratio.calc(&seq2) < cutoff {
+ continue;
+ }
+
+ let diff = TextDiff::from_slices(&seq1, &seq2);
+ let ratio = diff.ratio();
+ if ratio >= cutoff {
+ // we're putting the word itself in reverse in so that matches with
+ // the same ratio are ordered lexicographically.
+ matches.push(((ratio * std::u32::MAX as f32) as u32, Reverse(possibility)));
+ }
+ }
+
+ let mut rv = vec![];
+ for _ in 0..n {
+ if let Some((_, elt)) = matches.pop() {
+ rv.push(elt.0);
+ } else {
+ break;
+ }
+ }
+
+ rv
+}
+
+#[test]
+fn test_captured_ops() {
+ let diff = TextDiff::from_lines(
+ "Hello World\nsome stuff here\nsome more stuff here\n",
+ "Hello World\nsome amazing stuff here\nsome more stuff here\n",
+ );
+ insta::assert_debug_snapshot!(&diff.ops());
+}
+
+#[test]
+fn test_captured_word_ops() {
+ let diff = TextDiff::from_words(
+ "Hello World\nsome stuff here\nsome more stuff here\n",
+ "Hello World\nsome amazing stuff here\nsome more stuff here\n",
+ );
+ let changes = diff
+ .ops()
+ .iter()
+ .flat_map(|op| diff.iter_changes(op))
+ .collect::<Vec<_>>();
+ insta::assert_debug_snapshot!(&changes);
+}
+
+#[test]
+fn test_unified_diff() {
+ let diff = TextDiff::from_lines(
+ "Hello World\nsome stuff here\nsome more stuff here\n",
+ "Hello World\nsome amazing stuff here\nsome more stuff here\n",
+ );
+ assert!(diff.newline_terminated());
+ insta::assert_snapshot!(&diff
+ .unified_diff()
+ .context_radius(3)
+ .header("old", "new")
+ .to_string());
+}
+
+#[test]
+fn test_line_ops() {
+ let a = "Hello World\nsome stuff here\nsome more stuff here\n";
+ let b = "Hello World\nsome amazing stuff here\nsome more stuff here\n";
+ let diff = TextDiff::from_lines(a, b);
+ assert!(diff.newline_terminated());
+ let changes = diff
+ .ops()
+ .iter()
+ .flat_map(|op| diff.iter_changes(op))
+ .collect::<Vec<_>>();
+ insta::assert_debug_snapshot!(&changes);
+
+ #[cfg(feature = "bytes")]
+ {
+ let byte_diff = TextDiff::from_lines(a.as_bytes(), b.as_bytes());
+ let byte_changes = byte_diff
+ .ops()
+ .iter()
+ .flat_map(|op| byte_diff.iter_changes(op))
+ .collect::<Vec<_>>();
+ for (change, byte_change) in changes.iter().zip(byte_changes.iter()) {
+ assert_eq!(change.to_string_lossy(), byte_change.to_string_lossy());
+ }
+ }
+}
+
+#[test]
+fn test_virtual_newlines() {
+ let diff = TextDiff::from_lines("a\nb", "a\nc\n");
+ assert!(diff.newline_terminated());
+ let changes = diff
+ .ops()
+ .iter()
+ .flat_map(|op| diff.iter_changes(op))
+ .collect::<Vec<_>>();
+ insta::assert_debug_snapshot!(&changes);
+}
+
+#[test]
+fn test_char_diff() {
+ let diff = TextDiff::from_chars("Hello World", "Hallo Welt");
+ insta::assert_debug_snapshot!(diff.ops());
+
+ #[cfg(feature = "bytes")]
+ {
+ let byte_diff = TextDiff::from_chars("Hello World".as_bytes(), "Hallo Welt".as_bytes());
+ assert_eq!(diff.ops(), byte_diff.ops());
+ }
+}
+
+#[test]
+fn test_ratio() {
+ let diff = TextDiff::from_chars("abcd", "bcde");
+ assert_eq!(diff.ratio(), 0.75);
+ let diff = TextDiff::from_chars("", "");
+ assert_eq!(diff.ratio(), 1.0);
+}
+
+#[test]
+fn test_get_close_matches() {
+ let matches = get_close_matches("appel", &["ape", "apple", "peach", "puppy"][..], 3, 0.6);
+ assert_eq!(matches, vec!["apple", "ape"]);
+ let matches = get_close_matches(
+ "hulo",
+ &[
+ "hi", "hulu", "hali", "hoho", "amaz", "zulo", "blah", "hopp", "uulo", "aulo",
+ ][..],
+ 5,
+ 0.7,
+ );
+ assert_eq!(matches, vec!["aulo", "hulu", "uulo", "zulo"]);
+}
+
+#[test]
+fn test_lifetimes_on_iter() {
+ use crate::Change;
+
+ fn diff_lines<'x, T>(old: &'x T, new: &'x T) -> Vec<Change<&'x T::Output>>
+ where
+ T: DiffableStrRef + ?Sized,
+ {
+ TextDiff::from_lines(old, new).iter_all_changes().collect()
+ }
+
+ let a = "1\n2\n3\n".to_string();
+ let b = "1\n99\n3\n".to_string();
+ let changes = diff_lines(&a, &b);
+ insta::assert_debug_snapshot!(&changes);
+}
+
+#[test]
+#[cfg(feature = "serde")]
+fn test_serde() {
+ let diff = TextDiff::from_lines(
+ "Hello World\nsome stuff here\nsome more stuff here\n\nAha stuff here\nand more stuff",
+ "Stuff\nHello World\nsome amazing stuff here\nsome more stuff here\n",
+ );
+ let changes = diff
+ .ops()
+ .iter()
+ .flat_map(|op| diff.iter_changes(op))
+ .collect::<Vec<_>>();
+ let json = serde_json::to_string_pretty(&changes).unwrap();
+ insta::assert_snapshot!(&json);
+}
+
+#[test]
+#[cfg(feature = "serde")]
+fn test_serde_ops() {
+ let diff = TextDiff::from_lines(
+ "Hello World\nsome stuff here\nsome more stuff here\n\nAha stuff here\nand more stuff",
+ "Stuff\nHello World\nsome amazing stuff here\nsome more stuff here\n",
+ );
+ let changes = diff.ops();
+ let json = serde_json::to_string_pretty(&changes).unwrap();
+ insta::assert_snapshot!(&json);
+}
+
+#[test]
+fn test_regression_issue_37() {
+ let config = TextDiffConfig::default();
+ let diff = config.diff_lines("\u{18}\n\n", "\n\n\r");
+ let mut output = diff.unified_diff();
+ assert_eq!(
+ output.context_radius(0).to_string(),
+ "@@ -1 +1,0 @@\n-\u{18}\n@@ -2,0 +2,2 @@\n+\n+\r"
+ );
+}
diff --git a/vendor/similar/src/text/snapshots/similar__text__captured_ops.snap b/vendor/similar/src/text/snapshots/similar__text__captured_ops.snap
new file mode 100644
index 0000000..cce4066
--- /dev/null
+++ b/vendor/similar/src/text/snapshots/similar__text__captured_ops.snap
@@ -0,0 +1,22 @@
+---
+source: src/text/mod.rs
+expression: "&diff.ops()"
+---
+[
+ Equal {
+ old_index: 0,
+ new_index: 0,
+ len: 1,
+ },
+ Replace {
+ old_index: 1,
+ old_len: 1,
+ new_index: 1,
+ new_len: 1,
+ },
+ Equal {
+ old_index: 2,
+ new_index: 2,
+ len: 1,
+ },
+]
diff --git a/vendor/similar/src/text/snapshots/similar__text__captured_word_ops.snap b/vendor/similar/src/text/snapshots/similar__text__captured_word_ops.snap
new file mode 100644
index 0000000..9232c8d
--- /dev/null
+++ b/vendor/similar/src/text/snapshots/similar__text__captured_word_ops.snap
@@ -0,0 +1,202 @@
+---
+source: src/text/mod.rs
+expression: "&changes"
+---
+[
+ Change {
+ tag: Equal,
+ old_index: Some(
+ 0,
+ ),
+ new_index: Some(
+ 0,
+ ),
+ value: "Hello",
+ },
+ Change {
+ tag: Equal,
+ old_index: Some(
+ 1,
+ ),
+ new_index: Some(
+ 1,
+ ),
+ value: " ",
+ },
+ Change {
+ tag: Equal,
+ old_index: Some(
+ 2,
+ ),
+ new_index: Some(
+ 2,
+ ),
+ value: "World",
+ },
+ Change {
+ tag: Equal,
+ old_index: Some(
+ 3,
+ ),
+ new_index: Some(
+ 3,
+ ),
+ value: "\n",
+ },
+ Change {
+ tag: Equal,
+ old_index: Some(
+ 4,
+ ),
+ new_index: Some(
+ 4,
+ ),
+ value: "some",
+ },
+ Change {
+ tag: Equal,
+ old_index: Some(
+ 5,
+ ),
+ new_index: Some(
+ 5,
+ ),
+ value: " ",
+ },
+ Change {
+ tag: Insert,
+ old_index: None,
+ new_index: Some(
+ 6,
+ ),
+ value: "amazing",
+ },
+ Change {
+ tag: Insert,
+ old_index: None,
+ new_index: Some(
+ 7,
+ ),
+ value: " ",
+ },
+ Change {
+ tag: Equal,
+ old_index: Some(
+ 6,
+ ),
+ new_index: Some(
+ 8,
+ ),
+ value: "stuff",
+ },
+ Change {
+ tag: Equal,
+ old_index: Some(
+ 7,
+ ),
+ new_index: Some(
+ 9,
+ ),
+ value: " ",
+ },
+ Change {
+ tag: Equal,
+ old_index: Some(
+ 8,
+ ),
+ new_index: Some(
+ 10,
+ ),
+ value: "here",
+ },
+ Change {
+ tag: Equal,
+ old_index: Some(
+ 9,
+ ),
+ new_index: Some(
+ 11,
+ ),
+ value: "\n",
+ },
+ Change {
+ tag: Equal,
+ old_index: Some(
+ 10,
+ ),
+ new_index: Some(
+ 12,
+ ),
+ value: "some",
+ },
+ Change {
+ tag: Equal,
+ old_index: Some(
+ 11,
+ ),
+ new_index: Some(
+ 13,
+ ),
+ value: " ",
+ },
+ Change {
+ tag: Equal,
+ old_index: Some(
+ 12,
+ ),
+ new_index: Some(
+ 14,
+ ),
+ value: "more",
+ },
+ Change {
+ tag: Equal,
+ old_index: Some(
+ 13,
+ ),
+ new_index: Some(
+ 15,
+ ),
+ value: " ",
+ },
+ Change {
+ tag: Equal,
+ old_index: Some(
+ 14,
+ ),
+ new_index: Some(
+ 16,
+ ),
+ value: "stuff",
+ },
+ Change {
+ tag: Equal,
+ old_index: Some(
+ 15,
+ ),
+ new_index: Some(
+ 17,
+ ),
+ value: " ",
+ },
+ Change {
+ tag: Equal,
+ old_index: Some(
+ 16,
+ ),
+ new_index: Some(
+ 18,
+ ),
+ value: "here",
+ },
+ Change {
+ tag: Equal,
+ old_index: Some(
+ 17,
+ ),
+ new_index: Some(
+ 19,
+ ),
+ value: "\n",
+ },
+]
diff --git a/vendor/similar/src/text/snapshots/similar__text__char_diff.snap b/vendor/similar/src/text/snapshots/similar__text__char_diff.snap
new file mode 100644
index 0000000..b32f29a
--- /dev/null
+++ b/vendor/similar/src/text/snapshots/similar__text__char_diff.snap
@@ -0,0 +1,39 @@
+---
+source: src/text/mod.rs
+expression: diff.ops()
+---
+[
+ Equal {
+ old_index: 0,
+ new_index: 0,
+ len: 1,
+ },
+ Replace {
+ old_index: 1,
+ old_len: 1,
+ new_index: 1,
+ new_len: 1,
+ },
+ Equal {
+ old_index: 2,
+ new_index: 2,
+ len: 5,
+ },
+ Replace {
+ old_index: 7,
+ old_len: 2,
+ new_index: 7,
+ new_len: 1,
+ },
+ Equal {
+ old_index: 9,
+ new_index: 8,
+ len: 1,
+ },
+ Replace {
+ old_index: 10,
+ old_len: 1,
+ new_index: 9,
+ new_len: 1,
+ },
+]
diff --git a/vendor/similar/src/text/snapshots/similar__text__inline__line_ops_inline.snap b/vendor/similar/src/text/snapshots/similar__text__inline__line_ops_inline.snap
new file mode 100644
index 0000000..2133460
--- /dev/null
+++ b/vendor/similar/src/text/snapshots/similar__text__inline__line_ops_inline.snap
@@ -0,0 +1,126 @@
+---
+source: src/text/inline.rs
+expression: "&changes"
+---
+[
+ InlineChange {
+ tag: Insert,
+ old_index: None,
+ new_index: Some(
+ 0,
+ ),
+ values: [
+ (
+ false,
+ "Stuff\n",
+ ),
+ ],
+ },
+ InlineChange {
+ tag: Equal,
+ old_index: Some(
+ 0,
+ ),
+ new_index: Some(
+ 1,
+ ),
+ values: [
+ (
+ false,
+ "Hello World\n",
+ ),
+ ],
+ },
+ InlineChange {
+ tag: Delete,
+ old_index: Some(
+ 1,
+ ),
+ new_index: None,
+ values: [
+ (
+ false,
+ "some ",
+ ),
+ (
+ false,
+ "stuff here\n",
+ ),
+ ],
+ },
+ InlineChange {
+ tag: Insert,
+ old_index: None,
+ new_index: Some(
+ 2,
+ ),
+ values: [
+ (
+ false,
+ "some ",
+ ),
+ (
+ true,
+ "amazing ",
+ ),
+ (
+ false,
+ "stuff here\n",
+ ),
+ ],
+ },
+ InlineChange {
+ tag: Equal,
+ old_index: Some(
+ 2,
+ ),
+ new_index: Some(
+ 3,
+ ),
+ values: [
+ (
+ false,
+ "some more stuff here\n",
+ ),
+ ],
+ },
+ InlineChange {
+ tag: Delete,
+ old_index: Some(
+ 3,
+ ),
+ new_index: None,
+ values: [
+ (
+ false,
+ "\n",
+ ),
+ ],
+ },
+ InlineChange {
+ tag: Delete,
+ old_index: Some(
+ 4,
+ ),
+ new_index: None,
+ values: [
+ (
+ false,
+ "Aha stuff here\n",
+ ),
+ ],
+ },
+ InlineChange {
+ tag: Delete,
+ old_index: Some(
+ 5,
+ ),
+ new_index: None,
+ values: [
+ (
+ false,
+ "and more stuff",
+ ),
+ ],
+ },
+]
diff --git a/vendor/similar/src/text/snapshots/similar__text__inline__serde.snap b/vendor/similar/src/text/snapshots/similar__text__inline__serde.snap
new file mode 100644
index 0000000..44ab829
--- /dev/null
+++ b/vendor/similar/src/text/snapshots/similar__text__inline__serde.snap
@@ -0,0 +1,107 @@
+---
+source: src/text/inline.rs
+expression: "&json"
+
+---
+[
+ {
+ "tag": "insert",
+ "old_index": null,
+ "new_index": 0,
+ "values": [
+ [
+ false,
+ "Stuff\n"
+ ]
+ ]
+ },
+ {
+ "tag": "equal",
+ "old_index": 0,
+ "new_index": 1,
+ "values": [
+ [
+ false,
+ "Hello World\n"
+ ]
+ ]
+ },
+ {
+ "tag": "delete",
+ "old_index": 1,
+ "new_index": null,
+ "values": [
+ [
+ false,
+ "some "
+ ],
+ [
+ false,
+ "stuff here\n"
+ ]
+ ]
+ },
+ {
+ "tag": "insert",
+ "old_index": null,
+ "new_index": 2,
+ "values": [
+ [
+ false,
+ "some "
+ ],
+ [
+ true,
+ "amazing "
+ ],
+ [
+ false,
+ "stuff here\n"
+ ]
+ ]
+ },
+ {
+ "tag": "equal",
+ "old_index": 2,
+ "new_index": 3,
+ "values": [
+ [
+ false,
+ "some more stuff here\n"
+ ]
+ ]
+ },
+ {
+ "tag": "delete",
+ "old_index": 3,
+ "new_index": null,
+ "values": [
+ [
+ false,
+ "\n"
+ ]
+ ]
+ },
+ {
+ "tag": "delete",
+ "old_index": 4,
+ "new_index": null,
+ "values": [
+ [
+ false,
+ "Aha stuff here\n"
+ ]
+ ]
+ },
+ {
+ "tag": "delete",
+ "old_index": 5,
+ "new_index": null,
+ "values": [
+ [
+ false,
+ "and more stuff"
+ ]
+ ]
+ }
+]
diff --git a/vendor/similar/src/text/snapshots/similar__text__lifetimes_on_iter.snap b/vendor/similar/src/text/snapshots/similar__text__lifetimes_on_iter.snap
new file mode 100644
index 0000000..4bb626d
--- /dev/null
+++ b/vendor/similar/src/text/snapshots/similar__text__lifetimes_on_iter.snap
@@ -0,0 +1,42 @@
+---
+source: src/text/mod.rs
+expression: "&changes"
+---
+[
+ Change {
+ tag: Equal,
+ old_index: Some(
+ 0,
+ ),
+ new_index: Some(
+ 0,
+ ),
+ value: "1\n",
+ },
+ Change {
+ tag: Delete,
+ old_index: Some(
+ 1,
+ ),
+ new_index: None,
+ value: "2\n",
+ },
+ Change {
+ tag: Insert,
+ old_index: None,
+ new_index: Some(
+ 1,
+ ),
+ value: "99\n",
+ },
+ Change {
+ tag: Equal,
+ old_index: Some(
+ 2,
+ ),
+ new_index: Some(
+ 2,
+ ),
+ value: "3\n",
+ },
+]
diff --git a/vendor/similar/src/text/snapshots/similar__text__line_ops.snap b/vendor/similar/src/text/snapshots/similar__text__line_ops.snap
new file mode 100644
index 0000000..f187259
--- /dev/null
+++ b/vendor/similar/src/text/snapshots/similar__text__line_ops.snap
@@ -0,0 +1,42 @@
+---
+source: src/text/mod.rs
+expression: "&changes"
+---
+[
+ Change {
+ tag: Equal,
+ old_index: Some(
+ 0,
+ ),
+ new_index: Some(
+ 0,
+ ),
+ value: "Hello World\n",
+ },
+ Change {
+ tag: Delete,
+ old_index: Some(
+ 1,
+ ),
+ new_index: None,
+ value: "some stuff here\n",
+ },
+ Change {
+ tag: Insert,
+ old_index: None,
+ new_index: Some(
+ 1,
+ ),
+ value: "some amazing stuff here\n",
+ },
+ Change {
+ tag: Equal,
+ old_index: Some(
+ 2,
+ ),
+ new_index: Some(
+ 2,
+ ),
+ value: "some more stuff here\n",
+ },
+]
diff --git a/vendor/similar/src/text/snapshots/similar__text__serde.snap b/vendor/similar/src/text/snapshots/similar__text__serde.snap
new file mode 100644
index 0000000..13418a6
--- /dev/null
+++ b/vendor/similar/src/text/snapshots/similar__text__serde.snap
@@ -0,0 +1,55 @@
+---
+source: src/text/mod.rs
+expression: "&json"
+
+---
+[
+ {
+ "tag": "insert",
+ "old_index": null,
+ "new_index": 0,
+ "value": "Stuff\n"
+ },
+ {
+ "tag": "equal",
+ "old_index": 0,
+ "new_index": 1,
+ "value": "Hello World\n"
+ },
+ {
+ "tag": "delete",
+ "old_index": 1,
+ "new_index": null,
+ "value": "some stuff here\n"
+ },
+ {
+ "tag": "insert",
+ "old_index": null,
+ "new_index": 2,
+ "value": "some amazing stuff here\n"
+ },
+ {
+ "tag": "equal",
+ "old_index": 2,
+ "new_index": 3,
+ "value": "some more stuff here\n"
+ },
+ {
+ "tag": "delete",
+ "old_index": 3,
+ "new_index": null,
+ "value": "\n"
+ },
+ {
+ "tag": "delete",
+ "old_index": 4,
+ "new_index": null,
+ "value": "Aha stuff here\n"
+ },
+ {
+ "tag": "delete",
+ "old_index": 5,
+ "new_index": null,
+ "value": "and more stuff"
+ }
+]
diff --git a/vendor/similar/src/text/snapshots/similar__text__serde_ops.snap b/vendor/similar/src/text/snapshots/similar__text__serde_ops.snap
new file mode 100644
index 0000000..040fe97
--- /dev/null
+++ b/vendor/similar/src/text/snapshots/similar__text__serde_ops.snap
@@ -0,0 +1,38 @@
+---
+source: src/text/mod.rs
+expression: "&json"
+
+---
+[
+ {
+ "op": "insert",
+ "old_index": 0,
+ "new_index": 0,
+ "new_len": 1
+ },
+ {
+ "op": "equal",
+ "old_index": 0,
+ "new_index": 1,
+ "len": 1
+ },
+ {
+ "op": "replace",
+ "old_index": 1,
+ "old_len": 1,
+ "new_index": 2,
+ "new_len": 1
+ },
+ {
+ "op": "equal",
+ "old_index": 2,
+ "new_index": 3,
+ "len": 1
+ },
+ {
+ "op": "delete",
+ "old_index": 3,
+ "old_len": 3,
+ "new_index": 4
+ }
+]
diff --git a/vendor/similar/src/text/snapshots/similar__text__unified_diff.snap b/vendor/similar/src/text/snapshots/similar__text__unified_diff.snap
new file mode 100644
index 0000000..77f409a
--- /dev/null
+++ b/vendor/similar/src/text/snapshots/similar__text__unified_diff.snap
@@ -0,0 +1,12 @@
+---
+source: src/text/mod.rs
+expression: "&diff.unified_diff().context_radius(3).header(\"old\", \"new\").to_string()"
+---
+--- old
++++ new
+@@ -1,3 +1,3 @@
+ Hello World
+-some stuff here
++some amazing stuff here
+ some more stuff here
+
diff --git a/vendor/similar/src/text/snapshots/similar__text__virtual_newlines.snap b/vendor/similar/src/text/snapshots/similar__text__virtual_newlines.snap
new file mode 100644
index 0000000..a3915a8
--- /dev/null
+++ b/vendor/similar/src/text/snapshots/similar__text__virtual_newlines.snap
@@ -0,0 +1,32 @@
+---
+source: src/text/mod.rs
+expression: "&changes"
+---
+[
+ Change {
+ tag: Equal,
+ old_index: Some(
+ 0,
+ ),
+ new_index: Some(
+ 0,
+ ),
+ value: "a\n",
+ },
+ Change {
+ tag: Delete,
+ old_index: Some(
+ 1,
+ ),
+ new_index: None,
+ value: "b",
+ },
+ Change {
+ tag: Insert,
+ old_index: None,
+ new_index: Some(
+ 1,
+ ),
+ value: "c\n",
+ },
+]
diff --git a/vendor/similar/src/text/utils.rs b/vendor/similar/src/text/utils.rs
new file mode 100644
index 0000000..d4a440f
--- /dev/null
+++ b/vendor/similar/src/text/utils.rs
@@ -0,0 +1,55 @@
+use std::collections::HashMap;
+use std::hash::Hash;
+
+use super::DiffableStrRef;
+
+// quick and dirty way to get an upper sequence ratio.
+pub fn upper_seq_ratio<T: PartialEq>(seq1: &[T], seq2: &[T]) -> f32 {
+ let n = seq1.len() + seq2.len();
+ if n == 0 {
+ 1.0
+ } else {
+ 2.0 * seq1.len().min(seq2.len()) as f32 / n as f32
+ }
+}
+
+/// Internal utility to calculate an upper bound for a ratio for
+/// [`get_close_matches`]. This is based on Python's difflib approach
+/// of considering the two sets to be multisets.
+///
+/// It counts the number of matches without regard to order, which is an
+/// obvious upper bound.
+pub struct QuickSeqRatio<'a, T: DiffableStrRef + ?Sized>(HashMap<&'a T, i32>);
+
+impl<'a, T: DiffableStrRef + Hash + Eq + ?Sized> QuickSeqRatio<'a, T> {
+ pub fn new(seq: &[&'a T]) -> QuickSeqRatio<'a, T> {
+ let mut counts = HashMap::new();
+ for &word in seq {
+ *counts.entry(word).or_insert(0) += 1;
+ }
+ QuickSeqRatio(counts)
+ }
+
+ pub fn calc(&self, seq: &[&T]) -> f32 {
+ let n = self.0.len() + seq.len();
+ if n == 0 {
+ return 1.0;
+ }
+
+ let mut available = HashMap::new();
+ let mut matches = 0;
+ for &word in seq {
+ let x = if let Some(count) = available.get(&word) {
+ *count
+ } else {
+ self.0.get(&word).copied().unwrap_or(0)
+ };
+ available.insert(word, x - 1);
+ if x > 0 {
+ matches += 1;
+ }
+ }
+
+ 2.0 * matches as f32 / n as f32
+ }
+}